from HTMLParser import HTMLParser
class ValueParser(HTMLParser):
    """HTML parser that collects text found inside selected <p> elements.

    A <p> start tag whose attribute list contains the pair
    ("class", "valuable information") switches collection on; any </p>
    end tag switches it off again.  Every data chunk reported while
    collection is on is appended to ``self.data``.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        # True while the parser is between a matching <p ...> and the next </p>.
        self.inside = False
        # Data chunks gathered so far, in the order they were reported.
        self.data = []

    def handle_starttag(self, tag, attrs):
        """Switch collection on when a matching <p> start tag is seen."""
        if tag != "p":
            return
        # attrs is searched for the exact (name, value) pair.
        if any(pair == ("class", "valuable information") for pair in attrs):
            self.inside = True

    def handle_data(self, data):
        """Store a data chunk, but only while collection is switched on."""
        if not self.inside:
            return
        self.data.append(data)

    def handle_endtag(self, tag):
        """Switch collection off at any </p>, whichever <p> opened it."""
        if tag != "p":
            return
        self.inside = False
def get_valuables(url):
    """Return the data chunks that ValueParser collects from "some.html".

    NOTE: ``url`` is accepted but currently unused; the input is always
    read from the local file "some.html".
    TODO: fetch the document from ``url`` instead of the fixed local file.

    Returns a new list (a copy of the parser's ``data`` list), so callers
    may modify it freely.

    Any error raised by ``open`` (for example when "some.html" does not
    exist) propagates to the caller.
    """
    parser = ValueParser()
    # The with-block guarantees the file handle is closed, even when
    # feeding the parser raises part-way through the file.
    with open("some.html", "r") as html_file:
        for line in html_file:
            parser.feed(line)
    return parser.data[:]
if __name__ == "__main__":
    # Script entry point: collect the valuables and print the resulting list.
    valuables = get_valuables("blabla")
    print(valuables)