"""Print the text of the <title> element of an HTML file.

Usage: python SCRIPT FILE
"""

try:  # Python 3 module names
    from html.entities import entitydefs
    from html.parser import HTMLParser
except ImportError:  # Python 2 module names
    from htmlentitydefs import entitydefs
    from HTMLParser import HTMLParser
import sys


class TitleParser(HTMLParser):
    """HTML parser that accumulates the text found inside <title> tags.

    Attributes:
        title: text collected so far from inside <title> elements.
        readingtitle: 1 while the parser is inside a <title> element, else 0.
    """

    def __init__(self):
        self.title = ''
        self.readingtitle = 0
        try:
            # On Python 3 the parser would otherwise decode references itself
            # and never call handle_entityref / handle_charref.
            HTMLParser.__init__(self, convert_charrefs=False)
        except TypeError:
            # Older parsers (Python 2) do not accept the keyword.
            HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Start collecting text when a <title> tag opens."""
        if tag == 'title':
            self.readingtitle = 1

    def handle_data(self, data):
        """Append *data* to the title while inside a <title> element."""
        if self.readingtitle:
            self.title += data

    def handle_endtag(self, tag):
        """Stop collecting text when the <title> tag closes."""
        if tag == 'title':
            self.readingtitle = 0

    def handle_entityref(self, name):
        """Expand a named entity such as ``&amp;``.

        Unknown entity names are passed through literally as ``&name;``.
        """
        if name in entitydefs:
            self.handle_data(entitydefs[name])
        else:
            self.handle_data('&' + name + ';')

    def handle_charref(self, name):
        """Expand a numeric reference such as ``&#65;`` or ``&#x41;``.

        Malformed numbers are ignored. Code points outside 1..255 are
        dropped, which keeps the range the original implementation accepted.
        """
        try:
            if name[:1] in ('x', 'X'):
                # The parser hands hexadecimal references over as 'x41'.
                charnum = int(name[1:], 16)
            else:
                charnum = int(name)
        except ValueError:
            return
        if charnum < 1 or charnum > 255:
            return
        self.handle_data(chr(charnum))

    def gettitle(self):
        """Return the title text collected so far."""
        return self.title


def main(argv=None):
    """Parse the HTML file named in ``argv[1]`` and print its title.

    Args:
        argv: argument list; defaults to ``sys.argv``.

    Returns:
        0 on success, 2 when no file name was supplied.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write("usage: %s FILE\n" % argv[0])
        return 2
    tp = TitleParser()
    # The context manager closes the file even if parsing raises.
    with open(argv[1]) as fd:
        tp.feed(fd.read())
    print("Title is: %s" % tp.gettitle())
    return 0


if __name__ == '__main__':
    sys.exit(main())
Name (required)
Email (will not be published) (required)
Website