"""Echo an HTML file, then echo it again as re-serialized by ``parseAttrs``.

Written so that it runs on both Python 2 and Python 3.
"""

import sys
import urllib  # not used below; kept in case other code in this file needs it

try:
    import HTMLParser  # Python 2 module name
except ImportError:
    import html.parser as HTMLParser  # Python 3 home of the same class


class parseAttrs(HTMLParser.HTMLParser):
    """Rebuild fed HTML piece by piece, double-quoting attribute values.

    Usage: call ``init_parser()``, ``feed()`` the markup, ``close()`` the
    parser, then read the rebuilt document from ``parsed()``.
    """

    def init_parser(self):
        """Start with an empty output buffer."""
        self.pieces = []
        # Python 3's parser decodes character references into plain text by
        # default and never calls handle_charref / handle_entityref.
        # Switching that off lets references be echoed verbatim. Python 2's
        # parser does not read this attribute.
        self.convert_charrefs = False

    def _render_tag(self, tag, attrs, closer=">"):
        """Return ``<tag name="value" ...`` followed by *closer*."""
        parts = [tag]
        for name, value in attrs:
            if value is None:
                # Valueless attribute such as ``disabled``: keep it bare
                # instead of printing the string "None".
                parts.append(name)
            else:
                # The parser hands over attribute values with references
                # already decoded, so re-escape what would break the quoting.
                escaped = value.replace("&", "&amp;").replace('"', "&quot;")
                parts.append('%s="%s"' % (name, escaped))
        return "<%s%s" % (" ".join(parts), closer)

    def handle_starttag(self, tag, attrs):
        """Record a start tag with each attribute value double-quoted."""
        self.pieces.append(self._render_tag(tag, attrs))

    def handle_startendtag(self, tag, attrs):
        """Record ``<tag ... />`` as one self-closing tag.

        The inherited implementation would call handle_starttag and then
        handle_endtag, turning ``<br/>`` into an open/close pair.
        """
        self.pieces.append(self._render_tag(tag, attrs, " />"))

    def handle_endtag(self, tag):
        """Record an end tag."""
        self.pieces.append("</%s>" % tag)

    def handle_charref(self, name):
        """Record a numeric character reference such as ``&#65;``."""
        self.pieces.append("&#%s;" % name)

    def handle_entityref(self, ref):
        """Record a named entity reference, including its closing ``;``."""
        self.pieces.append("&%s;" % ref)

    def handle_data(self, text):
        """Record text content unchanged."""
        self.pieces.append(text)

    def handle_comment(self, text):
        """Record a comment."""
        self.pieces.append("<!--%s-->" % text)

    def handle_pi(self, text):
        """Record a processing instruction."""
        self.pieces.append("<?%s>" % text)

    def handle_decl(self, text):
        """Record a declaration such as a doctype."""
        self.pieces.append("<!%s>" % text)

    def parsed(self):
        """Return everything recorded so far as a single string."""
        return "".join(self.pieces)


def main(path="test2.html"):
    """Print the file at *path*, then print its re-serialized form."""
    # Read the file once; the context manager closes the handle.
    with open(path) as source:
        original = source.read()

    attrParser = parseAttrs()
    attrParser.init_parser()
    attrParser.feed(original)
    # close() pushes any still-buffered input through the handlers, so it
    # has to run before parsed() is read.
    attrParser.close()

    print(original)
    print(attrParser.parsed())


if __name__ == "__main__":
    main(sys.argv[1] if len(sys.argv) > 1 else "test2.html")
Name (required)
Email (will not be published) (required)
Website