"""Collect the text of every <h1> element in an XML document using SAX."""

from xml.sax.handler import ContentHandler
from xml.sax import parse


class HeadlineHandler(ContentHandler):
    """SAX handler that appends the text of each headline element to a list.

    The text of every element named ``tag`` (``'h1'`` by default) is
    appended to the caller-supplied ``headlines`` list when the element
    closes. Text inside child elements of the headline is included.
    """

    # True while the parser is between a headline's start and end tags.
    in_headline = False

    def __init__(self, headlines, tag='h1'):
        """Create a handler.

        Args:
            headlines: list that receives one string per headline element.
            tag: name of the element to collect; defaults to ``'h1'``.
        """
        # Explicit base-class call (not super()) keeps this working on
        # Python 2, where ContentHandler is an old-style class.
        ContentHandler.__init__(self)
        self.headlines = headlines
        self.tag = tag
        self.data = []

    def startElement(self, name, attrs):
        """Start collecting character data when a headline element opens."""
        if name == self.tag:
            self.in_headline = True

    def endElement(self, name):
        """Store the collected text when a headline element closes."""
        if name == self.tag:
            self.headlines.append(''.join(self.data))
            self.data = []
            self.in_headline = False

    def characters(self, string):
        """Buffer character data that occurs inside a headline element."""
        # A SAX parser may deliver one run of text in several chunks, so the
        # pieces are buffered and joined in endElement().
        if self.in_headline:
            self.data.append(string)


def main(path='website.xml'):
    """Parse the file at ``path`` and print every <h1> headline found."""
    headlines = []
    parse(path, HeadlineHandler(headlines))

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('The following <h1> elements were found:')
    for h in headlines:
        print(h)


if __name__ == '__main__':
    main()
Name (required)
Email (will not be published) (required)
Website