|
| 1 | +from parser import * |
| 2 | + |
class ParseError(Exception):
    """Raised by HTMLParser.parseError when the parser runs in strict mode."""


class HTMLParser(object):
    """Main parser class.

    ``parse`` builds a tokenizer around this parser instance and runs it
    over the input stream.  The ``process*`` methods and the two error hooks
    currently only print what they are given.
    NOTE(review): the tokenizer is handed ``self``, presumably so that it can
    call these hooks as it emits tokens -- confirm against the tokenizer.
    """

    def __init__(self, strict=False):
        """Create a parser.

        strict -- when true, ``parseError`` raises ``ParseError`` on the
                  first error encountered instead of only reporting it.
        """
        self.strict = strict
        # Set by parse(); defined up front so the hooks below can be called
        # on a parser that has not parsed anything yet.
        self.innerHTML = False
        self.tokenizer = None

    def parse(self, stream, innerHTML=False):
        """Tokenize ``stream``.

        Stream should be a stream of unicode bytes.  Character encoding
        issues have not yet been dealt with.

        innerHTML -- recorded on the instance only; inner HTML parsing is
                     not actually supported yet, but storing the flag
                     allows assertions on it.
        """
        self.innerHTML = innerHTML

        # ``tokenizer`` is not defined in this class; it is expected to be
        # brought into scope by the module-level wildcard import.
        self.tokenizer = tokenizer.HTMLTokenizer(self)
        self.tokenizer.tokenize(stream)

    # The print calls below pass a single pre-formatted string so that the
    # output is the same whether ``print`` is a statement or a function.

    def processDoctype(self, name, error):
        """Report a DOCTYPE token."""
        print("DOCTYPE: %s %s" % (name, error))

    def processStartTag(self, name, attributes):
        """Report a start tag and its attributes."""
        print("StartTag: %s %s" % (name, attributes))

    def processEndTag(self, name, attributes):
        """Report an end tag and its attributes."""
        print("EndTag: %s %s" % (name, attributes))

    def processComment(self, data):
        """Report a comment token."""
        print("Comment: %s" % (data,))

    def processCharacter(self, data):
        """Report character data."""
        print("Character: %s" % (data,))

    def processEOF(self):
        """Report the end of the input."""
        print("EOF")

    def parseError(self):
        """Report a parse error together with the tokenizer state.

        Raises ParseError after reporting when the parser is strict.
        """
        # The tokenizer only exists once parse() has run; fall back to None
        # rather than failing with AttributeError.
        state = getattr(self.tokenizer, "state", None)
        message = "Parse Error %s" % (state,)
        print(message)
        if self.strict:
            raise ParseError(message)

    def atheistParseError(self):
        """This error is not an error; it is reported but never raised."""
        print("Atheist Parse Error")
0 commit comments