forked from html5lib/html5lib-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path__init__.py
More file actions
55 lines (45 loc) · 2.33 KB
/
__init__.py
File metadata and controls
55 lines (45 loc) · 2.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
"""A collection of modules for iterating through different kinds of
tree, generating tokens identical to those produced by the tokenizer
module.
To create a tree walker for a new type of tree, you need to do
implement a tree walker object (called TreeWalker by convention) that
implements a 'serialize' method taking a tree as sole argument and
returning an iterator generating tokens.
"""
from __future__ import absolute_import, division, unicode_literals
import sys
treeWalkerCache = {}
def getTreeWalker(treeType, implementation=None, **kwargs):
"""Get a TreeWalker class for various types of tree with built-in support
treeType - the name of the tree type required (case-insensitive). Supported
values are "simpletree", "dom", and "etree"
"simpletree" - a built-in DOM-ish tree type with support for some
more pythonic idioms.
"dom" - The xml.dom.minidom DOM implementation
"pulldom" - The xml.dom.pulldom event stream
"etree" - A generic walker for tree implementations exposing an
elementtree-like interface (known to work with
ElementTree, cElementTree and lxml.etree).
"lxml" - Optimized walker for lxml.etree
"genshi" - a Genshi stream
implementation - (Currently applies to the "etree" tree type only). A module
implementing the tree type e.g. xml.etree.ElementTree or
cElementTree."""
treeType = treeType.lower()
if treeType not in treeWalkerCache:
if treeType in ("dom", "pulldom", "simpletree"):
name = "%s.%s" % (__name__, treeType)
__import__(name)
mod = sys.modules[name]
treeWalkerCache[treeType] = mod.TreeWalker
elif treeType == "genshi":
from . import genshistream
treeWalkerCache[treeType] = genshistream.TreeWalker
elif treeType == "lxml":
from . import lxmletree
treeWalkerCache[treeType] = lxmletree.TreeWalker
elif treeType == "etree":
from . import etree
# XXX: NEVER cache here, caching is done in the etree submodule
return etree.getETreeModule(implementation, **kwargs).TreeWalker
return treeWalkerCache.get(treeType)