# ***** BEGIN LICENSE BLOCK ***** # Version: MPL 1.1/GPL 2.0/LGPL 2.1 # # The contents of this file are subject to the Mozilla Public License # Version 1.1 (the "License"); you may not use this file except in # compliance with the License. You may obtain a copy of the License at # http://www.mozilla.org/MPL/ # # Software distributed under the License is distributed on an "AS IS" # basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the # License for the specific language governing rights and limitations # under the License. # # The Original Code is Komodo code. # # The Initial Developer of the Original Code is ActiveState Software Inc. # Portions created by ActiveState Software Inc are Copyright (C) 2000-2007 # ActiveState Software Inc. All Rights Reserved. # # Contributor(s): # ActiveState Software Inc # # Alternatively, the contents of this file may be used under the terms of # either the GNU General Public License Version 2 or later (the "GPL"), or # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), # in which case the provisions of the GPL or the LGPL are applicable instead # of those above. If you wish to allow use of your version of this file only # under the terms of either the GPL or the LGPL, and not to allow others to # use your version of this file under the terms of the MPL, indicate your # decision by deleting the provisions above and replace them with the notice # and other provisions required by the GPL or the LGPL. If you do not delete # the provisions above, a recipient may use your version of this file under # the terms of any one of the MPL, the GPL or the LGPL. 
#
# ***** END LICENSE BLOCK *****

"""Parse RELAX NG (.rng) grammars into a simple lookup structure.

The ``rng`` class walks an RNG XML file, collects the elements, attributes,
attribute values and named definitions it finds into an ``rng_dataset``,
and then expands ``<ref>`` entries against the collected ``<define>``
blocks. The dataset can be queried by element name (case-insensitively)
for possible child elements, attributes and attribute values.
"""

import os
from xml.dom import pulldom
import logging
import re

log = logging.getLogger("koRNG")
log.setLevel(logging.DEBUG)

from elementtree import XMLTreeBuilder
try:
    import cElementTree as ElementTree  # effbot's C module
except ImportError:
    log.error(
        "using element tree and not cElementTree, performace will suffer")
    import elementtree.ElementTree as ElementTree  # effbot's pure Python module


class NamespaceParser(XMLTreeBuilder.FancyTreeBuilder):
    """Tree builder that annotates each element with namespace details.

    Every element started by the parser receives three extra attributes:
    ``namespaces`` (a copy of the builder's current namespace list), ``ns``
    (the namespace URI taken from the ``{uri}local`` tag) and ``tagName``
    (the local part of the tag).
    """

    # Splits an ElementTree-style "{namespace}local" tag into its two parts.
    _qname = re.compile(r"{(.*?)}(.*)")

    def start(self, element):
        """Attach ``namespaces``, ``ns`` and ``tagName`` to *element*."""
        element.namespaces = self.namespaces[:]
        qn = self._qname.match(element.tag)
        if qn is None:
            # Tag carries no "{namespace}" prefix: fall back to the raw tag
            # instead of failing on a missing match object.
            element.ns = ""
            element.tagName = element.tag
        else:
            element.ns = qn.group(1)
            element.tagName = qn.group(2)


class rng_base_dataset:
    """Common container for elements, attributes, values and pending refs."""

    def __init__(self):
        self.name = None
        self.elements = []  # root level elements
        self.attributes = []
        self.values = []
        self.refs = []  # names of definitions still to be merged in

    def resolveRefs(self, dataset):
        """Merge the content of every referenced definition into this object.

        For each name in ``self.refs`` that exists in ``dataset.defs``, the
        name is removed from ``self.refs`` and the definition's named
        attributes, named elements and values are appended to this object.
        Names that have no definition yet are left in ``self.refs`` and this
        object is recorded under that name in ``dataset.ref_unresolved``.
        """
        # Iterate over a snapshot: self.refs is mutated inside the loop.
        for ref in self.refs[:]:
            if ref not in self.refs:
                # A re-entrant call (reached through a cycle of definitions)
                # already resolved and removed this reference.
                continue
            if ref not in dataset.defs:
                dataset.ref_unresolved.setdefault(ref, []).append(self)
                continue
            d = dataset.defs[ref]
            # Drop the ref before recursing so that a definition cycle does
            # not try to resolve the same reference again.
            self.refs.remove(ref)
            if d.refs:
                d.resolveRefs(dataset)
            # grab what we care about from this definition
            self.attributes += [a for a in d.attributes if a.name]
            self.elements += [e for e in d.elements if e.name]
            self.values += d.values


class rng_dataset(rng_base_dataset):
    """Root container holding everything collected from an RNG grammar."""

    def __init__(self):
        rng_base_dataset.__init__(self)
        self.name = "root"
        self.all_elements = {}  # name -> info object
        self.elements_caseless = {}  # lower-cased name -> info object
        self.defs = {}  # definition name -> definition
        self.namespace = ""
        self.datatypeLibrary = ""
        self.xmlns = ""
        self.ref_resolving = {}
        # Read by resolveCircularRefs(); must exist or that method fails
        # with AttributeError.
        self.ref_circular = {}
        self.ref_unresolved = {}  # ref name -> objects waiting on it

    def resolveRefs(self, dataset=None):
        """Resolve refs on the root, all definitions, elements and attributes.

        *dataset* defaults to this object. Finishes with a retry of the
        references that were recorded as unresolved along the way.
        """
        if dataset is None:
            dataset = self
        rng_base_dataset.resolveRefs(self, dataset)
        for d in list(self.defs.values()):
            d.resolveRefs(dataset)
        for e in list(self.all_elements.values()):
            e.resolveRefs(dataset)
        for a in self.attributes[:]:
            a.resolveRefs(dataset)
        self.resolveUnresolvedRefs()

    def resolveCircularRefs(self):
        """Re-run resolution for every object stored in ``ref_circular``."""
        # Snapshot the keys: entries are deleted while looping.
        for ref in list(self.ref_circular):
            el = self.ref_circular.pop(ref)
            for e in el:
                e.resolveRefs(self)

    def resolveUnresolvedRefs(self):
        """Retry resolution for every object stored in ``ref_unresolved``.

        Objects whose reference is still undefined are recorded again by
        ``rng_base_dataset.resolveRefs``.
        """
        # Snapshot the keys: entries are deleted (and possibly re-added)
        # while looping.
        for ref in list(self.ref_unresolved):
            log.debug("resolving earlier unresolved reference %s", ref)
            el = self.ref_unresolved.pop(ref)
            for e in el:
                e.resolveRefs(self)

    def element_info(self, element_name):
        """Return the info object for *element_name* (case-insensitive).

        Returns None when the name is unknown.
        """
        return self.elements_caseless.get(element_name.lower())

    def possible_children(self, element_name=None):
        """Return the names of the child elements of *element_name*.

        With no (or an empty) name, the names of the root level elements are
        returned. An unknown name yields an empty list.
        """
        if not element_name:
            return [el.name for el in self.elements]
        info = self.elements_caseless.get(element_name.lower())
        if info is None:
            return []
        return [el.name for el in info.elements]

    def possible_attributes(self, element_name):
        """Return the attribute names of *element_name*, or [] if unknown."""
        info = self.elements_caseless.get(element_name.lower())
        if info is None:
            return []
        return [a.name for a in info.attributes]

    def possible_attribute_values(self, element_name, attribute_name):
        """Return the values collected for an attribute of an element.

        The element lookup is case-insensitive; the attribute name must match
        exactly. Returns [] when either is unknown.
        """
        el = self.element_info(element_name)
        if el:
            for a in el.attributes:
                if attribute_name == a.name:
                    return a.values
        return []

    def all_element_types(self):
        """Return a list of every name registered in ``all_elements``."""
        return list(self.all_elements.keys())

    def dump(self, stream):
        """Write a human-readable summary of the dataset to *stream*."""
        separator = "-" * 60 + "\n"
        # Headers go to the caller's stream as well, not to stdout.
        stream.write("RNG NS: %s\n" % self.xmlns)
        stream.write("Namespace: %s\n" % self.namespace)
        stream.write("datatypeLibrary: %s\n" % self.datatypeLibrary)
        stream.write(separator)
        for e in self.elements:
            e.dump(stream)
        stream.write(separator)
        for e in list(self.all_elements.values()):
            e.dump(stream)
        stream.write(separator)


class rng_node_info(rng_base_dataset):
    """Dataset entry built from a single parsed XML node."""

    def __init__(self, node):
        rng_base_dataset.__init__(self)
        # None when the node has no "name" attribute.
        self.name = node.attrib.get("name")
        self._node = node


class element_info(rng_node_info):
    """Information collected for an ``<element>`` node."""

    def dump(self, stream):
        """Write the element's XML attributes, children and refs to *stream*."""
        attrs = []
        for n, v in list(self._node.attrib.items()):
            attrs.append('%s="%s"' % (n, v))
        # The format string needs a conversion specifier; without one the
        # "%" operator raises TypeError.
        stream.write("<element %s>\n" % ' '.join(attrs))
        names = [el.name for el in self.elements]
        stream.write("    children %r\n" % names)
        for attr in self.attributes:
            attr.dump(stream)
        stream.write("    refs remaining: %r\n" % self.refs)


class attribute_info(rng_node_info):
    """Information collected for an ``<attribute>`` node."""

    def dump(self, stream):
        """Write the attribute name and its collected values to *stream*."""
        stream.write("    attr %s %r\n" % (self.name, self.values))


class definition(rng_node_info):
    """Information collected for a ``<define>`` node."""

    def dump(self, stream):
        """Write counts and names of the definition's content to *stream*."""
        stream.write("definition %s has %d refs\n" % (
            self.name, len(self.refs)))
        names = [el.name for el in self.elements]
        stream.write("    has %d elements %r\n" % (
            len(self.elements), names))
        names = [el.name for el in self.attributes]
        stream.write("    has %d attributes %r\n" % (
            len(self.attributes), names))
        stream.write("    has %d values %r\n" % (
            len(self.values), self.values))

    def resolveRefs(self, dataset):
        """Resolve refs of contained elements and attributes, then our own."""
        for e in self.elements[:]:
            e.resolveRefs(dataset)
        for a in self.attributes[:]:
            a.resolveRefs(dataset)
        rng_node_info.resolveRefs(self, dataset)


class rng:
    """Parser that loads an RNG file into an ``rng_dataset``.

    Parsing happens in the constructor. Nodes are dispatched by tag name to
    ``handle_<tag>`` before their children are visited and to
    ``handle_<tag>_end`` afterwards; tags without a handler are only
    traversed.
    """

    def __init__(self, filename, dataset=None):
        """Parse *filename*, adding to *dataset* (a new one if None)."""
        if dataset is None:
            dataset = rng_dataset()
        self.dataset = dataset
        # Stack of containers; handlers add to whatever is on top.
        self._element_stack = [self.dataset]
        self._includes = []
        self.filename = filename
        self.parse()

    def parse(self):
        """Parse ``self.filename`` and resolve references in the dataset.

        Raises:
            ValueError: if the root element is not ``grammar``.
        """
        self.tree = ElementTree.parse(self.filename, NamespaceParser())
        self.root = self.tree.getroot()
        if self.root.tagName != "grammar":
            # Strings cannot be raised; use a real exception type.
            raise ValueError("Invalid RNG file [%s] root tag [%s]" % (
                self.filename, self.root.tagName))
        # child node -> parent node, used by handle_name_end.
        self.parent_map = dict(
            (c, p) for p in self.tree.getiterator() for c in p)
        self.parseNode(self.root)
        self.dataset.resolveRefs()

    def parseNode(self, node):
        """Dispatch *node* to its handlers and recurse into its children."""
        start_handler = getattr(self, "handle_%s" % node.tagName, None)
        if start_handler is not None:
            start_handler(node)
        for child in list(node):
            self.parseNode(child)
        end_handler = getattr(self, "handle_%s_end" % node.tagName, None)
        if end_handler is not None:
            end_handler(node)

    def handle_include(self, node):
        """Parse the file named by ``href`` into the same dataset.

        A path that does not exist as given is retried relative to the
        directory of the including file.
        """
        # XXX handle relative dirs
        path = node.attrib.get("href")
        if not path:
            # os.path.exists(None) would raise; there is nothing to include.
            log.warning("include without href in %s", self.filename)
            return
        if not os.path.exists(path):
            path = os.path.join(os.path.dirname(self.filename), path)
        rng(path, self.dataset)

    def handle_grammar(self, node):
        """Record grammar-level attributes while no namespace is set yet."""
        if not self.dataset.namespace:
            self.dataset.xmlns = node.attrib.get('xmlns')
            self.dataset.namespace = node.attrib.get('ns')
            self.dataset.datatypeLibrary = node.attrib.get('datatypeLibrary')

    # def handle_start(self, node):
    #     self._element_stack.append(self)
    # def handle_start_end(self, node):
    #     self._element_stack.pop()

    def handle_attribute(self, node):
        """Push a new ``attribute_info`` for *node* onto the stack."""
        self._element_stack.append(attribute_info(node))

    def handle_attribute_end(self, node):
        """Pop the attribute and attach it to the new top of the stack."""
        # attributes get added to the last item in the element stack
        attr = self._element_stack.pop()
        el = self._element_stack[-1]
        el.attributes.append(attr)

    def handle_name_end(self, node):
        """Use the text of a ``<name>`` child of ``<attribute>`` as its name."""
        # is the parent node an attribute?
        parent = self.parent_map[node]
        if node.text and parent.tagName == "attribute":
            e = self._element_stack[-1]
            e.name = node.text
            # NOTE(review): this registers the object on top of the stack in
            # the *element* indexes even though the parent is an attribute;
            # behaviour kept as-is - confirm whether that is intended.
            self.dataset.all_elements[node.text] = e
            self.dataset.elements_caseless[node.text.lower()] = e

    def handle_element(self, node):
        """Push a new ``element_info``; index it by name when it has one."""
        e = element_info(node)
        if e.name:
            self.dataset.all_elements[e.name] = e
            self.dataset.elements_caseless[e.name.lower()] = e
        self._element_stack.append(e)

    def handle_element_end(self, node):
        """Pop the element and add it as a child of the new top of stack."""
        el = self._element_stack.pop()
        self._element_stack[-1].elements.append(el)

    def handle_define(self, node):
        """Register a new ``definition`` by name and push it on the stack."""
        d = definition(node)
        self.dataset.defs[d.name] = d
        self._element_stack.append(d)

    def handle_define_end(self, node):
        """Pop the definition; it stays reachable through ``dataset.defs``."""
        self._element_stack.pop()

    def handle_ref(self, node):
        """Record the referenced definition name on the top of the stack."""
        self._element_stack[-1].refs.append(node.attrib.get("name"))

    def handle_value(self, node):
        """Record the node text as a value on the top of the stack."""
        self._element_stack[-1].values.append(node.text)

    # def handle_zeroOrMore(self, node):
    #     pass
    # def handle_choice(self, node):
    #     pass
    # def handle_interleave(self, node):
    #     pass
    # def handle_mixed(self, node):
    #     pass
    # def handle_empty(self, node):
    #     pass
    # def handle_notAllowed(self, node):
    #     pass
    # def handle_group(self, node):
    #     pass
    # def handle_optional(self, node):
    #     pass
    # def handle_text(self, node):
    #     pass
    # def handle_div(self, node):
    #     pass
    # def handle_list(self, node):
    #     pass
    # def handle_data(self, node):
    #     pass
    # def handle_except(self, node):
    #     pass
    # def handle_oneOrMore(self, node):
    #     pass
    # def handle_param(self, node):
    #     pass


if __name__ == "__main__":
    import sys

    if len(sys.argv) > 1:
        filename = sys.argv[1]
        machine = rng(filename)
    else:
        # we're in src/python-sitelib, we need the contrib dir
        basedir = os.path.dirname(os.path.dirname(os.getcwd()))
        filename = os.path.join(
            basedir, "contrib", "catalogs", "rng", "xslt.rng")
        machine = rng(filename)
        # assert "template" in machine.possible_children("stylesheet")
        # assert "text" in machine.all_element_types()
        # assert machine.possible_children("text")==[]
        # assert machine.possible_children("garbage")==[]
        # assert "version" in machine.possible_attributes("transform")
        # assert machine.possible_attributes("garbage")==[]
        # assert "upper-first" in machine.possible_attribute_values("sort", "case-order")
        # assert machine.possible_attribute_values("garbage", "garbage") == []
        # assert machine.possible_attribute_values("garbate", "case-order") == []

    ## filename = "..\\languages\\xhtml\\xhtml-state-machine.xml"
    ## machine = state_machine_info(filename)
    ## for element in machine.all_element_types():
    ##     if element!="#LITERAL":
    ##         assert "lang" in machine.possible_attributes(element), "no lang on %s" % element

    machine.dataset.dump(sys.stdout)
    # machine.dataset.element_info("tr").dump(sys.stdout)