diff --git a/src/exp2python/python/SCL/Part21.py b/src/exp2python/python/SCL/Part21.py index 3bca32325..3490b55bf 100644 --- a/src/exp2python/python/SCL/Part21.py +++ b/src/exp2python/python/SCL/Part21.py @@ -35,33 +35,51 @@ # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import logging + import ply.lex as lex import ply.yacc as yacc +from ply.lex import LexError logger = logging.getLogger(__name__) + +# ensure Python 2.6 compatibility +if not hasattr(logging, 'NullHandler'): + class NullHandler(logging.Handler): + def handle(self, record): + pass + def emit(self, record): + pass + def createLock(self): + self.lock = None + + setattr(logging, 'NullHandler', NullHandler) + logger.addHandler(logging.NullHandler()) #################################################################################################### # Common Code for Lexer / Parser #################################################################################################### -class Base: - tokens = ['INTEGER', 'REAL', 'USER_DEFINED_KEYWORD', 'STANDARD_KEYWORD', 'STRING', 'BINARY', - 'ENTITY_INSTANCE_NAME', 'ENUMERATION', 'PART21_END', 'PART21_START', 'HEADER_SEC', - 'ENDSEC', 'DATA_SEC'] +base_tokens = ['INTEGER', 'REAL', 'USER_DEFINED_KEYWORD', 'STANDARD_KEYWORD', 'STRING', 'BINARY', + 'ENTITY_INSTANCE_NAME', 'ENUMERATION', 'PART21_END', 'PART21_START', 'HEADER_SEC', + 'ENDSEC', 'DATA'] #################################################################################################### # Lexer #################################################################################################### -class Lexer(Base): - states = (('compatibility', 'inclusive'),) - - def __init__(self, debug=0, optimize=0, compatibility_mode=False, header_limit=1024, extra_tokens=None): - if extra_tokens: self.tokens += extra_tokens - self.entity_mapping = {} +class Lexer(object): + tokens = list(base_tokens) + states = (('slurp', 'exclusive'),) + + def __init__(self, debug=0, optimize=0, compatibility_mode=False, header_limit=4096): + self.base_tokens = list(base_tokens) + self.schema_dict = {} + self.active_schema = {} + self.input_length = 0 self.compatibility_mode = compatibility_mode self.header_limit = header_limit self.lexer = lex.lex(module=self, debug=debug, debuglog=logger, optimize=optimize, errorlog=logger) + self.reset() def __getattr__(self, name): if name == 'lineno': @@ -72,83 +90,118 @@ def __getattr__(self, name): raise AttributeError def input(self, s): - startidx = s.find('ISO-10303-21;', 0, self.header_limit) - if startidx == -1: - raise ValueError('ISO-10303-21 header not found') - self.lexer.input(s[startidx:]) - self.lexer.lineno += s[0:startidx].count('\n') + self.lexer.input(s) + self.input_length += len(s) - if self.compatibility_mode: - self.lexer.begin('compatibility') - else: - self.lexer.begin('INITIAL') - + def reset(self): + self.lexer.lineno = 1 + self.lexer.begin('slurp') + def token(self): try: return next(self.lexer) except StopIteration: return None - def register_entities(self, entities): + def activate_schema(self, schema_name): + if schema_name in self.schema_dict: + self.active_schema = self.schema_dict[schema_name] + else: + raise ValueError('schema not registered') + + def register_schema(self, schema_name, entities): + if schema_name in self.schema_dict: + raise ValueError('schema already registered') + + for k in entities: + if k in self.base_tokens: raise ValueError('schema cannot override base_tokens') + if isinstance(entities, list): - entities = {k: k for k in entities} + entities = dict((k, k) for k in entities) - self.entity_mapping.update(entities) + self.schema_dict[schema_name] = entities + + def t_slurp_PART21_START(self, t): + r'ISO-10303-21;' + t.lexer.begin('INITIAL') + return t + + def t_slurp_error(self, t): + offset = t.value.find('\nISO-10303-21;', 0, self.header_limit) + if offset == -1 and self.header_limit < len(t.value): # not found within header_limit + raise LexError("Scanning error. try increasing lexer header_limit parameter", + "{0}...".format(t.value[0:20])) + elif offset == -1: # not found before EOF + t.lexer.lexpos = self.input_length + else: # found ISO-10303-21; + offset += 1 # also skip the \n + t.lexer.lineno += t.value[0:offset].count('\n') + t.lexer.skip(offset) # Comment (ignored) - def t_ANY_COMMENT(self, t): + def t_COMMENT(self, t): r'/\*(.|\n)*?\*/' t.lexer.lineno += t.value.count('\n') - def t_ANY_PART21_START(self, t): - r'ISO-10303-21;' - return t - - def t_ANY_PART21_END(self, t): + def t_PART21_END(self, t): r'END-ISO-10303-21;' + t.lexer.begin('slurp') return t - def t_ANY_HEADER_SEC(self, t): + def t_HEADER_SEC(self, t): r'HEADER;' return t - def t_ANY_ENDSEC(self, t): + def t_ENDSEC(self, t): r'ENDSEC;' return t # Keywords - def t_compatibility_STANDARD_KEYWORD(self, t): + def t_STANDARD_KEYWORD(self, t): r'(?:!|)[A-Za-z_][0-9A-Za-z_]*' - t.value = t.value.upper() - if t.value == 'DATA': - t.type = 'DATA_SEC' - elif t.value in self.entity_mapping: - t.type = self.entity_mapping[t.value] - elif t.value.startswith('!'): - t.type = 'USER_DEFINED_KEYWORD' - return t - - def t_ANY_STANDARD_KEYWORD(self, t): - r'(?:!|)[A-Z_][0-9A-Z_]*' - if t.value == 'DATA': - t.type = 'DATA_SEC' - elif t.value in self.entity_mapping: - t.type = self.entity_mapping[t.value] + if self.compatibility_mode: + t.value = t.value.upper() + elif not t.value.isupper(): + raise LexError('Scanning error. Mixed/lower case keyword detected, please use compatibility_mode=True', t.value) + + if t.value in self.base_tokens: + t.type = t.value + elif t.value in self.active_schema: + t.type = self.active_schema[t.value] elif t.value.startswith('!'): t.type = 'USER_DEFINED_KEYWORD' return t - def t_ANY_newline(self, t): + def t_newline(self, t): r'\n+' t.lexer.lineno += len(t.value) - + # Simple Data Types - t_ANY_REAL = r'[+-]*[0-9][0-9]*\.[0-9]*(?:E[+-]*[0-9][0-9]*)?' - t_ANY_INTEGER = r'[+-]*[0-9][0-9]*' - t_ANY_STRING = r"'(?:[][!\"*$%&.#+,\-()?/:;<=>@{}|^`~0-9a-zA-Z_\\ ]|'')*'" - t_ANY_BINARY = r'"[0-3][0-9A-F]*"' - t_ANY_ENTITY_INSTANCE_NAME = r'\#[0-9]+' - t_ANY_ENUMERATION = r'\.[A-Z_][A-Z0-9_]*\.' + def t_REAL(self, t): + r'[+-]*[0-9][0-9]*\.[0-9]*(?:E[+-]*[0-9][0-9]*)?' + t.value = float(t.value) + return t + + def t_INTEGER(self, t): + r'[+-]*[0-9][0-9]*' + t.value = int(t.value) + return t + + def t_STRING(self, t): + r"'(?:[][!\"*$%&.#+,\-()?/:;<=>@{}|^`~0-9a-zA-Z_\\ ]|'')*'" + t.value = t.value[1:-1] + return t + + def t_BINARY(self, t): + r'"[0-3][0-9A-F]*"' + try: + t.value = int(t.value[2:-1], base=16) + except ValueError: + t.value = None + return t + + t_ENTITY_INSTANCE_NAME = r'\#[0-9]+' + t_ENUMERATION = r'\.[A-Z_][A-Z0-9_]*\.' # Punctuation literals = '()=;,*$' @@ -174,7 +227,7 @@ def __init__(self, file_description, file_name, file_schema): class HeaderEntity: def __init__(self, type_name, *params): self.type_name = type_name - self.params = list(*params) if params else [] + self.params = list(params) if params else [] class Section: def __init__(self, entities): @@ -184,61 +237,64 @@ class SimpleEntity: def __init__(self, ref, type_name, *params): self.ref = ref self.type_name = type_name - self.params = list(*params) if params else [] + self.params = list(params) if params else [] class ComplexEntity: def __init__(self, ref, *params): self.ref = ref - self.params = list(*params) if params else [] + self.params = list(params) if params else [] class TypedParameter: def __init__(self, type_name, *params): self.type_name = type_name - self.params = list(*params) if params else None + self.params = list(params) if params else None #################################################################################################### # Parser #################################################################################################### -class Parser(Base): +class Parser(object): + tokens = list(base_tokens) start = 'exchange_file' - + def __init__(self, lexer=None, debug=0): - self.parser = yacc.yacc(module=self, debug=debug, debuglog=logger, errorlog=logger) + self.lexer = lexer if lexer else Lexer() - if lexer is None: - lexer = Lexer() - self.lexer = lexer + try: self.tokens = lexer.tokens + except AttributeError: pass + self.parser = yacc.yacc(module=self, debug=debug, debuglog=logger, errorlog=logger) + self.reset() + def parse(self, p21_data, **kwargs): + #TODO: will probably need to change this function if the lexer is ever to support t_eof + self.lexer.reset() self.lexer.input(p21_data) - self.refs = {} - self.in_p21_exchange_structure = False if 'debug' in kwargs: result = self.parser.parse(lexer=self.lexer, debug=logger, - **{ k: kwargs[k] for k in kwargs if k != 'debug'}) + ** dict((k, v) for k, v in kwargs.iteritems() if k != 'debug')) else: result = self.parser.parse(lexer=self.lexer, **kwargs) return result + def reset(self): + self.refs = {} + self.is_in_exchange_structure = False + def p_exchange_file(self, p): - """exchange_file : p21_start header_section data_section_list p21_end""" + """exchange_file : check_p21_start_token header_section data_section_list check_p21_end_token""" p[0] = P21File(p[2], p[3]) - def p_p21_start(self, p): - """p21_start : PART21_START""" - if self.in_p21_exchange_structure: - raise SyntaxError - self.in_p21_exchange_structure = True + def p_check_start_token(self, p): + """check_p21_start_token : PART21_START""" + self.is_in_exchange_structure = True p[0] = p[1] - - def p_p21_end(self, p): - """p21_end : PART21_END""" - if not self.in_p21_exchange_structure: - raise SyntaxError - self.in_p21_exchange_structure = False + + def p_check_end_token(self, p): + """check_p21_end_token : PART21_END""" + self.is_in_exchange_structure = False p[0] = p[1] - + # TODO: Specialise the first 3 header entities def p_header_section(self, p): """header_section : HEADER_SEC header_entity header_entity header_entity ENDSEC""" @@ -256,8 +312,8 @@ def p_header_entity(self, p): def p_check_entity_instance_name(self, p): """check_entity_instance_name : ENTITY_INSTANCE_NAME""" if p[1] in self.refs: - logger.error('Line %i, duplicate entity instance name: %s', p.lineno(1), p[1]) - raise ValueError('Duplicate entity instance name') + logger.error('Line: {0}, SyntaxError - Duplicate Entity Instance Name: {1}'.format(p.lineno(1), p[1])) + raise SyntaxError else: self.refs[p[1]] = None p[0] = p[1] @@ -266,6 +322,11 @@ def p_simple_entity_instance(self, p): """simple_entity_instance : check_entity_instance_name '=' simple_record ';'""" p[0] = SimpleEntity(p[1], *p[3]) + def p_entity_instance_error(self, p): + """simple_entity_instance : error '=' simple_record ';' + complex_entity_instance : error '=' subsuper_record ';'""" + pass + def p_complex_entity_instance(self, p): """complex_entity_instance : check_entity_instance_name '=' subsuper_record ';'""" p[0] = ComplexEntity(p[1], p[3]) @@ -323,12 +384,12 @@ def p_parameter_empty_list(self, p): p[0] = [] def p_data_start(self, p): - """data_start : DATA_SEC '(' parameter_list ')' ';'""" + """data_start : DATA '(' parameter_list ')' ';'""" pass def p_data_start_empty(self, p): - """data_start : DATA_SEC '(' ')' ';' - | DATA_SEC ';'""" + """data_start : DATA '(' ')' ';' + | DATA ';'""" pass def p_data_section(self, p): @@ -337,10 +398,13 @@ def p_data_section(self, p): def p_entity_instance_list(self, p): """entity_instance_list : entity_instance_list entity_instance - | empty""" + | entity_instance""" try: p[0] = p[1] + [p[2],] - except IndexError: pass # p[2] doesn't exist, p[1] is None - except TypeError: p[0] = [p[2],] # p[1] is None, p[2] is valid + except IndexError: p[0] = [p[1],] + + def p_entity_instance_list_empty(self, p): + """entity_instance_list : empty""" + p[0] = [] def p_entity_instance(self, p): """entity_instance : simple_entity_instance @@ -367,34 +431,60 @@ def p_empty(self, p): pass def test_debug(): + import os.path + logging.basicConfig() logger.setLevel(logging.DEBUG) - s = open('io1-tu-203.stp', 'r').read() parser = Parser() - - try: - r = parser.parse(s, debug=1) - except SystemExit: - pass - - return (parser, r) + parser.reset() + + logger.info("***** parser debug *****") + p = os.path.expanduser('~/projects/src/stepcode/data/ap214e3/s1-c5-214/s1-c5-214.stp') + with open(p, 'rU') as f: + s = f.read() + try: + parser.parse(s, debug=1) + except SystemExit: + pass + + logger.info("***** finished *****") def test(): + import os, os.path, itertools, codecs + logging.basicConfig() - logger.setLevel(logging.ERROR) + logger.setLevel(logging.INFO) - s = open('io1-tu-203.stp', 'r').read() parser = Parser() - - try: - r = parser.parse(s) - except SystemExit: - pass - - return (parser, r) - + compat_list = [] + + def parse_check(p): + logger.info("processing {0}".format(p)) + parser.reset() + with open(p, 'rU') as f: + iso_wrapper = codecs.EncodedFile(f, 'iso-8859-1') + s = iso_wrapper.read() + parser.parse(s) + + logger.info("***** standard test *****") + for d, _, files in os.walk(os.path.expanduser('~/projects/src/stepcode')): + for f in itertools.ifilter(lambda x: x.endswith('.stp'), files): + p = os.path.join(d, f) + try: + parse_check(p) + except LexError: + logger.exception('Lexer issue, adding {0} to compatibility test list'.format(os.path.basename(p))) + compat_list.append(p) + + lexer = Lexer(compatibility_mode=True) + parser = Parser(lexer=lexer) + + logger.info("***** compatibility test *****") + for p in compat_list: + parse_check(p) + + logger.info("***** finished *****") if __name__ == '__main__': test() -