Skip to content

Commit 47379fe

Browse files
author
jackjansen
committed
Added an option to the scanner to generate marked-up HTML from the input
file. This should make it a lot easier (I hope) to get the regular expressions right. git-svn-id: http://svn.python.org/projects/python/trunk@39171 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent fea9ecc commit 47379fe

1 file changed

Lines changed: 161 additions & 6 deletions

File tree

Tools/bgen/bgen/scantools.py

Lines changed: 161 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,76 @@
3232

3333
Error = "scantools.Error"
3434

35+
# Prologue of the HTML scan report: stylesheet, a legend explaining the
# typographic conventions, and the opening of the <pre> block that holds the
# marked-up header text.  The trailing <span class="unmatched"> and <pre>
# are deliberately left open; ENDHTMLREPORT closes them.
BEGINHTMLREPORT = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<style type="text/css">
.unmatched { }
.commentstripping { color: grey; text-decoration: line-through }
.comment { text-decoration: line-through }
.notcomment { color: black }
.incomplete { color: maroon }
.constant { color: green }
.pyconstant { background-color: yellow }
.blconstant { background-color: yellow; color: red }
.declaration { color: blue }
.pydeclaration { background-color: yellow }
.type { font-style: italic }
.name { font-weight: bold }
.value { font-style: italic }
.arglist { text-decoration: underline }
.blacklisted { background-color: yellow; color: red }
</style>
<title>Bgen scan report</title>
</head>
<body>
<h1>Bgen scan report</h1>
<h2>Legend</h2>
<p>This scan report is intended to help you debug the regular expressions
used by the bgen scanner. It consists of the original ".h" header file(s)
marked up to show you what the regular expressions in the bgen parser matched
for each line. NOTE: comments in the original source files may or may not be
shown.</p>
<p>The typographic conventions of this file are as follows:</p>
<dl>
<dt>comment stripping</dt>
<dd><pre><span class="commentstripping"><span class="notcomment">comment stripping is </span><span class="comment">/* marked up */</span><span class="notcomment"> and the line is repeated if needed</span></span></pre>
<p>If anything here does not appear to happen correctly look at
<tt>comment1_pat</tt> and <tt>comment2_pat</tt>.</p>
</dd>
<dt>constant definitions</dt>
<dd><pre><span class="constant">#define <span class="name">name</span> <span class="value">value</span></span></pre>
<p>Highlights name and value of the constant. Governed by <tt>sym_pat</tt>.</p>
</dd>
<dt>function declaration</dt>
<dd><pre><span class="declaration"><span class="type">char *</span><span class="name">rindex</span><span class="arglist">(<span class="type">const char *</span><span class="name">s</span>, <span class="type">int </span><span class="name">c</span>)</span>;</span></pre>
<p>Highlights type, name and argument list. <tt>type_pat</tt>,
<tt>name_pat</tt> and <tt>args_pat</tt> are combined into <tt>whole_pat</tt>, which
is what is used here.</p>
</dd>
<dt>incomplete match for function declaration</dt>
<dd><pre><span class="incomplete"><span class="type">char *</span>foo;</span></pre>
<p>The beginning of this looked promising, but it did not match a function declaration.
In other words, it matched <tt>head_pat</tt> but not <tt>whole_pat</tt>. If the next
declaration has also been gobbled up you need to look at <tt>end_pat</tt>.</p>
</dd>
<dt>unrecognized input</dt>
<dd><pre><span class="unmatched">#include "type.h"</span></pre>
<p>If there are function declarations the scanner has missed (i.e. things
are in this class but you want them to be declarations) you need to adapt
<tt>head_pat</tt>.</p>
</dd>
</dl>
<h2>Output</h2>
<pre>
<span class="unmatched">
"""

# Epilogue of the HTML scan report: closes the span, <pre>, <body> and <html>
# elements that BEGINHTMLREPORT leaves open.
ENDHTMLREPORT = """</span>
</pre>
</body>
</html>
"""
104+
35105
class Scanner:
36106

37107
# Set to 1 in subclass to debug your scanner patterns.
@@ -232,9 +302,11 @@ def initfiles(self):
232302
self.specmine = 0
233303
self.defsmine = 0
234304
self.scanmine = 0
305+
self.htmlmine = 0
235306
self.specfile = sys.stdout
236307
self.defsfile = None
237308
self.scanfile = sys.stdin
309+
self.htmlfile = None
238310
self.lineno = 0
239311
self.line = ""
240312

@@ -286,6 +358,7 @@ def closefiles(self):
286358
self.closespec()
287359
self.closedefs()
288360
self.closescan()
361+
self.closehtml()
289362

290363
def closespec(self):
291364
tmp = self.specmine and self.specfile
@@ -301,6 +374,12 @@ def closescan(self):
301374
tmp = self.scanmine and self.scanfile
302375
self.scanfile = None
303376
if tmp: tmp.close()
377+
378+
def closehtml(self):
    """Finish the HTML report, if any, and forget its file object.

    When a report file is set, ENDHTMLREPORT is written to it first.
    The file is closed only when self.htmlmine is true; otherwise it is
    merely dropped by setting self.htmlfile to None.
    """
    report = self.htmlfile
    if report:
        report.write(ENDHTMLREPORT)
    # Decide about closing before the attribute is cleared.
    owned = self.htmlmine and report
    self.htmlfile = None
    if owned:
        owned.close()
304383

305384
def setoutput(self, spec, defs = None):
306385
self.closespec()
@@ -324,6 +403,19 @@ def setoutput(self, spec, defs = None):
324403
self.defsfile = file
325404
self.defsmine = mine
326405

406+
def sethtmloutput(self, htmlfile):
    """Select where the HTML scan report is written.

    Any previously set report is closed first via self.closehtml().

    htmlfile -- a filename (opened through self.openoutput() and marked
                as owned by setting self.htmlmine to 1), an object with
                a write() method (used as is, self.htmlmine set to 0),
                or a false value, in which case nothing more is done.

    BEGINHTMLREPORT is written to the newly selected file.
    """
    self.closehtml()
    if not htmlfile:
        return
    if type(htmlfile) == StringType:
        # A filename: open it ourselves and remember that we own it.
        self.htmlfile = self.openoutput(htmlfile)
        self.htmlmine = 1
    else:
        # Caller-supplied file object: use it but never close it.
        self.htmlfile = htmlfile
        self.htmlmine = 0
    self.htmlfile.write(BEGINHTMLREPORT)
418+
327419
def openoutput(self, filename):
328420
try:
329421
file = open(filename, 'w')
@@ -408,11 +500,17 @@ def scan(self):
408500
self.report("LINE: %r" % (line,))
409501
match = self.comment1.match(line)
410502
if match:
503+
self.htmlreport(line, klass='commentstripping', ranges=[(
504+
match.start('rest'), match.end('rest'), 'notcomment')])
411505
line = match.group('rest')
412506
if self.debug:
413507
self.report("\tafter comment1: %r" % (line,))
414508
match = self.comment2.match(line)
415509
while match:
510+
if match:
511+
self.htmlreport(line, klass='commentstripping', ranges=[
512+
(match.start('rest1'), match.end('rest1'), 'notcomment'),
513+
(match.start('rest2'), match.end('rest2'), 'notcomment')])
416514
line = match.group('rest1')+match.group('rest2')
417515
if self.debug:
418516
self.report("\tafter comment2: %r" % (line,))
@@ -422,27 +520,34 @@ def scan(self):
422520
if match:
423521
if self.debug:
424522
self.report("\tmatches sym.")
425-
self.dosymdef(match)
523+
self.dosymdef(match, line)
426524
continue
427525
match = self.head.match(line)
428526
if match:
429527
if self.debug:
430528
self.report("\tmatches head.")
431529
self.dofuncspec()
432530
continue
531+
self.htmlreport(line, klass='unmatched')
433532
except EOFError:
434533
self.error("Uncaught EOF error")
435534
self.reportusedtypes()
436535

437-
def dosymdef(self, match):
536+
def dosymdef(self, match, line):
    """Process one constant definition matched in the header.

    match -- match object providing the 'name' and 'defn' groups.
    line  -- the input line the match was made against; it is echoed to
             the HTML report with the name and value highlighted.

    The definition is passed through escape8bit() and written to
    self.defsfile as a Python assignment, or as a commented-out
    assignment when the name is in self.blacklistnames.  Either outcome
    is also noted in the HTML report.
    """
    name, defn = match.group('name', 'defn')
    highlights = [
        (match.start('name'), match.end('name'), 'name'),
        (match.start('defn'), match.end('defn'), 'value'),
    ]
    self.htmlreport(line, klass='constant', ranges=highlights)
    defn = escape8bit(defn)
    if self.debug:
        self.report("\tsym: name=%r, defn=%r" % (name, defn))
    # XXXX No way to handle greylisted names
    if name in self.blacklistnames:
        self.defsfile.write("# %s = %s\n" % (name, defn))
        self.htmlreport("** no output: name is blacklisted", klass="blconstant")
        return
    assignment = "%s = %s\n" % (name, defn)
    self.defsfile.write(assignment)
    self.htmlreport(assignment, klass="pyconstant")
447552

448553
def dofuncspec(self):
@@ -473,19 +578,30 @@ def processrawspec(self, raw):
473578
if not match:
474579
self.report("Bad raw spec: %r", raw)
475580
if self.debug:
476-
if not self.type.search(raw):
581+
match = self.type.search(raw)
582+
if not match:
477583
self.report("(Type already doesn't match)")
584+
self.htmlreport(raw, klass='incomplete', ranges=[(
585+
match.start('type'), match.end('type'), 'type')])
478586
else:
479587
self.report("(but type matched)")
588+
self.htmlreport(raw, klass='incomplete')
480589
return
481590
type, name, args = match.group('type', 'name', 'args')
591+
ranges=[
592+
(match.start('type'), match.end('type'), 'type'),
593+
(match.start('name'), match.end('name'), 'name'),
594+
(match.start('args'), match.end('args'), 'arglist')]
595+
self.htmlreport(raw, klass='declaration', ranges=ranges)
482596
modifiers = self.getmodifiers(match)
483597
type = self.pythonizename(type)
484598
name = self.pythonizename(name)
485599
if self.checkduplicate(name):
600+
self.htmlreport("*** no output generated: duplicate name", klass="blacklisted")
486601
return
487602
self.report("==> %s %s <==", type, name)
488603
if self.blacklisted(type, name):
604+
self.htmlreport("*** no output generated: function name or return type blacklisted", klass="blacklisted")
489605
self.report("*** %s %s blacklisted", type, name)
490606
return
491607
returnlist = [(type, name, 'ReturnMode')]
@@ -494,6 +610,7 @@ def processrawspec(self, raw):
494610
arglist = self.extractarglist(args)
495611
arglist = self.repairarglist(name, arglist)
496612
if self.unmanageable(type, name, arglist):
613+
self.htmlreport("*** no output generated: some argument blacklisted", klass="blacklisted")
497614
##for arg in arglist:
498615
## self.report(" %r", arg)
499616
self.report("*** %s %s unmanageable", type, name)
@@ -611,8 +728,12 @@ def generate(self, tp, name, arglist, modifiers=[]):
611728
classname, listname = self.destination(tp, name, arglist, modifiers)
612729
else:
613730
classname, listname = self.destination(tp, name, arglist)
614-
if not classname or not listname: return
615-
if not self.specfile: return
731+
if not classname or not listname:
732+
self.htmlreport("*** no output generated: self.destination() returned None", klass="blacklisted")
733+
return
734+
if not self.specfile:
735+
self.htmlreport("*** no output generated: no output file specified", klass="blacklisted")
736+
return
616737
self.specfile.write("f = %s(%s, %r,\n" % (classname, tp, name))
617738
for atype, aname, amode in arglist:
618739
self.typeused(atype, amode)
@@ -623,6 +744,12 @@ def generate(self, tp, name, arglist, modifiers=[]):
623744
self.generatemodifiers(classname, name, modifiers)
624745
self.specfile.write(")\n")
625746
self.specfile.write("%s.append(f)\n\n" % listname)
747+
if self.htmlfile:
748+
oline = "Adding to %s:\n%s(returntype=%s, name=%r" % (listname, classname, tp, name)
749+
for atype, aname, amode in arglist:
750+
oline += ",\n (%s, %r, %s)" % (atype, aname, amode)
751+
oline += ")\n"
752+
self.htmlreport(oline, klass="pydeclaration")
626753

627754
def destination(self, type, name, arglist):
628755
return "FunctionGenerator", "functions"
@@ -646,6 +773,34 @@ def unmanageable(self, type, name, arglist):
646773
return 1
647774
return 0
648775

776+
def htmlreport(self, line, klass=None, ranges=None):
    """Write one line of text to the HTML report with span markup.

    Does nothing when self.htmlfile is not set.

    line   -- the text to emit.  '&', '<' and '>' are replaced by
              character entities; a newline is appended when the text
              does not already end in one.
    klass  -- optional CSS class for a span around the whole line.
    ranges -- optional sequence of (begin, end, cssclass) tuples; a span
              of that class is opened before line[begin] and closed
              before line[end].  The sequence is not modified.
    """
    if not self.htmlfile:
        return
    # Build a private list so the caller's ranges are never mutated.
    spans = []
    if klass:
        spans.append((0, len(line), klass))
    if ranges:
        spans.extend(ranges)
    # '&' must be escaped too, otherwise header text such as "a & b"
    # or "&lt;" is misrendered by the browser.
    entities = {'&': '&amp;', '<': '&lt;', '>': '&gt;'}
    pieces = []
    for pos, char in enumerate(line):
        for begin, end, name in spans:
            if begin == pos:
                pieces.append('<span class="%s">' % name)
            if end == pos:
                pieces.append('</span>')
        pieces.append(entities.get(char, char))
    # Handle spans that start or finish at (or beyond) the end of the line.
    pos = len(line)
    for begin, end, name in spans:
        if begin >= pos:
            pieces.append('<span class="%s">' % name)
        if end >= pos:
            pieces.append('</span>')
    if not line or line[-1] != '\n':
        pieces.append('\n')
    self.htmlfile.write(''.join(pieces))
803+
649804
class Scanner_PreUH3(Scanner):
650805
"""Scanner for Universal Headers before release 3"""
651806
def initpatterns(self):

0 commit comments

Comments
 (0)