Skip to content

Commit 73fc38b

Browse files
committed
1
加文件
1 parent 9862d92 commit 73fc38b

6 files changed

Lines changed: 256 additions & 0 deletions

File tree

listing20-1.txt

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
2+
3+
4+
Welcome to World Wide Spam, Inc.
5+
6+
7+
These are the corporate web pages of *World Wide Spam*, Inc. We hope
8+
you find your stay enjoyable, and that you will sample many of our
9+
products.
10+
11+
A short history of the company
12+
13+
World Wide Spam was started in the summer of 2000. The business
14+
concept was to ride the dot-com wave and to make money both through
15+
bulk email and by selling canned meat online.
16+
17+
After receiving several complaints from customers who weren't
18+
satisfied by their bulk email, World Wide Spam altered their profile,
19+
and focused 100% on canned goods. Today, they rank as the world's
20+
13,892nd online supplier of SPAM.
21+
22+
Destinations
23+
24+
From this page you may visit several of our interesting web pages:
25+
26+
- What is SPAM? (http://wwspam.fu/whatisspam)
27+
28+
- How do they make it? (http://wwspam.fu/howtomakeit)
29+
30+
- Why should I eat it? (http://wwspam.fu/whyeatit)
31+
32+
How to get in touch with us
33+
34+
You can get in touch with us in *many* ways: By phone (555-1234), by
35+
email (wwspam@wwspam.fu) or by visiting our customer feedback page
36+
(http://wwspam.fu/feedback).

listing20-2.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
def lines(file):
    """
    Yield every line of the given iterable, then one extra newline.

    The trailing newline is a sentinel: a consumer that groups lines
    into blank-line-separated blocks is guaranteed to see a blank line
    after the last real line.
    """
    for text in file:
        yield text
    # Sentinel blank line appended after the real input.
    yield '\n'
4+
5+
def blocks(file):
    """
    Yield the blank-line-separated blocks of text found in file.

    Consecutive non-blank lines (as produced by lines(file)) are joined
    into one string, which is yielded with surrounding whitespace
    stripped. Runs of blank lines between blocks produce nothing.
    """
    pending = []
    for text in lines(file):
        if text.strip():
            # Non-blank line: it belongs to the block being collected.
            pending.append(text)
            continue
        if pending:
            # Blank line after content: the current block is complete.
            yield ''.join(pending).strip()
            pending = []

listing20-3.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import sys, re
2+
from util import *
3+
4+
# Simple markup script: reads plain text from standard input and writes
# HTML to standard output. The first block becomes the <h1> title and
# every following block becomes a <p> paragraph.
#
# print(...) with a single parenthesised argument behaves the same on
# Python 2 and Python 3, so the script runs on both.

# The header closes <head> before opening <body>.
print('<html><head><title>...</title></head><body>')

title = True
for block in blocks(sys.stdin):
    # Turn *emphasised* spans into <em> elements (non-greedy match).
    block = re.sub(r'\*(.+?)\*', r'<em>\1</em>', block)
    if title:
        print('<h1>')
        print(block)
        print('</h1>')
        # Only the very first block is treated as the title.
        title = False
    else:
        print('<p>')
        print(block)
        print('</p>')

print('</body></html>')

listing20-4.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
class Handler:
    """
    An object that handles method calls from the Parser.

    Callers invoke the start() and end() methods at the beginning and
    end of each block, with the proper block name as a parameter. The
    sub() method will be used in regular expression substitution. When
    called with a name such as 'emphasis', it will return a proper
    substitution function.
    """

    def callback(self, prefix, name, *args):
        """
        Call the method named prefix+name with args, if there is one.

        Returns that method's result, or None when this object has no
        callable attribute of that name.
        """
        method = getattr(self, prefix + name, None)
        if callable(method):
            return method(*args)
        return None

    def start(self, name):
        """Invoke start_<name>() if it is defined; otherwise do nothing."""
        self.callback('start_', name)

    def end(self, name):
        """Invoke end_<name>() if it is defined; otherwise do nothing."""
        self.callback('end_', name)

    def sub(self, name):
        """
        Return a function suitable as the replacement argument of re.sub.

        The returned function passes the match object to sub_<name>().
        If that method is missing (or returns None) the matched text is
        returned unchanged.
        """
        def substitution(match):
            result = self.callback('sub_', name, match)
            if result is None:
                # Fall back to the original text; without this return
                # the function would hand None back to re.sub.
                return match.group(0)
            return result
        return substitution
24+
25+
class HTMLRenderer(Handler):
    """
    A specific handler used for rendering HTML.

    The methods in HTMLRenderer are accessed from the superclass
    Handler's start(), end(), and sub() methods. They implement basic
    markup as used in HTML documents. All output is written with
    print(), using the single-argument parenthesised form so that the
    class works on both Python 2 and Python 3.
    """

    # Document wrapper.
    def start_document(self):
        print('<html><head><title>...</title></head><body>')

    def end_document(self):
        print('</body></html>')

    # Block-level elements: each start_/end_ pair prints the opening
    # and closing tag for one kind of block.
    def start_paragraph(self):
        print('<p>')

    def end_paragraph(self):
        print('</p>')

    def start_heading(self):
        print('<h2>')

    def end_heading(self):
        print('</h2>')

    def start_list(self):
        print('<ul>')

    def end_list(self):
        print('</ul>')

    def start_listitem(self):
        print('<li>')

    def end_listitem(self):
        print('</li>')

    def start_title(self):
        print('<h1>')

    def end_title(self):
        print('</h1>')

    # Inline substitutions: each receives a match object and returns
    # the replacement markup built from the first captured group.
    def sub_emphasis(self, match):
        return '<em>%s</em>' % match.group(1)

    def sub_url(self, match):
        return '<a href="%s">%s</a>' % (match.group(1), match.group(1))

    def sub_mail(self, match):
        return '<a href="mailto:%s">%s</a>' % (match.group(1), match.group(1))

    def feed(self, data):
        """Print the text of a block as-is."""
        print(data)

listing20-5.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
class Rule:
    """
    Base class for all rules.

    Subclasses supply a ``type`` attribute naming the kind of block
    they handle.
    """

    def action(self, block, handler):
        """
        Emit the block through the handler, wrapped in start/end calls
        for this rule's type. Returns True.
        """
        kind = self.type
        handler.start(kind)
        handler.feed(block)
        handler.end(kind)
        return True
10+
11+
class HeadingRule(Rule):
    """
    Rule for headings: a one-line block of no more than 70 characters
    whose last character is not a colon.
    """
    type = 'heading'

    def condition(self, block):
        """Return True if the block qualifies as a heading."""
        if '\n' in block:
            # More than one line: not a heading.
            return False
        if len(block) > 70:
            return False
        return block[-1] != ':'
19+
20+
class TitleRule(HeadingRule):
    """
    Rule for the document title: the first block this rule is asked
    about, as long as that block also satisfies the heading condition.
    """
    type = 'title'
    # True until condition() has been consulted once on this instance.
    first = True

    def condition(self, block):
        """Return True only for the first block checked, if it is a heading."""
        if self.first:
            # Later blocks can never be the title, whatever this one is.
            self.first = False
            return HeadingRule.condition(self, block)
        return False
32+
33+
class ListItemRule(Rule):
    """
    Rule for a single list item: a block whose first character is a
    hyphen. The hyphen is dropped when the item is passed to the
    handler.
    """
    type = 'listitem'

    def condition(self, block):
        """Return True if the block starts with '-'."""
        leading = block[0]
        return leading == '-'

    def action(self, block, handler):
        """
        Feed the block to the handler without its leading hyphen and
        surrounding whitespace, wrapped in start/end calls. Returns True.
        """
        kind = self.type
        handler.start(kind)
        item_text = block[1:].strip()
        handler.feed(item_text)
        handler.end(kind)
        return True
46+
47+
class ListRule(ListItemRule):
    """
    Rule that opens and closes the list around consecutive list items.

    The list is started when a list item follows a block that was not
    one, and ended when the first non-item block arrives afterwards.
    This rule never feeds the block itself and always lets the
    remaining rules run.
    """
    type = 'list'
    # Whether a list has been started and not yet ended.
    inside = False

    def condition(self, block):
        """Always True: every block must be inspected to track list state."""
        return True

    def action(self, block, handler):
        """
        Start or end the list as needed for this block. Returns False
        so that rule processing continues with the next rule.
        """
        is_item = ListItemRule.condition(self, block)
        if is_item and not self.inside:
            handler.start(self.type)
            self.inside = True
        elif self.inside and not is_item:
            handler.end(self.type)
            self.inside = False
        return False
65+
66+
class ParagraphRule(Rule):
    """
    Catch-all rule: its condition accepts every block, so it applies
    to any block that reaches it.
    """
    type = 'paragraph'

    def condition(self, block):
        """Return True for every block."""
        return True

listing20-6.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import sys, re
2+
from handlers import *
3+
from util import *
4+
from rules import *
5+
6+
class Parser:
    """
    Reads a text file block by block, runs each block through the
    registered filters and rules, and drives a handler with the result.
    """

    def __init__(self, handler):
        self.handler = handler
        self.rules = []
        self.filters = []

    def addRule(self, rule):
        """Append a rule; rules are tried in the order they were added."""
        self.rules.append(rule)

    def addFilter(self, pattern, name):
        """
        Register a filter that rewrites every match of pattern using
        the substitution function the handler returns for name.
        """
        def apply_filter(block, handler):
            return re.sub(pattern, handler.sub(name), block)
        self.filters.append(apply_filter)

    def parse(self, file):
        """
        Process file: wrap the output in a 'document' start/end pair,
        apply all filters to each block, then try the rules in order
        until one whose condition holds returns a true value from its
        action.
        """
        self.handler.start('document')
        for block in blocks(file):
            for apply_filter in self.filters:
                block = apply_filter(block, self.handler)
            for rule in self.rules:
                # action() only runs when condition() holds; a true
                # result means no further rules are tried for this block.
                if rule.condition(block) and rule.action(block, self.handler):
                    break
        self.handler.end('document')
31+
32+
class BasicTextParser(Parser):
    """
    A Parser pre-configured with the standard set of rules and inline
    filters; everything is registered in the constructor.
    """

    def __init__(self, handler):
        Parser.__init__(self, handler)

        # Order matters: rules are tried in the order they are added.
        for rule_class in (ListRule, ListItemRule, TitleRule,
                           HeadingRule, ParagraphRule):
            self.addRule(rule_class())

        # (pattern, handler substitution name) pairs, in application order.
        inline_filters = (
            (r'\*(.+?)\*', 'emphasis'),
            (r'(http://[\.a-zA-Z/]+)', 'url'),
            (r'([\.a-zA-Z]+@[\.a-zA-Z]+[a-zA-Z]+)', 'mail'),
        )
        for pattern, name in inline_filters:
            self.addFilter(pattern, name)
48+
49+
# Driver: render the text arriving on standard input as HTML.
handler = HTMLRenderer()
parser = BasicTextParser(handler)

# Parsing writes the generated markup through the handler.
parser.parse(sys.stdin)

0 commit comments

Comments
 (0)