Skip to content

Commit 576fa7e

Browse files
committed
Added whitespace stripper filter. The set of space-preserved elements is <pre>, <textarea> and RCDATA elements (i.e. <script> and <style>).
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40690
1 parent b08022c commit 576fa7e

1 file changed

Lines changed: 40 additions & 0 deletions

File tree

src/filters/whitespace.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
try:
2+
frozenset
3+
except NameError:
4+
# Import from the sets module for python 2.3
5+
from sets import ImmutableSet as frozenset
6+
7+
import re
8+
9+
import _base
10+
from constants import rcdataElements
11+
12+
from constants import spaceCharacters
13+
# Join the imported collection of space characters into a single string so
# that it can be interpolated into a regular-expression character class.
spaceCharacters = u"".join(spaceCharacters)
14+
15+
class Filter(_base.Filter):
    """Token-stream filter that strips or collapses insignificant whitespace.

    Outside space-preserving elements, ``SpaceCharacters`` tokens are dropped
    and every run of space characters inside a ``Characters`` token is
    collapsed to a single space. Tokens located inside a space-preserving
    element are passed through unchanged.
    """

    # Names of the elements whose content must keep its whitespace verbatim.
    spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))

    def __iter__(self):
        """Yield the tokens of the wrapped stream with whitespace normalised."""
        # Nesting depth inside a space-preserving element (0 means outside).
        # A counter is used instead of a boolean flag so that the end tag of
        # a child element (e.g. the </b> in <pre><b>x</b>  y</pre>) does not
        # switch preservation off before the preserving element is closed.
        # NOTE(review): this assumes every "StartTag" token is balanced by an
        # "EndTag" token -- confirm that void elements arrive as a different
        # token type in the streams this filter is applied to.
        depth = 0
        for token in _base.Filter.__iter__(self):
            kind = token["type"]
            if kind == "StartTag":
                if depth or token["name"] in self.spacePreserveElements:
                    depth += 1
            elif kind == "EndTag":
                if depth:
                    depth -= 1
            elif not depth:
                if kind == "SpaceCharacters":
                    # Whitespace-only tokens are removed entirely.
                    # NOTE(review): this also removes a space separating two
                    # inline elements -- confirm that is intended.
                    continue
                if kind == "Characters":
                    token["data"] = collapse_spaces(token["data"])
            yield token
37+
38+
# Compiled once at import time instead of on every call. The characters are
# escaped so that none of them can be read as character-class syntax.
_SPACES_RE = re.compile(u"[%s]+" % re.escape(spaceCharacters))


def collapse_spaces(text):
    """Return *text* with each run of space characters replaced by one space.

    The characters treated as spaces are those contained in the module-level
    ``spaceCharacters`` string.
    """
    return _SPACES_RE.sub(' ', text)
40+

0 commit comments

Comments
 (0)