Skip to content

Commit 2251d94

Browse files
committed
Use itertoolz.sliding_window.
1 parent 4afc5f5 commit 2251d94

1 file changed

Lines changed: 13 additions & 26 deletions

File tree

python3/word-counter.py

Lines changed: 13 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -3,13 +3,7 @@
33
from collections import Counter
44
import argparse
55
import re
6-
from itertools import islice
7-
import operator
8-
9-
url_re = re.compile(
10-
'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
11-
)
12-
6+
from toolz.itertoolz import sliding_window
137

148
parser = argparse.ArgumentParser()
159
parser.add_argument('--numWords',type=int,default=10)
@@ -18,29 +12,22 @@
1812
parser.add_argument('file',type=str)
1913
args = parser.parse_args()
2014

21-
# Inspired by http://stackoverflow.com/questions/6822725
22-
def window(seq, n):
23-
it = iter(seq)
24-
result = tuple(islice(it, n))
25-
if len(result) == n:
26-
yield result
27-
for elem in it:
28-
result = result[1:] + (elem,)
29-
useThisWindow = True
30-
for i in result:
31-
if len(i) < args.minWordLength:
32-
useThisWindow = False
33-
break
34-
elif url_re.search(i):
35-
useThisWindow = False
36-
break
37-
if useThisWindow:
38-
yield result
15+
def filter_func(tup):
    """Return True when every word in *tup* is acceptable for counting.

    A word is rejected when it is shorter than ``args.minWordLength`` or
    when it contains an http(s) URL. A single rejected word rejects the
    whole window.

    Args:
        tup: An iterable of word strings (one sliding window).

    Returns:
        bool: True only if no word in the window was rejected.
    """
    # Raw string: the pattern contains \( and \), which are invalid escape
    # sequences in a plain string literal. The regex itself is unchanged.
    url_pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    min_length = args.minWordLength  # module-level parsed CLI options
    for word in tup:
        if len(word) < min_length:
            return False
        if re.search(url_pattern, word):
            return False
    # Accept only after every word in the window has been inspected.
    return True
3923

24+
def filtered_window(seq, n):
    """Lazily yield the length-*n* sliding windows of *seq* that pass filter_func."""
    windows = sliding_window(n, seq)
    return (candidate for candidate in windows if filter_func(candidate))
26+
4027
with open(args.file,'r') as f:
4128
content = f.read().replace('\n',' ').lower()
4229
words = re.findall(r'\S+', content)
4330
for i in range(1,args.maxTuples+1):
4431
print("\n=== Sliding Window: {} ===".format(i))
45-
for tup in Counter(window(words,i)).most_common(args.numWords):
32+
for tup in Counter(filtered_window(words,i)).most_common(args.numWords):
4633
print(" {}: '{}'".format(tup[1]," ".join(tup[0])))

0 commit comments

Comments
 (0)