Skip to content

Commit 8062954

Browse files
committed
add window(), a length-n sliding window iterator for general iterables
1 parent f2bf744 commit 8062954

File tree

3 files changed

+103
-4
lines changed

3 files changed

+103
-4
lines changed

README.md

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1012,14 +1012,15 @@ because ``(g, x, y)`` is just a tuple of ``g``, ``x`` and ``y``. This is by desi
10121012
- Only applicable to monotonic divergent inputs (such as ``primes``). Increasing/decreasing is auto-detected from the first non-zero diff, but the function may fail to terminate if the input is actually not monotonic, or has an upper/lower bound.
10131013
- `iindex`: like ``list.index``, but for a general iterable. Consumes the iterable, so only makes sense for memoized inputs. *Added in v0.13.1.*
10141014
- `prod`: like the builtin `sum`, but compute the product. Oddly missing from the standard library. *Added in v0.13.1.*
1015+
- `window`: sliding length-n window iterator for general iterables. Acts like the well-known [n-gram zip trick](http://www.locallyoptimal.com/blog/2013/01/20/elegant-n-gram-generation-in-python/), but the input can be any iterable. *Added in v0.14.1.*
10151016
10161017
Examples:
10171018
10181019
```python
10191020
from functools import partial
10201021
from unpythonic import scanl, scanr, foldl, foldr, flatmap, mapr, zipr, \
10211022
uniqify, uniq, flatten1, flatten, flatten_in, take, drop, \
1022-
unfold, unfold1, cons, nil, ll, curry, s, inn, iindex
1023+
unfold, unfold1, cons, nil, ll, curry, s, inn, iindex, window
10231024
10241025
assert tuple(scanl(add, 0, range(1, 5))) == (0, 1, 3, 6, 10)
10251026
assert tuple(scanr(add, 0, range(1, 5))) == (0, 4, 7, 9, 10)
@@ -1064,6 +1065,13 @@ assert not inn(1337, primes())
10641065
assert iindex(2, (1, 2, 3)) == 1
10651066
assert iindex(31337, primes()) == 3378
10661067
1068+
# window: length-n sliding window iterator for general iterables
1069+
lst = (x for x in range(5))
1070+
out = []
1071+
for a, b, c in window(lst, n=3):
1072+
out.append((a, b, c))
1073+
assert out == [(0, 1, 2), (1, 2, 3), (2, 3, 4)]
1074+
10671075
# flatmap
10681076
def msqrt(x): # multivalued sqrt
10691077
if x == 0.:
@@ -2613,7 +2621,25 @@ assert inp == deque([])
26132621
assert out == [0, 10, 1, 11, 2, 12]
26142622
```
26152623
2616-
A real use case for this is to split sequences of items, stored as lists in a deque, into shorter sequences where some condition is contiguously ``True`` or ``False``. When the condition changes state, just commit the current subsequence, and push the rest of that input sequence (still requiring analysis) back to the input deque, to be dealt with later.
2624+
``Popper`` comboes with other iterable utilities, such as ``window``:
2625+
2626+
```python
2627+
from collections import deque
2628+
from unpythonic import Popper, window
2629+
2630+
inp = deque(range(3))
2631+
out = []
2632+
for a, b in window(Popper(inp)):
2633+
out.append((a, b))
2634+
if a < 10:
2635+
inp.append(a + 10)
2636+
assert inp == deque([])
2637+
assert out == [(0, 1), (1, 2), (2, 10), (10, 11), (11, 12)]
2638+
```
2639+
2640+
(Although ``window`` invokes ``iter()`` on the ``Popper``, this works because the ``Popper`` never invokes ``iter()`` on the underlying container. Any mutations to the input container performed by the loop body will be understood by ``Popper`` and thus also seen by the ``window``. The first ``n`` elements, though, are read before the loop body gets control, because the window needs them to initialize itself.)
2641+
2642+
One possible real use case for ``Popper`` is to split sequences of items, stored as lists in a deque, into shorter sequences where some condition is contiguously ``True`` or ``False``. When the condition changes state, just commit the current subsequence, and push the rest of that input sequence (still requiring analysis) back to the input deque, to be dealt with later.
26172643
26182644
The argument to ``Popper`` (here ``lst``) contains the **remaining** items. Each iteration pops an element **from the left**. The loop terminates when ``lst`` is empty.
26192645

unpythonic/it.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@
2323
"flatten", "flatten1", "flatten_in",
2424
"iterate", "iterate1",
2525
"partition",
26-
"inn", "iindex"]
26+
"inn", "iindex",
27+
"window"]
2728

2829
from operator import itemgetter
2930
from itertools import tee, islice, zip_longest, starmap, chain, filterfalse, groupby, takewhile
@@ -613,3 +614,34 @@ def iindex(x, iterable):
613614
if elt == x:
614615
return j
615616
raise ValueError("{} is not in iterable".format(x))
617+
618+
def window(iterable, n=2):
619+
"""Sliding length-n window iterator for a general iterable.
620+
621+
Acts like ``zip(s, s[1:], ..., s[n-1:])`` for a sequence ``s``, but the input
622+
can be any iterable.
623+
624+
If there are fewer than ``n`` items in the input iterable, an empty iterator
625+
is returned.
626+
627+
Inspired by ``with_next`` discussed in:
628+
629+
https://opensource.com/article/18/3/loop-better-deeper-look-iteration-python
630+
"""
631+
if n < 2:
632+
raise ValueError("expected n >= 2, got {}".format(n))
633+
it = iter(iterable)
634+
xs = deque()
635+
for _ in range(n):
636+
try:
637+
xs.append(next(it))
638+
except StopIteration:
639+
def empty_iterable():
640+
yield from ()
641+
return empty_iterable()
642+
def windowed():
643+
while True:
644+
yield xs
645+
xs.popleft()
646+
xs.append(next(it)) # let StopIteration propagate
647+
return windowed()

unpythonic/test/test_it.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from functools import partial
44
from itertools import tee, count, takewhile
55
from operator import add, itemgetter
6+
from collections import deque
67

78
from ..it import mapr, rmap, zipr, rzip, \
89
map_longest, mapr_longest, rmap_longest, \
@@ -15,11 +16,13 @@
1516
uniqify, uniq, \
1617
flatten, flatten1, flatten_in, \
1718
unpack, \
18-
inn, iindex
19+
inn, iindex, \
20+
window
1921

2022
from ..fun import composel, identity
2123
from ..gmemo import imemoize, gmemoize
2224
from ..mathseq import s
25+
from ..misc import Popper
2326

2427
def test():
2528
def noneadd(a, b):
@@ -187,6 +190,44 @@ def primes():
187190
else:
188191
assert False
189192

193+
# window: length-n sliding window iterator for general iterables
194+
lst = list(range(5))
195+
out = []
196+
for a, b, c in window(lst, n=3):
197+
out.append((a, b, c))
198+
assert lst == list(range(5))
199+
assert out == [(0, 1, 2), (1, 2, 3), (2, 3, 4)]
200+
201+
lst = range(5)
202+
out = []
203+
for a, b, c in window(lst, n=3):
204+
out.append((a, b, c))
205+
assert lst == range(5)
206+
assert out == [(0, 1, 2), (1, 2, 3), (2, 3, 4)]
207+
208+
lst = (x for x in range(5))
209+
out = []
210+
for a, b, c in window(lst, n=3):
211+
out.append((a, b, c))
212+
assert out == [(0, 1, 2), (1, 2, 3), (2, 3, 4)]
213+
214+
# sneaky integration test: let's window a Popper...
215+
#
216+
# This works because window() iter()s the Popper, but the Popper never
217+
# iter()s the underlying container, so any mutations to the input container
218+
# performed by the loop body will be seen by the window.
219+
#
220+
# (The first n elements, though, are read before the loop body gets control,
221+
# because the window needs them to initialize itself.)
222+
inp = deque(range(3))
223+
out = []
224+
for a, b in window(Popper(inp)):
225+
out.append((a, b))
226+
if a < 10:
227+
inp.append(a + 10)
228+
assert inp == deque([])
229+
assert out == [(0, 1), (1, 2), (2, 10), (10, 11), (11, 12)]
230+
190231
print("All tests PASSED")
191232

192233
if __name__ == '__main__':

0 commit comments

Comments
 (0)