Skip to content

Commit a443718

Browse files
authored
Add functions for computing diff between two states (singer-io#54)
* Working on state diffing * Use tuples * Working on statediff * Cleanup * Correct docstring * Pylint
1 parent ff90f8d commit a443718

2 files changed

Lines changed: 175 additions & 0 deletions

File tree

singer/statediff.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import collections
2+
3+
# Named tuples for holding add, change, and remove operations
4+
Add = collections.namedtuple('Add', ['path', 'newval'])
5+
Change = collections.namedtuple('Change', ['path', 'oldval', 'newval'])
6+
Remove = collections.namedtuple('Remove', ['path', 'oldval'])
7+
8+
def paths(data, base=None):
9+
'''Walk a data structure and return a list of (path, value) tuples, where
10+
each path is the path to a leaf node in the data structure and the
11+
value is the value it points to. Each path will be a tuple.
12+
13+
'''
14+
if base is None:
15+
base = ()
16+
17+
result = []
18+
if isinstance(data, dict):
19+
for key, val in sorted(data.items()):
20+
result.extend(paths(val, base + (key,)))
21+
22+
elif isinstance(data, list):
23+
for i, val in enumerate(data):
24+
result.extend(paths(val, base + (i,)))
25+
26+
elif base:
27+
result.append((base, data))
28+
29+
return result
30+
31+
def diff(oldstate, newstate):
32+
'''Compare two states, returning a list of Add, Change, and Remove
33+
objects.
34+
35+
Add(path, newval) means path exists in newstate but not oldstate and
36+
its value in newstate is newval.
37+
38+
Change(path, oldval, newval) means that path exists in both oldstate
39+
and newstate but has different values. oldval is the val in oldstate
40+
and newval is the value in newstate.
41+
42+
Remove(path, oldval) means the path exists in oldstate but not in
43+
newstate, and the value in oldstate is oldval.
44+
45+
'''
46+
47+
# Convert oldstate and newstate from a deeply nested dict into a
48+
# single-level dict, mapping a path to a value.
49+
olddict = {k: v for (k, v) in paths(oldstate)}
50+
newdict = {k: v for (k, v) in paths(newstate)}
51+
52+
# Build the list of all paths in both oldstate and newstate to iterate
53+
# over.
54+
all_paths = set()
55+
all_paths.update(set(olddict.keys()))
56+
all_paths.update(set(newdict.keys()))
57+
58+
result = []
59+
for path in sorted(all_paths):
60+
if path in olddict:
61+
if path in newdict:
62+
if olddict[path] == newdict[path]:
63+
pass # Don't emit anything if values are the same
64+
else:
65+
result.append(Change(path, olddict[path], newdict[path]))
66+
else:
67+
result.append(Remove(path, olddict[path]))
68+
else:
69+
result.append(Add(path, newdict[path]))
70+
return result

tests/test_statediff.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
import unittest
2+
import singer.statediff as statediff
3+
from singer.statediff import Add, Remove, Change
4+
5+
class TestPaths(unittest.TestCase):
6+
7+
def test_simple_dict(self):
8+
9+
self.assertEqual(
10+
[(('a',), 1),
11+
(('b',), 2)],
12+
statediff.paths({'a': 1, 'b': 2}))
13+
14+
def test_nested_dict(self):
15+
self.assertEqual(
16+
[(('a', 'b'), 1),
17+
(('a', 'c'), 2),
18+
(('d', 'e'), 3)],
19+
statediff.paths(
20+
{
21+
'a': {
22+
'b': 1,
23+
'c': 2
24+
},
25+
'd': {
26+
'e': 3
27+
}
28+
}
29+
)
30+
)
31+
32+
33+
def test_simple_array(self):
34+
self.assertEqual(
35+
[((0,), 'blue'),
36+
((1,), 'green')],
37+
statediff.paths(
38+
['blue', 'green']))
39+
40+
def test_nested_array(self):
41+
self.assertEqual(
42+
[((0, 0), 'blue'),
43+
((0, 1), 'red'),
44+
((1, 0), 'green')],
45+
statediff.paths([['blue', 'red'], ['green']]))
46+
47+
def test_arrays_in_dicts(self):
48+
self.assertEqual(
49+
[(('a', 0), 'blue'),
50+
(('a', 1), 'red'),
51+
(('b', 0), 'green')],
52+
statediff.paths(
53+
{
54+
'a': ['blue', 'red'],
55+
'b': ['green']
56+
}
57+
)
58+
)
59+
60+
def test_none(self):
61+
self.assertEqual([], statediff.paths(None))
62+
63+
class TestDiff(unittest.TestCase):
64+
65+
def test_add(self):
66+
self.assertEqual(
67+
[Add(('a',), 1),
68+
Add(('b',), 2)],
69+
statediff.diff({}, {'a': 1, 'b': 2}))
70+
71+
def test_remove(self):
72+
self.assertEqual(
73+
[Remove(('a',), 1),
74+
Remove(('b',), 2)],
75+
statediff.diff({'a': 1, 'b': 2}, {}))
76+
77+
def test_change(self):
78+
self.assertEqual(
79+
[Change(('a',), 1, 100),
80+
Change(('b',), 2, 200)],
81+
statediff.diff({'a': 1, 'b': 2},
82+
{'a': 100, 'b': 200}))
83+
84+
def test_null_input_for_old(self):
85+
self.assertEqual(
86+
[Add(('a',), 1)],
87+
statediff.diff(None, {'a': 1}))
88+
89+
def test_null_input_for_new(self):
90+
self.assertEqual(
91+
[Remove(('a',), 1)],
92+
statediff.diff({'a': 1}, None))
93+
94+
def test_null_input_for_both(self):
95+
self.assertEqual([], statediff.diff(None, None))
96+
97+
def test_null_at_leaf(self):
98+
self.assertEqual(
99+
[Change(('a',), 1, None),
100+
Change(('b',), None, 2)],
101+
statediff.diff({'a': 1, 'b': None},
102+
{'a': None, 'b': 2}))
103+
104+
105+

0 commit comments

Comments
 (0)