From 192505a31c357855a0a147a455691774a9523ac7 Mon Sep 17 00:00:00 2001 From: Bryant Gray Date: Mon, 24 Mar 2025 21:10:49 +0000 Subject: [PATCH 01/11] use lower_snake_case for `setup.cfg` --- CHANGELOG.md | 3 +++ setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d06a7a7..806e563 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 5.13.1 + * Switch `setup.cfg` to use underscore instead of dash because setuptools has deprecated anything other than lower_snake_case. See [setuptools issue](https://github.com/pypa/setuptools/issues/4910) for more context. + ## 5.13.0 * Add support for dev mode argument parsing [#158](https://github.com/singer-io/singer-python/pull/158) diff --git a/setup.cfg b/setup.cfg index b88034e..08aedd7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,2 @@ [metadata] -description-file = README.md +description_file = README.md diff --git a/setup.py b/setup.py index 3d95c5d..2d48224 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.13.0', + version='5.13.1', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], From fc4d3366f59c68c35a4a3dba3a17139bb68abf51 Mon Sep 17 00:00:00 2001 From: Dylan Sprayberry Date: Mon, 23 Jun 2025 19:42:05 +0000 Subject: [PATCH 02/11] Bump backoff and singer-python versions Co-authored-by: Andy Lu Co-authored-by: Ben Allred --- CHANGELOG.md | 3 +++ setup.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 806e563..3e2d826 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 5.13.2 + * Bump backoff version to 1.10.0 + ## 5.13.1 * Switch `setup.cfg` to use underscore instead of dash because setuptools has deprecated anything other than lower_snake_case. See [setuptools issue](https://github.com/pypa/setuptools/issues/4910) for more context. diff --git a/setup.py b/setup.py index 2d48224..11753df 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.13.1', + version='5.13.2', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], @@ -14,7 +14,7 @@ 'jsonschema==2.6.0', 'simplejson==3.11.1', 'python-dateutil>=2.6.0', - 'backoff==1.8.0', + 'backoff==1.10.0', 'ciso8601', ], extras_require={ From d4e350647f8002292d01e2b5ca3cd327b0e6f298 Mon Sep 17 00:00:00 2001 From: Ben Allred Date: Fri, 19 Sep 2025 09:58:03 -0600 Subject: [PATCH 03/11] Adds json schema generation (#174) * Adds json schema generation Co-authored-by: Ben Allred * fix test * Update CHANGELOG.md PR --------- Co-authored-by: Dylan Sprayberry --- CHANGELOG.md | 3 ++ setup.py | 2 +- singer/schema_generation.py | 92 +++++++++++++++++++++++++++++++++ tests/test_catalog.py | 6 +-- tests/test_exceptions.py | 24 ++++----- tests/test_schema.py | 28 +++++----- tests/test_schema_generation.py | 76 +++++++++++++++++++++++++++ tests/test_transform.py | 48 ++++++++--------- 8 files changed, 225 insertions(+), 54 deletions(-) create mode 100644 singer/schema_generation.py create mode 100644 tests/test_schema_generation.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e2d826..0bb9903 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 5.14.0 + * Adds json schema generation [#174](https://github.com/singer-io/singer-python/pull/174) + ## 5.13.2 * Bump backoff version to 1.10.0 diff --git a/setup.py b/setup.py index 11753df..a552997 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.13.2', + version='5.14.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], diff --git a/singer/schema_generation.py b/singer/schema_generation.py new file mode 100644 index 0000000..3d11f74 --- /dev/null +++ b/singer/schema_generation.py @@ -0,0 +1,92 @@ +import dateutil.parser + + +def add_observation(acc, path): + + node = acc + for i in range(0, len(path) - 1): + k = path[i] + if k not in node: + node[k] = {} + node = node[k] + + node[path[-1]] = True + +# pylint: disable=too-many-branches +def add_observations(acc, path, data): + if isinstance(data, dict): + for key in data: + add_observations(acc, path + ["object", key], data[key]) + elif isinstance(data, list): + for item in data: + add_observations(acc, path + ["array"], item) + elif isinstance(data, str): + # If the string parses as a date, add an observation that its a date + try: + data = dateutil.parser.parse(data) + except (dateutil.parser.ParserError, OverflowError): + data = None + if data: + add_observation(acc, path + ["date"]) + else: + add_observation(acc, path + ["string"]) + + elif isinstance(data, bool): + add_observation(acc, path + ["boolean"]) + elif isinstance(data, int): + add_observation(acc, path + ["integer"]) + elif isinstance(data, float): + add_observation(acc, path + ["number"]) + elif data is None: + add_observation(acc, path + ["null"]) + else: + raise Exception("Unexpected value " + repr(data) + " at path " + repr(path)) + + return acc + +def to_json_schema(obs): + result = {'type': ['null']} + + for key in obs: + + if key == 'object': + result['type'] += ['object'] + if 'properties' not in result: + result['properties'] = {} + for obj_key in obs['object']: + result['properties'][obj_key] = to_json_schema(obs['object'][obj_key]) + + elif key == 'array': + result['type'] += ['array'] + result['items'] = to_json_schema(obs['array']) + + elif key == 'date': + result['type'] += ['string'] + result['format'] = 'date-time' + elif key == 'string': + result['type'] += ['string'] + + elif key == 'boolean': + result['type'] += ['boolean'] + + elif key == 'integer': + result['type'] += ['integer'] + + elif key == 'number': + # Use type=string, format=singer.decimal + result['type'] += ['string'] + result['format'] = 'singer.decimal' + + elif key == 'null': + pass + + else: + raise Exception("Unexpected data type " + key) + + return result + +def generate_schema(records): + obs = {} + for record in records: + obs = add_observations(obs, [], record) + return to_json_schema(obs) diff --git a/tests/test_catalog.py b/tests/test_catalog.py index cd6dc50..8a72e1a 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -25,7 +25,7 @@ def test_one_selected_stream(self): CatalogEntry(tap_stream_id='c',schema=Schema(),metadata=[])]) state = {} selected_streams = catalog.get_selected_streams(state) - self.assertEquals([e for e in selected_streams],[selected_entry]) + self.assertEqual([e for e in selected_streams],[selected_entry]) def test_resumes_currently_syncing_stream(self): selected_entry_a = CatalogEntry(tap_stream_id='a', @@ -44,7 +44,7 @@ def test_resumes_currently_syncing_stream(self): selected_entry_c]) state = {'currently_syncing': 'c'} selected_streams = catalog.get_selected_streams(state) - self.assertEquals([e for e in selected_streams][0],selected_entry_c) + self.assertEqual([e for e in selected_streams][0],selected_entry_c) class TestToDictAndFromDict(unittest.TestCase): @@ -141,4 +141,4 @@ def test(self): CatalogEntry(tap_stream_id='b'), CatalogEntry(tap_stream_id='c')]) entry = catalog.get_stream('b') - self.assertEquals('b', entry.tap_stream_id) + self.assertEqual('b', entry.tap_stream_id) diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py index 491595f..50cf7a1 100644 --- a/tests/test_exceptions.py +++ b/tests/test_exceptions.py @@ -14,8 +14,8 @@ def test_SingerError_prints_correctly(self): raise SingerError(error_text) expected_text = "SingerError\n" + error_text - self.assertEquals(expected_text, - str(test_run.exception)) + self.assertEqual(expected_text, + str(test_run.exception)) def test_SingerConfigurationError_prints_correctly(self): error_text = "An error occured" @@ -24,8 +24,8 @@ def test_SingerConfigurationError_prints_correctly(self): raise SingerConfigurationError(error_text) expected_text = "SingerConfigurationError\n" + error_text - self.assertEquals(expected_text, - str(test_run.exception)) + self.assertEqual(expected_text, + str(test_run.exception)) def test_SingerDiscoveryError_prints_correctly(self): error_text = "An error occured" @@ -34,8 +34,8 @@ def test_SingerDiscoveryError_prints_correctly(self): raise SingerDiscoveryError(error_text) expected_text = "SingerDiscoveryError\n" + error_text - self.assertEquals(expected_text, - str(test_run.exception)) + self.assertEqual(expected_text, + str(test_run.exception)) def test_SingerSyncError_prints_correctly(self): error_text = "An error occured" @@ -44,8 +44,8 @@ def test_SingerSyncError_prints_correctly(self): raise SingerSyncError(error_text) expected_text = "SingerSyncError\n" + error_text - self.assertEquals(expected_text, - str(test_run.exception)) + self.assertEqual(expected_text, + str(test_run.exception)) def test_SingerRetryableRequestError_prints_correctly(self): error_text = "An error occured" @@ -54,8 +54,8 @@ def test_SingerRetryableRequestError_prints_correctly(self): raise SingerRetryableRequestError(error_text) expected_text = "SingerRetryableRequestError\n" + error_text - self.assertEquals(expected_text, - str(test_run.exception)) + self.assertEqual(expected_text, + str(test_run.exception)) def test_SingerError_prints_multiple_lines_correctly(self): error_text = "\n".join(["Line 1", "Line 2", "Line 3"]) @@ -64,5 +64,5 @@ def test_SingerError_prints_multiple_lines_correctly(self): raise SingerError(error_text) expected_text = "SingerError\n" + error_text - self.assertEquals(expected_text, - str(test_run.exception)) + self.assertEqual(expected_text, + str(test_run.exception)) diff --git a/tests/test_schema.py b/tests/test_schema.py index fa28bac..5682755 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -44,38 +44,38 @@ class TestSchema(unittest.TestCase): additionalProperties=True) def test_string_to_dict(self): - self.assertEquals(self.string_dict, self.string_obj.to_dict()) + self.assertEqual(self.string_dict, self.string_obj.to_dict()) def test_integer_to_dict(self): - self.assertEquals(self.integer_dict, self.integer_obj.to_dict()) + self.assertEqual(self.integer_dict, self.integer_obj.to_dict()) def test_array_to_dict(self): - self.assertEquals(self.array_dict, self.array_obj.to_dict()) + self.assertEqual(self.array_dict, self.array_obj.to_dict()) def test_object_to_dict(self): - self.assertEquals(self.object_dict, self.object_obj.to_dict()) + self.assertEqual(self.object_dict, self.object_obj.to_dict()) def test_string_from_dict(self): - self.assertEquals(self.string_obj, Schema.from_dict(self.string_dict)) + self.assertEqual(self.string_obj, Schema.from_dict(self.string_dict)) def test_integer_from_dict(self): - self.assertEquals(self.integer_obj, Schema.from_dict(self.integer_dict)) + self.assertEqual(self.integer_obj, Schema.from_dict(self.integer_dict)) def test_array_from_dict(self): - self.assertEquals(self.array_obj, Schema.from_dict(self.array_dict)) + self.assertEqual(self.array_obj, Schema.from_dict(self.array_dict)) def test_object_from_dict(self): - self.assertEquals(self.object_obj, Schema.from_dict(self.object_dict)) + self.assertEqual(self.object_obj, Schema.from_dict(self.object_dict)) def test_repr_atomic(self): - self.assertEquals(self.string_obj, eval(repr(self.string_obj))) + self.assertEqual(self.string_obj, eval(repr(self.string_obj))) def test_repr_recursive(self): - self.assertEquals(self.object_obj, eval(repr(self.object_obj))) + self.assertEqual(self.object_obj, eval(repr(self.object_obj))) def test_object_from_dict_with_defaults(self): schema = Schema.from_dict(self.object_dict, inclusion='automatic') - self.assertEquals('whatever', schema.inclusion, - msg='The schema value should override the default') - self.assertEquals('automatic', schema.properties['a_string'].inclusion) - self.assertEquals('automatic', schema.properties['an_array'].items.inclusion) + self.assertEqual('whatever', schema.inclusion, + msg='The schema value should override the default') + self.assertEqual('automatic', schema.properties['a_string'].inclusion) + self.assertEqual('automatic', schema.properties['an_array'].items.inclusion) diff --git a/tests/test_schema_generation.py b/tests/test_schema_generation.py new file mode 100644 index 0000000..5e00738 --- /dev/null +++ b/tests/test_schema_generation.py @@ -0,0 +1,76 @@ +import unittest +from singer.schema_generation import generate_schema + +class TestSchemaGeneration(unittest.TestCase): + def test_simple_schema(self): + records = [{'a': 1, 'b': 'two', 'c': True, 'dt': '2000-01-01T00:11:22Z'}] + expected_schema = { + 'type': ['null', 'object'], + 'properties': { + 'a': {'type': ['null', 'integer']}, + 'b': {'type': ['null', 'string']}, + 'c': {'type': ['null', 'boolean']}, + 'dt': {'type': ['null', 'string'], 'format': 'date-time'} + } + } + self.assertEqual(expected_schema, generate_schema(records)) + + def test_mix_n_match_records_schema(self): + records = [ + {'a': 1, 'b': 'b'}, + {'a': 'two', 'c': 7, 'd': [1, 'two']}, + {'a': True, 'c': 7.7, 'd': {'one': 1, 'two': 'two'}} + ] + expected_schema = { + 'type': ['null', 'object'], + 'properties': { + 'a': {'type': {'null', 'integer', 'string', 'boolean'}}, + 'b': {'type': ['null', 'string']}, + 'c': {'type': {'null', 'integer', 'string'}, 'format': 'singer.decimal'}, + 'd': { + 'type': {'null', 'array', 'object'}, + 'items': {'type': {'null', 'integer', 'string'}}, + 'properties': {'one': {'type': ['null', 'integer']}, + 'two': {'type': ['null', 'string']}} + + } + } + } + actual_schema = generate_schema(records) + actual_schema['properties']['a']['type'] = set(actual_schema['properties']['a']['type']) + actual_schema['properties']['c']['type'] = set(actual_schema['properties']['c']['type']) + actual_schema['properties']['d']['type'] = set(actual_schema['properties']['d']['type']) + actual_schema['properties']['d']['items']['type'] = set(actual_schema['properties']['d']['items']['type']) + self.assertEqual(expected_schema, actual_schema) + + def test_nested_structue_schema(self): + records = [{'a': {'b': {'c': [{'d': 7}]}, 'e': [[1, 2, 3]]}}] + expected_schema = { + 'type': ['null', 'object'], + 'properties': { + 'a': { + 'type': ['null', 'object'], + 'properties': { + 'b': { + 'type': ['null', 'object'], + 'properties': { + 'c': { + 'type': ['null', 'array'], + 'items': { + 'type': ['null', 'object'], + 'properties': {'d': {'type': ['null', 'integer']}} + } + } + } + }, + 'e': { + 'type': ['null', 'array'], + 'items': { + 'type': ['null', 'array'], + 'items': {'type': ['null', 'integer']}} + } + } + } + } + } + self.assertEqual(expected_schema, generate_schema(records)) diff --git a/tests/test_transform.py b/tests/test_transform.py index 959c4b8..96e0c3b 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -264,11 +264,11 @@ def test_decimal_types_transform(self): nan = {'percentage': decimal.Decimal('NaN')} snan = {'percentage': decimal.Decimal('sNaN')} - self.assertEquals(inf, transform(inf, schema)) - self.assertEquals(negative_inf, transform(negative_inf, schema)) - self.assertEquals({'percentage': '1.4142135623730951'}, transform(root2, schema)) - self.assertEquals({'percentage': 'NaN'}, transform(nan, schema)) - self.assertEquals({'percentage': 'NaN'}, transform(snan, schema)) + self.assertEqual(inf, transform(inf, schema)) + self.assertEqual(negative_inf, transform(negative_inf, schema)) + self.assertEqual({'percentage': '1.4142135623730951'}, transform(root2, schema)) + self.assertEqual({'percentage': 'NaN'}, transform(nan, schema)) + self.assertEqual({'percentage': 'NaN'}, transform(snan, schema)) str1 = {'percentage':'0.1'} @@ -276,11 +276,11 @@ def test_decimal_types_transform(self): str3 = {'percentage': '1E+13'} str4 = {'percentage': '100'} str5 = {'percentage': '-100'} - self.assertEquals(str1, transform(str1, schema)) - self.assertEquals({'percentage': '1E-13'}, transform(str2, schema)) - self.assertEquals({'percentage': '1E+13'}, transform(str3, schema)) - self.assertEquals({'percentage': '100'}, transform(str4, schema)) - self.assertEquals({'percentage': '-100'}, transform(str5, schema)) + self.assertEqual(str1, transform(str1, schema)) + self.assertEqual({'percentage': '1E-13'}, transform(str2, schema)) + self.assertEqual({'percentage': '1E+13'}, transform(str3, schema)) + self.assertEqual({'percentage': '100'}, transform(str4, schema)) + self.assertEqual({'percentage': '-100'}, transform(str5, schema)) float1 = {'percentage': 12.0000000000000000000000000001234556} float2 = {'percentage': 0.0123} @@ -288,28 +288,28 @@ def test_decimal_types_transform(self): float4 = {'percentage': -100.0123} float5 = {'percentage': 0.000001} float6 = {'percentage': 0.0000001} - self.assertEquals({'percentage':'12.0'}, transform(float1, schema)) - self.assertEquals({'percentage':'0.0123'}, transform(float2, schema)) - self.assertEquals({'percentage':'100.0123'}, transform(float3, schema)) - self.assertEquals({'percentage':'-100.0123'}, transform(float4, schema)) - self.assertEquals({'percentage':'0.000001'}, transform(float5, schema)) - self.assertEquals({'percentage':'1E-7'}, transform(float6, schema)) + self.assertEqual({'percentage':'12.0'}, transform(float1, schema)) + self.assertEqual({'percentage':'0.0123'}, transform(float2, schema)) + self.assertEqual({'percentage':'100.0123'}, transform(float3, schema)) + self.assertEqual({'percentage':'-100.0123'}, transform(float4, schema)) + self.assertEqual({'percentage':'0.000001'}, transform(float5, schema)) + self.assertEqual({'percentage':'1E-7'}, transform(float6, schema)) int1 = {'percentage': 123} int2 = {'percentage': 0} int3 = {'percentage': -1000} - self.assertEquals({'percentage':'123'}, transform(int1, schema)) - self.assertEquals({'percentage':'0'}, transform(int2, schema)) - self.assertEquals({'percentage':'-1000'}, transform(int3, schema)) + self.assertEqual({'percentage':'123'}, transform(int1, schema)) + self.assertEqual({'percentage':'0'}, transform(int2, schema)) + self.assertEqual({'percentage':'-1000'}, transform(int3, schema)) dec1 = {'percentage': decimal.Decimal('1.1010101')} dec2 = {'percentage': decimal.Decimal('.111111111111111111111111')} dec3 = {'percentage': decimal.Decimal('-.111111111111111111111111')} dec4 = {'percentage': decimal.Decimal('100')} - self.assertEquals({'percentage':'1.1010101'}, transform(dec1, schema)) - self.assertEquals({'percentage':'0.111111111111111111111111'}, transform(dec2, schema)) - self.assertEquals({'percentage':'-0.111111111111111111111111'}, transform(dec3, schema)) - self.assertEquals({'percentage':'100'}, transform(dec4, schema)) + self.assertEqual({'percentage':'1.1010101'}, transform(dec1, schema)) + self.assertEqual({'percentage':'0.111111111111111111111111'}, transform(dec2, schema)) + self.assertEqual({'percentage':'-0.111111111111111111111111'}, transform(dec3, schema)) + self.assertEqual({'percentage':'100'}, transform(dec4, schema)) bad1 = {'percentage': 'fsdkjl'} with self.assertRaises(SchemaMismatch): @@ -317,7 +317,7 @@ def test_decimal_types_transform(self): badnull = {'percentage': None} with self.assertRaises(SchemaMismatch): - self.assertEquals({'percentage':None}, transform(badnull, schema)) + self.assertEqual({'percentage':None}, transform(badnull, schema)) class TestTransformsWithMetadata(unittest.TestCase): From a4637da37e9d513cbd912f2b0c681c7096931ded Mon Sep 17 00:00:00 2001 From: Ben Allred Date: Thu, 2 Oct 2025 12:37:19 -0600 Subject: [PATCH 04/11] SAC-28668: fix transform and schema v5 (#176) * handle empty arrays and fields that could be either formatted or nested * bump version and add changelog entry * handle string parsing similar to existin tap-s3 logic * fix syntax error * fix bad tests --- CHANGELOG.md | 7 +++++++ setup.py | 2 +- singer/schema_generation.py | 30 ++++++++++++++++++++++-------- singer/transform.py | 4 ++-- tests/test_transform.py | 4 ++-- 5 files changed, 34 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0bb9903..bc8b497 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## 5.14.1 + * Fixes json schema generation to not treat numbers as dates + * Fixes json schema generation to handle empty arrays + * Fixes record transformation to handle fields that could be either formatted string or nested data structure + * [#176](https://github.com/singer-io/singer-python/pull/176) + + ## 5.14.0 * Adds json schema generation [#174](https://github.com/singer-io/singer-python/pull/174) diff --git a/setup.py b/setup.py index a552997..84cfa4a 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.14.0', + version='5.14.1', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], diff --git a/singer/schema_generation.py b/singer/schema_generation.py index 3d11f74..1b73388 100644 --- a/singer/schema_generation.py +++ b/singer/schema_generation.py @@ -18,19 +18,33 @@ def add_observations(acc, path, data): for key in data: add_observations(acc, path + ["object", key], data[key]) elif isinstance(data, list): + if len(data) == 0: + add_observations(acc, path + ["array"], None) for item in data: add_observations(acc, path + ["array"], item) elif isinstance(data, str): - # If the string parses as a date, add an observation that its a date try: - data = dateutil.parser.parse(data) - except (dateutil.parser.ParserError, OverflowError): - data = None - if data: + # If the string parses as a int, add an observation that it's a integer + int(data) + add_observation(acc, path + ["integer"]) + return acc + except (ValueError, TypeError): + pass + try: + # If the string parses as a float, add an observation that it's a number + float(data) + add_observation(acc, path + ["number"]) + return acc + except (ValueError, TypeError): + pass + try: + # If the string parses as a date, add an observation that it's a date + dateutil.parser.parse(data) add_observation(acc, path + ["date"]) - else: - add_observation(acc, path + ["string"]) - + return acc + except (dateutil.parser.ParserError, OverflowError): + pass + add_observation(acc, path + ["string"]) elif isinstance(data, bool): add_observation(acc, path + ["boolean"]) elif isinstance(data, int): diff --git a/singer/transform.py b/singer/transform.py index 3fdefdf..8cb8492 100644 --- a/singer/transform.py +++ b/singer/transform.py @@ -266,13 +266,13 @@ def _transform(self, data, typ, schema, path): else: return False, None - elif schema.get("format") == "date-time": + elif typ == "string" and schema.get("format") == "date-time": data = self._transform_datetime(data) if data is None: return False, None return True, data - elif schema.get("format") == "singer.decimal": + elif typ == "string" and schema.get("format") == "singer.decimal": if data is None: return False, None diff --git a/tests/test_transform.py b/tests/test_transform.py index 96e0c3b..308ac4e 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -25,7 +25,7 @@ def test_nested_transform(self): def test_multi_type_object_transform(self): schema = {"type": ["null", "object", "string"], - "properties": {"whatever": {"type": "date-time", + "properties": {"whatever": {"type": "string", "format": "date-time"}}} data = {"whatever": "2017-01-01"} expected = {"whatever": "2017-01-01T00:00:00.000000Z"} @@ -36,7 +36,7 @@ def test_multi_type_object_transform(self): def test_multi_type_array_transform(self): schema = {"type": ["null", "array", "integer"], - "items": {"type": "date-time", "format": "date-time"}} + "items": {"type": "string", "format": "date-time"}} data = ["2017-01-01"] expected = ["2017-01-01T00:00:00.000000Z"] self.assertEqual(expected, transform(data, schema)) From eceb55b19d658b86933c5d7a99b794871c3c06aa Mon Sep 17 00:00:00 2001 From: Ben Allred Date: Wed, 8 Oct 2025 14:59:28 -0600 Subject: [PATCH 05/11] SAC-28668: update schema generation v5 (#179) * use `anyOf` when multiple types are found * fix test * Update schema generation and bump version for v5 deploy Co-authored-by: Dylan Sprayberry Co-authored-by: Andres Pineda * Fix linting errors Co-authored-by: Bryant Gray --------- Co-authored-by: Bryant Gray Co-authored-by: Dylan Sprayberry Co-authored-by: Andres Pineda --- CHANGELOG.md | 7 ++++++- setup.py | 2 +- singer/schema_generation.py | 31 +++++++++++++++---------------- singer/transform.py | 2 ++ tests/test_schema_generation.py | 30 +++++++++++++----------------- 5 files changed, 37 insertions(+), 35 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bc8b497..23690fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,17 @@ # Changelog +## 5.14.2 + * Updates json schema generation to not emit dates + * Handle multiple schemas with anyOf and emit them in a specific order + * Do not emit error messages when checking multiple schemas and a subsequent schema passes + * [#178](https://github.com/singer-io/singer-python/pull/178) + ## 5.14.1 * Fixes json schema generation to not treat numbers as dates * Fixes json schema generation to handle empty arrays * Fixes record transformation to handle fields that could be either formatted string or nested data structure * [#176](https://github.com/singer-io/singer-python/pull/176) - ## 5.14.0 * Adds json schema generation [#174](https://github.com/singer-io/singer-python/pull/174) diff --git a/setup.py b/setup.py index 84cfa4a..fdcabec 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.14.1', + version='5.14.2', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], diff --git a/singer/schema_generation.py b/singer/schema_generation.py index 1b73388..003a16f 100644 --- a/singer/schema_generation.py +++ b/singer/schema_generation.py @@ -1,6 +1,3 @@ -import dateutil.parser - - def add_observation(acc, path): node = acc @@ -37,13 +34,6 @@ def add_observations(acc, path, data): return acc except (ValueError, TypeError): pass - try: - # If the string parses as a date, add an observation that it's a date - dateutil.parser.parse(data) - add_observation(acc, path + ["date"]) - return acc - except (dateutil.parser.ParserError, OverflowError): - pass add_observation(acc, path + ["string"]) elif isinstance(data, bool): add_observation(acc, path + ["boolean"]) @@ -59,9 +49,13 @@ def add_observations(acc, path, data): return acc def to_json_schema(obs): - result = {'type': ['null']} + types = [] + # add schema types in a specific order to anyOf list + for key in ['array', 'object', 'number', 'integer', 'boolean', 'string', 'null']: + if key not in obs: + continue - for key in obs: + result = {'type': ['null']} if key == 'object': result['type'] += ['object'] @@ -74,9 +68,6 @@ def to_json_schema(obs): result['type'] += ['array'] result['items'] = to_json_schema(obs['array']) - elif key == 'date': - result['type'] += ['string'] - result['format'] = 'date-time' elif key == 'string': result['type'] += ['string'] @@ -97,7 +88,15 @@ def to_json_schema(obs): else: raise Exception("Unexpected data type " + key) - return result + types.append(result) + + if len(types) == 0: + return {'type': ['null', 'string']} + + if len(types) == 1: + return types[0] + + return {'anyOf': types} def generate_schema(records): obs = {} diff --git a/singer/transform.py b/singer/transform.py index 8cb8492..1031fef 100644 --- a/singer/transform.py +++ b/singer/transform.py @@ -185,6 +185,8 @@ def _transform_anyof(self, data, schema, path): success, transformed_data = self.transform_recur(data, subschema, path) if success: return success, transformed_data + else: + self.errors.pop() else: # pylint: disable=useless-else-on-loop # exhaused all schemas and didn't return, so we failed :-( self.errors.append(Error(path, data, schema, logging_level=LOGGER.level)) diff --git a/tests/test_schema_generation.py b/tests/test_schema_generation.py index 5e00738..610ac20 100644 --- a/tests/test_schema_generation.py +++ b/tests/test_schema_generation.py @@ -10,7 +10,7 @@ def test_simple_schema(self): 'a': {'type': ['null', 'integer']}, 'b': {'type': ['null', 'string']}, 'c': {'type': ['null', 'boolean']}, - 'dt': {'type': ['null', 'string'], 'format': 'date-time'} + 'dt': {'type': ['null', 'string']} } } self.assertEqual(expected_schema, generate_schema(records)) @@ -23,24 +23,20 @@ def test_mix_n_match_records_schema(self): ] expected_schema = { 'type': ['null', 'object'], - 'properties': { - 'a': {'type': {'null', 'integer', 'string', 'boolean'}}, - 'b': {'type': ['null', 'string']}, - 'c': {'type': {'null', 'integer', 'string'}, 'format': 'singer.decimal'}, - 'd': { - 'type': {'null', 'array', 'object'}, - 'items': {'type': {'null', 'integer', 'string'}}, - 'properties': {'one': {'type': ['null', 'integer']}, - 'two': {'type': ['null', 'string']}} - - } - } + 'properties': {'a': {'anyOf': [{'type': ['null', 'integer']}, + {'type': ['null', 'boolean']}, + {'type': ['null', 'string']}]}, + 'b': {'type': ['null', 'string']}, + 'c': {'anyOf': [{'type': ['null', 'string'], 'format': 'singer.decimal'}, + {'type': ['null', 'integer']}]}, + 'd': {'anyOf': [{'type': ['null', 'array'], + 'items': {'anyOf': [{'type': ['null', 'integer']}, + {'type': ['null', 'string']}]}}, + {'type': ['null', 'object'], + 'properties': {'one': {'type': ['null', 'integer']}, + 'two': {'type': ['null', 'string']}}}]}} } actual_schema = generate_schema(records) - actual_schema['properties']['a']['type'] = set(actual_schema['properties']['a']['type']) - actual_schema['properties']['c']['type'] = set(actual_schema['properties']['c']['type']) - actual_schema['properties']['d']['type'] = set(actual_schema['properties']['d']['type']) - actual_schema['properties']['d']['items']['type'] = set(actual_schema['properties']['d']['items']['type']) self.assertEqual(expected_schema, actual_schema) def test_nested_structue_schema(self): From 73e3695a73885bf2c2f9faccb8287be1364f8fd3 Mon Sep 17 00:00:00 2001 From: Ben Allred Date: Wed, 15 Oct 2025 13:58:50 -0600 Subject: [PATCH 06/11] schema generation defaults data type to string (#181) Co-authored-by: Dylan Sprayberry Co-authored-by: Bryant Gray Co-authored-by: Andres Pineda --- CHANGELOG.md | 3 +++ setup.py | 2 +- singer/schema_generation.py | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 23690fc..3d4e141 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 5.14.3 + * Default type for non-standard data types is string [#181](https://github.com/singer-io/singer-python/pull/181) + ## 5.14.2 * Updates json schema generation to not emit dates * Handle multiple schemas with anyOf and emit them in a specific order diff --git a/setup.py b/setup.py index fdcabec..8ed712c 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.14.2', + version='5.14.3', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], diff --git a/singer/schema_generation.py b/singer/schema_generation.py index 003a16f..177d659 100644 --- a/singer/schema_generation.py +++ b/singer/schema_generation.py @@ -44,7 +44,7 @@ def add_observations(acc, path, data): elif data is None: add_observation(acc, path + ["null"]) else: - raise Exception("Unexpected value " + repr(data) + " at path " + repr(path)) + add_observation(acc, path + ["string"]) return acc From f72bf1bfe923dd62fbfc079fde060ea2eb0003a2 Mon Sep 17 00:00:00 2001 From: Ben Allred Date: Mon, 2 Feb 2026 11:39:12 -0700 Subject: [PATCH 07/11] SAC-29666: Update clear_offset to remove offset key from bookmark (#184) * update clear_offset to remove key from bookmark ----------------------------- Co-authored-by: Ben Allred * bump version to 5.15.0 ----------------------------- Co-authored-by: Ben Allred --------- Co-authored-by: Leslie VanDeMark --- CHANGELOG.md | 3 +++ setup.py | 2 +- singer/bookmarks.py | 4 +--- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3d4e141..ebaed0c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 5.15.0 + * Update clear_offset to remove offset bookmark [#184](https://github.com/singer-io/singer-python/pull/184) + ## 5.14.3 * Default type for non-standard data types is string [#181](https://github.com/singer-io/singer-python/pull/181) diff --git a/setup.py b/setup.py index 8ed712c..1f2112c 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.14.3', + version='5.15.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], diff --git a/singer/bookmarks.py b/singer/bookmarks.py index fc6d7ca..40aa927 100644 --- a/singer/bookmarks.py +++ b/singer/bookmarks.py @@ -31,9 +31,7 @@ def set_offset(state, tap_stream_id, offset_key, offset_value): return state def clear_offset(state, tap_stream_id): - state = ensure_bookmark_path(state, ['bookmarks', tap_stream_id, "offset"]) - state['bookmarks'][tap_stream_id]["offset"] = {} - return state + return clear_bookmark(state, tap_stream_id, "offset") def get_offset(state, tap_stream_id, default=None): return state.get('bookmarks', {}).get(tap_stream_id, {}).get("offset", default) From 4337e84e60c0d2de5bf8e9001a43d3d96fa8fc3c Mon Sep 17 00:00:00 2001 From: Leslie VanDeMark <38043390+leslievandemark@users.noreply.github.com> Date: Mon, 9 Feb 2026 15:43:08 -0500 Subject: [PATCH 08/11] Add activate_versions key functions for state - v5 (#189) * add activate_versions key to state functions, use state.py file for state functions ----------------------------- Co-authored-by: Ben Allred * change deprecation annotations to comment ----------------------------- Co-authored-by: Ben Allred * version bump 5.16.0 and changelog update ----------------------------- Co-authored-by: Ben Allred * whitespace cleanup ----------------------------- Co-authored-by: Ben Allred --------- Co-authored-by: Ben Allred --- CHANGELOG.md | 4 + setup.py | 2 +- singer/bookmarks.py | 39 +++---- singer/state.py | 60 ++++++++++ tests/test_bookmarks.py | 166 ---------------------------- tests/test_state.py | 235 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 314 insertions(+), 192 deletions(-) create mode 100644 singer/state.py delete mode 100644 tests/test_bookmarks.py create mode 100644 tests/test_state.py diff --git a/CHANGELOG.md b/CHANGELOG.md index ebaed0c..eb31560 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 5.16.0 + * Add `activate_versions` state functions [#189](https://github.com/singer-io/singer-python/pull/189) + * Deprecates bookmarks.py, functions are moved to state.py + ## 5.15.0 * Update clear_offset to remove offset bookmark [#184](https://github.com/singer-io/singer-python/pull/184) diff --git a/setup.py b/setup.py index 1f2112c..fa8fb22 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.15.0', + version='5.16.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], diff --git a/singer/bookmarks.py b/singer/bookmarks.py index 40aa927..2cd9636 100644 --- a/singer/bookmarks.py +++ b/singer/bookmarks.py @@ -1,44 +1,33 @@ -def ensure_bookmark_path(state, path): - submap = state - for path_component in path: - if submap.get(path_component) is None: - submap[path_component] = {} +from singer import state as st + +## Note - This file is deprecated, use state.py functions. - submap = submap[path_component] - return state +def ensure_bookmark_path(state, path): + return st.ensure_state_path(state, path) def write_bookmark(state, tap_stream_id, key, val): - state = ensure_bookmark_path(state, ['bookmarks', tap_stream_id]) - state['bookmarks'][tap_stream_id][key] = val - return state + return st.write_bookmark(state, tap_stream_id, key, val) def clear_bookmark(state, tap_stream_id, key): - state = ensure_bookmark_path(state, ['bookmarks', tap_stream_id]) - state['bookmarks'][tap_stream_id].pop(key, None) - return state + return st.clear_bookmark(state, tap_stream_id, key) def reset_stream(state, tap_stream_id): - state = ensure_bookmark_path(state, ['bookmarks', tap_stream_id]) - state['bookmarks'][tap_stream_id] = {} - return state + return st.reset_stream(state, tap_stream_id) def get_bookmark(state, tap_stream_id, key, default=None): - return state.get('bookmarks', {}).get(tap_stream_id, {}).get(key, default) + return st.get_bookmark(state, tap_stream_id, key, default) def set_offset(state, tap_stream_id, offset_key, offset_value): - state = ensure_bookmark_path(state, ['bookmarks', tap_stream_id, "offset", offset_key]) - state['bookmarks'][tap_stream_id]["offset"][offset_key] = offset_value - return state + return st.set_offset(state, tap_stream_id, offset_key, offset_value) def clear_offset(state, tap_stream_id): - return clear_bookmark(state, tap_stream_id, "offset") + return st.clear_offset(state, tap_stream_id) def get_offset(state, tap_stream_id, default=None): - return state.get('bookmarks', {}).get(tap_stream_id, {}).get("offset", default) + return st.get_offset(state, tap_stream_id, default) def set_currently_syncing(state, tap_stream_id): - state['currently_syncing'] = tap_stream_id - return state + return st.set_currently_syncing(state, tap_stream_id) def get_currently_syncing(state, default=None): - return state.get('currently_syncing', default) + return st.get_currently_syncing(state, default) diff --git a/singer/state.py b/singer/state.py new file mode 100644 index 0000000..772a919 --- /dev/null +++ b/singer/state.py @@ -0,0 +1,60 @@ +def ensure_state_path(state, path): + submap = state + for path_component in path: + if submap.get(path_component) is None: + submap[path_component] = {} + + submap = submap[path_component] + return state + +def write_bookmark(state, tap_stream_id, key, val): + state = ensure_state_path(state, ['bookmarks', tap_stream_id]) + state['bookmarks'][tap_stream_id][key] = val + return state + +def clear_bookmark(state, tap_stream_id, key): + state = ensure_state_path(state, ['bookmarks', tap_stream_id]) + state['bookmarks'][tap_stream_id].pop(key, None) + return state + +def reset_stream(state, tap_stream_id): + state = ensure_state_path(state, ['bookmarks', tap_stream_id]) + state['bookmarks'][tap_stream_id] = {} + if 'activate_versions' in state: + state = ensure_state_path(state, ['activate_versions', tap_stream_id]) + state['activate_versions'][tap_stream_id] = {} + return state + +def get_bookmark(state, tap_stream_id, key, default=None): + return state.get('bookmarks', {}).get(tap_stream_id, {}).get(key, default) + +def set_offset(state, tap_stream_id, offset_key, offset_value): + state = ensure_state_path(state, ['bookmarks', tap_stream_id, "offset", offset_key]) + state['bookmarks'][tap_stream_id]["offset"][offset_key] = offset_value + return state + +def clear_offset(state, tap_stream_id): + return clear_bookmark(state, tap_stream_id, "offset") + +def get_offset(state, tap_stream_id, default=None): + return state.get('bookmarks', {}).get(tap_stream_id, {}).get("offset", default) + +def set_currently_syncing(state, tap_stream_id): + state['currently_syncing'] = tap_stream_id + return state + +def get_currently_syncing(state, default=None): + return state.get('currently_syncing', default) + +def write_version(state, tap_stream_id, key, val): + state = ensure_state_path(state, ['activate_versions', tap_stream_id]) + state['activate_versions'][tap_stream_id][key] = val + return state + +def clear_version(state, tap_stream_id, key): + state = ensure_state_path(state, ['activate_versions', tap_stream_id]) + state['activate_versions'][tap_stream_id].pop(key, None) + return state + +def get_version(state, tap_stream_id, key, default=None): + return state.get('activate_versions', {}).get(tap_stream_id, {}).get(key, default) diff --git a/tests/test_bookmarks.py b/tests/test_bookmarks.py deleted file mode 100644 index 4902105..0000000 --- a/tests/test_bookmarks.py +++ /dev/null @@ -1,166 +0,0 @@ -import unittest -from singer import bookmarks - -class TestGetBookmark(unittest.TestCase): - def test_empty_state(self): - empty_state = {} - - # Case with no value to fall back on - self.assertIsNone(bookmarks.get_bookmark(empty_state, 'some_stream', 'my_key')) - - # Case with a given default - self.assertEqual(bookmarks.get_bookmark(empty_state, 'some_stream', 'my_key', 'default_value'), - 'default_value') - - def test_empty_bookmark(self): - empty_bookmark = {'bookmarks':{}} - - # Case with no value to fall back on - self.assertIsNone(bookmarks.get_bookmark(empty_bookmark, 'some_stream', 'my_key')) - - # Case with a given default - self.assertEqual(bookmarks.get_bookmark(empty_bookmark, 'some_stream', 'my_key', 'default_value'), - 'default_value') - - def test_non_empty_state(self): - stream_id_1 = 'customers' - bookmark_key_1 = 'datetime' - bookmark_val_1 = 123456789 - - non_empty_state = { - 'bookmarks' : { - stream_id_1 : { - bookmark_key_1 : bookmark_val_1 - } - } - } - - # - # Cases with no value to fall back on - # - - # Bad stream, bad key - self.assertIsNone(bookmarks.get_bookmark(non_empty_state, 'some_stream', 'my_key')) - - # Good stream, bad key - self.assertIsNone(bookmarks.get_bookmark(non_empty_state, stream_id_1, 'my_key')) - - # Good stream, good key - self.assertEqual(bookmarks.get_bookmark(non_empty_state, stream_id_1, bookmark_key_1), - bookmark_val_1) - - # - # Cases with a given default - # - - # Bad stream, bad key - self.assertEqual(bookmarks.get_bookmark(non_empty_state, 'some_stream', 'my_key', 'default_value'), - 'default_value') - - # Bad stream, good key - self.assertEqual(bookmarks.get_bookmark(non_empty_state, 'some_stream', bookmark_key_1, 'default_value'), - 'default_value') - - # Good stream, bad key - self.assertEqual(bookmarks.get_bookmark(non_empty_state, stream_id_1, 'my_key', 'default_value'), - 'default_value') - - # Good stream, good key - self.assertEqual(bookmarks.get_bookmark(non_empty_state, stream_id_1, bookmark_key_1, 'default_value'), - bookmark_val_1) - - -class TestGetOffset(unittest.TestCase): - def test_empty_state(self): - empty_state = {} - - # Case with no value to fall back on - self.assertIsNone(bookmarks.get_offset(empty_state, 'some_stream')) - - # Case with a given default - self.assertEqual(bookmarks.get_offset(empty_state, 'some_stream', 'default_value'), - 'default_value') - - def test_empty_bookmark(self): - empty_bookmark = {'bookmarks':{}} - - # Case with no value to fall back on - self.assertIsNone(bookmarks.get_offset(empty_bookmark, 'some_stream')) - - # Case with a given default - self.assertEqual(bookmarks.get_offset(empty_bookmark, 'some_stream', 'default_value'), - 'default_value') - - def test_non_empty_state(self): - stream_id_1 = 'customers' - bookmark_key_1 = 'datetime' - bookmark_val_1 = 123456789 - offset_val = 'fizzy water' - - non_empty_state = { - 'bookmarks' : { - stream_id_1 : { - bookmark_key_1 : bookmark_val_1, - 'offset' : offset_val - } - } - } - - # - # Cases with no value to fall back on - # - - # Bad stream - self.assertIsNone(bookmarks.get_offset(non_empty_state, 'some_stream')) - - # Good stream - self.assertEqual(bookmarks.get_offset(non_empty_state, stream_id_1), - offset_val) - - # - # Case with a given default - # - - # Bad stream - self.assertEqual(bookmarks.get_offset(non_empty_state, 'some_stream', 'default_value'), - 'default_value') - - # Good stream - self.assertEqual(bookmarks.get_offset(non_empty_state, stream_id_1, 'default_value'), - offset_val) - - -class TestGetCurrentlySyncing(unittest.TestCase): - def test_empty_state(self): - empty_state = {} - - # Case with no value to fall back on - self.assertIsNone(bookmarks.get_currently_syncing(empty_state)) - - # Case with a given default - self.assertEqual(bookmarks.get_currently_syncing(empty_state, 'default_value'), - 'default_value') - - def test_non_empty_state(self): - stream_id_1 = 'customers' - bookmark_key_1 = 'datetime' - bookmark_val_1 = 123456789 - offset_val = 'fizzy water' - - non_empty_state = { - 'bookmarks' : { - stream_id_1 : { - bookmark_key_1 : bookmark_val_1, - 'offset' : offset_val - } - }, - 'currently_syncing' : stream_id_1 - } - - # Case with no value to fall back on - self.assertEqual(bookmarks.get_currently_syncing(non_empty_state), - stream_id_1) - - # Case with a given default - self.assertEqual(bookmarks.get_currently_syncing(non_empty_state, 'default_value'), - stream_id_1) diff --git a/tests/test_state.py b/tests/test_state.py new file mode 100644 index 0000000..beb7b23 --- /dev/null +++ b/tests/test_state.py @@ -0,0 +1,235 @@ +import unittest +from singer import state as st + +class TestGetBookmark(unittest.TestCase): + def test_empty_state(self): + empty_state = {} + + # Case with no value to fall back on + self.assertIsNone(st.get_bookmark(empty_state, 'some_stream', 'my_key')) + + # Case with a given default + self.assertEqual(st.get_bookmark(empty_state, 'some_stream', 'my_key', 'default_value'), + 'default_value') + + def test_empty_bookmark(self): + empty_bookmark = {'bookmarks':{}} + + # Case with no value to fall back on + self.assertIsNone(st.get_bookmark(empty_bookmark, 'some_stream', 'my_key')) + + # Case with a given default + self.assertEqual(st.get_bookmark(empty_bookmark, 'some_stream', 'my_key', 'default_value'), + 'default_value') + + def test_non_empty_state(self): + stream_id_1 = 'customers' + bookmark_key_1 = 'datetime' + bookmark_val_1 = 123456789 + + non_empty_state = { + 'bookmarks' : { + stream_id_1 : { + bookmark_key_1 : bookmark_val_1 + } + } + } + + # + # Cases with no value to fall back on + # + + # Bad stream, bad key + self.assertIsNone(st.get_bookmark(non_empty_state, 'some_stream', 'my_key')) + + # Good stream, bad key + self.assertIsNone(st.get_bookmark(non_empty_state, stream_id_1, 'my_key')) + + # Good stream, good key + self.assertEqual(st.get_bookmark(non_empty_state, stream_id_1, bookmark_key_1), + bookmark_val_1) + + # + # Cases with a given default + # + + # Bad stream, bad key + self.assertEqual(st.get_bookmark(non_empty_state, 'some_stream', 'my_key', 'default_value'), + 'default_value') + + # Bad stream, good key + self.assertEqual(st.get_bookmark(non_empty_state, 'some_stream', bookmark_key_1, 'default_value'), + 'default_value') + + # Good stream, bad key + self.assertEqual(st.get_bookmark(non_empty_state, stream_id_1, 'my_key', 'default_value'), + 'default_value') + + # Good stream, good key + self.assertEqual(st.get_bookmark(non_empty_state, stream_id_1, bookmark_key_1, 'default_value'), + bookmark_val_1) + + +class TestGetOffset(unittest.TestCase): + def test_empty_state(self): + empty_state = {} + + # Case with no value to fall back on + self.assertIsNone(st.get_offset(empty_state, 'some_stream')) + + # Case with a given default + self.assertEqual(st.get_offset(empty_state, 'some_stream', 'default_value'), + 'default_value') + + def test_empty_bookmark(self): + empty_bookmark = {'bookmarks':{}} + + # Case with no value to fall back on + self.assertIsNone(st.get_offset(empty_bookmark, 'some_stream')) + + # Case with a given default + self.assertEqual(st.get_offset(empty_bookmark, 'some_stream', 'default_value'), + 'default_value') + + def test_non_empty_state(self): + stream_id_1 = 'customers' + bookmark_key_1 = 'datetime' + bookmark_val_1 = 123456789 + offset_val = 'fizzy water' + + non_empty_state = { + 'bookmarks' : { + stream_id_1 : { + bookmark_key_1 : bookmark_val_1, + 'offset' : offset_val + } + } + } + + # + # Cases with no value to fall back on + # + + # Bad stream + self.assertIsNone(st.get_offset(non_empty_state, 'some_stream')) + + # Good stream + self.assertEqual(st.get_offset(non_empty_state, stream_id_1), + offset_val) + + # + # Case with a given default + # + + # Bad stream + self.assertEqual(st.get_offset(non_empty_state, 'some_stream', 'default_value'), + 'default_value') + + # Good stream + self.assertEqual(st.get_offset(non_empty_state, stream_id_1, 'default_value'), + offset_val) + + +class TestGetCurrentlySyncing(unittest.TestCase): + def test_empty_state(self): + empty_state = {} + + # Case with no value to fall back on + self.assertIsNone(st.get_currently_syncing(empty_state)) + + # Case with a given default + self.assertEqual(st.get_currently_syncing(empty_state, 'default_value'), + 'default_value') + + def test_non_empty_state(self): + stream_id_1 = 'customers' + bookmark_key_1 = 'datetime' + bookmark_val_1 = 123456789 + offset_val = 'fizzy water' + + non_empty_state = { + 'bookmarks' : { + stream_id_1 : { + bookmark_key_1 : bookmark_val_1, + 'offset' : offset_val + } + }, + 'currently_syncing' : stream_id_1 + } + + # Case with no value to fall back on + self.assertEqual(st.get_currently_syncing(non_empty_state), + stream_id_1) + + # Case with a given default + self.assertEqual(st.get_currently_syncing(non_empty_state, 'default_value'), + stream_id_1) + + +class TestActivateVersion(unittest.TestCase): + def test_empty_state(self): + empty_state = {} + + # Case with no value to fall back on + self.assertIsNone(st.get_version(empty_state, 'some_stream', 'my_key')) + + # Case with a given default + self.assertEqual(st.get_version(empty_state, 'some_stream', 'my_key', 'default_value'), + 'default_value') + + def test_empty_activate_versions(self): + empty_versions = {'activate_versions':{}} + + # Case with no value to fall back on + self.assertIsNone(st.get_version(empty_versions, 'some_stream', 'my_key')) + + # Case with a given default + self.assertEqual(st.get_version(empty_versions, 'some_stream', 'my_key', 'default_value'), + 'default_value') + + def test_non_empty_state(self): + stream_id_1 = 'customers' + version_key_1 = 'version' + version_val_1 = 123456789 + + non_empty_state = { + 'activate_versions' : { + stream_id_1 : { + version_key_1 : version_val_1 + } + } + } + + # + # Cases with no value to fall back on + # + + # Bad stream, bad key + self.assertIsNone(st.get_version(non_empty_state, 'some_stream', 'my_key')) + + # Good stream, bad key + self.assertIsNone(st.get_version(non_empty_state, stream_id_1, 'my_key')) + + # Good stream, good key + self.assertEqual(st.get_version(non_empty_state, stream_id_1, version_key_1), + version_val_1) + + # + # Cases with a given default + # + + # Bad stream, bad key + self.assertEqual(st.get_version(non_empty_state, 'some_stream', 'my_key', 'default_value'), + 'default_value') + + # Bad stream, good key + self.assertEqual(st.get_version(non_empty_state, 'some_stream', version_key_1, 'default_value'), + 'default_value') + + # Good stream, bad key + self.assertEqual(st.get_version(non_empty_state, stream_id_1, 'my_key', 'default_value'), + 'default_value') + + # Good stream, good key + self.assertEqual(st.get_version(non_empty_state, stream_id_1, version_key_1, 'default_value'), + version_val_1) From ee0cc778d41f6cc8d7684b29ffa1a648976f3a85 Mon Sep 17 00:00:00 2001 From: Ben Allred Date: Mon, 9 Feb 2026 14:45:11 -0700 Subject: [PATCH 09/11] export singer.state functions from main module v5 (#191) * export singer.state functions from main module v5 * - bump version to 5.17.0 and add changelog entry - rename singer.state.write_bookmark to singer.state.set_bookmark - add test cases for set_bookmark, clear_bookmark, set_offset, clear_offset, set_version, and clear_version ----------------------------- Co-authored-by: Ben Allred * disable pylint warning for reimported ----------------------------- Co-authored-by: Ben Allred --------- Co-authored-by: Leslie VanDeMark --- CHANGELOG.md | 5 ++++ Makefile | 2 +- setup.py | 2 +- singer/__init__.py | 9 +++++-- singer/bookmarks.py | 2 +- singer/state.py | 4 ++-- tests/test_state.py | 58 ++++++++++++++++++++++++++++++++++++++++++--- 7 files changed, 72 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eb31560..1aac76a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## 5.17.0 + * Export state version functions from main module + * Rename singer.state.write_bookmark to singer.state.set_bookmark + * [#191](https://github.com/singer-io/singer-python/pull/191) + ## 5.16.0 * Add `activate_versions` state functions [#189](https://github.com/singer-io/singer-python/pull/189) * Deprecates bookmarks.py, functions are moved to state.py diff --git a/Makefile b/Makefile index 2fe943e..b651170 100644 --- a/Makefile +++ b/Makefile @@ -8,5 +8,5 @@ install: check_prereqs python3 -m pip install -e '.[dev]' test: install - pylint singer --extension-pkg-whitelist=ciso8601 -d missing-docstring,broad-except,bare-except,too-many-return-statements,too-many-branches,too-many-arguments,no-else-return,too-few-public-methods,fixme,protected-access + pylint singer --extension-pkg-whitelist=ciso8601 -d missing-docstring,broad-except,bare-except,too-many-return-statements,too-many-branches,too-many-arguments,no-else-return,too-few-public-methods,fixme,protected-access,reimported nosetests --with-doctest -v diff --git a/setup.py b/setup.py index fa8fb22..82c9d80 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.16.0', + version='5.17.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], diff --git a/singer/__init__.py b/singer/__init__.py index 26f0043..613dc99 100644 --- a/singer/__init__.py +++ b/singer/__init__.py @@ -61,8 +61,10 @@ ) from singer.schema import Schema -from singer.bookmarks import ( - write_bookmark, +from singer.state import ( + set_bookmark, + # for backwards compatibility, use set_bookmark instead + set_bookmark as write_bookmark, get_bookmark, clear_bookmark, reset_stream, @@ -71,6 +73,9 @@ get_offset, set_currently_syncing, get_currently_syncing, + set_version, + clear_version, + get_version, ) from singer.exceptions import ( diff --git a/singer/bookmarks.py b/singer/bookmarks.py index 2cd9636..b506ff2 100644 --- a/singer/bookmarks.py +++ b/singer/bookmarks.py @@ -6,7 +6,7 @@ def ensure_bookmark_path(state, path): return st.ensure_state_path(state, path) def write_bookmark(state, tap_stream_id, key, val): - return st.write_bookmark(state, tap_stream_id, key, val) + return st.set_bookmark(state, tap_stream_id, key, val) def clear_bookmark(state, tap_stream_id, key): return st.clear_bookmark(state, tap_stream_id, key) diff --git a/singer/state.py b/singer/state.py index 772a919..10f0cf9 100644 --- a/singer/state.py +++ b/singer/state.py @@ -7,7 +7,7 @@ def ensure_state_path(state, path): submap = submap[path_component] return state -def write_bookmark(state, tap_stream_id, key, val): +def set_bookmark(state, tap_stream_id, key, val): state = ensure_state_path(state, ['bookmarks', tap_stream_id]) state['bookmarks'][tap_stream_id][key] = val return state @@ -46,7 +46,7 @@ def set_currently_syncing(state, tap_stream_id): def get_currently_syncing(state, default=None): return state.get('currently_syncing', default) -def write_version(state, tap_stream_id, key, val): +def set_version(state, tap_stream_id, key, val): state = ensure_state_path(state, ['activate_versions', tap_stream_id]) state['activate_versions'][tap_stream_id][key] = val return state diff --git a/tests/test_state.py b/tests/test_state.py index beb7b23..72d8512 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -1,7 +1,7 @@ import unittest from singer import state as st -class TestGetBookmark(unittest.TestCase): +class TestBookmark(unittest.TestCase): def test_empty_state(self): empty_state = {} @@ -69,8 +69,24 @@ def test_non_empty_state(self): self.assertEqual(st.get_bookmark(non_empty_state, stream_id_1, bookmark_key_1, 'default_value'), bookmark_val_1) + def test_set_bookmark(self): + stream_id_1 = 'customers' + bookmark_key_1 = 'datetime' + bookmark_val_1 = 123456789 + + result = st.set_bookmark({'bookmarks': {stream_id_1: {bookmark_key_1: 'old-value'}}}, stream_id_1, bookmark_key_1, bookmark_val_1) + self.assertEqual(result, {'bookmarks': {stream_id_1: {bookmark_key_1: bookmark_val_1}}}) -class TestGetOffset(unittest.TestCase): + def test_clear_bookmark(self): + stream_id_1 = 'customers' + bookmark_key_1 = 'datetime' + bookmark_val_1 = 123456789 + + result = st.clear_bookmark({'bookmarks': {stream_id_1: {bookmark_key_1: bookmark_val_1}}}, stream_id_1, bookmark_key_1) + self.assertEqual(result, {'bookmarks': {stream_id_1: {}}}) + + +class TestOffset(unittest.TestCase): def test_empty_state(self): empty_state = {} @@ -129,8 +145,24 @@ def test_non_empty_state(self): self.assertEqual(st.get_offset(non_empty_state, stream_id_1, 'default_value'), offset_val) + def test_set_offset(self): + stream_id_1 = 'customers' + offset_key_1 = 'datetime' + offset_val_1 = 123456789 + + result = st.set_offset({'bookmarks': {stream_id_1: {'offset': {offset_key_1: 'old-value'}}}}, stream_id_1, offset_key_1, offset_val_1) + self.assertEqual(result, {'bookmarks': {stream_id_1: {'offset': {offset_key_1: offset_val_1}}}}) -class TestGetCurrentlySyncing(unittest.TestCase): + def test_clear_offset(self): + stream_id_1 = 'customers' + offset_key_1 = 'datetime' + offset_val_1 = 123456789 + + result = st.clear_offset({'bookmarks': {stream_id_1: {'offset': {offset_key_1: offset_val_1}}}}, stream_id_1) + self.assertEqual(result, {'bookmarks': {stream_id_1: {}}}) + + +class TestCurrentlySyncing(unittest.TestCase): def test_empty_state(self): empty_state = {} @@ -165,6 +197,10 @@ def test_non_empty_state(self): self.assertEqual(st.get_currently_syncing(non_empty_state, 'default_value'), stream_id_1) + def test_set_currently_syncing(self): + result = st.set_currently_syncing({'currently_syncing': 'foo'}, 'bar') + self.assertEqual(result, {'currently_syncing': 'bar'}) + class TestActivateVersion(unittest.TestCase): def test_empty_state(self): @@ -233,3 +269,19 @@ def test_non_empty_state(self): # Good stream, good key self.assertEqual(st.get_version(non_empty_state, stream_id_1, version_key_1, 'default_value'), version_val_1) + + def test_set_version(self): + stream_id_1 = 'customers' + version_key_1 = 'datetime' + version_val_1 = 123456789 + + result = st.set_version({'activate_versions': {stream_id_1: {version_key_1: 'old-value'}}}, stream_id_1, version_key_1, version_val_1) + self.assertEqual(result, {'activate_versions': {stream_id_1: {version_key_1: version_val_1}}}) + + def test_clear_version(self): + stream_id_1 = 'customers' + version_key_1 = 'datetime' + version_val_1 = 123456789 + + result = st.clear_version({'activate_versions': {stream_id_1: {version_key_1: version_val_1}}}, stream_id_1, version_key_1) + self.assertEqual(result, {'activate_versions': {stream_id_1: {}}}) From 01227d8165dd1fdeadd52de24512526426564ccd Mon Sep 17 00:00:00 2001 From: Ben Allred Date: Tue, 10 Feb 2026 08:51:50 -0700 Subject: [PATCH 10/11] SAC-30196: remove key from version functions v5 (#193) * update set_version, clear_version, and get_version state functions ----------------------------- Co-authored-by: Ben Allred * bump to version 5.18.0 and update changelog ----------------------------- Co-authored-by: Ben Allred --------- Co-authored-by: Leslie VanDeMark --- CHANGELOG.md | 3 +++ setup.py | 2 +- singer/state.py | 12 +++++------ tests/test_state.py | 50 +++++++++++++++------------------------------ 4 files changed, 27 insertions(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1aac76a..163f1b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 5.18.0 + * Remove `key` from version state functions [#193](https://github.com/singer-io/singer-python/pull/193) + ## 5.17.0 * Export state version functions from main module * Rename singer.state.write_bookmark to singer.state.set_bookmark diff --git a/setup.py b/setup.py index 82c9d80..21441c1 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.17.0', + version='5.18.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], diff --git a/singer/state.py b/singer/state.py index 10f0cf9..a6b24fc 100644 --- a/singer/state.py +++ b/singer/state.py @@ -46,15 +46,15 @@ def set_currently_syncing(state, tap_stream_id): def get_currently_syncing(state, default=None): return state.get('currently_syncing', default) -def set_version(state, tap_stream_id, key, val): +def set_version(state, tap_stream_id, val): state = ensure_state_path(state, ['activate_versions', tap_stream_id]) - state['activate_versions'][tap_stream_id][key] = val + state['activate_versions'][tap_stream_id] = val return state -def clear_version(state, tap_stream_id, key): +def clear_version(state, tap_stream_id): state = ensure_state_path(state, ['activate_versions', tap_stream_id]) - state['activate_versions'][tap_stream_id].pop(key, None) + state['activate_versions'].pop(tap_stream_id, None) return state -def get_version(state, tap_stream_id, key, default=None): - return state.get('activate_versions', {}).get(tap_stream_id, {}).get(key, default) +def get_version(state, tap_stream_id, default=None): + return state.get('activate_versions', {}).get(tap_stream_id, default) diff --git a/tests/test_state.py b/tests/test_state.py index 72d8512..eac75e1 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -207,32 +207,29 @@ def test_empty_state(self): empty_state = {} # Case with no value to fall back on - self.assertIsNone(st.get_version(empty_state, 'some_stream', 'my_key')) + self.assertIsNone(st.get_version(empty_state, 'some_stream')) # Case with a given default - self.assertEqual(st.get_version(empty_state, 'some_stream', 'my_key', 'default_value'), + self.assertEqual(st.get_version(empty_state, 'some_stream', 'default_value'), 'default_value') def test_empty_activate_versions(self): empty_versions = {'activate_versions':{}} # Case with no value to fall back on - self.assertIsNone(st.get_version(empty_versions, 'some_stream', 'my_key')) + self.assertIsNone(st.get_version(empty_versions, 'some_stream')) # Case with a given default - self.assertEqual(st.get_version(empty_versions, 'some_stream', 'my_key', 'default_value'), + self.assertEqual(st.get_version(empty_versions, 'some_stream', 'default_value'), 'default_value') def test_non_empty_state(self): stream_id_1 = 'customers' - version_key_1 = 'version' version_val_1 = 123456789 non_empty_state = { 'activate_versions' : { - stream_id_1 : { - version_key_1 : version_val_1 - } + stream_id_1 : version_val_1 } } @@ -240,48 +237,35 @@ def test_non_empty_state(self): # Cases with no value to fall back on # - # Bad stream, bad key - self.assertIsNone(st.get_version(non_empty_state, 'some_stream', 'my_key')) - - # Good stream, bad key - self.assertIsNone(st.get_version(non_empty_state, stream_id_1, 'my_key')) + # Bad stream + self.assertIsNone(st.get_version(non_empty_state, 'some_stream')) - # Good stream, good key - self.assertEqual(st.get_version(non_empty_state, stream_id_1, version_key_1), + # Good stream + self.assertEqual(st.get_version(non_empty_state, stream_id_1), version_val_1) # # Cases with a given default # - # Bad stream, bad key - self.assertEqual(st.get_version(non_empty_state, 'some_stream', 'my_key', 'default_value'), - 'default_value') - - # Bad stream, good key - self.assertEqual(st.get_version(non_empty_state, 'some_stream', version_key_1, 'default_value'), - 'default_value') - - # Good stream, bad key - self.assertEqual(st.get_version(non_empty_state, stream_id_1, 'my_key', 'default_value'), + # Bad stream + self.assertEqual(st.get_version(non_empty_state, 'some_stream', 'default_value'), 'default_value') - # Good stream, good key - self.assertEqual(st.get_version(non_empty_state, stream_id_1, version_key_1, 'default_value'), + # Good stream + self.assertEqual(st.get_version(non_empty_state, stream_id_1, 'default_value'), version_val_1) def test_set_version(self): stream_id_1 = 'customers' - version_key_1 = 'datetime' version_val_1 = 123456789 - result = st.set_version({'activate_versions': {stream_id_1: {version_key_1: 'old-value'}}}, stream_id_1, version_key_1, version_val_1) - self.assertEqual(result, {'activate_versions': {stream_id_1: {version_key_1: version_val_1}}}) + result = st.set_version({'activate_versions': {stream_id_1: 'old-value'}}, stream_id_1, version_val_1) + self.assertEqual(result, {'activate_versions': {stream_id_1: version_val_1}}) def test_clear_version(self): stream_id_1 = 'customers' - version_key_1 = 'datetime' version_val_1 = 123456789 - result = st.clear_version({'activate_versions': {stream_id_1: {version_key_1: version_val_1}}}, stream_id_1, version_key_1) - self.assertEqual(result, {'activate_versions': {stream_id_1: {}}}) + result = st.clear_version({'activate_versions': {stream_id_1: version_val_1}}, stream_id_1) + self.assertEqual(result, {'activate_versions': {}}) From 12ee23ea20f3df1d0cf31aab9e997ed57444e022 Mon Sep 17 00:00:00 2001 From: Dylan Sprayberry <28106103+dsprayberry@users.noreply.github.com> Date: Fri, 27 Feb 2026 12:10:42 -0500 Subject: [PATCH 11/11] Rename `activate_versions` key in state to `versions` (#195) --- CHANGELOG.md | 3 +++ setup.py | 2 +- singer/state.py | 16 ++++++++-------- tests/test_state.py | 14 +++++++------- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 163f1b2..2b42de8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 5.19.0 + * Rename state key `activate_versions` to `versions` in all relevant locations [#195](https://github.com/singer-io/singer-python/pull/195) + ## 5.18.0 * Remove `key` from version state functions [#193](https://github.com/singer-io/singer-python/pull/193) diff --git a/setup.py b/setup.py index 21441c1..7cd9eba 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.18.0', + version='5.19.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], diff --git a/singer/state.py b/singer/state.py index a6b24fc..f7d4357 100644 --- a/singer/state.py +++ b/singer/state.py @@ -20,9 +20,9 @@ def clear_bookmark(state, tap_stream_id, key): def reset_stream(state, tap_stream_id): state = ensure_state_path(state, ['bookmarks', tap_stream_id]) state['bookmarks'][tap_stream_id] = {} - if 'activate_versions' in state: - state = ensure_state_path(state, ['activate_versions', tap_stream_id]) - state['activate_versions'][tap_stream_id] = {} + if 'versions' in state: + state = ensure_state_path(state, ['versions', tap_stream_id]) + state['versions'][tap_stream_id] = {} return state def get_bookmark(state, tap_stream_id, key, default=None): @@ -47,14 +47,14 @@ def get_currently_syncing(state, default=None): return state.get('currently_syncing', default) def set_version(state, tap_stream_id, val): - state = ensure_state_path(state, ['activate_versions', tap_stream_id]) - state['activate_versions'][tap_stream_id] = val + state = ensure_state_path(state, ['versions', tap_stream_id]) + state['versions'][tap_stream_id] = val return state def clear_version(state, tap_stream_id): - state = ensure_state_path(state, ['activate_versions', tap_stream_id]) - state['activate_versions'].pop(tap_stream_id, None) + state = ensure_state_path(state, ['versions', tap_stream_id]) + state['versions'].pop(tap_stream_id, None) return state def get_version(state, tap_stream_id, default=None): - return state.get('activate_versions', {}).get(tap_stream_id, default) + return state.get('versions', {}).get(tap_stream_id, default) diff --git a/tests/test_state.py b/tests/test_state.py index eac75e1..10a9c14 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -213,8 +213,8 @@ def test_empty_state(self): self.assertEqual(st.get_version(empty_state, 'some_stream', 'default_value'), 'default_value') - def test_empty_activate_versions(self): - empty_versions = {'activate_versions':{}} + def test_empty_versions(self): + empty_versions = {'versions':{}} # Case with no value to fall back on self.assertIsNone(st.get_version(empty_versions, 'some_stream')) @@ -228,7 +228,7 @@ def test_non_empty_state(self): version_val_1 = 123456789 non_empty_state = { - 'activate_versions' : { + 'versions' : { stream_id_1 : version_val_1 } } @@ -260,12 +260,12 @@ def test_set_version(self): stream_id_1 = 'customers' version_val_1 = 123456789 - result = st.set_version({'activate_versions': {stream_id_1: 'old-value'}}, stream_id_1, version_val_1) - self.assertEqual(result, {'activate_versions': {stream_id_1: version_val_1}}) + result = st.set_version({'versions': {stream_id_1: 'old-value'}}, stream_id_1, version_val_1) + self.assertEqual(result, {'versions': {stream_id_1: version_val_1}}) def test_clear_version(self): stream_id_1 = 'customers' version_val_1 = 123456789 - result = st.clear_version({'activate_versions': {stream_id_1: version_val_1}}, stream_id_1) - self.assertEqual(result, {'activate_versions': {}}) + result = st.clear_version({'versions': {stream_id_1: version_val_1}}, stream_id_1) + self.assertEqual(result, {'versions': {}})