From 24ff5dd5ff5d8781ec55a626754469311dd3ddca Mon Sep 17 00:00:00 2001 From: Andrew Madonna Date: Mon, 12 Nov 2018 18:45:55 -0500 Subject: [PATCH 01/45] Retain arugment paths --- singer/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/singer/utils.py b/singer/utils.py index 4cd179d..85f3d39 100644 --- a/singer/utils.py +++ b/singer/utils.py @@ -165,14 +165,18 @@ def parse_args(required_config_keys): args = parser.parse_args() if args.config: + setattr(args, 'config_path', args.config) args.config = load_json(args.config) if args.state: + setattr(args, 'state_path', args.state) args.state = load_json(args.state) else: args.state = {} if args.properties: + setattr(args, 'properties_path', args.properties) args.properties = load_json(args.properties) if args.catalog: + setattr(args, 'catalog_path', args.catalog) args.catalog = Catalog.load(args.catalog) check_config(args.config, required_config_keys) From 4f507e8a9f0a09cdada983a4f275f6e0f4d1b63d Mon Sep 17 00:00:00 2001 From: nick-mccoy Date: Wed, 5 Jun 2019 20:36:45 +0000 Subject: [PATCH 02/45] bump version to 5.6.1 --- CHANGELOG.md | 3 +++ setup.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a1e5e4c..0729b7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 5.6.1 + * Retain argument paths in `parse_args` [#88](https://github.com/singer-io/singer-python/pull/88) + ## 5.5.0 * Add the ability to specify a default value when getting a bookmark [#95](https://github.com/singer-io/singer-python/pull/95) diff --git a/setup.py b/setup.py index 9be4c05..3980c5f 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.6.0', + version='5.6.1', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], From 008fa3867062adf7f9e3c3375fa151a0d6f5ae7d Mon Sep 17 00:00:00 2001 From: Kyle Allan Date: Wed, 19 Jun 2019 
11:09:04 -0400 Subject: [PATCH 03/45] Added functions for get_selected_streams (#100) * Added functions for get_selected_streams * refactor functions onto class; pylint --- singer/__init__.py | 5 ++++- singer/catalog.py | 37 ++++++++++++++++++++++++++++++++++--- tests/test_catalog.py | 36 +++++++++++++++++++++++++++++++++++- 3 files changed, 73 insertions(+), 5 deletions(-) diff --git a/singer/__init__.py b/singer/__init__.py index 2ad7333..d3b2c87 100644 --- a/singer/__init__.py +++ b/singer/__init__.py @@ -55,7 +55,10 @@ resolve_schema_references ) -from singer.catalog import Catalog +from singer.catalog import ( + Catalog, + CatalogEntry +) from singer.schema import Schema from singer.bookmarks import ( diff --git a/singer/catalog.py b/singer/catalog.py index 8d52262..6574bbe 100644 --- a/singer/catalog.py +++ b/singer/catalog.py @@ -1,9 +1,14 @@ '''Provides an object model for a Singer Catalog.''' - import json import sys -from singer.schema import Schema +from . import metadata as metadata_module +from .bookmarks import get_currently_syncing +from .logger import get_logger +from .schema import Schema + +LOGGER = get_logger() + # pylint: disable=too-many-instance-attributes class CatalogEntry(): @@ -33,7 +38,9 @@ def __eq__(self, other): return self.__dict__ == other.__dict__ def is_selected(self): - return self.schema.selected # pylint: disable=no-member + mdata = metadata_module.to_map(self.metadata) + # pylint: disable=no-member + return self.schema.selected or metadata_module.get(mdata, (), 'selected') def to_dict(self): result = {} @@ -116,3 +123,27 @@ def get_stream(self, tap_stream_id): if stream.tap_stream_id == tap_stream_id: return stream return None + + def _shuffle_streams(self, state): + currently_syncing = get_currently_syncing(state) + + if currently_syncing is None: + return self.streams + + matching_index = 0 + for i, catalog_entry in enumerate(self.streams): + if catalog_entry.tap_stream_id == currently_syncing: + matching_index = i + break + 
top_half = self.streams[matching_index:] + bottom_half = self.streams[:matching_index] + return top_half + bottom_half + + + def get_selected_streams(self, state): + for stream in self._shuffle_streams(state): + if not stream.is_selected(): + LOGGER.info('Skipping stream: %s', stream.tap_stream_id) + continue + + yield stream diff --git a/tests/test_catalog.py b/tests/test_catalog.py index 4cb9390..0420f6a 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -3,6 +3,40 @@ from singer.schema import Schema from singer.catalog import Catalog, CatalogEntry +class TestGetSelectedStreams(unittest.TestCase): + def test_one_selected_stream(self): + selected_entry = CatalogEntry(tap_stream_id='a', + schema=Schema(), + metadata=[{'metadata': + {'selected': True}, + 'breadcrumb': []}]) + catalog = Catalog( + [selected_entry, + CatalogEntry(tap_stream_id='b',schema=Schema(),metadata=[]), + CatalogEntry(tap_stream_id='c',schema=Schema(),metadata=[])]) + state = {} + selected_streams = catalog.get_selected_streams(state) + self.assertEquals([e for e in selected_streams],[selected_entry]) + + def test_resumes_currently_syncing_stream(self): + selected_entry_a = CatalogEntry(tap_stream_id='a', + schema=Schema(), + metadata=[{'metadata': + {'selected': True}, + 'breadcrumb': []}]) + selected_entry_c = CatalogEntry(tap_stream_id='c', + schema=Schema(), + metadata=[{'metadata': + {'selected': True}, + 'breadcrumb': []}]) + catalog = Catalog( + [selected_entry_a, + CatalogEntry(tap_stream_id='b',schema=Schema(),metadata=[]), + selected_entry_c]) + state = {'currently_syncing': 'c'} + selected_streams = catalog.get_selected_streams(state) + self.assertEquals([e for e in selected_streams][0],selected_entry_c) + class TestToDictAndFromDict(unittest.TestCase): dict_form = { @@ -89,7 +123,7 @@ def test_from_dict(self): def test_to_dict(self): self.assertEqual(self.dict_form, self.obj_form.to_dict()) - + class TestGetStream(unittest.TestCase): def test(self): From 
b4b4cc25adb831dae4ec5219a4976de3fc318f77 Mon Sep 17 00:00:00 2001 From: Kyle Allan Date: Fri, 21 Jun 2019 08:49:43 -0400 Subject: [PATCH 04/45] add functions to write_catalog (#101) --- singer/catalog.py | 7 +++++++ tests/test_catalog.py | 11 ++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/singer/catalog.py b/singer/catalog.py index 6574bbe..1767ff1 100644 --- a/singer/catalog.py +++ b/singer/catalog.py @@ -10,6 +10,13 @@ LOGGER = get_logger() +def write_catalog(catalog): + # If the catalog has no streams, log a warning + if not catalog.streams: + LOGGER.warning("Catalog being written with no streams.") + + json.dump(catalog.to_dict(), sys.stdout, indent=2) + # pylint: disable=too-many-instance-attributes class CatalogEntry(): diff --git a/tests/test_catalog.py b/tests/test_catalog.py index 0420f6a..cd6dc50 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -1,7 +1,16 @@ import unittest from singer.schema import Schema -from singer.catalog import Catalog, CatalogEntry +from singer.catalog import Catalog, CatalogEntry, write_catalog + +class TestWriteCatalog(unittest.TestCase): + def test_write_empty_catalog(self): + catalog = Catalog([]) + write_catalog(catalog) + + def test_write_catalog_with_streams(self): + catalog = Catalog([CatalogEntry(tap_stream_id='a',schema=Schema(),metadata=[])]) + write_catalog(catalog) class TestGetSelectedStreams(unittest.TestCase): def test_one_selected_stream(self): From 23397ae5a916f65ca85c03d8d8fa9d1f40c90ed4 Mon Sep 17 00:00:00 2001 From: Paul Santa Clara Date: Fri, 28 Jun 2019 13:11:44 -0400 Subject: [PATCH 05/45] bump version of backoff so that it can work with aiohttp --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3980c5f..970449a 100755 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ 'jsonschema==2.6.0', 'simplejson==3.11.1', 'python-dateutil>=2.6.0', - 'backoff==1.3.2', + 'backoff==1.8.0', ], extras_require={ 'dev': [ From 
a08c2a25bfd1ee3767de36a09f52cd8d0697d7a2 Mon Sep 17 00:00:00 2001 From: Paul Santa Clara Date: Mon, 1 Jul 2019 10:35:08 -0400 Subject: [PATCH 06/45] bumping version --- CHANGELOG.md | 3 +++ setup.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0729b7e..606bd77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,3 +31,6 @@ ## 5.0.5 * Sets the default format for dates to use %04Y so dates < 1000 are formatted with leading zeroes [#65](https://github.com/singer-io/singer-python/pull/65) + +## 5.7.0 + * Bumping backoff dependency to 1.8.0 for aiohttp support diff --git a/setup.py b/setup.py index 970449a..4284678 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.6.1', + version='5.7.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], From 6118a614ec1cfa931d789a5b56756478e7d5371b Mon Sep 17 00:00:00 2001 From: Dan Mosora <30501696+dmosorast@users.noreply.github.com> Date: Mon, 1 Jul 2019 11:43:36 -0400 Subject: [PATCH 07/45] Update CHANGELOG.md --- CHANGELOG.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 606bd77..6831843 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## 5.7.0 + * Bumping backoff dependency to 1.8.0 for aiohttp support + * Added `get_selected_streams` to the `Catalog` class that orders streams returned with `currently_syncing` from state (if present) at the front of the list. 
[#100](https://github.com/singer-io/singer-python/pull/100) + * Added helper called `write_catalog` for use in discovery mode [#101](https://github.com/singer-io/singer-python/pull/101) + ## 5.6.1 * Retain argument paths in `parse_args` [#88](https://github.com/singer-io/singer-python/pull/88) @@ -31,6 +36,3 @@ ## 5.0.5 * Sets the default format for dates to use %04Y so dates < 1000 are formatted with leading zeroes [#65](https://github.com/singer-io/singer-python/pull/65) - -## 5.7.0 - * Bumping backoff dependency to 1.8.0 for aiohttp support From ddab7337c5c9855ee7d81d75afbf4f3e72565f47 Mon Sep 17 00:00:00 2001 From: Paul Santa Clara Date: Wed, 24 Jul 2019 11:41:45 -0400 Subject: [PATCH 08/45] parse time_extracted with faster ciso8601 lib. parse line json loads numbers as decimals --- setup.py | 1 + singer/messages.py | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 4284678..cb8cf2a 100755 --- a/setup.py +++ b/setup.py @@ -15,6 +15,7 @@ 'simplejson==3.11.1', 'python-dateutil>=2.6.0', 'backoff==1.8.0', + 'ciso8601', ], extras_require={ 'dev': [ diff --git a/singer/messages.py b/singer/messages.py index 0602cf5..6de1df9 100644 --- a/singer/messages.py +++ b/singer/messages.py @@ -5,6 +5,9 @@ import simplejson as json import singer.utils as u +import ciso8601 +from .logger import get_logger +LOGGER = get_logger() class Message(): '''Base class for messages.''' @@ -180,13 +183,20 @@ def parse_message(msg): # lossy conversions. However, this will affect # very few data points and we have chosen to # leave conversion as is for now. 
- obj = json.loads(msg) + obj = json.loads(msg, use_decimal=True) msg_type = _required_key(obj, 'type') if msg_type == 'RECORD': time_extracted = obj.get('time_extracted') if time_extracted: - time_extracted = dateutil.parser.parse(time_extracted) + try: + time_extracted = ciso8601.parse_datetime(time_extracted) + except: + LOGGER.warning("unable to parse time_extracted with ciso8601 library") + time_extracted = None + + + # time_extracted = dateutil.parser.parse(time_extracted) return RecordMessage(stream=_required_key(obj, 'stream'), record=_required_key(obj, 'record'), version=obj.get('version'), From aa9190bf7503d6ec2e5dbe3f22138caa98c0465c Mon Sep 17 00:00:00 2001 From: Paul Santa Clara Date: Wed, 24 Jul 2019 13:15:23 -0400 Subject: [PATCH 09/45] adding tests --- tests/test_singer.py | 60 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/tests/test_singer.py b/tests/test_singer.py index 87c7cf3..4fb74de 100644 --- a/tests/test_singer.py +++ b/tests/test_singer.py @@ -2,6 +2,8 @@ import unittest import datetime import dateutil +from decimal import Decimal + class TestSinger(unittest.TestCase): def test_parse_message_record_good(self): @@ -89,7 +91,6 @@ def test_parse_message_state_missing_value(self): singer.parse_message('{"type": "STATE"}') def test_round_trip(self): - record_message = singer.RecordMessage( record={'name': 'foo'}, stream='users') @@ -102,7 +103,7 @@ def test_round_trip(self): 'name': {'type': 'string'}}}) state_message = singer.StateMessage(value={'seq': 1}) - + self.assertEqual(record_message, singer.parse_message(singer.format_message(record_message))) self.assertEqual(schema_message, @@ -124,5 +125,60 @@ def test_write_schema(self): def test_write_state(self): singer.write_state({"foo": 1}) +class TestParsingNumbers(unittest.TestCase): + def create_record(self, value): + raw = '{"type": "RECORD", "stream": "test", "record": {"value": ' + value + '}}' + parsed = 
singer.parse_message(raw) + return parsed.record['value'] + + def test_parse_int_zero(self): + value = self.create_record('0') + self.assertEqual(type(value), int) + self.assertEqual(value, 0) + + def test_parse_regular_decimal(self): + value = self.create_record('3.14') + self.assertEqual(Decimal('3.14'), value) + + def test_parse_large_decimal(self): + value = self.create_record('9999999999999999.9999') + self.assertEqual(Decimal('9999999999999999.9999'), value) + + def test_parse_small_decimal(self): + value = self.create_record('-9999999999999999.9999') + self.assertEqual(Decimal('-9999999999999999.9999'), value) + + def test_parse_absurdly_large_decimal(self): + value_str = '9' * 1024 + '.' + '9' * 1024 + value = self.create_record(value_str) + self.assertEqual(Decimal(value_str), value) + + def test_parse_absurdly_large_int(self): + value_str = '9' * 1024 + value = self.create_record(value_str) + self.assertEqual(int(value_str), value) + self.assertEqual(int, type(value)) + + def test_parse_bulk_decs(self): + value_strs = [ + '-9999999999999999.9999999999999999999999', + '0', + '9999999999999999.9999999999999999999999', + '-7187498962233394.3739812942138415666763', + '9273972760690975.2044306442955715221042', + '29515565286974.1188802122612813004366', + '9176089101347578.2596296292040288441238', + '-8416853039392703.306423225471199148379', + '1285266411314091.3002668125515694162268', + '6051872750342125.3812886238958681227336', + '-1132031605459408.5571559429308939781468', + '-6387836755056303.0038029604189860431045', + '4526059300505414' + ] + for value_str in value_strs: + value = self.create_record(value_str) + self.assertEqual(Decimal(value_str), value) + + if __name__ == '__main__': unittest.main() From c3a05e8998f9cc869618cb96162c2c54707357c5 Mon Sep 17 00:00:00 2001 From: Paul Santa Clara Date: Wed, 24 Jul 2019 13:17:08 -0400 Subject: [PATCH 10/45] linting --- singer/messages.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git 
a/singer/messages.py b/singer/messages.py index 6de1df9..3848801 100644 --- a/singer/messages.py +++ b/singer/messages.py @@ -1,11 +1,10 @@ import sys -import dateutil.parser import pytz import simplejson as json +import ciso8601 import singer.utils as u -import ciso8601 from .logger import get_logger LOGGER = get_logger() @@ -183,7 +182,7 @@ def parse_message(msg): # lossy conversions. However, this will affect # very few data points and we have chosen to # leave conversion as is for now. - obj = json.loads(msg, use_decimal=True) + obj = json.loads(msg, use_decimal=True) msg_type = _required_key(obj, 'type') if msg_type == 'RECORD': From 9b99c6e0efc18836e6a07f1092aed8ba253f403f Mon Sep 17 00:00:00 2001 From: Paul Santa Clara Date: Wed, 24 Jul 2019 13:19:28 -0400 Subject: [PATCH 11/45] bumping minor version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index cb8cf2a..ac2dd71 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.7.0', + version='5.8.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], From 0720f5cf068476e64d93a71287bc0ff3f3eeadb3 Mon Sep 17 00:00:00 2001 From: Dan Mosora Date: Wed, 28 Aug 2019 15:19:08 +0000 Subject: [PATCH 12/45] Allow empty key_properties and replication keys in standard metadata --- singer/metadata.py | 4 ++-- tests/test_metadata.py | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/singer/metadata.py b/singer/metadata.py index dd2b85c..41153ea 100644 --- a/singer/metadata.py +++ b/singer/metadata.py @@ -26,11 +26,11 @@ def get_standard_metadata(schema=None, schema_name=None, key_properties=None, valid_replication_keys=None, replication_method=None): mdata = {} - if key_properties: + if key_properties is not None: mdata = write(mdata, (), 'table-key-properties', key_properties) if replication_method: mdata = write(mdata, (), 
'forced-replication-method', replication_method) - if valid_replication_keys: + if valid_replication_keys is not None: mdata = write(mdata, (), 'valid-replication-keys', valid_replication_keys) if schema: mdata = write(mdata, (), 'inclusion', 'available') diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 8fa271d..fd97ef2 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -325,3 +325,11 @@ def test_standard_metadata(self): for obj in expected_metadata: if obj in test_value: self.assertIn(obj, test_value) + + def test_empty_key_properties_are_written(self): + mdata = get_standard_metadata(key_properties=[]) + self.assertEqual(mdata, [{'breadcrumb': (), 'metadata': {'table-key-properties': []}}]) + + def test_empty_valid_replication_keys_are_written(self): + mdata = get_standard_metadata(valid_replication_keys=[]) + self.assertEqual(mdata, [{'breadcrumb': (), 'metadata': {'valid-replication-keys': []}}]) From 5a0dc35aca8acc23f44871c8f0dd51934f390b2c Mon Sep 17 00:00:00 2001 From: Dan Mosora Date: Wed, 28 Aug 2019 17:22:27 +0000 Subject: [PATCH 13/45] Version 5.8.1 and changelog --- CHANGELOG.md | 3 +++ setup.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6831843..52fe8b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 5.8.1 + * Allow empty lists for `key-properties` and `valid-replication-keys` in `get_standard_metadata` [#106](https://github.com/singer-io/singer-python/pull/106) + ## 5.7.0 * Bumping backoff dependency to 1.8.0 for aiohttp support * Added `get_selected_streams` to the `Catalog` class that orders streams returned with `currently_syncing` from state (if present) at the front of the list. 
[#100](https://github.com/singer-io/singer-python/pull/100) diff --git a/setup.py b/setup.py index ac2dd71..5a384fe 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.8.0', + version='5.8.1', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], From fbde60aaa3ee19411354b03e3dc78d4d92ec479b Mon Sep 17 00:00:00 2001 From: Chris Merrick Date: Thu, 24 Oct 2019 18:07:49 -0400 Subject: [PATCH 14/45] Add PR template --- .github/pull_request_template.md | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .github/pull_request_template.md diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..6e46b00 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,11 @@ +# Description of change +(write a short description or paste a link to JIRA) + +# Manual QA steps + - + +# Risks + - + +# Rollback steps + - revert this branch From bf8faa1d2ca10656f5ac824915ca31e843ed33b7 Mon Sep 17 00:00:00 2001 From: cosimon Date: Mon, 4 Nov 2019 16:33:11 -0500 Subject: [PATCH 15/45] Updates transform logging to only output the raw data in DEBUG (#111) * Updates tranform logging to only output the raw data in DEBUG * Fix pylint warning * Change how we make the message * Fix another pylint warning --- singer/statediff.py | 4 ++-- singer/transform.py | 19 ++++++++++++++----- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/singer/statediff.py b/singer/statediff.py index 3ebddcb..bc21fd5 100644 --- a/singer/statediff.py +++ b/singer/statediff.py @@ -46,8 +46,8 @@ def diff(oldstate, newstate): # Convert oldstate and newstate from a deeply nested dict into a # single-level dict, mapping a path to a value. 
- olddict = {k: v for (k, v) in paths(oldstate)} - newdict = {k: v for (k, v) in paths(newstate)} + olddict = dict(paths(oldstate)) + newdict = dict(paths(newstate)) # Build the list of all paths in both oldstate and newstate to iterate # over. diff --git a/singer/transform.py b/singer/transform.py index d290386..c165e38 100644 --- a/singer/transform.py +++ b/singer/transform.py @@ -1,4 +1,5 @@ import datetime +import logging import re from jsonschema import RefResolver @@ -55,19 +56,27 @@ class SchemaKey: any_of = 'anyOf' class Error: - def __init__(self, path, data, schema=None): + def __init__(self, path, data, schema=None, logging_level=logging.INFO): self.path = path self.data = data self.schema = schema + self.logging_level = logging_level def tostr(self): path = ".".join(map(str, self.path)) if self.schema: - msg = "does not match {}".format(self.schema) + if self.logging_level >= logging.INFO: + msg = "data does not match {}".format(self.schema) + else: + msg = "does not match {}".format(self.schema) else: msg = "not in schema" - return "{}: {} {}".format(path, self.data, msg) + if self.logging_level >= logging.INFO: + output = "{}: {}".format(path, msg) + else: + output = "{}: {} {}".format(path, self.data, msg) + return output class Transformer: @@ -154,7 +163,7 @@ def transform_recur(self, data, schema, path): return success, transformed_data else: # pylint: disable=useless-else-on-loop # exhaused all types and didn't return, so we failed :-( - self.errors.append(Error(path, data, schema)) + self.errors.append(Error(path, data, schema, logging_level=LOGGER.level)) return False, None def _transform_anyof(self, data, schema, path): @@ -165,7 +174,7 @@ def _transform_anyof(self, data, schema, path): return success, transformed_data else: # pylint: disable=useless-else-on-loop # exhaused all schemas and didn't return, so we failed :-( - self.errors.append(Error(path, data, schema)) + self.errors.append(Error(path, data, schema, logging_level=LOGGER.level)) 
return False, None def _transform_object(self, data, schema, path, pattern_properties): From 6c6c773d8b6dc6223551e598574eb0df41f0c415 Mon Sep 17 00:00:00 2001 From: cosimon Date: Mon, 4 Nov 2019 16:36:37 -0500 Subject: [PATCH 16/45] Bump to v5.9.0 (#112) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5a384fe..8df52bb 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.8.1', + version='5.9.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], From 9953adbd45441da61f9f4ab42734c0c7b9e8c914 Mon Sep 17 00:00:00 2001 From: Jude188 <17158624+Jude188@users.noreply.github.com> Date: Tue, 27 Oct 2020 14:15:17 +0000 Subject: [PATCH 17/45] Filter nested properties based on metadata (#130) * Support filtering of nested fields Update filter_data_by_metadata function to allow filtering of nested fields - e.g. if property `address` has selected set to True, but property `address.street` has selected set to False, only the street would be excluded. Processes data recursively. * Update transform.py make formatting a little clearer * Update transform.py Fix array type breadcrumb name * Update transform.py breadcrumb path documentation * Update transform.py change based on tests - must remove field from data object, not just set value to None. 
* Update transform.py line lenght :) * Add tests for filtering nested fields * Make pylint happy * Simplify one line Co-authored-by: Chris Goddard --- Makefile | 2 +- singer/transform.py | 36 ++++++++++++++++++++++------------ tests/test_transform.py | 43 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index 296c20f..2fe943e 100644 --- a/Makefile +++ b/Makefile @@ -8,5 +8,5 @@ install: check_prereqs python3 -m pip install -e '.[dev]' test: install - pylint singer -d missing-docstring,broad-except,bare-except,too-many-return-statements,too-many-branches,too-many-arguments,no-else-return,too-few-public-methods,fixme,protected-access + pylint singer --extension-pkg-whitelist=ciso8601 -d missing-docstring,broad-except,bare-except,too-many-return-statements,too-many-branches,too-many-arguments,no-else-return,too-few-public-methods,fixme,protected-access nosetests --with-doctest -v diff --git a/singer/transform.py b/singer/transform.py index c165e38..f0b8556 100644 --- a/singer/transform.py +++ b/singer/transform.py @@ -36,6 +36,16 @@ def unix_seconds_to_datetime(value): return strftime(datetime.datetime.fromtimestamp(int(value), datetime.timezone.utc)) +def breadcrumb_path(breadcrumb): + """ + Transform breadcrumb into familiar object dot-notation + """ + name = ".".join(breadcrumb) + name = name.replace('properties.', '') + name = name.replace('.items', '[]') + return name + + class SchemaMismatch(Exception): def __init__(self, errors): if not errors: @@ -46,7 +56,7 @@ def __init__(self, errors): msg = "Errors during transform\n\t{}".format("\n\t".join(estrs)) msg += "\n\n\nErrors during transform: [{}]".format(", ".join(estrs)) - super(SchemaMismatch, self).__init__(msg) + super().__init__(msg) class SchemaKey: ref = "$ref" @@ -110,25 +120,27 @@ def __enter__(self): def __exit__(self, *args): self.log_warning() - def filter_data_by_metadata(self, data, metadata): + def 
filter_data_by_metadata(self, data, metadata, parent=()): if isinstance(data, dict) and metadata: for field_name in list(data.keys()): - selected = singer.metadata.get(metadata, ('properties', field_name), 'selected') - inclusion = singer.metadata.get(metadata, ('properties', field_name), 'inclusion') + breadcrumb = parent + ('properties', field_name) + selected = singer.metadata.get(metadata, breadcrumb, 'selected') + inclusion = singer.metadata.get(metadata, breadcrumb, 'inclusion') if inclusion == 'automatic': continue - if selected is False: + if (selected is False) or (inclusion == 'unsupported'): data.pop(field_name, None) # Track that a field was filtered because the customer - # didn't select it. - self.filtered.add(field_name) + # didn't select it or the tap declared it as unsupported. + self.filtered.add(breadcrumb_path(breadcrumb)) + else: + data[field_name] = self.filter_data_by_metadata( + data[field_name], metadata, breadcrumb) - if inclusion == 'unsupported': - data.pop(field_name, None) - # Track that the field was filtered because the tap - # declared it as unsupported. 
- self.filtered.add(field_name) + if isinstance(data, list) and metadata: + breadcrumb = parent + ('items',) + data = [self.filter_data_by_metadata(d, metadata, breadcrumb) for d in data] return data diff --git a/tests/test_transform.py b/tests/test_transform.py index 3ba57fa..c6861ef 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -295,6 +295,49 @@ def test_drops_fields_which_are_unsupported(self): dict_value = {"name": "chicken"} self.assertEqual({}, transform(dict_value, schema, NO_INTEGER_DATETIME_PARSING, metadata=metadata)) + def test_drops_nested_object_fields_which_are_unselected(self): + schema = {"type": "object", + "properties": {"addr": {"type": "object", + "properties": {"addr1": {"type": "string"}, + "city": {"type": "string"}, + "state": {"type": "string"}, + 'amount': {'type': 'integer'}}}}} + metadata = { + ('properties','addr'): {"selected": True}, + ('properties','addr', 'properties','amount'): {"selected": False} + } + data = {'addr': + {'addr1': 'address_1', 'city': 'city_1', 'state': 'state_1', 'amount': '123'} + } + expected = {'addr': + {'addr1': 'address_1', 'city': 'city_1', 'state': 'state_1'}, + } + self.assertDictEqual(expected, transform(data, schema, NO_INTEGER_DATETIME_PARSING, metadata=metadata)) + + def test_drops_nested_array_fields_which_are_unselected(self): + schema = {"type": "object", + "properties": {"addrs": {"type": "array", + "items": {"type": "object", + "properties": {"addr1": {"type": "string"}, + "city": {"type": "string"}, + "state": {"type": "string"}, + 'amount': {'type': 'integer'}}}}}} + metadata = { + ('properties','addrs'): {"selected": True}, + ('properties','addrs','items','properties','amount'): {"selected": False} + } + data = {'addrs': [ + {'addr1': 'address_1', 'city': 'city_1', 'state': 'state_1', 'amount': '123'}, + {'addr1': 'address_2', 'city': 'city_2', 'state': 'state_2', 'amount': '456'} + ] + } + expected = {'addrs': [ + {'addr1': 'address_1', 'city': 'city_1', 'state': 
'state_1'}, + {'addr1': 'address_2', 'city': 'city_2', 'state': 'state_2'} + ] + } + self.assertDictEqual(expected, transform(data, schema, NO_INTEGER_DATETIME_PARSING, metadata=metadata)) + class TestResolveSchemaReferences(unittest.TestCase): def test_internal_refs_resolve(self): schema = {"type": "object", From eeed831bf4255856804795b7038bf3000062cf84 Mon Sep 17 00:00:00 2001 From: Dan Mosora <30501696+dmosorast@users.noreply.github.com> Date: Tue, 27 Oct 2020 13:41:12 -0400 Subject: [PATCH 18/45] Version 5.9.1 and changelog (#131) --- CHANGELOG.md | 3 +++ setup.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 52fe8b8..aabc092 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 5.9.1 + * Add nested schema support to Transformer's `filter_data_by_metadata` function [#130](https://github.com/singer-io/singer-python/pull/130) + ## 5.8.1 * Allow empty lists for `key-properties` and `valid-replication-keys` in `get_standard_metadata` [#106](https://github.com/singer-io/singer-python/pull/106) diff --git a/setup.py b/setup.py index 8df52bb..4a9cd14 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.9.0', + version='5.9.1', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], From 6472683c7b46b9caa0969c5d44a62db07b811b30 Mon Sep 17 00:00:00 2001 From: Andy Lu Date: Fri, 4 Dec 2020 15:00:41 -0600 Subject: [PATCH 19/45] Fix tests for get_standard_metadata (#134) * Fix testing of get_standard_metadata * Fix maxDiff issue * Add docstring * Revert change to get_standard_metadata() * Update how asserts work * Fix missing import * Revert changes to breadcrumbs * Rename test_kp to has_pk * Add test numbers * Add missing test * Fix failing test Co-authored-by: Jude188 <17158624+Jude188@users.noreply.github.com> --- tests/test_metadata.py | 138 
++++++++++++++++++++++++----------------- 1 file changed, 80 insertions(+), 58 deletions(-) diff --git a/tests/test_metadata.py b/tests/test_metadata.py index fd97ef2..9cc2bb2 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -1,8 +1,8 @@ from pprint import pprint import unittest -from singer.metadata import get_standard_metadata +from singer.metadata import get_standard_metadata, to_map -def make_expected_metadata(base_obj, dict_of_extras): +def make_expected_metadata(base_obj, dict_of_extras, has_pk=False): metadata_value = {**base_obj} metadata_value.update(dict_of_extras) @@ -13,7 +13,7 @@ def make_expected_metadata(base_obj, dict_of_extras): }, { 'metadata': { - 'inclusion': 'available', + 'inclusion': 'automatic' if has_pk else 'available', }, 'breadcrumb': ('properties', 'id') }, @@ -33,9 +33,19 @@ def make_expected_metadata(base_obj, dict_of_extras): class TestStandardMetadata(unittest.TestCase): - #maxDiff = None - def test_standard_metadata(self): + """ + There's four inputs we want to test: schema, key_properties, replication_method, valid_replication_keys. + + When `schema` is a non-null input, we expect `"inclusion": "available"` metadata for the `()` breadcrumb. + + When `key_properties` is a non-null input, we expect `table-key-properties` metadata for the `()` breadcrumb. + + When `replication_method` is a non-null input, we expect `forced-replication-method` metadata for the `()` breadcrumb. + + When `valid_replication_keys` is a non-null input, we expect `valid-replication-keys` metadata for the `()` breadcrumb. 
+ """ + self.maxDiff = None # Some contants shared by a number of expected metadata objects tap_stream_id = 'employees' @@ -44,7 +54,7 @@ def test_standard_metadata(self): test_rk = ['id', 'created'] metadata_kp = {'table-key-properties': ['id']} metadata_rm = {'forced-replication-method': 'INCREMENTAL'} - metadata_rk = {'valid_replication_keys': ['id','created']} + metadata_rk = {'valid-replication-keys': ['id','created']} schema_present_base_obj = {'inclusion': 'available'} test_schema = { 'type': ['null', 'object'], @@ -61,7 +71,7 @@ def test_standard_metadata(self): # dictionary of parameters for `get_standard_metadata()` and the # second element is the expected metadata test_variables = [ - ( + ( # test_number=0 { 'schema': test_schema, 'schema_name': tap_stream_id, @@ -74,7 +84,7 @@ def test_standard_metadata(self): {'schema-name': tap_stream_id,} ) ), - ( + ( # test_number=1 { 'schema': test_schema, 'schema_name': tap_stream_id, @@ -84,11 +94,11 @@ def test_standard_metadata(self): }, make_expected_metadata( schema_present_base_obj, - {'valid_replication_keys': ['id','created'], + {'valid-replication-keys': ['id','created'], 'schema-name':tap_stream_id} ) ), - ( + ( # test_number=2 { 'schema': test_schema, 'schema_name': tap_stream_id, @@ -102,7 +112,7 @@ def test_standard_metadata(self): 'schema-name':tap_stream_id} ) ), - ( + ( # test_number=3 { 'schema': test_schema, 'schema_name': tap_stream_id, @@ -112,12 +122,12 @@ def test_standard_metadata(self): }, make_expected_metadata( schema_present_base_obj, - {'valid_replication_keys': ['id','created'], + {'valid-replication-keys': ['id','created'], 'forced-replication-method': 'INCREMENTAL', 'schema-name':tap_stream_id} ) ), - ( + ( # test_number=4 { 'schema': test_schema, 'schema_name': tap_stream_id, @@ -128,10 +138,11 @@ def test_standard_metadata(self): make_expected_metadata( schema_present_base_obj, {'table-key-properties': ['id'], - 'schema-name':tap_stream_id} + 'schema-name':tap_stream_id}, + 
has_pk=True ) ), - ( + ( # test_number=5 { 'schema': test_schema, 'schema_name': tap_stream_id, @@ -140,14 +151,14 @@ def test_standard_metadata(self): 'valid_replication_keys': test_rk }, make_expected_metadata( - schema_present_base_obj, {'table-key-properties': ['id'], - 'valid_replication_keys': ['id','created'], - 'schema-name':tap_stream_id} + 'valid-replication-keys': ['id','created'], + 'schema-name':tap_stream_id}, + has_pk=True ) ), - ( + ( # test_number=6 { 'schema': test_schema, 'schema_name': tap_stream_id, @@ -159,10 +170,11 @@ def test_standard_metadata(self): schema_present_base_obj, {'table-key-properties': ['id'], 'forced-replication-method': 'INCREMENTAL', - 'schema-name':tap_stream_id} + 'schema-name':tap_stream_id}, + has_pk=True ) ), - ( + ( # test_number=7 { 'schema': test_schema, 'schema_name': tap_stream_id, @@ -174,25 +186,21 @@ def test_standard_metadata(self): schema_present_base_obj, {'table-key-properties': ['id'], 'forced-replication-method': 'INCREMENTAL', - 'valid_replication_keys': ['id','created'], - 'schema-name':tap_stream_id} + 'valid-replication-keys': ['id','created'], + 'schema-name':tap_stream_id}, + has_pk=True ) ), - ( + ( # test_number=8 { 'schema': None, 'key_properties': None, 'replication_method': None, 'valid_replication_keys': None }, - [ - { - 'metadata': {}, - 'breadcrumb': [] - } - ] + [] ), - ( + ( # test_number=9 { 'schema': None, 'key_properties': None, @@ -202,14 +210,13 @@ def test_standard_metadata(self): [ { 'metadata': { - 'inclusion': 'available', - 'valid_replication_keys': ['id','created'] + 'valid-replication-keys': ['id','created'] }, 'breadcrumb': [] } ] ), - ( + ( # test_number=10 { 'schema': None, 'key_properties': None, @@ -219,14 +226,13 @@ def test_standard_metadata(self): [ { 'metadata': { - 'inclusion': 'available', 'forced-replication-method': 'INCREMENTAL' }, 'breadcrumb': [] } ] ), - ( + ( # test_number=11 { 'schema': None, 'key_properties': None, @@ -236,15 +242,14 @@ def 
test_standard_metadata(self): [ { 'metadata': { - 'inclusion': 'available', 'forced-replication-method': 'INCREMENTAL', - 'valid_replication_keys': ['id','created'] + 'valid-replication-keys': ['id','created'] }, 'breadcrumb': [] } ] ), - ( + ( # test_number=12 { 'schema': None, 'key_properties': test_kp, @@ -254,14 +259,13 @@ def test_standard_metadata(self): [ { 'metadata': { - 'inclusion': 'available', 'table-key-properties': ['id'], }, 'breadcrumb': [] } ] ), - ( + ( # test_number=13 { 'schema': None, 'key_properties': test_kp, @@ -271,15 +275,31 @@ def test_standard_metadata(self): [ { 'metadata': { - 'inclusion': 'available', 'table-key-properties': ['id'], - 'valid_replication_keys': ['id','created'] + 'valid-replication-keys': ['id','created'] + }, + 'breadcrumb': [] + } + ] + ), + ( # test_number=14 + { + 'schema': None, + 'key_properties': test_kp, + 'replication_method': test_rm, + 'valid_replication_keys': None + }, + [ + { + 'metadata': { + 'table-key-properties': ['id'], + 'forced-replication-method': 'INCREMENTAL', }, 'breadcrumb': [] } ] ), - ( + ( # test_number=15 { 'schema': None, 'key_properties': test_kp, @@ -289,10 +309,9 @@ def test_standard_metadata(self): [ { 'metadata': { - 'inclusion': 'available', 'table-key-properties': ['id'], 'forced-replication-method': 'INCREMENTAL', - 'valid_replication_keys': ['id','created'] + 'valid-replication-keys': ['id','created'] }, 'breadcrumb': [] } @@ -300,15 +319,16 @@ def test_standard_metadata(self): ) ] - for var in test_variables: - function_params = var[0] - expected_metadata = var[1] + for i, var in enumerate(test_variables): + with self.subTest(test_number=i): + function_params = var[0] + expected_metadata = var[1] - test_value = get_standard_metadata(**function_params) + test_value = get_standard_metadata(**function_params) - for obj in expected_metadata: - if obj in test_value: - self.assertIn(obj, test_value) + expected_value = to_map(expected_metadata) + actual_value = to_map(test_value) + 
self.assertDictEqual(expected_value, actual_value) # Test one function call where the parameters are not splat in test_value = get_standard_metadata(test_schema, @@ -320,11 +340,13 @@ def test_standard_metadata(self): expected_metadata = make_expected_metadata(schema_present_base_obj, {'table-key-properties': ['id'], 'forced-replication-method': 'INCREMENTAL', - 'valid_replication_keys': ['id','created'], - 'schema-name':tap_stream_id}) - for obj in expected_metadata: - if obj in test_value: - self.assertIn(obj, test_value) + 'valid-replication-keys': ['id','created'], + 'schema-name':tap_stream_id}, + has_pk=True) + self.assertDictEqual( + to_map(expected_metadata), + to_map(test_value) + ) def test_empty_key_properties_are_written(self): mdata = get_standard_metadata(key_properties=[]) From bfa184e04d9fe5b6edddb00cd8413505106cfd23 Mon Sep 17 00:00:00 2001 From: Andy Lu Date: Wed, 3 Feb 2021 08:53:14 -0600 Subject: [PATCH 20/45] Add exceptions (#138) * Add exceptions.py, add unit tests * Pin pip and setuptools versions * Make pylint happy --- .circleci/config.yml | 2 +- singer/__init__.py | 8 +++++ singer/exceptions.py | 30 ++++++++++++++++++ tests/test_exceptions.py | 68 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 singer/exceptions.py create mode 100644 tests/test_exceptions.py diff --git a/.circleci/config.yml b/.circleci/config.yml index eda5d4c..ae6734c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -16,7 +16,7 @@ jobs: mkdir -p ~/.virtualenvs python3 -m venv ~/.virtualenvs/singer-python source ~/.virtualenvs/singer-python/bin/activate - pip install -U pip setuptools + pip install -U 'pip<19.2' 'setuptools<51.0.0' make install - run: name: 'Run tests' diff --git a/singer/__init__.py b/singer/__init__.py index d3b2c87..26f0043 100644 --- a/singer/__init__.py +++ b/singer/__init__.py @@ -73,6 +73,14 @@ get_currently_syncing, ) +from singer.exceptions import ( + 
SingerConfigurationError, + SingerDiscoveryError, + SingerError, + SingerRetryableRequestError, + SingerSyncError, +) + if __name__ == "__main__": import doctest doctest.testmod() diff --git a/singer/exceptions.py b/singer/exceptions.py new file mode 100644 index 0000000..9231328 --- /dev/null +++ b/singer/exceptions.py @@ -0,0 +1,30 @@ +""" +The exceptions module contains Exception subclasses whose instances might be +raised by the singer library or taps that use the singer library. +""" + +class SingerError(Exception): + """The base Exeception class for singer""" + def __init__(self, message): + """Create an exeception with a multiline error message + + The first line is the error's class name. The subsequent lines are + the message that class was created with. + """ + super().__init__('{}\n{}'.format(self.__class__.__name__, message)) + + +class SingerConfigurationError(SingerError): + """The base class of errors encountered before discovery and before sync mode""" + + +class SingerDiscoveryError(SingerError): + """The base class of errors encountered in discovery mode""" + + +class SingerSyncError(SingerError): + """The base class of errors encountered in sync mode""" + + +class SingerRetryableRequestError(SingerError): + """This error is meant to be thrown when a tap encounters a retryable request""" diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py new file mode 100644 index 0000000..491595f --- /dev/null +++ b/tests/test_exceptions.py @@ -0,0 +1,68 @@ +import unittest + +from singer.exceptions import SingerConfigurationError +from singer.exceptions import SingerDiscoveryError +from singer.exceptions import SingerError +from singer.exceptions import SingerRetryableRequestError +from singer.exceptions import SingerSyncError + +class TestSingerErrors(unittest.TestCase): + def test_SingerError_prints_correctly(self): + error_text = "An error occured" + + with self.assertRaises(SingerError) as test_run: + raise SingerError(error_text) + + 
expected_text = "SingerError\n" + error_text + self.assertEquals(expected_text, + str(test_run.exception)) + + def test_SingerConfigurationError_prints_correctly(self): + error_text = "An error occured" + + with self.assertRaises(SingerConfigurationError) as test_run: + raise SingerConfigurationError(error_text) + + expected_text = "SingerConfigurationError\n" + error_text + self.assertEquals(expected_text, + str(test_run.exception)) + + def test_SingerDiscoveryError_prints_correctly(self): + error_text = "An error occured" + + with self.assertRaises(SingerDiscoveryError) as test_run: + raise SingerDiscoveryError(error_text) + + expected_text = "SingerDiscoveryError\n" + error_text + self.assertEquals(expected_text, + str(test_run.exception)) + + def test_SingerSyncError_prints_correctly(self): + error_text = "An error occured" + + with self.assertRaises(SingerSyncError) as test_run: + raise SingerSyncError(error_text) + + expected_text = "SingerSyncError\n" + error_text + self.assertEquals(expected_text, + str(test_run.exception)) + + def test_SingerRetryableRequestError_prints_correctly(self): + error_text = "An error occured" + + with self.assertRaises(SingerRetryableRequestError) as test_run: + raise SingerRetryableRequestError(error_text) + + expected_text = "SingerRetryableRequestError\n" + error_text + self.assertEquals(expected_text, + str(test_run.exception)) + + def test_SingerError_prints_multiple_lines_correctly(self): + error_text = "\n".join(["Line 1", "Line 2", "Line 3"]) + + with self.assertRaises(SingerError) as test_run: + raise SingerError(error_text) + + expected_text = "SingerError\n" + error_text + self.assertEquals(expected_text, + str(test_run.exception)) From f2b383324a3a97b98d6e29f69dbadae2632dbd2f Mon Sep 17 00:00:00 2001 From: Andy Lu Date: Wed, 3 Feb 2021 09:08:43 -0600 Subject: [PATCH 21/45] Bump to v5.10.0, update changelog (#139) --- CHANGELOG.md | 3 +++ setup.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git 
a/CHANGELOG.md b/CHANGELOG.md index aabc092..4e16fc6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 5.10.0 + * Add exception classes [#138](https://github.com/singer-io/singer-python/pull/138) + ## 5.9.1 * Add nested schema support to Transformer's `filter_data_by_metadata` function [#130](https://github.com/singer-io/singer-python/pull/130) diff --git a/setup.py b/setup.py index 4a9cd14..104e68c 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.9.1', + version='5.10.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], From 599fadf6c14be64ad4557d10bc10d86ed77ab70b Mon Sep 17 00:00:00 2001 From: Collin Simon Date: Thu, 25 Feb 2021 15:17:47 -0500 Subject: [PATCH 22/45] Split the lines when logging the exception (#141) * Split the lines when logging the exception This will cause each line to be prepended with `CRITICAL` --- singer/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/singer/utils.py b/singer/utils.py index 85f3d39..492e03b 100644 --- a/singer/utils.py +++ b/singer/utils.py @@ -228,7 +228,8 @@ def wrapped(*args, **kwargs): try: return fnc(*args, **kwargs) except Exception as exc: - logger.critical(exc) + for line in str(exc).splitlines(): + logger.critical(line) raise return wrapped return decorator From 49c9f08f8a5e3c87e21367003e776c85a4f12f18 Mon Sep 17 00:00:00 2001 From: Collin Simon Date: Thu, 25 Feb 2021 15:40:47 -0500 Subject: [PATCH 23/45] Bump to v5.11.0 (#142) --- CHANGELOG.md | 3 +++ setup.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e16fc6..c48c6c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 5.11.0 + * Make `utils.handle_top_exception()` critically log each line of the exception separately so each line is prepended with `CRITICAL` 
[#141](https://github.com/singer-io/singer-python/pull/141) + ## 5.10.0 * Add exception classes [#138](https://github.com/singer-io/singer-python/pull/138) diff --git a/setup.py b/setup.py index 104e68c..231a23f 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.10.0', + version='5.11.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], From 089214092510608dabdde4b5f364da3777ca9db8 Mon Sep 17 00:00:00 2001 From: Dan Mosora <30501696+dmosorast@users.noreply.github.com> Date: Wed, 3 Mar 2021 15:07:47 -0500 Subject: [PATCH 24/45] Add support for custom decimal string formatter (#125) * Handle possible input types to format as decimals * Stringify resulting data post-decimal conversion, fix return * Pylint fixes * added tests for singer.decimal logic * modified transform to parse NaN values as NaNs * changed logic to transform occurrence of snan into a nan Co-authored-by: Jacob Baca Co-authored-by: Jacob Baca <52418765+jacobrobertbaca@users.noreply.github.com> --- singer/transform.py | 19 +++++++++++++ tests/test_transform.py | 63 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/singer/transform.py b/singer/transform.py index f0b8556..31fcd1b 100644 --- a/singer/transform.py +++ b/singer/transform.py @@ -1,4 +1,5 @@ import datetime +import decimal import logging import re from jsonschema import RefResolver @@ -271,7 +272,25 @@ def _transform(self, data, typ, schema, path): return False, None return True, data + elif schema.get("format") == "singer.decimal": + if data is None: + return False, None + + if isinstance(data, (str, float, int)): + try: + return True, str(decimal.Decimal(str(data)).normalize()) + except: + return False, None + elif isinstance(data, decimal.Decimal): + try: + if data.is_snan(): + return True, 'NaN' + else: + return True, str(data.normalize()) + except: + return False, None + 
return False, None elif typ == "object": # Objects do not necessarily specify properties return self._transform_object(data, diff --git a/tests/test_transform.py b/tests/test_transform.py index c6861ef..beb0f42 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -1,4 +1,5 @@ import unittest +import decimal from singer import transform from singer.transform import * @@ -252,6 +253,68 @@ def test_null_object_transform(self): empty_data = {'addrs': {}} self.assertDictEqual(empty_data, transform(empty_data, schema)) + def test_decimal_types_transform(self): + schema = {"type": "object", + "properties": {"percentage": {"type": ["string"], + "format": "singer.decimal"}}} + + inf = {'percentage': 'Infinity'} + negative_inf = {'percentage': '-Infinity'} + root2 = {'percentage': 1.4142135623730951} + nan = {'percentage': decimal.Decimal('NaN')} + snan = {'percentage': decimal.Decimal('sNaN')} + + self.assertEquals(inf, transform(inf, schema)) + self.assertEquals(negative_inf, transform(negative_inf, schema)) + self.assertEquals({'percentage': '1.4142135623730951'}, transform(root2, schema)) + self.assertEquals({'percentage': 'NaN'}, transform(nan, schema)) + self.assertEquals({'percentage': 'NaN'}, transform(snan, schema)) + + + str1 = {'percentage':'0.1'} + str2 = {'percentage': '0.0000000000001'} + str3 = {'percentage': '1E+13'} + str4 = {'percentage': '100'} + str5 = {'percentage': '-100'} + self.assertEquals(str1, transform(str1, schema)) + self.assertEquals({'percentage': '1E-13'}, transform(str2, schema)) + self.assertEquals({'percentage': '1E+13'}, transform(str3, schema)) + self.assertEquals({'percentage': '1E+2'}, transform(str4, schema)) + self.assertEquals({'percentage': '-1E+2'}, transform(str5, schema)) + + float1 = {'percentage': 12.0000000000000000000000000001234556} + float2 = {'percentage': 0.0123} + float3 = {'percentage': 100.0123} + float4 = {'percentage': -100.0123} + self.assertEquals({'percentage':'12'}, transform(float1, schema)) + 
self.assertEquals({'percentage':'0.0123'}, transform(float2, schema)) + self.assertEquals({'percentage':'100.0123'}, transform(float3, schema)) + self.assertEquals({'percentage':'-100.0123'}, transform(float4, schema)) + + int1 = {'percentage': 123} + int2 = {'percentage': 0} + int3 = {'percentage': -1000} + self.assertEquals({'percentage':'123'}, transform(int1, schema)) + self.assertEquals({'percentage':'0'}, transform(int2, schema)) + self.assertEquals({'percentage':'-1E+3'}, transform(int3, schema)) + + dec1 = {'percentage': decimal.Decimal('1.1010101')} + dec2 = {'percentage': decimal.Decimal('.111111111111111111111111')} + dec3 = {'percentage': decimal.Decimal('-.111111111111111111111111')} + dec4 = {'percentage': decimal.Decimal('100')} + self.assertEquals({'percentage':'1.1010101'}, transform(dec1, schema)) + self.assertEquals({'percentage':'0.111111111111111111111111'}, transform(dec2, schema)) + self.assertEquals({'percentage':'-0.111111111111111111111111'}, transform(dec3, schema)) + self.assertEquals({'percentage':'1E+2'}, transform(dec4, schema)) + + bad1 = {'percentage': 'fsdkjl'} + with self.assertRaises(SchemaMismatch): + transform(bad1, schema) + + badnull = {'percentage': None} + with self.assertRaises(SchemaMismatch): + self.assertEquals({'percentage':None}, transform(badnull, schema)) + class TestTransformsWithMetadata(unittest.TestCase): def test_drops_no_data_when_not_dict(self): From 2f618c6194f154be7ba37f288d456f6abd4d78ec Mon Sep 17 00:00:00 2001 From: Jacob Baca <52418765+jacobrobertbaca@users.noreply.github.com> Date: Wed, 3 Mar 2021 15:16:02 -0500 Subject: [PATCH 25/45] changed from version 5.11.0 to 5.12.0 (#143) --- CHANGELOG.md | 3 +++ setup.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c48c6c4..0133b7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 5.12.0 + * Added support for `singer.decimal` types to transformer 
[#125](https://github.com/singer-io/singer-python/pull/125) + ## 5.11.0 * Make `utils.handle_top_exception()` critically log each line of the exception separately so each line is prepended with `CRITICAL` [#141](https://github.com/singer-io/singer-python/pull/141) diff --git a/setup.py b/setup.py index 231a23f..3b4de0d 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.11.0', + version='5.12.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], From d48271bc28b6d59a04d5e06abdf824895c8f42e6 Mon Sep 17 00:00:00 2001 From: Leslie VanDeMark <38043390+leslievandemark@users.noreply.github.com> Date: Mon, 22 Mar 2021 16:35:16 -0400 Subject: [PATCH 26/45] removed normalize() from singer.decimal to avoid scientific notation (#146) Co-authored-by: Zach Harris --- singer/transform.py | 4 ++-- tests/test_transform.py | 14 +++++++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/singer/transform.py b/singer/transform.py index 31fcd1b..3fdefdf 100644 --- a/singer/transform.py +++ b/singer/transform.py @@ -278,7 +278,7 @@ def _transform(self, data, typ, schema, path): if isinstance(data, (str, float, int)): try: - return True, str(decimal.Decimal(str(data)).normalize()) + return True, str(decimal.Decimal(str(data))) except: return False, None elif isinstance(data, decimal.Decimal): @@ -286,7 +286,7 @@ def _transform(self, data, typ, schema, path): if data.is_snan(): return True, 'NaN' else: - return True, str(data.normalize()) + return True, str(data) except: return False, None diff --git a/tests/test_transform.py b/tests/test_transform.py index beb0f42..959c4b8 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -279,24 +279,28 @@ def test_decimal_types_transform(self): self.assertEquals(str1, transform(str1, schema)) self.assertEquals({'percentage': '1E-13'}, transform(str2, schema)) 
self.assertEquals({'percentage': '1E+13'}, transform(str3, schema)) - self.assertEquals({'percentage': '1E+2'}, transform(str4, schema)) - self.assertEquals({'percentage': '-1E+2'}, transform(str5, schema)) + self.assertEquals({'percentage': '100'}, transform(str4, schema)) + self.assertEquals({'percentage': '-100'}, transform(str5, schema)) float1 = {'percentage': 12.0000000000000000000000000001234556} float2 = {'percentage': 0.0123} float3 = {'percentage': 100.0123} float4 = {'percentage': -100.0123} - self.assertEquals({'percentage':'12'}, transform(float1, schema)) + float5 = {'percentage': 0.000001} + float6 = {'percentage': 0.0000001} + self.assertEquals({'percentage':'12.0'}, transform(float1, schema)) self.assertEquals({'percentage':'0.0123'}, transform(float2, schema)) self.assertEquals({'percentage':'100.0123'}, transform(float3, schema)) self.assertEquals({'percentage':'-100.0123'}, transform(float4, schema)) + self.assertEquals({'percentage':'0.000001'}, transform(float5, schema)) + self.assertEquals({'percentage':'1E-7'}, transform(float6, schema)) int1 = {'percentage': 123} int2 = {'percentage': 0} int3 = {'percentage': -1000} self.assertEquals({'percentage':'123'}, transform(int1, schema)) self.assertEquals({'percentage':'0'}, transform(int2, schema)) - self.assertEquals({'percentage':'-1E+3'}, transform(int3, schema)) + self.assertEquals({'percentage':'-1000'}, transform(int3, schema)) dec1 = {'percentage': decimal.Decimal('1.1010101')} dec2 = {'percentage': decimal.Decimal('.111111111111111111111111')} @@ -305,7 +309,7 @@ def test_decimal_types_transform(self): self.assertEquals({'percentage':'1.1010101'}, transform(dec1, schema)) self.assertEquals({'percentage':'0.111111111111111111111111'}, transform(dec2, schema)) self.assertEquals({'percentage':'-0.111111111111111111111111'}, transform(dec3, schema)) - self.assertEquals({'percentage':'1E+2'}, transform(dec4, schema)) + self.assertEquals({'percentage':'100'}, transform(dec4, schema)) bad1 = 
{'percentage': 'fsdkjl'} with self.assertRaises(SchemaMismatch): From 66688d736fa699beeed63af91db2fdc1f8914bb1 Mon Sep 17 00:00:00 2001 From: Leslie VanDeMark <38043390+leslievandemark@users.noreply.github.com> Date: Mon, 22 Mar 2021 16:40:43 -0400 Subject: [PATCH 27/45] version bump to 5.12.1 (#147) Co-authored-by: Zach Harris --- CHANGELOG.md | 2 ++ setup.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0133b7f..6a9ebcc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,6 @@ # Changelog +## 5.12.1 + * Removes normalize function from `singer.decimal` to avoid scientific notation [#146](https://github.com/singer-io/singer-python/pull/146) ## 5.12.0 * Added support for `singer.decimal` types to transformer [#125](https://github.com/singer-io/singer-python/pull/125) diff --git a/setup.py b/setup.py index 3b4de0d..90df264 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.12.0', + version='5.12.1', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], From 0c066de21111d8572425083b4a8792d193c80af1 Mon Sep 17 00:00:00 2001 From: Leslie VanDeMark <38043390+leslievandemark@users.noreply.github.com> Date: Wed, 8 Sep 2021 13:36:26 -0400 Subject: [PATCH 28/45] remove pinned `pytz` version (#152) * remove pinned version * use a floor of 2018.4 for pytz * update changelog Co-authored-by: Collin Simon --- CHANGELOG.md | 4 ++++ setup.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a9ebcc..ffcb8e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,8 @@ # Changelog + +## 5.12.2 + * Removes pinned `pytz` version [#152](https://github.com/singer-io/singer-python/pull/152) + ## 5.12.1 * Removes normalize function from `singer.decimal` to avoid scientific notation [#146](https://github.com/singer-io/singer-python/pull/146) diff --git 
a/setup.py b/setup.py index 90df264..cd5825c 100755 --- a/setup.py +++ b/setup.py @@ -4,13 +4,13 @@ import subprocess setup(name="singer-python", - version='5.12.1', + version='5.12.2', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], url="http://singer.io", install_requires=[ - 'pytz==2018.4', + 'pytz>=2018.4', 'jsonschema==2.6.0', 'simplejson==3.11.1', 'python-dateutil>=2.6.0', From bc3d942e0a5c8f8162154b6fb585a93230307672 Mon Sep 17 00:00:00 2001 From: Rushikesh Todkar <98420315+RushiT0122@users.noreply.github.com> Date: Thu, 3 Nov 2022 14:18:23 +0530 Subject: [PATCH 29/45] parse dev mode argument (#158) * parse dev mode argument * update the short flag for dev mode Replace `-dev` to `-D` * Updated the short flag for dev mode * - remove dev mode short flag Co-authored-by: RushiT0122 --- singer/utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/singer/utils.py b/singer/utils.py index 492e03b..48675eb 100644 --- a/singer/utils.py +++ b/singer/utils.py @@ -134,6 +134,7 @@ def parse_args(required_config_keys): -d,--discover Run in discover mode -p,--properties Properties file: DEPRECATED, please use --catalog instead --catalog Catalog file + --dev Runs the tap in dev mode Returns the parsed args object from argparse. 
For each argument that point to JSON files (config, state, properties), we will automatically @@ -163,6 +164,11 @@ def parse_args(required_config_keys): action='store_true', help='Do schema discovery') + parser.add_argument( + '--dev', + action='store_true', + help='Runs tap in dev mode') + args = parser.parse_args() if args.config: setattr(args, 'config_path', args.config) From 2c053f4c5f468235bba85a203a252438b1b3c704 Mon Sep 17 00:00:00 2001 From: Rushikesh Todkar <98420315+RushiT0122@users.noreply.github.com> Date: Mon, 7 Nov 2022 15:00:12 +0530 Subject: [PATCH 30/45] Bump version 5.13.0 (#160) * Bump version 5.13.0 * update changelog for dev mode argument parsing * change versioning to minor version bump Co-authored-by: RushiT0122 --- CHANGELOG.md | 3 +++ setup.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ffcb8e6..d06a7a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 5.13.0 + * Add support for dev mode argument parsing [#158](https://github.com/singer-io/singer-python/pull/158) + ## 5.12.2 * Removes pinned `pytz` version [#152](https://github.com/singer-io/singer-python/pull/152) diff --git a/setup.py b/setup.py index cd5825c..3d95c5d 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.12.2', + version='5.13.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], From 0d0ff1e990f1a7cf5c48fe6a75ee99a8aa9122c5 Mon Sep 17 00:00:00 2001 From: Leslie VanDeMark <38043390+leslievandemark@users.noreply.github.com> Date: Tue, 16 Jan 2024 10:27:14 -0500 Subject: [PATCH 31/45] Bump backoff to be compatible with newer python versions (#165) * bump backoff for python 3.11 compatibility * update pip version * try new circleci yml * remove 'make' * make pylint happy * make pylint happy * make pylint happy again * backoff version is a breaking change for old
python versions * Changelog update --- .circleci/config.yml | 35 ++++++++++++++++++++++------------- CHANGELOG.md | 3 +++ setup.py | 4 ++-- singer/catalog.py | 2 +- singer/exceptions.py | 2 +- singer/messages.py | 8 ++++---- singer/transform.py | 8 ++++---- singer/utils.py | 4 ++-- 8 files changed, 39 insertions(+), 27 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index ae6734c..a64745e 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,26 +1,35 @@ -version: 2 +version: 2.1 + +workflows: + build: + jobs: + - build: + context: + - circleci-user + jobs: build: docker: - - image: ubuntu:16.04 + - image: 218546966473.dkr.ecr.us-east-1.amazonaws.com/sources-python:1.1.0 steps: - checkout - - run: - name: 'Install python 3.5.2' - command: | - apt update - apt install --yes python3 python3-pip python3-venv - run: name: 'Setup virtualenv' command: | - mkdir -p ~/.virtualenvs + pyenv global 3.11.7 python3 -m venv ~/.virtualenvs/singer-python source ~/.virtualenvs/singer-python/bin/activate - pip install -U 'pip<19.2' 'setuptools<51.0.0' - make install + pip install -U 'pip==20.3.4' 'setuptools<51.0.0' + pip install .[dev] + - run: + name: 'Pylint' + command: | + source ~/.virtualenvs/singer-python/bin/activate + pip install pylint + pylint singer --extension-pkg-whitelist=ciso8601 -d missing-docstring,broad-exception-raised,broad-exception-caught,bare-except,too-many-return-statements,too-many-branches,too-many-arguments,no-else-return,too-few-public-methods,fixme,protected-access,consider-using-f-string - run: - name: 'Run tests' + name: 'Run Tests' command: | - # Need to re-activate the virtualenv source ~/.virtualenvs/singer-python/bin/activate - make test + pip install nose2 + nose2 -v -s tests diff --git a/CHANGELOG.md b/CHANGELOG.md index d06a7a7..21dbe09 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 6.0.0 + * Bump backoff version to 2.2.1. 
This version drops support for python 3.5, but adds it for 3.1o [#165](https://github.com/singer-io/singer-python/pull/165) + ## 5.13.0 * Add support for dev mode argument parsing [#158](https://github.com/singer-io/singer-python/pull/158) diff --git a/setup.py b/setup.py index 3d95c5d..48305ab 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='5.13.0', + version='6.0.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], @@ -14,7 +14,7 @@ 'jsonschema==2.6.0', 'simplejson==3.11.1', 'python-dateutil>=2.6.0', - 'backoff==1.8.0', + 'backoff==2.2.1', 'ciso8601', ], extras_require={ diff --git a/singer/catalog.py b/singer/catalog.py index 1767ff1..373a606 100644 --- a/singer/catalog.py +++ b/singer/catalog.py @@ -92,7 +92,7 @@ def __eq__(self, other): @classmethod def load(cls, filename): - with open(filename) as fp: # pylint: disable=invalid-name + with open(filename, encoding="utf-8") as fp: return Catalog.from_dict(json.load(fp)) @classmethod diff --git a/singer/exceptions.py b/singer/exceptions.py index 9231328..b13016d 100644 --- a/singer/exceptions.py +++ b/singer/exceptions.py @@ -11,7 +11,7 @@ def __init__(self, message): The first line is the error's class name. The subsequent lines are the message that class was created with. 
""" - super().__init__('{}\n{}'.format(self.__class__.__name__, message)) + super().__init__(f"{self.__class__.__name__}\n{message}") class SingerConfigurationError(SingerError): diff --git a/singer/messages.py b/singer/messages.py index 3848801..4a87235 100644 --- a/singer/messages.py +++ b/singer/messages.py @@ -11,16 +11,16 @@ class Message(): '''Base class for messages.''' - def asdict(self): # pylint: disable=no-self-use + def asdict(self): raise Exception('Not implemented') def __eq__(self, other): return isinstance(other, Message) and self.asdict() == other.asdict() def __repr__(self): - pairs = ["{}={}".format(k, v) for k, v in self.asdict().items()] + pairs = [f"{k}={v}" for k, v in self.asdict().items()] attrstr = ", ".join(pairs) - return "{}({})".format(self.__class__.__name__, attrstr) + return f"{self.__class__.__name__}({attrstr})" def __str__(self): return str(self.asdict()) @@ -169,7 +169,7 @@ def asdict(self): def _required_key(msg, k): if k not in msg: - raise Exception("Message is missing required key '{}': {}".format(k, msg)) + raise Exception(f"Message is missing required key '{k}': {msg}") return msg[k] diff --git a/singer/transform.py b/singer/transform.py index 3fdefdf..69f812a 100644 --- a/singer/transform.py +++ b/singer/transform.py @@ -77,16 +77,16 @@ def tostr(self): path = ".".join(map(str, self.path)) if self.schema: if self.logging_level >= logging.INFO: - msg = "data does not match {}".format(self.schema) + msg = f"data does not match {self.schema}" else: - msg = "does not match {}".format(self.schema) + msg = f"does not match {self.schema}" else: msg = "not in schema" if self.logging_level >= logging.INFO: - output = "{}: {}".format(path, msg) + output = f"{path}: {msg}" else: - output = "{}: {} {}".format(path, self.data, msg) + output = f"{path}: {self.data} {msg}" return output diff --git a/singer/utils.py b/singer/utils.py index 48675eb..6620005 100644 --- a/singer/utils.py +++ b/singer/utils.py @@ -105,7 +105,7 @@ def 
chunk(array, num): def load_json(path): - with open(path) as fil: + with open(path, encoding="utf-8") as fil: return json.load(fil) @@ -193,7 +193,7 @@ def parse_args(required_config_keys): def check_config(config, required_keys): missing_keys = [key for key in required_keys if key not in config] if missing_keys: - raise Exception("Config is missing required keys: {}".format(missing_keys)) + raise Exception(f"Config is missing required keys: {missing_keys}") def backoff(exceptions, giveup): From d6f0d2026645d7cc45b01a6116701e3564b42628 Mon Sep 17 00:00:00 2001 From: Bryant Gray Date: Tue, 19 Mar 2024 13:45:29 -0400 Subject: [PATCH 32/45] Relax dependency version requirements (#167) * Relax dependency constraints * Bump version to `6.0.1` * pin backoff and simplejson to major version * Don't allow older versions * Update changelog * Pin minimum and major versions --- CHANGELOG.md | 5 ++++- setup.py | 12 ++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 21dbe09..3633703 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,10 @@ # Changelog +## 6.0.1 + * Pin backoff and simplejson to any version greater than or equal to the previously allowed version, up to the next major version [#167](https://github.com/singer-io/singer-python/pull/167) + ## 6.0.0 - * Bump backoff version to 2.2.1. This version drops support for python 3.5, but adds it for 3.1o [#165](https://github.com/singer-io/singer-python/pull/165) + * Bump backoff version to 2.2.1. 
This version drops support for python 3.5, but adds it for 3.10 [#165](https://github.com/singer-io/singer-python/pull/165) ## 5.13.0 * Add support for dev mode argument parsing [#158](https://github.com/singer-io/singer-python/pull/158) diff --git a/setup.py b/setup.py index 48305ab..91f3ed0 100755 --- a/setup.py +++ b/setup.py @@ -4,18 +4,18 @@ import subprocess setup(name="singer-python", - version='6.0.0', + version='6.0.1', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], url="http://singer.io", install_requires=[ 'pytz>=2018.4', - 'jsonschema==2.6.0', - 'simplejson==3.11.1', - 'python-dateutil>=2.6.0', - 'backoff==2.2.1', - 'ciso8601', + 'jsonschema>=2.6.0,==2.*', + 'simplejson>=3.13.2,==3.*', + 'python-dateutil>=2.7.3,==2.*', + 'backoff>=2.2.1,==2.*', + 'ciso8601>=2.3.1,==2.*', ], extras_require={ 'dev': [ From 0cb22883deb76f1e8c116d5056a935410527948b Mon Sep 17 00:00:00 2001 From: Sourabh Gandhi <105213416+sgandhi1311@users.noreply.github.com> Date: Tue, 13 Aug 2024 18:47:08 +0530 Subject: [PATCH 33/45] Make `ensure_ascii` Dynamic with Default Set to `True` in JSON Serialization (#168) * add parameter - ensure_ascii to load non ascii characters when set to false * add unit test for ensuring ascii characters while loading * update setup and changelog --- CHANGELOG.md | 3 +++ setup.py | 2 +- singer/messages.py | 8 ++++---- tests/test_singer.py | 26 ++++++++++++++++++++++++++ 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3633703..4a06bd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 6.1.0 + * Make ensure_ascii Dynamic with Default Set to True in JSON Serialization. 
Required to handle the special characters [#168](https://github.com/singer-io/singer-python/pull/168) + ## 6.0.1 * Pin backoff and simplejson to any version greater than or equal to the previously allowed version, up to the next major version [#167](https://github.com/singer-io/singer-python/pull/167) diff --git a/setup.py b/setup.py index 91f3ed0..7ed9177 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='6.0.1', + version='6.1.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], diff --git a/singer/messages.py b/singer/messages.py index 4a87235..de6e076 100644 --- a/singer/messages.py +++ b/singer/messages.py @@ -218,12 +218,12 @@ def parse_message(msg): return None -def format_message(message): - return json.dumps(message.asdict(), use_decimal=True) +def format_message(message, ensure_ascii=True): + return json.dumps(message.asdict(), use_decimal=True, ensure_ascii=ensure_ascii) -def write_message(message): - sys.stdout.write(format_message(message) + '\n') +def write_message(message, ensure_ascii=True): + sys.stdout.write(format_message(message, ensure_ascii=ensure_ascii) + '\n') sys.stdout.flush() diff --git a/tests/test_singer.py b/tests/test_singer.py index 4fb74de..7f69bb5 100644 --- a/tests/test_singer.py +++ b/tests/test_singer.py @@ -1,5 +1,6 @@ import singer import unittest +from unittest.mock import patch import datetime import dateutil from decimal import Decimal @@ -179,6 +180,31 @@ def test_parse_bulk_decs(self): value = self.create_record(value_str) self.assertEqual(Decimal(value_str), value) + @patch('sys.stdout') + def test_ensure_ascii_false(self, mock_stdout): + """ + Setting ensure_ascii=False will preserve special characters like é + in their original form. 
+ """ + rec = {"name": "José"} + expected_output = '{"type": "RECORD", "stream": "test_stream", "record": {"name": "José"}}\n' + rec_message = singer.RecordMessage(stream="test_stream", record=rec) + result = singer.write_message(rec_message, ensure_ascii=False) + mock_stdout.write.assert_called_once_with(expected_output) + mock_stdout.flush.assert_called_once() + + @patch('sys.stdout') + def test_ensure_ascii_true(self, mock_stdout): + """ + ensure_ascii defaults to True, special characters like é are + escaped into their ASCII representation (e.g., \u00e9) + """ + rec = {"name": "José"} + expected_output = '{"type": "RECORD", "stream": "test_stream", "record": {"name": "Jos\\u00e9"}}\n' + rec_message = singer.RecordMessage(stream="test_stream", record=rec) + result = singer.write_message(rec_message) + mock_stdout.write.assert_called_once_with(expected_output) + mock_stdout.flush.assert_called_once() if __name__ == '__main__': unittest.main() From ae50276b7055248273d0458b85d891773f7d4597 Mon Sep 17 00:00:00 2001 From: Eivin Giske Skaaren Date: Tue, 3 Sep 2024 12:15:18 +0200 Subject: [PATCH 34/45] Enable copilot usage in PR template according to Qlik policy --- .github/pull_request_template.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 6e46b00..ef49bc0 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -9,3 +9,7 @@ # Rollback steps - revert this branch + +#### AI generated code +https://internal.qlik.dev/general/ways-of-working/code-reviews/#guidelines-for-ai-generated-code +- [ ] this PR has been written with the help of GitHub Copilot or another generative AI tool From f0e1e6ec69b5c57116c3aa29a66ff7485f162fc6 Mon Sep 17 00:00:00 2001 From: Bryant Gray Date: Mon, 24 Mar 2025 16:59:41 -0400 Subject: [PATCH 35/45] Use underscore instead of dash in setup.cfg (#171) * Use underscore instead of dash in setup.cfg 
https://github.com/pypa/setuptools/issues/4910 * disable some pylint warnings --- setup.cfg | 2 +- setup.py | 2 +- singer/catalog.py | 1 + singer/schema.py | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/setup.cfg b/setup.cfg index b88034e..08aedd7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,2 @@ [metadata] -description-file = README.md +description_file = README.md diff --git a/setup.py b/setup.py index 7ed9177..c48fb80 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='6.1.0', + version='6.1.1', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], diff --git a/singer/catalog.py b/singer/catalog.py index 373a606..d8ef147 100644 --- a/singer/catalog.py +++ b/singer/catalog.py @@ -20,6 +20,7 @@ def write_catalog(catalog): # pylint: disable=too-many-instance-attributes class CatalogEntry(): + # pylint: disable=too-many-positional-arguments def __init__(self, tap_stream_id=None, stream=None, key_properties=None, schema=None, replication_key=None, is_view=None, database=None, table=None, row_count=None, diff --git a/singer/schema.py b/singer/schema.py index b4da4ac..2fcafd6 100644 --- a/singer/schema.py +++ b/singer/schema.py @@ -31,7 +31,7 @@ class Schema(): # pylint: disable=too-many-instance-attributes ''' - # pylint: disable=too-many-locals + # pylint: disable=too-many-locals,too-many-positional-arguments def __init__(self, type=None, format=None, properties=None, items=None, selected=None, inclusion=None, description=None, minimum=None, maximum=None, exclusiveMinimum=None, exclusiveMaximum=None, From 1e0bccb34b1d2e2e346648c013a0e534a1727911 Mon Sep 17 00:00:00 2001 From: Ben Allred Date: Fri, 19 Sep 2025 09:58:13 -0600 Subject: [PATCH 36/45] add json schema generation (#175) Co-authored-by: Dylan Sprayberry --- CHANGELOG.md | 3 ++ setup.py | 2 +- singer/schema_generation.py | 92 +++++++++++++++++++++++++++++++++ 
tests/test_catalog.py | 6 +-- tests/test_exceptions.py | 24 ++++----- tests/test_schema.py | 28 +++++----- tests/test_schema_generation.py | 76 +++++++++++++++++++++++++++ tests/test_transform.py | 48 ++++++++--------- 8 files changed, 225 insertions(+), 54 deletions(-) create mode 100644 singer/schema_generation.py create mode 100644 tests/test_schema_generation.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a06bd4..59b0bd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 6.2.0 + * Adds json schema generation [#175](https://github.com/singer-io/singer-python/pull/175) + ## 6.1.0 * Make ensure_ascii Dynamic with Default Set to True in JSON Serialization. Required to handle the special characters [#168](https://github.com/singer-io/singer-python/pull/168) diff --git a/setup.py b/setup.py index c48fb80..4435246 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='6.1.1', + version='6.2.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], diff --git a/singer/schema_generation.py b/singer/schema_generation.py new file mode 100644 index 0000000..3d11f74 --- /dev/null +++ b/singer/schema_generation.py @@ -0,0 +1,92 @@ +import dateutil.parser + + +def add_observation(acc, path): + + node = acc + for i in range(0, len(path) - 1): + k = path[i] + if k not in node: + node[k] = {} + node = node[k] + + node[path[-1]] = True + +# pylint: disable=too-many-branches +def add_observations(acc, path, data): + if isinstance(data, dict): + for key in data: + add_observations(acc, path + ["object", key], data[key]) + elif isinstance(data, list): + for item in data: + add_observations(acc, path + ["array"], item) + elif isinstance(data, str): + # If the string parses as a date, add an observation that its a date + try: + data = dateutil.parser.parse(data) + except (dateutil.parser.ParserError, OverflowError): + data = None + if 
data: + add_observation(acc, path + ["date"]) + else: + add_observation(acc, path + ["string"]) + + elif isinstance(data, bool): + add_observation(acc, path + ["boolean"]) + elif isinstance(data, int): + add_observation(acc, path + ["integer"]) + elif isinstance(data, float): + add_observation(acc, path + ["number"]) + elif data is None: + add_observation(acc, path + ["null"]) + else: + raise Exception("Unexpected value " + repr(data) + " at path " + repr(path)) + + return acc + +def to_json_schema(obs): + result = {'type': ['null']} + + for key in obs: + + if key == 'object': + result['type'] += ['object'] + if 'properties' not in result: + result['properties'] = {} + for obj_key in obs['object']: + result['properties'][obj_key] = to_json_schema(obs['object'][obj_key]) + + elif key == 'array': + result['type'] += ['array'] + result['items'] = to_json_schema(obs['array']) + + elif key == 'date': + result['type'] += ['string'] + result['format'] = 'date-time' + elif key == 'string': + result['type'] += ['string'] + + elif key == 'boolean': + result['type'] += ['boolean'] + + elif key == 'integer': + result['type'] += ['integer'] + + elif key == 'number': + # Use type=string, format=singer.decimal + result['type'] += ['string'] + result['format'] = 'singer.decimal' + + elif key == 'null': + pass + + else: + raise Exception("Unexpected data type " + key) + + return result + +def generate_schema(records): + obs = {} + for record in records: + obs = add_observations(obs, [], record) + return to_json_schema(obs) diff --git a/tests/test_catalog.py b/tests/test_catalog.py index cd6dc50..8a72e1a 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -25,7 +25,7 @@ def test_one_selected_stream(self): CatalogEntry(tap_stream_id='c',schema=Schema(),metadata=[])]) state = {} selected_streams = catalog.get_selected_streams(state) - self.assertEquals([e for e in selected_streams],[selected_entry]) + self.assertEqual([e for e in selected_streams],[selected_entry]) def 
test_resumes_currently_syncing_stream(self): selected_entry_a = CatalogEntry(tap_stream_id='a', @@ -44,7 +44,7 @@ def test_resumes_currently_syncing_stream(self): selected_entry_c]) state = {'currently_syncing': 'c'} selected_streams = catalog.get_selected_streams(state) - self.assertEquals([e for e in selected_streams][0],selected_entry_c) + self.assertEqual([e for e in selected_streams][0],selected_entry_c) class TestToDictAndFromDict(unittest.TestCase): @@ -141,4 +141,4 @@ def test(self): CatalogEntry(tap_stream_id='b'), CatalogEntry(tap_stream_id='c')]) entry = catalog.get_stream('b') - self.assertEquals('b', entry.tap_stream_id) + self.assertEqual('b', entry.tap_stream_id) diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py index 491595f..50cf7a1 100644 --- a/tests/test_exceptions.py +++ b/tests/test_exceptions.py @@ -14,8 +14,8 @@ def test_SingerError_prints_correctly(self): raise SingerError(error_text) expected_text = "SingerError\n" + error_text - self.assertEquals(expected_text, - str(test_run.exception)) + self.assertEqual(expected_text, + str(test_run.exception)) def test_SingerConfigurationError_prints_correctly(self): error_text = "An error occured" @@ -24,8 +24,8 @@ def test_SingerConfigurationError_prints_correctly(self): raise SingerConfigurationError(error_text) expected_text = "SingerConfigurationError\n" + error_text - self.assertEquals(expected_text, - str(test_run.exception)) + self.assertEqual(expected_text, + str(test_run.exception)) def test_SingerDiscoveryError_prints_correctly(self): error_text = "An error occured" @@ -34,8 +34,8 @@ def test_SingerDiscoveryError_prints_correctly(self): raise SingerDiscoveryError(error_text) expected_text = "SingerDiscoveryError\n" + error_text - self.assertEquals(expected_text, - str(test_run.exception)) + self.assertEqual(expected_text, + str(test_run.exception)) def test_SingerSyncError_prints_correctly(self): error_text = "An error occured" @@ -44,8 +44,8 @@ def 
test_SingerSyncError_prints_correctly(self): raise SingerSyncError(error_text) expected_text = "SingerSyncError\n" + error_text - self.assertEquals(expected_text, - str(test_run.exception)) + self.assertEqual(expected_text, + str(test_run.exception)) def test_SingerRetryableRequestError_prints_correctly(self): error_text = "An error occured" @@ -54,8 +54,8 @@ def test_SingerRetryableRequestError_prints_correctly(self): raise SingerRetryableRequestError(error_text) expected_text = "SingerRetryableRequestError\n" + error_text - self.assertEquals(expected_text, - str(test_run.exception)) + self.assertEqual(expected_text, + str(test_run.exception)) def test_SingerError_prints_multiple_lines_correctly(self): error_text = "\n".join(["Line 1", "Line 2", "Line 3"]) @@ -64,5 +64,5 @@ def test_SingerError_prints_multiple_lines_correctly(self): raise SingerError(error_text) expected_text = "SingerError\n" + error_text - self.assertEquals(expected_text, - str(test_run.exception)) + self.assertEqual(expected_text, + str(test_run.exception)) diff --git a/tests/test_schema.py b/tests/test_schema.py index fa28bac..5682755 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -44,38 +44,38 @@ class TestSchema(unittest.TestCase): additionalProperties=True) def test_string_to_dict(self): - self.assertEquals(self.string_dict, self.string_obj.to_dict()) + self.assertEqual(self.string_dict, self.string_obj.to_dict()) def test_integer_to_dict(self): - self.assertEquals(self.integer_dict, self.integer_obj.to_dict()) + self.assertEqual(self.integer_dict, self.integer_obj.to_dict()) def test_array_to_dict(self): - self.assertEquals(self.array_dict, self.array_obj.to_dict()) + self.assertEqual(self.array_dict, self.array_obj.to_dict()) def test_object_to_dict(self): - self.assertEquals(self.object_dict, self.object_obj.to_dict()) + self.assertEqual(self.object_dict, self.object_obj.to_dict()) def test_string_from_dict(self): - self.assertEquals(self.string_obj, 
Schema.from_dict(self.string_dict)) + self.assertEqual(self.string_obj, Schema.from_dict(self.string_dict)) def test_integer_from_dict(self): - self.assertEquals(self.integer_obj, Schema.from_dict(self.integer_dict)) + self.assertEqual(self.integer_obj, Schema.from_dict(self.integer_dict)) def test_array_from_dict(self): - self.assertEquals(self.array_obj, Schema.from_dict(self.array_dict)) + self.assertEqual(self.array_obj, Schema.from_dict(self.array_dict)) def test_object_from_dict(self): - self.assertEquals(self.object_obj, Schema.from_dict(self.object_dict)) + self.assertEqual(self.object_obj, Schema.from_dict(self.object_dict)) def test_repr_atomic(self): - self.assertEquals(self.string_obj, eval(repr(self.string_obj))) + self.assertEqual(self.string_obj, eval(repr(self.string_obj))) def test_repr_recursive(self): - self.assertEquals(self.object_obj, eval(repr(self.object_obj))) + self.assertEqual(self.object_obj, eval(repr(self.object_obj))) def test_object_from_dict_with_defaults(self): schema = Schema.from_dict(self.object_dict, inclusion='automatic') - self.assertEquals('whatever', schema.inclusion, - msg='The schema value should override the default') - self.assertEquals('automatic', schema.properties['a_string'].inclusion) - self.assertEquals('automatic', schema.properties['an_array'].items.inclusion) + self.assertEqual('whatever', schema.inclusion, + msg='The schema value should override the default') + self.assertEqual('automatic', schema.properties['a_string'].inclusion) + self.assertEqual('automatic', schema.properties['an_array'].items.inclusion) diff --git a/tests/test_schema_generation.py b/tests/test_schema_generation.py new file mode 100644 index 0000000..5e00738 --- /dev/null +++ b/tests/test_schema_generation.py @@ -0,0 +1,76 @@ +import unittest +from singer.schema_generation import generate_schema + +class TestSchemaGeneration(unittest.TestCase): + def test_simple_schema(self): + records = [{'a': 1, 'b': 'two', 'c': True, 'dt': 
'2000-01-01T00:11:22Z'}] + expected_schema = { + 'type': ['null', 'object'], + 'properties': { + 'a': {'type': ['null', 'integer']}, + 'b': {'type': ['null', 'string']}, + 'c': {'type': ['null', 'boolean']}, + 'dt': {'type': ['null', 'string'], 'format': 'date-time'} + } + } + self.assertEqual(expected_schema, generate_schema(records)) + + def test_mix_n_match_records_schema(self): + records = [ + {'a': 1, 'b': 'b'}, + {'a': 'two', 'c': 7, 'd': [1, 'two']}, + {'a': True, 'c': 7.7, 'd': {'one': 1, 'two': 'two'}} + ] + expected_schema = { + 'type': ['null', 'object'], + 'properties': { + 'a': {'type': {'null', 'integer', 'string', 'boolean'}}, + 'b': {'type': ['null', 'string']}, + 'c': {'type': {'null', 'integer', 'string'}, 'format': 'singer.decimal'}, + 'd': { + 'type': {'null', 'array', 'object'}, + 'items': {'type': {'null', 'integer', 'string'}}, + 'properties': {'one': {'type': ['null', 'integer']}, + 'two': {'type': ['null', 'string']}} + + } + } + } + actual_schema = generate_schema(records) + actual_schema['properties']['a']['type'] = set(actual_schema['properties']['a']['type']) + actual_schema['properties']['c']['type'] = set(actual_schema['properties']['c']['type']) + actual_schema['properties']['d']['type'] = set(actual_schema['properties']['d']['type']) + actual_schema['properties']['d']['items']['type'] = set(actual_schema['properties']['d']['items']['type']) + self.assertEqual(expected_schema, actual_schema) + + def test_nested_structue_schema(self): + records = [{'a': {'b': {'c': [{'d': 7}]}, 'e': [[1, 2, 3]]}}] + expected_schema = { + 'type': ['null', 'object'], + 'properties': { + 'a': { + 'type': ['null', 'object'], + 'properties': { + 'b': { + 'type': ['null', 'object'], + 'properties': { + 'c': { + 'type': ['null', 'array'], + 'items': { + 'type': ['null', 'object'], + 'properties': {'d': {'type': ['null', 'integer']}} + } + } + } + }, + 'e': { + 'type': ['null', 'array'], + 'items': { + 'type': ['null', 'array'], + 'items': {'type': ['null', 
'integer']}} + } + } + } + } + } + self.assertEqual(expected_schema, generate_schema(records)) diff --git a/tests/test_transform.py b/tests/test_transform.py index 959c4b8..96e0c3b 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -264,11 +264,11 @@ def test_decimal_types_transform(self): nan = {'percentage': decimal.Decimal('NaN')} snan = {'percentage': decimal.Decimal('sNaN')} - self.assertEquals(inf, transform(inf, schema)) - self.assertEquals(negative_inf, transform(negative_inf, schema)) - self.assertEquals({'percentage': '1.4142135623730951'}, transform(root2, schema)) - self.assertEquals({'percentage': 'NaN'}, transform(nan, schema)) - self.assertEquals({'percentage': 'NaN'}, transform(snan, schema)) + self.assertEqual(inf, transform(inf, schema)) + self.assertEqual(negative_inf, transform(negative_inf, schema)) + self.assertEqual({'percentage': '1.4142135623730951'}, transform(root2, schema)) + self.assertEqual({'percentage': 'NaN'}, transform(nan, schema)) + self.assertEqual({'percentage': 'NaN'}, transform(snan, schema)) str1 = {'percentage':'0.1'} @@ -276,11 +276,11 @@ def test_decimal_types_transform(self): str3 = {'percentage': '1E+13'} str4 = {'percentage': '100'} str5 = {'percentage': '-100'} - self.assertEquals(str1, transform(str1, schema)) - self.assertEquals({'percentage': '1E-13'}, transform(str2, schema)) - self.assertEquals({'percentage': '1E+13'}, transform(str3, schema)) - self.assertEquals({'percentage': '100'}, transform(str4, schema)) - self.assertEquals({'percentage': '-100'}, transform(str5, schema)) + self.assertEqual(str1, transform(str1, schema)) + self.assertEqual({'percentage': '1E-13'}, transform(str2, schema)) + self.assertEqual({'percentage': '1E+13'}, transform(str3, schema)) + self.assertEqual({'percentage': '100'}, transform(str4, schema)) + self.assertEqual({'percentage': '-100'}, transform(str5, schema)) float1 = {'percentage': 12.0000000000000000000000000001234556} float2 = {'percentage': 0.0123} @@ 
-288,28 +288,28 @@ def test_decimal_types_transform(self): float4 = {'percentage': -100.0123} float5 = {'percentage': 0.000001} float6 = {'percentage': 0.0000001} - self.assertEquals({'percentage':'12.0'}, transform(float1, schema)) - self.assertEquals({'percentage':'0.0123'}, transform(float2, schema)) - self.assertEquals({'percentage':'100.0123'}, transform(float3, schema)) - self.assertEquals({'percentage':'-100.0123'}, transform(float4, schema)) - self.assertEquals({'percentage':'0.000001'}, transform(float5, schema)) - self.assertEquals({'percentage':'1E-7'}, transform(float6, schema)) + self.assertEqual({'percentage':'12.0'}, transform(float1, schema)) + self.assertEqual({'percentage':'0.0123'}, transform(float2, schema)) + self.assertEqual({'percentage':'100.0123'}, transform(float3, schema)) + self.assertEqual({'percentage':'-100.0123'}, transform(float4, schema)) + self.assertEqual({'percentage':'0.000001'}, transform(float5, schema)) + self.assertEqual({'percentage':'1E-7'}, transform(float6, schema)) int1 = {'percentage': 123} int2 = {'percentage': 0} int3 = {'percentage': -1000} - self.assertEquals({'percentage':'123'}, transform(int1, schema)) - self.assertEquals({'percentage':'0'}, transform(int2, schema)) - self.assertEquals({'percentage':'-1000'}, transform(int3, schema)) + self.assertEqual({'percentage':'123'}, transform(int1, schema)) + self.assertEqual({'percentage':'0'}, transform(int2, schema)) + self.assertEqual({'percentage':'-1000'}, transform(int3, schema)) dec1 = {'percentage': decimal.Decimal('1.1010101')} dec2 = {'percentage': decimal.Decimal('.111111111111111111111111')} dec3 = {'percentage': decimal.Decimal('-.111111111111111111111111')} dec4 = {'percentage': decimal.Decimal('100')} - self.assertEquals({'percentage':'1.1010101'}, transform(dec1, schema)) - self.assertEquals({'percentage':'0.111111111111111111111111'}, transform(dec2, schema)) - self.assertEquals({'percentage':'-0.111111111111111111111111'}, transform(dec3, schema)) - 
self.assertEquals({'percentage':'100'}, transform(dec4, schema)) + self.assertEqual({'percentage':'1.1010101'}, transform(dec1, schema)) + self.assertEqual({'percentage':'0.111111111111111111111111'}, transform(dec2, schema)) + self.assertEqual({'percentage':'-0.111111111111111111111111'}, transform(dec3, schema)) + self.assertEqual({'percentage':'100'}, transform(dec4, schema)) bad1 = {'percentage': 'fsdkjl'} with self.assertRaises(SchemaMismatch): @@ -317,7 +317,7 @@ def test_decimal_types_transform(self): badnull = {'percentage': None} with self.assertRaises(SchemaMismatch): - self.assertEquals({'percentage':None}, transform(badnull, schema)) + self.assertEqual({'percentage':None}, transform(badnull, schema)) class TestTransformsWithMetadata(unittest.TestCase): From 3cbe7ca68596b51e38564b2193873765afb4bd86 Mon Sep 17 00:00:00 2001 From: Ben Allred Date: Thu, 2 Oct 2025 12:37:23 -0600 Subject: [PATCH 37/45] Sac 28668 fix transform and schema (#177) * handle empty arrays and fields that could be either formatted or nested * remove ipdb * bump version and add changelog entry * handle string parsing similar to existing tap-s3 logic * fix syntax error * fix bad tests --- CHANGELOG.md | 6 ++++++ setup.py | 2 +- singer/schema_generation.py | 30 ++++++++++++++++++++++-------- singer/transform.py | 4 ++-- tests/test_transform.py | 4 ++-- 5 files changed, 33 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 59b0bd4..1080b69 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## 6.2.1 + * Fixes json schema generation to not treat numbers as dates + * Fixes json schema generation to handle empty arrays + * Fixes record transformation to handle fields that could be either formatted string or nested data structure + * [#177](https://github.com/singer-io/singer-python/pull/177) + ## 6.2.0 * Adds json schema generation [#175](https://github.com/singer-io/singer-python/pull/175) diff --git a/setup.py b/setup.py index
4435246..c34951f 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='6.2.0', + version='6.2.1', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], diff --git a/singer/schema_generation.py b/singer/schema_generation.py index 3d11f74..1b73388 100644 --- a/singer/schema_generation.py +++ b/singer/schema_generation.py @@ -18,19 +18,33 @@ def add_observations(acc, path, data): for key in data: add_observations(acc, path + ["object", key], data[key]) elif isinstance(data, list): + if len(data) == 0: + add_observations(acc, path + ["array"], None) for item in data: add_observations(acc, path + ["array"], item) elif isinstance(data, str): - # If the string parses as a date, add an observation that its a date try: - data = dateutil.parser.parse(data) - except (dateutil.parser.ParserError, OverflowError): - data = None - if data: + # If the string parses as a int, add an observation that it's a integer + int(data) + add_observation(acc, path + ["integer"]) + return acc + except (ValueError, TypeError): + pass + try: + # If the string parses as a float, add an observation that it's a number + float(data) + add_observation(acc, path + ["number"]) + return acc + except (ValueError, TypeError): + pass + try: + # If the string parses as a date, add an observation that it's a date + dateutil.parser.parse(data) add_observation(acc, path + ["date"]) - else: - add_observation(acc, path + ["string"]) - + return acc + except (dateutil.parser.ParserError, OverflowError): + pass + add_observation(acc, path + ["string"]) elif isinstance(data, bool): add_observation(acc, path + ["boolean"]) elif isinstance(data, int): diff --git a/singer/transform.py b/singer/transform.py index 69f812a..f125fd5 100644 --- a/singer/transform.py +++ b/singer/transform.py @@ -266,13 +266,13 @@ def _transform(self, data, typ, schema, path): else: return False, None - elif 
schema.get("format") == "date-time": + elif typ == "string" and schema.get("format") == "date-time": data = self._transform_datetime(data) if data is None: return False, None return True, data - elif schema.get("format") == "singer.decimal": + elif typ == "string" and schema.get("format") == "singer.decimal": if data is None: return False, None diff --git a/tests/test_transform.py b/tests/test_transform.py index 96e0c3b..308ac4e 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -25,7 +25,7 @@ def test_nested_transform(self): def test_multi_type_object_transform(self): schema = {"type": ["null", "object", "string"], - "properties": {"whatever": {"type": "date-time", + "properties": {"whatever": {"type": "string", "format": "date-time"}}} data = {"whatever": "2017-01-01"} expected = {"whatever": "2017-01-01T00:00:00.000000Z"} @@ -36,7 +36,7 @@ def test_multi_type_object_transform(self): def test_multi_type_array_transform(self): schema = {"type": ["null", "array", "integer"], - "items": {"type": "date-time", "format": "date-time"}} + "items": {"type": "string", "format": "date-time"}} data = ["2017-01-01"] expected = ["2017-01-01T00:00:00.000000Z"] self.assertEqual(expected, transform(data, schema)) From bb412f4795b85b8f932ad3fb958c4fbc17667581 Mon Sep 17 00:00:00 2001 From: Ben Allred Date: Wed, 8 Oct 2025 14:59:17 -0600 Subject: [PATCH 38/45] SAC-28668: update schema generation v6 (#180) * use `anyOf` when multiple types are found * fix test * Update schema generation and bump version for v6 deploy Co-authored-by: Bryant Gray Co-authored-by: Andres Pineda * Grab error list changes from v5 Co-authored-by: Bryant Gray Co-authored-by: Andres Pineda * Fix linting error Co-authored-by: Bryant Gray --------- Co-authored-by: Bryant Gray Co-authored-by: Andres Pineda --- CHANGELOG.md | 6 ++++++ setup.py | 2 +- singer/schema_generation.py | 31 +++++++++++++++---------------- singer/transform.py | 2 ++ tests/test_schema_generation.py | 30 
+++++++++++++----------------- 5 files changed, 37 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1080b69..18a4ea5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## 6.2.2 + * Updates json schema generation to not emit dates + * Handle multiple schemas with anyOf and emit them in a specific order + * Do not emit error messages when checking multiple schemas and a subsequent schema passes + * [#179](https://github.com/singer-io/singer-python/pull/179) + ## 6.2.1 * Fixes json schema generation to not treat numbers as dates * Fixes json schema generation to handle empty arrays diff --git a/setup.py b/setup.py index c34951f..c7ea106 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='6.2.1', + version='6.2.2', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], diff --git a/singer/schema_generation.py b/singer/schema_generation.py index 1b73388..003a16f 100644 --- a/singer/schema_generation.py +++ b/singer/schema_generation.py @@ -1,6 +1,3 @@ -import dateutil.parser - - def add_observation(acc, path): node = acc @@ -37,13 +34,6 @@ def add_observations(acc, path, data): return acc except (ValueError, TypeError): pass - try: - # If the string parses as a date, add an observation that it's a date - dateutil.parser.parse(data) - add_observation(acc, path + ["date"]) - return acc - except (dateutil.parser.ParserError, OverflowError): - pass add_observation(acc, path + ["string"]) elif isinstance(data, bool): add_observation(acc, path + ["boolean"]) @@ -59,9 +49,13 @@ def add_observations(acc, path, data): return acc def to_json_schema(obs): - result = {'type': ['null']} + types = [] + # add schema types in a specific order to anyOf list + for key in ['array', 'object', 'number', 'integer', 'boolean', 'string', 'null']: + if key not in obs: + continue - for key in obs: + result = {'type': 
['null']} if key == 'object': result['type'] += ['object'] @@ -74,9 +68,6 @@ def to_json_schema(obs): result['type'] += ['array'] result['items'] = to_json_schema(obs['array']) - elif key == 'date': - result['type'] += ['string'] - result['format'] = 'date-time' elif key == 'string': result['type'] += ['string'] @@ -97,7 +88,15 @@ def to_json_schema(obs): else: raise Exception("Unexpected data type " + key) - return result + types.append(result) + + if len(types) == 0: + return {'type': ['null', 'string']} + + if len(types) == 1: + return types[0] + + return {'anyOf': types} def generate_schema(records): obs = {} diff --git a/singer/transform.py b/singer/transform.py index f125fd5..3a9fc96 100644 --- a/singer/transform.py +++ b/singer/transform.py @@ -185,6 +185,8 @@ def _transform_anyof(self, data, schema, path): success, transformed_data = self.transform_recur(data, subschema, path) if success: return success, transformed_data + else: + self.errors.pop() else: # pylint: disable=useless-else-on-loop # exhaused all schemas and didn't return, so we failed :-( self.errors.append(Error(path, data, schema, logging_level=LOGGER.level)) diff --git a/tests/test_schema_generation.py b/tests/test_schema_generation.py index 5e00738..610ac20 100644 --- a/tests/test_schema_generation.py +++ b/tests/test_schema_generation.py @@ -10,7 +10,7 @@ def test_simple_schema(self): 'a': {'type': ['null', 'integer']}, 'b': {'type': ['null', 'string']}, 'c': {'type': ['null', 'boolean']}, - 'dt': {'type': ['null', 'string'], 'format': 'date-time'} + 'dt': {'type': ['null', 'string']} } } self.assertEqual(expected_schema, generate_schema(records)) @@ -23,24 +23,20 @@ def test_mix_n_match_records_schema(self): ] expected_schema = { 'type': ['null', 'object'], - 'properties': { - 'a': {'type': {'null', 'integer', 'string', 'boolean'}}, - 'b': {'type': ['null', 'string']}, - 'c': {'type': {'null', 'integer', 'string'}, 'format': 'singer.decimal'}, - 'd': { - 'type': {'null', 'array', 
'object'}, - 'items': {'type': {'null', 'integer', 'string'}}, - 'properties': {'one': {'type': ['null', 'integer']}, - 'two': {'type': ['null', 'string']}} - - } - } + 'properties': {'a': {'anyOf': [{'type': ['null', 'integer']}, + {'type': ['null', 'boolean']}, + {'type': ['null', 'string']}]}, + 'b': {'type': ['null', 'string']}, + 'c': {'anyOf': [{'type': ['null', 'string'], 'format': 'singer.decimal'}, + {'type': ['null', 'integer']}]}, + 'd': {'anyOf': [{'type': ['null', 'array'], + 'items': {'anyOf': [{'type': ['null', 'integer']}, + {'type': ['null', 'string']}]}}, + {'type': ['null', 'object'], + 'properties': {'one': {'type': ['null', 'integer']}, + 'two': {'type': ['null', 'string']}}}]}} } actual_schema = generate_schema(records) - actual_schema['properties']['a']['type'] = set(actual_schema['properties']['a']['type']) - actual_schema['properties']['c']['type'] = set(actual_schema['properties']['c']['type']) - actual_schema['properties']['d']['type'] = set(actual_schema['properties']['d']['type']) - actual_schema['properties']['d']['items']['type'] = set(actual_schema['properties']['d']['items']['type']) self.assertEqual(expected_schema, actual_schema) def test_nested_structue_schema(self): From 9145ecb84f65c279a13e9eea5b2b704cb967e2c1 Mon Sep 17 00:00:00 2001 From: Ben Allred Date: Wed, 15 Oct 2025 13:58:57 -0600 Subject: [PATCH 39/45] Default to string for schema generation (#182) Co-authored-by: Dylan Sprayberry Co-authored-by: Bryant Gray Co-authored-by: Andres Pineda --- CHANGELOG.md | 3 +++ setup.py | 2 +- singer/schema_generation.py | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 18a4ea5..43ce58e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 6.2.3 + * Default type for non-standard data types is string [#182](https://github.com/singer-io/singer-python/pull/182) + ## 6.2.2 * Updates json schema generation to not emit dates * Handle multiple schemas with anyOf and 
emit them in a specific order diff --git a/setup.py b/setup.py index c7ea106..9940901 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='6.2.2', + version='6.2.3', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], diff --git a/singer/schema_generation.py b/singer/schema_generation.py index 003a16f..177d659 100644 --- a/singer/schema_generation.py +++ b/singer/schema_generation.py @@ -44,7 +44,7 @@ def add_observations(acc, path, data): elif data is None: add_observation(acc, path + ["null"]) else: - raise Exception("Unexpected value " + repr(data) + " at path " + repr(path)) + add_observation(acc, path + ["string"]) return acc From ccf2266b3dce333d9865d64878ebf4e0855833d8 Mon Sep 17 00:00:00 2001 From: Sourabh Gandhi <105213416+sgandhi1311@users.noreply.github.com> Date: Wed, 22 Oct 2025 19:08:01 +0530 Subject: [PATCH 40/45] Support allow_nan in message JSON output (#183) * allow nan values to replicate * update setup and changelog * make pylint happy * add test cases for allow nan --- CHANGELOG.md | 3 +++ setup.py | 2 +- singer/messages.py | 13 ++++++--- tests/test_transform.py | 60 ++++++++++++++++++++++++++++++++++++++++- 4 files changed, 72 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 43ce58e..a878fa9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 6.3.0 + * Support allow_nan in message JSON output [#183](https://github.com/singer-io/singer-python/pull/183) + ## 6.2.3 * Default type for non-standard data types is string [#182](https://github.com/singer-io/singer-python/pull/182) diff --git a/setup.py b/setup.py index 9940901..a25f30b 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='6.2.3', + version='6.3.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 
:: Only'], diff --git a/singer/messages.py b/singer/messages.py index de6e076..941670c 100644 --- a/singer/messages.py +++ b/singer/messages.py @@ -218,12 +218,17 @@ def parse_message(msg): return None -def format_message(message, ensure_ascii=True): - return json.dumps(message.asdict(), use_decimal=True, ensure_ascii=ensure_ascii) +def format_message(message, ensure_ascii=True, allow_nan=False): + return json.dumps( + message.asdict(), + use_decimal=True, + ensure_ascii=ensure_ascii, + allow_nan=allow_nan + ) -def write_message(message, ensure_ascii=True): - sys.stdout.write(format_message(message, ensure_ascii=ensure_ascii) + '\n') +def write_message(message, ensure_ascii=True, allow_nan=False): + sys.stdout.write(format_message(message, ensure_ascii=ensure_ascii, allow_nan=allow_nan) + '\n') sys.stdout.flush() diff --git a/tests/test_transform.py b/tests/test_transform.py index 308ac4e..b398e93 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -1,9 +1,12 @@ +import io +import sys import unittest import decimal +import simplejson as json +import singer.messages as messages from singer import transform from singer.transform import * - class TestTransform(unittest.TestCase): def test_integer_transform(self): schema = {'type': 'integer'} @@ -486,3 +489,58 @@ def test_pattern_properties_match_multiple(self): dict_value = {"name": "chicken", "unit_cost": 1.45, "SKU": '123456'} expected = dict(dict_value) self.assertEqual(expected, transform(dict_value, schema)) + +class DummyMessage: + """A dummy message object with an asdict() method.""" + def __init__(self, value): + self.value = value + + def asdict(self): + return {"value": self.value} + + +class TestAllowNan(unittest.TestCase): + """Unit tests for allow_nan support in singer.messages.""" + + def test_format_message_allow_nan_true(self): + """Should serialize NaN successfully when allow_nan=True.""" + msg = DummyMessage(float("nan")) + result = messages.format_message(msg, allow_nan=True) + + # 
The output JSON should contain NaN literal (not quoted) + self.assertIn("NaN", result) + + # Replace NaN with null to make it valid JSON for parsing check + json.loads(result.replace("NaN", "null")) + + def test_format_message_allow_nan_false(self): + """Should raise ValueError when allow_nan=False and value is NaN.""" + msg = DummyMessage(float("nan")) + with self.assertRaises(ValueError): + messages.format_message(msg, allow_nan=False) + + def test_write_message_allow_nan_true(self): + """Should write to stdout successfully when allow_nan=True.""" + msg = DummyMessage(float("nan")) + fake_stdout = io.StringIO() + original_stdout = sys.stdout + sys.stdout = fake_stdout + try: + messages.write_message(msg, allow_nan=True) + output = fake_stdout.getvalue() + self.assertIn("NaN", output) + self.assertTrue(output.endswith("\n")) + finally: + sys.stdout = original_stdout + + def test_write_message_allow_nan_false(self): + """Should raise ValueError when allow_nan=False and message has NaN.""" + msg = DummyMessage(float("nan")) + fake_stdout = io.StringIO() + original_stdout = sys.stdout + sys.stdout = fake_stdout + try: + with self.assertRaises(ValueError): + messages.write_message(msg, allow_nan=False) + finally: + sys.stdout = original_stdout From 6ae25be46879066d95d881de857c9a15226b0b1a Mon Sep 17 00:00:00 2001 From: Ben Allred Date: Mon, 2 Feb 2026 11:39:23 -0700 Subject: [PATCH 41/45] SAC-29666: Update clear_offset to remove offset key from bookmark (#185) * Update clear_offset to remove offset key from bookmark ----------------------------- Co-authored-by: Ben Allred * bump to version 6.4.0 ----------------------------- Co-authored-by: Ben Allred --------- Co-authored-by: Leslie VanDeMark --- CHANGELOG.md | 3 +++ setup.py | 2 +- singer/bookmarks.py | 4 +--- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a878fa9..5b5fbd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 6.4.0 + * Update 
clear_offset to remove offset key from bookmark [#185](https://github.com/singer-io/singer-python/pull/185) + ## 6.3.0 * Support allow_nan in message JSON output [#183](https://github.com/singer-io/singer-python/pull/183) diff --git a/setup.py b/setup.py index a25f30b..5b8d135 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='6.3.0', + version='6.4.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], diff --git a/singer/bookmarks.py b/singer/bookmarks.py index fc6d7ca..40aa927 100644 --- a/singer/bookmarks.py +++ b/singer/bookmarks.py @@ -31,9 +31,7 @@ def set_offset(state, tap_stream_id, offset_key, offset_value): return state def clear_offset(state, tap_stream_id): - state = ensure_bookmark_path(state, ['bookmarks', tap_stream_id, "offset"]) - state['bookmarks'][tap_stream_id]["offset"] = {} - return state + return clear_bookmark(state, tap_stream_id, "offset") def get_offset(state, tap_stream_id, default=None): return state.get('bookmarks', {}).get(tap_stream_id, {}).get("offset", default) From d4a74494f08b55bf5b7962bbe0b3eb9abb895d27 Mon Sep 17 00:00:00 2001 From: Leslie VanDeMark <38043390+leslievandemark@users.noreply.github.com> Date: Mon, 9 Feb 2026 15:43:19 -0500 Subject: [PATCH 42/45] Add activate_versions key to state (#188) * Add activate_versions state modification fns, new state.py file ----------------------------- Co-authored-by: Ben Allred * change deprecation annotation to comment ----------------------------- Co-authored-by: Ben Allred * bump version to 6.5.0, changelog update ----------------------------- Co-authored-by: Ben Allred * whitespace cleanup ----------------------------- Co-authored-by: Ben Allred --------- Co-authored-by: Ben Allred --- CHANGELOG.md | 4 + setup.py | 2 +- singer/bookmarks.py | 39 +++---- singer/state.py | 60 ++++++++++ tests/test_bookmarks.py | 166 ---------------------------- 
tests/test_state.py | 235 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 314 insertions(+), 192 deletions(-) create mode 100644 singer/state.py delete mode 100644 tests/test_bookmarks.py create mode 100644 tests/test_state.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b5fbd4..155f5af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 6.5.0 + * Add `activate_versions` state functions [#188](https://github.com/singer-io/singer-python/pull/188) + * Deprecates bookmarks.py, functions are moved to state.py + ## 6.4.0 * Update clear_offset to remove offset key from bookmark [#185](https://github.com/singer-io/singer-python/pull/185) diff --git a/setup.py b/setup.py index 5b8d135..9005c39 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='6.4.0', + version='6.5.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], diff --git a/singer/bookmarks.py b/singer/bookmarks.py index 40aa927..2cd9636 100644 --- a/singer/bookmarks.py +++ b/singer/bookmarks.py @@ -1,44 +1,33 @@ -def ensure_bookmark_path(state, path): - submap = state - for path_component in path: - if submap.get(path_component) is None: - submap[path_component] = {} +from singer import state as st + +## Note - This file is deprecated, use state.py functions. 
- submap = submap[path_component] - return state +def ensure_bookmark_path(state, path): + return st.ensure_state_path(state, path) def write_bookmark(state, tap_stream_id, key, val): - state = ensure_bookmark_path(state, ['bookmarks', tap_stream_id]) - state['bookmarks'][tap_stream_id][key] = val - return state + return st.write_bookmark(state, tap_stream_id, key, val) def clear_bookmark(state, tap_stream_id, key): - state = ensure_bookmark_path(state, ['bookmarks', tap_stream_id]) - state['bookmarks'][tap_stream_id].pop(key, None) - return state + return st.clear_bookmark(state, tap_stream_id, key) def reset_stream(state, tap_stream_id): - state = ensure_bookmark_path(state, ['bookmarks', tap_stream_id]) - state['bookmarks'][tap_stream_id] = {} - return state + return st.reset_stream(state, tap_stream_id) def get_bookmark(state, tap_stream_id, key, default=None): - return state.get('bookmarks', {}).get(tap_stream_id, {}).get(key, default) + return st.get_bookmark(state, tap_stream_id, key, default) def set_offset(state, tap_stream_id, offset_key, offset_value): - state = ensure_bookmark_path(state, ['bookmarks', tap_stream_id, "offset", offset_key]) - state['bookmarks'][tap_stream_id]["offset"][offset_key] = offset_value - return state + return st.set_offset(state, tap_stream_id, offset_key, offset_value) def clear_offset(state, tap_stream_id): - return clear_bookmark(state, tap_stream_id, "offset") + return st.clear_offset(state, tap_stream_id) def get_offset(state, tap_stream_id, default=None): - return state.get('bookmarks', {}).get(tap_stream_id, {}).get("offset", default) + return st.get_offset(state, tap_stream_id, default) def set_currently_syncing(state, tap_stream_id): - state['currently_syncing'] = tap_stream_id - return state + return st.set_currently_syncing(state, tap_stream_id) def get_currently_syncing(state, default=None): - return state.get('currently_syncing', default) + return st.get_currently_syncing(state, default) diff --git 
a/singer/state.py b/singer/state.py new file mode 100644 index 0000000..772a919 --- /dev/null +++ b/singer/state.py @@ -0,0 +1,60 @@ +def ensure_state_path(state, path): + submap = state + for path_component in path: + if submap.get(path_component) is None: + submap[path_component] = {} + + submap = submap[path_component] + return state + +def write_bookmark(state, tap_stream_id, key, val): + state = ensure_state_path(state, ['bookmarks', tap_stream_id]) + state['bookmarks'][tap_stream_id][key] = val + return state + +def clear_bookmark(state, tap_stream_id, key): + state = ensure_state_path(state, ['bookmarks', tap_stream_id]) + state['bookmarks'][tap_stream_id].pop(key, None) + return state + +def reset_stream(state, tap_stream_id): + state = ensure_state_path(state, ['bookmarks', tap_stream_id]) + state['bookmarks'][tap_stream_id] = {} + if 'activate_versions' in state: + state = ensure_state_path(state, ['activate_versions', tap_stream_id]) + state['activate_versions'][tap_stream_id] = {} + return state + +def get_bookmark(state, tap_stream_id, key, default=None): + return state.get('bookmarks', {}).get(tap_stream_id, {}).get(key, default) + +def set_offset(state, tap_stream_id, offset_key, offset_value): + state = ensure_state_path(state, ['bookmarks', tap_stream_id, "offset", offset_key]) + state['bookmarks'][tap_stream_id]["offset"][offset_key] = offset_value + return state + +def clear_offset(state, tap_stream_id): + return clear_bookmark(state, tap_stream_id, "offset") + +def get_offset(state, tap_stream_id, default=None): + return state.get('bookmarks', {}).get(tap_stream_id, {}).get("offset", default) + +def set_currently_syncing(state, tap_stream_id): + state['currently_syncing'] = tap_stream_id + return state + +def get_currently_syncing(state, default=None): + return state.get('currently_syncing', default) + +def write_version(state, tap_stream_id, key, val): + state = ensure_state_path(state, ['activate_versions', tap_stream_id]) + 
state['activate_versions'][tap_stream_id][key] = val + return state + +def clear_version(state, tap_stream_id, key): + state = ensure_state_path(state, ['activate_versions', tap_stream_id]) + state['activate_versions'][tap_stream_id].pop(key, None) + return state + +def get_version(state, tap_stream_id, key, default=None): + return state.get('activate_versions', {}).get(tap_stream_id, {}).get(key, default) diff --git a/tests/test_bookmarks.py b/tests/test_bookmarks.py deleted file mode 100644 index 4902105..0000000 --- a/tests/test_bookmarks.py +++ /dev/null @@ -1,166 +0,0 @@ -import unittest -from singer import bookmarks - -class TestGetBookmark(unittest.TestCase): - def test_empty_state(self): - empty_state = {} - - # Case with no value to fall back on - self.assertIsNone(bookmarks.get_bookmark(empty_state, 'some_stream', 'my_key')) - - # Case with a given default - self.assertEqual(bookmarks.get_bookmark(empty_state, 'some_stream', 'my_key', 'default_value'), - 'default_value') - - def test_empty_bookmark(self): - empty_bookmark = {'bookmarks':{}} - - # Case with no value to fall back on - self.assertIsNone(bookmarks.get_bookmark(empty_bookmark, 'some_stream', 'my_key')) - - # Case with a given default - self.assertEqual(bookmarks.get_bookmark(empty_bookmark, 'some_stream', 'my_key', 'default_value'), - 'default_value') - - def test_non_empty_state(self): - stream_id_1 = 'customers' - bookmark_key_1 = 'datetime' - bookmark_val_1 = 123456789 - - non_empty_state = { - 'bookmarks' : { - stream_id_1 : { - bookmark_key_1 : bookmark_val_1 - } - } - } - - # - # Cases with no value to fall back on - # - - # Bad stream, bad key - self.assertIsNone(bookmarks.get_bookmark(non_empty_state, 'some_stream', 'my_key')) - - # Good stream, bad key - self.assertIsNone(bookmarks.get_bookmark(non_empty_state, stream_id_1, 'my_key')) - - # Good stream, good key - self.assertEqual(bookmarks.get_bookmark(non_empty_state, stream_id_1, bookmark_key_1), - bookmark_val_1) - - # - # Cases 
with a given default - # - - # Bad stream, bad key - self.assertEqual(bookmarks.get_bookmark(non_empty_state, 'some_stream', 'my_key', 'default_value'), - 'default_value') - - # Bad stream, good key - self.assertEqual(bookmarks.get_bookmark(non_empty_state, 'some_stream', bookmark_key_1, 'default_value'), - 'default_value') - - # Good stream, bad key - self.assertEqual(bookmarks.get_bookmark(non_empty_state, stream_id_1, 'my_key', 'default_value'), - 'default_value') - - # Good stream, good key - self.assertEqual(bookmarks.get_bookmark(non_empty_state, stream_id_1, bookmark_key_1, 'default_value'), - bookmark_val_1) - - -class TestGetOffset(unittest.TestCase): - def test_empty_state(self): - empty_state = {} - - # Case with no value to fall back on - self.assertIsNone(bookmarks.get_offset(empty_state, 'some_stream')) - - # Case with a given default - self.assertEqual(bookmarks.get_offset(empty_state, 'some_stream', 'default_value'), - 'default_value') - - def test_empty_bookmark(self): - empty_bookmark = {'bookmarks':{}} - - # Case with no value to fall back on - self.assertIsNone(bookmarks.get_offset(empty_bookmark, 'some_stream')) - - # Case with a given default - self.assertEqual(bookmarks.get_offset(empty_bookmark, 'some_stream', 'default_value'), - 'default_value') - - def test_non_empty_state(self): - stream_id_1 = 'customers' - bookmark_key_1 = 'datetime' - bookmark_val_1 = 123456789 - offset_val = 'fizzy water' - - non_empty_state = { - 'bookmarks' : { - stream_id_1 : { - bookmark_key_1 : bookmark_val_1, - 'offset' : offset_val - } - } - } - - # - # Cases with no value to fall back on - # - - # Bad stream - self.assertIsNone(bookmarks.get_offset(non_empty_state, 'some_stream')) - - # Good stream - self.assertEqual(bookmarks.get_offset(non_empty_state, stream_id_1), - offset_val) - - # - # Case with a given default - # - - # Bad stream - self.assertEqual(bookmarks.get_offset(non_empty_state, 'some_stream', 'default_value'), - 'default_value') - - # Good 
stream - self.assertEqual(bookmarks.get_offset(non_empty_state, stream_id_1, 'default_value'), - offset_val) - - -class TestGetCurrentlySyncing(unittest.TestCase): - def test_empty_state(self): - empty_state = {} - - # Case with no value to fall back on - self.assertIsNone(bookmarks.get_currently_syncing(empty_state)) - - # Case with a given default - self.assertEqual(bookmarks.get_currently_syncing(empty_state, 'default_value'), - 'default_value') - - def test_non_empty_state(self): - stream_id_1 = 'customers' - bookmark_key_1 = 'datetime' - bookmark_val_1 = 123456789 - offset_val = 'fizzy water' - - non_empty_state = { - 'bookmarks' : { - stream_id_1 : { - bookmark_key_1 : bookmark_val_1, - 'offset' : offset_val - } - }, - 'currently_syncing' : stream_id_1 - } - - # Case with no value to fall back on - self.assertEqual(bookmarks.get_currently_syncing(non_empty_state), - stream_id_1) - - # Case with a given default - self.assertEqual(bookmarks.get_currently_syncing(non_empty_state, 'default_value'), - stream_id_1) diff --git a/tests/test_state.py b/tests/test_state.py new file mode 100644 index 0000000..beb7b23 --- /dev/null +++ b/tests/test_state.py @@ -0,0 +1,235 @@ +import unittest +from singer import state as st + +class TestGetBookmark(unittest.TestCase): + def test_empty_state(self): + empty_state = {} + + # Case with no value to fall back on + self.assertIsNone(st.get_bookmark(empty_state, 'some_stream', 'my_key')) + + # Case with a given default + self.assertEqual(st.get_bookmark(empty_state, 'some_stream', 'my_key', 'default_value'), + 'default_value') + + def test_empty_bookmark(self): + empty_bookmark = {'bookmarks':{}} + + # Case with no value to fall back on + self.assertIsNone(st.get_bookmark(empty_bookmark, 'some_stream', 'my_key')) + + # Case with a given default + self.assertEqual(st.get_bookmark(empty_bookmark, 'some_stream', 'my_key', 'default_value'), + 'default_value') + + def test_non_empty_state(self): + stream_id_1 = 'customers' + 
bookmark_key_1 = 'datetime' + bookmark_val_1 = 123456789 + + non_empty_state = { + 'bookmarks' : { + stream_id_1 : { + bookmark_key_1 : bookmark_val_1 + } + } + } + + # + # Cases with no value to fall back on + # + + # Bad stream, bad key + self.assertIsNone(st.get_bookmark(non_empty_state, 'some_stream', 'my_key')) + + # Good stream, bad key + self.assertIsNone(st.get_bookmark(non_empty_state, stream_id_1, 'my_key')) + + # Good stream, good key + self.assertEqual(st.get_bookmark(non_empty_state, stream_id_1, bookmark_key_1), + bookmark_val_1) + + # + # Cases with a given default + # + + # Bad stream, bad key + self.assertEqual(st.get_bookmark(non_empty_state, 'some_stream', 'my_key', 'default_value'), + 'default_value') + + # Bad stream, good key + self.assertEqual(st.get_bookmark(non_empty_state, 'some_stream', bookmark_key_1, 'default_value'), + 'default_value') + + # Good stream, bad key + self.assertEqual(st.get_bookmark(non_empty_state, stream_id_1, 'my_key', 'default_value'), + 'default_value') + + # Good stream, good key + self.assertEqual(st.get_bookmark(non_empty_state, stream_id_1, bookmark_key_1, 'default_value'), + bookmark_val_1) + + +class TestGetOffset(unittest.TestCase): + def test_empty_state(self): + empty_state = {} + + # Case with no value to fall back on + self.assertIsNone(st.get_offset(empty_state, 'some_stream')) + + # Case with a given default + self.assertEqual(st.get_offset(empty_state, 'some_stream', 'default_value'), + 'default_value') + + def test_empty_bookmark(self): + empty_bookmark = {'bookmarks':{}} + + # Case with no value to fall back on + self.assertIsNone(st.get_offset(empty_bookmark, 'some_stream')) + + # Case with a given default + self.assertEqual(st.get_offset(empty_bookmark, 'some_stream', 'default_value'), + 'default_value') + + def test_non_empty_state(self): + stream_id_1 = 'customers' + bookmark_key_1 = 'datetime' + bookmark_val_1 = 123456789 + offset_val = 'fizzy water' + + non_empty_state = { + 'bookmarks' : { + 
stream_id_1 : { + bookmark_key_1 : bookmark_val_1, + 'offset' : offset_val + } + } + } + + # + # Cases with no value to fall back on + # + + # Bad stream + self.assertIsNone(st.get_offset(non_empty_state, 'some_stream')) + + # Good stream + self.assertEqual(st.get_offset(non_empty_state, stream_id_1), + offset_val) + + # + # Case with a given default + # + + # Bad stream + self.assertEqual(st.get_offset(non_empty_state, 'some_stream', 'default_value'), + 'default_value') + + # Good stream + self.assertEqual(st.get_offset(non_empty_state, stream_id_1, 'default_value'), + offset_val) + + +class TestGetCurrentlySyncing(unittest.TestCase): + def test_empty_state(self): + empty_state = {} + + # Case with no value to fall back on + self.assertIsNone(st.get_currently_syncing(empty_state)) + + # Case with a given default + self.assertEqual(st.get_currently_syncing(empty_state, 'default_value'), + 'default_value') + + def test_non_empty_state(self): + stream_id_1 = 'customers' + bookmark_key_1 = 'datetime' + bookmark_val_1 = 123456789 + offset_val = 'fizzy water' + + non_empty_state = { + 'bookmarks' : { + stream_id_1 : { + bookmark_key_1 : bookmark_val_1, + 'offset' : offset_val + } + }, + 'currently_syncing' : stream_id_1 + } + + # Case with no value to fall back on + self.assertEqual(st.get_currently_syncing(non_empty_state), + stream_id_1) + + # Case with a given default + self.assertEqual(st.get_currently_syncing(non_empty_state, 'default_value'), + stream_id_1) + + +class TestActivateVersion(unittest.TestCase): + def test_empty_state(self): + empty_state = {} + + # Case with no value to fall back on + self.assertIsNone(st.get_version(empty_state, 'some_stream', 'my_key')) + + # Case with a given default + self.assertEqual(st.get_version(empty_state, 'some_stream', 'my_key', 'default_value'), + 'default_value') + + def test_empty_activate_versions(self): + empty_versions = {'activate_versions':{}} + + # Case with no value to fall back on + 
self.assertIsNone(st.get_version(empty_versions, 'some_stream', 'my_key')) + + # Case with a given default + self.assertEqual(st.get_version(empty_versions, 'some_stream', 'my_key', 'default_value'), + 'default_value') + + def test_non_empty_state(self): + stream_id_1 = 'customers' + version_key_1 = 'version' + version_val_1 = 123456789 + + non_empty_state = { + 'activate_versions' : { + stream_id_1 : { + version_key_1 : version_val_1 + } + } + } + + # + # Cases with no value to fall back on + # + + # Bad stream, bad key + self.assertIsNone(st.get_version(non_empty_state, 'some_stream', 'my_key')) + + # Good stream, bad key + self.assertIsNone(st.get_version(non_empty_state, stream_id_1, 'my_key')) + + # Good stream, good key + self.assertEqual(st.get_version(non_empty_state, stream_id_1, version_key_1), + version_val_1) + + # + # Cases with a given default + # + + # Bad stream, bad key + self.assertEqual(st.get_version(non_empty_state, 'some_stream', 'my_key', 'default_value'), + 'default_value') + + # Bad stream, good key + self.assertEqual(st.get_version(non_empty_state, 'some_stream', version_key_1, 'default_value'), + 'default_value') + + # Good stream, bad key + self.assertEqual(st.get_version(non_empty_state, stream_id_1, 'my_key', 'default_value'), + 'default_value') + + # Good stream, good key + self.assertEqual(st.get_version(non_empty_state, stream_id_1, version_key_1, 'default_value'), + version_val_1) From fbcc1ec0bc79643932e7b36a475e99f4bfae98b7 Mon Sep 17 00:00:00 2001 From: Ben Allred Date: Mon, 9 Feb 2026 14:45:22 -0700 Subject: [PATCH 43/45] export singer.state functions from main module (#190) * export singer.state functions from main module * - export singer.state functions from main module - bump singer-python version and add changelog entry - rename singer.state.write_bookmark to singer.state.set_bookmark - add test cases for set_bookmark, clear_bookmark, set_offset, clear_offset, set_version, and clear_version ----------------------------- 
Co-authored-by: Ben Allred * update changelog description to include rename of singer.state.write_bookmark to singer.state.set_bookmark ----------------------------- Co-authored-by: Ben Allred * disable pylint warning ----------------------------- Co-authored-by: Ben Allred * disable pylint warning for reimported ----------------------------- Co-authored-by: Ben Allred --------- Co-authored-by: Leslie VanDeMark --- .circleci/config.yml | 2 +- CHANGELOG.md | 5 ++++ setup.py | 2 +- singer/__init__.py | 9 +++++-- singer/bookmarks.py | 2 +- singer/state.py | 4 +-- tests/test_state.py | 58 +++++++++++++++++++++++++++++++++++++++++--- 7 files changed, 72 insertions(+), 10 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index a64745e..693d3b9 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -26,7 +26,7 @@ jobs: command: | source ~/.virtualenvs/singer-python/bin/activate pip install pylint - pylint singer --extension-pkg-whitelist=ciso8601 -d missing-docstring,broad-exception-raised,broad-exception-caught,bare-except,too-many-return-statements,too-many-branches,too-many-arguments,no-else-return,too-few-public-methods,fixme,protected-access,consider-using-f-string + pylint singer --extension-pkg-whitelist=ciso8601 -d missing-docstring,broad-exception-raised,broad-exception-caught,bare-except,too-many-return-statements,too-many-branches,too-many-arguments,no-else-return,too-few-public-methods,fixme,protected-access,consider-using-f-string,reimported - run: name: 'Run Tests' command: | diff --git a/CHANGELOG.md b/CHANGELOG.md index 155f5af..9bf321c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## 6.6.0 + * Export singer.state functions from singer + * Rename singer.state.write_bookmark to singer.state.set_bookmark + * [#190](https://github.com/singer-io/singer-python/pull/190) + ## 6.5.0 * Add `activate_versions` state functions [#188](https://github.com/singer-io/singer-python/pull/188) * Deprecates 
bookmarks.py, functions are moved to state.py diff --git a/setup.py b/setup.py index 9005c39..ff99fb8 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='6.5.0', + version='6.6.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], diff --git a/singer/__init__.py b/singer/__init__.py index 26f0043..613dc99 100644 --- a/singer/__init__.py +++ b/singer/__init__.py @@ -61,8 +61,10 @@ ) from singer.schema import Schema -from singer.bookmarks import ( - write_bookmark, +from singer.state import ( + set_bookmark, + # for backwards compatibility, use set_bookmark instead + set_bookmark as write_bookmark, get_bookmark, clear_bookmark, reset_stream, @@ -71,6 +73,9 @@ get_offset, set_currently_syncing, get_currently_syncing, + set_version, + clear_version, + get_version, ) from singer.exceptions import ( diff --git a/singer/bookmarks.py b/singer/bookmarks.py index 2cd9636..b506ff2 100644 --- a/singer/bookmarks.py +++ b/singer/bookmarks.py @@ -6,7 +6,7 @@ def ensure_bookmark_path(state, path): return st.ensure_state_path(state, path) def write_bookmark(state, tap_stream_id, key, val): - return st.write_bookmark(state, tap_stream_id, key, val) + return st.set_bookmark(state, tap_stream_id, key, val) def clear_bookmark(state, tap_stream_id, key): return st.clear_bookmark(state, tap_stream_id, key) diff --git a/singer/state.py b/singer/state.py index 772a919..10f0cf9 100644 --- a/singer/state.py +++ b/singer/state.py @@ -7,7 +7,7 @@ def ensure_state_path(state, path): submap = submap[path_component] return state -def write_bookmark(state, tap_stream_id, key, val): +def set_bookmark(state, tap_stream_id, key, val): state = ensure_state_path(state, ['bookmarks', tap_stream_id]) state['bookmarks'][tap_stream_id][key] = val return state @@ -46,7 +46,7 @@ def set_currently_syncing(state, tap_stream_id): def get_currently_syncing(state, default=None): return 
state.get('currently_syncing', default) -def write_version(state, tap_stream_id, key, val): +def set_version(state, tap_stream_id, key, val): state = ensure_state_path(state, ['activate_versions', tap_stream_id]) state['activate_versions'][tap_stream_id][key] = val return state diff --git a/tests/test_state.py b/tests/test_state.py index beb7b23..72d8512 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -1,7 +1,7 @@ import unittest from singer import state as st -class TestGetBookmark(unittest.TestCase): +class TestBookmark(unittest.TestCase): def test_empty_state(self): empty_state = {} @@ -69,8 +69,24 @@ def test_non_empty_state(self): self.assertEqual(st.get_bookmark(non_empty_state, stream_id_1, bookmark_key_1, 'default_value'), bookmark_val_1) + def test_set_bookmark(self): + stream_id_1 = 'customers' + bookmark_key_1 = 'datetime' + bookmark_val_1 = 123456789 + + result = st.set_bookmark({'bookmarks': {stream_id_1: {bookmark_key_1: 'old-value'}}}, stream_id_1, bookmark_key_1, bookmark_val_1) + self.assertEqual(result, {'bookmarks': {stream_id_1: {bookmark_key_1: bookmark_val_1}}}) -class TestGetOffset(unittest.TestCase): + def test_clear_bookmark(self): + stream_id_1 = 'customers' + bookmark_key_1 = 'datetime' + bookmark_val_1 = 123456789 + + result = st.clear_bookmark({'bookmarks': {stream_id_1: {bookmark_key_1: bookmark_val_1}}}, stream_id_1, bookmark_key_1) + self.assertEqual(result, {'bookmarks': {stream_id_1: {}}}) + + +class TestOffset(unittest.TestCase): def test_empty_state(self): empty_state = {} @@ -129,8 +145,24 @@ def test_non_empty_state(self): self.assertEqual(st.get_offset(non_empty_state, stream_id_1, 'default_value'), offset_val) + def test_set_offset(self): + stream_id_1 = 'customers' + offset_key_1 = 'datetime' + offset_val_1 = 123456789 + + result = st.set_offset({'bookmarks': {stream_id_1: {'offset': {offset_key_1: 'old-value'}}}}, stream_id_1, offset_key_1, offset_val_1) + self.assertEqual(result, {'bookmarks': {stream_id_1: 
{'offset': {offset_key_1: offset_val_1}}}}) -class TestGetCurrentlySyncing(unittest.TestCase): + def test_clear_offset(self): + stream_id_1 = 'customers' + offset_key_1 = 'datetime' + offset_val_1 = 123456789 + + result = st.clear_offset({'bookmarks': {stream_id_1: {'offset': {offset_key_1: offset_val_1}}}}, stream_id_1) + self.assertEqual(result, {'bookmarks': {stream_id_1: {}}}) + + +class TestCurrentlySyncing(unittest.TestCase): def test_empty_state(self): empty_state = {} @@ -165,6 +197,10 @@ def test_non_empty_state(self): self.assertEqual(st.get_currently_syncing(non_empty_state, 'default_value'), stream_id_1) + def test_set_currently_syncing(self): + result = st.set_currently_syncing({'currently_syncing': 'foo'}, 'bar') + self.assertEqual(result, {'currently_syncing': 'bar'}) + class TestActivateVersion(unittest.TestCase): def test_empty_state(self): @@ -233,3 +269,19 @@ def test_non_empty_state(self): # Good stream, good key self.assertEqual(st.get_version(non_empty_state, stream_id_1, version_key_1, 'default_value'), version_val_1) + + def test_set_version(self): + stream_id_1 = 'customers' + version_key_1 = 'datetime' + version_val_1 = 123456789 + + result = st.set_version({'activate_versions': {stream_id_1: {version_key_1: 'old-value'}}}, stream_id_1, version_key_1, version_val_1) + self.assertEqual(result, {'activate_versions': {stream_id_1: {version_key_1: version_val_1}}}) + + def test_clear_version(self): + stream_id_1 = 'customers' + version_key_1 = 'datetime' + version_val_1 = 123456789 + + result = st.clear_version({'activate_versions': {stream_id_1: {version_key_1: version_val_1}}}, stream_id_1, version_key_1) + self.assertEqual(result, {'activate_versions': {stream_id_1: {}}}) From f47930e9afb8952abf5a3bd873bae016f325127e Mon Sep 17 00:00:00 2001 From: Ben Allred Date: Tue, 10 Feb 2026 08:51:41 -0700 Subject: [PATCH 44/45] SAC-30196: remove key from version functions (#192) * remove key from version state functions * bump to version 6.7.0 and 
update changelog ----------------------------- Co-authored-by: Ben Allred --------- Co-authored-by: Leslie VanDeMark --- CHANGELOG.md | 3 +++ setup.py | 2 +- singer/state.py | 12 +++++------ tests/test_state.py | 50 +++++++++++++++------------------------------ 4 files changed, 27 insertions(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bf321c..ea3c6bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 6.7.0 + * Remove `key` from set_version, get_version, and clear_version state functions [#192](https://github.com/singer-io/singer-python/pull/192) + ## 6.6.0 * Export singer.state functions from singer * Rename singer.state.write_bookmark to singer.state.set_bookmark diff --git a/setup.py b/setup.py index ff99fb8..3c53bc1 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='6.6.0', + version='6.7.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], diff --git a/singer/state.py b/singer/state.py index 10f0cf9..a6b24fc 100644 --- a/singer/state.py +++ b/singer/state.py @@ -46,15 +46,15 @@ def set_currently_syncing(state, tap_stream_id): def get_currently_syncing(state, default=None): return state.get('currently_syncing', default) -def set_version(state, tap_stream_id, key, val): +def set_version(state, tap_stream_id, val): state = ensure_state_path(state, ['activate_versions', tap_stream_id]) - state['activate_versions'][tap_stream_id][key] = val + state['activate_versions'][tap_stream_id] = val return state -def clear_version(state, tap_stream_id, key): +def clear_version(state, tap_stream_id): state = ensure_state_path(state, ['activate_versions', tap_stream_id]) - state['activate_versions'][tap_stream_id].pop(key, None) + state['activate_versions'].pop(tap_stream_id, None) return state -def get_version(state, tap_stream_id, key, default=None): - return state.get('activate_versions', 
{}).get(tap_stream_id, {}).get(key, default) +def get_version(state, tap_stream_id, default=None): + return state.get('activate_versions', {}).get(tap_stream_id, default) diff --git a/tests/test_state.py b/tests/test_state.py index 72d8512..eac75e1 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -207,32 +207,29 @@ def test_empty_state(self): empty_state = {} # Case with no value to fall back on - self.assertIsNone(st.get_version(empty_state, 'some_stream', 'my_key')) + self.assertIsNone(st.get_version(empty_state, 'some_stream')) # Case with a given default - self.assertEqual(st.get_version(empty_state, 'some_stream', 'my_key', 'default_value'), + self.assertEqual(st.get_version(empty_state, 'some_stream', 'default_value'), 'default_value') def test_empty_activate_versions(self): empty_versions = {'activate_versions':{}} # Case with no value to fall back on - self.assertIsNone(st.get_version(empty_versions, 'some_stream', 'my_key')) + self.assertIsNone(st.get_version(empty_versions, 'some_stream')) # Case with a given default - self.assertEqual(st.get_version(empty_versions, 'some_stream', 'my_key', 'default_value'), + self.assertEqual(st.get_version(empty_versions, 'some_stream', 'default_value'), 'default_value') def test_non_empty_state(self): stream_id_1 = 'customers' - version_key_1 = 'version' version_val_1 = 123456789 non_empty_state = { 'activate_versions' : { - stream_id_1 : { - version_key_1 : version_val_1 - } + stream_id_1 : version_val_1 } } @@ -240,48 +237,35 @@ def test_non_empty_state(self): # Cases with no value to fall back on # - # Bad stream, bad key - self.assertIsNone(st.get_version(non_empty_state, 'some_stream', 'my_key')) - - # Good stream, bad key - self.assertIsNone(st.get_version(non_empty_state, stream_id_1, 'my_key')) + # Bad stream + self.assertIsNone(st.get_version(non_empty_state, 'some_stream')) - # Good stream, good key - self.assertEqual(st.get_version(non_empty_state, stream_id_1, version_key_1), + # Good stream + 
self.assertEqual(st.get_version(non_empty_state, stream_id_1), version_val_1) # # Cases with a given default # - # Bad stream, bad key - self.assertEqual(st.get_version(non_empty_state, 'some_stream', 'my_key', 'default_value'), - 'default_value') - - # Bad stream, good key - self.assertEqual(st.get_version(non_empty_state, 'some_stream', version_key_1, 'default_value'), - 'default_value') - - # Good stream, bad key - self.assertEqual(st.get_version(non_empty_state, stream_id_1, 'my_key', 'default_value'), + # Bad stream + self.assertEqual(st.get_version(non_empty_state, 'some_stream', 'default_value'), 'default_value') - # Good stream, good key - self.assertEqual(st.get_version(non_empty_state, stream_id_1, version_key_1, 'default_value'), + # Good stream + self.assertEqual(st.get_version(non_empty_state, stream_id_1, 'default_value'), version_val_1) def test_set_version(self): stream_id_1 = 'customers' - version_key_1 = 'datetime' version_val_1 = 123456789 - result = st.set_version({'activate_versions': {stream_id_1: {version_key_1: 'old-value'}}}, stream_id_1, version_key_1, version_val_1) - self.assertEqual(result, {'activate_versions': {stream_id_1: {version_key_1: version_val_1}}}) + result = st.set_version({'activate_versions': {stream_id_1: 'old-value'}}, stream_id_1, version_val_1) + self.assertEqual(result, {'activate_versions': {stream_id_1: version_val_1}}) def test_clear_version(self): stream_id_1 = 'customers' - version_key_1 = 'datetime' version_val_1 = 123456789 - result = st.clear_version({'activate_versions': {stream_id_1: {version_key_1: version_val_1}}}, stream_id_1, version_key_1) - self.assertEqual(result, {'activate_versions': {stream_id_1: {}}}) + result = st.clear_version({'activate_versions': {stream_id_1: version_val_1}}, stream_id_1) + self.assertEqual(result, {'activate_versions': {}}) From e90a59203115bbd1fd50202b8d28f5762b42a2a1 Mon Sep 17 00:00:00 2001 From: Dylan Sprayberry <28106103+dsprayberry@users.noreply.github.com> Date: Fri, 
27 Feb 2026 12:09:40 -0500 Subject: [PATCH 45/45] SAC-30196: Rename `activate_versions` key to `versions` in state (#194) * Rename `activate_versions` state key to `versions` Co-authored-by: Bryant Gray * More activate_versions -> versions * Corresponding test fixes Co-authored-by: Bryant Gray Co-authored-by: Casey Alvarez * Changelog --------- Co-authored-by: Bryant Gray Co-authored-by: Casey Alvarez --- CHANGELOG.md | 3 +++ setup.py | 2 +- singer/state.py | 16 ++++++++-------- tests/test_state.py | 16 ++++++++-------- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea3c6bb..e98553c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 6.8.0 + * Rename state key `activate_versions` to `versions` in all relevant locations [#194](https://github.com/singer-io/singer-python/pull/194) + ## 6.7.0 * Remove `key` from set_version, get_version, and clear_version state functions [#192](https://github.com/singer-io/singer-python/pull/192) diff --git a/setup.py b/setup.py index 3c53bc1..e0b32eb 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess setup(name="singer-python", - version='6.7.0', + version='6.8.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], diff --git a/singer/state.py b/singer/state.py index a6b24fc..f7d4357 100644 --- a/singer/state.py +++ b/singer/state.py @@ -20,9 +20,9 @@ def clear_bookmark(state, tap_stream_id, key): def reset_stream(state, tap_stream_id): state = ensure_state_path(state, ['bookmarks', tap_stream_id]) state['bookmarks'][tap_stream_id] = {} - if 'activate_versions' in state: - state = ensure_state_path(state, ['activate_versions', tap_stream_id]) - state['activate_versions'][tap_stream_id] = {} + if 'versions' in state: + state = ensure_state_path(state, ['versions', tap_stream_id]) + state['versions'][tap_stream_id] = {} return state def get_bookmark(state, tap_stream_id, key, 
default=None): @@ -47,14 +47,14 @@ def get_currently_syncing(state, default=None): return state.get('currently_syncing', default) def set_version(state, tap_stream_id, val): - state = ensure_state_path(state, ['activate_versions', tap_stream_id]) - state['activate_versions'][tap_stream_id] = val + state = ensure_state_path(state, ['versions', tap_stream_id]) + state['versions'][tap_stream_id] = val return state def clear_version(state, tap_stream_id): - state = ensure_state_path(state, ['activate_versions', tap_stream_id]) - state['activate_versions'].pop(tap_stream_id, None) + state = ensure_state_path(state, ['versions', tap_stream_id]) + state['versions'].pop(tap_stream_id, None) return state def get_version(state, tap_stream_id, default=None): - return state.get('activate_versions', {}).get(tap_stream_id, default) + return state.get('versions', {}).get(tap_stream_id, default) diff --git a/tests/test_state.py b/tests/test_state.py index eac75e1..4d30322 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -202,7 +202,7 @@ def test_set_currently_syncing(self): self.assertEqual(result, {'currently_syncing': 'bar'}) -class TestActivateVersion(unittest.TestCase): +class TestVersions(unittest.TestCase): def test_empty_state(self): empty_state = {} @@ -213,8 +213,8 @@ def test_empty_state(self): self.assertEqual(st.get_version(empty_state, 'some_stream', 'default_value'), 'default_value') - def test_empty_activate_versions(self): - empty_versions = {'activate_versions':{}} + def test_empty_versions(self): + empty_versions = {'versions':{}} # Case with no value to fall back on self.assertIsNone(st.get_version(empty_versions, 'some_stream')) @@ -228,7 +228,7 @@ def test_non_empty_state(self): version_val_1 = 123456789 non_empty_state = { - 'activate_versions' : { + 'versions' : { stream_id_1 : version_val_1 } } @@ -260,12 +260,12 @@ def test_set_version(self): stream_id_1 = 'customers' version_val_1 = 123456789 - result = st.set_version({'activate_versions': 
{stream_id_1: 'old-value'}}, stream_id_1, version_val_1) - self.assertEqual(result, {'activate_versions': {stream_id_1: version_val_1}}) + result = st.set_version({'versions': {stream_id_1: 'old-value'}}, stream_id_1, version_val_1) + self.assertEqual(result, {'versions': {stream_id_1: version_val_1}}) def test_clear_version(self): stream_id_1 = 'customers' version_val_1 = 123456789 - result = st.clear_version({'activate_versions': {stream_id_1: version_val_1}}, stream_id_1) - self.assertEqual(result, {'activate_versions': {}}) + result = st.clear_version({'versions': {stream_id_1: version_val_1}}, stream_id_1) + self.assertEqual(result, {'versions': {}})