diff --git a/.circleci/config.yml b/.circleci/config.yml index ae6734c..693d3b9 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,26 +1,35 @@ -version: 2 +version: 2.1 + +workflows: + build: + jobs: + - build: + context: + - circleci-user + jobs: build: docker: - - image: ubuntu:16.04 + - image: 218546966473.dkr.ecr.us-east-1.amazonaws.com/sources-python:1.1.0 steps: - checkout - - run: - name: 'Install python 3.5.2' - command: | - apt update - apt install --yes python3 python3-pip python3-venv - run: name: 'Setup virtualenv' command: | - mkdir -p ~/.virtualenvs + pyenv global 3.11.7 python3 -m venv ~/.virtualenvs/singer-python source ~/.virtualenvs/singer-python/bin/activate - pip install -U 'pip<19.2' 'setuptools<51.0.0' - make install + pip install -U 'pip==20.3.4' 'setuptools<51.0.0' + pip install .[dev] + - run: + name: 'Pylint' + command: | + source ~/.virtualenvs/singer-python/bin/activate + pip install pylint + pylint singer --extension-pkg-whitelist=ciso8601 -d missing-docstring,broad-exception-raised,broad-exception-caught,bare-except,too-many-return-statements,too-many-branches,too-many-arguments,no-else-return,too-few-public-methods,fixme,protected-access,consider-using-f-string,reimported - run: - name: 'Run tests' + name: 'Run Tests' command: | - # Need to re-activate the virtualenv source ~/.virtualenvs/singer-python/bin/activate - make test + pip install nose2 + nose2 -v -s tests diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 6e46b00..ef49bc0 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -9,3 +9,7 @@ # Rollback steps - revert this branch + +#### AI generated code +https://internal.qlik.dev/general/ways-of-working/code-reviews/#guidelines-for-ai-generated-code +- [ ] this PR has been written with the help of GitHub Copilot or another generative AI tool diff --git a/CHANGELOG.md b/CHANGELOG.md index ffcb8e6..e98553c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,56 @@ # Changelog +## 6.8.0 + * Rename state key `activate_versions` to `versions` in all relevant locations [#194](https://github.com/singer-io/singer-python/pull/194) + +## 6.7.0 + * Remove `key` from set_version, get_version, and clear_version state functions [#192](https://github.com/singer-io/singer-python/pull/192) + +## 6.6.0 + * Export singer.state functions from singer + * Rename singer.state.write_bookmark to singer.state.set_bookmark + * [#190](https://github.com/singer-io/singer-python/pull/190) + +## 6.5.0 + * Add `activate_versions` state functions [#188](https://github.com/singer-io/singer-python/pull/188) + * Deprecates bookmarks.py, functions are moved to state.py + +## 6.4.0 + * Update clear_offset to remove offset key from bookmark [#185](https://github.com/singer-io/singer-python/pull/185) + +## 6.3.0 + * Support allow_nan in message JSON output [#183](https://github.com/singer-io/singer-python/pull/183) + +## 6.2.3 + * Default type for non-standard data types is string [#182](https://github.com/singer-io/singer-python/pull/182) + +## 6.2.2 + * Updates json schema generation to not emit dates + * Handle multiple schemas with anyOf and emit them in a specific order + * Do not emit error messages when checking multiple schemas and a subsequent schema passes + * [#179](https://github.com/singer-io/singer-python/pull/179) + +## 6.2.1 + * Fixes json schema generation to not treat numbers as dates + * Fixes json schema generation to handle empty arrays + * Fixes record transformation to handle fields that could be either formatted string or nested data structure + * [#177](https://github.com/singer-io/singer-python/pull/177) + +## 6.2.0 + * Adds json schema generation [#175](https://github.com/singer-io/singer-python/pull/175) + +## 6.1.0 + * Make ensure_ascii Dynamic with Default Set to True in JSON Serialization. Required to handle the special characters [#168](https://github.com/singer-io/singer-python/pull/168) + +## 6.0.1 + * Pin backoff and simplejson to any version greater than or equal to the previously allowed version, up to the next major version [#167](https://github.com/singer-io/singer-python/pull/167) + +## 6.0.0 + * Bump backoff version to 2.2.1. This version drops support for python 3.5, but adds it for 3.10 [#165](https://github.com/singer-io/singer-python/pull/165) + +## 5.13.0 + * Add support for dev mode argument parsing [#158](https://github.com/singer-io/singer-python/pull/158) + ## 5.12.2 * Removes pinned `pytz` version [#152](https://github.com/singer-io/singer-python/pull/152) diff --git a/setup.cfg b/setup.cfg index b88034e..08aedd7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,2 @@ [metadata] -description-file = README.md +description_file = README.md diff --git a/setup.py b/setup.py index cd5825c..e0b32eb 100755 --- a/setup.py +++ b/setup.py @@ -4,18 +4,18 @@ import subprocess setup(name="singer-python", - version='5.12.2', + version='6.8.0', description="Singer.io utility library", author="Stitch", classifiers=['Programming Language :: Python :: 3 :: Only'], url="http://singer.io", install_requires=[ 'pytz>=2018.4', - 'jsonschema==2.6.0', - 'simplejson==3.11.1', - 'python-dateutil>=2.6.0', - 'backoff==1.8.0', - 'ciso8601', + 'jsonschema>=2.6.0,==2.*', + 'simplejson>=3.13.2,==3.*', + 'python-dateutil>=2.7.3,==2.*', + 'backoff>=2.2.1,==2.*', + 'ciso8601>=2.3.1,==2.*', ], extras_require={ 'dev': [ diff --git a/singer/__init__.py b/singer/__init__.py index 26f0043..613dc99 100644 --- a/singer/__init__.py +++ b/singer/__init__.py @@ -61,8 +61,10 @@ ) from singer.schema import Schema -from singer.bookmarks import ( - write_bookmark, +from singer.state import ( + set_bookmark, + # for backwards compatibility, use set_bookmark instead + set_bookmark as write_bookmark, get_bookmark, clear_bookmark, reset_stream, @@ -71,6 +73,9 @@ get_offset, set_currently_syncing, get_currently_syncing, + set_version, + clear_version, + get_version, ) from singer.exceptions import ( diff --git a/singer/bookmarks.py b/singer/bookmarks.py index fc6d7ca..b506ff2 100644 --- a/singer/bookmarks.py +++ b/singer/bookmarks.py @@ -1,46 +1,33 @@ -def ensure_bookmark_path(state, path): - submap = state - for path_component in path: - if submap.get(path_component) is None: - submap[path_component] = {} +from singer import state as st + +## Note - This file is deprecated, use state.py functions. - submap = submap[path_component] - return state +def ensure_bookmark_path(state, path): + return st.ensure_state_path(state, path) def write_bookmark(state, tap_stream_id, key, val): - state = ensure_bookmark_path(state, ['bookmarks', tap_stream_id]) - state['bookmarks'][tap_stream_id][key] = val - return state + return st.set_bookmark(state, tap_stream_id, key, val) def clear_bookmark(state, tap_stream_id, key): - state = ensure_bookmark_path(state, ['bookmarks', tap_stream_id]) - state['bookmarks'][tap_stream_id].pop(key, None) - return state + return st.clear_bookmark(state, tap_stream_id, key) def reset_stream(state, tap_stream_id): - state = ensure_bookmark_path(state, ['bookmarks', tap_stream_id]) - state['bookmarks'][tap_stream_id] = {} - return state + return st.reset_stream(state, tap_stream_id) def get_bookmark(state, tap_stream_id, key, default=None): - return state.get('bookmarks', {}).get(tap_stream_id, {}).get(key, default) + return st.get_bookmark(state, tap_stream_id, key, default) def set_offset(state, tap_stream_id, offset_key, offset_value): - state = ensure_bookmark_path(state, ['bookmarks', tap_stream_id, "offset", offset_key]) - state['bookmarks'][tap_stream_id]["offset"][offset_key] = offset_value - return state + return st.set_offset(state, tap_stream_id, offset_key, offset_value) def clear_offset(state, tap_stream_id): - state = ensure_bookmark_path(state, ['bookmarks', tap_stream_id, "offset"]) - state['bookmarks'][tap_stream_id]["offset"] = {} - return state + return st.clear_offset(state, tap_stream_id) def get_offset(state, tap_stream_id, default=None): - return state.get('bookmarks', {}).get(tap_stream_id, {}).get("offset", default) + return st.get_offset(state, tap_stream_id, default) def set_currently_syncing(state, tap_stream_id): - state['currently_syncing'] = tap_stream_id - return state + return st.set_currently_syncing(state, tap_stream_id) def get_currently_syncing(state, default=None): - return state.get('currently_syncing', default) + return st.get_currently_syncing(state, default) diff --git a/singer/catalog.py b/singer/catalog.py index 1767ff1..d8ef147 100644 --- a/singer/catalog.py +++ b/singer/catalog.py @@ -20,6 +20,7 @@ def write_catalog(catalog): # pylint: disable=too-many-instance-attributes class CatalogEntry(): + # pylint: disable=too-many-positional-arguments def __init__(self, tap_stream_id=None, stream=None, key_properties=None, schema=None, replication_key=None, is_view=None, database=None, table=None, row_count=None, @@ -92,7 +93,7 @@ def __eq__(self, other): @classmethod def load(cls, filename): - with open(filename) as fp: # pylint: disable=invalid-name + with open(filename, encoding="utf-8") as fp: return Catalog.from_dict(json.load(fp)) @classmethod diff --git a/singer/exceptions.py b/singer/exceptions.py index 9231328..b13016d 100644 --- a/singer/exceptions.py +++ b/singer/exceptions.py @@ -11,7 +11,7 @@ def __init__(self, message): The first line is the error's class name. The subsequent lines are the message that class was created with. """ - super().__init__('{}\n{}'.format(self.__class__.__name__, message)) + super().__init__(f"{self.__class__.__name__}\n{message}") class SingerConfigurationError(SingerError): diff --git a/singer/messages.py b/singer/messages.py index 3848801..941670c 100644 --- a/singer/messages.py +++ b/singer/messages.py @@ -11,16 +11,16 @@ class Message(): '''Base class for messages.''' - def asdict(self): # pylint: disable=no-self-use + def asdict(self): raise Exception('Not implemented') def __eq__(self, other): return isinstance(other, Message) and self.asdict() == other.asdict() def __repr__(self): - pairs = ["{}={}".format(k, v) for k, v in self.asdict().items()] + pairs = [f"{k}={v}" for k, v in self.asdict().items()] attrstr = ", ".join(pairs) - return "{}({})".format(self.__class__.__name__, attrstr) + return f"{self.__class__.__name__}({attrstr})" def __str__(self): return str(self.asdict()) @@ -169,7 +169,7 @@ def asdict(self): def _required_key(msg, k): if k not in msg: - raise Exception("Message is missing required key '{}': {}".format(k, msg)) + raise Exception(f"Message is missing required key '{k}': {msg}") return msg[k] @@ -218,12 +218,17 @@ def parse_message(msg): return None -def format_message(message): - return json.dumps(message.asdict(), use_decimal=True) +def format_message(message, ensure_ascii=True, allow_nan=False): + return json.dumps( + message.asdict(), + use_decimal=True, + ensure_ascii=ensure_ascii, + allow_nan=allow_nan + ) -def write_message(message): - sys.stdout.write(format_message(message) + '\n') +def write_message(message, ensure_ascii=True, allow_nan=False): + sys.stdout.write(format_message(message, ensure_ascii=ensure_ascii, allow_nan=allow_nan) + '\n') sys.stdout.flush() diff --git a/singer/schema.py b/singer/schema.py index b4da4ac..2fcafd6 100644 --- a/singer/schema.py +++ b/singer/schema.py @@ -31,7 +31,7 @@ class Schema(): # pylint: disable=too-many-instance-attributes ''' - # pylint: disable=too-many-locals + # pylint: disable=too-many-locals,too-many-positional-arguments def __init__(self, type=None, format=None, properties=None, items=None, selected=None, inclusion=None, description=None, minimum=None, maximum=None, exclusiveMinimum=None, exclusiveMaximum=None, diff --git a/singer/schema_generation.py b/singer/schema_generation.py new file mode 100644 index 0000000..177d659 --- /dev/null +++ b/singer/schema_generation.py @@ -0,0 +1,105 @@ +def add_observation(acc, path): + + node = acc + for i in range(0, len(path) - 1): + k = path[i] + if k not in node: + node[k] = {} + node = node[k] + + node[path[-1]] = True + +# pylint: disable=too-many-branches +def add_observations(acc, path, data): + if isinstance(data, dict): + for key in data: + add_observations(acc, path + ["object", key], data[key]) + elif isinstance(data, list): + if len(data) == 0: + add_observations(acc, path + ["array"], None) + for item in data: + add_observations(acc, path + ["array"], item) + elif isinstance(data, str): + try: + # If the string parses as a int, add an observation that it's a integer + int(data) + add_observation(acc, path + ["integer"]) + return acc + except (ValueError, TypeError): + pass + try: + # If the string parses as a float, add an observation that it's a number + float(data) + add_observation(acc, path + ["number"]) + return acc + except (ValueError, TypeError): + pass + add_observation(acc, path + ["string"]) + elif isinstance(data, bool): + add_observation(acc, path + ["boolean"]) + elif isinstance(data, int): + add_observation(acc, path + ["integer"]) + elif isinstance(data, float): + add_observation(acc, path + ["number"]) + elif data is None: + add_observation(acc, path + ["null"]) + else: + add_observation(acc, path + ["string"]) + + return acc + +def to_json_schema(obs): + types = [] + # add schema types in a specific order to anyOf list + for key in ['array', 'object', 'number', 'integer', 'boolean', 'string', 'null']: + if key not in obs: + continue + + result = {'type': ['null']} + + if key == 'object': + result['type'] += ['object'] + if 'properties' not in result: + result['properties'] = {} + for obj_key in obs['object']: + result['properties'][obj_key] = to_json_schema(obs['object'][obj_key]) + + elif key == 'array': + result['type'] += ['array'] + result['items'] = to_json_schema(obs['array']) + + elif key == 'string': + result['type'] += ['string'] + + elif key == 'boolean': + result['type'] += ['boolean'] + + elif key == 'integer': + result['type'] += ['integer'] + + elif key == 'number': + # Use type=string, format=singer.decimal + result['type'] += ['string'] + result['format'] = 'singer.decimal' + + elif key == 'null': + pass + + else: + raise Exception("Unexpected data type " + key) + + types.append(result) + + if len(types) == 0: + return {'type': ['null', 'string']} + + if len(types) == 1: + return types[0] + + return {'anyOf': types} + +def generate_schema(records): + obs = {} + for record in records: + obs = add_observations(obs, [], record) + return to_json_schema(obs) diff --git a/singer/state.py b/singer/state.py new file mode 100644 index 0000000..f7d4357 --- /dev/null +++ b/singer/state.py @@ -0,0 +1,60 @@ +def ensure_state_path(state, path): + submap = state + for path_component in path: + if submap.get(path_component) is None: + submap[path_component] = {} + + submap = submap[path_component] + return state + +def set_bookmark(state, tap_stream_id, key, val): + state = ensure_state_path(state, ['bookmarks', tap_stream_id]) + state['bookmarks'][tap_stream_id][key] = val + return state + +def clear_bookmark(state, tap_stream_id, key): + state = ensure_state_path(state, ['bookmarks', tap_stream_id]) + state['bookmarks'][tap_stream_id].pop(key, None) + return state + +def reset_stream(state, tap_stream_id): + state = ensure_state_path(state, ['bookmarks', tap_stream_id]) + state['bookmarks'][tap_stream_id] = {} + if 'versions' in state: + state = ensure_state_path(state, ['versions', tap_stream_id]) + state['versions'][tap_stream_id] = {} + return state + +def get_bookmark(state, tap_stream_id, key, default=None): + return state.get('bookmarks', {}).get(tap_stream_id, {}).get(key, default) + +def set_offset(state, tap_stream_id, offset_key, offset_value): + state = ensure_state_path(state, ['bookmarks', tap_stream_id, "offset", offset_key]) + state['bookmarks'][tap_stream_id]["offset"][offset_key] = offset_value + return state + +def clear_offset(state, tap_stream_id): + return clear_bookmark(state, tap_stream_id, "offset") + +def get_offset(state, tap_stream_id, default=None): + return state.get('bookmarks', {}).get(tap_stream_id, {}).get("offset", default) + +def set_currently_syncing(state, tap_stream_id): + state['currently_syncing'] = tap_stream_id + return state + +def get_currently_syncing(state, default=None): + return state.get('currently_syncing', default) + +def set_version(state, tap_stream_id, val): + state = ensure_state_path(state, ['versions', tap_stream_id]) + state['versions'][tap_stream_id] = val + return state + +def clear_version(state, tap_stream_id): + state = ensure_state_path(state, ['versions', tap_stream_id]) + state['versions'].pop(tap_stream_id, None) + return state + +def get_version(state, tap_stream_id, default=None): + return state.get('versions', {}).get(tap_stream_id, default) diff --git a/singer/transform.py b/singer/transform.py index 3fdefdf..3a9fc96 100644 --- a/singer/transform.py +++ b/singer/transform.py @@ -77,16 +77,16 @@ def tostr(self): path = ".".join(map(str, self.path)) if self.schema: if self.logging_level >= logging.INFO: - msg = "data does not match {}".format(self.schema) + msg = f"data does not match {self.schema}" else: - msg = "does not match {}".format(self.schema) + msg = f"does not match {self.schema}" else: msg = "not in schema" if self.logging_level >= logging.INFO: - output = "{}: {}".format(path, msg) + output = f"{path}: {msg}" else: - output = "{}: {} {}".format(path, self.data, msg) + output = f"{path}: {self.data} {msg}" return output @@ -185,6 +185,8 @@ def _transform_anyof(self, data, schema, path): success, transformed_data = self.transform_recur(data, subschema, path) if success: return success, transformed_data + else: + self.errors.pop() else: # pylint: disable=useless-else-on-loop # exhaused all schemas and didn't return, so we failed :-( self.errors.append(Error(path, data, schema, logging_level=LOGGER.level)) @@ -266,13 +268,13 @@ def _transform(self, data, typ, schema, path): else: return False, None - elif schema.get("format") == "date-time": + elif typ == "string" and schema.get("format") == "date-time": data = self._transform_datetime(data) if data is None: return False, None return True, data - elif schema.get("format") == "singer.decimal": + elif typ == "string" and schema.get("format") == "singer.decimal": if data is None: return False, None diff --git a/singer/utils.py b/singer/utils.py index 492e03b..6620005 100644 --- a/singer/utils.py +++ b/singer/utils.py @@ -105,7 +105,7 @@ def chunk(array, num): def load_json(path): - with open(path) as fil: + with open(path, encoding="utf-8") as fil: return json.load(fil) @@ -134,6 +134,7 @@ def parse_args(required_config_keys): -d,--discover Run in discover mode -p,--properties Properties file: DEPRECATED, please use --catalog instead --catalog Catalog file + --dev Runs the tap in dev mode Returns the parsed args object from argparse. For each argument that point to JSON files (config, state, properties), we will automatically @@ -163,6 +164,11 @@ def parse_args(required_config_keys): action='store_true', help='Do schema discovery') + parser.add_argument( + '--dev', + action='store_true', + help='Runs tap in dev mode') + args = parser.parse_args() if args.config: setattr(args, 'config_path', args.config) @@ -187,7 +193,7 @@ def parse_args(required_config_keys): def check_config(config, required_keys): missing_keys = [key for key in required_keys if key not in config] if missing_keys: - raise Exception("Config is missing required keys: {}".format(missing_keys)) + raise Exception(f"Config is missing required keys: {missing_keys}") def backoff(exceptions, giveup): diff --git a/tests/test_bookmarks.py b/tests/test_bookmarks.py deleted file mode 100644 index 4902105..0000000 --- a/tests/test_bookmarks.py +++ /dev/null @@ -1,166 +0,0 @@ -import unittest -from singer import bookmarks - -class TestGetBookmark(unittest.TestCase): - def test_empty_state(self): - empty_state = {} - - # Case with no value to fall back on - self.assertIsNone(bookmarks.get_bookmark(empty_state, 'some_stream', 'my_key')) - - # Case with a given default - self.assertEqual(bookmarks.get_bookmark(empty_state, 'some_stream', 'my_key', 'default_value'), - 'default_value') - - def test_empty_bookmark(self): - empty_bookmark = {'bookmarks':{}} - - # Case with no value to fall back on - self.assertIsNone(bookmarks.get_bookmark(empty_bookmark, 'some_stream', 'my_key')) - - # Case with a given default - self.assertEqual(bookmarks.get_bookmark(empty_bookmark, 'some_stream', 'my_key', 'default_value'), - 'default_value') - - def test_non_empty_state(self): - stream_id_1 = 'customers' - bookmark_key_1 = 'datetime' - bookmark_val_1 = 123456789 - - non_empty_state = { - 'bookmarks' : { - stream_id_1 : { - bookmark_key_1 : bookmark_val_1 - } - } - } - - # - # Cases with no value to fall back on - # - - # Bad stream, bad key - self.assertIsNone(bookmarks.get_bookmark(non_empty_state, 'some_stream', 'my_key')) - - # Good stream, bad key - self.assertIsNone(bookmarks.get_bookmark(non_empty_state, stream_id_1, 'my_key')) - - # Good stream, good key - self.assertEqual(bookmarks.get_bookmark(non_empty_state, stream_id_1, bookmark_key_1), - bookmark_val_1) - - # - # Cases with a given default - # - - # Bad stream, bad key - self.assertEqual(bookmarks.get_bookmark(non_empty_state, 'some_stream', 'my_key', 'default_value'), - 'default_value') - - # Bad stream, good key - self.assertEqual(bookmarks.get_bookmark(non_empty_state, 'some_stream', bookmark_key_1, 'default_value'), - 'default_value') - - # Good stream, bad key - self.assertEqual(bookmarks.get_bookmark(non_empty_state, stream_id_1, 'my_key', 'default_value'), - 'default_value') - - # Good stream, good key - self.assertEqual(bookmarks.get_bookmark(non_empty_state, stream_id_1, bookmark_key_1, 'default_value'), - bookmark_val_1) - - -class TestGetOffset(unittest.TestCase): - def test_empty_state(self): - empty_state = {} - - # Case with no value to fall back on - self.assertIsNone(bookmarks.get_offset(empty_state, 'some_stream')) - - # Case with a given default - self.assertEqual(bookmarks.get_offset(empty_state, 'some_stream', 'default_value'), - 'default_value') - - def test_empty_bookmark(self): - empty_bookmark = {'bookmarks':{}} - - # Case with no value to fall back on - self.assertIsNone(bookmarks.get_offset(empty_bookmark, 'some_stream')) - - # Case with a given default - self.assertEqual(bookmarks.get_offset(empty_bookmark, 'some_stream', 'default_value'), - 'default_value') - - def test_non_empty_state(self): - stream_id_1 = 'customers' - bookmark_key_1 = 'datetime' - bookmark_val_1 = 123456789 - offset_val = 'fizzy water' - - non_empty_state = { - 'bookmarks' : { - stream_id_1 : { - bookmark_key_1 : bookmark_val_1, - 'offset' : offset_val - } - } - } - - # - # Cases with no value to fall back on - # - - # Bad stream - self.assertIsNone(bookmarks.get_offset(non_empty_state, 'some_stream')) - - # Good stream - self.assertEqual(bookmarks.get_offset(non_empty_state, stream_id_1), - offset_val) - - # - # Case with a given default - # - - # Bad stream - self.assertEqual(bookmarks.get_offset(non_empty_state, 'some_stream', 'default_value'), - 'default_value') - - # Good stream - self.assertEqual(bookmarks.get_offset(non_empty_state, stream_id_1, 'default_value'), - offset_val) - - -class TestGetCurrentlySyncing(unittest.TestCase): - def test_empty_state(self): - empty_state = {} - - # Case with no value to fall back on - self.assertIsNone(bookmarks.get_currently_syncing(empty_state)) - - # Case with a given default - self.assertEqual(bookmarks.get_currently_syncing(empty_state, 'default_value'), - 'default_value') - - def test_non_empty_state(self): - stream_id_1 = 'customers' - bookmark_key_1 = 'datetime' - bookmark_val_1 = 123456789 - offset_val = 'fizzy water' - - non_empty_state = { - 'bookmarks' : { - stream_id_1 : { - bookmark_key_1 : bookmark_val_1, - 'offset' : offset_val - } - }, - 'currently_syncing' : stream_id_1 - } - - # Case with no value to fall back on - self.assertEqual(bookmarks.get_currently_syncing(non_empty_state), - stream_id_1) - - # Case with a given default - self.assertEqual(bookmarks.get_currently_syncing(non_empty_state, 'default_value'), - stream_id_1) diff --git a/tests/test_catalog.py b/tests/test_catalog.py index cd6dc50..8a72e1a 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -25,7 +25,7 @@ def test_one_selected_stream(self): CatalogEntry(tap_stream_id='c',schema=Schema(),metadata=[])]) state = {} selected_streams = catalog.get_selected_streams(state) - self.assertEquals([e for e in selected_streams],[selected_entry]) + self.assertEqual([e for e in selected_streams],[selected_entry]) def test_resumes_currently_syncing_stream(self): selected_entry_a = CatalogEntry(tap_stream_id='a', @@ -44,7 +44,7 @@ def test_resumes_currently_syncing_stream(self): selected_entry_c]) state = {'currently_syncing': 'c'} selected_streams = catalog.get_selected_streams(state) - self.assertEquals([e for e in selected_streams][0],selected_entry_c) + self.assertEqual([e for e in selected_streams][0],selected_entry_c) class TestToDictAndFromDict(unittest.TestCase): @@ -141,4 +141,4 @@ def test(self): CatalogEntry(tap_stream_id='b'), CatalogEntry(tap_stream_id='c')]) entry = catalog.get_stream('b') - self.assertEquals('b', entry.tap_stream_id) + self.assertEqual('b', entry.tap_stream_id) diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py index 491595f..50cf7a1 100644 --- a/tests/test_exceptions.py +++ b/tests/test_exceptions.py @@ -14,8 +14,8 @@ def test_SingerError_prints_correctly(self): raise SingerError(error_text) expected_text = "SingerError\n" + error_text - self.assertEquals(expected_text, - str(test_run.exception)) + self.assertEqual(expected_text, + str(test_run.exception)) def test_SingerConfigurationError_prints_correctly(self): error_text = "An error occured" @@ -24,8 +24,8 @@ def test_SingerConfigurationError_prints_correctly(self): raise SingerConfigurationError(error_text) expected_text = "SingerConfigurationError\n" + error_text - self.assertEquals(expected_text, - str(test_run.exception)) + self.assertEqual(expected_text, + str(test_run.exception)) def test_SingerDiscoveryError_prints_correctly(self): error_text = "An error occured" @@ -34,8 +34,8 @@ def test_SingerDiscoveryError_prints_correctly(self): raise SingerDiscoveryError(error_text) expected_text = "SingerDiscoveryError\n" + error_text - self.assertEquals(expected_text, - str(test_run.exception)) + self.assertEqual(expected_text, + str(test_run.exception)) def test_SingerSyncError_prints_correctly(self): error_text = "An error occured" @@ -44,8 +44,8 @@ def test_SingerSyncError_prints_correctly(self): raise SingerSyncError(error_text) expected_text = "SingerSyncError\n" + error_text - self.assertEquals(expected_text, - str(test_run.exception)) + self.assertEqual(expected_text, + str(test_run.exception)) def test_SingerRetryableRequestError_prints_correctly(self): error_text = "An error occured" @@ -54,8 +54,8 @@ def test_SingerRetryableRequestError_prints_correctly(self): raise SingerRetryableRequestError(error_text) expected_text = "SingerRetryableRequestError\n" + error_text - self.assertEquals(expected_text, - str(test_run.exception)) + self.assertEqual(expected_text, + str(test_run.exception)) def test_SingerError_prints_multiple_lines_correctly(self): error_text = "\n".join(["Line 1", "Line 2", "Line 3"]) @@ -64,5 +64,5 @@ def test_SingerError_prints_multiple_lines_correctly(self): raise SingerError(error_text) expected_text = "SingerError\n" + error_text - self.assertEquals(expected_text, - str(test_run.exception)) + self.assertEqual(expected_text, + str(test_run.exception)) diff --git a/tests/test_schema.py b/tests/test_schema.py index fa28bac..5682755 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -44,38 +44,38 @@ class TestSchema(unittest.TestCase): additionalProperties=True) def test_string_to_dict(self): - self.assertEquals(self.string_dict, self.string_obj.to_dict()) + self.assertEqual(self.string_dict, self.string_obj.to_dict()) def test_integer_to_dict(self): - self.assertEquals(self.integer_dict, self.integer_obj.to_dict()) + self.assertEqual(self.integer_dict, self.integer_obj.to_dict()) def test_array_to_dict(self): - self.assertEquals(self.array_dict, self.array_obj.to_dict()) + self.assertEqual(self.array_dict, self.array_obj.to_dict()) def test_object_to_dict(self): - self.assertEquals(self.object_dict, self.object_obj.to_dict()) + self.assertEqual(self.object_dict, self.object_obj.to_dict()) def test_string_from_dict(self): - self.assertEquals(self.string_obj, Schema.from_dict(self.string_dict)) + self.assertEqual(self.string_obj, Schema.from_dict(self.string_dict)) def test_integer_from_dict(self): - self.assertEquals(self.integer_obj, Schema.from_dict(self.integer_dict)) + self.assertEqual(self.integer_obj, Schema.from_dict(self.integer_dict)) def test_array_from_dict(self): - self.assertEquals(self.array_obj, Schema.from_dict(self.array_dict)) + self.assertEqual(self.array_obj, Schema.from_dict(self.array_dict)) def test_object_from_dict(self): - self.assertEquals(self.object_obj, Schema.from_dict(self.object_dict)) + self.assertEqual(self.object_obj, Schema.from_dict(self.object_dict)) def test_repr_atomic(self): - self.assertEquals(self.string_obj, eval(repr(self.string_obj))) + self.assertEqual(self.string_obj, eval(repr(self.string_obj))) def test_repr_recursive(self): - self.assertEquals(self.object_obj, eval(repr(self.object_obj))) + self.assertEqual(self.object_obj, eval(repr(self.object_obj))) def test_object_from_dict_with_defaults(self): schema = Schema.from_dict(self.object_dict, inclusion='automatic') - self.assertEquals('whatever', schema.inclusion, - msg='The schema value should override the default') - self.assertEquals('automatic', schema.properties['a_string'].inclusion) - self.assertEquals('automatic', schema.properties['an_array'].items.inclusion) + self.assertEqual('whatever', schema.inclusion, + msg='The schema value should override the default') + self.assertEqual('automatic', schema.properties['a_string'].inclusion) + self.assertEqual('automatic', schema.properties['an_array'].items.inclusion) diff --git a/tests/test_schema_generation.py b/tests/test_schema_generation.py new file mode 100644 index 0000000..610ac20 --- /dev/null +++ b/tests/test_schema_generation.py @@ -0,0 +1,72 @@ +import unittest +from singer.schema_generation import generate_schema + +class TestSchemaGeneration(unittest.TestCase): + def test_simple_schema(self): + records = [{'a': 1, 'b': 'two', 'c': True, 'dt': '2000-01-01T00:11:22Z'}] + expected_schema = { + 'type': ['null', 'object'], + 'properties': { + 'a': {'type': ['null', 'integer']}, + 'b': {'type': ['null', 'string']}, + 'c': {'type': ['null', 'boolean']}, + 'dt': {'type': ['null', 'string']} + } + } + self.assertEqual(expected_schema, generate_schema(records)) + + def test_mix_n_match_records_schema(self): + records = [ + {'a': 1, 'b': 'b'}, + {'a': 'two', 'c': 7, 'd': [1, 'two']}, + {'a': True, 'c': 7.7, 'd': {'one': 1, 'two': 'two'}} + ] + expected_schema = { + 'type': ['null', 'object'], + 'properties': {'a': {'anyOf': [{'type': ['null', 'integer']}, + {'type': ['null', 'boolean']}, + {'type': ['null', 'string']}]}, + 'b': {'type': ['null', 'string']}, + 'c': {'anyOf': [{'type': ['null', 'string'], 'format': 'singer.decimal'}, + {'type': ['null', 'integer']}]}, + 'd': {'anyOf': [{'type': ['null', 'array'], + 'items': {'anyOf': [{'type': ['null', 'integer']}, + {'type': ['null', 'string']}]}}, + {'type': ['null', 'object'], + 'properties': {'one': {'type': ['null', 'integer']}, + 'two': {'type': ['null', 'string']}}}]}} + } + actual_schema = generate_schema(records) + self.assertEqual(expected_schema, actual_schema) + + def test_nested_structue_schema(self): + records = [{'a': {'b': {'c': [{'d': 7}]}, 'e': [[1, 2, 3]]}}] + expected_schema = { + 'type': ['null', 'object'], + 'properties': { + 'a': { + 'type': ['null', 'object'], + 'properties': { + 'b': { + 'type': ['null', 'object'], + 'properties': { + 'c': { + 'type': ['null', 'array'], + 'items': { + 'type': ['null', 'object'], + 'properties': {'d': {'type': ['null', 'integer']}} + } + } + } + }, + 'e': { + 'type': ['null', 'array'], + 'items': { + 'type': ['null', 'array'], + 'items': {'type': ['null', 'integer']}} + } + } + } + } + } + self.assertEqual(expected_schema, generate_schema(records)) diff --git a/tests/test_singer.py b/tests/test_singer.py index 4fb74de..7f69bb5 100644 --- a/tests/test_singer.py +++ b/tests/test_singer.py @@ -1,5 +1,6 @@ import singer import unittest +from unittest.mock import patch import datetime import dateutil from decimal import Decimal @@ -179,6 +180,31 @@ def test_parse_bulk_decs(self): value = self.create_record(value_str) self.assertEqual(Decimal(value_str), value) + @patch('sys.stdout') + def test_ensure_ascii_false(self, mock_stdout): + """ + Setting ensure_ascii=False will preserve special characters like é + in their original form. + """ + rec = {"name": "José"} + expected_output = '{"type": "RECORD", "stream": "test_stream", "record": {"name": "José"}}\n' + rec_message = singer.RecordMessage(stream="test_stream", record=rec) + result = singer.write_message(rec_message, ensure_ascii=False) + mock_stdout.write.assert_called_once_with(expected_output) + mock_stdout.flush.assert_called_once() + + @patch('sys.stdout') + def test_ensure_ascii_true(self, mock_stdout): + """ + ensure_ascii defaults to True, special characters like é are + escaped into their ASCII representation (e.g., \u00e9) + """ + rec = {"name": "José"} + expected_output = '{"type": "RECORD", "stream": "test_stream", "record": {"name": "Jos\\u00e9"}}\n' + rec_message = singer.RecordMessage(stream="test_stream", record=rec) + result = singer.write_message(rec_message) + mock_stdout.write.assert_called_once_with(expected_output) + mock_stdout.flush.assert_called_once() if __name__ == '__main__': unittest.main() diff --git a/tests/test_state.py b/tests/test_state.py new file mode 100644 index 0000000..4d30322 --- /dev/null +++ b/tests/test_state.py @@ -0,0 +1,271 @@ +import unittest +from singer import state as st + +class TestBookmark(unittest.TestCase): + def test_empty_state(self): + empty_state = {} + + # Case with no value to fall back on + self.assertIsNone(st.get_bookmark(empty_state, 'some_stream', 'my_key')) + + # Case with a given default + self.assertEqual(st.get_bookmark(empty_state, 'some_stream', 'my_key', 'default_value'), + 'default_value') + + def test_empty_bookmark(self): + empty_bookmark = {'bookmarks':{}} + + # Case with no value to fall back on + self.assertIsNone(st.get_bookmark(empty_bookmark, 'some_stream', 'my_key')) + + # Case with a given default + self.assertEqual(st.get_bookmark(empty_bookmark, 'some_stream', 'my_key', 'default_value'), + 'default_value') + + def test_non_empty_state(self): + stream_id_1 = 'customers' + bookmark_key_1 = 'datetime' + bookmark_val_1 = 123456789 + + non_empty_state = { + 'bookmarks' : { + stream_id_1 : { + bookmark_key_1 : bookmark_val_1 + } + } + } + + # + # Cases with no value to fall back on + # + + # Bad stream, bad key + self.assertIsNone(st.get_bookmark(non_empty_state, 'some_stream', 'my_key')) + + # Good stream, bad key + self.assertIsNone(st.get_bookmark(non_empty_state, stream_id_1, 'my_key')) + + # Good stream, good key + self.assertEqual(st.get_bookmark(non_empty_state, stream_id_1, bookmark_key_1), + bookmark_val_1) + + # + # Cases with a given default + # + + # Bad stream, bad key + self.assertEqual(st.get_bookmark(non_empty_state, 'some_stream', 'my_key', 'default_value'), + 'default_value') + + # Bad stream, good key + self.assertEqual(st.get_bookmark(non_empty_state, 'some_stream', bookmark_key_1, 'default_value'), + 'default_value') + + # Good stream, bad key + self.assertEqual(st.get_bookmark(non_empty_state, stream_id_1, 'my_key', 'default_value'), + 'default_value') + + # Good stream, good key + self.assertEqual(st.get_bookmark(non_empty_state, stream_id_1, bookmark_key_1, 'default_value'), + bookmark_val_1) + + def test_set_bookmark(self): + stream_id_1 = 'customers' + bookmark_key_1 = 'datetime' + bookmark_val_1 = 123456789 + + result = st.set_bookmark({'bookmarks': {stream_id_1: {bookmark_key_1: 'old-value'}}}, stream_id_1, bookmark_key_1, bookmark_val_1) + self.assertEqual(result, {'bookmarks': {stream_id_1: {bookmark_key_1: bookmark_val_1}}}) + + def test_clear_bookmark(self): + stream_id_1 = 'customers' + bookmark_key_1 = 'datetime' + bookmark_val_1 = 123456789 + + result = st.clear_bookmark({'bookmarks': {stream_id_1: {bookmark_key_1: bookmark_val_1}}}, stream_id_1, bookmark_key_1) + self.assertEqual(result, {'bookmarks': {stream_id_1: {}}}) + + +class TestOffset(unittest.TestCase): + def test_empty_state(self): + empty_state = {} + + # Case with no value to fall back on + self.assertIsNone(st.get_offset(empty_state, 'some_stream')) + + # Case with a given default + self.assertEqual(st.get_offset(empty_state, 'some_stream', 'default_value'), + 'default_value') + + def test_empty_bookmark(self): + empty_bookmark = {'bookmarks':{}} + + # Case with no value to fall back on + self.assertIsNone(st.get_offset(empty_bookmark, 'some_stream')) + + # Case with a given default + self.assertEqual(st.get_offset(empty_bookmark, 'some_stream', 'default_value'), + 'default_value') + + def test_non_empty_state(self): + stream_id_1 = 'customers' + bookmark_key_1 = 'datetime' + bookmark_val_1 = 123456789 + offset_val = 'fizzy water' + + non_empty_state = { + 'bookmarks' : { + stream_id_1 : { + bookmark_key_1 : bookmark_val_1, + 'offset' : offset_val + } + } + } + + # + # Cases with no value to fall back on + # + + # Bad stream + self.assertIsNone(st.get_offset(non_empty_state, 'some_stream')) + + # Good stream + self.assertEqual(st.get_offset(non_empty_state, stream_id_1), + offset_val) + + # + # Case with a given default + # + + # Bad stream + self.assertEqual(st.get_offset(non_empty_state, 'some_stream', 'default_value'), + 'default_value') + + # Good stream + self.assertEqual(st.get_offset(non_empty_state, stream_id_1, 'default_value'), + offset_val) + + def test_set_offset(self): + stream_id_1 = 'customers' + offset_key_1 = 'datetime' + offset_val_1 = 123456789 + + result = st.set_offset({'bookmarks': {stream_id_1: {'offset': {offset_key_1: 'old-value'}}}}, stream_id_1, offset_key_1, offset_val_1) + self.assertEqual(result, {'bookmarks': {stream_id_1: {'offset': {offset_key_1: offset_val_1}}}}) + + def test_clear_offset(self): + stream_id_1 = 'customers' + offset_key_1 = 'datetime' + offset_val_1 = 123456789 + + result = st.clear_offset({'bookmarks': {stream_id_1: {'offset': {offset_key_1: offset_val_1}}}}, stream_id_1) + self.assertEqual(result, {'bookmarks': {stream_id_1: {}}}) + + +class TestCurrentlySyncing(unittest.TestCase): + def test_empty_state(self): + empty_state = {} + + # Case with no value to fall back on + self.assertIsNone(st.get_currently_syncing(empty_state)) + + # Case with a given default + self.assertEqual(st.get_currently_syncing(empty_state, 'default_value'), + 'default_value') + + def test_non_empty_state(self): + stream_id_1 = 'customers' + bookmark_key_1 = 'datetime' + bookmark_val_1 = 123456789 + offset_val = 'fizzy water' + + non_empty_state = { + 'bookmarks' : { + stream_id_1 : { + bookmark_key_1 : bookmark_val_1, + 'offset' : offset_val + } + }, + 'currently_syncing' : stream_id_1 + } + + # Case with no value to fall back on + self.assertEqual(st.get_currently_syncing(non_empty_state), + stream_id_1) + + # Case with a given default + self.assertEqual(st.get_currently_syncing(non_empty_state, 'default_value'), + stream_id_1) + + def test_set_currently_syncing(self): + result = st.set_currently_syncing({'currently_syncing': 'foo'}, 'bar') + self.assertEqual(result, {'currently_syncing': 'bar'}) + + +class TestVersions(unittest.TestCase): + def test_empty_state(self): + empty_state = {} + + # Case with no value to fall back on + self.assertIsNone(st.get_version(empty_state, 'some_stream')) + + # Case with a given default + self.assertEqual(st.get_version(empty_state, 'some_stream', 'default_value'), + 'default_value') + + def test_empty_versions(self): + empty_versions = {'versions':{}} + + # Case with no value to fall back on + self.assertIsNone(st.get_version(empty_versions, 'some_stream')) + + # Case with a given default + self.assertEqual(st.get_version(empty_versions, 'some_stream', 'default_value'), + 'default_value') + + def test_non_empty_state(self): + stream_id_1 = 'customers' + version_val_1 = 123456789 + + non_empty_state = { + 'versions' : { + stream_id_1 : version_val_1 + } + } + + # + # Cases with no value to fall back on + # + + # Bad stream + self.assertIsNone(st.get_version(non_empty_state, 'some_stream')) + + # Good stream + self.assertEqual(st.get_version(non_empty_state, stream_id_1), + version_val_1) + + # + # Cases with a given default + # + + # Bad stream + self.assertEqual(st.get_version(non_empty_state, 'some_stream', 'default_value'), + 'default_value') + + # Good stream + self.assertEqual(st.get_version(non_empty_state, stream_id_1, 'default_value'), + version_val_1) + + def test_set_version(self): + stream_id_1 = 'customers' + version_val_1 = 123456789 + + result = st.set_version({'versions': {stream_id_1: 'old-value'}}, stream_id_1, version_val_1) + self.assertEqual(result, {'versions': {stream_id_1: version_val_1}}) + + def test_clear_version(self): + stream_id_1 = 'customers' + version_val_1 = 123456789 + + result = st.clear_version({'versions': {stream_id_1: version_val_1}}, stream_id_1) + self.assertEqual(result, {'versions': {}}) diff --git a/tests/test_transform.py b/tests/test_transform.py index 959c4b8..b398e93 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -1,9 +1,12 @@ +import io +import sys import unittest import decimal +import simplejson as json +import singer.messages as messages from singer import transform from singer.transform import * - class TestTransform(unittest.TestCase): def test_integer_transform(self): schema = {'type': 'integer'} @@ -25,7 +28,7 @@ def test_nested_transform(self): def test_multi_type_object_transform(self): schema = {"type": ["null", "object", "string"], - "properties": {"whatever": {"type": "date-time", + "properties": {"whatever": {"type": "string", "format": "date-time"}}} data = {"whatever": "2017-01-01"} expected = {"whatever": "2017-01-01T00:00:00.000000Z"} @@ -36,7 +39,7 @@ def test_multi_type_object_transform(self): def test_multi_type_array_transform(self): schema = {"type": ["null", "array", "integer"], - "items": {"type": "date-time", "format": "date-time"}} + "items": {"type": "string", "format": "date-time"}} data = ["2017-01-01"] expected = ["2017-01-01T00:00:00.000000Z"] self.assertEqual(expected, transform(data, schema)) @@ -264,11 +267,11 @@ def test_decimal_types_transform(self): nan = {'percentage': decimal.Decimal('NaN')} snan = {'percentage': decimal.Decimal('sNaN')} - self.assertEquals(inf, transform(inf, schema)) - self.assertEquals(negative_inf, transform(negative_inf, schema)) - self.assertEquals({'percentage': '1.4142135623730951'}, transform(root2, schema)) - self.assertEquals({'percentage': 'NaN'}, transform(nan, schema)) - self.assertEquals({'percentage': 'NaN'}, transform(snan, schema)) + self.assertEqual(inf, transform(inf, schema)) + self.assertEqual(negative_inf, transform(negative_inf, schema)) + self.assertEqual({'percentage': '1.4142135623730951'}, transform(root2, schema)) + self.assertEqual({'percentage': 'NaN'}, transform(nan, schema)) + self.assertEqual({'percentage': 'NaN'}, transform(snan, schema)) str1 = {'percentage':'0.1'} @@ -276,11 +279,11 @@ def test_decimal_types_transform(self): str3 = {'percentage': '1E+13'} str4 = {'percentage': '100'} str5 = {'percentage': '-100'} - self.assertEquals(str1, transform(str1, schema)) - self.assertEquals({'percentage': '1E-13'}, transform(str2, schema)) - self.assertEquals({'percentage': '1E+13'}, transform(str3, schema)) - self.assertEquals({'percentage': '100'}, transform(str4, schema)) - self.assertEquals({'percentage': '-100'}, transform(str5, schema)) + self.assertEqual(str1, transform(str1, schema)) + self.assertEqual({'percentage': '1E-13'}, transform(str2, schema)) + self.assertEqual({'percentage': '1E+13'}, transform(str3, schema)) + self.assertEqual({'percentage': '100'}, transform(str4, schema)) + self.assertEqual({'percentage': '-100'}, transform(str5, schema)) float1 = {'percentage': 12.0000000000000000000000000001234556} float2 = {'percentage': 0.0123} @@ -288,28 +291,28 @@ def test_decimal_types_transform(self): float4 = {'percentage': -100.0123} float5 = {'percentage': 0.000001} float6 = {'percentage': 0.0000001} - self.assertEquals({'percentage':'12.0'}, transform(float1, schema)) - self.assertEquals({'percentage':'0.0123'}, transform(float2, schema)) - self.assertEquals({'percentage':'100.0123'}, transform(float3, schema)) - self.assertEquals({'percentage':'-100.0123'}, transform(float4, schema)) - self.assertEquals({'percentage':'0.000001'}, transform(float5, schema)) - self.assertEquals({'percentage':'1E-7'}, transform(float6, schema)) + self.assertEqual({'percentage':'12.0'}, transform(float1, schema)) + self.assertEqual({'percentage':'0.0123'}, transform(float2, schema)) + self.assertEqual({'percentage':'100.0123'}, transform(float3, schema)) + self.assertEqual({'percentage':'-100.0123'}, transform(float4, schema)) + self.assertEqual({'percentage':'0.000001'}, transform(float5, schema)) + self.assertEqual({'percentage':'1E-7'}, transform(float6, schema)) int1 = {'percentage': 123} int2 = {'percentage': 0} int3 = {'percentage': -1000} - self.assertEquals({'percentage':'123'}, transform(int1, schema)) - self.assertEquals({'percentage':'0'}, transform(int2, schema)) - self.assertEquals({'percentage':'-1000'}, transform(int3, schema)) + self.assertEqual({'percentage':'123'}, transform(int1, schema)) + self.assertEqual({'percentage':'0'}, transform(int2, schema)) + self.assertEqual({'percentage':'-1000'}, transform(int3, schema)) dec1 = {'percentage': decimal.Decimal('1.1010101')} dec2 = {'percentage': decimal.Decimal('.111111111111111111111111')} dec3 = {'percentage': decimal.Decimal('-.111111111111111111111111')} dec4 = {'percentage': decimal.Decimal('100')} - self.assertEquals({'percentage':'1.1010101'}, transform(dec1, schema)) - self.assertEquals({'percentage':'0.111111111111111111111111'}, transform(dec2, schema)) - self.assertEquals({'percentage':'-0.111111111111111111111111'}, transform(dec3, schema)) - self.assertEquals({'percentage':'100'}, transform(dec4, schema)) + self.assertEqual({'percentage':'1.1010101'}, transform(dec1, schema)) + self.assertEqual({'percentage':'0.111111111111111111111111'}, transform(dec2, schema)) + self.assertEqual({'percentage':'-0.111111111111111111111111'}, transform(dec3, schema)) + self.assertEqual({'percentage':'100'}, transform(dec4, schema)) bad1 = {'percentage': 'fsdkjl'} with self.assertRaises(SchemaMismatch): @@ -317,7 +320,7 @@ def test_decimal_types_transform(self): badnull = {'percentage': None} with self.assertRaises(SchemaMismatch): - self.assertEquals({'percentage':None}, transform(badnull, schema)) + self.assertEqual({'percentage':None}, transform(badnull, schema)) class TestTransformsWithMetadata(unittest.TestCase): @@ -486,3 +489,58 @@ def test_pattern_properties_match_multiple(self): dict_value = {"name": "chicken", "unit_cost": 1.45, "SKU": '123456'} expected = dict(dict_value) self.assertEqual(expected, transform(dict_value, schema)) + +class DummyMessage: + """A dummy message object with an asdict() method.""" + def __init__(self, value): + self.value = value + + def asdict(self): + return {"value": self.value} + + +class TestAllowNan(unittest.TestCase): + """Unit tests for allow_nan support in singer.messages.""" + + def test_format_message_allow_nan_true(self): + """Should serialize NaN successfully when allow_nan=True.""" + msg = DummyMessage(float("nan")) + result = messages.format_message(msg, allow_nan=True) + + # The output JSON should contain NaN literal (not quoted) + self.assertIn("NaN", result) + + # Replace NaN with null to make it valid JSON for parsing check + json.loads(result.replace("NaN", "null")) + + def test_format_message_allow_nan_false(self): + """Should raise ValueError when allow_nan=False and value is NaN.""" + msg = DummyMessage(float("nan")) + with self.assertRaises(ValueError): + messages.format_message(msg, allow_nan=False) + + def test_write_message_allow_nan_true(self): + """Should write to stdout successfully when allow_nan=True.""" + msg = DummyMessage(float("nan")) + fake_stdout = io.StringIO() + original_stdout = sys.stdout + sys.stdout = fake_stdout + try: + messages.write_message(msg, allow_nan=True) + output = fake_stdout.getvalue() + self.assertIn("NaN", output) + self.assertTrue(output.endswith("\n")) + finally: + sys.stdout = original_stdout + + def test_write_message_allow_nan_false(self): + """Should raise ValueError when allow_nan=False and message has NaN.""" + msg = DummyMessage(float("nan")) + fake_stdout = io.StringIO() + original_stdout = sys.stdout + sys.stdout = fake_stdout + try: + with self.assertRaises(ValueError): + messages.write_message(msg, allow_nan=False) + finally: + sys.stdout = original_stdout