From 50681c12003d898aa8667e27d4a5ccf80911e1e3 Mon Sep 17 00:00:00 2001 From: Jason Kao <100613312+jsonpr@users.noreply.github.com> Date: Tue, 8 Aug 2023 14:45:51 -0400 Subject: [PATCH 1/6] Update README.md (#35) --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3c5af2b..49b6245 100644 --- a/README.md +++ b/README.md @@ -5,5 +5,5 @@ This is the high-level package to use for developing CloudQuery plugins in Pytho ## Installation ```commandline -pip install plugin-sdk-python -``` \ No newline at end of file +pip install cloudquery-plugin-sdk +``` From a72df48f3b44dc48fca9707b7937748b0dc77b10 Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Thu, 10 Aug 2023 15:02:49 +0100 Subject: [PATCH 2/6] feat!: Remove docs generation (#36) * Remove docs generation --- cloudquery/sdk/docs/__init__.py | 0 cloudquery/sdk/docs/generator.py | 145 ------------------ cloudquery/sdk/docs/markdown_templates.py | 58 ------- cloudquery/sdk/serve/plugin.py | 42 +---- tests/docs/__init__.py | 0 tests/docs/snapshots/README.md | 9 -- tests/docs/snapshots/test_table.md | 12 -- tests/docs/snapshots/test_table_child.md | 20 --- .../docs/snapshots/test_table_composite_pk.md | 15 -- tests/docs/snapshots/test_table_grandchild.md | 15 -- tests/docs/snapshots/test_table_relations.md | 19 --- tests/docs/test_generator.py | 105 ------------- tests/scheduler/table_resolver.py | 6 +- 13 files changed, 3 insertions(+), 443 deletions(-) delete mode 100644 cloudquery/sdk/docs/__init__.py delete mode 100644 cloudquery/sdk/docs/generator.py delete mode 100644 cloudquery/sdk/docs/markdown_templates.py delete mode 100644 tests/docs/__init__.py delete mode 100644 tests/docs/snapshots/README.md delete mode 100644 tests/docs/snapshots/test_table.md delete mode 100644 tests/docs/snapshots/test_table_child.md delete mode 100644 tests/docs/snapshots/test_table_composite_pk.md delete mode 100644 tests/docs/snapshots/test_table_grandchild.md delete mode 100644 tests/docs/snapshots/test_table_relations.md delete mode 100644 tests/docs/test_generator.py diff --git a/cloudquery/sdk/docs/__init__.py b/cloudquery/sdk/docs/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/cloudquery/sdk/docs/generator.py b/cloudquery/sdk/docs/generator.py deleted file mode 100644 index 7bc569e..0000000 --- a/cloudquery/sdk/docs/generator.py +++ /dev/null @@ -1,145 +0,0 @@ -import json -import os -import pathlib -import re -from typing import List - -import jinja2 - -from cloudquery.sdk.docs.markdown_templates import ALL_TABLES, ALL_TABLES_ENTRY, TABLE -from cloudquery.sdk.schema import Table - - -class JsonTable: - def __init__(self): - self.name = "" - self.title = "" - self.description = "" - self.columns = [] - self.relations = [] - - def to_dict(self): - return { - "name": self.name, - "title": self.title, - "description": self.description, - "columns": [col.to_dict() for col in self.columns], - "relations": [rel.to_dict() for rel in self.relations], - } - - -class JsonColumn: - def __init__(self): - self.name = "" - self.type = "" - self.is_primary_key = None - self.is_incremental_key = None - - def to_dict(self): - return { - "name": self.name, - "type": self.type, - "is_primary_key": self.is_primary_key, - "is_incremental_key": self.is_incremental_key, - } - - -class Generator: - def __init__(self, plugin_name: str, tables: List[Table]): - self._plugin_name = plugin_name - self._tables = sorted(tables) - - def generate(self, directory: str, format: str): - os.makedirs(directory, exist_ok=True) - if format == "markdown": - self._generate_markdown(directory) - elif format == "json": - self._generate_json(directory) - - def _generate_json(self, directory: str): - json_tables = self._jsonify_tables(self._tables) - buffer = bytes(json.dumps(json_tables, indent=2, ensure_ascii=False), "utf-8") - output_path = pathlib.Path(directory) / "__tables.json" - with output_path.open("wb") as f: - f.write(buffer) - return None - - def _jsonify_tables(self, tables): - json_tables = [] - for table in tables: - json_columns = [] - for col in table.columns: - json_column = JsonColumn() - json_column.name = col.name - json_column.type = str(col.type) - json_column.is_primary_key = col.primary_key - json_column.is_incremental_key = col.incremental_key - json_columns.append(json_column.__dict__) - - json_table = JsonTable() - json_table.name = table.name - json_table.title = table.title - json_table.description = table.description - json_table.columns = json_columns - json_table.relations = self._jsonify_tables(table.relations) - json_tables.append(json_table.__dict__) - - return json_tables - - def _generate_markdown(self, directory: str): - env = jinja2.Environment() - env.globals["indent_to_depth"] = self._indent_to_depth - env.globals["all_tables_entry"] = self._all_tables_entry - all_tables_template = env.from_string(ALL_TABLES) - rendered_all_tables = all_tables_template.render( - plugin_name=self._plugin_name, tables=self._tables - ) - formatted_all_tables = self._format_markdown(rendered_all_tables) - - with open(os.path.join(directory, "README.md"), "w") as f: - f.write(formatted_all_tables) - - for table in self._tables: - self._render_table(directory, env, table) - - def _render_table(self, directory: str, env: jinja2.Environment, table: Table): - table_template = env.from_string(TABLE) - table_md = table_template.render(table=table) - formatted_table_md = self._format_markdown(table_md) - with open(os.path.join(directory, table.name + ".md"), "w") as f: - f.write(formatted_table_md) - for relation in table.relations: - self._render_table(directory, env, relation) - - def _all_tables_entry(self, table: Table): - env = jinja2.Environment() - env.globals["indent_to_depth"] = self._indent_to_depth - env.globals["all_tables_entry"] = self._all_tables_entry - env.globals["indent_table_to_depth"] = self._indent_table_to_depth - entry_template = env.from_string(ALL_TABLES_ENTRY) - return entry_template.render(table=table) - - @staticmethod - def _indent_table_to_depth(table: Table) -> str: - s = "" - t = table - while t.parent is not None: - s += " " - t = t.parent - return s - - @staticmethod - def _indent_to_depth(text: str, depth: int) -> str: - indentation = depth * 4 # You can adjust the number of spaces as needed - lines = text.split("\n") - indented_lines = [(" " * indentation) + line for line in lines] - return "\n".join(indented_lines) - - @staticmethod - def _format_markdown(text: str) -> str: - re_match_newlines = re.compile(r"\n{3,}") - re_match_headers = re.compile(r"(#{1,6}.+)\n+") - - text = re_match_newlines.sub(r"\n\n", text) - text = re_match_headers.sub(r"\1\n\n", text) - return text diff --git a/cloudquery/sdk/docs/markdown_templates.py b/cloudquery/sdk/docs/markdown_templates.py deleted file mode 100644 index e27f421..0000000 --- a/cloudquery/sdk/docs/markdown_templates.py +++ /dev/null @@ -1,58 +0,0 @@ -ALL_TABLES = """# Source Plugin: {{ plugin_name }} -## Tables -{%- for table in tables %} -{{ all_tables_entry(table) }} -{%- endfor %}""" - -ALL_TABLES_ENTRY = """{{ indent_table_to_depth(table) }}- [{{ table.name }}]({{ table.name }}.md){% if table.is_incremental %} (Incremental){% endif %} -{%- for rel in table.relations %} -{{ all_tables_entry(rel) }} -{%- endfor %}""" - -TABLE = """# Table: {{ table.name }} - -This table shows data for {{ table.title }}. - -{{ table.description }} -{% set length = table.primary_keys | length %} -{% if length == 0 %} -This table does not have a primary key. -{% elif length == 1 %} -The primary key for this table is **{{ table.primary_keys[0] }}**. -{% else %} -The composite primary key for this table is ( -{%- for pk in table.primary_keys %} - {{- ", " if loop.index > 1 else "" }}**{{ pk }}** -{%- endfor -%} -). -{% endif %} - -{% if table.is_incremental %} -It supports incremental syncs{% set ik_length = table.incremental_keys | length %} -{%- if ik_length == 1 %} based on the **{{ table.incremental_keys[0] }}** column{% else %} based on the ( -{%- for ik in table.incremental_keys %} - {{- ", " if loop.index > 1 else "" }}**{{ ik }}** -{%- endfor -%} -) columns{% endif %}. -{%- endif %} - -{% if table.relations or table.parent %} -## Relations -{% endif %} -{% if table.parent %} -This table depends on [{{ table.parent.name }}]({{ table.parent.name }}.md). -{% endif %} -{% if table.relations %} -The following tables depend on {{ table.name }}: -{% for rel in table.relations %} - - [{{ rel.name }}]({{ rel.name }}.md) -{%- endfor %} -{% endif %} - -## Columns -| Name | Type | -| ------------- | ------------- | -{%- for column in table.columns %} -|{{ column.name }}{% if column.primary_key %} (PK){% endif %}{% if column.incremental_key %} (Incremental Key){% endif %}|`{{ column.type }}`| -{%- endfor %} -""" diff --git a/cloudquery/sdk/serve/plugin.py b/cloudquery/sdk/serve/plugin.py index f298c9a..23f56cc 100644 --- a/cloudquery/sdk/serve/plugin.py +++ b/cloudquery/sdk/serve/plugin.py @@ -10,12 +10,9 @@ from cloudquery.plugin_v3 import plugin_pb2_grpc from structlog import wrap_logger -from cloudquery.sdk.docs.generator import Generator from cloudquery.sdk.internal.servers.discovery_v1.discovery import DiscoveryServicer from cloudquery.sdk.internal.servers.plugin_v3 import PluginServicer -from cloudquery.sdk.plugin.plugin import Plugin, TableOptions - -DOC_FORMATS = ["json", "markdown"] +from cloudquery.sdk.plugin.plugin import Plugin _IS_WINDOWS = sys.platform == "win32" @@ -136,34 +133,10 @@ def run(self, args): help="network to serve on. can be tcp or unix", ) - doc_parser = subparsers.add_parser( - "doc", - formatter_class=argparse.RawTextHelpFormatter, - help="Generate documentation for tables", - description="""Generate documentation for tables. - -If format is markdown, a destination directory will be created (if necessary) containing markdown files. -Example: -doc ./output - -If format is JSON, a destination directory will be created (if necessary) with a single json file called __tables.json. -Example: -doc --format json . -""", - ) - doc_parser.add_argument("directory", type=str) - doc_parser.add_argument( - "--format", - type=str, - default="json", - help="output format. one of: {}".format(",".join(DOC_FORMATS)), - ) parsed_args = parser.parse_args(args) if parsed_args.command == "serve": self._serve(parsed_args) - elif parsed_args.command == "doc": - self._generate_docs(parsed_args) else: parser.print_help() sys.exit(1) @@ -185,16 +158,3 @@ def _serve(self, args): def stop(self): self._server.stop(5) - - def _generate_docs(self, args): - generator = Generator( - self._plugin.name(), - self._plugin.get_tables( - options=TableOptions( - tables=["*"], - skip_tables=[], - skip_dependent_tables=False, - ) - ), - ) - generator.generate(args.directory, args.format) diff --git a/tests/docs/__init__.py b/tests/docs/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/docs/snapshots/README.md b/tests/docs/snapshots/README.md deleted file mode 100644 index 572188e..0000000 --- a/tests/docs/snapshots/README.md +++ /dev/null @@ -1,9 +0,0 @@ -# Source Plugin: test_plugin - -## Tables - -- [test_table](test_table.md) -- [test_table_composite_pk](test_table_composite_pk.md) (Incremental) -- [test_table_relations](test_table_relations.md) (Incremental) - - [test_table_child](test_table_child.md) - - [test_table_grandchild](test_table_grandchild.md) \ No newline at end of file diff --git a/tests/docs/snapshots/test_table.md b/tests/docs/snapshots/test_table.md deleted file mode 100644 index a00dc43..0000000 --- a/tests/docs/snapshots/test_table.md +++ /dev/null @@ -1,12 +0,0 @@ -# Table: test_table - -This table shows data for Test Table. - -The primary key for this table is **string**. - -## Columns - -| Name | Type | -| ------------- | ------------- | -|string (PK)|`string`| -|int32|`int32`| \ No newline at end of file diff --git a/tests/docs/snapshots/test_table_child.md b/tests/docs/snapshots/test_table_child.md deleted file mode 100644 index f06e9ee..0000000 --- a/tests/docs/snapshots/test_table_child.md +++ /dev/null @@ -1,20 +0,0 @@ -# Table: test_table_child - -This table shows data for Child Table. - -The primary key for this table is **pk1**. - -## Relations - -This table depends on [test_table_relations](test_table_relations.md). - -The following tables depend on test_table_child: - - - [test_table_grandchild](test_table_grandchild.md) - -## Columns - -| Name | Type | -| ------------- | ------------- | -|pk1 (PK)|`string`| -|fk1|`string`| \ No newline at end of file diff --git a/tests/docs/snapshots/test_table_composite_pk.md b/tests/docs/snapshots/test_table_composite_pk.md deleted file mode 100644 index 3ebead8..0000000 --- a/tests/docs/snapshots/test_table_composite_pk.md +++ /dev/null @@ -1,15 +0,0 @@ -# Table: test_table_composite_pk - -This table shows data for Composite PKs. - -The composite primary key for this table is (**pk1**, **pk2**). - -It supports incremental syncs based on the (**pk1**, **pk2**) columns. - -## Columns - -| Name | Type | -| ------------- | ------------- | -|pk1 (PK) (Incremental Key)|`string`| -|pk2 (PK) (Incremental Key)|`string`| -|int32|`int32`| \ No newline at end of file diff --git a/tests/docs/snapshots/test_table_grandchild.md b/tests/docs/snapshots/test_table_grandchild.md deleted file mode 100644 index 3568be9..0000000 --- a/tests/docs/snapshots/test_table_grandchild.md +++ /dev/null @@ -1,15 +0,0 @@ -# Table: test_table_grandchild - -This table shows data for Grandchild Table. - -The primary key for this table is **pk1**. - -## Relations - -This table depends on [test_table_child](test_table_child.md). - -## Columns - -| Name | Type | -| ------------- | ------------- | -|pk1 (PK)|`string`| \ No newline at end of file diff --git a/tests/docs/snapshots/test_table_relations.md b/tests/docs/snapshots/test_table_relations.md deleted file mode 100644 index b1e4664..0000000 --- a/tests/docs/snapshots/test_table_relations.md +++ /dev/null @@ -1,19 +0,0 @@ -# Table: test_table_relations - -This table shows data for Table Relations. - -The primary key for this table is **pk1**. - -It supports incremental syncs based on the () columns. - -## Relations - -The following tables depend on test_table_relations: - - - [test_table_child](test_table_child.md) - -## Columns - -| Name | Type | -| ------------- | ------------- | -|pk1 (PK)|`string`| \ No newline at end of file diff --git a/tests/docs/test_generator.py b/tests/docs/test_generator.py deleted file mode 100644 index 2bcae3f..0000000 --- a/tests/docs/test_generator.py +++ /dev/null @@ -1,105 +0,0 @@ -import glob -import os -import unittest -from tempfile import TemporaryDirectory - -import pyarrow as pa - -from cloudquery.sdk.docs.generator import Generator -from cloudquery.sdk.schema import Table, Column - -dirname = os.path.dirname(__file__) -SNAPSHOT_DIRECTORY = os.path.join(dirname, "snapshots") - - -def read_snapshot(name): - with open(os.path.join(SNAPSHOT_DIRECTORY, name)) as f: - return f.read() - - -def update_snapshot(name, content): - with open(os.path.join(SNAPSHOT_DIRECTORY, name), "w") as f: - f.write(content) - - -class T(unittest.TestCase): - def test_docs_generator_markdown(self): - tables = [ - Table( - name="test_table", - title="Test Table", - columns=[ - Column("string", pa.string(), primary_key=True), - Column("int32", pa.int32()), - ], - ), - Table( - name="test_table_composite_pk", - title="Composite PKs", - is_incremental=True, - columns=[ - Column("pk1", pa.string(), primary_key=True, incremental_key=True), - Column("pk2", pa.string(), primary_key=True, incremental_key=True), - Column("int32", pa.int32()), - ], - ), - Table( - name="test_table_relations", - title="Table Relations", - is_incremental=True, - columns=[ - Column("pk1", pa.string(), primary_key=True), - ], - relations=[ - Table( - name="test_table_child", - title="Child Table", - columns=[ - Column("pk1", pa.string(), primary_key=True), - Column("fk1", pa.string()), - ], - relations=[ - Table( - name="test_table_grandchild", - title="Grandchild Table", - columns=[Column("pk1", pa.string(), primary_key=True)], - ) - ], - ) - ], - ), - ] - - # set parent relations - tables[2].relations[0].parent = tables[2] - tables[2].relations[0].relations[0].parent = tables[2].relations[0] - - gen = Generator("test_plugin", tables) - with TemporaryDirectory() as d: - gen.generate(d, format="markdown") - - files = glob.glob(os.path.join(d, "*.md")) - file_names = [os.path.basename(f) for f in files] - assert sorted(file_names) == sorted( - [ - "README.md", - "test_table_composite_pk.md", - "test_table.md", - "test_table_relations.md", - "test_table_child.md", - "test_table_grandchild.md", - ] - ) - - updated_snapshots = False - for file_name in file_names: - with self.subTest(msg=file_name): - with open(os.path.join(d, file_name)) as f: - content = f.read() - try: - snapshot = read_snapshot(file_name) - self.assertEqual(snapshot, content) - except FileNotFoundError: - update_snapshot(file_name, content) - updated_snapshots = True - assert not updated_snapshots, "Updated snapshots" diff --git a/tests/scheduler/table_resolver.py b/tests/scheduler/table_resolver.py index 3a11575..b89e20b 100644 --- a/tests/scheduler/table_resolver.py +++ b/tests/scheduler/table_resolver.py @@ -7,7 +7,7 @@ @dataclass -class TestItem: +class Item: test_column2: int = 1 @@ -20,9 +20,7 @@ def test_table_resolver_resolve_column(): ], ) resource_dict = Resource(table=test_table, parent=None, item={"test_column": 1}) - resource_obj = Resource( - table=test_table, parent=None, item=TestItem(test_column2=2) - ) + resource_obj = Resource(table=test_table, parent=None, item=Item(test_column2=2)) test_resolver = TableResolver(table=test_table, child_resolvers=[]) test_resolver.resolve_column( client=None, resource=resource_dict, column_name="test_column" From 381f9ef455246d0f0662311b2513a966ff3d2f5e Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Mon, 14 Aug 2023 09:03:05 +0100 Subject: [PATCH 3/6] Add support for no_connection param to init (#38) --- cloudquery/sdk/internal/servers/plugin_v3/plugin.py | 7 ++++--- cloudquery/sdk/plugin/plugin.py | 5 ++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cloudquery/sdk/internal/servers/plugin_v3/plugin.py b/cloudquery/sdk/internal/servers/plugin_v3/plugin.py index fb98011..419bdd3 100644 --- a/cloudquery/sdk/internal/servers/plugin_v3/plugin.py +++ b/cloudquery/sdk/internal/servers/plugin_v3/plugin.py @@ -1,8 +1,9 @@ +from typing import Generator + import pyarrow as pa import structlog - -from typing import Generator from cloudquery.plugin_v3 import plugin_pb2, plugin_pb2_grpc, arrow + from cloudquery.sdk.message import ( SyncInsertMessage, SyncMigrateTableMessage, @@ -27,7 +28,7 @@ def GetVersion(self, request, context): return plugin_pb2.GetVersion.Response(version=self._plugin.version()) def Init(self, request: plugin_pb2.Init.Request, context): - self._plugin.init(request.spec) + self._plugin.init(request.spec, no_connection=request.no_connection) return plugin_pb2.Init.Response() def GetTables(self, request: plugin_pb2.GetTables.Request, context): diff --git a/cloudquery/sdk/plugin/plugin.py b/cloudquery/sdk/plugin/plugin.py index 7fc76f6..2462bc1 100644 --- a/cloudquery/sdk/plugin/plugin.py +++ b/cloudquery/sdk/plugin/plugin.py @@ -1,9 +1,8 @@ -import queue from dataclasses import dataclass from typing import List, Generator -from cloudquery.sdk.schema import Table from cloudquery.sdk import message +from cloudquery.sdk.schema import Table MIGRATE_MODE_STRINGS = ["safe", "force"] @@ -35,7 +34,7 @@ def __init__(self, name: str, version: str) -> None: self._name = name self._version = version - def init(self, spec: bytes) -> None: + def init(self, spec: bytes, no_connection: bool = False) -> None: pass def set_logger(self, logger) -> None: From 149c69974c1fd36d7f41ed36e6b545352b29ce5b Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Mon, 14 Aug 2023 09:03:28 +0100 Subject: [PATCH 4/6] fix: Fix filter_dfs to be recursive, add support for skip_dependent_tables (#39) We were missing support for filtering of nested tables before, as well as `skip_dependent_tables`. This is essentially a direct port of the Go version (https://github.com/cloudquery/plugin-sdk-python/issues/28) Closes https://github.com/cloudquery/plugin-sdk-python/issues/28 --- cloudquery/sdk/schema/table.py | 101 ++++++++++++++++++++++++++++----- tests/schema/test_table.py | 84 ++++++++++++++++++++++++++- 2 files changed, 171 insertions(+), 14 deletions(-) diff --git a/cloudquery/sdk/schema/table.py b/cloudquery/sdk/schema/table.py index d0b56ef..63c6228 100644 --- a/cloudquery/sdk/schema/table.py +++ b/cloudquery/sdk/schema/table.py @@ -1,7 +1,9 @@ from __future__ import annotations -from typing import List, Generator, Any +import copy import fnmatch +from typing import List + import pyarrow as pa from cloudquery.sdk.schema import arrow @@ -87,17 +89,90 @@ def tables_to_arrow_schemas(tables: List[Table]): def filter_dfs( - tables: List[Table], include_tables: List[str], skip_tables: List[str] + tables: List[Table], + include_tables: List[str], + skip_tables: List[str], + skip_dependent_tables: bool = False, ) -> List[Table]: - filtered: List[Table] = [] + flattened_tables = flatten_tables(tables) + for include_pattern in include_tables: + matched = any( + fnmatch.fnmatch(table.name, include_pattern) for table in flattened_tables + ) + if not matched: + raise ValueError( + f"tables include a pattern {include_pattern} with no matches" + ) + + for exclude_pattern in skip_tables: + matched = any( + fnmatch.fnmatch(table.name, exclude_pattern) for table in flattened_tables + ) + if not matched: + raise ValueError( + f"skip_tables include a pattern {exclude_pattern} with no matches" + ) + + def include_func(t): + return any( + fnmatch.fnmatch(t.name, include_pattern) + for include_pattern in include_tables + ) + + def exclude_func(t): + return any( + fnmatch.fnmatch(t.name, exclude_pattern) for exclude_pattern in skip_tables + ) + + return filter_dfs_func(tables, include_func, exclude_func, skip_dependent_tables) + + +def filter_dfs_func(tt: List[Table], include, exclude, skip_dependent_tables: bool): + filtered_tables = [] + for t in tt: + filtered_table = copy.deepcopy(t) + filtered_table = _filter_dfs_impl( + filtered_table, False, include, exclude, skip_dependent_tables + ) + if filtered_table is not None: + filtered_tables.append(filtered_table) + return filtered_tables + + +def _filter_dfs_impl(t, parent_matched, include, exclude, skip_dependent_tables): + def filter_dfs_child(r, matched, include, exclude, skip_dependent_tables): + filtered_child = _filter_dfs_impl( + r, matched, include, exclude, skip_dependent_tables + ) + if filtered_child is not None: + return True, r + return matched, None + + if exclude(t): + return None + + matched = parent_matched and not skip_dependent_tables + if include(t): + matched = True + + filtered_relations = [] + for r in t.relations: + matched, filtered_child = filter_dfs_child( + r, matched, include, exclude, skip_dependent_tables + ) + if filtered_child is not None: + filtered_relations.append(filtered_child) + + t.relations = filtered_relations + + if matched: + return t + return None + + +def flatten_tables(tables: List[Table]) -> List[Table]: + flattened: List[Table] = [] for table in tables: - matched = False - for include_table in include_tables: - if fnmatch.fnmatch(table.name, include_table): - matched = True - for skip_table in skip_tables: - if fnmatch.fnmatch(table.name, skip_table): - matched = False - if matched: - filtered.append(table) - return filtered + flattened.append(table) + flattened.extend(flatten_tables(table.relations)) + return flattened diff --git a/tests/schema/test_table.py b/tests/schema/test_table.py index 9958ad7..e7bb073 100644 --- a/tests/schema/test_table.py +++ b/tests/schema/test_table.py @@ -1,8 +1,90 @@ import pyarrow as pa +import pytest -from cloudquery.sdk.schema import Table, Column +from cloudquery.sdk.schema import Table, Column, filter_dfs +from cloudquery.sdk.schema.table import flatten_tables def test_table(): table = Table("test_table", [Column("test_column", pa.int32())]) table.to_arrow_schema() + + +def test_filter_dfs_warns_no_matches(): + with pytest.raises(ValueError): + tables = [Table("test1", []), Table("test2", [])] + filter_dfs(tables, include_tables=["test3"], skip_tables=[]) + + with pytest.raises(ValueError): + tables = [Table("test1", []), Table("test2", [])] + filter_dfs(tables, include_tables=["*"], skip_tables=["test3"]) + + +def test_filter_dfs(): + table_grandchild = Table("test_grandchild", [Column("test_column", pa.int32())]) + table_child = Table( + "test_child", + [Column("test_column", pa.int32())], + relations=[ + table_grandchild, + ], + ) + table_top1 = Table( + "test_top1", + [Column("test_column", pa.int32())], + relations=[ + table_child, + ], + ) + table_top2 = Table("test_top2", [Column("test_column", pa.int32())]) + + tables = [table_top1, table_top2] + + cases = [ + { + "include_tables": ["*"], + "skip_tables": [], + "skip_dependent_tables": False, + "expect_top": ["test_top1", "test_top2"], + "expect_flattened": [ + "test_top1", + "test_top2", + "test_child", + "test_grandchild", + ], + }, + { + "include_tables": ["*"], + "skip_tables": ["test_top1"], + "skip_dependent_tables": False, + "expect_top": ["test_top2"], + "expect_flattened": ["test_top2"], + }, + { + "include_tables": ["test_top1"], + "skip_tables": ["test_top2"], + "skip_dependent_tables": True, + "expect_top": ["test_top1"], + "expect_flattened": ["test_top1"], + }, + { + "include_tables": ["test_child"], + "skip_tables": [], + "skip_dependent_tables": True, + "expect_top": ["test_top1"], + "expect_flattened": ["test_top1", "test_child"], + }, + ] + for case in cases: + got = filter_dfs( + tables=tables, + include_tables=case["include_tables"], + skip_tables=case["skip_tables"], + skip_dependent_tables=case["skip_dependent_tables"], + ) + assert sorted([t.name for t in got]) == sorted(case["expect_top"]), case + + got_flattened = flatten_tables(got) + want_flattened = sorted(case["expect_flattened"]) + got_flattened = sorted([t.name for t in got_flattened]) + assert got_flattened == want_flattened, case From 0003c405cac2c57c8d436499e8ba576ab5d04f3b Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Mon, 14 Aug 2023 10:40:36 +0100 Subject: [PATCH 5/6] fix: Add missing metadata fields (#40) * Add missing metadata fields --- cloudquery/sdk/schema/arrow.py | 2 ++ cloudquery/sdk/schema/table.py | 20 ++++++++++++++++++-- tests/schema/test_table.py | 18 ++++++++++++++++-- 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/cloudquery/sdk/schema/arrow.py b/cloudquery/sdk/schema/arrow.py index 2f21bc1..23deed2 100644 --- a/cloudquery/sdk/schema/arrow.py +++ b/cloudquery/sdk/schema/arrow.py @@ -7,3 +7,5 @@ METADATA_FALSE = b"false" METADATA_TABLE_NAME = b"cq:table_name" METADATA_TABLE_DESCRIPTION = b"cq:table_description" +METADATA_TABLE_TITLE = b"cq:table_title" +METADATA_TABLE_DEPENDS_ON = b"cq:table_depends_on" diff --git a/cloudquery/sdk/schema/table.py b/cloudquery/sdk/schema/table.py index 63c6228..7198d63 100644 --- a/cloudquery/sdk/schema/table.py +++ b/cloudquery/sdk/schema/table.py @@ -57,12 +57,24 @@ def from_arrow_schema(cls, schema: pa.Schema) -> Table: columns = [] for field in schema: columns.append(Column.from_arrow_field(field)) + parent = None + if arrow.METADATA_TABLE_DEPENDS_ON in schema.metadata: + parent = Table( + name=schema.metadata[arrow.METADATA_TABLE_DEPENDS_ON].decode("utf-8"), + columns=[], + ) return cls( - name=schema.metadata[arrow.METADATA_TABLE_NAME].decode("utf-8"), + name=schema.metadata.get(arrow.METADATA_TABLE_NAME, b"").decode("utf-8"), + title=schema.metadata.get(arrow.METADATA_TABLE_TITLE, b"").decode("utf-8"), columns=columns, description=schema.metadata.get(arrow.METADATA_TABLE_DESCRIPTION).decode( "utf-8" ), + is_incremental=schema.metadata.get( + arrow.METADATA_INCREMENTAL, arrow.METADATA_FALSE + ) + == arrow.METADATA_TRUE, + parent=parent, ) def to_arrow_schema(self): @@ -70,7 +82,11 @@ def to_arrow_schema(self): md = { arrow.METADATA_TABLE_NAME: self.name, arrow.METADATA_TABLE_DESCRIPTION: self.description, - # arrow.METADATA_CONSTRAINT_NAME: + arrow.METADATA_TABLE_TITLE: self.title, + arrow.METADATA_TABLE_DEPENDS_ON: self.parent.name if self.parent else "", + arrow.METADATA_INCREMENTAL: arrow.METADATA_TRUE + if self.is_incremental + else arrow.METADATA_FALSE, } for column in self.columns: fields.append(column.to_arrow_field()) diff --git a/tests/schema/test_table.py b/tests/schema/test_table.py index e7bb073..235c110 100644 --- a/tests/schema/test_table.py +++ b/tests/schema/test_table.py @@ -6,8 +6,22 @@ def test_table(): - table = Table("test_table", [Column("test_column", pa.int32())]) - table.to_arrow_schema() + table = Table( + name="test_table", + columns=[Column("test_column", pa.int32())], + title="Test Table", + description="Test description", + parent=Table(name="parent_table", columns=[]), + relations=[], + is_incremental=True, + ) + sch = table.to_arrow_schema() + got = Table.from_arrow_schema(sch) + assert got.name == table.name + assert got.title == table.title + assert got.description == table.description + assert got.is_incremental == table.is_incremental + assert got.parent.name == table.parent.name def test_filter_dfs_warns_no_matches(): From e7c2134beefb362a7782f7840664128f30b54e3c Mon Sep 17 00:00:00 2001 From: CloudQuery Bot <102256036+cq-bot@users.noreply.github.com> Date: Mon, 14 Aug 2023 13:02:48 +0300 Subject: [PATCH 6/6] chore(main): Release v0.1.0 (#37) --- CHANGELOG.md | 17 +++++++++++++++++ setup.py | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0360595..8774346 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,22 @@ # Changelog +## [0.1.0](https://github.com/cloudquery/plugin-sdk-python/compare/v0.0.11...v0.1.0) (2023-08-14) + + +### ⚠ BREAKING CHANGES + +* Remove docs generation ([#36](https://github.com/cloudquery/plugin-sdk-python/issues/36)) + +### Features + +* Remove docs generation ([#36](https://github.com/cloudquery/plugin-sdk-python/issues/36)) ([a72df48](https://github.com/cloudquery/plugin-sdk-python/commit/a72df48f3b44dc48fca9707b7937748b0dc77b10)) + + +### Bug Fixes + +* Add missing metadata fields ([#40](https://github.com/cloudquery/plugin-sdk-python/issues/40)) ([0003c40](https://github.com/cloudquery/plugin-sdk-python/commit/0003c405cac2c57c8d436499e8ba576ab5d04f3b)) +* Fix filter_dfs to be recursive, add support for skip_dependent_tables ([#39](https://github.com/cloudquery/plugin-sdk-python/issues/39)) ([149c699](https://github.com/cloudquery/plugin-sdk-python/commit/149c69974c1fd36d7f41ed36e6b545352b29ce5b)) + ## [0.0.11](https://github.com/cloudquery/plugin-sdk-python/compare/v0.0.10...v0.0.11) (2023-08-08) diff --git a/setup.py b/setup.py index 7682091..e877f24 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,7 @@ ] setuptools.setup( name=name, - version="0.0.11", + version="0.1.0", description=description, long_description=long_description, author="CloudQuery LTD",