From 50681c12003d898aa8667e27d4a5ccf80911e1e3 Mon Sep 17 00:00:00 2001
From: Jason Kao <100613312+jsonpr@users.noreply.github.com>
Date: Tue, 8 Aug 2023 14:45:51 -0400
Subject: [PATCH 1/6] Update README.md (#35)

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 3c5af2b..49b6245 100644
--- a/README.md
+++ b/README.md
@@ -5,5 +5,5 @@ This is the high-level package to use for developing CloudQuery plugins in Pytho
 ## Installation
 
 ```commandline
-pip install plugin-sdk-python
-```
\ No newline at end of file
+pip install cloudquery-plugin-sdk
+```

From a72df48f3b44dc48fca9707b7937748b0dc77b10 Mon Sep 17 00:00:00 2001
From: Herman Schaaf <hermanschaaf@gmail.com>
Date: Thu, 10 Aug 2023 15:02:49 +0100
Subject: [PATCH 2/6] feat!: Remove docs generation (#36)

* Remove docs generation
---
 cloudquery/sdk/docs/__init__.py               |   0
 cloudquery/sdk/docs/generator.py              | 145 ------------------
 cloudquery/sdk/docs/markdown_templates.py     |  58 -------
 cloudquery/sdk/serve/plugin.py                |  42 +----
 tests/docs/__init__.py                        |   0
 tests/docs/snapshots/README.md                |   9 --
 tests/docs/snapshots/test_table.md            |  12 --
 tests/docs/snapshots/test_table_child.md      |  20 ---
 .../docs/snapshots/test_table_composite_pk.md |  15 --
 tests/docs/snapshots/test_table_grandchild.md |  15 --
 tests/docs/snapshots/test_table_relations.md  |  19 ---
 tests/docs/test_generator.py                  | 105 -------------
 tests/scheduler/table_resolver.py             |   6 +-
 13 files changed, 3 insertions(+), 443 deletions(-)
 delete mode 100644 cloudquery/sdk/docs/__init__.py
 delete mode 100644 cloudquery/sdk/docs/generator.py
 delete mode 100644 cloudquery/sdk/docs/markdown_templates.py
 delete mode 100644 tests/docs/__init__.py
 delete mode 100644 tests/docs/snapshots/README.md
 delete mode 100644 tests/docs/snapshots/test_table.md
 delete mode 100644 tests/docs/snapshots/test_table_child.md
 delete mode 100644 tests/docs/snapshots/test_table_composite_pk.md
 delete mode 100644 tests/docs/snapshots/test_table_grandchild.md
 delete mode 100644 tests/docs/snapshots/test_table_relations.md
 delete mode 100644 tests/docs/test_generator.py

diff --git a/cloudquery/sdk/docs/__init__.py b/cloudquery/sdk/docs/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/cloudquery/sdk/docs/generator.py b/cloudquery/sdk/docs/generator.py
deleted file mode 100644
index 7bc569e..0000000
--- a/cloudquery/sdk/docs/generator.py
+++ /dev/null
@@ -1,145 +0,0 @@
-import json
-import os
-import pathlib
-import re
-from typing import List
-
-import jinja2
-
-from cloudquery.sdk.docs.markdown_templates import ALL_TABLES, ALL_TABLES_ENTRY, TABLE
-from cloudquery.sdk.schema import Table
-
-
-class JsonTable:
-    def __init__(self):
-        self.name = ""
-        self.title = ""
-        self.description = ""
-        self.columns = []
-        self.relations = []
-
-    def to_dict(self):
-        return {
-            "name": self.name,
-            "title": self.title,
-            "description": self.description,
-            "columns": [col.to_dict() for col in self.columns],
-            "relations": [rel.to_dict() for rel in self.relations],
-        }
-
-
-class JsonColumn:
-    def __init__(self):
-        self.name = ""
-        self.type = ""
-        self.is_primary_key = None
-        self.is_incremental_key = None
-
-    def to_dict(self):
-        return {
-            "name": self.name,
-            "type": self.type,
-            "is_primary_key": self.is_primary_key,
-            "is_incremental_key": self.is_incremental_key,
-        }
-
-
-class Generator:
-    def __init__(self, plugin_name: str, tables: List[Table]):
-        self._plugin_name = plugin_name
-        self._tables = sorted(tables)
-
-    def generate(self, directory: str, format: str):
-        os.makedirs(directory, exist_ok=True)
-        if format == "markdown":
-            self._generate_markdown(directory)
-        elif format == "json":
-            self._generate_json(directory)
-
-    def _generate_json(self, directory: str):
-        json_tables = self._jsonify_tables(self._tables)
-        buffer = bytes(json.dumps(json_tables, indent=2, ensure_ascii=False), "utf-8")
-        output_path = pathlib.Path(directory) / "__tables.json"
-        with output_path.open("wb") as f:
-            f.write(buffer)
-        return None
-
-    def _jsonify_tables(self, tables):
-        json_tables = []
-        for table in tables:
-            json_columns = []
-            for col in table.columns:
-                json_column = JsonColumn()
-                json_column.name = col.name
-                json_column.type = str(col.type)
-                json_column.is_primary_key = col.primary_key
-                json_column.is_incremental_key = col.incremental_key
-                json_columns.append(json_column.__dict__)
-
-            json_table = JsonTable()
-            json_table.name = table.name
-            json_table.title = table.title
-            json_table.description = table.description
-            json_table.columns = json_columns
-            json_table.relations = self._jsonify_tables(table.relations)
-            json_tables.append(json_table.__dict__)
-
-        return json_tables
-
-    def _generate_markdown(self, directory: str):
-        env = jinja2.Environment()
-        env.globals["indent_to_depth"] = self._indent_to_depth
-        env.globals["all_tables_entry"] = self._all_tables_entry
-        all_tables_template = env.from_string(ALL_TABLES)
-        rendered_all_tables = all_tables_template.render(
-            plugin_name=self._plugin_name, tables=self._tables
-        )
-        formatted_all_tables = self._format_markdown(rendered_all_tables)
-
-        with open(os.path.join(directory, "README.md"), "w") as f:
-            f.write(formatted_all_tables)
-
-        for table in self._tables:
-            self._render_table(directory, env, table)
-
-    def _render_table(self, directory: str, env: jinja2.Environment, table: Table):
-        table_template = env.from_string(TABLE)
-        table_md = table_template.render(table=table)
-        formatted_table_md = self._format_markdown(table_md)
-        with open(os.path.join(directory, table.name + ".md"), "w") as f:
-            f.write(formatted_table_md)
-        for relation in table.relations:
-            self._render_table(directory, env, relation)
-
-    def _all_tables_entry(self, table: Table):
-        env = jinja2.Environment()
-        env.globals["indent_to_depth"] = self._indent_to_depth
-        env.globals["all_tables_entry"] = self._all_tables_entry
-        env.globals["indent_table_to_depth"] = self._indent_table_to_depth
-        entry_template = env.from_string(ALL_TABLES_ENTRY)
-        return entry_template.render(table=table)
-
-    @staticmethod
-    def _indent_table_to_depth(table: Table) -> str:
-        s = ""
-        t = table
-        while t.parent is not None:
-            s += "  "
-            t = t.parent
-        return s
-
-    @staticmethod
-    def _indent_to_depth(text: str, depth: int) -> str:
-        indentation = depth * 4  # You can adjust the number of spaces as needed
-        lines = text.split("\n")
-        indented_lines = [(" " * indentation) + line for line in lines]
-        return "\n".join(indented_lines)
-
-    @staticmethod
-    def _format_markdown(text: str) -> str:
-        re_match_newlines = re.compile(r"\n{3,}")
-        re_match_headers = re.compile(r"(#{1,6}.+)\n+")
-
-        text = re_match_newlines.sub(r"\n\n", text)
-        text = re_match_headers.sub(r"\1\n\n", text)
-        return text
diff --git a/cloudquery/sdk/docs/markdown_templates.py b/cloudquery/sdk/docs/markdown_templates.py
deleted file mode 100644
index e27f421..0000000
--- a/cloudquery/sdk/docs/markdown_templates.py
+++ /dev/null
@@ -1,58 +0,0 @@
-ALL_TABLES = """# Source Plugin: {{ plugin_name }}
-## Tables
-{%- for table in tables %}
-{{ all_tables_entry(table) }}
-{%- endfor %}"""
-
-ALL_TABLES_ENTRY = """{{ indent_table_to_depth(table) }}- [{{ table.name }}]({{ table.name }}.md){% if table.is_incremental %} (Incremental){% endif %}
-{%- for rel in table.relations %}
-{{ all_tables_entry(rel) }}
-{%- endfor %}"""
-
-TABLE = """# Table: {{ table.name }}
-
-This table shows data for {{ table.title }}.
-
-{{ table.description }}
-{% set length = table.primary_keys | length %}
-{% if length == 0 %}
-This table does not have a primary key.
-{% elif length == 1 %}
-The primary key for this table is **{{ table.primary_keys[0] }}**.
-{% else %}
-The composite primary key for this table is (
-{%- for pk in table.primary_keys %}
-    {{- ", " if loop.index > 1 else "" }}**{{ pk }}**
-{%- endfor -%}
-).
-{% endif %}
-
-{% if table.is_incremental %}
-It supports incremental syncs{% set ik_length = table.incremental_keys | length %}
-{%- if ik_length == 1 %} based on the **{{ table.incremental_keys[0] }}** column{% else %} based on the (
-{%- for ik in table.incremental_keys %}
-    {{- ", " if loop.index > 1 else "" }}**{{ ik }}**
-{%- endfor -%}
-) columns{% endif %}.
-{%- endif %}
-
-{% if table.relations or table.parent %}
-## Relations
-{% endif %}
-{% if table.parent %}
-This table depends on [{{ table.parent.name }}]({{ table.parent.name }}.md).
-{% endif %}
-{% if table.relations %}
-The following tables depend on {{ table.name }}:
-{% for rel in table.relations %}
-  - [{{ rel.name }}]({{ rel.name }}.md)
-{%- endfor %}
-{% endif %}
-
-## Columns
-| Name          | Type          |
-| ------------- | ------------- |
-{%- for column in table.columns %}
-|{{ column.name }}{% if column.primary_key %} (PK){% endif %}{% if column.incremental_key %} (Incremental Key){% endif %}|`{{ column.type }}`|
-{%- endfor %}
-"""
diff --git a/cloudquery/sdk/serve/plugin.py b/cloudquery/sdk/serve/plugin.py
index f298c9a..23f56cc 100644
--- a/cloudquery/sdk/serve/plugin.py
+++ b/cloudquery/sdk/serve/plugin.py
@@ -10,12 +10,9 @@
 from cloudquery.plugin_v3 import plugin_pb2_grpc
 from structlog import wrap_logger
 
-from cloudquery.sdk.docs.generator import Generator
 from cloudquery.sdk.internal.servers.discovery_v1.discovery import DiscoveryServicer
 from cloudquery.sdk.internal.servers.plugin_v3 import PluginServicer
-from cloudquery.sdk.plugin.plugin import Plugin, TableOptions
-
-DOC_FORMATS = ["json", "markdown"]
+from cloudquery.sdk.plugin.plugin import Plugin
 
 _IS_WINDOWS = sys.platform == "win32"
 
@@ -136,34 +133,10 @@ def run(self, args):
             help="network to serve on. can be tcp or unix",
         )
 
-        doc_parser = subparsers.add_parser(
-            "doc",
-            formatter_class=argparse.RawTextHelpFormatter,
-            help="Generate documentation for tables",
-            description="""Generate documentation for tables.
-
-If format is markdown, a destination directory will be created (if necessary) containing markdown files.
-Example:
-doc ./output 
-
-If format is JSON, a destination directory will be created (if necessary) with a single json file called __tables.json.
-Example:
-doc --format json .
-""",
-        )
-        doc_parser.add_argument("directory", type=str)
-        doc_parser.add_argument(
-            "--format",
-            type=str,
-            default="json",
-            help="output format. one of: {}".format(",".join(DOC_FORMATS)),
-        )
         parsed_args = parser.parse_args(args)
 
         if parsed_args.command == "serve":
             self._serve(parsed_args)
-        elif parsed_args.command == "doc":
-            self._generate_docs(parsed_args)
         else:
             parser.print_help()
             sys.exit(1)
@@ -185,16 +158,3 @@ def _serve(self, args):
 
     def stop(self):
         self._server.stop(5)
-
-    def _generate_docs(self, args):
-        generator = Generator(
-            self._plugin.name(),
-            self._plugin.get_tables(
-                options=TableOptions(
-                    tables=["*"],
-                    skip_tables=[],
-                    skip_dependent_tables=False,
-                )
-            ),
-        )
-        generator.generate(args.directory, args.format)
diff --git a/tests/docs/__init__.py b/tests/docs/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/docs/snapshots/README.md b/tests/docs/snapshots/README.md
deleted file mode 100644
index 572188e..0000000
--- a/tests/docs/snapshots/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Source Plugin: test_plugin
-
-## Tables
-
-- [test_table](test_table.md)
-- [test_table_composite_pk](test_table_composite_pk.md) (Incremental)
-- [test_table_relations](test_table_relations.md) (Incremental)
-  - [test_table_child](test_table_child.md)
-    - [test_table_grandchild](test_table_grandchild.md)
\ No newline at end of file
diff --git a/tests/docs/snapshots/test_table.md b/tests/docs/snapshots/test_table.md
deleted file mode 100644
index a00dc43..0000000
--- a/tests/docs/snapshots/test_table.md
+++ /dev/null
@@ -1,12 +0,0 @@
-# Table: test_table
-
-This table shows data for Test Table.
-
-The primary key for this table is **string**.
-
-## Columns
-
-| Name          | Type          |
-| ------------- | ------------- |
-|string (PK)|`string`|
-|int32|`int32`|
\ No newline at end of file
diff --git a/tests/docs/snapshots/test_table_child.md b/tests/docs/snapshots/test_table_child.md
deleted file mode 100644
index f06e9ee..0000000
--- a/tests/docs/snapshots/test_table_child.md
+++ /dev/null
@@ -1,20 +0,0 @@
-# Table: test_table_child
-
-This table shows data for Child Table.
-
-The primary key for this table is **pk1**.
-
-## Relations
-
-This table depends on [test_table_relations](test_table_relations.md).
-
-The following tables depend on test_table_child:
-
-  - [test_table_grandchild](test_table_grandchild.md)
-
-## Columns
-
-| Name          | Type          |
-| ------------- | ------------- |
-|pk1 (PK)|`string`|
-|fk1|`string`|
\ No newline at end of file
diff --git a/tests/docs/snapshots/test_table_composite_pk.md b/tests/docs/snapshots/test_table_composite_pk.md
deleted file mode 100644
index 3ebead8..0000000
--- a/tests/docs/snapshots/test_table_composite_pk.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# Table: test_table_composite_pk
-
-This table shows data for Composite PKs.
-
-The composite primary key for this table is (**pk1**, **pk2**).
-
-It supports incremental syncs based on the (**pk1**, **pk2**) columns.
-
-## Columns
-
-| Name          | Type          |
-| ------------- | ------------- |
-|pk1 (PK) (Incremental Key)|`string`|
-|pk2 (PK) (Incremental Key)|`string`|
-|int32|`int32`|
\ No newline at end of file
diff --git a/tests/docs/snapshots/test_table_grandchild.md b/tests/docs/snapshots/test_table_grandchild.md
deleted file mode 100644
index 3568be9..0000000
--- a/tests/docs/snapshots/test_table_grandchild.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# Table: test_table_grandchild
-
-This table shows data for Grandchild Table.
-
-The primary key for this table is **pk1**.
-
-## Relations
-
-This table depends on [test_table_child](test_table_child.md).
-
-## Columns
-
-| Name          | Type          |
-| ------------- | ------------- |
-|pk1 (PK)|`string`|
\ No newline at end of file
diff --git a/tests/docs/snapshots/test_table_relations.md b/tests/docs/snapshots/test_table_relations.md
deleted file mode 100644
index b1e4664..0000000
--- a/tests/docs/snapshots/test_table_relations.md
+++ /dev/null
@@ -1,19 +0,0 @@
-# Table: test_table_relations
-
-This table shows data for Table Relations.
-
-The primary key for this table is **pk1**.
-
-It supports incremental syncs based on the () columns.
-
-## Relations
-
-The following tables depend on test_table_relations:
-
-  - [test_table_child](test_table_child.md)
-
-## Columns
-
-| Name          | Type          |
-| ------------- | ------------- |
-|pk1 (PK)|`string`|
\ No newline at end of file
diff --git a/tests/docs/test_generator.py b/tests/docs/test_generator.py
deleted file mode 100644
index 2bcae3f..0000000
--- a/tests/docs/test_generator.py
+++ /dev/null
@@ -1,105 +0,0 @@
-import glob
-import os
-import unittest
-from tempfile import TemporaryDirectory
-
-import pyarrow as pa
-
-from cloudquery.sdk.docs.generator import Generator
-from cloudquery.sdk.schema import Table, Column
-
-dirname = os.path.dirname(__file__)
-SNAPSHOT_DIRECTORY = os.path.join(dirname, "snapshots")
-
-
-def read_snapshot(name):
-    with open(os.path.join(SNAPSHOT_DIRECTORY, name)) as f:
-        return f.read()
-
-
-def update_snapshot(name, content):
-    with open(os.path.join(SNAPSHOT_DIRECTORY, name), "w") as f:
-        f.write(content)
-
-
-class T(unittest.TestCase):
-    def test_docs_generator_markdown(self):
-        tables = [
-            Table(
-                name="test_table",
-                title="Test Table",
-                columns=[
-                    Column("string", pa.string(), primary_key=True),
-                    Column("int32", pa.int32()),
-                ],
-            ),
-            Table(
-                name="test_table_composite_pk",
-                title="Composite PKs",
-                is_incremental=True,
-                columns=[
-                    Column("pk1", pa.string(), primary_key=True, incremental_key=True),
-                    Column("pk2", pa.string(), primary_key=True, incremental_key=True),
-                    Column("int32", pa.int32()),
-                ],
-            ),
-            Table(
-                name="test_table_relations",
-                title="Table Relations",
-                is_incremental=True,
-                columns=[
-                    Column("pk1", pa.string(), primary_key=True),
-                ],
-                relations=[
-                    Table(
-                        name="test_table_child",
-                        title="Child Table",
-                        columns=[
-                            Column("pk1", pa.string(), primary_key=True),
-                            Column("fk1", pa.string()),
-                        ],
-                        relations=[
-                            Table(
-                                name="test_table_grandchild",
-                                title="Grandchild Table",
-                                columns=[Column("pk1", pa.string(), primary_key=True)],
-                            )
-                        ],
-                    )
-                ],
-            ),
-        ]
-
-        # set parent relations
-        tables[2].relations[0].parent = tables[2]
-        tables[2].relations[0].relations[0].parent = tables[2].relations[0]
-
-        gen = Generator("test_plugin", tables)
-        with TemporaryDirectory() as d:
-            gen.generate(d, format="markdown")
-
-            files = glob.glob(os.path.join(d, "*.md"))
-            file_names = [os.path.basename(f) for f in files]
-            assert sorted(file_names) == sorted(
-                [
-                    "README.md",
-                    "test_table_composite_pk.md",
-                    "test_table.md",
-                    "test_table_relations.md",
-                    "test_table_child.md",
-                    "test_table_grandchild.md",
-                ]
-            )
-
-            updated_snapshots = False
-            for file_name in file_names:
-                with self.subTest(msg=file_name):
-                    with open(os.path.join(d, file_name)) as f:
-                        content = f.read()
-                        try:
-                            snapshot = read_snapshot(file_name)
-                            self.assertEqual(snapshot, content)
-                        except FileNotFoundError:
-                            update_snapshot(file_name, content)
-                            updated_snapshots = True
-            assert not updated_snapshots, "Updated snapshots"
diff --git a/tests/scheduler/table_resolver.py b/tests/scheduler/table_resolver.py
index 3a11575..b89e20b 100644
--- a/tests/scheduler/table_resolver.py
+++ b/tests/scheduler/table_resolver.py
@@ -7,7 +7,7 @@
 
 
 @dataclass
-class TestItem:
+class Item:
     test_column2: int = 1
 
 
@@ -20,9 +20,7 @@ def test_table_resolver_resolve_column():
         ],
     )
     resource_dict = Resource(table=test_table, parent=None, item={"test_column": 1})
-    resource_obj = Resource(
-        table=test_table, parent=None, item=TestItem(test_column2=2)
-    )
+    resource_obj = Resource(table=test_table, parent=None, item=Item(test_column2=2))
     test_resolver = TableResolver(table=test_table, child_resolvers=[])
     test_resolver.resolve_column(
         client=None, resource=resource_dict, column_name="test_column"

From 381f9ef455246d0f0662311b2513a966ff3d2f5e Mon Sep 17 00:00:00 2001
From: Herman Schaaf <hermanschaaf@gmail.com>
Date: Mon, 14 Aug 2023 09:03:05 +0100
Subject: [PATCH 3/6] Add support for no_connection param to init (#38)

---
 cloudquery/sdk/internal/servers/plugin_v3/plugin.py | 7 ++++---
 cloudquery/sdk/plugin/plugin.py                     | 5 ++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/cloudquery/sdk/internal/servers/plugin_v3/plugin.py b/cloudquery/sdk/internal/servers/plugin_v3/plugin.py
index fb98011..419bdd3 100644
--- a/cloudquery/sdk/internal/servers/plugin_v3/plugin.py
+++ b/cloudquery/sdk/internal/servers/plugin_v3/plugin.py
@@ -1,8 +1,9 @@
+from typing import Generator
+
 import pyarrow as pa
 import structlog
-
-from typing import Generator
 from cloudquery.plugin_v3 import plugin_pb2, plugin_pb2_grpc, arrow
+
 from cloudquery.sdk.message import (
     SyncInsertMessage,
     SyncMigrateTableMessage,
@@ -27,7 +28,7 @@ def GetVersion(self, request, context):
         return plugin_pb2.GetVersion.Response(version=self._plugin.version())
 
     def Init(self, request: plugin_pb2.Init.Request, context):
-        self._plugin.init(request.spec)
+        self._plugin.init(request.spec, no_connection=request.no_connection)
         return plugin_pb2.Init.Response()
 
     def GetTables(self, request: plugin_pb2.GetTables.Request, context):
diff --git a/cloudquery/sdk/plugin/plugin.py b/cloudquery/sdk/plugin/plugin.py
index 7fc76f6..2462bc1 100644
--- a/cloudquery/sdk/plugin/plugin.py
+++ b/cloudquery/sdk/plugin/plugin.py
@@ -1,9 +1,8 @@
-import queue
 from dataclasses import dataclass
 from typing import List, Generator
 
-from cloudquery.sdk.schema import Table
 from cloudquery.sdk import message
+from cloudquery.sdk.schema import Table
 
 MIGRATE_MODE_STRINGS = ["safe", "force"]
 
@@ -35,7 +34,7 @@ def __init__(self, name: str, version: str) -> None:
         self._name = name
         self._version = version
 
-    def init(self, spec: bytes) -> None:
+    def init(self, spec: bytes, no_connection: bool = False) -> None:
         pass
 
     def set_logger(self, logger) -> None:

From 149c69974c1fd36d7f41ed36e6b545352b29ce5b Mon Sep 17 00:00:00 2001
From: Herman Schaaf <hermanschaaf@gmail.com>
Date: Mon, 14 Aug 2023 09:03:28 +0100
Subject: [PATCH 4/6] fix: Fix filter_dfs to be recursive, add support for
 skip_dependent_tables (#39)

We were missing support for filtering of nested tables before, as well as `skip_dependent_tables`.  This is essentially a direct port of the Go version (https://github.com/cloudquery/plugin-sdk-python/issues/28)


Closes https://github.com/cloudquery/plugin-sdk-python/issues/28
---
 cloudquery/sdk/schema/table.py | 101 ++++++++++++++++++++++++++++-----
 tests/schema/test_table.py     |  84 ++++++++++++++++++++++++++-
 2 files changed, 171 insertions(+), 14 deletions(-)

diff --git a/cloudquery/sdk/schema/table.py b/cloudquery/sdk/schema/table.py
index d0b56ef..63c6228 100644
--- a/cloudquery/sdk/schema/table.py
+++ b/cloudquery/sdk/schema/table.py
@@ -1,7 +1,9 @@
 from __future__ import annotations
 
-from typing import List, Generator, Any
+import copy
 import fnmatch
+from typing import List
+
 import pyarrow as pa
 
 from cloudquery.sdk.schema import arrow
@@ -87,17 +89,90 @@ def tables_to_arrow_schemas(tables: List[Table]):
 
 
 def filter_dfs(
-    tables: List[Table], include_tables: List[str], skip_tables: List[str]
+    tables: List[Table],
+    include_tables: List[str],
+    skip_tables: List[str],
+    skip_dependent_tables: bool = False,
 ) -> List[Table]:
-    filtered: List[Table] = []
+    flattened_tables = flatten_tables(tables)
+    for include_pattern in include_tables:
+        matched = any(
+            fnmatch.fnmatch(table.name, include_pattern) for table in flattened_tables
+        )
+        if not matched:
+            raise ValueError(
+                f"tables include a pattern {include_pattern} with no matches"
+            )
+
+    for exclude_pattern in skip_tables:
+        matched = any(
+            fnmatch.fnmatch(table.name, exclude_pattern) for table in flattened_tables
+        )
+        if not matched:
+            raise ValueError(
+                f"skip_tables include a pattern {exclude_pattern} with no matches"
+            )
+
+    def include_func(t):
+        return any(
+            fnmatch.fnmatch(t.name, include_pattern)
+            for include_pattern in include_tables
+        )
+
+    def exclude_func(t):
+        return any(
+            fnmatch.fnmatch(t.name, exclude_pattern) for exclude_pattern in skip_tables
+        )
+
+    return filter_dfs_func(tables, include_func, exclude_func, skip_dependent_tables)
+
+
+def filter_dfs_func(tt: List[Table], include, exclude, skip_dependent_tables: bool):
+    filtered_tables = []
+    for t in tt:
+        filtered_table = copy.deepcopy(t)
+        filtered_table = _filter_dfs_impl(
+            filtered_table, False, include, exclude, skip_dependent_tables
+        )
+        if filtered_table is not None:
+            filtered_tables.append(filtered_table)
+    return filtered_tables
+
+
+def _filter_dfs_impl(t, parent_matched, include, exclude, skip_dependent_tables):
+    def filter_dfs_child(r, matched, include, exclude, skip_dependent_tables):
+        filtered_child = _filter_dfs_impl(
+            r, matched, include, exclude, skip_dependent_tables
+        )
+        if filtered_child is not None:
+            return True, r
+        return matched, None
+
+    if exclude(t):
+        return None
+
+    matched = parent_matched and not skip_dependent_tables
+    if include(t):
+        matched = True
+
+    filtered_relations = []
+    for r in t.relations:
+        matched, filtered_child = filter_dfs_child(
+            r, matched, include, exclude, skip_dependent_tables
+        )
+        if filtered_child is not None:
+            filtered_relations.append(filtered_child)
+
+    t.relations = filtered_relations
+
+    if matched:
+        return t
+    return None
+
+
+def flatten_tables(tables: List[Table]) -> List[Table]:
+    flattened: List[Table] = []
     for table in tables:
-        matched = False
-        for include_table in include_tables:
-            if fnmatch.fnmatch(table.name, include_table):
-                matched = True
-        for skip_table in skip_tables:
-            if fnmatch.fnmatch(table.name, skip_table):
-                matched = False
-        if matched:
-            filtered.append(table)
-    return filtered
+        flattened.append(table)
+        flattened.extend(flatten_tables(table.relations))
+    return flattened
diff --git a/tests/schema/test_table.py b/tests/schema/test_table.py
index 9958ad7..e7bb073 100644
--- a/tests/schema/test_table.py
+++ b/tests/schema/test_table.py
@@ -1,8 +1,90 @@
 import pyarrow as pa
+import pytest
 
-from cloudquery.sdk.schema import Table, Column
+from cloudquery.sdk.schema import Table, Column, filter_dfs
+from cloudquery.sdk.schema.table import flatten_tables
 
 
 def test_table():
     table = Table("test_table", [Column("test_column", pa.int32())])
     table.to_arrow_schema()
+
+
+def test_filter_dfs_warns_no_matches():
+    with pytest.raises(ValueError):
+        tables = [Table("test1", []), Table("test2", [])]
+        filter_dfs(tables, include_tables=["test3"], skip_tables=[])
+
+    with pytest.raises(ValueError):
+        tables = [Table("test1", []), Table("test2", [])]
+        filter_dfs(tables, include_tables=["*"], skip_tables=["test3"])
+
+
+def test_filter_dfs():
+    table_grandchild = Table("test_grandchild", [Column("test_column", pa.int32())])
+    table_child = Table(
+        "test_child",
+        [Column("test_column", pa.int32())],
+        relations=[
+            table_grandchild,
+        ],
+    )
+    table_top1 = Table(
+        "test_top1",
+        [Column("test_column", pa.int32())],
+        relations=[
+            table_child,
+        ],
+    )
+    table_top2 = Table("test_top2", [Column("test_column", pa.int32())])
+
+    tables = [table_top1, table_top2]
+
+    cases = [
+        {
+            "include_tables": ["*"],
+            "skip_tables": [],
+            "skip_dependent_tables": False,
+            "expect_top": ["test_top1", "test_top2"],
+            "expect_flattened": [
+                "test_top1",
+                "test_top2",
+                "test_child",
+                "test_grandchild",
+            ],
+        },
+        {
+            "include_tables": ["*"],
+            "skip_tables": ["test_top1"],
+            "skip_dependent_tables": False,
+            "expect_top": ["test_top2"],
+            "expect_flattened": ["test_top2"],
+        },
+        {
+            "include_tables": ["test_top1"],
+            "skip_tables": ["test_top2"],
+            "skip_dependent_tables": True,
+            "expect_top": ["test_top1"],
+            "expect_flattened": ["test_top1"],
+        },
+        {
+            "include_tables": ["test_child"],
+            "skip_tables": [],
+            "skip_dependent_tables": True,
+            "expect_top": ["test_top1"],
+            "expect_flattened": ["test_top1", "test_child"],
+        },
+    ]
+    for case in cases:
+        got = filter_dfs(
+            tables=tables,
+            include_tables=case["include_tables"],
+            skip_tables=case["skip_tables"],
+            skip_dependent_tables=case["skip_dependent_tables"],
+        )
+        assert sorted([t.name for t in got]) == sorted(case["expect_top"]), case
+
+        got_flattened = flatten_tables(got)
+        want_flattened = sorted(case["expect_flattened"])
+        got_flattened = sorted([t.name for t in got_flattened])
+        assert got_flattened == want_flattened, case

From 0003c405cac2c57c8d436499e8ba576ab5d04f3b Mon Sep 17 00:00:00 2001
From: Herman Schaaf <hermanschaaf@gmail.com>
Date: Mon, 14 Aug 2023 10:40:36 +0100
Subject: [PATCH 5/6] fix: Add missing metadata fields (#40)

* Add missing metadata fields
---
 cloudquery/sdk/schema/arrow.py |  2 ++
 cloudquery/sdk/schema/table.py | 20 ++++++++++++++++++--
 tests/schema/test_table.py     | 18 ++++++++++++++++--
 3 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/cloudquery/sdk/schema/arrow.py b/cloudquery/sdk/schema/arrow.py
index 2f21bc1..23deed2 100644
--- a/cloudquery/sdk/schema/arrow.py
+++ b/cloudquery/sdk/schema/arrow.py
@@ -7,3 +7,5 @@
 METADATA_FALSE = b"false"
 METADATA_TABLE_NAME = b"cq:table_name"
 METADATA_TABLE_DESCRIPTION = b"cq:table_description"
+METADATA_TABLE_TITLE = b"cq:table_title"
+METADATA_TABLE_DEPENDS_ON = b"cq:table_depends_on"
diff --git a/cloudquery/sdk/schema/table.py b/cloudquery/sdk/schema/table.py
index 63c6228..7198d63 100644
--- a/cloudquery/sdk/schema/table.py
+++ b/cloudquery/sdk/schema/table.py
@@ -57,12 +57,24 @@ def from_arrow_schema(cls, schema: pa.Schema) -> Table:
         columns = []
         for field in schema:
             columns.append(Column.from_arrow_field(field))
+        parent = None
+        if arrow.METADATA_TABLE_DEPENDS_ON in schema.metadata:
+            parent = Table(
+                name=schema.metadata[arrow.METADATA_TABLE_DEPENDS_ON].decode("utf-8"),
+                columns=[],
+            )
         return cls(
-            name=schema.metadata[arrow.METADATA_TABLE_NAME].decode("utf-8"),
+            name=schema.metadata.get(arrow.METADATA_TABLE_NAME, b"").decode("utf-8"),
+            title=schema.metadata.get(arrow.METADATA_TABLE_TITLE, b"").decode("utf-8"),
             columns=columns,
             description=schema.metadata.get(arrow.METADATA_TABLE_DESCRIPTION).decode(
                 "utf-8"
             ),
+            is_incremental=schema.metadata.get(
+                arrow.METADATA_INCREMENTAL, arrow.METADATA_FALSE
+            )
+            == arrow.METADATA_TRUE,
+            parent=parent,
         )
 
     def to_arrow_schema(self):
@@ -70,7 +82,11 @@ def to_arrow_schema(self):
         md = {
             arrow.METADATA_TABLE_NAME: self.name,
             arrow.METADATA_TABLE_DESCRIPTION: self.description,
-            # arrow.METADATA_CONSTRAINT_NAME:
+            arrow.METADATA_TABLE_TITLE: self.title,
+            arrow.METADATA_TABLE_DEPENDS_ON: self.parent.name if self.parent else "",
+            arrow.METADATA_INCREMENTAL: arrow.METADATA_TRUE
+            if self.is_incremental
+            else arrow.METADATA_FALSE,
         }
         for column in self.columns:
             fields.append(column.to_arrow_field())
diff --git a/tests/schema/test_table.py b/tests/schema/test_table.py
index e7bb073..235c110 100644
--- a/tests/schema/test_table.py
+++ b/tests/schema/test_table.py
@@ -6,8 +6,22 @@
 
 
 def test_table():
-    table = Table("test_table", [Column("test_column", pa.int32())])
-    table.to_arrow_schema()
+    table = Table(
+        name="test_table",
+        columns=[Column("test_column", pa.int32())],
+        title="Test Table",
+        description="Test description",
+        parent=Table(name="parent_table", columns=[]),
+        relations=[],
+        is_incremental=True,
+    )
+    sch = table.to_arrow_schema()
+    got = Table.from_arrow_schema(sch)
+    assert got.name == table.name
+    assert got.title == table.title
+    assert got.description == table.description
+    assert got.is_incremental == table.is_incremental
+    assert got.parent.name == table.parent.name
 
 
 def test_filter_dfs_warns_no_matches():

From e7c2134beefb362a7782f7840664128f30b54e3c Mon Sep 17 00:00:00 2001
From: CloudQuery Bot <102256036+cq-bot@users.noreply.github.com>
Date: Mon, 14 Aug 2023 13:02:48 +0300
Subject: [PATCH 6/6] chore(main): Release v0.1.0 (#37)

---
 CHANGELOG.md | 17 +++++++++++++++++
 setup.py     |  2 +-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0360595..8774346 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,22 @@
 # Changelog
 
+## [0.1.0](https://github.com/cloudquery/plugin-sdk-python/compare/v0.0.11...v0.1.0) (2023-08-14)
+
+
+### ⚠ BREAKING CHANGES
+
+* Remove docs generation ([#36](https://github.com/cloudquery/plugin-sdk-python/issues/36))
+
+### Features
+
+* Remove docs generation ([#36](https://github.com/cloudquery/plugin-sdk-python/issues/36)) ([a72df48](https://github.com/cloudquery/plugin-sdk-python/commit/a72df48f3b44dc48fca9707b7937748b0dc77b10))
+
+
+### Bug Fixes
+
+* Add missing metadata fields ([#40](https://github.com/cloudquery/plugin-sdk-python/issues/40)) ([0003c40](https://github.com/cloudquery/plugin-sdk-python/commit/0003c405cac2c57c8d436499e8ba576ab5d04f3b))
+* Fix filter_dfs to be recursive, add support for skip_dependent_tables ([#39](https://github.com/cloudquery/plugin-sdk-python/issues/39)) ([149c699](https://github.com/cloudquery/plugin-sdk-python/commit/149c69974c1fd36d7f41ed36e6b545352b29ce5b))
+
 ## [0.0.11](https://github.com/cloudquery/plugin-sdk-python/compare/v0.0.10...v0.0.11) (2023-08-08)
 
 
diff --git a/setup.py b/setup.py
index 7682091..e877f24 100644
--- a/setup.py
+++ b/setup.py
@@ -53,7 +53,7 @@
 ]
 setuptools.setup(
     name=name,
-    version="0.0.11",
+    version="0.1.0",
     description=description,
     long_description=long_description,
     author="CloudQuery LTD",