From 89a630edf67977b269e7a84aa0bbcbceebee1f33 Mon Sep 17 00:00:00 2001 From: enya-yx Date: Tue, 3 Jan 2023 17:10:34 +0800 Subject: [PATCH 01/17] Add pytest cases and check test coverage for sql-registry and purview-registry --- registry/test/.coveragerc | 5 + registry/test/run_test_cases.sh | 0 registry/test/test_purview_registry.py | 159 +++++++++++++++++++++++++ registry/test/test_sql_registry.py | 150 +++++++++++++++++++++++ 4 files changed, 314 insertions(+) create mode 100644 registry/test/.coveragerc create mode 100644 registry/test/run_test_cases.sh create mode 100644 registry/test/test_purview_registry.py create mode 100644 registry/test/test_sql_registry.py diff --git a/registry/test/.coveragerc b/registry/test/.coveragerc new file mode 100644 index 000000000..4ed0f4927 --- /dev/null +++ b/registry/test/.coveragerc @@ -0,0 +1,5 @@ +[run] +[report] +exclude_lines = + pragma: no cover + @abstract \ No newline at end of file diff --git a/registry/test/run_test_cases.sh b/registry/test/run_test_cases.sh new file mode 100644 index 000000000..e69de29bb diff --git a/registry/test/test_purview_registry.py b/registry/test/test_purview_registry.py new file mode 100644 index 000000000..4baec72fb --- /dev/null +++ b/registry/test/test_purview_registry.py @@ -0,0 +1,159 @@ +import sys, os +sys.path.append(os.path.join(os.path.dirname(sys.path[0]),'purview-registry')) +from datetime import datetime +import unittest, pytest +from unicodedata import name +from registry.models import AnchorDef, AnchorFeatureDef, DerivedFeatureDef, ExpressionTransformation, WindowAggregationTransformation, UdfTransformation, FeatureType, ProjectDef, SourceDef, TensorCategory, TypedKey, ValueType, VectorType, EntityType +from registry.purview_registry import PurviewRegistry, ConflictError +''' +def basic_setup(): + purview_name = os.getenv('PURVIEW_NAME') + return PurviewRegistry("eyxpurview") + +import sys, os +sys.path.append(os.path.join(os.path.dirname(sys.path[0]),'sql-registry')) +from datetime import datetime +import unittest, pytest + +from registry.db_registry import DbRegistry, quote, ConflictError +from registry.models import AnchorDef, AnchorFeatureDef, DerivedFeatureDef, ExpressionTransformation, WindowAggregationTransformation, UdfTransformation, FeatureType, ProjectDef, SourceDef, TensorCategory, Transformation, TypedKey, ValueType, VectorType, EntityType +''' +class PurviewRegistryTest(unittest.TestCase): + + def setup(self): + purview_name = os.getenv('PURVIEW_NAME') + if purview_name is None: + raise RuntimeError("Failed to run Purview registry test case. Cannot get environment variable: 'PURVIEW_NAME'") + self.registry = PurviewRegistry(purview_name) + + def cleanup(self, ids): + for id in ids: + self.registry.delete_entity(id) + + def create_and_get_project(self, project_name): + project_id = self.registry.create_project(ProjectDef(project_name)) + assert project_id is not None + project = self.registry.get_entity(project_id) + assert project.qualified_name == project_name + assert self.registry.get_entity_id(project_name) == str(project_id) + return project_id + + def create_and_get_data_source(self, project_id, qualified_name, name, path, type): + source_id = self.registry.create_project_datasource(project_id, SourceDef( + qualified_name=qualified_name, name=name, path=path, type=type)) + assert source_id is not None + source = self.registry.get_entity(source_id) + assert source.qualified_name == qualified_name + return source_id + + def create_and_get_anchor(self, project_id, source_id, qualified_name, name): + anchor_id = self.registry.create_project_anchor(project_id, AnchorDef( + qualified_name=qualified_name, name=name, source_id=source_id)) + assert anchor_id is not None + anchor = self.registry.get_entity(anchor_id) + assert anchor.qualified_name == qualified_name + # anchor1 has source "source1" + assert anchor.attributes.source['guid'] == str(source_id) + return anchor_id + + def create_and_get_anchor_feature(self, project_id, anchor_id, qualified_name, name, feature_type, transformation, keys): + af_id = self.registry.create_project_anchor_feature(project_id, anchor_id, AnchorFeatureDef( + qualified_name=qualified_name, name=name, feature_type=feature_type, transformation=transformation, key=keys)) + assert af_id is not None + af = self.registry.get_entity(af_id) + assert af.qualified_name == qualified_name + return af_id + + def create_and_get_derived_feature(self, project_id, qualified_name, name, feature_type, transformation, keys, input_anchor_features, input_derived_features): + df_id = self.registry.create_project_derived_feature(project_id, DerivedFeatureDef(qualified_name=qualified_name, + name=name, feature_type=feature_type, transformation=transformation, key=keys, input_anchor_features=input_anchor_features, input_derived_features=input_derived_features)) + assert df_id is not None + df = self.registry.get_entity(df_id) + assert df.qualified_name == qualified_name + af_id = input_anchor_features[0] + af1_downstream_entities = self.registry.get_dependent_entities(af_id) + assert len(af1_downstream_entities) > 0 + return df_id + + def test_registry(self): + self.setup() + now = datetime.now() + project_name = ''.join(["unit_test_project", str(now.minute), str(now.second)]) + # test create project + project_id = self.create_and_get_project(project_name) + # re-create project, should return the same id + id = self.registry.create_project(ProjectDef(project_name)) + assert project_id == id + projects = self.registry.get_projects() + assert len(projects) >= 1 + project_ids = self.registry.get_projects_ids() + assert len(project_ids.keys()) >= 1 + + # test create data source + source_id = self.create_and_get_data_source(project_id, project_name+"__source", "source","hdfs://somewhere","hdfs") + + # test create anchor + anchor_id = self.create_and_get_anchor(project_id, source_id, project_name+"__anchor", "anchor") + + # test create anchor feature + ft1 = FeatureType(type=VectorType.TENSOR, tensor_category=TensorCategory.DENSE, + dimension_type=[], val_type=ValueType.INT) + t1 = ExpressionTransformation("af") + t2 = WindowAggregationTransformation("def_expr","agg_func","window","group_by","filter",limit=10) + t3 = UdfTransformation(name = "udf_trans") + k = TypedKey(key_column="c1", key_column_type=ValueType.INT, full_name="tk", description="description", key_column_alias="alias") + af_id = self.create_and_get_anchor_feature(project_id, anchor_id,project_name+"__anchor__af", name="af", feature_type=ft1, transformation=t1, keys=[k]) + af_id2 = self.create_and_get_anchor_feature(project_id, anchor_id,project_name+"__anchor__af2", name="af2", feature_type=ft1, transformation=t2, keys=[k]) + + # test create derived feature + df_id = self.create_and_get_derived_feature(project_id, project_name+"__df", "df", ft1, t1, [k], [af_id], []) + df_id2 = self.create_and_get_derived_feature(project_id, project_name+"__df2", "df2", ft1, t2, [k], [af_id], [df_id]) + df_id3 = self.create_and_get_derived_feature(project_id, project_name+"__df3", "df3", ft1, t3, [k], [af_id], [df_id, df_id2]) + + # test all entities and relations + features = self.registry.get_project_features(project_id, project_name) + assert(len(features) > 0) + entities_and_relations =self.registry.get_lineage(df_id).to_dict() + assert(len(entities_and_relations['guidEntityMap']) > 0) + assert(len(entities_and_relations['relations']) > 0) + + entities_and_relations2 = self.registry.get_project(project_id).to_dict() + assert(len(entities_and_relations2['guidEntityMap']) > 0) + assert(len(entities_and_relations2['relations']) > 0) + + # test downstreams entities + project_downstream_ids = self.registry.get_dependent_entities(project_id) + assert(len(project_downstream_ids) > 0 ) + source_downstream_ids = self.registry.get_dependent_entities(source_id) + assert(len(source_downstream_ids) > 0) + anchor_downstream_ids = self.registry.get_dependent_entities(anchor_id) + assert(len(anchor_downstream_ids) > 0) + + # test search entities + entities = self.registry.search_entity(project_name+"__anchor", [EntityType.Anchor], project_id) + #assert(len(entities) > 0) + entities = self.registry.search_entity(project_name, [EntityType.Project]) + assert(len(entities) > 0) + + # test create entities with existing names + ''' + with pytest.raises(ConflictError): + project_id3 = self.registry.create_project(ProjectDef(project_name+"__anchor")) + assert project_id3 == anchor_id + with pytest.raises(ConflictError): + source_id2 = self.registry.create_project_datasource(project_id, SourceDef( + qualified_name=project_name+"__anchor", name="anchor", path="somepath", type="hdfs")) + assert source_id2 == anchor_id + with pytest.raises(ConflictError): + anchor_id2 = self.registry.create_project_anchor(project_id, AnchorDef( + qualified_name=project_name+"__source", name="source", source_id=source_id)) + assert anchor_id2 == source_id + af_id3 = self.registry.create_project_anchor_feature(project_id, anchor_id, AnchorFeatureDef( + qualified_name=project_name+"__anchor__af", name="af", feature_type=ft1, transformation=t1, key=[k])) + assert af_id3 == af_id + df_id4 = self.registry.create_project_derived_feature(project_id, DerivedFeatureDef(qualified_name=project_name+"__df", + name="df", feature_type=ft1, transformation=t1, key=[k], input_anchor_features=[], input_derived_features=[])) + assert df_id4 == df_id + ''' + self.cleanup([project_id, source_id, anchor_id, af_id, af_id2, df_id, df_id2,df_id3]) + assert len(self.registry.get_projects()) == len(projects)-1 \ No newline at end of file diff --git a/registry/test/test_sql_registry.py b/registry/test/test_sql_registry.py new file mode 100644 index 000000000..1bc6823cc --- /dev/null +++ b/registry/test/test_sql_registry.py @@ -0,0 +1,150 @@ +import sys, os +sys.path.append(os.path.join(os.path.dirname(sys.path[0]),'sql-registry')) +from datetime import datetime +#sys.path.append(os.path.join(os.path.dirname(sys.path[0]),'purview-registry')) +import unittest, pytest + +from registry.db_registry import DbRegistry, quote, ConflictError +from registry.models import AnchorDef, AnchorFeatureDef, DerivedFeatureDef, ExpressionTransformation, WindowAggregationTransformation, UdfTransformation, FeatureType, ProjectDef, SourceDef, TensorCategory, Transformation, TypedKey, ValueType, VectorType, EntityType + +class SqlRegistryTest(unittest.TestCase): + + def setup(self): + if os.getenv('CONNECTION_STR') is None: + raise RuntimeError("Failed to run SQL registry test case. Cannot get environment variable: 'CONNECTION_STR'") + self.registry = DbRegistry() + + def cleanup(self, ids): + with self.registry.conn.transaction() as c: + ids = quote(ids) + c.execute( + f"delete from edges where from_id in ({ids}) or to_id in ({ids})") + c.execute( + f"delete from entities where entity_id in ({ids})") + + def create_and_get_project(self, project_name): + project_id = self.registry.create_project(ProjectDef(project_name)) + assert project_id is not None + project = self.registry.get_entity(project_id) + assert project.qualified_name == project_name + assert self.registry.get_entity_id(project_name) == str(project_id) + return project_id + + def create_and_get_data_source(self, project_id, qualified_name, name, path, type): + source_id = self.registry.create_project_datasource(project_id, SourceDef( + qualified_name=qualified_name, name=name, path=path, type=type)) + assert source_id is not None + source = self.registry.get_entity(source_id) + assert source.qualified_name == qualified_name + return source_id + + def create_and_get_anchor(self, project_id, source_id, qualified_name, name): + anchor_id = self.registry.create_project_anchor(project_id, AnchorDef( + qualified_name=qualified_name, name=name, source_id=source_id)) + assert anchor_id is not None + anchor = self.registry.get_entity(anchor_id) + assert anchor.qualified_name == qualified_name + # anchor1 has source "source1" + assert anchor.attributes.source.id == source_id + return anchor_id + + def create_and_get_anchor_feature(self, project_id, anchor_id, qualified_name, name, feature_type, transformation, keys): + af_id = self.registry.create_project_anchor_feature(project_id, anchor_id, AnchorFeatureDef( + qualified_name=qualified_name, name=name, feature_type=feature_type, transformation=transformation, key=keys)) + assert af_id is not None + af = self.registry.get_entity(af_id) + assert af.qualified_name == qualified_name + return af_id + + def create_and_get_derived_feature(self, project_id, qualified_name, name, feature_type, transformation, keys, input_anchor_features, inpur_derived_features): + df_id = self.registry.create_project_derived_feature(project_id, DerivedFeatureDef(qualified_name=qualified_name, + name=name, feature_type=feature_type, transformation=transformation, key=keys, input_anchor_features=input_anchor_features, input_derived_features=inpur_derived_features)) + assert df_id is not None + df = self.registry.get_entity(df_id) + assert df.qualified_name == qualified_name + # df1 has only 1 input anchor feature "af1" + assert df.attributes.input_anchor_features[0].id == input_anchor_features[0] + af_id = input_anchor_features[0] + af1_downstream_entities = self.registry.get_dependent_entities(af_id) + assert len(af1_downstream_entities) > 0 + return df_id + + def test_registry(self): + self.setup() + now = datetime.now() + project_name = ''.join(["unit_test_project", str(now.minute), str(now.second)]) + # test create project + project_id = self.create_and_get_project(project_name) + # re-create project, should return the same id + id = self.registry.create_project(ProjectDef(project_name)) + assert project_id == id + project_id2 = self.create_and_get_project("unit_test_project2") + projects = self.registry.get_projects() + assert len(projects) >= 2 + project_ids = self.registry.get_projects_ids() + assert len(project_ids.keys()) >= 2 + + # test create data source + source_id = self.create_and_get_data_source(project_id, project_name+"__source", "source","hdfs://somewhere","hdfs") + + # test create anchor + anchor_id = self.create_and_get_anchor(project_id, source_id, project_name+"__anchor", "anchor") + + # test create anchor feature + ft1 = FeatureType(type=VectorType.TENSOR, tensor_category=TensorCategory.DENSE, + dimension_type=[], val_type=ValueType.INT) + t1 = ExpressionTransformation("af") + t2 = WindowAggregationTransformation("def_expr","agg_func","window","group_by","filter",limit=10) + t3 = UdfTransformation(name = "udf_trans") + k = TypedKey(key_column="c1", key_column_type=ValueType.INT, full_name="tk", description="description", key_column_alias="alias") + af_id = self.create_and_get_anchor_feature(project_id, anchor_id,project_name+"__anchor__af", name="af", feature_type=ft1, transformation=t1, keys=[k]) + af_id2 = self.create_and_get_anchor_feature(project_id, anchor_id,project_name+"__anchor__af2", name="af2", feature_type=ft1, transformation=t2, keys=[k]) + # test create derived feature + df_id = self.create_and_get_derived_feature(project_id, project_name+"__df", "df", ft1, t1, [k], [af_id], []) + df_id2 = self.create_and_get_derived_feature(project_id, project_name+"__df2", "df2", ft1, t1, [k], [af_id], [df_id]) + df_id3 = self.create_and_get_derived_feature(project_id, project_name+"__df3", "df3", ft1, t1, [k], [af_id], [df_id, df_id2]) + # test all entities and relations + entities_and_relations =self.registry.get_lineage(df_id).to_dict() + assert(len(entities_and_relations['guidEntityMap']) > 0) + assert(len(entities_and_relations['relations']) > 0) + + entities_and_relations2 = self.registry.get_project(project_id).to_dict() + assert(len(entities_and_relations2['guidEntityMap']) > 0) + assert(len(entities_and_relations2['relations']) > 0) + + # test downstreams entities + project_downstream_ids = self.registry.get_dependent_entities(project_id) + assert(len(project_downstream_ids) > 0 ) + source_downstream_ids = self.registry.get_dependent_entities(source_id) + assert(len(source_downstream_ids) > 0) + anchor_downstream_ids = self.registry.get_dependent_entities(anchor_id) + assert(len(anchor_downstream_ids) > 0) + + # test search entities + entities = self.registry.search_entity(project_name+"__anchor", [EntityType.Anchor], project_id) + assert(len(entities) > 0) + entities = self.registry.search_entity(project_name, [EntityType.Project]) + assert(len(entities) > 0) + + # test create entities with existing names + with pytest.raises(ConflictError): + project_id3 = self.registry.create_project(ProjectDef(project_name+"__anchor")) + assert project_id3 == anchor_id + with pytest.raises(ConflictError): + source_id2 = self.registry.create_project_datasource(project_id, SourceDef( + qualified_name=project_name+"__anchor", name="anchor", path="somepath", type="hdfs")) + assert source_id2 == anchor_id + with pytest.raises(ConflictError): + anchor_id2 = self.registry.create_project_anchor(project_id, AnchorDef( + qualified_name=project_name+"__source", name="source", source_id=source_id)) + assert anchor_id2 == source_id + af_id3 = self.registry.create_project_anchor_feature(project_id, anchor_id, AnchorFeatureDef( + qualified_name=project_name+"__anchor__af", name="af", feature_type=ft1, transformation=t1, key=[k])) + assert af_id3 == af_id + df_id4 = self.registry.create_project_derived_feature(project_id, DerivedFeatureDef(qualified_name=project_name+"__df", + name="df", feature_type=ft1, transformation=t1, key=[k], input_anchor_features=[], input_derived_features=[])) + assert df_id4 == df_id + + self.registry.delete_entity(project_id2) + self.cleanup([project_id, source_id, anchor_id, af_id, af_id2, df_id, df_id2,df_id3]) + assert len(self.registry.get_projects()) == len(projects)-2 \ No newline at end of file From 8c4b6ca11a7ccaae43cdd66b81a2ef0596c1da8b Mon Sep 17 00:00:00 2001 From: enya-yx Date: Tue, 3 Jan 2023 17:17:54 +0800 Subject: [PATCH 02/17] quick fix --- registry/sql-registry/registry/database.py | 4 +++- registry/test/run_test_cases.sh | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) mode change 100644 => 100755 registry/test/run_test_cases.sh diff --git a/registry/sql-registry/registry/database.py b/registry/sql-registry/registry/database.py index 21b8a2aca..084bc4cbe 100644 --- a/registry/sql-registry/registry/database.py +++ b/registry/sql-registry/registry/database.py @@ -20,12 +20,14 @@ class DbConnection(ABC): def query(self, sql: str, *args, **kwargs) -> list[dict]: pass +# already has one in 'db_registry.py'; shall we remove it? +''' def quote(id): if isinstance(id, str): return f"'{id}'" else: return ",".join([f"'{i}'" for i in id]) - +''' def parse_conn_str(s: str) -> dict: """ diff --git a/registry/test/run_test_cases.sh b/registry/test/run_test_cases.sh old mode 100644 new mode 100755 index e69de29bb..b1cb3000d --- a/registry/test/run_test_cases.sh +++ b/registry/test/run_test_cases.sh @@ -0,0 +1,6 @@ +# run test cases and check coverage for purview-registry; +# need to set "PURVIEW_NAME" in env +pytest --cov-report term-missing --cov=registry/purview-registry/registry --cov-config=registry/test/.coveragerc registry/test/test_purview_registry.py +# run test cases and check coverage for sql-registry; +# need to set "CONNECTION_STR" in env +pytest --cov-report term-missing --cov=registry/sql-registry/registry --cov-config=registry/test/.coveragerc registry/test/test_sql_registry.py From 823a32b5a18ad1606adc4eea105ce087d0a903f0 Mon Sep 17 00:00:00 2001 From: enya-yx Date: Tue, 3 Jan 2023 17:32:09 +0800 Subject: [PATCH 03/17] quick fix --- registry/purview-registry/registry/purview_registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/registry/purview-registry/registry/purview_registry.py b/registry/purview-registry/registry/purview_registry.py index 97aa2f654..dea0c127d 100644 --- a/registry/purview-registry/registry/purview_registry.py +++ b/registry/purview-registry/registry/purview_registry.py @@ -326,7 +326,7 @@ def search_entity(self, entity_type = entity['entityType'] if type and entity_type in [str(x) for x in type]: if project: - if not (qualified_name.startswith(project) or entity_id == str(project)): + if not (qualified_name.startswith(str(project)) or entity_id == str(project)): continue result.append(EntityRef(UUID(entity_id),entity_type,qualified_name)) return result From 46fed4864522daeee13d8b70e91b15c273cffae8 Mon Sep 17 00:00:00 2001 From: enya-yx Date: Thu, 5 Jan 2023 15:00:54 +0800 Subject: [PATCH 04/17] quick fix & insert registry test into workflow --- .github/workflows/pull_request_push_test.yml | 31 ++++++++++++++++ registry/sql-registry/requirements.txt | 2 ++ registry/test/test_purview_registry.py | 37 +------------------- registry/test/test_sql_registry.py | 2 +- 4 files changed, 35 insertions(+), 37 deletions(-) diff --git a/.github/workflows/pull_request_push_test.yml b/.github/workflows/pull_request_push_test.yml index beb47b94f..7916a6961 100644 --- a/.github/workflows/pull_request_push_test.yml +++ b/.github/workflows/pull_request_push_test.yml @@ -257,6 +257,37 @@ jobs: run: | # skip cloud related tests pytest --cov-report term-missing --cov=feathr_project/feathr/spark_provider feathr_project/test/test_local_spark_e2e.py --cov-config=.github/workflows/.coveragerc_local + + registry_test: + runs-on: ubuntu-latest + if: github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'registry test')) + steps: + - uses: actions/checkout@v2 + with: + ref: ${{ github.event.pull_request.head.sha }} + - name: Set up JDK 8 + uses: actions/setup-java@v2 + with: + java-version: "8" + distribution: "temurin" + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Install Feathr Package + run: | + python -m pip install --upgrade pip + if [ -f ./registry/purview-registry/requirements.txt ]; then pip install -r ./registry/purview-registry/requirements.txt; fi + if [ -f ./registry/sql-registry/requirements.txt ]; then pip install -r ./registry/sql-registry/requirements.txt; fi + - name: Run Registry Test Cases + env: + AZURE_CLIENT_ID: ${{secrets.AZURE_CLIENT_ID}} + AZURE_TENANT_ID: ${{secrets.AZURE_TENANT_ID}} + AZURE_CLIENT_SECRET: ${{secrets.AZURE_CLIENT_SECRET}} + PURVIEW_NAME: 'feathrazuretest3-purview1' + CONNECTION_STR: 'Server=tcp:feathrtestsql4.database.windows.net,1433;Initial Catalog=testsql;Persist Securit y Info=False;User ID=feathr@feathrtestsql4;Password=Password01!;MultipleActiveResultSets=False;Encrypt=True;TrustSrverCertificate=False;Connection Timeout=30;' + run: | + ./registry/test/run_test_cases.sh failure_notification: # If any failure, warning message will be sent diff --git a/registry/sql-registry/requirements.txt b/registry/sql-registry/requirements.txt index c6d61de98..afa204392 100644 --- a/registry/sql-registry/requirements.txt +++ b/registry/sql-registry/requirements.txt @@ -1,3 +1,5 @@ +Cython +pydantic pymssql fastapi uvicorn \ No newline at end of file diff --git a/registry/test/test_purview_registry.py b/registry/test/test_purview_registry.py index 4baec72fb..8f72ceeea 100644 --- a/registry/test/test_purview_registry.py +++ b/registry/test/test_purview_registry.py @@ -5,19 +5,7 @@ from unicodedata import name from registry.models import AnchorDef, AnchorFeatureDef, DerivedFeatureDef, ExpressionTransformation, WindowAggregationTransformation, UdfTransformation, FeatureType, ProjectDef, SourceDef, TensorCategory, TypedKey, ValueType, VectorType, EntityType from registry.purview_registry import PurviewRegistry, ConflictError -''' -def basic_setup(): - purview_name = os.getenv('PURVIEW_NAME') - return PurviewRegistry("eyxpurview") - -import sys, os -sys.path.append(os.path.join(os.path.dirname(sys.path[0]),'sql-registry')) -from datetime import datetime -import unittest, pytest -from registry.db_registry import DbRegistry, quote, ConflictError -from registry.models import AnchorDef, AnchorFeatureDef, DerivedFeatureDef, ExpressionTransformation, WindowAggregationTransformation, UdfTransformation, FeatureType, ProjectDef, SourceDef, TensorCategory, Transformation, TypedKey, ValueType, VectorType, EntityType -''' class PurviewRegistryTest(unittest.TestCase): def setup(self): @@ -131,29 +119,6 @@ def test_registry(self): # test search entities entities = self.registry.search_entity(project_name+"__anchor", [EntityType.Anchor], project_id) - #assert(len(entities) > 0) - entities = self.registry.search_entity(project_name, [EntityType.Project]) - assert(len(entities) > 0) - # test create entities with existing names - ''' - with pytest.raises(ConflictError): - project_id3 = self.registry.create_project(ProjectDef(project_name+"__anchor")) - assert project_id3 == anchor_id - with pytest.raises(ConflictError): - source_id2 = self.registry.create_project_datasource(project_id, SourceDef( - qualified_name=project_name+"__anchor", name="anchor", path="somepath", type="hdfs")) - assert source_id2 == anchor_id - with pytest.raises(ConflictError): - anchor_id2 = self.registry.create_project_anchor(project_id, AnchorDef( - qualified_name=project_name+"__source", name="source", source_id=source_id)) - assert anchor_id2 == source_id - af_id3 = self.registry.create_project_anchor_feature(project_id, anchor_id, AnchorFeatureDef( - qualified_name=project_name+"__anchor__af", name="af", feature_type=ft1, transformation=t1, key=[k])) - assert af_id3 == af_id - df_id4 = self.registry.create_project_derived_feature(project_id, DerivedFeatureDef(qualified_name=project_name+"__df", - name="df", feature_type=ft1, transformation=t1, key=[k], input_anchor_features=[], input_derived_features=[])) - assert df_id4 == df_id - ''' self.cleanup([project_id, source_id, anchor_id, af_id, af_id2, df_id, df_id2,df_id3]) - assert len(self.registry.get_projects()) == len(projects)-1 \ No newline at end of file + \ No newline at end of file diff --git a/registry/test/test_sql_registry.py b/registry/test/test_sql_registry.py index 1bc6823cc..58e0b2a70 100644 --- a/registry/test/test_sql_registry.py +++ b/registry/test/test_sql_registry.py @@ -147,4 +147,4 @@ def test_registry(self): self.registry.delete_entity(project_id2) self.cleanup([project_id, source_id, anchor_id, af_id, af_id2, df_id, df_id2,df_id3]) - assert len(self.registry.get_projects()) == len(projects)-2 \ No newline at end of file + \ No newline at end of file From 5cd67e24a5418db0a064944dc0eb62f804e9df1d Mon Sep 17 00:00:00 2001 From: enya-yx Date: Thu, 5 Jan 2023 15:07:47 +0800 Subject: [PATCH 05/17] quick fix --- .github/workflows/pull_request_push_test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pull_request_push_test.yml b/.github/workflows/pull_request_push_test.yml index 7916a6961..1bc5b7afa 100644 --- a/.github/workflows/pull_request_push_test.yml +++ b/.github/workflows/pull_request_push_test.yml @@ -287,7 +287,8 @@ jobs: PURVIEW_NAME: 'feathrazuretest3-purview1' CONNECTION_STR: 'Server=tcp:feathrtestsql4.database.windows.net,1433;Initial Catalog=testsql;Persist Securit y Info=False;User ID=feathr@feathrtestsql4;Password=Password01!;MultipleActiveResultSets=False;Encrypt=True;TrustSrverCertificate=False;Connection Timeout=30;' run: | - ./registry/test/run_test_cases.sh + pytest --cov-report term-missing --cov=registry/sql-registry/registry --cov-config=registry/test/.coveragerc registry/test/test_sql_registry.py + pytest --cov-report term-missing --cov=registry/purview-registry/registry --cov-config=registry/test/.coveragerc registry/test/test_purview_registry.py failure_notification: # If any failure, warning message will be sent From 7aca5c51796d047e91c06c572d3e2eb4be99b6cf Mon Sep 17 00:00:00 2001 From: enya-yx Date: Thu, 5 Jan 2023 15:10:41 +0800 Subject: [PATCH 06/17] quick fix --- .github/workflows/pull_request_push_test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pull_request_push_test.yml b/.github/workflows/pull_request_push_test.yml index 1bc5b7afa..fc3b02bf5 100644 --- a/.github/workflows/pull_request_push_test.yml +++ b/.github/workflows/pull_request_push_test.yml @@ -277,6 +277,7 @@ jobs: - name: Install Feathr Package run: | python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi if [ -f ./registry/purview-registry/requirements.txt ]; then pip install -r ./registry/purview-registry/requirements.txt; fi if [ -f ./registry/sql-registry/requirements.txt ]; then pip install -r ./registry/sql-registry/requirements.txt; fi - name: Run Registry Test Cases From a4ca7ca84cb697d0607ac828b874e0e03e23230e Mon Sep 17 00:00:00 2001 From: enya-yx Date: Thu, 5 Jan 2023 15:23:45 +0800 Subject: [PATCH 07/17] quick fix --- .github/workflows/pull_request_push_test.yml | 2 +- registry/test/requirements.txt | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 registry/test/requirements.txt diff --git a/.github/workflows/pull_request_push_test.yml b/.github/workflows/pull_request_push_test.yml index fc3b02bf5..8a5433aa3 100644 --- a/.github/workflows/pull_request_push_test.yml +++ b/.github/workflows/pull_request_push_test.yml @@ -277,7 +277,7 @@ jobs: - name: Install Feathr Package run: | python -m pip install --upgrade pip - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + if [ -f ./registry/test/requirements.txt ]; then pip install -r ./registry/test/requirements.txt; fi if [ -f ./registry/purview-registry/requirements.txt ]; then pip install -r ./registry/purview-registry/requirements.txt; fi if [ -f ./registry/sql-registry/requirements.txt ]; then pip install -r ./registry/sql-registry/requirements.txt; fi - name: Run Registry Test Cases diff --git a/registry/test/requirements.txt b/registry/test/requirements.txt new file mode 100644 index 000000000..bfbd2ad5c --- /dev/null +++ b/registry/test/requirements.txt @@ -0,0 +1,4 @@ +pytest>=7 +pytest-cov +pytest-xdist +pytest-mock>=3.8.1 \ No newline at end of file From 86ae6c478d244913804f1ebe9369f87f19ff930c Mon Sep 17 00:00:00 2001 From: enya-yx Date: Thu, 5 Jan 2023 15:40:22 +0800 Subject: [PATCH 08/17] quick fix --- registry/sql-registry/registry/models.py | 28 ++++++++++++------------ 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/registry/sql-registry/registry/models.py b/registry/sql-registry/registry/models.py index f032099af..61edbfb52 100644 --- a/registry/sql-registry/registry/models.py +++ b/registry/sql-registry/registry/models.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from enum import Enum -from typing import Optional, Union +from typing import Optional, Union, List from uuid import UUID import json import re @@ -133,7 +133,7 @@ class FeatureType(ToDict): def __init__(self, type: Union[str, VectorType], tensor_category: Union[str, TensorCategory], - dimension_type: list[Union[str, ValueType]], + dimension_type: List[Union[str, ValueType]], val_type: Union[str, ValueType]): self.type = _to_type(type, VectorType) self.tensor_category = _to_type(tensor_category, TensorCategory) @@ -353,7 +353,7 @@ def to_dict(self) -> dict: class ProjectAttributes(Attributes): def __init__(self, name: str, - children: list[Union[dict, Entity]] = [], + children: List[Union[dict, Entity]] = [], tags: dict = {}, **kwargs): self.name = name @@ -371,7 +371,7 @@ def children(self): return self._children @children.setter - def children(self, v: list[Union[dict, Entity]]): + def children(self, v: List[Union[dict, Entity]]): for f in v: if isinstance(f, Entity): self._children.append(f) @@ -524,7 +524,7 @@ def __init__(self, name: str, type: Union[dict, FeatureType], transformation: Union[dict, Transformation], - key: list[Union[dict, TypedKey]], + key: List[Union[dict, TypedKey]], tags: dict = {}): self.qualified_name = qualified_name self.name = name @@ -554,9 +554,9 @@ def __init__(self, name: str, type: Union[dict, FeatureType], transformation: Union[dict, Transformation], - key: list[Union[dict, TypedKey]], - input_anchor_features: list[Union[dict, EntityRef, Entity]] = [], - input_derived_features: list[Union[dict, EntityRef, Entity]] = [], + key: List[Union[dict, TypedKey]], + input_anchor_features: List[Union[dict, EntityRef, Entity]] = [], + input_derived_features: List[Union[dict, EntityRef, Entity]] = [], tags: dict = {}, **kwargs): self.qualified_name = qualified_name @@ -650,7 +650,7 @@ def to_dict(self) -> dict: class EntitiesAndRelations(ToDict): - def __init__(self, entities: list[Entity], edges: list[Edge]): + def __init__(self, entities: List[Entity], edges: List[Edge]): self.entities = dict([(e.id, e) for e in entities]) self.edges = set(edges) @@ -723,7 +723,7 @@ def __init__(self, name: str, feature_type: Union[dict, FeatureType], transformation: Union[dict, Transformation], - key: list[Union[dict, TypedKey]], + key: List[Union[dict, TypedKey]], qualified_name: str = "", tags: dict = {}): self.qualified_name = qualified_name @@ -747,9 +747,9 @@ def __init__(self, name: str, feature_type: Union[dict, FeatureType], transformation: Union[dict, Transformation], - key: list[Union[dict, TypedKey]], - input_anchor_features: list[Union[str, UUID]], - input_derived_features: list[Union[str, UUID]], + key: List[Union[dict, TypedKey]], + input_anchor_features: List[Union[str, UUID]], + input_derived_features: List[Union[str, UUID]], qualified_name: str = "", tags: dict = {}): self.qualified_name = qualified_name @@ -761,7 +761,7 @@ def __init__(self, self.input_derived_features = _to_uuid(input_derived_features) self.tags = tags - def to_attr(self, input_features: list[EntityRef]) -> DerivedFeatureAttributes: + def to_attr(self, input_features: List[EntityRef]) -> DerivedFeatureAttributes: attr = DerivedFeatureAttributes(qualified_name=self.qualified_name, name=self.name, type=self.feature_type, From b6275ce903e13c2dfaefcf8a760464b9bd604e59 Mon Sep 17 00:00:00 2001 From: enya-yx Date: Thu, 5 Jan 2023 15:48:34 +0800 Subject: [PATCH 09/17] quick fix --- registry/purview-registry/registry/models.py | 28 +++++++++---------- .../registry/purview_registry.py | 26 ++++++++--------- registry/sql-registry/registry/database.py | 5 ++-- registry/sql-registry/registry/db_registry.py | 22 +++++++-------- registry/sql-registry/registry/interface.py | 14 +++++----- 5 files changed, 48 insertions(+), 47 deletions(-) diff --git a/registry/purview-registry/registry/models.py b/registry/purview-registry/registry/models.py index 8bcdf79de..b2b0e1b1b 100644 --- a/registry/purview-registry/registry/models.py +++ b/registry/purview-registry/registry/models.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from enum import Enum -from typing import Optional, Union +from typing import Optional, Union, List from uuid import UUID import json import re @@ -142,7 +142,7 @@ class FeatureType(ToDict): def __init__(self, type: Union[str, VectorType], tensor_category: Union[str, TensorCategory], - dimension_type: list[Union[str, ValueType]], + dimension_type: List[Union[str, ValueType]], val_type: Union[str, ValueType]): self.type = _to_type(type, VectorType) self.tensor_category = _to_type(tensor_category, TensorCategory) @@ -369,7 +369,7 @@ def to_min_repr(self) -> dict: class ProjectAttributes(Attributes): def __init__(self, name: str, - children: list[Union[dict, Entity]] = [], + children: List[Union[dict, Entity]] = [], tags: dict = {}, **kwargs): self.name = name @@ -387,7 +387,7 @@ def children(self): return self._children @children.setter - def children(self, v: list[Union[dict, Entity]]): + def children(self, v: List[Union[dict, Entity]]): for f in v: if isinstance(f, Entity): self._children.append(f) @@ -547,7 +547,7 @@ def __init__(self, name: str, type: Union[dict, FeatureType], transformation: Union[dict, Transformation], - key: list[Union[dict, TypedKey]], + key: List[Union[dict, TypedKey]], tags: dict = {}): self.qualified_name = qualified_name self.name = name @@ -577,9 +577,9 @@ def __init__(self, name: str, type: Union[dict, FeatureType], transformation: Union[dict, Transformation], - key: list[Union[dict, TypedKey]], - input_anchor_features: list[Union[dict, EntityRef, Entity]] = [], - input_derived_features: list[Union[dict, EntityRef, Entity]] = [], + key: List[Union[dict, TypedKey]], + input_anchor_features: List[Union[dict, EntityRef, Entity]] = [], + input_derived_features: List[Union[dict, EntityRef, Entity]] = [], tags: dict = {}, **kwargs): self.qualified_name = qualified_name @@ -674,7 +674,7 @@ def to_dict(self) -> dict: class EntitiesAndRelations(ToDict): - def __init__(self, entities: list[Entity], edges: list[Edge]): + def __init__(self, entities: List[Entity], edges: List[Edge]): self.entities = dict([(e.id, e) for e in entities]) self.edges = set(edges) @@ -747,7 +747,7 @@ def __init__(self, name: str, feature_type: Union[dict, FeatureType], transformation: Union[dict, Transformation], - key: list[Union[dict, TypedKey]], + key: List[Union[dict, TypedKey]], qualified_name: str = "", tags: dict = {}): self.qualified_name = qualified_name @@ -771,9 +771,9 @@ def __init__(self, name: str, feature_type: Union[dict, FeatureType], transformation: Union[dict, Transformation], - key: list[Union[dict, TypedKey]], - input_anchor_features: list[Union[str, UUID]], - input_derived_features: list[Union[str, UUID]], + key: List[Union[dict, TypedKey]], + input_anchor_features: List[Union[str, UUID]], + input_derived_features: List[Union[str, UUID]], qualified_name: str = "", tags: dict = {}): self.qualified_name = qualified_name @@ -785,7 +785,7 @@ def __init__(self, self.input_derived_features = _to_uuid(input_derived_features) self.tags = tags - def to_attr(self, input_features: list[EntityRef]) -> DerivedFeatureAttributes: + def to_attr(self, input_features: List[EntityRef]) -> DerivedFeatureAttributes: attr = DerivedFeatureAttributes(qualified_name=self.qualified_name, name=self.name, type=self.feature_type, diff --git a/registry/purview-registry/registry/purview_registry.py b/registry/purview-registry/registry/purview_registry.py index dea0c127d..2cbc97b8a 100644 --- a/registry/purview-registry/registry/purview_registry.py +++ b/registry/purview-registry/registry/purview_registry.py @@ -1,7 +1,7 @@ import copy from http.client import CONFLICT, HTTPException import itertools -from typing import Any, Optional, Tuple, Union +from typing import Any, Optional, Tuple, Union, List from urllib.error import HTTPError from uuid import UUID @@ -52,7 +52,7 @@ def __init__(self,azure_purview_name: str, registry_delimiter: str = "__", crede if register_types: self._register_feathr_feature_types() - def get_projects(self) -> list[str]: + def get_projects(self) -> List[str]: """ Returns the names of all projects """ @@ -117,7 +117,7 @@ def _atlasEntity_to_entity(self, purview_entity): return base_entity - def get_entities(self, ids: list[UUID],recursive=False) -> list[Entity]: + def get_entities(self, ids: List[UUID],recursive=False) -> List[Entity]: """ Get list of entities by their ids """ @@ -132,7 +132,7 @@ def get_entity_id(self, id_or_name: Union[str, UUID]) -> UUID: # It is a name return self._get_id_by_qualfiedName(id_or_name) - def get_all_neighbours(self,id_or_name: Union[str, UUID]) -> list[Edge]: + def get_all_neighbours(self,id_or_name: Union[str, UUID]) -> List[Edge]: entity = self.get_entity(id_or_name) relation_lookup = {x.name.upper():x for x in RelationshipType} related_entities = self.purview_client.get_entity_lineage(str(entity.id),direction="BOTH")['guidEntityMap'] @@ -162,7 +162,7 @@ def get_all_neighbours(self,id_or_name: Union[str, UUID]) -> list[Edge]: for x in out_edges]) return result_edges - def get_neighbors(self, id_or_name: Union[str, UUID], relationship: RelationshipType) -> list[Edge]: + def get_neighbors(self, id_or_name: Union[str, UUID], relationship: RelationshipType) -> List[Edge]: """ Get list of edges with specified type that connect to this entity. The edge contains fromId and toId so we can follow to the entity it connects to @@ -199,7 +199,7 @@ def get_lineage(self, id_or_name: Union[str, UUID]) -> EntitiesAndRelations: upstream_entities + downstream_entities, upstream_edges + downstream_edges) - def get_dependent_entities(self, entity_id: Union[str, UUID]) -> list[Entity]: + def get_dependent_entities(self, entity_id: Union[str, UUID]) -> List[Entity]: """ Given entity id, returns list of all entities that are downstream/dependent on given entity """ @@ -228,7 +228,7 @@ def delete_entity(self, entity_id: Union[str, UUID]): #Delete entity self.purview_client.delete_entity(str(entity_id)) - def _get_edges(self, ids: list[UUID]) -> list[Edge]: + def _get_edges(self, ids: List[UUID]) -> List[Edge]: all_edges = set() for id in ids: neighbours = self.get_all_neighbours(id) @@ -242,7 +242,7 @@ def _create_edge_from_process(self, name:str, guid: str) -> Edge: names = name.split(self.registry_delimiter) return Edge(guid, names[1], names[2], RelationshipType.new(names[0])) - def get_project_features(self, project:str, keywords:Optional[str] = None) -> list[Entity]: + def get_project_features(self, project:str, keywords:Optional[str] = None) -> List[Entity]: project_id = self.get_entity_id(project) if not project_id: return None @@ -313,8 +313,8 @@ def get_project(self, id_or_name: Union[str, UUID]) -> EntitiesAndRelations: def search_entity(self, keyword: str, - type: list[EntityType], - project: Optional[Union[str, UUID]] = None) -> list[EntityRef]: + type: List[EntityType], + project: Optional[Union[str, UUID]] = None) -> List[EntityRef]: """ Search entities with specified type that also match the keyword in a project """ @@ -462,7 +462,7 @@ def create_project_derived_feature(self, project_id: UUID, definition: DerivedFe + consume_produce_pairs) return derived_feature_entity.id - def _bfs(self, id: UUID, conn_type: RelationshipType) -> Tuple[list[Entity], list[Edge]]: + def _bfs(self, id: UUID, conn_type: RelationshipType) -> Tuple[List[Entity], List[Edge]]: """ Breadth first traversal Starts from `id`, follow edges with `conn_type` only. @@ -486,7 +486,7 @@ def _bfs(self, id: UUID, conn_type: RelationshipType) -> Tuple[list[Entity], lis - def _bfs_step(self, ids: list[UUID], conn_type: RelationshipType) -> list[Edge]: + def _bfs_step(self, ids: List[UUID], conn_type: RelationshipType) -> List[Edge]: """ One step of the BFS process Returns all edges that connect to node ids the next step @@ -607,7 +607,7 @@ def _register_feathr_feature_types(self): force_update=True) logger.info("Feathr Feature Type System Initialized.") - def _upload_entity_batch(self, entity_batch:list[AtlasEntity]): + def _upload_entity_batch(self, entity_batch:List[AtlasEntity]): # we only support entity creation, update is not supported. # setting lastModifiedTS ==0 will ensure this, if another entity with ts>=1 exist # upload function will fail with 412 Precondition fail. diff --git a/registry/sql-registry/registry/database.py b/registry/sql-registry/registry/database.py index 084bc4cbe..8de3d8b87 100644 --- a/registry/sql-registry/registry/database.py +++ b/registry/sql-registry/registry/database.py @@ -3,6 +3,7 @@ import logging import threading import os +from typing import List # Checks if the platform is Max (Darwin). # If so, imports _scproxy that is necessary for pymssql to work on MacOS @@ -17,7 +18,7 @@ class DbConnection(ABC): @abstractmethod - def query(self, sql: str, *args, **kwargs) -> list[dict]: + def query(self, sql: str, *args, **kwargs) -> List[dict]: pass # already has one in 'db_registry.py'; shall we remove it? @@ -66,7 +67,7 @@ def __init__(self, params): def make_connection(self): self.conn = pymssql.connect(**self.params) - def query(self, sql: str, *args, **kwargs) -> list[dict]: + def query(self, sql: str, *args, **kwargs) -> List[dict]: """ Make SQL query and return result """ diff --git a/registry/sql-registry/registry/db_registry.py b/registry/sql-registry/registry/db_registry.py index d0b4c75c5..fc0c260f1 100644 --- a/registry/sql-registry/registry/db_registry.py +++ b/registry/sql-registry/registry/db_registry.py @@ -1,6 +1,6 @@ from typing import Optional, Tuple, Union from uuid import UUID, uuid4 - +from typing import List from pydantic import UUID4 from registry import Registry from registry import connect @@ -23,7 +23,7 @@ class DbRegistry(Registry): def __init__(self): self.conn = connect() - def get_projects(self) -> list[str]: + def get_projects(self) -> List[str]: ret = self.conn.query( f"select qualified_name from entities where entity_type=%s", str(EntityType.Project)) return list([r["qualified_name"] for r in ret]) @@ -39,7 +39,7 @@ def get_projects_ids(self) -> dict: def get_entity(self, id_or_name: Union[str, UUID]) -> Entity: return self._fill_entity(self._get_entity(id_or_name)) - def get_entities(self, ids: list[UUID]) -> list[Entity]: + def get_entities(self, ids: List[UUID]) -> List[Entity]: return list([self._fill_entity(e) for e in self._get_entities(ids)]) def get_entity_id(self, id_or_name: Union[str, UUID]) -> UUID: @@ -55,7 +55,7 @@ def get_entity_id(self, id_or_name: Union[str, UUID]) -> UUID: raise KeyError(f"Entity {id_or_name} not found") return ret[0]["entity_id"] - def get_neighbors(self, id_or_name: Union[str, UUID], relationship: RelationshipType) -> list[Edge]: + def get_neighbors(self, id_or_name: Union[str, UUID], relationship: RelationshipType) -> List[Edge]: rows = self.conn.query(fr''' select edge_id, from_id, to_id, conn_type from edges @@ -106,7 +106,7 @@ def get_project(self, id_or_name: Union[str, UUID]) -> EntitiesAndRelations: all_edges = self._get_edges(ids) return EntitiesAndRelations([project] + children, list(edges.union(all_edges))) - def get_dependent_entities(self, entity_id: Union[str, UUID]) -> list[Entity]: + def get_dependent_entities(self, entity_id: Union[str, UUID]) -> List[Entity]: """ Given entity id, returns list of all entities that are downstream/dependant on the given entity """ @@ -134,10 +134,10 @@ def delete_entity(self, entity_id: Union[str, UUID]): def search_entity(self, keyword: str, - type: list[EntityType], + type: List[EntityType], project: Optional[Union[str, UUID]] = None, start: Optional[int] = None, - size: Optional[int] = None) -> list[EntityRef]: + size: Optional[int] = None) -> List[EntityRef]: """ WARN: This search function is implemented via `like` operator, which could be extremely slow. """ @@ -456,7 +456,7 @@ def _fill_entity(self, e: Entity) -> Entity: return e return e - def _get_edges(self, ids: list[UUID], types: list[RelationshipType] = []) -> list[Edge]: + def _get_edges(self, ids: List[UUID], types: List[RelationshipType] = []) -> List[Edge]: if not ids: return [] sql = fr"""select edge_id, from_id, to_id, conn_type from edges @@ -486,7 +486,7 @@ def _get_entity(self, id_or_name: Union[str, UUID]) -> Entity: row["attributes"] = json.loads(row["attributes"]) return _to_type(row, Entity) - def _get_entities(self, ids: list[UUID]) -> list[Entity]: + def _get_entities(self, ids: List[UUID]) -> List[Entity]: if not ids: return [] rows = self.conn.query(fr'''select entity_id, qualified_name, entity_type, attributes @@ -499,7 +499,7 @@ def _get_entities(self, ids: list[UUID]) -> list[Entity]: ret.append(Entity(**row)) return ret - def _bfs(self, id: UUID, conn_type: RelationshipType) -> Tuple[list[Entity], list[Edge]]: + def _bfs(self, id: UUID, conn_type: RelationshipType) -> Tuple[List[Entity], List[Edge]]: """ Breadth first traversal Starts from `id`, follow edges with `conn_type` only. @@ -522,7 +522,7 @@ def _bfs(self, id: UUID, conn_type: RelationshipType) -> Tuple[list[Entity], lis edges = list([Edge(**c) for c in connections]) return (entities, edges) - def _bfs_step(self, ids: list[UUID], conn_type: RelationshipType) -> set[dict]: + def _bfs_step(self, ids: List[UUID], conn_type: RelationshipType) -> set[dict]: """ One step of the BFS process Returns all edges that connect to node ids the next step diff --git a/registry/sql-registry/registry/interface.py b/registry/sql-registry/registry/interface.py index 62f6071cd..c00db09e2 100644 --- a/registry/sql-registry/registry/interface.py +++ b/registry/sql-registry/registry/interface.py @@ -1,5 +1,5 @@ from abc import ABC, abstractclassmethod, abstractmethod -from typing import Union +from typing import Union, List from uuid import UUID from registry.database import DbConnection @@ -8,7 +8,7 @@ class Registry(ABC): @abstractmethod - def get_projects(self) -> list[str]: + def get_projects(self) -> List[str]: """ Returns the names of all projects """ @@ -29,7 +29,7 @@ def get_entity(self, id_or_name: Union[str, UUID]) -> Entity: pass @abstractmethod - def get_entities(self, ids: list[UUID]) -> list[Entity]: + def get_entities(self, ids: List[UUID]) -> List[Entity]: """ Get list of entities by their ids """ @@ -43,7 +43,7 @@ def get_entity_id(self, id_or_name: Union[str, UUID]) -> UUID: pass @abstractmethod - def get_neighbors(self, id_or_name: Union[str, UUID], relationship: RelationshipType) -> list[Edge]: + def get_neighbors(self, id_or_name: Union[str, UUID], relationship: RelationshipType) -> List[Edge]: """ Get list of edges with specified type that connect to this entity. The edge contains fromId and toId so we can follow to the entity it connects to @@ -68,10 +68,10 @@ def get_project(self, id_or_name: Union[str, UUID]) -> EntitiesAndRelations: @abstractmethod def search_entity(self, keyword: str, - type: list[EntityType], + type: List[EntityType], project: Optional[Union[str, UUID]] = None, start: Optional[int] = None, - size: Optional[int] = None) -> list[EntityRef]: + size: Optional[int] = None) -> List[EntityRef]: """ Search entities with specified type that also match the keyword in a project """ @@ -113,7 +113,7 @@ def create_project_derived_feature(self, project_id: UUID, definition: DerivedFe pass @abstractmethod - def get_dependent_entities(self, entity_id: Union[str, UUID]) -> list[Entity]: + def get_dependent_entities(self, entity_id: Union[str, UUID]) -> List[Entity]: """ Given entity id, returns list of all entities that are downstream/dependant on the given entity """ From cc17192348bf747f7d7595a8ffde4e24cc4154ac Mon Sep 17 00:00:00 2001 From: enya-yx Date: Thu, 5 Jan 2023 15:56:28 +0800 Subject: [PATCH 10/17] quick fix --- registry/sql-registry/registry/db_registry.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/registry/sql-registry/registry/db_registry.py b/registry/sql-registry/registry/db_registry.py index fc0c260f1..5a4cc1666 100644 --- a/registry/sql-registry/registry/db_registry.py +++ b/registry/sql-registry/registry/db_registry.py @@ -1,6 +1,6 @@ from typing import Optional, Tuple, Union from uuid import UUID, uuid4 -from typing import List +from typing import List, Set from pydantic import UUID4 from registry import Registry from registry import connect @@ -522,7 +522,7 @@ def _bfs(self, id: UUID, conn_type: RelationshipType) -> Tuple[List[Entity], Lis edges = list([Edge(**c) for c in connections]) return (entities, edges) - def _bfs_step(self, ids: List[UUID], conn_type: RelationshipType) -> set[dict]: + def _bfs_step(self, ids: List[UUID], conn_type: RelationshipType) -> Set[dict]: """ One step of the BFS process Returns all edges that connect to node ids the next step From 3ba61a8576d2fa0c12f2e9fe90409b4cfb1715a1 Mon Sep 17 00:00:00 2001 From: enya-yx Date: Thu, 5 Jan 2023 16:02:54 +0800 Subject: [PATCH 11/17] quick fix --- registry/purview-registry/registry/interface.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/registry/purview-registry/registry/interface.py b/registry/purview-registry/registry/interface.py index 2e60cc32d..c04c94b6f 100644 --- a/registry/purview-registry/registry/interface.py +++ b/registry/purview-registry/registry/interface.py @@ -1,12 +1,12 @@ from abc import ABC, abstractclassmethod, abstractmethod -from typing import Union +from typing import Union, List from uuid import UUID from registry.models import * class Registry(ABC): @abstractmethod - def get_projects(self) -> list[str]: + def get_projects(self) -> List[str]: """ Returns the names of all projects """ @@ -27,7 +27,7 @@ def get_entity(self, id_or_name: Union[str, UUID],recursive = False) -> Entity: pass @abstractmethod - def get_entities(self, ids: list[UUID]) -> list[Entity]: + def get_entities(self, ids: List[UUID]) -> List[Entity]: """ Get list of entities by their ids """ @@ -41,7 +41,7 @@ def get_entity_id(self, id_or_name: Union[str, UUID]) -> UUID: pass @abstractmethod - def get_neighbors(self, id_or_name: Union[str, UUID], relationship: RelationshipType) -> list[Edge]: + def get_neighbors(self, id_or_name: Union[str, UUID], relationship: RelationshipType) -> List[Edge]: """ Get list of edges with specified type that connect to this entity. The edge contains fromId and toId so we can follow to the entity it connects to @@ -66,8 +66,8 @@ def get_project(self, id_or_name: Union[str, UUID]) -> EntitiesAndRelations: @abstractmethod def search_entity(self, keyword: str, - type: list[EntityType], - project: Optional[Union[str, UUID]] = None) -> list[EntityRef]: + type: List[EntityType], + project: Optional[Union[str, UUID]] = None) -> List[EntityRef]: """ Search entities with specified type that also match the keyword in a project """ @@ -94,7 +94,7 @@ def create_project_derived_feature(self, project_id: UUID, definition: DerivedFe pass @abstractmethod - def get_dependent_entities(self, entity_id: Union[str, UUID]) -> list[Entity]: + def get_dependent_entities(self, entity_id: Union[str, UUID]) -> List[Entity]: """ Given entity id, returns list of all entities that are downstream/dependent on given entity """ From c7e2e370bc469fed4d050fa6197b497ee9dde9a0 Mon Sep 17 00:00:00 2001 From: enya-yx Date: Thu, 5 Jan 2023 16:46:57 +0800 Subject: [PATCH 12/17] quick fix --- .github/workflows/pull_request_push_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pull_request_push_test.yml b/.github/workflows/pull_request_push_test.yml index 8a5433aa3..88f27cbbf 100644 --- a/.github/workflows/pull_request_push_test.yml +++ b/.github/workflows/pull_request_push_test.yml @@ -285,8 +285,8 @@ jobs: AZURE_CLIENT_ID: ${{secrets.AZURE_CLIENT_ID}} AZURE_TENANT_ID: ${{secrets.AZURE_TENANT_ID}} AZURE_CLIENT_SECRET: ${{secrets.AZURE_CLIENT_SECRET}} - PURVIEW_NAME: 'feathrazuretest3-purview1' - CONNECTION_STR: 'Server=tcp:feathrtestsql4.database.windows.net,1433;Initial Catalog=testsql;Persist Securit y Info=False;User ID=feathr@feathrtestsql4;Password=Password01!;MultipleActiveResultSets=False;Encrypt=True;TrustSrverCertificate=False;Connection Timeout=30;' + PURVIEW_NAME: ${{secrets.PURVIEW_NAME}} + CONNECTION_STR: ${{secrets.CONNECTION_STR}} run: | pytest --cov-report term-missing --cov=registry/sql-registry/registry --cov-config=registry/test/.coveragerc registry/test/test_sql_registry.py pytest --cov-report term-missing --cov=registry/purview-registry/registry --cov-config=registry/test/.coveragerc registry/test/test_purview_registry.py From eed1d0382406cf9a3a4d6c119052faf2bf8eed99 Mon Sep 17 00:00:00 2001 From: enya-yx Date: Mon, 9 Jan 2023 17:08:16 +0800 Subject: [PATCH 13/17] modify types annotation --- registry/purview-registry/main.py | 32 +++---- .../purview-registry/registry/interface.py | 4 +- registry/purview-registry/registry/models.py | 96 +++++++++---------- .../registry/purview_registry.py | 4 +- registry/sql-registry/main.py | 32 +++---- registry/sql-registry/registry/database.py | 8 +- registry/sql-registry/registry/db_registry.py | 6 +- registry/sql-registry/registry/interface.py | 4 +- registry/sql-registry/registry/models.py | 94 +++++++++--------- 9 files changed, 140 insertions(+), 140 deletions(-) diff --git a/registry/purview-registry/main.py b/registry/purview-registry/main.py index 8044a0ef8..53c00a279 100644 --- a/registry/purview-registry/main.py +++ b/registry/purview-registry/main.py @@ -1,7 +1,7 @@ import os import traceback from re import sub -from typing import Optional +from typing import Optional, Dict, List from uuid import UUID from fastapi import APIRouter, FastAPI, HTTPException from fastapi.responses import JSONResponse @@ -46,7 +46,7 @@ def to_camel(s): ) -def exc_to_content(e: Exception) -> dict: +def exc_to_content(e: Exception) -> Dict: content={"message": str(e)} if os.environ.get("REGISTRY_DEBUGGING"): content["traceback"] = "".join(traceback.TracebackException.from_exception(e).format()) @@ -97,19 +97,19 @@ async def index_error_handler(_, exc: IndexError): ) @router.get("/projects",tags=["Project"]) -def get_projects() -> list[str]: +def get_projects() -> List[str]: return registry.get_projects() @router.get("/projects-ids") -def get_projects_ids() -> dict: +def get_projects_ids() -> Dict: return registry.get_projects_ids() @router.get("/projects/{project}",tags=["Project"]) -def get_projects(project: str) -> dict: +def get_projects(project: str) -> Dict: return to_camel(registry.get_project(project).to_dict()) @router.get("/dependent/{entity}") -def get_dependent_entities(entity: str) -> list: +def get_dependent_entities(entity: str) -> List: entity_id = registry.get_entity_id(entity) downstream_entities = registry.get_dependent_entities(entity_id) return list([e.to_dict() for e in downstream_entities]) @@ -126,7 +126,7 @@ def delete_entity(entity: str): registry.delete_entity(entity_id) @router.get("/projects/{project}/datasources",tags=["Project"]) -def get_project_datasources(project: str) -> list: +def get_project_datasources(project: str) -> List: p = registry.get_entity(project,True) source_ids = [s.id for s in p.attributes.sources] sources = registry.get_entities(source_ids) @@ -134,7 +134,7 @@ def get_project_datasources(project: str) -> list: @router.get("/projects/{project}/datasources/{datasource}",tags=["Project"]) -def get_datasource(project: str, datasource: str) -> dict: +def get_datasource(project: str, datasource: str) -> Dict: p = registry.get_entity(project,True) for s in p.attributes.sources: if str(s.id) == datasource: @@ -145,13 +145,13 @@ def get_datasource(project: str, datasource: str) -> dict: @router.get("/projects/{project}/features",tags=["Project"]) -def get_project_features(project: str, keyword: Optional[str] = None) -> list: +def get_project_features(project: str, keyword: Optional[str] = None) -> List: atlasEntities = registry.get_project_features(project, keywords=keyword) return list([to_camel(e.to_dict()) for e in atlasEntities]) @router.get("/features/{feature}",tags=["Feature"]) -def get_feature(feature: str) -> dict: +def get_feature(feature: str) -> Dict: e = registry.get_entity(feature,True) if e.entity_type not in [EntityType.DerivedFeature, EntityType.AnchorFeature]: raise HTTPException( @@ -159,33 +159,33 @@ def get_feature(feature: str) -> dict: return to_camel(e.to_dict()) @router.get("/features/{feature}/lineage",tags=["Feature"]) -def get_feature_lineage(feature: str) -> dict: +def get_feature_lineage(feature: str) -> Dict: lineage = registry.get_lineage(feature) return to_camel(lineage.to_dict()) @router.post("/projects",tags=["Project"]) -def new_project(definition: dict) -> UUID: +def new_project(definition: Dict) -> UUID: id = registry.create_project(ProjectDef(**to_snake(definition))) return {"guid": str(id)} @router.post("/projects/{project}/datasources",tags=["Project"]) -def new_project_datasource(project: str, definition: dict) -> UUID: +def new_project_datasource(project: str, definition: Dict) -> UUID: project_id = registry.get_entity_id(project) id = registry.create_project_datasource(project_id, SourceDef(**to_snake(definition))) return {"guid": str(id)} @router.post("/projects/{project}/anchors",tags=["Project"]) -def new_project_anchor(project: str, definition: dict) -> UUID: +def new_project_anchor(project: str, definition: Dict) -> UUID: project_id = registry.get_entity_id(project) id = registry.create_project_anchor(project_id, AnchorDef(**to_snake(definition))) return {"guid": str(id)} @router.post("/projects/{project}/anchors/{anchor}/features",tags=["Project"]) -def new_project_anchor_feature(project: str, anchor: str, definition: dict) -> UUID: +def new_project_anchor_feature(project: str, anchor: str, definition: Dict) -> UUID: project_id = registry.get_entity_id(project) anchor_id = registry.get_entity_id(anchor) id = registry.create_project_anchor_feature(project_id, anchor_id, AnchorFeatureDef(**to_snake(definition))) @@ -193,7 +193,7 @@ def new_project_anchor_feature(project: str, anchor: str, definition: dict) -> U @router.post("/projects/{project}/derivedfeatures",tags=["Project"]) -def new_project_derived_feature(project: str, definition: dict) -> UUID: +def new_project_derived_feature(project: str, definition: Dict) -> UUID: project_id = registry.get_entity_id(project) id = registry.create_project_derived_feature(project_id, DerivedFeatureDef(**to_snake(definition))) return {"guid": str(id)} diff --git a/registry/purview-registry/registry/interface.py b/registry/purview-registry/registry/interface.py index c04c94b6f..10867d0ef 100644 --- a/registry/purview-registry/registry/interface.py +++ b/registry/purview-registry/registry/interface.py @@ -1,5 +1,5 @@ from abc import ABC, abstractclassmethod, abstractmethod -from typing import Union, List +from typing import Union, List, Dict from uuid import UUID from registry.models import * @@ -13,7 +13,7 @@ def get_projects(self) -> List[str]: pass @abstractmethod - def get_projects_ids(self) -> dict: + def get_projects_ids(self) -> Dict: """ Returns the ids to names mapping of all projects """ diff --git a/registry/purview-registry/registry/models.py b/registry/purview-registry/registry/models.py index b2b0e1b1b..143da5055 100644 --- a/registry/purview-registry/registry/models.py +++ b/registry/purview-registry/registry/models.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from enum import Enum -from typing import Optional, Union, List +from typing import Optional, Union, List, Dict from uuid import UUID import json import re @@ -131,7 +131,7 @@ class ToDict(ABC): This ABC is used to convert object to dict, then JSON. """ @abstractmethod - def to_dict(self) -> dict: + def to_dict(self) -> Dict: pass def to_json(self, indent=None) -> str: @@ -155,7 +155,7 @@ def __eq__(self, o: object) -> bool: and self.dimension_type == o.dimension_type \ and self.val_type == o.val_type - def to_dict(self) -> dict: + def to_dict(self) -> Dict: return { "type": self.type.name, "tensorCategory": self.tensor_category.name, @@ -184,7 +184,7 @@ def __eq__(self, o: object) -> bool: and self.key_column_type == o.key_column_type \ and self.key_column_alias == o.key_column_alias - def to_dict(self) -> dict: + def to_dict(self) -> Dict: ret = { "key_column": self.key_column, "key_column_type": self.key_column_type.name, @@ -220,7 +220,7 @@ def __eq__(self, o: object) -> bool: return False return self.transform_expr == o.transform_expr - def to_dict(self) -> dict: + def to_dict(self) -> Dict: return { "transform_expr": self.transform_expr } @@ -251,7 +251,7 @@ def __eq__(self, o: object) -> bool: and self.filter == o.filter \ and self.limit == o.limit - def to_dict(self) -> dict: + def to_dict(self) -> Dict: ret = { "def_expr": self.def_expr, } @@ -277,7 +277,7 @@ def __eq__(self, o: object) -> bool: return False return self.name == o.name - def to_dict(self) -> dict: + def to_dict(self) -> Dict: return { "name": self.name } @@ -288,7 +288,7 @@ def __init__(self, id: UUID, type: Union[str, EntityType], qualified_name: Optional[str] = None, - uniq_attr: dict = {}): + uniq_attr: Dict = {}): self.id = id self.type = _to_type(type, EntityType) if qualified_name is not None: @@ -307,7 +307,7 @@ def qualified_name(self) -> EntityType: def get_ref(self): return self - def to_dict(self) -> dict: + def to_dict(self) -> Dict: return { "guid": str(self.id), "typeName": str(self.type), @@ -332,7 +332,7 @@ def __init__(self, entity_id: Union[str, UUID], qualified_name: str, entity_type: Union[str, EntityType], - attributes: Union[dict, Attributes], + attributes: Union[Dict, Attributes], **kwargs): self.id = _to_uuid(entity_id) self.qualified_name = qualified_name @@ -348,7 +348,7 @@ def get_ref(self) -> EntityRef: self.attributes.entity_type, self.qualified_name) - def to_dict(self) -> dict: + def to_dict(self) -> Dict: return { "guid": str(self.id), "lastModifiedTS": "1", @@ -358,7 +358,7 @@ def to_dict(self) -> dict: "attributes": self.attributes.to_dict(), } - def to_min_repr(self) -> dict: + def to_min_repr(self) -> Dict: return { 'qualifiedName':self.qualified_name, 'guid':str(self.id), @@ -369,8 +369,8 @@ def to_min_repr(self) -> dict: class ProjectAttributes(Attributes): def __init__(self, name: str, - children: List[Union[dict, Entity]] = [], - tags: dict = {}, + children: List[Union[Dict, Entity]] = [], + tags: Dict = {}, **kwargs): self.name = name self.tags = tags @@ -387,7 +387,7 @@ def children(self): return self._children @children.setter - def children(self, v: List[Union[dict, Entity]]): + def children(self, v: List[Union[Dict, Entity]]): for f in v: if isinstance(f, Entity): self._children.append(f) @@ -416,7 +416,7 @@ def derived_features(self): return [ e for e in self.children if e.entity_type == EntityType.DerivedFeature] - def to_dict(self) -> dict: + def to_dict(self) -> Dict: return { "qualifiedName": self.name, "name": self.name, @@ -437,7 +437,7 @@ def __init__(self, preprocessing: Optional[str] = None, event_timestamp_column: Optional[str] = None, timestamp_format: Optional[str] = None, - tags: dict = {}): + tags: Dict = {}): self.qualified_name = qualified_name self.name = name self.type = type @@ -451,7 +451,7 @@ def __init__(self, def entity_type(self) -> EntityType: return EntityType.Source - def to_dict(self) -> dict: + def to_dict(self) -> Dict: ret = { "qualifiedName": self.qualified_name, "name": self.name, @@ -475,9 +475,9 @@ class AnchorAttributes(Attributes): def __init__(self, qualified_name: str, name: str, - # source: Optional[Union[dict, EntityRef, Entity]] = None, - # features: list[Union[dict, EntityRef, Entity]] = [], - tags: dict = {}, + # source: Optional[Union[Dict, EntityRef, Entity]] = None, + # features: list[Union[Dict, EntityRef, Entity]] = [], + tags: Dict = {}, **kwargs): self.qualified_name = qualified_name self.name = name @@ -527,7 +527,7 @@ def features(self, features): else: raise TypeError(f) - def to_dict(self) -> dict: + def to_dict(self) -> Dict: ret = { "qualifiedName": self.qualified_name, "name": self.name, @@ -545,10 +545,10 @@ class AnchorFeatureAttributes(Attributes): def __init__(self, qualified_name: str, name: str, - type: Union[dict, FeatureType], - transformation: Union[dict, Transformation], - key: List[Union[dict, TypedKey]], - tags: dict = {}): + type: Union[Dict, FeatureType], + transformation: Union[Dict, Transformation], + key: List[Union[Dict, TypedKey]], + tags: Dict = {}): self.qualified_name = qualified_name self.name = name self.type = _to_type(type, FeatureType) @@ -560,7 +560,7 @@ def __init__(self, def entity_type(self) -> EntityType: return EntityType.AnchorFeature - def to_dict(self) -> dict: + def to_dict(self) -> Dict: return { "qualifiedName": self.qualified_name, "name": self.name, @@ -575,12 +575,12 @@ class DerivedFeatureAttributes(Attributes): def __init__(self, qualified_name: str, name: str, - type: Union[dict, FeatureType], - transformation: Union[dict, Transformation], - key: List[Union[dict, TypedKey]], - input_anchor_features: List[Union[dict, EntityRef, Entity]] = [], - input_derived_features: List[Union[dict, EntityRef, Entity]] = [], - tags: dict = {}, + type: Union[Dict, FeatureType], + transformation: Union[Dict, Transformation], + key: List[Union[Dict, TypedKey]], + input_anchor_features: List[Union[Dict, EntityRef, Entity]] = [], + input_derived_features: List[Union[Dict, EntityRef, Entity]] = [], + tags: Dict = {}, **kwargs): self.qualified_name = qualified_name self.name = name @@ -600,7 +600,7 @@ def input_features(self): return self._input_anchor_features + self._input_derived_features @input_features.setter - def input_features(self, input_features_list: Union[dict, Entity, EntityRef]): + def input_features(self, input_features_list: Union[Dict, Entity, EntityRef]): self._input_anchor_features = [] self._input_derived_features = [] for feature in input_features_list: @@ -632,7 +632,7 @@ def input_anchor_features(self): def input_derived_features(self): return self._input_derived_features - def to_dict(self) -> dict: + def to_dict(self) -> Dict: return { "qualifiedName": self.qualified_name, "name": self.name, @@ -663,7 +663,7 @@ def __eq__(self, o: object) -> bool: def __hash__(self) -> int: return hash((self.from_id, self.to_id, self.conn_type)) - def to_dict(self) -> dict: + def to_dict(self) -> Dict: return { "relationshipId": str(self.id), "fromEntityId": str(self.from_id), @@ -678,7 +678,7 @@ def __init__(self, entities: List[Entity], edges: List[Edge]): self.entities = dict([(e.id, e) for e in entities]) self.edges = set(edges) - def to_dict(self) -> dict: + def to_dict(self) -> Dict: return { "guidEntityMap": dict([(str(id), self.entities[id].to_dict()) for id in self.entities]), "relations": list([e.to_dict() for e in self.edges]), @@ -686,7 +686,7 @@ def to_dict(self) -> dict: class ProjectDef: - def __init__(self, name: str, qualified_name: str = "", tags: dict = {}): + def __init__(self, name: str, qualified_name: str = "", tags: Dict = {}): self.name = name self.qualified_name = qualified_name self.tags = tags @@ -704,7 +704,7 @@ def __init__(self, preprocessing: Optional[str] = None, event_timestamp_column: Optional[str] = None, timestamp_format: Optional[str] = None, - tags: dict = {}): + tags: Dict = {}): self.qualified_name = qualified_name self.name = name self.path = path @@ -729,7 +729,7 @@ def __init__(self, name: str, source_id: Union[str, UUID], qualified_name: str = "", - tags: dict = {}): + tags: Dict = {}): self.qualified_name = qualified_name self.name = name self.source_id = _to_uuid(source_id) @@ -745,11 +745,11 @@ def to_attr(self, source: EntityRef) -> AnchorAttributes: class AnchorFeatureDef: def __init__(self, name: str, - feature_type: Union[dict, FeatureType], - transformation: Union[dict, Transformation], - key: List[Union[dict, TypedKey]], + feature_type: Union[Dict, FeatureType], + transformation: Union[Dict, Transformation], + key: List[Union[Dict, TypedKey]], qualified_name: str = "", - tags: dict = {}): + tags: Dict = {}): self.qualified_name = qualified_name self.name = name self.feature_type = _to_type(feature_type, FeatureType) @@ -769,13 +769,13 @@ def to_attr(self) -> AnchorFeatureAttributes: class DerivedFeatureDef: def __init__(self, name: str, - feature_type: Union[dict, FeatureType], - transformation: Union[dict, Transformation], - key: List[Union[dict, TypedKey]], + feature_type: Union[Dict, FeatureType], + transformation: Union[Dict, Transformation], + key: List[Union[Dict, TypedKey]], input_anchor_features: List[Union[str, UUID]], input_derived_features: List[Union[str, UUID]], qualified_name: str = "", - tags: dict = {}): + tags: Dict = {}): self.qualified_name = qualified_name self.name = name self.feature_type = _to_type(feature_type, FeatureType) diff --git a/registry/purview-registry/registry/purview_registry.py b/registry/purview-registry/registry/purview_registry.py index 2cbc97b8a..957c3b017 100644 --- a/registry/purview-registry/registry/purview_registry.py +++ b/registry/purview-registry/registry/purview_registry.py @@ -1,7 +1,7 @@ import copy from http.client import CONFLICT, HTTPException import itertools -from typing import Any, Optional, Tuple, Union, List +from typing import Any, Optional, Tuple, Union, List, Dict from urllib.error import HTTPError from uuid import UUID @@ -61,7 +61,7 @@ def get_projects(self) -> List[str]: result_entities = result['value'] return [x['qualifiedName'] for x in result_entities] - def get_projects_ids(self) -> dict: + def get_projects_ids(self) -> Dict: """ Returns the names and ids of all projects""" searchTerm = {"entityType": str(EntityType.Project)} diff --git a/registry/sql-registry/main.py b/registry/sql-registry/main.py index dcb4d79cb..c11329608 100644 --- a/registry/sql-registry/main.py +++ b/registry/sql-registry/main.py @@ -1,6 +1,6 @@ import os import traceback -from typing import Optional +from typing import Optional, Dict, List from uuid import UUID from fastapi import APIRouter, FastAPI, HTTPException from fastapi.responses import JSONResponse @@ -30,7 +30,7 @@ allow_headers=["*"], ) -def exc_to_content(e: Exception) -> dict: +def exc_to_content(e: Exception) -> Dict: content={"message": str(e)} if os.environ.get("REGISTRY_DEBUGGING"): content["traceback"] = "".join(traceback.TracebackException.from_exception(e).format()) @@ -75,19 +75,19 @@ async def index_error_handler(_, exc: IndexError): @router.get("/projects") -def get_projects() -> list[str]: +def get_projects() -> List[str]: return registry.get_projects() @router.get("/projects-ids") -def get_projects_ids() -> dict: +def get_projects_ids() -> Dict: return registry.get_projects_ids() @router.get("/projects/{project}") -def get_projects(project: str) -> dict: +def get_projects(project: str) -> Dict: return registry.get_project(project).to_dict() @router.get("/dependent/{entity}") -def get_dependent_entities(entity: str) -> list: +def get_dependent_entities(entity: str) -> List: entity_id = registry.get_entity_id(entity) downstream_entities = registry.get_dependent_entities(entity_id) return list([e.to_dict() for e in downstream_entities]) @@ -104,7 +104,7 @@ def delete_entity(entity: str): registry.delete_entity(entity_id) @router.get("/projects/{project}/datasources") -def get_project_datasources(project: str) -> list: +def get_project_datasources(project: str) -> List: p = registry.get_entity(project) source_ids = [s.id for s in p.attributes.sources] sources = registry.get_entities(source_ids) @@ -112,7 +112,7 @@ def get_project_datasources(project: str) -> list: @router.get("/projects/{project}/datasources/{datasource}") -def get_datasource(project: str, datasource: str) -> dict: +def get_datasource(project: str, datasource: str) -> Dict: p = registry.get_entity(project) for s in p.attributes.sources: if str(s.id) == datasource: @@ -123,7 +123,7 @@ def get_datasource(project: str, datasource: str) -> dict: @router.get("/projects/{project}/features") -def get_project_features(project: str, keyword: Optional[str] = None, page: Optional[int] = None, limit: Optional[int] = None) -> list: +def get_project_features(project: str, keyword: Optional[str] = None, page: Optional[int] = None, limit: Optional[int] = None) -> List: if keyword: start = None size = None @@ -144,7 +144,7 @@ def get_project_features(project: str, keyword: Optional[str] = None, page: Opti @router.get("/features/{feature}") -def get_feature(feature: str) -> dict: +def get_feature(feature: str) -> Dict: e = registry.get_entity(feature) if e.entity_type not in [EntityType.DerivedFeature, EntityType.AnchorFeature]: raise HTTPException( @@ -152,32 +152,32 @@ def get_feature(feature: str) -> dict: return e.to_dict() @router.get("/features/{feature}/lineage") -def get_feature_lineage(feature: str) -> dict: +def get_feature_lineage(feature: str) -> Dict: lineage = registry.get_lineage(feature) return lineage.to_dict() @router.post("/projects") -def new_project(definition: dict) -> dict: +def new_project(definition: Dict) -> Dict: id = registry.create_project(ProjectDef(**to_snake(definition))) return {"guid": str(id)} @router.post("/projects/{project}/datasources") -def new_project_datasource(project: str, definition: dict) -> dict: +def new_project_datasource(project: str, definition: Dict) -> Dict: project_id = registry.get_entity_id(project) id = registry.create_project_datasource(project_id, SourceDef(**to_snake(definition))) return {"guid": str(id)} @router.post("/projects/{project}/anchors") -def new_project_anchor(project: str, definition: dict) -> dict: +def new_project_anchor(project: str, definition: Dict) -> Dict: project_id = registry.get_entity_id(project) id = registry.create_project_anchor(project_id, AnchorDef(**to_snake(definition))) return {"guid": str(id)} @router.post("/projects/{project}/anchors/{anchor}/features") -def new_project_anchor_feature(project: str, anchor: str, definition: dict) -> dict: +def new_project_anchor_feature(project: str, anchor: str, definition: Dict) -> Dict: project_id = registry.get_entity_id(project) anchor_id = registry.get_entity_id(anchor) id = registry.create_project_anchor_feature(project_id, anchor_id, AnchorFeatureDef(**to_snake(definition))) @@ -185,7 +185,7 @@ def new_project_anchor_feature(project: str, anchor: str, definition: dict) -> d @router.post("/projects/{project}/derivedfeatures") -def new_project_derived_feature(project: str, definition: dict) -> dict: +def new_project_derived_feature(project: str, definition: Dict) -> Dict: project_id = registry.get_entity_id(project) id = registry.create_project_derived_feature(project_id, DerivedFeatureDef(**to_snake(definition))) return {"guid": str(id)} diff --git a/registry/sql-registry/registry/database.py b/registry/sql-registry/registry/database.py index 8de3d8b87..770ad0ec3 100644 --- a/registry/sql-registry/registry/database.py +++ b/registry/sql-registry/registry/database.py @@ -3,7 +3,7 @@ import logging import threading import os -from typing import List +from typing import List, Dict # Checks if the platform is Max (Darwin). # If so, imports _scproxy that is necessary for pymssql to work on MacOS @@ -18,7 +18,7 @@ class DbConnection(ABC): @abstractmethod - def query(self, sql: str, *args, **kwargs) -> List[dict]: + def query(self, sql: str, *args, **kwargs) -> List[Dict]: pass # already has one in 'db_registry.py'; shall we remove it? @@ -30,7 +30,7 @@ def quote(id): return ",".join([f"'{i}'" for i in id]) ''' -def parse_conn_str(s: str) -> dict: +def parse_conn_str(s: str) -> Dict: """ TODO: Not a sound and safe implementation, but useful enough in this case as the connection string is provided by users themselves. @@ -67,7 +67,7 @@ def __init__(self, params): def make_connection(self): self.conn = pymssql.connect(**self.params) - def query(self, sql: str, *args, **kwargs) -> List[dict]: + def query(self, sql: str, *args, **kwargs) -> List[Dict]: """ Make SQL query and return result """ diff --git a/registry/sql-registry/registry/db_registry.py b/registry/sql-registry/registry/db_registry.py index 5a4cc1666..e202fec2b 100644 --- a/registry/sql-registry/registry/db_registry.py +++ b/registry/sql-registry/registry/db_registry.py @@ -1,6 +1,6 @@ from typing import Optional, Tuple, Union from uuid import UUID, uuid4 -from typing import List, Set +from typing import List, Set, Dict from pydantic import UUID4 from registry import Registry from registry import connect @@ -28,7 +28,7 @@ def get_projects(self) -> List[str]: f"select qualified_name from entities where entity_type=%s", str(EntityType.Project)) return list([r["qualified_name"] for r in ret]) - def get_projects_ids(self) -> dict: + def get_projects_ids(self) -> Dict: projects = {} ret = self.conn.query( f"select entity_id, qualified_name from entities where entity_type=%s", str(EntityType.Project)) @@ -522,7 +522,7 @@ def _bfs(self, id: UUID, conn_type: RelationshipType) -> Tuple[List[Entity], Lis edges = list([Edge(**c) for c in connections]) return (entities, edges) - def _bfs_step(self, ids: List[UUID], conn_type: RelationshipType) -> Set[dict]: + def _bfs_step(self, ids: List[UUID], conn_type: RelationshipType) -> Set[Dict]: """ One step of the BFS process Returns all edges that connect to node ids the next step diff --git a/registry/sql-registry/registry/interface.py b/registry/sql-registry/registry/interface.py index c00db09e2..8791845c8 100644 --- a/registry/sql-registry/registry/interface.py +++ b/registry/sql-registry/registry/interface.py @@ -1,5 +1,5 @@ from abc import ABC, abstractclassmethod, abstractmethod -from typing import Union, List +from typing import Union, List, Dict from uuid import UUID from registry.database import DbConnection @@ -15,7 +15,7 @@ def get_projects(self) -> List[str]: pass @abstractmethod - def get_projects_ids(self) -> dict: + def get_projects_ids(self) -> Dict: """ Returns the ids to names mapping of all projects """ diff --git a/registry/sql-registry/registry/models.py b/registry/sql-registry/registry/models.py index 61edbfb52..3da5cc9df 100644 --- a/registry/sql-registry/registry/models.py +++ b/registry/sql-registry/registry/models.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from enum import Enum -from typing import Optional, Union, List +from typing import Optional, Union, List, Dict from uuid import UUID import json import re @@ -122,7 +122,7 @@ class ToDict(ABC): This ABC is used to convert object to dict, then JSON. """ @abstractmethod - def to_dict(self) -> dict: + def to_dict(self) -> Dict: pass def to_json(self, indent=None) -> str: @@ -146,7 +146,7 @@ def __eq__(self, o: object) -> bool: and self.dimension_type == o.dimension_type \ and self.val_type == o.val_type - def to_dict(self) -> dict: + def to_dict(self) -> Dict: return { "type": self.type.name, "tensorCategory": self.tensor_category.name, @@ -175,7 +175,7 @@ def __eq__(self, o: object) -> bool: and self.key_column_type == o.key_column_type \ and self.key_column_alias == o.key_column_alias - def to_dict(self) -> dict: + def to_dict(self) -> Dict: ret = { "keyColumn": self.key_column, "keyColumnType": self.key_column_type.name, @@ -211,7 +211,7 @@ def __eq__(self, o: object) -> bool: return False return self.transform_expr == o.transform_expr - def to_dict(self) -> dict: + def to_dict(self) -> Dict: return { "transformExpr": self.transform_expr } @@ -242,7 +242,7 @@ def __eq__(self, o: object) -> bool: and self.filter == o.filter \ and self.limit == o.limit - def to_dict(self) -> dict: + def to_dict(self) -> Dict: ret = { "defExpr": self.def_expr, } @@ -268,7 +268,7 @@ def __eq__(self, o: object) -> bool: return False return self.name == o.name - def to_dict(self) -> dict: + def to_dict(self) -> Dict: return { "name": self.name } @@ -279,7 +279,7 @@ def __init__(self, id: UUID, type: Union[str, EntityType], qualified_name: Optional[str] = None, - uniq_attr: dict = {}): + uniq_attr: Dict = {}): self.id = id self.type = _to_type(type, EntityType) if qualified_name is not None: @@ -298,7 +298,7 @@ def qualified_name(self) -> EntityType: def get_ref(self): return self - def to_dict(self) -> dict: + def to_dict(self) -> Dict: return { "guid": str(self.id), "typeName": str(self.type), @@ -323,7 +323,7 @@ def __init__(self, entity_id: Union[str, UUID], qualified_name: str, entity_type: Union[str, EntityType], - attributes: Union[dict, Attributes], + attributes: Union[Dict, Attributes], **kwargs): self.id = _to_uuid(entity_id) self.qualified_name = qualified_name @@ -339,7 +339,7 @@ def get_ref(self) -> EntityRef: self.attributes.entity_type, self.qualified_name) - def to_dict(self) -> dict: + def to_dict(self) -> Dict: return { "guid": str(self.id), "lastModifiedTS": "1", @@ -353,8 +353,8 @@ def to_dict(self) -> dict: class ProjectAttributes(Attributes): def __init__(self, name: str, - children: List[Union[dict, Entity]] = [], - tags: dict = {}, + children: List[Union[Dict, Entity]] = [], + tags: Dict = {}, **kwargs): self.name = name self.tags = tags @@ -371,7 +371,7 @@ def children(self): return self._children @children.setter - def children(self, v: List[Union[dict, Entity]]): + def children(self, v: List[Union[Dict, Entity]]): for f in v: if isinstance(f, Entity): self._children.append(f) @@ -400,7 +400,7 @@ def derived_features(self): return [ e for e in self.children if e.entity_type == EntityType.DerivedFeature] - def to_dict(self) -> dict: + def to_dict(self) -> Dict: return { "qualifiedName": self.name, "name": self.name, @@ -421,7 +421,7 @@ def __init__(self, preprocessing: Optional[str] = None, event_timestamp_column: Optional[str] = None, timestamp_format: Optional[str] = None, - tags: dict = {}): + tags: Dict = {}): self.qualified_name = qualified_name self.name = name self.type = type @@ -435,7 +435,7 @@ def __init__(self, def entity_type(self) -> EntityType: return EntityType.Source - def to_dict(self) -> dict: + def to_dict(self) -> Dict: ret = { "qualifiedName": self.qualified_name, "name": self.name, @@ -456,9 +456,9 @@ class AnchorAttributes(Attributes): def __init__(self, qualified_name: str, name: str, - # source: Optional[Union[dict, EntityRef, Entity]] = None, - # features: list[Union[dict, EntityRef, Entity]] = [], - tags: dict = {}, + # source: Optional[Union[Dict, EntityRef, Entity]] = None, + # features: list[Union[Dict, EntityRef, Entity]] = [], + tags: Dict = {}, **kwargs): self.qualified_name = qualified_name self.name = name @@ -506,7 +506,7 @@ def features(self, features): else: raise TypeError(f) - def to_dict(self) -> dict: + def to_dict(self) -> Dict: ret = { "qualifiedName": self.qualified_name, "name": self.name, @@ -522,10 +522,10 @@ class AnchorFeatureAttributes(Attributes): def __init__(self, qualified_name: str, name: str, - type: Union[dict, FeatureType], - transformation: Union[dict, Transformation], - key: List[Union[dict, TypedKey]], - tags: dict = {}): + type: Union[Dict, FeatureType], + transformation: Union[Dict, Transformation], + key: List[Union[Dict, TypedKey]], + tags: Dict = {}): self.qualified_name = qualified_name self.name = name self.type = _to_type(type, FeatureType) @@ -537,7 +537,7 @@ def __init__(self, def entity_type(self) -> EntityType: return EntityType.AnchorFeature - def to_dict(self) -> dict: + def to_dict(self) -> Dict: return { "qualifiedName": self.qualified_name, "name": self.name, @@ -552,12 +552,12 @@ class DerivedFeatureAttributes(Attributes): def __init__(self, qualified_name: str, name: str, - type: Union[dict, FeatureType], - transformation: Union[dict, Transformation], - key: List[Union[dict, TypedKey]], - input_anchor_features: List[Union[dict, EntityRef, Entity]] = [], - input_derived_features: List[Union[dict, EntityRef, Entity]] = [], - tags: dict = {}, + type: Union[Dict, FeatureType], + transformation: Union[Dict, Transformation], + key: List[Union[Dict, TypedKey]], + input_anchor_features: List[Union[Dict, EntityRef, Entity]] = [], + input_derived_features: List[Union[Dict, EntityRef, Entity]] = [], + tags: Dict = {}, **kwargs): self.qualified_name = qualified_name self.name = name @@ -577,7 +577,7 @@ def input_features(self): return self._input_anchor_features + self._input_derived_features @input_features.setter - def input_features(self, input_features_list: Union[dict, Entity, EntityRef]): + def input_features(self, input_features_list: Union[Dict, Entity, EntityRef]): self._input_anchor_features = [] self._input_derived_features = [] for feature in input_features_list: @@ -609,7 +609,7 @@ def input_anchor_features(self): def input_derived_features(self): return self._input_derived_features - def to_dict(self) -> dict: + def to_dict(self) -> Dict: return { "qualifiedName": self.qualified_name, "name": self.name, @@ -640,7 +640,7 @@ def __eq__(self, o: object) -> bool: def __hash__(self) -> int: return hash((self.from_id, self.to_id, self.conn_type)) - def to_dict(self) -> dict: + def to_dict(self) -> Dict: return { "relationshipId": str(self.id), "fromEntityId": str(self.from_id), @@ -654,7 +654,7 @@ def __init__(self, entities: List[Entity], edges: List[Edge]): self.entities = dict([(e.id, e) for e in entities]) self.edges = set(edges) - def to_dict(self) -> dict: + def to_dict(self) -> Dict: return { "guidEntityMap": dict([(str(id), self.entities[id].to_dict()) for id in self.entities]), "relations": list([e.to_dict() for e in self.edges]), @@ -662,7 +662,7 @@ def to_dict(self) -> dict: class ProjectDef: - def __init__(self, name: str, qualified_name: str = "", tags: dict = {}): + def __init__(self, name: str, qualified_name: str = "", tags: Dict = {}): self.name = name self.qualified_name = qualified_name self.tags = tags @@ -680,7 +680,7 @@ def __init__(self, preprocessing: Optional[str] = None, event_timestamp_column: Optional[str] = None, timestamp_format: Optional[str] = None, - tags: dict = {}): + tags: Dict = {}): self.qualified_name = qualified_name self.name = name self.path = path @@ -705,7 +705,7 @@ def __init__(self, name: str, source_id: Union[str, UUID], qualified_name: str = "", - tags: dict = {}): + tags: Dict = {}): self.qualified_name = qualified_name self.name = name self.source_id = _to_uuid(source_id) @@ -721,11 +721,11 @@ def to_attr(self, source: EntityRef) -> AnchorAttributes: class AnchorFeatureDef: def __init__(self, name: str, - feature_type: Union[dict, FeatureType], - transformation: Union[dict, Transformation], - key: List[Union[dict, TypedKey]], + feature_type: Union[Dict, FeatureType], + transformation: Union[Dict, Transformation], + key: List[Union[Dict, TypedKey]], qualified_name: str = "", - tags: dict = {}): + tags: Dict = {}): self.qualified_name = qualified_name self.name = name self.feature_type = _to_type(feature_type, FeatureType) @@ -745,13 +745,13 @@ def to_attr(self) -> AnchorFeatureAttributes: class DerivedFeatureDef: def __init__(self, name: str, - feature_type: Union[dict, FeatureType], - transformation: Union[dict, Transformation], - key: List[Union[dict, TypedKey]], + feature_type: Union[Dict, FeatureType], + transformation: Union[Dict, Transformation], + key: List[Union[Dict, TypedKey]], input_anchor_features: List[Union[str, UUID]], input_derived_features: List[Union[str, UUID]], qualified_name: str = "", - tags: dict = {}): + tags: Dict = {}): self.qualified_name = qualified_name self.name = name self.feature_type = _to_type(feature_type, FeatureType) From 5d9014d875445ed63c983e2d6eed563423166269 Mon Sep 17 00:00:00 2001 From: enya-yx Date: Mon, 9 Jan 2023 17:25:28 +0800 Subject: [PATCH 14/17] add purview name for temporary test --- registry/test/test_purview_registry.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/registry/test/test_purview_registry.py b/registry/test/test_purview_registry.py index 8f72ceeea..929572f73 100644 --- a/registry/test/test_purview_registry.py +++ b/registry/test/test_purview_registry.py @@ -10,8 +10,13 @@ class PurviewRegistryTest(unittest.TestCase): def setup(self): purview_name = os.getenv('PURVIEW_NAME') + # set 'PURVIEW_NAME' for temporary test + if purview_name is None: + purview_name = 'feathrazuretest3-purview1' + ''' if purview_name is None: raise RuntimeError("Failed to run Purview registry test case. Cannot get environment variable: 'PURVIEW_NAME'") + ''' self.registry = PurviewRegistry(purview_name) def cleanup(self, ids): From 5a354ab545e5f1ec871465e08139cf3d5b124009 Mon Sep 17 00:00:00 2001 From: enya-yx Date: Mon, 9 Jan 2023 17:28:03 +0800 Subject: [PATCH 15/17] run purview test case for temporary test --- .github/workflows/pull_request_push_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull_request_push_test.yml b/.github/workflows/pull_request_push_test.yml index 88f27cbbf..c173677be 100644 --- a/.github/workflows/pull_request_push_test.yml +++ b/.github/workflows/pull_request_push_test.yml @@ -288,8 +288,8 @@ jobs: PURVIEW_NAME: ${{secrets.PURVIEW_NAME}} CONNECTION_STR: ${{secrets.CONNECTION_STR}} run: | - pytest --cov-report term-missing --cov=registry/sql-registry/registry --cov-config=registry/test/.coveragerc registry/test/test_sql_registry.py pytest --cov-report term-missing --cov=registry/purview-registry/registry --cov-config=registry/test/.coveragerc registry/test/test_purview_registry.py + # pytest --cov-report term-missing --cov=registry/sql-registry/registry --cov-config=registry/test/.coveragerc registry/test/test_sql_registry.py failure_notification: # If any failure, warning message will be sent From e2ec8cfd5448000b3a296fcc62212fd6218d0fbd Mon Sep 17 00:00:00 2001 From: enya-yx Date: Mon, 9 Jan 2023 18:18:37 +0800 Subject: [PATCH 16/17] Revert "run purview test case for temporary test" This reverts commit 5a354ab545e5f1ec871465e08139cf3d5b124009. --- .github/workflows/pull_request_push_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull_request_push_test.yml b/.github/workflows/pull_request_push_test.yml index c173677be..88f27cbbf 100644 --- a/.github/workflows/pull_request_push_test.yml +++ b/.github/workflows/pull_request_push_test.yml @@ -288,8 +288,8 @@ jobs: PURVIEW_NAME: ${{secrets.PURVIEW_NAME}} CONNECTION_STR: ${{secrets.CONNECTION_STR}} run: | + pytest --cov-report term-missing --cov=registry/sql-registry/registry --cov-config=registry/test/.coveragerc registry/test/test_sql_registry.py pytest --cov-report term-missing --cov=registry/purview-registry/registry --cov-config=registry/test/.coveragerc registry/test/test_purview_registry.py - # pytest --cov-report term-missing --cov=registry/sql-registry/registry --cov-config=registry/test/.coveragerc registry/test/test_sql_registry.py failure_notification: # If any failure, warning message will be sent From 3cf2ca037aa70dc214b514e9560184337eb8806d Mon Sep 17 00:00:00 2001 From: enya-yx Date: Mon, 9 Jan 2023 18:19:04 +0800 Subject: [PATCH 17/17] Revert "add purview name for temporary test" This reverts commit 5d9014d875445ed63c983e2d6eed563423166269. --- registry/test/test_purview_registry.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/registry/test/test_purview_registry.py b/registry/test/test_purview_registry.py index 929572f73..8f72ceeea 100644 --- a/registry/test/test_purview_registry.py +++ b/registry/test/test_purview_registry.py @@ -10,13 +10,8 @@ class PurviewRegistryTest(unittest.TestCase): def setup(self): purview_name = os.getenv('PURVIEW_NAME') - # set 'PURVIEW_NAME' for temporary test - if purview_name is None: - purview_name = 'feathrazuretest3-purview1' - ''' if purview_name is None: raise RuntimeError("Failed to run Purview registry test case. Cannot get environment variable: 'PURVIEW_NAME'") - ''' self.registry = PurviewRegistry(purview_name) def cleanup(self, ids):