from dataclasses import replace from codegen.jsonschema import Schema REMOVED_FIELDS = { # fields that were deprecated a long time ago "resources.Pipeline": { # 'trigger' is deprecated, use 'continuous' or schedule pipeline refresh using job instead "trigger", }, "pipelines.PipelineLibrary": [ # 'whl' is deprecated, install libraries through notebooks and %pip command "whl", ], } EXTRA_REQUIRED_FIELDS: dict[str, list[str]] = { "jobs.SparkJarTask": ["main_class_name"], } def add_extra_required_fields(schemas: dict[str, Schema]): output = {} for name, schema in schemas.items(): if extra_required := EXTRA_REQUIRED_FIELDS.get(name): new_required = [*schema.required, *extra_required] new_required = list(set(new_required)) if set(new_required) == set(schema.required): raise ValueError( f"Extra required fields for {name} are already present in the schema" ) new_schema = replace(schema, required=new_required) output[name] = new_schema else: output[name] = schema return output def remove_unsupported_fields(schemas: dict[str, Schema]): output = {} for name, schema in schemas.items(): if removed_fields := REMOVED_FIELDS.get(name): new_properties = { field: prop for field, prop in schema.properties.items() if field not in removed_fields } if new_properties.keys() == schema.properties.keys(): raise ValueError(f"No fields to remove in schema {name}") new_schema = replace(schema, properties=new_properties) output[name] = new_schema else: output[name] = schema return output