From 3dbd7f32cdcb87dd0b7cd4dd9b71c2552b25b30e Mon Sep 17 00:00:00 2001 From: Herman Schaaf Date: Tue, 8 Aug 2023 10:46:56 +0100 Subject: [PATCH 1/2] feat: Add more metadata to tables needed for docs generation (#1129) This adds metadata fields for table titles and parent relationships, so that this information can be sent over the wire in Arrow format. We will use this in the CLI to generate markdown and json docs for source plugins. --- schema/arrow.go | 2 ++ schema/table.go | 26 +++++++++++++++++++------- schema/table_test.go | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 7 deletions(-) diff --git a/schema/arrow.go b/schema/arrow.go index 56e51de354..8e83d8e994 100644 --- a/schema/arrow.go +++ b/schema/arrow.go @@ -14,6 +14,8 @@ const ( MetadataFalse = "false" MetadataTableName = "cq:table_name" MetadataTableDescription = "cq:table_description" + MetadataTableTitle = "cq:table_title" + MetadataTableDependsOn = "cq:table_depends_on" ) type Schemas []*arrow.Schema diff --git a/schema/table.go b/schema/table.go index 611ffc383a..de984eecd1 100644 --- a/schema/table.go +++ b/schema/table.go @@ -26,7 +26,7 @@ type Transform func(table *Table) error type Tables []*Table -// This is deprecated +// Deprecated: SyncSummary is deprecated. type SyncSummary struct { Resources uint64 Errors uint64 @@ -94,7 +94,7 @@ var ( reValidColumnName = regexp.MustCompile(`^[a-z_][a-z\d_]*$`) ) -// AddCqIds adds the cq_id and cq_parent_id columns to the table and all its relations +// AddCqIDs adds the cq_id and cq_parent_id columns to the table and all its relations // set cq_id as primary key if no other primary keys func AddCqIDs(table *Table) { havePks := len(table.PrimaryKeys()) > 0 @@ -126,9 +126,9 @@ func NewTablesFromArrowSchemas(schemas []*arrow.Schema) (Tables, error) { return tables, nil } -// Create a CloudQuery Table abstraction from an arrow schema -// arrow schema is a low level representation of a table that can be sent -// over the wire in a cross-language way +// NewTableFromArrowSchema creates a CloudQuery Table abstraction from an Arrow schema. +// The Arrow schema is a low level representation of a table that can be sent +// over the wire in a cross-language way. func NewTableFromArrowSchema(sc *arrow.Schema) (*Table, error) { tableMD := sc.Metadata() name, found := tableMD.GetValue(MetadataTableName) @@ -137,6 +137,12 @@ func NewTableFromArrowSchema(sc *arrow.Schema) (*Table, error) { } description, _ := tableMD.GetValue(MetadataTableDescription) constraintName, _ := tableMD.GetValue(MetadataConstraintName) + title, _ := tableMD.GetValue(MetadataTableTitle) + dependsOn, _ := tableMD.GetValue(MetadataTableDependsOn) + var parent *Table + if dependsOn != "" { + parent = &Table{Name: dependsOn} + } fields := sc.Fields() columns := make(ColumnList, len(fields)) for i, field := range fields { @@ -147,6 +153,8 @@ func NewTableFromArrowSchema(sc *arrow.Schema) (*Table, error) { Description: description, PkConstraintName: constraintName, Columns: columns, + Title: title, + Parent: parent, } if isIncremental, found := tableMD.GetValue(MetadataIncremental); found { table.IsIncremental = isIncremental == MetadataTrue @@ -391,12 +399,15 @@ func (t *Table) ToArrowSchema() *arrow.Schema { md := map[string]string{ MetadataTableName: t.Name, MetadataTableDescription: t.Description, + MetadataTableTitle: t.Title, MetadataConstraintName: t.PkConstraintName, - MetadataIncremental: MetadataFalse, } if t.IsIncremental { md[MetadataIncremental] = MetadataTrue } + if t.Parent != nil { + md[MetadataTableDependsOn] = t.Parent.Name + } schemaMd := arrow.MetadataFrom(md) for i, c := range t.Columns { fields[i] = c.ToArrowField() @@ -404,7 +415,7 @@ func (t *Table) ToArrowSchema() *arrow.Schema { return arrow.NewSchema(fields, &schemaMd) } -// Get Changes returns changes between two tables when t is the new one and old is the old one. +// GetChanges returns changes between two tables when t is the new one and old is the old one. func (t *Table) GetChanges(old *Table) []TableColumnChange { var changes []TableColumnChange for _, c := range t.Columns { @@ -475,6 +486,7 @@ func (t *Table) Column(name string) *Column { return nil } +// OverwriteOrAddColumn overwrites or adds columns. // If the column with the same name exists, overwrites it. // Otherwise, adds the column to the beginning of the table. func (t *Table) OverwriteOrAddColumn(column *Column) { diff --git a/schema/table_test.go b/schema/table_test.go index c70c2b3f2b..25d14deb22 100644 --- a/schema/table_test.go +++ b/schema/table_test.go @@ -4,6 +4,7 @@ import ( "testing" "github.com/apache/arrow/go/v13/arrow" + "github.com/cloudquery/plugin-sdk/v4/types" "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/require" ) @@ -346,3 +347,36 @@ func TestTableGetChanges(t *testing.T) { }) } } + +func TestTableToAndFromArrow(t *testing.T) { + // The attributes in this table should all be preserved when converting to and from Arrow. + table := &Table{ + Name: "test_table", + Description: "Test table description", + Title: "Test Table", + Parent: &Table{ + Name: "parent_table", + }, + IsIncremental: true, + Columns: []Column{ + {Name: "bool", Type: arrow.FixedWidthTypes.Boolean}, + {Name: "int", Type: arrow.PrimitiveTypes.Int64}, + {Name: "float", Type: arrow.PrimitiveTypes.Float64}, + {Name: "string", Type: arrow.BinaryTypes.String}, + {Name: "json", Type: types.ExtensionTypes.JSON}, + {Name: "unique", Type: arrow.BinaryTypes.String, Unique: true}, + {Name: "primary_key", Type: arrow.BinaryTypes.String, PrimaryKey: true}, + {Name: "not_null", Type: arrow.BinaryTypes.String, NotNull: true}, + {Name: "incremental_key", Type: arrow.BinaryTypes.String, IncrementalKey: true}, + {Name: "multiple_attributes", Type: arrow.BinaryTypes.String, PrimaryKey: true, IncrementalKey: true, NotNull: true, Unique: true}, + }, + } + arrowSchema := table.ToArrowSchema() + tableFromArrow, err := NewTableFromArrowSchema(arrowSchema) + if err != nil { + t.Fatal(err) + } + if diff := cmp.Diff(table, tableFromArrow); diff != "" { + t.Errorf("diff (+got, -want): %v", diff) + } +} From ed96059e82c98435e8f365b847dc28f7cdd5f3b1 Mon Sep 17 00:00:00 2001 From: CloudQuery Bot <102256036+cq-bot@users.noreply.github.com> Date: Tue, 8 Aug 2023 14:00:32 +0300 Subject: [PATCH 2/2] chore(main): Release v4.3.0 (#1134) :robot: I have created a release *beep* *boop* --- ## [4.3.0](https://github.com/cloudquery/plugin-sdk/compare/v4.2.6...v4.3.0) (2023-08-08) ### Features * Add more metadata to tables needed for docs generation ([#1129](https://github.com/cloudquery/plugin-sdk/issues/1129)) ([3dbd7f3](https://github.com/cloudquery/plugin-sdk/commit/3dbd7f32cdcb87dd0b7cd4dd9b71c2552b25b30e)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b0b5b224e3..a71dc3657b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [4.3.0](https://github.com/cloudquery/plugin-sdk/compare/v4.2.6...v4.3.0) (2023-08-08) + + +### Features + +* Add more metadata to tables needed for docs generation ([#1129](https://github.com/cloudquery/plugin-sdk/issues/1129)) ([3dbd7f3](https://github.com/cloudquery/plugin-sdk/commit/3dbd7f32cdcb87dd0b7cd4dd9b71c2552b25b30e)) + ## [4.2.6](https://github.com/cloudquery/plugin-sdk/compare/v4.2.5...v4.2.6) (2023-08-08)