diff --git a/jsonschema/docs/.snapshots/TestFileDestination.md b/jsonschema/docs/.snapshots/TestFileDestination.md new file mode 100644 index 0000000..ff9eafb --- /dev/null +++ b/jsonschema/docs/.snapshots/TestFileDestination.md @@ -0,0 +1,89 @@ +# Table of contents + +* [`Spec`](#Spec) + * [`Spec`](#Spec-1) + * [`Spec`](#Spec-2) + * [`Spec`](#Spec-3) + * [`Duration`](#Duration) + +## Spec + +* `format` (`string`) (required) (possible values: `csv`, `json`, `parquet`) + + Output format. + +* `format_spec` ([`Spec`](#Spec-1), [`Spec`](#Spec-2) or [`Spec`](#Spec-3)) (nullable) + +* `compression` (`string`) (possible values: ` `, `gzip`) + + Compression type. + Empty or missing stands for no compression. + +* `path` (`string`) (required) + + Path template string that determines where files will be written. + + The path supports the following placeholder variables: + - `{{TABLE}}` will be replaced with the table name + - `{{FORMAT}}` will be replaced with the file format, such as `csv`, `json` or `parquet`. If compression is enabled, the format will be `csv.gz`, `json.gz` etc. + - `{{UUID}}` will be replaced with a random UUID to uniquely identify each file + - `{{YEAR}}` will be replaced with the current year in `YYYY` format + - `{{MONTH}}` will be replaced with the current month in `MM` format + - `{{DAY}}` will be replaced with the current day in `DD` format + - `{{HOUR}}` will be replaced with the current hour in `HH` format + - `{{MINUTE}}` will be replaced with the current minute in `mm` format + + **Note** that timestamps are in `UTC` and will be the current time at the time the file is written, not when the sync started. + +* `no_rotate` (`boolean`) (default: `false`) + + If set to `true`, the plugin will write to one file per table. + Otherwise, for every batch a new file will be created with a different `.<UUID>` suffix. 
+ +* `batch_size` (`integer`) (nullable) (range: `[1,+∞)`) (default: `10000`) + + This parameter controls the maximum amount of items may be grouped together to be written in a single write. + + Defaults to `10000` unless `no_rotate` is `true` (will be `0` then). + +* `batch_size_bytes` (`integer`) (nullable) (range: `[1,+∞)`) (default: `52428800`) + + This parameter controls the maximum size of items that may be grouped together to be written in a single write. + + Defaults to `52428800` (50 MiB) unless `no_rotate` is `true` (will be `0` then). + +* `batch_timeout` ([`Duration`](#Duration)) (nullable) (default: `30s`) + + This parameter controls the maximum interval between batch writes. + + Defaults to `30s` unless `no_rotate` is `true` (will be `0s` then). + +### Spec + + CloudQuery CSV file output spec. + +* `skip_header` (`boolean`) (default: `false`) + + Specifies if the first line of a file should be the header. + +* `delimiter` (`string`) ([pattern](https://json-schema.org/draft/2020-12/json-schema-validation#section-6.3.3): `^.$`) (default: `,`) + + Character that will be used as the delimiter. + +### Spec + + CloudQuery JSON file output spec. + +(`object`) + +### Spec + + CloudQuery Parquet file output spec. + +(`object`) + +### Duration + +CloudQuery configtype.Duration + +(`string`) ([pattern](https://json-schema.org/draft/2020-12/json-schema-validation#section-6.3.3): `^[-+]?([0-9]*(\\.[0-9]*)?[a-z]+)+$`) diff --git a/jsonschema/docs/.snapshots/TestFiletypes.md b/jsonschema/docs/.snapshots/TestFiletypes.md index 0ac10bb..5728366 100644 --- a/jsonschema/docs/.snapshots/TestFiletypes.md +++ b/jsonschema/docs/.snapshots/TestFiletypes.md @@ -1,9 +1,9 @@ # Table of contents * [`FileSpec`](#FileSpec) - * [`CSVSpec`](#CSVSpec) - * [`JSONSpec`](#JSONSpec) - * [`ParquetSpec`](#ParquetSpec) + * [`Spec`](#Spec) + * [`Spec`](#Spec-1) + * [`Spec`](#Spec-2) ## FileSpec @@ -11,14 +11,14 @@ Output format. 
-* `format_spec` ([`CSVSpec`](#CSVSpec), [`JSONSpec`](#JSONSpec) or [`ParquetSpec`](#ParquetSpec)) (nullable) +* `format_spec` ([`Spec`](#Spec), [`Spec`](#Spec-1) or [`Spec`](#Spec-2)) (nullable) * `compression` (`string`) (possible values: ` `, `gzip`) Compression type. Empty or missing stands for no compression. -### CSVSpec +### Spec CloudQuery CSV file output spec. @@ -30,13 +30,13 @@ Character that will be used as the delimiter. -### JSONSpec +### Spec CloudQuery JSON file output spec. (`object`) -### ParquetSpec +### Spec CloudQuery Parquet file output spec. diff --git a/jsonschema/docs/docs_test.go b/jsonschema/docs/docs_test.go index cfc422a..5183f22 100644 --- a/jsonschema/docs/docs_test.go +++ b/jsonschema/docs/docs_test.go @@ -44,3 +44,7 @@ func TestClickHouse(t *testing.T) { func TestFiletypes(t *testing.T) { genSnapshot(t, "testdata/filetypes.json") } + +func TestFileDestination(t *testing.T) { + genSnapshot(t, "testdata/file-destination.json") +} diff --git a/jsonschema/docs/testdata/file-destination.json b/jsonschema/docs/testdata/file-destination.json new file mode 100644 index 0000000..ffbfe0e --- /dev/null +++ b/jsonschema/docs/testdata/file-destination.json @@ -0,0 +1,266 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://github.com/cloudquery/cloudquery/plugins/destination/file/client/spec/spec", + "$ref": "#/$defs/Spec", + "$defs": { + "Duration": { + "type": "string", + "pattern": "^[-+]?([0-9]*(\\.[0-9]*)?[a-z]+)+$", + "title": "CloudQuery configtype.Duration" + }, + "Spec": { + "allOf": [ + { + "if": { + "properties": { + "no_rotate": { + "type": "boolean", + "const": true + } + }, + "required": [ + "no_rotate" + ] + }, + "then": { + "not": { + "properties": { + "path": { + "type": "string", + "pattern": "^.*\\{\\{UUID\\}\\}.*$" + } + }, + "title": "Require {{UUID}} to be present in path" + } + }, + "title": "Disallow {{UUID}} in path when using no_rotate" + }, + { + "if": { + "properties": { + "no_rotate": 
{ + "type": "boolean", + "const": true + } + }, + "required": [ + "no_rotate" + ] + }, + "then": { + "properties": { + "batch_size": { + "type": "null" + }, + "batch_size_bytes": { + "type": "null" + }, + "batch_timeout": { + "type": "null" + } + } + }, + "title": "Disallow batching when using no_rotate" + }, + { + "if": { + "properties": { + "no_rotate": { + "type": "boolean", + "const": false + } + }, + "title": "Disallow setting no_rotate to true" + }, + "then": { + "properties": { + "path": { + "type": "string", + "pattern": "^.*\\{\\{UUID\\}\\}.*$" + } + }, + "title": "Require {{UUID}} to be present in path" + }, + "title": "Require {{UUID}} in path when batching" + } + ], + "oneOf": [ + { + "properties": { + "format": { + "type": "string", + "const": "csv" + }, + "format_spec": { + "oneOf": [ + { + "$ref": "#/$defs/Spec-1" + }, + { + "type": "null" + } + ] + } + } + }, + { + "properties": { + "format": { + "type": "string", + "const": "json" + }, + "format_spec": { + "oneOf": [ + { + "$ref": "#/$defs/Spec-2" + }, + { + "type": "null" + } + ] + } + } + }, + { + "properties": { + "format": { + "type": "string", + "const": "parquet" + }, + "format_spec": { + "oneOf": [ + { + "$ref": "#/$defs/Spec-3" + }, + { + "type": "null" + } + ] + } + } + } + ], + "properties": { + "format": { + "type": "string", + "enum": [ + "csv", + "json", + "parquet" + ], + "description": "Output format." + }, + "format_spec": { + "oneOf": [ + { + "anyOf": [ + { + "$ref": "#/$defs/Spec-1" + }, + { + "$ref": "#/$defs/Spec-2" + }, + { + "$ref": "#/$defs/Spec-3" + } + ] + }, + { + "type": "null" + } + ] + }, + "compression": { + "type": "string", + "enum": [ + "", + "gzip" + ], + "description": "Compression type.\nEmpty or missing stands for no compression." 
+ }, + "path": { + "type": "string", + "minLength": 1, + "description": "Path template string that determines where files will be written.\n\nThe path supports the following placeholder variables:\n- `{{TABLE}}` will be replaced with the table name\n- `{{FORMAT}}` will be replaced with the file format, such as `csv`, `json` or `parquet`. If compression is enabled, the format will be `csv.gz`, `json.gz` etc.\n- `{{UUID}}` will be replaced with a random UUID to uniquely identify each file\n- `{{YEAR}}` will be replaced with the current year in `YYYY` format\n- `{{MONTH}}` will be replaced with the current month in `MM` format\n- `{{DAY}}` will be replaced with the current day in `DD` format\n- `{{HOUR}}` will be replaced with the current hour in `HH` format\n- `{{MINUTE}}` will be replaced with the current minute in `mm` format\n\n **Note** that timestamps are in `UTC` and will be the current time at the time the file is written, not when the sync started." + }, + "no_rotate": { + "type": "boolean", + "description": "If set to `true`, the plugin will write to one file per table.\nOtherwise, for every batch a new file will be created with a different `.\u003cUUID\u003e` suffix.", + "default": false + }, + "batch_size": { + "oneOf": [ + { + "type": "integer", + "minimum": 1, + "description": "This parameter controls the maximum amount of items may be grouped together to be written in a single write.\n\nDefaults to `10000` unless `no_rotate` is `true` (will be `0` then).", + "default": 10000 + }, + { + "type": "null" + } + ] + }, + "batch_size_bytes": { + "oneOf": [ + { + "type": "integer", + "minimum": 1, + "description": "This parameter controls the maximum size of items that may be grouped together to be written in a single write.\n\nDefaults to `52428800` (50 MiB) unless `no_rotate` is `true` (will be `0` then).", + "default": 52428800 + }, + { + "type": "null" + } + ] + }, + "batch_timeout": { + "oneOf": [ + { + "$ref": "#/$defs/Duration", + "description": "This 
parameter controls the maximum interval between batch writes.\n\nDefaults to `30s` unless `no_rotate` is `true` (will be `0s` then).", + "default": "30s" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "format", + "path" + ] + }, + "Spec-1": { + "properties": { + "skip_header": { + "type": "boolean", + "description": "Specifies if the first line of a file should be the header.", + "default": false + }, + "delimiter": { + "type": "string", + "pattern": "^.$", + "description": "Character that will be used as the delimiter.", + "default": "," + } + }, + "additionalProperties": false, + "type": "object", + "description": "CloudQuery CSV file output spec." + }, + "Spec-2": { + "additionalProperties": false, + "type": "object", + "description": "CloudQuery JSON file output spec." + }, + "Spec-3": { + "additionalProperties": false, + "type": "object", + "description": "CloudQuery Parquet file output spec." + } + } +} diff --git a/jsonschema/docs/testdata/filetypes.json b/jsonschema/docs/testdata/filetypes.json index e0a6bca..cc2193c 100644 --- a/jsonschema/docs/testdata/filetypes.json +++ b/jsonschema/docs/testdata/filetypes.json @@ -4,37 +4,6 @@ "$ref": "#/$defs/FileSpec", "$defs": { "FileSpec": { - "$id": "/schemas/FileSpec", - "$defs": { - "CSVSpec": { - "properties": { - "skip_header": { - "type": "boolean", - "description": "Specifies if the first line of a file should be the header.", - "default": false - }, - "delimiter": { - "type": "string", - "pattern": "^.$", - "description": "Character that will be used as the delimiter.", - "default": "," - } - }, - "additionalProperties": false, - "type": "object", - "description": "CloudQuery CSV file output spec." - }, - "JSONSpec": { - "additionalProperties": false, - "type": "object", - "description": "CloudQuery JSON file output spec." 
- }, - "ParquetSpec": { - "additionalProperties": false, - "type": "object", - "description": "CloudQuery Parquet file output spec." - } - }, "oneOf": [ { "properties": { @@ -45,7 +14,7 @@ "format_spec": { "oneOf": [ { - "$ref": "#/$defs/CSVSpec" + "$ref": "#/$defs/Spec" }, { "type": "null" @@ -63,7 +32,7 @@ "format_spec": { "oneOf": [ { - "$ref": "#/$defs/JSONSpec" + "$ref": "#/$defs/Spec-1" }, { "type": "null" @@ -81,7 +50,7 @@ "format_spec": { "oneOf": [ { - "$ref": "#/$defs/ParquetSpec" + "$ref": "#/$defs/Spec-2" }, { "type": "null" @@ -106,13 +75,13 @@ { "anyOf": [ { - "$ref": "#/$defs/CSVSpec" + "$ref": "#/$defs/Spec" }, { - "$ref": "#/$defs/JSONSpec" + "$ref": "#/$defs/Spec-1" }, { - "$ref": "#/$defs/ParquetSpec" + "$ref": "#/$defs/Spec-2" } ] }, @@ -135,6 +104,34 @@ "required": [ "format" ] + }, + "Spec": { + "properties": { + "skip_header": { + "type": "boolean", + "description": "Specifies if the first line of a file should be the header.", + "default": false + }, + "delimiter": { + "type": "string", + "pattern": "^.$", + "description": "Character that will be used as the delimiter.", + "default": "," + } + }, + "additionalProperties": false, + "type": "object", + "description": "CloudQuery CSV file output spec." + }, + "Spec-1": { + "additionalProperties": false, + "type": "object", + "description": "CloudQuery JSON file output spec." + }, + "Spec-2": { + "additionalProperties": false, + "type": "object", + "description": "CloudQuery Parquet file output spec." } } }