diff --git a/.github/workflows/scala-steward.yml b/.github/workflows/scala-steward.yml deleted file mode 100644 index f472a7dd..00000000 --- a/.github/workflows/scala-steward.yml +++ /dev/null @@ -1,25 +0,0 @@ -on: - schedule: - - cron: '0 0 * * 0' - workflow_dispatch: - -name: Launch Scala Steward - -jobs: - scala-steward: - runs-on: ubuntu-latest - name: Launch Scala Steward - steps: - - name: Configure GPG Key - run: | - mkdir -p ~/.gnupg/ - printf "$GPG_SIGNING_KEY" | base64 --decode > ~/.gnupg/private.key - gpg --pinentry-mode=loopback --passphrase ${{ secrets.GPG_PERSONAL_PASSPHRASE }} --batch --import ~/.gnupg/private.key - env: - GPG_SIGNING_KEY: ${{ secrets.GPG_PERSONAL }} - - name: Launch Scala Steward - uses: scala-steward-org/scala-steward-action@v2 - with: - github-token: ${{ secrets.GH_PERSONAL_TOKEN }} - sign-commits: true - signing-key: ${{ secrets.GPG_PERSONAL_KEY_ID }} \ No newline at end of file diff --git a/.scalafmt.conf b/.scalafmt.conf index 269221cb..c3c6c1b6 100644 --- a/.scalafmt.conf +++ b/.scalafmt.conf @@ -1,5 +1,5 @@ runner.dialect = scala213source3 -version = "3.5.8" +version = "3.5.9" maxColumn = 120 docstrings.blankFirstLine = no docstrings.style = SpaceAsterisk diff --git a/CHANGELOG.md b/CHANGELOG.md index 5cf37eec..2d5cacff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +### Big Data Types v1.3.0 +- Added BigQuerySchemas as an interface to create Schemas without creating tables +- Updated Scala to Scala 3.1.X + - This version is no longer compatible with Scala 3.0.X + - The Scala version has been upgraded due to some dependencies (Circe - ScalaTest) that have updated Scala to 3.1.X + ### Big Data Types v1.2.0 - New module for Circe (JSON) - Conversion from Circe to other types diff --git a/README.md b/README.md index 04aebcbd..b71abc2b 100644 --- a/README.md +++ b/README.md @@ -14,14 +14,14 @@ Check the [Documentation website](https://data-tools.github.io/big-data-types) t # Available conversions: -| From / To 
|Scala Types |BigQuery |Spark |Cassandra | Circe (JSON) | -|--------------|:----------------:|:----------------:|:----------------:|:----------------:|:------------:| -| Scala Types | - |:white_check_mark:|:white_check_mark:|:white_check_mark:| | -| BigQuery | | - |:white_check_mark:|:white_check_mark:| | -| Spark | |:white_check_mark:| - |:white_check_mark:| | -| Cassandra | |:white_check_mark:|:white_check_mark:| - | | -| Circe (JSON) | |:white_check_mark:|:white_check_mark:|:white_check_mark:| | +| From / To | | Scala Types | BigQuery | Spark | Cassandra | Circe (JSON) | +|:------------:|:---------------------------------------------------------------------------------------------------------------------:|:-----------:|:------------------:|:------------------:|:------------------:|:------------:| +| Scala | | - | :white_check_mark: | :white_check_mark: | :white_check_mark: | | +| BigQuery | | | - | :white_check_mark: | :white_check_mark: | | +| Spark | | | :white_check_mark: | - | :white_check_mark: | | +| Cassandra | | | :white_check_mark: | :white_check_mark: | - | | +| Circe (JSON) | | | :white_check_mark: | :white_check_mark: | :white_check_mark: | | Versions for Scala ![Scala 2.12](https://img.shields.io/badge/Scala-2.12-red) ,![Scala_2.13](https://img.shields.io/badge/Scala-2.13-red) -and ![Scala 3.0](https://img.shields.io/badge/Scala-3.0-red) are available in Maven +and ![Scala 3.x](https://img.shields.io/badge/Scala-3.x-red) are available in Maven diff --git a/bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQueryDefinitions.scala b/bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQueryDefinitions.scala index 9c222f17..d34da39f 100644 --- a/bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQueryDefinitions.scala +++ b/bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQueryDefinitions.scala @@ -2,6 +2,8 @@ package org.datatools.bigdatatypes.bigquery import com.google.cloud.bigquery.{Schema, 
StandardTableDefinition, TimePartitioning} import org.datatools.bigdatatypes.bigquery.JavaConverters.toJava +import org.datatools.bigdatatypes.conversions.SqlInstanceConversion +import org.datatools.bigdatatypes.formats.Formats.implicitDefaultFormats private[bigquery] object BigQueryDefinitions { diff --git a/bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQuerySchemas.scala b/bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQuerySchemas.scala new file mode 100644 index 00000000..90c2f686 --- /dev/null +++ b/bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQuerySchemas.scala @@ -0,0 +1,32 @@ +package org.datatools.bigdatatypes.bigquery + +import com.google.cloud.bigquery.Schema +import org.datatools.bigdatatypes.bigquery.JavaConverters.toJava + +/** + * Public API for generating BigQuery Schemas. + * Any type implementing [[SqlTypeToBigQuery]] or [[SqlInstanceToBigQuery]] can be converted into a BigQuery [[Schema]] + * If multiple types are given, the resulting schema will be the concatenation of them. 
+ */ +object BigQuerySchemas { + + /** + * Given any type that implements [[SqlTypeToBigQuery]] returns the BigQuery Schema for that type + * @tparam A is any type implementing [[SqlTypeToBigQuery]] + * @return [[Schema]] ready to be used in BigQuery + */ + def schema[A: SqlTypeToBigQuery]: Schema = BigQueryDefinitions.generateSchema[A] + def schema[A: SqlTypeToBigQuery, B: SqlTypeToBigQuery]: Schema = BigQueryDefinitions.generateSchema[A, B] + def schema[A: SqlTypeToBigQuery, B: SqlTypeToBigQuery, C: SqlTypeToBigQuery]: Schema = BigQueryDefinitions.generateSchema[A, B, C] + def schema[A: SqlTypeToBigQuery, B: SqlTypeToBigQuery, C: SqlTypeToBigQuery, D: SqlTypeToBigQuery]: Schema = BigQueryDefinitions.generateSchema[A, B, C, D] + def schema[A: SqlTypeToBigQuery, B: SqlTypeToBigQuery, C: SqlTypeToBigQuery, D: SqlTypeToBigQuery, E: SqlTypeToBigQuery]: Schema = BigQueryDefinitions.generateSchema[A, B, C, D, E] + + /** + * Given an instance of a Product, extracts the BQ [[Schema]] from its type + * @param value an instance of any Product + * @tparam A is any Product type + * @return [[Schema]] with the same structure as the given input + */ + def schema[A <: Product](value: A)(implicit a: SqlTypeToBigQuery[A]): Schema = + Schema.of(toJava(SqlTypeToBigQuery[A].bigQueryFields)) +} diff --git a/bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQueryTable.scala b/bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQueryTable.scala index ac02acdb..2ffd7b16 100644 --- a/bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQueryTable.scala +++ b/bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQueryTable.scala @@ -14,6 +14,10 @@ import org.datatools.bigdatatypes.bigquery.BigQueryDefinitions.{generateSchema, import scala.util.{Failure, Try} +/** + * Methods in this object are creating real tables into a BigQuery environment. 
+ * If only the schema of the table is desired, please use [[BigQuerySchemas]] + */ object BigQueryTable { lazy val service: BigQuery = BigQueryOptions.getDefaultInstance.getService diff --git a/bigquery/src/main/scala_2.13+/org/datatools/bigdatatypes/bigquery/JavaConverters.scala b/bigquery/src/main/scala_2.13+/org/datatools/bigdatatypes/bigquery/JavaConverters.scala index 332fc1d4..68e04f9b 100644 --- a/bigquery/src/main/scala_2.13+/org/datatools/bigdatatypes/bigquery/JavaConverters.scala +++ b/bigquery/src/main/scala_2.13+/org/datatools/bigdatatypes/bigquery/JavaConverters.scala @@ -5,6 +5,6 @@ import scala.jdk.CollectionConverters.{IterableHasAsJava, IterableHasAsScala} object JavaConverters { - def toJava[A](value: List[A]): lang.Iterable[A] = value.asJava + def toJava[A](value: Seq[A]): lang.Iterable[A] = value.asJava def toScala[A](value: lang.Iterable[A]): List[A] = value.asScala.toList } diff --git a/bigquery/src/main/scala_2.13-/org/datatools/bigdatatypes/bigquery/JavaConverters.scala b/bigquery/src/main/scala_2.13-/org/datatools/bigdatatypes/bigquery/JavaConverters.scala index 02f6f930..6ff08690 100644 --- a/bigquery/src/main/scala_2.13-/org/datatools/bigdatatypes/bigquery/JavaConverters.scala +++ b/bigquery/src/main/scala_2.13-/org/datatools/bigdatatypes/bigquery/JavaConverters.scala @@ -5,6 +5,6 @@ import scala.collection.JavaConverters.{asJavaIterableConverter, iterableAsScala object JavaConverters { - def toJava[A](value: List[A]): lang.Iterable[A] = value.asJava + def toJava[A](value: Seq[A]): lang.Iterable[A] = value.asJava def toScala[A](value: lang.Iterable[A]): List[A] = value.asScala.toList } diff --git a/bigquery/src/test/scala/org/datatools/bigdatatypes/bigquery/BigQuerySchemasSpec.scala b/bigquery/src/test/scala/org/datatools/bigdatatypes/bigquery/BigQuerySchemasSpec.scala new file mode 100644 index 00000000..197a9386 --- /dev/null +++ b/bigquery/src/test/scala/org/datatools/bigdatatypes/bigquery/BigQuerySchemasSpec.scala @@ -0,0 +1,71 @@ 
+package org.datatools.bigdatatypes.bigquery + +import com.google.cloud.bigquery.Field.Mode +import com.google.cloud.bigquery.{Field, Schema, StandardSQLTypeName} +import org.datatools.bigdatatypes.TestTypes.ListOfStruct +import org.datatools.bigdatatypes.bigquery.JavaConverters.toJava +import org.datatools.bigdatatypes.{BigQueryTestTypes, UnitSpec} +import org.datatools.bigdatatypes.formats.Formats.implicitDefaultFormats +import org.datatools.bigdatatypes.conversions.SqlTypeConversion.* + +class BigQuerySchemasSpec extends UnitSpec { + + val elements1: Seq[Field] = List( + Field.newBuilder("a", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build() + ) + val elements2: Seq[Field] = List( + Field.newBuilder("a", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build(), + Field.newBuilder("b", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build() + ) + val elements3: Seq[Field] = List( + Field.newBuilder("a", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build(), + Field.newBuilder("b", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build(), + Field.newBuilder("c", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build() + ) + val elements4: Seq[Field] = List( + Field.newBuilder("a", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build(), + Field.newBuilder("b", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build(), + Field.newBuilder("c", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build(), + Field.newBuilder("d", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build() + ) + val elements5: Seq[Field] = List( + Field.newBuilder("a", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build(), + Field.newBuilder("b", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build(), + Field.newBuilder("c", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build(), + Field.newBuilder("d", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build(), + Field.newBuilder("e", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build() + ) + case class 
Simple1(a: Int) + case class Simple2(b: Int) + case class Simple3(c: Int) + case class Simple4(d: Int) + case class Simple5(e: Int) + + behavior of "BigQuerySchemas" + + "Case class with Struct List" should "be converted into BQ Schema" in { + val fields: Schema = BigQuerySchemas.schema[ListOfStruct] + fields shouldBe Schema.of(toJava(BigQueryTestTypes.basicNestedWithList)) + } + + "2 classes" should "be converted into a BQ Schema" in { + BigQuerySchemas.schema[Simple1, Simple2] shouldBe Schema.of(toJava(elements2)) + } + + "3 classes" should "be converted into a BQ Schema" in { + BigQuerySchemas.schema[Simple1, Simple2, Simple3] shouldBe Schema.of(toJava(elements3)) + } + + "4 classes" should "be converted into a BQ Schema" in { + BigQuerySchemas.schema[Simple1, Simple2, Simple3, Simple4] shouldBe Schema.of(toJava(elements4)) + } + + "5 classes" should "be converted into a BQ Schema" in { + BigQuerySchemas.schema[Simple1, Simple2, Simple3, Simple4, Simple5] shouldBe Schema.of(toJava(elements5)) + } + + "An instance" should "be converted into a BQ Schema" in { + val s = Simple1(1) + BigQuerySchemas.schema[Simple1](s) shouldBe Schema.of(toJava(elements1)) + } +} diff --git a/build.sbt b/build.sbt index afed95e2..189dc930 100644 --- a/build.sbt +++ b/build.sbt @@ -1,12 +1,12 @@ //used to build Sonatype releases -lazy val versionNumber = "1.2.0" +lazy val versionNumber = "1.3.0" lazy val projectName = "big-data-types" version := versionNumber name := projectName lazy val scala213 = "2.13.7" lazy val scala212 = "2.12.15" -lazy val scala3 = "3.0.2" +lazy val scala3 = "3.1.0" lazy val supportedScalaVersions = List(scala3, scala213, scala212) scalaVersion := scala213 @@ -39,27 +39,27 @@ lazy val scalacCommon = Seq("-Xsource:3") //Dependencies lazy val coreDependencies2 = Seq( - "ch.qos.logback" % "logback-classic" % "1.2.11", + "ch.qos.logback" % "logback-classic" % "1.4.1", "org.clapper" %% "grizzled-slf4j" % "1.3.4", - "com.chuusai" %% "shapeless" % "2.3.9", + 
"com.chuusai" %% "shapeless" % "2.3.10", scalatest % Test ) lazy val coreDependencies3 = Seq( - "ch.qos.logback" % "logback-classic" % "1.2.11", + "ch.qos.logback" % "logback-classic" % "1.4.1", "org.clapper" % "grizzled-slf4j_2.13" % "1.3.4", scalatest % Test ) lazy val bigqueryDependencies = Seq( "com.google.auto.value" % "auto-value-annotations" % "1.9", // needed for an incompatibility between BQ & Scala3 - "com.google.cloud" % "google-cloud-bigquery" % "2.13.1", + "com.google.cloud" % "google-cloud-bigquery" % "2.16.1", scalatest % "it,test" ) lazy val sparkDependencies = Seq( - "org.apache.spark" %% "spark-core" % "3.2.1" % Provided, - "org.apache.spark" %% "spark-sql" % "3.2.1" % Provided, + "org.apache.spark" %% "spark-core" % "3.3.0" % Provided, + "org.apache.spark" %% "spark-sql" % "3.3.0" % Provided, scalatest % Test ) @@ -69,14 +69,15 @@ lazy val cassandraDependencies = Seq( scalatest % Test ) -val circeVersion = "0.14.1" +val circeVersion = "0.14.3" + lazy val jsonCirceDependencies = Seq( - "io.circe" %% "circe-core", - "io.circe" %% "circe-generic", - "io.circe" %% "circe-parser" - ).map(_ % circeVersion) + "io.circe" %% "circe-core", + "io.circe" %% "circe-generic", + "io.circe" %% "circe-parser" +).map(_ % circeVersion) -lazy val scalatest = "org.scalatest" %% "scalatest" % "3.2.11" +lazy val scalatest = "org.scalatest" %% "scalatest" % "3.2.13" //Project settings lazy val root = (project in file(".")) diff --git a/docs/Modules/Circe.md b/docs/Modules/Circe.md index c5a8a301..9ceec6ac 100644 --- a/docs/Modules/Circe.md +++ b/docs/Modules/Circe.md @@ -5,7 +5,7 @@ sidebar_position: 6 [Circe](https://circe.github.io/circe/) is a JSON library for Scala. -The Circe module of this library allows to convert `Json` objects (from Circe) to any other type in the library. +The Circe module of this library allows to convert `Json` objects (from [Circe](https://circe.github.io/circe/)) to any other type in the library. 
:::caution For now only conversions from Circe to other types are available. Other types to Circe are not ready yet. ::: @@ -16,13 +16,16 @@ but more specific types like `integer`, `float` or others do not exists. Because of that, any conversion between types will convert `number` into `Decimal` types, as `Decimal` is the only one that can ensure the precision of any arbitrary number ::: -
About Circe and private types
-

-Circe has more specific types than `JNumber`, like `JLong`, `JDouble` and other, -but all of them are private to Circe itself, so we can not use them, not even for matching types during conversions. -In any case, even if we were able to use them, when parsing a JSON string (probably most of the cases) -we can not detect the specific types -

+
+ About Circe and private types +

+ Circe has more specific types than `JNumber`, like `JLong`, `JDouble` and others, + but all of them are private to Circe itself, so we cannot use them, not even for matching types during conversions. + In any case, even if we were able to use them, when parsing a JSON string (probably the most common case) + we cannot detect the specific types; we could only guess them from the data. +

+
+ ```scala import io.Circe.Json diff --git a/docs/intro.md b/docs/intro.md index fbccb59e..68934679 100644 --- a/docs/intro.md +++ b/docs/intro.md @@ -33,12 +33,12 @@ or a BigQuery table into a Cassandra table without having code that relates thos ### Available conversions: -| From / To |Scala Types |BigQuery |Spark |Cassandra | Circe (JSON) | -|--------------|:----------------:|:----------------:|:----------------:|:----------------:|:------------:| -| Scala Types | - |:white_check_mark:|:white_check_mark:|:white_check_mark:| | -| BigQuery | | - |:white_check_mark:|:white_check_mark:| | -| Spark | |:white_check_mark:| - |:white_check_mark:| | -| Cassandra | |:white_check_mark:|:white_check_mark:| - | | -| Circe (JSON) | |:white_check_mark:|:white_check_mark:|:white_check_mark:| | +| From / To | |Scala Types |BigQuery |Spark |Cassandra | Circe (JSON) | +|:---------------:|:---------------------------------------------------------------------------------------------------------------------:|:----------------:|:----------------:|:----------------:|:----------------:|:------------:| +| Scala | | - |:white_check_mark:|:white_check_mark:|:white_check_mark:| | +| BigQuery | | | - |:white_check_mark:|:white_check_mark:| | +| Spark | | |:white_check_mark:| - |:white_check_mark:| | +| Cassandra | | |:white_check_mark:|:white_check_mark:| - | | +| Circe (JSON) | | |:white_check_mark:|:white_check_mark:|:white_check_mark:| | diff --git a/project/plugins.sbt b/project/plugins.sbt index 3c2f8ba3..0833a6e0 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,4 +1,4 @@ -addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.9.3") +addSbtPlugin("org.scoverage" % "sbt-scoverage" % "2.0.3") addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "3.9.13") addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.1.2") addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "1.2.0") diff --git a/website/static/img/logos/bigquery.png b/website/static/img/logos/bigquery.png new file mode 100644 index 
00000000..a89625c8 Binary files /dev/null and b/website/static/img/logos/bigquery.png differ diff --git a/website/static/img/logos/cassandra.png b/website/static/img/logos/cassandra.png new file mode 100644 index 00000000..49434aef Binary files /dev/null and b/website/static/img/logos/cassandra.png differ diff --git a/website/static/img/logos/circe.png b/website/static/img/logos/circe.png new file mode 100644 index 00000000..23dbe5fc Binary files /dev/null and b/website/static/img/logos/circe.png differ diff --git a/website/static/img/logos/scala.png b/website/static/img/logos/scala.png new file mode 100644 index 00000000..3e67cafc Binary files /dev/null and b/website/static/img/logos/scala.png differ diff --git a/website/static/img/logos/spark.png b/website/static/img/logos/spark.png new file mode 100644 index 00000000..ec8a301c Binary files /dev/null and b/website/static/img/logos/spark.png differ