diff --git a/.librarian/state.yaml b/.librarian/state.yaml index 8d67105e3..71bcf16ad 100644 --- a/.librarian/state.yaml +++ b/.librarian/state.yaml @@ -1,7 +1,7 @@ image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:c8612d3fffb3f6a32353b2d1abd16b61e87811866f7ec9d65b59b02eb452a620 libraries: - id: google-cloud-bigquery - version: 3.39.0 + version: 3.40.0 last_generated_commit: "" apis: [] source_roots: diff --git a/CHANGELOG.md b/CHANGELOG.md index 4cf177cc5..242165933 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,14 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.40.0](https://github.com/googleapis/google-cloud-python/compare/google-cloud-bigquery-v3.39.0...google-cloud-bigquery-v3.40.0) (2026-01-08) + + +### Features + +* support load_table and list_rows with picosecond timestamp (#2351) ([46764a59ca7a21ed14ad2c91eb7f98c302736c22](https://github.com/googleapis/google-cloud-python/commit/46764a59ca7a21ed14ad2c91eb7f98c302736c22)) +* support timestamp_precision in table schema (#2333) ([8d5785aea50b9f9e5b13bd4c91e8a08d6dac7778](https://github.com/googleapis/google-cloud-python/commit/8d5785aea50b9f9e5b13bd4c91e8a08d6dac7778)) + ## [3.39.0](https://github.com/googleapis/google-cloud-python/compare/google-cloud-bigquery-v3.38.0...google-cloud-bigquery-v3.39.0) (2025-12-12) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index c7d7705e0..a35fe1677 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -32,6 +32,8 @@ from google.cloud._helpers import _RFC3339_MICROS from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _to_bytes +from google.cloud.bigquery import enums + from google.auth import credentials as ga_credentials # type: ignore from google.api_core import client_options as client_options_lib @@ -252,11 +254,15 @@ def bytes_to_py(self, value, field): if _not_null(value, 
field): return base64.standard_b64decode(_to_bytes(value)) - def timestamp_to_py(self, value, field): - """Coerce 'value' to a datetime, if set or not nullable.""" + def timestamp_to_py(self, value, field) -> Union[datetime.datetime, str, None]: + """Coerce 'value' to a datetime, if set or not nullable. If timestamp + is of picosecond precision, preserve the string format.""" + if field.timestamp_precision == enums.TimestampPrecision.PICOSECOND: + return value if _not_null(value, field): # value will be a integer in seconds, to microsecond precision, in UTC. return _datetime_from_microseconds(int(value)) + return None def datetime_to_py(self, value, field): """Coerce 'value' to a datetime, if set or not nullable. diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 27e90246f..30f89759e 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -49,6 +49,7 @@ import google.api_core.exceptions as core_exceptions from google.api_core import retry as retries +from google.cloud.bigquery import enums from google.cloud.bigquery import job import google.cloud.bigquery.job.query import google.cloud.bigquery.query @@ -265,6 +266,7 @@ def _to_query_request( query: str, location: Optional[str] = None, timeout: Optional[float] = None, + timestamp_precision: Optional[enums.TimestampPrecision] = None, ) -> Dict[str, Any]: """Transform from Job resource to QueryRequest resource. @@ -285,10 +287,15 @@ def _to_query_request( # Default to standard SQL. request_body.setdefault("useLegacySql", False) - # Since jobs.query can return results, ensure we use the lossless timestamp - # format. See: https://github.com/googleapis/python-bigquery/issues/395 request_body.setdefault("formatOptions", {}) - request_body["formatOptions"]["useInt64Timestamp"] = True # type: ignore + + # Cannot specify both use_int64_timestamp and timestamp_output_format. 
+ if timestamp_precision == enums.TimestampPrecision.PICOSECOND: + request_body["formatOptions"]["timestampOutputFormat"] = "ISO8601_STRING" # type: ignore + else: + # Since jobs.query can return results, ensure we use the lossless + # timestamp format. See: https://github.com/googleapis/python-bigquery/issues/395 + request_body["formatOptions"]["useInt64Timestamp"] = True # type: ignore if timeout is not None: # Subtract a buffer for context switching, network latency, etc. @@ -370,6 +377,7 @@ def query_jobs_query( retry: retries.Retry, timeout: Optional[float], job_retry: Optional[retries.Retry], + timestamp_precision: Optional[enums.TimestampPrecision] = None, ) -> job.QueryJob: """Initiate a query using jobs.query with jobCreationMode=JOB_CREATION_REQUIRED. @@ -377,7 +385,11 @@ def query_jobs_query( """ path = _to_query_path(project) request_body = _to_query_request( - query=query, job_config=job_config, location=location, timeout=timeout + query=query, + job_config=job_config, + location=location, + timeout=timeout, + timestamp_precision=timestamp_precision, ) def do_query(): diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index c50e7c2d7..e3a3cdb11 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3469,6 +3469,8 @@ def query( timeout: TimeoutType = DEFAULT_TIMEOUT, job_retry: Optional[retries.Retry] = DEFAULT_JOB_RETRY, api_method: Union[str, enums.QueryApiMethod] = enums.QueryApiMethod.INSERT, + *, + timestamp_precision: Optional[enums.TimestampPrecision] = None, ) -> job.QueryJob: """Run a SQL query. @@ -3524,6 +3526,11 @@ def query( See :class:`google.cloud.bigquery.enums.QueryApiMethod` for details on the difference between the query start methods. + timestamp_precision (Optional[enums.TimestampPrecision]): + [Private Preview] If set to `enums.TimestampPrecision.PICOSECOND`, + timestamp columns of picosecond precision will be returned with + full precision. 
Otherwise, values are truncated to microsecond + precision. Only applies when api_method == `enums.QueryApiMethod.QUERY`. Returns: google.cloud.bigquery.job.QueryJob: A new query job instance. @@ -3543,6 +3550,15 @@ def query( "`job_id` was provided, but the 'QUERY' `api_method` was requested." ) + if ( + timestamp_precision == enums.TimestampPrecision.PICOSECOND + and api_method != enums.QueryApiMethod.QUERY + ): + raise ValueError( + "Picosecond Timestamp is only supported when `api_method " + "== enums.QueryApiMethod.QUERY`." + ) + if project is None: project = self.project @@ -3568,6 +3584,7 @@ def query( retry, timeout, job_retry, + timestamp_precision=timestamp_precision, ) elif api_method == enums.QueryApiMethod.INSERT: return _job_helpers.query_jobs_insert( @@ -4062,6 +4079,8 @@ def list_rows( page_size: Optional[int] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, + *, + timestamp_precision: Optional[enums.TimestampPrecision] = None, ) -> RowIterator: """List the rows of the table. @@ -4110,6 +4129,11 @@ def list_rows( before using ``retry``. If multiple requests are made under the hood, ``timeout`` applies to each individual request. + timestamp_precision (Optional[enums.TimestampPrecision]): + [Private Preview] If set to `enums.TimestampPrecision.PICOSECOND`, + timestamp columns of picosecond precision will be returned with + full precision. Otherwise, values are truncated to microsecond + precision. Returns: google.cloud.bigquery.table.RowIterator: @@ -4143,7 +4167,12 @@ def list_rows( if start_index is not None: params["startIndex"] = start_index - params["formatOptions.useInt64Timestamp"] = True + # Cannot specify both use_int64_timestamp and timestamp_output_format.
+ if timestamp_precision == enums.TimestampPrecision.PICOSECOND: + params["formatOptions.timestampOutputFormat"] = "ISO8601_STRING" + else: + params["formatOptions.useInt64Timestamp"] = True + row_iterator = RowIterator( client=self, api_request=functools.partial(self._call_api, retry, timeout=timeout), diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 1b1eb241a..dc67f9674 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -480,3 +480,18 @@ class SourceColumnMatch(str, enum.Enum): NAME = "NAME" """Matches by name. This reads the header row as column names and reorders columns to match the field names in the schema.""" + + +class TimestampPrecision(enum.Enum): + """Precision (maximum number of total digits in base 10) for seconds of + TIMESTAMP type.""" + + MICROSECOND = None + """ + Default, for TIMESTAMP type with microsecond precision. + """ + + PICOSECOND = 12 + """ + For TIMESTAMP type with picosecond precision. + """ diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 8cdb779ac..9c74f7124 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -759,6 +759,36 @@ def column_name_character_map(self, value: Optional[str]): value = ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED self._set_sub_prop("columnNameCharacterMap", value) + @property + def timestamp_target_precision(self) -> Optional[List[int]]: + """Optional[list[int]]: [Private Preview] Precisions (maximum number of + total digits in base 10) for seconds of TIMESTAMP types that are + allowed to the destination table for autodetection mode. + + Available for the formats: CSV. + + For the CSV Format, Possible values include: + None, [], or [6]: timestamp(6) for all auto detected TIMESTAMP + columns. + [6, 12]: timestamp(6) for all auto detected TIMESTAMP columns that + have less than 6 digits of subseconds. 
timestamp(12) for all auto + detected TIMESTAMP columns that have more than 6 digits of + subseconds. + [12]: timestamp(12) for all auto detected TIMESTAMP columns. + + The order of the elements in this array is ignored. Inputs that have + higher precision than the highest target precision in this array will + be truncated. + """ + return self._get_sub_prop("timestampTargetPrecision") + + @timestamp_target_precision.setter + def timestamp_target_precision(self, value: Optional[List[int]]): + if value is not None: + self._set_sub_prop("timestampTargetPrecision", value) + else: + self._del_sub_prop("timestampTargetPrecision") + class LoadJob(_AsyncJob): """Asynchronous job for loading data into a table. diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 456730b00..1809df21f 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -196,6 +196,14 @@ class SchemaField(object): Only valid for top-level schema fields (not nested fields). If the type is FOREIGN, this field is required. + + timestamp_precision: Optional[enums.TimestampPrecision] + Precision (maximum number of total digits in base 10) for seconds + of TIMESTAMP type. + + Defaults to `enums.TimestampPrecision.MICROSECOND` (`None`) for + microsecond precision. Use `enums.TimestampPrecision.PICOSECOND` + (`12`) for picosecond precision. 
""" def __init__( @@ -213,6 +221,7 @@ def __init__( range_element_type: Union[FieldElementType, str, None] = None, rounding_mode: Union[enums.RoundingMode, str, None] = None, foreign_type_definition: Optional[str] = None, + timestamp_precision: Optional[enums.TimestampPrecision] = None, ): self._properties: Dict[str, Any] = { "name": name, @@ -237,6 +246,13 @@ def __init__( if isinstance(policy_tags, PolicyTagList) else None ) + if isinstance(timestamp_precision, enums.TimestampPrecision): + self._properties["timestampPrecision"] = timestamp_precision.value + elif timestamp_precision is not None: + raise ValueError( + "timestamp_precision must be class enums.TimestampPrecision " + f"or None, got {type(timestamp_precision)} instead." + ) if isinstance(range_element_type, str): self._properties["rangeElementType"] = {"type": range_element_type} if isinstance(range_element_type, FieldElementType): @@ -254,15 +270,22 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": """Return a ``SchemaField`` object deserialized from a dictionary. Args: - api_repr (Mapping[str, str]): The serialized representation - of the SchemaField, such as what is output by - :meth:`to_api_repr`. + api_repr (dict): The serialized representation of the SchemaField, + such as what is output by :meth:`to_api_repr`. Returns: google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object. """ placeholder = cls("this_will_be_replaced", "PLACEHOLDER") + # The API would return a string despite we send an integer. To ensure + # success of resending received schema, we convert string to integer + # to ensure consistency. + try: + api_repr["timestampPrecision"] = int(api_repr["timestampPrecision"]) + except (TypeError, KeyError): + pass + # Note: we don't make a copy of api_repr because this can cause # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD # fields. 
See https://github.com/googleapis/python-bigquery/issues/6 @@ -374,6 +397,16 @@ def policy_tags(self): resource = self._properties.get("policyTags") return PolicyTagList.from_api_repr(resource) if resource is not None else None + @property + def timestamp_precision(self) -> enums.TimestampPrecision: + """Precision (maximum number of total digits in base 10) for seconds of + TIMESTAMP type. + + Returns: + enums.TimestampPrecision: value of TimestampPrecision. + """ + return enums.TimestampPrecision(self._properties.get("timestampPrecision")) + def to_api_repr(self) -> dict: """Return a dictionary representing this schema field. @@ -408,6 +441,8 @@ def _key(self): None if self.policy_tags is None else tuple(sorted(self.policy_tags.names)) ) + timestamp_precision = self._properties.get("timestampPrecision") + return ( self.name, field_type, @@ -417,6 +452,7 @@ def _key(self): self.description, self.fields, policy_tags, + timestamp_precision, ) def to_standard_sql(self) -> standard_sql.StandardSqlField: @@ -467,10 +503,9 @@ def __hash__(self): return hash(self._key()) def __repr__(self): - key = self._key() - policy_tags = key[-1] + *initial_tags, policy_tags, timestamp_precision_tag = self._key() policy_tags_inst = None if policy_tags is None else PolicyTagList(policy_tags) - adjusted_key = key[:-1] + (policy_tags_inst,) + adjusted_key = (*initial_tags, policy_tags_inst, timestamp_precision_tag) return f"{self.__class__.__name__}{adjusted_key}" @@ -530,9 +565,11 @@ def _to_schema_fields(schema): if isinstance(schema, Sequence): # Input is a Sequence (e.g. 
a list): Process and return a list of SchemaFields return [ - field - if isinstance(field, SchemaField) - else SchemaField.from_api_repr(field) + ( + field + if isinstance(field, SchemaField) + else SchemaField.from_api_repr(field) + ) for field in schema ] diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 1d5e35889..6b0fa0fba 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.39.0" +__version__ = "3.40.0" diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 8955e0cfd..ec5c7f2af 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -41,4 +41,4 @@ Shapely==2.1.2; python_version >= '3.10' six==1.17.0 typing-extensions==4.15.0 typing-inspect==0.9.0 -urllib3==2.5.0 +urllib3==2.6.0 diff --git a/tests/data/pico.csv b/tests/data/pico.csv new file mode 100644 index 000000000..bcc853040 --- /dev/null +++ b/tests/data/pico.csv @@ -0,0 +1,3 @@ +2025-01-01T00:00:00.123456789012Z +2025-01-02T00:00:00.123456789012Z +2025-01-03T00:00:00.123456789012Z \ No newline at end of file diff --git a/tests/data/pico_schema.json b/tests/data/pico_schema.json new file mode 100644 index 000000000..8227917ea --- /dev/null +++ b/tests/data/pico_schema.json @@ -0,0 +1,8 @@ +[ + { + "name": "pico_col", + "type": "TIMESTAMP", + "mode": "NULLABLE", + "timestampPrecision": "12" + } +] diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 8efa042af..123aeb6e7 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -98,12 +98,14 @@ def load_scalars_table( data_path: str = "scalars.jsonl", source_format=enums.SourceFormat.NEWLINE_DELIMITED_JSON, schema_source="scalars_schema.json", + timestamp_target_precision=None, ) -> str: schema = bigquery_client.schema_from_json(DATA_DIR / 
schema_source) table_id = data_path.replace(".", "_") + hex(random.randrange(1000000)) job_config = bigquery.LoadJobConfig() job_config.schema = schema job_config.source_format = source_format + job_config.timestamp_target_precision = timestamp_target_precision full_table_id = f"{project_id}.{dataset_id}.{table_id}" with open(DATA_DIR / data_path, "rb") as data_file: job = bigquery_client.load_table_from_file( @@ -169,6 +171,23 @@ def scalars_table_csv( bigquery_client.delete_table(full_table_id, not_found_ok=True) +@pytest.fixture(scope="session") +def scalars_table_pico( + bigquery_client: bigquery.Client, project_id: str, dataset_id: str +): + full_table_id = load_scalars_table( + bigquery_client, + project_id, + dataset_id, + data_path="pico.csv", + source_format=enums.SourceFormat.CSV, + schema_source="pico_schema.json", + timestamp_target_precision=[12], + ) + yield full_table_id + bigquery_client.delete_table(full_table_id, not_found_ok=True) + + @pytest.fixture def test_table_name(request, replace_non_anum=re.compile(r"[^a-zA-Z0-9_]").sub): return replace_non_anum("_", request.node.name) diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 6584ca03c..7e773598e 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -74,6 +74,16 @@ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), ] +SCHEMA_PICOSECOND = [ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField( + "time_pico", + "TIMESTAMP", + mode="REQUIRED", + timestamp_precision=enums.TimestampPrecision.PICOSECOND, + ), +] CLUSTERING_SCHEMA = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), @@ -631,6 +641,19 @@ def test_create_table_w_time_partitioning_w_clustering_fields(self): self.assertEqual(time_partitioning.field, 
"transaction_time") self.assertEqual(table.clustering_fields, ["user_email", "store_code"]) + def test_create_table_w_picosecond_timestamp(self): + dataset = self.temp_dataset(_make_dataset_id("create_table")) + table_id = "test_table" + table_arg = Table(dataset.table(table_id), schema=SCHEMA_PICOSECOND) + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table)) + self.assertEqual(table.table_id, table_id) + self.assertEqual(table.schema, SCHEMA_PICOSECOND) + def test_delete_dataset_with_string(self): dataset_id = _make_dataset_id("delete_table_true_with_string") project = Config.CLIENT.project @@ -1272,6 +1295,29 @@ def test_load_table_from_json_schema_autodetect_table_exists(self): self.assertEqual(tuple(table.schema), table_schema) self.assertEqual(table.num_rows, 2) + def test_load_table_from_csv_w_picosecond_timestamp(self): + dataset_id = _make_dataset_id("bq_system_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.load_table_from_json_basic_use".format( + Config.CLIENT.project, dataset_id + ) + + table_schema = Config.CLIENT.schema_from_json(DATA_PATH / "pico_schema.json") + # create the table before loading so that the column order is predictable + table = helpers.retry_403(Config.CLIENT.create_table)( + Table(table_id, schema=table_schema) + ) + self.to_delete.insert(0, table) + + # do not pass an explicit job config to trigger automatic schema detection + with open(DATA_PATH / "pico.csv", "rb") as f: + load_job = Config.CLIENT.load_table_from_file(f, table_id) + load_job.result() + + table = Config.CLIENT.get_table(table) + self.assertEqual(list(table.schema), table_schema) + self.assertEqual(table.num_rows, 3) + def test_load_avro_from_uri_then_dump_table(self): from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import SourceFormat diff --git a/tests/system/test_list_rows.py 
b/tests/system/test_list_rows.py index 108b842ce..02b07744b 100644 --- a/tests/system/test_list_rows.py +++ b/tests/system/test_list_rows.py @@ -132,3 +132,23 @@ def test_list_rows_range(bigquery_client: bigquery.Client, scalars_table_csv: st row_null = rows[1] assert row_null["range_date"] is None + + +def test_list_rows_pico(bigquery_client: bigquery.Client, scalars_table_pico: str): + rows = bigquery_client.list_rows( + scalars_table_pico, timestamp_precision=enums.TimestampPrecision.PICOSECOND + ) + rows = list(rows) + row = rows[0] + assert row["pico_col"] == "2025-01-01T00:00:00.123456789012Z" + + +def test_list_rows_pico_truncate( + bigquery_client: bigquery.Client, scalars_table_pico: str +): + # For a picosecond timestamp column, if the caller does not explicitly set + # timestamp_precision, the value is truncated to microsecond precision. + rows = bigquery_client.list_rows(scalars_table_pico) + rows = list(rows) + row = rows[0] + assert row["pico_col"] == "1735689600123456" diff --git a/tests/system/test_query.py b/tests/system/test_query.py index d94a117e3..b8bb06a4c 100644 --- a/tests/system/test_query.py +++ b/tests/system/test_query.py @@ -21,6 +21,7 @@ import pytest from google.cloud import bigquery +from google.cloud.bigquery import enums from google.cloud.bigquery.query import ArrayQueryParameter from google.cloud.bigquery.query import ScalarQueryParameter from google.cloud.bigquery.query import ScalarQueryParameterType @@ -546,3 +547,15 @@ def test_session(bigquery_client: bigquery.Client, query_api_method: str): assert len(rows) == 1 assert rows[0][0] == 5 + + +def test_query_picosecond(bigquery_client: bigquery.Client): + job = bigquery_client.query( + "SELECT CAST('2025-10-20' AS TIMESTAMP(12));", + api_method="QUERY", + timestamp_precision=enums.TimestampPrecision.PICOSECOND, + ) + + result = job.result() + rows = list(result) + assert rows[0][0] == "2025-10-20T00:00:00.000000000000Z" diff --git a/tests/unit/_helpers/test_cell_data_parser.py
b/tests/unit/_helpers/test_cell_data_parser.py index 14721a26c..f75e63b48 100644 --- a/tests/unit/_helpers/test_cell_data_parser.py +++ b/tests/unit/_helpers/test_cell_data_parser.py @@ -290,17 +290,26 @@ def test_bytes_to_py_w_base64_encoded_text(object_under_test): def test_timestamp_to_py_w_string_int_value(object_under_test): from google.cloud._helpers import _EPOCH - coerced = object_under_test.timestamp_to_py("1234567", object()) + coerced = object_under_test.timestamp_to_py("1234567", create_field()) assert coerced == _EPOCH + datetime.timedelta(seconds=1, microseconds=234567) def test_timestamp_to_py_w_int_value(object_under_test): from google.cloud._helpers import _EPOCH - coerced = object_under_test.timestamp_to_py(1234567, object()) + coerced = object_under_test.timestamp_to_py(1234567, create_field()) assert coerced == _EPOCH + datetime.timedelta(seconds=1, microseconds=234567) +def test_timestamp_to_py_w_picosecond_precision(object_under_test): + from google.cloud.bigquery import enums + + pico_schema = create_field(timestamp_precision=enums.TimestampPrecision.PICOSECOND) + pico_timestamp = "2025-01-01T00:00:00.123456789012Z" + coerced = object_under_test.timestamp_to_py(pico_timestamp, pico_schema) + assert coerced == pico_timestamp + + def test_datetime_to_py_w_string_value(object_under_test): coerced = object_under_test.datetime_to_py("2016-12-02T18:51:33", object()) assert coerced == datetime.datetime(2016, 12, 2, 18, 51, 33) diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 27d3cead1..2e046bfbf 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -1061,9 +1061,40 @@ def test_column_name_character_map_none(self): "parquetOptions": {"enableListInference": True}, "columnNameCharacterMap": "V2", "someNewField": "some-value", + "timestampTargetPrecision": [6, 12], } } + def test_timestamp_target_precision_missing(self): + config = self._get_target_class()() + 
self.assertIsNone(config.timestamp_target_precision) + + def test_timestamp_target_precision_hit(self): + timestamp_target_precision = [6, 12] + config = self._get_target_class()() + config._properties["load"][ + "timestampTargetPrecision" + ] = timestamp_target_precision + self.assertEqual(config.timestamp_target_precision, timestamp_target_precision) + + def test_timestamp_target_precision_setter(self): + timestamp_target_precision = [6, 12] + config = self._get_target_class()() + config.timestamp_target_precision = timestamp_target_precision + self.assertEqual( + config._properties["load"]["timestampTargetPrecision"], + timestamp_target_precision, + ) + + def test_timestamp_target_precision_setter_w_none(self): + timestamp_target_precision = [6, 12] + config = self._get_target_class()() + config._properties["load"][ + "timestampTargetPrecision" + ] = timestamp_target_precision + config.timestamp_target_precision = None + self.assertFalse("timestampTargetPrecision" in config._properties["load"]) + def test_from_api_repr(self): from google.cloud.bigquery.job import ( CreateDisposition, @@ -1103,6 +1134,7 @@ def test_from_api_repr(self): self.assertTrue(config.parquet_options.enable_list_inference) self.assertEqual(config.column_name_character_map, ColumnNameCharacterMap.V2) self.assertEqual(config._properties["load"]["someNewField"], "some-value") + self.assertEqual(config.timestamp_target_precision, [6, 12]) def test_to_api_repr(self): from google.cloud.bigquery.job import ( @@ -1140,6 +1172,7 @@ def test_to_api_repr(self): config.parquet_options = parquet_options config.column_name_character_map = ColumnNameCharacterMap.V2 config._properties["load"]["someNewField"] = "some-value" + config.timestamp_target_precision = [6, 12] api_repr = config.to_api_repr() diff --git a/tests/unit/test__job_helpers.py b/tests/unit/test__job_helpers.py index 10cbefe13..19390c7ec 100644 --- a/tests/unit/test__job_helpers.py +++ b/tests/unit/test__job_helpers.py @@ -335,6 +335,7 @@ 
def test_query_jobs_query_defaults(): assert request["location"] == "asia-northeast1" assert request["formatOptions"]["useInt64Timestamp"] is True assert "timeoutMs" not in request + assert "timestampOutputFormat" not in request["formatOptions"] def test_query_jobs_query_sets_format_options(): @@ -400,6 +401,35 @@ def test_query_jobs_query_sets_timeout(timeout, expected_timeout): assert request["timeoutMs"] == expected_timeout +def test_query_jobs_query_picosecond(): + mock_client = mock.create_autospec(Client) + mock_retry = mock.create_autospec(retries.Retry) + mock_job_retry = mock.create_autospec(retries.Retry) + mock_client._call_api.return_value = { + "jobReference": { + "projectId": "test-project", + "jobId": "abc", + "location": "asia-northeast1", + } + } + _job_helpers.query_jobs_query( + mock_client, + "SELECT * FROM test", + None, + "asia-northeast1", + "test-project", + mock_retry, + None, + mock_job_retry, + enums.TimestampPrecision.PICOSECOND, + ) + + _, call_kwargs = mock_client._call_api.call_args + request = call_kwargs["data"] + assert "useInt64Timestamp" not in request["formatOptions"] + assert request["formatOptions"]["timestampOutputFormat"] == "ISO8601_STRING" + + def test_query_and_wait_uses_jobs_insert(): """With unsupported features, call jobs.insert instead of jobs.query.""" client = mock.create_autospec(Client) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 213f382dc..1c4a9badb 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -55,7 +55,7 @@ from google.cloud import bigquery from google.cloud.bigquery.dataset import DatasetReference, Dataset -from google.cloud.bigquery.enums import UpdateMode, DatasetView +from google.cloud.bigquery.enums import UpdateMode, DatasetView, TimestampPrecision from google.cloud.bigquery import exceptions from google.cloud.bigquery import ParquetOptions import google.cloud.bigquery.retry @@ -5214,6 +5214,56 @@ def test_query_w_query_parameters(self): }, ) + def 
test_query_pico_timestamp(self): + query = "select *;" + response = { + "jobReference": { + "projectId": self.PROJECT, + "location": "EU", + "jobId": "abcd", + }, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(response) + + client.query( + query, + location="EU", + api_method="QUERY", + timestamp_precision=TimestampPrecision.PICOSECOND, + ) + + # Check that query actually starts the job. + expected_resource = { + "query": query, + "useLegacySql": False, + "location": "EU", + "formatOptions": {"timestampOutputFormat": "ISO8601_STRING"}, + "requestId": mock.ANY, + } + conn.api_request.assert_called_once_with( + method="POST", + path=f"/projects/{self.PROJECT}/queries", + data=expected_resource, + timeout=None, + ) + + def test_query_pico_timestamp_insert_error(self): + query = "select *;" + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + with pytest.raises(ValueError, match="Picosecond Timestamp is only"): + client.query( + query, + location="EU", + api_method="INSERT", + timestamp_precision=TimestampPrecision.PICOSECOND, + ) + def test_query_job_rpc_fail_w_random_error(self): from google.api_core.exceptions import Unknown from google.cloud.bigquery.job import QueryJob @@ -6817,6 +6867,39 @@ def test_list_rows(self): timeout=7.5, ) + def test_list_rows_pico_timestamp(self): + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table + + PATH = "projects/%s/datasets/%s/tables/%s/data" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection({}, {}) + pico_col = SchemaField( + "full_name", + "TIMESTAMP", + mode="REQUIRED", + 
timestamp_precision=TimestampPrecision.PICOSECOND, + ) + table = Table(self.TABLE_REF, schema=[pico_col]) + + iterator = client.list_rows( + table, timestamp_precision=TimestampPrecision.PICOSECOND + ) + next(iterator.pages) + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % PATH, + query_params={"formatOptions.timestampOutputFormat": "ISO8601_STRING"}, + timeout=None, + ) + def test_list_rows_w_start_index_w_page_size(self): from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index c63a8312c..f61b22035 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -52,6 +52,9 @@ def test_constructor_defaults(self): self.assertIsNone(field.default_value_expression) self.assertEqual(field.rounding_mode, None) self.assertEqual(field.foreign_type_definition, None) + self.assertEqual( + field.timestamp_precision, enums.TimestampPrecision.MICROSECOND + ) def test_constructor_explicit(self): FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field" @@ -69,6 +72,7 @@ def test_constructor_explicit(self): default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION, rounding_mode=enums.RoundingMode.ROUNDING_MODE_UNSPECIFIED, foreign_type_definition="INTEGER", + timestamp_precision=enums.TimestampPrecision.PICOSECOND, ) self.assertEqual(field.name, "test") self.assertEqual(field.field_type, "STRING") @@ -87,6 +91,10 @@ def test_constructor_explicit(self): ) self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED") self.assertEqual(field.foreign_type_definition, "INTEGER") + self.assertEqual( + field.timestamp_precision, + enums.TimestampPrecision.PICOSECOND, + ) def test_constructor_explicit_none(self): field = self._make_one("test", "STRING", description=None, policy_tags=None) @@ -189,6 +197,23 @@ def test_to_api_repr_with_subfield(self): }, ) + def test_to_api_repr_w_timestamp_precision(self): + field 
= self._make_one( + "foo", + "TIMESTAMP", + "NULLABLE", + timestamp_precision=enums.TimestampPrecision.PICOSECOND, + ) + self.assertEqual( + field.to_api_repr(), + { + "mode": "NULLABLE", + "name": "foo", + "type": "TIMESTAMP", + "timestampPrecision": 12, + }, + ) + def test_from_api_repr(self): field = self._get_target_class().from_api_repr( { @@ -198,6 +223,7 @@ def test_from_api_repr(self): "name": "foo", "type": "record", "roundingMode": "ROUNDING_MODE_UNSPECIFIED", + "timestampPrecision": 12, } ) self.assertEqual(field.name, "foo") @@ -210,6 +236,10 @@ def test_from_api_repr(self): self.assertEqual(field.fields[0].mode, "NULLABLE") self.assertEqual(field.range_element_type, None) self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED") + self.assertEqual( + field.timestamp_precision, + enums.TimestampPrecision.PICOSECOND, + ) def test_from_api_repr_policy(self): field = self._get_target_class().from_api_repr( @@ -264,6 +294,17 @@ def test_from_api_repr_defaults(self): self.assertNotIn("policyTags", field._properties) self.assertNotIn("rangeElementType", field._properties) + def test_from_api_repr_timestamp_precision_str(self): + # The backend would return timestampPrecision field as a string, even + # if we send over an integer. This test verifies we manually converted + # it into integer to ensure resending could succeed. 
+ field = self._get_target_class().from_api_repr( + { + "timestampPrecision": "12", + } + ) + self.assertEqual(field._properties["timestampPrecision"], 12) + def test_name_property(self): name = "lemon-ness" schema_field = self._make_one(name, "INTEGER") @@ -323,6 +364,22 @@ def test_foreign_type_definition_property_str(self): schema_field._properties["foreignTypeDefinition"] = FOREIGN_TYPE_DEFINITION self.assertEqual(schema_field.foreign_type_definition, FOREIGN_TYPE_DEFINITION) + def test_timestamp_precision_unsupported_type(self): + with pytest.raises(ValueError) as e: + self._make_one("test", "TIMESTAMP", timestamp_precision=12) + + assert "timestamp_precision must be class enums.TimestampPrecision" in str( + e.value + ) + + def test_timestamp_precision_property(self): + TIMESTAMP_PRECISION = enums.TimestampPrecision.PICOSECOND + schema_field = self._make_one("test", "TIMESTAMP") + schema_field._properties[ + "timestampPrecision" + ] = enums.TimestampPrecision.PICOSECOND.value + self.assertEqual(schema_field.timestamp_precision, TIMESTAMP_PRECISION) + def test_to_standard_sql_simple_type(self): examples = ( # a few legacy types @@ -637,7 +694,9 @@ def test___hash__not_equals(self): def test___repr__(self): field1 = self._make_one("field1", "STRING") - expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, None, (), None)" + expected = ( + "SchemaField('field1', 'STRING', 'NULLABLE', None, None, (), None, None)" + ) self.assertEqual(repr(field1), expected) def test___repr__evaluable_no_policy_tags(self):