diff --git a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/_mutate_rows.py b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/_mutate_rows.py index 6efb9e5f25be..974e450d232b 100644 --- a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/_mutate_rows.py +++ b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/_mutate_rows.py @@ -22,10 +22,8 @@ import google.cloud.bigtable.data.exceptions as bt_exceptions import google.cloud.bigtable_v2.types.bigtable as types_pb from google.cloud.bigtable.data._cross_sync import CrossSync -from google.cloud.bigtable.data._helpers import ( - _attempt_timeout_generator, - _retry_exception_factory, -) +from google.cloud.bigtable.data._helpers import _attempt_timeout_generator +from google.cloud.bigtable.data._metrics import tracked_retry # mutate_rows requests are limited to this number of mutations from google.cloud.bigtable.data.mutations import ( @@ -34,6 +32,7 @@ ) if TYPE_CHECKING: + from google.cloud.bigtable.data._metrics import ActiveOperationMetric from google.cloud.bigtable.data.mutations import RowMutationEntry if CrossSync.is_async: @@ -72,6 +71,8 @@ class _MutateRowsOperationAsync: operation_timeout: the timeout to use for the entire operation, in seconds. attempt_timeout: the timeout to use for each mutate_rows attempt, in seconds. If not specified, the request will run until operation_timeout is reached. 
+ metric: the metric object representing the active operation + retryable_exceptions: a list of exceptions that should be retried """ @CrossSync.convert @@ -82,6 +83,7 @@ def __init__( mutation_entries: list["RowMutationEntry"], operation_timeout: float, attempt_timeout: float | None, + metric: ActiveOperationMetric, retryable_exceptions: Sequence[type[Exception]] = (), ): # check that mutations are within limits @@ -101,13 +103,12 @@ def __init__( # Entry level errors bt_exceptions._MutateRowsIncomplete, ) - sleep_generator = retries.exponential_sleep_generator(0.01, 2, 60) - self._operation = lambda: CrossSync.retry_target( - self._run_attempt, - self.is_retryable, - sleep_generator, - operation_timeout, - exception_factory=_retry_exception_factory, + self._operation = lambda: tracked_retry( + retry_fn=CrossSync.retry_target, + operation=metric, + target=self._run_attempt, + predicate=self.is_retryable, + timeout=operation_timeout, ) # initialize state self.timeout_generator = _attempt_timeout_generator( @@ -116,6 +117,8 @@ def __init__( self.mutations = [_EntryWithProto(m, m._to_pb()) for m in mutation_entries] self.remaining_indices = list(range(len(self.mutations))) self.errors: dict[int, list[Exception]] = {} + # set up metrics + self._operation_metric = metric @CrossSync.convert async def start(self): @@ -125,34 +128,35 @@ async def start(self): Raises: MutationsExceptionGroup: if any mutations failed """ - try: - # trigger mutate_rows - await self._operation() - except Exception as exc: - # exceptions raised by retryable are added to the list of exceptions for all unfinalized mutations - incomplete_indices = self.remaining_indices.copy() - for idx in incomplete_indices: - self._handle_entry_error(idx, exc) - finally: - # raise exception detailing incomplete mutations - all_errors: list[Exception] = [] - for idx, exc_list in self.errors.items(): - if len(exc_list) == 0: - raise core_exceptions.ClientError( - f"Mutation {idx} failed with no associated errors" 
+ with self._operation_metric: + try: + # trigger mutate_rows + await self._operation() + except Exception as exc: + # exceptions raised by retryable are added to the list of exceptions for all unfinalized mutations + incomplete_indices = self.remaining_indices.copy() + for idx in incomplete_indices: + self._handle_entry_error(idx, exc) + finally: + # raise exception detailing incomplete mutations + all_errors: list[Exception] = [] + for idx, exc_list in self.errors.items(): + if len(exc_list) == 0: + raise core_exceptions.ClientError( + f"Mutation {idx} failed with no associated errors" + ) + elif len(exc_list) == 1: + cause_exc = exc_list[0] + else: + cause_exc = bt_exceptions.RetryExceptionGroup(exc_list) + entry = self.mutations[idx].entry + all_errors.append( + bt_exceptions.FailedMutationEntryError(idx, entry, cause_exc) + ) + if all_errors: + raise bt_exceptions.MutationsExceptionGroup( + all_errors, len(self.mutations) ) - elif len(exc_list) == 1: - cause_exc = exc_list[0] - else: - cause_exc = bt_exceptions.RetryExceptionGroup(exc_list) - entry = self.mutations[idx].entry - all_errors.append( - bt_exceptions.FailedMutationEntryError(idx, entry, cause_exc) - ) - if all_errors: - raise bt_exceptions.MutationsExceptionGroup( - all_errors, len(self.mutations) - ) @CrossSync.convert async def _run_attempt(self): @@ -164,6 +168,8 @@ async def _run_attempt(self): retry after the attempt is complete GoogleAPICallError: if the gapic rpc fails """ + # register attempt start + self._operation_metric.start_attempt() request_entries = [self.mutations[idx].proto for idx in self.remaining_indices] # track mutations in this request that have not been finalized yet active_request_indices = { diff --git a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/_read_rows.py b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/_read_rows.py index f8e203bc10b3..ab7eb3ceccb3 100644 --- 
a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/_read_rows.py +++ b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/_read_rows.py @@ -15,16 +15,17 @@ from __future__ import annotations +import time from typing import TYPE_CHECKING, Sequence from google.api_core import retry as retries -from google.api_core.retry import exponential_sleep_generator +from grpc import StatusCode from google.cloud.bigtable.data._cross_sync import CrossSync from google.cloud.bigtable.data._helpers import ( _attempt_timeout_generator, - _retry_exception_factory, ) +from google.cloud.bigtable.data._metrics import tracked_retry from google.cloud.bigtable.data.exceptions import ( InvalidChunk, _ResetRow, @@ -38,6 +39,8 @@ from google.cloud.bigtable_v2.types import RowSet as RowSetPB if TYPE_CHECKING: + from google.cloud.bigtable.data._metrics import ActiveOperationMetric + if CrossSync.is_async: from google.cloud.bigtable.data._async.client import ( _DataApiTargetAsync as TargetType, @@ -68,6 +71,7 @@ class _ReadRowsOperationAsync: target: The table or view to send the request to operation_timeout: The total time to allow for the operation, in seconds attempt_timeout: The time to allow for each individual attempt, in seconds + metric: the metric object representing the active operation retryable_exceptions: A list of exceptions that should trigger a retry """ @@ -79,6 +83,7 @@ class _ReadRowsOperationAsync: "_predicate", "_last_yielded_row_key", "_remaining_count", + "_operation_metric", ) def __init__( @@ -87,6 +92,7 @@ def __init__( target: TargetType, operation_timeout: float, attempt_timeout: float, + metric: ActiveOperationMetric, retryable_exceptions: Sequence[type[Exception]] = (), ): self.attempt_timeout_gen = _attempt_timeout_generator( @@ -105,6 +111,7 @@ def __init__( self._predicate = retries.if_exception_type(*retryable_exceptions) self._last_yielded_row_key: bytes | None = None self._remaining_count: int | None = self.request.rows_limit or 
None + self._operation_metric = metric def start_operation(self) -> CrossSync.Iterable[Row]: """ @@ -113,12 +120,12 @@ def start_operation(self) -> CrossSync.Iterable[Row]: Yields: Row: The next row in the stream """ - return CrossSync.retry_target_stream( - self._read_rows_attempt, - self._predicate, - exponential_sleep_generator(0.01, 60, multiplier=2), - self.operation_timeout, - exception_factory=_retry_exception_factory, + return tracked_retry( + retry_fn=CrossSync.retry_target_stream, + operation=self._operation_metric, + target=self._read_rows_attempt, + predicate=self._predicate, + timeout=self.operation_timeout, ) def _read_rows_attempt(self) -> CrossSync.Iterable[Row]: @@ -131,6 +138,7 @@ def _read_rows_attempt(self) -> CrossSync.Iterable[Row]: Yields: Row: The next row in the stream """ + self._operation_metric.start_attempt() # revise request keys and ranges between attempts if self._last_yielded_row_key is not None: # if this is a retry, try to trim down the request to avoid ones we've already processed @@ -208,12 +216,11 @@ async def chunk_stream( raise InvalidChunk("emit count exceeds row limit") current_key = None - @staticmethod @CrossSync.convert( replace_symbols={"__aiter__": "__iter__", "__anext__": "__next__"}, ) async def merge_rows( - chunks: CrossSync.Iterable[ReadRowsResponsePB.CellChunk] | None, + self, chunks: CrossSync.Iterable[ReadRowsResponsePB.CellChunk] | None ) -> CrossSync.Iterable[Row]: """ Merge chunks into rows @@ -223,108 +230,125 @@ async def merge_rows( Yields: Row: the next row in the stream """ - if chunks is None: - return - it = chunks.__aiter__() - # For each row - while True: - try: - c = await it.__anext__() - except CrossSync.StopIteration: - # stream complete + try: + if chunks is None: + self._operation_metric.end_with_success() return - row_key = c.row_key - - if not row_key: - raise InvalidChunk("first row chunk is missing key") - - cells = [] - - # shared per cell storage - family: str | None = None - qualifier: 
bytes | None = None - - try: - # for each cell - while True: - if c.reset_row: - raise _ResetRow(c) - k = c.row_key - f = c.family_name.value - q = c.qualifier.value if c.HasField("qualifier") else None - if k and k != row_key: - raise InvalidChunk("unexpected new row key") - if f: - family = f - if q is not None: - qualifier = q - else: - raise InvalidChunk("new family without qualifier") - elif family is None: - raise InvalidChunk("missing family") - elif q is not None: - if family is None: - raise InvalidChunk("new qualifier without family") - qualifier = q - elif qualifier is None: - raise InvalidChunk("missing qualifier") - - ts = c.timestamp_micros - labels = c.labels if c.labels else [] - value = c.value - - # merge split cells - if c.value_size > 0: - buffer = [value] - while c.value_size > 0: - # throws when premature end - c = await it.__anext__() - - t = c.timestamp_micros - cl = c.labels - k = c.row_key - if ( - c.HasField("family_name") - and c.family_name.value != family - ): - raise InvalidChunk("family changed mid cell") - if ( - c.HasField("qualifier") - and c.qualifier.value != qualifier - ): - raise InvalidChunk("qualifier changed mid cell") - if t and t != ts: - raise InvalidChunk("timestamp changed mid cell") - if cl and cl != labels: - raise InvalidChunk("labels changed mid cell") - if k and k != row_key: - raise InvalidChunk("row key changed mid cell") - - if c.reset_row: - raise _ResetRow(c) - buffer.append(c.value) - value = b"".join(buffer) - cells.append( - Cell(value, row_key, family, qualifier, ts, list(labels)) - ) - if c.commit_row: - yield Row(row_key, cells) - break + it = chunks.__aiter__() + # For each row + while True: + try: c = await it.__anext__() - except _ResetRow as e: - c = e.chunk - if ( - c.row_key - or c.HasField("family_name") - or c.HasField("qualifier") - or c.timestamp_micros - or c.labels - or c.value - ): - raise InvalidChunk("reset row with data") - continue - except CrossSync.StopIteration: - raise 
InvalidChunk("premature end of stream") + except CrossSync.StopIteration: + # stream complete + self._operation_metric.end_with_success() + return + row_key = c.row_key + + if not row_key: + raise InvalidChunk("first row chunk is missing key") + + cells = [] + + # shared per cell storage + family: str | None = None + qualifier: bytes | None = None + + try: + # for each cell + while True: + if c.reset_row: + raise _ResetRow(c) + k = c.row_key + f = c.family_name.value + q = c.qualifier.value if c.HasField("qualifier") else None + if k and k != row_key: + raise InvalidChunk("unexpected new row key") + if f: + family = f + if q is not None: + qualifier = q + else: + raise InvalidChunk("new family without qualifier") + elif family is None: + raise InvalidChunk("missing family") + elif q is not None: + if family is None: + raise InvalidChunk("new qualifier without family") + qualifier = q + elif qualifier is None: + raise InvalidChunk("missing qualifier") + + ts = c.timestamp_micros + labels = c.labels if c.labels else [] + value = c.value + + # merge split cells + if c.value_size > 0: + buffer = [value] + while c.value_size > 0: + # throws when premature end + c = await it.__anext__() + + t = c.timestamp_micros + cl = c.labels + k = c.row_key + if ( + c.HasField("family_name") + and c.family_name.value != family + ): + raise InvalidChunk("family changed mid cell") + if ( + c.HasField("qualifier") + and c.qualifier.value != qualifier + ): + raise InvalidChunk("qualifier changed mid cell") + if t and t != ts: + raise InvalidChunk("timestamp changed mid cell") + if cl and cl != labels: + raise InvalidChunk("labels changed mid cell") + if k and k != row_key: + raise InvalidChunk("row key changed mid cell") + + if c.reset_row: + raise _ResetRow(c) + buffer.append(c.value) + value = b"".join(buffer) + cells.append( + Cell(value, row_key, family, qualifier, ts, list(labels)) + ) + if c.commit_row: + block_time = time.monotonic_ns() + yield Row(row_key, cells) + # most metric 
operations use setters, but this one updates + # the value directly to avoid extra overhead + if self._operation_metric.active_attempt is not None: + self._operation_metric.active_attempt.application_blocking_time_ns += ( # type: ignore + time.monotonic_ns() - block_time + ) + break + c = await it.__anext__() + except _ResetRow as e: + c = e.chunk + if ( + c.row_key + or c.HasField("family_name") + or c.HasField("qualifier") + or c.timestamp_micros + or c.labels + or c.value + ): + raise InvalidChunk("reset row with data") + continue + except CrossSync.StopIteration: + raise InvalidChunk("premature end of stream") + except GeneratorExit as close_exception: + # handle aclose() + self._operation_metric.end_with_status(StatusCode.CANCELLED) + raise close_exception + except Exception as generic_exception: + # handle exceptions in retry wrapper + raise generic_exception @staticmethod def _revise_request_rowset( diff --git a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/client.py b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/client.py index b2c13521240f..27fe61f25a7c 100644 --- a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/client.py +++ b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/client.py @@ -68,6 +68,10 @@ OperationType, tracked_retry, ) +from google.cloud.bigtable.data._metrics.handlers.gcp_exporter import ( + BigtableMetricsExporter, + GoogleCloudMetricsHandler, +) from google.cloud.bigtable.data.exceptions import ( FailedQueryShardError, ShardedReadRowsExceptionGroup, @@ -260,6 +264,12 @@ def __init__( "is the default." 
) self._is_closed = CrossSync.Event() + # create a metrics exporter using the same client configuration + self._gcp_metrics_exporter = BigtableMetricsExporter( + project_id=self.project, + credentials=credentials, + client_options=client_options, + ) self.transport = cast(TransportType, self._gapic_client.transport) # keep track of active instances to for warmup on channel refresh self._active_instances: Set[_WarmedInstanceKey] = set() @@ -1055,7 +1065,17 @@ def __init__( default_retryable_errors or () ) - self._metrics = BigtableClientSideMetricsController() + self._metrics = BigtableClientSideMetricsController( + handlers=[ + GoogleCloudMetricsHandler( + exporter=client._gcp_metrics_exporter, + instance_id=instance_id, + table_id=table_id, + app_profile_id=app_profile_id, + client_version=client._client_version(), + ) + ] + ) try: self._register_instance_future = CrossSync.create_task( @@ -1131,6 +1151,9 @@ async def read_rows_stream( self, operation_timeout=operation_timeout, attempt_timeout=attempt_timeout, + metric=self._metrics.create_operation( + OperationType.READ_ROWS, is_streaming=True + ), retryable_exceptions=retryable_excs, ) return row_merger.start_operation() @@ -1223,15 +1246,28 @@ async def read_row( if row_key is None: raise ValueError("row_key must be string or bytes") query = ReadRowsQuery(row_keys=row_key, row_filter=row_filter, limit=1) - results = await self.read_rows( + + operation_timeout, attempt_timeout = _get_timeouts( + operation_timeout, attempt_timeout, self + ) + retryable_excs = _get_retryable_errors(retryable_errors, self) + + row_merger = CrossSync._ReadRowsOperation( query, + self, operation_timeout=operation_timeout, attempt_timeout=attempt_timeout, - retryable_errors=retryable_errors, + metric=self._metrics.create_operation( + OperationType.READ_ROWS, is_streaming=False + ), + retryable_exceptions=retryable_excs, ) - if len(results) == 0: + results_generator = row_merger.start_operation() + try: + results = [a async for a in 
results_generator] + return results[0] + except IndexError: return None - return results[0] @CrossSync.convert async def read_rows_sharded( @@ -1370,20 +1406,17 @@ async def row_exists( from any retries that failed google.api_core.exceptions.GoogleAPIError: raised if the request encounters an unrecoverable error """ - if row_key is None: - raise ValueError("row_key must be string or bytes") - strip_filter = StripValueTransformerFilter(flag=True) limit_filter = CellsRowLimitFilter(1) chain_filter = RowFilterChain(filters=[limit_filter, strip_filter]) - query = ReadRowsQuery(row_keys=row_key, limit=1, row_filter=chain_filter) - results = await self.read_rows( - query, + result = await self.read_row( + row_key=row_key, + row_filter=chain_filter, operation_timeout=operation_timeout, attempt_timeout=attempt_timeout, retryable_errors=retryable_errors, ) - return len(results) > 0 + return result is not None @CrossSync.convert async def sample_row_keys( @@ -1643,6 +1676,7 @@ async def bulk_mutate_rows( mutation_entries, operation_timeout, attempt_timeout, + metric=self._metrics.create_operation(OperationType.BULK_MUTATE_ROWS), retryable_exceptions=retryable_excs, ) await operation.start() diff --git a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/mutations_batcher.py b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/mutations_batcher.py index 405983393ee7..13e45721245a 100644 --- a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/mutations_batcher.py +++ b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_async/mutations_batcher.py @@ -16,6 +16,7 @@ import atexit import concurrent.futures +import time import warnings from collections import deque from typing import TYPE_CHECKING, Sequence, cast @@ -26,6 +27,7 @@ _get_retryable_errors, _get_timeouts, ) +from google.cloud.bigtable.data._metrics import ActiveOperationMetric, OperationType from google.cloud.bigtable.data.exceptions import ( FailedMutationEntryError, 
MutationsExceptionGroup, @@ -36,6 +38,7 @@ ) if TYPE_CHECKING: + from google.cloud.bigtable.data._metrics import BigtableClientSideMetricsController from google.cloud.bigtable.data.mutations import RowMutationEntry if CrossSync.is_async: @@ -181,6 +184,24 @@ async def add_to_flow(self, mutations: RowMutationEntry | list[RowMutationEntry] ) yield mutations[start_idx:end_idx] + @CrossSync.convert(replace_symbols={"__anext__": "__next__"}) + async def add_to_flow_with_metrics( + self, + mutations: RowMutationEntry | list[RowMutationEntry], + metrics_controller: BigtableClientSideMetricsController, + ): + inner_generator = self.add_to_flow(mutations) + while True: + # start a new metric + metric = metrics_controller.create_operation(OperationType.BULK_MUTATE_ROWS) + flow_start_time = time.monotonic_ns() + try: + value = await inner_generator.__anext__() + except CrossSync.StopIteration: + return + metric.flow_throttling_time_ns = time.monotonic_ns() - flow_start_time + yield value, metric + @CrossSync.convert_class(sync_name="MutationsBatcher") class MutationsBatcherAsync: @@ -357,9 +378,14 @@ async def _flush_internal(self, new_entries: list[RowMutationEntry]): """ # flush new entries in_process_requests: list[CrossSync.Future[list[FailedMutationEntryError]]] = [] - async for batch in self._flow_control.add_to_flow(new_entries): + async for batch, metric in self._flow_control.add_to_flow_with_metrics( + new_entries, self._target._metrics + ): batch_task = CrossSync.create_task( - self._execute_mutate_rows, batch, sync_executor=self._sync_rpc_executor + self._execute_mutate_rows, + batch, + metric, + sync_executor=self._sync_rpc_executor, ) in_process_requests.append(batch_task) # wait for all inflight requests to complete @@ -370,7 +396,7 @@ async def _flush_internal(self, new_entries: list[RowMutationEntry]): @CrossSync.convert async def _execute_mutate_rows( - self, batch: list[RowMutationEntry] + self, batch: list[RowMutationEntry], metric: ActiveOperationMetric ) 
-> list[FailedMutationEntryError]: """ Helper to execute mutation operation on a batch @@ -391,6 +417,7 @@ async def _execute_mutate_rows( batch, operation_timeout=self._operation_timeout, attempt_timeout=self._attempt_timeout, + metric=metric, retryable_exceptions=self._retryable_errors, ) await operation.start() diff --git a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_metrics/__init__.py b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_metrics/__init__.py index 77b8580bc524..f58596ef8a88 100644 --- a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_metrics/__init__.py +++ b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_metrics/__init__.py @@ -19,6 +19,12 @@ OperationState, OperationType, ) +from google.cloud.bigtable.data._metrics.handlers.gcp_exporter import ( + GoogleCloudMetricsHandler, +) +from google.cloud.bigtable.data._metrics.handlers.opentelemetry import ( + OpenTelemetryMetricsHandler, +) from google.cloud.bigtable.data._metrics.metrics_controller import ( BigtableClientSideMetricsController, ) @@ -26,6 +32,8 @@ __all__ = ( "BigtableClientSideMetricsController", + "OpenTelemetryMetricsHandler", + "GoogleCloudMetricsHandler", "OperationType", "OperationState", "ActiveOperationMetric", diff --git a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_metrics/handlers/gcp_exporter.py b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_metrics/handlers/gcp_exporter.py new file mode 100644 index 000000000000..9ff989ac970c --- /dev/null +++ b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_metrics/handlers/gcp_exporter.py @@ -0,0 +1,268 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import time + +from google.api.distribution_pb2 import Distribution +from google.api.metric_pb2 import Metric as GMetric +from google.api.metric_pb2 import MetricDescriptor +from google.api.monitored_resource_pb2 import MonitoredResource +from google.api_core import gapic_v1 +from google.cloud.monitoring_v3 import ( + CreateTimeSeriesRequest, + MetricServiceClient, + Point, + TimeInterval, + TimeSeries, + TypedValue, +) +from google.protobuf.timestamp_pb2 import Timestamp +from opentelemetry.sdk.metrics import MeterProvider, view +from opentelemetry.sdk.metrics.export import ( + HistogramDataPoint, + MetricExporter, + MetricExportResult, + MetricsData, + NumberDataPoint, + PeriodicExportingMetricReader, +) + +from google.cloud.bigtable.data._metrics.handlers.opentelemetry import ( + OpenTelemetryMetricsHandler, + _OpenTelemetryInstruments, +) + +# create OpenTelemetry views for Bigtable metrics +# avoid reformatting into individual lines +# fmt: off +MILLIS_AGGREGATION = view.ExplicitBucketHistogramAggregation( + [ + 0, 1, 2, 3, 4, 5, 6, 8, 10, 13, 16, 20, 25, 30, 40, + 50, 65, 80, 100, 130, 160, 200, 250, 300, 400, 500, 650, + 800, 1_000, 2_000, 5_000, 10_000, 20_000, 50_000, 100_000, + 200_000, 400_000, 800_000, 1_600_000, 3_200_000 + ] +) +# fmt: on +COUNT_AGGREGATION = view.SumAggregation() +INSTRUMENT_NAMES = ( + "operation_latencies", + "first_response_latencies", + "attempt_latencies", + "retry_count", + "server_latencies", + "connectivity_error_count", + "application_latencies", + "throttling_latencies", +) 
+VIEW_LIST = [ + view.View( + instrument_name=n, + name=n, + aggregation=MILLIS_AGGREGATION + if n.endswith("latencies") + else COUNT_AGGREGATION, + ) + for n in INSTRUMENT_NAMES +] + + +class GoogleCloudMetricsHandler(OpenTelemetryMetricsHandler): + """ + Maintains an internal set of OpenTelemetry metrics for the Bigtable client library, + and periodically exports them to Google Cloud Monitoring. + + The OpenTelemetry metrics that are tracked are as follows: + - operation_latencies: latency of each client method call, over all of its attempts. + - first_response_latencies: latency of receiving the first row in a ReadRows operation. + - attempt_latencies: latency of each client attempt RPC. + - retry_count: Number of additional RPCs sent after the initial attempt. + - server_latencies: latency recorded on the server side for each attempt. + - connectivity_error_count: number of attempts that failed to reach Google's network. + - application_latencies: the time spent waiting for the application to process the next response. + - throttling_latencies: latency introduced by waiting when there are too many outstanding requests in a bulk operation. + + Args: + exporter: The exporter object used to write metrics to Cloud Monitoring. + Should correspond 1:1 with a bigtable client, and share auth configuration + export_interval: The interval (in seconds) at which to export metrics to Cloud Monitoring. 
+ *args: configuration positional arguments passed down to super class + **kwargs: configuration keyword arguments passed down to super class + """ + + def __init__(self, exporter, *args, export_interval=60, **kwargs): + # periodically executes exporter + gcp_reader = PeriodicExportingMetricReader( + exporter, export_interval_millis=export_interval * 1000 + ) + # use private meter provider to store instruments and views + self.meter_provider = MeterProvider( + metric_readers=[gcp_reader], views=VIEW_LIST + ) + otel = _OpenTelemetryInstruments(meter_provider=self.meter_provider) + super().__init__(*args, instruments=otel, **kwargs) + + def close(self): + self.meter_provider.shutdown() + + +class BigtableMetricsExporter(MetricExporter): + """ + OpenTelemetry Exporter implementation for sending metrics to Google Cloud Monitoring. + + We must use a custom exporter because the public one doesn't support writing to internal + metrics like `bigtable.googleapis.com/internal/client/` + + One exporter instance is created per Bigtable data client and is shared by the + GoogleCloudMetricsHandler of every table target created from that client; it uses the client's project_id. + + Args: + project_id: GCP project id to associate metrics with + """ + + def __init__(self, project_id: str, *client_args, **client_kwargs): + super().__init__() + self.client = MetricServiceClient(*client_args, **client_kwargs) + self.prefix = "bigtable.googleapis.com/internal/client" + self.project_id = project_id + + def export( + self, metrics_data: MetricsData, timeout_millis: float = 10_000, **kwargs + ) -> MetricExportResult: + """ + Write a set of metrics to Cloud Monitoring. 
+ This method is called by the OpenTelemetry SDK + """ + deadline = time.time() + (timeout_millis / 1000) + metric_kind = MetricDescriptor.MetricKind.CUMULATIVE + all_series: list[TimeSeries] = [] + # process each metric from OTel format into Cloud Monitoring format + for resource_metric in metrics_data.resource_metrics: + for scope_metric in resource_metric.scope_metrics: + for metric in scope_metric.metrics: + for data_point in [ + pt for pt in metric.data.data_points if pt.attributes + ]: + if data_point.attributes: + monitored_resource = MonitoredResource( + type="bigtable_client_raw", + labels={ + "project_id": self.project_id, + "instance": data_point.attributes[ + "resource_instance" + ], + "cluster": data_point.attributes[ + "resource_cluster" + ], + "table": data_point.attributes["resource_table"], + "zone": data_point.attributes["resource_zone"], + }, + ) + point = self._to_point(data_point) + series = TimeSeries( + resource=monitored_resource, + metric_kind=metric_kind, + points=[point], + metric=GMetric( + type=f"{self.prefix}/{metric.name}", + labels={ + k: v + for k, v in data_point.attributes.items() + if not k.startswith("resource_") + }, + ), + unit=metric.unit, + ) + all_series.append(series) + # send all metrics to Cloud Monitoring + try: + self._batch_write(all_series, deadline) + return MetricExportResult.SUCCESS + except Exception: + return MetricExportResult.FAILURE + + def _batch_write( + self, series: list[TimeSeries], deadline=None, max_batch_size=200 + ) -> None: + """ + Adapted from CloudMonitoringMetricsExporter + https://github.com/GoogleCloudPlatform/opentelemetry-operations-python/blob/3668dfe7ce3b80dd01f42af72428de957b58b316/opentelemetry-exporter-gcp-monitoring/src/opentelemetry/exporter/cloud_monitoring/__init__.py#L82 + + Args: + series: list of TimeSeries to write. Will be split into batches if necessary + deadline: designates the time.time() at which to stop writing. 
If None, uses API default + max_batch_size: maximum number of time series to write at once. + Cloud Monitoring allows up to 200 per request + """ + write_ind = 0 + while write_ind < len(series): + # find time left for next batch + timeout = deadline - time.time() if deadline else gapic_v1.method.DEFAULT + # write next batch + self.client.create_service_time_series( + CreateTimeSeriesRequest( + name=f"projects/{self.project_id}", + time_series=series[write_ind : write_ind + max_batch_size], + ), + timeout=timeout, + ) + write_ind += max_batch_size + + @staticmethod + def _to_point(data_point: NumberDataPoint | HistogramDataPoint) -> Point: + """ + Adapted from CloudMonitoringMetricsExporter + https://github.com/GoogleCloudPlatform/opentelemetry-operations-python/blob/3668dfe7ce3b80dd01f42af72428de957b58b316/opentelemetry-exporter-gcp-monitoring/src/opentelemetry/exporter/cloud_monitoring/__init__.py#L82 + """ + if isinstance(data_point, HistogramDataPoint): + mean = data_point.sum / data_point.count if data_point.count else 0.0 + point_value = TypedValue( + distribution_value=Distribution( + count=data_point.count, + mean=mean, + bucket_counts=data_point.bucket_counts, + bucket_options=Distribution.BucketOptions( + explicit_buckets=Distribution.BucketOptions.Explicit( + bounds=data_point.explicit_bounds, + ) + ), + ) + ) + else: + if isinstance(data_point.value, int): + point_value = TypedValue(int64_value=data_point.value) + else: + point_value = TypedValue(double_value=data_point.value) + start_time = Timestamp() + start_time.FromNanoseconds(data_point.start_time_unix_nano) + end_time = Timestamp() + end_time.FromNanoseconds(data_point.time_unix_nano) + interval = TimeInterval(start_time=start_time, end_time=end_time) + return Point(interval=interval, value=point_value) + + def shutdown(self, timeout_millis: float = 30_000, **kwargs): + """ + Adapted from CloudMonitoringMetricsExporter + 
https://github.com/GoogleCloudPlatform/opentelemetry-operations-python/blob/3668dfe7ce3b80dd01f42af72428de957b58b316/opentelemetry-exporter-gcp-monitoring/src/opentelemetry/exporter/cloud_monitoring/__init__.py#L82 + """ + pass + + def force_flush(self, timeout_millis: float = 10_000): + """ + Adapted from CloudMonitoringMetricsExporter + https://github.com/GoogleCloudPlatform/opentelemetry-operations-python/blob/3668dfe7ce3b80dd01f42af72428de957b58b316/opentelemetry-exporter-gcp-monitoring/src/opentelemetry/exporter/cloud_monitoring/__init__.py#L82 + """ + return True diff --git a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_metrics/handlers/opentelemetry.py b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_metrics/handlers/opentelemetry.py new file mode 100644 index 000000000000..2f78dfa28d3b --- /dev/null +++ b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_metrics/handlers/opentelemetry.py @@ -0,0 +1,237 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import annotations + +import os +import socket +import uuid + +from google.cloud.bigtable import __version__ as bigtable_version +from google.cloud.bigtable.data._metrics.data_model import ( + DEFAULT_CLUSTER_ID, + DEFAULT_ZONE, + ActiveOperationMetric, + CompletedAttemptMetric, + CompletedOperationMetric, + OperationType, +) +from google.cloud.bigtable.data._metrics.handlers._base import MetricsHandler + +# conversion factor for converting from nanoseconds to milliseconds +NS_TO_MS = 1e6 + + +class _OpenTelemetryInstruments: + """ + class that holds OpenTelemetry instrument objects + """ + + def __init__(self, meter_provider=None): + if meter_provider is None: + # use global meter provider + from opentelemetry import metrics + + meter_provider = metrics + # grab meter for this module + meter = meter_provider.get_meter("bigtable.googleapis.com") + # create instruments + self.operation_latencies = meter.create_histogram( + name="operation_latencies", + description=""" + The total end-to-end latency across all RPC attempts associated with a Bigtable operation. + This metric measures an operation's round trip from the client to Bigtable and back to the client and includes all retries. + + For ReadRows requests, the operation latencies include the application processing time for each returned message. + """, + unit="ms", + ) + self.first_response_latencies = meter.create_histogram( + name="first_response_latencies", + description="Latencies from when a client sends a request and receives the first row of the response.", + unit="ms", + ) + self.attempt_latencies = meter.create_histogram( + name="attempt_latencies", + description=""" + The latencies of a client RPC attempt. + + Under normal circumstances, this value is identical to operation_latencies. + If the client receives transient errors, however, then operation_latencies is the sum of all attempt_latencies and the exponential delays.
+ """, + unit="ms", + ) + self.retry_count = meter.create_counter( + name="retry_count", + description=""" + A counter that records the number of attempts that an operation required to complete. + Under normal circumstances, this value is empty. + """, + ) + self.server_latencies = meter.create_histogram( + name="server_latencies", + description="Latencies between the time when the Google frontend receives an RPC and when it sends the first byte of the response.", + unit="ms", + ) + self.connectivity_error_count = meter.create_counter( + name="connectivity_error_count", + description=""" + The number of requests that failed to reach Google's network. + In normal cases, this number is 0. When the number is not 0, it can indicate connectivity issues between the application and the Google network. + """, + ) + self.application_latencies = meter.create_histogram( + name="application_latencies", + description=""" + The time from when the client receives the response to a request until the application reads the response. + This metric is most relevant for ReadRows requests. + The start and stop times for this metric depend on the way that you send the read request; see Application blocking latencies timer examples for details. + """, + unit="ms", + ) + self.throttling_latencies = meter.create_histogram( + name="throttling_latencies", + description="Latencies introduced when the client blocks the sending of more requests to the server because of too many pending requests in a bulk operation.", + unit="ms", + ) + + +class OpenTelemetryMetricsHandler(MetricsHandler): + """ + Maintains a set of OpenTelemetry metrics for the Bigtable client library, + and updates them with each completed operation and attempt. + + The OpenTelemetry metrics that are tracked are as follows: + - operation_latencies: latency of each client method call, over all of its attempts. + - first_response_latencies: latency of receiving the first row in a ReadRows operation.
+ - attempt_latencies: latency of each client attempt RPC. + - retry_count: Number of additional RPCs sent after the initial attempt. + - server_latencies: latency recorded on the server side for each attempt. + - connectivity_error_count: number of attempts that failed to reach Google's network. + - application_latencies: the time spent waiting for the application to process the next response. + - throttling_latencies: latency introduced by waiting when there are too many outstanding requests in a bulk operation. + """ + + def __init__( + self, + *, + instance_id: str, + table_id: str, + app_profile_id: str | None = None, + client_uid: str | None = None, + client_version: str | None = None, + instruments: _OpenTelemetryInstruments = _OpenTelemetryInstruments(), + ): + super().__init__() + self.otel = instruments + client_version = client_version or bigtable_version + # fixed labels sent with each metric update + self.shared_labels = { + "client_name": f"python-bigtable/{client_version}", + "client_uid": client_uid or self._generate_client_uid(), + "resource_instance": instance_id, + "resource_table": table_id, + "app_profile": app_profile_id or "default", + } + + @staticmethod + def _generate_client_uid(): + """ + client_uid will take the format `python-<uuid>-<pid>@<hostname>` where uuid is a + random value, pid is the process id, and hostname is the hostname of the machine. + + If not found, localhost will be used in place of hostname, and an empty string + will be used in place of pid.
+ """ + try: + hostname = socket.gethostname() or "localhost" + except Exception: + hostname = "localhost" + try: + pid = os.getpid() or "" + except Exception: + pid = "" + return f"python-{uuid.uuid4()}-{pid}@{hostname}" + + def on_operation_complete(self, op: CompletedOperationMetric) -> None: + """ + Update the metrics associated with a completed operation: + - operation_latencies + - retry_count + - first_response_latencies + """ + labels = { + "method": op.op_type.value, + "status": op.final_status.name, + "resource_zone": op.zone, + "resource_cluster": op.cluster_id, + **self.shared_labels, + } + is_streaming = str(op.is_streaming) + + self.otel.operation_latencies.record( + op.duration_ns / NS_TO_MS, {"streaming": is_streaming, **labels} + ) + if ( + op.op_type == OperationType.READ_ROWS + and op.first_response_latency_ns is not None + ): + self.otel.first_response_latencies.record( + op.first_response_latency_ns / NS_TO_MS, labels + ) + # record retries (attempts beyond the first) whenever at least one attempt completed + if op.completed_attempts: + self.otel.retry_count.add(len(op.completed_attempts) - 1, labels) + + def on_attempt_complete( + self, attempt: CompletedAttemptMetric, op: ActiveOperationMetric + ): + """ + Update the metrics associated with a completed attempt: + - attempt_latencies + - server_latencies + - connectivity_error_count + - application_latencies + - throttling_latencies + """ + labels = { + "method": op.op_type.value, + "resource_zone": op.zone or DEFAULT_ZONE, # fallback to default if unset + "resource_cluster": op.cluster_id or DEFAULT_CLUSTER_ID, + **self.shared_labels, + } + status = attempt.end_status.name + is_streaming = str(op.is_streaming) + + self.otel.attempt_latencies.record( + attempt.duration_ns / NS_TO_MS, + {"streaming": is_streaming, "status": status, **labels}, + ) + flow_throttling = ( + op.flow_throttling_time_ns / NS_TO_MS if op.flow_throttling_time_ns else 0 + ) + self.otel.throttling_latencies.record(flow_throttling, labels) +
self.otel.application_latencies.record( + (attempt.application_blocking_time_ns + attempt.backoff_before_attempt_ns) + / NS_TO_MS, + labels, + ) + if attempt.gfe_latency_ns is not None: + self.otel.server_latencies.record( + attempt.gfe_latency_ns / NS_TO_MS, + {"streaming": is_streaming, "status": status, **labels}, + ) + else: + # gfe headers not attached. Record a connectivity error. + # TODO: this should not be recorded as an error when direct path is enabled + self.otel.connectivity_error_count.add(1, {"status": status, **labels}) diff --git a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/_mutate_rows.py b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/_mutate_rows.py index c1c508a526f2..40e19dd85847 100644 --- a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/_mutate_rows.py +++ b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/_mutate_rows.py @@ -25,16 +25,15 @@ import google.cloud.bigtable.data.exceptions as bt_exceptions import google.cloud.bigtable_v2.types.bigtable as types_pb from google.cloud.bigtable.data._cross_sync import CrossSync -from google.cloud.bigtable.data._helpers import ( - _attempt_timeout_generator, - _retry_exception_factory, -) +from google.cloud.bigtable.data._helpers import _attempt_timeout_generator +from google.cloud.bigtable.data._metrics import tracked_retry from google.cloud.bigtable.data.mutations import ( _MUTATE_ROWS_REQUEST_MUTATION_LIMIT, _EntryWithProto, ) if TYPE_CHECKING: + from google.cloud.bigtable.data._metrics import ActiveOperationMetric from google.cloud.bigtable.data._sync_autogen.client import ( _DataApiTarget as TargetType, ) @@ -61,6 +60,8 @@ class _MutateRowsOperation: operation_timeout: the timeout to use for the entire operation, in seconds. attempt_timeout: the timeout to use for each mutate_rows attempt, in seconds. If not specified, the request will run until operation_timeout is reached. 
+ metric: the metric object representing the active operation + retryable_exceptions: a list of exceptions that should be retried """ def __init__( @@ -70,6 +71,7 @@ def __init__( mutation_entries: list["RowMutationEntry"], operation_timeout: float, attempt_timeout: float | None, + metric: ActiveOperationMetric, retryable_exceptions: Sequence[type[Exception]] = (), ): total_mutations = sum((len(entry.mutations) for entry in mutation_entries)) @@ -82,13 +84,12 @@ def __init__( self.is_retryable = retries.if_exception_type( *retryable_exceptions, bt_exceptions._MutateRowsIncomplete ) - sleep_generator = retries.exponential_sleep_generator(0.01, 2, 60) - self._operation = lambda: CrossSync._Sync_Impl.retry_target( - self._run_attempt, - self.is_retryable, - sleep_generator, - operation_timeout, - exception_factory=_retry_exception_factory, + self._operation = lambda: tracked_retry( + retry_fn=CrossSync._Sync_Impl.retry_target, + operation=metric, + target=self._run_attempt, + predicate=self.is_retryable, + timeout=operation_timeout, ) self.timeout_generator = _attempt_timeout_generator( attempt_timeout, operation_timeout @@ -96,37 +97,39 @@ def __init__( self.mutations = [_EntryWithProto(m, m._to_pb()) for m in mutation_entries] self.remaining_indices = list(range(len(self.mutations))) self.errors: dict[int, list[Exception]] = {} + self._operation_metric = metric def start(self): """Start the operation, and run until completion Raises: MutationsExceptionGroup: if any mutations failed""" - try: - self._operation() - except Exception as exc: - incomplete_indices = self.remaining_indices.copy() - for idx in incomplete_indices: - self._handle_entry_error(idx, exc) - finally: - all_errors: list[Exception] = [] - for idx, exc_list in self.errors.items(): - if len(exc_list) == 0: - raise core_exceptions.ClientError( - f"Mutation {idx} failed with no associated errors" + with self._operation_metric: + try: + self._operation() + except Exception as exc: + incomplete_indices = 
self.remaining_indices.copy() + for idx in incomplete_indices: + self._handle_entry_error(idx, exc) + finally: + all_errors: list[Exception] = [] + for idx, exc_list in self.errors.items(): + if len(exc_list) == 0: + raise core_exceptions.ClientError( + f"Mutation {idx} failed with no associated errors" + ) + elif len(exc_list) == 1: + cause_exc = exc_list[0] + else: + cause_exc = bt_exceptions.RetryExceptionGroup(exc_list) + entry = self.mutations[idx].entry + all_errors.append( + bt_exceptions.FailedMutationEntryError(idx, entry, cause_exc) + ) + if all_errors: + raise bt_exceptions.MutationsExceptionGroup( + all_errors, len(self.mutations) ) - elif len(exc_list) == 1: - cause_exc = exc_list[0] - else: - cause_exc = bt_exceptions.RetryExceptionGroup(exc_list) - entry = self.mutations[idx].entry - all_errors.append( - bt_exceptions.FailedMutationEntryError(idx, entry, cause_exc) - ) - if all_errors: - raise bt_exceptions.MutationsExceptionGroup( - all_errors, len(self.mutations) - ) def _run_attempt(self): """Run a single attempt of the mutate_rows rpc. 
@@ -135,6 +138,7 @@ def _run_attempt(self): _MutateRowsIncomplete: if there are failed mutations eligible for retry after the attempt is complete GoogleAPICallError: if the gapic rpc fails""" + self._operation_metric.start_attempt() request_entries = [self.mutations[idx].proto for idx in self.remaining_indices] active_request_indices = { req_idx: orig_idx for req_idx, orig_idx in enumerate(self.remaining_indices) diff --git a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/_read_rows.py b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/_read_rows.py index a74374988161..b9c2a4bf8cb6 100644 --- a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/_read_rows.py +++ b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/_read_rows.py @@ -18,16 +18,15 @@ from __future__ import annotations +import time from typing import TYPE_CHECKING, Sequence from google.api_core import retry as retries -from google.api_core.retry import exponential_sleep_generator +from grpc import StatusCode from google.cloud.bigtable.data._cross_sync import CrossSync -from google.cloud.bigtable.data._helpers import ( - _attempt_timeout_generator, - _retry_exception_factory, -) +from google.cloud.bigtable.data._helpers import _attempt_timeout_generator +from google.cloud.bigtable.data._metrics import tracked_retry from google.cloud.bigtable.data.exceptions import ( InvalidChunk, _ResetRow, @@ -41,6 +40,7 @@ from google.cloud.bigtable_v2.types import RowSet as RowSetPB if TYPE_CHECKING: + from google.cloud.bigtable.data._metrics import ActiveOperationMetric from google.cloud.bigtable.data._sync_autogen.client import ( _DataApiTarget as TargetType, ) @@ -63,6 +63,7 @@ class _ReadRowsOperation: target: The table or view to send the request to operation_timeout: The total time to allow for the operation, in seconds attempt_timeout: The time to allow for each individual attempt, in seconds + metric: the metric object 
representing the active operation retryable_exceptions: A list of exceptions that should trigger a retry """ @@ -74,6 +75,7 @@ class _ReadRowsOperation: "_predicate", "_last_yielded_row_key", "_remaining_count", + "_operation_metric", ) def __init__( @@ -82,6 +84,7 @@ def __init__( target: TargetType, operation_timeout: float, attempt_timeout: float, + metric: ActiveOperationMetric, retryable_exceptions: Sequence[type[Exception]] = (), ): self.attempt_timeout_gen = _attempt_timeout_generator( @@ -98,18 +101,19 @@ def __init__( self._predicate = retries.if_exception_type(*retryable_exceptions) self._last_yielded_row_key: bytes | None = None self._remaining_count: int | None = self.request.rows_limit or None + self._operation_metric = metric def start_operation(self) -> CrossSync._Sync_Impl.Iterable[Row]: """Start the read_rows operation, retrying on retryable errors. Yields: Row: The next row in the stream""" - return CrossSync._Sync_Impl.retry_target_stream( - self._read_rows_attempt, - self._predicate, - exponential_sleep_generator(0.01, 60, multiplier=2), - self.operation_timeout, - exception_factory=_retry_exception_factory, + return tracked_retry( + retry_fn=CrossSync._Sync_Impl.retry_target_stream, + operation=self._operation_metric, + target=self._read_rows_attempt, + predicate=self._predicate, + timeout=self.operation_timeout, ) def _read_rows_attempt(self) -> CrossSync._Sync_Impl.Iterable[Row]: @@ -120,6 +124,7 @@ def _read_rows_attempt(self) -> CrossSync._Sync_Impl.Iterable[Row]: Yields: Row: The next row in the stream""" + self._operation_metric.start_attempt() if self._last_yielded_row_key is not None: try: self.request.rows = self._revise_request_rowset( @@ -181,9 +186,8 @@ def chunk_stream( raise InvalidChunk("emit count exceeds row limit") current_key = None - @staticmethod def merge_rows( - chunks: CrossSync._Sync_Impl.Iterable[ReadRowsResponsePB.CellChunk] | None, + self, chunks: CrossSync._Sync_Impl.Iterable[ReadRowsResponsePB.CellChunk] | None ) 
-> CrossSync._Sync_Impl.Iterable[Row]: """Merge chunks into rows @@ -191,94 +195,107 @@ def merge_rows( chunks: the chunk stream to merge Yields: Row: the next row in the stream""" - if chunks is None: - return - it = chunks.__iter__() - while True: - try: - c = it.__next__() - except CrossSync._Sync_Impl.StopIteration: + try: + if chunks is None: + self._operation_metric.end_with_success() return - row_key = c.row_key - if not row_key: - raise InvalidChunk("first row chunk is missing key") - cells = [] - family: str | None = None - qualifier: bytes | None = None - try: - while True: - if c.reset_row: - raise _ResetRow(c) - k = c.row_key - f = c.family_name.value - q = c.qualifier.value if c.HasField("qualifier") else None - if k and k != row_key: - raise InvalidChunk("unexpected new row key") - if f: - family = f - if q is not None: - qualifier = q - else: - raise InvalidChunk("new family without qualifier") - elif family is None: - raise InvalidChunk("missing family") - elif q is not None: - if family is None: - raise InvalidChunk("new qualifier without family") - qualifier = q - elif qualifier is None: - raise InvalidChunk("missing qualifier") - ts = c.timestamp_micros - labels = c.labels if c.labels else [] - value = c.value - if c.value_size > 0: - buffer = [value] - while c.value_size > 0: - c = it.__next__() - t = c.timestamp_micros - cl = c.labels - k = c.row_key - if ( - c.HasField("family_name") - and c.family_name.value != family - ): - raise InvalidChunk("family changed mid cell") - if ( - c.HasField("qualifier") - and c.qualifier.value != qualifier - ): - raise InvalidChunk("qualifier changed mid cell") - if t and t != ts: - raise InvalidChunk("timestamp changed mid cell") - if cl and cl != labels: - raise InvalidChunk("labels changed mid cell") - if k and k != row_key: - raise InvalidChunk("row key changed mid cell") - if c.reset_row: - raise _ResetRow(c) - buffer.append(c.value) - value = b"".join(buffer) - cells.append( - Cell(value, row_key, 
family, qualifier, ts, list(labels)) - ) - if c.commit_row: - yield Row(row_key, cells) - break + it = chunks.__iter__() + while True: + try: c = it.__next__() - except _ResetRow as e: - c = e.chunk - if ( - c.row_key - or c.HasField("family_name") - or c.HasField("qualifier") - or c.timestamp_micros - or c.labels - or c.value - ): - raise InvalidChunk("reset row with data") - continue - except CrossSync._Sync_Impl.StopIteration: - raise InvalidChunk("premature end of stream") + except CrossSync._Sync_Impl.StopIteration: + self._operation_metric.end_with_success() + return + row_key = c.row_key + if not row_key: + raise InvalidChunk("first row chunk is missing key") + cells = [] + family: str | None = None + qualifier: bytes | None = None + try: + while True: + if c.reset_row: + raise _ResetRow(c) + k = c.row_key + f = c.family_name.value + q = c.qualifier.value if c.HasField("qualifier") else None + if k and k != row_key: + raise InvalidChunk("unexpected new row key") + if f: + family = f + if q is not None: + qualifier = q + else: + raise InvalidChunk("new family without qualifier") + elif family is None: + raise InvalidChunk("missing family") + elif q is not None: + if family is None: + raise InvalidChunk("new qualifier without family") + qualifier = q + elif qualifier is None: + raise InvalidChunk("missing qualifier") + ts = c.timestamp_micros + labels = c.labels if c.labels else [] + value = c.value + if c.value_size > 0: + buffer = [value] + while c.value_size > 0: + c = it.__next__() + t = c.timestamp_micros + cl = c.labels + k = c.row_key + if ( + c.HasField("family_name") + and c.family_name.value != family + ): + raise InvalidChunk("family changed mid cell") + if ( + c.HasField("qualifier") + and c.qualifier.value != qualifier + ): + raise InvalidChunk("qualifier changed mid cell") + if t and t != ts: + raise InvalidChunk("timestamp changed mid cell") + if cl and cl != labels: + raise InvalidChunk("labels changed mid cell") + if k and k != row_key: + 
raise InvalidChunk("row key changed mid cell") + if c.reset_row: + raise _ResetRow(c) + buffer.append(c.value) + value = b"".join(buffer) + cells.append( + Cell(value, row_key, family, qualifier, ts, list(labels)) + ) + if c.commit_row: + block_time = time.monotonic_ns() + yield Row(row_key, cells) + if self._operation_metric.active_attempt is not None: + self._operation_metric.active_attempt.application_blocking_time_ns += ( + time.monotonic_ns() - block_time + ) + break + c = it.__next__() + except _ResetRow as e: + c = e.chunk + if ( + c.row_key + or c.HasField("family_name") + or c.HasField("qualifier") + or c.timestamp_micros + or c.labels + or c.value + ): + raise InvalidChunk("reset row with data") + continue + except CrossSync._Sync_Impl.StopIteration: + raise InvalidChunk("premature end of stream") + except GeneratorExit as close_exception: + self._operation_metric.end_with_status(StatusCode.CANCELLED) + raise close_exception + except Exception as generic_exception: + raise generic_exception @staticmethod def _revise_request_rowset(row_set: RowSetPB, last_seen_row_key: bytes) -> RowSetPB: diff --git a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/client.py b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/client.py index 9dc118de0289..87bce38672d0 100644 --- a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/client.py +++ b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/client.py @@ -62,6 +62,10 @@ OperationType, tracked_retry, ) +from google.cloud.bigtable.data._metrics.handlers.gcp_exporter import ( + BigtableMetricsExporter, + GoogleCloudMetricsHandler, +) from google.cloud.bigtable.data._sync_autogen._swappable_channel import ( SwappableChannel as SwappableChannelType, ) @@ -191,6 +195,11 @@ def __init__( f"The configured universe domain ({self.universe_domain}) does not match the universe domain found in the credentials 
({self._credentials.universe_domain}). If you haven't configured the universe domain explicitly, `googleapis.com` is the default." ) self._is_closed = CrossSync._Sync_Impl.Event() + self._gcp_metrics_exporter = BigtableMetricsExporter( + project_id=self.project, + credentials=credentials, + client_options=client_options, + ) self.transport = cast(TransportType, self._gapic_client.transport) self._active_instances: Set[_WarmedInstanceKey] = set() self._instance_owners: dict[_WarmedInstanceKey, Set[int]] = {} @@ -837,7 +846,17 @@ def __init__( self.default_retryable_errors: Sequence[type[Exception]] = ( default_retryable_errors or () ) - self._metrics = BigtableClientSideMetricsController() + self._metrics = BigtableClientSideMetricsController( + handlers=[ + GoogleCloudMetricsHandler( + exporter=client._gcp_metrics_exporter, + instance_id=instance_id, + table_id=table_id, + app_profile_id=app_profile_id, + client_version=client._client_version(), + ) + ] + ) try: self._register_instance_future = CrossSync._Sync_Impl.create_task( self.client._register_instance, @@ -906,6 +925,9 @@ def read_rows_stream( self, operation_timeout=operation_timeout, attempt_timeout=attempt_timeout, + metric=self._metrics.create_operation( + OperationType.READ_ROWS, is_streaming=True + ), retryable_exceptions=retryable_excs, ) return row_merger.start_operation() @@ -992,15 +1014,26 @@ def read_row( if row_key is None: raise ValueError("row_key must be string or bytes") query = ReadRowsQuery(row_keys=row_key, row_filter=row_filter, limit=1) - results = self.read_rows( + operation_timeout, attempt_timeout = _get_timeouts( + operation_timeout, attempt_timeout, self + ) + retryable_excs = _get_retryable_errors(retryable_errors, self) + row_merger = CrossSync._Sync_Impl._ReadRowsOperation( query, + self, operation_timeout=operation_timeout, attempt_timeout=attempt_timeout, - retryable_errors=retryable_errors, + metric=self._metrics.create_operation( + OperationType.READ_ROWS, is_streaming=False 
+ ), + retryable_exceptions=retryable_excs, ) - if len(results) == 0: + results_generator = row_merger.start_operation() + try: + results = [a for a in results_generator] + return results[0] + except IndexError: return None - return results[0] def read_rows_sharded( self, @@ -1122,19 +1155,17 @@ def row_exists( will be chained with a RetryExceptionGroup containing GoogleAPIError exceptions from any retries that failed google.api_core.exceptions.GoogleAPIError: raised if the request encounters an unrecoverable error""" - if row_key is None: - raise ValueError("row_key must be string or bytes") strip_filter = StripValueTransformerFilter(flag=True) limit_filter = CellsRowLimitFilter(1) chain_filter = RowFilterChain(filters=[limit_filter, strip_filter]) - query = ReadRowsQuery(row_keys=row_key, limit=1, row_filter=chain_filter) - results = self.read_rows( - query, + result = self.read_row( + row_key=row_key, + row_filter=chain_filter, operation_timeout=operation_timeout, attempt_timeout=attempt_timeout, retryable_errors=retryable_errors, ) - return len(results) > 0 + return result is not None def sample_row_keys( self, @@ -1372,6 +1403,7 @@ def bulk_mutate_rows( mutation_entries, operation_timeout, attempt_timeout, + metric=self._metrics.create_operation(OperationType.BULK_MUTATE_ROWS), retryable_exceptions=retryable_excs, ) operation.start() diff --git a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/mutations_batcher.py b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/mutations_batcher.py index 5be449a49d4a..107c2cbf591b 100644 --- a/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/mutations_batcher.py +++ b/packages/google-cloud-bigtable/google/cloud/bigtable/data/_sync_autogen/mutations_batcher.py @@ -19,6 +19,7 @@ import atexit import concurrent.futures +import time import warnings from collections import deque from typing import TYPE_CHECKING, Sequence, cast @@ -29,6 +30,7 @@ 
_get_retryable_errors, _get_timeouts, ) +from google.cloud.bigtable.data._metrics import ActiveOperationMetric, OperationType from google.cloud.bigtable.data.exceptions import ( FailedMutationEntryError, MutationsExceptionGroup, @@ -39,6 +41,7 @@ ) if TYPE_CHECKING: + from google.cloud.bigtable.data._metrics import BigtableClientSideMetricsController from google.cloud.bigtable.data._sync_autogen.client import ( _DataApiTarget as TargetType, ) @@ -154,6 +157,22 @@ def add_to_flow(self, mutations: RowMutationEntry | list[RowMutationEntry]): ) yield mutations[start_idx:end_idx] + def add_to_flow_with_metrics( + self, + mutations: RowMutationEntry | list[RowMutationEntry], + metrics_controller: BigtableClientSideMetricsController, + ): + inner_generator = self.add_to_flow(mutations) + while True: + metric = metrics_controller.create_operation(OperationType.BULK_MUTATE_ROWS) + flow_start_time = time.monotonic_ns() + try: + value = inner_generator.__next__() + except CrossSync._Sync_Impl.StopIteration: + return + metric.flow_throttling_time_ns = time.monotonic_ns() - flow_start_time + yield (value, metric) + class MutationsBatcher: """ @@ -309,9 +328,14 @@ def _flush_internal(self, new_entries: list[RowMutationEntry]): in_process_requests: list[ CrossSync._Sync_Impl.Future[list[FailedMutationEntryError]] ] = [] - for batch in self._flow_control.add_to_flow(new_entries): + for batch, metric in self._flow_control.add_to_flow_with_metrics( + new_entries, self._target._metrics + ): batch_task = CrossSync._Sync_Impl.create_task( - self._execute_mutate_rows, batch, sync_executor=self._sync_rpc_executor + self._execute_mutate_rows, + batch, + metric, + sync_executor=self._sync_rpc_executor, ) in_process_requests.append(batch_task) found_exceptions = self._wait_for_batch_results(*in_process_requests) @@ -319,7 +343,7 @@ def _flush_internal(self, new_entries: list[RowMutationEntry]): self._add_exceptions(found_exceptions) def _execute_mutate_rows( - self, batch: 
list[RowMutationEntry] + self, batch: list[RowMutationEntry], metric: ActiveOperationMetric ) -> list[FailedMutationEntryError]: """Helper to execute mutation operation on a batch @@ -338,6 +362,7 @@ def _execute_mutate_rows( batch, operation_timeout=self._operation_timeout, attempt_timeout=self._attempt_timeout, + metric=metric, retryable_exceptions=self._retryable_errors, ) operation.start() diff --git a/packages/google-cloud-bigtable/noxfile.py b/packages/google-cloud-bigtable/noxfile.py index 2a1c8d05a876..3e8793569fb5 100644 --- a/packages/google-cloud-bigtable/noxfile.py +++ b/packages/google-cloud-bigtable/noxfile.py @@ -59,6 +59,7 @@ ] SYSTEM_TEST_EXTERNAL_DEPENDENCIES: List[str] = [ "pytest-asyncio==0.21.2", + "pytest-order==1.3.0", BLACK_VERSION, "pyyaml==6.0.2", ] diff --git a/packages/google-cloud-bigtable/setup.py b/packages/google-cloud-bigtable/setup.py index 80b0380b2b96..d526143f9132 100644 --- a/packages/google-cloud-bigtable/setup.py +++ b/packages/google-cloud-bigtable/setup.py @@ -40,6 +40,7 @@ dependencies = [ "google-api-core[grpc] >= 2.11.0, <3.0.0", + "google-cloud-monitoring >= 2.0.0, <3.0.0dev", # Exclude incompatible versions of `google-auth` # See https://github.com/googleapis/google-cloud-python/issues/12364 "google-auth >= 2.14.1, <3.0.0,!=2.24.0,!=2.25.0", @@ -51,6 +52,9 @@ "google-cloud-core >= 1.4.4, <3.0.0", "grpc-google-iam-v1 >= 0.12.4, <1.0.0", "google-crc32c>=1.5.0, <2.0.0dev", + "googleapis-common-protos[grpc] >= 1.57.0, <2.0.0dev", + "opentelemetry-api >= 1.0.0, <2.0.0dev", + "opentelemetry-sdk >= 1.0.0, <2.0.0dev", ] extras = { "libcst": "libcst >= 0.2.5", diff --git a/packages/google-cloud-bigtable/tests/system/data/__init__.py b/packages/google-cloud-bigtable/tests/system/data/__init__.py index 939955635979..0702dfc1610d 100644 --- a/packages/google-cloud-bigtable/tests/system/data/__init__.py +++ b/packages/google-cloud-bigtable/tests/system/data/__init__.py @@ -13,6 +13,7 @@ # See the License for the specific language 
governing permissions and # limitations under the License. # +import datetime import os import uuid @@ -35,9 +36,12 @@ class SystemTestRunner: """ @pytest.fixture(scope="session") - def init_table_id(self): + def init_table_id(self, start_timestamp): """ The table_id to use when creating a new test table + + Args: + start_timestamp: accessed when building table to ensure timestamp data is loaded before tests are run """ return f"test-table-{uuid.uuid4().hex}" @@ -75,6 +79,13 @@ def column_family_config(self): ), } + @pytest.fixture(scope="session") + def start_timestamp(self): + """ + A timestamp taken before any tests are run. Used to fetch back metrics relevant to the tests + """ + return datetime.datetime.now(datetime.timezone.utc) + @pytest.fixture(scope="session") def admin_client(self): """ diff --git a/packages/google-cloud-bigtable/tests/system/data/test_metrics_async.py b/packages/google-cloud-bigtable/tests/system/data/test_metrics_async.py index 48678160e867..53eb57e08b5f 100644 --- a/packages/google-cloud-bigtable/tests/system/data/test_metrics_async.py +++ b/packages/google-cloud-bigtable/tests/system/data/test_metrics_async.py @@ -26,6 +26,7 @@ CompletedOperationMetric, ) from google.cloud.bigtable.data._metrics.handlers._base import MetricsHandler +from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery from google.cloud.bigtable_v2.types import ResponseParams from .
import TEST_FAMILY, SystemTestRunner @@ -211,7 +212,8 @@ async def temp_rows(self, table): @CrossSync.pytest_fixture(scope="session") async def table(self, client, table_id, instance_id, handler): async with client.get_table(instance_id, table_id) as table: - table._metrics.add_handler(handler) + # override handlers with custom test object + table._metrics.handlers = [handler] yield table @CrossSync.convert @@ -225,6 +227,1220 @@ async def authorized_view( table._metrics.add_handler(handler) yield table + @CrossSync.pytest + async def test_read_rows(self, table, temp_rows, handler, cluster_config): + await temp_rows.add_row(b"row_key_1") + await temp_rows.add_row(b"row_key_2") + handler.clear() + row_list = await table.read_rows(ReadRowsQuery()) + assert len(row_list) == 2 + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + # validate operation + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.value[0] == 0 + assert operation.is_streaming is True + assert operation.op_type.value == "ReadRows" + assert len(operation.completed_attempts) == 1 + assert operation.completed_attempts[0] == handler.completed_attempts[0] + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + assert operation.duration_ns > 0 and operation.duration_ns < 1e9 + assert ( + operation.first_response_latency_ns is not None + and operation.first_response_latency_ns < operation.duration_ns + ) + assert operation.flow_throttling_time_ns == 0 + # validate attempt + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.duration_ns > 0 and attempt.duration_ns < operation.duration_ns + assert attempt.end_status.value[0] == 0 + assert attempt.backoff_before_attempt_ns == 0 + assert ( + 
attempt.gfe_latency_ns > 0 and attempt.gfe_latency_ns < attempt.duration_ns + ) + assert ( + attempt.application_blocking_time_ns > 0 + and attempt.application_blocking_time_ns < operation.duration_ns + ) + + @CrossSync.pytest + async def test_read_rows_failure_with_retries( + self, table, temp_rows, handler, error_injector + ): + """ + Test failure in grpc layer by injecting errors into an interceptor + with retryable errors, then a terminal one + """ + await temp_rows.add_row(b"row_key_1") + handler.clear() + expected_zone = "my_zone" + expected_cluster = "my_cluster" + num_retryable = 2 + for i in range(num_retryable): + error_injector.push( + self._make_exception(StatusCode.ABORTED, cluster_id=expected_cluster) + ) + error_injector.push( + self._make_exception(StatusCode.PERMISSION_DENIED, zone_id=expected_zone) + ) + with pytest.raises(PermissionDenied): + await table.read_rows(ReadRowsQuery(), retryable_errors=[Aborted]) + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == num_retryable + 1 + # validate operation + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "PERMISSION_DENIED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is True + assert len(operation.completed_attempts) == num_retryable + 1 + assert operation.cluster_id == expected_cluster + assert operation.zone == expected_zone + # validate attempts + for i in range(num_retryable): + attempt = handler.completed_attempts[i] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "ABORTED" + assert attempt.gfe_latency_ns is None + final_attempt = handler.completed_attempts[num_retryable] + assert isinstance(final_attempt, CompletedAttemptMetric) + assert final_attempt.end_status.name == "PERMISSION_DENIED" + assert final_attempt.gfe_latency_ns is None + + @CrossSync.pytest + async def 
test_read_rows_failure_timeout(self, table, temp_rows, handler): + """ + Test failure in gapic layer by passing very low timeout + + No grpc headers expected + """ + await temp_rows.add_row(b"row_key_1") + handler.clear() + with pytest.raises(GoogleAPICallError): + await table.read_rows(ReadRowsQuery(), operation_timeout=0.001) + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + # validate operation + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "DEADLINE_EXCEEDED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is True + assert len(operation.completed_attempts) == 1 + assert operation.cluster_id == "" + assert operation.zone == "global" + # validate attempt + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "DEADLINE_EXCEEDED" + assert attempt.gfe_latency_ns is None + + @CrossSync.pytest + async def test_read_rows_failure_unauthorized( + self, handler, authorized_view, cluster_config + ): + """ + Test failure in backend by accessing an unauthorized family + """ + from google.cloud.bigtable.data.row_filters import FamilyNameRegexFilter + + with pytest.raises(GoogleAPICallError) as e: + await authorized_view.read_rows( + ReadRowsQuery(row_filter=FamilyNameRegexFilter("unauthorized")) + ) + assert e.value.grpc_status_code.name == "PERMISSION_DENIED" + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + # validate operation + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "PERMISSION_DENIED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is True + assert len(operation.completed_attempts) == 1 + assert 
operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + # validate attempt + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "PERMISSION_DENIED" + assert ( + attempt.gfe_latency_ns >= 0 + and attempt.gfe_latency_ns < operation.duration_ns + ) + + @CrossSync.pytest + async def test_read_rows_stream(self, table, temp_rows, handler, cluster_config): + await temp_rows.add_row(b"row_key_1") + await temp_rows.add_row(b"row_key_2") + handler.clear() + # full table scan + generator = await table.read_rows_stream(ReadRowsQuery()) + row_list = [r async for r in generator] + assert len(row_list) == 2 + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + # validate operation + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.value[0] == 0 + assert operation.is_streaming is True + assert operation.op_type.value == "ReadRows" + assert len(operation.completed_attempts) == 1 + assert operation.completed_attempts[0] == handler.completed_attempts[0] + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + assert operation.duration_ns > 0 and operation.duration_ns < 1e9 + assert ( + operation.first_response_latency_ns is not None + and operation.first_response_latency_ns < operation.duration_ns + ) + assert operation.flow_throttling_time_ns == 0 + # validate attempt + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.duration_ns > 0 and attempt.duration_ns < operation.duration_ns + assert attempt.end_status.value[0] == 0 + assert attempt.backoff_before_attempt_ns == 0 + assert ( + 
attempt.gfe_latency_ns > 0 and attempt.gfe_latency_ns < attempt.duration_ns + ) + assert ( + attempt.application_blocking_time_ns > 0 + and attempt.application_blocking_time_ns < operation.duration_ns + ) + + @CrossSync.pytest + @CrossSync.convert(replace_symbols={"__anext__": "__next__", "aclose": "close"}) + async def test_read_rows_stream_failure_closed( + self, table, temp_rows, handler, error_injector + ): + """ + Test how metrics collection handles closed generator + """ + await temp_rows.add_row(b"row_key_1") + await temp_rows.add_row(b"row_key_2") + handler.clear() + generator = await table.read_rows_stream(ReadRowsQuery()) + await generator.__anext__() + await generator.aclose() + with pytest.raises(CrossSync.StopIteration): + await generator.__anext__() + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + # validate operation + operation = handler.completed_operations[0] + assert operation.final_status.name == "CANCELLED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is True + assert len(operation.completed_attempts) == 1 + assert operation.cluster_id == "" + assert operation.zone == "global" + # validate attempt + attempt = handler.completed_attempts[0] + assert attempt.end_status.name == "CANCELLED" + assert attempt.gfe_latency_ns is None + + @CrossSync.pytest + async def test_read_rows_stream_failure_with_retries( + self, table, temp_rows, handler, error_injector + ): + """ + Test failure in grpc layer by injecting errors into an interceptor + with retryable errors, then a terminal one + """ + await temp_rows.add_row(b"row_key_1") + handler.clear() + expected_zone = "my_zone" + expected_cluster = "my_cluster" + num_retryable = 2 + for i in range(num_retryable): + error_injector.push( + self._make_exception(StatusCode.ABORTED, cluster_id=expected_cluster) + ) + error_injector.push( + self._make_exception(StatusCode.PERMISSION_DENIED, zone_id=expected_zone) + 
) + generator = await table.read_rows_stream( + ReadRowsQuery(), retryable_errors=[Aborted] + ) + with pytest.raises(PermissionDenied): + [_ async for _ in generator] + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == num_retryable + 1 + # validate operation + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "PERMISSION_DENIED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is True + assert len(operation.completed_attempts) == num_retryable + 1 + assert operation.cluster_id == expected_cluster + assert operation.zone == expected_zone + # validate attempts + for i in range(num_retryable): + attempt = handler.completed_attempts[i] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "ABORTED" + assert attempt.gfe_latency_ns is None + final_attempt = handler.completed_attempts[num_retryable] + assert isinstance(final_attempt, CompletedAttemptMetric) + assert final_attempt.end_status.name == "PERMISSION_DENIED" + assert final_attempt.gfe_latency_ns is None + + @CrossSync.pytest + async def test_read_rows_stream_failure_timeout(self, table, temp_rows, handler): + """ + Test failure in gapic layer by passing very low timeout + + No grpc headers expected + """ + await temp_rows.add_row(b"row_key_1") + handler.clear() + generator = await table.read_rows_stream( + ReadRowsQuery(), operation_timeout=0.001 + ) + with pytest.raises(GoogleAPICallError): + [_ async for _ in generator] + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + # validate operation + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "DEADLINE_EXCEEDED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is 
True + assert len(operation.completed_attempts) == 1 + assert operation.cluster_id == "" + assert operation.zone == "global" + # validate attempt + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "DEADLINE_EXCEEDED" + assert attempt.gfe_latency_ns is None + + @CrossSync.pytest + async def test_read_rows_stream_failure_unauthorized( + self, handler, authorized_view, cluster_config + ): + """ + Test failure in backend by accessing an unauthorized family + """ + from google.cloud.bigtable.data.row_filters import FamilyNameRegexFilter + + with pytest.raises(GoogleAPICallError) as e: + generator = await authorized_view.read_rows_stream( + ReadRowsQuery(row_filter=FamilyNameRegexFilter("unauthorized")) + ) + [_ async for _ in generator] + assert e.value.grpc_status_code.name == "PERMISSION_DENIED" + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + # validate operation + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "PERMISSION_DENIED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is True + assert len(operation.completed_attempts) == 1 + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + # validate attempt + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "PERMISSION_DENIED" + assert ( + attempt.gfe_latency_ns >= 0 + and attempt.gfe_latency_ns < operation.duration_ns + ) + + @CrossSync.pytest + async def test_read_rows_stream_failure_unauthorized_with_retries( + self, handler, authorized_view, cluster_config + ): + """ + retry unauthorized request multiple times before timing out + """ + from 
google.cloud.bigtable.data.row_filters import FamilyNameRegexFilter + + with pytest.raises(GoogleAPICallError) as e: + generator = await authorized_view.read_rows_stream( + ReadRowsQuery(row_filter=FamilyNameRegexFilter("unauthorized")), + retryable_errors=[PermissionDenied], + operation_timeout=0.5, + ) + [_ async for _ in generator] + assert e.value.grpc_status_code.name == "DEADLINE_EXCEEDED" + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) > 1 + # validate operation + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "DEADLINE_EXCEEDED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is True + assert len(operation.completed_attempts) > 1 + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + # validate attempts + for attempt in handler.completed_attempts: + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name in ["PERMISSION_DENIED", "DEADLINE_EXCEEDED"] + + @CrossSync.pytest + async def test_read_rows_stream_failure_mid_stream( + self, table, temp_rows, handler, error_injector + ): + """ + Test failure in grpc stream + """ + await temp_rows.add_row(b"row_key_1") + handler.clear() + error_injector.fail_mid_stream = True + error_injector.push(self._make_exception(StatusCode.ABORTED)) + error_injector.push(self._make_exception(StatusCode.PERMISSION_DENIED)) + generator = await table.read_rows_stream( + ReadRowsQuery(), retryable_errors=[Aborted] + ) + with pytest.raises(PermissionDenied): + [_ async for _ in generator] + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 2 + # validate operation + operation = handler.completed_operations[0] + assert operation.final_status.name == 
"PERMISSION_DENIED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is True + assert len(operation.completed_attempts) == 2 + # validate retried attempt + attempt = handler.completed_attempts[0] + assert attempt.end_status.name == "ABORTED" + # validate final attempt + final_attempt = handler.completed_attempts[-1] + assert final_attempt.end_status.name == "PERMISSION_DENIED" + + @CrossSync.pytest + async def test_read_row(self, table, temp_rows, handler, cluster_config): + await temp_rows.add_row(b"row_key_1") + handler.clear() + await table.read_row(b"row_key_1") + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + # validate operation + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.value[0] == 0 + assert operation.is_streaming is False + assert operation.op_type.value == "ReadRows" + assert len(operation.completed_attempts) == 1 + assert operation.completed_attempts[0] == handler.completed_attempts[0] + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + assert operation.duration_ns > 0 and operation.duration_ns < 1e9 + assert ( + operation.first_response_latency_ns > 0 + and operation.first_response_latency_ns < operation.duration_ns + ) + assert operation.flow_throttling_time_ns == 0 + # validate attempt + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.duration_ns > 0 and attempt.duration_ns < operation.duration_ns + assert attempt.end_status.value[0] == 0 + assert attempt.backoff_before_attempt_ns == 0 + assert ( + attempt.gfe_latency_ns > 0 and attempt.gfe_latency_ns < attempt.duration_ns + ) + assert ( + attempt.application_blocking_time_ns > 0 + and attempt.application_blocking_time_ns < 
operation.duration_ns + ) + + @CrossSync.pytest + async def test_read_row_failure_with_retries( + self, table, temp_rows, handler, error_injector + ): + """ + Test failure in grpc layer by injecting errors into an interceptor + with retryable errors, then a terminal one + """ + await temp_rows.add_row(b"row_key_1") + handler.clear() + expected_zone = "my_zone" + expected_cluster = "my_cluster" + num_retryable = 2 + for i in range(num_retryable): + error_injector.push( + self._make_exception(StatusCode.ABORTED, cluster_id=expected_cluster) + ) + error_injector.push( + self._make_exception(StatusCode.PERMISSION_DENIED, zone_id=expected_zone) + ) + with pytest.raises(PermissionDenied): + await table.read_row(b"row_key_1", retryable_errors=[Aborted]) + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == num_retryable + 1 + # validate operation + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "PERMISSION_DENIED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is False + assert len(operation.completed_attempts) == num_retryable + 1 + assert operation.cluster_id == expected_cluster + assert operation.zone == expected_zone + # validate attempts + for i in range(num_retryable): + attempt = handler.completed_attempts[i] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "ABORTED" + assert attempt.gfe_latency_ns is None + final_attempt = handler.completed_attempts[num_retryable] + assert isinstance(final_attempt, CompletedAttemptMetric) + assert final_attempt.end_status.name == "PERMISSION_DENIED" + assert final_attempt.gfe_latency_ns is None + + @CrossSync.pytest + async def test_read_row_failure_timeout(self, table, temp_rows, handler): + """ + Test failure in gapic layer by passing very low timeout + + No grpc headers expected + """ + await 
temp_rows.add_row(b"row_key_1") + handler.clear() + with pytest.raises(GoogleAPICallError): + await table.read_row(b"row_key_1", operation_timeout=0.001) + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + # validate operation + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "DEADLINE_EXCEEDED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is False + assert len(operation.completed_attempts) == 1 + assert operation.cluster_id == "" + assert operation.zone == "global" + # validate attempt + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "DEADLINE_EXCEEDED" + assert attempt.gfe_latency_ns is None + + @CrossSync.pytest + async def test_read_row_failure_unauthorized( + self, handler, authorized_view, cluster_config + ): + """ + Test failure in backend by accessing an unauthorized family + """ + from google.cloud.bigtable.data.row_filters import FamilyNameRegexFilter + + with pytest.raises(GoogleAPICallError) as e: + await authorized_view.read_row( + b"any_row", row_filter=FamilyNameRegexFilter("unauthorized") + ) + assert e.value.grpc_status_code.name == "PERMISSION_DENIED" + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + # validate operation + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "PERMISSION_DENIED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is False + assert len(operation.completed_attempts) == 1 + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + # validate attempt + attempt = 
handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "PERMISSION_DENIED" + assert ( + attempt.gfe_latency_ns >= 0 + and attempt.gfe_latency_ns < operation.duration_ns + ) + + @CrossSync.pytest + async def test_read_rows_sharded(self, table, temp_rows, handler, cluster_config): + from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery + + await temp_rows.add_row(b"a") + await temp_rows.add_row(b"b") + await temp_rows.add_row(b"c") + await temp_rows.add_row(b"d") + query1 = ReadRowsQuery(row_keys=[b"a", b"c"]) + query2 = ReadRowsQuery(row_keys=[b"b", b"d"]) + handler.clear() + row_list = await table.read_rows_sharded([query1, query2]) + assert len(row_list) == 4 + # validate counts + assert len(handler.completed_operations) == 2 + assert len(handler.completed_attempts) == 2 + # validate operations + for operation in handler.completed_operations: + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.value[0] == 0 + assert operation.is_streaming is True + assert operation.op_type.value == "ReadRows" + assert len(operation.completed_attempts) == 1 + attempt = operation.completed_attempts[0] + assert attempt in handler.completed_attempts + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + assert operation.duration_ns > 0 and operation.duration_ns < 1e9 + assert ( + operation.first_response_latency_ns is not None + and operation.first_response_latency_ns < operation.duration_ns + ) + assert operation.flow_throttling_time_ns == 0 + # validate attempt + assert isinstance(attempt, CompletedAttemptMetric) + assert ( + attempt.duration_ns > 0 and attempt.duration_ns < operation.duration_ns + ) + assert attempt.end_status.value[0] == 0 + assert attempt.backoff_before_attempt_ns == 0 + assert ( + attempt.gfe_latency_ns > 0 + and attempt.gfe_latency_ns < 
attempt.duration_ns + ) + assert ( + attempt.application_blocking_time_ns > 0 + and attempt.application_blocking_time_ns < operation.duration_ns + ) + + @CrossSync.pytest + async def test_read_rows_sharded_failure_with_retries( + self, table, temp_rows, handler, error_injector + ): + """ + Test failure in grpc layer by injecting errors into an interceptor + with retryable errors + """ + from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery + + await temp_rows.add_row(b"a") + await temp_rows.add_row(b"b") + query1 = ReadRowsQuery(row_keys=[b"a"]) + query2 = ReadRowsQuery(row_keys=[b"b"]) + handler.clear() + + error_injector.push(self._make_exception(StatusCode.ABORTED)) + await table.read_rows_sharded([query1, query2], retryable_errors=[Aborted]) + + assert len(handler.completed_operations) == 2 + assert len(handler.completed_attempts) == 3 + # validate operations + for op in handler.completed_operations: + assert op.final_status.name == "OK" + assert op.op_type.value == "ReadRows" + assert op.is_streaming is True + # validate attempts + assert ( + len([a for a in handler.completed_attempts if a.end_status.name == "OK"]) + == 2 + ) + assert ( + len( + [ + a + for a in handler.completed_attempts + if a.end_status.name == "ABORTED" + ] + ) + == 1 + ) + + @CrossSync.pytest + async def test_read_rows_sharded_failure_timeout(self, table, temp_rows, handler): + """ + Test failure in gapic layer by passing very low timeout + + No grpc headers expected + """ + from google.api_core.exceptions import DeadlineExceeded + + from google.cloud.bigtable.data.exceptions import ShardedReadRowsExceptionGroup + from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery + + await temp_rows.add_row(b"a") + await temp_rows.add_row(b"b") + query1 = ReadRowsQuery(row_keys=[b"a"]) + query2 = ReadRowsQuery(row_keys=[b"b"]) + handler.clear() + with pytest.raises(ShardedReadRowsExceptionGroup) as e: + await table.read_rows_sharded([query1, query2], 
operation_timeout=0.005) + assert len(e.value.exceptions) == 2 + for sub_exc in e.value.exceptions: + assert isinstance(sub_exc.__cause__, DeadlineExceeded) + # both shards should fail + assert len(handler.completed_operations) == 2 + assert len(handler.completed_attempts) == 2 + # validate operations + for operation in handler.completed_operations: + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "DEADLINE_EXCEEDED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is True + assert len(operation.completed_attempts) == 1 + assert operation.cluster_id == "" + assert operation.zone == "global" + # validate attempt + attempt = operation.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "DEADLINE_EXCEEDED" + assert attempt.gfe_latency_ns is None + + @CrossSync.pytest + async def test_read_rows_sharded_failure_unauthorized( + self, handler, authorized_view, cluster_config + ): + """ + Test failure in backend by accessing an unauthorized family + """ + from google.cloud.bigtable.data.exceptions import ShardedReadRowsExceptionGroup + from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery + from google.cloud.bigtable.data.row_filters import FamilyNameRegexFilter + + query1 = ReadRowsQuery(row_filter=FamilyNameRegexFilter("unauthorized")) + query2 = ReadRowsQuery(row_filter=FamilyNameRegexFilter(TEST_FAMILY)) + handler.clear() + with pytest.raises(ShardedReadRowsExceptionGroup) as e: + await authorized_view.read_rows_sharded([query1, query2]) + assert len(e.value.exceptions) == 1 + assert isinstance(e.value.exceptions[0].__cause__, GoogleAPICallError) + assert ( + e.value.exceptions[0].__cause__.grpc_status_code.name == "PERMISSION_DENIED" + ) + # one shard will fail, the other will succeed + assert len(handler.completed_operations) == 2 + assert len(handler.completed_attempts) == 2 + # sort operations by status + failed_op 
= next( + op for op in handler.completed_operations if op.final_status.name != "OK" + ) + success_op = next( + op for op in handler.completed_operations if op.final_status.name == "OK" + ) + # validate failed operation + assert failed_op.final_status.name == "PERMISSION_DENIED" + assert failed_op.op_type.value == "ReadRows" + assert failed_op.is_streaming is True + assert len(failed_op.completed_attempts) == 1 + assert failed_op.cluster_id == next(iter(cluster_config.keys())) + assert ( + failed_op.zone + == cluster_config[failed_op.cluster_id].location.split("/")[-1] + ) + # validate failed attempt + failed_attempt = failed_op.completed_attempts[0] + assert failed_attempt.end_status.name == "PERMISSION_DENIED" + assert ( + failed_attempt.gfe_latency_ns >= 0 + and failed_attempt.gfe_latency_ns < failed_op.duration_ns + ) + # validate successful operation + assert success_op.final_status.name == "OK" + assert success_op.op_type.value == "ReadRows" + assert success_op.is_streaming is True + assert len(success_op.completed_attempts) == 1 + # validate successful attempt + success_attempt = success_op.completed_attempts[0] + assert success_attempt.end_status.name == "OK" + + @CrossSync.pytest + async def test_read_rows_sharded_failure_mid_stream( + self, table, temp_rows, handler, error_injector + ): + """ + Test failure in grpc stream + """ + from google.cloud.bigtable.data.exceptions import ShardedReadRowsExceptionGroup + from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery + + await temp_rows.add_row(b"a") + await temp_rows.add_row(b"b") + query1 = ReadRowsQuery(row_keys=[b"a"]) + query2 = ReadRowsQuery(row_keys=[b"b"]) + handler.clear() + error_injector.fail_mid_stream = True + error_injector.push(self._make_exception(StatusCode.ABORTED)) + error_injector.push(self._make_exception(StatusCode.PERMISSION_DENIED)) + with pytest.raises(ShardedReadRowsExceptionGroup) as e: + await table.read_rows_sharded([query1, query2], retryable_errors=[Aborted]) + 
assert len(e.value.exceptions) == 1 + assert isinstance(e.value.exceptions[0].__cause__, PermissionDenied) + # one shard will fail, the other will succeed + # the failing shard will have one retry + assert len(handler.completed_operations) == 2 + assert len(handler.completed_attempts) == 3 + # sort operations by status + failed_op = next( + op for op in handler.completed_operations if op.final_status.name != "OK" + ) + success_op = next( + op for op in handler.completed_operations if op.final_status.name == "OK" + ) + # validate failed operation + assert failed_op.final_status.name == "PERMISSION_DENIED" + assert failed_op.op_type.value == "ReadRows" + assert failed_op.is_streaming is True + assert len(failed_op.completed_attempts) == 1 + # validate successful operation + assert success_op.final_status.name == "OK" + assert len(success_op.completed_attempts) == 2 + # validate failed attempt + attempt = failed_op.completed_attempts[0] + assert attempt.end_status.name == "PERMISSION_DENIED" + # validate retried attempt + retried_attempt = success_op.completed_attempts[0] + assert retried_attempt.end_status.name == "ABORTED" + # validate successful attempt + success_attempt = success_op.completed_attempts[-1] + assert success_attempt.end_status.name == "OK" + + @CrossSync.pytest + async def test_bulk_mutate_rows(self, table, temp_rows, handler, cluster_config): + from google.cloud.bigtable.data.mutations import RowMutationEntry + + new_value = uuid.uuid4().hex.encode() + row_key, mutation = await temp_rows.create_row_and_mutation( + table, new_value=new_value + ) + bulk_mutation = RowMutationEntry(row_key, [mutation]) + + handler.clear() + await table.bulk_mutate_rows([bulk_mutation]) + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + # validate operation + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.value[0] == 0 + 
assert operation.is_streaming is False + assert operation.op_type.value == "MutateRows" + assert len(operation.completed_attempts) == 1 + assert operation.completed_attempts[0] == handler.completed_attempts[0] + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + assert operation.duration_ns > 0 and operation.duration_ns < 1e9 + assert ( + operation.first_response_latency_ns is None + ) # populated for read_rows only + assert operation.flow_throttling_time_ns == 0 + # validate attempt + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.duration_ns > 0 and attempt.duration_ns < operation.duration_ns + assert attempt.end_status.value[0] == 0 + assert attempt.backoff_before_attempt_ns == 0 + assert ( + attempt.gfe_latency_ns > 0 and attempt.gfe_latency_ns < attempt.duration_ns + ) + assert attempt.application_blocking_time_ns == 0 + + @CrossSync.pytest + async def test_bulk_mutate_rows_failure_with_retries( + self, table, temp_rows, handler, error_injector + ): + """ + Test failure in grpc layer by injecting errors into an interceptor + with retryable errors, then a terminal one + """ + from google.cloud.bigtable.data.exceptions import MutationsExceptionGroup + from google.cloud.bigtable.data.mutations import RowMutationEntry, SetCell + + row_key = b"row_key_1" + mutation = SetCell(TEST_FAMILY, b"q", b"v") + entry = RowMutationEntry(row_key, [mutation]) + assert entry.is_idempotent() + + handler.clear() + expected_zone = "my_zone" + expected_cluster = "my_cluster" + num_retryable = 2 + for i in range(num_retryable): + error_injector.push( + self._make_exception(StatusCode.ABORTED, cluster_id=expected_cluster) + ) + error_injector.push( + self._make_exception(StatusCode.PERMISSION_DENIED, zone_id=expected_zone) + ) + with pytest.raises(MutationsExceptionGroup): + await 
table.bulk_mutate_rows([entry], retryable_errors=[Aborted]) + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == num_retryable + 1 + # validate operation + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "PERMISSION_DENIED" + assert operation.op_type.value == "MutateRows" + assert operation.is_streaming is False + assert len(operation.completed_attempts) == num_retryable + 1 + assert operation.cluster_id == expected_cluster + assert operation.zone == expected_zone + # validate attempts + for i in range(num_retryable): + attempt = handler.completed_attempts[i] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "ABORTED" + assert attempt.gfe_latency_ns is None + final_attempt = handler.completed_attempts[num_retryable] + assert isinstance(final_attempt, CompletedAttemptMetric) + assert final_attempt.end_status.name == "PERMISSION_DENIED" + assert final_attempt.gfe_latency_ns is None + + @CrossSync.pytest + async def test_bulk_mutate_rows_failure_timeout(self, table, temp_rows, handler): + """ + Test failure in gapic layer by passing very low timeout + + No grpc headers expected + """ + from google.cloud.bigtable.data.exceptions import MutationsExceptionGroup + from google.cloud.bigtable.data.mutations import RowMutationEntry, SetCell + + row_key = b"row_key_1" + mutation = SetCell(TEST_FAMILY, b"q", b"v") + entry = RowMutationEntry(row_key, [mutation]) + + handler.clear() + with pytest.raises(MutationsExceptionGroup): + await table.bulk_mutate_rows([entry], operation_timeout=0.001) + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + # validate operation + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "DEADLINE_EXCEEDED" + 
assert operation.op_type.value == "MutateRows" + assert operation.is_streaming is False + assert len(operation.completed_attempts) == 1 + assert operation.cluster_id == "" + assert operation.zone == "global" + # validate attempt + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "DEADLINE_EXCEEDED" + assert attempt.gfe_latency_ns is None + + @CrossSync.pytest + async def test_bulk_mutate_rows_failure_unauthorized( + self, handler, authorized_view, cluster_config + ): + """ + Test failure in backend by accessing an unauthorized family + """ + from google.cloud.bigtable.data.exceptions import MutationsExceptionGroup + from google.cloud.bigtable.data.mutations import RowMutationEntry, SetCell + + row_key = b"row_key_1" + mutation = SetCell("unauthorized", b"q", b"v") + entry = RowMutationEntry(row_key, [mutation]) + + handler.clear() + with pytest.raises(MutationsExceptionGroup): + await authorized_view.bulk_mutate_rows([entry]) + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + # validate operation + operation = handler.completed_operations[0] + assert operation.final_status.name == "PERMISSION_DENIED" + assert operation.op_type.value == "MutateRows" + assert operation.is_streaming is False + assert len(operation.completed_attempts) == 1 + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + # validate attempt + attempt = handler.completed_attempts[0] + assert attempt.end_status.name == "PERMISSION_DENIED" + assert ( + attempt.gfe_latency_ns >= 0 + and attempt.gfe_latency_ns < operation.duration_ns + ) + + @CrossSync.pytest + async def test_bulk_mutate_rows_failure_unauthorized_with_retries( + self, handler, authorized_view, cluster_config + ): + """ + retry unauthorized request multiple times before timing out + + 
For bulk_mutate, the rpc returns success, with failures returned in the response. + For this reason, We expect the attempts to be marked as successful, even though + the underlying mutation is retried + """ + from google.cloud.bigtable.data.exceptions import MutationsExceptionGroup + from google.cloud.bigtable.data.mutations import RowMutationEntry, SetCell + + row_key = b"row_key_1" + mutation = SetCell("unauthorized", b"q", b"v") + entry = RowMutationEntry(row_key, [mutation]) + + handler.clear() + with pytest.raises(MutationsExceptionGroup) as e: + await authorized_view.bulk_mutate_rows( + [entry], retryable_errors=[PermissionDenied], operation_timeout=0.5 + ) + assert len(e.value.exceptions) == 1 + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) > 1 + # validate operation + operation = handler.completed_operations[0] + assert operation.final_status.name == "DEADLINE_EXCEEDED" + assert operation.op_type.value == "MutateRows" + assert operation.is_streaming is False + assert len(operation.completed_attempts) > 1 + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + # validate attempts + for attempt in handler.completed_attempts: + assert attempt.end_status.name in ["OK", "DEADLINE_EXCEEDED"] + + @CrossSync.pytest + async def test_mutate_rows_batcher(self, table, temp_rows, handler, cluster_config): + from google.cloud.bigtable.data.mutations import RowMutationEntry + + new_value, new_value2 = [uuid.uuid4().hex.encode() for _ in range(2)] + row_key, mutation = await temp_rows.create_row_and_mutation( + table, new_value=new_value + ) + row_key2, mutation2 = await temp_rows.create_row_and_mutation( + table, new_value=new_value2 + ) + bulk_mutation = RowMutationEntry(row_key, [mutation]) + bulk_mutation2 = RowMutationEntry(row_key2, [mutation2]) + + handler.clear() + async with 
table.mutations_batcher() as batcher: + await batcher.append(bulk_mutation) + await batcher.append(bulk_mutation2) + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + # batcher expects to cancel staged operation on close + # validate operation + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.value[0] == 0 + assert operation.is_streaming is False + assert operation.op_type.value == "MutateRows" + assert len(operation.completed_attempts) == 1 + assert operation.completed_attempts[0] == handler.completed_attempts[0] + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + assert operation.duration_ns > 0 and operation.duration_ns < 1e9 + assert ( + operation.first_response_latency_ns is None + ) # populated for read_rows only + assert ( + operation.flow_throttling_time_ns > 0 + and operation.flow_throttling_time_ns < operation.duration_ns + ) + # validate attempt + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.duration_ns > 0 and attempt.duration_ns < operation.duration_ns + assert attempt.end_status.value[0] == 0 + assert attempt.backoff_before_attempt_ns == 0 + assert ( + attempt.gfe_latency_ns > 0 and attempt.gfe_latency_ns < attempt.duration_ns + ) + assert attempt.application_blocking_time_ns == 0 + + @CrossSync.pytest + async def test_mutate_rows_batcher_failure_with_retries( + self, table, handler, error_injector + ): + """ + Test failure in grpc layer by injecting errors into an interceptor + with retryable errors, then a terminal one + """ + from google.cloud.bigtable.data.exceptions import MutationsExceptionGroup + from google.cloud.bigtable.data.mutations import RowMutationEntry, SetCell + + row_key = b"row_key_1" + mutation = 
SetCell(TEST_FAMILY, b"q", b"v") + entry = RowMutationEntry(row_key, [mutation]) + assert entry.is_idempotent() + + handler.clear() + expected_zone = "my_zone" + expected_cluster = "my_cluster" + num_retryable = 2 + for i in range(num_retryable): + error_injector.push( + self._make_exception(StatusCode.ABORTED, cluster_id=expected_cluster) + ) + error_injector.push( + self._make_exception(StatusCode.PERMISSION_DENIED, zone_id=expected_zone) + ) + with pytest.raises(MutationsExceptionGroup): + async with table.mutations_batcher( + batch_retryable_errors=[Aborted] + ) as batcher: + await batcher.append(entry) + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == num_retryable + 1 + # validate operation + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "PERMISSION_DENIED" + assert operation.op_type.value == "MutateRows" + assert operation.is_streaming is False + assert len(operation.completed_attempts) == num_retryable + 1 + assert operation.cluster_id == expected_cluster + assert operation.zone == expected_zone + # validate attempts + for i in range(num_retryable): + attempt = handler.completed_attempts[i] + assert attempt.end_status.name == "ABORTED" + assert attempt.gfe_latency_ns is None + final_attempt = handler.completed_attempts[num_retryable] + assert final_attempt.end_status.name == "PERMISSION_DENIED" + assert final_attempt.gfe_latency_ns is None + + @CrossSync.pytest + async def test_mutate_rows_batcher_failure_timeout(self, table, temp_rows, handler): + """ + Test failure in gapic layer by passing very low timeout + + No grpc headers expected + """ + from google.cloud.bigtable.data.exceptions import MutationsExceptionGroup + from google.cloud.bigtable.data.mutations import RowMutationEntry, SetCell + + row_key = b"row_key_1" + mutation = SetCell(TEST_FAMILY, b"q", b"v") + entry = RowMutationEntry(row_key, 
[mutation]) + + with pytest.raises(MutationsExceptionGroup): + async with table.mutations_batcher( + batch_operation_timeout=0.001 + ) as batcher: + await batcher.append(entry) + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + # validate operation + operation = handler.completed_operations[0] + assert operation.final_status.name == "DEADLINE_EXCEEDED" + assert operation.op_type.value == "MutateRows" + assert operation.is_streaming is False + assert len(operation.completed_attempts) == 1 + assert operation.cluster_id == "" + assert operation.zone == "global" + # validate attempt + attempt = handler.completed_attempts[0] + assert attempt.end_status.name == "DEADLINE_EXCEEDED" + assert attempt.gfe_latency_ns is None + + @CrossSync.pytest + async def test_mutate_rows_batcher_failure_unauthorized( + self, handler, authorized_view, cluster_config + ): + """ + Test failure in backend by accessing an unauthorized family + """ + from google.cloud.bigtable.data.exceptions import MutationsExceptionGroup + from google.cloud.bigtable.data.mutations import RowMutationEntry, SetCell + + row_key = b"row_key_1" + mutation = SetCell("unauthorized", b"q", b"v") + entry = RowMutationEntry(row_key, [mutation]) + + with pytest.raises(MutationsExceptionGroup) as e: + async with authorized_view.mutations_batcher() as batcher: + await batcher.append(entry) + assert len(e.value.exceptions) == 1 + assert isinstance(e.value.exceptions[0].__cause__, GoogleAPICallError) + assert ( + e.value.exceptions[0].__cause__.grpc_status_code.name == "PERMISSION_DENIED" + ) + # validate counts + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + # validate operation + operation = handler.completed_operations[0] + assert operation.final_status.name == "PERMISSION_DENIED" + assert operation.op_type.value == "MutateRows" + assert operation.is_streaming is False + assert 
len(operation.completed_attempts) == 1 + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + # validate attempt + attempt = handler.completed_attempts[0] + assert attempt.end_status.name == "PERMISSION_DENIED" + assert ( + attempt.gfe_latency_ns >= 0 + and attempt.gfe_latency_ns < operation.duration_ns + ) + @pytest.mark.skipif( bool(os.environ.get(BIGTABLE_EMULATOR)), reason="emulator doesn't suport cluster_config", diff --git a/packages/google-cloud-bigtable/tests/system/data/test_metrics_autogen.py b/packages/google-cloud-bigtable/tests/system/data/test_metrics_autogen.py index bee213626f7e..997ebd09853c 100644 --- a/packages/google-cloud-bigtable/tests/system/data/test_metrics_autogen.py +++ b/packages/google-cloud-bigtable/tests/system/data/test_metrics_autogen.py @@ -34,6 +34,7 @@ CompletedOperationMetric, ) from google.cloud.bigtable.data._metrics.handlers._base import MetricsHandler +from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery from google.cloud.bigtable_v2.types import ResponseParams from . 
import TEST_FAMILY, SystemTestRunner @@ -174,7 +175,7 @@ def temp_rows(self, table): @pytest.fixture(scope="session") def table(self, client, table_id, instance_id, handler): with client.get_table(instance_id, table_id) as table: - table._metrics.add_handler(handler) + table._metrics.handlers = [handler] yield table @pytest.fixture(scope="session") @@ -187,6 +188,1022 @@ def authorized_view( table._metrics.add_handler(handler) yield table + def test_read_rows(self, table, temp_rows, handler, cluster_config): + temp_rows.add_row(b"row_key_1") + temp_rows.add_row(b"row_key_2") + handler.clear() + row_list = table.read_rows(ReadRowsQuery()) + assert len(row_list) == 2 + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.value[0] == 0 + assert operation.is_streaming is True + assert operation.op_type.value == "ReadRows" + assert len(operation.completed_attempts) == 1 + assert operation.completed_attempts[0] == handler.completed_attempts[0] + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + assert operation.duration_ns > 0 and operation.duration_ns < 1000000000.0 + assert ( + operation.first_response_latency_ns is not None + and operation.first_response_latency_ns < operation.duration_ns + ) + assert operation.flow_throttling_time_ns == 0 + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.duration_ns > 0 and attempt.duration_ns < operation.duration_ns + assert attempt.end_status.value[0] == 0 + assert attempt.backoff_before_attempt_ns == 0 + assert ( + attempt.gfe_latency_ns > 0 and attempt.gfe_latency_ns < attempt.duration_ns + ) + assert ( + attempt.application_blocking_time_ns > 0 + and 
attempt.application_blocking_time_ns < operation.duration_ns + ) + + def test_read_rows_failure_with_retries( + self, table, temp_rows, handler, error_injector + ): + """Test failure in grpc layer by injecting errors into an interceptor + with retryable errors, then a terminal one""" + temp_rows.add_row(b"row_key_1") + handler.clear() + expected_zone = "my_zone" + expected_cluster = "my_cluster" + num_retryable = 2 + for i in range(num_retryable): + error_injector.push( + self._make_exception(StatusCode.ABORTED, cluster_id=expected_cluster) + ) + error_injector.push( + self._make_exception(StatusCode.PERMISSION_DENIED, zone_id=expected_zone) + ) + with pytest.raises(PermissionDenied): + table.read_rows(ReadRowsQuery(), retryable_errors=[Aborted]) + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == num_retryable + 1 + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "PERMISSION_DENIED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is True + assert len(operation.completed_attempts) == num_retryable + 1 + assert operation.cluster_id == expected_cluster + assert operation.zone == expected_zone + for i in range(num_retryable): + attempt = handler.completed_attempts[i] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "ABORTED" + assert attempt.gfe_latency_ns is None + final_attempt = handler.completed_attempts[num_retryable] + assert isinstance(final_attempt, CompletedAttemptMetric) + assert final_attempt.end_status.name == "PERMISSION_DENIED" + assert final_attempt.gfe_latency_ns is None + + def test_read_rows_failure_timeout(self, table, temp_rows, handler): + """Test failure in gapic layer by passing very low timeout + + No grpc headers expected""" + temp_rows.add_row(b"row_key_1") + handler.clear() + with pytest.raises(GoogleAPICallError): + 
table.read_rows(ReadRowsQuery(), operation_timeout=0.001) + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "DEADLINE_EXCEEDED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is True + assert len(operation.completed_attempts) == 1 + assert operation.cluster_id == "" + assert operation.zone == "global" + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "DEADLINE_EXCEEDED" + assert attempt.gfe_latency_ns is None + + def test_read_rows_failure_unauthorized( + self, handler, authorized_view, cluster_config + ): + """Test failure in backend by accessing an unauthorized family""" + from google.cloud.bigtable.data.row_filters import FamilyNameRegexFilter + + with pytest.raises(GoogleAPICallError) as e: + authorized_view.read_rows( + ReadRowsQuery(row_filter=FamilyNameRegexFilter("unauthorized")) + ) + assert e.value.grpc_status_code.name == "PERMISSION_DENIED" + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "PERMISSION_DENIED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is True + assert len(operation.completed_attempts) == 1 + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "PERMISSION_DENIED" + assert ( + attempt.gfe_latency_ns >= 0 + and attempt.gfe_latency_ns < operation.duration_ns + ) + + def 
test_read_rows_stream(self, table, temp_rows, handler, cluster_config): + temp_rows.add_row(b"row_key_1") + temp_rows.add_row(b"row_key_2") + handler.clear() + generator = table.read_rows_stream(ReadRowsQuery()) + row_list = [r for r in generator] + assert len(row_list) == 2 + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.value[0] == 0 + assert operation.is_streaming is True + assert operation.op_type.value == "ReadRows" + assert len(operation.completed_attempts) == 1 + assert operation.completed_attempts[0] == handler.completed_attempts[0] + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + assert operation.duration_ns > 0 and operation.duration_ns < 1000000000.0 + assert ( + operation.first_response_latency_ns is not None + and operation.first_response_latency_ns < operation.duration_ns + ) + assert operation.flow_throttling_time_ns == 0 + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.duration_ns > 0 and attempt.duration_ns < operation.duration_ns + assert attempt.end_status.value[0] == 0 + assert attempt.backoff_before_attempt_ns == 0 + assert ( + attempt.gfe_latency_ns > 0 and attempt.gfe_latency_ns < attempt.duration_ns + ) + assert ( + attempt.application_blocking_time_ns > 0 + and attempt.application_blocking_time_ns < operation.duration_ns + ) + + def test_read_rows_stream_failure_closed( + self, table, temp_rows, handler, error_injector + ): + """Test how metrics collection handles closed generator""" + temp_rows.add_row(b"row_key_1") + temp_rows.add_row(b"row_key_2") + handler.clear() + generator = table.read_rows_stream(ReadRowsQuery()) + generator.__next__() + generator.close() + with 
pytest.raises(CrossSync._Sync_Impl.StopIteration): + generator.__next__() + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + operation = handler.completed_operations[0] + assert operation.final_status.name == "CANCELLED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is True + assert len(operation.completed_attempts) == 1 + assert operation.cluster_id == "" + assert operation.zone == "global" + attempt = handler.completed_attempts[0] + assert attempt.end_status.name == "CANCELLED" + assert attempt.gfe_latency_ns is None + + def test_read_rows_stream_failure_with_retries( + self, table, temp_rows, handler, error_injector + ): + """Test failure in grpc layer by injecting errors into an interceptor + with retryable errors, then a terminal one""" + temp_rows.add_row(b"row_key_1") + handler.clear() + expected_zone = "my_zone" + expected_cluster = "my_cluster" + num_retryable = 2 + for i in range(num_retryable): + error_injector.push( + self._make_exception(StatusCode.ABORTED, cluster_id=expected_cluster) + ) + error_injector.push( + self._make_exception(StatusCode.PERMISSION_DENIED, zone_id=expected_zone) + ) + generator = table.read_rows_stream(ReadRowsQuery(), retryable_errors=[Aborted]) + with pytest.raises(PermissionDenied): + [_ for _ in generator] + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == num_retryable + 1 + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "PERMISSION_DENIED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is True + assert len(operation.completed_attempts) == num_retryable + 1 + assert operation.cluster_id == expected_cluster + assert operation.zone == expected_zone + for i in range(num_retryable): + attempt = handler.completed_attempts[i] + assert isinstance(attempt, CompletedAttemptMetric) + assert 
attempt.end_status.name == "ABORTED" + assert attempt.gfe_latency_ns is None + final_attempt = handler.completed_attempts[num_retryable] + assert isinstance(final_attempt, CompletedAttemptMetric) + assert final_attempt.end_status.name == "PERMISSION_DENIED" + assert final_attempt.gfe_latency_ns is None + + def test_read_rows_stream_failure_timeout(self, table, temp_rows, handler): + """Test failure in gapic layer by passing very low timeout + + No grpc headers expected""" + temp_rows.add_row(b"row_key_1") + handler.clear() + generator = table.read_rows_stream(ReadRowsQuery(), operation_timeout=0.001) + with pytest.raises(GoogleAPICallError): + [_ for _ in generator] + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "DEADLINE_EXCEEDED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is True + assert len(operation.completed_attempts) == 1 + assert operation.cluster_id == "" + assert operation.zone == "global" + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "DEADLINE_EXCEEDED" + assert attempt.gfe_latency_ns is None + + def test_read_rows_stream_failure_unauthorized( + self, handler, authorized_view, cluster_config + ): + """Test failure in backend by accessing an unauthorized family""" + from google.cloud.bigtable.data.row_filters import FamilyNameRegexFilter + + with pytest.raises(GoogleAPICallError) as e: + generator = authorized_view.read_rows_stream( + ReadRowsQuery(row_filter=FamilyNameRegexFilter("unauthorized")) + ) + [_ for _ in generator] + assert e.value.grpc_status_code.name == "PERMISSION_DENIED" + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + operation = handler.completed_operations[0] + assert 
isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "PERMISSION_DENIED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is True + assert len(operation.completed_attempts) == 1 + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "PERMISSION_DENIED" + assert ( + attempt.gfe_latency_ns >= 0 + and attempt.gfe_latency_ns < operation.duration_ns + ) + + def test_read_rows_stream_failure_unauthorized_with_retries( + self, handler, authorized_view, cluster_config + ): + """retry unauthorized request multiple times before timing out""" + from google.cloud.bigtable.data.row_filters import FamilyNameRegexFilter + + with pytest.raises(GoogleAPICallError) as e: + generator = authorized_view.read_rows_stream( + ReadRowsQuery(row_filter=FamilyNameRegexFilter("unauthorized")), + retryable_errors=[PermissionDenied], + operation_timeout=0.5, + ) + [_ for _ in generator] + assert e.value.grpc_status_code.name == "DEADLINE_EXCEEDED" + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) > 1 + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "DEADLINE_EXCEEDED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is True + assert len(operation.completed_attempts) > 1 + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + for attempt in handler.completed_attempts: + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name in ["PERMISSION_DENIED", "DEADLINE_EXCEEDED"] + + def 
test_read_rows_stream_failure_mid_stream( + self, table, temp_rows, handler, error_injector + ): + """Test failure in grpc stream""" + temp_rows.add_row(b"row_key_1") + handler.clear() + error_injector.fail_mid_stream = True + error_injector.push(self._make_exception(StatusCode.ABORTED)) + error_injector.push(self._make_exception(StatusCode.PERMISSION_DENIED)) + generator = table.read_rows_stream(ReadRowsQuery(), retryable_errors=[Aborted]) + with pytest.raises(PermissionDenied): + [_ for _ in generator] + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 2 + operation = handler.completed_operations[0] + assert operation.final_status.name == "PERMISSION_DENIED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is True + assert len(operation.completed_attempts) == 2 + attempt = handler.completed_attempts[0] + assert attempt.end_status.name == "ABORTED" + final_attempt = handler.completed_attempts[-1] + assert final_attempt.end_status.name == "PERMISSION_DENIED" + + def test_read_row(self, table, temp_rows, handler, cluster_config): + temp_rows.add_row(b"row_key_1") + handler.clear() + table.read_row(b"row_key_1") + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.value[0] == 0 + assert operation.is_streaming is False + assert operation.op_type.value == "ReadRows" + assert len(operation.completed_attempts) == 1 + assert operation.completed_attempts[0] == handler.completed_attempts[0] + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + assert operation.duration_ns > 0 and operation.duration_ns < 1000000000.0 + assert ( + operation.first_response_latency_ns > 0 + and operation.first_response_latency_ns < 
operation.duration_ns + ) + assert operation.flow_throttling_time_ns == 0 + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.duration_ns > 0 and attempt.duration_ns < operation.duration_ns + assert attempt.end_status.value[0] == 0 + assert attempt.backoff_before_attempt_ns == 0 + assert ( + attempt.gfe_latency_ns > 0 and attempt.gfe_latency_ns < attempt.duration_ns + ) + assert ( + attempt.application_blocking_time_ns > 0 + and attempt.application_blocking_time_ns < operation.duration_ns + ) + + def test_read_row_failure_with_retries( + self, table, temp_rows, handler, error_injector + ): + """Test failure in grpc layer by injecting errors into an interceptor + with retryable errors, then a terminal one""" + temp_rows.add_row(b"row_key_1") + handler.clear() + expected_zone = "my_zone" + expected_cluster = "my_cluster" + num_retryable = 2 + for i in range(num_retryable): + error_injector.push( + self._make_exception(StatusCode.ABORTED, cluster_id=expected_cluster) + ) + error_injector.push( + self._make_exception(StatusCode.PERMISSION_DENIED, zone_id=expected_zone) + ) + with pytest.raises(PermissionDenied): + table.read_row(b"row_key_1", retryable_errors=[Aborted]) + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == num_retryable + 1 + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "PERMISSION_DENIED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is False + assert len(operation.completed_attempts) == num_retryable + 1 + assert operation.cluster_id == expected_cluster + assert operation.zone == expected_zone + for i in range(num_retryable): + attempt = handler.completed_attempts[i] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "ABORTED" + assert attempt.gfe_latency_ns is None + final_attempt = 
handler.completed_attempts[num_retryable] + assert isinstance(final_attempt, CompletedAttemptMetric) + assert final_attempt.end_status.name == "PERMISSION_DENIED" + assert final_attempt.gfe_latency_ns is None + + def test_read_row_failure_timeout(self, table, temp_rows, handler): + """Test failure in gapic layer by passing very low timeout + + No grpc headers expected""" + temp_rows.add_row(b"row_key_1") + handler.clear() + with pytest.raises(GoogleAPICallError): + table.read_row(b"row_key_1", operation_timeout=0.001) + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "DEADLINE_EXCEEDED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is False + assert len(operation.completed_attempts) == 1 + assert operation.cluster_id == "" + assert operation.zone == "global" + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "DEADLINE_EXCEEDED" + assert attempt.gfe_latency_ns is None + + def test_read_row_failure_unauthorized( + self, handler, authorized_view, cluster_config + ): + """Test failure in backend by accessing an unauthorized family""" + from google.cloud.bigtable.data.row_filters import FamilyNameRegexFilter + + with pytest.raises(GoogleAPICallError) as e: + authorized_view.read_row( + b"any_row", row_filter=FamilyNameRegexFilter("unauthorized") + ) + assert e.value.grpc_status_code.name == "PERMISSION_DENIED" + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "PERMISSION_DENIED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is False + assert 
len(operation.completed_attempts) == 1 + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "PERMISSION_DENIED" + assert ( + attempt.gfe_latency_ns >= 0 + and attempt.gfe_latency_ns < operation.duration_ns + ) + + def test_read_rows_sharded(self, table, temp_rows, handler, cluster_config): + from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery + + temp_rows.add_row(b"a") + temp_rows.add_row(b"b") + temp_rows.add_row(b"c") + temp_rows.add_row(b"d") + query1 = ReadRowsQuery(row_keys=[b"a", b"c"]) + query2 = ReadRowsQuery(row_keys=[b"b", b"d"]) + handler.clear() + row_list = table.read_rows_sharded([query1, query2]) + assert len(row_list) == 4 + assert len(handler.completed_operations) == 2 + assert len(handler.completed_attempts) == 2 + for operation in handler.completed_operations: + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.value[0] == 0 + assert operation.is_streaming is True + assert operation.op_type.value == "ReadRows" + assert len(operation.completed_attempts) == 1 + attempt = operation.completed_attempts[0] + assert attempt in handler.completed_attempts + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + assert operation.duration_ns > 0 and operation.duration_ns < 1000000000.0 + assert ( + operation.first_response_latency_ns is not None + and operation.first_response_latency_ns < operation.duration_ns + ) + assert operation.flow_throttling_time_ns == 0 + assert isinstance(attempt, CompletedAttemptMetric) + assert ( + attempt.duration_ns > 0 and attempt.duration_ns < operation.duration_ns + ) + assert attempt.end_status.value[0] == 0 + assert 
attempt.backoff_before_attempt_ns == 0 + assert ( + attempt.gfe_latency_ns > 0 + and attempt.gfe_latency_ns < attempt.duration_ns + ) + assert ( + attempt.application_blocking_time_ns > 0 + and attempt.application_blocking_time_ns < operation.duration_ns + ) + + def test_read_rows_sharded_failure_with_retries( + self, table, temp_rows, handler, error_injector + ): + """Test failure in grpc layer by injecting errors into an interceptor + with retryable errors""" + from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery + + temp_rows.add_row(b"a") + temp_rows.add_row(b"b") + query1 = ReadRowsQuery(row_keys=[b"a"]) + query2 = ReadRowsQuery(row_keys=[b"b"]) + handler.clear() + error_injector.push(self._make_exception(StatusCode.ABORTED)) + table.read_rows_sharded([query1, query2], retryable_errors=[Aborted]) + assert len(handler.completed_operations) == 2 + assert len(handler.completed_attempts) == 3 + for op in handler.completed_operations: + assert op.final_status.name == "OK" + assert op.op_type.value == "ReadRows" + assert op.is_streaming is True + assert ( + len([a for a in handler.completed_attempts if a.end_status.name == "OK"]) + == 2 + ) + assert ( + len( + [ + a + for a in handler.completed_attempts + if a.end_status.name == "ABORTED" + ] + ) + == 1 + ) + + def test_read_rows_sharded_failure_timeout(self, table, temp_rows, handler): + """Test failure in gapic layer by passing very low timeout + + No grpc headers expected""" + from google.api_core.exceptions import DeadlineExceeded + + from google.cloud.bigtable.data.exceptions import ShardedReadRowsExceptionGroup + from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery + + temp_rows.add_row(b"a") + temp_rows.add_row(b"b") + query1 = ReadRowsQuery(row_keys=[b"a"]) + query2 = ReadRowsQuery(row_keys=[b"b"]) + handler.clear() + with pytest.raises(ShardedReadRowsExceptionGroup) as e: + table.read_rows_sharded([query1, query2], operation_timeout=0.005) + assert len(e.value.exceptions) == 
2 + for sub_exc in e.value.exceptions: + assert isinstance(sub_exc.__cause__, DeadlineExceeded) + assert len(handler.completed_operations) == 2 + assert len(handler.completed_attempts) == 2 + for operation in handler.completed_operations: + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "DEADLINE_EXCEEDED" + assert operation.op_type.value == "ReadRows" + assert operation.is_streaming is True + assert len(operation.completed_attempts) == 1 + assert operation.cluster_id == "" + assert operation.zone == "global" + attempt = operation.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "DEADLINE_EXCEEDED" + assert attempt.gfe_latency_ns is None + + def test_read_rows_sharded_failure_unauthorized( + self, handler, authorized_view, cluster_config + ): + """Test failure in backend by accessing an unauthorized family""" + from google.cloud.bigtable.data.exceptions import ShardedReadRowsExceptionGroup + from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery + from google.cloud.bigtable.data.row_filters import FamilyNameRegexFilter + + query1 = ReadRowsQuery(row_filter=FamilyNameRegexFilter("unauthorized")) + query2 = ReadRowsQuery(row_filter=FamilyNameRegexFilter(TEST_FAMILY)) + handler.clear() + with pytest.raises(ShardedReadRowsExceptionGroup) as e: + authorized_view.read_rows_sharded([query1, query2]) + assert len(e.value.exceptions) == 1 + assert isinstance(e.value.exceptions[0].__cause__, GoogleAPICallError) + assert ( + e.value.exceptions[0].__cause__.grpc_status_code.name == "PERMISSION_DENIED" + ) + assert len(handler.completed_operations) == 2 + assert len(handler.completed_attempts) == 2 + failed_op = next( + (op for op in handler.completed_operations if op.final_status.name != "OK") + ) + success_op = next( + (op for op in handler.completed_operations if op.final_status.name == "OK") + ) + assert failed_op.final_status.name == 
"PERMISSION_DENIED" + assert failed_op.op_type.value == "ReadRows" + assert failed_op.is_streaming is True + assert len(failed_op.completed_attempts) == 1 + assert failed_op.cluster_id == next(iter(cluster_config.keys())) + assert ( + failed_op.zone + == cluster_config[failed_op.cluster_id].location.split("/")[-1] + ) + failed_attempt = failed_op.completed_attempts[0] + assert failed_attempt.end_status.name == "PERMISSION_DENIED" + assert ( + failed_attempt.gfe_latency_ns >= 0 + and failed_attempt.gfe_latency_ns < failed_op.duration_ns + ) + assert success_op.final_status.name == "OK" + assert success_op.op_type.value == "ReadRows" + assert success_op.is_streaming is True + assert len(success_op.completed_attempts) == 1 + success_attempt = success_op.completed_attempts[0] + assert success_attempt.end_status.name == "OK" + + def test_read_rows_sharded_failure_mid_stream( + self, table, temp_rows, handler, error_injector + ): + """Test failure in grpc stream""" + from google.cloud.bigtable.data.exceptions import ShardedReadRowsExceptionGroup + from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery + + temp_rows.add_row(b"a") + temp_rows.add_row(b"b") + query1 = ReadRowsQuery(row_keys=[b"a"]) + query2 = ReadRowsQuery(row_keys=[b"b"]) + handler.clear() + error_injector.fail_mid_stream = True + error_injector.push(self._make_exception(StatusCode.ABORTED)) + error_injector.push(self._make_exception(StatusCode.PERMISSION_DENIED)) + with pytest.raises(ShardedReadRowsExceptionGroup) as e: + table.read_rows_sharded([query1, query2], retryable_errors=[Aborted]) + assert len(e.value.exceptions) == 1 + assert isinstance(e.value.exceptions[0].__cause__, PermissionDenied) + assert len(handler.completed_operations) == 2 + assert len(handler.completed_attempts) == 3 + failed_op = next( + (op for op in handler.completed_operations if op.final_status.name != "OK") + ) + success_op = next( + (op for op in handler.completed_operations if op.final_status.name == "OK") + ) 
+ assert failed_op.final_status.name == "PERMISSION_DENIED" + assert failed_op.op_type.value == "ReadRows" + assert failed_op.is_streaming is True + assert len(failed_op.completed_attempts) == 1 + assert success_op.final_status.name == "OK" + assert len(success_op.completed_attempts) == 2 + attempt = failed_op.completed_attempts[0] + assert attempt.end_status.name == "PERMISSION_DENIED" + retried_attempt = success_op.completed_attempts[0] + assert retried_attempt.end_status.name == "ABORTED" + success_attempt = success_op.completed_attempts[-1] + assert success_attempt.end_status.name == "OK" + + def test_bulk_mutate_rows(self, table, temp_rows, handler, cluster_config): + from google.cloud.bigtable.data.mutations import RowMutationEntry + + new_value = uuid.uuid4().hex.encode() + row_key, mutation = temp_rows.create_row_and_mutation( + table, new_value=new_value + ) + bulk_mutation = RowMutationEntry(row_key, [mutation]) + handler.clear() + table.bulk_mutate_rows([bulk_mutation]) + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.value[0] == 0 + assert operation.is_streaming is False + assert operation.op_type.value == "MutateRows" + assert len(operation.completed_attempts) == 1 + assert operation.completed_attempts[0] == handler.completed_attempts[0] + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + assert operation.duration_ns > 0 and operation.duration_ns < 1000000000.0 + assert operation.first_response_latency_ns is None + assert operation.flow_throttling_time_ns == 0 + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.duration_ns > 0 and attempt.duration_ns < operation.duration_ns + assert 
attempt.end_status.value[0] == 0 + assert attempt.backoff_before_attempt_ns == 0 + assert ( + attempt.gfe_latency_ns > 0 and attempt.gfe_latency_ns < attempt.duration_ns + ) + assert attempt.application_blocking_time_ns == 0 + + def test_bulk_mutate_rows_failure_with_retries( + self, table, temp_rows, handler, error_injector + ): + """Test failure in grpc layer by injecting errors into an interceptor + with retryable errors, then a terminal one""" + from google.cloud.bigtable.data.exceptions import MutationsExceptionGroup + from google.cloud.bigtable.data.mutations import RowMutationEntry, SetCell + + row_key = b"row_key_1" + mutation = SetCell(TEST_FAMILY, b"q", b"v") + entry = RowMutationEntry(row_key, [mutation]) + assert entry.is_idempotent() + handler.clear() + expected_zone = "my_zone" + expected_cluster = "my_cluster" + num_retryable = 2 + for i in range(num_retryable): + error_injector.push( + self._make_exception(StatusCode.ABORTED, cluster_id=expected_cluster) + ) + error_injector.push( + self._make_exception(StatusCode.PERMISSION_DENIED, zone_id=expected_zone) + ) + with pytest.raises(MutationsExceptionGroup): + table.bulk_mutate_rows([entry], retryable_errors=[Aborted]) + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == num_retryable + 1 + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "PERMISSION_DENIED" + assert operation.op_type.value == "MutateRows" + assert operation.is_streaming is False + assert len(operation.completed_attempts) == num_retryable + 1 + assert operation.cluster_id == expected_cluster + assert operation.zone == expected_zone + for i in range(num_retryable): + attempt = handler.completed_attempts[i] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "ABORTED" + assert attempt.gfe_latency_ns is None + final_attempt = handler.completed_attempts[num_retryable] + 
assert isinstance(final_attempt, CompletedAttemptMetric) + assert final_attempt.end_status.name == "PERMISSION_DENIED" + assert final_attempt.gfe_latency_ns is None + + def test_bulk_mutate_rows_failure_timeout(self, table, temp_rows, handler): + """Test failure in gapic layer by passing very low timeout + + No grpc headers expected""" + from google.cloud.bigtable.data.exceptions import MutationsExceptionGroup + from google.cloud.bigtable.data.mutations import RowMutationEntry, SetCell + + row_key = b"row_key_1" + mutation = SetCell(TEST_FAMILY, b"q", b"v") + entry = RowMutationEntry(row_key, [mutation]) + handler.clear() + with pytest.raises(MutationsExceptionGroup): + table.bulk_mutate_rows([entry], operation_timeout=0.001) + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "DEADLINE_EXCEEDED" + assert operation.op_type.value == "MutateRows" + assert operation.is_streaming is False + assert len(operation.completed_attempts) == 1 + assert operation.cluster_id == "" + assert operation.zone == "global" + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.end_status.name == "DEADLINE_EXCEEDED" + assert attempt.gfe_latency_ns is None + + def test_bulk_mutate_rows_failure_unauthorized( + self, handler, authorized_view, cluster_config + ): + """Test failure in backend by accessing an unauthorized family""" + from google.cloud.bigtable.data.exceptions import MutationsExceptionGroup + from google.cloud.bigtable.data.mutations import RowMutationEntry, SetCell + + row_key = b"row_key_1" + mutation = SetCell("unauthorized", b"q", b"v") + entry = RowMutationEntry(row_key, [mutation]) + handler.clear() + with pytest.raises(MutationsExceptionGroup): + authorized_view.bulk_mutate_rows([entry]) + assert len(handler.completed_operations) == 
1 + assert len(handler.completed_attempts) == 1 + operation = handler.completed_operations[0] + assert operation.final_status.name == "PERMISSION_DENIED" + assert operation.op_type.value == "MutateRows" + assert operation.is_streaming is False + assert len(operation.completed_attempts) == 1 + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + attempt = handler.completed_attempts[0] + assert attempt.end_status.name == "PERMISSION_DENIED" + assert ( + attempt.gfe_latency_ns >= 0 + and attempt.gfe_latency_ns < operation.duration_ns + ) + + def test_bulk_mutate_rows_failure_unauthorized_with_retries( + self, handler, authorized_view, cluster_config + ): + """retry unauthorized request multiple times before timing out + + For bulk_mutate, the rpc returns success, with failures returned in the response. + For this reason, We expect the attempts to be marked as successful, even though + the underlying mutation is retried""" + from google.cloud.bigtable.data.exceptions import MutationsExceptionGroup + from google.cloud.bigtable.data.mutations import RowMutationEntry, SetCell + + row_key = b"row_key_1" + mutation = SetCell("unauthorized", b"q", b"v") + entry = RowMutationEntry(row_key, [mutation]) + handler.clear() + with pytest.raises(MutationsExceptionGroup) as e: + authorized_view.bulk_mutate_rows( + [entry], retryable_errors=[PermissionDenied], operation_timeout=0.5 + ) + assert len(e.value.exceptions) == 1 + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) > 1 + operation = handler.completed_operations[0] + assert operation.final_status.name == "DEADLINE_EXCEEDED" + assert operation.op_type.value == "MutateRows" + assert operation.is_streaming is False + assert len(operation.completed_attempts) > 1 + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == 
cluster_config[operation.cluster_id].location.split("/")[-1] + ) + for attempt in handler.completed_attempts: + assert attempt.end_status.name in ["OK", "DEADLINE_EXCEEDED"] + + def test_mutate_rows_batcher(self, table, temp_rows, handler, cluster_config): + from google.cloud.bigtable.data.mutations import RowMutationEntry + + new_value, new_value2 = [uuid.uuid4().hex.encode() for _ in range(2)] + row_key, mutation = temp_rows.create_row_and_mutation( + table, new_value=new_value + ) + row_key2, mutation2 = temp_rows.create_row_and_mutation( + table, new_value=new_value2 + ) + bulk_mutation = RowMutationEntry(row_key, [mutation]) + bulk_mutation2 = RowMutationEntry(row_key2, [mutation2]) + handler.clear() + with table.mutations_batcher() as batcher: + batcher.append(bulk_mutation) + batcher.append(bulk_mutation2) + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.value[0] == 0 + assert operation.is_streaming is False + assert operation.op_type.value == "MutateRows" + assert len(operation.completed_attempts) == 1 + assert operation.completed_attempts[0] == handler.completed_attempts[0] + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + assert operation.duration_ns > 0 and operation.duration_ns < 1000000000.0 + assert operation.first_response_latency_ns is None + assert ( + operation.flow_throttling_time_ns > 0 + and operation.flow_throttling_time_ns < operation.duration_ns + ) + attempt = handler.completed_attempts[0] + assert isinstance(attempt, CompletedAttemptMetric) + assert attempt.duration_ns > 0 and attempt.duration_ns < operation.duration_ns + assert attempt.end_status.value[0] == 0 + assert attempt.backoff_before_attempt_ns == 0 + assert ( + attempt.gfe_latency_ns > 0 
and attempt.gfe_latency_ns < attempt.duration_ns + ) + assert attempt.application_blocking_time_ns == 0 + + def test_mutate_rows_batcher_failure_with_retries( + self, table, handler, error_injector + ): + """Test failure in grpc layer by injecting errors into an interceptor + with retryable errors, then a terminal one""" + from google.cloud.bigtable.data.exceptions import MutationsExceptionGroup + from google.cloud.bigtable.data.mutations import RowMutationEntry, SetCell + + row_key = b"row_key_1" + mutation = SetCell(TEST_FAMILY, b"q", b"v") + entry = RowMutationEntry(row_key, [mutation]) + assert entry.is_idempotent() + handler.clear() + expected_zone = "my_zone" + expected_cluster = "my_cluster" + num_retryable = 2 + for i in range(num_retryable): + error_injector.push( + self._make_exception(StatusCode.ABORTED, cluster_id=expected_cluster) + ) + error_injector.push( + self._make_exception(StatusCode.PERMISSION_DENIED, zone_id=expected_zone) + ) + with pytest.raises(MutationsExceptionGroup): + with table.mutations_batcher(batch_retryable_errors=[Aborted]) as batcher: + batcher.append(entry) + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == num_retryable + 1 + operation = handler.completed_operations[0] + assert isinstance(operation, CompletedOperationMetric) + assert operation.final_status.name == "PERMISSION_DENIED" + assert operation.op_type.value == "MutateRows" + assert operation.is_streaming is False + assert len(operation.completed_attempts) == num_retryable + 1 + assert operation.cluster_id == expected_cluster + assert operation.zone == expected_zone + for i in range(num_retryable): + attempt = handler.completed_attempts[i] + assert attempt.end_status.name == "ABORTED" + assert attempt.gfe_latency_ns is None + final_attempt = handler.completed_attempts[num_retryable] + assert final_attempt.end_status.name == "PERMISSION_DENIED" + assert final_attempt.gfe_latency_ns is None + + def 
test_mutate_rows_batcher_failure_timeout(self, table, temp_rows, handler): + """Test failure in gapic layer by passing very low timeout + + No grpc headers expected""" + from google.cloud.bigtable.data.exceptions import MutationsExceptionGroup + from google.cloud.bigtable.data.mutations import RowMutationEntry, SetCell + + row_key = b"row_key_1" + mutation = SetCell(TEST_FAMILY, b"q", b"v") + entry = RowMutationEntry(row_key, [mutation]) + with pytest.raises(MutationsExceptionGroup): + with table.mutations_batcher(batch_operation_timeout=0.001) as batcher: + batcher.append(entry) + assert len(handler.completed_operations) == 1 + assert len(handler.completed_attempts) == 1 + operation = handler.completed_operations[0] + assert operation.final_status.name == "DEADLINE_EXCEEDED" + assert operation.op_type.value == "MutateRows" + assert operation.is_streaming is False + assert len(operation.completed_attempts) == 1 + assert operation.cluster_id == "" + assert operation.zone == "global" + attempt = handler.completed_attempts[0] + assert attempt.end_status.name == "DEADLINE_EXCEEDED" + assert attempt.gfe_latency_ns is None + + def test_mutate_rows_batcher_failure_unauthorized( + self, handler, authorized_view, cluster_config + ): + """Test failure in backend by accessing an unauthorized family""" + from google.cloud.bigtable.data.exceptions import MutationsExceptionGroup + from google.cloud.bigtable.data.mutations import RowMutationEntry, SetCell + + row_key = b"row_key_1" + mutation = SetCell("unauthorized", b"q", b"v") + entry = RowMutationEntry(row_key, [mutation]) + with pytest.raises(MutationsExceptionGroup) as e: + with authorized_view.mutations_batcher() as batcher: + batcher.append(entry) + assert len(e.value.exceptions) == 1 + assert isinstance(e.value.exceptions[0].__cause__, GoogleAPICallError) + assert ( + e.value.exceptions[0].__cause__.grpc_status_code.name == "PERMISSION_DENIED" + ) + assert len(handler.completed_operations) == 1 + assert 
len(handler.completed_attempts) == 1 + operation = handler.completed_operations[0] + assert operation.final_status.name == "PERMISSION_DENIED" + assert operation.op_type.value == "MutateRows" + assert operation.is_streaming is False + assert len(operation.completed_attempts) == 1 + assert operation.cluster_id == next(iter(cluster_config.keys())) + assert ( + operation.zone + == cluster_config[operation.cluster_id].location.split("/")[-1] + ) + attempt = handler.completed_attempts[0] + assert attempt.end_status.name == "PERMISSION_DENIED" + assert ( + attempt.gfe_latency_ns >= 0 + and attempt.gfe_latency_ns < operation.duration_ns + ) + @pytest.mark.skipif( bool(os.environ.get(BIGTABLE_EMULATOR)), reason="emulator doesn't suport cluster_config", diff --git a/packages/google-cloud-bigtable/tests/system/data/test_system_async.py b/packages/google-cloud-bigtable/tests/system/data/test_system_async.py index b65f05e4bd17..0b0fb7b1cc4a 100644 --- a/packages/google-cloud-bigtable/tests/system/data/test_system_async.py +++ b/packages/google-cloud-bigtable/tests/system/data/test_system_async.py @@ -18,11 +18,12 @@ import pytest from google.api_core import retry -from google.api_core.exceptions import ClientError, PermissionDenied +from google.api_core.exceptions import ClientError, PermissionDenied, ServerError from google.cloud.environment_vars import BIGTABLE_EMULATOR from google.type import date_pb2 from google.cloud.bigtable.data._cross_sync import CrossSync +from google.cloud.bigtable.data._metrics import OperationType from google.cloud.bigtable.data.execute_query.metadata import SqlType from google.cloud.bigtable.data.read_modify_write_rules import _MAX_INCREMENT_VALUE @@ -1317,3 +1318,46 @@ async def test_execute_metadata_on_empty_response( assert md[TEST_AGGREGATE_FAMILY].column_type == SqlType.Map( SqlType.Bytes(), SqlType.Int64() ) + + @pytest.fixture(scope="session") + def metrics_client(self, client): + yield client._gcp_metrics_exporter.client + + 
@pytest.mark.order("last") + @pytest.mark.parametrize( + "metric,methods", + [ + ("attempt_latencies", [m.value for m in OperationType]), + ("operation_latencies", [m.value for m in OperationType]), + ("retry_count", [m.value for m in OperationType]), + ("first_response_latencies", [OperationType.READ_ROWS.value]), + ("server_latencies", [m.value for m in OperationType]), + ("connectivity_error_count", [m.value for m in OperationType]), + ("application_blocking_latencies", [OperationType.READ_ROWS.value]), + ], + ) + @retry.Retry(predicate=retry.if_exception_type(AssertionError, ServerError)) + def test_metric_existence( + self, client, table_id, metrics_client, start_timestamp, metric, methods + ): + """ + Checks to make sure metrics were exported by tests + + Runs at the end of test suite, to let other tests write metrics + """ + end_timestamp = datetime.datetime.now(datetime.timezone.utc) + for m in methods: + metric_filter = ( + f'metric.type = "bigtable.googleapis.com/client/{metric}" ' + + f'AND metric.labels.client_name = "python-bigtable/{client._client_version()}" ' + + f'AND resource.labels.table = "{table_id}" ' + ) + results = list( + metrics_client.list_time_series( + name=f"projects/{client.project}", + filter=metric_filter, + interval={"start_time": start_timestamp, "end_time": end_timestamp}, + view=0, + ) + ) + assert len(results) > 0, f"No data found for {metric} {m}" diff --git a/packages/google-cloud-bigtable/tests/system/data/test_system_autogen.py b/packages/google-cloud-bigtable/tests/system/data/test_system_autogen.py index c31b2c20a4b8..8ef43fc26988 100644 --- a/packages/google-cloud-bigtable/tests/system/data/test_system_autogen.py +++ b/packages/google-cloud-bigtable/tests/system/data/test_system_autogen.py @@ -21,11 +21,12 @@ import pytest from google.api_core import retry -from google.api_core.exceptions import ClientError, PermissionDenied +from google.api_core.exceptions import ClientError, PermissionDenied, ServerError from
google.cloud.environment_vars import BIGTABLE_EMULATOR from google.type import date_pb2 from google.cloud.bigtable.data._cross_sync import CrossSync +from google.cloud.bigtable.data._metrics import OperationType from google.cloud.bigtable.data.execute_query.metadata import SqlType from google.cloud.bigtable.data.read_modify_write_rules import _MAX_INCREMENT_VALUE from google.cloud.bigtable_v2.services.bigtable.transports.grpc import ( @@ -1075,3 +1076,44 @@ def test_execute_metadata_on_empty_response( assert md[TEST_AGGREGATE_FAMILY].column_type == SqlType.Map( SqlType.Bytes(), SqlType.Int64() ) + + @pytest.fixture(scope="session") + def metrics_client(self, client): + yield client._gcp_metrics_exporter.client + + @pytest.mark.order("last") + @pytest.mark.parametrize( + "metric,methods", + [ + ("attempt_latencies", [m.value for m in OperationType]), + ("operation_latencies", [m.value for m in OperationType]), + ("retry_count", [m.value for m in OperationType]), + ("first_response_latencies", [OperationType.READ_ROWS.value]), + ("server_latencies", [m.value for m in OperationType]), + ("connectivity_error_count", [m.value for m in OperationType]), + ("application_blocking_latencies", [OperationType.READ_ROWS.value]), + ], + ) + @retry.Retry(predicate=retry.if_exception_type(AssertionError, ServerError)) + def test_metric_existence( + self, client, table_id, metrics_client, start_timestamp, metric, methods + ): + """Checks to make sure metrics were exported by tests + + Runs at the end of test suite, to let other tests write metrics""" + end_timestamp = datetime.datetime.now(datetime.timezone.utc) + for m in methods: + metric_filter = ( + f'metric.type = "bigtable.googleapis.com/client/{metric}" ' + + f'AND metric.labels.client_name = "python-bigtable/{client._client_version()}" ' + + f'AND resource.labels.table = "{table_id}" ' + ) + results = list( + metrics_client.list_time_series( + name=f"projects/{client.project}", + filter=metric_filter, + interval={"start_time":
start_timestamp, "end_time": end_timestamp}, + view=0, + ) + ) + assert len(results) > 0, f"No data found for {metric} {m}" diff --git a/packages/google-cloud-bigtable/tests/unit/data/_async/test__mutate_rows.py b/packages/google-cloud-bigtable/tests/unit/data/_async/test__mutate_rows.py index 82f234350a8c..0f2f26af9b6b 100644 --- a/packages/google-cloud-bigtable/tests/unit/data/_async/test__mutate_rows.py +++ b/packages/google-cloud-bigtable/tests/unit/data/_async/test__mutate_rows.py @@ -17,6 +17,7 @@ from google.rpc import status_pb2 from google.cloud.bigtable.data._cross_sync import CrossSync +from google.cloud.bigtable.data._metrics import ActiveOperationMetric from google.cloud.bigtable.data.mutations import DeleteAllFromRow, RowMutationEntry from google.cloud.bigtable_v2.types import MutateRowsResponse @@ -45,6 +46,9 @@ def _make_one(self, *args, **kwargs): kwargs["attempt_timeout"] = kwargs.pop("attempt_timeout", 0.1) kwargs["retryable_exceptions"] = kwargs.pop("retryable_exceptions", ()) kwargs["mutation_entries"] = kwargs.pop("mutation_entries", []) + kwargs["metric"] = kwargs.pop( + "metric", ActiveOperationMetric("MUTATE_ROWS") + ) return self._target_class()(*args, **kwargs) def _make_mutation(self, count=1, size=1): @@ -87,6 +91,7 @@ def test_ctor(self): entries = [self._make_mutation(), self._make_mutation()] operation_timeout = 0.05 attempt_timeout = 0.01 + metric = mock.Mock() retryable_exceptions = () instance = self._make_one( client, @@ -94,6 +99,7 @@ def test_ctor(self): entries, operation_timeout, attempt_timeout, + metric, retryable_exceptions, ) # running gapic_fn should trigger a client call with baked-in args @@ -113,6 +119,7 @@ def test_ctor(self): assert instance.is_retryable(RuntimeError("")) is False assert instance.remaining_indices == list(range(len(entries))) assert instance.errors == {} + assert instance._operation_metric == metric def test_ctor_too_many_entries(self): """ @@ -136,6 +143,7 @@ def test_ctor_too_many_entries(self): 
entries, operation_timeout, attempt_timeout, + mock.Mock(), ) assert "mutate_rows requests can contain at most 100000 mutations" in str( e.value @@ -149,6 +157,7 @@ async def test_mutate_rows_operation(self): """ client = mock.Mock() table = mock.Mock() + metric = ActiveOperationMetric("MUTATE_ROWS") entries = [self._make_mutation(), self._make_mutation()] operation_timeout = 0.05 cls = self._target_class() @@ -156,7 +165,7 @@ async def test_mutate_rows_operation(self): f"{cls.__module__}.{cls.__name__}._run_attempt", CrossSync.Mock() ) as attempt_mock: instance = self._make_one( - client, table, entries, operation_timeout, operation_timeout + client, table, entries, operation_timeout, operation_timeout, metric ) await instance.start() assert attempt_mock.call_count == 1 @@ -170,6 +179,7 @@ async def test_mutate_rows_attempt_exception(self, exc_type): client = CrossSync.Mock() table = mock.Mock() table._request_path = {"table_name": "table"} + metric = ActiveOperationMetric("MUTATE_ROWS") table.app_profile_id = None entries = [self._make_mutation(), self._make_mutation()] operation_timeout = 0.05 @@ -178,7 +188,7 @@ async def test_mutate_rows_attempt_exception(self, exc_type): found_exc = None try: instance = self._make_one( - client, table, entries, operation_timeout, operation_timeout + client, table, entries, operation_timeout, operation_timeout, metric ) await instance._run_attempt() except Exception as e: @@ -202,6 +212,7 @@ async def test_mutate_rows_exception(self, exc_type): client = mock.Mock() table = mock.Mock() + metric = ActiveOperationMetric("MUTATE_ROWS") entries = [self._make_mutation(), self._make_mutation()] operation_timeout = 0.05 expected_cause = exc_type("abort") @@ -214,7 +225,7 @@ async def test_mutate_rows_exception(self, exc_type): found_exc = None try: instance = self._make_one( - client, table, entries, operation_timeout, operation_timeout + client, table, entries, operation_timeout, operation_timeout, metric ) await instance.start() 
except MutationsExceptionGroup as e: @@ -238,6 +249,7 @@ async def test_mutate_rows_exception_retryable_eventually_pass(self, exc_type): client = mock.Mock() table = mock.Mock() + metric = ActiveOperationMetric("MUTATE_ROWS") entries = [self._make_mutation()] operation_timeout = 1 expected_cause = exc_type("retry") @@ -254,6 +266,7 @@ async def test_mutate_rows_exception_retryable_eventually_pass(self, exc_type): entries, operation_timeout, operation_timeout, + metric, retryable_exceptions=(exc_type,), ) await instance.start() @@ -273,6 +286,7 @@ async def test_mutate_rows_incomplete_ignored(self): client = mock.Mock() table = mock.Mock() + metric = ActiveOperationMetric("MUTATE_ROWS") entries = [self._make_mutation()] operation_timeout = 0.05 with mock.patch.object( @@ -284,7 +298,7 @@ async def test_mutate_rows_incomplete_ignored(self): found_exc = None try: instance = self._make_one( - client, table, entries, operation_timeout, operation_timeout + client, table, entries, operation_timeout, operation_timeout, metric ) await instance.start() except MutationsExceptionGroup as e: diff --git a/packages/google-cloud-bigtable/tests/unit/data/_async/test__read_rows.py b/packages/google-cloud-bigtable/tests/unit/data/_async/test__read_rows.py index 7fad973c43a3..29e2d3e84c0d 100644 --- a/packages/google-cloud-bigtable/tests/unit/data/_async/test__read_rows.py +++ b/packages/google-cloud-bigtable/tests/unit/data/_async/test__read_rows.py @@ -15,6 +15,7 @@ import pytest from google.cloud.bigtable.data._cross_sync import CrossSync +from google.cloud.bigtable.data._metrics import ActiveOperationMetric # try/except added for compatibility with python < 3.8 try: @@ -59,6 +60,7 @@ def test_ctor(self): expected_operation_timeout = 42 expected_request_timeout = 44 time_gen_mock = mock.Mock() + expected_metric = mock.Mock() subpath = "_async" if CrossSync.is_async else "_sync_autogen" with mock.patch( f"google.cloud.bigtable.data.{subpath}._read_rows._attempt_timeout_generator", 
@@ -69,6 +71,7 @@ def test_ctor(self): table, operation_timeout=expected_operation_timeout, attempt_timeout=expected_request_timeout, + metric=expected_metric, ) assert time_gen_mock.call_count == 1 time_gen_mock.assert_called_once_with( @@ -81,6 +84,7 @@ def test_ctor(self): assert instance.request.table_name == "test_table" assert instance.request.app_profile_id == table.app_profile_id assert instance.request.rows_limit == row_limit + assert instance._operation_metric == expected_metric @pytest.mark.parametrize( "in_keys,last_key,expected", @@ -269,7 +273,9 @@ async def mock_stream(): table = mock.Mock() table._request_path = {"table_name": "table_name"} table.app_profile_id = "app_profile_id" - instance = self._make_one(query, table, 10, 10) + instance = self._make_one( + query, table, 10, 10, ActiveOperationMetric("READ_ROWS") + ) assert instance._remaining_count == start_limit # read emit_num rows async for val in instance.chunk_stream(awaitable_stream()): @@ -308,7 +314,9 @@ async def mock_stream(): table = mock.Mock() table._request_path = {"table_name": "table_name"} table.app_profile_id = "app_profile_id" - instance = self._make_one(query, table, 10, 10) + instance = self._make_one( + query, table, 10, 10, ActiveOperationMetric("READ_ROWS") + ) assert instance._remaining_count == start_limit with pytest.raises(InvalidChunk) as e: # read emit_num rows @@ -334,7 +342,9 @@ async def mock_stream(): with mock.patch.object( self._get_target_class(), "_read_rows_attempt" ) as mock_attempt: - instance = self._make_one(mock.Mock(), mock.Mock(), 1, 1) + instance = self._make_one( + mock.Mock(), mock.Mock(), 1, 1, ActiveOperationMetric("READ_ROWS") + ) wrapped_gen = mock_stream() mock_attempt.return_value = wrapped_gen gen = instance.start_operation() diff --git a/packages/google-cloud-bigtable/tests/unit/data/_async/test_client.py b/packages/google-cloud-bigtable/tests/unit/data/_async/test_client.py index 6c6719615c40..55e94a35a142 100644 --- 
a/packages/google-cloud-bigtable/tests/unit/data/_async/test_client.py +++ b/packages/google-cloud-bigtable/tests/unit/data/_async/test_client.py @@ -112,6 +112,10 @@ def _make_client(cls, *args, use_emulator=True, use_mtls="auto", **kwargs): @CrossSync.pytest async def test_ctor(self): + from google.cloud.bigtable.data._metrics.handlers.gcp_exporter import ( + BigtableMetricsExporter, + ) + expected_project = "project-id" expected_credentials = AnonymousCredentials() client = self._make_client( @@ -125,6 +129,8 @@ async def test_ctor(self): assert client._channel_refresh_task is not None assert client.transport._credentials == expected_credentials assert isinstance(client._metrics_interceptor, CrossSync.MetricsInterceptor) + assert client._gcp_metrics_exporter is not None + assert isinstance(client._gcp_metrics_exporter, BigtableMetricsExporter) await client.close() @CrossSync.pytest @@ -192,7 +198,36 @@ async def test_ctor_dict_options(self): await client.close() @CrossSync.pytest - async def test_veneer_grpc_headers(self): + async def test_metrics_exporter_init_shares_arguments(self): + expected_credentials = AnonymousCredentials() + expected_project = "custom_project" + expected_options = client_options.ClientOptions() + expected_options.credentials_file = None + expected_options.quota_project_id = None + with mock.patch( + "google.cloud.bigtable.data._metrics.handlers.gcp_exporter.BigtableMetricsExporter.__init__", + return_value=None, + ) as exporter_mock: + async with self._make_client( + project=expected_project, + credentials=expected_credentials, + client_options=expected_options, + ): + exporter_mock.assert_called_once_with( + project_id=expected_project, + credentials=expected_credentials, + client_options=expected_options, + ) + + @CrossSync.pytest + async def test_metrics_exporter_init_implicit_project(self): + async with self._make_client() as client: + assert client._gcp_metrics_exporter.project_id == client.project + + @CrossSync.pytest + 
@mock.patch("google.cloud.bigtable.data._async.client.BigtableMetricsExporter") + @mock.patch("google.cloud.bigtable.data._sync_autogen.client.BigtableMetricsExporter") + async def test_veneer_grpc_headers(self, exporter_mock, exporter_mock_sync): client_component = "data-async" if CrossSync.is_async else "data" VENEER_HEADER_REGEX = re.compile( r"gapic\/[0-9]+\.[\w.-]+ gax\/[0-9]+\.[\w.-]+ gccl\/[0-9]+\.[\w.-]+-" @@ -1178,6 +1213,7 @@ async def test_ctor(self): from google.cloud.bigtable.data._helpers import _WarmedInstanceKey from google.cloud.bigtable.data._metrics import ( BigtableClientSideMetricsController, + GoogleCloudMetricsHandler, ) expected_table_id = "table-id" @@ -1221,6 +1257,8 @@ async def test_ctor(self): assert instance_key in client._active_instances assert client._instance_owners[instance_key] == {id(table)} assert isinstance(table._metrics, BigtableClientSideMetricsController) + assert len(table._metrics.handlers) == 1 + assert isinstance(table._metrics.handlers[0], GoogleCloudMetricsHandler) assert table.default_operation_timeout == expected_operation_timeout assert table.default_attempt_timeout == expected_attempt_timeout assert ( @@ -1296,7 +1334,7 @@ async def test_ctor_invalid_timeout_values(self): @CrossSync.drop def test_table_ctor_sync(self): # initializing client in a sync context should raise RuntimeError - client = mock.Mock() + client = self._make_client() with pytest.raises(RuntimeError) as e: TableAsync(client, "instance-id", "table-id") assert e.match("TableAsync must be created within an async event loop context.") @@ -1535,6 +1573,7 @@ async def test_ctor(self): from google.cloud.bigtable.data._helpers import _WarmedInstanceKey from google.cloud.bigtable.data._metrics import ( BigtableClientSideMetricsController, + GoogleCloudMetricsHandler, ) expected_table_id = "table-id" @@ -1585,6 +1624,8 @@ async def test_ctor(self): assert instance_key in client._active_instances assert client._instance_owners[instance_key] == {id(view)} 
assert isinstance(view._metrics, BigtableClientSideMetricsController) + assert len(view._metrics.handlers) == 1 + assert isinstance(view._metrics.handlers[0], GoogleCloudMetricsHandler) assert view.default_operation_timeout == expected_operation_timeout assert view.default_attempt_timeout == expected_attempt_timeout assert ( @@ -1630,6 +1671,10 @@ def _make_client(self, *args, **kwargs): @CrossSync.convert def _make_table(self, *args, **kwargs): + from google.cloud.bigtable.data._metrics.handlers.gcp_exporter import ( + BigtableMetricsExporter, + ) + client_mock = mock.Mock() client_mock._register_instance.side_effect = ( lambda *args, **kwargs: CrossSync.yield_to_event_loop() @@ -1645,6 +1690,7 @@ def _make_table(self, *args, **kwargs): ) client_mock._gapic_client.table_path.return_value = kwargs["table_id"] client_mock._gapic_client.instance_path.return_value = kwargs["instance_id"] + client_mock._gcp_metrics_exporter = BigtableMetricsExporter("project") return CrossSync.TestTable._get_target_class()(client_mock, *args, **kwargs) def _make_stats(self): @@ -1996,9 +2042,21 @@ async def test_read_row(self): async with self._make_client() as client: table = client.get_table("instance", "table") row_key = b"test_1" - with mock.patch.object(table, "read_rows") as read_rows: + with mock.patch.object( + CrossSync, "_ReadRowsOperation" + ) as mock_op_constructor: + mock_op = mock.Mock() expected_result = object() - read_rows.side_effect = lambda *args, **kwargs: [expected_result] + + if CrossSync.is_async: + + async def mock_generator(): + yield expected_result + + mock_op.start_operation.return_value = mock_generator() + else: + mock_op.start_operation.return_value = [expected_result] + mock_op_constructor.return_value = mock_op expected_op_timeout = 8 expected_req_timeout = 4 row = await table.read_row( @@ -2007,16 +2065,17 @@ async def test_read_row(self): attempt_timeout=expected_req_timeout, ) assert row == expected_result - assert read_rows.call_count == 1 - args, 
kwargs = read_rows.call_args_list[0] + assert mock_op_constructor.call_count == 1 + args, kwargs = mock_op_constructor.call_args_list[0] assert kwargs["operation_timeout"] == expected_op_timeout assert kwargs["attempt_timeout"] == expected_req_timeout - assert len(args) == 1 + assert len(args) == 2 assert isinstance(args[0], ReadRowsQuery) query = args[0] assert query.row_keys == [row_key] assert query.row_ranges == [] assert query.limit == 1 + assert args[1] is table @CrossSync.pytest async def test_read_row_w_filter(self): @@ -2024,14 +2083,24 @@ async def test_read_row_w_filter(self): async with self._make_client() as client: table = client.get_table("instance", "table") row_key = b"test_1" - with mock.patch.object(table, "read_rows") as read_rows: + with mock.patch.object( + CrossSync, "_ReadRowsOperation" + ) as mock_op_constructor: + mock_op = mock.Mock() expected_result = object() - read_rows.side_effect = lambda *args, **kwargs: [expected_result] + + if CrossSync.is_async: + + async def mock_generator(): + yield expected_result + + mock_op.start_operation.return_value = mock_generator() + else: + mock_op.start_operation.return_value = [expected_result] + mock_op_constructor.return_value = mock_op expected_op_timeout = 8 expected_req_timeout = 4 - mock_filter = mock.Mock() - expected_filter = {"filter": "mock filter"} - mock_filter._to_dict.return_value = expected_filter + expected_filter = mock.Mock() row = await table.read_row( row_key, operation_timeout=expected_op_timeout, @@ -2039,11 +2108,11 @@ async def test_read_row_w_filter(self): row_filter=expected_filter, ) assert row == expected_result - assert read_rows.call_count == 1 - args, kwargs = read_rows.call_args_list[0] + assert mock_op_constructor.call_count == 1 + args, kwargs = mock_op_constructor.call_args_list[0] assert kwargs["operation_timeout"] == expected_op_timeout assert kwargs["attempt_timeout"] == expected_req_timeout - assert len(args) == 1 + assert len(args) == 2 assert 
isinstance(args[0], ReadRowsQuery) query = args[0] assert query.row_keys == [row_key] @@ -2057,9 +2126,21 @@ async def test_read_row_no_response(self): async with self._make_client() as client: table = client.get_table("instance", "table") row_key = b"test_1" - with mock.patch.object(table, "read_rows") as read_rows: - # return no rows - read_rows.side_effect = lambda *args, **kwargs: [] + with mock.patch.object( + CrossSync, "_ReadRowsOperation" + ) as mock_op_constructor: + mock_op = mock.Mock() + + if CrossSync.is_async: + + async def mock_generator(): + if False: + yield + + mock_op.start_operation.return_value = mock_generator() + else: + mock_op.start_operation.return_value = [] + mock_op_constructor.return_value = mock_op expected_op_timeout = 8 expected_req_timeout = 4 result = await table.read_row( @@ -2068,8 +2149,8 @@ async def test_read_row_no_response(self): attempt_timeout=expected_req_timeout, ) assert result is None - assert read_rows.call_count == 1 - args, kwargs = read_rows.call_args_list[0] + assert mock_op_constructor.call_count == 1 + args, kwargs = mock_op_constructor.call_args_list[0] assert kwargs["operation_timeout"] == expected_op_timeout assert kwargs["attempt_timeout"] == expected_req_timeout assert isinstance(args[0], ReadRowsQuery) @@ -2092,22 +2173,36 @@ async def test_row_exists(self, return_value, expected_result): async with self._make_client() as client: table = client.get_table("instance", "table") row_key = b"test_1" - with mock.patch.object(table, "read_rows") as read_rows: - # return no rows - read_rows.side_effect = lambda *args, **kwargs: return_value - expected_op_timeout = 1 - expected_req_timeout = 2 + with mock.patch.object( + CrossSync, "_ReadRowsOperation" + ) as mock_op_constructor: + mock_op = mock.Mock() + if CrossSync.is_async: + + async def mock_generator(): + for item in return_value: + yield item + + mock_op.start_operation.return_value = mock_generator() + else: + mock_op.start_operation.return_value = 
return_value + mock_op_constructor.return_value = mock_op + expected_op_timeout = 2 + expected_req_timeout = 1 result = await table.row_exists( row_key, operation_timeout=expected_op_timeout, attempt_timeout=expected_req_timeout, ) assert expected_result == result - assert read_rows.call_count == 1 - args, kwargs = read_rows.call_args_list[0] + assert mock_op_constructor.call_count == 1 + args, kwargs = mock_op_constructor.call_args_list[0] assert kwargs["operation_timeout"] == expected_op_timeout assert kwargs["attempt_timeout"] == expected_req_timeout - assert isinstance(args[0], ReadRowsQuery) + query = args[0] + assert isinstance(query, ReadRowsQuery) + assert query.row_keys == [row_key] + assert query.limit == 1 expected_filter = { "chain": { "filters": [ @@ -2116,10 +2211,6 @@ async def test_row_exists(self, return_value, expected_result): ] } } - query = args[0] - assert query.row_keys == [row_key] - assert query.row_ranges == [] - assert query.limit == 1 assert query.filter._to_dict() == expected_filter diff --git a/packages/google-cloud-bigtable/tests/unit/data/_async/test_mutations_batcher.py b/packages/google-cloud-bigtable/tests/unit/data/_async/test_mutations_batcher.py index 75de7c281332..1d85b6ac9f9b 100644 --- a/packages/google-cloud-bigtable/tests/unit/data/_async/test_mutations_batcher.py +++ b/packages/google-cloud-bigtable/tests/unit/data/_async/test_mutations_batcher.py @@ -307,6 +307,10 @@ def _get_target_class(self): def _make_one(self, table=None, **kwargs): from google.api_core.exceptions import DeadlineExceeded, ServiceUnavailable + from google.cloud.bigtable.data._metrics import ( + BigtableClientSideMetricsController, + ) + if table is None: table = mock.Mock() table._request_path = {"table_name": "table"} @@ -317,6 +321,7 @@ def _make_one(self, table=None, **kwargs): DeadlineExceeded, ServiceUnavailable, ) + table._metrics = BigtableClientSideMetricsController([]) return self._get_target_class()(table, **kwargs) @@ -935,14 +940,16 @@ 
async def test__execute_mutate_rows(self): table.default_mutate_rows_retryable_errors = () async with self._make_one(table) as instance: batch = [self._make_mutation()] - result = await instance._execute_mutate_rows(batch) + expected_metric = mock.Mock() + result = await instance._execute_mutate_rows(batch, expected_metric) assert start_operation.call_count == 1 args, kwargs = mutate_rows.call_args assert args[0] == table.client._gapic_client assert args[1] == table assert args[2] == batch - kwargs["operation_timeout"] == 17 - kwargs["attempt_timeout"] == 13 + assert kwargs["operation_timeout"] == 17 + assert kwargs["attempt_timeout"] == 13 + assert kwargs["metric"] == expected_metric assert result == [] @CrossSync.pytest @@ -963,7 +970,7 @@ async def test__execute_mutate_rows_returns_errors(self): table.default_mutate_rows_retryable_errors = () async with self._make_one(table) as instance: batch = [self._make_mutation()] - result = await instance._execute_mutate_rows(batch) + result = await instance._execute_mutate_rows(batch, mock.Mock()) assert len(result) == 2 assert result[0] == err1 assert result[1] == err2 @@ -1093,7 +1100,9 @@ async def test_timeout_args_passed(self): assert instance._operation_timeout == expected_operation_timeout assert instance._attempt_timeout == expected_attempt_timeout # make simulated gapic call - await instance._execute_mutate_rows([self._make_mutation()]) + await instance._execute_mutate_rows( + [self._make_mutation()], mock.Mock() + ) assert mutate_rows.call_count == 1 kwargs = mutate_rows.call_args[1] assert kwargs["operation_timeout"] == expected_operation_timeout @@ -1192,13 +1201,21 @@ async def test_customizable_retryable_errors( Test that retryable functions support user-configurable arguments, and that the configured retryables are passed down to the gapic layer. 
""" + from google.cloud.bigtable.data._metrics import ActiveOperationMetric + from google.cloud.bigtable.data._metrics.handlers.gcp_exporter import ( + BigtableMetricsExporter, + ) + + mock_client = mock.Mock() + mock_client._gcp_metrics_exporter = BigtableMetricsExporter("project") + with mock.patch.object( google.api_core.retry, "if_exception_type" ) as predicate_builder_mock: with mock.patch.object(CrossSync, "retry_target") as retry_fn_mock: table = None with mock.patch("asyncio.create_task"): - table = CrossSync.Table(mock.Mock(), "instance", "table") + table = CrossSync.Table(mock_client, "instance", "table") async with self._make_one( table, batch_retryable_errors=input_retryables ) as instance: @@ -1207,14 +1224,16 @@ async def test_customizable_retryable_errors( predicate_builder_mock.return_value = expected_predicate retry_fn_mock.side_effect = RuntimeError("stop early") mutation = self._make_mutation(count=1, size=1) - await instance._execute_mutate_rows([mutation]) + await instance._execute_mutate_rows( + [mutation], ActiveOperationMetric("MUTATE_ROWS") + ) # passed in errors should be used to build the predicate predicate_builder_mock.assert_called_once_with( *expected_retryables, _MutateRowsIncomplete ) - retry_call_args = retry_fn_mock.call_args_list[0].args + retry_call_kwargs = retry_fn_mock.call_args_list[0].kwargs # output of if_exception_type should be sent in to retry constructor - assert retry_call_args[1] is expected_predicate + assert retry_call_kwargs["predicate"] is expected_predicate @CrossSync.pytest async def test_large_batch_write(self): diff --git a/packages/google-cloud-bigtable/tests/unit/data/_async/test_read_rows_acceptance.py b/packages/google-cloud-bigtable/tests/unit/data/_async/test_read_rows_acceptance.py index d69b776bfe42..53689c9c33f7 100644 --- a/packages/google-cloud-bigtable/tests/unit/data/_async/test_read_rows_acceptance.py +++ b/packages/google-cloud-bigtable/tests/unit/data/_async/test_read_rows_acceptance.py @@ 
-21,6 +21,7 @@ import pytest from google.cloud.bigtable.data._cross_sync import CrossSync +from google.cloud.bigtable.data._metrics import ActiveOperationMetric from google.cloud.bigtable.data.exceptions import InvalidChunk from google.cloud.bigtable.data.row import Row from google.cloud.bigtable_v2 import ReadRowsResponse @@ -36,8 +37,11 @@ class TestReadRowsAcceptanceAsync: @staticmethod @CrossSync.convert - def _get_operation_class(): - return CrossSync._ReadRowsOperation + def _make_operation(): + metric = ActiveOperationMetric("READ_ROWS") + op = CrossSync._ReadRowsOperation(mock.Mock(), mock.Mock(), 5, 5, metric) + op._remaining_count = None + return op @staticmethod @CrossSync.convert @@ -80,13 +84,8 @@ async def _process_chunks(self, *chunks): async def _row_stream(): yield ReadRowsResponse(chunks=chunks) - instance = mock.Mock() - instance._remaining_count = None - instance._last_yielded_row_key = None - chunker = self._get_operation_class().chunk_stream( - instance, self._coro_wrapper(_row_stream()) - ) - merger = self._get_operation_class().merge_rows(chunker) + chunker = self._make_operation().chunk_stream(self._coro_wrapper(_row_stream())) + merger = self._make_operation().merge_rows(chunker) results = [] async for row in merger: results.append(row) @@ -103,13 +102,10 @@ async def _scenerio_stream(): try: results = [] - instance = mock.Mock() - instance._last_yielded_row_key = None - instance._remaining_count = None - chunker = self._get_operation_class().chunk_stream( - instance, self._coro_wrapper(_scenerio_stream()) + chunker = self._make_operation().chunk_stream( + self._coro_wrapper(_scenerio_stream()) ) - merger = self._get_operation_class().merge_rows(chunker) + merger = self._make_operation().merge_rows(chunker) async for row in merger: for cell in row: cell_result = ReadRowsTest.Result( @@ -196,13 +192,10 @@ async def test_out_of_order_rows(self): async def _row_stream(): yield ReadRowsResponse(last_scanned_row_key=b"a") - instance = 
mock.Mock() - instance._remaining_count = None - instance._last_yielded_row_key = b"b" - chunker = self._get_operation_class().chunk_stream( - instance, self._coro_wrapper(_row_stream()) - ) - merger = self._get_operation_class().merge_rows(chunker) + op = self._make_operation() + op._last_yielded_row_key = b"b" + chunker = op.chunk_stream(self._coro_wrapper(_row_stream())) + merger = self._make_operation().merge_rows(chunker) with pytest.raises(InvalidChunk): async for _ in merger: pass diff --git a/packages/google-cloud-bigtable/tests/unit/data/_metrics/test_gcp_exporter_handler.py b/packages/google-cloud-bigtable/tests/unit/data/_metrics/test_gcp_exporter_handler.py new file mode 100644 index 000000000000..87e96a37563b --- /dev/null +++ b/packages/google-cloud-bigtable/tests/unit/data/_metrics/test_gcp_exporter_handler.py @@ -0,0 +1,433 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import mock +import pytest +from google.api.distribution_pb2 import Distribution +from google.cloud.monitoring_v3 import ( + Point, + TimeSeries, +) +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import ( + AggregationTemporality, + HistogramDataPoint, + Metric, + MetricExportResult, + MetricsData, + NumberDataPoint, + ResourceMetrics, + ScopeMetrics, + Sum, +) + +from google.cloud.bigtable.data._metrics.handlers.gcp_exporter import ( + BigtableMetricsExporter, + GoogleCloudMetricsHandler, +) +from google.cloud.bigtable.data._metrics.handlers.opentelemetry import ( + _OpenTelemetryInstruments, +) + + +class TestGoogleCloudMetricsHandler: + def _make_one(self, *args, **kwargs): + return GoogleCloudMetricsHandler(*args, **kwargs) + + def test_ctor_defaults(self): + from google.cloud.bigtable import __version__ as CLIENT_VERSION + + expected_instance = "my_instance" + expected_table = "my_table" + expected_exporter = BigtableMetricsExporter("project") + with mock.patch.object( + GoogleCloudMetricsHandler, "_generate_client_uid" + ) as uid_mock: + handler = self._make_one( + expected_exporter, + instance_id=expected_instance, + table_id=expected_table, + ) + assert isinstance(handler.meter_provider, MeterProvider) + assert isinstance(handler.otel, _OpenTelemetryInstruments) + assert handler.shared_labels["resource_instance"] == expected_instance + assert handler.shared_labels["resource_table"] == expected_table + assert handler.shared_labels["app_profile"] == "default" + assert ( + handler.shared_labels["client_name"] == f"python-bigtable/{CLIENT_VERSION}" + ) + assert handler.shared_labels["client_uid"] == uid_mock() + + def test_ctor_explicit(self): + expected_instance = "my_instance" + expected_table = "my_table" + expected_version = "my_version" + expected_uid = "my_uid" + expected_app_profile = "my_profile" + expected_exporter = BigtableMetricsExporter("project") + handler = self._make_one( + expected_exporter, + 
instance_id=expected_instance, + table_id=expected_table, + app_profile_id=expected_app_profile, + client_uid=expected_uid, + client_version=expected_version, + ) + assert handler.shared_labels["resource_instance"] == expected_instance + assert handler.shared_labels["resource_table"] == expected_table + assert handler.shared_labels["app_profile"] == expected_app_profile + assert ( + handler.shared_labels["client_name"] + == f"python-bigtable/{expected_version}" + ) + assert handler.shared_labels["client_uid"] == expected_uid + + @mock.patch( + "google.cloud.bigtable.data._metrics.handlers.gcp_exporter.PeriodicExportingMetricReader" + ) + @mock.patch( + "google.cloud.bigtable.data._metrics.handlers.gcp_exporter.MeterProvider" + ) + @mock.patch( + "google.cloud.bigtable.data._metrics.handlers.gcp_exporter._OpenTelemetryInstruments" + ) + @mock.patch( + "google.cloud.bigtable.data._metrics.handlers.gcp_exporter.OpenTelemetryMetricsHandler.__init__" + ) + def test_ctor_with_mocks( + self, mock_super_init, mock_otel_instruments, mock_meter_provider, mock_reader + ): + from google.cloud.bigtable.data._metrics.handlers.gcp_exporter import ( + VIEW_LIST, + ) + + exporter = mock.Mock() + export_interval = 90 + kwargs = {"instance_id": "test_instance", "table_id": "test_table"} + handler = self._make_one(exporter, export_interval=export_interval, **kwargs) + # check PeriodicExportingMetricReader + mock_reader.assert_called_once_with( + exporter, export_interval_millis=export_interval * 1000 + ) + # check MeterProvider + mock_meter_provider.assert_called_once_with( + metric_readers=[mock_reader.return_value], views=VIEW_LIST + ) + # check _OpenTelemetryInstruments + mock_otel_instruments.assert_called_once_with( + meter_provider=mock_meter_provider.return_value + ) + # check super().__init__ call + mock_super_init.assert_called_once_with( + instruments=mock_otel_instruments.return_value, **kwargs + ) + assert handler.meter_provider == mock_meter_provider.return_value + + def 
test_close(self): + mock_instance = mock.Mock() + assert mock_instance.meter_provider.shutdown.call_count == 0 + GoogleCloudMetricsHandler.close(mock_instance) + assert mock_instance.meter_provider.shutdown.call_count == 1 + + +class TestBigtableMetricsExporter: + def _make_one(self, *args, **kwargs): + return BigtableMetricsExporter(*args, **kwargs) + + def test_ctor_defaults(self): + from google.cloud.monitoring_v3 import MetricServiceClient + + expected_project = "custom" + instance = self._make_one(expected_project) + assert instance.project_id == expected_project + assert instance.prefix == "bigtable.googleapis.com/internal/client" + assert isinstance(instance.client, MetricServiceClient) + + def test_ctor_mocks(self): + expected_project = "custom" + with mock.patch( + "google.cloud.monitoring_v3.MetricServiceClient.__init__", + return_value=None, + ) as mock_client: + args = [mock.Mock(), object()] + kwargs = {"a": "b"} + instance = self._make_one(expected_project, *args, **kwargs) + assert instance.project_id == expected_project + assert instance.prefix == "bigtable.googleapis.com/internal/client" + mock_client.assert_called_once_with(*args, **kwargs) + + @pytest.mark.parametrize( + "value,expected_field", + [ + (123, "int64_value"), + (123.456, "double_value"), + ], + ) + def test__to_point_w_number(self, value, expected_field): + """Test that NumberDataPoint is converted to a Point correctly.""" + instance = self._make_one("project") + expected_start_time_nanos = 100 + expected_end_time_nanos = 200 + dp = NumberDataPoint( + attributes={}, + start_time_unix_nano=expected_start_time_nanos, + time_unix_nano=expected_end_time_nanos, + value=value, + ) + point = instance._to_point(dp) + assert isinstance(point, Point) + assert getattr(point.value, expected_field) == value + assert ( + point.interval.start_time.second * 10**9 + ) + point.interval.start_time.nanosecond == expected_start_time_nanos + assert ( + point.interval.end_time.second * 10**9 + ) + 
point.interval.end_time.nanosecond == expected_end_time_nanos + + def test__to_point_w_histogram(self): + """Test that HistogramDataPoint is converted to a Point correctly.""" + instance = self._make_one("project") + expected_start_time_nanos = 100 + expected_end_time_nanos = 200 + expected_count = 10 + expected_sum = 100.0 + expected_bucket_counts = [1, 2, 7] + expected_explicit_bounds = [10, 20] + dp = HistogramDataPoint( + attributes={}, + start_time_unix_nano=expected_start_time_nanos, + time_unix_nano=expected_end_time_nanos, + count=expected_count, + sum=expected_sum, + bucket_counts=expected_bucket_counts, + explicit_bounds=expected_explicit_bounds, + min=0, + max=50, + ) + point = instance._to_point(dp) + assert isinstance(point, Point) + dist = point.value.distribution_value + assert isinstance(dist, Distribution) + assert dist.count == expected_count + assert dist.mean == expected_sum / expected_count + assert list(dist.bucket_counts) == expected_bucket_counts + assert ( + list(dist.bucket_options.explicit_buckets.bounds) + == expected_explicit_bounds + ) + assert ( + point.interval.start_time.second * 10**9 + ) + point.interval.start_time.nanosecond == expected_start_time_nanos + assert ( + point.interval.end_time.second * 10**9 + ) + point.interval.end_time.nanosecond == expected_end_time_nanos + + def test__to_point_w_histogram_zero_count(self): + """Test that HistogramDataPoint with zero count is converted to a Point correctly.""" + instance = self._make_one("project") + dp = HistogramDataPoint( + attributes={}, + start_time_unix_nano=100, + time_unix_nano=200, + count=0, + sum=0, + bucket_counts=[], + explicit_bounds=[], + min=0, + max=0, + ) + point = instance._to_point(dp) + assert isinstance(point, Point) + dist = point.value.distribution_value + assert isinstance(dist, Distribution) + assert dist.count == 0 + assert dist.mean == 0.0 + + @pytest.mark.parametrize( + "num_series, batch_size, expected_calls, expected_batch_sizes", + [ + (10, 200, 1, 
[10]), + (200, 200, 1, [200]), + (500, 200, 3, [200, 200, 100]), + (0, 200, 0, []), + ], + ) + def test__batch_write( + self, num_series, batch_size, expected_calls, expected_batch_sizes + ): + """Test that _batch_write splits series into batches correctly.""" + instance = self._make_one("project") + instance.client = mock.Mock() + series = [TimeSeries() for _ in range(num_series)] + instance._batch_write(series, max_batch_size=batch_size) + assert instance.client.create_service_time_series.call_count == expected_calls + for i, call in enumerate( + instance.client.create_service_time_series.call_args_list + ): + call_args, _ = call + assert len(call_args[0].time_series) == expected_batch_sizes[i] + + def test__batch_write_with_deadline(self): + """Test that _batch_write passes deadlines to gapic correctly.""" + import time + + from google.api_core import gapic_v1 + + instance = self._make_one("project") + instance.client = mock.Mock() + series = [TimeSeries() for _ in range(10)] + # test with deadline + deadline = time.time() + 10 + instance._batch_write(series, deadline=deadline) + ( + call_args, + call_kwargs, + ) = instance.client.create_service_time_series.call_args_list[0] + assert "timeout" in call_kwargs + assert 9 < call_kwargs["timeout"] < 10 + # test without deadline + instance.client.create_service_time_series.reset_mock() + instance._batch_write(series, deadline=None) + ( + call_args, + call_kwargs, + ) = instance.client.create_service_time_series.call_args_list[0] + assert "timeout" in call_kwargs + assert call_kwargs["timeout"] == gapic_v1.method.DEFAULT + + def test_export(self): + """Test that export correctly converts metrics and calls _batch_write.""" + project_id = "project" + instance = self._make_one(project_id) + instance._batch_write = mock.Mock() + # create mock metrics data + expected_value = 123 + attributes = { + "resource_instance": "instance1", + "resource_cluster": "cluster1", + "resource_table": "table1", + "resource_zone": "zone1", + 
"method": "ReadRows", + } + data_point = NumberDataPoint( + attributes=attributes, + start_time_unix_nano=100, + time_unix_nano=200, + value=expected_value, + ) + metric = Metric( + name="operation_latencies", + description="", + unit="ms", + data=Sum( + data_points=[data_point], + aggregation_temporality=AggregationTemporality.CUMULATIVE, + is_monotonic=False, + ), + ) + scope_metric = ScopeMetrics( + scope=mock.Mock(), metrics=[metric], schema_url=None + ) + resource_metric = ResourceMetrics( + resource=mock.Mock(), scope_metrics=[scope_metric], schema_url=None + ) + metrics_data = MetricsData(resource_metrics=[resource_metric]) + result = instance.export(metrics_data) + assert result == MetricExportResult.SUCCESS + instance._batch_write.assert_called_once() + # check the TimeSeries passed to _batch_write + call_args, call_kwargs = instance._batch_write.call_args_list[0] + series_list = call_args[0] + assert len(series_list) == 1 + series = series_list[0] + assert series.metric.type == f"{instance.prefix}/operation_latencies" + assert series.metric.labels["method"] == "ReadRows" + assert "resource_instance" not in series.metric.labels + assert series.resource.type == "bigtable_client_raw" + assert series.resource.labels["project_id"] == project_id + assert series.resource.labels["instance"] == "instance1" + assert series.resource.labels["cluster"] == "cluster1" + assert series.resource.labels["table"] == "table1" + assert series.resource.labels["zone"] == "zone1" + assert len(series.points) == 1 + point = series.points[0] + assert point.value.int64_value == expected_value + + def test_export_no_attributes(self): + """Test that export skips data points with no attributes.""" + instance = self._make_one("project") + instance._batch_write = mock.Mock() + data_point = NumberDataPoint( + attributes={}, start_time_unix_nano=100, time_unix_nano=200, value=123 + ) + metric = Metric( + name="operation_latencies", + description="", + unit="ms", + data=Sum( + 
data_points=[data_point], + aggregation_temporality=AggregationTemporality.CUMULATIVE, + is_monotonic=False, + ), + ) + scope_metric = ScopeMetrics( + scope=mock.Mock(), metrics=[metric], schema_url=None + ) + resource_metric = ResourceMetrics( + resource=mock.Mock(), scope_metrics=[scope_metric], schema_url=None + ) + metrics_data = MetricsData(resource_metrics=[resource_metric]) + result = instance.export(metrics_data) + assert result == MetricExportResult.SUCCESS + instance._batch_write.assert_called_once() + series_list = instance._batch_write.call_args[0][0] + assert len(series_list) == 0 + + def test_exception_in_export(self): + """ + make sure exceptions don't raise + """ + instance = self._make_one("project") + instance._batch_write = mock.Mock(side_effect=Exception("test")) + # create mock metrics data with one valid data point + attributes = { + "resource_instance": "instance1", + "resource_cluster": "cluster1", + "resource_table": "table1", + "resource_zone": "zone1", + } + data_point = NumberDataPoint( + attributes=attributes, + start_time_unix_nano=100, + time_unix_nano=200, + value=123, + ) + metric = Metric( + name="operation_latencies", + description="", + unit="ms", + data=Sum( + data_points=[data_point], + aggregation_temporality=AggregationTemporality.CUMULATIVE, + is_monotonic=False, + ), + ) + scope_metric = ScopeMetrics( + scope=mock.Mock(), metrics=[metric], schema_url=None + ) + resource_metric = ResourceMetrics( + resource=mock.Mock(), scope_metrics=[scope_metric], schema_url=None + ) + metrics_data = MetricsData(resource_metrics=[resource_metric]) + result = instance.export(metrics_data) + assert result == MetricExportResult.FAILURE diff --git a/packages/google-cloud-bigtable/tests/unit/data/_metrics/test_opentelemetry_handler.py b/packages/google-cloud-bigtable/tests/unit/data/_metrics/test_opentelemetry_handler.py new file mode 100644 index 000000000000..6c8489645811 --- /dev/null +++ 
b/packages/google-cloud-bigtable/tests/unit/data/_metrics/test_opentelemetry_handler.py @@ -0,0 +1,396 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import mock +import pytest +from grpc import StatusCode + +from google.cloud.bigtable.data._metrics.data_model import ( + ActiveOperationMetric, + CompletedAttemptMetric, + CompletedOperationMetric, + OperationType, +) +from google.cloud.bigtable.data._metrics.handlers.opentelemetry import ( + OpenTelemetryMetricsHandler, + _OpenTelemetryInstruments, +) + + +class TestOpentelemetryInstruments: + EXPECTED_METRICS = [ + "operation_latencies", + "first_response_latencies", + "attempt_latencies", + "server_latencies", + "application_latencies", + "throttling_latencies", + "retry_count", + "connectivity_error_count", + ] + + def _make_one(self, meter_provider=None): + return _OpenTelemetryInstruments(meter_provider) + + def test_meter_name(self): + expected_name = "bigtable.googleapis.com" + mock_meter_provider = mock.Mock() + self._make_one(mock_meter_provider) + mock_meter_provider.get_meter.assert_called_once_with(expected_name) + + @pytest.mark.parametrize( + "metric_name", [m for m in EXPECTED_METRICS if "latencies" in m] + ) + def test_histogram_creation(self, metric_name): + mock_meter_provider = mock.Mock() + instruments = self._make_one(mock_meter_provider) + mock_meter = mock_meter_provider.get_meter() + assert any( + [ + call.kwargs["name"] == metric_name + for call in 
mock_meter.create_histogram.call_args_list + ] + ) + assert all( + [ + call.kwargs["unit"] == "ms" + for call in mock_meter.create_histogram.call_args_list + ] + ) + assert all( + [ + call.kwargs["description"] is not None + for call in mock_meter.create_histogram.call_args_list + ] + ) + assert getattr(instruments, metric_name) is not None + + @pytest.mark.parametrize( + "metric_name", [m for m in EXPECTED_METRICS if "count" in m] + ) + def test_counter_creation(self, metric_name): + mock_meter_provider = mock.Mock() + instruments = self._make_one(mock_meter_provider) + mock_meter = mock_meter_provider.get_meter() + assert any( + [ + call.kwargs["name"] == metric_name + for call in mock_meter.create_counter.call_args_list + ] + ) + assert all( + [ + call.kwargs["description"] is not None + for call in mock_meter.create_counter.call_args_list + ] + ) + assert getattr(instruments, metric_name) is not None + + def test_global_provider(self): + instruments = self._make_one() + # wait to import otel until after creating instance + import opentelemetry + + for metric_name in self.EXPECTED_METRICS: + metric = getattr(instruments, metric_name) + assert metric is not None + if "latencies" in metric_name: + assert isinstance(metric, opentelemetry.metrics.Histogram) + else: + assert isinstance(metric, opentelemetry.metrics.Counter) + + +class TestOpentelemetryMetricsHandler: + def _make_one(self, **kwargs): + return OpenTelemetryMetricsHandler(**kwargs) + + def test_ctor_defaults(self): + from google.cloud.bigtable import __version__ as CLIENT_VERSION + + expected_instance = "my_instance" + expected_table = "my_table" + with mock.patch.object( + OpenTelemetryMetricsHandler, "_generate_client_uid" + ) as uid_mock: + handler = self._make_one( + instance_id=expected_instance, table_id=expected_table + ) + assert isinstance(handler.otel, _OpenTelemetryInstruments) + assert handler.shared_labels["resource_instance"] == expected_instance + assert
handler.shared_labels["resource_table"] == expected_table + assert handler.shared_labels["app_profile"] == "default" + assert ( + handler.shared_labels["client_name"] == f"python-bigtable/{CLIENT_VERSION}" + ) + assert handler.shared_labels["client_uid"] == uid_mock() + + def test_ctor_explicit(self): + expected_instance = "my_instance" + expected_table = "my_table" + expected_version = "my_version" + expected_uid = "my_uid" + expected_app_profile = "my_profile" + expected_instruments = object() + handler = self._make_one( + instance_id=expected_instance, + table_id=expected_table, + app_profile_id=expected_app_profile, + client_uid=expected_uid, + client_version=expected_version, + instruments=expected_instruments, + ) + assert handler.otel == expected_instruments + assert handler.shared_labels["resource_instance"] == expected_instance + assert handler.shared_labels["resource_table"] == expected_table + assert handler.shared_labels["app_profile"] == expected_app_profile + assert ( + handler.shared_labels["client_name"] + == f"python-bigtable/{expected_version}" + ) + assert handler.shared_labels["client_uid"] == expected_uid + + @mock.patch("socket.gethostname", return_value="hostname") + @mock.patch("os.getpid", return_value="pid") + @mock.patch("uuid.uuid4", return_value="uid") + def test_generate_client_uid_mock(self, uuid_mock, os_mock, socket_mock): + uid = OpenTelemetryMetricsHandler._generate_client_uid() + assert uid == "python-uid-pid@hostname" + + @mock.patch("socket.gethostname", side_effect=[ValueError("fail")]) + @mock.patch("os.getpid", side_effect=[ValueError("fail")]) + @mock.patch("uuid.uuid4", return_value="uid") + def test_generate_client_uid_mock_with_exceptions( + self, uuid_mock, os_mock, socket_mock + ): + uid = OpenTelemetryMetricsHandler._generate_client_uid() + assert uid == "python-uid-@localhost" + + def test_generate_client_uid(self): + import re + + uid = OpenTelemetryMetricsHandler._generate_client_uid() + # The expected pattern is
python-{uuid}-{pid}@{hostname} + expected_pattern = ( + r"python-[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}-\d+@.+" + ) + assert re.match(expected_pattern, uid) + + def test_on_operation_complete_operation_latencies(self): + mock_instruments = mock.Mock(operation_latencies=mock.Mock()) + handler = self._make_one( + instance_id="inst", table_id="table", instruments=mock_instruments + ) + op = CompletedOperationMetric( + op_type=OperationType.READ_ROWS, + duration_ns=1234567, + completed_attempts=[], + final_status=StatusCode.OK, + cluster_id="cluster", + zone="zone", + is_streaming=True, + ) + handler.on_operation_complete(op) + expected_labels = { + "method": op.op_type.value, + "status": op.final_status.name, + "resource_zone": op.zone, + "resource_cluster": op.cluster_id, + **handler.shared_labels, + } + mock_instruments.operation_latencies.record.assert_called_once_with( + op.duration_ns / 1e6, + {"streaming": str(op.is_streaming), **expected_labels}, + ) + + @pytest.mark.parametrize( + "op_type,first_response_latency_ns,should_record", + [ + (OperationType.READ_ROWS, 12345, True), + (OperationType.READ_ROWS, None, False), + (OperationType.MUTATE_ROW, 12345, False), + ], + ) + def test_on_operation_complete_first_response_latencies( + self, op_type, first_response_latency_ns, should_record + ): + mock_instruments = mock.Mock(first_response_latencies=mock.Mock()) + handler = self._make_one( + instance_id="inst", table_id="table", instruments=mock_instruments + ) + op = CompletedOperationMetric( + op_type=op_type, + duration_ns=1234567, + completed_attempts=[], + final_status=StatusCode.OK, + cluster_id="cluster", + zone="zone", + is_streaming=True, + first_response_latency_ns=first_response_latency_ns, + ) + handler.on_operation_complete(op) + if should_record: + expected_labels = { + "method": op.op_type.value, + "status": op.final_status.name, + "resource_zone": op.zone, + "resource_cluster": op.cluster_id, + **handler.shared_labels, + } +
mock_instruments.first_response_latencies.record.assert_called_once_with( + first_response_latency_ns / 1e6, expected_labels + ) + else: + mock_instruments.first_response_latencies.record.assert_not_called() + + @pytest.mark.parametrize("attempts_count", [0, 1, 5]) + def test_on_operation_complete_retry_count(self, attempts_count): + mock_instruments = mock.Mock(retry_count=mock.Mock()) + handler = self._make_one( + instance_id="inst", table_id="table", instruments=mock_instruments + ) + attempts = [mock.Mock()] * attempts_count + op = CompletedOperationMetric( + op_type=OperationType.READ_ROWS, + duration_ns=1234567, + completed_attempts=attempts, + final_status=StatusCode.OK, + cluster_id="cluster", + zone="zone", + is_streaming=True, + ) + handler.on_operation_complete(op) + if attempts: + expected_labels = { + "method": op.op_type.value, + "status": op.final_status.name, + "resource_zone": op.zone, + "resource_cluster": op.cluster_id, + **handler.shared_labels, + } + mock_instruments.retry_count.add.assert_called_once_with( + len(attempts) - 1, expected_labels + ) + else: + mock_instruments.retry_count.add.assert_not_called() + + def test_on_attempt_complete_attempt_latencies(self): + mock_instruments = mock.Mock(attempt_latencies=mock.Mock()) + handler = self._make_one( + instance_id="inst", table_id="table", instruments=mock_instruments + ) + attempt = CompletedAttemptMetric(duration_ns=1234567, end_status=StatusCode.OK) + op = ActiveOperationMetric( + op_type=OperationType.READ_ROWS, + zone="zone", + cluster_id="cluster", + is_streaming=True, + ) + handler.on_attempt_complete(attempt, op) + expected_labels = { + "method": op.op_type.value, + "resource_zone": op.zone, + "resource_cluster": op.cluster_id, + **handler.shared_labels, + } + mock_instruments.attempt_latencies.record.assert_called_once_with( + attempt.duration_ns / 1e6, + { + "streaming": str(op.is_streaming), + "status": attempt.end_status.name, + **expected_labels, + }, + ) + + 
@pytest.mark.parametrize( + "is_first_attempt,flow_throttling_ns", + [(True, 54321), (False, 0), (True, 0)], + ) + def test_on_attempt_complete_throttling_latencies( + self, is_first_attempt, flow_throttling_ns + ): + mock_instruments = mock.Mock(throttling_latencies=mock.Mock()) + handler = self._make_one( + instance_id="inst", table_id="table", instruments=mock_instruments + ) + attempt = CompletedAttemptMetric( + duration_ns=1234567, + end_status=StatusCode.OK, + ) + op = ActiveOperationMetric( + op_type=OperationType.READ_ROWS, + flow_throttling_time_ns=flow_throttling_ns, + ) + if not is_first_attempt: + op.completed_attempts.append(mock.Mock()) + handler.on_attempt_complete(attempt, op) + expected_throttling = 0 + if is_first_attempt: + expected_throttling += flow_throttling_ns / 1e6 + mock_instruments.throttling_latencies.record.assert_called_once_with( + pytest.approx(expected_throttling), mock.ANY + ) + + def test_on_attempt_complete_application_latencies(self): + mock_instruments = mock.Mock(application_latencies=mock.Mock()) + handler = self._make_one( + instance_id="inst", table_id="table", instruments=mock_instruments + ) + attempt = CompletedAttemptMetric( + duration_ns=1234567, + end_status=StatusCode.OK, + application_blocking_time_ns=234567, + backoff_before_attempt_ns=345678, + ) + op = ActiveOperationMetric(op_type=OperationType.READ_ROWS) + handler.on_attempt_complete(attempt, op) + mock_instruments.application_latencies.record.assert_called_once_with( + (attempt.application_blocking_time_ns + attempt.backoff_before_attempt_ns) + / 1e6, + mock.ANY, + ) + + @pytest.mark.parametrize( + "gfe_latency_ns,should_record_server_latency", + [(12345, True), (None, False), (0, True)], + ) + def test_on_attempt_complete_server_latencies_and_connectivity_error( + self, gfe_latency_ns, should_record_server_latency + ): + mock_instruments = mock.Mock( + server_latencies=mock.Mock(), connectivity_error_count=mock.Mock() + ) + handler = self._make_one( + 
instance_id="inst", table_id="table", instruments=mock_instruments + ) + attempt = CompletedAttemptMetric( + duration_ns=1234567, + end_status=StatusCode.OK, + gfe_latency_ns=gfe_latency_ns, + ) + op = ActiveOperationMetric( + op_type=OperationType.READ_ROWS, + zone="zone", + cluster_id="cluster", + is_streaming=True, + ) + handler.on_attempt_complete(attempt, op) + if should_record_server_latency: + mock_instruments.server_latencies.record.assert_called_once_with( + gfe_latency_ns / 1e6, mock.ANY + ) + mock_instruments.connectivity_error_count.add.assert_not_called() + else: + mock_instruments.server_latencies.record.assert_not_called() + mock_instruments.connectivity_error_count.add.assert_called_once_with( + 1, mock.ANY + ) diff --git a/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test__mutate_rows.py b/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test__mutate_rows.py index 3bce82f1e50f..1c0aa52ca0b8 100644 --- a/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test__mutate_rows.py +++ b/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test__mutate_rows.py @@ -20,6 +20,7 @@ from google.rpc import status_pb2 from google.cloud.bigtable.data._cross_sync import CrossSync +from google.cloud.bigtable.data._metrics import ActiveOperationMetric from google.cloud.bigtable.data.mutations import DeleteAllFromRow, RowMutationEntry from google.cloud.bigtable_v2.types import MutateRowsResponse @@ -44,6 +45,9 @@ def _make_one(self, *args, **kwargs): kwargs["attempt_timeout"] = kwargs.pop("attempt_timeout", 0.1) kwargs["retryable_exceptions"] = kwargs.pop("retryable_exceptions", ()) kwargs["mutation_entries"] = kwargs.pop("mutation_entries", []) + kwargs["metric"] = kwargs.pop( + "metric", ActiveOperationMetric("MUTATE_ROWS") + ) return self._target_class()(*args, **kwargs) def _make_mutation(self, count=1, size=1): @@ -83,6 +87,7 @@ def test_ctor(self): entries = [self._make_mutation(), self._make_mutation()] 
operation_timeout = 0.05 attempt_timeout = 0.01 + metric = mock.Mock() retryable_exceptions = () instance = self._make_one( client, @@ -90,6 +95,7 @@ def test_ctor(self): entries, operation_timeout, attempt_timeout, + metric, retryable_exceptions, ) assert client.mutate_rows.call_count == 0 @@ -105,6 +111,7 @@ def test_ctor(self): assert instance.is_retryable(RuntimeError("")) is False assert instance.remaining_indices == list(range(len(entries))) assert instance.errors == {} + assert instance._operation_metric == metric def test_ctor_too_many_entries(self): """should raise an error if an operation is created with more than 100,000 entries""" @@ -119,7 +126,9 @@ def test_ctor_too_many_entries(self): operation_timeout = 0.05 attempt_timeout = 0.01 with pytest.raises(ValueError) as e: - self._make_one(client, table, entries, operation_timeout, attempt_timeout) + self._make_one( + client, table, entries, operation_timeout, attempt_timeout, mock.Mock() + ) assert "mutate_rows requests can contain at most 100000 mutations" in str( e.value ) @@ -129,6 +138,7 @@ def test_mutate_rows_operation(self): """Test successful case of mutate_rows_operation""" client = mock.Mock() table = mock.Mock() + metric = ActiveOperationMetric("MUTATE_ROWS") entries = [self._make_mutation(), self._make_mutation()] operation_timeout = 0.05 cls = self._target_class() @@ -136,7 +146,7 @@ def test_mutate_rows_operation(self): f"{cls.__module__}.{cls.__name__}._run_attempt", CrossSync._Sync_Impl.Mock() ) as attempt_mock: instance = self._make_one( - client, table, entries, operation_timeout, operation_timeout + client, table, entries, operation_timeout, operation_timeout, metric ) instance.start() assert attempt_mock.call_count == 1 @@ -147,6 +157,7 @@ def test_mutate_rows_attempt_exception(self, exc_type): client = CrossSync._Sync_Impl.Mock() table = mock.Mock() table._request_path = {"table_name": "table"} + metric = ActiveOperationMetric("MUTATE_ROWS") table.app_profile_id = None entries = 
[self._make_mutation(), self._make_mutation()] operation_timeout = 0.05 @@ -155,7 +166,7 @@ def test_mutate_rows_attempt_exception(self, exc_type): found_exc = None try: instance = self._make_one( - client, table, entries, operation_timeout, operation_timeout + client, table, entries, operation_timeout, operation_timeout, metric ) instance._run_attempt() except Exception as e: @@ -176,6 +187,7 @@ def test_mutate_rows_exception(self, exc_type): client = mock.Mock() table = mock.Mock() + metric = ActiveOperationMetric("MUTATE_ROWS") entries = [self._make_mutation(), self._make_mutation()] operation_timeout = 0.05 expected_cause = exc_type("abort") @@ -186,7 +198,7 @@ def test_mutate_rows_exception(self, exc_type): found_exc = None try: instance = self._make_one( - client, table, entries, operation_timeout, operation_timeout + client, table, entries, operation_timeout, operation_timeout, metric ) instance.start() except MutationsExceptionGroup as e: @@ -203,6 +215,7 @@ def test_mutate_rows_exception_retryable_eventually_pass(self, exc_type): """If an exception fails but eventually passes, it should not raise an exception""" client = mock.Mock() table = mock.Mock() + metric = ActiveOperationMetric("MUTATE_ROWS") entries = [self._make_mutation()] operation_timeout = 1 expected_cause = exc_type("retry") @@ -217,6 +230,7 @@ def test_mutate_rows_exception_retryable_eventually_pass(self, exc_type): entries, operation_timeout, operation_timeout, + metric, retryable_exceptions=(exc_type,), ) instance.start() @@ -233,6 +247,7 @@ def test_mutate_rows_incomplete_ignored(self): client = mock.Mock() table = mock.Mock() + metric = ActiveOperationMetric("MUTATE_ROWS") entries = [self._make_mutation()] operation_timeout = 0.05 with mock.patch.object( @@ -242,7 +257,7 @@ def test_mutate_rows_incomplete_ignored(self): found_exc = None try: instance = self._make_one( - client, table, entries, operation_timeout, operation_timeout + client, table, entries, operation_timeout, 
operation_timeout, metric ) instance.start() except MutationsExceptionGroup as e: diff --git a/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test__read_rows.py b/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test__read_rows.py index f16a523e862d..9acc44209be4 100644 --- a/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test__read_rows.py +++ b/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test__read_rows.py @@ -18,6 +18,7 @@ import pytest from google.cloud.bigtable.data._cross_sync import CrossSync +from google.cloud.bigtable.data._metrics import ActiveOperationMetric try: from unittest import mock @@ -54,6 +55,7 @@ def test_ctor(self): expected_operation_timeout = 42 expected_request_timeout = 44 time_gen_mock = mock.Mock() + expected_metric = mock.Mock() subpath = "_async" if CrossSync._Sync_Impl.is_async else "_sync_autogen" with mock.patch( f"google.cloud.bigtable.data.{subpath}._read_rows._attempt_timeout_generator", @@ -64,6 +66,7 @@ def test_ctor(self): table, operation_timeout=expected_operation_timeout, attempt_timeout=expected_request_timeout, + metric=expected_metric, ) assert time_gen_mock.call_count == 1 time_gen_mock.assert_called_once_with( @@ -76,6 +79,7 @@ def test_ctor(self): assert instance.request.table_name == "test_table" assert instance.request.app_profile_id == table.app_profile_id assert instance.request.rows_limit == row_limit + assert instance._operation_metric == expected_metric @pytest.mark.parametrize( "in_keys,last_key,expected", @@ -254,7 +258,9 @@ def mock_stream(): table = mock.Mock() table._request_path = {"table_name": "table_name"} table.app_profile_id = "app_profile_id" - instance = self._make_one(query, table, 10, 10) + instance = self._make_one( + query, table, 10, 10, ActiveOperationMetric("READ_ROWS") + ) assert instance._remaining_count == start_limit for val in instance.chunk_stream(awaitable_stream()): pass @@ -289,7 +295,9 @@ def mock_stream(): table = 
mock.Mock() table._request_path = {"table_name": "table_name"} table.app_profile_id = "app_profile_id" - instance = self._make_one(query, table, 10, 10) + instance = self._make_one( + query, table, 10, 10, ActiveOperationMetric("READ_ROWS") + ) assert instance._remaining_count == start_limit with pytest.raises(InvalidChunk) as e: for val in instance.chunk_stream(awaitable_stream()): @@ -307,7 +315,9 @@ def mock_stream(): with mock.patch.object( self._get_target_class(), "_read_rows_attempt" ) as mock_attempt: - instance = self._make_one(mock.Mock(), mock.Mock(), 1, 1) + instance = self._make_one( + mock.Mock(), mock.Mock(), 1, 1, ActiveOperationMetric("READ_ROWS") + ) wrapped_gen = mock_stream() mock_attempt.return_value = wrapped_gen gen = instance.start_operation() diff --git a/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_client.py b/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_client.py index 79ad903b6191..f9c3ef0a3b57 100644 --- a/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_client.py +++ b/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_client.py @@ -83,6 +83,10 @@ def _make_client(cls, *args, use_emulator=True, use_mtls="auto", **kwargs): return cls._get_target_class()(*args, **kwargs) def test_ctor(self): + from google.cloud.bigtable.data._metrics.handlers.gcp_exporter import ( + BigtableMetricsExporter, + ) + expected_project = "project-id" expected_credentials = AnonymousCredentials() client = self._make_client( @@ -96,6 +100,8 @@ def test_ctor(self): assert isinstance( client._metrics_interceptor, CrossSync._Sync_Impl.MetricsInterceptor ) + assert client._gcp_metrics_exporter is not None + assert isinstance(client._gcp_metrics_exporter, BigtableMetricsExporter) client.close() def test_ctor_super_inits(self): @@ -158,7 +164,36 @@ def test_ctor_dict_options(self): start_background_refresh.assert_called_once() client.close() - def test_veneer_grpc_headers(self): + def 
test_metrics_exporter_init_shares_arguments(self): + expected_credentials = AnonymousCredentials() + expected_project = "custom_project" + expected_options = client_options.ClientOptions() + expected_options.credentials_file = None + expected_options.quota_project_id = None + with mock.patch( + "google.cloud.bigtable.data._metrics.handlers.gcp_exporter.BigtableMetricsExporter.__init__", + return_value=None, + ) as exporter_mock: + with self._make_client( + project=expected_project, + credentials=expected_credentials, + client_options=expected_options, + ): + exporter_mock.assert_called_once_with( + project_id=expected_project, + credentials=expected_credentials, + client_options=expected_options, + ) + + def test_metrics_exporter_init_implicit_project(self): + with self._make_client() as client: + assert client._gcp_metrics_exporter.project_id == client.project + + @mock.patch("google.cloud.bigtable.data._async.client.BigtableMetricsExporter") + @mock.patch( + "google.cloud.bigtable.data._sync_autogen.client.BigtableMetricsExporter" + ) + def test_veneer_grpc_headers(self, exporter_mock, exporter_mock_sync): client_component = "data-async" if CrossSync._Sync_Impl.is_async else "data" VENEER_HEADER_REGEX = re.compile( "gapic\\/[0-9]+\\.[\\w.-]+ gax\\/[0-9]+\\.[\\w.-]+ gccl\\/[0-9]+\\.[\\w.-]+-" @@ -957,6 +992,7 @@ def test_ctor(self): from google.cloud.bigtable.data._helpers import _WarmedInstanceKey from google.cloud.bigtable.data._metrics import ( BigtableClientSideMetricsController, + GoogleCloudMetricsHandler, ) expected_table_id = "table-id" @@ -999,6 +1035,8 @@ def test_ctor(self): assert instance_key in client._active_instances assert client._instance_owners[instance_key] == {id(table)} assert isinstance(table._metrics, BigtableClientSideMetricsController) + assert len(table._metrics.handlers) == 1 + assert isinstance(table._metrics.handlers[0], GoogleCloudMetricsHandler) assert table.default_operation_timeout == expected_operation_timeout assert 
table.default_attempt_timeout == expected_attempt_timeout assert ( @@ -1238,6 +1276,7 @@ def test_ctor(self): from google.cloud.bigtable.data._helpers import _WarmedInstanceKey from google.cloud.bigtable.data._metrics import ( BigtableClientSideMetricsController, + GoogleCloudMetricsHandler, ) expected_table_id = "table-id" @@ -1287,6 +1326,8 @@ def test_ctor(self): assert instance_key in client._active_instances assert client._instance_owners[instance_key] == {id(view)} assert isinstance(view._metrics, BigtableClientSideMetricsController) + assert len(view._metrics.handlers) == 1 + assert isinstance(view._metrics.handlers[0], GoogleCloudMetricsHandler) assert view.default_operation_timeout == expected_operation_timeout assert view.default_attempt_timeout == expected_attempt_timeout assert ( @@ -1325,6 +1366,10 @@ def _make_client(self, *args, **kwargs): return CrossSync._Sync_Impl.TestBigtableDataClient._make_client(*args, **kwargs) def _make_table(self, *args, **kwargs): + from google.cloud.bigtable.data._metrics.handlers.gcp_exporter import ( + BigtableMetricsExporter, + ) + client_mock = mock.Mock() client_mock._register_instance.side_effect = ( lambda *args, **kwargs: CrossSync._Sync_Impl.yield_to_event_loop() @@ -1340,6 +1385,7 @@ def _make_table(self, *args, **kwargs): ) client_mock._gapic_client.table_path.return_value = kwargs["table_id"] client_mock._gapic_client.instance_path.return_value = kwargs["instance_id"] + client_mock._gcp_metrics_exporter = BigtableMetricsExporter("project") return CrossSync._Sync_Impl.TestTable._get_target_class()( client_mock, *args, **kwargs ) @@ -1656,9 +1702,13 @@ def test_read_row(self): with self._make_client() as client: table = client.get_table("instance", "table") row_key = b"test_1" - with mock.patch.object(table, "read_rows") as read_rows: + with mock.patch.object( + CrossSync._Sync_Impl, "_ReadRowsOperation" + ) as mock_op_constructor: + mock_op = mock.Mock() expected_result = object() - read_rows.side_effect = 
lambda *args, **kwargs: [expected_result] + mock_op.start_operation.return_value = [expected_result] + mock_op_constructor.return_value = mock_op expected_op_timeout = 8 expected_req_timeout = 4 row = table.read_row( @@ -1667,30 +1717,33 @@ def test_read_row(self): attempt_timeout=expected_req_timeout, ) assert row == expected_result - assert read_rows.call_count == 1 - args, kwargs = read_rows.call_args_list[0] + assert mock_op_constructor.call_count == 1 + args, kwargs = mock_op_constructor.call_args_list[0] assert kwargs["operation_timeout"] == expected_op_timeout assert kwargs["attempt_timeout"] == expected_req_timeout - assert len(args) == 1 + assert len(args) == 2 assert isinstance(args[0], ReadRowsQuery) query = args[0] assert query.row_keys == [row_key] assert query.row_ranges == [] assert query.limit == 1 + assert args[1] is table def test_read_row_w_filter(self): """Test reading a single row with an added filter""" with self._make_client() as client: table = client.get_table("instance", "table") row_key = b"test_1" - with mock.patch.object(table, "read_rows") as read_rows: + with mock.patch.object( + CrossSync._Sync_Impl, "_ReadRowsOperation" + ) as mock_op_constructor: + mock_op = mock.Mock() expected_result = object() - read_rows.side_effect = lambda *args, **kwargs: [expected_result] + mock_op.start_operation.return_value = [expected_result] + mock_op_constructor.return_value = mock_op expected_op_timeout = 8 expected_req_timeout = 4 - mock_filter = mock.Mock() - expected_filter = {"filter": "mock filter"} - mock_filter._to_dict.return_value = expected_filter + expected_filter = mock.Mock() row = table.read_row( row_key, operation_timeout=expected_op_timeout, @@ -1698,11 +1751,11 @@ def test_read_row_w_filter(self): row_filter=expected_filter, ) assert row == expected_result - assert read_rows.call_count == 1 - args, kwargs = read_rows.call_args_list[0] + assert mock_op_constructor.call_count == 1 + args, kwargs = mock_op_constructor.call_args_list[0] 
assert kwargs["operation_timeout"] == expected_op_timeout assert kwargs["attempt_timeout"] == expected_req_timeout - assert len(args) == 1 + assert len(args) == 2 assert isinstance(args[0], ReadRowsQuery) query = args[0] assert query.row_keys == [row_key] @@ -1715,8 +1768,12 @@ def test_read_row_no_response(self): with self._make_client() as client: table = client.get_table("instance", "table") row_key = b"test_1" - with mock.patch.object(table, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: [] + with mock.patch.object( + CrossSync._Sync_Impl, "_ReadRowsOperation" + ) as mock_op_constructor: + mock_op = mock.Mock() + mock_op.start_operation.return_value = [] + mock_op_constructor.return_value = mock_op expected_op_timeout = 8 expected_req_timeout = 4 result = table.read_row( @@ -1725,8 +1782,8 @@ def test_read_row_no_response(self): attempt_timeout=expected_req_timeout, ) assert result is None - assert read_rows.call_count == 1 - args, kwargs = read_rows.call_args_list[0] + assert mock_op_constructor.call_count == 1 + args, kwargs = mock_op_constructor.call_args_list[0] assert kwargs["operation_timeout"] == expected_op_timeout assert kwargs["attempt_timeout"] == expected_req_timeout assert isinstance(args[0], ReadRowsQuery) @@ -1744,21 +1801,28 @@ def test_row_exists(self, return_value, expected_result): with self._make_client() as client: table = client.get_table("instance", "table") row_key = b"test_1" - with mock.patch.object(table, "read_rows") as read_rows: - read_rows.side_effect = lambda *args, **kwargs: return_value - expected_op_timeout = 1 - expected_req_timeout = 2 + with mock.patch.object( + CrossSync._Sync_Impl, "_ReadRowsOperation" + ) as mock_op_constructor: + mock_op = mock.Mock() + mock_op.start_operation.return_value = return_value + mock_op_constructor.return_value = mock_op + expected_op_timeout = 2 + expected_req_timeout = 1 result = table.row_exists( row_key, operation_timeout=expected_op_timeout, 
attempt_timeout=expected_req_timeout, ) assert expected_result == result - assert read_rows.call_count == 1 - args, kwargs = read_rows.call_args_list[0] + assert mock_op_constructor.call_count == 1 + args, kwargs = mock_op_constructor.call_args_list[0] assert kwargs["operation_timeout"] == expected_op_timeout assert kwargs["attempt_timeout"] == expected_req_timeout - assert isinstance(args[0], ReadRowsQuery) + query = args[0] + assert isinstance(query, ReadRowsQuery) + assert query.row_keys == [row_key] + assert query.limit == 1 expected_filter = { "chain": { "filters": [ @@ -1767,10 +1831,6 @@ def test_row_exists(self, return_value, expected_result): ] } } - query = args[0] - assert query.row_keys == [row_key] - assert query.row_ranges == [] - assert query.limit == 1 assert query.filter._to_dict() == expected_filter diff --git a/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_mutations_batcher.py b/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_mutations_batcher.py index f6568448ff8c..16523b116f1c 100644 --- a/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_mutations_batcher.py +++ b/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_mutations_batcher.py @@ -258,6 +258,10 @@ def _get_target_class(self): def _make_one(self, table=None, **kwargs): from google.api_core.exceptions import DeadlineExceeded, ServiceUnavailable + from google.cloud.bigtable.data._metrics import ( + BigtableClientSideMetricsController, + ) + if table is None: table = mock.Mock() table._request_path = {"table_name": "table"} @@ -268,6 +272,7 @@ def _make_one(self, table=None, **kwargs): DeadlineExceeded, ServiceUnavailable, ) + table._metrics = BigtableClientSideMetricsController([]) return self._get_target_class()(table, **kwargs) @staticmethod @@ -816,14 +821,16 @@ def test__execute_mutate_rows(self): table.default_mutate_rows_retryable_errors = () with self._make_one(table) as instance: batch = [self._make_mutation()] - 
result = instance._execute_mutate_rows(batch) + expected_metric = mock.Mock() + result = instance._execute_mutate_rows(batch, expected_metric) assert start_operation.call_count == 1 args, kwargs = mutate_rows.call_args assert args[0] == table.client._gapic_client assert args[1] == table assert args[2] == batch - kwargs["operation_timeout"] == 17 - kwargs["attempt_timeout"] == 13 + assert kwargs["operation_timeout"] == 17 + assert kwargs["attempt_timeout"] == 13 + assert kwargs["metric"] == expected_metric assert result == [] def test__execute_mutate_rows_returns_errors(self): @@ -845,7 +852,7 @@ def test__execute_mutate_rows_returns_errors(self): table.default_mutate_rows_retryable_errors = () with self._make_one(table) as instance: batch = [self._make_mutation()] - result = instance._execute_mutate_rows(batch) + result = instance._execute_mutate_rows(batch, mock.Mock()) assert len(result) == 2 assert result[0] == err1 assert result[1] == err2 @@ -953,7 +960,7 @@ def test_timeout_args_passed(self): ) as instance: assert instance._operation_timeout == expected_operation_timeout assert instance._attempt_timeout == expected_attempt_timeout - instance._execute_mutate_rows([self._make_mutation()]) + instance._execute_mutate_rows([self._make_mutation()], mock.Mock()) assert mutate_rows.call_count == 1 kwargs = mutate_rows.call_args[1] assert kwargs["operation_timeout"] == expected_operation_timeout @@ -1039,6 +1046,13 @@ def test__add_exceptions(self, limit, in_e, start_e, end_e): def test_customizable_retryable_errors(self, input_retryables, expected_retryables): """Test that retryable functions support user-configurable arguments, and that the configured retryables are passed down to the gapic layer.""" + from google.cloud.bigtable.data._metrics import ActiveOperationMetric + from google.cloud.bigtable.data._metrics.handlers.gcp_exporter import ( + BigtableMetricsExporter, + ) + + mock_client = mock.Mock() + mock_client._gcp_metrics_exporter = 
BigtableMetricsExporter("project") with mock.patch.object( google.api_core.retry, "if_exception_type" ) as predicate_builder_mock: @@ -1047,7 +1061,7 @@ def test_customizable_retryable_errors(self, input_retryables, expected_retryabl ) as retry_fn_mock: table = None with mock.patch("asyncio.create_task"): - table = CrossSync._Sync_Impl.Table(mock.Mock(), "instance", "table") + table = CrossSync._Sync_Impl.Table(mock_client, "instance", "table") with self._make_one( table, batch_retryable_errors=input_retryables ) as instance: @@ -1056,12 +1070,14 @@ def test_customizable_retryable_errors(self, input_retryables, expected_retryabl predicate_builder_mock.return_value = expected_predicate retry_fn_mock.side_effect = RuntimeError("stop early") mutation = self._make_mutation(count=1, size=1) - instance._execute_mutate_rows([mutation]) + instance._execute_mutate_rows( + [mutation], ActiveOperationMetric("MUTATE_ROWS") + ) predicate_builder_mock.assert_called_once_with( *expected_retryables, _MutateRowsIncomplete ) - retry_call_args = retry_fn_mock.call_args_list[0].args - assert retry_call_args[1] is expected_predicate + retry_call_kwargs = retry_fn_mock.call_args_list[0].kwargs + assert retry_call_kwargs["predicate"] is expected_predicate def test_large_batch_write(self): """Test that a large batch of mutations can be written""" diff --git a/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_read_rows_acceptance.py b/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_read_rows_acceptance.py index 29332e712d35..77c55ce0183b 100644 --- a/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_read_rows_acceptance.py +++ b/packages/google-cloud-bigtable/tests/unit/data/_sync_autogen/test_read_rows_acceptance.py @@ -24,6 +24,7 @@ import pytest from google.cloud.bigtable.data._cross_sync import CrossSync +from google.cloud.bigtable.data._metrics import ActiveOperationMetric from google.cloud.bigtable.data.exceptions import InvalidChunk 
from google.cloud.bigtable.data.row import Row from google.cloud.bigtable_v2 import ReadRowsResponse @@ -33,8 +34,13 @@ class TestReadRowsAcceptance: @staticmethod - def _get_operation_class(): - return CrossSync._Sync_Impl._ReadRowsOperation + def _make_operation(): + metric = ActiveOperationMetric("READ_ROWS") + op = CrossSync._Sync_Impl._ReadRowsOperation( + mock.Mock(), mock.Mock(), 5, 5, metric + ) + op._remaining_count = None + return op @staticmethod def _get_client_class(): @@ -72,13 +78,8 @@ def _process_chunks(self, *chunks): def _row_stream(): yield ReadRowsResponse(chunks=chunks) - instance = mock.Mock() - instance._remaining_count = None - instance._last_yielded_row_key = None - chunker = self._get_operation_class().chunk_stream( - instance, self._coro_wrapper(_row_stream()) - ) - merger = self._get_operation_class().merge_rows(chunker) + chunker = self._make_operation().chunk_stream(self._coro_wrapper(_row_stream())) + merger = self._make_operation().merge_rows(chunker) results = [] for row in merger: results.append(row) @@ -94,13 +95,10 @@ def _scenerio_stream(): try: results = [] - instance = mock.Mock() - instance._last_yielded_row_key = None - instance._remaining_count = None - chunker = self._get_operation_class().chunk_stream( - instance, self._coro_wrapper(_scenerio_stream()) + chunker = self._make_operation().chunk_stream( + self._coro_wrapper(_scenerio_stream()) ) - merger = self._get_operation_class().merge_rows(chunker) + merger = self._make_operation().merge_rows(chunker) for row in merger: for cell in row: cell_result = ReadRowsTest.Result( @@ -183,13 +181,10 @@ def test_out_of_order_rows(self): def _row_stream(): yield ReadRowsResponse(last_scanned_row_key=b"a") - instance = mock.Mock() - instance._remaining_count = None - instance._last_yielded_row_key = b"b" - chunker = self._get_operation_class().chunk_stream( - instance, self._coro_wrapper(_row_stream()) - ) - merger = self._get_operation_class().merge_rows(chunker) + op = 
self._make_operation() + op._last_yielded_row_key = b"b" + chunker = op.chunk_stream(self._coro_wrapper(_row_stream())) + merger = self._make_operation().merge_rows(chunker) with pytest.raises(InvalidChunk): for _ in merger: pass