From 947538056dd8a45aa422381debc707521227c01c Mon Sep 17 00:00:00 2001 From: Yash Tibrewal Date: Wed, 9 Apr 2025 01:17:15 +0000 Subject: [PATCH 01/14] [OTel C++] Implement retry metrics --- CMakeLists.txt | 1 + build_autogenerated.yaml | 2 + include/grpcpp/ext/otel_plugin.h | 6 + src/cpp/ext/otel/otel_client_call_tracer.cc | 36 ++- src/cpp/ext/otel/otel_client_call_tracer.h | 8 +- src/cpp/ext/otel/otel_plugin.cc | 16 ++ src/cpp/ext/otel/otel_plugin.h | 5 + test/cpp/ext/otel/BUILD | 1 + test/cpp/ext/otel/otel_plugin_test.cc | 272 ++++++++++++++++++++ test/cpp/ext/otel/otel_test_library.cc | 2 +- 10 files changed, 345 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0ae47f1561abe..dc8da3d2a444c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21759,6 +21759,7 @@ add_executable(otel_plugin_test src/cpp/ext/otel/otel_client_call_tracer.cc src/cpp/ext/otel/otel_plugin.cc src/cpp/ext/otel/otel_server_call_tracer.cc + test/core/test_util/fail_first_call_filter.cc test/core/test_util/fake_stats_plugin.cc test/cpp/end2end/test_service_impl.cc test/cpp/ext/otel/otel_plugin_test.cc diff --git a/build_autogenerated.yaml b/build_autogenerated.yaml index 01da19a53cd90..d23651a4d3806 100644 --- a/build_autogenerated.yaml +++ b/build_autogenerated.yaml @@ -13429,6 +13429,7 @@ targets: - src/cpp/ext/otel/otel_plugin.h - src/cpp/ext/otel/otel_server_call_tracer.h - test/core/promise/test_context.h + - test/core/test_util/fail_first_call_filter.h - test/core/test_util/fake_stats_plugin.h - test/cpp/end2end/test_service_impl.h - test/cpp/ext/otel/otel_test_library.h @@ -13444,6 +13445,7 @@ targets: - src/cpp/ext/otel/otel_client_call_tracer.cc - src/cpp/ext/otel/otel_plugin.cc - src/cpp/ext/otel/otel_server_call_tracer.cc + - test/core/test_util/fail_first_call_filter.cc - test/core/test_util/fake_stats_plugin.cc - test/cpp/end2end/test_service_impl.cc - test/cpp/ext/otel/otel_plugin_test.cc diff --git a/include/grpcpp/ext/otel_plugin.h b/include/grpcpp/ext/otel_plugin.h index 1e475306c28cf..72d947e7dd610 100644 --- a/include/grpcpp/ext/otel_plugin.h +++ b/include/grpcpp/ext/otel_plugin.h @@ -100,6 +100,12 @@ class OpenTelemetryPluginBuilder { kServerCallRcvdTotalCompressedMessageSizeInstrumentName = "grpc.server.call.rcvd_total_compressed_message_size"; + /// Experimental Retry Metrics + static constexpr absl::string_view kClientCallRetries = + "grpc.client.call.retries"; + static constexpr absl::string_view kClientCallRetryDelay = + "grpc.client.call.retry_delay"; + OpenTelemetryPluginBuilder(); ~OpenTelemetryPluginBuilder(); /// If `SetMeterProvider()` is not called, no metrics are collected. diff --git a/src/cpp/ext/otel/otel_client_call_tracer.cc b/src/cpp/ext/otel/otel_client_call_tracer.cc index dcfd822ba942a..5cf3e7fa8fd55 100644 --- a/src/cpp/ext/otel/otel_client_call_tracer.cc +++ b/src/cpp/ext/otel/otel_client_call_tracer.cc @@ -66,7 +66,7 @@ namespace internal { // OpenTelemetryPluginImpl::ClientCallTracer::CallAttemptTracer::CallAttemptTracer( - const OpenTelemetryPluginImpl::ClientCallTracer* parent, + OpenTelemetryPluginImpl::ClientCallTracer* const parent, uint64_t attempt_num, bool is_transparent_retry, bool arena_allocated) : parent_(parent), arena_allocated_(arena_allocated), @@ -242,6 +242,12 @@ void OpenTelemetryPluginImpl::ClientCallTracer::CallAttemptTracer:: parent_->otel_plugin_->client_.attempt.rcvd_total_compressed_message_size ->Record(incoming_bytes, labels, opentelemetry::context::Context{}); } + { + grpc_core::MutexLock lock(&parent_->mu_); + if (--parent_->num_active_attempts_ == 0) { + parent_->time_at_last_attempt_end_ = absl::Now(); + } + } if (span_ != nullptr) { if (status.ok()) { span_->SetStatus(opentelemetry::trace::StatusCode::kOk); @@ -356,6 +362,30 @@ OpenTelemetryPluginImpl::ClientCallTracer::ClientCallTracer( } OpenTelemetryPluginImpl::ClientCallTracer::~ClientCallTracer() { + if (otel_plugin_->client_.call.retries != nullptr) { + otel_plugin_->client_.call.retries->Record( + retries_ - 1, + std::array, 3>{ + {{OpenTelemetryMethodKey(), MethodForStats()}, + {OpenTelemetryTargetKey(), scope_config_->filtered_target()}, + {OpenTelemetryRetryType(), "retry"}}}, + opentelemetry::context::Context{}); + otel_plugin_->client_.call.retries->Record( + transparent_retries_, + std::array, 3>{ + {{OpenTelemetryMethodKey(), MethodForStats()}, + {OpenTelemetryTargetKey(), scope_config_->filtered_target()}, + {OpenTelemetryRetryType(), "transparent"}}}, + opentelemetry::context::Context{}); + } + if (otel_plugin_->client_.call.retry_delay != nullptr) { + otel_plugin_->client_.call.retry_delay->Record( + absl::ToDoubleSeconds(retry_delay_), + std::array, 2>{ + {{OpenTelemetryMethodKey(), MethodForStats()}, + {OpenTelemetryTargetKey(), scope_config_->filtered_target()}}}, + opentelemetry::context::Context{}); + } if (span_ != nullptr) { span_->End(); } @@ -373,6 +403,9 @@ OpenTelemetryPluginImpl::ClientCallTracer::StartNewAttempt( grpc_core::MutexLock lock(&mu_); if (transparent_retries_ != 0 || retries_ != 0) { is_first_attempt = false; + if (num_active_attempts_ == 0) { + retry_delay_ += absl::Now() - time_at_last_attempt_end_; + } } if (is_transparent_retry) { ++transparent_retries_; @@ -380,6 +413,7 @@ OpenTelemetryPluginImpl::ClientCallTracer::StartNewAttempt( ++retries_; } attempt_num = retries_ - 1; // Sequence starts at 0 + ++num_active_attempts_; } if (is_first_attempt) { return arena_->New( diff --git a/src/cpp/ext/otel/otel_client_call_tracer.h b/src/cpp/ext/otel/otel_client_call_tracer.h index 6cece0ccce751..fd2e09849dc6b 100644 --- a/src/cpp/ext/otel/otel_client_call_tracer.h +++ b/src/cpp/ext/otel/otel_client_call_tracer.h @@ -51,7 +51,7 @@ class OpenTelemetryPluginImpl::ClientCallTracer class CallAttemptTracer : public grpc_core::ClientCallTracer::CallAttemptTracer { public: - CallAttemptTracer(const OpenTelemetryPluginImpl::ClientCallTracer* parent, + CallAttemptTracer(OpenTelemetryPluginImpl::ClientCallTracer* const parent, uint64_t attempt_num, bool is_transparent_retry, bool arena_allocated); @@ -100,7 +100,7 @@ class OpenTelemetryPluginImpl::ClientCallTracer private: void PopulateLabelInjectors(grpc_metadata_batch* metadata); - const ClientCallTracer* parent_; + ClientCallTracer* const parent_; const bool arena_allocated_; // Start time (for measuring latency). absl::Time start_time_; @@ -162,6 +162,10 @@ class OpenTelemetryPluginImpl::ClientCallTracer uint64_t retries_ ABSL_GUARDED_BY(&mu_) = 0; // Transparent retries per call uint64_t transparent_retries_ ABSL_GUARDED_BY(&mu_) = 0; + // Retry delay + absl::Duration retry_delay_ ABSL_GUARDED_BY(&mu_); + absl::Time time_at_last_attempt_end_ ABSL_GUARDED_BY(&mu_); + uint64_t num_active_attempts_ ABSL_GUARDED_BY(&mu_) = 0; opentelemetry::nostd::shared_ptr span_; }; diff --git a/src/cpp/ext/otel/otel_plugin.cc b/src/cpp/ext/otel/otel_plugin.cc index ce6ee234b70f1..b91393282e623 100644 --- a/src/cpp/ext/otel/otel_plugin.cc +++ b/src/cpp/ext/otel/otel_plugin.cc @@ -58,6 +58,8 @@ absl::string_view OpenTelemetryStatusKey() { return "grpc.status"; } absl::string_view OpenTelemetryTargetKey() { return "grpc.target"; } +absl::string_view OpenTelemetryRetryType() { return "grpc.retry_type"; } + namespace { absl::flat_hash_set BaseMetrics() { absl::flat_hash_set base_metrics{ @@ -501,6 +503,20 @@ OpenTelemetryPluginImpl::OpenTelemetryPluginImpl( kServerCallRcvdTotalCompressedMessageSizeInstrumentName), "Compressed message bytes received per server call", "By"); } + if (metrics.contains( + grpc::OpenTelemetryPluginBuilder::kClientCallRetries)) { + client_.call.retries = meter->CreateUInt64Histogram( + std::string(grpc::OpenTelemetryPluginBuilder::kClientCallRetries), + "Number of retry attempts made during the call", "{retry}"); + } + if (metrics.contains( + grpc::OpenTelemetryPluginBuilder::kClientCallRetryDelay)) { + client_.call.retry_delay = meter->CreateDoubleHistogram( + std::string(grpc::OpenTelemetryPluginBuilder::kClientCallRetryDelay), + "Total time of delay while there is no active attempt during the " + "client call", + "s"); + } // Store optional label keys for per call metrics CHECK(static_cast(grpc_core::ClientCallTracer::CallAttemptTracer:: OptionalLabelKey::kSize) <= diff --git a/src/cpp/ext/otel/otel_plugin.h b/src/cpp/ext/otel/otel_plugin.h index 2ffd50f659c4f..37185040c8ca9 100644 --- a/src/cpp/ext/otel/otel_plugin.h +++ b/src/cpp/ext/otel/otel_plugin.h @@ -119,6 +119,7 @@ class InternalOpenTelemetryPluginOption absl::string_view OpenTelemetryMethodKey(); absl::string_view OpenTelemetryStatusKey(); absl::string_view OpenTelemetryTargetKey(); +absl::string_view OpenTelemetryRetryType(); class OpenTelemetryPluginBuilderImpl { public: @@ -356,6 +357,10 @@ class OpenTelemetryPluginImpl }; struct ClientMetrics { + struct Call { + std::unique_ptr> retries; + std::unique_ptr> retry_delay; + } call; struct Attempt { std::unique_ptr> started; std::unique_ptr> duration; diff --git a/test/cpp/ext/otel/BUILD b/test/cpp/ext/otel/BUILD index b0141684cc136..4740f2803d1b2 100644 --- a/test/cpp/ext/otel/BUILD +++ b/test/cpp/ext/otel/BUILD @@ -67,6 +67,7 @@ grpc_cc_test( "//:grpc++", "//src/cpp/ext/otel:otel_plugin", "//test/core/promise:test_context", + "//test/core/test_util:fail_first_call_filter", "//test/core/test_util:grpc_test_util", ], ) diff --git a/test/cpp/ext/otel/otel_plugin_test.cc b/test/cpp/ext/otel/otel_plugin_test.cc index f3f07a5ffd321..ca393f30a2f86 100644 --- a/test/cpp/ext/otel/otel_plugin_test.cc +++ b/test/cpp/ext/otel/otel_plugin_test.cc @@ -40,6 +40,7 @@ #include "src/core/config/core_configuration.h" #include "src/core/lib/event_engine/channel_args_endpoint_config.h" #include "src/core/telemetry/call_tracer.h" +#include "test/core/test_util/fail_first_call_filter.h" #include "test/core/test_util/fake_stats_plugin.h" #include "test/core/test_util/test_config.h" #include "test/cpp/end2end/test_service_impl.h" @@ -199,6 +200,18 @@ MATCHER_P7(GaugeDataIsIncrementalForSpecificMetricAndLabelSet, metric_name, return result; } +MATCHER_P2(IsWithinRange, lo, hi, + absl::StrCat(negation ? "isn't" : "is", " between ", + ::testing::PrintToString(lo), " and ", + ::testing::PrintToString(hi))) { + return (lo) <= arg && arg <= (hi); +} + +template +struct Extract> { + using Type = T; +}; + TEST(OpenTelemetryPluginBuildTest, ApiDependency) { opentelemetry::metrics::Provider::GetMeterProvider(); } @@ -463,6 +476,265 @@ TEST_F(OpenTelemetryPluginEnd2EndTest, EXPECT_EQ(*status_value, "OK"); } +TEST_F(OpenTelemetryPluginEnd2EndTest, RetryStatsWithoutRetries) { + Init(std::move(Options().set_metric_names( + {grpc::OpenTelemetryPluginBuilder::kClientCallRetries, + grpc::OpenTelemetryPluginBuilder::kClientCallRetryDelay}))); + for (int i = 0; i < 5; ++i) { + SendRPC(); + } + const char* kRetryMetricName = "grpc.client.call.retries"; + const char* kRetryDelayMetricName = "grpc.client.call.retry_delay"; + auto data = ReadCurrentMetricsData( + [&](const absl::flat_hash_map< + std::string, + std::vector>& + data) { + return !data.contains(kRetryMetricName) || + !data.contains(kRetryDelayMetricName); + }); + ASSERT_EQ(data.size(), 2); + // Both transparent retry stats and non-transparent retry stats are populated + // under "grpc.client.call.retries" + EXPECT_THAT( + data[kRetryMetricName], + ::testing::UnorderedElementsAre( + ::testing::AllOf( + AttributesEq( + /*label_keys=*/std::array{"grpc.method", "grpc.target", + "grpc.retry_type"}, + /*label_values=*/ + std::array{ + kMethodName, canonical_server_address_, "retry"}, + /*optional_label_keys=*/std::array{}, + /*optional_label_values=*/std::array{}), + HistogramResultEq(/*sum_matcher=*/::testing::Eq(int64_t(0)), + /*min_matcher=*/::testing::Eq(int64_t(0)), + /*max_matcher=*/::testing::Eq(int64_t(0)), + /*count=*/5)), + ::testing::AllOf( + AttributesEq( + /*label_keys=*/std::array{"grpc.method", "grpc.target", + "grpc.retry_type"}, + /*label_values=*/ + std::array{ + kMethodName, canonical_server_address_, "transparent"}, + /*optional_label_keys=*/std::array{}, + /*optional_label_values=*/std::array{}), + HistogramResultEq(/*sum_matcher=*/::testing::Eq(int64_t(0)), + /*min_matcher=*/::testing::Eq(int64_t(0)), + /*max_matcher=*/::testing::Eq(int64_t(0)), + /*count=*/5)))); + // Check for retry delay stats. + EXPECT_THAT( + data[kRetryDelayMetricName], + ::testing::ElementsAre(::testing::AllOf( + AttributesEq( + /*label_keys=*/std::array{"grpc.method", + "grpc.target"}, + /*label_values=*/ + std::array{kMethodName, + canonical_server_address_}, + /*optional_label_keys=*/std::array{}, + /*optional_label_values=*/std::array{}), + HistogramResultEq(/*sum_matcher=*/::testing::DoubleNear(0, 1e-9), + /*min_matcher=*/::testing::DoubleNear(0, 1e-9), + /*max_matcher=*/::testing::DoubleNear(0, 1e-9), + /*count=*/5)))); +} + +TEST_F(OpenTelemetryPluginEnd2EndTest, RetryStatsWithRetries) { + Init(std::move( + Options() + .set_metric_names( + {grpc::OpenTelemetryPluginBuilder::kClientCallRetries, + grpc::OpenTelemetryPluginBuilder::kClientCallRetryDelay}) + .set_service_config( + "{\n" + " \"methodConfig\": [ {\n" + " \"name\": [\n" + " { \"service\": \"grpc.testing.EchoTestService\" }\n" + " ],\n" + " \"retryPolicy\": {\n" + " \"maxAttempts\": 3,\n" + " \"initialBackoff\": \"0.1s\",\n" + " \"maxBackoff\": \"120s\",\n" + " \"backoffMultiplier\": 1,\n" + " \"retryableStatusCodes\": [ \"ABORTED\" ]\n" + " }\n" + " } ]\n" + "}"))); + EchoRequest request; + request.mutable_param()->mutable_expected_error()->set_code( + StatusCode::ABORTED); + request.set_message("foo"); + EchoResponse response; + for (int i = 0; i < 5; ++i) { + grpc::ClientContext context; + grpc::Status status = stub_->Echo(&context, request, &response); + EXPECT_EQ(status.error_code(), StatusCode::ABORTED); + } + const char* kRetryMetricName = "grpc.client.call.retries"; + const char* kRetryDelayMetricName = "grpc.client.call.retry_delay"; + auto data = ReadCurrentMetricsData( + [&](const absl::flat_hash_map< + std::string, + std::vector>& + data) { + return !data.contains(kRetryMetricName) || + !data.contains(kRetryDelayMetricName); + }); + ASSERT_EQ(data.size(), 2); + // Both transparent retry stats and non-transparent retry stats are populated + // under "grpc.client.call.retries" + EXPECT_THAT( + data[kRetryMetricName], + ::testing::UnorderedElementsAre( + ::testing::AllOf( + AttributesEq( + /*label_keys=*/std::array{"grpc.method", "grpc.target", + "grpc.retry_type"}, + /*label_values=*/ + std::array{ + kMethodName, canonical_server_address_, "retry"}, + /*optional_label_keys=*/std::array{}, + /*optional_label_values=*/std::array{}), + HistogramResultEq(/*sum_matcher=*/::testing::Eq(int64_t(10)), + /*min_matcher=*/::testing::Eq(int64_t(2)), + /*max_matcher=*/::testing::Eq(int64_t(2)), + /*count=*/5)), + ::testing::AllOf( + AttributesEq( + /*label_keys=*/std::array{"grpc.method", "grpc.target", + "grpc.retry_type"}, + /*label_values=*/ + std::array{ + kMethodName, canonical_server_address_, "transparent"}, + /*optional_label_keys=*/std::array{}, + /*optional_label_values=*/std::array{}), + HistogramResultEq(/*sum_matcher=*/::testing::Eq(int64_t(0)), + /*min_matcher=*/::testing::Eq(int64_t(0)), + /*max_matcher=*/::testing::Eq(int64_t(0)), + /*count=*/5)))); + // Check for retry delay stats. + EXPECT_THAT( + data[kRetryDelayMetricName], + ::testing::ElementsAre(::testing::AllOf( + AttributesEq( + /*label_keys=*/std::array{"grpc.method", + "grpc.target"}, + /*label_values=*/ + std::array{kMethodName, + canonical_server_address_}, + /*optional_label_keys=*/std::array{}, + /*optional_label_values=*/std::array{}), + HistogramResultEq( + /*sum_matcher=*/IsWithinRange( + 0.1 * 5, 0.3 * 5 * grpc_test_slowdown_factor()), + /*min_matcher=*/ + IsWithinRange(0.1, 0.3 * grpc_test_slowdown_factor()), + /*max_matcher=*/ + IsWithinRange(0.1, 0.3 * grpc_test_slowdown_factor()), + /*count=*/5)))); +} + +class OTelMetricsTestForTransparentRetries + : public OpenTelemetryPluginEnd2EndTest { + protected: + void SetUp() override { + grpc_core::CoreConfiguration::RegisterBuilder( + [](grpc_core::CoreConfiguration::Builder* builder) { + // Register FailFirstCallFilter to simulate transparent retries. + builder->channel_init()->RegisterFilter( + GRPC_CLIENT_SUBCHANNEL, + &grpc_core::testing::FailFirstCallFilter::kFilterVtable); + }); + OpenTelemetryPluginEnd2EndTest::SetUp(); + } +}; + +TEST_F(OTelMetricsTestForTransparentRetries, RetryStatsWithTransparentRetries) { + Init(std::move(Options().set_metric_names( + {grpc::OpenTelemetryPluginBuilder::kClientCallRetries, + grpc::OpenTelemetryPluginBuilder::kClientCallRetryDelay}))); + for (int i = 0; i < 5; ++i) { + ChannelArguments args; + args.SetInt(GRPC_ARG_USE_LOCAL_SUBCHANNEL_POOL, 1); + auto channel = grpc::CreateCustomChannel( + server_address_, grpc::InsecureChannelCredentials(), args); + ResetStub(std::move(channel)); + SendRPC(); + } + const char* kRetryMetricName = "grpc.client.call.retries"; + const char* kRetryDelayMetricName = "grpc.client.call.retry_delay"; + auto data = ReadCurrentMetricsData( + [&](const absl::flat_hash_map< + std::string, + std::vector>& + data) { + return !data.contains(kRetryMetricName) || + !data.contains(kRetryDelayMetricName); + }); + ASSERT_EQ(data.size(), 2); + // Both transparent retry stats and non-transparent retry stats are populated + // under "grpc.client.call.retries" + EXPECT_THAT( + data[kRetryMetricName], + ::testing::UnorderedElementsAre( + ::testing::AllOf( + AttributesEq( + /*label_keys=*/std::array{"grpc.method", "grpc.target", + "grpc.retry_type"}, + /*label_values=*/ + std::array{ + kMethodName, canonical_server_address_, "retry"}, + /*optional_label_keys=*/std::array{}, + /*optional_label_values=*/std::array{}), + HistogramResultEq(/*sum_matcher=*/::testing::Eq(int64_t(0)), + /*min_matcher=*/::testing::Eq(int64_t(0)), + /*max_matcher=*/::testing::Eq(int64_t(0)), + /*count=*/5)), + ::testing::AllOf( + AttributesEq( + /*label_keys=*/std::array{"grpc.method", "grpc.target", + "grpc.retry_type"}, + /*label_values=*/ + std::array{ + kMethodName, canonical_server_address_, "transparent"}, + /*optional_label_keys=*/std::array{}, + /*optional_label_values=*/std::array{}), + HistogramResultEq(/*sum_matcher=*/::testing::Eq(int64_t(5)), + /*min_matcher=*/::testing::Eq(int64_t(1)), + /*max_matcher=*/::testing::Eq(int64_t(1)), + /*count=*/5)))); + // Check for retry delay stats. + EXPECT_THAT( + data[kRetryDelayMetricName], + ::testing::ElementsAre(::testing::AllOf( + AttributesEq( + /*label_keys=*/std::array{"grpc.method", + "grpc.target"}, + /*label_values=*/ + std::array{kMethodName, + canonical_server_address_}, + /*optional_label_keys=*/std::array{}, + /*optional_label_values=*/std::array{}), + HistogramResultEq( + /*sum_matcher=*/::testing::DoubleNear( + 0, 0.1 * grpc_test_slowdown_factor()), + /*min_matcher=*/ + ::testing::DoubleNear(0, 0.1 * grpc_test_slowdown_factor()), + /*max_matcher=*/ + ::testing::DoubleNear(0, 0.1 * grpc_test_slowdown_factor()), + /*count=*/5)))); +} + // Make sure that no meter provider results in normal operations. TEST_F(OpenTelemetryPluginEnd2EndTest, NoMeterProviderRegistered) { Init( diff --git a/test/cpp/ext/otel/otel_test_library.cc b/test/cpp/ext/otel/otel_test_library.cc index 6448b48eecf7f..080b25112e02b 100644 --- a/test/cpp/ext/otel/otel_test_library.cc +++ b/test/cpp/ext/otel/otel_test_library.cc @@ -136,7 +136,6 @@ OpenTelemetryPluginEnd2EndTest::MetricsCollectorThread::Stop() { } void OpenTelemetryPluginEnd2EndTest::Init(Options config) { - grpc_core::CoreConfiguration::Reset(); ChannelArguments channel_args; if (!config.labels_to_inject.empty()) { labels_to_inject_ = std::move(config.labels_to_inject); @@ -182,6 +181,7 @@ void OpenTelemetryPluginEnd2EndTest::TearDown() { grpc_core::ServerCallTracerFactory::TestOnlyReset(); grpc_core::GlobalStatsPluginRegistryTestPeer:: ResetGlobalStatsPluginRegistry(); + grpc_core::CoreConfiguration::Reset(); } void OpenTelemetryPluginEnd2EndTest::ResetStub( From c52578c3b6d4717261e62964dcb2d66d5cf81fb3 Mon Sep 17 00:00:00 2001 From: Yash Tibrewal Date: Wed, 9 Apr 2025 21:36:40 +0000 Subject: [PATCH 02/14] Add experimental in description --- src/cpp/ext/otel/otel_plugin.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/cpp/ext/otel/otel_plugin.cc b/src/cpp/ext/otel/otel_plugin.cc index b91393282e623..556b08e6825f7 100644 --- a/src/cpp/ext/otel/otel_plugin.cc +++ b/src/cpp/ext/otel/otel_plugin.cc @@ -507,14 +507,15 @@ OpenTelemetryPluginImpl::OpenTelemetryPluginImpl( grpc::OpenTelemetryPluginBuilder::kClientCallRetries)) { client_.call.retries = meter->CreateUInt64Histogram( std::string(grpc::OpenTelemetryPluginBuilder::kClientCallRetries), - "Number of retry attempts made during the call", "{retry}"); + "EXPERIMENTAL: Number of retry attempts made during the call", + "{retry}"); } if (metrics.contains( grpc::OpenTelemetryPluginBuilder::kClientCallRetryDelay)) { client_.call.retry_delay = meter->CreateDoubleHistogram( std::string(grpc::OpenTelemetryPluginBuilder::kClientCallRetryDelay), - "Total time of delay while there is no active attempt during the " - "client call", + "EXPERIMENTAL: Total time of delay while there is no active attempt " + "during the client call", "s"); } // Store optional label keys for per call metrics From 46b07151fa7a07c6e3a8acf9771d620cae1b243e Mon Sep 17 00:00:00 2001 From: Yash Tibrewal Date: Mon, 14 Apr 2025 21:57:47 +0000 Subject: [PATCH 03/14] No need to send 5 RPCs --- test/cpp/ext/otel/otel_plugin_test.cc | 57 +++++++-------- test/cpp/ext/otel/otel_test_library.cc | 99 ++++++++++++++++++++++++++ test/cpp/ext/otel/otel_test_library.h | 12 ++++ 3 files changed, 138 insertions(+), 30 deletions(-) diff --git a/test/cpp/ext/otel/otel_plugin_test.cc b/test/cpp/ext/otel/otel_plugin_test.cc index 8b26b3907bf6b..88f488f8d9268 100644 --- a/test/cpp/ext/otel/otel_plugin_test.cc +++ b/test/cpp/ext/otel/otel_plugin_test.cc @@ -318,9 +318,7 @@ TEST_F(OpenTelemetryPluginEnd2EndTest, RetryStatsWithoutRetries) { Init(std::move(Options().set_metric_names( {grpc::OpenTelemetryPluginBuilder::kClientCallRetries, grpc::OpenTelemetryPluginBuilder::kClientCallRetryDelay}))); - for (int i = 0; i < 5; ++i) { - SendRPC(); - } + SendRPC(); const char* kRetryMetricName = "grpc.client.call.retries"; const char* kRetryDelayMetricName = "grpc.client.call.retry_delay"; auto data = ReadCurrentMetricsData( @@ -350,7 +348,7 @@ TEST_F(OpenTelemetryPluginEnd2EndTest, RetryStatsWithoutRetries) { HistogramResultEq(/*sum_matcher=*/::testing::Eq(int64_t(0)), /*min_matcher=*/::testing::Eq(int64_t(0)), /*max_matcher=*/::testing::Eq(int64_t(0)), - /*count=*/5)), + /*count=*/1)), ::testing::AllOf( AttributesEq( /*label_keys=*/std::arraymutable_expected_error()->set_code( - StatusCode::ABORTED); - request.set_message("foo"); - EchoResponse response; - for (int i = 0; i < 5; ++i) { + { + EchoRequest request; + request.mutable_param()->mutable_expected_error()->set_code( + StatusCode::ABORTED); + request.set_message("foo"); + EchoResponse response; grpc::ClientContext context; grpc::Status status = stub_->Echo(&context, request, &response); EXPECT_EQ(status.error_code(), StatusCode::ABORTED); @@ -424,6 +422,7 @@ TEST_F(OpenTelemetryPluginEnd2EndTest, RetryStatsWithRetries) { return !data.contains(kRetryMetricName) || !data.contains(kRetryDelayMetricName); }); + ASSERT_EQ(data.size(), 2); // Both transparent retry stats and non-transparent retry stats are populated // under "grpc.client.call.retries" @@ -440,10 +439,10 @@ TEST_F(OpenTelemetryPluginEnd2EndTest, RetryStatsWithRetries) { kMethodName, canonical_server_address_, "retry"}, /*optional_label_keys=*/std::array{}, /*optional_label_values=*/std::array{}), - HistogramResultEq(/*sum_matcher=*/::testing::Eq(int64_t(10)), + HistogramResultEq(/*sum_matcher=*/::testing::Eq(int64_t(2)), /*min_matcher=*/::testing::Eq(int64_t(2)), /*max_matcher=*/::testing::Eq(int64_t(2)), - /*count=*/5)), + /*count=*/1)), ::testing::AllOf( AttributesEq( /*label_keys=*/std::array{}, /*optional_label_values=*/std::array{}), HistogramResultEq( - /*sum_matcher=*/IsWithinRange( - 0.1 * 5, 0.3 * 5 * grpc_test_slowdown_factor()), + /*sum_matcher=*/IsWithinRange(0.1, + 0.3 * grpc_test_slowdown_factor()), /*min_matcher=*/ IsWithinRange(0.1, 0.3 * grpc_test_slowdown_factor()), /*max_matcher=*/ IsWithinRange(0.1, 0.3 * grpc_test_slowdown_factor()), - /*count=*/5)))); + /*count=*/1)))); } class OTelMetricsTestForTransparentRetries @@ -499,14 +498,12 @@ TEST_F(OTelMetricsTestForTransparentRetries, RetryStatsWithTransparentRetries) { Init(std::move(Options().set_metric_names( {grpc::OpenTelemetryPluginBuilder::kClientCallRetries, grpc::OpenTelemetryPluginBuilder::kClientCallRetryDelay}))); - for (int i = 0; i < 5; ++i) { - ChannelArguments args; - args.SetInt(GRPC_ARG_USE_LOCAL_SUBCHANNEL_POOL, 1); - auto channel = grpc::CreateCustomChannel( - server_address_, grpc::InsecureChannelCredentials(), args); - ResetStub(std::move(channel)); - SendRPC(); - } + ChannelArguments args; + args.SetInt(GRPC_ARG_USE_LOCAL_SUBCHANNEL_POOL, 1); + auto channel = grpc::CreateCustomChannel( + server_address_, grpc::InsecureChannelCredentials(), args); + ResetStub(std::move(channel)); + SendRPC(); const char* kRetryMetricName = "grpc.client.call.retries"; const char* kRetryDelayMetricName = "grpc.client.call.retry_delay"; auto data = ReadCurrentMetricsData( @@ -536,7 +533,7 @@ TEST_F(OTelMetricsTestForTransparentRetries, RetryStatsWithTransparentRetries) { HistogramResultEq(/*sum_matcher=*/::testing::Eq(int64_t(0)), /*min_matcher=*/::testing::Eq(int64_t(0)), /*max_matcher=*/::testing::Eq(int64_t(0)), - /*count=*/5)), + /*count=*/1)), ::testing::AllOf( AttributesEq( /*label_keys=*/std::array{}, /*optional_label_values=*/std::array{}), - HistogramResultEq(/*sum_matcher=*/::testing::Eq(int64_t(5)), + HistogramResultEq(/*sum_matcher=*/::testing::Eq(int64_t(1)), /*min_matcher=*/::testing::Eq(int64_t(1)), /*max_matcher=*/::testing::Eq(int64_t(1)), - /*count=*/5)))); + /*count=*/1)))); // Check for retry delay stats. EXPECT_THAT( data[kRetryDelayMetricName], @@ -570,7 +567,7 @@ TEST_F(OTelMetricsTestForTransparentRetries, RetryStatsWithTransparentRetries) { ::testing::DoubleNear(0, 0.1 * grpc_test_slowdown_factor()), /*max_matcher=*/ ::testing::DoubleNear(0, 0.1 * grpc_test_slowdown_factor()), - /*count=*/5)))); + /*count=*/1)))); } // Make sure that no meter provider results in normal operations. diff --git a/test/cpp/ext/otel/otel_test_library.cc b/test/cpp/ext/otel/otel_test_library.cc index 080b25112e02b..cc93e10690e8a 100644 --- a/test/cpp/ext/otel/otel_test_library.cc +++ b/test/cpp/ext/otel/otel_test_library.cc @@ -39,6 +39,105 @@ #include "test/cpp/end2end/test_service_impl.h" #include "test/cpp/util/byte_buffer_proto_helper.h" +namespace { + +template +std::string ToString(T value) { + return std::to_string(value); +} + +template <> +std::string ToString(bool value) { + return value ? "true" : "false"; +} + +template <> +std::string ToString(std::string value) { + return value; +} + +template +std::string ToString(const std::vector& value) { + std::vector parts; + parts.reserve(value.size()); + for (const auto& item : value) { + parts.push_back(ToString(item)); + } + return absl::StrCat("[", absl::StrJoin(parts, ", "), "]"); +} + +std::string ToString( + const opentelemetry::sdk::common::OwnedAttributeValue& value) { + return std::visit([](const auto& value) { return ToString(value); }, value); +} + +std::string ToString( + const opentelemetry::sdk::metrics::PointAttributes& point_attributes) { + std::vector parts; + for (const auto& [key, value] : point_attributes.GetAttributes()) { + parts.push_back(absl::StrCat("{\"", key, "\", ", ToString(value), "}")); + } + return absl::StrJoin(parts, ", "); +} + +std::string ToString(const opentelemetry::sdk::metrics::ValueType& value) { + return std::visit([](const auto& value) { return std::to_string(value); }, + value); +} + +struct PointTypeVisitor { + std::string operator()( + const opentelemetry::sdk::metrics::SumPointData& point) { + return absl::StrFormat("{value = %s, is_monotonic = %s}", + ToString(point.value_), + ToString(point.is_monotonic_)); + } + + std::string operator()( + const opentelemetry::sdk::metrics::LastValuePointData& point) { + return absl::StrFormat( + "{value = %s, is_lastvalue_valid = %s, sample_ts = %ldns}", + ToString(point.value_), ToString(point.is_lastvalue_valid_), + point.sample_ts_.time_since_epoch().count()); + } + + std::string operator()( + const opentelemetry::sdk::metrics::HistogramPointData& point) { + return absl::StrFormat( + "{boundaries = %s, sum = %s, min = %s, max = %s, counts = %s, count = " + "%ld, record_min_max = %s}", + ToString(point.boundaries_), ToString(point.sum_), ToString(point.min_), + ToString(point.max_), ToString(point.counts_), point.count_, + ToString(point.record_min_max_)); + } + + std::string operator()( + const opentelemetry::sdk::metrics::DropPointData& point) { + return ""; + } +}; + +std::string ToString(const opentelemetry::sdk::metrics::PointType& point_type) { + return std::visit(PointTypeVisitor(), point_type); +} + +} // namespace + +OPENTELEMETRY_BEGIN_NAMESPACE +namespace sdk { +namespace metrics { + +void PrintTo(const PointDataAttributes& point_data_attributes, + std::ostream* os) { + *os << "{attributes = {" << ToString(point_data_attributes.attributes) + << "}, point_data = {" << ToString(point_data_attributes.point_data) + << "}}"; +} + +} // namespace metrics +} // namespace sdk +OPENTELEMETRY_END_NAMESPACE + namespace grpc { namespace testing { diff --git a/test/cpp/ext/otel/otel_test_library.h b/test/cpp/ext/otel/otel_test_library.h index 74cdb28ebfa1f..50332c1214aa4 100644 --- a/test/cpp/ext/otel/otel_test_library.h +++ b/test/cpp/ext/otel/otel_test_library.h @@ -32,12 +32,24 @@ #include "opentelemetry/metrics/provider.h" #include "opentelemetry/sdk/metrics/meter_provider.h" #include "opentelemetry/sdk/metrics/metric_reader.h" +#include "opentelemetry/version.h" #include "src/core/config/core_configuration.h" #include "src/core/telemetry/call_tracer.h" #include "src/cpp/ext/otel/otel_plugin.h" #include "test/core/test_util/test_config.h" #include "test/cpp/end2end/test_service_impl.h" +OPENTELEMETRY_BEGIN_NAMESPACE +namespace sdk { +namespace metrics { + +void PrintTo(const PointDataAttributes& point_data_attributes, + std::ostream* os); + +} // namespace metrics +} // namespace sdk +OPENTELEMETRY_END_NAMESPACE + namespace grpc { namespace testing { From ddc6e49d6bc0011d102683bc1eaa348409b33073 Mon Sep 17 00:00:00 2001 From: Yash Tibrewal Date: Tue, 1 Jul 2025 17:39:09 -0700 Subject: [PATCH 04/14] Fix up implementation --- include/grpcpp/ext/otel_plugin.h | 7 +- src/cpp/ext/otel/otel_client_call_tracer.cc | 18 +- src/cpp/ext/otel/otel_client_call_tracer.h | 2 +- src/cpp/ext/otel/otel_plugin.cc | 28 ++- src/cpp/ext/otel/otel_plugin.h | 3 +- test/cpp/ext/otel/otel_plugin_test.cc | 206 ++++++-------------- test/cpp/ext/otel/otel_test_library.cc | 2 - 7 files changed, 101 insertions(+), 165 deletions(-) diff --git a/include/grpcpp/ext/otel_plugin.h b/include/grpcpp/ext/otel_plugin.h index 72d947e7dd610..8626115355984 100644 --- a/include/grpcpp/ext/otel_plugin.h +++ b/include/grpcpp/ext/otel_plugin.h @@ -101,9 +101,12 @@ class OpenTelemetryPluginBuilder { "grpc.server.call.rcvd_total_compressed_message_size"; /// Experimental Retry Metrics - static constexpr absl::string_view kClientCallRetries = + static constexpr absl::string_view kClientCallRetriesInstrumentName = "grpc.client.call.retries"; - static constexpr absl::string_view kClientCallRetryDelay = + static constexpr absl::string_view + kClientCallTransparentRetriesInstrumentName = + "grpc.client.call.transparent_retries"; + static constexpr absl::string_view kClientCallRetryDelayInstrumentName = "grpc.client.call.retry_delay"; OpenTelemetryPluginBuilder(); diff --git a/src/cpp/ext/otel/otel_client_call_tracer.cc b/src/cpp/ext/otel/otel_client_call_tracer.cc index 5b0ea6a226620..208c385522f34 100644 --- a/src/cpp/ext/otel/otel_client_call_tracer.cc +++ b/src/cpp/ext/otel/otel_client_call_tracer.cc @@ -447,23 +447,25 @@ OpenTelemetryPluginImpl::ClientCallTracer::ClientCallTracer( } OpenTelemetryPluginImpl::ClientCallTracer::~ClientCallTracer() { - if (otel_plugin_->client_.call.retries != nullptr) { + if (otel_plugin_->client_.call.retries != nullptr && retries_ > 1) { otel_plugin_->client_.call.retries->Record( retries_ - 1, std::array, 3>{ {{OpenTelemetryMethodKey(), MethodForStats()}, - {OpenTelemetryTargetKey(), scope_config_->filtered_target()}, - {OpenTelemetryRetryType(), "retry"}}}, + {OpenTelemetryTargetKey(), scope_config_->filtered_target()}}}, opentelemetry::context::Context{}); - otel_plugin_->client_.call.retries->Record( + } + if (otel_plugin_->client_.call.transparent_retries != nullptr && + transparent_retries_ != 0) { + otel_plugin_->client_.call.transparent_retries->Record( transparent_retries_, std::array, 3>{ {{OpenTelemetryMethodKey(), MethodForStats()}, - {OpenTelemetryTargetKey(), scope_config_->filtered_target()}, - {OpenTelemetryRetryType(), "transparent"}}}, + {OpenTelemetryTargetKey(), scope_config_->filtered_target()}}}, opentelemetry::context::Context{}); } - if (otel_plugin_->client_.call.retry_delay != nullptr) { + if (otel_plugin_->client_.call.retry_delay != nullptr && + retry_delay_ != absl::ZeroDuration() && retries_ > 1) { otel_plugin_->client_.call.retry_delay->Record( absl::ToDoubleSeconds(retry_delay_), std::array, 2>{ @@ -488,7 +490,7 @@ OpenTelemetryPluginImpl::ClientCallTracer::StartNewAttempt( grpc_core::MutexLock lock(&mu_); if (transparent_retries_ != 0 || retries_ != 0) { is_first_attempt = false; - if (num_active_attempts_ == 0) { + if (num_active_attempts_ == 0 && !is_transparent_retry) { retry_delay_ += absl::Now() - time_at_last_attempt_end_; } } diff --git a/src/cpp/ext/otel/otel_client_call_tracer.h b/src/cpp/ext/otel/otel_client_call_tracer.h index d0b79c1c95c70..c0e9f961f30fd 100644 --- a/src/cpp/ext/otel/otel_client_call_tracer.h +++ b/src/cpp/ext/otel/otel_client_call_tracer.h @@ -158,7 +158,7 @@ class OpenTelemetryPluginImpl::ClientCallTracer OpenTelemetryPluginImpl* otel_plugin_; std::shared_ptr scope_config_; grpc_core::Mutex mu_; - // Non-transparent attempts per call (including first attempt) + // Non-transparent retry attempts per call (includes initial attempt) uint64_t retries_ ABSL_GUARDED_BY(&mu_) = 0; // Transparent retries per call uint64_t transparent_retries_ ABSL_GUARDED_BY(&mu_) = 0; diff --git a/src/cpp/ext/otel/otel_plugin.cc b/src/cpp/ext/otel/otel_plugin.cc index acb4e8b9132e4..1f31c3230104f 100644 --- a/src/cpp/ext/otel/otel_plugin.cc +++ b/src/cpp/ext/otel/otel_plugin.cc @@ -59,8 +59,6 @@ absl::string_view OpenTelemetryStatusKey() { return "grpc.status"; } absl::string_view OpenTelemetryTargetKey() { return "grpc.target"; } -absl::string_view OpenTelemetryRetryType() { return "grpc.retry_type"; } - namespace { absl::flat_hash_set BaseMetrics() { absl::flat_hash_set base_metrics{ @@ -504,17 +502,29 @@ OpenTelemetryPluginImpl::OpenTelemetryPluginImpl( kServerCallRcvdTotalCompressedMessageSizeInstrumentName), "Compressed message bytes received per server call", "By"); } - if (metrics.contains( - grpc::OpenTelemetryPluginBuilder::kClientCallRetries)) { + if (metrics.contains(grpc::OpenTelemetryPluginBuilder:: + kClientCallRetriesInstrumentName)) { client_.call.retries = meter->CreateUInt64Histogram( - std::string(grpc::OpenTelemetryPluginBuilder::kClientCallRetries), - "EXPERIMENTAL: Number of retry attempts made during the call", + std::string(grpc::OpenTelemetryPluginBuilder:: + kClientCallRetriesInstrumentName), + "EXPERIMENTAL: Number of retries during the client call. If there " + "were no retries, 0 is not reported.", "{retry}"); } - if (metrics.contains( - grpc::OpenTelemetryPluginBuilder::kClientCallRetryDelay)) { + if (metrics.contains(grpc::OpenTelemetryPluginBuilder:: + kClientCallTransparentRetriesInstrumentName)) { + client_.call.transparent_retries = meter->CreateUInt64Histogram( + std::string(grpc::OpenTelemetryPluginBuilder:: + kClientCallTransparentRetriesInstrumentName), + "EXPERIMENTAL: Number of transparent retries during the client call. " + "If there were no transparent retries, 0 is not reported.", + "{transparent_retry}"); + } + if (metrics.contains(grpc::OpenTelemetryPluginBuilder:: + kClientCallRetryDelayInstrumentName)) { client_.call.retry_delay = meter->CreateDoubleHistogram( - std::string(grpc::OpenTelemetryPluginBuilder::kClientCallRetryDelay), + std::string(grpc::OpenTelemetryPluginBuilder:: + kClientCallRetryDelayInstrumentName), "EXPERIMENTAL: Total time of delay while there is no active attempt " "during the client call", "s"); diff --git a/src/cpp/ext/otel/otel_plugin.h b/src/cpp/ext/otel/otel_plugin.h index 69751ad6f486c..40c0eb3cfbf6f 100644 --- a/src/cpp/ext/otel/otel_plugin.h +++ b/src/cpp/ext/otel/otel_plugin.h @@ -119,7 +119,6 @@ class InternalOpenTelemetryPluginOption absl::string_view OpenTelemetryMethodKey(); absl::string_view OpenTelemetryStatusKey(); absl::string_view OpenTelemetryTargetKey(); -absl::string_view OpenTelemetryRetryType(); class OpenTelemetryPluginBuilderImpl { public: @@ -359,6 +358,8 @@ class OpenTelemetryPluginImpl struct ClientMetrics { struct Call { std::unique_ptr> retries; + std::unique_ptr> + transparent_retries; std::unique_ptr> retry_delay; } call; struct Attempt { diff --git a/test/cpp/ext/otel/otel_plugin_test.cc b/test/cpp/ext/otel/otel_plugin_test.cc index 88f488f8d9268..1a0942d2f5655 100644 --- a/test/cpp/ext/otel/otel_plugin_test.cc +++ b/test/cpp/ext/otel/otel_plugin_test.cc @@ -314,79 +314,45 @@ TEST_F(OpenTelemetryPluginEnd2EndTest, EXPECT_EQ(*status_value, "OK"); } +// When there are no retries, no retry stats are exported. TEST_F(OpenTelemetryPluginEnd2EndTest, RetryStatsWithoutRetries) { Init(std::move(Options().set_metric_names( - {grpc::OpenTelemetryPluginBuilder::kClientCallRetries, - grpc::OpenTelemetryPluginBuilder::kClientCallRetryDelay}))); + {grpc::OpenTelemetryPluginBuilder::kClientAttemptDurationInstrumentName, + grpc::OpenTelemetryPluginBuilder::kClientCallRetriesInstrumentName, + grpc::OpenTelemetryPluginBuilder:: + kClientCallTransparentRetriesInstrumentName, + grpc::OpenTelemetryPluginBuilder:: + kClientCallRetryDelayInstrumentName}))); SendRPC(); const char* kRetryMetricName = "grpc.client.call.retries"; + const char* kTransparentRetryMetricName = + "grpc.client.call.transparent_retries"; const char* kRetryDelayMetricName = "grpc.client.call.retry_delay"; + const char* kClientAttemptDurationMetricName = "grpc.client.attempt.duration"; auto data = ReadCurrentMetricsData( [&](const absl::flat_hash_map< std::string, std::vector>& data) { - return !data.contains(kRetryMetricName) || - !data.contains(kRetryDelayMetricName); + // Use grpc.client.attempt.duration as a signal that client metrics have + // been collected for the call. + return !data.contains(kClientAttemptDurationMetricName); }); - ASSERT_EQ(data.size(), 2); - // Both transparent retry stats and non-transparent retry stats are populated - // under "grpc.client.call.retries" - EXPECT_THAT( - data[kRetryMetricName], - ::testing::UnorderedElementsAre( - ::testing::AllOf( - AttributesEq( - /*label_keys=*/std::array{"grpc.method", "grpc.target", - "grpc.retry_type"}, - /*label_values=*/ - std::array{ - kMethodName, canonical_server_address_, "retry"}, - /*optional_label_keys=*/std::array{}, - /*optional_label_values=*/std::array{}), - HistogramResultEq(/*sum_matcher=*/::testing::Eq(int64_t(0)), - /*min_matcher=*/::testing::Eq(int64_t(0)), - /*max_matcher=*/::testing::Eq(int64_t(0)), - /*count=*/1)), - ::testing::AllOf( - AttributesEq( - /*label_keys=*/std::array{"grpc.method", "grpc.target", - "grpc.retry_type"}, - /*label_values=*/ - std::array{ - kMethodName, canonical_server_address_, "transparent"}, - /*optional_label_keys=*/std::array{}, - /*optional_label_values=*/std::array{}), - HistogramResultEq(/*sum_matcher=*/::testing::Eq(int64_t(0)), - /*min_matcher=*/::testing::Eq(int64_t(0)), - /*max_matcher=*/::testing::Eq(int64_t(0)), - /*count=*/1)))); - // Check for retry delay stats. - EXPECT_THAT( - data[kRetryDelayMetricName], - ::testing::ElementsAre(::testing::AllOf( - AttributesEq( - /*label_keys=*/std::array{"grpc.method", - "grpc.target"}, - /*label_values=*/ - std::array{kMethodName, - canonical_server_address_}, - /*optional_label_keys=*/std::array{}, - /*optional_label_values=*/std::array{}), - HistogramResultEq(/*sum_matcher=*/::testing::DoubleNear(0, 1e-9), - /*min_matcher=*/::testing::DoubleNear(0, 1e-9), - /*max_matcher=*/::testing::DoubleNear(0, 1e-9), - /*count=*/1)))); + // TODO + EXPECT_FALSE(data.contains(kRetryMetricName)); + EXPECT_FALSE(data.contains(kTransparentRetryMetricName)); + EXPECT_FALSE(data.contains(kRetryDelayMetricName)); } TEST_F(OpenTelemetryPluginEnd2EndTest, RetryStatsWithRetries) { Init(std::move( Options() - .set_metric_names( - {grpc::OpenTelemetryPluginBuilder::kClientCallRetries, - grpc::OpenTelemetryPluginBuilder::kClientCallRetryDelay}) + .set_metric_names({grpc::OpenTelemetryPluginBuilder:: + kClientCallRetriesInstrumentName, + grpc::OpenTelemetryPluginBuilder:: + kClientCallTransparentRetriesInstrumentName, + grpc::OpenTelemetryPluginBuilder:: + kClientCallRetryDelayInstrumentName}) .set_service_config( "{\n" " \"methodConfig\": [ {\n" @@ -413,6 +379,8 @@ TEST_F(OpenTelemetryPluginEnd2EndTest, RetryStatsWithRetries) { EXPECT_EQ(status.error_code(), StatusCode::ABORTED); } const char* kRetryMetricName = "grpc.client.call.retries"; + const char* kTransparentRetryMetricName = + "grpc.client.call.transparent_retries"; const char* kRetryDelayMetricName = "grpc.client.call.retry_delay"; auto data = ReadCurrentMetricsData( [&](const absl::flat_hash_map< @@ -424,39 +392,22 @@ TEST_F(OpenTelemetryPluginEnd2EndTest, RetryStatsWithRetries) { }); ASSERT_EQ(data.size(), 2); - // Both transparent retry stats and non-transparent retry stats are populated - // under "grpc.client.call.retries" + // Check for retry stats EXPECT_THAT( data[kRetryMetricName], - ::testing::UnorderedElementsAre( - ::testing::AllOf( - AttributesEq( - /*label_keys=*/std::array{"grpc.method", "grpc.target", - "grpc.retry_type"}, - /*label_values=*/ - std::array{ - kMethodName, canonical_server_address_, "retry"}, - /*optional_label_keys=*/std::array{}, - /*optional_label_values=*/std::array{}), - HistogramResultEq(/*sum_matcher=*/::testing::Eq(int64_t(2)), - /*min_matcher=*/::testing::Eq(int64_t(2)), - /*max_matcher=*/::testing::Eq(int64_t(2)), - /*count=*/1)), - ::testing::AllOf( - AttributesEq( - /*label_keys=*/std::array{"grpc.method", "grpc.target", - "grpc.retry_type"}, - /*label_values=*/ - std::array{ - kMethodName, canonical_server_address_, "transparent"}, - /*optional_label_keys=*/std::array{}, - /*optional_label_values=*/std::array{}), - HistogramResultEq(/*sum_matcher=*/::testing::Eq(int64_t(0)), - /*min_matcher=*/::testing::Eq(int64_t(0)), - /*max_matcher=*/::testing::Eq(int64_t(0)), - /*count=*/1)))); + ::testing::UnorderedElementsAre(::testing::AllOf( + AttributesEq( + /*label_keys=*/std::array{"grpc.method", + "grpc.target"}, + /*label_values=*/ + std::array{kMethodName, + canonical_server_address_}, + /*optional_label_keys=*/std::array{}, + /*optional_label_values=*/std::array{}), + HistogramResultEq(/*sum_matcher=*/::testing::Eq(int64_t(2)), + /*min_matcher=*/::testing::Eq(int64_t(2)), + /*max_matcher=*/::testing::Eq(int64_t(2)), + /*count=*/1)))); // Check for retry delay stats. EXPECT_THAT( data[kRetryDelayMetricName], @@ -477,13 +428,15 @@ TEST_F(OpenTelemetryPluginEnd2EndTest, RetryStatsWithRetries) { /*max_matcher=*/ IsWithinRange(0.1, 0.3 * grpc_test_slowdown_factor()), /*count=*/1)))); + // No transparent retry stats reported + EXPECT_FALSE(data.contains(kTransparentRetryMetricName)); } class OTelMetricsTestForTransparentRetries : public OpenTelemetryPluginEnd2EndTest { protected: void SetUp() override { - grpc_core::CoreConfiguration::RegisterBuilder( + grpc_core::CoreConfiguration::RegisterEphemeralBuilder( [](grpc_core::CoreConfiguration::Builder* builder) { // Register FailFirstCallFilter to simulate transparent retries. builder->channel_init()->RegisterFilter( @@ -496,8 +449,11 @@ class OTelMetricsTestForTransparentRetries TEST_F(OTelMetricsTestForTransparentRetries, RetryStatsWithTransparentRetries) { Init(std::move(Options().set_metric_names( - {grpc::OpenTelemetryPluginBuilder::kClientCallRetries, - grpc::OpenTelemetryPluginBuilder::kClientCallRetryDelay}))); + {grpc::OpenTelemetryPluginBuilder::kClientCallRetriesInstrumentName, + grpc::OpenTelemetryPluginBuilder:: + kClientCallTransparentRetriesInstrumentName, + grpc::OpenTelemetryPluginBuilder:: + kClientCallRetryDelayInstrumentName}))); ChannelArguments args; args.SetInt(GRPC_ARG_USE_LOCAL_SUBCHANNEL_POOL, 1); auto channel = grpc::CreateCustomChannel( @@ -505,69 +461,35 @@ TEST_F(OTelMetricsTestForTransparentRetries, RetryStatsWithTransparentRetries) { ResetStub(std::move(channel)); SendRPC(); const char* kRetryMetricName = "grpc.client.call.retries"; + const char* kTransparentRetryMetricName = + "grpc.client.call.transparent_retries"; const char* kRetryDelayMetricName = "grpc.client.call.retry_delay"; auto data = ReadCurrentMetricsData( [&](const absl::flat_hash_map< std::string, std::vector>& - data) { - return !data.contains(kRetryMetricName) || - !data.contains(kRetryDelayMetricName); - }); - ASSERT_EQ(data.size(), 2); - // Both transparent retry stats and non-transparent retry stats are populated - // under "grpc.client.call.retries" + data) { return !data.contains(kTransparentRetryMetricName); }); + ASSERT_EQ(data.size(), 1); + // No retry stats reported + EXPECT_FALSE(data.contains(kRetryMetricName)); + // Check for transparent retry stats EXPECT_THAT( - data[kRetryMetricName], - ::testing::UnorderedElementsAre( - ::testing::AllOf( - AttributesEq( - /*label_keys=*/std::array{"grpc.method", "grpc.target", - "grpc.retry_type"}, - /*label_values=*/ - std::array{ - kMethodName, canonical_server_address_, "retry"}, - /*optional_label_keys=*/std::array{}, - /*optional_label_values=*/std::array{}), - HistogramResultEq(/*sum_matcher=*/::testing::Eq(int64_t(0)), - /*min_matcher=*/::testing::Eq(int64_t(0)), - /*max_matcher=*/::testing::Eq(int64_t(0)), - /*count=*/1)), - ::testing::AllOf( - AttributesEq( - /*label_keys=*/std::array{"grpc.method", "grpc.target", - "grpc.retry_type"}, - /*label_values=*/ - std::array{ - kMethodName, canonical_server_address_, "transparent"}, - /*optional_label_keys=*/std::array{}, - /*optional_label_values=*/std::array{}), - HistogramResultEq(/*sum_matcher=*/::testing::Eq(int64_t(1)), - /*min_matcher=*/::testing::Eq(int64_t(1)), - /*max_matcher=*/::testing::Eq(int64_t(1)), - /*count=*/1)))); - // Check for retry delay stats. - EXPECT_THAT( - data[kRetryDelayMetricName], - ::testing::ElementsAre(::testing::AllOf( + data[kTransparentRetryMetricName], + ::testing::UnorderedElementsAre(::testing::AllOf( AttributesEq( - /*label_keys=*/std::array{"grpc.method", + /*label_keys=*/std::array{"grpc.method", "grpc.target"}, /*label_values=*/ - std::array{kMethodName, + std::array{kMethodName, canonical_server_address_}, /*optional_label_keys=*/std::array{}, /*optional_label_values=*/std::array{}), - HistogramResultEq( - /*sum_matcher=*/::testing::DoubleNear( - 0, 0.1 * grpc_test_slowdown_factor()), - /*min_matcher=*/ - ::testing::DoubleNear(0, 0.1 * grpc_test_slowdown_factor()), - /*max_matcher=*/ - ::testing::DoubleNear(0, 0.1 * grpc_test_slowdown_factor()), - /*count=*/1)))); + HistogramResultEq(/*sum_matcher=*/::testing::Eq(int64_t(1)), + /*min_matcher=*/::testing::Eq(int64_t(1)), + /*max_matcher=*/::testing::Eq(int64_t(1)), + /*count=*/1)))); + // No retry delay reported + EXPECT_FALSE(data.contains(kRetryDelayMetricName)); } // Make sure that no meter provider results in normal operations. diff --git a/test/cpp/ext/otel/otel_test_library.cc b/test/cpp/ext/otel/otel_test_library.cc index 0a65628cb284c..ff1c016812d1d 100644 --- a/test/cpp/ext/otel/otel_test_library.cc +++ b/test/cpp/ext/otel/otel_test_library.cc @@ -230,8 +230,6 @@ OpenTelemetryPluginEnd2EndTest::MetricsCollectorThread::Stop() { } void OpenTelemetryPluginEnd2EndTest::Init(Options config) { - grpc_core::CoreConfiguration:: - ResetEverythingIncludingPersistentBuildersAbsolutelyNotRecommended(); ChannelArguments channel_args; if (!config.labels_to_inject.empty()) { labels_to_inject_ = std::move(config.labels_to_inject); From fe69b74b25b27815e2e7e38d13dd6e65f3fb2a25 Mon Sep 17 00:00:00 2001 From: Yash Tibrewal Date: Tue, 1 Jul 2025 17:45:08 -0700 Subject: [PATCH 05/14] Fix TODO --- test/cpp/ext/otel/otel_plugin_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/cpp/ext/otel/otel_plugin_test.cc b/test/cpp/ext/otel/otel_plugin_test.cc index 1a0942d2f5655..4ac927275bb39 100644 --- a/test/cpp/ext/otel/otel_plugin_test.cc +++ b/test/cpp/ext/otel/otel_plugin_test.cc @@ -338,7 +338,7 @@ TEST_F(OpenTelemetryPluginEnd2EndTest, RetryStatsWithoutRetries) { // been collected for the call. return !data.contains(kClientAttemptDurationMetricName); }); - // TODO + EXPECT_TRUE(data.contains(kClientAttemptDurationMetricName)); EXPECT_FALSE(data.contains(kRetryMetricName)); EXPECT_FALSE(data.contains(kTransparentRetryMetricName)); EXPECT_FALSE(data.contains(kRetryDelayMetricName)); From 5904a02479617254dd7826134e698e13501d7443 Mon Sep 17 00:00:00 2001 From: Yash Tibrewal Date: Wed, 2 Jul 2025 11:50:00 -0700 Subject: [PATCH 06/14] Fix sanity --- test/cpp/ext/otel/otel_plugin_test.cc | 66 +++++++++++---------------- 1 file changed, 26 insertions(+), 40 deletions(-) diff --git a/test/cpp/ext/otel/otel_plugin_test.cc b/test/cpp/ext/otel/otel_plugin_test.cc index 4ac927275bb39..d11fe652ee6d2 100644 --- a/test/cpp/ext/otel/otel_plugin_test.cc +++ b/test/cpp/ext/otel/otel_plugin_test.cc @@ -393,41 +393,31 @@ TEST_F(OpenTelemetryPluginEnd2EndTest, RetryStatsWithRetries) { ASSERT_EQ(data.size(), 2); // Check for retry stats - EXPECT_THAT( - data[kRetryMetricName], - ::testing::UnorderedElementsAre(::testing::AllOf( - AttributesEq( - /*label_keys=*/std::array{"grpc.method", - "grpc.target"}, - /*label_values=*/ - std::array{kMethodName, - canonical_server_address_}, - /*optional_label_keys=*/std::array{}, - /*optional_label_values=*/std::array{}), - HistogramResultEq(/*sum_matcher=*/::testing::Eq(int64_t(2)), - /*min_matcher=*/::testing::Eq(int64_t(2)), - /*max_matcher=*/::testing::Eq(int64_t(2)), - /*count=*/1)))); + EXPECT_THAT(data[kRetryMetricName], + ::testing::UnorderedElementsAre(::testing::AllOf( + AttributesEq(std::array{"grpc.method", + "grpc.target"}, + std::array{ + kMethodName, canonical_server_address_}, + std::array{}, + std::array{}), + HistogramResultEq(::testing::Eq(int64_t(2)), + ::testing::Eq(int64_t(2)), + ::testing::Eq(int64_t(2)), 1)))); // Check for retry delay stats. EXPECT_THAT( data[kRetryDelayMetricName], ::testing::ElementsAre(::testing::AllOf( AttributesEq( - /*label_keys=*/std::array{"grpc.method", - "grpc.target"}, - /*label_values=*/ + std::array{"grpc.method", "grpc.target"}, std::array{kMethodName, canonical_server_address_}, - /*optional_label_keys=*/std::array{}, - /*optional_label_values=*/std::array{}), + std::array{}, + std::array{}), HistogramResultEq( - /*sum_matcher=*/IsWithinRange(0.1, - 0.3 * grpc_test_slowdown_factor()), - /*min_matcher=*/ IsWithinRange(0.1, 0.3 * grpc_test_slowdown_factor()), - /*max_matcher=*/ IsWithinRange(0.1, 0.3 * grpc_test_slowdown_factor()), - /*count=*/1)))); + IsWithinRange(0.1, 0.3 * grpc_test_slowdown_factor()), 1)))); // No transparent retry stats reported EXPECT_FALSE(data.contains(kTransparentRetryMetricName)); } @@ -473,21 +463,17 @@ TEST_F(OTelMetricsTestForTransparentRetries, RetryStatsWithTransparentRetries) { // No retry stats reported EXPECT_FALSE(data.contains(kRetryMetricName)); // Check for transparent retry stats - EXPECT_THAT( - data[kTransparentRetryMetricName], - ::testing::UnorderedElementsAre(::testing::AllOf( - AttributesEq( - /*label_keys=*/std::array{"grpc.method", - "grpc.target"}, - /*label_values=*/ - std::array{kMethodName, - canonical_server_address_}, - /*optional_label_keys=*/std::array{}, - /*optional_label_values=*/std::array{}), - HistogramResultEq(/*sum_matcher=*/::testing::Eq(int64_t(1)), - /*min_matcher=*/::testing::Eq(int64_t(1)), - /*max_matcher=*/::testing::Eq(int64_t(1)), - /*count=*/1)))); + EXPECT_THAT(data[kTransparentRetryMetricName], + ::testing::UnorderedElementsAre(::testing::AllOf( + AttributesEq(std::array{"grpc.method", + "grpc.target"}, + std::array{ + kMethodName, canonical_server_address_}, + std::array{}, + std::array{}), + HistogramResultEq(::testing::Eq(int64_t(1)), + ::testing::Eq(int64_t(1)), + ::testing::Eq(int64_t(1)), 1)))); // No retry delay reported EXPECT_FALSE(data.contains(kRetryDelayMetricName)); } From 07dfb46d9fa0d7adc89c007e0ff80bcf1c19f7b6 Mon Sep 17 00:00:00 2001 From: Yash Tibrewal Date: Wed, 2 Jul 2025 11:58:45 -0700 Subject: [PATCH 07/14] Fix build --- src/cpp/ext/otel/otel_client_call_tracer.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cpp/ext/otel/otel_client_call_tracer.cc b/src/cpp/ext/otel/otel_client_call_tracer.cc index 208c385522f34..952832a337032 100644 --- a/src/cpp/ext/otel/otel_client_call_tracer.cc +++ b/src/cpp/ext/otel/otel_client_call_tracer.cc @@ -450,7 +450,7 @@ OpenTelemetryPluginImpl::ClientCallTracer::~ClientCallTracer() { if (otel_plugin_->client_.call.retries != nullptr && retries_ > 1) { otel_plugin_->client_.call.retries->Record( retries_ - 1, - std::array, 3>{ + std::array, 2>{ {{OpenTelemetryMethodKey(), MethodForStats()}, {OpenTelemetryTargetKey(), scope_config_->filtered_target()}}}, opentelemetry::context::Context{}); @@ -459,7 +459,7 @@ OpenTelemetryPluginImpl::ClientCallTracer::~ClientCallTracer() { transparent_retries_ != 0) { otel_plugin_->client_.call.transparent_retries->Record( transparent_retries_, - std::array, 3>{ + std::array, 2>{ {{OpenTelemetryMethodKey(), MethodForStats()}, {OpenTelemetryTargetKey(), scope_config_->filtered_target()}}}, opentelemetry::context::Context{}); From 97c53a466582e012c764faee8b00f074f93a18df Mon Sep 17 00:00:00 2001 From: Yash Tibrewal Date: Wed, 2 Jul 2025 13:29:14 -0700 Subject: [PATCH 08/14] Another attempt --- src/cpp/ext/otel/otel_client_call_tracer.cc | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/cpp/ext/otel/otel_client_call_tracer.cc b/src/cpp/ext/otel/otel_client_call_tracer.cc index 952832a337032..67682ea2aed9e 100644 --- a/src/cpp/ext/otel/otel_client_call_tracer.cc +++ b/src/cpp/ext/otel/otel_client_call_tracer.cc @@ -450,27 +450,24 @@ OpenTelemetryPluginImpl::ClientCallTracer::~ClientCallTracer() { if (otel_plugin_->client_.call.retries != nullptr && retries_ > 1) { otel_plugin_->client_.call.retries->Record( retries_ - 1, - std::array, 2>{ - {{OpenTelemetryMethodKey(), MethodForStats()}, - {OpenTelemetryTargetKey(), scope_config_->filtered_target()}}}, + {{OpenTelemetryMethodKey(), MethodForStats()}, + {OpenTelemetryTargetKey(), scope_config_->filtered_target()}}, opentelemetry::context::Context{}); } if (otel_plugin_->client_.call.transparent_retries != nullptr && transparent_retries_ != 0) { otel_plugin_->client_.call.transparent_retries->Record( transparent_retries_, - std::array, 2>{ - {{OpenTelemetryMethodKey(), MethodForStats()}, - {OpenTelemetryTargetKey(), scope_config_->filtered_target()}}}, + {{OpenTelemetryMethodKey(), MethodForStats()}, + {OpenTelemetryTargetKey(), scope_config_->filtered_target()}}, opentelemetry::context::Context{}); } if (otel_plugin_->client_.call.retry_delay != nullptr && retry_delay_ != absl::ZeroDuration() && retries_ > 1) { otel_plugin_->client_.call.retry_delay->Record( absl::ToDoubleSeconds(retry_delay_), - std::array, 2>{ - {{OpenTelemetryMethodKey(), MethodForStats()}, - {OpenTelemetryTargetKey(), scope_config_->filtered_target()}}}, + {{OpenTelemetryMethodKey(), MethodForStats()}, + {OpenTelemetryTargetKey(), scope_config_->filtered_target()}}, opentelemetry::context::Context{}); } if (span_ != nullptr) { From b7ee0d8cfced7f745e1959551096e4238d534d7a Mon Sep 17 00:00:00 2001 From: Yash Tibrewal Date: Wed, 2 Jul 2025 14:34:49 -0700 Subject: [PATCH 09/14] Another attempt --- src/cpp/ext/otel/otel_client_call_tracer.cc | 25 ++++++++++++--------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/cpp/ext/otel/otel_client_call_tracer.cc b/src/cpp/ext/otel/otel_client_call_tracer.cc index 67682ea2aed9e..5f88ebdafc915 100644 --- a/src/cpp/ext/otel/otel_client_call_tracer.cc +++ b/src/cpp/ext/otel/otel_client_call_tracer.cc @@ -447,27 +447,30 @@ OpenTelemetryPluginImpl::ClientCallTracer::ClientCallTracer( } OpenTelemetryPluginImpl::ClientCallTracer::~ClientCallTracer() { + std::array, + 2> + attributes = { + std::pair(OpenTelemetryMethodKey(), + opentelemetry::common::AttributeValue( + AbslStringViewToNoStdStringView(MethodForStats()))), + std::pair(OpenTelemetryTargetKey(), + opentelemetry::common::AttributeValue( + AbslStringViewToNoStdStringView( + scope_config_->filtered_target())))}; if (otel_plugin_->client_.call.retries != nullptr && retries_ > 1) { otel_plugin_->client_.call.retries->Record( - retries_ - 1, - {{OpenTelemetryMethodKey(), MethodForStats()}, - {OpenTelemetryTargetKey(), scope_config_->filtered_target()}}, - opentelemetry::context::Context{}); + retries_ - 1, attributes, opentelemetry::context::Context{}); } if (otel_plugin_->client_.call.transparent_retries != nullptr && transparent_retries_ != 0) { otel_plugin_->client_.call.transparent_retries->Record( - transparent_retries_, - {{OpenTelemetryMethodKey(), MethodForStats()}, - {OpenTelemetryTargetKey(), scope_config_->filtered_target()}}, - opentelemetry::context::Context{}); + transparent_retries_, attributes, opentelemetry::context::Context{}); } if (otel_plugin_->client_.call.retry_delay != nullptr && retry_delay_ != absl::ZeroDuration() && retries_ > 1) { otel_plugin_->client_.call.retry_delay->Record( - absl::ToDoubleSeconds(retry_delay_), - {{OpenTelemetryMethodKey(), MethodForStats()}, - {OpenTelemetryTargetKey(), scope_config_->filtered_target()}}, + absl::ToDoubleSeconds(retry_delay_), attributes, opentelemetry::context::Context{}); } if (span_ != nullptr) { From d4f57b0765f5bb5b4ba2618f7baa5b8f4c002621 Mon Sep 17 00:00:00 2001 From: Yash Tibrewal Date: Wed, 2 Jul 2025 15:57:13 -0700 Subject: [PATCH 10/14] Once more --- src/cpp/ext/otel/otel_client_call_tracer.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cpp/ext/otel/otel_client_call_tracer.cc b/src/cpp/ext/otel/otel_client_call_tracer.cc index 5f88ebdafc915..7d20e3e2c9e6f 100644 --- a/src/cpp/ext/otel/otel_client_call_tracer.cc +++ b/src/cpp/ext/otel/otel_client_call_tracer.cc @@ -451,10 +451,10 @@ OpenTelemetryPluginImpl::ClientCallTracer::~ClientCallTracer() { opentelemetry::common::AttributeValue>, 2> attributes = { - std::pair(OpenTelemetryMethodKey(), + std::pair(AbslStringViewToNoStdStringView(OpenTelemetryMethodKey()), opentelemetry::common::AttributeValue( AbslStringViewToNoStdStringView(MethodForStats()))), - std::pair(OpenTelemetryTargetKey(), + std::pair(AbslStringViewToNoStdStringView(OpenTelemetryTargetKey()), opentelemetry::common::AttributeValue( AbslStringViewToNoStdStringView( scope_config_->filtered_target())))}; From 6e4cc0e0d7df9a61892f4e37e49f232d0091562c Mon Sep 17 00:00:00 2001 From: Yash Tibrewal Date: Tue, 8 Jul 2025 15:15:41 -0700 Subject: [PATCH 11/14] Avoid lock if retry metrics are not enabled --- src/cpp/ext/otel/otel_client_call_tracer.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cpp/ext/otel/otel_client_call_tracer.cc b/src/cpp/ext/otel/otel_client_call_tracer.cc index 7d20e3e2c9e6f..3a939f656e546 100644 --- a/src/cpp/ext/otel/otel_client_call_tracer.cc +++ b/src/cpp/ext/otel/otel_client_call_tracer.cc @@ -310,7 +310,7 @@ void OpenTelemetryPluginImpl::ClientCallTracer:: parent_->otel_plugin_->client_.attempt.rcvd_total_compressed_message_size ->Record(incoming_bytes, labels, opentelemetry::context::Context{}); } - { + if (parent_->otel_plugin_->client_.call.retry_delay != nullptr) { grpc_core::MutexLock lock(&parent_->mu_); if (--parent_->num_active_attempts_ == 0) { parent_->time_at_last_attempt_end_ = absl::Now(); @@ -490,7 +490,8 @@ OpenTelemetryPluginImpl::ClientCallTracer::StartNewAttempt( grpc_core::MutexLock lock(&mu_); if (transparent_retries_ != 0 || retries_ != 0) { is_first_attempt = false; - if (num_active_attempts_ == 0 && !is_transparent_retry) { + if (otel_plugin_->client_.call.retry_delay != nullptr && + num_active_attempts_ == 0 && !is_transparent_retry) { retry_delay_ += absl::Now() - time_at_last_attempt_end_; } } From a5b21bf0f33878276266064d95eb6d3d77de76b1 Mon Sep 17 00:00:00 2001 From: Yash Tibrewal Date: Fri, 11 Jul 2025 15:44:41 -0700 Subject: [PATCH 12/14] Reviewer comment --- test/cpp/ext/otel/otel_test_library.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/cpp/ext/otel/otel_test_library.h b/test/cpp/ext/otel/otel_test_library.h index 7a05bc730cf55..8de9dd8100b89 100644 --- a/test/cpp/ext/otel/otel_test_library.h +++ b/test/cpp/ext/otel/otel_test_library.h @@ -407,6 +407,7 @@ MATCHER_P7(GaugeDataIsIncrementalForSpecificMetricAndLabelSet, metric_name, return result; } +// Helper matcher to check whether a value is within a certain range MATCHER_P2(IsWithinRange, lo, hi, absl::StrCat(negation ? "isn't" : "is", " between ", ::testing::PrintToString(lo), " and ", @@ -414,6 +415,8 @@ MATCHER_P2(IsWithinRange, lo, hi, return (lo) <= arg && arg <= (hi); } +// Specialization of Extract to be able to use `IsWithinRange` matcher within +// the `HistogramResultEq` matcher defined above. template struct Extract> { using Type = T; From b4eeaae4d4037d1e32c10dbe81e38711c73abbd7 Mon Sep 17 00:00:00 2001 From: Yash Tibrewal Date: Thu, 24 Jul 2025 14:43:04 -0700 Subject: [PATCH 13/14] Add comment --- src/cpp/ext/otel/otel_client_call_tracer.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/cpp/ext/otel/otel_client_call_tracer.h b/src/cpp/ext/otel/otel_client_call_tracer.h index c0e9f961f30fd..6a9cc202b51a0 100644 --- a/src/cpp/ext/otel/otel_client_call_tracer.h +++ b/src/cpp/ext/otel/otel_client_call_tracer.h @@ -157,6 +157,9 @@ class OpenTelemetryPluginImpl::ClientCallTracer const bool registered_method_; OpenTelemetryPluginImpl* otel_plugin_; std::shared_ptr scope_config_; + // TODO(ctiller@): When refactoring the tracer code, consider the possibility + // of removing this mutex. More discussion in + // https://github.com/grpc/grpc/pull/39195/files#r2191231973. grpc_core::Mutex mu_; // Non-transparent retry attempts per call (includes initial attempt) uint64_t retries_ ABSL_GUARDED_BY(&mu_) = 0; From b7e8b773839f6685564bf45c5c4c5eca630b8127 Mon Sep 17 00:00:00 2001 From: Yash Tibrewal Date: Fri, 25 Jul 2025 15:03:51 -0700 Subject: [PATCH 14/14] Fix build --- test/cpp/ext/otel/BUILD | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/cpp/ext/otel/BUILD b/test/cpp/ext/otel/BUILD index 73052afefbbf0..97761a4986066 100644 --- a/test/cpp/ext/otel/BUILD +++ b/test/cpp/ext/otel/BUILD @@ -59,6 +59,8 @@ grpc_cc_test( "otel/sdk/src/metrics", ], tags = [ + # Incompatible with internal tracing + "grpc:no-internal-poller", # TODO(b/332369798): Remove after fixing bug "grpc:otel-namespace-calamity", ], @@ -83,6 +85,8 @@ grpc_cc_test( "otel/sdk/src/metrics", ], tags = [ + # Incompatible with internal tracing + "grpc:no-internal-poller", # TODO(b/332369798): Remove after fixing bug "grpc:otel-namespace-calamity", ],