feat: Trajectory eval metrics added to evaluation service proto

Google APIs · copybara-github · commit 231f2325db84 · 2024-11-18T11:44:53.000-08:00
docs: Fixed typo for field `use_strict_string_match` in message `.google.cloud.aiplatform.v1beta1.ToolParameterKVMatchSpec`

PiperOrigin-RevId: 697705080
diff --git a/google/cloud/aiplatform/v1beta1/evaluation_service.proto b/google/cloud/aiplatform/v1beta1/evaluation_service.proto
@@ -139,6 +139,24 @@ message EvaluateInstancesRequest {
 
     // Input for tool parameter key value match metric.
     ToolParameterKVMatchInput tool_parameter_kv_match_input = 22;
+
+    // Input for trajectory exact match metric.
+    TrajectoryExactMatchInput trajectory_exact_match_input = 33;
+
+    // Input for trajectory in order match metric.
+    TrajectoryInOrderMatchInput trajectory_in_order_match_input = 34;
+
+    // Input for trajectory any order match metric.
+    TrajectoryAnyOrderMatchInput trajectory_any_order_match_input = 35;
+
+    // Input for trajectory precision metric.
+    TrajectoryPrecisionInput trajectory_precision_input = 37;
+
+    // Input for trajectory recall metric.
+    TrajectoryRecallInput trajectory_recall_input = 38;
+
+    // Input for trajectory single tool use metric.
+    TrajectorySingleToolUseInput trajectory_single_tool_use_input = 39;
   }
 
   // Required. The resource name of the Location to evaluate the instances.
@@ -235,6 +253,24 @@ message EvaluateInstancesResponse {
 
     // Results for tool parameter key value match metric.
     ToolParameterKVMatchResults tool_parameter_kv_match_results = 21;
+
+    // Results for trajectory exact match metric.
+    TrajectoryExactMatchResults trajectory_exact_match_results = 31;
+
+    // Results for trajectory in order match metric.
+    TrajectoryInOrderMatchResults trajectory_in_order_match_results = 32;
+
+    // Results for trajectory any order match metric.
+    TrajectoryAnyOrderMatchResults trajectory_any_order_match_results = 33;
+
+    // Results for trajectory precision metric.
+    TrajectoryPrecisionResults trajectory_precision_results = 35;
+
+    // Results for trajectory recall metric.
+    TrajectoryRecallResults trajectory_recall_results = 36;
+
+    // Results for trajectory single tool use metric.
+    TrajectorySingleToolUseResults trajectory_single_tool_use_results = 37;
   }
 }
 
@@ -1165,7 +1201,7 @@ message ToolParameterKVMatchInput {
 
 // Spec for tool parameter key value match metric.
 message ToolParameterKVMatchSpec {
-  // Optional. Whether to use STRCIT string match on parameter values.
+  // Optional. Whether to use STRICT string match on parameter values.
   bool use_strict_string_match = 1 [(google.api.field_behavior) = OPTIONAL];
 }
 
@@ -1191,3 +1227,256 @@ message ToolParameterKVMatchMetricValue {
   // Output only. Tool parameter key value match score.
   optional float score = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
 }
+
+// Metric spec and evaluation instances for the TrajectoryExactMatch metric.
+message TrajectoryExactMatchInput {
+  // Required. The TrajectoryExactMatch metric spec.
+  TrajectoryExactMatchSpec metric_spec = 1
+      [(google.api.field_behavior) = REQUIRED];
+
+  // Required. The TrajectoryExactMatch instances to evaluate.
+  repeated TrajectoryExactMatchInstance instances = 2
+      [(google.api.field_behavior) = REQUIRED];
+}
+
+// Spec for the TrajectoryExactMatch metric. The metric returns 1 if the tool
+// calls in the reference and predicted trajectories match exactly, else 0.
+message TrajectoryExactMatchSpec {}
+
+// A single TrajectoryExactMatch evaluation instance.
+message TrajectoryExactMatchInstance {
+  // Required. The predicted tool call trajectory.
+  optional Trajectory predicted_trajectory = 1
+      [(google.api.field_behavior) = REQUIRED];
+
+  // Required. The reference tool call trajectory.
+  optional Trajectory reference_trajectory = 2
+      [(google.api.field_behavior) = REQUIRED];
+}
+
+// Results for the TrajectoryExactMatch metric.
+message TrajectoryExactMatchResults {
+  // Output only. Per-instance TrajectoryExactMatch metric values.
+  repeated TrajectoryExactMatchMetricValue
+      trajectory_exact_match_metric_values = 1
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+}
+
+// The TrajectoryExactMatch metric value for a single instance.
+message TrajectoryExactMatchMetricValue {
+  // Output only. Score: 1 on match, else 0 (see TrajectoryExactMatchSpec).
+  optional float score = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
+}
+
+// Metric spec and evaluation instances for the TrajectoryInOrderMatch metric.
+message TrajectoryInOrderMatchInput {
+  // Required. The TrajectoryInOrderMatch metric spec.
+  TrajectoryInOrderMatchSpec metric_spec = 1
+      [(google.api.field_behavior) = REQUIRED];
+
+  // Required. The TrajectoryInOrderMatch instances to evaluate.
+  repeated TrajectoryInOrderMatchInstance instances = 2
+      [(google.api.field_behavior) = REQUIRED];
+}
+
+// Spec for the TrajectoryInOrderMatch metric. The metric returns 1 if the tool
+// calls in the reference trajectory appear in the predicted trajectory in the
+// same order, else 0.
+message TrajectoryInOrderMatchSpec {}
+
+// A single TrajectoryInOrderMatch evaluation instance.
+message TrajectoryInOrderMatchInstance {
+  // Required. The predicted tool call trajectory.
+  optional Trajectory predicted_trajectory = 1
+      [(google.api.field_behavior) = REQUIRED];
+
+  // Required. The reference tool call trajectory.
+  optional Trajectory reference_trajectory = 2
+      [(google.api.field_behavior) = REQUIRED];
+}
+
+// Results for the TrajectoryInOrderMatch metric.
+message TrajectoryInOrderMatchResults {
+  // Output only. Per-instance TrajectoryInOrderMatch metric values.
+  repeated TrajectoryInOrderMatchMetricValue
+      trajectory_in_order_match_metric_values = 1
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+}
+
+// The TrajectoryInOrderMatch metric value for a single instance.
+message TrajectoryInOrderMatchMetricValue {
+  // Output only. Score: 1 on match, else 0 (see TrajectoryInOrderMatchSpec).
+  optional float score = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
+}
+
+// Metric spec and evaluation instances for the TrajectoryAnyOrderMatch metric.
+message TrajectoryAnyOrderMatchInput {
+  // Required. The TrajectoryAnyOrderMatch metric spec.
+  TrajectoryAnyOrderMatchSpec metric_spec = 1
+      [(google.api.field_behavior) = REQUIRED];
+
+  // Required. The TrajectoryAnyOrderMatch instances to evaluate.
+  repeated TrajectoryAnyOrderMatchInstance instances = 2
+      [(google.api.field_behavior) = REQUIRED];
+}
+
+// Spec for the TrajectoryAnyOrderMatch metric. The metric returns 1 if all
+// tool calls in the reference trajectory appear in the predicted trajectory,
+// in any order, else 0.
+message TrajectoryAnyOrderMatchSpec {}
+
+// A single TrajectoryAnyOrderMatch evaluation instance.
+message TrajectoryAnyOrderMatchInstance {
+  // Required. The predicted tool call trajectory.
+  optional Trajectory predicted_trajectory = 1
+      [(google.api.field_behavior) = REQUIRED];
+
+  // Required. The reference tool call trajectory.
+  optional Trajectory reference_trajectory = 2
+      [(google.api.field_behavior) = REQUIRED];
+}
+
+// Results for the TrajectoryAnyOrderMatch metric.
+message TrajectoryAnyOrderMatchResults {
+  // Output only. Per-instance TrajectoryAnyOrderMatch metric values.
+  repeated TrajectoryAnyOrderMatchMetricValue
+      trajectory_any_order_match_metric_values = 1
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+}
+
+// The TrajectoryAnyOrderMatch metric value for a single instance.
+message TrajectoryAnyOrderMatchMetricValue {
+  // Output only. Score: 1 on match, else 0 (see TrajectoryAnyOrderMatchSpec).
+  optional float score = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
+}
+
+// Metric spec and evaluation instances for the TrajectoryPrecision metric.
+message TrajectoryPrecisionInput {
+  // Required. The TrajectoryPrecision metric spec.
+  TrajectoryPrecisionSpec metric_spec = 1
+      [(google.api.field_behavior) = REQUIRED];
+
+  // Required. The TrajectoryPrecision instances to evaluate.
+  repeated TrajectoryPrecisionInstance instances = 2
+      [(google.api.field_behavior) = REQUIRED];
+}
+
+// Spec for the TrajectoryPrecision metric. The metric returns a float score
+// based on the average precision of individual tool calls.
+message TrajectoryPrecisionSpec {}
+
+// A single TrajectoryPrecision evaluation instance.
+message TrajectoryPrecisionInstance {
+  // Required. The predicted tool call trajectory.
+  optional Trajectory predicted_trajectory = 1
+      [(google.api.field_behavior) = REQUIRED];
+
+  // Required. The reference tool call trajectory.
+  optional Trajectory reference_trajectory = 2
+      [(google.api.field_behavior) = REQUIRED];
+}
+
+// Results for the TrajectoryPrecision metric.
+message TrajectoryPrecisionResults {
+  // Output only. Per-instance TrajectoryPrecision metric values.
+  repeated TrajectoryPrecisionMetricValue trajectory_precision_metric_values = 1
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+}
+
+// The TrajectoryPrecision metric value for a single instance.
+message TrajectoryPrecisionMetricValue {
+  // Output only. TrajectoryPrecision score (see TrajectoryPrecisionSpec).
+  optional float score = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
+}
+
+// Metric spec and evaluation instances for the TrajectoryRecall metric.
+message TrajectoryRecallInput {
+  // Required. The TrajectoryRecall metric spec.
+  TrajectoryRecallSpec metric_spec = 1 [(google.api.field_behavior) = REQUIRED];
+
+  // Required. The TrajectoryRecall instances to evaluate.
+  repeated TrajectoryRecallInstance instances = 2
+      [(google.api.field_behavior) = REQUIRED];
+}
+
+// Spec for the TrajectoryRecall metric. The metric returns a float score
+// based on the average recall of individual tool calls.
+message TrajectoryRecallSpec {}
+
+// A single TrajectoryRecall evaluation instance.
+message TrajectoryRecallInstance {
+  // Required. The predicted tool call trajectory.
+  optional Trajectory predicted_trajectory = 1
+      [(google.api.field_behavior) = REQUIRED];
+
+  // Required. The reference tool call trajectory.
+  optional Trajectory reference_trajectory = 2
+      [(google.api.field_behavior) = REQUIRED];
+}
+
+// Results for the TrajectoryRecall metric.
+message TrajectoryRecallResults {
+  // Output only. Per-instance TrajectoryRecall metric values.
+  repeated TrajectoryRecallMetricValue trajectory_recall_metric_values = 1
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+}
+
+// The TrajectoryRecall metric value for a single instance.
+message TrajectoryRecallMetricValue {
+  // Output only. TrajectoryRecall score (see TrajectoryRecallSpec).
+  optional float score = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
+}
+
+// Metric spec and evaluation instances for the TrajectorySingleToolUse metric.
+message TrajectorySingleToolUseInput {
+  // Required. The TrajectorySingleToolUse metric spec.
+  TrajectorySingleToolUseSpec metric_spec = 1
+      [(google.api.field_behavior) = REQUIRED];
+
+  // Required. The TrajectorySingleToolUse instances to evaluate.
+  repeated TrajectorySingleToolUseInstance instances = 2
+      [(google.api.field_behavior) = REQUIRED];
+}
+
+// Spec for the TrajectorySingleToolUse metric. The metric returns 1 if the
+// named tool is present in the predicted trajectory, else 0.
+message TrajectorySingleToolUseSpec {
+  // Required. Name of the tool to look for in the predicted trajectory.
+  optional string tool_name = 1 [(google.api.field_behavior) = REQUIRED];
+}
+
+// A single TrajectorySingleToolUse evaluation instance.
+message TrajectorySingleToolUseInstance {
+  // Required. The predicted tool call trajectory.
+  optional Trajectory predicted_trajectory = 1
+      [(google.api.field_behavior) = REQUIRED];
+}
+
+// Results for the TrajectorySingleToolUse metric.
+message TrajectorySingleToolUseResults {
+  // Output only. Per-instance TrajectorySingleToolUse metric values.
+  repeated TrajectorySingleToolUseMetricValue
+      trajectory_single_tool_use_metric_values = 1
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+}
+
+// The TrajectorySingleToolUse metric value for a single instance.
+message TrajectorySingleToolUseMetricValue {
+  // Output only. Score: 1 or 0 (see TrajectorySingleToolUseSpec).
+  optional float score = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
+}
+
+// A trajectory, represented as a sequence of tool calls.
+message Trajectory {
+  // Required. The tool calls that make up the trajectory.
+  repeated ToolCall tool_calls = 1 [(google.api.field_behavior) = REQUIRED];
+}
+
+// A single tool call within a trajectory.
+message ToolCall {
+  // Required. The name of the tool.
+  optional string tool_name = 1 [(google.api.field_behavior) = REQUIRED];
+
+  // Optional. The tool input, as a string (format not specified here).
+  optional string tool_input = 2 [(google.api.field_behavior) = OPTIONAL];
+}