@@ -139,6 +139,24 @@ message EvaluateInstancesRequest {
139139
140140 // Input for tool parameter key value match metric.
141141 ToolParameterKVMatchInput tool_parameter_kv_match_input = 22 ;
142+
143+ // Input for trajectory exact match metric.
144+ TrajectoryExactMatchInput trajectory_exact_match_input = 33 ;
145+
146+ // Input for trajectory in order match metric.
147+ TrajectoryInOrderMatchInput trajectory_in_order_match_input = 34 ;
148+
149+ // Input for trajectory any order match metric.
150+ TrajectoryAnyOrderMatchInput trajectory_any_order_match_input = 35 ;
151+
152+ // Input for trajectory precision metric.
153+ TrajectoryPrecisionInput trajectory_precision_input = 37 ;
154+
155+ // Input for trajectory recall metric.
156+ TrajectoryRecallInput trajectory_recall_input = 38 ;
157+
158+ // Input for trajectory single tool use metric.
159+ TrajectorySingleToolUseInput trajectory_single_tool_use_input = 39 ;
142160 }
143161
144162 // Required. The resource name of the Location to evaluate the instances.
@@ -235,6 +253,24 @@ message EvaluateInstancesResponse {
235253
236254 // Results for tool parameter key value match metric.
237255 ToolParameterKVMatchResults tool_parameter_kv_match_results = 21 ;
256+
257+ // Results for trajectory exact match metric.
258+ TrajectoryExactMatchResults trajectory_exact_match_results = 31 ;
259+
260+ // Results for trajectory in order match metric.
261+ TrajectoryInOrderMatchResults trajectory_in_order_match_results = 32 ;
262+
263+ // Results for trajectory any order match metric.
264+ TrajectoryAnyOrderMatchResults trajectory_any_order_match_results = 33 ;
265+
266+ // Results for trajectory precision metric.
267+ TrajectoryPrecisionResults trajectory_precision_results = 35 ;
268+
269+ // Results for trajectory recall metric.
270+ TrajectoryRecallResults trajectory_recall_results = 36 ;
271+
272+ // Results for trajectory single tool use metric.
273+ TrajectorySingleToolUseResults trajectory_single_tool_use_results = 37 ;
238274 }
239275}
240276
@@ -1165,7 +1201,7 @@ message ToolParameterKVMatchInput {
11651201
11661202// Spec for the tool parameter key value match metric.
11671203message ToolParameterKVMatchSpec {
1168- // Optional. Whether to use STRCIT string match on parameter values.
1204+ // Optional. Whether to use STRICT string match on parameter values (unset reads as false).
11691205 bool use_strict_string_match = 1 [(google.api.field_behavior ) = OPTIONAL ];
11701206}
11711207
@@ -1191,3 +1227,256 @@ message ToolParameterKVMatchMetricValue {
11911227 // Output only. Tool parameter key value match score.
11921228 optional float score = 1 [(google.api.field_behavior ) = OUTPUT_ONLY ];
11931229}
1230+
1231+ // Input for the TrajectoryExactMatch metric: a metric spec plus its instances.
1232+ message TrajectoryExactMatchInput {
1233+ // Required. Spec for the TrajectoryExactMatch metric.
1234+ TrajectoryExactMatchSpec metric_spec = 1
1235+ [(google.api.field_behavior ) = REQUIRED ];
1236+
1237+ // Required. The TrajectoryExactMatch instances (predicted/reference pairs).
1238+ repeated TrajectoryExactMatchInstance instances = 2
1239+ [(google.api.field_behavior ) = REQUIRED ];
1240+ }
1241+
1242+ // Spec for the TrajectoryExactMatch metric. The metric returns 1 if the tool
1243+ // calls in the reference trajectory exactly match the predicted one, else 0.
1244+ message TrajectoryExactMatchSpec {}
1245+
1246+ // A single TrajectoryExactMatch instance: a predicted and a reference trajectory.
1247+ message TrajectoryExactMatchInstance {
1248+ // Required. The predicted tool call trajectory.
1249+ optional Trajectory predicted_trajectory = 1
1250+ [(google.api.field_behavior ) = REQUIRED ];
1251+
1252+ // Required. The reference tool call trajectory to compare against.
1253+ optional Trajectory reference_trajectory = 2
1254+ [(google.api.field_behavior ) = REQUIRED ];
1255+ }
1256+
1257+ // Results for the TrajectoryExactMatch metric.
1258+ message TrajectoryExactMatchResults {
1259+ // Output only. Per-instance TrajectoryExactMatch metric values.
1260+ repeated TrajectoryExactMatchMetricValue
1261+ trajectory_exact_match_metric_values = 1
1262+ [(google.api.field_behavior) = OUTPUT_ONLY];
1263+ }
1264+
1265+ // TrajectoryExactMatch metric value for a single instance.
1266+ message TrajectoryExactMatchMetricValue {
1267+ // Output only. TrajectoryExactMatch score: 1 on an exact match, else 0.
1268+ optional float score = 1 [(google.api.field_behavior ) = OUTPUT_ONLY ];
1269+ }
1270+
1271+ // Input for the TrajectoryInOrderMatch metric: a metric spec plus its instances.
1272+ message TrajectoryInOrderMatchInput {
1273+ // Required. Spec for the TrajectoryInOrderMatch metric.
1274+ TrajectoryInOrderMatchSpec metric_spec = 1
1275+ [(google.api.field_behavior ) = REQUIRED ];
1276+
1277+ // Required. The TrajectoryInOrderMatch instances (predicted/reference pairs).
1278+ repeated TrajectoryInOrderMatchInstance instances = 2
1279+ [(google.api.field_behavior ) = REQUIRED ];
1280+ }
1281+
1282+ // Spec for the TrajectoryInOrderMatch metric. The metric returns 1 if the tool
1283+ // calls in the reference trajectory appear in the predicted trajectory in the
1284+ // same order, else 0. This message currently declares no fields.
1285+ message TrajectoryInOrderMatchSpec {}
1286+
1287+ // A single TrajectoryInOrderMatch instance: a predicted and a reference trajectory.
1288+ message TrajectoryInOrderMatchInstance {
1289+ // Required. The predicted tool call trajectory.
1290+ optional Trajectory predicted_trajectory = 1
1291+ [(google.api.field_behavior ) = REQUIRED ];
1292+
1293+ // Required. The reference tool call trajectory to compare against.
1294+ optional Trajectory reference_trajectory = 2
1295+ [(google.api.field_behavior ) = REQUIRED ];
1296+ }
1297+
1298+ // Results for the TrajectoryInOrderMatch metric.
1299+ message TrajectoryInOrderMatchResults {
1300+ // Output only. Per-instance TrajectoryInOrderMatch metric values.
1301+ repeated TrajectoryInOrderMatchMetricValue
1302+ trajectory_in_order_match_metric_values = 1
1303+ [(google.api.field_behavior) = OUTPUT_ONLY];
1304+ }
1305+
1306+ // TrajectoryInOrderMatch metric value for a single instance.
1307+ message TrajectoryInOrderMatchMetricValue {
1308+ // Output only. TrajectoryInOrderMatch score: 1 on an in-order match, else 0.
1309+ optional float score = 1 [(google.api.field_behavior ) = OUTPUT_ONLY ];
1310+ }
1311+
1312+ // Input for the TrajectoryAnyOrderMatch metric: a metric spec plus its instances.
1313+ message TrajectoryAnyOrderMatchInput {
1314+ // Required. Spec for the TrajectoryAnyOrderMatch metric.
1315+ TrajectoryAnyOrderMatchSpec metric_spec = 1
1316+ [(google.api.field_behavior ) = REQUIRED ];
1317+
1318+ // Required. The TrajectoryAnyOrderMatch instances (predicted/reference pairs).
1319+ repeated TrajectoryAnyOrderMatchInstance instances = 2
1320+ [(google.api.field_behavior ) = REQUIRED ];
1321+ }
1322+
1323+ // Spec for the TrajectoryAnyOrderMatch metric. The metric returns 1 if all tool
1324+ // calls in the reference trajectory appear in the predicted trajectory, in any
1325+ // order, else 0. This message currently declares no fields.
1326+ message TrajectoryAnyOrderMatchSpec {}
1327+
1328+ // A single TrajectoryAnyOrderMatch instance: a predicted and a reference trajectory.
1329+ message TrajectoryAnyOrderMatchInstance {
1330+ // Required. The predicted tool call trajectory.
1331+ optional Trajectory predicted_trajectory = 1
1332+ [(google.api.field_behavior ) = REQUIRED ];
1333+
1334+ // Required. The reference tool call trajectory to compare against.
1335+ optional Trajectory reference_trajectory = 2
1336+ [(google.api.field_behavior ) = REQUIRED ];
1337+ }
1338+
1339+ // Results for the TrajectoryAnyOrderMatch metric.
1340+ message TrajectoryAnyOrderMatchResults {
1341+ // Output only. Per-instance TrajectoryAnyOrderMatch metric values.
1342+ repeated TrajectoryAnyOrderMatchMetricValue
1343+ trajectory_any_order_match_metric_values = 1
1344+ [(google.api.field_behavior) = OUTPUT_ONLY];
1345+ }
1346+
1347+ // TrajectoryAnyOrderMatch metric value for a single instance.
1348+ message TrajectoryAnyOrderMatchMetricValue {
1349+ // Output only. TrajectoryAnyOrderMatch score: 1 on an any-order match, else 0.
1350+ optional float score = 1 [(google.api.field_behavior ) = OUTPUT_ONLY ];
1351+ }
1352+
1353+ // Input for the TrajectoryPrecision metric: a metric spec plus its instances.
1354+ message TrajectoryPrecisionInput {
1355+ // Required. Spec for the TrajectoryPrecision metric.
1356+ TrajectoryPrecisionSpec metric_spec = 1
1357+ [(google.api.field_behavior ) = REQUIRED ];
1358+
1359+ // Required. The TrajectoryPrecision instances (predicted/reference pairs).
1360+ repeated TrajectoryPrecisionInstance instances = 2
1361+ [(google.api.field_behavior ) = REQUIRED ];
1362+ }
1363+
1364+ // Spec for the TrajectoryPrecision metric. The metric returns a float score
1365+ // based on the average precision of individual tool calls.
1366+ message TrajectoryPrecisionSpec {}
1367+
1368+ // A single TrajectoryPrecision instance: a predicted and a reference trajectory.
1369+ message TrajectoryPrecisionInstance {
1370+ // Required. The predicted tool call trajectory.
1371+ optional Trajectory predicted_trajectory = 1
1372+ [(google.api.field_behavior ) = REQUIRED ];
1373+
1374+ // Required. The reference tool call trajectory to compare against.
1375+ optional Trajectory reference_trajectory = 2
1376+ [(google.api.field_behavior ) = REQUIRED ];
1377+ }
1378+
1379+ // Results for the TrajectoryPrecision metric.
1380+ message TrajectoryPrecisionResults {
1381+ // Output only. Per-instance TrajectoryPrecision metric values.
1382+ repeated TrajectoryPrecisionMetricValue trajectory_precision_metric_values = 1
1383+ [(google.api.field_behavior ) = OUTPUT_ONLY ];
1384+ }
1385+
1386+ // TrajectoryPrecision metric value for a single instance.
1387+ message TrajectoryPrecisionMetricValue {
1388+ // Output only. TrajectoryPrecision score for the instance.
1389+ optional float score = 1 [(google.api.field_behavior ) = OUTPUT_ONLY ];
1390+ }
1391+
1392+ // Input for the TrajectoryRecall metric: a metric spec plus its instances.
1393+ message TrajectoryRecallInput {
1394+ // Required. Spec for the TrajectoryRecall metric.
1395+ TrajectoryRecallSpec metric_spec = 1 [(google.api.field_behavior ) = REQUIRED ];
1396+
1397+ // Required. The TrajectoryRecall instances (predicted/reference pairs).
1398+ repeated TrajectoryRecallInstance instances = 2
1399+ [(google.api.field_behavior ) = REQUIRED ];
1400+ }
1401+
1402+ // Spec for the TrajectoryRecall metric. The metric returns a float score
1403+ // based on the average recall of individual tool calls.
1404+ message TrajectoryRecallSpec {}
1405+
1406+ // A single TrajectoryRecall instance: a predicted and a reference trajectory.
1407+ message TrajectoryRecallInstance {
1408+ // Required. The predicted tool call trajectory.
1409+ optional Trajectory predicted_trajectory = 1
1410+ [(google.api.field_behavior ) = REQUIRED ];
1411+
1412+ // Required. The reference tool call trajectory to compare against.
1413+ optional Trajectory reference_trajectory = 2
1414+ [(google.api.field_behavior ) = REQUIRED ];
1415+ }
1416+
1417+ // Results for the TrajectoryRecall metric.
1418+ message TrajectoryRecallResults {
1419+ // Output only. Per-instance TrajectoryRecall metric values.
1420+ repeated TrajectoryRecallMetricValue trajectory_recall_metric_values = 1
1421+ [(google.api.field_behavior ) = OUTPUT_ONLY ];
1422+ }
1423+
1424+ // TrajectoryRecall metric value for a single instance.
1425+ message TrajectoryRecallMetricValue {
1426+ // Output only. TrajectoryRecall score for the instance.
1427+ optional float score = 1 [(google.api.field_behavior ) = OUTPUT_ONLY ];
1428+ }
1429+
1430+ // Input for the TrajectorySingleToolUse metric: a metric spec plus its instances.
1431+ message TrajectorySingleToolUseInput {
1432+ // Required. Spec for the TrajectorySingleToolUse metric.
1433+ TrajectorySingleToolUseSpec metric_spec = 1
1434+ [(google.api.field_behavior ) = REQUIRED ];
1435+
1436+ // Required. The TrajectorySingleToolUse instances (predicted trajectories).
1437+ repeated TrajectorySingleToolUseInstance instances = 2
1438+ [(google.api.field_behavior ) = REQUIRED ];
1439+ }
1440+
1441+ // Spec for the TrajectorySingleToolUse metric. The metric returns 1 if the
1442+ // named tool is present in the predicted trajectory, else 0.
1443+ message TrajectorySingleToolUseSpec {
1444+ // Required. Name of the tool to look for in the predicted trajectory.
1445+ optional string tool_name = 1 [(google.api.field_behavior ) = REQUIRED ];
1446+ }
1447+
1448+ // A single TrajectorySingleToolUse instance: a predicted trajectory only.
1449+ message TrajectorySingleToolUseInstance {
1450+ // Required. The predicted tool call trajectory.
1451+ optional Trajectory predicted_trajectory = 1
1452+ [(google.api.field_behavior ) = REQUIRED ];
1453+ }
1454+
1455+ // Results for the TrajectorySingleToolUse metric.
1456+ message TrajectorySingleToolUseResults {
1457+ // Output only. Per-instance TrajectorySingleToolUse metric values.
1458+ repeated TrajectorySingleToolUseMetricValue
1459+ trajectory_single_tool_use_metric_values = 1
1460+ [(google.api.field_behavior) = OUTPUT_ONLY];
1461+ }
1462+
1463+ // TrajectorySingleToolUse metric value for a single instance.
1464+ message TrajectorySingleToolUseMetricValue {
1465+ // Output only. TrajectorySingleToolUse score: 1 if the tool is present, else 0.
1466+ optional float score = 1 [(google.api.field_behavior ) = OUTPUT_ONLY ];
1467+ }
1468+
1469+ // A trajectory, expressed as a list of tool calls.
1470+ message Trajectory {
1471+ // Required. The tool calls that make up the trajectory.
1472+ repeated ToolCall tool_calls = 1 [(google.api.field_behavior ) = REQUIRED ];
1473+ }
1474+
1475+ // A single tool call: a tool name and, optionally, its input.
1476+ message ToolCall {
1477+ // Required. Name of the tool.
1478+ optional string tool_name = 1 [(google.api.field_behavior ) = REQUIRED ];
1479+
1480+ // Optional. Input to the tool, carried as a string.
1481+ optional string tool_input = 2 [(google.api.field_behavior ) = OPTIONAL ];
1482+ }
0 commit comments