Skip to content

Commit 231f232

Browse files
Google APIs and copybara-github
authored and committed
feat: Trajectory eval metrics added to evaluation service proto
docs: Fixed typo for field `use_strict_string_match` in message `.google.cloud.aiplatform.v1beta1.ToolParameterKVMatchSpec`
PiperOrigin-RevId: 697705080
1 parent caa9439 commit 231f232

1 file changed

Lines changed: 290 additions & 1 deletion

File tree

google/cloud/aiplatform/v1beta1/evaluation_service.proto

Lines changed: 290 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,24 @@ message EvaluateInstancesRequest {
139139

140140
// Input for tool parameter key value match metric.
141141
ToolParameterKVMatchInput tool_parameter_kv_match_input = 22;
142+
143+
// Input for trajectory exact match metric.
144+
TrajectoryExactMatchInput trajectory_exact_match_input = 33;
145+
146+
// Input for trajectory in order match metric.
147+
TrajectoryInOrderMatchInput trajectory_in_order_match_input = 34;
148+
149+
// Input for trajectory any order match metric.
150+
TrajectoryAnyOrderMatchInput trajectory_any_order_match_input = 35;
151+
152+
// Input for trajectory precision metric.
153+
TrajectoryPrecisionInput trajectory_precision_input = 37;
154+
155+
// Input for trajectory recall metric.
156+
TrajectoryRecallInput trajectory_recall_input = 38;
157+
158+
// Input for trajectory single tool use metric.
159+
TrajectorySingleToolUseInput trajectory_single_tool_use_input = 39;
142160
}
143161

144162
// Required. The resource name of the Location to evaluate the instances.
@@ -235,6 +253,24 @@ message EvaluateInstancesResponse {
235253

236254
// Results for tool parameter key value match metric.
237255
ToolParameterKVMatchResults tool_parameter_kv_match_results = 21;
256+
257+
// Results for trajectory exact match metric.
258+
TrajectoryExactMatchResults trajectory_exact_match_results = 31;
259+
260+
// Results for trajectory in order match metric.
261+
TrajectoryInOrderMatchResults trajectory_in_order_match_results = 32;
262+
263+
// Results for trajectory any order match metric.
264+
TrajectoryAnyOrderMatchResults trajectory_any_order_match_results = 33;
265+
266+
// Results for trajectory precision metric.
267+
TrajectoryPrecisionResults trajectory_precision_results = 35;
268+
269+
// Results for trajectory recall metric.
270+
TrajectoryRecallResults trajectory_recall_results = 36;
271+
272+
// Results for trajectory single tool use metric.
273+
TrajectorySingleToolUseResults trajectory_single_tool_use_results = 37;
238274
}
239275
}
240276

@@ -1165,7 +1201,7 @@ message ToolParameterKVMatchInput {
11651201

11661202
// Spec for tool parameter key value match metric.
11671203
message ToolParameterKVMatchSpec {
1168-
// Optional. Whether to use STRCIT string match on parameter values.
1204+
// Optional. Whether to use STRICT string match on parameter values.
11691205
bool use_strict_string_match = 1 [(google.api.field_behavior) = OPTIONAL];
11701206
}
11711207

@@ -1191,3 +1227,256 @@ message ToolParameterKVMatchMetricValue {
11911227
// Output only. Tool parameter key value match score.
11921228
optional float score = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
11931229
}
1230+
1231+
// Instances and metric spec for TrajectoryExactMatch metric.
// Bundles one metric spec with the repeated instances it is evaluated on.
1232+
message TrajectoryExactMatchInput {
1233+
// Required. Spec for TrajectoryExactMatch metric.
1234+
TrajectoryExactMatchSpec metric_spec = 1
1235+
[(google.api.field_behavior) = REQUIRED];
1236+
1237+
// Required. Repeated TrajectoryExactMatch instance. Each instance carries a
// predicted trajectory and the reference trajectory it is compared against.
1238+
repeated TrajectoryExactMatchInstance instances = 2
1239+
[(google.api.field_behavior) = REQUIRED];
1240+
}
1241+
1242+
// Spec for TrajectoryExactMatch metric - returns 1 if tool calls in the
1243+
// reference trajectory exactly match the predicted trajectory, else 0.
// This spec currently declares no configurable fields.
1244+
message TrajectoryExactMatchSpec {}
1245+
1246+
// A single TrajectoryExactMatch instance: one predicted trajectory together
// with the reference trajectory it is compared against.
1247+
message TrajectoryExactMatchInstance {
1248+
// Required. Predicted tool call trajectory.
1249+
optional Trajectory predicted_trajectory = 1
1250+
[(google.api.field_behavior) = REQUIRED];
1251+
1252+
// Required. Reference tool call trajectory.
1253+
optional Trajectory reference_trajectory = 2
1254+
[(google.api.field_behavior) = REQUIRED];
1255+
}
1256+
1257+
// Results for TrajectoryExactMatch metric.
// Wraps the per-instance metric values.
1258+
message TrajectoryExactMatchResults {
1259+
// Output only. TrajectoryExactMatch metric values, each holding the score
// for an instance.
1260+
repeated TrajectoryExactMatchMetricValue
1261+
trajectory_exact_match_metric_values = 1
1262+
[(google.api.field_behavior) = OUTPUT_ONLY];
1263+
}
1264+
1265+
// TrajectoryExactMatch metric value for an instance.
1266+
message TrajectoryExactMatchMetricValue {
1267+
// Output only. TrajectoryExactMatch score: 1 if tool calls in the reference
// trajectory exactly match the predicted trajectory, else 0.
1268+
optional float score = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
1269+
}
1270+
1271+
// Instances and metric spec for TrajectoryInOrderMatch metric.
// Bundles one metric spec with the repeated instances it is evaluated on.
1272+
message TrajectoryInOrderMatchInput {
1273+
// Required. Spec for TrajectoryInOrderMatch metric.
1274+
TrajectoryInOrderMatchSpec metric_spec = 1
1275+
[(google.api.field_behavior) = REQUIRED];
1276+
1277+
// Required. Repeated TrajectoryInOrderMatch instance. Each instance carries
// a predicted trajectory and the reference trajectory it is compared
// against.
1278+
repeated TrajectoryInOrderMatchInstance instances = 2
1279+
[(google.api.field_behavior) = REQUIRED];
1280+
}
1281+
1282+
// Spec for TrajectoryInOrderMatch metric - returns 1 if tool calls in the
1283+
// reference trajectory appear in the predicted trajectory in the same order,
1284+
// else 0.
// This spec currently declares no configurable fields.
1285+
message TrajectoryInOrderMatchSpec {}
1286+
1287+
// A single TrajectoryInOrderMatch instance: one predicted trajectory together
// with the reference trajectory it is compared against.
1288+
message TrajectoryInOrderMatchInstance {
1289+
// Required. Predicted tool call trajectory.
1290+
optional Trajectory predicted_trajectory = 1
1291+
[(google.api.field_behavior) = REQUIRED];
1292+
1293+
// Required. Reference tool call trajectory.
1294+
optional Trajectory reference_trajectory = 2
1295+
[(google.api.field_behavior) = REQUIRED];
1296+
}
1297+
1298+
// Results for TrajectoryInOrderMatch metric.
// Wraps the per-instance metric values.
1299+
message TrajectoryInOrderMatchResults {
1300+
// Output only. TrajectoryInOrderMatch metric values, each holding the score
// for an instance.
1301+
repeated TrajectoryInOrderMatchMetricValue
1302+
trajectory_in_order_match_metric_values = 1
1303+
[(google.api.field_behavior) = OUTPUT_ONLY];
1304+
}
1305+
1306+
// TrajectoryInOrderMatch metric value for an instance.
1307+
message TrajectoryInOrderMatchMetricValue {
1308+
// Output only. TrajectoryInOrderMatch score: 1 if tool calls in the
// reference trajectory appear in the predicted trajectory in the same order,
// else 0.
1309+
optional float score = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
1310+
}
1311+
1312+
// Instances and metric spec for TrajectoryAnyOrderMatch metric.
// Bundles one metric spec with the repeated instances it is evaluated on.
1313+
message TrajectoryAnyOrderMatchInput {
1314+
// Required. Spec for TrajectoryAnyOrderMatch metric.
1315+
TrajectoryAnyOrderMatchSpec metric_spec = 1
1316+
[(google.api.field_behavior) = REQUIRED];
1317+
1318+
// Required. Repeated TrajectoryAnyOrderMatch instance. Each instance carries
// a predicted trajectory and the reference trajectory it is compared
// against.
1319+
repeated TrajectoryAnyOrderMatchInstance instances = 2
1320+
[(google.api.field_behavior) = REQUIRED];
1321+
}
1322+
1323+
// Spec for TrajectoryAnyOrderMatch metric - returns 1 if all tool calls in the
1324+
// reference trajectory appear in the predicted trajectory in any order, else
1325+
// 0.
// This spec currently declares no configurable fields.
1326+
message TrajectoryAnyOrderMatchSpec {}
1327+
1328+
// A single TrajectoryAnyOrderMatch instance: one predicted trajectory together
// with the reference trajectory it is compared against.
1329+
message TrajectoryAnyOrderMatchInstance {
1330+
// Required. Predicted tool call trajectory.
1331+
optional Trajectory predicted_trajectory = 1
1332+
[(google.api.field_behavior) = REQUIRED];
1333+
1334+
// Required. Reference tool call trajectory.
1335+
optional Trajectory reference_trajectory = 2
1336+
[(google.api.field_behavior) = REQUIRED];
1337+
}
1338+
1339+
// Results for TrajectoryAnyOrderMatch metric.
// Wraps the per-instance metric values.
1340+
message TrajectoryAnyOrderMatchResults {
1341+
// Output only. TrajectoryAnyOrderMatch metric values, each holding the score
// for an instance.
1342+
repeated TrajectoryAnyOrderMatchMetricValue
1343+
trajectory_any_order_match_metric_values = 1
1344+
[(google.api.field_behavior) = OUTPUT_ONLY];
1345+
}
1346+
1347+
// TrajectoryAnyOrderMatch metric value for an instance.
1348+
message TrajectoryAnyOrderMatchMetricValue {
1349+
// Output only. TrajectoryAnyOrderMatch score: 1 if all tool calls in the
// reference trajectory appear in the predicted trajectory in any order,
// else 0.
1350+
optional float score = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
1351+
}
1352+
1353+
// Instances and metric spec for TrajectoryPrecision metric.
// Bundles one metric spec with the repeated instances it is evaluated on.
1354+
message TrajectoryPrecisionInput {
1355+
// Required. Spec for TrajectoryPrecision metric.
1356+
TrajectoryPrecisionSpec metric_spec = 1
1357+
[(google.api.field_behavior) = REQUIRED];
1358+
1359+
// Required. Repeated TrajectoryPrecision instance. Each instance carries a
// predicted trajectory and the reference trajectory it is compared against.
1360+
repeated TrajectoryPrecisionInstance instances = 2
1361+
[(google.api.field_behavior) = REQUIRED];
1362+
}
1363+
1364+
// Spec for TrajectoryPrecision metric - returns a float score based on average
1365+
// precision of individual tool calls.
// This spec currently declares no configurable fields.
1366+
message TrajectoryPrecisionSpec {}
1367+
1368+
// A single TrajectoryPrecision instance: one predicted trajectory together
// with the reference trajectory it is compared against.
1369+
message TrajectoryPrecisionInstance {
1370+
// Required. Predicted tool call trajectory.
1371+
optional Trajectory predicted_trajectory = 1
1372+
[(google.api.field_behavior) = REQUIRED];
1373+
1374+
// Required. Reference tool call trajectory.
1375+
optional Trajectory reference_trajectory = 2
1376+
[(google.api.field_behavior) = REQUIRED];
1377+
}
1378+
1379+
// Results for TrajectoryPrecision metric.
// Wraps the per-instance metric values.
1380+
message TrajectoryPrecisionResults {
1381+
// Output only. TrajectoryPrecision metric values, each holding the score for
// an instance.
1382+
repeated TrajectoryPrecisionMetricValue trajectory_precision_metric_values = 1
1383+
[(google.api.field_behavior) = OUTPUT_ONLY];
1384+
}
1385+
1386+
// TrajectoryPrecision metric value for an instance.
1387+
message TrajectoryPrecisionMetricValue {
1388+
// Output only. TrajectoryPrecision score: a float based on average precision
// of individual tool calls.
1389+
optional float score = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
1390+
}
1391+
1392+
// Instances and metric spec for TrajectoryRecall metric.
// Bundles one metric spec with the repeated instances it is evaluated on.
1393+
message TrajectoryRecallInput {
1394+
// Required. Spec for TrajectoryRecall metric.
1395+
TrajectoryRecallSpec metric_spec = 1 [(google.api.field_behavior) = REQUIRED];
1396+
1397+
// Required. Repeated TrajectoryRecall instance. Each instance carries a
// predicted trajectory and the reference trajectory it is compared against.
1398+
repeated TrajectoryRecallInstance instances = 2
1399+
[(google.api.field_behavior) = REQUIRED];
1400+
}
1401+
1402+
// Spec for TrajectoryRecall metric - returns a float score based on average
1403+
// recall of individual tool calls.
// This spec currently declares no configurable fields.
1404+
message TrajectoryRecallSpec {}
1405+
1406+
// A single TrajectoryRecall instance: one predicted trajectory together with
// the reference trajectory it is compared against.
1407+
message TrajectoryRecallInstance {
1408+
// Required. Predicted tool call trajectory.
1409+
optional Trajectory predicted_trajectory = 1
1410+
[(google.api.field_behavior) = REQUIRED];
1411+
1412+
// Required. Reference tool call trajectory.
1413+
optional Trajectory reference_trajectory = 2
1414+
[(google.api.field_behavior) = REQUIRED];
1415+
}
1416+
1417+
// Results for TrajectoryRecall metric.
// Wraps the per-instance metric values.
1418+
message TrajectoryRecallResults {
1419+
// Output only. TrajectoryRecall metric values, each holding the score for an
// instance.
1420+
repeated TrajectoryRecallMetricValue trajectory_recall_metric_values = 1
1421+
[(google.api.field_behavior) = OUTPUT_ONLY];
1422+
}
1423+
1424+
// TrajectoryRecall metric value for an instance.
1425+
message TrajectoryRecallMetricValue {
1426+
// Output only. TrajectoryRecall score: a float based on average recall of
// individual tool calls.
1427+
optional float score = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
1428+
}
1429+
1430+
// Instances and metric spec for TrajectorySingleToolUse metric.
// Bundles one metric spec with the repeated instances it is evaluated on.
1431+
message TrajectorySingleToolUseInput {
1432+
// Required. Spec for TrajectorySingleToolUse metric; it names the tool to
// look for.
1433+
TrajectorySingleToolUseSpec metric_spec = 1
1434+
[(google.api.field_behavior) = REQUIRED];
1435+
1436+
// Required. Repeated TrajectorySingleToolUse instance. Each instance carries
// only a predicted trajectory (no reference trajectory).
1437+
repeated TrajectorySingleToolUseInstance instances = 2
1438+
[(google.api.field_behavior) = REQUIRED];
1439+
}
1440+
1441+
// Spec for TrajectorySingleToolUse metric - returns 1 if tool is present in the
1442+
// predicted trajectory, else 0.
1443+
message TrajectorySingleToolUseSpec {
1444+
// Required. Name of the tool to be checked for in the predicted trajectory.
1445+
optional string tool_name = 1 [(google.api.field_behavior) = REQUIRED];
1446+
}
1447+
1448+
// A single TrajectorySingleToolUse instance. Unlike the other trajectory
// metrics' instances, it holds only a predicted trajectory.
1449+
message TrajectorySingleToolUseInstance {
1450+
// Required. Predicted tool call trajectory.
1451+
optional Trajectory predicted_trajectory = 1
1452+
[(google.api.field_behavior) = REQUIRED];
1453+
}
1454+
1455+
// Results for TrajectorySingleToolUse metric.
// Wraps the per-instance metric values.
1456+
message TrajectorySingleToolUseResults {
1457+
// Output only. TrajectorySingleToolUse metric values, each holding the score
// for an instance.
1458+
repeated TrajectorySingleToolUseMetricValue
1459+
trajectory_single_tool_use_metric_values = 1
1460+
[(google.api.field_behavior) = OUTPUT_ONLY];
1461+
}
1462+
1463+
// TrajectorySingleToolUse metric value for an instance.
1464+
message TrajectorySingleToolUseMetricValue {
1465+
// Output only. TrajectorySingleToolUse score: 1 if the tool is present in
// the predicted trajectory, else 0.
1466+
optional float score = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
1467+
}
1468+
1469+
// Spec for trajectory: a list of tool calls. Used as both the predicted and
// the reference trajectory in the trajectory metric instances.
1470+
message Trajectory {
1471+
// Required. Tool calls in the trajectory. Element order is significant for
// the order-sensitive metrics (e.g. TrajectoryInOrderMatch).
1472+
repeated ToolCall tool_calls = 1 [(google.api.field_behavior) = REQUIRED];
1473+
}
1474+
1475+
// Spec for tool call: one element of a `Trajectory`.
1476+
message ToolCall {
1477+
// Required. Name of the tool.
1478+
optional string tool_name = 1 [(google.api.field_behavior) = REQUIRED];
1479+
1480+
// Optional. Input passed to the tool, carried as a string.
// NOTE(review): the expected string format is not stated here - confirm.
1481+
optional string tool_input = 2 [(google.api.field_behavior) = OPTIONAL];
1482+
}

0 commit comments

Comments
 (0)