-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Expand file tree
/
Copy pathtest_tools_e2e.py
More file actions
356 lines (287 loc) · 14.2 KB
/
test_tools_e2e.py
File metadata and controls
356 lines (287 loc) · 14.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
"""E2E Tools Tests"""
import os
import pytest
from pydantic import BaseModel, Field
from copilot import define_tool
from copilot.generated.rpc import PermissionDecisionApproveOnce, PermissionDecisionReject
from copilot.session import PermissionHandler, PermissionNoResult
from copilot.tools import Tool, ToolInvocation, ToolResult
from .testharness import E2ETestContext, get_final_assistant_message
pytestmark = pytest.mark.asyncio(loop_scope="module")
class TestTools:
async def test_invokes_built_in_tools(self, ctx: E2ETestContext):
readme_path = os.path.join(ctx.work_dir, "README.md")
with open(readme_path, "w") as f:
f.write("# ELIZA, the only chatbot you'll ever need")
session = await ctx.client.create_session(
on_permission_request=PermissionHandler.approve_all
)
await session.send("What's the first line of README.md in this directory?")
assistant_message = await get_final_assistant_message(session)
assert "ELIZA" in assistant_message.data.content
async def test_invokes_custom_tool(self, ctx: E2ETestContext):
class EncryptParams(BaseModel):
input: str = Field(description="String to encrypt")
@define_tool("encrypt_string", description="Encrypts a string")
def encrypt_string(params: EncryptParams, invocation: ToolInvocation) -> str:
return params.input.upper()
session = await ctx.client.create_session(
on_permission_request=PermissionHandler.approve_all, tools=[encrypt_string]
)
await session.send("Use encrypt_string to encrypt this string: Hello")
assistant_message = await get_final_assistant_message(session)
assert "HELLO" in assistant_message.data.content
async def test_handles_tool_calling_errors(self, ctx: E2ETestContext):
@define_tool("get_user_location", description="Gets the user's location")
def get_user_location() -> str:
raise Exception("Melbourne")
session = await ctx.client.create_session(
on_permission_request=PermissionHandler.approve_all, tools=[get_user_location]
)
await session.send("What is my location? If you can't find out, just say 'unknown'.")
answer = await get_final_assistant_message(session)
# Check the underlying traffic
traffic = await ctx.get_exchanges()
last_conversation = traffic[-1]
tool_calls = []
for msg in last_conversation["request"]["messages"]:
if msg.get("role") == "assistant" and "tool_calls" in msg:
tool_calls.extend(msg["tool_calls"])
assert len(tool_calls) == 1
tool_call = tool_calls[0]
assert tool_call["type"] == "function"
assert tool_call["function"]["name"] == "get_user_location"
tool_results = [
msg for msg in last_conversation["request"]["messages"] if msg.get("role") == "tool"
]
assert len(tool_results) == 1
tool_result = tool_results[0]
assert tool_result["tool_call_id"] == tool_call["id"]
# The error message "Melbourne" should NOT be exposed to the LLM
assert "Melbourne" not in tool_result["content"]
# The assistant should not see the exception information
assert "Melbourne" not in (answer.data.content or "")
assert "unknown" in (answer.data.content or "").lower()
async def test_can_receive_and_return_complex_types(self, ctx: E2ETestContext):
class DbQuery(BaseModel):
table: str
ids: list[int]
sortAscending: bool
class DbQueryParams(BaseModel):
query: DbQuery
class City(BaseModel):
countryId: int
cityName: str
population: int
expected_session_id = None
@define_tool("db_query", description="Performs a database query")
def db_query(params: DbQueryParams, invocation: ToolInvocation) -> list[City]:
assert params.query.table == "cities"
assert params.query.ids == [12, 19]
assert params.query.sortAscending is True
assert invocation.session_id == expected_session_id
return [
City(countryId=19, cityName="Passos", population=135460),
City(countryId=12, cityName="San Lorenzo", population=204356),
]
session = await ctx.client.create_session(
on_permission_request=PermissionHandler.approve_all, tools=[db_query]
)
expected_session_id = session.session_id
await session.send(
"Perform a DB query for the 'cities' table using IDs 12 and 19, "
"sorting ascending. Reply only with lines of the form: [cityname] [population]"
)
assistant_message = await get_final_assistant_message(session)
response_content = assistant_message.data.content or ""
assert response_content != ""
assert "Passos" in response_content
assert "San Lorenzo" in response_content
assert "135460" in response_content.replace(",", "")
assert "204356" in response_content.replace(",", "")
async def test_skippermission_sent_in_tool_definition(self, ctx: E2ETestContext):
class LookupParams(BaseModel):
id: str = Field(description="ID to look up")
@define_tool(
"safe_lookup",
description="A safe lookup that skips permission",
skip_permission=True,
)
def safe_lookup(params: LookupParams, invocation: ToolInvocation) -> str:
return f"RESULT: {params.id}"
did_run_permission_request = False
def tracking_handler(request, invocation):
nonlocal did_run_permission_request
did_run_permission_request = True
return PermissionNoResult()
session = await ctx.client.create_session(
on_permission_request=tracking_handler, tools=[safe_lookup]
)
await session.send("Use safe_lookup to look up 'test123'")
assistant_message = await get_final_assistant_message(session)
assert "RESULT: test123" in assistant_message.data.content
assert not did_run_permission_request
async def test_overrides_built_in_tool_with_custom_tool(self, ctx: E2ETestContext):
class GrepParams(BaseModel):
query: str = Field(description="Search query")
@define_tool(
"grep",
description="A custom grep implementation that overrides the built-in",
overrides_built_in_tool=True,
)
def custom_grep(params: GrepParams, invocation: ToolInvocation) -> str:
return f"CUSTOM_GREP_RESULT: {params.query}"
session = await ctx.client.create_session(
on_permission_request=PermissionHandler.approve_all, tools=[custom_grep]
)
await session.send("Use grep to search for the word 'hello'")
assistant_message = await get_final_assistant_message(session)
assert "CUSTOM_GREP_RESULT" in assistant_message.data.content
async def test_invokes_custom_tool_with_permission_handler(self, ctx: E2ETestContext):
class EncryptParams(BaseModel):
input: str = Field(description="String to encrypt")
@define_tool("encrypt_string", description="Encrypts a string")
def encrypt_string(params: EncryptParams, invocation: ToolInvocation) -> str:
return params.input.upper()
permission_requests = []
def on_permission_request(request, invocation):
permission_requests.append(request)
return PermissionDecisionApproveOnce()
session = await ctx.client.create_session(
on_permission_request=on_permission_request, tools=[encrypt_string]
)
await session.send("Use encrypt_string to encrypt this string: Hello")
assistant_message = await get_final_assistant_message(session)
assert "HELLO" in assistant_message.data.content
# Should have received a custom-tool permission request
custom_tool_requests = [r for r in permission_requests if r.kind == "custom-tool"]
assert len(custom_tool_requests) > 0
assert custom_tool_requests[0].tool_name == "encrypt_string"
async def test_denies_custom_tool_when_permission_denied(self, ctx: E2ETestContext):
tool_handler_called = False
class EncryptParams(BaseModel):
input: str = Field(description="String to encrypt")
@define_tool("encrypt_string", description="Encrypts a string")
def encrypt_string(params: EncryptParams, invocation: ToolInvocation) -> str:
nonlocal tool_handler_called
tool_handler_called = True
return params.input.upper()
def on_permission_request(request, invocation):
return PermissionDecisionReject()
session = await ctx.client.create_session(
on_permission_request=on_permission_request, tools=[encrypt_string]
)
await session.send("Use encrypt_string to encrypt this string: Hello")
await get_final_assistant_message(session)
# The tool handler should NOT have been called since permission was denied
assert not tool_handler_called
async def test_should_execute_multiple_custom_tools_in_parallel_single_turn(
self, ctx: E2ETestContext
):
"""Multiple custom tools invoked in parallel in the same turn."""
import asyncio
city_called: asyncio.Future = asyncio.get_event_loop().create_future()
country_called: asyncio.Future = asyncio.get_event_loop().create_future()
def lookup_city(invocation: ToolInvocation) -> ToolResult:
city = (invocation.arguments or {}).get("city", "")
if not city_called.done():
city_called.set_result(city)
return ToolResult(text_result_for_llm=f"CITY_{city.upper()}")
def lookup_country(invocation: ToolInvocation) -> ToolResult:
country = (invocation.arguments or {}).get("country", "")
if not country_called.done():
country_called.set_result(country)
return ToolResult(text_result_for_llm=f"COUNTRY_{country.upper()}")
session = await ctx.client.create_session(
on_permission_request=PermissionHandler.approve_all,
tools=[
Tool(
name="lookup_city",
description="Looks up city information",
parameters={
"type": "object",
"properties": {"city": {"type": "string", "description": "City name"}},
"required": ["city"],
},
handler=lookup_city,
),
Tool(
name="lookup_country",
description="Looks up country information",
parameters={
"type": "object",
"properties": {
"country": {"type": "string", "description": "Country name"}
},
"required": ["country"],
},
handler=lookup_country,
),
],
)
try:
await session.send(
"Use lookup_city with 'Paris' and lookup_country with 'France' at the same time,"
" then combine both results in your reply."
)
city_result = await asyncio.wait_for(city_called, timeout=60.0)
country_result = await asyncio.wait_for(country_called, timeout=60.0)
assert city_result == "Paris"
assert country_result == "France"
assistant_message = await get_final_assistant_message(session, timeout=60.0)
assert assistant_message is not None
content = assistant_message.data.content or ""
assert "CITY_PARIS" in content
assert "COUNTRY_FRANCE" in content
finally:
await session.disconnect()
async def test_should_respect_availabletools_and_excludedtools_combined(
self, ctx: E2ETestContext
):
"""excluded_tools takes precedence over available_tools."""
excluded_tool_called = False
def allowed_handler(invocation: ToolInvocation) -> ToolResult:
input_val = (invocation.arguments or {}).get("input", "")
return ToolResult(text_result_for_llm=f"ALLOWED_{input_val.upper()}")
def excluded_handler(invocation: ToolInvocation) -> ToolResult:
nonlocal excluded_tool_called
excluded_tool_called = True
input_val = (invocation.arguments or {}).get("input", "")
return ToolResult(text_result_for_llm=f"EXCLUDED_{input_val.upper()}")
session = await ctx.client.create_session(
on_permission_request=PermissionHandler.approve_all,
tools=[
Tool(
name="allowed_tool",
description="An allowed tool",
parameters={
"type": "object",
"properties": {"input": {"type": "string", "description": "Input value"}},
"required": ["input"],
},
handler=allowed_handler,
),
Tool(
name="excluded_tool",
description="A tool that should be excluded",
parameters={
"type": "object",
"properties": {"input": {"type": "string", "description": "Input value"}},
"required": ["input"],
},
handler=excluded_handler,
),
],
available_tools=["allowed_tool", "excluded_tool"],
excluded_tools=["excluded_tool"],
)
try:
result = await session.send_and_wait(
"Use the allowed_tool with input 'test'. Do NOT use excluded_tool.",
timeout=60.0,
)
assert result is not None
assert "ALLOWED_TEST" in (result.data.content or "")
assert not excluded_tool_called, "Excluded tool should not have been called"
finally:
await session.disconnect()