Skip to content

Commit be1425b

Browse files
GWeale and copybara-github
authored and committed
fix: support non-Latin text in InMemoryMemoryService search
Merge #5504

Fixes #5501

### Root Cause

`_extract_words_lower` uses `re.findall(r'[A-Za-z]+', text)`, which only matches ASCII letters. All non-Latin characters (Japanese, Chinese, Korean, Cyrillic, etc.) are silently discarded, making `search_memory` unable to match any non-Latin text.

### Fix

Change the regex from `[A-Za-z]+` to `\w+` with the `re.UNICODE` flag, which matches all Unicode word characters (letters, digits, underscore) across all scripts.

Co-authored-by: George Weale <gweale@google.com>
PiperOrigin-RevId: 930813808
1 parent ef395c7 commit be1425b

2 files changed

Lines changed: 30 additions & 1 deletion

File tree

src/google/adk/memory/in_memory_memory_service.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ def _user_key(app_name: str, user_id: str) -> str:
3939

4040
def _extract_words_lower(text: str) -> set[str]:
4141
"""Extracts words from a string and converts them to lowercase."""
42-
return set([word.lower() for word in re.findall(r'[A-Za-z]+', text)])
42+
return set([word.lower() for word in re.findall(r'\w+', text, re.UNICODE)])
4343

4444

4545
class InMemoryMemoryService(BaseMemoryService):

tests/unittests/memory/test_in_memory_memory_service.py

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -327,3 +327,32 @@ async def test_search_memory_is_scoped_by_user():
327327
assert (
328328
result_other_user.memories[0].content.parts[0].text == 'This is a secret.'
329329
)
330+
331+
332+
@pytest.mark.asyncio
333+
async def test_search_memory_matches_non_latin_text():
334+
"""Tests that search matches non-Latin (e.g. Cyrillic) text."""
335+
memory_service = InMemoryMemoryService()
336+
session = Session(
337+
app_name=MOCK_APP_NAME,
338+
user_id=MOCK_USER_ID,
339+
id='session-non-latin',
340+
last_update_time=5000,
341+
events=[
342+
Event(
343+
id='event-non-latin',
344+
invocation_id='inv-non-latin',
345+
author='user',
346+
timestamp=70000,
347+
content=types.Content(parts=[types.Part(text='Привет мир')]),
348+
),
349+
],
350+
)
351+
await memory_service.add_session_to_memory(session)
352+
353+
result = await memory_service.search_memory(
354+
app_name=MOCK_APP_NAME, user_id=MOCK_USER_ID, query='привет'
355+
)
356+
357+
assert len(result.memories) == 1
358+
assert result.memories[0].content.parts[0].text == 'Привет мир'

0 commit comments

Comments
 (0)