fix: support non-Latin text in InMemoryMemoryService search

GWeale · copybara-github · commit be1425b75513 · 2026-06-11T17:00:26.000-07:00
Merge #5504

Fixes #5501

### Root Cause

`_extract_words_lower` uses `re.findall(r'[A-Za-z]+', text)`, which only matches ASCII letters. All non-Latin characters (Japanese, Chinese, Korean, Cyrillic, etc.) are silently discarded, making `search_memory` unable to match any non-Latin text.

### Fix

Change the regex from `[A-Za-z]+` to `\w+` with the `re.UNICODE` flag, which matches all Unicode word characters (letters, digits, underscore) across all scripts. (For `str` patterns in Python 3, `\w` is already Unicode-aware by default, so the flag is explicit rather than required.)

Co-authored-by: George Weale <gweale@google.com>
PiperOrigin-RevId: 930813808
diff --git a/src/google/adk/memory/in_memory_memory_service.py b/src/google/adk/memory/in_memory_memory_service.py
@@ -39,7 +39,7 @@ def _user_key(app_name: str, user_id: str) -> str:
 
 def _extract_words_lower(text: str) -> set[str]:
   """Extracts words from a string and converts them to lowercase."""
-  return set([word.lower() for word in re.findall(r'[A-Za-z]+', text)])
+  return set([word.lower() for word in re.findall(r'\w+', text, re.UNICODE)])
 
 
 class InMemoryMemoryService(BaseMemoryService):
diff --git a/tests/unittests/memory/test_in_memory_memory_service.py b/tests/unittests/memory/test_in_memory_memory_service.py
@@ -327,3 +327,32 @@ async def test_search_memory_is_scoped_by_user():
   assert (
       result_other_user.memories[0].content.parts[0].text == 'This is a secret.'
   )
+
+
+@pytest.mark.asyncio
+async def test_search_memory_matches_non_latin_text():
+  """Tests that search matches non-Latin (e.g. Cyrillic) text."""
+  memory_service = InMemoryMemoryService()
+  session = Session(
+      app_name=MOCK_APP_NAME,
+      user_id=MOCK_USER_ID,
+      id='session-non-latin',
+      last_update_time=5000,
+      events=[
+          Event(
+              id='event-non-latin',
+              invocation_id='inv-non-latin',
+              author='user',
+              timestamp=70000,
+              content=types.Content(parts=[types.Part(text='Привет мир')]),
+          ),
+      ],
+  )
+  await memory_service.add_session_to_memory(session)
+
+  result = await memory_service.search_memory(
+      app_name=MOCK_APP_NAME, user_id=MOCK_USER_ID, query='привет'
+  )
+
+  assert len(result.memories) == 1
+  assert result.memories[0].content.parts[0].text == 'Привет мир'