From 05a592a4ea1b840ab7b8e96aff8c3400aa8632fb Mon Sep 17 00:00:00 2001 From: alighazi288 <51366992+alighazi288@users.noreply.github.com> Date: Mon, 11 May 2026 13:23:46 -0400 Subject: [PATCH 1/2] feat(api): restrict field/property names to a portable, readable rule (#1383) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the validation rule requested in #1383 for user-supplied field/property names: - starts with a lowercase letter, then [a-z0-9_] - max 32 characters - leading underscore reserved for system use Adds `_is_valid_field_name` and `SafeFieldName` in `common/api/spec.py`, alongside the existing `_is_valid_name` / `SafeId` (which validates identifiers — kept permissive). The new rule is applied to user-supplied field/property names on input/spec models. Response/entity models stay permissive so legacy records continue to deserialize, and lookup-by-name fields (`*.category_name` on `AddFeatureSpec`, `UpdateFeatureSpec`, `DisableSemanticCategorySpec`) are kept permissive — those resolve against existing categories at `semantic_memory.py:299`, so tightening would block operations on legacy categories. --- packages/client/client_tests/test_memory.py | 4 +- .../common/src/memmachine_common/api/spec.py | 81 +++++++++++---- .../server/api_v2/test_router.py | 6 +- .../server/api_v2/test_spec.py | 98 +++++++++++++++++++ 4 files changed, 168 insertions(+), 21 deletions(-) diff --git a/packages/client/client_tests/test_memory.py b/packages/client/client_tests/test_memory.py index 906711ac3..328a74b31 100644 --- a/packages/client/client_tests/test_memory.py +++ b/packages/client/client_tests/test_memory.py @@ -1449,7 +1449,7 @@ def test_create_semantic_set_type_success(self, mock_client): result = memory.create_semantic_set_type( metadata_tags=["user_id", "session_id"], is_org_level=False, - name="User Sessions", + name="user_sessions", description="Set type for user sessions", ) @@ -1463,7 +1463,7 @@ def test_create_semantic_set_type_success(self, mock_client): assert json_data["project_id"] == "test_project" assert json_data["metadata_tags"] == ["user_id", "session_id"] assert json_data["is_org_level"] is False - assert json_data["name"] == "User Sessions" + assert json_data["name"] == "user_sessions" assert json_data["description"] == "Set type for user sessions" def test_create_semantic_set_type_minimal(self, mock_client): diff --git a/packages/common/src/memmachine_common/api/spec.py b/packages/common/src/memmachine_common/api/spec.py index 3863c6fb8..c2eb2337a 100644 --- a/packages/common/src/memmachine_common/api/spec.py +++ b/packages/common/src/memmachine_common/api/spec.py @@ -161,10 +161,47 @@ def _validate_int_compatible(v: str) -> str: return v +# Stricter rule for user-supplied field/property names (issue #1383), +# distinct from `_is_valid_name`/`SafeId` which validates *identifiers* +# (org_id, project_id, UIDs) and stays permissive for backward compat. +MAX_FIELD_NAME_LENGTH = 32 +RESERVED_FIELD_NAME_PREFIX = "_" +_FIELD_NAME_REGEX = regex.compile(r"^[a-z][a-z0-9_]*$") + + +def _is_valid_field_name(v: str) -> str: + """Validate a field/property name against the rule from issue #1383. + + - 1-32 characters + - starts with a lowercase letter, then ``[a-z0-9_]`` + - leading underscore reserved for system use + + Three checks instead of one combined regex so the error message names + the specific rule that was violated. + """ + if not v or len(v) > MAX_FIELD_NAME_LENGTH: + raise InvalidNameError( + f"Field/property name must be 1-{MAX_FIELD_NAME_LENGTH} chars, " + f"got {len(v)}: '{v}'", + ) + if v.startswith(RESERVED_FIELD_NAME_PREFIX): + raise InvalidNameError( + "Field/property names beginning with '_' are reserved for system " + f"use: '{v}'", + ) + if not _FIELD_NAME_REGEX.fullmatch(v): + raise InvalidNameError( + "Field/property name must start with a-z and contain only " + f"[a-z0-9_]: '{v}'", + ) + return v + + IntCompatibleId = Annotated[str, AfterValidator(_validate_int_compatible), Field(...)] SafeId = Annotated[str, AfterValidator(_is_valid_name), Field(...)] SafeIdWithDefault = Annotated[SafeId, Field(default=DEFAULT_ORG_AND_PROJECT_ID)] +SafeFieldName = Annotated[str, AfterValidator(_is_valid_field_name), Field(...)] class _WithOrgAndProj(BaseModel): @@ -386,7 +423,7 @@ class MemoryMessage(BaseModel): default="", description=SpecDoc.MEMORY_ROLE, ) - metadata: dict[str, str] = Field( + metadata: dict[SafeFieldName, str] = Field( default_factory=dict, description=SpecDoc.MEMORY_METADATA, ) @@ -495,7 +532,7 @@ class SearchMemoriesSpec(_WithOrgAndProj): ), ] set_metadata: Annotated[ - dict[str, JsonValue] | None, + dict[SafeFieldName, JsonValue] | None, Field( default=None, description=SpecDoc.SET_METADATA, @@ -585,7 +622,7 @@ class ListMemoriesSpec(_WithOrgAndProj): ), ] set_metadata: Annotated[ - dict[str, JsonValue] | None, + dict[SafeFieldName, JsonValue] | None, Field( default=None, description=SpecDoc.SET_METADATA, @@ -686,6 +723,10 @@ class AddFeatureSpec(_WithOrgAndProj): description=SpecDoc.FEATURE_SET_ID, ), ] + # Lookup field referencing an existing category by name (see + # ``SemanticMemory.add_new_feature``, which raises ``CategoryNotFoundError`` + # for unknown categories). Kept on permissive ``str`` so features can + # still be added to legacy categories whose names predate the #1383 rule. category_name: Annotated[ str, Field( @@ -694,14 +735,14 @@ class AddFeatureSpec(_WithOrgAndProj): ), ] tag: Annotated[ - str, + SafeFieldName, Field( ..., description=SpecDoc.FEATURE_TAG, ), ] feature: Annotated[ - str, + SafeFieldName, Field( ..., description=SpecDoc.FEATURE_NAME, @@ -715,7 +756,7 @@ class AddFeatureSpec(_WithOrgAndProj): ), ] feature_metadata: Annotated[ - dict[str, JsonValue] | None, + dict[SafeFieldName, JsonValue] | None, Field( default=None, description=SpecDoc.FEATURE_METADATA, @@ -771,6 +812,9 @@ class UpdateFeatureSpec(_WithOrgAndProj): description=SpecDoc.FEATURE_ID, ), ] + # Lookup of an existing category by name (see ``SemanticMemory.add_new_feature`` + # at ``semantic_memory.py:299``). Permissive so legacy categories stay + # addressable, matching ``AddFeatureSpec.category_name``. category_name: Annotated[ str | None, Field( @@ -779,14 +823,14 @@ class UpdateFeatureSpec(_WithOrgAndProj): ), ] tag: Annotated[ - str | None, + SafeFieldName | None, Field( default=None, description=SpecDoc.FEATURE_TAG, ), ] feature: Annotated[ - str | None, + SafeFieldName | None, Field( default=None, description=SpecDoc.FEATURE_NAME, @@ -800,7 +844,7 @@ class UpdateFeatureSpec(_WithOrgAndProj): ), ] metadata: Annotated[ - dict[str, str] | None, + dict[SafeFieldName, str] | None, Field( default=None, description=SpecDoc.FEATURE_METADATA, @@ -968,14 +1012,14 @@ class CreateSemanticSetTypeSpec(_WithOrgAndProj): ), ] metadata_tags: Annotated[ - list[str], + list[SafeFieldName], Field( ..., description=SpecDoc.SET_TYPE_METADATA_TAGS, ), ] name: Annotated[ - str | None, + SafeFieldName | None, Field( default=None, description=SpecDoc.SET_TYPE_NAME, @@ -1081,14 +1125,14 @@ class GetSemanticSetIdSpec(_WithOrgAndProj): ), ] metadata_tags: Annotated[ - list[str], + list[SafeFieldName], Field( ..., description=SpecDoc.SET_TYPE_METADATA_TAGS, ), ] set_metadata: Annotated[ - dict[str, JsonValue] | None, + dict[SafeFieldName, JsonValue] | None, Field( default=None, description=SpecDoc.SET_METADATA, @@ -1112,7 +1156,7 @@ class ListSemanticSetIdsSpec(_WithOrgAndProj): """Specification model for listing semantic set IDs.""" set_metadata: Annotated[ - dict[str, JsonValue] | None, + dict[SafeFieldName, JsonValue] | None, Field( default=None, description=SpecDoc.SET_METADATA, @@ -1257,7 +1301,7 @@ class AddSemanticCategorySpec(_WithOrgAndProj): ), ] category_name: Annotated[ - str, + SafeFieldName, Field( ..., description=SpecDoc.CATEGORY_NAME, @@ -1302,7 +1346,7 @@ class AddSemanticCategoryTemplateSpec(_WithOrgAndProj): ), ] category_name: Annotated[ - str, + SafeFieldName, Field( ..., description=SpecDoc.CATEGORY_NAME, @@ -1398,6 +1442,9 @@ class DisableSemanticCategorySpec(_WithOrgAndProj): description=SpecDoc.SEMANTIC_SET_ID, ), ] + # Lookup field referencing an existing category by name; kept on the + # permissive `str` type so legacy categories remain disable-able even + # if their names predate the #1383 rule. category_name: Annotated[ str, Field( @@ -1457,7 +1504,7 @@ class AddSemanticTagSpec(_WithOrgAndProj): ), ] tag_name: Annotated[ - str, + SafeFieldName, Field( ..., description=SpecDoc.TAG_NAME, diff --git a/packages/server/server_tests/memmachine_server/server/api_v2/test_router.py b/packages/server/server_tests/memmachine_server/server/api_v2/test_router.py index 8cfada19c..ad663222a 100644 --- a/packages/server/server_tests/memmachine_server/server/api_v2/test_router.py +++ b/packages/server/server_tests/memmachine_server/server/api_v2/test_router.py @@ -867,12 +867,14 @@ def test_update_feature_error(client, mock_memmachine): def test_create_semantic_set_type(client, mock_memmachine): + # `name` and `metadata_tags` items must conform to the #1383 rule + # (lowercase alphanumeric/underscore, ≤32 chars, no leading "_"). payload = { "org_id": "test_org", "project_id": "test_proj", "metadata_tags": ["user_id", "session_id"], "is_org_level": False, - "name": "User Sessions", + "name": "user_sessions", "description": "Set type for user sessions", } @@ -887,7 +889,7 @@ def test_create_semantic_set_type(client, mock_memmachine): assert call_args["session_data"].session_key == "test_org/test_proj" assert call_args["metadata_tags"] == ["user_id", "session_id"] assert call_args["is_org_level"] is False - assert call_args["name"] == "User Sessions" + assert call_args["name"] == "user_sessions" assert call_args["description"] == "Set type for user sessions" diff --git a/packages/server/server_tests/memmachine_server/server/api_v2/test_spec.py b/packages/server/server_tests/memmachine_server/server/api_v2/test_spec.py index 75fd4aee3..e87a27f7a 100644 --- a/packages/server/server_tests/memmachine_server/server/api_v2/test_spec.py +++ b/packages/server/server_tests/memmachine_server/server/api_v2/test_spec.py @@ -5,13 +5,17 @@ from dateutil.tz import tzoffset from memmachine_common.api.spec import ( DEFAULT_ORG_AND_PROJECT_ID, + MAX_FIELD_NAME_LENGTH, + AddFeatureSpec, AddMemoriesResponse, AddMemoriesSpec, AddMemoryResult, CreateProjectSpec, + CreateSemanticSetTypeSpec, DeleteEpisodicMemorySpec, DeleteProjectSpec, DeleteSemanticMemorySpec, + DisableSemanticCategorySpec, InvalidNameError, ListMemoriesSpec, MemoryMessage, @@ -19,6 +23,8 @@ ProjectResponse, SearchMemoriesSpec, SearchResult, + UpdateFeatureSpec, + _is_valid_field_name, _is_valid_name, ) from pydantic import ValidationError @@ -414,3 +420,95 @@ def test_timestamp_invalid_type(): MemoryMessage.model_validate( {"content": "hello", "timestamp": {"bad": "value"}} ) + + +# --- Field/property name validation (#1383) ---------------------------------- + + +@pytest.mark.parametrize( + "value", + ["a", "foo", "foo_bar", "a1", "user_id", "a" * MAX_FIELD_NAME_LENGTH], +) +def test_validate_field_name_valid(value): + assert _is_valid_field_name(value) == value + + +@pytest.mark.parametrize( + ("value", "expected_phrase"), + [ + ("", "1-32 chars"), + ("a" * (MAX_FIELD_NAME_LENGTH + 1), "1-32 chars"), + ("_internal", "reserved for system"), + ("0", "start with a-z and contain only"), # digit-leading + ("Foo", "start with a-z and contain only"), # uppercase + ("foo-bar", "start with a-z and contain only"), # hyphen + ("foo bar", "start with a-z and contain only"), # space + ], +) +def test_validate_field_name_invalid(value, expected_phrase): + with pytest.raises(InvalidNameError) as exc_info: + _is_valid_field_name(value) + assert expected_phrase in str(exc_info.value) + + +def test_dict_key_wiring_via_memory_message_metadata(): + """One sample of `dict[SafeFieldName, V]` enforcement; same wiring is + reused on `*.set_metadata`, `feature_metadata`, `UpdateFeatureSpec.metadata`.""" + with pytest.raises(ValidationError): + MemoryMessage(content="hi", metadata={"Bad-Key": "x"}) + msg = MemoryMessage(content="hi", metadata={"user_id": "alice"}) + assert msg.metadata == {"user_id": "alice"} + + +def test_scalar_wiring_via_add_feature_spec(): + """One sample of scalar `SafeFieldName` enforcement; same wiring is + reused on `tag`, `feature`, `name`, `category_name` (create paths), + `tag_name`.""" + with pytest.raises(ValidationError): + AddFeatureSpec( + org_id="org", + project_id="proj", + set_id="set:legacy", + category_name="prefs", + tag="Bad-Tag", + feature="favorite_food", + value="pizza", + ) + + +def test_list_item_wiring_via_metadata_tags(): + """One sample of `list[SafeFieldName]` enforcement.""" + with pytest.raises(ValidationError): + CreateSemanticSetTypeSpec( + org_id="org", + project_id="proj", + metadata_tags=["user_id", "Session Id"], + ) + + +def test_lookup_carveout_stays_permissive(): + """Lookup-by-name fields (`AddFeatureSpec.category_name`, + `UpdateFeatureSpec.category_name`, `DisableSemanticCategorySpec.category_name`) + remain permissive so legacy categories can still be addressed — + `SemanticMemory.add_new_feature` raises `CategoryNotFoundError` for + unknowns, so the API can't reject names it doesn't own.""" + spec = DisableSemanticCategorySpec( + org_id="org", + project_id="proj", + set_id="set:legacy", + category_name="Old-Cat", + ) + assert spec.category_name == "Old-Cat" + + +def test_update_feature_spec_partial_update_skips_unset_fields(): + """None defaults skip validation, so PATCH-style updates that omit + name fields keep working.""" + spec = UpdateFeatureSpec( + org_id="org", + project_id="proj", + feature_id="feat-uuid", + value="new_value", + ) + assert spec.value == "new_value" + assert spec.tag is None From 20e7f814512a749afa9ab77648077d04a2e2e6c2 Mon Sep 17 00:00:00 2001 From: alighazi288 <51366992+alighazi288@users.noreply.github.com> Date: Mon, 11 May 2026 14:07:14 -0400 Subject: [PATCH 2/2] fix(tests): adapt fixtures to the SafeFieldName rule (#1383) CI on #1396 surfaced two issues missed in the initial pass: - ty static check (server, 3.12/3.14): ty doesn't honor Pydantic's metaclass-generated `__init__` defaults, so direct kwargs construction (`AddFeatureSpec(...)`) of specs with optional fields was flagged as missing-argument. The rest of the file uses `Model.model_validate({...})` for exactly this reason; converted the four new tests to match. - test-install integration tests: six existing `create_semantic_set_type` fixtures in `test_integration_complete.py` used `name=` values with spaces and capitals ("User Sessions", "List Test Set Type", etc.). The live server now enforces `SafeFieldName` on `CreateSemanticSetTypeSpec.name`, so these 422'd. Renamed each to snake_case. --- .../client_tests/test_integration_complete.py | 12 ++-- .../server/api_v2/test_spec.py | 58 +++++++++++-------- 2 files changed, 40 insertions(+), 30 deletions(-) diff --git a/packages/client/client_tests/test_integration_complete.py b/packages/client/client_tests/test_integration_complete.py index d88e343c4..4fbfeadb0 100644 --- a/packages/client/client_tests/test_integration_complete.py +++ b/packages/client/client_tests/test_integration_complete.py @@ -1217,7 +1217,7 @@ def test_create_semantic_set_type(self, memory, unique_test_ids): set_type_id = memory.create_semantic_set_type( metadata_tags=["user_id", "session_id"], is_org_level=False, - name="User Sessions", + name="user_sessions", description="Set type for user sessions", ) @@ -1239,7 +1239,7 @@ def test_list_semantic_set_types(self, memory, unique_test_ids): # Create a set type first memory.create_semantic_set_type( metadata_tags=["list_test_tag"], - name="List Test Set Type", + name="list_test_set_type", ) # List set types @@ -1254,7 +1254,7 @@ def test_delete_semantic_set_type(self, memory, unique_test_ids): # Create a set type to delete set_type_id = memory.create_semantic_set_type( metadata_tags=["delete_test_tag"], - name="Delete Test Set Type", + name="delete_test_set_type", ) # Delete it @@ -1341,7 +1341,7 @@ def test_semantic_set_type_lifecycle(self, memory, unique_test_ids): # Step 1: Create set type set_type_id = memory.create_semantic_set_type( metadata_tags=["lifecycle_tag"], - name="Lifecycle Test", + name="lifecycle_test", description="Testing full lifecycle", ) assert set_type_id is not None @@ -1416,7 +1416,7 @@ def test_semantic_category_template_lifecycle(self, memory, unique_test_ids): # Step 1: Create a set type for templates set_type_id = memory.create_semantic_set_type( metadata_tags=["template_test_tag"], - name="Template Test Type", + name="template_test_type", description="Testing category templates", ) assert set_type_id is not None @@ -1480,7 +1480,7 @@ def test_semantic_category_disable(self, memory, unique_test_ids): # Step 1: Create a set type with a category template set_type_id = memory.create_semantic_set_type( metadata_tags=["disable_test_tag"], - name="Disable Test Type", + name="disable_test_type", ) assert set_type_id is not None diff --git a/packages/server/server_tests/memmachine_server/server/api_v2/test_spec.py b/packages/server/server_tests/memmachine_server/server/api_v2/test_spec.py index e87a27f7a..ee31eea22 100644 --- a/packages/server/server_tests/memmachine_server/server/api_v2/test_spec.py +++ b/packages/server/server_tests/memmachine_server/server/api_v2/test_spec.py @@ -455,8 +455,10 @@ def test_dict_key_wiring_via_memory_message_metadata(): """One sample of `dict[SafeFieldName, V]` enforcement; same wiring is reused on `*.set_metadata`, `feature_metadata`, `UpdateFeatureSpec.metadata`.""" with pytest.raises(ValidationError): - MemoryMessage(content="hi", metadata={"Bad-Key": "x"}) - msg = MemoryMessage(content="hi", metadata={"user_id": "alice"}) + MemoryMessage.model_validate({"content": "hi", "metadata": {"Bad-Key": "x"}}) + msg = MemoryMessage.model_validate( + {"content": "hi", "metadata": {"user_id": "alice"}} + ) assert msg.metadata == {"user_id": "alice"} @@ -465,24 +467,28 @@ def test_scalar_wiring_via_add_feature_spec(): reused on `tag`, `feature`, `name`, `category_name` (create paths), `tag_name`.""" with pytest.raises(ValidationError): - AddFeatureSpec( - org_id="org", - project_id="proj", - set_id="set:legacy", - category_name="prefs", - tag="Bad-Tag", - feature="favorite_food", - value="pizza", + AddFeatureSpec.model_validate( + { + "org_id": "org", + "project_id": "proj", + "set_id": "set:legacy", + "category_name": "prefs", + "tag": "Bad-Tag", + "feature": "favorite_food", + "value": "pizza", + } ) def test_list_item_wiring_via_metadata_tags(): """One sample of `list[SafeFieldName]` enforcement.""" with pytest.raises(ValidationError): - CreateSemanticSetTypeSpec( - org_id="org", - project_id="proj", - metadata_tags=["user_id", "Session Id"], + CreateSemanticSetTypeSpec.model_validate( + { + "org_id": "org", + "project_id": "proj", + "metadata_tags": ["user_id", "Session Id"], + } ) @@ -492,11 +498,13 @@ def test_lookup_carveout_stays_permissive(): remain permissive so legacy categories can still be addressed — `SemanticMemory.add_new_feature` raises `CategoryNotFoundError` for unknowns, so the API can't reject names it doesn't own.""" - spec = DisableSemanticCategorySpec( - org_id="org", - project_id="proj", - set_id="set:legacy", - category_name="Old-Cat", + spec = DisableSemanticCategorySpec.model_validate( + { + "org_id": "org", + "project_id": "proj", + "set_id": "set:legacy", + "category_name": "Old-Cat", + } ) assert spec.category_name == "Old-Cat" @@ -504,11 +512,13 @@ def test_lookup_carveout_stays_permissive(): def test_update_feature_spec_partial_update_skips_unset_fields(): """None defaults skip validation, so PATCH-style updates that omit name fields keep working.""" - spec = UpdateFeatureSpec( - org_id="org", - project_id="proj", - feature_id="feat-uuid", - value="new_value", + spec = UpdateFeatureSpec.model_validate( + { + "org_id": "org", + "project_id": "proj", + "feature_id": "feat-uuid", + "value": "new_value", + } ) assert spec.value == "new_value" assert spec.tag is None