Skip to content

Commit bce234b

Browse files
fix(writer): 完善 KuzuDB 标签发现的后端覆盖与健壮性
1 parent 5eaf754 commit bce234b

4 files changed

Lines changed: 98 additions & 55 deletions

File tree

src/codegraphcontext/cli/main.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2849,7 +2849,7 @@ def _write_datasource_graph(ingested: dict) -> None:
28492849
raise typer.Exit(1)
28502850

28512851
from codegraphcontext.tools.indexing.persistence.writer import GraphWriter
2852-
GraphWriter(driver).write_datasource_graph(ingested)
2852+
GraphWriter(driver, db_manager=dm).write_datasource_graph(ingested)
28532853

28542854

28552855
if __name__ == "__main__":

src/codegraphcontext/core/cgc_bundle.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -371,10 +371,10 @@ def _extract_schema(self) -> Dict[str, Any]:
371371
# Get node labels (backend-aware)
372372
backend = getattr(self.db_manager, "get_backend_type", lambda: "neo4j")()
373373
try:
374-
if backend == "kuzudb":
375-
# KuzuDB Python bindings ≤ 0.11 don't support SHOW TABLES
374+
if backend in ("kuzudb", "ladybugdb"):
375+
# KuzuDB/LadybugDB: SHOW TABLES not available in ≤ 0.11
376376
result = session.run("MATCH (n) RETURN DISTINCT label(n) AS lbl")
377-
labels = sorted({record[0] for record in result if record[0]})
377+
labels = sorted({record[0] for record in result if record[0] is not None})
378378
else:
379379
result = session.run("CALL db.labels()")
380380
labels = []

src/codegraphcontext/tools/indexing/persistence/writer.py

Lines changed: 45 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
from ....utils.debug_log import info_logger, warning_logger
1212
from ....utils.git_utils import get_repo_commit_hash
1313
from ..sanitize import sanitize_props
14+
from ..schema_contract import NODE_LABELS
1415

1516

1617
def _is_binder_exception(e: Exception) -> bool:
@@ -25,14 +26,21 @@ class GraphWriter:
2526
def __init__(self, driver: Any, db_manager: Any = None):
2627
self.driver = driver
2728
self._db_manager = db_manager
29+
if db_manager is None:
30+
warning_logger(
31+
"[GraphWriter] db_manager not provided; "
32+
"backend detection will default to 'neo4j'"
33+
)
2834

2935
def _get_all_node_labels(self) -> list[str]:
3036
"""Discover all node labels in the database, backend-aware.
3137
32-
Neo4j uses ``CALL db.labels()``, KuzuDB uses
33-
``MATCH (n) RETURN DISTINCT label(n)`` (since ``SHOW TABLES``
34-
is not supported in KuzuDB Python bindings ≤ 0.11),
35-
and other backends fall back to a comprehensive static list.
38+
Neo4j / Nornic use ``CALL db.labels()``.
39+
KuzuDB / LadybugDB use ``MATCH (n) RETURN DISTINCT label(n)``
40+
(``SHOW TABLES`` is not supported in KuzuDB Python bindings ≤ 0.11).
41+
FalkorDB uses ``CALL db.labels()`` without YIELD.
42+
All backends fall back to :data:`schema_contract.NODE_LABELS`
43+
plus supplementary labels on failure.
3644
"""
3745
# Prefer db_manager.get_backend_type(); fall back to driver, then neo4j
3846
backend = (
@@ -41,41 +49,56 @@ def _get_all_node_labels(self) -> list[str]:
4149
or (lambda: "neo4j")
4250
)()
4351

44-
if backend == "kuzudb":
52+
if backend in ("kuzudb", "ladybugdb"):
53+
# NOTE: Full node scan required because SHOW TABLES is unavailable
54+
# in KuzuDB ≤ 0.11. Acceptable for delete_repository (low-frequency).
4555
try:
4656
with self.driver.session() as session:
47-
result = session.run("MATCH (n) RETURN DISTINCT label(n) AS lbl")
48-
labels = sorted({record[0] for record in result if record[0]})
57+
result = session.run(
58+
"MATCH (n) RETURN DISTINCT label(n) AS lbl"
59+
)
60+
labels = sorted(
61+
{record[0] for record in result if record[0] is not None}
62+
)
4963
if labels:
5064
return labels
5165
except Exception as e:
52-
info_logger(f"[DELETE] label discovery failed for KuzuDB ({e}), using fallback list")
66+
info_logger(
67+
f"[DELETE] label discovery failed for {backend} "
68+
f"({e}), using fallback list"
69+
)
5370

54-
elif backend in ("neo4j", "nornicdb"):
71+
elif backend in ("neo4j", "nornic"):
5572
try:
5673
with self.driver.session() as session:
57-
label_records = session.run("CALL db.labels() YIELD label RETURN label")
74+
label_records = session.run(
75+
"CALL db.labels() YIELD label RETURN label"
76+
)
5877
return sorted({record["label"] for record in label_records})
5978
except Exception as e:
60-
info_logger(f"[DELETE] CALL db.labels() failed ({e}), using fallback list")
79+
info_logger(
80+
f"[DELETE] CALL db.labels() failed for {backend} "
81+
f"({e}), using fallback list"
82+
)
6183

6284
elif backend in ("falkordb", "falkordb-remote"):
6385
try:
6486
with self.driver.session() as session:
6587
label_records = session.run("CALL db.labels()")
6688
return sorted({record["label"] for record in label_records})
6789
except Exception as e:
68-
info_logger(f"[DELETE] CALL db.labels() failed for FalkorDB ({e}), using fallback list")
69-
70-
# Fallback: comprehensive list of all known CGC node labels
71-
return sorted({
72-
"Repository", "Directory", "File", "Function", "Class",
73-
"Variable", "Parameter", "Module", "Interface", "Trait",
74-
"Struct", "Enum", "EnumValue", "Namespace", "TypeAlias",
75-
"Decorator", "Property", "Method", "DbTable", "DbColumn",
76-
"RedisKeyPattern", "ExternalClass", "ExternalFunction",
77-
"MavenModule", "GradleModule", "Endpoint", "OrmMapping",
78-
"Query", "SpringDataRepository",
90+
info_logger(
91+
f"[DELETE] CALL db.labels() failed for {backend} "
92+
f"({e}), using fallback list"
93+
)
94+
95+
# Fallback: canonical NODE_LABELS from schema_contract + supplementary
96+
# labels that may exist in the graph from dynamic indexing paths.
97+
return sorted(NODE_LABELS | {
98+
"ExternalClass", "ExternalFunction",
99+
"EnumValue", "Namespace", "TypeAlias", "Decorator",
100+
"Method", "Endpoint", "OrmMapping", "Query",
101+
"SpringDataRepository", "Mixin", "Extension", "Object",
79102
})
80103

81104
def add_repository_to_graph(self, repo_path: Path, is_dependency: bool = False) -> None:

tests/unit/tools/test_graph_builder_perf_fixes.py

Lines changed: 49 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,18 @@ def session(self):
6565
return self._session
6666

6767

68-
def _make_graph_builder(session: Optional[_RecordingSession] = None):
68+
class _FakeDBManager:
69+
"""Minimal stub that satisfies GraphWriter's backend detection."""
70+
71+
def __init__(self, backend: str = "neo4j"):
72+
self._backend = backend
73+
74+
def get_backend_type(self) -> str:
75+
return self._backend
76+
77+
78+
def _make_graph_builder(session: Optional[_RecordingSession] = None,
79+
backend: str = "neo4j"):
6980
"""Return a GraphBuilder with a fake driver. Skips full __init__ setup."""
7081
from codegraphcontext.tools.graph_builder import GraphBuilder
7182
from codegraphcontext.tools.indexing.persistence.writer import GraphWriter
@@ -74,7 +85,8 @@ def _make_graph_builder(session: Optional[_RecordingSession] = None):
7485
if session is None:
7586
session = _RecordingSession()
7687
gb.driver = _FakeDriver(session)
77-
gb._writer = GraphWriter(gb.driver)
88+
dm = _FakeDBManager(backend)
89+
gb._writer = GraphWriter(gb.driver, db_manager=dm)
7890
gb.parsers = {}
7991
return gb, session
8092

@@ -527,10 +539,13 @@ def test_non_javascript_import_rows_are_schema_complete(self):
527539
# ---------------------------------------------------------------------------
528540

529541
class _DeleteRepoSession(_RecordingSession):
530-
"""RecordingSession that intercepts `CALL db.labels()` queries and
542+
"""RecordingSession that intercepts label-discovery queries and
531543
returns a fixed label list without consuming a slot in the responses
532544
queue, so positional fixtures stay focused on deletion counts and
533-
aren't disturbed by the new label-discovery query in the implementation."""
545+
aren't disturbed by the label-discovery query in the implementation.
546+
547+
Supports both Neo4j (``CALL db.labels()``) and KuzuDB/LadybugDB
548+
(``MATCH (n) RETURN DISTINCT label(n)``) discovery patterns."""
534549

535550
def __init__(self, labels, responses=None):
536551
super().__init__(responses=responses)
@@ -540,6 +555,8 @@ def run(self, query: str, **kwargs):
540555
self.calls.append({"query": query, "kwargs": kwargs})
541556
if "db.labels()" in query:
542557
return _FakeResult([{"label": lbl} for lbl in self._labels])
558+
if "RETURN DISTINCT label(n)" in query:
559+
return _FakeResult([[lbl] for lbl in self._labels])
543560
if self._call_idx < len(self._responses):
544561
result = self._responses[self._call_idx]
545562
else:
@@ -685,46 +702,49 @@ def test_calls_db_labels_after_existence_check(self):
685702

686703
def test_finds_repo_stored_with_backslash_path(self):
687704
"""Fallback should find a Repository stored with Windows backslash paths."""
688-
session = _DeleteRepoSession(labels=self._DEFAULT_DB_LABELS, responses=[
705+
session = _RecordingSession(responses=[
689706
_FakeResult([{"cnt": 0}]), # normalized (forward-slash) fails
690707
_FakeResult([{"cnt": 1}]), # fallback (original backslash) succeeds
691-
] + [_FakeResult([{"deleted": 0}])] * 20)
708+
*([_FakeResult([{"deleted": 0}])] * 20), # drain loops
709+
])
692710
gb, _ = _make_graph_builder(session)
693711
result = gb.delete_repository_from_graph("C:\\Users\\test\\repo")
694712
assert result is True
695713

696-
# Verify that fallback path was used for subsequent operations
697-
# Should use backslash path prefix for STARTS WITH queries
698-
assert any(c["kwargs"].get("prefix") == "C:\\Users\\test\\repo\\" for c in session.calls), \
699-
"Expected backslash path prefix in STARTS WITH queries after fallback"
714+
# Verify that fallback path was used in subsequent parameterised queries.
715+
# The implementation uses $prefix / $path bindings, so we inspect kwargs
716+
# rather than the query string.
717+
prefix_values = [
718+
c["kwargs"].get("prefix", "") for c in session.calls
719+
if "STARTS WITH" in c["query"]
720+
]
721+
assert any("C:\\Users\\test\\repo\\" in p for p in prefix_values), \
722+
f"Expected backslash path prefix in $prefix kwargs, got: {prefix_values}"
700723

701724
def test_uses_matching_path_format_for_deletion(self):
702725
"""When fallback triggers, deletion queries should use the path format that matched."""
703-
session = _DeleteRepoSession(labels=self._DEFAULT_DB_LABELS, responses=[
726+
session = _RecordingSession(responses=[
704727
_FakeResult([{"cnt": 0}]), # normalized (forward-slash) fails
705728
_FakeResult([{"cnt": 1}]), # fallback (original backslash) succeeds
706-
] + [_FakeResult([{"deleted": 0}])] * 20)
729+
*([_FakeResult([{"deleted": 0}])] * 20), # drain loops
730+
])
707731
gb, _ = _make_graph_builder(session)
708732
gb.delete_repository_from_graph("D:\\WorkPlace\\AI\\MinerU\\pipeline")
709733

710-
# Check that all deletion queries use the backslash path
734+
# Check that parameterised queries use backslash paths (not forward-slash).
735+
# The implementation passes paths via $prefix / $path bindings.
711736
for c in session.calls:
712-
q = c["query"]
713-
kwargs = c["kwargs"]
714-
if "STARTS WITH" in q or "DETACH DELETE" in q:
715-
# Should use backslash path, not forward-slash
716-
prefix = kwargs.get("prefix")
717-
path = kwargs.get("path")
718-
if prefix:
719-
assert "D:/WorkPlace/AI/MinerU/pipeline" not in prefix, \
720-
"Should not use normalized forward-slash path after fallback"
721-
assert "D:\\WorkPlace\\AI\\MinerU\\pipeline" in prefix or "D:\\WorkPlace\\AI\\MinerU\\pipeline\\" in prefix, \
722-
"Should use original backslash path after fallback"
723-
if path:
724-
assert "D:/WorkPlace/AI/MinerU/pipeline" not in path, \
725-
"Should not use normalized forward-slash path after fallback"
726-
assert "D:\\WorkPlace\\AI\\MinerU\\pipeline" in path, \
727-
"Should use original backslash path after fallback"
737+
if "STARTS WITH" in c["query"] or "DETACH DELETE" in c["query"]:
738+
kwargs = c["kwargs"]
739+
for key in ("prefix", "path"):
740+
val = kwargs.get(key, "")
741+
if val:
742+
assert "D:/WorkPlace/AI/MinerU/pipeline" not in val, \
743+
f"Should not use forward-slash path in ${key} after fallback, got: {val}"
744+
assert (
745+
"D:\\WorkPlace\\AI\\MinerU\\pipeline" in val
746+
or "D:\\WorkPlace\\AI\\MinerU\\pipeline\\" in val
747+
), f"Should use backslash path in ${key}, got: {val}"
728748

729749

730750
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)