Skip to content

Commit fd58bb8

Browse files
committed
Only add source code when INDEX_SOURCE is enabled
1 parent 222fa1f commit fd58bb8

2 files changed

Lines changed: 33 additions & 26 deletions

File tree

src/codegraphcontext/tools/graph_builder.py

Lines changed: 14 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,6 @@
1515
from ..utils.tree_sitter_manager import get_tree_sitter_manager
1616
from ..cli.config_manager import get_config_value
1717

18-
INDEX_SOURCE = (get_config_value("INDEX_SOURCE") or "false").lower() == "true"
19-
2018

2119
class TreeSitterParser:
2220
"""A generic parser wrapper for a specific language using tree-sitter."""
@@ -150,7 +148,7 @@ def create_schema(self):
150148
session.run("""
151149
CREATE FULLTEXT INDEX code_search_index IF NOT EXISTS
152150
FOR (n:Function|Class|Variable)
153-
ON EACH [n.name, n.source, n.docstring]
151+
ON EACH [n.name, coalesce(n.source, ''), coalesce(n.docstring, '')]
154152
""" )
155153

156154
info_logger("Database schema verified/created successfully")
@@ -332,12 +330,7 @@ def add_file_to_graph(self, file_data: Dict, repo_name: str, imports_map: dict):
332330
SET n += $props
333331
MERGE (f)-[:CONTAINS]->(n)
334332
"""
335-
# Respect INDEX_SOURCE config
336-
if not INDEX_SOURCE:
337-
item = item.copy()
338-
item.pop("source", None)
339-
item.pop("source_code", None)
340-
item.pop("docstring", None)
333+
341334
session.run(query, file_path=file_path_str, name=item['name'], line_number=item['line_number'], props=item)
342335

343336
if label == 'Function':
@@ -807,15 +800,23 @@ def parse_file(self, repo_path: Path, file_path: Path, is_dependency: bool = Fal
807800

808801
debug_log(f"[parse_file] Starting parsing for: {file_path} with {parser.language_name} parser")
809802
try:
803+
index_source = (get_config_value("INDEX_SOURCE") or "false").lower() == "true"
810804
if parser.language_name == 'python':
811805
is_notebook = file_path.suffix == '.ipynb'
812-
file_data = parser.parse(file_path, is_dependency, is_notebook=is_notebook)
806+
file_data = parser.parse(
807+
file_path,
808+
is_dependency,
809+
is_notebook=is_notebook,
810+
index_source=index_source
811+
)
813812
else:
814-
file_data = parser.parse(file_path, is_dependency)
813+
file_data = parser.parse(
814+
file_path,
815+
is_dependency,
816+
index_source=index_source
817+
)
815818
file_data['repo_path'] = str(repo_path)
816-
debug_log(f"[parse_file] Successfully parsed: {file_path}")
817819
return file_data
818-
819820
except Exception as e:
820821
error_logger(f"Error parsing {file_path} with {parser.language_name} parser: {e}")
821822
debug_log(f"[parse_file] Error parsing {file_path}: {e}")

src/codegraphcontext/tools/languages/python.py

Lines changed: 19 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -111,11 +111,12 @@ def _get_docstring(self, body_node):
111111
return self._get_node_text(first_child.children[0])
112112
return None
113113

114-
def parse(self, file_path: Path, is_dependency: bool = False, is_notebook: bool = False) -> Dict:
114+
def parse(self, file_path: Path, is_dependency: bool = False, is_notebook: bool = False, index_source: bool = False) -> Dict:
115115
"""Parses a file and returns its structure in a standardized dictionary format."""
116116
original_file_path = file_path
117117
temp_py_file = None
118118
source_code = None
119+
self.index_source = index_source
119120

120121
try:
121122
if is_notebook:
@@ -140,7 +141,7 @@ def parse(self, file_path: Path, is_dependency: bool = False, is_notebook: bool
140141
root_node = tree.root_node
141142

142143
functions = self._find_functions(root_node)
143-
functions.extend(self._find_lambda_assignments(root_node))
144+
functions.extend(self._find_lambda_assignments(root_node, index_source))
144145
classes = self._find_classes(root_node)
145146
imports = self._find_imports(root_node)
146147
function_calls = self._find_calls(root_node)
@@ -164,7 +165,7 @@ def parse(self, file_path: Path, is_dependency: bool = False, is_notebook: bool
164165
os.remove(temp_py_file)
165166
info_logger(f"Removed temporary file: {temp_py_file}")
166167

167-
def _find_lambda_assignments(self, root_node):
168+
def _find_lambda_assignments(self, root_node, index_source: bool = False):
168169
functions = []
169170
query_str = PY_QUERIES.get('lambda_assignments')
170171
if not query_str: return []
@@ -187,9 +188,6 @@ def _find_lambda_assignments(self, root_node):
187188
"line_number": node.start_point[0] + 1,
188189
"end_line": assignment_node.end_point[0] + 1,
189190
"args": [p for p in [self._get_node_text(p) for p in params_node.children if p.type == 'identifier'] if p] if params_node else [],
190-
"source": self._get_node_text(assignment_node),
191-
192-
"docstring": None,
193191
"cyclomatic_complexity": 1,
194192
"context": context,
195193
"context_type": context_type,
@@ -198,10 +196,14 @@ def _find_lambda_assignments(self, root_node):
198196
"lang": self.language_name,
199197
"is_dependency": False,
200198
}
199+
if self.index_source:
200+
func_data["source"] = self._get_node_text(assignment_node)
201+
func_data["docstring"] = None
202+
201203
functions.append(func_data)
202204
return functions
203205

204-
def _find_functions(self, root_node):
206+
def _find_functions(self, root_node, index_source: bool = False):
205207
functions = []
206208
query_str = PY_QUERIES['functions']
207209
for match in execute_query(self.language, query_str, root_node):
@@ -253,9 +255,6 @@ def _find_functions(self, root_node):
253255
"line_number": node.start_point[0] + 1,
254256
"end_line": func_node.end_point[0] + 1,
255257
"args": args,
256-
"source": self._get_node_text(func_node),
257-
258-
"docstring": self._get_docstring(body_node),
259258
"cyclomatic_complexity": self._calculate_complexity(func_node),
260259
"context": context,
261260
"context_type": context_type,
@@ -264,10 +263,15 @@ def _find_functions(self, root_node):
264263
"lang": self.language_name,
265264
"is_dependency": False,
266265
}
266+
267+
if self.index_source:
268+
func_data["source"] = self._get_node_text(func_node)
269+
func_data["docstring"] = self._get_docstring(body_node)
270+
267271
functions.append(func_data)
268272
return functions
269273

270-
def _find_classes(self, root_node):
274+
def _find_classes(self, root_node, index_source: bool = False):
271275
classes = []
272276
query_str = PY_QUERIES['classes']
273277
for match in execute_query(self.language, query_str, root_node):
@@ -293,13 +297,15 @@ def _find_classes(self, root_node):
293297
"line_number": node.start_point[0] + 1,
294298
"end_line": class_node.end_point[0] + 1,
295299
"bases": [b for b in bases if b],
296-
"source": self._get_node_text(class_node),
297-
"docstring": self._get_docstring(body_node),
298300
"context": context,
299301
"decorators": [d for d in decorators if d],
300302
"lang": self.language_name,
301303
"is_dependency": False,
302304
}
305+
if self.index_source:
306+
class_data["source"] = self._get_node_text(class_node)
307+
class_data["docstring"] = self._get_docstring(body_node)
308+
303309
classes.append(class_data)
304310
return classes
305311

0 commit comments

Comments
 (0)