Fix bot review issues: update function signature, tighten filters, remove inspect hack

This commit is contained in:
P-FardeenMalik 2025-08-18 21:49:55 +05:30
parent b984057205
commit 6de749b39a
2 changed files with 23 additions and 24 deletions

View file

@ -42,9 +42,7 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
# Multi-language support: allow passing supported_languages # Multi-language support: allow passing supported_languages
supported_languages = [ supported_languages = None # defer to task defaults
'python', 'javascript', 'typescript', 'java', 'csharp', 'go', 'rust', 'cpp'
]
tasks = [ tasks = [
Task(get_repo_file_dependencies, detailed_extraction=detailed_extraction, supported_languages=supported_languages), Task(get_repo_file_dependencies, detailed_extraction=detailed_extraction, supported_languages=supported_languages),
# Task(summarize_code, task_config={"batch_size": 500}), # This task takes a long time to complete # Task(summarize_code, task_config={"batch_size": 500}), # This task takes a long time to complete

View file

@ -10,7 +10,7 @@ from cognee.infrastructure.engine import DataPoint
from cognee.shared.CodeGraphEntities import CodeFile, Repository from cognee.shared.CodeGraphEntities import CodeFile, Repository
async def get_source_code_files(repo_path): async def get_source_code_files(repo_path, language_config: dict[str, list[str]] | None = None):
""" """
Retrieve source code files from the specified repository path for multiple languages. Retrieve source code files from the specified repository path for multiple languages.
@ -32,10 +32,6 @@ async def get_source_code_files(repo_path):
return None return None
# Default config if not provided # Default config if not provided
import inspect
frame = inspect.currentframe()
args, _, _, values = inspect.getargvalues(frame)
language_config = values.get('language_config', None)
if language_config is None: if language_config is None:
language_config = { language_config = {
'python': ['.py'], 'python': ['.py'],
@ -57,15 +53,22 @@ async def get_source_code_files(repo_path):
lang = _get_language_from_extension(file, language_config) lang = _get_language_from_extension(file, language_config)
if lang is None: if lang is None:
continue continue
# Exclude test files and venv for all languages #Exclude common test files and virtual/env folders
if file.startswith("test_") or file.endswith("_test") or ".venv" in file: if (
file.startswith("test_")
or file.endswith("_test")
or ".test." in file
or ".spec." in file
or any(x in root for x in (".venv", "venv", "env", ".env", "site-packages"))
or any(x in root for x in ("node_modules", "dist", "build", ".git"))
):
continue continue
file_path = os.path.abspath(os.path.join(root, file)) file_path = os.path.abspath(os.path.join(root, file))
if os.path.getsize(file_path) == 0: if os.path.getsize(file_path) == 0:
continue continue
source_code_files.add((file_path, lang)) source_code_files.add((file_path, lang))
return list(source_code_files) return sorted(list(source_code_files))
def run_coroutine(coroutine_func, *args, **kwargs): def run_coroutine(coroutine_func, *args, **kwargs):
@ -100,19 +103,20 @@ async def get_repo_file_dependencies(
repo_path: str, detailed_extraction: bool = False, supported_languages: list = None repo_path: str, detailed_extraction: bool = False, supported_languages: list = None
) -> AsyncGenerator[DataPoint, None]: ) -> AsyncGenerator[DataPoint, None]:
""" """
Generate a dependency graph for Python files in the given repository path. Generate a dependency graph for source files (multi-language) in the given repository path.
Check the validity of the repository path and yield a repository object followed by the Check the validity of the repository path and yield a repository object followed by the
dependencies of Python files within that repository. Raise a FileNotFoundError if the dependencies of source files within that repository. Raise a FileNotFoundError if the
provided path does not exist. The extraction of detailed dependencies can be controlled provided path does not exist. The extraction of detailed dependencies can be controlled
via the `detailed_extraction` argument. via the `detailed_extraction` argument. Languages considered can be restricted via
the `supported_languages` argument.
Parameters: Parameters:
----------- -----------
- repo_path (str): The file path to the repository where Python files are located. - repo_path (str): The file path to the repository to process.
- detailed_extraction (bool): A flag indicating whether to perform a detailed - detailed_extraction (bool): Whether to perform a detailed extraction of code parts.
extraction of dependencies (default is False). (default False) - supported_languages (list | None): Subset of languages to include; if None, use defaults.
""" """
if isinstance(repo_path, list) and len(repo_path) == 1: if isinstance(repo_path, list) and len(repo_path) == 1:
@ -158,6 +162,7 @@ async def get_repo_file_dependencies(
# Import dependency extractors for each language (Python for now, extend later) # Import dependency extractors for each language (Python for now, extend later)
from cognee.tasks.repo_processor.get_local_dependencies import get_local_script_dependencies from cognee.tasks.repo_processor.get_local_dependencies import get_local_script_dependencies
import aiofiles
# TODO: Add other language extractors here # TODO: Add other language extractors here
for start_range, end_range in chunk_ranges: for start_range, end_range in chunk_ranges:
@ -168,9 +173,7 @@ async def get_repo_file_dependencies(
tasks.append(get_local_script_dependencies(repo_path, file_path, detailed_extraction)) tasks.append(get_local_script_dependencies(repo_path, file_path, detailed_extraction))
else: else:
# Placeholder: create a minimal CodeFile for other languages # Placeholder: create a minimal CodeFile for other languages
from cognee.shared.CodeGraphEntities import CodeFile async def make_codefile_stub(file_path=file_path, lang=lang):
import aiofiles
async def make_codefile_stub():
async with aiofiles.open(file_path, "r", encoding="utf-8") as f: async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
source = await f.read() source = await f.read()
return CodeFile( return CodeFile(
@ -186,8 +189,6 @@ async def get_repo_file_dependencies(
for source_code_file in results: for source_code_file in results:
source_code_file.part_of = repo source_code_file.part_of = repo
if not hasattr(source_code_file, 'language') or source_code_file.language is None: if (getattr(source_code_file, 'language', None) is None and source_code_file.file_path.endswith('.py')):
# Set language for python files if not set source_code_file.language = 'python'
if source_code_file.file_path.endswith('.py'):
source_code_file.language = 'python'
yield source_code_file yield source_code_file