Fix bot review issues: update function signature, tighten filters, remove inspect hack
This commit is contained in:
parent
b984057205
commit
6de749b39a
2 changed files with 23 additions and 24 deletions
|
|
@ -42,9 +42,7 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
|
||||||
|
|
||||||
|
|
||||||
# Multi-language support: allow passing supported_languages
|
# Multi-language support: allow passing supported_languages
|
||||||
supported_languages = [
|
supported_languages = None # defer to task defaults
|
||||||
'python', 'javascript', 'typescript', 'java', 'csharp', 'go', 'rust', 'cpp'
|
|
||||||
]
|
|
||||||
tasks = [
|
tasks = [
|
||||||
Task(get_repo_file_dependencies, detailed_extraction=detailed_extraction, supported_languages=supported_languages),
|
Task(get_repo_file_dependencies, detailed_extraction=detailed_extraction, supported_languages=supported_languages),
|
||||||
# Task(summarize_code, task_config={"batch_size": 500}), # This task takes a long time to complete
|
# Task(summarize_code, task_config={"batch_size": 500}), # This task takes a long time to complete
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ from cognee.infrastructure.engine import DataPoint
|
||||||
from cognee.shared.CodeGraphEntities import CodeFile, Repository
|
from cognee.shared.CodeGraphEntities import CodeFile, Repository
|
||||||
|
|
||||||
|
|
||||||
async def get_source_code_files(repo_path):
|
async def get_source_code_files(repo_path, language_config: dict[str, list[str]] | None = None):
|
||||||
"""
|
"""
|
||||||
Retrieve source code files from the specified repository path for multiple languages.
|
Retrieve source code files from the specified repository path for multiple languages.
|
||||||
|
|
||||||
|
|
@ -32,10 +32,6 @@ async def get_source_code_files(repo_path):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Default config if not provided
|
# Default config if not provided
|
||||||
import inspect
|
|
||||||
frame = inspect.currentframe()
|
|
||||||
args, _, _, values = inspect.getargvalues(frame)
|
|
||||||
language_config = values.get('language_config', None)
|
|
||||||
if language_config is None:
|
if language_config is None:
|
||||||
language_config = {
|
language_config = {
|
||||||
'python': ['.py'],
|
'python': ['.py'],
|
||||||
|
|
@ -57,15 +53,22 @@ async def get_source_code_files(repo_path):
|
||||||
lang = _get_language_from_extension(file, language_config)
|
lang = _get_language_from_extension(file, language_config)
|
||||||
if lang is None:
|
if lang is None:
|
||||||
continue
|
continue
|
||||||
# Exclude test files and venv for all languages
|
# Exclude common test files and virtual-environment, dependency, build, and VCS folders
|
||||||
if file.startswith("test_") or file.endswith("_test") or ".venv" in file:
|
if (
|
||||||
|
file.startswith("test_")
|
||||||
|
or file.endswith("_test")
|
||||||
|
or ".test." in file
|
||||||
|
or ".spec." in file
|
||||||
|
or any(x in root for x in (".venv", "venv", "env", ".env", "site-packages"))
|
||||||
|
or any(x in root for x in ("node_modules", "dist", "build", ".git"))
|
||||||
|
):
|
||||||
continue
|
continue
|
||||||
file_path = os.path.abspath(os.path.join(root, file))
|
file_path = os.path.abspath(os.path.join(root, file))
|
||||||
if os.path.getsize(file_path) == 0:
|
if os.path.getsize(file_path) == 0:
|
||||||
continue
|
continue
|
||||||
source_code_files.add((file_path, lang))
|
source_code_files.add((file_path, lang))
|
||||||
|
|
||||||
return list(source_code_files)
|
return sorted(list(source_code_files))
|
||||||
|
|
||||||
|
|
||||||
def run_coroutine(coroutine_func, *args, **kwargs):
|
def run_coroutine(coroutine_func, *args, **kwargs):
|
||||||
|
|
@ -100,19 +103,20 @@ async def get_repo_file_dependencies(
|
||||||
repo_path: str, detailed_extraction: bool = False, supported_languages: list = None
|
repo_path: str, detailed_extraction: bool = False, supported_languages: list = None
|
||||||
) -> AsyncGenerator[DataPoint, None]:
|
) -> AsyncGenerator[DataPoint, None]:
|
||||||
"""
|
"""
|
||||||
Generate a dependency graph for Python files in the given repository path.
|
Generate a dependency graph for source files (multi-language) in the given repository path.
|
||||||
|
|
||||||
Check the validity of the repository path and yield a repository object followed by the
|
Check the validity of the repository path and yield a repository object followed by the
|
||||||
dependencies of Python files within that repository. Raise a FileNotFoundError if the
|
dependencies of source files within that repository. Raise a FileNotFoundError if the
|
||||||
provided path does not exist. The extraction of detailed dependencies can be controlled
|
provided path does not exist. The extraction of detailed dependencies can be controlled
|
||||||
via the `detailed_extraction` argument.
|
via the `detailed_extraction` argument. Languages considered can be restricted via
|
||||||
|
the `supported_languages` argument.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
-----------
|
-----------
|
||||||
|
|
||||||
- repo_path (str): The file path to the repository where Python files are located.
|
- repo_path (str): The file path to the repository to process.
|
||||||
- detailed_extraction (bool): A flag indicating whether to perform a detailed
|
- detailed_extraction (bool): Whether to perform a detailed extraction of code parts.
|
||||||
extraction of dependencies (default is False).
|
- supported_languages (list | None): Subset of languages to include; if None, use defaults.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if isinstance(repo_path, list) and len(repo_path) == 1:
|
if isinstance(repo_path, list) and len(repo_path) == 1:
|
||||||
|
|
@ -158,6 +162,7 @@ async def get_repo_file_dependencies(
|
||||||
|
|
||||||
# Import dependency extractors for each language (Python for now, extend later)
|
# Import dependency extractors for each language (Python for now, extend later)
|
||||||
from cognee.tasks.repo_processor.get_local_dependencies import get_local_script_dependencies
|
from cognee.tasks.repo_processor.get_local_dependencies import get_local_script_dependencies
|
||||||
|
import aiofiles
|
||||||
# TODO: Add other language extractors here
|
# TODO: Add other language extractors here
|
||||||
|
|
||||||
for start_range, end_range in chunk_ranges:
|
for start_range, end_range in chunk_ranges:
|
||||||
|
|
@ -168,9 +173,7 @@ async def get_repo_file_dependencies(
|
||||||
tasks.append(get_local_script_dependencies(repo_path, file_path, detailed_extraction))
|
tasks.append(get_local_script_dependencies(repo_path, file_path, detailed_extraction))
|
||||||
else:
|
else:
|
||||||
# Placeholder: create a minimal CodeFile for other languages
|
# Placeholder: create a minimal CodeFile for other languages
|
||||||
from cognee.shared.CodeGraphEntities import CodeFile
|
async def make_codefile_stub(file_path=file_path, lang=lang):
|
||||||
import aiofiles
|
|
||||||
async def make_codefile_stub():
|
|
||||||
async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
|
async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
|
||||||
source = await f.read()
|
source = await f.read()
|
||||||
return CodeFile(
|
return CodeFile(
|
||||||
|
|
@ -186,8 +189,6 @@ async def get_repo_file_dependencies(
|
||||||
|
|
||||||
for source_code_file in results:
|
for source_code_file in results:
|
||||||
source_code_file.part_of = repo
|
source_code_file.part_of = repo
|
||||||
if not hasattr(source_code_file, 'language') or source_code_file.language is None:
|
if (getattr(source_code_file, 'language', None) is None and source_code_file.file_path.endswith('.py')):
|
||||||
# Set language for python files if not set
|
source_code_file.language = 'python'
|
||||||
if source_code_file.file_path.endswith('.py'):
|
|
||||||
source_code_file.language = 'python'
|
|
||||||
yield source_code_file
|
yield source_code_file
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue