refactor: ruff format

This commit is contained in:
Igor Ilic 2025-08-27 14:23:53 +02:00
parent d4ce0e18fd
commit 6ca46f1e53
2 changed files with 49 additions and 28 deletions

View file

@@ -40,11 +40,14 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
user = await get_default_user() user = await get_default_user()
detailed_extraction = True detailed_extraction = True
# Multi-language support: allow passing supported_languages # Multi-language support: allow passing supported_languages
supported_languages = None # defer to task defaults supported_languages = None # defer to task defaults
tasks = [ tasks = [
Task(get_repo_file_dependencies, detailed_extraction=detailed_extraction, supported_languages=supported_languages), Task(
get_repo_file_dependencies,
detailed_extraction=detailed_extraction,
supported_languages=supported_languages,
),
# Task(summarize_code, task_config={"batch_size": 500}), # This task takes a long time to complete # Task(summarize_code, task_config={"batch_size": 500}), # This task takes a long time to complete
Task(add_data_points, task_config={"batch_size": 30}), Task(add_data_points, task_config={"batch_size": 30}),
] ]

View file

@@ -24,6 +24,7 @@ async def get_source_code_files(repo_path, language_config: dict[str, list[str]]
-------- --------
A list of (absolute_path, language) tuples for source code files. A list of (absolute_path, language) tuples for source code files.
""" """
def _get_language_from_extension(file, language_config): def _get_language_from_extension(file, language_config):
for lang, exts in language_config.items(): for lang, exts in language_config.items():
for ext in exts: for ext in exts:
@@ -34,14 +35,14 @@ async def get_source_code_files(repo_path, language_config: dict[str, list[str]]
# Default config if not provided # Default config if not provided
if language_config is None: if language_config is None:
language_config = { language_config = {
'python': ['.py'], "python": [".py"],
'javascript': ['.js', '.jsx'], "javascript": [".js", ".jsx"],
'typescript': ['.ts', '.tsx'], "typescript": [".ts", ".tsx"],
'java': ['.java'], "java": [".java"],
'csharp': ['.cs'], "csharp": [".cs"],
'go': ['.go'], "go": [".go"],
'rust': ['.rs'], "rust": [".rs"],
'cpp': ['.cpp', '.c', '.h', '.hpp'], "cpp": [".cpp", ".c", ".h", ".hpp"],
} }
if not os.path.exists(repo_path): if not os.path.exists(repo_path):
@@ -55,9 +56,17 @@ async def get_source_code_files(repo_path, language_config: dict[str, list[str]]
continue continue
# Exclude tests and common build/venv directories # Exclude tests and common build/venv directories
excluded_dirs = { excluded_dirs = {
".venv", "venv", "env", ".env", "site-packages", ".venv",
"node_modules", "dist", "build", ".git", "venv",
"tests", "test", "env",
".env",
"site-packages",
"node_modules",
"dist",
"build",
".git",
"tests",
"test",
} }
root_parts = set(os.path.normpath(root).split(os.sep)) root_parts = set(os.path.normpath(root).split(os.sep))
base_name, _ext = os.path.splitext(file) base_name, _ext = os.path.splitext(file)
@@ -133,17 +142,19 @@ async def get_repo_file_dependencies(
# Build language config from supported_languages # Build language config from supported_languages
default_language_config = { default_language_config = {
'python': ['.py'], "python": [".py"],
'javascript': ['.js', '.jsx'], "javascript": [".js", ".jsx"],
'typescript': ['.ts', '.tsx'], "typescript": [".ts", ".tsx"],
'java': ['.java'], "java": [".java"],
'csharp': ['.cs'], "csharp": [".cs"],
'go': ['.go'], "go": [".go"],
'rust': ['.rs'], "rust": [".rs"],
'cpp': ['.cpp', '.c', '.h', '.hpp'], "cpp": [".cpp", ".c", ".h", ".hpp"],
} }
if supported_languages is not None: if supported_languages is not None:
language_config = {k: v for k, v in default_language_config.items() if k in supported_languages} language_config = {
k: v for k, v in default_language_config.items() if k in supported_languages
}
else: else:
language_config = default_language_config language_config = default_language_config
@@ -175,12 +186,16 @@ async def get_repo_file_dependencies(
tasks = [] tasks = []
for file_path, lang in source_code_files[start_range : end_range + 1]: for file_path, lang in source_code_files[start_range : end_range + 1]:
# For now, only Python is supported; extend with other languages # For now, only Python is supported; extend with other languages
if lang == 'python': if lang == "python":
tasks.append(get_local_script_dependencies(repo_path, file_path, detailed_extraction)) tasks.append(
get_local_script_dependencies(repo_path, file_path, detailed_extraction)
)
else: else:
# Placeholder: create a minimal CodeFile for other languages # Placeholder: create a minimal CodeFile for other languages
async def make_codefile_stub(file_path=file_path, lang=lang): async def make_codefile_stub(file_path=file_path, lang=lang):
async with aiofiles.open(file_path, "r", encoding="utf-8", errors="replace") as f: async with aiofiles.open(
file_path, "r", encoding="utf-8", errors="replace"
) as f:
source = await f.read() source = await f.read()
return CodeFile( return CodeFile(
id=uuid5(NAMESPACE_OID, file_path), id=uuid5(NAMESPACE_OID, file_path),
@@ -189,12 +204,15 @@ async def get_repo_file_dependencies(
language=lang, language=lang,
source_code=source, source_code=source,
) )
tasks.append(make_codefile_stub()) tasks.append(make_codefile_stub())
results: list[CodeFile] = await asyncio.gather(*tasks) results: list[CodeFile] = await asyncio.gather(*tasks)
for source_code_file in results: for source_code_file in results:
source_code_file.part_of = repo source_code_file.part_of = repo
if (getattr(source_code_file, 'language', None) is None and source_code_file.file_path.endswith('.py')): if getattr(
source_code_file.language = 'python' source_code_file, "language", None
) is None and source_code_file.file_path.endswith(".py"):
source_code_file.language = "python"
yield source_code_file yield source_code_file