refactor: ruff format
This commit is contained in:
parent
d4ce0e18fd
commit
6ca46f1e53
2 changed files with 49 additions and 28 deletions
|
|
@ -40,11 +40,14 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
|
||||||
user = await get_default_user()
|
user = await get_default_user()
|
||||||
detailed_extraction = True
|
detailed_extraction = True
|
||||||
|
|
||||||
|
|
||||||
# Multi-language support: allow passing supported_languages
|
# Multi-language support: allow passing supported_languages
|
||||||
supported_languages = None # defer to task defaults
|
supported_languages = None # defer to task defaults
|
||||||
tasks = [
|
tasks = [
|
||||||
Task(get_repo_file_dependencies, detailed_extraction=detailed_extraction, supported_languages=supported_languages),
|
Task(
|
||||||
|
get_repo_file_dependencies,
|
||||||
|
detailed_extraction=detailed_extraction,
|
||||||
|
supported_languages=supported_languages,
|
||||||
|
),
|
||||||
# Task(summarize_code, task_config={"batch_size": 500}), # This task takes a long time to complete
|
# Task(summarize_code, task_config={"batch_size": 500}), # This task takes a long time to complete
|
||||||
Task(add_data_points, task_config={"batch_size": 30}),
|
Task(add_data_points, task_config={"batch_size": 30}),
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@ async def get_source_code_files(repo_path, language_config: dict[str, list[str]]
|
||||||
--------
|
--------
|
||||||
A list of (absolute_path, language) tuples for source code files.
|
A list of (absolute_path, language) tuples for source code files.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def _get_language_from_extension(file, language_config):
|
def _get_language_from_extension(file, language_config):
|
||||||
for lang, exts in language_config.items():
|
for lang, exts in language_config.items():
|
||||||
for ext in exts:
|
for ext in exts:
|
||||||
|
|
@ -34,14 +35,14 @@ async def get_source_code_files(repo_path, language_config: dict[str, list[str]]
|
||||||
# Default config if not provided
|
# Default config if not provided
|
||||||
if language_config is None:
|
if language_config is None:
|
||||||
language_config = {
|
language_config = {
|
||||||
'python': ['.py'],
|
"python": [".py"],
|
||||||
'javascript': ['.js', '.jsx'],
|
"javascript": [".js", ".jsx"],
|
||||||
'typescript': ['.ts', '.tsx'],
|
"typescript": [".ts", ".tsx"],
|
||||||
'java': ['.java'],
|
"java": [".java"],
|
||||||
'csharp': ['.cs'],
|
"csharp": [".cs"],
|
||||||
'go': ['.go'],
|
"go": [".go"],
|
||||||
'rust': ['.rs'],
|
"rust": [".rs"],
|
||||||
'cpp': ['.cpp', '.c', '.h', '.hpp'],
|
"cpp": [".cpp", ".c", ".h", ".hpp"],
|
||||||
}
|
}
|
||||||
|
|
||||||
if not os.path.exists(repo_path):
|
if not os.path.exists(repo_path):
|
||||||
|
|
@ -55,9 +56,17 @@ async def get_source_code_files(repo_path, language_config: dict[str, list[str]]
|
||||||
continue
|
continue
|
||||||
# Exclude tests and common build/venv directories
|
# Exclude tests and common build/venv directories
|
||||||
excluded_dirs = {
|
excluded_dirs = {
|
||||||
".venv", "venv", "env", ".env", "site-packages",
|
".venv",
|
||||||
"node_modules", "dist", "build", ".git",
|
"venv",
|
||||||
"tests", "test",
|
"env",
|
||||||
|
".env",
|
||||||
|
"site-packages",
|
||||||
|
"node_modules",
|
||||||
|
"dist",
|
||||||
|
"build",
|
||||||
|
".git",
|
||||||
|
"tests",
|
||||||
|
"test",
|
||||||
}
|
}
|
||||||
root_parts = set(os.path.normpath(root).split(os.sep))
|
root_parts = set(os.path.normpath(root).split(os.sep))
|
||||||
base_name, _ext = os.path.splitext(file)
|
base_name, _ext = os.path.splitext(file)
|
||||||
|
|
@ -133,17 +142,19 @@ async def get_repo_file_dependencies(
|
||||||
|
|
||||||
# Build language config from supported_languages
|
# Build language config from supported_languages
|
||||||
default_language_config = {
|
default_language_config = {
|
||||||
'python': ['.py'],
|
"python": [".py"],
|
||||||
'javascript': ['.js', '.jsx'],
|
"javascript": [".js", ".jsx"],
|
||||||
'typescript': ['.ts', '.tsx'],
|
"typescript": [".ts", ".tsx"],
|
||||||
'java': ['.java'],
|
"java": [".java"],
|
||||||
'csharp': ['.cs'],
|
"csharp": [".cs"],
|
||||||
'go': ['.go'],
|
"go": [".go"],
|
||||||
'rust': ['.rs'],
|
"rust": [".rs"],
|
||||||
'cpp': ['.cpp', '.c', '.h', '.hpp'],
|
"cpp": [".cpp", ".c", ".h", ".hpp"],
|
||||||
}
|
}
|
||||||
if supported_languages is not None:
|
if supported_languages is not None:
|
||||||
language_config = {k: v for k, v in default_language_config.items() if k in supported_languages}
|
language_config = {
|
||||||
|
k: v for k, v in default_language_config.items() if k in supported_languages
|
||||||
|
}
|
||||||
else:
|
else:
|
||||||
language_config = default_language_config
|
language_config = default_language_config
|
||||||
|
|
||||||
|
|
@ -175,12 +186,16 @@ async def get_repo_file_dependencies(
|
||||||
tasks = []
|
tasks = []
|
||||||
for file_path, lang in source_code_files[start_range : end_range + 1]:
|
for file_path, lang in source_code_files[start_range : end_range + 1]:
|
||||||
# For now, only Python is supported; extend with other languages
|
# For now, only Python is supported; extend with other languages
|
||||||
if lang == 'python':
|
if lang == "python":
|
||||||
tasks.append(get_local_script_dependencies(repo_path, file_path, detailed_extraction))
|
tasks.append(
|
||||||
|
get_local_script_dependencies(repo_path, file_path, detailed_extraction)
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
# Placeholder: create a minimal CodeFile for other languages
|
# Placeholder: create a minimal CodeFile for other languages
|
||||||
async def make_codefile_stub(file_path=file_path, lang=lang):
|
async def make_codefile_stub(file_path=file_path, lang=lang):
|
||||||
async with aiofiles.open(file_path, "r", encoding="utf-8", errors="replace") as f:
|
async with aiofiles.open(
|
||||||
|
file_path, "r", encoding="utf-8", errors="replace"
|
||||||
|
) as f:
|
||||||
source = await f.read()
|
source = await f.read()
|
||||||
return CodeFile(
|
return CodeFile(
|
||||||
id=uuid5(NAMESPACE_OID, file_path),
|
id=uuid5(NAMESPACE_OID, file_path),
|
||||||
|
|
@ -189,12 +204,15 @@ async def get_repo_file_dependencies(
|
||||||
language=lang,
|
language=lang,
|
||||||
source_code=source,
|
source_code=source,
|
||||||
)
|
)
|
||||||
|
|
||||||
tasks.append(make_codefile_stub())
|
tasks.append(make_codefile_stub())
|
||||||
|
|
||||||
results: list[CodeFile] = await asyncio.gather(*tasks)
|
results: list[CodeFile] = await asyncio.gather(*tasks)
|
||||||
|
|
||||||
for source_code_file in results:
|
for source_code_file in results:
|
||||||
source_code_file.part_of = repo
|
source_code_file.part_of = repo
|
||||||
if (getattr(source_code_file, 'language', None) is None and source_code_file.file_path.endswith('.py')):
|
if getattr(
|
||||||
source_code_file.language = 'python'
|
source_code_file, "language", None
|
||||||
|
) is None and source_code_file.file_path.endswith(".py"):
|
||||||
|
source_code_file.language = "python"
|
||||||
yield source_code_file
|
yield source_code_file
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue