COG-546 get_local_script_dependencies (#6)
A utility function, `get_local_script_dependencies`: - Extracts and resolves local dependencies of a Python script using `jedi` and `parso`. - Returns a sorted list of unique module paths - Optionally dependencies outside a specified repository path are filtered out - Includes an example/checker in `cognee/tasks/code`. Will be used for creating a graph from a repo.
This commit is contained in:
commit
a8aefd57ef
3 changed files with 149 additions and 0 deletions
20
cognee/tasks/code/get_local_dependencies_checker.py
Normal file
20
cognee/tasks/code/get_local_dependencies_checker.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
import argparse
|
||||
import asyncio
|
||||
from cognee.tasks.repo_processor.get_local_dependencies import get_local_script_dependencies
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Get local script dependencies.")
|
||||
|
||||
# Suggested path: .../cognee/examples/python/simple_example.py
|
||||
parser.add_argument("script_path", type=str, help="Absolute path to the Python script file")
|
||||
|
||||
# Suggested path: .../cognee
|
||||
parser.add_argument("repo_path", type=str, help="Absolute path to the repository root")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
dependencies = asyncio.run(get_local_script_dependencies(args.script_path, args.repo_path))
|
||||
|
||||
print("Dependencies:")
|
||||
for dependency in dependencies:
|
||||
print(dependency)
|
||||
3
cognee/tasks/repo_processor/__init__.py
Normal file
3
cognee/tasks/repo_processor/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
import logging
|
||||
|
||||
logger = logging.getLogger("task:repo_processor")
|
||||
126
cognee/tasks/repo_processor/get_local_dependencies.py
Normal file
126
cognee/tasks/repo_processor/get_local_dependencies.py
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
import argparse
|
||||
import asyncio
|
||||
import sys
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Optional
|
||||
|
||||
import aiofiles
|
||||
import jedi
|
||||
import parso
|
||||
from parso.tree import BaseNode
|
||||
|
||||
from cognee.tasks.repo_processor import logger
|
||||
|
||||
|
||||
@contextmanager
|
||||
def add_sys_path(path):
|
||||
original_sys_path = sys.path.copy()
|
||||
sys.path.insert(0, path)
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
sys.path = original_sys_path
|
||||
|
||||
|
||||
def _get_code_entities(node: parso.tree.NodeOrLeaf) -> List[Dict[str, any]]:
|
||||
"""
|
||||
Recursively extract code entities using parso.
|
||||
"""
|
||||
code_entity_list = []
|
||||
|
||||
if not hasattr(node, 'children'):
|
||||
return code_entity_list
|
||||
|
||||
name_nodes = (child for child in node.children if child.type == 'name')
|
||||
for name_node in name_nodes:
|
||||
code_entity = {
|
||||
'name': name_node.value,
|
||||
'line': name_node.start_pos[0],
|
||||
'column': name_node.start_pos[1]
|
||||
}
|
||||
code_entity_list.append(code_entity)
|
||||
|
||||
# Recursively process child nodes
|
||||
for child in node.children:
|
||||
code_entity_list.extend(_get_code_entities(child))
|
||||
|
||||
return code_entity_list
|
||||
|
||||
|
||||
def _update_code_entity(script: jedi.Script, code_entity: Dict[str, any]) -> None:
|
||||
"""
|
||||
Update a code_entity with (full_name, module_name, module_path) using Jedi
|
||||
"""
|
||||
try:
|
||||
results = script.goto(code_entity["line"], code_entity["column"], follow_imports=True)
|
||||
if results:
|
||||
result = results[0]
|
||||
code_entity["full_name"] = getattr(result, "full_name", None)
|
||||
code_entity["module_name"] = getattr(result, "module_name", None)
|
||||
code_entity["module_path"] = getattr(result, "module_path", None)
|
||||
except Exception as e:
|
||||
# logging.warning(f"Failed to analyze code entity {code_entity['name']}: {e}")
|
||||
logger.error(f"Failed to analyze code entity {code_entity['name']}: {e}")
|
||||
|
||||
|
||||
async def _extract_dependencies(script_path: str) -> List[str]:
|
||||
try:
|
||||
async with aiofiles.open(script_path, "r") as file:
|
||||
source_code = await file.read()
|
||||
except IOError as e:
|
||||
logger.error(f"Error opening {script_path}: {e}")
|
||||
return []
|
||||
|
||||
jedi.set_debug_function(lambda color, str_out: None)
|
||||
script = jedi.Script(code=source_code, path=script_path)
|
||||
|
||||
tree = parso.parse(source_code)
|
||||
code_entities = _get_code_entities(tree)
|
||||
|
||||
for code_entity in code_entities:
|
||||
_update_code_entity(script, code_entity)
|
||||
|
||||
module_paths = {
|
||||
entity.get("module_path")
|
||||
for entity in code_entities
|
||||
if entity.get("module_path") is not None
|
||||
}
|
||||
|
||||
str_paths = []
|
||||
for module_path in module_paths:
|
||||
try:
|
||||
str_paths.append(str(module_path))
|
||||
except Exception as e:
|
||||
logger.error(f"Error converting path to string: {e}")
|
||||
|
||||
return sorted(str_paths)
|
||||
|
||||
|
||||
async def get_local_script_dependencies(script_path: str, repo_path: Optional[str] = None) -> List[str]:
|
||||
"""
|
||||
Extract and return a list of unique module paths that the script depends on.
|
||||
"""
|
||||
try:
|
||||
script_path = Path(script_path).resolve(strict=True)
|
||||
except (FileNotFoundError, PermissionError) as e:
|
||||
logger.error(f"Error resolving script path: {e}")
|
||||
return []
|
||||
|
||||
if not repo_path:
|
||||
return await _extract_dependencies(script_path)
|
||||
|
||||
try:
|
||||
repo_path = Path(repo_path).resolve(strict=True)
|
||||
except (FileNotFoundError, PermissionError) as e:
|
||||
logger.warning(f"Error resolving repo path: {e}. Proceeding without repo_path.")
|
||||
return await _extract_dependencies(script_path)
|
||||
|
||||
if not script_path.is_relative_to(repo_path):
|
||||
logger.warning(f"Script {script_path} not in repo {repo_path}. Proceeding without repo_path.")
|
||||
return await _extract_dependencies(script_path)
|
||||
|
||||
with add_sys_path(str(repo_path)):
|
||||
dependencies = await _extract_dependencies(script_path)
|
||||
|
||||
return [path for path in dependencies if path.startswith(str(repo_path))]
|
||||
Loading…
Add table
Reference in a new issue