diff --git a/examples/python/code_graph_pipeline.py b/examples/python/code_graph_pipeline.py index e4fe29840..221ff8385 100644 --- a/examples/python/code_graph_pipeline.py +++ b/examples/python/code_graph_pipeline.py @@ -1,11 +1,8 @@ -import argparse import asyncio import os -import cognee -import json +from uuid import UUID, uuid4 import numpy as np -from networkx.classes.digraph import DiGraph from cognee.modules.pipelines import Task, run_tasks from cognee.shared.CodeGraphEntities import CodeFile, CodeRelationship, Repository @@ -16,49 +13,70 @@ from cognee.tasks.code.get_local_dependencies_checker import ( from cognee.tasks.graph.convert_graph_from_code_graph import ( convert_graph_from_code_graph, ) -from cognee.tasks.repo_processor.get_repo_dependency_graph import ( - get_repo_dependency_graph, -) -from cognee.tasks.repo_processor.enrich_dependency_graph import enrich_dependency_graph + from cognee.tasks.summarization import summarize_code -from cognee.tasks.storage import index_data_points +from cognee.tests.tasks.graph.code_graph_test_data_generation import ( + code_graph_test_data_generation, +) async def print_results(pipeline): async for result in pipeline: print(result) -async def write_results(repo, pipeline): - output_dir = os.path.join(repo, "code_pipeline_output", "") - os.makedirs(output_dir, exist_ok = True) - async for code_files, summaries in pipeline: - for summary in summaries: - file_name = os.path.split(summary.made_from.extracted_id)[-1] - relpath = os.path.join(*os.path.split(os.path.relpath(summary.made_from.extracted_id, repo))[:-1]) - output_dir2 = os.path.join(repo, "code_pipeline_output", relpath) - os.makedirs(output_dir2, exist_ok=True) - with open(os.path.join(output_dir2, file_name.replace(".py", ".json")), "w") as f: - f.write(json.dumps({"summary": summary.text, "source_code": summary.made_from.source_code})) -async def reset_system(): - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) - return(True) +def create_code_file(path, type): + abspath = os.path.abspath(path) + with open(abspath, "r") as f: + source_code = f.read() + code_file = CodeFile(extracted_id=abspath, type=type, source_code=source_code) + return (code_file, abspath) + + +async def get_local_script_dependencies_wrapper(script_path, repo_path): + dependencies = await get_local_script_dependencies(script_path, repo_path) + return (script_path, dependencies) + + +async def scan_repo(path, condition): + futures = [] + for root, dirs, files in os.walk(path): + for file in files: + if condition(file): + futures.append( + get_local_script_dependencies_wrapper( + os.path.abspath(f"{root}/{file}"), path + ) + ) + results = await asyncio.gather(*futures) + + code_files = {} + code_relationships = [] + for abspath, dependencies in results: + code_file, abspath = create_code_file(abspath, "python_file") + code_files[abspath] = code_file + + for dependency in dependencies: + dependency_code_file, dependency_abspath = create_code_file( + dependency, "python_file" + ) + code_files[dependency_abspath] = dependency_code_file + code_relationship = CodeRelationship( + source_id=abspath, + target_id=dependency_abspath, + type="files", + relation="depends_on", + ) + code_relationships.append(code_relationship) + + return (Repository(path=path), list(code_files.values()), code_relationships) + if __name__ == "__main__": - - parser = argparse.ArgumentParser(description="Process a file path") - parser.add_argument("path", help="Path to the file") - - args = parser.parse_args() - abspath = os.path.abspath(args.path) - data = abspath tasks = [ - Task(get_repo_dependency_graph), - Task(enrich_dependency_graph), - Task(convert_graph_from_code_graph, repo_path = abspath), - Task(index_data_points), + Task(scan_repo), + Task(convert_graph_from_code_graph), Task(summarize_code, summarization_model=SummarizedContent), ] + data = ("cognee", lambda file: file.endswith(".py")) pipeline = run_tasks(tasks, data, "cognify_pipeline") - - asyncio.run(write_results(abspath, pipeline)) + asyncio.run(print_results(pipeline))