refactor: Cog-547 code graph enrichment task clean
This commit is contained in:
parent
80b06c3acb
commit
8466764cbe
1 changed file with 55 additions and 37 deletions
|
|
@ -1,11 +1,8 @@
|
|||
import argparse
|
||||
import asyncio
|
||||
import os
|
||||
import cognee
|
||||
import json
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import numpy as np
|
||||
from networkx.classes.digraph import DiGraph
|
||||
|
||||
from cognee.modules.pipelines import Task, run_tasks
|
||||
from cognee.shared.CodeGraphEntities import CodeFile, CodeRelationship, Repository
|
||||
|
|
@ -16,49 +13,70 @@ from cognee.tasks.code.get_local_dependencies_checker import (
|
|||
from cognee.tasks.graph.convert_graph_from_code_graph import (
|
||||
convert_graph_from_code_graph,
|
||||
)
|
||||
from cognee.tasks.repo_processor.get_repo_dependency_graph import (
|
||||
get_repo_dependency_graph,
|
||||
)
|
||||
from cognee.tasks.repo_processor.enrich_dependency_graph import enrich_dependency_graph
|
||||
|
||||
from cognee.tasks.summarization import summarize_code
|
||||
from cognee.tasks.storage import index_data_points
|
||||
from cognee.tests.tasks.graph.code_graph_test_data_generation import (
|
||||
code_graph_test_data_generation,
|
||||
)
|
||||
|
||||
async def print_results(pipeline):
    """Iterate over the async ``pipeline`` and print every item it yields.

    Args:
        pipeline: An async iterable of results.
    """
    async for item in pipeline:
        print(item)
|
||||
|
||||
async def write_results(repo, pipeline):
    """Write each summary yielded by ``pipeline`` to a JSON file.

    Files are written below ``<repo>/code_pipeline_output``, in a sub-directory
    matching the summarized file's directory relative to ``repo``. A trailing
    ``.py`` extension is swapped for ``.json``; other names are kept unchanged.

    Args:
        repo: Repository root; also the parent of the output directory.
        pipeline: Async iterable yielding ``(code_files, summaries)`` pairs.
            Each summary must expose ``text`` and ``made_from``, the latter
            with ``extracted_id`` (the source path) and ``source_code``.
    """
    output_root = os.path.join(repo, "code_pipeline_output")
    os.makedirs(output_root, exist_ok=True)

    async for _code_files, summaries in pipeline:
        for summary in summaries:
            source_path = summary.made_from.extracted_id
            file_name = os.path.basename(source_path)
            relative_dir = os.path.dirname(os.path.relpath(source_path, repo))

            # Only swap the extension: str.replace would also rewrite a ".py"
            # that occurs in the middle of the name.
            if file_name.endswith(".py"):
                file_name = file_name[: -len(".py")] + ".json"

            target_dir = os.path.join(output_root, relative_dir)
            os.makedirs(target_dir, exist_ok=True)

            payload = {
                "summary": summary.text,
                "source_code": summary.made_from.source_code,
            }
            with open(os.path.join(target_dir, file_name), "w", encoding="utf-8") as f:
                json.dump(payload, f)
|
||||
|
||||
async def reset_system():
    """Run cognee's data prune, then its system prune with ``metadata=True``.

    Returns:
        ``True`` after both prune calls have been awaited.
    """
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    return True
|
||||
def create_code_file(path, type):
    """Read a source file and wrap its contents in a ``CodeFile``.

    Args:
        path: Path to the file; it is made absolute before reading.
        type: Value stored in the ``CodeFile``'s ``type`` field. The name
            shadows the builtin but is kept so keyword callers keep working.

    Returns:
        A ``(code_file, abspath)`` tuple, where ``abspath`` is the absolute
        path that was read and is also used as ``extracted_id``.
    """
    abspath = os.path.abspath(path)
    # Read as UTF-8 explicitly instead of relying on the platform's locale
    # encoding, which is not UTF-8 everywhere.
    with open(abspath, "r", encoding="utf-8") as f:
        source_code = f.read()
    code_file = CodeFile(extracted_id=abspath, type=type, source_code=source_code)
    return (code_file, abspath)
|
||||
|
||||
|
||||
async def get_local_script_dependencies_wrapper(script_path, repo_path):
    """Await ``get_local_script_dependencies`` and pair its result with the script path.

    Args:
        script_path: Path of the script, passed through to the checker.
        repo_path: Repository path, passed through to the checker.

    Returns:
        A ``(script_path, dependencies)`` tuple.
    """
    found = await get_local_script_dependencies(script_path, repo_path)
    return script_path, found
|
||||
|
||||
|
||||
async def scan_repo(path, condition):
    """Walk ``path`` and collect code files and their dependency relationships.

    Args:
        path: Directory to walk; also passed to the dependency checker as the
            repository path and stored on the returned ``Repository``.
        condition: Callable taking a bare file name and returning a truthy
            value for files that should be scanned.

    Returns:
        A ``(repository, code_files, code_relationships)`` tuple: a
        ``Repository`` for ``path``, one ``CodeFile`` per distinct absolute
        path (scanned files and their dependencies), and one ``depends_on``
        ``CodeRelationship`` per (file, dependency) pair.
    """
    pending = []
    for root, _dirs, files in os.walk(path):
        for file in files:
            if condition(file):
                script_path = os.path.abspath(os.path.join(root, file))
                pending.append(
                    get_local_script_dependencies_wrapper(script_path, path)
                )
    results = await asyncio.gather(*pending)

    code_files = {}
    code_relationships = []

    def register(file_path):
        # Load each file at most once; without this check a file would be
        # re-read from disk for every relationship that mentions it.
        key = os.path.abspath(file_path)
        if key not in code_files:
            code_file, _ = create_code_file(key, "python_file")
            code_files[key] = code_file
        return key

    for script_path, dependencies in results:
        source_abspath = register(script_path)
        for dependency in dependencies:
            dependency_abspath = register(dependency)
            code_relationships.append(
                CodeRelationship(
                    source_id=source_abspath,
                    target_id=dependency_abspath,
                    type="files",
                    relation="depends_on",
                )
            )

    return (Repository(path=path), list(code_files.values()), code_relationships)
|
||||
|
||||
|
||||
# Script entry point: parse a path argument, build the task list, create the
# pipeline with run_tasks, and drive it with asyncio.run.
# NOTE(review): this text looks like a flattened diff view (indentation lost,
# removed and added lines possibly interleaved) — verify against the real file.
if __name__ == "__main__":
|
||||

||||
parser = argparse.ArgumentParser(description="Process a file path")
|
||||
parser.add_argument("path", help="Path to the file")
|
||||

||||
args = parser.parse_args()
|
||||
abspath = os.path.abspath(args.path)
|
||||
data = abspath
|
||||
tasks = [
|
||||
Task(get_repo_dependency_graph),
|
||||
Task(enrich_dependency_graph),
|
||||
Task(convert_graph_from_code_graph, repo_path = abspath),
|
||||
Task(index_data_points),
|
||||
Task(scan_repo),
|
||||
Task(convert_graph_from_code_graph),
|
||||
# NOTE(review): SummarizedContent is not imported in the visible part of this
# file — confirm it is in scope.
Task(summarize_code, summarization_model=SummarizedContent),
|
||||
]
|
||||
# NOTE(review): this rebinds data, so the earlier `data = abspath` value never
# reaches run_tasks — confirm which input is intended.
data = ("cognee", lambda file: file.endswith(".py"))
|
||||
pipeline = run_tasks(tasks, data, "cognify_pipeline")
|
||||

||||
asyncio.run(write_results(abspath, pipeline))
|
||||
# NOTE(review): this passes the same pipeline object that write_results has
# already iterated — confirm a second pass is expected to yield anything.
asyncio.run(print_results(pipeline))
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue