diff --git a/cognee/api/v1/cognify/cognify_v2.py b/cognee/api/v1/cognify/cognify_v2.py index 95e56108e..b2a03ebf2 100644 --- a/cognee/api/v1/cognify/cognify_v2.py +++ b/cognee/api/v1/cognify/cognify_v2.py @@ -31,10 +31,10 @@ from cognee.tasks.chunk_extract_summary.chunk_extract_summary import chunk_extra from cognee.tasks.chunk_naive_llm_classifier.chunk_naive_llm_classifier import chunk_naive_llm_classifier_task from cognee.tasks.chunk_remove_disconnected.chunk_remove_disconnected import chunk_remove_disconnected_task from cognee.tasks.chunk_to_graph_decomposition.chunk_to_graph_decomposition import chunk_to_graph_decomposition_task -from cognee.tasks.chunk_to_vector_graphstore.chunk_to_vector_graphstore import chunk_to_vector_graphstore_task +from cognee.tasks.save_chunks_to_store.save_chunks_to_store import save_chunks_to_store_task from cognee.tasks.chunk_update_check.chunk_update_check import chunk_update_check_task -from cognee.tasks.graph_decomposition_to_graph_nodes.graph_decomposition_to_graph_nodes import \ - graph_decomposition_to_graph_nodes_task +from cognee.tasks.chunks_into_graph.chunks_into_graph import \ + chunks_into_graph_task from cognee.tasks.source_documents_to_chunks.source_documents_to_chunks import source_documents_to_chunks logger = logging.getLogger("cognify.v2") @@ -111,10 +111,10 @@ async def cognify(datasets: Union[str, list[str]] = None, user: User = None): tasks = [ Task(source_documents_to_chunks, parent_node_id = root_node_id), # Classify documents and save them as a nodes in graph db, extract text chunks based on the document type Task(chunk_to_graph_decomposition_task, topology_model = KnowledgeGraph, task_config = { "batch_size": 10 }), # Set the graph topology for the document chunk data - Task(graph_decomposition_to_graph_nodes_task, graph_model = KnowledgeGraph, collection_name = "entities"), # Generate knowledge graphs from the document chunks and attach it to chunk nodes + Task(chunks_into_graph_task, graph_model = 
KnowledgeGraph, collection_name = "entities"), # Generate knowledge graphs from the document chunks and attach it to chunk nodes Task(chunk_update_check_task, collection_name = "chunks"), # Find all affected chunks, so we don't process unchanged chunks Task( - chunk_to_vector_graphstore_task, + save_chunks_to_store_task, collection_name = "chunks", ), # Save the document chunks in vector db and as nodes in graph db (connected to the document node and between each other) run_tasks_parallel([ diff --git a/cognee/tasks/__init__.py b/cognee/tasks/__init__.py new file mode 100644 index 000000000..4029b0c4f --- /dev/null +++ b/cognee/tasks/__init__.py @@ -0,0 +1,9 @@ +from . chunk_extract_summary import chunk_extract_summary +from . chunk_naive_llm_classifier import chunk_naive_llm_classifier +from . chunk_remove_disconnected import chunk_remove_disconnected +from . chunk_to_graph_decomposition import chunk_to_graph_decomposition +from . save_chunks_to_store import save_chunks_to_store +from . chunk_update_check import chunk_update_check +from . chunks_into_graph import chunks_into_graph +from . 
source_documents_to_chunks import source_documents_to_chunks + diff --git a/cognee/tasks/chunk_extract_summary/__init__.py b/cognee/tasks/chunk_extract_summary/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cognee/tasks/chunk_naive_llm_classifier/__init__.py b/cognee/tasks/chunk_naive_llm_classifier/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cognee/tasks/chunk_remove_disconnected/__init__.py b/cognee/tasks/chunk_remove_disconnected/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cognee/tasks/chunk_to_graph_decomposition/__init__.py b/cognee/tasks/chunk_to_graph_decomposition/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cognee/tasks/chunk_update_check/__init__.py b/cognee/tasks/chunk_update_check/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cognee/tasks/chunks_into_graph/__init__.py b/cognee/tasks/chunks_into_graph/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cognee/tasks/graph_decomposition_to_graph_nodes/graph_decomposition_to_graph_nodes.py b/cognee/tasks/chunks_into_graph/chunks_into_graph.py similarity index 98% rename from cognee/tasks/graph_decomposition_to_graph_nodes/graph_decomposition_to_graph_nodes.py rename to cognee/tasks/chunks_into_graph/chunks_into_graph.py index 2c7539e34..40747ffbe 100644 --- a/cognee/tasks/graph_decomposition_to_graph_nodes/graph_decomposition_to_graph_nodes.py +++ b/cognee/tasks/chunks_into_graph/chunks_into_graph.py @@ -18,7 +18,7 @@ class EntityNode(BaseModel): created_at: datetime updated_at: datetime -async def graph_decomposition_to_graph_nodes_task(data_chunks: list[DocumentChunk], graph_model: Type[BaseModel], collection_name: str): +async def chunks_into_graph_task(data_chunks: list[DocumentChunk], graph_model: Type[BaseModel], collection_name: str): chunk_graphs = await asyncio.gather( *[extract_content_graph(chunk.text, graph_model) for chunk in 
data_chunks] ) diff --git a/cognee/tasks/chunk_to_vector_graphstore/chunk_to_vector_graphstore.py b/cognee/tasks/save_chunks_to_store/save_chunks_to_store.py similarity index 97% rename from cognee/tasks/chunk_to_vector_graphstore/chunk_to_vector_graphstore.py rename to cognee/tasks/save_chunks_to_store/save_chunks_to_store.py index aceb879f3..6ae4fb890 100644 --- a/cognee/tasks/chunk_to_vector_graphstore/chunk_to_vector_graphstore.py +++ b/cognee/tasks/save_chunks_to_store/save_chunks_to_store.py @@ -2,7 +2,7 @@ from cognee.infrastructure.databases.vector import DataPoint, get_vector_engine from cognee.infrastructure.databases.graph import get_graph_engine from cognee.modules.data.processing.chunk_types.DocumentChunk import DocumentChunk -async def chunk_to_vector_graphstore_task(data_chunks: list[DocumentChunk], collection_name: str): +async def save_chunks_to_store_task(data_chunks: list[DocumentChunk], collection_name: str): if len(data_chunks) == 0: return data_chunks diff --git a/cognee/tasks/source_documents_to_chunks/__init__.py b/cognee/tasks/source_documents_to_chunks/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pyproject.toml b/pyproject.toml index 489348fa7..53c5681bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,6 +83,10 @@ weaviate = ["weaviate-client"] qdrant = ["qdrant-client"] neo4j = ["neo4j", "py2neo"] notebook = ["ipykernel","overrides", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"] +langchain = ["langfuse"] + + +# poetry install --extras langchain [tool.poetry.group.dev.dependencies] pytest = "^7.4.0"