Task updates and updates to SQLAlchemy Adapter

This commit is contained in:
Vasilije 2024-08-07 13:29:53 +02:00
parent a782286b93
commit 557014e06b
12 changed files with 20 additions and 7 deletions

View file

@@ -31,10 +31,10 @@ from cognee.tasks.chunk_extract_summary.chunk_extract_summary import chunk_extra
from cognee.tasks.chunk_naive_llm_classifier.chunk_naive_llm_classifier import chunk_naive_llm_classifier_task
from cognee.tasks.chunk_remove_disconnected.chunk_remove_disconnected import chunk_remove_disconnected_task
from cognee.tasks.chunk_to_graph_decomposition.chunk_to_graph_decomposition import chunk_to_graph_decomposition_task
from cognee.tasks.chunk_to_vector_graphstore.chunk_to_vector_graphstore import chunk_to_vector_graphstore_task
from cognee.tasks.save_chunks_to_store.save_chunks_to_store import save_chunks_to_store_task
from cognee.tasks.chunk_update_check.chunk_update_check import chunk_update_check_task
from cognee.tasks.graph_decomposition_to_graph_nodes.graph_decomposition_to_graph_nodes import \
graph_decomposition_to_graph_nodes_task
from cognee.tasks.chunks_into_graph.chunks_into_graph import \
chunks_into_graph_task
from cognee.tasks.source_documents_to_chunks.source_documents_to_chunks import source_documents_to_chunks
logger = logging.getLogger("cognify.v2")
@@ -111,10 +111,10 @@ async def cognify(datasets: Union[str, list[str]] = None, user: User = None):
tasks = [
Task(source_documents_to_chunks, parent_node_id = root_node_id), # Classify documents and save them as a nodes in graph db, extract text chunks based on the document type
Task(chunk_to_graph_decomposition_task, topology_model = KnowledgeGraph, task_config = { "batch_size": 10 }), # Set the graph topology for the document chunk data
Task(graph_decomposition_to_graph_nodes_task, graph_model = KnowledgeGraph, collection_name = "entities"), # Generate knowledge graphs from the document chunks and attach it to chunk nodes
Task(chunks_into_graph_task, graph_model = KnowledgeGraph, collection_name = "entities"), # Generate knowledge graphs from the document chunks and attach it to chunk nodes
Task(chunk_update_check_task, collection_name = "chunks"), # Find all affected chunks, so we don't process unchanged chunks
Task(
chunk_to_vector_graphstore_task,
save_chunks_to_store_task,
collection_name = "chunks",
), # Save the document chunks in vector db and as nodes in graph db (connected to the document node and between each other)
run_tasks_parallel([

9
cognee/tasks/__init__.py Normal file
View file

@@ -0,0 +1,9 @@
from . chunk_extract_summary import chunk_extract_summary
from . chunk_naive_llm_classifier import chunk_naive_llm_classifier
from . chunk_remove_disconnected import chunk_remove_disconnected
from . chunk_to_graph_decomposition import chunk_to_graph_decomposition
from . save_chunks_to_store import save_chunks_to_store
from . chunk_update_check import chunk_update_check
from . chunks_into_graph import chunks_into_graph
from . source_documents_to_chunks import source_documents_to_chunks

View file

@@ -18,7 +18,7 @@ class EntityNode(BaseModel):
created_at: datetime
updated_at: datetime
async def graph_decomposition_to_graph_nodes_task(data_chunks: list[DocumentChunk], graph_model: Type[BaseModel], collection_name: str):
async def chunks_into_graph_task(data_chunks: list[DocumentChunk], graph_model: Type[BaseModel], collection_name: str):
chunk_graphs = await asyncio.gather(
*[extract_content_graph(chunk.text, graph_model) for chunk in data_chunks]
)

View file

@@ -2,7 +2,7 @@ from cognee.infrastructure.databases.vector import DataPoint, get_vector_engine
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.modules.data.processing.chunk_types.DocumentChunk import DocumentChunk
async def chunk_to_vector_graphstore_task(data_chunks: list[DocumentChunk], collection_name: str):
async def save_chunks_to_store_task(data_chunks: list[DocumentChunk], collection_name: str):
if len(data_chunks) == 0:
return data_chunks

View file

@@ -83,6 +83,10 @@ weaviate = ["weaviate-client"]
qdrant = ["qdrant-client"]
neo4j = ["neo4j", "py2neo"]
notebook = ["ipykernel","overrides", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"]
langchain = ["langfuse"]
# Install this extra with: poetry install cognee --extras=langchain
[tool.poetry.group.dev.dependencies]
pytest = "^7.4.0"