From 72e5b2bec877c8c8d4775a1ff780673604c6ac92 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Mon, 1 Sep 2025 17:48:50 +0200
Subject: [PATCH] feat: Initial memify commit

---
 cognee/api/v1/cognify/memify.py         | 75 +++++++++++++++++++++++++
 cognee/tasks/memify/__init__.py         |  1 +
 cognee/tasks/memify/extract_subgraph.py |  3 ++
 3 files changed, 79 insertions(+)
 create mode 100644 cognee/api/v1/cognify/memify.py
 create mode 100644 cognee/tasks/memify/__init__.py
 create mode 100644 cognee/tasks/memify/extract_subgraph.py

diff --git a/cognee/api/v1/cognify/memify.py b/cognee/api/v1/cognify/memify.py
new file mode 100644
index 000000000..65a622af7
--- /dev/null
+++ b/cognee/api/v1/cognify/memify.py
@@ -0,0 +1,75 @@
+from pydantic import BaseModel
+from typing import Union, Optional, List, Type
+from uuid import UUID
+
+from cognee.shared.logging_utils import get_logger
+from cognee.shared.data_models import KnowledgeGraph
+from cognee.infrastructure.llm import get_max_chunk_tokens
+
+from cognee.modules.engine.models.node_set import NodeSet
+from cognee.modules.pipelines import run_pipeline
+from cognee.modules.pipelines.tasks.task import Task
+from cognee.modules.chunking.TextChunker import TextChunker
+from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
+from cognee.modules.users.models import User
+
+from cognee.tasks.memify import extract_subgraph
+from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor
+
+logger = get_logger("memify")
+
+
+async def memify(
+    datasets: Optional[Union[str, list[str], list[UUID]]] = None,
+    user: Optional[User] = None,
+    tasks: Optional[List[Task]] = None,
+    node_type: Optional[Type] = NodeSet,
+    node_name: Optional[List[str]] = None,
+    cypher_query: Optional[str] = None,
+    vector_db_config: Optional[dict] = None,
+    graph_db_config: Optional[dict] = None,
+    run_in_background: bool = False,
+):
+    """
+    Prerequisites:
+    - **LLM_API_KEY**: Must be configured (required for entity extraction and graph generation)
+    - **Data Added**: Must have data previously added via `cognee.add()` and `cognee.cognify()`
+    - **Vector Database**: Must be accessible for embeddings storage
+    - **Graph Database**: Must be accessible for relationship storage
+
+    Args:
+        datasets: Dataset name(s) or dataset UUID(s) to process. Processes all available data if None.
+            - Single dataset: "my_dataset"
+            - Multiple datasets: ["docs", "research", "reports"]
+            - None: Process all datasets for the user
+        user: User context for authentication and data access. Uses default if None.
+        tasks: Additional Task instances appended to the pipeline after subgraph extraction.
+        node_type: Node model used to select the subgraph to process. Defaults to NodeSet.
+        node_name: Optional list of node names used to narrow the extracted subgraph.
+        cypher_query: Optional Cypher query used to select the subgraph to process.
+        vector_db_config: Custom vector database configuration for embeddings storage.
+        graph_db_config: Custom graph database configuration for relationship storage.
+        run_in_background: If True, starts processing asynchronously and returns immediately.
+            If False, waits for completion before returning.
+            Background mode is recommended for large datasets (>100MB).
+            Use pipeline_run_id from the return value to monitor progress.
+ """ + memify_tasks = [ + Task(extract_subgraph, cypher_query=cypher_query, node_type=node_type, node_name=node_name), + *tasks, # Unpack tasks provided to memify pipeline + ] + + # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for + pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background) + + # Run the run_pipeline in the background or blocking based on executor + return await pipeline_executor_func( + pipeline=run_pipeline, + tasks=memify_tasks, + user=user, + datasets=datasets, + vector_db_config=vector_db_config, + graph_db_config=graph_db_config, + incremental_loading=False, + pipeline_name="memify_pipeline", + ) diff --git a/cognee/tasks/memify/__init__.py b/cognee/tasks/memify/__init__.py new file mode 100644 index 000000000..a95e88794 --- /dev/null +++ b/cognee/tasks/memify/__init__.py @@ -0,0 +1 @@ +from extract_subgraph import extract_subgraph diff --git a/cognee/tasks/memify/extract_subgraph.py b/cognee/tasks/memify/extract_subgraph.py new file mode 100644 index 000000000..1cf7ab951 --- /dev/null +++ b/cognee/tasks/memify/extract_subgraph.py @@ -0,0 +1,2 @@ +async def extract_subgraph(): + pass