feat: Initial memify commit

Igor Ilic 2025-09-01 17:48:50 +02:00
parent 940d4797bc
commit 72e5b2bec8
3 changed files with 74 additions and 0 deletions


@@ -0,0 +1,71 @@
from typing import Union, Optional, List, Type
from uuid import UUID

from cognee.shared.logging_utils import get_logger
from cognee.modules.engine.models.node_set import NodeSet
from cognee.modules.pipelines import run_pipeline
from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.users.models import User
from cognee.tasks.memify import extract_subgraph
from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor

logger = get_logger("memify")

async def memify(
    datasets: Optional[Union[str, List[str], List[UUID]]] = None,
    user: Optional[User] = None,
    tasks: Optional[List[Task]] = None,
    node_type: Optional[Type] = NodeSet,
    node_name: Optional[List[str]] = None,
    cypher_query: Optional[str] = None,
    vector_db_config: Optional[dict] = None,
    graph_db_config: Optional[dict] = None,
    run_in_background: bool = False,
):
"""
Prerequisites:
- **LLM_API_KEY**: Must be configured (required for entity extraction and graph generation)
- **Data Added**: Must have data previously added via `cognee.add()` and `cognee.cognify()`
- **Vector Database**: Must be accessible for embeddings storage
- **Graph Database**: Must be accessible for relationship storage
Args:
datasets: Dataset name(s) or dataset uuid to process. Processes all available data if None.
- Single dataset: "my_dataset"
- Multiple datasets: ["docs", "research", "reports"]
- None: Process all datasets for the user
user: User context for authentication and data access. Uses default if None.
vector_db_config: Custom vector database configuration for embeddings storage.
graph_db_config: Custom graph database configuration for relationship storage.
run_in_background: If True, starts processing asynchronously and returns immediately.
If False, waits for completion before returning.
Background mode recommended for large datasets (>100MB).
Use pipeline_run_id from return value to monitor progress.
"""
    memify_tasks = [
        Task(extract_subgraph, cypher_query=cypher_query, node_type=node_type, node_name=node_name),
        *(tasks or []),  # Unpack any extra tasks provided to the memify pipeline; tolerate None
    ]

    # get_pipeline_executor returns either a function that launches run_pipeline in the
    # background or one that blocks until the pipeline run completes.
    pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background)

    return await pipeline_executor_func(
        pipeline=run_pipeline,
        tasks=memify_tasks,
        user=user,
        datasets=datasets,
        vector_db_config=vector_db_config,
        graph_db_config=graph_db_config,
        incremental_loading=False,
        pipeline_name="memify_pipeline",
    )
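
For context, here is a minimal usage sketch (not part of this commit). It assumes memify is importable at a package-level path like the one shown, and the dataset name and the annotate task are hypothetical placeholders:

import asyncio

from cognee.modules.pipelines.tasks.task import Task
# Hypothetical import path for the function defined above.
from cognee.api.v1.memify import memify


async def annotate(data):
    # Hypothetical extra task; a real task would transform the extracted subgraph.
    return data


async def main():
    # Blocking run: waits until the pipeline finishes.
    result = await memify(datasets="my_dataset", tasks=[Task(annotate)])

    # Background run: returns immediately with a handle exposing pipeline_run_id.
    run = await memify(
        datasets="my_dataset",
        tasks=[Task(annotate)],
        run_in_background=True,
    )


asyncio.run(main())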


@@ -0,0 +1 @@
from .extract_subgraph import extract_subgraph
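
This re-export (presumably the package's __init__.py, though the file name is not shown) is what lets the pipeline module above import the task directly:

# Enabled by the re-export above:
from cognee.tasks.memify import extract_subgraph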


@@ -0,0 +1,2 @@
async def extract_subgraph(cypher_query=None, node_type=None, node_name=None):
    # Placeholder for the initial commit: accepts the keyword arguments bound in
    # memify's Task(...) so the pipeline can invoke it without a TypeError.
    pass
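
Since the task body is still a stub, here is a sketch of the direction it could take. This is purely illustrative and not part of the commit: get_graph_engine and graph_engine.query(...) are assumed interfaces, and the fallback Cypher query is hypothetical.

# Illustrative sketch only; not part of this commit.
from cognee.infrastructure.databases.graph import get_graph_engine  # assumed accessor


async def extract_subgraph(cypher_query=None, node_type=None, node_name=None):
    graph_engine = await get_graph_engine()

    if cypher_query:
        # Let the caller select the subgraph with an explicit Cypher query.
        return await graph_engine.query(cypher_query)

    # Hypothetical fallback: filter nodes by type and name.
    return await graph_engine.query(
        "MATCH (n) WHERE n.type = $type AND n.name IN $names RETURN n",
        {"type": getattr(node_type, "__name__", None), "names": node_name or []},
    )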