feat: Initial memify commit
This commit is contained in:
parent
940d4797bc
commit
72e5b2bec8
3 changed files with 74 additions and 0 deletions
71
cognee/api/v1/cognify/memify.py
Normal file
71
cognee/api/v1/cognify/memify.py
Normal file
|
|
@ -0,0 +1,71 @@
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from typing import Union, Optional, List, Type
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from cognee.shared.logging_utils import get_logger
|
||||||
|
from cognee.shared.data_models import KnowledgeGraph
|
||||||
|
from cognee.infrastructure.llm import get_max_chunk_tokens
|
||||||
|
|
||||||
|
from cognee.modules.engine.models.node_set import NodeSet
|
||||||
|
from cognee.modules.pipelines import run_pipeline
|
||||||
|
from cognee.modules.pipelines.tasks.task import Task
|
||||||
|
from cognee.modules.chunking.TextChunker import TextChunker
|
||||||
|
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
|
||||||
|
from cognee.modules.users.models import User
|
||||||
|
|
||||||
|
from cognee.tasks.memify import extract_subgraph
|
||||||
|
from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor
|
||||||
|
|
||||||
|
# Module-level logger for the memify pipeline (named "memify" in log output).
logger = get_logger("memify")
|
||||||
|
|
||||||
|
|
||||||
|
async def memify(
    datasets: Union[str, list[str], list[UUID]] = None,
    user: User = None,
    tasks: List[Task] = None,
    node_type: Optional[Type] = NodeSet,
    node_name: Optional[List[str]] = None,
    cypher_query: Optional[str] = None,
    vector_db_config: dict = None,
    graph_db_config: dict = None,
    run_in_background: bool = False,
):
    """
    Run the memify pipeline: extract a subgraph from the existing knowledge
    graph, then apply the caller-provided tasks to it.

    Prerequisites:
    - **LLM_API_KEY**: Must be configured (required for entity extraction and graph generation)
    - **Data Added**: Must have data previously added via `cognee.add()` and `cognee.cognify()`
    - **Vector Database**: Must be accessible for embeddings storage
    - **Graph Database**: Must be accessible for relationship storage

    Args:
        datasets: Dataset name(s) or dataset uuid to process. Processes all available data if None.
            - Single dataset: "my_dataset"
            - Multiple datasets: ["docs", "research", "reports"]
            - None: Process all datasets for the user
        user: User context for authentication and data access. Uses default if None.
        tasks: Extra Task objects to run after subgraph extraction. May be None
            (no additional tasks).
        node_type: Node type used to select the subgraph to operate on
            (defaults to NodeSet).
        node_name: Optional list of node names to narrow the subgraph selection.
        cypher_query: Optional Cypher query used by extract_subgraph to select
            the subgraph.
        vector_db_config: Custom vector database configuration for embeddings storage.
        graph_db_config: Custom graph database configuration for relationship storage.
        run_in_background: If True, starts processing asynchronously and returns immediately.
            If False, waits for completion before returning.
            Background mode recommended for large datasets (>100MB).
            Use pipeline_run_id from return value to monitor progress.

    Returns:
        Whatever the selected pipeline executor returns — a completed run result
        in blocking mode, or a handle/status for a background run.
    """
    memify_tasks = [
        # Subgraph extraction always runs first; user tasks consume its output.
        Task(extract_subgraph, cypher_query=cypher_query, node_type=node_type, node_name=node_name),
        # Fix: `tasks` defaults to None — unpacking None raises TypeError, so
        # fall back to an empty list when no extra tasks were provided.
        *(tasks or []),
    ]

    # By calling get_pipeline_executor we get a function that will have the
    # run_pipeline run in the background, or a function that we will need to wait for.
    pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background)

    # Run the run_pipeline in the background or blocking based on executor.
    return await pipeline_executor_func(
        pipeline=run_pipeline,
        tasks=memify_tasks,
        user=user,
        datasets=datasets,
        vector_db_config=vector_db_config,
        graph_db_config=graph_db_config,
        incremental_loading=False,
        pipeline_name="memify_pipeline",
    )
|
||||||
1
cognee/tasks/memify/__init__.py
Normal file
1
cognee/tasks/memify/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
# Public API of the cognee.tasks.memify package.
# Fix: use an explicit relative import — the absolute form
# `from extract_subgraph import ...` raises ModuleNotFoundError under
# Python 3, since the sibling module is only importable relative to
# this package.
from .extract_subgraph import extract_subgraph
|
||||||
2
cognee/tasks/memify/extract_subgraph.py
Normal file
2
cognee/tasks/memify/extract_subgraph.py
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
async def extract_subgraph(data=None, cypher_query=None, node_type=None, node_name=None):
    """
    Placeholder task for subgraph extraction — not yet implemented.

    The memify pipeline constructs this task as
    ``Task(extract_subgraph, cypher_query=..., node_type=..., node_name=...)``,
    so the signature must accept those keywords (plus the pipeline's data
    payload); the original zero-argument stub would raise TypeError as soon
    as the pipeline invoked it with arguments.

    Args:
        data: Payload forwarded by the pipeline runner, if any. Unused.
        cypher_query: Optional Cypher query selecting the subgraph. Unused.
        node_type: Node type used to scope the subgraph. Unused.
        node_name: Optional list of node names to filter by. Unused.

    Returns:
        None — TODO(review): implement actual subgraph extraction.
    """
    pass
|
||||||
Loading…
Add table
Reference in a new issue