From 72e5b2bec877c8c8d4775a1ff780673604c6ac92 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 1 Sep 2025 17:48:50 +0200 Subject: [PATCH 01/19] feat: Initial memify commit --- cognee/api/v1/cognify/memify.py | 71 +++++++++++++++++++++++++ cognee/tasks/memify/__init__.py | 1 + cognee/tasks/memify/extract_subgraph.py | 2 + 3 files changed, 74 insertions(+) create mode 100644 cognee/api/v1/cognify/memify.py create mode 100644 cognee/tasks/memify/__init__.py create mode 100644 cognee/tasks/memify/extract_subgraph.py diff --git a/cognee/api/v1/cognify/memify.py b/cognee/api/v1/cognify/memify.py new file mode 100644 index 000000000..65a622af7 --- /dev/null +++ b/cognee/api/v1/cognify/memify.py @@ -0,0 +1,71 @@ +from pydantic import BaseModel +from typing import Union, Optional, List, Type +from uuid import UUID + +from cognee.shared.logging_utils import get_logger +from cognee.shared.data_models import KnowledgeGraph +from cognee.infrastructure.llm import get_max_chunk_tokens + +from cognee.modules.engine.models.node_set import NodeSet +from cognee.modules.pipelines import run_pipeline +from cognee.modules.pipelines.tasks.task import Task +from cognee.modules.chunking.TextChunker import TextChunker +from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver +from cognee.modules.users.models import User + +from cognee.tasks.memify import extract_subgraph +from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor + +logger = get_logger("memify") + + +async def memify( + datasets: Union[str, list[str], list[UUID]] = None, + user: User = None, + tasks: List[Task] = None, + node_type: Optional[Type] = NodeSet, + node_name: Optional[List[str]] = None, + cypher_query: Optional[str] = None, + vector_db_config: dict = None, + graph_db_config: dict = None, + run_in_background: bool = False, +): + """ + Prerequisites: + - **LLM_API_KEY**: Must be configured (required for entity extraction and graph generation) + - **Data 
Added**: Must have data previously added via `cognee.add()` and `cognee.cognify()` + - **Vector Database**: Must be accessible for embeddings storage + - **Graph Database**: Must be accessible for relationship storage + + Args: + datasets: Dataset name(s) or dataset uuid to process. Processes all available data if None. + - Single dataset: "my_dataset" + - Multiple datasets: ["docs", "research", "reports"] + - None: Process all datasets for the user + user: User context for authentication and data access. Uses default if None. + vector_db_config: Custom vector database configuration for embeddings storage. + graph_db_config: Custom graph database configuration for relationship storage. + run_in_background: If True, starts processing asynchronously and returns immediately. + If False, waits for completion before returning. + Background mode recommended for large datasets (>100MB). + Use pipeline_run_id from return value to monitor progress. + """ + memify_tasks = [ + Task(extract_subgraph, cypher_query=cypher_query, node_type=node_type, node_name=node_name), + *tasks, # Unpack tasks provided to memify pipeline + ] + + # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for + pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background) + + # Run the run_pipeline in the background or blocking based on executor + return await pipeline_executor_func( + pipeline=run_pipeline, + tasks=memify_tasks, + user=user, + datasets=datasets, + vector_db_config=vector_db_config, + graph_db_config=graph_db_config, + incremental_loading=False, + pipeline_name="memify_pipeline", + ) diff --git a/cognee/tasks/memify/__init__.py b/cognee/tasks/memify/__init__.py new file mode 100644 index 000000000..a95e88794 --- /dev/null +++ b/cognee/tasks/memify/__init__.py @@ -0,0 +1 @@ +from extract_subgraph import extract_subgraph diff --git a/cognee/tasks/memify/extract_subgraph.py 
b/cognee/tasks/memify/extract_subgraph.py new file mode 100644 index 000000000..1cf7ab951 --- /dev/null +++ b/cognee/tasks/memify/extract_subgraph.py @@ -0,0 +1,2 @@ +async def extract_subgraph(): + pass From 0bf8abcc6fb9491b03c27b7c6f3cea1753004a29 Mon Sep 17 00:00:00 2001 From: Hande <159312713+hande-k@users.noreply.github.com> Date: Tue, 2 Sep 2025 11:18:22 +0300 Subject: [PATCH 02/19] fix: add fix to low level example --- cognee/modules/data/methods/load_or_create_datasets.py | 2 +- examples/low_level/pipeline.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cognee/modules/data/methods/load_or_create_datasets.py b/cognee/modules/data/methods/load_or_create_datasets.py index 1d6ef3efb..2c9a6497c 100644 --- a/cognee/modules/data/methods/load_or_create_datasets.py +++ b/cognee/modules/data/methods/load_or_create_datasets.py @@ -2,7 +2,7 @@ from typing import List, Union from uuid import UUID from cognee.modules.data.models import Dataset -from cognee.modules.data.methods import create_authorized_dataset +from cognee.modules.data.methods.create_authorized_dataset import create_authorized_dataset from cognee.modules.data.exceptions import DatasetNotFoundError diff --git a/examples/low_level/pipeline.py b/examples/low_level/pipeline.py index 804e42ff7..085d313a7 100644 --- a/examples/low_level/pipeline.py +++ b/examples/low_level/pipeline.py @@ -73,7 +73,7 @@ def ingest_files(data: List[Any]): new_company.departments.append(departments_data_points[department_name]) - return companies_data_points.values() + return list(companies_data_points.values()) async def main(): From 195e05a544ea5fe62cf92a767cd0ce0dc876fdd4 Mon Sep 17 00:00:00 2001 From: Hande <159312713+hande-k@users.noreply.github.com> Date: Tue, 2 Sep 2025 11:41:26 +0300 Subject: [PATCH 03/19] fix: add fix to starter-kit low level --- cognee-starter-kit/src/pipelines/low_level.py | 72 ++++++++++--------- 1 file changed, 40 insertions(+), 32 deletions(-) diff --git 
a/cognee-starter-kit/src/pipelines/low_level.py b/cognee-starter-kit/src/pipelines/low_level.py index 4c4c9d6da..8b4fccf33 100644 --- a/cognee-starter-kit/src/pipelines/low_level.py +++ b/cognee-starter-kit/src/pipelines/low_level.py @@ -1,14 +1,15 @@ import os -import uuid import json import asyncio import pathlib +from typing import List, Any from cognee import config, prune, search, SearchType, visualize_graph from cognee.low_level import setup, DataPoint from cognee.pipelines import run_tasks, Task from cognee.tasks.storage import add_data_points from cognee.tasks.storage.index_graph_edges import index_graph_edges from cognee.modules.users.methods import get_default_user +from cognee.modules.data.methods import load_or_create_datasets class Person(DataPoint): @@ -33,45 +34,51 @@ class Company(DataPoint): metadata: dict = {"index_fields": ["name"]} -def ingest_files(): - companies_file_path = os.path.join(os.path.dirname(__file__), "../data/companies.json") - companies = json.loads(open(companies_file_path, "r").read()) +def ingest_files(data: List[Any]): + if not data or data == [None]: + companies_file_path = os.path.join(os.path.dirname(__file__), "../data/companies.json") + companies = json.loads(open(companies_file_path, "r").read()) - people_file_path = os.path.join(os.path.dirname(__file__), "../data/people.json") - people = json.loads(open(people_file_path, "r").read()) + people_file_path = os.path.join(os.path.dirname(__file__), "../data/people.json") + people = json.loads(open(people_file_path, "r").read()) + + data = [{"companies": companies, "people": people}] people_data_points = {} departments_data_points = {} - - for person in people: - new_person = Person(name=person["name"]) - people_data_points[person["name"]] = new_person - - if person["department"] not in departments_data_points: - departments_data_points[person["department"]] = Department( - name=person["department"], employees=[new_person] - ) - else: - 
departments_data_points[person["department"]].employees.append(new_person) - companies_data_points = {} - # Create a single CompanyType node, so we connect all companies to it. - companyType = CompanyType() + for data_item in data: + people = data_item["people"] + companies = data_item["companies"] - for company in companies: - new_company = Company(name=company["name"], departments=[], is_type=companyType) - companies_data_points[company["name"]] = new_company + for person in people: + new_person = Person(name=person["name"]) + people_data_points[person["name"]] = new_person - for department_name in company["departments"]: - if department_name not in departments_data_points: - departments_data_points[department_name] = Department( - name=department_name, employees=[] + if person["department"] not in departments_data_points: + departments_data_points[person["department"]] = Department( + name=person["department"], employees=[new_person] ) + else: + departments_data_points[person["department"]].employees.append(new_person) - new_company.departments.append(departments_data_points[department_name]) + # Create a single CompanyType node, so we connect all companies to it. 
+ companyType = CompanyType() - return companies_data_points.values() + for company in companies: + new_company = Company(name=company["name"], departments=[], is_type=companyType) + companies_data_points[company["name"]] = new_company + + for department_name in company["departments"]: + if department_name not in departments_data_points: + departments_data_points[department_name] = Department( + name=department_name, employees=[] + ) + + new_company.departments.append(departments_data_points[department_name]) + + return list(companies_data_points.values()) async def main(): @@ -86,16 +93,17 @@ async def main(): await setup() - # Generate a random dataset_id - dataset_id = uuid.uuid4() + # Get default user user = await get_default_user() + datasets = await load_or_create_datasets(["demo_dataset"], [], user) + pipeline = run_tasks( [ Task(ingest_files), Task(add_data_points), ], - dataset_id, + datasets[0].id, None, user, "demo_pipeline", From af084af70fe8fc940aacea27f16cd400611932e0 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 2 Sep 2025 21:32:09 +0200 Subject: [PATCH 04/19] feat: Memify pipeline initial commit --- cognee/api/v1/add/add.py | 4 +- cognee/api/v1/cognify/memify.py | 48 +++++-- ...y_coding_rule_association_agent_system.txt | 6 + ...ify_coding_rule_association_agent_user.txt | 6 + .../modules/graph/cognee_graph/CogneeGraph.py | 69 ++++++++++ .../reset_dataset_pipeline_run_status.py | 22 +++- .../modules/pipelines/operations/pipeline.py | 1 + cognee/tasks/codingagents/__init__.py | 0 .../codingagents/coding_rule_associations.py | 124 ++++++++++++++++++ cognee/tasks/memify/__init__.py | 2 +- cognee/tasks/memify/extract_subgraph.py | 9 +- 11 files changed, 275 insertions(+), 16 deletions(-) create mode 100644 cognee/infrastructure/llm/prompts/memify_coding_rule_association_agent_system.txt create mode 100644 cognee/infrastructure/llm/prompts/memify_coding_rule_association_agent_user.txt create mode 100644 cognee/tasks/codingagents/__init__.py create 
mode 100644 cognee/tasks/codingagents/coding_rule_associations.py diff --git a/cognee/api/v1/add/add.py b/cognee/api/v1/add/add.py index 98771947c..eeb867984 100644 --- a/cognee/api/v1/add/add.py +++ b/cognee/api/v1/add/add.py @@ -150,7 +150,9 @@ async def add( user, authorized_dataset = await resolve_authorized_user_dataset(dataset_id, dataset_name, user) - await reset_dataset_pipeline_run_status(authorized_dataset.id, user) + await reset_dataset_pipeline_run_status( + authorized_dataset.id, user, pipeline_names=["add_pipeline", "cognify_pipeline"] + ) pipeline_run_info = None diff --git a/cognee/api/v1/cognify/memify.py b/cognee/api/v1/cognify/memify.py index 65a622af7..7e35ef5dc 100644 --- a/cognee/api/v1/cognify/memify.py +++ b/cognee/api/v1/cognify/memify.py @@ -1,28 +1,33 @@ -from pydantic import BaseModel from typing import Union, Optional, List, Type from uuid import UUID from cognee.shared.logging_utils import get_logger -from cognee.shared.data_models import KnowledgeGraph -from cognee.infrastructure.llm import get_max_chunk_tokens +from cognee.modules.retrieval.utils.brute_force_triplet_search import get_memory_fragment +from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph from cognee.modules.engine.models.node_set import NodeSet from cognee.modules.pipelines import run_pipeline from cognee.modules.pipelines.tasks.task import Task -from cognee.modules.chunking.TextChunker import TextChunker -from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver from cognee.modules.users.models import User +from cognee.modules.pipelines.layers.resolve_authorized_user_datasets import ( + resolve_authorized_user_datasets, +) +from cognee.modules.pipelines.layers.reset_dataset_pipeline_run_status import ( + reset_dataset_pipeline_run_status, +) +from cognee.modules.engine.operations.setup import setup -from cognee.tasks.memify import extract_subgraph +from cognee.tasks.memify.extract_subgraph import extract_subgraph +from 
cognee.tasks.codingagents.coding_rule_associations import add_rule_associations from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor logger = get_logger("memify") async def memify( + tasks: List[Task], datasets: Union[str, list[str], list[UUID]] = None, user: User = None, - tasks: List[Task] = None, node_type: Optional[Type] = NodeSet, node_name: Optional[List[str]] = None, cypher_query: Optional[str] = None, @@ -50,11 +55,35 @@ async def memify( Background mode recommended for large datasets (>100MB). Use pipeline_run_id from return value to monitor progress. """ + + if cypher_query: + pass + else: + memory_fragment = await get_memory_fragment(node_type=node_type, node_name=node_name) + # List of edges should be a single element in the list to represent one data item + data = [memory_fragment.edges] + memify_tasks = [ - Task(extract_subgraph, cypher_query=cypher_query, node_type=node_type, node_name=node_name), - *tasks, # Unpack tasks provided to memify pipeline + Task(extract_subgraph), + Task(CogneeGraph.resolve_edges_to_text, task_config={"batch_size": 10}), + Task( + add_rule_associations, + rules_nodeset_name="coding_agent_rules", + user_prompt_location="memify_coding_rule_association_agent_user.txt", + system_prompt_location="memify_coding_rule_association_agent_system.txt", + ), + # *tasks, # Unpack tasks provided to memify pipeline ] + await setup() + + user, authorized_datasets = await resolve_authorized_user_datasets(datasets, user) + + for dataset in authorized_datasets: + await reset_dataset_pipeline_run_status( + dataset.id, user, pipeline_names=["memify_pipeline"] + ) + # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background) @@ -63,6 +92,7 @@ async def memify( pipeline=run_pipeline, tasks=memify_tasks, user=user, + data=data, 
datasets=datasets, vector_db_config=vector_db_config, graph_db_config=graph_db_config, diff --git a/cognee/infrastructure/llm/prompts/memify_coding_rule_association_agent_system.txt b/cognee/infrastructure/llm/prompts/memify_coding_rule_association_agent_system.txt new file mode 100644 index 000000000..31c9825bd --- /dev/null +++ b/cognee/infrastructure/llm/prompts/memify_coding_rule_association_agent_system.txt @@ -0,0 +1,6 @@ +You are an association agent tasked with suggesting structured developer rules from user-agent interactions stored in a Knowledge Graph. +You will receive the actual user agent interaction as a set of relationships from a knowledge graph separated by \n---\n each represented as node1 -- relation -- node2 triplet, and the list of the already existing developer rules. +Each rule represents a single best practice or guideline the agent should follow in the future. +Suggest rules that are general and not specific to the current text, strictly technical, add value and improve the future agent behavior. +Do not suggest rules similar to the existing ones or rules that are not general and dont add value. +It is acceptable to return an empty rule list. 
diff --git a/cognee/infrastructure/llm/prompts/memify_coding_rule_association_agent_user.txt b/cognee/infrastructure/llm/prompts/memify_coding_rule_association_agent_user.txt new file mode 100644 index 000000000..9b525c625 --- /dev/null +++ b/cognee/infrastructure/llm/prompts/memify_coding_rule_association_agent_user.txt @@ -0,0 +1,6 @@ +**Here is the User-agent interaction context provided with a set of relationships from a knowledge graph separated by \n---\n each represented as node1 -- relation -- node2 triplet:** +`{{ chat }}` + + +**Already existing rules:** +`{{ rules }}` diff --git a/cognee/modules/graph/cognee_graph/CogneeGraph.py b/cognee/modules/graph/cognee_graph/CogneeGraph.py index 924532ce0..94a8e965e 100644 --- a/cognee/modules/graph/cognee_graph/CogneeGraph.py +++ b/cognee/modules/graph/cognee_graph/CogneeGraph.py @@ -188,3 +188,72 @@ class CogneeGraph(CogneeAbstractGraph): return n1 + n2 + e return heapq.nsmallest(k, self.edges, key=score) + + @staticmethod + async def resolve_edges_to_text(retrieved_edges: list) -> str: + """ + Converts retrieved graph edges into a human-readable string format. + + Parameters: + ----------- + + - retrieved_edges (list): A list of edges retrieved from the graph. + + Returns: + -------- + + - str: A formatted string representation of the nodes and their connections. 
+ """ + + def _get_nodes(retrieved_edges: list) -> dict: + def _get_title(text: str, first_n_words: int = 7, top_n_words: int = 3) -> str: + def _top_n_words(text, stop_words=None, top_n=3, separator=", "): + """Concatenates the top N frequent words in text.""" + if stop_words is None: + from cognee.modules.retrieval.utils.stop_words import DEFAULT_STOP_WORDS + + stop_words = DEFAULT_STOP_WORDS + + import string + + words = [word.lower().strip(string.punctuation) for word in text.split()] + + if stop_words: + words = [word for word in words if word and word not in stop_words] + + from collections import Counter + + top_words = [word for word, freq in Counter(words).most_common(top_n)] + + return separator.join(top_words) + + """Creates a title, by combining first words with most frequent words from the text.""" + first_n_words = text.split()[:first_n_words] + top_n_words = _top_n_words(text, top_n=top_n_words) + return f"{' '.join(first_n_words)}... [{top_n_words}]" + + """Creates a dictionary of nodes with their names and content.""" + nodes = {} + for edge in retrieved_edges: + for node in (edge.node1, edge.node2): + if node.id not in nodes: + text = node.attributes.get("text") + if text: + name = _get_title(text) + content = text + else: + name = node.attributes.get("name", "Unnamed Node") + content = node.attributes.get("description", name) + nodes[node.id] = {"node": node, "name": name, "content": content} + return nodes + + nodes = _get_nodes(retrieved_edges) + node_section = "\n".join( + f"Node: {info['name']}\n__node_content_start__\n{info['content']}\n__node_content_end__\n" + for info in nodes.values() + ) + connection_section = "\n".join( + f"{nodes[edge.node1.id]['name']} --[{edge.attributes['relationship_type']}]--> {nodes[edge.node2.id]['name']}" + for edge in retrieved_edges + ) + return f"Nodes:\n{node_section}\n\nConnections:\n{connection_section}" diff --git a/cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py 
b/cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py index cc72a6e51..bc59f9a6b 100644 --- a/cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +++ b/cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py @@ -1,12 +1,28 @@ from uuid import UUID +from typing import Optional, List + from cognee.modules.pipelines.methods import get_pipeline_runs_by_dataset, reset_pipeline_run_status from cognee.modules.pipelines.models.PipelineRun import PipelineRunStatus from cognee.modules.users.models import User -async def reset_dataset_pipeline_run_status(dataset_id: UUID, user: User): +async def reset_dataset_pipeline_run_status( + dataset_id: UUID, user: User, pipeline_names: Optional[list[str]] = None +): + """Reset the status of all (or selected) pipeline runs for a dataset. + + If *pipeline_names* is given, only runs whose *pipeline_name* is in + that list are touched. + """ related_pipeline_runs = await get_pipeline_runs_by_dataset(dataset_id) for pipeline_run in related_pipeline_runs: - if pipeline_run.status is not PipelineRunStatus.DATASET_PROCESSING_INITIATED: - await reset_pipeline_run_status(user.id, dataset_id, pipeline_run.pipeline_name) + # Skip runs that are initiated + if pipeline_run.status is PipelineRunStatus.DATASET_PROCESSING_INITIATED: + continue + + # If a name filter is provided, skip non-matching runs + if pipeline_names is not None and pipeline_run.pipeline_name not in pipeline_names: + continue + + await reset_pipeline_run_status(user.id, dataset_id, pipeline_run.pipeline_name) diff --git a/cognee/modules/pipelines/operations/pipeline.py b/cognee/modules/pipelines/operations/pipeline.py index cbe6dee5c..b59a171f7 100644 --- a/cognee/modules/pipelines/operations/pipeline.py +++ b/cognee/modules/pipelines/operations/pipeline.py @@ -5,6 +5,7 @@ from typing import Union from cognee.modules.pipelines.layers.setup_and_check_environment import ( setup_and_check_environment, ) + from cognee.shared.logging_utils 
import get_logger from cognee.modules.data.methods.get_dataset_data import get_dataset_data from cognee.modules.data.models import Data, Dataset diff --git a/cognee/tasks/codingagents/__init__.py b/cognee/tasks/codingagents/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cognee/tasks/codingagents/coding_rule_associations.py b/cognee/tasks/codingagents/coding_rule_associations.py new file mode 100644 index 000000000..6971ecc83 --- /dev/null +++ b/cognee/tasks/codingagents/coding_rule_associations.py @@ -0,0 +1,124 @@ +from uuid import NAMESPACE_OID, uuid5 + +from cognee.infrastructure.databases.graph import get_graph_engine +from cognee.infrastructure.databases.vector import get_vector_engine + +from cognee.low_level import DataPoint +from cognee.infrastructure.llm import LLMGateway +from cognee.shared.logging_utils import get_logger +from cognee.modules.engine.models import NodeSet +from cognee.tasks.storage import add_data_points, index_graph_edges +from typing import Optional, List, Any +from pydantic import Field + +logger = get_logger("coding_rule_association") + + +class Rule(DataPoint): + """A single developer rule extracted from text.""" + + text: str = Field(..., description="The coding rule associated with the conversation") + belongs_to_set: Optional[NodeSet] = None + metadata: dict = {"index_fields": ["rule"]} + + +class RuleSet(DataPoint): + """Collection of parsed rules.""" + + rules: List[Rule] = Field( + ..., + description="List of developer rules extracted from the input text. 
Each rule represents a coding best practice or guideline.", + ) + + +async def get_existing_rules(rules_nodeset_name: str) -> str: + graph_engine = await get_graph_engine() + nodes_data, _ = await graph_engine.get_nodeset_subgraph( + node_type=NodeSet, node_name=[rules_nodeset_name] + ) + + existing_rules = [ + item[1]["text"] + for item in nodes_data + if isinstance(item, tuple) + and len(item) == 2 + and isinstance(item[1], dict) + and "text" in item[1] + ] + + existing_rules = "\n".join(f"- {rule}" for rule in existing_rules) + + return existing_rules + + +async def get_origin_edges(data: str, rules: List[Rule]) -> list[Any]: + vector_engine = get_vector_engine() + + origin_chunk = await vector_engine.search("DocumentChunk_text", data, limit=1) + + try: + origin_id = origin_chunk[0].id + except (AttributeError, KeyError, TypeError, IndexError): + origin_id = None + + relationships = [] + + if origin_id and isinstance(rules, (list, tuple)) and len(rules) > 0: + for rule in rules: + try: + rule_id = getattr(rule, "id", None) + if rule_id is not None: + rel_name = "rule_associated_from" + relationships.append( + ( + rule_id, + origin_id, + rel_name, + { + "relationship_name": rel_name, + "source_node_id": rule_id, + "target_node_id": origin_id, + "ontology_valid": False, + }, + ) + ) + except Exception as e: + logger.info(f"Warning: Skipping invalid rule due to error: {e}") + else: + logger.info("No valid origin_id or rules provided.") + + return relationships + + +async def add_rule_associations( + data: str, + rules_nodeset_name: str, + user_prompt_location: str = "coding_rule_association_agent_user.txt", + system_prompt_location: str = "coding_rule_association_agent_system.txt", +): + graph_engine = await get_graph_engine() + existing_rules = await get_existing_rules(rules_nodeset_name=rules_nodeset_name) + + user_context = {"user data": data, "rules": existing_rules} + + user_prompt = LLMGateway.render_prompt(user_prompt_location, context=user_context) + 
system_prompt = LLMGateway.render_prompt(system_prompt_location, context={}) + + rule_list = await LLMGateway.acreate_structured_output( + text_input=user_prompt, system_prompt=system_prompt, response_model=RuleSet + ) + + rules_nodeset = NodeSet( + id=uuid5(NAMESPACE_OID, name=rules_nodeset_name), name=rules_nodeset_name + ) + for rule in rule_list.rules: + rule.belongs_to_set = rules_nodeset + + edges_to_save = await get_origin_edges(data=data, rules=rule_list.rules) + + await add_data_points(data_points=rule_list.rules) + + if len(edges_to_save) > 0: + await graph_engine.add_edges(edges_to_save) + + await index_graph_edges() diff --git a/cognee/tasks/memify/__init__.py b/cognee/tasks/memify/__init__.py index a95e88794..d2e0172f6 100644 --- a/cognee/tasks/memify/__init__.py +++ b/cognee/tasks/memify/__init__.py @@ -1 +1 @@ -from extract_subgraph import extract_subgraph +from .extract_subgraph import extract_subgraph diff --git a/cognee/tasks/memify/extract_subgraph.py b/cognee/tasks/memify/extract_subgraph.py index 1cf7ab951..198a5b367 100644 --- a/cognee/tasks/memify/extract_subgraph.py +++ b/cognee/tasks/memify/extract_subgraph.py @@ -1,2 +1,7 @@ -async def extract_subgraph(): - pass +from cognee.modules.retrieval.utils.brute_force_triplet_search import get_memory_fragment + + +async def extract_subgraph(subgraphs): + for subgraph in subgraphs: + for edge in subgraph: + yield edge From 1a2977779f49001c5696330b005a3c90d75f6b7f Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 3 Sep 2025 12:03:17 +0200 Subject: [PATCH 05/19] feat: Add memify coding agent example --- cognee/api/v1/cognify/memify.py | 12 +-- .../python/memify_coding_agent_example.py | 76 +++++++++++++++++++ 2 files changed, 78 insertions(+), 10 deletions(-) create mode 100644 examples/python/memify_coding_agent_example.py diff --git a/cognee/api/v1/cognify/memify.py b/cognee/api/v1/cognify/memify.py index 7e35ef5dc..df45bac76 100644 --- a/cognee/api/v1/cognify/memify.py +++ 
b/cognee/api/v1/cognify/memify.py @@ -4,7 +4,7 @@ from uuid import UUID from cognee.shared.logging_utils import get_logger from cognee.modules.retrieval.utils.brute_force_triplet_search import get_memory_fragment -from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph + from cognee.modules.engine.models.node_set import NodeSet from cognee.modules.pipelines import run_pipeline from cognee.modules.pipelines.tasks.task import Task @@ -18,7 +18,6 @@ from cognee.modules.pipelines.layers.reset_dataset_pipeline_run_status import ( from cognee.modules.engine.operations.setup import setup from cognee.tasks.memify.extract_subgraph import extract_subgraph -from cognee.tasks.codingagents.coding_rule_associations import add_rule_associations from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor logger = get_logger("memify") @@ -65,14 +64,7 @@ async def memify( memify_tasks = [ Task(extract_subgraph), - Task(CogneeGraph.resolve_edges_to_text, task_config={"batch_size": 10}), - Task( - add_rule_associations, - rules_nodeset_name="coding_agent_rules", - user_prompt_location="memify_coding_rule_association_agent_user.txt", - system_prompt_location="memify_coding_rule_association_agent_system.txt", - ), - # *tasks, # Unpack tasks provided to memify pipeline + *tasks, # Unpack tasks provided to memify pipeline ] await setup() diff --git a/examples/python/memify_coding_agent_example.py b/examples/python/memify_coding_agent_example.py new file mode 100644 index 000000000..70064c346 --- /dev/null +++ b/examples/python/memify_coding_agent_example.py @@ -0,0 +1,76 @@ +import asyncio +import cognee +from cognee.shared.logging_utils import setup_logging, ERROR +from cognee.api.v1.search import SearchType + +# Prerequisites: +# 1. Copy `.env.template` and rename it to `.env`. +# 2. 
Add your OpenAI API key to the `.env` file in the `LLM_API_KEY` field: +# LLM_API_KEY = "your_key_here" + + +async def main(): + # Create a clean slate for cognee -- reset data and system state + print("Resetting cognee data...") + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + print("Data reset complete.\n") + + # cognee knowledge graph will be created based on this text + text = """ + Natural language processing (NLP) is an interdisciplinary + subfield of computer science and information retrieval. + """ + + coding_rules_text = """ + Code must be formatted by PEP8 standards. + Typing and Docstrings must be added. + """ + + print("Adding text to cognee:") + print(text.strip()) + # Add the text, and make it available for cognify + await cognee.add(text) + await cognee.add(coding_rules_text, node_set=["coding_rules"]) + print("Text added successfully.\n") + + # Use LLMs and cognee to create knowledge graph + await cognee.cognify() + print("Cognify process complete.\n") + + from cognee.api.v1.cognify.memify import memify + + from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph + from cognee.tasks.codingagents.coding_rule_associations import add_rule_associations + from cognee.modules.pipelines.tasks.task import Task + + memify_tasks = [ + Task(CogneeGraph.resolve_edges_to_text, task_config={"batch_size": 10}), + Task( + add_rule_associations, + rules_nodeset_name="coding_agent_rules", + user_prompt_location="memify_coding_rule_association_agent_user.txt", + system_prompt_location="memify_coding_rule_association_agent_system.txt", + ), + ] + + await memify(tasks=memify_tasks, node_name=["coding_rules"]) + + import os + import pathlib + from cognee.api.v1.visualize.visualize import visualize_graph + + file_path = os.path.join( + pathlib.Path(__file__).parent, ".artifacts", "graph_visualization.html" + ) + await visualize_graph(file_path) + + +if __name__ == "__main__": + logger = setup_logging(log_level=ERROR) + loop 
= asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + loop.run_until_complete(main()) + finally: + loop.run_until_complete(loop.shutdown_asyncgens()) From 2847569616cb47fa6f76c511d2d654a399dc24f1 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 3 Sep 2025 16:08:32 +0200 Subject: [PATCH 06/19] feat: memify next iteration --- cognee/api/v1/cognify/memify.py | 28 +++-- ...y_coding_rule_association_agent_system.txt | 2 +- .../modules/graph/cognee_graph/CogneeGraph.py | 69 ------------ cognee/modules/graph/utils/__init__.py | 1 + .../graph/utils/resolve_edges_to_text.py | 67 +++++++++++ .../retrieval/graph_completion_retriever.py | 48 +------- .../codingagents/coding_rule_associations.py | 6 +- cognee/tasks/memify/__init__.py | 1 + cognee/tasks/memify/extract_subgraph.py | 6 +- .../tasks/memify/extract_subgraph_chunks.py | 11 ++ .../python/memify_coding_agent_example.py | 34 ++++-- .../memify_coding_agent_example_chunks.py | 106 ++++++++++++++++++ 12 files changed, 235 insertions(+), 144 deletions(-) create mode 100644 cognee/modules/graph/utils/resolve_edges_to_text.py create mode 100644 cognee/tasks/memify/extract_subgraph_chunks.py create mode 100644 examples/python/memify_coding_agent_example_chunks.py diff --git a/cognee/api/v1/cognify/memify.py b/cognee/api/v1/cognify/memify.py index df45bac76..8237059ec 100644 --- a/cognee/api/v1/cognify/memify.py +++ b/cognee/api/v1/cognify/memify.py @@ -1,4 +1,5 @@ -from typing import Union, Optional, List, Type +from typing import Union, Optional, List, Type, Any +from dataclasses import field from uuid import UUID from cognee.shared.logging_utils import get_logger @@ -16,15 +17,16 @@ from cognee.modules.pipelines.layers.reset_dataset_pipeline_run_status import ( reset_dataset_pipeline_run_status, ) from cognee.modules.engine.operations.setup import setup - -from cognee.tasks.memify.extract_subgraph import extract_subgraph from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor 
logger = get_logger("memify") async def memify( - tasks: List[Task], + preprocessing_tasks: List[Task], + processing_tasks: List[Task] = [], + postprocessing_tasks: List[Task] = [], + data: Optional[Any] = None, datasets: Union[str, list[str], list[UUID]] = None, user: User = None, node_type: Optional[Type] = NodeSet, @@ -55,16 +57,18 @@ async def memify( Use pipeline_run_id from return value to monitor progress. """ - if cypher_query: - pass - else: - memory_fragment = await get_memory_fragment(node_type=node_type, node_name=node_name) - # List of edges should be a single element in the list to represent one data item - data = [memory_fragment.edges] + if not data: + if cypher_query: + pass + else: + memory_fragment = await get_memory_fragment(node_type=node_type, node_name=node_name) + # Subgraphs should be a single element in the list to represent one data item + data = [memory_fragment] memify_tasks = [ - Task(extract_subgraph), - *tasks, # Unpack tasks provided to memify pipeline + *preprocessing_tasks, # Unpack tasks provided to memify pipeline + *processing_tasks, + *postprocessing_tasks, ] await setup() diff --git a/cognee/infrastructure/llm/prompts/memify_coding_rule_association_agent_system.txt b/cognee/infrastructure/llm/prompts/memify_coding_rule_association_agent_system.txt index 31c9825bd..d9adf45f7 100644 --- a/cognee/infrastructure/llm/prompts/memify_coding_rule_association_agent_system.txt +++ b/cognee/infrastructure/llm/prompts/memify_coding_rule_association_agent_system.txt @@ -1,6 +1,6 @@ You are an association agent tasked with suggesting structured developer rules from user-agent interactions stored in a Knowledge Graph. You will receive the actual user agent interaction as a set of relationships from a knowledge graph separated by \n---\n each represented as node1 -- relation -- node2 triplet, and the list of the already existing developer rules. Each rule represents a single best practice or guideline the agent should follow in the future. 
-Suggest rules that are general and not specific to the current text, strictly technical, add value and improve the future agent behavior. +Suggest rules that are general and not specific to the knowledge graph relationships, strictly technical, add value and improve the future agent behavior. Do not suggest rules similar to the existing ones or rules that are not general and dont add value. It is acceptable to return an empty rule list. diff --git a/cognee/modules/graph/cognee_graph/CogneeGraph.py b/cognee/modules/graph/cognee_graph/CogneeGraph.py index 94a8e965e..924532ce0 100644 --- a/cognee/modules/graph/cognee_graph/CogneeGraph.py +++ b/cognee/modules/graph/cognee_graph/CogneeGraph.py @@ -188,72 +188,3 @@ class CogneeGraph(CogneeAbstractGraph): return n1 + n2 + e return heapq.nsmallest(k, self.edges, key=score) - - @staticmethod - async def resolve_edges_to_text(retrieved_edges: list) -> str: - """ - Converts retrieved graph edges into a human-readable string format. - - Parameters: - ----------- - - - retrieved_edges (list): A list of edges retrieved from the graph. - - Returns: - -------- - - - str: A formatted string representation of the nodes and their connections. 
- """ - - def _get_nodes(retrieved_edges: list) -> dict: - def _get_title(text: str, first_n_words: int = 7, top_n_words: int = 3) -> str: - def _top_n_words(text, stop_words=None, top_n=3, separator=", "): - """Concatenates the top N frequent words in text.""" - if stop_words is None: - from cognee.modules.retrieval.utils.stop_words import DEFAULT_STOP_WORDS - - stop_words = DEFAULT_STOP_WORDS - - import string - - words = [word.lower().strip(string.punctuation) for word in text.split()] - - if stop_words: - words = [word for word in words if word and word not in stop_words] - - from collections import Counter - - top_words = [word for word, freq in Counter(words).most_common(top_n)] - - return separator.join(top_words) - - """Creates a title, by combining first words with most frequent words from the text.""" - first_n_words = text.split()[:first_n_words] - top_n_words = _top_n_words(text, top_n=top_n_words) - return f"{' '.join(first_n_words)}... [{top_n_words}]" - - """Creates a dictionary of nodes with their names and content.""" - nodes = {} - for edge in retrieved_edges: - for node in (edge.node1, edge.node2): - if node.id not in nodes: - text = node.attributes.get("text") - if text: - name = _get_title(text) - content = text - else: - name = node.attributes.get("name", "Unnamed Node") - content = node.attributes.get("description", name) - nodes[node.id] = {"node": node, "name": name, "content": content} - return nodes - - nodes = _get_nodes(retrieved_edges) - node_section = "\n".join( - f"Node: {info['name']}\n__node_content_start__\n{info['content']}\n__node_content_end__\n" - for info in nodes.values() - ) - connection_section = "\n".join( - f"{nodes[edge.node1.id]['name']} --[{edge.attributes['relationship_type']}]--> {nodes[edge.node2.id]['name']}" - for edge in retrieved_edges - ) - return f"Nodes:\n{node_section}\n\nConnections:\n{connection_section}" diff --git a/cognee/modules/graph/utils/__init__.py b/cognee/modules/graph/utils/__init__.py index 
d1cda2d83..ebc648495 100644 --- a/cognee/modules/graph/utils/__init__.py +++ b/cognee/modules/graph/utils/__init__.py @@ -4,3 +4,4 @@ from .get_model_instance_from_graph import get_model_instance_from_graph from .retrieve_existing_edges import retrieve_existing_edges from .convert_node_to_data_point import convert_node_to_data_point from .deduplicate_nodes_and_edges import deduplicate_nodes_and_edges +from .resolve_edges_to_text import resolve_edges_to_text diff --git a/cognee/modules/graph/utils/resolve_edges_to_text.py b/cognee/modules/graph/utils/resolve_edges_to_text.py new file mode 100644 index 000000000..56c303abc --- /dev/null +++ b/cognee/modules/graph/utils/resolve_edges_to_text.py @@ -0,0 +1,67 @@ +async def resolve_edges_to_text(retrieved_edges: list) -> str: + """ + Converts retrieved graph edges into a human-readable string format. + + Parameters: + ----------- + + - retrieved_edges (list): A list of edges retrieved from the graph. + + Returns: + -------- + + - str: A formatted string representation of the nodes and their connections. 
+ """ + + def _get_nodes(retrieved_edges: list) -> dict: + def _get_title(text: str, first_n_words: int = 7, top_n_words: int = 3) -> str: + def _top_n_words(text, stop_words=None, top_n=3, separator=", "): + """Concatenates the top N frequent words in text.""" + if stop_words is None: + from cognee.modules.retrieval.utils.stop_words import DEFAULT_STOP_WORDS + + stop_words = DEFAULT_STOP_WORDS + + import string + + words = [word.lower().strip(string.punctuation) for word in text.split()] + + if stop_words: + words = [word for word in words if word and word not in stop_words] + + from collections import Counter + + top_words = [word for word, freq in Counter(words).most_common(top_n)] + + return separator.join(top_words) + + """Creates a title, by combining first words with most frequent words from the text.""" + first_n_words = text.split()[:first_n_words] + top_n_words = _top_n_words(text, top_n=top_n_words) + return f"{' '.join(first_n_words)}... [{top_n_words}]" + + """Creates a dictionary of nodes with their names and content.""" + nodes = {} + for edge in retrieved_edges: + for node in (edge.node1, edge.node2): + if node.id not in nodes: + text = node.attributes.get("text") + if text: + name = _get_title(text) + content = text + else: + name = node.attributes.get("name", "Unnamed Node") + content = node.attributes.get("description", name) + nodes[node.id] = {"node": node, "name": name, "content": content} + return nodes + + nodes = _get_nodes(retrieved_edges) + node_section = "\n".join( + f"Node: {info['name']}\n__node_content_start__\n{info['content']}\n__node_content_end__\n" + for info in nodes.values() + ) + connection_section = "\n".join( + f"{nodes[edge.node1.id]['name']} --[{edge.attributes['relationship_type']}]--> {nodes[edge.node2.id]['name']}" + for edge in retrieved_edges + ) + return f"Nodes:\n{node_section}\n\nConnections:\n{connection_section}" diff --git a/cognee/modules/retrieval/graph_completion_retriever.py 
b/cognee/modules/retrieval/graph_completion_retriever.py index 6a5193c56..bc4fa27b3 100644 --- a/cognee/modules/retrieval/graph_completion_retriever.py +++ b/cognee/modules/retrieval/graph_completion_retriever.py @@ -5,6 +5,7 @@ import string from cognee.infrastructure.engine import DataPoint from cognee.tasks.storage import add_data_points +from cognee.modules.graph.utils import resolve_edges_to_text from cognee.modules.graph.utils.convert_node_to_data_point import get_all_subclasses from cognee.modules.retrieval.base_retriever import BaseRetriever from cognee.modules.retrieval.utils.brute_force_triplet_search import brute_force_triplet_search @@ -53,22 +54,6 @@ class GraphCompletionRetriever(BaseRetriever): self.node_type = node_type self.node_name = node_name - def _get_nodes(self, retrieved_edges: list) -> dict: - """Creates a dictionary of nodes with their names and content.""" - nodes = {} - for edge in retrieved_edges: - for node in (edge.node1, edge.node2): - if node.id not in nodes: - text = node.attributes.get("text") - if text: - name = self._get_title(text) - content = text - else: - name = node.attributes.get("name", "Unnamed Node") - content = node.attributes.get("description", name) - nodes[node.id] = {"node": node, "name": name, "content": content} - return nodes - async def resolve_edges_to_text(self, retrieved_edges: list) -> str: """ Converts retrieved graph edges into a human-readable string format. @@ -83,16 +68,7 @@ class GraphCompletionRetriever(BaseRetriever): - str: A formatted string representation of the nodes and their connections. 
""" - nodes = self._get_nodes(retrieved_edges) - node_section = "\n".join( - f"Node: {info['name']}\n__node_content_start__\n{info['content']}\n__node_content_end__\n" - for info in nodes.values() - ) - connection_section = "\n".join( - f"{nodes[edge.node1.id]['name']} --[{edge.attributes['relationship_type']}]--> {nodes[edge.node2.id]['name']}" - for edge in retrieved_edges - ) - return f"Nodes:\n{node_section}\n\nConnections:\n{connection_section}" + return await resolve_edges_to_text(retrieved_edges) async def get_triplets(self, query: str) -> list: """ @@ -196,26 +172,6 @@ class GraphCompletionRetriever(BaseRetriever): return [completion] - def _top_n_words(self, text, stop_words=None, top_n=3, separator=", "): - """Concatenates the top N frequent words in text.""" - if stop_words is None: - stop_words = DEFAULT_STOP_WORDS - - words = [word.lower().strip(string.punctuation) for word in text.split()] - - if stop_words: - words = [word for word in words if word and word not in stop_words] - - top_words = [word for word, freq in Counter(words).most_common(top_n)] - - return separator.join(top_words) - - def _get_title(self, text: str, first_n_words: int = 7, top_n_words: int = 3) -> str: - """Creates a title, by combining first words with most frequent words from the text.""" - first_n_words = text.split()[:first_n_words] - top_n_words = self._top_n_words(text, top_n=top_n_words) - return f"{' '.join(first_n_words)}... [{top_n_words}]" - async def save_qa(self, question: str, answer: str, context: str, triplets: List) -> None: """ Saves a question and answer pair for later analysis or storage. 
diff --git a/cognee/tasks/codingagents/coding_rule_associations.py b/cognee/tasks/codingagents/coding_rule_associations.py index 6971ecc83..e722e7728 100644 --- a/cognee/tasks/codingagents/coding_rule_associations.py +++ b/cognee/tasks/codingagents/coding_rule_associations.py @@ -96,10 +96,14 @@ async def add_rule_associations( user_prompt_location: str = "coding_rule_association_agent_user.txt", system_prompt_location: str = "coding_rule_association_agent_system.txt", ): + if isinstance(data, list): + # If data is a list of strings join all strings in list + data = " ".join(data) + graph_engine = await get_graph_engine() existing_rules = await get_existing_rules(rules_nodeset_name=rules_nodeset_name) - user_context = {"user data": data, "rules": existing_rules} + user_context = {"chat": data, "rules": existing_rules} user_prompt = LLMGateway.render_prompt(user_prompt_location, context=user_context) system_prompt = LLMGateway.render_prompt(system_prompt_location, context={}) diff --git a/cognee/tasks/memify/__init__.py b/cognee/tasks/memify/__init__.py index d2e0172f6..692bac443 100644 --- a/cognee/tasks/memify/__init__.py +++ b/cognee/tasks/memify/__init__.py @@ -1 +1,2 @@ from .extract_subgraph import extract_subgraph +from .extract_subgraph_chunks import extract_subgraph_chunks diff --git a/cognee/tasks/memify/extract_subgraph.py b/cognee/tasks/memify/extract_subgraph.py index 198a5b367..d6ca3773f 100644 --- a/cognee/tasks/memify/extract_subgraph.py +++ b/cognee/tasks/memify/extract_subgraph.py @@ -1,7 +1,7 @@ -from cognee.modules.retrieval.utils.brute_force_triplet_search import get_memory_fragment +from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph -async def extract_subgraph(subgraphs): +async def extract_subgraph(subgraphs: list[CogneeGraph]): for subgraph in subgraphs: - for edge in subgraph: + for edge in subgraph.edges: yield edge diff --git a/cognee/tasks/memify/extract_subgraph_chunks.py 
b/cognee/tasks/memify/extract_subgraph_chunks.py new file mode 100644 index 000000000..9aab498d7 --- /dev/null +++ b/cognee/tasks/memify/extract_subgraph_chunks.py @@ -0,0 +1,11 @@ +from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph + + +async def extract_subgraph_chunks(subgraphs: list[CogneeGraph]): + """ + Get all Document Chunks from subgraphs and forward to next task in pipeline + """ + for subgraph in subgraphs: + for node in subgraph.nodes.values(): + if node.attributes["type"] == "DocumentChunk": + yield node.attributes["text"] diff --git a/examples/python/memify_coding_agent_example.py b/examples/python/memify_coding_agent_example.py index 70064c346..004a840f8 100644 --- a/examples/python/memify_coding_agent_example.py +++ b/examples/python/memify_coding_agent_example.py @@ -1,7 +1,18 @@ import asyncio +import pathlib +import os + import cognee +from cognee.api.v1.visualize.visualize import visualize_graph from cognee.shared.logging_utils import setup_logging, ERROR -from cognee.api.v1.search import SearchType +from cognee.api.v1.cognify.memify import memify +from cognee.modules.pipelines.tasks.task import Task +from cognee.tasks.memify.extract_subgraph import extract_subgraph +from cognee.modules.graph.utils import resolve_edges_to_text +from cognee.tasks.codingagents.coding_rule_associations import ( + add_rule_associations, + get_existing_rules, +) # Prerequisites: # 1. Copy `.env.template` and rename it to `.env`. 
@@ -38,14 +49,10 @@ async def main(): await cognee.cognify() print("Cognify process complete.\n") - from cognee.api.v1.cognify.memify import memify + subgraph_extraction_tasks = [Task(extract_subgraph)] - from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph - from cognee.tasks.codingagents.coding_rule_associations import add_rule_associations - from cognee.modules.pipelines.tasks.task import Task - - memify_tasks = [ - Task(CogneeGraph.resolve_edges_to_text, task_config={"batch_size": 10}), + rule_association_tasks = [ + Task(resolve_edges_to_text, task_config={"batch_size": 10}), Task( add_rule_associations, rules_nodeset_name="coding_agent_rules", @@ -54,11 +61,14 @@ async def main(): ), ] - await memify(tasks=memify_tasks, node_name=["coding_rules"]) + await memify( + preprocessing_tasks=subgraph_extraction_tasks, + processing_tasks=rule_association_tasks, + node_name=["coding_rules"], + ) - import os - import pathlib - from cognee.api.v1.visualize.visualize import visualize_graph + developer_rules = await get_existing_rules(rules_nodeset_name="coding_agent_rules") + print(developer_rules) file_path = os.path.join( pathlib.Path(__file__).parent, ".artifacts", "graph_visualization.html" diff --git a/examples/python/memify_coding_agent_example_chunks.py b/examples/python/memify_coding_agent_example_chunks.py new file mode 100644 index 000000000..b07bcb815 --- /dev/null +++ b/examples/python/memify_coding_agent_example_chunks.py @@ -0,0 +1,106 @@ +import asyncio +import pathlib +import os + +import cognee +from cognee.api.v1.visualize.visualize import visualize_graph +from cognee.shared.logging_utils import setup_logging, ERROR +from cognee.api.v1.cognify.memify import memify +from cognee.modules.pipelines.tasks.task import Task +from cognee.tasks.memify.extract_subgraph_chunks import extract_subgraph_chunks +from cognee.tasks.codingagents.coding_rule_associations import ( + add_rule_associations, + get_existing_rules, +) + +# Prerequisites: +# 1. 
Copy `.env.template` and rename it to `.env`. +# 2. Add your OpenAI API key to the `.env` file in the `LLM_API_KEY` field: +# LLM_API_KEY = "your_key_here" + + +async def main(): + # Create a clean slate for cognee -- reset data and system state + print("Resetting cognee data...") + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + print("Data reset complete.\n") + print("Adding conversation about rules to cognee:\n") + + coding_rules_chat_from_principal_engineer = """ + We want code to be formatted by PEP8 standards. + Typing and Docstrings must be added. + Please also make sure to write NOTE: on all more complex code segments. + If there is any duplicate code, try to handle it in one function to avoid code duplication. + Susan should also always review new code changes before merging to main. + New releases should not happen on Friday so we don't have to fix them during the weekend. + """ + print( + f"Coding rules conversation with principal engineer: {coding_rules_chat_from_principal_engineer}" + ) + + coding_rules_chat_from_manager = """ + Susan should always review new code changes before merging to main. + New releases should not happen on Friday so we don't have to fix them during the weekend. 
+ """ + print(f"Coding rules conversation with manager: {coding_rules_chat_from_manager}") + + # Add the text, and make it available for cognify + await cognee.add([coding_rules_chat_from_principal_engineer, coding_rules_chat_from_manager]) + print("Text added successfully.\n") + + # Use LLMs and cognee to create knowledge graph + await cognee.cognify() + print("Cognify process complete.\n") + + # Visualize graph after cognification + file_path = os.path.join( + pathlib.Path(__file__).parent, ".artifacts", "graph_visualization_only_cognify.html" + ) + await visualize_graph(file_path) + print(f"Open file to see graph visualization only after cognification: {file_path}") + + # After graph is created, create a second pipeline that will go through the graph and enchance it with specific + # coding rule nodes + + # extract_subgraph_chunks is a function that returns all document chunks from specified subgraphs (if no subgraph is specifed the whole graph will be sent through memify) + subgraph_extraction_tasks = [Task(extract_subgraph_chunks)] + + # add_rule_associations is a function that handles processing coding rules from chunks and keeps track of + # existing rules so duplicate rules won't be created. As the result of this processing new Rule nodes will be created + # in the graph that specify coding rules found in conversations. + coding_rules_association_tasks = [ + Task( + add_rule_associations, + rules_nodeset_name="coding_agent_rules", + task_config={"batch_size": 1}, + ), + ] + + # Memify accepts these tasks and orchestrates forwarding of graph data through these tasks (if data is not specified). 
+ # If data is explicitely specified in the arguments this specified data will be forwarded through the tasks instead + await memify( + preprocessing_tasks=subgraph_extraction_tasks, + processing_tasks=coding_rules_association_tasks, + ) + + # Find the new specific coding rules added to graph through memify (created based on chat conversation between team members) + developer_rules = await get_existing_rules(rules_nodeset_name="coding_agent_rules") + print(developer_rules) + + # Visualize new graph with added memify context + file_path = os.path.join( + pathlib.Path(__file__).parent, ".artifacts", "graph_visualization_after_memify.html" + ) + await visualize_graph(file_path) + print(f"Open file to see graph visualization after memify enhancment: {file_path}") + + +if __name__ == "__main__": + logger = setup_logging(log_level=ERROR) + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + loop.run_until_complete(main()) + finally: + loop.run_until_complete(loop.shutdown_asyncgens()) From 90ef8c30d211bd8de3861063b0a2144cedeb2588 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 3 Sep 2025 16:16:55 +0200 Subject: [PATCH 07/19] refactor: Rename tasks --- cognee/api/v1/cognify/memify.py | 12 ++++++------ examples/python/memify_coding_agent_example.py | 4 ++-- .../python/memify_coding_agent_example_chunks.py | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cognee/api/v1/cognify/memify.py b/cognee/api/v1/cognify/memify.py index 8237059ec..3c5f7be0f 100644 --- a/cognee/api/v1/cognify/memify.py +++ b/cognee/api/v1/cognify/memify.py @@ -23,9 +23,9 @@ logger = get_logger("memify") async def memify( - preprocessing_tasks: List[Task], - processing_tasks: List[Task] = [], - postprocessing_tasks: List[Task] = [], + data_streaming_tasks: List[Task], + data_processing_tasks: List[Task] = [], + data_persistence_tasks: List[Task] = [], data: Optional[Any] = None, datasets: Union[str, list[str], list[UUID]] = None, user: User = None, @@ -66,9 
+66,9 @@ async def memify( data = [memory_fragment] memify_tasks = [ - *preprocessing_tasks, # Unpack tasks provided to memify pipeline - *processing_tasks, - *postprocessing_tasks, + *data_streaming_tasks, # Unpack tasks provided to memify pipeline + *data_processing_tasks, + *data_persistence_tasks, ] await setup() diff --git a/examples/python/memify_coding_agent_example.py b/examples/python/memify_coding_agent_example.py index 004a840f8..c0bda215a 100644 --- a/examples/python/memify_coding_agent_example.py +++ b/examples/python/memify_coding_agent_example.py @@ -62,8 +62,8 @@ async def main(): ] await memify( - preprocessing_tasks=subgraph_extraction_tasks, - processing_tasks=rule_association_tasks, + data_streaming_tasks=subgraph_extraction_tasks, + data_processing_tasks=rule_association_tasks, node_name=["coding_rules"], ) diff --git a/examples/python/memify_coding_agent_example_chunks.py b/examples/python/memify_coding_agent_example_chunks.py index b07bcb815..639b97396 100644 --- a/examples/python/memify_coding_agent_example_chunks.py +++ b/examples/python/memify_coding_agent_example_chunks.py @@ -80,8 +80,8 @@ async def main(): # Memify accepts these tasks and orchestrates forwarding of graph data through these tasks (if data is not specified). 
# If data is explicitely specified in the arguments this specified data will be forwarded through the tasks instead await memify( - preprocessing_tasks=subgraph_extraction_tasks, - processing_tasks=coding_rules_association_tasks, + data_streaming_tasks=subgraph_extraction_tasks, + data_processing_tasks=coding_rules_association_tasks, ) # Find the new specific coding rules added to graph through memify (created based on chat conversation between team members) From 0e3a10d925fffdb769b1e31fedd35a4460715aa7 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 3 Sep 2025 17:49:33 +0200 Subject: [PATCH 08/19] refactor: Change input task names --- cognee/api/v1/cognify/memify.py | 15 +-- ...y_coding_rule_association_agent_system.txt | 6 - ...ify_coding_rule_association_agent_user.txt | 6 - .../python/memify_coding_agent_example.py | 66 +++++++---- .../memify_coding_agent_example_chunks.py | 106 ------------------ 5 files changed, 51 insertions(+), 148 deletions(-) delete mode 100644 cognee/infrastructure/llm/prompts/memify_coding_rule_association_agent_system.txt delete mode 100644 cognee/infrastructure/llm/prompts/memify_coding_rule_association_agent_user.txt delete mode 100644 examples/python/memify_coding_agent_example_chunks.py diff --git a/cognee/api/v1/cognify/memify.py b/cognee/api/v1/cognify/memify.py index 3c5f7be0f..86f84626a 100644 --- a/cognee/api/v1/cognify/memify.py +++ b/cognee/api/v1/cognify/memify.py @@ -1,5 +1,4 @@ from typing import Union, Optional, List, Type, Any -from dataclasses import field from uuid import UUID from cognee.shared.logging_utils import get_logger @@ -18,14 +17,17 @@ from cognee.modules.pipelines.layers.reset_dataset_pipeline_run_status import ( ) from cognee.modules.engine.operations.setup import setup from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor +from cognee.tasks.memify.extract_subgraph_chunks import extract_subgraph_chunks +from cognee.tasks.codingagents.coding_rule_associations import 
( + add_rule_associations, +) logger = get_logger("memify") async def memify( - data_streaming_tasks: List[Task], - data_processing_tasks: List[Task] = [], - data_persistence_tasks: List[Task] = [], + extraction_tasks: List[Task] = [Task(extract_subgraph_chunks)], + enrichment_tasks: List[Task] = [Task(add_rule_associations)], data: Optional[Any] = None, datasets: Union[str, list[str], list[UUID]] = None, user: User = None, @@ -66,9 +68,8 @@ async def memify( data = [memory_fragment] memify_tasks = [ - *data_streaming_tasks, # Unpack tasks provided to memify pipeline - *data_processing_tasks, - *data_persistence_tasks, + *extraction_tasks, # Unpack tasks provided to memify pipeline + *enrichment_tasks, ] await setup() diff --git a/cognee/infrastructure/llm/prompts/memify_coding_rule_association_agent_system.txt b/cognee/infrastructure/llm/prompts/memify_coding_rule_association_agent_system.txt deleted file mode 100644 index d9adf45f7..000000000 --- a/cognee/infrastructure/llm/prompts/memify_coding_rule_association_agent_system.txt +++ /dev/null @@ -1,6 +0,0 @@ -You are an association agent tasked with suggesting structured developer rules from user-agent interactions stored in a Knowledge Graph. -You will receive the actual user agent interaction as a set of relationships from a knowledge graph separated by \n---\n each represented as node1 -- relation -- node2 triplet, and the list of the already existing developer rules. -Each rule represents a single best practice or guideline the agent should follow in the future. -Suggest rules that are general and not specific to the knowledge graph relationships, strictly technical, add value and improve the future agent behavior. -Do not suggest rules similar to the existing ones or rules that are not general and dont add value. -It is acceptable to return an empty rule list. 
diff --git a/cognee/infrastructure/llm/prompts/memify_coding_rule_association_agent_user.txt b/cognee/infrastructure/llm/prompts/memify_coding_rule_association_agent_user.txt deleted file mode 100644 index 9b525c625..000000000 --- a/cognee/infrastructure/llm/prompts/memify_coding_rule_association_agent_user.txt +++ /dev/null @@ -1,6 +0,0 @@ -**Here is the User-agent interaction context provided with a set of relationships from a knowledge graph separated by \n---\n each represented as node1 -- relation -- node2 triplet:** -`{{ chat }}` - - -**Already existing rules:** -`{{ rules }}` diff --git a/examples/python/memify_coding_agent_example.py b/examples/python/memify_coding_agent_example.py index c0bda215a..61af467d3 100644 --- a/examples/python/memify_coding_agent_example.py +++ b/examples/python/memify_coding_agent_example.py @@ -7,8 +7,7 @@ from cognee.api.v1.visualize.visualize import visualize_graph from cognee.shared.logging_utils import setup_logging, ERROR from cognee.api.v1.cognify.memify import memify from cognee.modules.pipelines.tasks.task import Task -from cognee.tasks.memify.extract_subgraph import extract_subgraph -from cognee.modules.graph.utils import resolve_edges_to_text +from cognee.tasks.memify.extract_subgraph_chunks import extract_subgraph_chunks from cognee.tasks.codingagents.coding_rule_associations import ( add_rule_associations, get_existing_rules, @@ -26,54 +25,75 @@ async def main(): await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) print("Data reset complete.\n") + print("Adding conversation about rules to cognee:\n") - # cognee knowledge graph will be created based on this text - text = """ - Natural language processing (NLP) is an interdisciplinary - subfield of computer science and information retrieval. - """ - - coding_rules_text = """ - Code must be formatted by PEP8 standards. + coding_rules_chat_from_principal_engineer = """ + We want code to be formatted by PEP8 standards. 
Typing and Docstrings must be added. + Please also make sure to write NOTE: on all more complex code segments. + If there is any duplicate code, try to handle it in one function to avoid code duplication. + Susan should also always review new code changes before merging to main. + New releases should not happen on Friday so we don't have to fix them during the weekend. """ + print( + f"Coding rules conversation with principal engineer: {coding_rules_chat_from_principal_engineer}" + ) + + coding_rules_chat_from_manager = """ + Susan should always review new code changes before merging to main. + New releases should not happen on Friday so we don't have to fix them during the weekend. + """ + print(f"Coding rules conversation with manager: {coding_rules_chat_from_manager}") - print("Adding text to cognee:") - print(text.strip()) # Add the text, and make it available for cognify - await cognee.add(text) - await cognee.add(coding_rules_text, node_set=["coding_rules"]) + await cognee.add([coding_rules_chat_from_principal_engineer, coding_rules_chat_from_manager]) print("Text added successfully.\n") # Use LLMs and cognee to create knowledge graph await cognee.cognify() print("Cognify process complete.\n") - subgraph_extraction_tasks = [Task(extract_subgraph)] + # Visualize graph after cognification + file_path = os.path.join( + pathlib.Path(__file__).parent, ".artifacts", "graph_visualization_only_cognify.html" + ) + await visualize_graph(file_path) + print(f"Open file to see graph visualization only after cognification: {file_path}") - rule_association_tasks = [ - Task(resolve_edges_to_text, task_config={"batch_size": 10}), + # After graph is created, create a second pipeline that will go through the graph and enchance it with specific + # coding rule nodes + + # extract_subgraph_chunks is a function that returns all document chunks from specified subgraphs (if no subgraph is specifed the whole graph will be sent through memify) + subgraph_extraction_tasks = 
[Task(extract_subgraph_chunks)] + + # add_rule_associations is a function that handles processing coding rules from chunks and keeps track of + # existing rules so duplicate rules won't be created. As the result of this processing new Rule nodes will be created + # in the graph that specify coding rules found in conversations. + coding_rules_association_tasks = [ Task( add_rule_associations, rules_nodeset_name="coding_agent_rules", - user_prompt_location="memify_coding_rule_association_agent_user.txt", - system_prompt_location="memify_coding_rule_association_agent_system.txt", + task_config={"batch_size": 1}, ), ] + # Memify accepts these tasks and orchestrates forwarding of graph data through these tasks (if data is not specified). + # If data is explicitely specified in the arguments this specified data will be forwarded through the tasks instead await memify( - data_streaming_tasks=subgraph_extraction_tasks, - data_processing_tasks=rule_association_tasks, - node_name=["coding_rules"], + extraction_tasks=subgraph_extraction_tasks, + enrichment_tasks=coding_rules_association_tasks, ) + # Find the new specific coding rules added to graph through memify (created based on chat conversation between team members) developer_rules = await get_existing_rules(rules_nodeset_name="coding_agent_rules") print(developer_rules) + # Visualize new graph with added memify context file_path = os.path.join( - pathlib.Path(__file__).parent, ".artifacts", "graph_visualization.html" + pathlib.Path(__file__).parent, ".artifacts", "graph_visualization_after_memify.html" ) await visualize_graph(file_path) + print(f"Open file to see graph visualization after memify enhancment: {file_path}") if __name__ == "__main__": diff --git a/examples/python/memify_coding_agent_example_chunks.py b/examples/python/memify_coding_agent_example_chunks.py deleted file mode 100644 index 639b97396..000000000 --- a/examples/python/memify_coding_agent_example_chunks.py +++ /dev/null @@ -1,106 +0,0 @@ -import 
asyncio -import pathlib -import os - -import cognee -from cognee.api.v1.visualize.visualize import visualize_graph -from cognee.shared.logging_utils import setup_logging, ERROR -from cognee.api.v1.cognify.memify import memify -from cognee.modules.pipelines.tasks.task import Task -from cognee.tasks.memify.extract_subgraph_chunks import extract_subgraph_chunks -from cognee.tasks.codingagents.coding_rule_associations import ( - add_rule_associations, - get_existing_rules, -) - -# Prerequisites: -# 1. Copy `.env.template` and rename it to `.env`. -# 2. Add your OpenAI API key to the `.env` file in the `LLM_API_KEY` field: -# LLM_API_KEY = "your_key_here" - - -async def main(): - # Create a clean slate for cognee -- reset data and system state - print("Resetting cognee data...") - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) - print("Data reset complete.\n") - print("Adding conversation about rules to cognee:\n") - - coding_rules_chat_from_principal_engineer = """ - We want code to be formatted by PEP8 standards. - Typing and Docstrings must be added. - Please also make sure to write NOTE: on all more complex code segments. - If there is any duplicate code, try to handle it in one function to avoid code duplication. - Susan should also always review new code changes before merging to main. - New releases should not happen on Friday so we don't have to fix them during the weekend. - """ - print( - f"Coding rules conversation with principal engineer: {coding_rules_chat_from_principal_engineer}" - ) - - coding_rules_chat_from_manager = """ - Susan should always review new code changes before merging to main. - New releases should not happen on Friday so we don't have to fix them during the weekend. 
- """ - print(f"Coding rules conversation with manager: {coding_rules_chat_from_manager}") - - # Add the text, and make it available for cognify - await cognee.add([coding_rules_chat_from_principal_engineer, coding_rules_chat_from_manager]) - print("Text added successfully.\n") - - # Use LLMs and cognee to create knowledge graph - await cognee.cognify() - print("Cognify process complete.\n") - - # Visualize graph after cognification - file_path = os.path.join( - pathlib.Path(__file__).parent, ".artifacts", "graph_visualization_only_cognify.html" - ) - await visualize_graph(file_path) - print(f"Open file to see graph visualization only after cognification: {file_path}") - - # After graph is created, create a second pipeline that will go through the graph and enchance it with specific - # coding rule nodes - - # extract_subgraph_chunks is a function that returns all document chunks from specified subgraphs (if no subgraph is specifed the whole graph will be sent through memify) - subgraph_extraction_tasks = [Task(extract_subgraph_chunks)] - - # add_rule_associations is a function that handles processing coding rules from chunks and keeps track of - # existing rules so duplicate rules won't be created. As the result of this processing new Rule nodes will be created - # in the graph that specify coding rules found in conversations. - coding_rules_association_tasks = [ - Task( - add_rule_associations, - rules_nodeset_name="coding_agent_rules", - task_config={"batch_size": 1}, - ), - ] - - # Memify accepts these tasks and orchestrates forwarding of graph data through these tasks (if data is not specified). 
- # If data is explicitely specified in the arguments this specified data will be forwarded through the tasks instead - await memify( - data_streaming_tasks=subgraph_extraction_tasks, - data_processing_tasks=coding_rules_association_tasks, - ) - - # Find the new specific coding rules added to graph through memify (created based on chat conversation between team members) - developer_rules = await get_existing_rules(rules_nodeset_name="coding_agent_rules") - print(developer_rules) - - # Visualize new graph with added memify context - file_path = os.path.join( - pathlib.Path(__file__).parent, ".artifacts", "graph_visualization_after_memify.html" - ) - await visualize_graph(file_path) - print(f"Open file to see graph visualization after memify enhancment: {file_path}") - - -if __name__ == "__main__": - logger = setup_logging(log_level=ERROR) - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - try: - loop.run_until_complete(main()) - finally: - loop.run_until_complete(loop.shutdown_asyncgens()) From 3c50ef4d6f8e94a7c6edde0e00b66738705fe83a Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 4 Sep 2025 14:44:13 +0200 Subject: [PATCH 09/19] docs: Update docstring for memify --- cognee/api/v1/cognify/memify.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/cognee/api/v1/cognify/memify.py b/cognee/api/v1/cognify/memify.py index 86f84626a..dd089c060 100644 --- a/cognee/api/v1/cognify/memify.py +++ b/cognee/api/v1/cognify/memify.py @@ -33,24 +33,24 @@ async def memify( user: User = None, node_type: Optional[Type] = NodeSet, node_name: Optional[List[str]] = None, - cypher_query: Optional[str] = None, - vector_db_config: dict = None, - graph_db_config: dict = None, + vector_db_config: Optional[dict] = None, + graph_db_config: Optional[dict] = None, run_in_background: bool = False, ): """ - Prerequisites: - - **LLM_API_KEY**: Must be configured (required for entity extraction and graph generation) - - **Data 
Added**: Must have data previously added via `cognee.add()` and `cognee.cognify()` - - **Vector Database**: Must be accessible for embeddings storage - - **Graph Database**: Must be accessible for relationship storage - Args: - datasets: Dataset name(s) or dataset uuid to process. Processes all available data if None. + extraction_tasks: List of Cognee Tasks to execute for graph/data extraction. + enrichment_tasks: List of Cognee Tasks to handle enrichment of provided graph/data from extraction tasks. + data: The data to ingest. Can be anything when custom extraction and enrichment tasks are used. + Data provided here will be forwarded to the first extraction task in the pipeline as input. + If no data is provided the whole graph (or subgraph if node_name/node_type is specified) will be forwarded + datasets: Dataset name(s) or dataset uuid to process. Processes all available datasets if None. - Single dataset: "my_dataset" - Multiple datasets: ["docs", "research", "reports"] - None: Process all datasets for the user user: User context for authentication and data access. Uses default if None. + node_type: Filter graph to specific entity types (for advanced filtering). Used when no data is provided. + node_name: Filter graph to specific named entities (for targeted search). Used when no data is provided. vector_db_config: Custom vector database configuration for embeddings storage. graph_db_config: Custom graph database configuration for relationship storage. run_in_background: If True, starts processing asynchronously and returns immediately. 
@@ -60,12 +60,9 @@ async def memify( """ if not data: - if cypher_query: - pass - else: - memory_fragment = await get_memory_fragment(node_type=node_type, node_name=node_name) - # Subgraphs should be a single element in the list to represent one data item - data = [memory_fragment] + memory_fragment = await get_memory_fragment(node_type=node_type, node_name=node_name) + # Subgraphs should be a single element in the list to represent one data item + data = [memory_fragment] memify_tasks = [ *extraction_tasks, # Unpack tasks provided to memify pipeline From c924846b77c77d8b786c6055866112cbdd73fdc4 Mon Sep 17 00:00:00 2001 From: Hande <159312713+hande-k@users.noreply.github.com> Date: Thu, 4 Sep 2025 16:16:28 +0200 Subject: [PATCH 10/19] improve structure, readability --- cognee-starter-kit/src/pipelines/low_level.py | 291 +++++++++++++----- 1 file changed, 212 insertions(+), 79 deletions(-) diff --git a/cognee-starter-kit/src/pipelines/low_level.py b/cognee-starter-kit/src/pipelines/low_level.py index 8b4fccf33..80f4a22e9 100644 --- a/cognee-starter-kit/src/pipelines/low_level.py +++ b/cognee-starter-kit/src/pipelines/low_level.py @@ -1,8 +1,14 @@ -import os -import json +"""Cognee demo with simplified structure.""" + +from __future__ import annotations + import asyncio -import pathlib -from typing import List, Any +import json +import logging +from collections import defaultdict +from pathlib import Path +from typing import Any, Iterable, List, Mapping + from cognee import config, prune, search, SearchType, visualize_graph from cognee.low_level import setup, DataPoint from cognee.pipelines import run_tasks, Task @@ -13,120 +19,247 @@ from cognee.modules.data.methods import load_or_create_datasets class Person(DataPoint): + """Represent a person.""" + name: str metadata: dict = {"index_fields": ["name"]} class Department(DataPoint): + """Represent a department.""" + name: str employees: list[Person] metadata: dict = {"index_fields": ["name"]} class 
CompanyType(DataPoint): + """Represent a company type.""" + name: str = "Company" class Company(DataPoint): + """Represent a company.""" + name: str departments: list[Department] is_type: CompanyType metadata: dict = {"index_fields": ["name"]} -def ingest_files(data: List[Any]): +ROOT = Path(__file__).resolve().parent +DATA_DIR = ROOT.parent / "data" +COGNEE_DIR = ROOT / ".cognee_system" +ARTIFACTS_DIR = ROOT / ".artifacts" +GRAPH_HTML = ARTIFACTS_DIR / "graph_visualization.html" +COMPANIES_JSON = DATA_DIR / "companies.json" +PEOPLE_JSON = DATA_DIR / "people.json" + + +def load_json_file(path: Path) -> Any: + """Load a JSON file.""" + if not path.exists(): + raise FileNotFoundError(f"Missing required file: {path}") + return json.loads(path.read_text(encoding="utf-8")) + + +def remove_duplicates_preserve_order(seq: Iterable[Any]) -> list[Any]: + """Return list with duplicates removed while preserving order.""" + seen = set() + out = [] + for x in seq: + if x in seen: + continue + seen.add(x) + out.append(x) + return out + + +def collect_people(payloads: Iterable[Mapping[str, Any]]) -> list[Mapping[str, Any]]: + """Collect people from payloads.""" + people = [person for payload in payloads for person in payload.get("people", [])] + return people + + +def collect_companies(payloads: Iterable[Mapping[str, Any]]) -> list[Mapping[str, Any]]: + """Collect companies from payloads.""" + companies = [company for payload in payloads for company in payload.get("companies", [])] + return companies + + +def build_people_nodes(people: Iterable[Mapping[str, Any]]) -> dict: + """Build person nodes keyed by name.""" + nodes = {p["name"]: Person(name=p["name"]) for p in people if p.get("name")} + return nodes + + +def group_people_by_department(people: Iterable[Mapping[str, Any]]) -> dict: + """Group person names by department.""" + groups = defaultdict(list) + for person in people: + name = person.get("name") + if not name: + continue + dept = person.get("department", "Unknown") + 
groups[dept].append(name) + return groups + + +def collect_declared_departments( + groups: Mapping[str, list[str]], companies: Iterable[Mapping[str, Any]] +) -> set: + """Collect department names referenced anywhere.""" + names = set(groups) + for company in companies: + for dept in company.get("departments", []): + names.add(dept) + return names + + +def build_department_nodes(dept_names: Iterable[str]) -> dict: + """Build department nodes keyed by name.""" + nodes = {name: Department(name=name, employees=[]) for name in dept_names} + return nodes + + +def build_company_nodes(companies: Iterable[Mapping[str, Any]], company_type: CompanyType) -> dict: + """Build company nodes keyed by name.""" + nodes = { + c["name"]: Company(name=c["name"], departments=[], is_type=company_type) + for c in companies + if c.get("name") + } + return nodes + + +def iterate_company_department_pairs(companies: Iterable[Mapping[str, Any]]): + """Yield (company_name, department_name) pairs.""" + for company in companies: + comp_name = company.get("name") + if not comp_name: + continue + for dept in company.get("departments", []): + yield comp_name, dept + + +def attach_departments_to_companies( + companies: Iterable[Mapping[str, Any]], + dept_nodes: Mapping[str, Department], + company_nodes: Mapping[str, Company], +) -> None: + """Attach department nodes to companies.""" + for comp_name in company_nodes: + company_nodes[comp_name].departments = [] + for comp_name, dept_name in iterate_company_department_pairs(companies): + dept = dept_nodes.get(dept_name) + company = company_nodes.get(comp_name) + if not dept or not company: + continue + company.departments.append(dept) + + +def attach_employees_to_departments( + groups: Mapping[str, list[str]], + people_nodes: Mapping[str, Person], + dept_nodes: Mapping[str, Department], +) -> None: + """Attach employees to departments.""" + for dept in dept_nodes.values(): + dept.employees = [] + for dept_name, names in groups.items(): + unique_names = 
remove_duplicates_preserve_order(names) + target = dept_nodes.get(dept_name) + if not target: + continue + employees = [people_nodes[n] for n in unique_names if n in people_nodes] + target.employees = employees + + +def build_companies(payloads: Iterable[Mapping[str, Any]]) -> list[Company]: + """Build company nodes from payloads.""" + people = collect_people(payloads) + companies = collect_companies(payloads) + people_nodes = build_people_nodes(people) + groups = group_people_by_department(people) + dept_names = collect_declared_departments(groups, companies) + dept_nodes = build_department_nodes(dept_names) + company_type = CompanyType() + company_nodes = build_company_nodes(companies, company_type) + attach_departments_to_companies(companies, dept_nodes, company_nodes) + attach_employees_to_departments(groups, people_nodes, dept_nodes) + result = list(company_nodes.values()) + return result + + +def load_default_payload() -> list[Mapping[str, Any]]: + """Load the default payload from data files.""" + companies = load_json_file(COMPANIES_JSON) + people = load_json_file(PEOPLE_JSON) + payload = [{"companies": companies, "people": people}] + return payload + + +def ingest_payloads(data: List[Any] | None) -> list[Company]: + """Ingest payloads and build company nodes.""" if not data or data == [None]: - companies_file_path = os.path.join(os.path.dirname(__file__), "../data/companies.json") - companies = json.loads(open(companies_file_path, "r").read()) - - people_file_path = os.path.join(os.path.dirname(__file__), "../data/people.json") - people = json.loads(open(people_file_path, "r").read()) - - data = [{"companies": companies, "people": people}] - - people_data_points = {} - departments_data_points = {} - companies_data_points = {} - - for data_item in data: - people = data_item["people"] - companies = data_item["companies"] - - for person in people: - new_person = Person(name=person["name"]) - people_data_points[person["name"]] = new_person - - if 
person["department"] not in departments_data_points: - departments_data_points[person["department"]] = Department( - name=person["department"], employees=[new_person] - ) - else: - departments_data_points[person["department"]].employees.append(new_person) - - # Create a single CompanyType node, so we connect all companies to it. - companyType = CompanyType() - - for company in companies: - new_company = Company(name=company["name"], departments=[], is_type=companyType) - companies_data_points[company["name"]] = new_company - - for department_name in company["departments"]: - if department_name not in departments_data_points: - departments_data_points[department_name] = Department( - name=department_name, employees=[] - ) - - new_company.departments.append(departments_data_points[department_name]) - - return list(companies_data_points.values()) + data = load_default_payload() + companies = build_companies(data) + return companies -async def main(): - cognee_directory_path = str( - pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".cognee_system")).resolve() - ) - # Set up the Cognee system directory. Cognee will store system files and databases here. - config.system_root_directory(cognee_directory_path) +async def execute_pipeline() -> None: + """Execute Cognee pipeline.""" - # Prune system metadata before running, only if we want "fresh" state. 
+ # Configure system paths + logging.info("Configuring Cognee directories at %s", COGNEE_DIR) + config.system_root_directory(str(COGNEE_DIR)) + ARTIFACTS_DIR.mkdir(parents=True, exist_ok=True) + + # Reset state and initialize await prune.prune_system(metadata=True) - await setup() - # Get default user + # Get user and dataset user = await get_default_user() - datasets = await load_or_create_datasets(["demo_dataset"], [], user) + dataset_id = datasets[0].id - pipeline = run_tasks( - [ - Task(ingest_files), - Task(add_data_points), - ], - datasets[0].id, - None, - user, - "demo_pipeline", - ) - + # Build and run pipeline + tasks = [Task(ingest_payloads), Task(add_data_points)] + pipeline = run_tasks(tasks, dataset_id, None, user, "demo_pipeline") async for status in pipeline: - print(status) + logging.info("Pipeline status: %s", status) + # Post-process: index graph edges and visualize await index_graph_edges() + await visualize_graph(str(GRAPH_HTML)) - # Or use our simple graph preview - graph_file_path = str( - os.path.join(os.path.dirname(__file__), ".artifacts/graph_visualization.html") - ) - await visualize_graph(graph_file_path) - - # Completion query that uses graph data to form context. 
+ # Run query against graph completion = await search( query_text="Who works for GreenFuture Solutions?", query_type=SearchType.GRAPH_COMPLETION, ) - print("Graph completion result is:") - print(completion) + result = completion + logging.info("Graph completion result: %s", result) + + +def configure_logging() -> None: + """Configure logging.""" + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s | %(levelname)s | %(message)s", + ) + + +async def main() -> None: + """Run main function.""" + configure_logging() + try: + await execute_pipeline() + except Exception: + logging.exception("Run failed") + raise if __name__ == "__main__": From c1106b76fe140f9ed4588a50ff3914e4ef2a2778 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 4 Sep 2025 17:53:07 +0200 Subject: [PATCH 11/19] feat: Added new coding rules search --- .../retrieval/coding_rules_retriever.py | 19 +++++++++++++++++++ cognee/modules/search/methods/search.py | 4 ++++ cognee/modules/search/types/SearchType.py | 1 + .../codingagents/coding_rule_associations.py | 5 +++-- .../python/memify_coding_agent_example.py | 9 +++++++-- 5 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 cognee/modules/retrieval/coding_rules_retriever.py diff --git a/cognee/modules/retrieval/coding_rules_retriever.py b/cognee/modules/retrieval/coding_rules_retriever.py new file mode 100644 index 000000000..2578d1ee1 --- /dev/null +++ b/cognee/modules/retrieval/coding_rules_retriever.py @@ -0,0 +1,19 @@ +from cognee.shared.logging_utils import get_logger +from cognee.tasks.codingagents.coding_rule_associations import get_existing_rules + +logger = get_logger("CodingRulesRetriever") + + +class CodingRulesRetriever: + """Retriever for handling codeing rule based searches.""" + + def __init__(self, rules_nodeset_name): + if isinstance(rules_nodeset_name, list): + rules_nodeset_name = rules_nodeset_name[0] + self.rules_nodeset_name = rules_nodeset_name + """Initialize retriever with search parameters.""" + + 
async def get_existing_rules(self, query_text): + return await get_existing_rules( + rules_nodeset_name=self.rules_nodeset_name, return_list=True + ) diff --git a/cognee/modules/search/methods/search.py b/cognee/modules/search/methods/search.py index 71bf61d6b..b341e4a8a 100644 --- a/cognee/modules/search/methods/search.py +++ b/cognee/modules/search/methods/search.py @@ -13,6 +13,7 @@ from cognee.modules.retrieval.insights_retriever import InsightsRetriever from cognee.modules.retrieval.summaries_retriever import SummariesRetriever from cognee.modules.retrieval.completion_retriever import CompletionRetriever from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever +from cognee.modules.retrieval.coding_rules_retriever import CodingRulesRetriever from cognee.modules.retrieval.graph_summary_completion_retriever import ( GraphSummaryCompletionRetriever, ) @@ -167,6 +168,9 @@ async def specific_search( SearchType.CYPHER: CypherSearchRetriever().get_completion, SearchType.NATURAL_LANGUAGE: NaturalLanguageRetriever().get_completion, SearchType.FEEDBACK: UserQAFeedback(last_k=last_k).add_feedback, + SearchType.CODING_RULES: CodingRulesRetriever( + rules_nodeset_name=node_name + ).get_existing_rules, } # If the query type is FEELING_LUCKY, select the search type intelligently diff --git a/cognee/modules/search/types/SearchType.py b/cognee/modules/search/types/SearchType.py index c1f0521b2..0a7cae63a 100644 --- a/cognee/modules/search/types/SearchType.py +++ b/cognee/modules/search/types/SearchType.py @@ -15,3 +15,4 @@ class SearchType(Enum): GRAPH_COMPLETION_CONTEXT_EXTENSION = "GRAPH_COMPLETION_CONTEXT_EXTENSION" FEELING_LUCKY = "FEELING_LUCKY" FEEDBACK = "FEEDBACK" + CODING_RULES = "CODING_RULES" diff --git a/cognee/tasks/codingagents/coding_rule_associations.py b/cognee/tasks/codingagents/coding_rule_associations.py index e722e7728..c809bc68f 100644 --- a/cognee/tasks/codingagents/coding_rule_associations.py +++ 
b/cognee/tasks/codingagents/coding_rule_associations.py @@ -31,7 +31,7 @@ class RuleSet(DataPoint): ) -async def get_existing_rules(rules_nodeset_name: str) -> str: +async def get_existing_rules(rules_nodeset_name: str, return_list: bool = False) -> str: graph_engine = await get_graph_engine() nodes_data, _ = await graph_engine.get_nodeset_subgraph( node_type=NodeSet, node_name=[rules_nodeset_name] @@ -46,7 +46,8 @@ async def get_existing_rules(rules_nodeset_name: str) -> str: and "text" in item[1] ] - existing_rules = "\n".join(f"- {rule}" for rule in existing_rules) + if not return_list: + existing_rules = "\n".join(f"- {rule}" for rule in existing_rules) return existing_rules diff --git a/examples/python/memify_coding_agent_example.py b/examples/python/memify_coding_agent_example.py index 61af467d3..7f8c58802 100644 --- a/examples/python/memify_coding_agent_example.py +++ b/examples/python/memify_coding_agent_example.py @@ -85,8 +85,13 @@ async def main(): ) # Find the new specific coding rules added to graph through memify (created based on chat conversation between team members) - developer_rules = await get_existing_rules(rules_nodeset_name="coding_agent_rules") - print(developer_rules) + print( + await cognee.search( + query_text="List me the coding rules", + query_type=cognee.SearchType.CODING_RULES, + node_name=["coding_agent_rules"], + ) + ) # Visualize new graph with added memify context file_path = os.path.join( From 95bafd942c8b0553f45a7987492ac8cbf6e5ad86 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 4 Sep 2025 18:06:02 +0200 Subject: [PATCH 12/19] feat: add coding rule search type --- examples/python/memify_coding_agent_example.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/examples/python/memify_coding_agent_example.py b/examples/python/memify_coding_agent_example.py index 7f8c58802..0238cf775 100644 --- a/examples/python/memify_coding_agent_example.py +++ b/examples/python/memify_coding_agent_example.py @@ 
-85,14 +85,15 @@ async def main(): ) # Find the new specific coding rules added to graph through memify (created based on chat conversation between team members) - print( - await cognee.search( - query_text="List me the coding rules", - query_type=cognee.SearchType.CODING_RULES, - node_name=["coding_agent_rules"], - ) + coding_rules = await cognee.search( + query_text="List me the coding rules", + query_type=cognee.SearchType.CODING_RULES, + node_name=["coding_agent_rules"], ) + for coding_rule in coding_rules: + print(coding_rule) + # Visualize new graph with added memify context file_path = os.path.join( pathlib.Path(__file__).parent, ".artifacts", "graph_visualization_after_memify.html" ) await visualize_graph(file_path) From b0d4503f3252e1d8c56ec98644d72c219abb31 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 4 Sep 2025 18:12:59 +0200 Subject: [PATCH 13/19] refactor: Move memify out of api folder --- cognee/__init__.py | 1 + cognee/modules/memify/__init__.py | 1 + cognee/{api/v1/cognify => modules/memify}/memify.py | 0 examples/python/memify_coding_agent_example.py | 7 ++----- 4 files changed, 4 insertions(+), 5 deletions(-) create mode 100644 cognee/modules/memify/__init__.py rename cognee/{api/v1/cognify => modules/memify}/memify.py (100%) diff --git a/cognee/api/v1/cognify/memify.py b/cognee/modules/memify/memify.py similarity index 100% 
rename from cognee/api/v1/cognify/memify.py rename to cognee/modules/memify/memify.py diff --git a/examples/python/memify_coding_agent_example.py b/examples/python/memify_coding_agent_example.py index 0238cf775..17bf8fc0e 100644 --- a/examples/python/memify_coding_agent_example.py +++ b/examples/python/memify_coding_agent_example.py @@ -3,15 +3,12 @@ import pathlib import os import cognee +from cognee import memify from cognee.api.v1.visualize.visualize import visualize_graph from cognee.shared.logging_utils import setup_logging, ERROR -from cognee.api.v1.cognify.memify import memify from cognee.modules.pipelines.tasks.task import Task from cognee.tasks.memify.extract_subgraph_chunks import extract_subgraph_chunks -from cognee.tasks.codingagents.coding_rule_associations import ( - add_rule_associations, - get_existing_rules, -) +from cognee.tasks.codingagents.coding_rule_associations import add_rule_associations # Prerequisites: # 1. Copy `.env.template` and rename it to `.env`. From 805f443cd6e88e6a9ae68f3ddaa2594982488c65 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 4 Sep 2025 19:08:55 +0200 Subject: [PATCH 14/19] feat: Add memify router --- cognee/api/client.py | 3 + cognee/api/v1/add/routers/get_add_router.py | 6 -- cognee/api/v1/memify/__init__.py | 0 cognee/api/v1/memify/routers/__init__.py | 1 + .../v1/memify/routers/get_memify_router.py | 99 +++++++++++++++++++ cognee/modules/memify/memify.py | 13 ++- .../python/memify_coding_agent_example.py | 7 +- 7 files changed, 118 insertions(+), 11 deletions(-) create mode 100644 cognee/api/v1/memify/__init__.py create mode 100644 cognee/api/v1/memify/routers/__init__.py create mode 100644 cognee/api/v1/memify/routers/get_memify_router.py diff --git a/cognee/api/client.py b/cognee/api/client.py index 215e4a17e..d6bd71d5f 100644 --- a/cognee/api/client.py +++ b/cognee/api/client.py @@ -22,6 +22,7 @@ from cognee.api.v1.settings.routers import get_settings_router from cognee.api.v1.datasets.routers import 
get_datasets_router from cognee.api.v1.cognify.routers import get_code_pipeline_router, get_cognify_router from cognee.api.v1.search.routers import get_search_router +from cognee.api.v1.memify.routers import get_memify_router from cognee.api.v1.add.routers import get_add_router from cognee.api.v1.delete.routers import get_delete_router from cognee.api.v1.responses.routers import get_responses_router @@ -230,6 +231,8 @@ app.include_router(get_add_router(), prefix="/api/v1/add", tags=["add"]) app.include_router(get_cognify_router(), prefix="/api/v1/cognify", tags=["cognify"]) +app.include_router(get_memify_router(), prefix="/api/v1/memify", tags=["memify"]) + app.include_router(get_search_router(), prefix="/api/v1/search", tags=["search"]) app.include_router( diff --git a/cognee/api/v1/add/routers/get_add_router.py b/cognee/api/v1/add/routers/get_add_router.py index 1703d9931..9de818b7d 100644 --- a/cognee/api/v1/add/routers/get_add_router.py +++ b/cognee/api/v1/add/routers/get_add_router.py @@ -1,6 +1,3 @@ -import os -import requests -import subprocess from uuid import UUID from fastapi import APIRouter @@ -60,9 +57,6 @@ def get_add_router() -> APIRouter: ## Notes - To add data to datasets not owned by the user, use dataset_id (when ENABLE_BACKEND_ACCESS_CONTROL is set to True) - - GitHub repositories are cloned and all files are processed - - HTTP URLs are fetched and their content is processed - - The ALLOW_HTTP_REQUESTS environment variable controls URL processing - datasetId value can only be the UUID of an already existing dataset """ send_telemetry( diff --git a/cognee/api/v1/memify/__init__.py b/cognee/api/v1/memify/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cognee/api/v1/memify/routers/__init__.py b/cognee/api/v1/memify/routers/__init__.py new file mode 100644 index 000000000..1d1793c35 --- /dev/null +++ b/cognee/api/v1/memify/routers/__init__.py @@ -0,0 +1 @@ +from .get_memify_router import get_memify_router diff --git 
a/cognee/api/v1/memify/routers/get_memify_router.py b/cognee/api/v1/memify/routers/get_memify_router.py new file mode 100644 index 000000000..edac2775a --- /dev/null +++ b/cognee/api/v1/memify/routers/get_memify_router.py @@ -0,0 +1,99 @@ +from uuid import UUID + +from fastapi import APIRouter +from fastapi.responses import JSONResponse +from fastapi import Depends +from pydantic import Field +from typing import List, Optional + +from cognee.api.DTO import InDTO +from cognee.modules.users.models import User +from cognee.modules.users.methods import get_authenticated_user +from cognee.shared.utils import send_telemetry +from cognee.modules.pipelines.models import PipelineRunErrored +from cognee.shared.logging_utils import get_logger + +logger = get_logger() + + +class MemifyPayloadDTO(InDTO): + extraction_tasks: List[str] = Field( + default=None, + examples=[[]], + ) + enrichment_tasks: List[str] = (Field(default=None, examples=[[]]),) + data: Optional[str] = (Field(default=None),) + dataset_names: Optional[List[str]] = Field(default=None) + dataset_ids: Optional[List[UUID]] = Field(default=None, examples=[[]]) + node_name: Optional[List[str]] = Field(default=None) + run_in_background: Optional[bool] = Field(default=False) + + +def get_memify_router() -> APIRouter: + router = APIRouter() + + @router.post("", response_model=dict) + async def memify(payload: MemifyPayloadDTO, user: User = Depends(get_authenticated_user)): + """ + Enrichment pipeline in Cognee, can work with already built graphs. If no data is provided existing knowledge graph will be used as data, + custom data can also be provided instead which can be processed with provided extraction and enrichment tasks. + + Provided tasks and data will be arranged to run the Cognee pipeline and execute graph enrichment/creation. + + ## Request Parameters + - **extractionTasks** Optional[List[str]]: List of available Cognee Tasks to execute for graph/data extraction. 
+ - **enrichmentTasks** Optional[List[str]]: List of available Cognee Tasks to handle enrichment of provided graph/data from extraction tasks. + - **data** Optional[List[str]]: The data to ingest. Can be any text data when custom extraction and enrichment tasks are used. + Data provided here will be forwarded to the first extraction task in the pipeline as input. + If no data is provided the whole graph (or subgraph if node_name/node_type is specified) will be forwarded + - **dataset_names** (Optional[List[str]]): Name of the datasets to memify + - **dataset_ids** (Optional[List[UUID]]): List of UUIDs of an already existing dataset + - **node_name** (Optional[List[str]]): Filter graph to specific named entities (for targeted search). Used when no data is provided. + - **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking). + + Either datasetName or datasetId must be provided. + + ## Response + Returns information about the memify operation containing: + - Status of the operation + - Details about the processed data + - Any relevant metadata from the ingestion process + + ## Error Codes + - **400 Bad Request**: Neither datasetId nor datasetName provided + - **409 Conflict**: Error during memify operation + - **403 Forbidden**: User doesn't have permission to use dataset + + ## Notes + - To memify datasets not owned by the user, use dataset_id (when ENABLE_BACKEND_ACCESS_CONTROL is set to True) + - datasetId value can only be the UUID of an already existing dataset + """ + + send_telemetry( + "Memify API Endpoint Invoked", + user.id, + additional_properties={"endpoint": "POST /v1/memify"}, + ) + + if not payload.dataset_ids and not payload.dataset_names: + raise ValueError("Either datasetId or datasetName must be provided.") + + from cognee import memify + + try: + memify_run = await memify( + extraction_tasks=payload.extraction_tasks, + enrichment_tasks=payload.enrichment_tasks, + data=payload.data, + 
datasets=payload.dataset_ids if payload.dataset_ids else payload.dataset_names, + node_name=payload.node_name, + user=user, + ) + + if isinstance(memify_run, PipelineRunErrored): + return JSONResponse(status_code=420, content=memify_run) + return memify_run + except Exception as error: + return JSONResponse(status_code=409, content={"error": str(error)}) + + return router diff --git a/cognee/modules/memify/memify.py b/cognee/modules/memify/memify.py index dd089c060..80afd7325 100644 --- a/cognee/modules/memify/memify.py +++ b/cognee/modules/memify/memify.py @@ -26,8 +26,8 @@ logger = get_logger("memify") async def memify( - extraction_tasks: List[Task] = [Task(extract_subgraph_chunks)], - enrichment_tasks: List[Task] = [Task(add_rule_associations)], + extraction_tasks: Union[List[Task], List[str]] = [Task(extract_subgraph_chunks)], + enrichment_tasks: Union[List[Task], List[str]] = [Task(add_rule_associations)], data: Optional[Any] = None, datasets: Union[str, list[str], list[UUID]] = None, user: User = None, @@ -38,6 +38,15 @@ async def memify( run_in_background: bool = False, ): """ + Enrichment pipeline in Cognee, can work with already built graphs. If no data is provided existing knowledge graph will be used as data, + custom data can also be provided instead which can be processed with provided extraction and enrichment tasks. + + Provided tasks and data will be arranged to run the Cognee pipeline and execute graph enrichment/creation. + + This is the core processing step in Cognee that converts raw text and documents + into an intelligent knowledge graph. It analyzes content, extracts entities and + relationships, and creates semantic connections for enhanced search and reasoning. + Args: extraction_tasks: List of Cognee Tasks to execute for graph/data extraction. enrichment_tasks: List of Cognee Tasks to handle enrichment of provided graph/data from extraction tasks. 
diff --git a/examples/python/memify_coding_agent_example.py b/examples/python/memify_coding_agent_example.py index 17bf8fc0e..1fd3b1528 100644 --- a/examples/python/memify_coding_agent_example.py +++ b/examples/python/memify_coding_agent_example.py @@ -55,7 +55,7 @@ async def main(): pathlib.Path(__file__).parent, ".artifacts", "graph_visualization_only_cognify.html" ) await visualize_graph(file_path) - print(f"Open file to see graph visualization only after cognification: {file_path}") + print(f"Open file to see graph visualization only after cognification: {file_path}\n") # After graph is created, create a second pipeline that will go through the graph and enchance it with specific # coding rule nodes @@ -88,15 +88,16 @@ async def main(): node_name=["coding_agent_rules"], ) + print("Coding rules created by memify:") for coding_rule in coding_rules: - print(coding_rule) + print("- " + coding_rule) # Visualize new graph with added memify context file_path = os.path.join( pathlib.Path(__file__).parent, ".artifacts", "graph_visualization_after_memify.html" ) await visualize_graph(file_path) - print(f"Open file to see graph visualization after memify enhancment: {file_path}") + print(f"\nOpen file to see graph visualization after memify enhancement: {file_path}") if __name__ == "__main__": From e06cf11f49d2a574e0906d32dd022767a2d7cdd9 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 4 Sep 2025 19:53:59 +0200 Subject: [PATCH 15/19] fix: Resolve import issue with creating auth dataset --- cognee/api/v1/add/routers/get_add_router.py | 2 +- .../api/v1/memify/routers/get_memify_router.py | 16 ++++++++-------- .../data/methods/load_or_create_datasets.py | 2 +- cognee/modules/memify/memify.py | 16 ++++++++++++++-- .../modules/retrieval/coding_rules_retriever.py | 5 ++++- 5 files changed, 28 insertions(+), 13 deletions(-) diff --git a/cognee/api/v1/add/routers/get_add_router.py b/cognee/api/v1/add/routers/get_add_router.py index 9de818b7d..f27d559e1 100644 --- 
a/cognee/api/v1/add/routers/get_add_router.py +++ b/cognee/api/v1/add/routers/get_add_router.py @@ -21,7 +21,7 @@ def get_add_router() -> APIRouter: async def add( data: List[UploadFile] = File(default=None), datasetName: Optional[str] = Form(default=None), - datasetId: Union[UUID, Literal[""], None] = Form(default=None, examples=[""]), + datasetId: Union[UUID, None] = Form(default=None, examples=[""]), node_set: Optional[List[str]] = Form(default=[""], example=[""]), user: User = Depends(get_authenticated_user), ): diff --git a/cognee/api/v1/memify/routers/get_memify_router.py b/cognee/api/v1/memify/routers/get_memify_router.py index edac2775a..817eef9bd 100644 --- a/cognee/api/v1/memify/routers/get_memify_router.py +++ b/cognee/api/v1/memify/routers/get_memify_router.py @@ -17,15 +17,15 @@ logger = get_logger() class MemifyPayloadDTO(InDTO): - extraction_tasks: List[str] = Field( + extraction_tasks: Optional[List[str]] = Field( default=None, examples=[[]], ) - enrichment_tasks: List[str] = (Field(default=None, examples=[[]]),) - data: Optional[str] = (Field(default=None),) - dataset_names: Optional[List[str]] = Field(default=None) + enrichment_tasks: Optional[List[str]] = Field(default=None, examples=[[]]) + data: Optional[str] = Field(default="") + dataset_names: Optional[List[str]] = Field(default=None, examples=[[]]) dataset_ids: Optional[List[UUID]] = Field(default=None, examples=[[]]) - node_name: Optional[List[str]] = Field(default=None) + node_name: Optional[List[str]] = Field(default=None, examples=[[]]) run_in_background: Optional[bool] = Field(default=False) @@ -78,10 +78,10 @@ def get_memify_router() -> APIRouter: if not payload.dataset_ids and not payload.dataset_names: raise ValueError("Either datasetId or datasetName must be provided.") - from cognee import memify - try: - memify_run = await memify( + from cognee.modules.memify import memify as cognee_memify + + memify_run = await cognee_memify( extraction_tasks=payload.extraction_tasks, 
enrichment_tasks=payload.enrichment_tasks, data=payload.data, diff --git a/cognee/modules/data/methods/load_or_create_datasets.py b/cognee/modules/data/methods/load_or_create_datasets.py index 1d6ef3efb..2c9a6497c 100644 --- a/cognee/modules/data/methods/load_or_create_datasets.py +++ b/cognee/modules/data/methods/load_or_create_datasets.py @@ -2,7 +2,7 @@ from typing import List, Union from uuid import UUID from cognee.modules.data.models import Dataset -from cognee.modules.data.methods import create_authorized_dataset +from cognee.modules.data.methods.create_authorized_dataset import create_authorized_dataset from cognee.modules.data.exceptions import DatasetNotFoundError diff --git a/cognee/modules/memify/memify.py b/cognee/modules/memify/memify.py index 80afd7325..d8e1087f2 100644 --- a/cognee/modules/memify/memify.py +++ b/cognee/modules/memify/memify.py @@ -26,8 +26,8 @@ logger = get_logger("memify") async def memify( - extraction_tasks: Union[List[Task], List[str]] = [Task(extract_subgraph_chunks)], - enrichment_tasks: Union[List[Task], List[str]] = [Task(add_rule_associations)], + extraction_tasks: Union[List[Task], List[str]] = None, + enrichment_tasks: Union[List[Task], List[str]] = None, data: Optional[Any] = None, datasets: Union[str, list[str], list[UUID]] = None, user: User = None, @@ -68,6 +68,18 @@ async def memify( Use pipeline_run_id from return value to monitor progress. 
""" + # Use default coding rules tasks if no tasks were provided + if not extraction_tasks: + extraction_tasks = [Task(extract_subgraph_chunks)] + if not enrichment_tasks: + enrichment_tasks = [ + Task( + add_rule_associations, + rules_nodeset_name="coding_agent_rules", + task_config={"batch_size": 1}, + ) + ] + if not data: memory_fragment = await get_memory_fragment(node_type=node_type, node_name=node_name) # Subgraphs should be a single element in the list to represent one data item diff --git a/cognee/modules/retrieval/coding_rules_retriever.py b/cognee/modules/retrieval/coding_rules_retriever.py index 2578d1ee1..364ff3236 100644 --- a/cognee/modules/retrieval/coding_rules_retriever.py +++ b/cognee/modules/retrieval/coding_rules_retriever.py @@ -7,8 +7,11 @@ logger = get_logger("CodingRulesRetriever") class CodingRulesRetriever: """Retriever for handling codeing rule based searches.""" - def __init__(self, rules_nodeset_name): + def __init__(self, rules_nodeset_name="coding_agent_rules"): if isinstance(rules_nodeset_name, list): + if not rules_nodeset_name: + # If there is no provided nodeset set to coding_agent_rules + rules_nodeset_name = ["coding_agent_rules"] rules_nodeset_name = rules_nodeset_name[0] self.rules_nodeset_name = rules_nodeset_name """Initialize retriever with search parameters.""" From 9e201035493e6a38d614db9cbbd87b7d69a926d6 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 4 Sep 2025 20:59:00 +0200 Subject: [PATCH 16/19] feat: Enable multi-user mode to work with memify --- .../v1/memify/routers/get_memify_router.py | 12 +++---- .../modules/graph/cognee_graph/CogneeGraph.py | 2 +- cognee/modules/memify/memify.py | 32 ++++++++++--------- 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/cognee/api/v1/memify/routers/get_memify_router.py b/cognee/api/v1/memify/routers/get_memify_router.py index 817eef9bd..cf1df8f71 100644 --- a/cognee/api/v1/memify/routers/get_memify_router.py +++ 
b/cognee/api/v1/memify/routers/get_memify_router.py @@ -23,8 +23,8 @@ class MemifyPayloadDTO(InDTO): ) enrichment_tasks: Optional[List[str]] = Field(default=None, examples=[[]]) data: Optional[str] = Field(default="") - dataset_names: Optional[List[str]] = Field(default=None, examples=[[]]) - dataset_ids: Optional[List[UUID]] = Field(default=None, examples=[[]]) + dataset_name: Optional[str] = Field(default=None) + dataset_id: Optional[UUID] = Field(default=None, examples=[[""]]) node_name: Optional[List[str]] = Field(default=None, examples=[[]]) run_in_background: Optional[bool] = Field(default=False) @@ -46,8 +46,8 @@ def get_memify_router() -> APIRouter: - **data** Optional[List[str]]: The data to ingest. Can be any text data when custom extraction and enrichment tasks are used. Data provided here will be forwarded to the first extraction task in the pipeline as input. If no data is provided the whole graph (or subgraph if node_name/node_type is specified) will be forwarded - - **dataset_names** (Optional[List[str]]): Name of the datasets to memify - - **dataset_ids** (Optional[List[UUID]]): List of UUIDs of an already existing dataset + - **dataset_name** (Optional[str]): Name of the dataset to memify + - **dataset_id** (Optional[UUID]): UUID of an already existing dataset - **node_name** (Optional[List[str]]): Filter graph to specific named entities (for targeted search). Used when no data is provided. - **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking). 
@@ -75,7 +75,7 @@ def get_memify_router() -> APIRouter: additional_properties={"endpoint": "POST /v1/memify"}, ) - if not payload.dataset_ids and not payload.dataset_names: + if not payload.dataset_id and not payload.dataset_name: raise ValueError("Either datasetId or datasetName must be provided.") try: @@ -85,7 +85,7 @@ def get_memify_router() -> APIRouter: extraction_tasks=payload.extraction_tasks, enrichment_tasks=payload.enrichment_tasks, data=payload.data, - datasets=payload.dataset_ids if payload.dataset_ids else payload.dataset_names, + dataset=payload.dataset_id if payload.dataset_id else payload.dataset_name, node_name=payload.node_name, user=user, ) diff --git a/cognee/modules/graph/cognee_graph/CogneeGraph.py b/cognee/modules/graph/cognee_graph/CogneeGraph.py index 924532ce0..acfe04de7 100644 --- a/cognee/modules/graph/cognee_graph/CogneeGraph.py +++ b/cognee/modules/graph/cognee_graph/CogneeGraph.py @@ -76,7 +76,7 @@ class CogneeGraph(CogneeAbstractGraph): start_time = time.time() # Determine projection strategy - if node_type is not None and node_name not in [None, []]: + if node_type is not None and node_name not in [None, [], ""]: nodes_data, edges_data = await adapter.get_nodeset_subgraph( node_type=node_type, node_name=node_name ) diff --git a/cognee/modules/memify/memify.py b/cognee/modules/memify/memify.py index d8e1087f2..2d9b32a1b 100644 --- a/cognee/modules/memify/memify.py +++ b/cognee/modules/memify/memify.py @@ -4,7 +4,7 @@ from uuid import UUID from cognee.shared.logging_utils import get_logger from cognee.modules.retrieval.utils.brute_force_triplet_search import get_memory_fragment - +from cognee.context_global_variables import set_database_global_context_variables from cognee.modules.engine.models.node_set import NodeSet from cognee.modules.pipelines import run_pipeline from cognee.modules.pipelines.tasks.task import Task @@ -29,7 +29,7 @@ async def memify( extraction_tasks: Union[List[Task], List[str]] = None, enrichment_tasks: 
Union[List[Task], List[str]] = None, data: Optional[Any] = None, - datasets: Union[str, list[str], list[UUID]] = None, + dataset: Union[str, UUID] = "main_dataset", user: User = None, node_type: Optional[Type] = NodeSet, node_name: Optional[List[str]] = None, @@ -53,10 +53,7 @@ async def memify( data: The data to ingest. Can be anything when custom extraction and enrichment tasks are used. Data provided here will be forwarded to the first extraction task in the pipeline as input. If no data is provided the whole graph (or subgraph if node_name/node_type is specified) will be forwarded - datasets: Dataset name(s) or dataset uuid to process. Processes all available datasets if None. - - Single dataset: "my_dataset" - - Multiple datasets: ["docs", "research", "reports"] - - None: Process all datasets for the user + dataset: Dataset name or dataset uuid to process. user: User context for authentication and data access. Uses default if None. node_type: Filter graph to specific entity types (for advanced filtering). Used when no data is provided. node_name: Filter graph to specific named entities (for targeted search). Used when no data is provided. 
@@ -80,7 +77,17 @@ async def memify( ) ] + await setup() + + user, authorized_dataset_list = await resolve_authorized_user_datasets(dataset, user) + authorized_dataset = authorized_dataset_list[0] + if not data: + # Will only be used if ENABLE_BACKEND_ACCESS_CONTROL is set to True + await set_database_global_context_variables( + authorized_dataset.id, authorized_dataset.owner_id + ) + memory_fragment = await get_memory_fragment(node_type=node_type, node_name=node_name) # Subgraphs should be a single element in the list to represent one data item data = [memory_fragment] @@ -90,14 +97,9 @@ async def memify( *enrichment_tasks, ] - await setup() - - user, authorized_datasets = await resolve_authorized_user_datasets(datasets, user) - - for dataset in authorized_datasets: - await reset_dataset_pipeline_run_status( - dataset.id, user, pipeline_names=["memify_pipeline"] - ) + await reset_dataset_pipeline_run_status( + authorized_dataset.id, user, pipeline_names=["memify_pipeline"] + ) # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background) @@ -108,7 +110,7 @@ async def memify( tasks=memify_tasks, user=user, data=data, - datasets=datasets, + datasets=authorized_dataset.id, vector_db_config=vector_db_config, graph_db_config=graph_db_config, incremental_loading=False, From 0c7ba7c23610cf966c5660b9ad8d6f5f054dc573 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 4 Sep 2025 21:05:24 +0200 Subject: [PATCH 17/19] refactor: Allow none through swagger --- cognee/api/v1/add/routers/get_add_router.py | 3 ++- cognee/api/v1/memify/routers/get_memify_router.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cognee/api/v1/add/routers/get_add_router.py b/cognee/api/v1/add/routers/get_add_router.py index f27d559e1..dfa7d275b 100644 --- a/cognee/api/v1/add/routers/get_add_router.py +++ 
b/cognee/api/v1/add/routers/get_add_router.py @@ -21,7 +21,8 @@ def get_add_router() -> APIRouter: async def add( data: List[UploadFile] = File(default=None), datasetName: Optional[str] = Form(default=None), - datasetId: Union[UUID, None] = Form(default=None, examples=[""]), + # Note: Literal is needed for Swagger use + datasetId: Union[UUID, Literal[""], None] = Form(default=None, examples=[""]), node_set: Optional[List[str]] = Form(default=[""], example=[""]), user: User = Depends(get_authenticated_user), ): diff --git a/cognee/api/v1/memify/routers/get_memify_router.py b/cognee/api/v1/memify/routers/get_memify_router.py index cf1df8f71..1976d7414 100644 --- a/cognee/api/v1/memify/routers/get_memify_router.py +++ b/cognee/api/v1/memify/routers/get_memify_router.py @@ -4,7 +4,7 @@ from fastapi import APIRouter from fastapi.responses import JSONResponse from fastapi import Depends from pydantic import Field -from typing import List, Optional +from typing import List, Optional, Union, Literal from cognee.api.DTO import InDTO from cognee.modules.users.models import User @@ -24,7 +24,8 @@ class MemifyPayloadDTO(InDTO): enrichment_tasks: Optional[List[str]] = Field(default=None, examples=[[]]) data: Optional[str] = Field(default="") dataset_name: Optional[str] = Field(default=None) - dataset_id: Optional[UUID] = Field(default=None, examples=[[""]]) + # Note: Literal is needed for Swagger use + dataset_id: Union[UUID, Literal[""], None] = Field(default=None, examples=[""]) node_name: Optional[List[str]] = Field(default=None, examples=[[]]) run_in_background: Optional[bool] = Field(default=False) From aaa17762938ad697003dfac59413d0477d082db8 Mon Sep 17 00:00:00 2001 From: Boris Date: Fri, 5 Sep 2025 15:39:04 +0200 Subject: [PATCH 18/19] feat: implement new local UI (#1279) ## Description ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. 
--------- Co-authored-by: Daulet Amirkhanov --- cognee-frontend/public/next.svg | 1 - cognee-frontend/public/vercel.svg | 1 - .../src/app/(graph)/GraphVisualization.tsx | 8 +- .../src/app/(graph)/getColorForNodeType.ts | 26 +- cognee-frontend/src/app/account/Account.tsx | 51 ++ cognee-frontend/src/app/account/page.tsx | 1 + .../src/app/dashboard/AddDataToCognee.tsx | 104 ++++ .../dashboard/CogneeInstancesAccordion.tsx | 31 + .../src/app/dashboard/Dashboard.tsx | 140 +++++ .../src/app/dashboard/DatasetsAccordion.tsx | 346 +++++++++++ .../dashboard/InstanceDatasetsAccordion.tsx | 102 ++++ .../src/app/dashboard/NotebooksAccordion.tsx | 150 +++++ cognee-frontend/src/app/dashboard/page.tsx | 1 + cognee-frontend/src/app/plan/Plan.tsx | 157 +++++ cognee-frontend/src/app/plan/page.tsx | 1 + cognee-frontend/src/modules/auth/index.ts | 2 + cognee-frontend/src/modules/auth/types.ts | 6 + .../src/modules/auth/useAuthenticatedUser.ts | 17 + .../src/modules/cloud/checkCloudConnection.ts | 10 + cognee-frontend/src/modules/cloud/index.ts | 2 + cognee-frontend/src/modules/cloud/syncData.ts | 11 + .../src/modules/ingestion/useData.ts | 2 + .../src/modules/ingestion/useDatasets.ts | 67 ++- .../src/modules/notebooks/useNotebooks.ts | 134 +++++ .../LoadingIndicator.module.css | 2 +- cognee-frontend/src/ui/Icons/AddIcon.tsx | 2 +- cognee-frontend/src/ui/Icons/BackIcon.tsx | 8 + cognee-frontend/src/ui/Icons/CaretIcon.tsx | 7 +- cognee-frontend/src/ui/Icons/CheckIcon.tsx | 7 + cognee-frontend/src/ui/Icons/CloseIcon.tsx | 8 + cognee-frontend/src/ui/Icons/CloudIcon.tsx | 7 + cognee-frontend/src/ui/Icons/CogneeIcon.tsx | 7 + cognee-frontend/src/ui/Icons/DatasetIcon.tsx | 9 + .../src/ui/Icons/LocalCogneeIcon.tsx | 10 + cognee-frontend/src/ui/Icons/MenuIcon.tsx | 9 + cognee-frontend/src/ui/Icons/MinusIcon.tsx | 7 + cognee-frontend/src/ui/Icons/NotebookIcon.tsx | 8 + cognee-frontend/src/ui/Icons/PlayIcon.tsx | 7 + cognee-frontend/src/ui/Icons/PlusIcon.tsx | 8 + 
cognee-frontend/src/ui/Icons/SearchIcon.tsx | 9 +- cognee-frontend/src/ui/Icons/SettingsIcon.tsx | 7 +- cognee-frontend/src/ui/Icons/index.ts | 26 +- cognee-frontend/src/ui/Layout/Header.tsx | 74 +++ cognee-frontend/src/ui/Layout/index.ts | 3 +- cognee-frontend/src/ui/elements/Accordion.tsx | 45 ++ .../src/ui/elements/AvatarImage.tsx | 0 cognee-frontend/src/ui/elements/CTAButton.tsx | 4 +- .../src/ui/elements/GhostButton.tsx | 4 +- .../src/ui/elements/IconButton.tsx | 14 + cognee-frontend/src/ui/elements/Input.tsx | 2 +- .../src/ui/elements/{ => Modal}/Modal.tsx | 2 +- .../src/ui/elements/Modal/index.ts | 3 + .../src/ui/elements/Modal/useModal.ts | 49 ++ .../src/ui/elements/NeutralButton.tsx | 6 +- .../src/ui/elements/Notebook/Notebook.tsx | 342 +++++++++++ .../elements/Notebook/NotebookCellHeader.tsx | 68 +++ .../src/ui/elements/Notebook/index.ts | 1 + .../src/ui/elements/Notebook/types.ts | 15 + cognee-frontend/src/ui/elements/PopupMenu.tsx | 48 ++ cognee-frontend/src/ui/elements/Select.tsx | 6 +- cognee-frontend/src/ui/elements/TextArea.tsx | 27 +- cognee-frontend/src/ui/elements/index.ts | 6 +- cognee-frontend/src/utils/fetch.ts | 4 + cognee-frontend/src/utils/index.ts | 1 + cognee-frontend/src/utils/useBoolean.ts | 2 + cognee-frontend/src/utils/useOutsideClick.ts | 25 + cognee/api/client.py | 21 +- cognee/api/health.py | 18 +- cognee/api/v1/cloud/routers/__init__.py | 1 + .../api/v1/cloud/routers/get_checks_router.py | 23 + .../datasets/routers/get_datasets_router.py | 16 +- cognee/api/v1/notebooks/routers/__init__.py | 1 + .../notebooks/routers/get_notebooks_router.py | 93 +++ .../v1/search/routers/get_search_router.py | 3 +- cognee/api/v1/sync/__init__.py | 17 + cognee/api/v1/sync/routers/__init__.py | 3 + cognee/api/v1/sync/routers/get_sync_router.py | 134 +++++ cognee/api/v1/sync/sync.py | 548 ++++++++++++++++++ .../api/v1/users/routers/get_auth_router.py | 14 +- .../databases/relational/__init__.py | 2 + .../databases/relational/get_async_session.py | 15 
+ .../sqlalchemy/SqlAlchemyAdapter.py | 1 + .../relational/with_async_session.py | 25 + .../files/storage/LocalFileStorage.py | 9 + .../files/storage/S3FileStorage.py | 5 + .../files/storage/StorageManager.py | 8 +- .../infrastructure/files/storage/storage.py | 16 + .../exceptions/CloudApiKeyMissingError.py | 15 + .../cloud/exceptions/CloudConnectionError.py | 15 + cognee/modules/cloud/exceptions/__init__.py | 2 + cognee/modules/cloud/operations/__init__.py | 1 + .../modules/cloud/operations/check_api_key.py | 25 + .../data/methods/check_dataset_name.py | 2 +- .../modules/data/methods/get_dataset_data.py | 2 +- cognee/modules/notebooks/methods/__init__.py | 5 + .../notebooks/methods/create_notebook.py | 24 + .../notebooks/methods/delete_notebook.py | 13 + .../modules/notebooks/methods/get_notebook.py | 21 + .../notebooks/methods/get_notebooks.py | 18 + .../notebooks/methods/update_notebook.py | 17 + cognee/modules/notebooks/models/Notebook.py | 53 ++ cognee/modules/notebooks/models/__init__.py | 1 + .../modules/notebooks/operations/__init__.py | 1 + .../operations/run_in_local_sandbox.py | 69 +++ .../graph_completion_cot_retriever.py | 2 +- .../retrieval/graph_completion_retriever.py | 6 +- .../modules/retrieval/temporal_retriever.py | 10 +- cognee/modules/search/methods/search.py | 41 +- cognee/modules/sync/__init__.py | 1 + cognee/modules/sync/methods/__init__.py | 18 + .../sync/methods/create_sync_operation.py | 45 ++ .../sync/methods/get_sync_operation.py | 79 +++ .../sync/methods/update_sync_operation.py | 117 ++++ cognee/modules/sync/models/SyncOperation.py | 86 +++ cognee/modules/sync/models/__init__.py | 3 + cognee/modules/users/__init__.py | 1 - cognee/modules/users/methods/create_user.py | 27 +- .../get_specific_user_permission_datasets.py | 3 +- 118 files changed, 3857 insertions(+), 109 deletions(-) delete mode 100644 cognee-frontend/public/next.svg delete mode 100644 cognee-frontend/public/vercel.svg create mode 100644 
cognee-frontend/src/app/account/Account.tsx create mode 100644 cognee-frontend/src/app/account/page.tsx create mode 100644 cognee-frontend/src/app/dashboard/AddDataToCognee.tsx create mode 100644 cognee-frontend/src/app/dashboard/CogneeInstancesAccordion.tsx create mode 100644 cognee-frontend/src/app/dashboard/Dashboard.tsx create mode 100644 cognee-frontend/src/app/dashboard/DatasetsAccordion.tsx create mode 100644 cognee-frontend/src/app/dashboard/InstanceDatasetsAccordion.tsx create mode 100644 cognee-frontend/src/app/dashboard/NotebooksAccordion.tsx create mode 100644 cognee-frontend/src/app/dashboard/page.tsx create mode 100644 cognee-frontend/src/app/plan/Plan.tsx create mode 100644 cognee-frontend/src/app/plan/page.tsx create mode 100644 cognee-frontend/src/modules/auth/index.ts create mode 100644 cognee-frontend/src/modules/auth/types.ts create mode 100644 cognee-frontend/src/modules/auth/useAuthenticatedUser.ts create mode 100644 cognee-frontend/src/modules/cloud/checkCloudConnection.ts create mode 100644 cognee-frontend/src/modules/cloud/index.ts create mode 100644 cognee-frontend/src/modules/cloud/syncData.ts create mode 100644 cognee-frontend/src/modules/notebooks/useNotebooks.ts create mode 100644 cognee-frontend/src/ui/Icons/BackIcon.tsx create mode 100644 cognee-frontend/src/ui/Icons/CheckIcon.tsx create mode 100644 cognee-frontend/src/ui/Icons/CloseIcon.tsx create mode 100644 cognee-frontend/src/ui/Icons/CloudIcon.tsx create mode 100644 cognee-frontend/src/ui/Icons/CogneeIcon.tsx create mode 100644 cognee-frontend/src/ui/Icons/DatasetIcon.tsx create mode 100644 cognee-frontend/src/ui/Icons/LocalCogneeIcon.tsx create mode 100644 cognee-frontend/src/ui/Icons/MenuIcon.tsx create mode 100644 cognee-frontend/src/ui/Icons/MinusIcon.tsx create mode 100644 cognee-frontend/src/ui/Icons/NotebookIcon.tsx create mode 100644 cognee-frontend/src/ui/Icons/PlayIcon.tsx create mode 100644 cognee-frontend/src/ui/Icons/PlusIcon.tsx create mode 100644 
cognee-frontend/src/ui/Layout/Header.tsx create mode 100644 cognee-frontend/src/ui/elements/Accordion.tsx create mode 100644 cognee-frontend/src/ui/elements/AvatarImage.tsx create mode 100644 cognee-frontend/src/ui/elements/IconButton.tsx rename cognee-frontend/src/ui/elements/{ => Modal}/Modal.tsx (84%) create mode 100644 cognee-frontend/src/ui/elements/Modal/index.ts create mode 100644 cognee-frontend/src/ui/elements/Modal/useModal.ts create mode 100644 cognee-frontend/src/ui/elements/Notebook/Notebook.tsx create mode 100644 cognee-frontend/src/ui/elements/Notebook/NotebookCellHeader.tsx create mode 100644 cognee-frontend/src/ui/elements/Notebook/index.ts create mode 100644 cognee-frontend/src/ui/elements/Notebook/types.ts create mode 100644 cognee-frontend/src/ui/elements/PopupMenu.tsx create mode 100644 cognee-frontend/src/utils/useOutsideClick.ts create mode 100644 cognee/api/v1/cloud/routers/__init__.py create mode 100644 cognee/api/v1/cloud/routers/get_checks_router.py create mode 100644 cognee/api/v1/notebooks/routers/__init__.py create mode 100644 cognee/api/v1/notebooks/routers/get_notebooks_router.py create mode 100644 cognee/api/v1/sync/__init__.py create mode 100644 cognee/api/v1/sync/routers/__init__.py create mode 100644 cognee/api/v1/sync/routers/get_sync_router.py create mode 100644 cognee/api/v1/sync/sync.py create mode 100644 cognee/infrastructure/databases/relational/get_async_session.py create mode 100644 cognee/infrastructure/databases/relational/with_async_session.py create mode 100644 cognee/modules/cloud/exceptions/CloudApiKeyMissingError.py create mode 100644 cognee/modules/cloud/exceptions/CloudConnectionError.py create mode 100644 cognee/modules/cloud/exceptions/__init__.py create mode 100644 cognee/modules/cloud/operations/__init__.py create mode 100644 cognee/modules/cloud/operations/check_api_key.py create mode 100644 cognee/modules/notebooks/methods/__init__.py create mode 100644 cognee/modules/notebooks/methods/create_notebook.py 
create mode 100644 cognee/modules/notebooks/methods/delete_notebook.py create mode 100644 cognee/modules/notebooks/methods/get_notebook.py create mode 100644 cognee/modules/notebooks/methods/get_notebooks.py create mode 100644 cognee/modules/notebooks/methods/update_notebook.py create mode 100644 cognee/modules/notebooks/models/Notebook.py create mode 100644 cognee/modules/notebooks/models/__init__.py create mode 100644 cognee/modules/notebooks/operations/__init__.py create mode 100644 cognee/modules/notebooks/operations/run_in_local_sandbox.py create mode 100644 cognee/modules/sync/__init__.py create mode 100644 cognee/modules/sync/methods/__init__.py create mode 100644 cognee/modules/sync/methods/create_sync_operation.py create mode 100644 cognee/modules/sync/methods/get_sync_operation.py create mode 100644 cognee/modules/sync/methods/update_sync_operation.py create mode 100644 cognee/modules/sync/models/SyncOperation.py create mode 100644 cognee/modules/sync/models/__init__.py diff --git a/cognee-frontend/public/next.svg b/cognee-frontend/public/next.svg deleted file mode 100644 index 5174b28c5..000000000 --- a/cognee-frontend/public/next.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/cognee-frontend/public/vercel.svg b/cognee-frontend/public/vercel.svg deleted file mode 100644 index d2f842227..000000000 --- a/cognee-frontend/public/vercel.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/cognee-frontend/src/app/(graph)/GraphVisualization.tsx b/cognee-frontend/src/app/(graph)/GraphVisualization.tsx index 67d6458f8..4e2d1e642 100644 --- a/cognee-frontend/src/app/(graph)/GraphVisualization.tsx +++ b/cognee-frontend/src/app/(graph)/GraphVisualization.tsx @@ -1,5 +1,6 @@ "use client"; +import classNames from "classnames"; import { MutableRefObject, useEffect, useImperativeHandle, useRef, useState, useCallback } from "react"; import { forceCollide, forceManyBody } from "d3-force-3d"; import ForceGraph, { 
ForceGraphMethods, GraphData, LinkObject, NodeObject } from "react-force-graph-2d"; @@ -10,6 +11,7 @@ interface GraphVisuzaliationProps { ref: MutableRefObject; data?: GraphData; graphControls: MutableRefObject; + className?: string; } export interface GraphVisualizationAPI { @@ -17,7 +19,7 @@ export interface GraphVisualizationAPI { setGraphShape: (shape: string) => void; } -export default function GraphVisualization({ ref, data, graphControls }: GraphVisuzaliationProps) { +export default function GraphVisualization({ ref, data, graphControls, className }: GraphVisuzaliationProps) { const textSize = 6; const nodeSize = 15; // const addNodeDistanceFromSourceNode = 15; @@ -201,7 +203,7 @@ export default function GraphVisualization({ ref, data, graphControls }: GraphVi if (typeof window !== "undefined" && data && graphRef.current) { // add collision force graphRef.current.d3Force("collision", forceCollide(nodeSize * 1.5)); - graphRef.current.d3Force("charge", forceManyBody().strength(-1500).distanceMin(300).distanceMax(900)); + graphRef.current.d3Force("charge", forceManyBody().strength(-10).distanceMin(10).distanceMax(50)); } }, [data, graphRef]); @@ -213,7 +215,7 @@ export default function GraphVisualization({ ref, data, graphControls }: GraphVi })); return ( -
+
{(data && typeof window !== "undefined") ? ( +
+
+
+
+
+
+
+ +
+ +
+ + + back + +
+
+
Account
+
Manage your account's settings.
+
{account.name}
+
+
+
Plan
+
You are using open-source version. Subscribe to get access to hosted cognee with your data!
+ + Select a plan + +
+
+
+
+
+
+
+
+
+ + ); +} diff --git a/cognee-frontend/src/app/account/page.tsx b/cognee-frontend/src/app/account/page.tsx new file mode 100644 index 000000000..f6323c313 --- /dev/null +++ b/cognee-frontend/src/app/account/page.tsx @@ -0,0 +1 @@ +export { default } from "./Account"; diff --git a/cognee-frontend/src/app/dashboard/AddDataToCognee.tsx b/cognee-frontend/src/app/dashboard/AddDataToCognee.tsx new file mode 100644 index 000000000..e5f4bb932 --- /dev/null +++ b/cognee-frontend/src/app/dashboard/AddDataToCognee.tsx @@ -0,0 +1,104 @@ +import { FormEvent, useCallback, useState } from "react"; +import { CloseIcon, PlusIcon } from "@/ui/Icons"; +import { useModal } from "@/ui/elements/Modal"; +import { CTAButton, GhostButton, IconButton, Modal, Select } from "@/ui/elements"; + +import addData from "@/modules/ingestion/addData"; +import { Dataset } from "@/modules/ingestion/useDatasets"; + +interface AddDataToCogneeProps { + datasets: Dataset[]; + refreshDatasets: () => void; +} + +export default function AddDataToCognee({ datasets, refreshDatasets }: AddDataToCogneeProps) { + const [filesForUpload, setFilesForUpload] = useState(null); + + const prepareFiles = useCallback((event: FormEvent) => { + const formElements = event.currentTarget; + const files = formElements.files; + + setFilesForUpload(files); + }, []); + + const processDataWithCognee = useCallback((state: object, event?: FormEvent) => { + event!.preventDefault(); + + if (!filesForUpload) { + return; + } + + const formElements = event!.currentTarget; + const datasetId = formElements.datasetName.value; + + return addData( + datasetId ? 
{ + id: datasetId, + } : { + name: "main_dataset", + }, + Array.from(filesForUpload) + ) + .then(() => { + refreshDatasets(); + setFilesForUpload(null); + }); + }, [filesForUpload, refreshDatasets]); + + const { + isModalOpen: isAddDataModalOpen, + openModal: openAddDataModal, + closeModal: closeAddDataModal, + isActionLoading: isProcessingDataWithCognee, + confirmAction: submitDataToCognee, + } = useModal(false, processDataWithCognee); + + return ( + <> + + + Add data to cognee + + + +
+
+ Add new data to a dataset? + +
+
Please select a dataset to add data in.
If you don't have any, don't worry, we will create one for you.
+
+
+ + + + + select files + + + {filesForUpload?.length && ( +
+
selected files:
+ {Array.from(filesForUpload || []).map((file) => ( +
+ {file.name} +
+ ))} +
+ )} +
+
+ closeAddDataModal()}>cancel + + {isProcessingDataWithCognee ? "processing..." : "add"} + +
+
+
+
+ + ); +} diff --git a/cognee-frontend/src/app/dashboard/CogneeInstancesAccordion.tsx b/cognee-frontend/src/app/dashboard/CogneeInstancesAccordion.tsx new file mode 100644 index 000000000..037c9e828 --- /dev/null +++ b/cognee-frontend/src/app/dashboard/CogneeInstancesAccordion.tsx @@ -0,0 +1,31 @@ +"use client"; + +import { useBoolean } from "@/utils"; +import { Accordion } from "@/ui/elements"; + +interface CogneeInstancesAccordionProps { + children: React.ReactNode; +} + +export default function CogneeInstancesAccordion({ + children, +}: CogneeInstancesAccordionProps) { + const { + value: isInstancesPanelOpen, + setTrue: openInstancesPanel, + setFalse: closeInstancesPanel, + } = useBoolean(true); + + return ( + <> + Cognee Instances} + isOpen={isInstancesPanelOpen} + openAccordion={openInstancesPanel} + closeAccordion={closeInstancesPanel} + > + {children} + + + ); +} diff --git a/cognee-frontend/src/app/dashboard/Dashboard.tsx b/cognee-frontend/src/app/dashboard/Dashboard.tsx new file mode 100644 index 000000000..c5980fb29 --- /dev/null +++ b/cognee-frontend/src/app/dashboard/Dashboard.tsx @@ -0,0 +1,140 @@ +"use client"; + +import { useCallback, useEffect, useRef, useState } from "react"; + +import { Header } from "@/ui/Layout"; +import { SearchIcon } from "@/ui/Icons"; +import { Notebook } from "@/ui/elements"; +import { Notebook as NotebookType } from "@/ui/elements/Notebook/types"; +import { Dataset } from "@/modules/ingestion/useDatasets"; +import useNotebooks from "@/modules/notebooks/useNotebooks"; + +import NotebooksAccordion from "./NotebooksAccordion"; +import CogneeInstancesAccordion from "./CogneeInstancesAccordion"; +import AddDataToCognee from "./AddDataToCognee"; +import InstanceDatasetsAccordion from "./InstanceDatasetsAccordion"; + +export default function Dashboard() { + const { + notebooks, + refreshNotebooks, + runCell, + addNotebook, + updateNotebook, + saveNotebook, + removeNotebook, + } = useNotebooks(); + + useEffect(() => { + if 
(!notebooks.length) { + refreshNotebooks() + .then((notebooks) => { + if (notebooks[0]) { + setSelectedNotebookId(notebooks[0].id); + } + }); + } + }, [notebooks.length, refreshNotebooks]); + + const [selectedNotebookId, setSelectedNotebookId] = useState(null); + + const handleNotebookRemove = useCallback((notebookId: string) => { + setSelectedNotebookId((currentSelectedNotebookId) => ( + currentSelectedNotebookId === notebookId ? null : currentSelectedNotebookId + )); + return removeNotebook(notebookId); + }, [removeNotebook]); + + const saveNotebookTimeoutRef = useRef(null); + const saveNotebookThrottled = useCallback((notebook: NotebookType) => { + const throttleTime = 1000; + + if (saveNotebookTimeoutRef.current) { + clearTimeout(saveNotebookTimeoutRef.current); + saveNotebookTimeoutRef.current = null; + } + + saveNotebookTimeoutRef.current = setTimeout(() => { + saveNotebook(notebook); + }, throttleTime) as unknown as number; + }, [saveNotebook]); + + useEffect(() => { + return () => { + if (saveNotebookTimeoutRef.current) { + clearTimeout(saveNotebookTimeoutRef.current); + saveNotebookTimeoutRef.current = null; + } + }; + }, []); + + const handleNotebookUpdate = useCallback((notebook: NotebookType) => { + updateNotebook(notebook); + saveNotebookThrottled(notebook); + }, [saveNotebookThrottled, updateNotebook]); + + const selectedNotebook = notebooks.find((notebook) => notebook.id === selectedNotebookId); + + // ############################ + // Datasets logic + + const [datasets, setDatasets] = useState([]); + const refreshDatasetsRef = useRef(() => {}); + + const handleDatasetsChange = useCallback((payload: { datasets: Dataset[], refreshDatasets: () => void }) => { + const { + datasets, + refreshDatasets, + } = payload; + + refreshDatasetsRef.current = refreshDatasets; + setDatasets(datasets); + }, []); + + return ( +
+
+ +
+
+
+ + +
+ + + + + +
+ + + +
+
+ +
+ {selectedNotebook && ( + + )} +
+
+
+ ); +} diff --git a/cognee-frontend/src/app/dashboard/DatasetsAccordion.tsx b/cognee-frontend/src/app/dashboard/DatasetsAccordion.tsx new file mode 100644 index 000000000..55ce23dfa --- /dev/null +++ b/cognee-frontend/src/app/dashboard/DatasetsAccordion.tsx @@ -0,0 +1,346 @@ +"use client"; + +import { ChangeEvent, useCallback, useEffect, useState } from "react"; +import { useBoolean } from "@/utils"; +import { Accordion, CTAButton, GhostButton, IconButton, Input, Modal, PopupMenu } from "@/ui/elements"; +import { AccordionProps } from "@/ui/elements/Accordion"; +import { CloseIcon, DatasetIcon, MinusIcon, PlusIcon } from "@/ui/Icons"; +import useDatasets, { Dataset } from "@/modules/ingestion/useDatasets"; +import addData from "@/modules/ingestion/addData"; +import cognifyDataset from "@/modules/datasets/cognifyDataset"; +import { DataFile } from '@/modules/ingestion/useData'; +import { LoadingIndicator } from '@/ui/App'; + +interface DatasetsChangePayload { + datasets: Dataset[] + refreshDatasets: () => void; +} + +export interface DatasetsAccordionProps extends Omit { + onDatasetsChange?: (payload: DatasetsChangePayload) => void; +} + +export default function DatasetsAccordion({ + title, + tools, + switchCaretPosition = false, + className, + contentClassName, + onDatasetsChange, +}: DatasetsAccordionProps) { + const { + value: isDatasetsPanelOpen, + setTrue: openDatasetsPanel, + setFalse: closeDatasetsPanel, + } = useBoolean(true); + + const { + datasets, + refreshDatasets, + addDataset, + removeDataset, + getDatasetData, + removeDatasetData, + } = useDatasets(); + + useEffect(() => { + if (datasets.length === 0) { + refreshDatasets(); + } + }, [datasets.length, refreshDatasets]); + + const [openDatasets, openDataset] = useState>(new Set()); + + const toggleDataset = (id: string) => { + openDataset((prev) => { + const newState = new Set(prev); + + if (newState.has(id)) { + newState.delete(id) + } else { + getDatasetData(id) + .then(() => { + newState.add(id); + 
}); + } + + return newState; + }); + }; + + const refreshOpenDatasetsData = useCallback(() => { + return Promise.all( + openDatasets.values().map( + (datasetId) => getDatasetData(datasetId) + ) + ); + }, [getDatasetData, openDatasets]); + + const refreshDatasetsAndData = useCallback(() => { + refreshDatasets() + .then(refreshOpenDatasetsData); + }, [refreshDatasets, refreshOpenDatasetsData]); + + useEffect(() => { + onDatasetsChange?.({ + datasets, + refreshDatasets: refreshDatasetsAndData, + }); + }, [datasets, onDatasetsChange, refreshDatasets, refreshDatasetsAndData]); + + const { + value: isNewDatasetModalOpen, + setTrue: openNewDatasetModal, + setFalse: closeNewDatasetModal, + } = useBoolean(false); + + const handleDatasetAdd = () => { + openNewDatasetModal(); + }; + + const [newDatasetError, setNewDatasetError] = useState(""); + + const handleNewDatasetSubmit = (event: React.FormEvent) => { + event.preventDefault(); + setNewDatasetError(""); + + const formElements = event.currentTarget; + + const datasetName = formElements.datasetName.value; + + if (datasetName.trim().length === 0) { + setNewDatasetError("Dataset name cannot be empty."); + return; + } + + if (datasetName.includes(" ") || datasetName.includes(".")) { + setNewDatasetError("Dataset name cannot contain spaces or periods."); + return; + } + + addDataset(datasetName) + .then(() => { + closeNewDatasetModal(); + refreshDatasetsAndData(); + }); + }; + + const { + value: isRemoveDatasetModalOpen, + setTrue: openRemoveDatasetModal, + setFalse: closeRemoveDatasetModal, + } = useBoolean(false); + + const [datasetToRemove, setDatasetToRemove] = useState(null); + + const handleDatasetRemove = (dataset: Dataset) => { + setDatasetToRemove(dataset); + openRemoveDatasetModal(); + }; + + const handleDatasetRemoveCancel = () => { + setDatasetToRemove(null); + closeRemoveDatasetModal(); + }; + + const handleRemoveDatasetConfirm = (event: React.FormEvent) => { + event.preventDefault(); + + if (datasetToRemove) { + 
removeDataset(datasetToRemove.id) + .then(() => { + closeRemoveDatasetModal(); + setDatasetToRemove(null); + refreshDatasetsAndData(); + }); + } + }; + + const { + value: isProcessingFiles, + setTrue: setProcessingFilesInProgress, + setFalse: setProcessingFilesDone, + } = useBoolean(false); + + const handleAddFiles = (dataset: Dataset, event: ChangeEvent) => { + event.stopPropagation(); + + if (isProcessingFiles) { + return; + } + + setProcessingFilesInProgress(); + + if (!event.target.files) { + return; + } + + const files: File[] = Array.from(event.target.files); + + if (!files.length) { + return; + } + + return addData(dataset, files) + .then(async () => { + await getDatasetData(dataset.id); + + const onUpdate = () => {}; + + return cognifyDataset(dataset, onUpdate) + .finally(() => { + setProcessingFilesDone(); + }); + }); + }; + + const [dataToRemove, setDataToRemove] = useState(null); + const { + value: isRemoveDataModalOpen, + setTrue: openRemoveDataModal, + setFalse: closeRemoveDataModal, + } = useBoolean(false); + + const handleDataRemove = (data: DataFile) => { + setDataToRemove(data); + + openRemoveDataModal(); + }; + const handleDataRemoveCancel = () => { + setDataToRemove(null); + closeRemoveDataModal(); + }; + const handleDataRemoveConfirm = (event: React.FormEvent) => { + event.preventDefault(); + + if (dataToRemove) { + removeDatasetData(dataToRemove.datasetId, dataToRemove.id) + .then(() => { + closeRemoveDataModal(); + setDataToRemove(null); + refreshDatasetsAndData(); + }); + } + } + + return ( + <> + Datasets} + isOpen={isDatasetsPanelOpen} + openAccordion={openDatasetsPanel} + closeAccordion={closeDatasetsPanel} + tools={tools || } + switchCaretPosition={switchCaretPosition} + className={className} + contentClassName={contentClassName} + > +
+ {datasets.length === 0 && ( +
+ No datasets here, add one by clicking + +
+ )} + {datasets.map((dataset) => { + return ( + + {isProcessingFiles ? : } + {dataset.name} +
+ )} + isOpen={openDatasets.has(dataset.id)} + openAccordion={() => toggleDataset(dataset.id)} + closeAccordion={() => toggleDataset(dataset.id)} + tools={( + + + +
+
+ + add data +
+
+
+
handleDatasetRemove(dataset)} className="hover:bg-gray-100 w-full text-left px-2 cursor-pointer">delete
+
+
+
+ )} + className="first:pt-1.5" + switchCaretPosition={true} + > + <> + {dataset.data?.length === 0 && ( +
+ No data in a dataset, add by clicking "add data" in a dropdown menu +
+ )} + {dataset.data?.map((data) => ( +
+ {data.name} +
+ handleDataRemove(data)}> +
+
+ ))} + +
+ ); + })} +
+ + + +
+
+ Create a new dataset? + +
+
Please provide a name for the dataset being created.
+
+
+ + {newDatasetError && {newDatasetError}} +
+
+ closeNewDatasetModal()}>cancel + create +
+
+
+
+ + +
+
+ Delete {datasetToRemove?.name} dataset? + +
+
Are you sure you want to delete {datasetToRemove?.name}? This action cannot be undone.
+
+ cancel + delete +
+
+
+ + +
+
+ Delete {dataToRemove?.name} data? + +
+
Are you sure you want to delete {dataToRemove?.name}? This action cannot be undone.
+
+ cancel + delete +
+
+
+ + ); +} diff --git a/cognee-frontend/src/app/dashboard/InstanceDatasetsAccordion.tsx b/cognee-frontend/src/app/dashboard/InstanceDatasetsAccordion.tsx new file mode 100644 index 000000000..fd0605349 --- /dev/null +++ b/cognee-frontend/src/app/dashboard/InstanceDatasetsAccordion.tsx @@ -0,0 +1,102 @@ +import { useCallback, useEffect } from "react"; + +import { fetch, useBoolean } from "@/utils"; +import { checkCloudConnection } from "@/modules/cloud"; +import { CloseIcon, CloudIcon, LocalCogneeIcon } from "@/ui/Icons"; +import { CTAButton, GhostButton, IconButton, Input, Modal } from "@/ui/elements"; + +import DatasetsAccordion, { DatasetsAccordionProps } from "./DatasetsAccordion"; + +type InstanceDatasetsAccordionProps = Omit; + +export default function InstanceDatasetsAccordion({ onDatasetsChange }: InstanceDatasetsAccordionProps) { + const { + value: isLocalCogneeConnected, + setTrue: setLocalCogneeConnected, + } = useBoolean(false); + + const { + value: isCloudCogneeConnected, + setTrue: setCloudCogneeConnected, + } = useBoolean(false); + + const checkConnectionToCloudCognee = useCallback((apiKey: string) => { + return checkCloudConnection(apiKey) + .then(setCloudCogneeConnected) + }, [setCloudCogneeConnected]); + + useEffect(() => { + const checkConnectionToLocalCognee = () => { + fetch.checkHealth() + .then(setLocalCogneeConnected) + }; + + checkConnectionToLocalCognee(); + + checkConnectionToCloudCognee(""); + }, [checkConnectionToCloudCognee, setCloudCogneeConnected, setLocalCogneeConnected]); + + const { + value: isCloudConnectedModalOpen, + setTrue: openCloudConnectionModal, + setFalse: closeCloudConnectionModal, + } = useBoolean(false); + + const handleCloudConnectionConfirm = (event: React.FormEvent) => { + event.preventDefault(); + + const apiKeyValue = event.currentTarget.apiKey.value; + + checkConnectionToCloudCognee(apiKeyValue) + .then(() => { + closeCloudConnectionModal(); + }); + }; + + return ( + <> + +
+ + local cognee +
+
+ )} + tools={isLocalCogneeConnected ? Connected : Not connected} + switchCaretPosition={true} + className="pt-3 pb-1.5" + contentClassName="pl-4" + onDatasetsChange={onDatasetsChange} + /> + + + + +
+
+ Connect to cloud? + +
+
Please provide your API key. You can find it on our platform.
+
+
+ +
+
+ closeCloudConnectionModal()}>cancel + connect +
+
+
+
+ + ); +} diff --git a/cognee-frontend/src/app/dashboard/NotebooksAccordion.tsx b/cognee-frontend/src/app/dashboard/NotebooksAccordion.tsx new file mode 100644 index 000000000..174efaa9e --- /dev/null +++ b/cognee-frontend/src/app/dashboard/NotebooksAccordion.tsx @@ -0,0 +1,150 @@ +"use client"; + +import { FormEvent, useCallback, useState } from "react"; +import { useBoolean } from "@/utils"; +import { Accordion, CTAButton, GhostButton, IconButton, Input, Modal } from "@/ui/elements"; +import { CloseIcon, MinusIcon, NotebookIcon, PlusIcon } from "@/ui/Icons"; +import { Notebook } from "@/ui/elements/Notebook/types"; +import { LoadingIndicator } from "@/ui/App"; +import { useModal } from "@/ui/elements/Modal"; + +interface NotebooksAccordionProps { + notebooks: Notebook[]; + addNotebook: (name: string) => Promise; + removeNotebook: (id: string) => Promise; + openNotebook: (id: string) => void; +} + +export default function NotebooksAccordion({ + notebooks, + addNotebook, + removeNotebook, + openNotebook, +}: NotebooksAccordionProps) { + const { + value: isNotebookPanelOpen, + setTrue: openNotebookPanel, + setFalse: closeNotebookPanel, + } = useBoolean(true); + + const { + value: isNotebookLoading, + setTrue: notebookLoading, + setFalse: notebookLoaded, + } = useBoolean(false); + + // Notebook removal modal + const [notebookToRemove, setNotebookToRemove] = useState(null); + + const handleNotebookRemove = (notebook: Notebook) => { + setNotebookToRemove(notebook); + openRemoveNotebookModal(); + }; + + const { + value: isRemoveNotebookModalOpen, + setTrue: openRemoveNotebookModal, + setFalse: closeRemoveNotebookModal, + } = useBoolean(false); + + const handleNotebookRemoveCancel = () => { + closeRemoveNotebookModal(); + setNotebookToRemove(null); + }; + + const handleNotebookRemoveConfirm = () => { + notebookLoading(); + removeNotebook(notebookToRemove!.id) + .finally(notebookLoaded) + .finally(closeRemoveNotebookModal); + setNotebookToRemove(null); + }; + + const 
handleNotebookAdd = useCallback((_: object, formEvent?: FormEvent) => { + if (!formEvent) { + return; + } + + formEvent.preventDefault(); + + const formElements = formEvent.currentTarget; + const notebookName = formElements.notebookName.value.trim(); + + return addNotebook(notebookName) + }, [addNotebook]); + + const { + isModalOpen: isNewNotebookModalOpen, + openModal: openNewNotebookModal, + closeModal: closeNewNotebookModal, + confirmAction: handleNewNotebookSubmit, + isActionLoading: isNewDatasetLoading, + } = useModal(false, handleNotebookAdd); + + return ( + <> + Notebooks} + isOpen={isNotebookPanelOpen} + openAccordion={openNotebookPanel} + closeAccordion={closeNotebookPanel} + tools={isNewDatasetLoading ? ( + + ) : ( + + )} + > + {notebooks.length === 0 && ( +
+ No notebooks here, add one by clicking + +
+ )} + {notebooks.map((notebook: Notebook) => ( +
+ +
+ {notebook.deletable && handleNotebookRemove(notebook)}>} +
+
+ ))} +
+ + +
+
+ Create a new notebook? + +
+
Please provide a name for the notebook being created.
+
+
+ + {/* {newDatasetError && {newDatasetError}} */} +
+
+ closeNewNotebookModal()}>cancel + create +
+
+
+
+ + +
+
+ Delete {notebookToRemove?.name} notebook? + +
+
Are you sure you want to delete {notebookToRemove?.name}? This action cannot be undone.
+
+ cancel + delete +
+
+
+ + ); +} diff --git a/cognee-frontend/src/app/dashboard/page.tsx b/cognee-frontend/src/app/dashboard/page.tsx new file mode 100644 index 000000000..2ab67cdd6 --- /dev/null +++ b/cognee-frontend/src/app/dashboard/page.tsx @@ -0,0 +1 @@ +export { default } from "./Dashboard"; diff --git a/cognee-frontend/src/app/plan/Plan.tsx b/cognee-frontend/src/app/plan/Plan.tsx new file mode 100644 index 000000000..fcca31566 --- /dev/null +++ b/cognee-frontend/src/app/plan/Plan.tsx @@ -0,0 +1,157 @@ +import Link from "next/link"; +import { BackIcon, CheckIcon } from "@/ui/Icons"; +import { CTAButton, NeutralButton } from "@/ui/elements"; +import Header from "@/ui/Layout/Header"; + +export default function Plan() { + return ( + <> +
+
+
+
+
+
+
+
+
+ +
+ +
+
+ + + back + +
+ +
+
+
+
Basic
+
Free
+
+ +
+
On-prem Subscription
+
$2470 /per month
+
Save 20% yearly
+
+ +
+
Cloud Subscription
+
$25 /per month
+
(beta pricing)
+
+ +
+
Everything in the free plan, plus...
+
+
License to use Cognee open source
+
Cognee tasks and pipelines
+
Custom schema and ontology generation
+
Integrated evaluations
+
More than 28 data sources supported
+
+
+ +
+
Everything in the free plan, plus...
+
+
License to use Cognee open source and Cognee Platform
+
1 day SLA
+
On-prem deployment
+
Hands-on support
+
Architecture review
+
Roadmap prioritization
+
Knowledge transfer
+
+
+ +
+
Everything in the free plan, plus...
+
+
Fully hosted cloud platform
+
Multi-tenant architecture
+
Comprehensive API endpoints
+
Automated scaling and parallel processing
+
Ability to group memories per user and domain
+
Automatic updates and priority support
+
1 GB ingestion + 10,000 API calls
+
+
+ +
+ Try for free +
+ +
+ Talk to us +
+ +
+ Sign up for Cogwit Beta +
+
+ +
+
Feature Comparison
+
Basic
+
On-prem
+
Cloud
+ +
Data Sources
+
28+
+
28+
+
28+
+ +
Deployment
+
Self-hosted
+
On-premise
+
Cloud
+ +
API Calls
+
Limited
+
Unlimited
+
10,000
+ +
Support
+
Community
+
Hands-on
+
Priority
+ +
SLA
+
—
+
1 day
+
Standard
+
+ +
+
+
Can I change my plan anytime?
+
Yes, you can upgrade or downgrade your plan at any time. Changes take effect immediately.
+
+
+
What happens to my data if I downgrade?
+
Your data is preserved, but features may be limited based on your new plan constraints.
+
+
+
Do you offer educational discounts?
+
Yes, we offer special pricing for educational institutions and students. Contact us for details.
+
+
+
Is there a free trial for paid plans?
+
All new accounts start with a 14-day free trial of our Pro plan features.
+
+
+
+ +
+ Need a custom solution? + Contact us +
+
+ + ); +} diff --git a/cognee-frontend/src/app/plan/page.tsx b/cognee-frontend/src/app/plan/page.tsx new file mode 100644 index 000000000..a1352fe8e --- /dev/null +++ b/cognee-frontend/src/app/plan/page.tsx @@ -0,0 +1 @@ +export { default } from "./Plan"; diff --git a/cognee-frontend/src/modules/auth/index.ts b/cognee-frontend/src/modules/auth/index.ts new file mode 100644 index 000000000..ea21cce2b --- /dev/null +++ b/cognee-frontend/src/modules/auth/index.ts @@ -0,0 +1,2 @@ +export { default as useAuthenticatedUser } from "./useAuthenticatedUser"; +export { type User } from "./types"; diff --git a/cognee-frontend/src/modules/auth/types.ts b/cognee-frontend/src/modules/auth/types.ts new file mode 100644 index 000000000..3441b4149 --- /dev/null +++ b/cognee-frontend/src/modules/auth/types.ts @@ -0,0 +1,6 @@ +export interface User { + id: string; + name: string; + email: string; + avatarImagePath: string; +} diff --git a/cognee-frontend/src/modules/auth/useAuthenticatedUser.ts b/cognee-frontend/src/modules/auth/useAuthenticatedUser.ts new file mode 100644 index 000000000..f789f3de4 --- /dev/null +++ b/cognee-frontend/src/modules/auth/useAuthenticatedUser.ts @@ -0,0 +1,17 @@ +import { useEffect, useState } from "react"; +import { fetch } from "@/utils"; +import { User } from "./types"; + +export default function useAuthenticatedUser() { + const [user, setUser] = useState(null); + + useEffect(() => { + if (!user) { + fetch("/v1/auth/me") + .then((response) => response.json()) + .then((data) => setUser(data)); + } + }, [user]); + + return { user }; +} diff --git a/cognee-frontend/src/modules/cloud/checkCloudConnection.ts b/cognee-frontend/src/modules/cloud/checkCloudConnection.ts new file mode 100644 index 000000000..dfc40767d --- /dev/null +++ b/cognee-frontend/src/modules/cloud/checkCloudConnection.ts @@ -0,0 +1,10 @@ +import { fetch } from "@/utils"; + +export default function checkCloudConnection(apiKey: string) { + return fetch("/v1/checks/connection", { + method: 
"POST", + headers: { + "X-Api-Key": apiKey, + }, + }); +} diff --git a/cognee-frontend/src/modules/cloud/index.ts b/cognee-frontend/src/modules/cloud/index.ts new file mode 100644 index 000000000..409f803c3 --- /dev/null +++ b/cognee-frontend/src/modules/cloud/index.ts @@ -0,0 +1,2 @@ +export { default as syncData } from "./syncData"; +export { default as checkCloudConnection } from "./checkCloudConnection"; diff --git a/cognee-frontend/src/modules/cloud/syncData.ts b/cognee-frontend/src/modules/cloud/syncData.ts new file mode 100644 index 000000000..dc4360a27 --- /dev/null +++ b/cognee-frontend/src/modules/cloud/syncData.ts @@ -0,0 +1,11 @@ +import { fetch } from "@/utils"; + +export default function syncData(datasetId?: string) { + return fetch("/v1/sync", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + ...(datasetId ? { body: JSON.stringify({ datasetId }) } : { body: "{}" }), + }); +} diff --git a/cognee-frontend/src/modules/ingestion/useData.ts b/cognee-frontend/src/modules/ingestion/useData.ts index 4368e296e..941521135 100644 --- a/cognee-frontend/src/modules/ingestion/useData.ts +++ b/cognee-frontend/src/modules/ingestion/useData.ts @@ -5,6 +5,7 @@ export interface DataFile { id: string; name: string; file: File; + datasetId: string; } const useData = () => { @@ -16,6 +17,7 @@ const useData = () => { id: v4(), name: file.name, file, + datasetId: "", })) ); }, []); diff --git a/cognee-frontend/src/modules/ingestion/useDatasets.ts b/cognee-frontend/src/modules/ingestion/useDatasets.ts index 9ae4ddcb2..7ef2b6b79 100644 --- a/cognee-frontend/src/modules/ingestion/useDatasets.ts +++ b/cognee-frontend/src/modules/ingestion/useDatasets.ts @@ -1,7 +1,9 @@ import { useCallback, useEffect, useRef, useState } from 'react'; import { v4 } from 'uuid'; -import { DataFile } from './useData'; + import { fetch } from '@/utils'; +import { DataFile } from './useData'; +import createDataset from "../datasets/createDataset"; export interface Dataset 
{ id: string; @@ -56,21 +58,24 @@ function useDatasets() { }, []); const addDataset = useCallback((datasetName: string) => { - setDatasets((datasets) => [ - ...datasets, - { - id: v4(), - name: datasetName, - data: [], - status: 'DATASET_INITIALIZED', - } - ]); + return createDataset({ name: datasetName }) + .then((dataset) => { + setDatasets((datasets) => [ + ...datasets, + dataset, + ]); + }); }, []); const removeDataset = useCallback((datasetId: string) => { - setDatasets((datasets) => - datasets.filter((dataset) => dataset.id !== datasetId) - ); + return fetch(`/v1/datasets/${datasetId}`, { + method: 'DELETE', + }) + .then(() => { + setDatasets((datasets) => + datasets.filter((dataset) => dataset.id !== datasetId) + ); + }); }, []); const fetchDatasets = useCallback(() => { @@ -94,7 +99,41 @@ function useDatasets() { }); }, [checkDatasetStatuses]); - return { datasets, addDataset, removeDataset, refreshDatasets: fetchDatasets }; + const getDatasetData = useCallback((datasetId: string) => { + return fetch(`/v1/datasets/${datasetId}/data`) + .then((response) => response.json()) + .then((data) => { + const datasetIndex = datasets.findIndex((dataset) => dataset.id === datasetId); + + if (datasetIndex >= 0) { + setDatasets((datasets) => [ + ...datasets.slice(0, datasetIndex), + { + ...datasets[datasetIndex], + data, + }, + ...datasets.slice(datasetIndex + 1), + ]); + } + + return data; + }); + }, [datasets]); + + const removeDatasetData = useCallback((datasetId: string, dataId: string) => { + return fetch(`/v1/datasets/${datasetId}/data/${dataId}`, { + method: 'DELETE', + }); + }, []); + + return { + datasets, + addDataset, + removeDataset, + getDatasetData, + removeDatasetData, + refreshDatasets: fetchDatasets, + }; }; export default useDatasets; diff --git a/cognee-frontend/src/modules/notebooks/useNotebooks.ts b/cognee-frontend/src/modules/notebooks/useNotebooks.ts new file mode 100644 index 000000000..f36f97448 --- /dev/null +++ 
b/cognee-frontend/src/modules/notebooks/useNotebooks.ts @@ -0,0 +1,134 @@ +import { useCallback, useState } from "react"; +import { fetch } from "@/utils"; +import { Cell, Notebook } from "@/ui/elements/Notebook/types"; + +function useNotebooks() { + const [notebooks, setNotebooks] = useState([]); + + const addNotebook = useCallback((notebookName: string) => { + return fetch("/v1/notebooks", { + body: JSON.stringify({ name: notebookName }), + method: "POST", + headers: { + "Content-Type": "application/json", + }, + }) + .then((response) => response.json()) + .then((notebook) => { + setNotebooks((notebooks) => [ + ...notebooks, + notebook, + ]); + + return notebook; + }); + }, []); + + const removeNotebook = useCallback((notebookId: string) => { + return fetch(`/v1/notebooks/${notebookId}`, { + method: "DELETE", + }) + .then(() => { + setNotebooks((notebooks) => + notebooks.filter((notebook) => notebook.id !== notebookId) + ); + }); + }, []); + + const fetchNotebooks = useCallback(() => { + return fetch("/v1/notebooks", { + headers: { + "Content-Type": "application/json", + }, + }) + .then((response) => response.json()) + .then((notebooks) => { + setNotebooks(notebooks); + + return notebooks; + }) + .catch((error) => { + console.error("Error fetching notebooks:", error); + }); + }, []); + + const updateNotebook = useCallback((updatedNotebook: Notebook) => { + setNotebooks((existingNotebooks) => + existingNotebooks.map((notebook) => + notebook.id === updatedNotebook.id + ? 
updatedNotebook + : notebook + ) + ); + }, []); + + const saveNotebook = useCallback((notebook: Notebook) => { + return fetch(`/v1/notebooks/${notebook.id}`, { + body: JSON.stringify({ + name: notebook.name, + cells: notebook.cells, + }), + method: "PUT", + headers: { + "Content-Type": "application/json", + }, + }) + .then((response) => response.json()) + }, []); + + const runCell = useCallback((notebook: Notebook, cell: Cell) => { + setNotebooks((existingNotebooks) => + existingNotebooks.map((existingNotebook) => + existingNotebook.id === notebook.id ? { + ...existingNotebook, + cells: existingNotebook.cells.map((existingCell) => + existingCell.id === cell.id ? { + ...existingCell, + result: undefined, + error: undefined, + } : existingCell + ), + } : notebook + ) + ); + + return fetch(`/v1/notebooks/${notebook.id}/${cell.id}/run`, { + body: JSON.stringify({ + content: cell.content, + }), + method: "POST", + headers: { + "Content-Type": "application/json", + }, + }) + .then((response) => response.json()) + .then((response) => { + setNotebooks((existingNotebooks) => + existingNotebooks.map((existingNotebook) => + existingNotebook.id === notebook.id ? { + ...existingNotebook, + cells: existingNotebook.cells.map((existingCell) => + existingCell.id === cell.id ? 
{ + ...existingCell, + result: response.result, + error: response.error, + } : existingCell + ), + } : notebook + ) + ); + }); + }, []); + + return { + notebooks, + addNotebook, + saveNotebook, + updateNotebook, + removeNotebook, + refreshNotebooks: fetchNotebooks, + runCell, + }; +}; + +export default useNotebooks; diff --git a/cognee-frontend/src/ui/App/Loading/DefaultLoadingIndicator/LoadingIndicator.module.css b/cognee-frontend/src/ui/App/Loading/DefaultLoadingIndicator/LoadingIndicator.module.css index 472081d57..d66b1e7f8 100644 --- a/cognee-frontend/src/ui/App/Loading/DefaultLoadingIndicator/LoadingIndicator.module.css +++ b/cognee-frontend/src/ui/App/Loading/DefaultLoadingIndicator/LoadingIndicator.module.css @@ -3,7 +3,7 @@ width: 1rem; height: 1rem; border-radius: 50%; - border: 0.18rem solid white; + border: 0.18rem solid var(--color-indigo-600);; border-top-color: transparent; border-bottom-color: transparent; animation: spin 2s linear infinite; diff --git a/cognee-frontend/src/ui/Icons/AddIcon.tsx b/cognee-frontend/src/ui/Icons/AddIcon.tsx index b9092feec..da150c8a3 100644 --- a/cognee-frontend/src/ui/Icons/AddIcon.tsx +++ b/cognee-frontend/src/ui/Icons/AddIcon.tsx @@ -1,4 +1,4 @@ -export default function SearchIcon({ width = 24, height = 24, color = 'currentColor', className = '' }) { +export default function AddIcon({ width = 24, height = 24, color = 'currentColor', className = '' }) { return ( diff --git a/cognee-frontend/src/ui/Icons/BackIcon.tsx b/cognee-frontend/src/ui/Icons/BackIcon.tsx new file mode 100644 index 000000000..796fb923b --- /dev/null +++ b/cognee-frontend/src/ui/Icons/BackIcon.tsx @@ -0,0 +1,8 @@ +export default function BackIcon({ width = 16, height = 16, color = "#17191C", className = "" }) { + return ( + + + + + ); +} diff --git a/cognee-frontend/src/ui/Icons/CaretIcon.tsx b/cognee-frontend/src/ui/Icons/CaretIcon.tsx index 29a5eca62..cef9b9a9a 100644 --- a/cognee-frontend/src/ui/Icons/CaretIcon.tsx +++ 
b/cognee-frontend/src/ui/Icons/CaretIcon.tsx @@ -1,8 +1,7 @@ -export default function CaretIcon({ width = 50, height = 36, color = "currentColor", className = "" }) { +export default function CaretIcon({ width = 17, height = 16, color = "#000000", className = "" }) { return ( - - - + + ); } diff --git a/cognee-frontend/src/ui/Icons/CheckIcon.tsx b/cognee-frontend/src/ui/Icons/CheckIcon.tsx new file mode 100644 index 000000000..68610b1eb --- /dev/null +++ b/cognee-frontend/src/ui/Icons/CheckIcon.tsx @@ -0,0 +1,7 @@ +export default function CheckIcon({ width = 17, height = 18, color = "#5C10F4", className = "" }) { + return ( + + + + ); +} diff --git a/cognee-frontend/src/ui/Icons/CloseIcon.tsx b/cognee-frontend/src/ui/Icons/CloseIcon.tsx new file mode 100644 index 000000000..7ea30123d --- /dev/null +++ b/cognee-frontend/src/ui/Icons/CloseIcon.tsx @@ -0,0 +1,8 @@ +export default function CloseIcon({ width = 29, height = 29, color = "#000000", className = "" }) { + return ( + + + + + ); +} diff --git a/cognee-frontend/src/ui/Icons/CloudIcon.tsx b/cognee-frontend/src/ui/Icons/CloudIcon.tsx new file mode 100644 index 000000000..9578c24d4 --- /dev/null +++ b/cognee-frontend/src/ui/Icons/CloudIcon.tsx @@ -0,0 +1,7 @@ +export default function CloudIcon({ width = 16, height = 12, color = "#5C10F4", className = "" }) { + return ( + + + + ); +} diff --git a/cognee-frontend/src/ui/Icons/CogneeIcon.tsx b/cognee-frontend/src/ui/Icons/CogneeIcon.tsx new file mode 100644 index 000000000..d9f95e0f2 --- /dev/null +++ b/cognee-frontend/src/ui/Icons/CogneeIcon.tsx @@ -0,0 +1,7 @@ +export default function CogneeIcon({ width = 21, height = 24, color="#6510F4", className="" }) { + return ( + + + + ); +} diff --git a/cognee-frontend/src/ui/Icons/DatasetIcon.tsx b/cognee-frontend/src/ui/Icons/DatasetIcon.tsx new file mode 100644 index 000000000..d17ff0470 --- /dev/null +++ b/cognee-frontend/src/ui/Icons/DatasetIcon.tsx @@ -0,0 +1,9 @@ +export default function DatasetIcon({ width = 16, 
height = 16, color = "#000000", className = '' }) { + return ( + + + + + + ); +} diff --git a/cognee-frontend/src/ui/Icons/LocalCogneeIcon.tsx b/cognee-frontend/src/ui/Icons/LocalCogneeIcon.tsx new file mode 100644 index 000000000..37f6016fc --- /dev/null +++ b/cognee-frontend/src/ui/Icons/LocalCogneeIcon.tsx @@ -0,0 +1,10 @@ +export default function LocalCogneeIcon({ width = 16, height = 16, color = "#000000", className = "" }) { + return ( + + + + + + + ); +} diff --git a/cognee-frontend/src/ui/Icons/MenuIcon.tsx b/cognee-frontend/src/ui/Icons/MenuIcon.tsx new file mode 100644 index 000000000..666b3293d --- /dev/null +++ b/cognee-frontend/src/ui/Icons/MenuIcon.tsx @@ -0,0 +1,9 @@ +export default function AddIcon({ width = 16, height = 16, color = "#000000", className = "" }) { + return ( + + + + + + ); +} diff --git a/cognee-frontend/src/ui/Icons/MinusIcon.tsx b/cognee-frontend/src/ui/Icons/MinusIcon.tsx new file mode 100644 index 000000000..7757d81a6 --- /dev/null +++ b/cognee-frontend/src/ui/Icons/MinusIcon.tsx @@ -0,0 +1,7 @@ +export default function MinusIcon({ width = 16, height = 16, color = "#000000", className = "" }) { + return ( + + + + ); +} diff --git a/cognee-frontend/src/ui/Icons/NotebookIcon.tsx b/cognee-frontend/src/ui/Icons/NotebookIcon.tsx new file mode 100644 index 000000000..a46228d80 --- /dev/null +++ b/cognee-frontend/src/ui/Icons/NotebookIcon.tsx @@ -0,0 +1,8 @@ +export default function NotebookIcon({ width = 16, height = 16, color = "#000000", className = "" }) { + return ( + + + + + ); +} diff --git a/cognee-frontend/src/ui/Icons/PlayIcon.tsx b/cognee-frontend/src/ui/Icons/PlayIcon.tsx new file mode 100644 index 000000000..865f103b0 --- /dev/null +++ b/cognee-frontend/src/ui/Icons/PlayIcon.tsx @@ -0,0 +1,7 @@ +export default function PlayIcon({ width = 11, height = 14, color = "#000000", className = "" }) { + return ( + + + + ); +} diff --git a/cognee-frontend/src/ui/Icons/PlusIcon.tsx b/cognee-frontend/src/ui/Icons/PlusIcon.tsx new file 
mode 100644 index 000000000..69a760e20 --- /dev/null +++ b/cognee-frontend/src/ui/Icons/PlusIcon.tsx @@ -0,0 +1,8 @@ +export default function PlusIcon({ width = 16, height = 16, color = "#000000", className = "" }) { + return ( + + + + + ); +} diff --git a/cognee-frontend/src/ui/Icons/SearchIcon.tsx b/cognee-frontend/src/ui/Icons/SearchIcon.tsx index 3a3baac33..56cddf4c2 100644 --- a/cognee-frontend/src/ui/Icons/SearchIcon.tsx +++ b/cognee-frontend/src/ui/Icons/SearchIcon.tsx @@ -1,9 +1,8 @@ -export default function SearchIcon({ width = 24, height = 24, color = 'currentColor', className = '' }) { +export default function SearchIcon({ width = 12, height = 12, color = "#D8D8D8", className = "" }) { return ( - - - - + + + ); } diff --git a/cognee-frontend/src/ui/Icons/SettingsIcon.tsx b/cognee-frontend/src/ui/Icons/SettingsIcon.tsx index ce006f49b..87e4c9872 100644 --- a/cognee-frontend/src/ui/Icons/SettingsIcon.tsx +++ b/cognee-frontend/src/ui/Icons/SettingsIcon.tsx @@ -1,7 +1,8 @@ -export default function SettingsIcon({ width = 32, height = 33, color = "#E8EAED" }) { +export default function SettingsIcon({ width = 16, height = 17, color = "#000000" }) { return ( - - + + + ); } diff --git a/cognee-frontend/src/ui/Icons/index.ts b/cognee-frontend/src/ui/Icons/index.ts index 0adaa8fd1..3d3f8124f 100644 --- a/cognee-frontend/src/ui/Icons/index.ts +++ b/cognee-frontend/src/ui/Icons/index.ts @@ -1,7 +1,19 @@ -export { default as AddIcon } from './AddIcon'; -export { default as CaretIcon } from './CaretIcon'; -export { default as SearchIcon } from './SearchIcon'; -export { default as DeleteIcon } from './DeleteIcon'; -export { default as GithubIcon } from './GitHubIcon'; -export { default as DiscordIcon } from './DiscordIcon'; -export { default as SettingsIcon } from './SettingsIcon'; +export { default as AddIcon } from "./AddIcon"; +export { default as BackIcon } from "./BackIcon"; +export { default as PlayIcon } from "./PlayIcon"; +export { default as MenuIcon } from 
"./MenuIcon"; +export { default as PlusIcon } from "./PlusIcon"; +export { default as MinusIcon } from "./MinusIcon"; +export { default as CloseIcon } from "./CloseIcon"; +export { default as CheckIcon } from "./CheckIcon"; +export { default as CaretIcon } from "./CaretIcon"; +export { default as CloudIcon } from "./CloudIcon"; +export { default as SearchIcon } from "./SearchIcon"; +export { default as DeleteIcon } from "./DeleteIcon"; +export { default as GithubIcon } from "./GitHubIcon"; +export { default as CogneeIcon } from "./CogneeIcon"; +export { default as DiscordIcon } from "./DiscordIcon"; +export { default as DatasetIcon } from "./DatasetIcon"; +export { default as SettingsIcon } from "./SettingsIcon"; +export { default as NotebookIcon } from "./NotebookIcon"; +export { default as LocalCogneeIcon } from "./LocalCogneeIcon"; diff --git a/cognee-frontend/src/ui/Layout/Header.tsx b/cognee-frontend/src/ui/Layout/Header.tsx new file mode 100644 index 000000000..465153e1a --- /dev/null +++ b/cognee-frontend/src/ui/Layout/Header.tsx @@ -0,0 +1,74 @@ +"use client"; + +import Link from "next/link"; +import Image from "next/image"; +import { useBoolean } from "@/utils"; + +import { CloseIcon, CloudIcon, CogneeIcon } from "../Icons"; +import { CTAButton, GhostButton, IconButton, Modal } from "../elements"; +import { useAuthenticatedUser } from "@/modules/auth"; +import syncData from "@/modules/cloud/syncData"; + +export default function Header() { + const { user } = useAuthenticatedUser(); + + const { + value: isSyncModalOpen, + setTrue: openSyncModal, + setFalse: closeSyncModal, + } = useBoolean(false); + + const handleDataSyncConfirm = () => { + syncData() + .finally(() => { + closeSyncModal(); + }); + }; + + return ( + <> +
+
+ +
Cognee Graph Interface
+
+ +
+ + +
Sync
+
+ + Premium + + {/*
+ +
*/} + + {user?.avatarImagePath ? ( + Name of the user + ) : ( +
+ {user?.email?.charAt(0) || "C"} +
+ )} + +
+
+ + +
+
+ Sync local datasets with cloud datasets? + +
+
Are you sure you want to sync local datasets to cloud?
+
+ cancel + confirm +
+
+
+ + ); +} diff --git a/cognee-frontend/src/ui/Layout/index.ts b/cognee-frontend/src/ui/Layout/index.ts index 54938ca4d..af5a67ac5 100644 --- a/cognee-frontend/src/ui/Layout/index.ts +++ b/cognee-frontend/src/ui/Layout/index.ts @@ -1 +1,2 @@ -export { default as Divider } from './Divider/Divider'; +export { default as Divider } from "./Divider/Divider"; +export { default as Header } from "./Header"; diff --git a/cognee-frontend/src/ui/elements/Accordion.tsx b/cognee-frontend/src/ui/elements/Accordion.tsx new file mode 100644 index 000000000..8779d6d36 --- /dev/null +++ b/cognee-frontend/src/ui/elements/Accordion.tsx @@ -0,0 +1,45 @@ +import classNames from "classnames"; +import { CaretIcon } from "../Icons"; + +export interface AccordionProps { + isOpen: boolean; + title: React.ReactNode; + openAccordion: () => void; + closeAccordion: () => void; + tools?: React.ReactNode; + children: React.ReactNode; + className?: string; + contentClassName?: string; + switchCaretPosition?: boolean; +} + +export default function Accordion({ title, tools, children, isOpen, openAccordion, closeAccordion, className, contentClassName, switchCaretPosition = false }: AccordionProps) { + return ( +
+
+ + {tools} +
+ + {isOpen && ( +
+ {children} +
+ )} +
+ ); +} diff --git a/cognee-frontend/src/ui/elements/AvatarImage.tsx b/cognee-frontend/src/ui/elements/AvatarImage.tsx new file mode 100644 index 000000000..e69de29bb diff --git a/cognee-frontend/src/ui/elements/CTAButton.tsx b/cognee-frontend/src/ui/elements/CTAButton.tsx index c38384cdd..02f1871fc 100644 --- a/cognee-frontend/src/ui/elements/CTAButton.tsx +++ b/cognee-frontend/src/ui/elements/CTAButton.tsx @@ -1,8 +1,8 @@ -import classNames from 'classnames'; +import classNames from "classnames"; import { ButtonHTMLAttributes } from "react"; export default function CTAButton({ children, className, ...props }: ButtonHTMLAttributes) { return ( - + ); } diff --git a/cognee-frontend/src/ui/elements/GhostButton.tsx b/cognee-frontend/src/ui/elements/GhostButton.tsx index 333dcc394..a27a0ff94 100644 --- a/cognee-frontend/src/ui/elements/GhostButton.tsx +++ b/cognee-frontend/src/ui/elements/GhostButton.tsx @@ -1,8 +1,8 @@ -import classNames from 'classnames'; +import classNames from "classnames"; import { ButtonHTMLAttributes } from "react"; export default function CTAButton({ children, className, ...props }: ButtonHTMLAttributes) { return ( - + ); } diff --git a/cognee-frontend/src/ui/elements/IconButton.tsx b/cognee-frontend/src/ui/elements/IconButton.tsx new file mode 100644 index 000000000..cbc35df5b --- /dev/null +++ b/cognee-frontend/src/ui/elements/IconButton.tsx @@ -0,0 +1,14 @@ +import classNames from "classnames"; +import { ButtonHTMLAttributes } from "react"; + +interface ButtonProps extends ButtonHTMLAttributes { + as?: React.ElementType; +} + +export default function IconButton({ as, children, className, ...props }: ButtonProps) { + const Element = as || "button"; + + return ( + {children} + ); +} diff --git a/cognee-frontend/src/ui/elements/Input.tsx b/cognee-frontend/src/ui/elements/Input.tsx index 904658eba..76451f9fa 100644 --- a/cognee-frontend/src/ui/elements/Input.tsx +++ b/cognee-frontend/src/ui/elements/Input.tsx @@ -3,6 +3,6 @@ import { 
InputHTMLAttributes } from "react" export default function Input({ className, ...props }: InputHTMLAttributes) { return ( - + ) } diff --git a/cognee-frontend/src/ui/elements/Modal.tsx b/cognee-frontend/src/ui/elements/Modal/Modal.tsx similarity index 84% rename from cognee-frontend/src/ui/elements/Modal.tsx rename to cognee-frontend/src/ui/elements/Modal/Modal.tsx index fd1db3c32..9d559a7ac 100644 --- a/cognee-frontend/src/ui/elements/Modal.tsx +++ b/cognee-frontend/src/ui/elements/Modal/Modal.tsx @@ -5,7 +5,7 @@ interface ModalProps { export default function Modal({ isOpen, children }: ModalProps) { return isOpen && ( -
+
{children}
); diff --git a/cognee-frontend/src/ui/elements/Modal/index.ts b/cognee-frontend/src/ui/elements/Modal/index.ts new file mode 100644 index 000000000..6386401d6 --- /dev/null +++ b/cognee-frontend/src/ui/elements/Modal/index.ts @@ -0,0 +1,3 @@ +export { default as Modal } from "./Modal"; +export { default as useModal } from "./useModal"; + diff --git a/cognee-frontend/src/ui/elements/Modal/useModal.ts b/cognee-frontend/src/ui/elements/Modal/useModal.ts new file mode 100644 index 000000000..4947d32ca --- /dev/null +++ b/cognee-frontend/src/ui/elements/Modal/useModal.ts @@ -0,0 +1,49 @@ +import { FormEvent, useCallback, useState } from "react"; +import { useBoolean } from "@/utils"; + +export default function useModal(initiallyOpen?: boolean, confirmCallback?: (state: object, event?: FormEvent) => Promise | ConfirmActionReturnType) { + const [modalState, setModalState] = useState({}); + const [isActionLoading, setLoading] = useState(false); + + const { + value: isModalOpen, + setTrue: openModalInternal, + setFalse: closeModalInternal, + } = useBoolean(initiallyOpen || false); + + const openModal = useCallback((state?: object) => { + if (state) { + setModalState(state); + } + openModalInternal(); + }, [openModalInternal]); + + const closeModal = useCallback(() => { + closeModalInternal(); + setModalState({}); + }, [closeModalInternal]); + + const confirmAction = useCallback((event?: FormEvent) => { + if (confirmCallback) { + setLoading(true); + + const maybePromise = confirmCallback(modalState, event); + + if (maybePromise instanceof Promise) { + return maybePromise + .finally(closeModal) + .finally(() => setLoading(false)); + } else { + return maybePromise; // Not a promise. 
+ } + } + }, [closeModal, confirmCallback, modalState]); + + return { + isModalOpen, + openModal, + closeModal, + confirmAction, + isActionLoading, + }; +} diff --git a/cognee-frontend/src/ui/elements/NeutralButton.tsx b/cognee-frontend/src/ui/elements/NeutralButton.tsx index 5b274ad65..7b991fcb8 100644 --- a/cognee-frontend/src/ui/elements/NeutralButton.tsx +++ b/cognee-frontend/src/ui/elements/NeutralButton.tsx @@ -1,8 +1,8 @@ -import classNames from 'classnames'; +import classNames from "classnames"; import { ButtonHTMLAttributes } from "react"; -export default function CTAButton({ children, className, ...props }: ButtonHTMLAttributes) { +export default function NeutralButton({ children, className, ...props }: ButtonHTMLAttributes) { return ( - + ); } diff --git a/cognee-frontend/src/ui/elements/Notebook/Notebook.tsx b/cognee-frontend/src/ui/elements/Notebook/Notebook.tsx new file mode 100644 index 000000000..0e037890a --- /dev/null +++ b/cognee-frontend/src/ui/elements/Notebook/Notebook.tsx @@ -0,0 +1,342 @@ +"use client"; + +import { v4 as uuid4 } from "uuid"; +import classNames from "classnames"; +import { Fragment, MutableRefObject, useCallback, useEffect, useRef, useState } from "react"; + +import { CaretIcon, PlusIcon } from "@/ui/Icons"; +import { IconButton, PopupMenu, TextArea } from "@/ui/elements"; +import { GraphControlsAPI } from "@/app/(graph)/GraphControls"; +import GraphVisualization, { GraphVisualizationAPI } from "@/app/(graph)/GraphVisualization"; + +import NotebookCellHeader from "./NotebookCellHeader"; +import { Cell, Notebook as NotebookType } from "./types"; + +interface NotebookProps { + notebook: NotebookType; + runCell: (notebook: NotebookType, cell: Cell) => Promise; + updateNotebook: (updatedNotebook: NotebookType) => void; + saveNotebook: (notebook: NotebookType) => void; +} + +export default function Notebook({ notebook, updateNotebook, saveNotebook, runCell }: NotebookProps) { + const saveCells = useCallback(() => { + 
saveNotebook(notebook); + }, [notebook, saveNotebook]); + + useEffect(() => { + window.addEventListener("beforeunload", saveCells); + + return () => { + window.removeEventListener("beforeunload", saveCells); + }; + }, [saveCells]); + + useEffect(() => { + if (notebook.cells.length === 0) { + const newCell: Cell = { + id: uuid4(), + name: "first cell", + type: "code", + content: "", + }; + updateNotebook({ + ...notebook, + cells: [newCell], + }); + } + }, [notebook, saveNotebook, updateNotebook]); + + const handleCellRun = useCallback((cell: Cell) => { + return runCell(notebook, cell); + }, [notebook, runCell]); + + const handleCellAdd = useCallback((afterCellIndex: number, cellType: "markdown" | "code") => { + const newCell: Cell = { + id: uuid4(), + name: "new cell", + type: cellType, + content: "", + }; + + const newNotebook = { + ...notebook, + cells: [ + ...notebook.cells.slice(0, afterCellIndex + 1), + newCell, + ...notebook.cells.slice(afterCellIndex + 1), + ], + }; + + toggleCellOpen(newCell.id); + updateNotebook(newNotebook); + }, [notebook, updateNotebook]); + + const handleCellRemove = useCallback((cell: Cell) => { + updateNotebook({ + ...notebook, + cells: notebook.cells.filter((c: Cell) => c.id !== cell.id), + }); + }, [notebook, updateNotebook]); + + const handleCellInputChange = useCallback((notebook: NotebookType, cell: Cell, value: string) => { + const newCell = {...cell, content: value }; + + updateNotebook({ + ...notebook, + cells: notebook.cells.map((cell: Cell) => (cell.id === newCell.id ? 
newCell : cell)), + }); + }, [updateNotebook]); + + const handleCellUp = useCallback((cell: Cell) => { + const index = notebook.cells.indexOf(cell); + + if (index > 0) { + const newCells = [...notebook.cells]; + newCells[index] = notebook.cells[index - 1]; + newCells[index - 1] = cell; + + updateNotebook({ + ...notebook, + cells: newCells, + }); + } + }, [notebook, updateNotebook]); + + const handleCellDown = useCallback((cell: Cell) => { + const index = notebook.cells.indexOf(cell); + + if (index < notebook.cells.length - 1) { + const newCells = [...notebook.cells]; + newCells[index] = notebook.cells[index + 1]; + newCells[index + 1] = cell; + + updateNotebook({ + ...notebook, + cells: newCells, + }); + } + }, [notebook, updateNotebook]); + + const handleCellRename = useCallback((cell: Cell) => { + const newName = prompt("Enter a new name for the cell:"); + + if (newName) { + updateNotebook({ + ...notebook, + cells: notebook.cells.map((c: Cell) => (c.id === cell.id ? {...c, name: newName } : c)), + }); + } + }, [notebook, updateNotebook]); + + const [openCells, setOpenCells] = useState(new Set(notebook.cells.map((c: Cell) => c.id))); + + const toggleCellOpen = (id: string) => { + setOpenCells((prev) => { + const newState = new Set(prev); + + if (newState.has(id)) { + newState.delete(id) + } else { + newState.add(id); + } + + return newState; + }); + }; + + return ( +
+
{notebook.name}
+ + {notebook.cells.map((cell: Cell, index) => ( + +
+
+ {cell.type === "code" ? ( + <> +
+ + + +
+ + + + {openCells.has(cell.id) && ( + <> +