From 4ca1de266e344ec5b914f80407760b72ebfc76b3 Mon Sep 17 00:00:00 2001
From: Hassan <261925524@formanite.fccollege.edu.pk>
Date: Thu, 31 Jul 2025 05:15:41 -0700
Subject: [PATCH 01/73] feat/configurable-path-exclusion

---
 cognee/api/v1/cognify/code_graph_pipeline.py |  20 +++-
 .../get_repo_file_dependencies.py            | 106 ++++++++----------
 cognee/tests/test_repo_processor.py          |  46 ++++++++
 3 files changed, 110 insertions(+), 62 deletions(-)
 create mode 100644 cognee/tests/test_repo_processor.py

diff --git a/cognee/api/v1/cognify/code_graph_pipeline.py b/cognee/api/v1/cognify/code_graph_pipeline.py
index 00a0d3dc9..d7faab6b5 100644
--- a/cognee/api/v1/cognify/code_graph_pipeline.py
+++ b/cognee/api/v1/cognify/code_graph_pipeline.py
@@ -28,7 +28,7 @@ logger = get_logger("code_graph_pipeline")


 @observe
-async def run_code_graph_pipeline(repo_path, include_docs=False):
+async def run_code_graph_pipeline(repo_path, include_docs=False, excluded_paths=None):
     import cognee
     from cognee.low_level import setup

@@ -40,14 +40,25 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
     user = await get_default_user()
     detailed_extraction = True

+    # Default exclusion patterns
+    if excluded_paths is None:
+        excluded_paths = [
+            ".venv/", "venv/", "__pycache__/", ".pytest_cache/",
+            "build/", "dist/", "node_modules/", ".npm/", ".git/",
+            ".svn/", ".idea/", ".vscode/", "tmp/", "temp/",
+            "*.pyc", "*.pyo", "*.log", "*.tmp"
+        ]
+
     tasks = [
-        Task(get_repo_file_dependencies, detailed_extraction=detailed_extraction),
-        # Task(summarize_code, task_config={"batch_size": 500}), # This task takes a long time to complete
+        Task(
+            get_repo_file_dependencies,
+            detailed_extraction=detailed_extraction,
+            excluded_paths=excluded_paths
+        ),
         Task(add_data_points, task_config={"batch_size": 30}),
     ]

     if include_docs:
-        # This tasks take a long time to complete
         non_code_tasks = [
             Task(get_non_py_files, task_config={"batch_size": 50}),
             Task(ingest_data, dataset_name="repo_docs", user=user),
@@ -67,7 +78,6 @@

     dataset_name = "codebase"

-    # Save dataset to database
     db_engine = get_relational_engine()
     async with db_engine.get_async_session() as session:
         dataset = await create_dataset(dataset_name, user, session)

diff --git a/cognee/tasks/repo_processor/get_repo_file_dependencies.py b/cognee/tasks/repo_processor/get_repo_file_dependencies.py
index 232850936..2567a44cd 100644
--- a/cognee/tasks/repo_processor/get_repo_file_dependencies.py
+++ b/cognee/tasks/repo_processor/get_repo_file_dependencies.py
@@ -1,56 +1,68 @@
 import asyncio
 import math
 import os
-
-# from concurrent.futures import ProcessPoolExecutor
-from typing import AsyncGenerator
+import fnmatch
+from typing import AsyncGenerator, Optional, List
 from uuid import NAMESPACE_OID, uuid5

 from cognee.infrastructure.engine import DataPoint
 from cognee.shared.CodeGraphEntities import CodeFile, Repository


-async def get_source_code_files(repo_path):
+async def get_source_code_files(repo_path: str, excluded_paths: Optional[List[str]] = None):
     """
-    Retrieve Python source code files from the specified repository path.
-
-    This function scans the given repository path for files that have the .py extension
-    while excluding test files and files within a virtual environment. It returns a list of
-    absolute paths to the source code files that are not empty.
+    Retrieve Python source code files from the specified repository path,
+    excluding paths and file patterns commonly irrelevant to code analysis.
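+    A file is excluded when a pattern occurs as a substring of its repo-relative
+    path or matches it as an fnmatch-style glob (e.g. ".venv/" or "*.pyc"); see
+    the exclusion check below.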
     Parameters:
     -----------
-
-    - repo_path: The file path to the repository to search for Python source files.
+    - repo_path: Root path of the repository to search
+    - excluded_paths: Optional list of path fragments or glob patterns to exclude

     Returns:
     --------
-
-    - A list of absolute paths to .py files that contain source code, excluding empty
-      files, test files, and files from a virtual environment.
+    List of absolute file paths for .py files, excluding test files,
+    empty files, and files under ignored directories or matching ignore patterns.
     """
-    if not os.path.exists(repo_path):
-        return {}

-    py_files_paths = (
-        os.path.join(root, file)
-        for root, _, files in os.walk(repo_path)
-        for file in files
-        if (
-            file.endswith(".py")
-            and not file.startswith("test_")
-            and not file.endswith("_test")
-            and ".venv" not in file
-        )
-    )
+    if not os.path.exists(repo_path):
+        return []
+
+    # Default exclusions
+    default_excluded_patterns = [
+        ".venv/", "venv/", "__pycache__/", ".pytest_cache/", "build/", "dist/",
+        "node_modules/", ".npm/", ".git/", ".svn/", ".idea/", ".vscode/", "tmp/", "temp/",
+        "*.pyc", "*.pyo", "*.log", "*.tmp"
+    ]
+
+    excluded_patterns = default_excluded_patterns + (excluded_paths or [])
+
+    py_files_paths = []
+    for root, _, files in os.walk(repo_path):
+        for file in files:
+            full_path = os.path.join(root, file)
+            rel_path = os.path.relpath(full_path, repo_path)
+
+            # Check for exclusion
+            should_exclude = any(
+                pattern in rel_path or fnmatch.fnmatch(rel_path, pattern)
+                for pattern in excluded_patterns
+            )
+            if should_exclude:
+                continue
+
+            if (
+                file.endswith(".py")
+                and not file.startswith("test_")
+                and not file.endswith("_test")
+            ):
+                py_files_paths.append(full_path)

     source_code_files = set()
     for file_path in py_files_paths:
         file_path = os.path.abspath(file_path)
-
         if os.path.getsize(file_path) == 0:
             continue
-
         source_code_files.add(file_path)

     return list(source_code_files)

@@ -62,20 +74,7 @@ def run_coroutine(coroutine_func, *args, **kwargs):

     This function creates a new asyncio event loop, sets it as the current loop, and
     executes the given coroutine function with the provided arguments. Once the coroutine
-    completes, the loop is closed. Intended for use in environments where an existing event
-    loop is not available or desirable.
-
-    Parameters:
-    -----------
-
-    - coroutine_func: The coroutine function to be run.
-    - *args: Positional arguments to pass to the coroutine function.
-    - **kwargs: Keyword arguments to pass to the coroutine function.
-
-    Returns:
-    --------
-
-    - The result returned by the coroutine after completion.
+    completes, the loop is closed.
     """
     loop = asyncio.new_event_loop()
     asyncio.set_event_loop(loop)

@@ -85,28 +84,24 @@

 async def get_repo_file_dependencies(
-    repo_path: str, detailed_extraction: bool = False
+    repo_path: str,
+    detailed_extraction: bool = False,
+    excluded_paths: Optional[List[str]] = None
 ) -> AsyncGenerator[DataPoint, None]:
     """
     Generate a dependency graph for Python files in the given repository path.

-    Check the validity of the repository path and yield a repository object followed by the
-    dependencies of Python files within that repository. Raise a FileNotFoundError if the
-    provided path does not exist. The extraction of detailed dependencies can be controlled
-    via the `detailed_extraction` argument.
-
     Parameters:
     -----------
-
-    - repo_path (str): The file path to the repository where Python files are located.
-    - detailed_extraction (bool): A flag indicating whether to perform a detailed
-      extraction of dependencies (default is False). (default False)
+    - repo_path: Path to local repository
+    - detailed_extraction: Whether to extract fine-grained dependencies
+    - excluded_paths: Optional custom exclusion list
     """
     if not os.path.exists(repo_path):
         raise FileNotFoundError(f"Repository path {repo_path} does not exist.")

-    source_code_files = await get_source_code_files(repo_path)
+    source_code_files = await get_source_code_files(repo_path, excluded_paths=excluded_paths)

     repo = Repository(
         id=uuid5(NAMESPACE_OID, repo_path),
@@ -125,11 +120,9 @@ async def get_repo_file_dependencies(
         for chunk_number in range(number_of_chunks)
     ]

-    # Codegraph dependencies are not installed by default, so we import where we use them.
     from cognee.tasks.repo_processor.get_local_dependencies import get_local_script_dependencies

     for start_range, end_range in chunk_ranges:
-        # with ProcessPoolExecutor(max_workers=12) as executor:
         tasks = [
             get_local_script_dependencies(repo_path, file_path, detailed_extraction)
             for file_path in source_code_files[start_range : end_range + 1]
@@ -139,5 +132,4 @@ async def get_repo_file_dependencies(

         for source_code_file in results:
             source_code_file.part_of = repo
-
             yield source_code_file

diff --git a/cognee/tests/test_repo_processor.py b/cognee/tests/test_repo_processor.py
new file mode 100644
index 000000000..4de102da6
--- /dev/null
+++ b/cognee/tests/test_repo_processor.py
@@ -0,0 +1,46 @@
+import asyncio
+import os
+import shutil
+import tempfile
+from cognee.tasks.repo_processor.code_graph_repo import get_source_code_files
+
+def test_get_source_code_files_excludes_common_dirs_and_files():
+    # Create a temporary test directory
+    test_repo = tempfile.mkdtemp()
+
+    # Create files and folders to include/exclude
+    included_file = os.path.join(test_repo, "main.py")
+    excluded_dirs = [".venv", "node_modules", "__pycache__", ".git"]
+    excluded_files = ["ignore.pyc", "temp.log", "junk.tmp"]
+
+    # Create included file
+    with open(included_file, "w") as f:
+        f.write("print('Hello world')")
+
+    # Create excluded directories and files inside them
+    for folder in excluded_dirs:
+        folder_path = os.path.join(test_repo, folder)
+        os.makedirs(folder_path)
+        file_path = os.path.join(folder_path, "ignored.js")
+        with open(file_path, "w") as f:
+            f.write("// ignore this")
+
+    # Create excluded files in root
+    for file_name in excluded_files:
+        file_path = os.path.join(test_repo, file_name)
+        with open(file_path, "w") as f:
+            f.write("dummy")
+
+    # Run the async function to completion
+    results = asyncio.run(get_source_code_files(test_repo))
+
+    # Assert only included file is present
+    assert included_file in results
+    for root, dirs, files in os.walk(test_repo):
+        for name in files:
+            full_path = os.path.join(root, name)
+            if full_path != included_file:
+                assert full_path not in results, f"{full_path} should have been excluded"
+
+    # Cleanup
+    shutil.rmtree(test_repo)

From c898895f2229f851127a977411abb6b9cc6a4f74 Mon Sep 17 00:00:00 2001
From: Hassan <261925524@formanite.fccollege.edu.pk>
Date: Thu, 31 Jul 2025 07:00:11 -0700
Subject: [PATCH 02/73] feat/configurable-path-exclusion

---
 cognee/tests/test_repo_processor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cognee/tests/test_repo_processor.py b/cognee/tests/test_repo_processor.py
index 4de102da6..fc3c26b05 100644
--- a/cognee/tests/test_repo_processor.py
+++ b/cognee/tests/test_repo_processor.py
@@ -2,7 +2,7 @@ import asyncio
 import os
 import shutil
 import tempfile
-from cognee.tasks.repo_processor.code_graph_repo import get_source_code_files
+from cognee.tasks.repo_processor.get_repo_file_dependencies import get_source_code_files

 def test_get_source_code_files_excludes_common_dirs_and_files():
     # Create a temporary test directory

From 8f26a01b3ab744a818bfeaeae932a41921f92ccc Mon Sep 17 00:00:00 2001
From: Hassan <261925524@formanite.fccollege.edu.pk>
Date: Sat, 2 Aug 2025 10:33:07 -0700
Subject: [PATCH 03/73] style: run ruff format and fix lint issues

---
 cognee/api/v1/cognify/code_graph_pipeline.py | 24 +++++++++++---
 .../get_repo_file_dependencies.py            | 31 ++++++++++++-------
 cognee/tests/test_repo_processor.py          |  1 +
 3 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/cognee/api/v1/cognify/code_graph_pipeline.py b/cognee/api/v1/cognify/code_graph_pipeline.py
index d7faab6b5..ae1c8b0ac 100644
--- a/cognee/api/v1/cognify/code_graph_pipeline.py
+++ b/cognee/api/v1/cognify/code_graph_pipeline.py
@@ -43,17 +43,31 @@ async def run_code_graph_pipeline(repo_path, include_docs=False, excluded_paths=
     # Default exclusion patterns
     if excluded_paths is None:
         excluded_paths = [
-            ".venv/", "venv/", "__pycache__/", ".pytest_cache/",
-            "build/", "dist/", "node_modules/", ".npm/", ".git/",
-            ".svn/", ".idea/", ".vscode/", "tmp/", "temp/",
-            "*.pyc", "*.pyo", "*.log", "*.tmp"
+            ".venv/",
+            "venv/",
+            "__pycache__/",
+            ".pytest_cache/",
+            "build/",
+            "dist/",
+            "node_modules/",
+            ".npm/",
+            ".git/",
+            ".svn/",
+            ".idea/",
+            ".vscode/",
+            "tmp/",
+            "temp/",
+            "*.pyc",
+            "*.pyo",
+            "*.log",
+            "*.tmp",
         ]

     tasks = [
         Task(
             get_repo_file_dependencies,
             detailed_extraction=detailed_extraction,
-            excluded_paths=excluded_paths
+            excluded_paths=excluded_paths,
         ),
         Task(add_data_points, task_config={"batch_size": 30}),
     ]

diff --git a/cognee/tasks/repo_processor/get_repo_file_dependencies.py b/cognee/tasks/repo_processor/get_repo_file_dependencies.py
index 2567a44cd..f1435a9e2 100644
--- a/cognee/tasks/repo_processor/get_repo_file_dependencies.py
+++ b/cognee/tasks/repo_processor/get_repo_file_dependencies.py
@@ -30,9 +30,24 @@ async def get_source_code_files(repo_path: str, excluded_paths: Optional[List[st
     # Default exclusions
     default_excluded_patterns = [
-        ".venv/", "venv/", "__pycache__/", ".pytest_cache/", "build/", "dist/",
-        "node_modules/", ".npm/", ".git/", ".svn/", ".idea/", ".vscode/", "tmp/", "temp/",
-        "*.pyc", "*.pyo", "*.log", "*.tmp"
+        ".venv/",
+        "venv/",
+        "__pycache__/",
+        ".pytest_cache/",
+        "build/",
+        "dist/",
+        "node_modules/",
+        ".npm/",
+        ".git/",
+        ".svn/",
+        ".idea/",
+        ".vscode/",
+        "tmp/",
+        "temp/",
+        "*.pyc",
+        "*.pyo",
+        "*.log",
+        "*.tmp",
     ]

     excluded_patterns = default_excluded_patterns + (excluded_paths or [])
@@ -51,11 +66,7 @@ async def get_source_code_files(repo_path: str, excluded_paths: Optional[List[st
             if should_exclude:
                 continue

-            if (
-                file.endswith(".py")
-                and not file.startswith("test_")
-                and not file.endswith("_test")
-            ):
+            if file.endswith(".py") and not file.startswith("test_") and not file.endswith("_test"):
                 py_files_paths.append(full_path)

     source_code_files = set()
@@ -84,9 +95,7 @@

 async def get_repo_file_dependencies(
-    repo_path: str,
-    detailed_extraction: bool = False,
-    excluded_paths: Optional[List[str]] = None
+    repo_path: str, detailed_extraction: bool = False, excluded_paths: Optional[List[str]] = None
 ) -> AsyncGenerator[DataPoint, None]:
     """
     Generate a dependency graph for Python files in the given repository path.

diff --git a/cognee/tests/test_repo_processor.py b/cognee/tests/test_repo_processor.py
index fc3c26b05..2d5868f36 100644
--- a/cognee/tests/test_repo_processor.py
+++ b/cognee/tests/test_repo_processor.py
@@ -4,6 +4,7 @@ import shutil
 import tempfile
 from cognee.tasks.repo_processor.get_repo_file_dependencies import get_source_code_files
+

 def test_get_source_code_files_excludes_common_dirs_and_files():
     # Create a temporary test directory
     test_repo = tempfile.mkdtemp()

From bf34ba398e1d3dd39373a0e3b86f0c90e54ef8f7 Mon Sep 17 00:00:00 2001
From: hajdul88 <52442977+hajdul88@users.noreply.github.com>
Date: Wed, 27 Aug 2025 15:14:46 +0200
Subject: [PATCH 04/73] feat: adds temporal models for llm extraction

---
 cognee/modules/chunking/models/DocumentChunk.py |  7 ++++---
 cognee/modules/engine/models/Event.py           | 16 ++++++++++++++++
 cognee/modules/engine/models/Interval.py        |  7 +++++++
 cognee/modules/engine/models/Timestamp.py       | 13 +++++++++++++
 cognee/modules/engine/models/__init__.py        |  3 +++
 5 files changed, 43 insertions(+), 3 deletions(-)
 create mode 100644 cognee/modules/engine/models/Event.py
 create mode 100644 cognee/modules/engine/models/Interval.py
 create mode 100644 cognee/modules/engine/models/Timestamp.py

diff --git a/cognee/modules/chunking/models/DocumentChunk.py b/cognee/modules/chunking/models/DocumentChunk.py
index 475703265..9f8c57486 100644
--- a/cognee/modules/chunking/models/DocumentChunk.py
+++ b/cognee/modules/chunking/models/DocumentChunk.py
@@ -1,8 +1,9 @@
-from typing import List
+from typing import List, Union

 from cognee.infrastructure.engine import DataPoint
 from cognee.modules.data.processing.document_types import Document
 from cognee.modules.engine.models import Entity
+from cognee.modules.engine.models import Event


 class DocumentChunk(DataPoint):
@@ -20,15 +21,15 @@ class DocumentChunk(DataPoint):
    - chunk_index: The index of the chunk in the original document.
    - cut_type: The type of cut that defined this chunk.
    - is_part_of: The document to which this chunk belongs.
-   - contains: A list of entities contained within the chunk (default is None).
+   - contains: A list of entities or events contained within the chunk (default is None).
    - metadata: A dictionary to hold meta information related to the chunk, including index fields.
""" @@ -30,6 +31,6 @@ class DocumentChunk(DataPoint): chunk_index: int cut_type: str is_part_of: Document - contains: List[Entity] = None + contains: List[Union[Entity, Event]] = None metadata: dict = {"index_fields": ["text"]} diff --git a/cognee/modules/engine/models/Event.py b/cognee/modules/engine/models/Event.py new file mode 100644 index 000000000..88141e602 --- /dev/null +++ b/cognee/modules/engine/models/Event.py @@ -0,0 +1,16 @@ +from typing import Optional, Any +from pydantic import SkipValidation +from cognee.infrastructure.engine import DataPoint +from cognee.modules.engine.models.Timestamp import Timestamp +from cognee.modules.engine.models.Interval import Interval + + +class Event(DataPoint): + name: str + description: Optional[str] = None + at: Optional[Timestamp] = None + during: Optional[Interval] = None + location: Optional[str] = None + attributes: SkipValidation[Any] = None + + metadata: dict = {"index_fields": ["name"]} \ No newline at end of file diff --git a/cognee/modules/engine/models/Interval.py b/cognee/modules/engine/models/Interval.py new file mode 100644 index 000000000..3666bf69d --- /dev/null +++ b/cognee/modules/engine/models/Interval.py @@ -0,0 +1,7 @@ +from pydantic import Field +from cognee.infrastructure.engine import DataPoint +from cognee.modules.engine.models.Timestamp import Timestamp + +class Interval(DataPoint): + time_from: Timestamp = Field(...) + time_to: Timestamp = Field(...) \ No newline at end of file diff --git a/cognee/modules/engine/models/Timestamp.py b/cognee/modules/engine/models/Timestamp.py new file mode 100644 index 000000000..38977c348 --- /dev/null +++ b/cognee/modules/engine/models/Timestamp.py @@ -0,0 +1,13 @@ +from pydantic import Field +from cognee.infrastructure.engine import DataPoint + + +class Timestamp(DataPoint): + time_at: int = Field(...) + year: int = Field(...) + month: int = Field(...) + day: int = Field(...) + hour: int = Field(...) + minute: int = Field(...) + second: int = Field(...) + timestamp_str: str = Field(...) 
\ No newline at end of file
diff --git a/cognee/modules/engine/models/__init__.py b/cognee/modules/engine/models/__init__.py
index 2535f00f3..8d28ebf8a 100644
--- a/cognee/modules/engine/models/__init__.py
+++ b/cognee/modules/engine/models/__init__.py
@@ -4,3 +4,6 @@ from .TableRow import TableRow
 from .TableType import TableType
 from .node_set import NodeSet
 from .ColumnValue import ColumnValue
+from .Timestamp import Timestamp
+from .Interval import Interval
+from .Event import Event

From a3cc1ebe2dd986366eb911ce5c55fbc036411a e4 Mon Sep 17 00:00:00 2001
From: hajdul88 <52442977+hajdul88@users.noreply.github.com>
Date: Wed, 27 Aug 2025 15:15:55 +0200
Subject: [PATCH 05/73] feat: adds pydantic models

---
 cognee/tasks/temporal_graph/models.py | 50 +++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 cognee/tasks/temporal_graph/models.py

diff --git a/cognee/tasks/temporal_graph/models.py b/cognee/tasks/temporal_graph/models.py
new file mode 100644
index 000000000..3818110c5
--- /dev/null
+++ b/cognee/tasks/temporal_graph/models.py
@@ -0,0 +1,50 @@
+from typing import Optional, List
+from pydantic import BaseModel, Field
+
+
+
+class Timestamp(BaseModel):
+    year: int = Field(..., ge=1, le=9999)
+    month: int = Field(..., ge=1, le=12)
+    day: int = Field(..., ge=1, le=31)
+    hour: int = Field(..., ge=0, le=23)
+    minute: int = Field(..., ge=0, le=59)
+    second: int = Field(..., ge=0, le=59)
+
+
+class Interval(BaseModel):
+    starts_at: Timestamp
+    ends_at: Timestamp
+
+
+class QueryInterval(BaseModel):
+    starts_at: Optional[Timestamp] = None
+    ends_at: Optional[Timestamp] = None
+
+
+class Event(BaseModel):
+    name: str
+    description: Optional[str] = None
+    time_from: Optional[Timestamp] = None
+    time_to: Optional[Timestamp] = None
+    location: Optional[str] = None
+
+
+class EventList(BaseModel):
+    events: List[Event]
+
+
+class EntityAttribute(BaseModel):
+    entity: str
+    entity_type: str
+    relationship: str
+
+
+class EventWithEntities(BaseModel):
+    event_name: str
+    description: Optional[str] = None
+    attributes: List[EntityAttribute] = []
+
+
+class EventEntityList(BaseModel):
+    events: List[EventWithEntities]
\ No newline at end of file

From f5489f202731146f25ab37b1fb868f4c35010dc2 Mon Sep 17 00:00:00 2001
From: hajdul88 <52442977+hajdul88@users.noreply.github.com>
Date: Wed, 27 Aug 2025 15:16:35 +0200
Subject: [PATCH 06/73] feat: adds event and timestamp pydantic to datapoint methods

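A minimal sketch of the intended conversion (illustrative values; Event and
Timestamp are the pydantic extraction models from the previous patch, while the
returned object is the engine-model Event DataPoint):

    ts = Timestamp(year=2021, month=3, day=4, hour=9, minute=0, second=0)
    event = Event(name="board meeting", time_from=ts)
    event_datapoint = generate_event_datapoint(event)
    # event_datapoint.at is a Timestamp DataPoint, and the description gains a
    # "Time data: 2021-03-04 09:00:00" suffix.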
---
 cognee/modules/engine/utils/__init__.py            |  2 ++
 .../engine/utils/generate_event_datapoint.py       | 30 +++++++++++++++
 .../utils/generate_timestamp_datapoint.py          | 27 +++++++++++++++
 3 files changed, 59 insertions(+)
 create mode 100644 cognee/modules/engine/utils/generate_event_datapoint.py
 create mode 100644 cognee/modules/engine/utils/generate_timestamp_datapoint.py

diff --git a/cognee/modules/engine/utils/__init__.py b/cognee/modules/engine/utils/__init__.py
index 4d4ab02e7..892315259 100644
--- a/cognee/modules/engine/utils/__init__.py
+++ b/cognee/modules/engine/utils/__init__.py
@@ -1,3 +1,5 @@
 from .generate_node_id import generate_node_id
 from .generate_node_name import generate_node_name
 from .generate_edge_name import generate_edge_name
+from .generate_event_datapoint import generate_event_datapoint
+from .generate_timestamp_datapoint import generate_timestamp_datapoint

diff --git a/cognee/modules/engine/utils/generate_event_datapoint.py b/cognee/modules/engine/utils/generate_event_datapoint.py
new file mode 100644
index 000000000..aeec325d9
--- /dev/null
+++ b/cognee/modules/engine/utils/generate_event_datapoint.py
@@ -0,0 +1,30 @@
+from cognee.modules.engine.models import Interval, Event
+from cognee.modules.engine.utils.generate_timestamp_datapoint import generate_timestamp_datapoint
+
+def generate_event_datapoint(event) -> Event:
+    """Create an Event datapoint from an event model."""
+    # Base event data
+    event_data = {
+        "name": event.name,
+        "description": event.description,
+        "location": event.location,
+    }
+
+    # Create timestamps if they exist
+    time_from = generate_timestamp_datapoint(event.time_from) if event.time_from else None
+    time_to = generate_timestamp_datapoint(event.time_to) if event.time_to else None
+
+    # Add temporal information
+    if time_from and time_to:
+        event_data["during"] = Interval(time_from=time_from, time_to=time_to)
+        # Enrich description with temporal info
+        temporal_info = f"\n---\nTime data: {time_from.timestamp_str} to {time_to.timestamp_str}"
+        event_data["description"] = (event_data["description"] or "Event") + temporal_info
+    elif time_from or time_to:
+        timestamp = time_from or time_to
+        event_data["at"] = timestamp
+        # Enrich description with temporal info
+        temporal_info = f"\n---\nTime data: {timestamp.timestamp_str}"
+        event_data["description"] = (event_data["description"] or "Event") + temporal_info
+
+    return Event(**event_data)
\ No newline at end of file
diff --git a/cognee/modules/engine/utils/generate_timestamp_datapoint.py b/cognee/modules/engine/utils/generate_timestamp_datapoint.py
new file mode 100644
index 000000000..cbef2d177
--- /dev/null
+++ b/cognee/modules/engine/utils/generate_timestamp_datapoint.py
@@ -0,0 +1,27 @@
+from datetime import datetime, timezone
+from cognee.modules.engine.models import Timestamp
+from cognee.modules.engine.utils.generate_node_id import generate_node_id
+
+def generate_timestamp_datapoint(ts: Timestamp) -> Timestamp:
+    """Create a Timestamp datapoint from a Timestamp model."""
+    time_at = date_to_int(ts)
+    timestamp_str = (
+        f"{ts.year:04d}-{ts.month:02d}-{ts.day:02d} {ts.hour:02d}:{ts.minute:02d}:{ts.second:02d}"
+    )
+    return Timestamp(
+        id=generate_node_id(str(time_at)),
+        time_at=time_at,
+        year=ts.year,
+        month=ts.month,
+        day=ts.day,
+        hour=ts.hour,
+        minute=ts.minute,
+        second=ts.second,
+        timestamp_str=timestamp_str,
+    )
+
+def date_to_int(ts: Timestamp) -> int:
+    """Convert timestamp to integer milliseconds."""
+    dt = datetime(ts.year, ts.month, ts.day, ts.hour, ts.minute, ts.second, tzinfo=timezone.utc)
+    time = int(dt.timestamp() * 1000)
+    return time
\ No newline at end of file

From 9bb36f37c0edb1a89b359cdb87ac142994840654 Mon Sep 17 00:00:00 2001
From: hajdul88 <52442977+hajdul88@users.noreply.github.com>
Date: Wed, 27 Aug 2025 15:17:32 +0200
Subject: [PATCH 07/73] feat: adds event graph extraction to LLMGateway for litellm

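Usage sketch (how the new gateway method is meant to be awaited; EventList is
the pydantic container from the temporal task models added earlier in this
series):

    events = await LLMGateway.extract_event_graph(chunk_text, EventList)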
---
 cognee/infrastructure/llm/LLMGateway.py            |  9 +++++
 cognee/infrastructure/llm/config.py                |  1 +
 .../prompts/generate_event_graph_prompt.txt        | 31 ++++++++++++++
 .../litellm_instructor/extraction/__init__.py      |  1 +
 .../extraction/knowledge_graph/__init__.py         |  1 +
 .../knowledge_graph/extract_event_graph.py         | 39 +++++++++++++++
 6 files changed, 82 insertions(+)
 create mode 100644 cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt
 create mode 100644 cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py

diff --git a/cognee/infrastructure/llm/LLMGateway.py b/cognee/infrastructure/llm/LLMGateway.py
index a88cfb85d..d8364e9ef 100644
--- a/cognee/infrastructure/llm/LLMGateway.py
+++ b/cognee/infrastructure/llm/LLMGateway.py
@@ -135,3 +135,12 @@ class LLMGateway:
         )

         return extract_summary(content=content, response_model=response_model)
+
+    @staticmethod
+    def extract_event_graph(content: str, response_model: Type[BaseModel]) -> Coroutine:
+        # TODO: Add BAML version of category and extraction and update function (consulted with Igor)
+        from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.extraction import (
+            extract_event_graph,
+        )
+
+        return extract_event_graph(content=content, response_model=response_model)

diff --git a/cognee/infrastructure/llm/config.py b/cognee/infrastructure/llm/config.py
index de2e2168e..199ede986 100644
--- a/cognee/infrastructure/llm/config.py
+++ b/cognee/infrastructure/llm/config.py
@@ -52,6 +52,7 @@ class LLMConfig(BaseSettings):
     transcription_model: str = "whisper-1"

     graph_prompt_path: str = "generate_graph_prompt.txt"
+    temporal_graph_prompt_path: str = "generate_event_graph_prompt.txt"
     llm_rate_limit_enabled: bool = False
     llm_rate_limit_requests: int = 60
     llm_rate_limit_interval: int = 60  # in seconds (default is 60 requests per minute)

diff --git a/cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt b/cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt
new file mode 100644
index 000000000..c81ae6d3d
--- /dev/null
+++ b/cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt
@@ -0,0 +1,31 @@
+For the purposes of building event-based knowledge graphs, you are tasked with extracting highly granular stream events from a text. The events are defined as follows:
+## Event Definition
+- Anything with a date or a timestamp is an event
+- Anything that took place in time (even if the time is unknown) is an event
+- Anything that lasted over a period of time, or happened in an instant is an event: from historical milestones (wars, presidencies, olympiads) to personal milestones (birth, death, employment, etc.), to mundane actions (a walk, a conversation, etc.)
+- **ANY action or verb represents an event** - this is the most important rule
+- Every single verb in the text corresponds to an event that must be extracted
+- This includes: thinking, feeling, seeing, hearing, moving, speaking, writing, reading, eating, sleeping, working, playing, studying, traveling, meeting, calling, texting, buying, selling, creating, destroying, building, breaking, starting, stopping, beginning, ending, etc.
+- Even the most mundane or obvious actions are events: "he walked", "she sat", "they talked", "I thought", "we waited"
+## Requirements
+- **Be extremely thorough** - extract EVERY event mentioned, no matter how small or obvious
+- **Timestamped first** - every timestamp or date should have at least one event
+- **Verbs/actions = one event** - once the timestamped events are done, every action verb should have a corresponding event
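+- Illustrative example: "After the board meeting on 2021-03-04, Ana flew to Berlin" yields at least two events - the board meeting (time_to: 2021-03-04) and Ana's flight (no timestamp) - plus one event per any further verb in context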
+- We expect long streams of events from any piece of text, easily reaching a hundred events
+- Granularity and richness of the stream is key to our success and is of utmost importance
+- Not all events will have timestamps; add timestamps only to known events
+- For events that were instantaneous, attach only the time_from or time_to property; don't create both
+- **Do not skip any events** - if you're unsure whether something is an event, extract it anyway
+- **Quantity over filtering** - it's better to extract too many events than to miss any
+- **Descriptions** - Always include the event description together with entities (who did what; what happened; what is the event?). If possible, include the corresponding part of the text.
+## Output Format
+Your reply should be a JSON: list of dictionaries with the following structure:
+```python
+class Event(BaseModel):
+    name: str [concise]
+    description: Optional[str] = None
+    time_from: Optional[Timestamp] = None
+    time_to: Optional[Timestamp] = None
+    location: Optional[str] = None
+```
\ No newline at end of file
diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py
index 3d4edab27..002246a77 100644
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py
@@ -1,3 +1,4 @@
 from .knowledge_graph.extract_content_graph import extract_content_graph
+from .knowledge_graph.extract_event_graph import extract_event_graph
 from .extract_categories import extract_categories
 from .extract_summary import extract_summary, extract_code_summary

diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py
index 0939b2b34..f758b8909 100644
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py
@@ -1 +1,2 @@
 from .extract_content_graph import extract_content_graph
+from .extract_event_graph import extract_event_graph

diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py
new file mode 100644
index 000000000..2a0c0cab8
--- /dev/null
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py
@@ -0,0 +1,39 @@
+import os
+from pydantic import BaseModel
+from typing import Type
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
+
+from cognee.infrastructure.llm.config import (
+    get_llm_config,
+)
+
+async def extract_event_graph(
+    content: str, response_model: Type[BaseModel], system_prompt: str = None
+):
+    """Extract event graph from content using LLM."""
+
+    llm_config = get_llm_config()
+
+    # Use the temporal prompt configured for event extraction
+    prompt_path = llm_config.temporal_graph_prompt_path
+
+    # Check if the prompt path is an absolute path or just a filename
+    if os.path.isabs(prompt_path):
+        # directory containing the file
+        base_directory = os.path.dirname(prompt_path)
+        # just the filename itself
+        prompt_path = os.path.basename(prompt_path)
+    else:
+        base_directory = None
+
+    if system_prompt is None:
+        system_prompt = LLMGateway.render_prompt(prompt_path, {}, base_directory=base_directory)
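+    # response_model drives the structured decoding in instructor/litellm; the
+    # temporal pipeline passes EventList (cognee.tasks.temporal_graph.models),
+    # so the reply is parsed directly into pydantic Event objects.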
+
+    content_graph = await LLMGateway.acreate_structured_output(
+        content, system_prompt, response_model
+    )
+
+    return content_graph
\ No newline at end of file

From 5a43751e61ab218f340eccd533742443f2197ed0 Mon Sep 17 00:00:00 2001
From: hajdul88 <52442977+hajdul88@users.noreply.github.com>
Date: Wed, 27 Aug 2025 15:17:57 +0200
Subject: [PATCH 08/73] feat: adds entity and event extraction task

---
 cognee/tasks/temporal_graph/__init__.py            |  2 ++
 .../temporal_graph/extract_events_and_entities.py  | 23 ++++++++++++++
 2 files changed, 25 insertions(+)
 create mode 100644 cognee/tasks/temporal_graph/__init__.py
 create mode 100644 cognee/tasks/temporal_graph/extract_events_and_entities.py

diff --git a/cognee/tasks/temporal_graph/__init__.py b/cognee/tasks/temporal_graph/__init__.py
new file mode 100644
index 000000000..163fb6840
--- /dev/null
+++ b/cognee/tasks/temporal_graph/__init__.py
@@ -0,0 +1,2 @@
+from .extract_events_and_entities import extract_events_and_entities
+

diff --git a/cognee/tasks/temporal_graph/extract_events_and_entities.py b/cognee/tasks/temporal_graph/extract_events_and_entities.py
new file mode 100644
index 000000000..37e113d56
--- /dev/null
+++ b/cognee/tasks/temporal_graph/extract_events_and_entities.py
@@ -0,0 +1,23 @@
+import asyncio
+from typing import List
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
+from cognee.modules.chunking.models import DocumentChunk
+from cognee.tasks.temporal_graph.models import EventList
+from cognee.modules.engine.utils.generate_event_datapoint import generate_event_datapoint
+
+
+async def extract_events_and_entities(data_chunks: List[DocumentChunk]) -> List[DocumentChunk]:
+    """Extract events from a batch of document chunks and attach them to each chunk."""
+    events = await asyncio.gather(
+        *[LLMGateway.extract_event_graph(chunk.text, EventList) for chunk in data_chunks]
+    )
+
+    for data_chunk, event_list in zip(data_chunks, events):
+        # Chunks may arrive with contains unset (None); initialize before appending
+        if data_chunk.contains is None:
+            data_chunk.contains = []
+        for event in event_list.events:
+            event_datapoint = generate_event_datapoint(event)
+            data_chunk.contains.append(event_datapoint)
+
+    return data_chunks
\ No newline at end of file

From 2ec22567c333e39229024211ab99b6f49e620717 Mon Sep 17 00:00:00 2001
From: hajdul88 <52442977+hajdul88@users.noreply.github.com>
Date: Wed, 27 Aug 2025 15:18:47 +0200
Subject: [PATCH 09/73] feat: adds temporal tasks to cognify

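Usage sketch (assuming the public cognee API surface is otherwise unchanged):

    import cognee

    await cognee.add("On 2021-03-04, Ana moved to Berlin.")
    await cognee.cognify(temporal_cognify=True)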
---
 cognee/api/v1/cognify/cognify.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py
index 21d750875..aaf2939ba 100644
--- a/cognee/api/v1/cognify/cognify.py
+++ b/cognee/api/v1/cognify/cognify.py
@@ -22,6 +22,7 @@ from cognee.tasks.graph import extract_graph_from_data
 from cognee.tasks.storage import add_data_points
 from cognee.tasks.summarization import summarize_text
 from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor
+from cognee.tasks.temporal_graph import extract_events_and_entities

 logger = get_logger("cognify")

@@ -39,6 +40,7 @@ async def cognify(
     graph_db_config: dict = None,
     run_in_background: bool = False,
     incremental_loading: bool = True,
+    temporal_cognify: bool = False,
 ):
     """
     Transform ingested data into a structured knowledge graph.
@@ -177,7 +179,10 @@ async def cognify(
     - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
     - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
     """
-    tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path)
+    if temporal_cognify:
+        tasks = await get_temporal_tasks(user, chunker, chunk_size)
+    else:
+        tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path)

     # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
     pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background)
@@ -224,3 +229,20 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     ]

     return default_tasks
+
+async def get_temporal_tasks(
+    user: User = None, chunker=TextChunker, chunk_size: int = None
+) -> list[Task]:
+    temporal_tasks = [
+        Task(classify_documents),
+        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
+        Task(
+            extract_chunks_from_documents,
+            max_chunk_size=chunk_size or get_max_chunk_tokens(),
+            chunker=chunker,
+        ),
+        Task(extract_events_and_entities, task_config={"batch_size": 10}),
+        Task(add_data_points, task_config={"batch_size": 10}),
+    ]
+
+    return temporal_tasks

From 7c08890609aef16340b61dd3fc7fcbe5e2be3e95 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Wed, 27 Aug 2025 16:53:24 +0200
Subject: [PATCH 10/73] chore: Update mcp version

---
 cognee-mcp/pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cognee-mcp/pyproject.toml b/cognee-mcp/pyproject.toml
index a8596615b..8bde50841 100644
--- a/cognee-mcp/pyproject.toml
+++ b/cognee-mcp/pyproject.toml
@@ -8,7 +8,7 @@ requires-python = ">=3.10"
 dependencies = [
     # For local cognee repo usage remove comment below and add absolute path to cognee. Then run `uv sync --reinstall` in the mcp folder on local cognee changes.
# "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j] @ file:/Users/vasilije/Projects/tiktok/cognee", - "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]==0.2.3", + "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]==0.2.4", "fastmcp>=2.10.0,<3.0.0", "mcp>=1.12.0,<2.0.0", "uv>=0.6.3,<1.0.0", From 1f2809a2e1ac79152d9a9771195e9661c88ed7ff Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 27 Aug 2025 17:40:25 +0200 Subject: [PATCH 11/73] chore: Update lock files --- poetry.lock | 196 ++++++++++++++++++++++++++-------------------------- uv.lock | 153 ++++++++++++++++++++-------------------- 2 files changed, 173 insertions(+), 176 deletions(-) diff --git a/poetry.lock b/poetry.lock index 109e5d917..0a336adcb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -7123,15 +7123,15 @@ twisted = ["twisted"] [[package]] name = "prompt-toolkit" -version = "3.0.51" +version = "3.0.52" description = "Library for building powerful interactive command lines in Python" optional = true python-versions = ">=3.8" groups = ["main"] markers = "extra == \"notebook\" or extra == \"dev\"" files = [ - {file = "prompt_toolkit-3.0.51-py3-none-any.whl", hash = "sha256:52742911fde84e2d423e2f9a4cf1de7d7ac4e51958f648d9540e0fb8db077b07"}, - {file = "prompt_toolkit-3.0.51.tar.gz", hash = "sha256:931a162e3b27fc90c86f1b48bb1fb2c528c2761475e57c9c06de13311c7b54ed"}, + {file = "prompt_toolkit-3.0.52-py3-none-any.whl", hash = "sha256:9aac639a3bbd33284347de5ad8d68ecc044b91a762dc39b7c21095fcd6a19955"}, + {file = "prompt_toolkit-3.0.52.tar.gz", hash = "sha256:28cde192929c8e7321de85de1ddbe736f1375148b02f2e17edd840042b1be855"}, ] [package.dependencies] @@ -8707,107 +8707,105 @@ files = [ [[package]] name = "rapidfuzz" -version = "3.13.0" +version = "3.14.0" description = "rapid fuzzy string matching" optional = true -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["main"] markers = "extra == \"docs\"" files = [ - {file = "rapidfuzz-3.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:aafc42a1dc5e1beeba52cd83baa41372228d6d8266f6d803c16dbabbcc156255"}, - {file = "rapidfuzz-3.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:85c9a131a44a95f9cac2eb6e65531db014e09d89c4f18c7b1fa54979cb9ff1f3"}, - {file = "rapidfuzz-3.13.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d7cec4242d30dd521ef91c0df872e14449d1dffc2a6990ede33943b0dae56c3"}, - {file = "rapidfuzz-3.13.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e297c09972698c95649e89121e3550cee761ca3640cd005e24aaa2619175464e"}, - {file = "rapidfuzz-3.13.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ef0f5f03f61b0e5a57b1df7beafd83df993fd5811a09871bad6038d08e526d0d"}, - {file = "rapidfuzz-3.13.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d8cf5f7cd6e4d5eb272baf6a54e182b2c237548d048e2882258336533f3f02b7"}, - {file = "rapidfuzz-3.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9256218ac8f1a957806ec2fb9a6ddfc6c32ea937c0429e88cf16362a20ed8602"}, - {file = "rapidfuzz-3.13.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e1bdd2e6d0c5f9706ef7595773a81ca2b40f3b33fd7f9840b726fb00c6c4eb2e"}, - {file = "rapidfuzz-3.13.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5280be8fd7e2bee5822e254fe0a5763aa0ad57054b85a32a3d9970e9b09bbcbf"}, - {file = "rapidfuzz-3.13.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fd742c03885db1fce798a1cd87a20f47f144ccf26d75d52feb6f2bae3d57af05"}, - {file = 
"rapidfuzz-3.13.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:5435fcac94c9ecf0504bf88a8a60c55482c32e18e108d6079a0089c47f3f8cf6"}, - {file = "rapidfuzz-3.13.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:93a755266856599be4ab6346273f192acde3102d7aa0735e2f48b456397a041f"}, - {file = "rapidfuzz-3.13.0-cp310-cp310-win32.whl", hash = "sha256:3abe6a4e8eb4cfc4cda04dd650a2dc6d2934cbdeda5def7e6fd1c20f6e7d2a0b"}, - {file = "rapidfuzz-3.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:e8ddb58961401da7d6f55f185512c0d6bd24f529a637078d41dd8ffa5a49c107"}, - {file = "rapidfuzz-3.13.0-cp310-cp310-win_arm64.whl", hash = "sha256:c523620d14ebd03a8d473c89e05fa1ae152821920c3ff78b839218ff69e19ca3"}, - {file = "rapidfuzz-3.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d395a5cad0c09c7f096433e5fd4224d83b53298d53499945a9b0e5a971a84f3a"}, - {file = "rapidfuzz-3.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b7b3eda607a019169f7187328a8d1648fb9a90265087f6903d7ee3a8eee01805"}, - {file = "rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98e0bfa602e1942d542de077baf15d658bd9d5dcfe9b762aff791724c1c38b70"}, - {file = "rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bef86df6d59667d9655905b02770a0c776d2853971c0773767d5ef8077acd624"}, - {file = "rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fedd316c165beed6307bf754dee54d3faca2c47e1f3bcbd67595001dfa11e969"}, - {file = "rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5158da7f2ec02a930be13bac53bb5903527c073c90ee37804090614cab83c29e"}, - {file = "rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b6f913ee4618ddb6d6f3e387b76e8ec2fc5efee313a128809fbd44e65c2bbb2"}, - {file = "rapidfuzz-3.13.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d25fdbce6459ccbbbf23b4b044f56fbd1158b97ac50994eaae2a1c0baae78301"}, - {file = "rapidfuzz-3.13.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:25343ccc589a4579fbde832e6a1e27258bfdd7f2eb0f28cb836d6694ab8591fc"}, - {file = "rapidfuzz-3.13.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:a9ad1f37894e3ffb76bbab76256e8a8b789657183870be11aa64e306bb5228fd"}, - {file = "rapidfuzz-3.13.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:5dc71ef23845bb6b62d194c39a97bb30ff171389c9812d83030c1199f319098c"}, - {file = "rapidfuzz-3.13.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b7f4c65facdb94f44be759bbd9b6dda1fa54d0d6169cdf1a209a5ab97d311a75"}, - {file = "rapidfuzz-3.13.0-cp311-cp311-win32.whl", hash = "sha256:b5104b62711565e0ff6deab2a8f5dbf1fbe333c5155abe26d2cfd6f1849b6c87"}, - {file = "rapidfuzz-3.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:9093cdeb926deb32a4887ebe6910f57fbcdbc9fbfa52252c10b56ef2efb0289f"}, - {file = "rapidfuzz-3.13.0-cp311-cp311-win_arm64.whl", hash = "sha256:f70f646751b6aa9d05be1fb40372f006cc89d6aad54e9d79ae97bd1f5fce5203"}, - {file = "rapidfuzz-3.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a1a6a906ba62f2556372282b1ef37b26bca67e3d2ea957277cfcefc6275cca7"}, - {file = "rapidfuzz-3.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2fd0975e015b05c79a97f38883a11236f5a24cca83aa992bd2558ceaa5652b26"}, - {file = "rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d4e13593d298c50c4f94ce453f757b4b398af3fa0fd2fde693c3e51195b7f69"}, - {file = 
"rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed6f416bda1c9133000009d84d9409823eb2358df0950231cc936e4bf784eb97"}, - {file = "rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1dc82b6ed01acb536b94a43996a94471a218f4d89f3fdd9185ab496de4b2a981"}, - {file = "rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9d824de871daa6e443b39ff495a884931970d567eb0dfa213d234337343835f"}, - {file = "rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d18228a2390375cf45726ce1af9d36ff3dc1f11dce9775eae1f1b13ac6ec50f"}, - {file = "rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9f5fe634c9482ec5d4a6692afb8c45d370ae86755e5f57aa6c50bfe4ca2bdd87"}, - {file = "rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:694eb531889f71022b2be86f625a4209c4049e74be9ca836919b9e395d5e33b3"}, - {file = "rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:11b47b40650e06147dee5e51a9c9ad73bb7b86968b6f7d30e503b9f8dd1292db"}, - {file = "rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:98b8107ff14f5af0243f27d236bcc6e1ef8e7e3b3c25df114e91e3a99572da73"}, - {file = "rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b836f486dba0aceb2551e838ff3f514a38ee72b015364f739e526d720fdb823a"}, - {file = "rapidfuzz-3.13.0-cp312-cp312-win32.whl", hash = "sha256:4671ee300d1818d7bdfd8fa0608580d7778ba701817216f0c17fb29e6b972514"}, - {file = "rapidfuzz-3.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:6e2065f68fb1d0bf65adc289c1bdc45ba7e464e406b319d67bb54441a1b9da9e"}, - {file = "rapidfuzz-3.13.0-cp312-cp312-win_arm64.whl", hash = "sha256:65cc97c2fc2c2fe23586599686f3b1ceeedeca8e598cfcc1b7e56dc8ca7e2aa7"}, - {file = "rapidfuzz-3.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:09e908064d3684c541d312bd4c7b05acb99a2c764f6231bd507d4b4b65226c23"}, - {file = "rapidfuzz-3.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:57c390336cb50d5d3bfb0cfe1467478a15733703af61f6dffb14b1cd312a6fae"}, - {file = "rapidfuzz-3.13.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0da54aa8547b3c2c188db3d1c7eb4d1bb6dd80baa8cdaeaec3d1da3346ec9caa"}, - {file = "rapidfuzz-3.13.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df8e8c21e67afb9d7fbe18f42c6111fe155e801ab103c81109a61312927cc611"}, - {file = "rapidfuzz-3.13.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:461fd13250a2adf8e90ca9a0e1e166515cbcaa5e9c3b1f37545cbbeff9e77f6b"}, - {file = "rapidfuzz-3.13.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2b3dd5d206a12deca16870acc0d6e5036abeb70e3cad6549c294eff15591527"}, - {file = "rapidfuzz-3.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1343d745fbf4688e412d8f398c6e6d6f269db99a54456873f232ba2e7aeb4939"}, - {file = "rapidfuzz-3.13.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b1b065f370d54551dcc785c6f9eeb5bd517ae14c983d2784c064b3aa525896df"}, - {file = "rapidfuzz-3.13.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:11b125d8edd67e767b2295eac6eb9afe0b1cdc82ea3d4b9257da4b8e06077798"}, - {file = "rapidfuzz-3.13.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c33f9c841630b2bb7e69a3fb5c84a854075bb812c47620978bddc591f764da3d"}, - {file = "rapidfuzz-3.13.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = 
"sha256:ae4574cb66cf1e85d32bb7e9ec45af5409c5b3970b7ceb8dea90168024127566"}, - {file = "rapidfuzz-3.13.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e05752418b24bbd411841b256344c26f57da1148c5509e34ea39c7eb5099ab72"}, - {file = "rapidfuzz-3.13.0-cp313-cp313-win32.whl", hash = "sha256:0e1d08cb884805a543f2de1f6744069495ef527e279e05370dd7c83416af83f8"}, - {file = "rapidfuzz-3.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:9a7c6232be5f809cd39da30ee5d24e6cadd919831e6020ec6c2391f4c3bc9264"}, - {file = "rapidfuzz-3.13.0-cp313-cp313-win_arm64.whl", hash = "sha256:3f32f15bacd1838c929b35c84b43618481e1b3d7a61b5ed2db0291b70ae88b53"}, - {file = "rapidfuzz-3.13.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cc64da907114d7a18b5e589057e3acaf2fec723d31c49e13fedf043592a3f6a7"}, - {file = "rapidfuzz-3.13.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4d9d7f84c8e992a8dbe5a3fdbea73d733da39bf464e62c912ac3ceba9c0cff93"}, - {file = "rapidfuzz-3.13.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a79a2f07786a2070669b4b8e45bd96a01c788e7a3c218f531f3947878e0f956"}, - {file = "rapidfuzz-3.13.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9f338e71c45b69a482de8b11bf4a029993230760120c8c6e7c9b71760b6825a1"}, - {file = "rapidfuzz-3.13.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:adb40ca8ddfcd4edd07b0713a860be32bdf632687f656963bcbce84cea04b8d8"}, - {file = "rapidfuzz-3.13.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48719f7dcf62dfb181063b60ee2d0a39d327fa8ad81b05e3e510680c44e1c078"}, - {file = "rapidfuzz-3.13.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9327a4577f65fc3fb712e79f78233815b8a1c94433d0c2c9f6bc5953018b3565"}, - {file = "rapidfuzz-3.13.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:200030dfc0a1d5d6ac18e993c5097c870c97c41574e67f227300a1fb74457b1d"}, - {file = "rapidfuzz-3.13.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:cc269e74cad6043cb8a46d0ce580031ab642b5930562c2bb79aa7fbf9c858d26"}, - {file = "rapidfuzz-3.13.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:e62779c6371bd2b21dbd1fdce89eaec2d93fd98179d36f61130b489f62294a92"}, - {file = "rapidfuzz-3.13.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:f4797f821dc5d7c2b6fc818b89f8a3f37bcc900dd9e4369e6ebf1e525efce5db"}, - {file = "rapidfuzz-3.13.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d21f188f6fe4fbf422e647ae9d5a68671d00218e187f91859c963d0738ccd88c"}, - {file = "rapidfuzz-3.13.0-cp39-cp39-win32.whl", hash = "sha256:45dd4628dd9c21acc5c97627dad0bb791764feea81436fb6e0a06eef4c6dceaa"}, - {file = "rapidfuzz-3.13.0-cp39-cp39-win_amd64.whl", hash = "sha256:624a108122039af89ddda1a2b7ab2a11abe60c1521956f142f5d11bcd42ef138"}, - {file = "rapidfuzz-3.13.0-cp39-cp39-win_arm64.whl", hash = "sha256:435071fd07a085ecbf4d28702a66fd2e676a03369ee497cc38bcb69a46bc77e2"}, - {file = "rapidfuzz-3.13.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:fe5790a36d33a5d0a6a1f802aa42ecae282bf29ac6f7506d8e12510847b82a45"}, - {file = "rapidfuzz-3.13.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:cdb33ee9f8a8e4742c6b268fa6bd739024f34651a06b26913381b1413ebe7590"}, - {file = "rapidfuzz-3.13.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c99b76b93f7b495eee7dcb0d6a38fb3ce91e72e99d9f78faa5664a881cb2b7d"}, - {file = "rapidfuzz-3.13.0-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:6af42f2ede8b596a6aaf6d49fdee3066ca578f4856b85ab5c1e2145de367a12d"}, - {file = "rapidfuzz-3.13.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c0efa73afbc5b265aca0d8a467ae2a3f40d6854cbe1481cb442a62b7bf23c99"}, - {file = "rapidfuzz-3.13.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:7ac21489de962a4e2fc1e8f0b0da4aa1adc6ab9512fd845563fecb4b4c52093a"}, - {file = "rapidfuzz-3.13.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1ba007f4d35a45ee68656b2eb83b8715e11d0f90e5b9f02d615a8a321ff00c27"}, - {file = "rapidfuzz-3.13.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d7a217310429b43be95b3b8ad7f8fc41aba341109dc91e978cd7c703f928c58f"}, - {file = "rapidfuzz-3.13.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:558bf526bcd777de32b7885790a95a9548ffdcce68f704a81207be4a286c1095"}, - {file = "rapidfuzz-3.13.0-pp311-pypy311_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:202a87760f5145140d56153b193a797ae9338f7939eb16652dd7ff96f8faf64c"}, - {file = "rapidfuzz-3.13.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cfcccc08f671646ccb1e413c773bb92e7bba789e3a1796fd49d23c12539fe2e4"}, - {file = "rapidfuzz-3.13.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:1f219f1e3c3194d7a7de222f54450ce12bc907862ff9a8962d83061c1f923c86"}, - {file = "rapidfuzz-3.13.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:ccbd0e7ea1a216315f63ffdc7cd09c55f57851afc8fe59a74184cb7316c0598b"}, - {file = "rapidfuzz-3.13.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a50856f49a4016ef56edd10caabdaf3608993f9faf1e05c3c7f4beeac46bd12a"}, - {file = "rapidfuzz-3.13.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fd05336db4d0b8348d7eaaf6fa3c517b11a56abaa5e89470ce1714e73e4aca7"}, - {file = "rapidfuzz-3.13.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:573ad267eb9b3f6e9b04febce5de55d8538a87c56c64bf8fd2599a48dc9d8b77"}, - {file = "rapidfuzz-3.13.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30fd1451f87ccb6c2f9d18f6caa483116bbb57b5a55d04d3ddbd7b86f5b14998"}, - {file = "rapidfuzz-3.13.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a6dd36d4916cf57ddb05286ed40b09d034ca5d4bca85c17be0cb6a21290597d9"}, - {file = "rapidfuzz-3.13.0.tar.gz", hash = "sha256:d2eaf3839e52cbcc0accbe9817a67b4b0fcf70aaeb229cfddc1c28061f9ce5d8"}, + {file = "rapidfuzz-3.14.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:91d8c7d9d38835d5fcf9bc87593add864eaea41eb33654d93ded3006b198a326"}, + {file = "rapidfuzz-3.14.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5a1e574230262956d28e40191dd44ad3d81d2d29b5e716c6c7c0ba17c4d1524e"}, + {file = "rapidfuzz-3.14.0-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f1eda6546831f15e6d8d27593873129ae5e4d2f05cf13bacc2d5222e117f3038"}, + {file = "rapidfuzz-3.14.0-cp310-cp310-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d29686b524b35f93fc14961026a8cfb37283af76ab6f4ed49aebf4df01b44a4a"}, + {file = "rapidfuzz-3.14.0-cp310-cp310-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0fb99bc445014e893c152e36e98b3e9418cc2c0fa7b83d01f3d1b89e73618ed2"}, + {file = "rapidfuzz-3.14.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0d9cd4212ca2ea18d026b3f3dfc1ec25919e75ddfd2c7dd20bf7797f262e2460"}, + {file = "rapidfuzz-3.14.0-cp310-cp310-manylinux_2_31_armv7l.whl", hash = 
"sha256:e6a41c6be1394b17b03bc3af3051f54ba0b4018324a0d4cb34c7d2344ec82e79"}, + {file = "rapidfuzz-3.14.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:19bee793c4a84b0f5153fcff2e7cfeaeeb976497a5892baaadb6eadef7e6f398"}, + {file = "rapidfuzz-3.14.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:977144b50b2f1864c825796ad2d41f47a3fd5b7632a2e9905c4d2c8883a8234d"}, + {file = "rapidfuzz-3.14.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ca7c7274bec8085f7a2b68b0490d270a260385d45280d8a2a8ae5884cfb217ba"}, + {file = "rapidfuzz-3.14.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:efa7eca15825c78dc2b9e9e5824fa095cef8954de98e5a6d2f4ad2416a3d5ddf"}, + {file = "rapidfuzz-3.14.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a780c08c41e7ec4336d7a8fcdcd7920df74de6c57be87b72adad4e1b40a31632"}, + {file = "rapidfuzz-3.14.0-cp310-cp310-win32.whl", hash = "sha256:cf540e48175c0620639aa4f4e2b56d61291935c0f684469e8e125e7fa4daef65"}, + {file = "rapidfuzz-3.14.0-cp310-cp310-win_amd64.whl", hash = "sha256:e7769fbc78aba051f514d8a08374e3989124b2d1eee6888c72706a174d0e8a6d"}, + {file = "rapidfuzz-3.14.0-cp310-cp310-win_arm64.whl", hash = "sha256:71442f5e9fad60a4942df3be340acd5315e59aefc5a83534b6a9aa62db67809d"}, + {file = "rapidfuzz-3.14.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6501e49395ad5cecf1623cb4801639faa1c833dbacc07c26fa7b8f7fa19fd1c0"}, + {file = "rapidfuzz-3.14.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9c3cd9b8d5e159c67d242f80cae1b9d9b1502779fc69fcd268a1eb7053f58048"}, + {file = "rapidfuzz-3.14.0-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a578cadbe61f738685ffa20e56e8346847e40ecb033bdc885373a070cfe4a351"}, + {file = "rapidfuzz-3.14.0-cp311-cp311-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b5b46340872a1736544b23f3c355f292935311623a0e63a271f284ffdbab05e4"}, + {file = "rapidfuzz-3.14.0-cp311-cp311-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:238422749da213c3dfe36397b746aeda8579682e93b723a1e77655182198e693"}, + {file = "rapidfuzz-3.14.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:83f3ad0e7ad3cf1138e36be26f4cacb7580ac0132b26528a89e8168a0875afd8"}, + {file = "rapidfuzz-3.14.0-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:7c34e34fb7e01aeea1e84192cf01daf1d56ccc8a0b34c0833f9799b341c6d539"}, + {file = "rapidfuzz-3.14.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a58bbbbdd2a150c76c6b3af5ac2bbe9afcff26e6b17e1f60b6bd766cc7094fcf"}, + {file = "rapidfuzz-3.14.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:d0e50b4bea57bfcda4afee993eef390fd8f0a64981c971ac4decd9452143892d"}, + {file = "rapidfuzz-3.14.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:357eb9d394bfc742d3528e8bb13afa9baebc7fbe863071975426b47fc21db220"}, + {file = "rapidfuzz-3.14.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:fb960ec526030077658764a309b60e907d86d898f8efbe959845ec2873e514eb"}, + {file = "rapidfuzz-3.14.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6bedb19db81d8d723cc4d914cb079d89ff359364184cc3c3db7cef1fc7819444"}, + {file = "rapidfuzz-3.14.0-cp311-cp311-win32.whl", hash = "sha256:8dba3d6e10a34aa255a6f6922cf249f8d0b9829e6b00854e371d803040044f7f"}, + {file = "rapidfuzz-3.14.0-cp311-cp311-win_amd64.whl", hash = "sha256:ce79e37b23c1cbf1dc557159c8f20f6d71e9d28aef63afcf87bcb58c8add096a"}, + {file = "rapidfuzz-3.14.0-cp311-cp311-win_arm64.whl", hash = "sha256:e140ff4b5d0ea386b998137ddd1335a7bd4201ef987d4cb5a48c3e8c174f8aec"}, + {file = 
"rapidfuzz-3.14.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:93c8739f7bf7931d690aeb527c27e2a61fd578f076d542ddd37e29fa535546b6"}, + {file = "rapidfuzz-3.14.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7596e95ab03da6cff70f4ec9a5298b2802e8bdd443159d18180b186c80df1416"}, + {file = "rapidfuzz-3.14.0-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8cdd49e097ced3746eadb5fb87379f377c0b093f9aba1133ae4f311b574e2ed8"}, + {file = "rapidfuzz-3.14.0-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f4cd4898f21686bb141e151ba920bcd1744cab339277f484c0f97fe7de2c45c8"}, + {file = "rapidfuzz-3.14.0-cp312-cp312-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:83427518ad72050add47e2cf581080bde81df7f69882e508da3e08faad166b1f"}, + {file = "rapidfuzz-3.14.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05435b4f2472cbf7aac8b837e2e84a165e595c60d79da851da7cfa85ed15895d"}, + {file = "rapidfuzz-3.14.0-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:2dae744c1cdb8b1411ed511a719b505a0348da1970a652bfc735598e68779287"}, + {file = "rapidfuzz-3.14.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9ca05daaca07232037014fc6ce2c2ef0a05c69712f6a5e77da6da5209fb04d7c"}, + {file = "rapidfuzz-3.14.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:2227f4b3742295f380adefef7b6338c30434f8a8e18a11895a1a7c9308b6635d"}, + {file = "rapidfuzz-3.14.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:847ea42b5a6077bc796e1b99cd357a641207b20e3573917b0469b28b5a22238a"}, + {file = "rapidfuzz-3.14.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:539506f13cf0dd6ef2f846571f8e116dba32a468e52d05a91161785ab7de2ed1"}, + {file = "rapidfuzz-3.14.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:03c4b4d4f45f846e4eae052ee18d39d6afe659d74f6d99df5a0d2c5d53930505"}, + {file = "rapidfuzz-3.14.0-cp312-cp312-win32.whl", hash = "sha256:aff0baa3980a8aeb2ce5e15930140146b5fe3fb2d63c8dc4cb08dfbd2051ceb2"}, + {file = "rapidfuzz-3.14.0-cp312-cp312-win_amd64.whl", hash = "sha256:d1eef7f0694fe4cf991f61adaa040955da1e0072c8c41d7db5eb60e83da9e61b"}, + {file = "rapidfuzz-3.14.0-cp312-cp312-win_arm64.whl", hash = "sha256:269d8d1fe5830eef46a165a5c6dd240a05ad44c281a77957461b79cede1ece0f"}, + {file = "rapidfuzz-3.14.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5cf3828b8cbac02686e1d5c499c58e43c5f613ad936fe19a2d092e53f3308ccd"}, + {file = "rapidfuzz-3.14.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:68c3931c19c51c11654cf75f663f34c0c7ea04c456c84ccebfd52b2047121dba"}, + {file = "rapidfuzz-3.14.0-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9b4232168959af46f2c0770769e7986ff6084d97bc4b6b2b16b2bfa34164421b"}, + {file = "rapidfuzz-3.14.0-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:174c784cecfafe22d783b5124ebffa2e02cc01e49ffe60a28ad86d217977f478"}, + {file = "rapidfuzz-3.14.0-cp313-cp313-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0b2dedf216f43a50f227eee841ef0480e29e26b2ce2d7ee680b28354ede18627"}, + {file = "rapidfuzz-3.14.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5698239eecf5b759630450ef59521ad3637e5bd4afc2b124ae8af2ff73309c41"}, + {file = "rapidfuzz-3.14.0-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:0acc9553fc26f1c291c381a6aa8d3c5625be23b5721f139528af40cc4119ae1d"}, + {file = "rapidfuzz-3.14.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:00141dfd3b8c9ae15fbb5fbd191a08bde63cdfb1f63095d8f5faf1698e30da93"}, + {file = "rapidfuzz-3.14.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:67f725c3f5713da6e0750dc23f65f0f822c6937c25e3fc9ee797aa6783bef8c1"}, + {file = "rapidfuzz-3.14.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:ba351cf2678d40a23fb4cbfe82cc45ea338a57518dca62a823c5b6381aa20c68"}, + {file = "rapidfuzz-3.14.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:558323dcd5fb38737226be84c78cafbe427706e47379f02c57c3e35ac3745061"}, + {file = "rapidfuzz-3.14.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:cb4e4ea174add5183c707d890a816a85e9330f93e5ded139dab182adc727930c"}, + {file = "rapidfuzz-3.14.0-cp313-cp313-win32.whl", hash = "sha256:ec379e1b407935d729c08da9641cfc5dfb2a7796f74cdd82158ce5986bb8ff88"}, + {file = "rapidfuzz-3.14.0-cp313-cp313-win_amd64.whl", hash = "sha256:4b59ba48a909bdf7ec5dad6e3a5a0004aeec141ae5ddb205d0c5bd4389894cf9"}, + {file = "rapidfuzz-3.14.0-cp313-cp313-win_arm64.whl", hash = "sha256:e688b0a98edea42da450fa6ba41736203ead652a78b558839916c10df855f545"}, + {file = "rapidfuzz-3.14.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:cb6c5a46444a2787e466acd77e162049f061304025ab24da02b59caedea66064"}, + {file = "rapidfuzz-3.14.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:99ed7a9e9ff798157caf3c3d96ca7da6560878902d8f70fa7731acc94e0d293c"}, + {file = "rapidfuzz-3.14.0-cp313-cp313t-win32.whl", hash = "sha256:c8e954dd59291ff0cd51b9c0f425e5dc84731bb006dbd5b7846746fe873a0452"}, + {file = "rapidfuzz-3.14.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5754e3ca259667c46a2b58ca7d7568251d6e23d2f0e354ac1cc5564557f4a32d"}, + {file = "rapidfuzz-3.14.0-cp313-cp313t-win_arm64.whl", hash = "sha256:558865f6825d27006e6ae2e1635cfe236d736c8f2c5c82db6db4b1b6df4478bc"}, + {file = "rapidfuzz-3.14.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:3cc4bd8de6643258c5899f21414f9d45d7589d158eee8d438ea069ead624823b"}, + {file = "rapidfuzz-3.14.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:081aac1acb4ab449f8ea7d4e5ea268227295503e1287f56f0b56c7fc3452da1e"}, + {file = "rapidfuzz-3.14.0-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3e0209c6ef7f2c732e10ce4fccafcf7d9e79eb8660a81179aa307c7bd09fafcd"}, + {file = "rapidfuzz-3.14.0-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6e4610997e9de08395e8632b605488a9efc859fe0516b6993b3925f3057f9da7"}, + {file = "rapidfuzz-3.14.0-cp314-cp314-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:efd0095cde6d0179c92c997ede4b85158bf3c7386043e2fadbee291018b29300"}, + {file = "rapidfuzz-3.14.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0a141c07f9e97c45e67aeed677bac92c08f228c556a80750ea3e191e82d54034"}, + {file = "rapidfuzz-3.14.0-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:5a9de40fa6be7809fd2579c8020b9edaf6f50ffc43082b14e95ad3928a254f22"}, + {file = "rapidfuzz-3.14.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:20f510dae17bad8f4909ab32b40617f964af55131e630de7ebc0ffa7f00fe634"}, + {file = "rapidfuzz-3.14.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:79c3fd17a432c3f74de94782d7139f9a22e948cec31659a1a05d67b5c0f4290e"}, + {file = "rapidfuzz-3.14.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:8cde9ffb86ea33d67cce9b26b513a177038be48ee2eb4d856cc60a75cb698db7"}, + {file = "rapidfuzz-3.14.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:cafb657c8f2959761bca40c0da66f29d111e2c40d91f8ed4a75cc486c99b33ae"}, + {file = 
"rapidfuzz-3.14.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4d80a9f673c534800d73f164ed59620e2ba820ed3840abb67c56022ad043564b"}, + {file = "rapidfuzz-3.14.0-cp314-cp314-win32.whl", hash = "sha256:da9878a01357c7906fb16359b3622ce256933a3286058ee503358859e1442f68"}, + {file = "rapidfuzz-3.14.0-cp314-cp314-win_amd64.whl", hash = "sha256:09af941076ef18f6c2b35acfd5004c60d03414414058e98ece6ca9096f454870"}, + {file = "rapidfuzz-3.14.0-cp314-cp314-win_arm64.whl", hash = "sha256:1a878eb065ce6061038dd1c0b9e8eb7477f7d05d5c5161a1d2a5fa630818f938"}, + {file = "rapidfuzz-3.14.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:33ce0326e6feb0d2207a7ca866a5aa6a2ac2361f1ca43ca32aca505268c18ec9"}, + {file = "rapidfuzz-3.14.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e8056d10e99dedf110e929fdff4de6272057115b28eeef4fb6f0d99fd73c026f"}, + {file = "rapidfuzz-3.14.0-cp314-cp314t-win32.whl", hash = "sha256:ddde238b7076e49c2c21a477ee4b67143e1beaf7a3185388fe0b852e64c6ef52"}, + {file = "rapidfuzz-3.14.0-cp314-cp314t-win_amd64.whl", hash = "sha256:ef24464be04a7da1adea741376ddd2b092e0de53c9b500fd3c2e38e071295c9e"}, + {file = "rapidfuzz-3.14.0-cp314-cp314t-win_arm64.whl", hash = "sha256:fd4a27654f51bed3518bc5bbf166627caf3ddd858b12485380685777421f8933"}, + {file = "rapidfuzz-3.14.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:4c9a00ef2f684b1132aeb3c0737483dc8f85a725dbe792aee1d1c3cbcf329b34"}, + {file = "rapidfuzz-3.14.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:2e203d76b3dcd1b466ee196f7adb71009860906303db274ae20c7c5af62bc1a8"}, + {file = "rapidfuzz-3.14.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:2b317a71fd938348d8dbbe2f559cda58a67fdcafdd3107afca7ab0fb654efa86"}, + {file = "rapidfuzz-3.14.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:e5d610a2c5efdb2a3f9eaecac4ecd6d849efb2522efa36000e006179062056dc"}, + {file = "rapidfuzz-3.14.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:c053cad08ab872df4e201daacb66d7fd04b5b4c395baebb193b9910c63ed22ec"}, + {file = "rapidfuzz-3.14.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:7e52ac8a458b2f09291fa968b23192d6664c7568a43607de2a51a088d016152d"}, + {file = "rapidfuzz-3.14.0.tar.gz", hash = "sha256:672b6ba06150e53d7baf4e3d5f12ffe8c213d5088239a15b5ae586ab245ac8b2"}, ] [package.extras] diff --git a/uv.lock b/uv.lock index 30f0da326..694d772f4 100644 --- a/uv.lock +++ b/uv.lock @@ -5162,14 +5162,14 @@ wheels = [ [[package]] name = "prompt-toolkit" -version = "3.0.51" +version = "3.0.52" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "wcwidth" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bb/6e/9d084c929dfe9e3bfe0c6a47e31f78a25c54627d64a66e884a8bf5474f1c/prompt_toolkit-3.0.51.tar.gz", hash = "sha256:931a162e3b27fc90c86f1b48bb1fb2c528c2761475e57c9c06de13311c7b54ed", size = 428940, upload-time = "2025-04-15T09:18:47.731Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/96/06e01a7b38dce6fe1db213e061a4602dd6032a8a97ef6c1a862537732421/prompt_toolkit-3.0.52.tar.gz", hash = "sha256:28cde192929c8e7321de85de1ddbe736f1375148b02f2e17edd840042b1be855", size = 434198, upload-time = "2025-08-27T15:24:02.057Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ce/4f/5249960887b1fbe561d9ff265496d170b55a735b76724f10ef19f9e40716/prompt_toolkit-3.0.51-py3-none-any.whl", hash = "sha256:52742911fde84e2d423e2f9a4cf1de7d7ac4e51958f648d9540e0fb8db077b07", size = 387810, upload-time = "2025-04-15T09:18:44.753Z" }, + { url = 
"https://files.pythonhosted.org/packages/84/03/0d3ce49e2505ae70cf43bc5bb3033955d2fc9f932163e84dc0779cc47f48/prompt_toolkit-3.0.52-py3-none-any.whl", hash = "sha256:9aac639a3bbd33284347de5ad8d68ecc044b91a762dc39b7c21095fcd6a19955", size = 391431, upload-time = "2025-08-27T15:23:59.498Z" }, ] [[package]] @@ -6266,82 +6266,81 @@ wheels = [ [[package]] name = "rapidfuzz" -version = "3.13.0" +version = "3.14.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ed/f6/6895abc3a3d056b9698da3199b04c0e56226d530ae44a470edabf8b664f0/rapidfuzz-3.13.0.tar.gz", hash = "sha256:d2eaf3839e52cbcc0accbe9817a67b4b0fcf70aaeb229cfddc1c28061f9ce5d8", size = 57904226, upload-time = "2025-04-03T20:38:51.226Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d4/11/0de727b336f28e25101d923c9feeeb64adcf231607fe7e1b083795fa149a/rapidfuzz-3.14.0.tar.gz", hash = "sha256:672b6ba06150e53d7baf4e3d5f12ffe8c213d5088239a15b5ae586ab245ac8b2", size = 58073448, upload-time = "2025-08-27T13:41:31.541Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/de/27/ca10b3166024ae19a7e7c21f73c58dfd4b7fef7420e5497ee64ce6b73453/rapidfuzz-3.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:aafc42a1dc5e1beeba52cd83baa41372228d6d8266f6d803c16dbabbcc156255", size = 1998899, upload-time = "2025-04-03T20:35:08.764Z" }, - { url = "https://files.pythonhosted.org/packages/f0/38/c4c404b13af0315483a6909b3a29636e18e1359307fb74a333fdccb3730d/rapidfuzz-3.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:85c9a131a44a95f9cac2eb6e65531db014e09d89c4f18c7b1fa54979cb9ff1f3", size = 1449949, upload-time = "2025-04-03T20:35:11.26Z" }, - { url = "https://files.pythonhosted.org/packages/12/ae/15c71d68a6df6b8e24595421fdf5bcb305888318e870b7be8d935a9187ee/rapidfuzz-3.13.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d7cec4242d30dd521ef91c0df872e14449d1dffc2a6990ede33943b0dae56c3", size = 1424199, upload-time = "2025-04-03T20:35:12.954Z" }, - { url = "https://files.pythonhosted.org/packages/dc/9a/765beb9e14d7b30d12e2d6019e8b93747a0bedbc1d0cce13184fa3825426/rapidfuzz-3.13.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e297c09972698c95649e89121e3550cee761ca3640cd005e24aaa2619175464e", size = 5352400, upload-time = "2025-04-03T20:35:15.421Z" }, - { url = "https://files.pythonhosted.org/packages/e2/b8/49479fe6f06b06cd54d6345ed16de3d1ac659b57730bdbe897df1e059471/rapidfuzz-3.13.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ef0f5f03f61b0e5a57b1df7beafd83df993fd5811a09871bad6038d08e526d0d", size = 1652465, upload-time = "2025-04-03T20:35:18.43Z" }, - { url = "https://files.pythonhosted.org/packages/6f/d8/08823d496b7dd142a7b5d2da04337df6673a14677cfdb72f2604c64ead69/rapidfuzz-3.13.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d8cf5f7cd6e4d5eb272baf6a54e182b2c237548d048e2882258336533f3f02b7", size = 1616590, upload-time = "2025-04-03T20:35:20.482Z" }, - { url = "https://files.pythonhosted.org/packages/38/d4/5cfbc9a997e544f07f301c54d42aac9e0d28d457d543169e4ec859b8ce0d/rapidfuzz-3.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9256218ac8f1a957806ec2fb9a6ddfc6c32ea937c0429e88cf16362a20ed8602", size = 3086956, upload-time = "2025-04-03T20:35:22.756Z" }, - { url = "https://files.pythonhosted.org/packages/25/1e/06d8932a72fa9576095234a15785136407acf8f9a7dbc8136389a3429da1/rapidfuzz-3.13.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:e1bdd2e6d0c5f9706ef7595773a81ca2b40f3b33fd7f9840b726fb00c6c4eb2e", size = 2494220, upload-time = "2025-04-03T20:35:25.563Z" }, - { url = "https://files.pythonhosted.org/packages/03/16/5acf15df63119d5ca3d9a54b82807866ff403461811d077201ca351a40c3/rapidfuzz-3.13.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5280be8fd7e2bee5822e254fe0a5763aa0ad57054b85a32a3d9970e9b09bbcbf", size = 7585481, upload-time = "2025-04-03T20:35:27.426Z" }, - { url = "https://files.pythonhosted.org/packages/e1/cf/ebade4009431ea8e715e59e882477a970834ddaacd1a670095705b86bd0d/rapidfuzz-3.13.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fd742c03885db1fce798a1cd87a20f47f144ccf26d75d52feb6f2bae3d57af05", size = 2894842, upload-time = "2025-04-03T20:35:29.457Z" }, - { url = "https://files.pythonhosted.org/packages/a7/bd/0732632bd3f906bf613229ee1b7cbfba77515db714a0e307becfa8a970ae/rapidfuzz-3.13.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:5435fcac94c9ecf0504bf88a8a60c55482c32e18e108d6079a0089c47f3f8cf6", size = 3438517, upload-time = "2025-04-03T20:35:31.381Z" }, - { url = "https://files.pythonhosted.org/packages/83/89/d3bd47ec9f4b0890f62aea143a1e35f78f3d8329b93d9495b4fa8a3cbfc3/rapidfuzz-3.13.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:93a755266856599be4ab6346273f192acde3102d7aa0735e2f48b456397a041f", size = 4412773, upload-time = "2025-04-03T20:35:33.425Z" }, - { url = "https://files.pythonhosted.org/packages/b3/57/1a152a07883e672fc117c7f553f5b933f6e43c431ac3fd0e8dae5008f481/rapidfuzz-3.13.0-cp310-cp310-win32.whl", hash = "sha256:3abe6a4e8eb4cfc4cda04dd650a2dc6d2934cbdeda5def7e6fd1c20f6e7d2a0b", size = 1842334, upload-time = "2025-04-03T20:35:35.648Z" }, - { url = "https://files.pythonhosted.org/packages/a7/68/7248addf95b6ca51fc9d955161072285da3059dd1472b0de773cff910963/rapidfuzz-3.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:e8ddb58961401da7d6f55f185512c0d6bd24f529a637078d41dd8ffa5a49c107", size = 1624392, upload-time = "2025-04-03T20:35:37.294Z" }, - { url = "https://files.pythonhosted.org/packages/68/23/f41c749f2c61ed1ed5575eaf9e73ef9406bfedbf20a3ffa438d15b5bf87e/rapidfuzz-3.13.0-cp310-cp310-win_arm64.whl", hash = "sha256:c523620d14ebd03a8d473c89e05fa1ae152821920c3ff78b839218ff69e19ca3", size = 865584, upload-time = "2025-04-03T20:35:39.005Z" }, - { url = "https://files.pythonhosted.org/packages/87/17/9be9eff5a3c7dfc831c2511262082c6786dca2ce21aa8194eef1cb71d67a/rapidfuzz-3.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d395a5cad0c09c7f096433e5fd4224d83b53298d53499945a9b0e5a971a84f3a", size = 1999453, upload-time = "2025-04-03T20:35:40.804Z" }, - { url = "https://files.pythonhosted.org/packages/75/67/62e57896ecbabe363f027d24cc769d55dd49019e576533ec10e492fcd8a2/rapidfuzz-3.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b7b3eda607a019169f7187328a8d1648fb9a90265087f6903d7ee3a8eee01805", size = 1450881, upload-time = "2025-04-03T20:35:42.734Z" }, - { url = "https://files.pythonhosted.org/packages/96/5c/691c5304857f3476a7b3df99e91efc32428cbe7d25d234e967cc08346c13/rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98e0bfa602e1942d542de077baf15d658bd9d5dcfe9b762aff791724c1c38b70", size = 1422990, upload-time = "2025-04-03T20:35:45.158Z" }, - { url = "https://files.pythonhosted.org/packages/46/81/7a7e78f977496ee2d613154b86b203d373376bcaae5de7bde92f3ad5a192/rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bef86df6d59667d9655905b02770a0c776d2853971c0773767d5ef8077acd624", size = 
5342309, upload-time = "2025-04-03T20:35:46.952Z" }, - { url = "https://files.pythonhosted.org/packages/51/44/12fdd12a76b190fe94bf38d252bb28ddf0ab7a366b943e792803502901a2/rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fedd316c165beed6307bf754dee54d3faca2c47e1f3bcbd67595001dfa11e969", size = 1656881, upload-time = "2025-04-03T20:35:49.954Z" }, - { url = "https://files.pythonhosted.org/packages/27/ae/0d933e660c06fcfb087a0d2492f98322f9348a28b2cc3791a5dbadf6e6fb/rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5158da7f2ec02a930be13bac53bb5903527c073c90ee37804090614cab83c29e", size = 1608494, upload-time = "2025-04-03T20:35:51.646Z" }, - { url = "https://files.pythonhosted.org/packages/3d/2c/4b2f8aafdf9400e5599b6ed2f14bc26ca75f5a923571926ccbc998d4246a/rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b6f913ee4618ddb6d6f3e387b76e8ec2fc5efee313a128809fbd44e65c2bbb2", size = 3072160, upload-time = "2025-04-03T20:35:53.472Z" }, - { url = "https://files.pythonhosted.org/packages/60/7d/030d68d9a653c301114101c3003b31ce01cf2c3224034cd26105224cd249/rapidfuzz-3.13.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d25fdbce6459ccbbbf23b4b044f56fbd1158b97ac50994eaae2a1c0baae78301", size = 2491549, upload-time = "2025-04-03T20:35:55.391Z" }, - { url = "https://files.pythonhosted.org/packages/8e/cd/7040ba538fc6a8ddc8816a05ecf46af9988b46c148ddd7f74fb0fb73d012/rapidfuzz-3.13.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:25343ccc589a4579fbde832e6a1e27258bfdd7f2eb0f28cb836d6694ab8591fc", size = 7584142, upload-time = "2025-04-03T20:35:57.71Z" }, - { url = "https://files.pythonhosted.org/packages/c1/96/85f7536fbceb0aa92c04a1c37a3fc4fcd4e80649e9ed0fb585382df82edc/rapidfuzz-3.13.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:a9ad1f37894e3ffb76bbab76256e8a8b789657183870be11aa64e306bb5228fd", size = 2896234, upload-time = "2025-04-03T20:35:59.969Z" }, - { url = "https://files.pythonhosted.org/packages/55/fd/460e78438e7019f2462fe9d4ecc880577ba340df7974c8a4cfe8d8d029df/rapidfuzz-3.13.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:5dc71ef23845bb6b62d194c39a97bb30ff171389c9812d83030c1199f319098c", size = 3437420, upload-time = "2025-04-03T20:36:01.91Z" }, - { url = "https://files.pythonhosted.org/packages/cc/df/c3c308a106a0993befd140a414c5ea78789d201cf1dfffb8fd9749718d4f/rapidfuzz-3.13.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b7f4c65facdb94f44be759bbd9b6dda1fa54d0d6169cdf1a209a5ab97d311a75", size = 4410860, upload-time = "2025-04-03T20:36:04.352Z" }, - { url = "https://files.pythonhosted.org/packages/75/ee/9d4ece247f9b26936cdeaae600e494af587ce9bf8ddc47d88435f05cfd05/rapidfuzz-3.13.0-cp311-cp311-win32.whl", hash = "sha256:b5104b62711565e0ff6deab2a8f5dbf1fbe333c5155abe26d2cfd6f1849b6c87", size = 1843161, upload-time = "2025-04-03T20:36:06.802Z" }, - { url = "https://files.pythonhosted.org/packages/c9/5a/d00e1f63564050a20279015acb29ecaf41646adfacc6ce2e1e450f7f2633/rapidfuzz-3.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:9093cdeb926deb32a4887ebe6910f57fbcdbc9fbfa52252c10b56ef2efb0289f", size = 1629962, upload-time = "2025-04-03T20:36:09.133Z" }, - { url = "https://files.pythonhosted.org/packages/3b/74/0a3de18bc2576b794f41ccd07720b623e840fda219ab57091897f2320fdd/rapidfuzz-3.13.0-cp311-cp311-win_arm64.whl", hash = "sha256:f70f646751b6aa9d05be1fb40372f006cc89d6aad54e9d79ae97bd1f5fce5203", size = 866631, upload-time = "2025-04-03T20:36:11.022Z" }, 
- { url = "https://files.pythonhosted.org/packages/13/4b/a326f57a4efed8f5505b25102797a58e37ee11d94afd9d9422cb7c76117e/rapidfuzz-3.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a1a6a906ba62f2556372282b1ef37b26bca67e3d2ea957277cfcefc6275cca7", size = 1989501, upload-time = "2025-04-03T20:36:13.43Z" }, - { url = "https://files.pythonhosted.org/packages/b7/53/1f7eb7ee83a06c400089ec7cb841cbd581c2edd7a4b21eb2f31030b88daa/rapidfuzz-3.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2fd0975e015b05c79a97f38883a11236f5a24cca83aa992bd2558ceaa5652b26", size = 1445379, upload-time = "2025-04-03T20:36:16.439Z" }, - { url = "https://files.pythonhosted.org/packages/07/09/de8069a4599cc8e6d194e5fa1782c561151dea7d5e2741767137e2a8c1f0/rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d4e13593d298c50c4f94ce453f757b4b398af3fa0fd2fde693c3e51195b7f69", size = 1405986, upload-time = "2025-04-03T20:36:18.447Z" }, - { url = "https://files.pythonhosted.org/packages/5d/77/d9a90b39c16eca20d70fec4ca377fbe9ea4c0d358c6e4736ab0e0e78aaf6/rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed6f416bda1c9133000009d84d9409823eb2358df0950231cc936e4bf784eb97", size = 5310809, upload-time = "2025-04-03T20:36:20.324Z" }, - { url = "https://files.pythonhosted.org/packages/1e/7d/14da291b0d0f22262d19522afaf63bccf39fc027c981233fb2137a57b71f/rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1dc82b6ed01acb536b94a43996a94471a218f4d89f3fdd9185ab496de4b2a981", size = 1629394, upload-time = "2025-04-03T20:36:22.256Z" }, - { url = "https://files.pythonhosted.org/packages/b7/e4/79ed7e4fa58f37c0f8b7c0a62361f7089b221fe85738ae2dbcfb815e985a/rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9d824de871daa6e443b39ff495a884931970d567eb0dfa213d234337343835f", size = 1600544, upload-time = "2025-04-03T20:36:24.207Z" }, - { url = "https://files.pythonhosted.org/packages/4e/20/e62b4d13ba851b0f36370060025de50a264d625f6b4c32899085ed51f980/rapidfuzz-3.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d18228a2390375cf45726ce1af9d36ff3dc1f11dce9775eae1f1b13ac6ec50f", size = 3052796, upload-time = "2025-04-03T20:36:26.279Z" }, - { url = "https://files.pythonhosted.org/packages/cd/8d/55fdf4387dec10aa177fe3df8dbb0d5022224d95f48664a21d6b62a5299d/rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9f5fe634c9482ec5d4a6692afb8c45d370ae86755e5f57aa6c50bfe4ca2bdd87", size = 2464016, upload-time = "2025-04-03T20:36:28.525Z" }, - { url = "https://files.pythonhosted.org/packages/9b/be/0872f6a56c0f473165d3b47d4170fa75263dc5f46985755aa9bf2bbcdea1/rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:694eb531889f71022b2be86f625a4209c4049e74be9ca836919b9e395d5e33b3", size = 7556725, upload-time = "2025-04-03T20:36:30.629Z" }, - { url = "https://files.pythonhosted.org/packages/5d/f3/6c0750e484d885a14840c7a150926f425d524982aca989cdda0bb3bdfa57/rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:11b47b40650e06147dee5e51a9c9ad73bb7b86968b6f7d30e503b9f8dd1292db", size = 2859052, upload-time = "2025-04-03T20:36:32.836Z" }, - { url = "https://files.pythonhosted.org/packages/6f/98/5a3a14701b5eb330f444f7883c9840b43fb29c575e292e09c90a270a6e07/rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:98b8107ff14f5af0243f27d236bcc6e1ef8e7e3b3c25df114e91e3a99572da73", size = 3390219, upload-time = 
"2025-04-03T20:36:35.062Z" }, - { url = "https://files.pythonhosted.org/packages/e9/7d/f4642eaaeb474b19974332f2a58471803448be843033e5740965775760a5/rapidfuzz-3.13.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b836f486dba0aceb2551e838ff3f514a38ee72b015364f739e526d720fdb823a", size = 4377924, upload-time = "2025-04-03T20:36:37.363Z" }, - { url = "https://files.pythonhosted.org/packages/8e/83/fa33f61796731891c3e045d0cbca4436a5c436a170e7f04d42c2423652c3/rapidfuzz-3.13.0-cp312-cp312-win32.whl", hash = "sha256:4671ee300d1818d7bdfd8fa0608580d7778ba701817216f0c17fb29e6b972514", size = 1823915, upload-time = "2025-04-03T20:36:39.451Z" }, - { url = "https://files.pythonhosted.org/packages/03/25/5ee7ab6841ca668567d0897905eebc79c76f6297b73bf05957be887e9c74/rapidfuzz-3.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:6e2065f68fb1d0bf65adc289c1bdc45ba7e464e406b319d67bb54441a1b9da9e", size = 1616985, upload-time = "2025-04-03T20:36:41.631Z" }, - { url = "https://files.pythonhosted.org/packages/76/5e/3f0fb88db396cb692aefd631e4805854e02120a2382723b90dcae720bcc6/rapidfuzz-3.13.0-cp312-cp312-win_arm64.whl", hash = "sha256:65cc97c2fc2c2fe23586599686f3b1ceeedeca8e598cfcc1b7e56dc8ca7e2aa7", size = 860116, upload-time = "2025-04-03T20:36:43.915Z" }, - { url = "https://files.pythonhosted.org/packages/0a/76/606e71e4227790750f1646f3c5c873e18d6cfeb6f9a77b2b8c4dec8f0f66/rapidfuzz-3.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:09e908064d3684c541d312bd4c7b05acb99a2c764f6231bd507d4b4b65226c23", size = 1982282, upload-time = "2025-04-03T20:36:46.149Z" }, - { url = "https://files.pythonhosted.org/packages/0a/f5/d0b48c6b902607a59fd5932a54e3518dae8223814db8349b0176e6e9444b/rapidfuzz-3.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:57c390336cb50d5d3bfb0cfe1467478a15733703af61f6dffb14b1cd312a6fae", size = 1439274, upload-time = "2025-04-03T20:36:48.323Z" }, - { url = "https://files.pythonhosted.org/packages/59/cf/c3ac8c80d8ced6c1f99b5d9674d397ce5d0e9d0939d788d67c010e19c65f/rapidfuzz-3.13.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0da54aa8547b3c2c188db3d1c7eb4d1bb6dd80baa8cdaeaec3d1da3346ec9caa", size = 1399854, upload-time = "2025-04-03T20:36:50.294Z" }, - { url = "https://files.pythonhosted.org/packages/09/5d/ca8698e452b349c8313faf07bfa84e7d1c2d2edf7ccc67bcfc49bee1259a/rapidfuzz-3.13.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df8e8c21e67afb9d7fbe18f42c6111fe155e801ab103c81109a61312927cc611", size = 5308962, upload-time = "2025-04-03T20:36:52.421Z" }, - { url = "https://files.pythonhosted.org/packages/66/0a/bebada332854e78e68f3d6c05226b23faca79d71362509dbcf7b002e33b7/rapidfuzz-3.13.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:461fd13250a2adf8e90ca9a0e1e166515cbcaa5e9c3b1f37545cbbeff9e77f6b", size = 1625016, upload-time = "2025-04-03T20:36:54.639Z" }, - { url = "https://files.pythonhosted.org/packages/de/0c/9e58d4887b86d7121d1c519f7050d1be5eb189d8a8075f5417df6492b4f5/rapidfuzz-3.13.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2b3dd5d206a12deca16870acc0d6e5036abeb70e3cad6549c294eff15591527", size = 1600414, upload-time = "2025-04-03T20:36:56.669Z" }, - { url = "https://files.pythonhosted.org/packages/9b/df/6096bc669c1311568840bdcbb5a893edc972d1c8d2b4b4325c21d54da5b1/rapidfuzz-3.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1343d745fbf4688e412d8f398c6e6d6f269db99a54456873f232ba2e7aeb4939", size = 3053179, upload-time = 
"2025-04-03T20:36:59.366Z" }, - { url = "https://files.pythonhosted.org/packages/f9/46/5179c583b75fce3e65a5cd79a3561bd19abd54518cb7c483a89b284bf2b9/rapidfuzz-3.13.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b1b065f370d54551dcc785c6f9eeb5bd517ae14c983d2784c064b3aa525896df", size = 2456856, upload-time = "2025-04-03T20:37:01.708Z" }, - { url = "https://files.pythonhosted.org/packages/6b/64/e9804212e3286d027ac35bbb66603c9456c2bce23f823b67d2f5cabc05c1/rapidfuzz-3.13.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:11b125d8edd67e767b2295eac6eb9afe0b1cdc82ea3d4b9257da4b8e06077798", size = 7567107, upload-time = "2025-04-03T20:37:04.521Z" }, - { url = "https://files.pythonhosted.org/packages/8a/f2/7d69e7bf4daec62769b11757ffc31f69afb3ce248947aadbb109fefd9f65/rapidfuzz-3.13.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c33f9c841630b2bb7e69a3fb5c84a854075bb812c47620978bddc591f764da3d", size = 2854192, upload-time = "2025-04-03T20:37:06.905Z" }, - { url = "https://files.pythonhosted.org/packages/05/21/ab4ad7d7d0f653e6fe2e4ccf11d0245092bef94cdff587a21e534e57bda8/rapidfuzz-3.13.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:ae4574cb66cf1e85d32bb7e9ec45af5409c5b3970b7ceb8dea90168024127566", size = 3398876, upload-time = "2025-04-03T20:37:09.692Z" }, - { url = "https://files.pythonhosted.org/packages/0f/a8/45bba94c2489cb1ee0130dcb46e1df4fa2c2b25269e21ffd15240a80322b/rapidfuzz-3.13.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e05752418b24bbd411841b256344c26f57da1148c5509e34ea39c7eb5099ab72", size = 4377077, upload-time = "2025-04-03T20:37:11.929Z" }, - { url = "https://files.pythonhosted.org/packages/0c/f3/5e0c6ae452cbb74e5436d3445467447e8c32f3021f48f93f15934b8cffc2/rapidfuzz-3.13.0-cp313-cp313-win32.whl", hash = "sha256:0e1d08cb884805a543f2de1f6744069495ef527e279e05370dd7c83416af83f8", size = 1822066, upload-time = "2025-04-03T20:37:14.425Z" }, - { url = "https://files.pythonhosted.org/packages/96/e3/a98c25c4f74051df4dcf2f393176b8663bfd93c7afc6692c84e96de147a2/rapidfuzz-3.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:9a7c6232be5f809cd39da30ee5d24e6cadd919831e6020ec6c2391f4c3bc9264", size = 1615100, upload-time = "2025-04-03T20:37:16.611Z" }, - { url = "https://files.pythonhosted.org/packages/60/b1/05cd5e697c00cd46d7791915f571b38c8531f714832eff2c5e34537c49ee/rapidfuzz-3.13.0-cp313-cp313-win_arm64.whl", hash = "sha256:3f32f15bacd1838c929b35c84b43618481e1b3d7a61b5ed2db0291b70ae88b53", size = 858976, upload-time = "2025-04-03T20:37:19.336Z" }, - { url = "https://files.pythonhosted.org/packages/d5/e1/f5d85ae3c53df6f817ca70dbdd37c83f31e64caced5bb867bec6b43d1fdf/rapidfuzz-3.13.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:fe5790a36d33a5d0a6a1f802aa42ecae282bf29ac6f7506d8e12510847b82a45", size = 1904437, upload-time = "2025-04-03T20:38:00.255Z" }, - { url = "https://files.pythonhosted.org/packages/db/d7/ded50603dddc5eb182b7ce547a523ab67b3bf42b89736f93a230a398a445/rapidfuzz-3.13.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:cdb33ee9f8a8e4742c6b268fa6bd739024f34651a06b26913381b1413ebe7590", size = 1383126, upload-time = "2025-04-03T20:38:02.676Z" }, - { url = "https://files.pythonhosted.org/packages/c4/48/6f795e793babb0120b63a165496d64f989b9438efbeed3357d9a226ce575/rapidfuzz-3.13.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c99b76b93f7b495eee7dcb0d6a38fb3ce91e72e99d9f78faa5664a881cb2b7d", size = 1365565, upload-time = "2025-04-03T20:38:06.646Z" }, - { url = 
"https://files.pythonhosted.org/packages/f0/50/0062a959a2d72ed17815824e40e2eefdb26f6c51d627389514510a7875f3/rapidfuzz-3.13.0-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6af42f2ede8b596a6aaf6d49fdee3066ca578f4856b85ab5c1e2145de367a12d", size = 5251719, upload-time = "2025-04-03T20:38:09.191Z" }, - { url = "https://files.pythonhosted.org/packages/e7/02/bd8b70cd98b7a88e1621264778ac830c9daa7745cd63e838bd773b1aeebd/rapidfuzz-3.13.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c0efa73afbc5b265aca0d8a467ae2a3f40d6854cbe1481cb442a62b7bf23c99", size = 2991095, upload-time = "2025-04-03T20:38:12.554Z" }, - { url = "https://files.pythonhosted.org/packages/9f/8d/632d895cdae8356826184864d74a5f487d40cb79f50a9137510524a1ba86/rapidfuzz-3.13.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:7ac21489de962a4e2fc1e8f0b0da4aa1adc6ab9512fd845563fecb4b4c52093a", size = 1553888, upload-time = "2025-04-03T20:38:15.357Z" }, - { url = "https://files.pythonhosted.org/packages/88/df/6060c5a9c879b302bd47a73fc012d0db37abf6544c57591bcbc3459673bd/rapidfuzz-3.13.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1ba007f4d35a45ee68656b2eb83b8715e11d0f90e5b9f02d615a8a321ff00c27", size = 1905935, upload-time = "2025-04-03T20:38:18.07Z" }, - { url = "https://files.pythonhosted.org/packages/a2/6c/a0b819b829e20525ef1bd58fc776fb8d07a0c38d819e63ba2b7c311a2ed4/rapidfuzz-3.13.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d7a217310429b43be95b3b8ad7f8fc41aba341109dc91e978cd7c703f928c58f", size = 1383714, upload-time = "2025-04-03T20:38:20.628Z" }, - { url = "https://files.pythonhosted.org/packages/6a/c1/3da3466cc8a9bfb9cd345ad221fac311143b6a9664b5af4adb95b5e6ce01/rapidfuzz-3.13.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:558bf526bcd777de32b7885790a95a9548ffdcce68f704a81207be4a286c1095", size = 1367329, upload-time = "2025-04-03T20:38:23.01Z" }, - { url = "https://files.pythonhosted.org/packages/da/f0/9f2a9043bfc4e66da256b15d728c5fc2d865edf0028824337f5edac36783/rapidfuzz-3.13.0-pp311-pypy311_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:202a87760f5145140d56153b193a797ae9338f7939eb16652dd7ff96f8faf64c", size = 5251057, upload-time = "2025-04-03T20:38:25.52Z" }, - { url = "https://files.pythonhosted.org/packages/6a/ff/af2cb1d8acf9777d52487af5c6b34ce9d13381a753f991d95ecaca813407/rapidfuzz-3.13.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cfcccc08f671646ccb1e413c773bb92e7bba789e3a1796fd49d23c12539fe2e4", size = 2992401, upload-time = "2025-04-03T20:38:28.196Z" }, - { url = "https://files.pythonhosted.org/packages/c1/c5/c243b05a15a27b946180db0d1e4c999bef3f4221505dff9748f1f6c917be/rapidfuzz-3.13.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:1f219f1e3c3194d7a7de222f54450ce12bc907862ff9a8962d83061c1f923c86", size = 1553782, upload-time = "2025-04-03T20:38:30.778Z" }, + { url = "https://files.pythonhosted.org/packages/da/11/3b7fffe4abf37907f7cd675d0e0e9b319fc8016d02b3f8af2a6d42f0c408/rapidfuzz-3.14.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:91d8c7d9d38835d5fcf9bc87593add864eaea41eb33654d93ded3006b198a326", size = 2001447, upload-time = "2025-08-27T13:38:36.322Z" }, + { url = "https://files.pythonhosted.org/packages/8b/00/def426992bba23ba58fbc11d3e3f6325f5e988d189ffec9ee14f15fbbb56/rapidfuzz-3.14.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5a1e574230262956d28e40191dd44ad3d81d2d29b5e716c6c7c0ba17c4d1524e", size = 1448465, 
upload-time = "2025-08-27T13:38:38.31Z" }, + { url = "https://files.pythonhosted.org/packages/34/af/e61ffb1960a2c2888e31a5a331eea36acc3671c1e6d5ae6f2c0d26aa09bf/rapidfuzz-3.14.0-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f1eda6546831f15e6d8d27593873129ae5e4d2f05cf13bacc2d5222e117f3038", size = 1471970, upload-time = "2025-08-27T13:38:40.074Z" }, + { url = "https://files.pythonhosted.org/packages/86/1d/55f8d1fca4ba201c4451435fc32c2ca24e9cf4ef501bf73eedd116a7b48a/rapidfuzz-3.14.0-cp310-cp310-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d29686b524b35f93fc14961026a8cfb37283af76ab6f4ed49aebf4df01b44a4a", size = 1787116, upload-time = "2025-08-27T13:38:41.432Z" }, + { url = "https://files.pythonhosted.org/packages/06/20/8234c1e7232cf5e38df33064306a318e50400f811b44fa8c2ab5fdb72ea0/rapidfuzz-3.14.0-cp310-cp310-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0fb99bc445014e893c152e36e98b3e9418cc2c0fa7b83d01f3d1b89e73618ed2", size = 2344061, upload-time = "2025-08-27T13:38:42.824Z" }, + { url = "https://files.pythonhosted.org/packages/e4/4b/b891cd701374955df3a2dc26e953d051d3e49962c6445be5ed3b8d793343/rapidfuzz-3.14.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0d9cd4212ca2ea18d026b3f3dfc1ec25919e75ddfd2c7dd20bf7797f262e2460", size = 3299404, upload-time = "2025-08-27T13:38:44.768Z" }, + { url = "https://files.pythonhosted.org/packages/d6/8a/1853d52ff05fb02d43d70e31e786a6d56d739a670f8e1999ec3980f5a94b/rapidfuzz-3.14.0-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:e6a41c6be1394b17b03bc3af3051f54ba0b4018324a0d4cb34c7d2344ec82e79", size = 1310003, upload-time = "2025-08-27T13:38:46.197Z" }, + { url = "https://files.pythonhosted.org/packages/6e/59/50e489bcee5d1efe23168534f664f0b42e2196ec62a726af142858b3290f/rapidfuzz-3.14.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:19bee793c4a84b0f5153fcff2e7cfeaeeb976497a5892baaadb6eadef7e6f398", size = 2493703, upload-time = "2025-08-27T13:38:48.073Z" }, + { url = "https://files.pythonhosted.org/packages/d7/18/9d1a39e2b2f405baab88f61db8bcd405251f726d60b749da471a6b10dc6d/rapidfuzz-3.14.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:977144b50b2f1864c825796ad2d41f47a3fd5b7632a2e9905c4d2c8883a8234d", size = 2617527, upload-time = "2025-08-27T13:38:49.64Z" }, + { url = "https://files.pythonhosted.org/packages/33/b2/79095caca38f823ef885848eb827359a9e6c588022bb882caf17cb8d6c16/rapidfuzz-3.14.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ca7c7274bec8085f7a2b68b0490d270a260385d45280d8a2a8ae5884cfb217ba", size = 2904388, upload-time = "2025-08-27T13:38:51.424Z" }, + { url = "https://files.pythonhosted.org/packages/1d/bf/38bd80d1042646e466c7e2ba760b59cf7268275b03328224efa77235be8a/rapidfuzz-3.14.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:efa7eca15825c78dc2b9e9e5824fa095cef8954de98e5a6d2f4ad2416a3d5ddf", size = 3424872, upload-time = "2025-08-27T13:38:53.049Z" }, + { url = "https://files.pythonhosted.org/packages/c9/81/e67ad350489ca935cd375f1973a2a67956541f1c19ac287c3779887f7ef3/rapidfuzz-3.14.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a780c08c41e7ec4336d7a8fcdcd7920df74de6c57be87b72adad4e1b40a31632", size = 4415393, upload-time = "2025-08-27T13:38:55.831Z" }, + { url = "https://files.pythonhosted.org/packages/39/11/4d7b72ee18b8428cb097107e1f2ce3baeaf944d2d3b48de15d5149361941/rapidfuzz-3.14.0-cp310-cp310-win32.whl", hash = "sha256:cf540e48175c0620639aa4f4e2b56d61291935c0f684469e8e125e7fa4daef65", size = 1840100, 
upload-time = "2025-08-27T13:38:57.385Z" }, + { url = "https://files.pythonhosted.org/packages/f3/87/3ffe0a293301a8a398f885a0cb90e1fed863e9ce3ed9367ff707e9e6a037/rapidfuzz-3.14.0-cp310-cp310-win_amd64.whl", hash = "sha256:e7769fbc78aba051f514d8a08374e3989124b2d1eee6888c72706a174d0e8a6d", size = 1659381, upload-time = "2025-08-27T13:38:59.439Z" }, + { url = "https://files.pythonhosted.org/packages/e2/44/4f2ff0e36ffcb48597c14671680274151cc9268a1ff0d059f9d3f794f0be/rapidfuzz-3.14.0-cp310-cp310-win_arm64.whl", hash = "sha256:71442f5e9fad60a4942df3be340acd5315e59aefc5a83534b6a9aa62db67809d", size = 875041, upload-time = "2025-08-27T13:39:00.901Z" }, + { url = "https://files.pythonhosted.org/packages/52/66/6b4aa4c63d9b22a9851a83f3ed4b52e127a1f655f80ecc4894f807a82566/rapidfuzz-3.14.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6501e49395ad5cecf1623cb4801639faa1c833dbacc07c26fa7b8f7fa19fd1c0", size = 2011991, upload-time = "2025-08-27T13:39:02.27Z" }, + { url = "https://files.pythonhosted.org/packages/ae/b8/a79e997baf4f4467c8428feece5d7b9ac22ff0918ebf793ed247ba5a3f3a/rapidfuzz-3.14.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9c3cd9b8d5e159c67d242f80cae1b9d9b1502779fc69fcd268a1eb7053f58048", size = 1458900, upload-time = "2025-08-27T13:39:03.777Z" }, + { url = "https://files.pythonhosted.org/packages/b5/82/6ca7ebc66d0dd1330e92d08a37412c705d7366216bddd46ca6afcabaa6a0/rapidfuzz-3.14.0-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a578cadbe61f738685ffa20e56e8346847e40ecb033bdc885373a070cfe4a351", size = 1484735, upload-time = "2025-08-27T13:39:05.502Z" }, + { url = "https://files.pythonhosted.org/packages/a8/5d/26eb60bc8eea194a03b32fdd9a4f5866fa9859dcaedf8da1f256dc9a47fc/rapidfuzz-3.14.0-cp311-cp311-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b5b46340872a1736544b23f3c355f292935311623a0e63a271f284ffdbab05e4", size = 1806075, upload-time = "2025-08-27T13:39:07.109Z" }, + { url = "https://files.pythonhosted.org/packages/3a/9c/12f2af41750ae4f30c06d5de1e0f3c4a5f55cbea9dabf3940a096cd8580a/rapidfuzz-3.14.0-cp311-cp311-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:238422749da213c3dfe36397b746aeda8579682e93b723a1e77655182198e693", size = 2358269, upload-time = "2025-08-27T13:39:08.796Z" }, + { url = "https://files.pythonhosted.org/packages/e2/3b/3c1839d51d1dfa768c8274025a36eedc177ed5b43a9d12cc7d91201eca03/rapidfuzz-3.14.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:83f3ad0e7ad3cf1138e36be26f4cacb7580ac0132b26528a89e8168a0875afd8", size = 3313513, upload-time = "2025-08-27T13:39:10.44Z" }, + { url = "https://files.pythonhosted.org/packages/e7/47/ed1384c7c8c39dc36de202860373085ee9c43493d6e9d7bab654d2099da0/rapidfuzz-3.14.0-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:7c34e34fb7e01aeea1e84192cf01daf1d56ccc8a0b34c0833f9799b341c6d539", size = 1320968, upload-time = "2025-08-27T13:39:12.024Z" }, + { url = "https://files.pythonhosted.org/packages/16/0b/3d7458160b5dfe230b05cf8bf62505bf4e2c6d73782dd37248149b43e130/rapidfuzz-3.14.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a58bbbbdd2a150c76c6b3af5ac2bbe9afcff26e6b17e1f60b6bd766cc7094fcf", size = 2507138, upload-time = "2025-08-27T13:39:13.584Z" }, + { url = "https://files.pythonhosted.org/packages/e7/e5/8df797e4f3df2cc308092c5437dda570aa75ea5e5cc3dc1180165fce2332/rapidfuzz-3.14.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:d0e50b4bea57bfcda4afee993eef390fd8f0a64981c971ac4decd9452143892d", size = 2629575, upload-time 
= "2025-08-27T13:39:15.624Z" }, + { url = "https://files.pythonhosted.org/packages/89/f9/e87e94cd6fc22e19a21b44030161b9e9680b5127bcea97aba05be506b66f/rapidfuzz-3.14.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:357eb9d394bfc742d3528e8bb13afa9baebc7fbe863071975426b47fc21db220", size = 2919216, upload-time = "2025-08-27T13:39:17.313Z" }, + { url = "https://files.pythonhosted.org/packages/b5/6e/f20154e8cb7a7c9938241aff7ba0477521bee1f57a57c78706664390a558/rapidfuzz-3.14.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:fb960ec526030077658764a309b60e907d86d898f8efbe959845ec2873e514eb", size = 3435208, upload-time = "2025-08-27T13:39:18.942Z" }, + { url = "https://files.pythonhosted.org/packages/43/43/c2d0e17f75ded0f36ee264fc719f67de3610628d983769179e9d8a44c7db/rapidfuzz-3.14.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6bedb19db81d8d723cc4d914cb079d89ff359364184cc3c3db7cef1fc7819444", size = 4428371, upload-time = "2025-08-27T13:39:20.628Z" }, + { url = "https://files.pythonhosted.org/packages/a6/d7/41f645ad06494a94bafb1be8871585d5723a1f93b34929022014f8f03fef/rapidfuzz-3.14.0-cp311-cp311-win32.whl", hash = "sha256:8dba3d6e10a34aa255a6f6922cf249f8d0b9829e6b00854e371d803040044f7f", size = 1839290, upload-time = "2025-08-27T13:39:22.396Z" }, + { url = "https://files.pythonhosted.org/packages/f3/96/c783107296403cf50acde118596b07aa1af4b0287ac4600b38b0673b1fd7/rapidfuzz-3.14.0-cp311-cp311-win_amd64.whl", hash = "sha256:ce79e37b23c1cbf1dc557159c8f20f6d71e9d28aef63afcf87bcb58c8add096a", size = 1661571, upload-time = "2025-08-27T13:39:24.03Z" }, + { url = "https://files.pythonhosted.org/packages/00/9e/8c562c5d78e31085a07ff1332329711030dd2c25b84c02fb10dcf9be1f64/rapidfuzz-3.14.0-cp311-cp311-win_arm64.whl", hash = "sha256:e140ff4b5d0ea386b998137ddd1335a7bd4201ef987d4cb5a48c3e8c174f8aec", size = 875433, upload-time = "2025-08-27T13:39:26.25Z" }, + { url = "https://files.pythonhosted.org/packages/fa/ca/80c1d697fe42d0caea8d08b0f323b2a4c65a9d057d4d33fe139fd0f1b7d0/rapidfuzz-3.14.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:93c8739f7bf7931d690aeb527c27e2a61fd578f076d542ddd37e29fa535546b6", size = 2000791, upload-time = "2025-08-27T13:39:28.375Z" }, + { url = "https://files.pythonhosted.org/packages/01/01/e980b8d2e85efb4ff1fca26c590d645186a70e51abd4323f29582d41ba9b/rapidfuzz-3.14.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7596e95ab03da6cff70f4ec9a5298b2802e8bdd443159d18180b186c80df1416", size = 1455837, upload-time = "2025-08-27T13:39:29.987Z" }, + { url = "https://files.pythonhosted.org/packages/03/35/3433345c659a4c6cf93b66963ef5ec2d5088d230cbca9f035a3e30d13e70/rapidfuzz-3.14.0-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8cdd49e097ced3746eadb5fb87379f377c0b093f9aba1133ae4f311b574e2ed8", size = 1457107, upload-time = "2025-08-27T13:39:31.991Z" }, + { url = "https://files.pythonhosted.org/packages/2b/27/ac98741cd2696330feb462a37cc9b945cb333a1b39f90216fe1af0568cd6/rapidfuzz-3.14.0-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f4cd4898f21686bb141e151ba920bcd1744cab339277f484c0f97fe7de2c45c8", size = 1767664, upload-time = "2025-08-27T13:39:33.604Z" }, + { url = "https://files.pythonhosted.org/packages/db/1c/1495395016c05fc5d6d0d2622c4854eab160812c4dbc60f5e076116921cf/rapidfuzz-3.14.0-cp312-cp312-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:83427518ad72050add47e2cf581080bde81df7f69882e508da3e08faad166b1f", size = 2329980, upload-time = "2025-08-27T13:39:35.204Z" }, + { url = 
"https://files.pythonhosted.org/packages/9c/e6/587fe4d88eab2a4ea8660744bfebfd0a0d100e7d26fd3fde5062f02ccf84/rapidfuzz-3.14.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05435b4f2472cbf7aac8b837e2e84a165e595c60d79da851da7cfa85ed15895d", size = 3271666, upload-time = "2025-08-27T13:39:36.973Z" }, + { url = "https://files.pythonhosted.org/packages/b4/8e/9928afd7a4727c173de615a4b26e70814ccd9407d87c3c233a01a1b4fc9c/rapidfuzz-3.14.0-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:2dae744c1cdb8b1411ed511a719b505a0348da1970a652bfc735598e68779287", size = 1307744, upload-time = "2025-08-27T13:39:38.825Z" }, + { url = "https://files.pythonhosted.org/packages/e5/5c/03d95b1dc5916e43f505d8bd8da37788b972ccabf14bf3ee0e143b7151d4/rapidfuzz-3.14.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9ca05daaca07232037014fc6ce2c2ef0a05c69712f6a5e77da6da5209fb04d7c", size = 2477512, upload-time = "2025-08-27T13:39:40.881Z" }, + { url = "https://files.pythonhosted.org/packages/96/30/a1da6a124e10fd201a75e68ebf0bdedcf47a3878910c2e05deebf08e9e40/rapidfuzz-3.14.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:2227f4b3742295f380adefef7b6338c30434f8a8e18a11895a1a7c9308b6635d", size = 2613793, upload-time = "2025-08-27T13:39:42.62Z" }, + { url = "https://files.pythonhosted.org/packages/76/56/4776943e4b4130e58ebaf2dbea3ce9f4cb3c6c6a5640dcacb0e84e926190/rapidfuzz-3.14.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:847ea42b5a6077bc796e1b99cd357a641207b20e3573917b0469b28b5a22238a", size = 2880096, upload-time = "2025-08-27T13:39:44.394Z" }, + { url = "https://files.pythonhosted.org/packages/60/cc/25d7faa947d159935cfb0cfc270620f250f033338055702d7e8cc1885e00/rapidfuzz-3.14.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:539506f13cf0dd6ef2f846571f8e116dba32a468e52d05a91161785ab7de2ed1", size = 3413927, upload-time = "2025-08-27T13:39:46.142Z" }, + { url = "https://files.pythonhosted.org/packages/2c/39/3090aeb1ca57a71715f5590a890e45097dbc4862f2c0a5a756e022d0f006/rapidfuzz-3.14.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:03c4b4d4f45f846e4eae052ee18d39d6afe659d74f6d99df5a0d2c5d53930505", size = 4387126, upload-time = "2025-08-27T13:39:48.217Z" }, + { url = "https://files.pythonhosted.org/packages/d8/9b/1dd7bd2824ac7c7daeb6b79c5cf7504c5d2a31b564649457061cc3f8ce9a/rapidfuzz-3.14.0-cp312-cp312-win32.whl", hash = "sha256:aff0baa3980a8aeb2ce5e15930140146b5fe3fb2d63c8dc4cb08dfbd2051ceb2", size = 1804449, upload-time = "2025-08-27T13:39:49.971Z" }, + { url = "https://files.pythonhosted.org/packages/31/32/43074dade26b9a82c5d05262b9179b25ec5d665f18c54f66b64b00791fb4/rapidfuzz-3.14.0-cp312-cp312-win_amd64.whl", hash = "sha256:d1eef7f0694fe4cf991f61adaa040955da1e0072c8c41d7db5eb60e83da9e61b", size = 1656931, upload-time = "2025-08-27T13:39:52.195Z" }, + { url = "https://files.pythonhosted.org/packages/ce/82/c78f0ab282acefab5a55cbbc7741165cad787fce7fbeb0bb5b3903d06749/rapidfuzz-3.14.0-cp312-cp312-win_arm64.whl", hash = "sha256:269d8d1fe5830eef46a165a5c6dd240a05ad44c281a77957461b79cede1ece0f", size = 878656, upload-time = "2025-08-27T13:39:53.816Z" }, + { url = "https://files.pythonhosted.org/packages/04/b1/e6875e32209b28a581d3b8ec1ffded8f674de4a27f4540ec312d0ecf4b83/rapidfuzz-3.14.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5cf3828b8cbac02686e1d5c499c58e43c5f613ad936fe19a2d092e53f3308ccd", size = 2015663, upload-time = "2025-08-27T13:39:55.815Z" }, + { url = 
"https://files.pythonhosted.org/packages/f1/c7/702472c4f3c4e5f9985bb5143405a5c4aadf3b439193f4174944880c50a3/rapidfuzz-3.14.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:68c3931c19c51c11654cf75f663f34c0c7ea04c456c84ccebfd52b2047121dba", size = 1472180, upload-time = "2025-08-27T13:39:57.663Z" }, + { url = "https://files.pythonhosted.org/packages/49/e1/c22fc941b8e506db9a6f051298e17edbae76e1be63e258e51f13791d5eb2/rapidfuzz-3.14.0-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9b4232168959af46f2c0770769e7986ff6084d97bc4b6b2b16b2bfa34164421b", size = 1461676, upload-time = "2025-08-27T13:39:59.409Z" }, + { url = "https://files.pythonhosted.org/packages/97/4c/9dd58e4b4d2b1b7497c35c5280b4fa064bd6e6e3ed5fcf67513faaa2d4f4/rapidfuzz-3.14.0-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:174c784cecfafe22d783b5124ebffa2e02cc01e49ffe60a28ad86d217977f478", size = 1774563, upload-time = "2025-08-27T13:40:01.284Z" }, + { url = "https://files.pythonhosted.org/packages/96/8f/89a39ab5fbd971e6a25431edbbf66e255d271a0b67aadc340b8e8bf573e7/rapidfuzz-3.14.0-cp313-cp313-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0b2dedf216f43a50f227eee841ef0480e29e26b2ce2d7ee680b28354ede18627", size = 2332659, upload-time = "2025-08-27T13:40:03.04Z" }, + { url = "https://files.pythonhosted.org/packages/34/b0/f30f9bae81a472182787641c9c2430da79431c260f7620899a105ee959d0/rapidfuzz-3.14.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5698239eecf5b759630450ef59521ad3637e5bd4afc2b124ae8af2ff73309c41", size = 3289626, upload-time = "2025-08-27T13:40:04.77Z" }, + { url = "https://files.pythonhosted.org/packages/d2/b9/c9eb0bfb62972123a23b31811d4d345e8dd46cb3083d131dd3c1c97b70af/rapidfuzz-3.14.0-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:0acc9553fc26f1c291c381a6aa8d3c5625be23b5721f139528af40cc4119ae1d", size = 1324164, upload-time = "2025-08-27T13:40:06.642Z" }, + { url = "https://files.pythonhosted.org/packages/7f/a1/91bf79a76626bd0dae694ad9c57afdad2ca275f9808f69e570be39a99e71/rapidfuzz-3.14.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:00141dfd3b8c9ae15fbb5fbd191a08bde63cdfb1f63095d8f5faf1698e30da93", size = 2480695, upload-time = "2025-08-27T13:40:08.459Z" }, + { url = "https://files.pythonhosted.org/packages/2f/6a/bfab3575842d8ccc406c3fa8c618b476363e4218a0d01394543c741ef1bd/rapidfuzz-3.14.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:67f725c3f5713da6e0750dc23f65f0f822c6937c25e3fc9ee797aa6783bef8c1", size = 2628236, upload-time = "2025-08-27T13:40:10.27Z" }, + { url = "https://files.pythonhosted.org/packages/5d/10/e7e99ca1a6546645aa21d1b426f728edbfb7a3abcb1a7b7642353b79ae57/rapidfuzz-3.14.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:ba351cf2678d40a23fb4cbfe82cc45ea338a57518dca62a823c5b6381aa20c68", size = 2893483, upload-time = "2025-08-27T13:40:12.079Z" }, + { url = "https://files.pythonhosted.org/packages/00/11/fb46a86659e2bb304764478a28810f36bb56f794087f34a5bd1b81dd0be5/rapidfuzz-3.14.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:558323dcd5fb38737226be84c78cafbe427706e47379f02c57c3e35ac3745061", size = 3411761, upload-time = "2025-08-27T13:40:14.051Z" }, + { url = "https://files.pythonhosted.org/packages/fc/76/89eabf1e7523f6dc996ea6b2bfcfd22565cdfa830c7c3af0ebc5b17e9ce7/rapidfuzz-3.14.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:cb4e4ea174add5183c707d890a816a85e9330f93e5ded139dab182adc727930c", size = 4404126, upload-time = "2025-08-27T13:40:16.39Z" }, 
+ { url = "https://files.pythonhosted.org/packages/c8/6c/ddc7ee86d392908efdf95a1242b87b94523f6feaa368b7a24efa39ecd9d9/rapidfuzz-3.14.0-cp313-cp313-win32.whl", hash = "sha256:ec379e1b407935d729c08da9641cfc5dfb2a7796f74cdd82158ce5986bb8ff88", size = 1828545, upload-time = "2025-08-27T13:40:19.069Z" }, + { url = "https://files.pythonhosted.org/packages/95/47/2a271455b602eef360cd5cc716d370d7ab47b9d57f00263821a217fd30f4/rapidfuzz-3.14.0-cp313-cp313-win_amd64.whl", hash = "sha256:4b59ba48a909bdf7ec5dad6e3a5a0004aeec141ae5ddb205d0c5bd4389894cf9", size = 1658600, upload-time = "2025-08-27T13:40:21.278Z" }, + { url = "https://files.pythonhosted.org/packages/86/47/5acb5d160a091c3175c6f5e3f227ccdf03b201b05ceaad2b8b7f5009ebe9/rapidfuzz-3.14.0-cp313-cp313-win_arm64.whl", hash = "sha256:e688b0a98edea42da450fa6ba41736203ead652a78b558839916c10df855f545", size = 885686, upload-time = "2025-08-27T13:40:23.254Z" }, + { url = "https://files.pythonhosted.org/packages/dc/f2/203c44a06dfefbb580ad7b743333880d600d7bdff693af9d290bd2b09742/rapidfuzz-3.14.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:cb6c5a46444a2787e466acd77e162049f061304025ab24da02b59caedea66064", size = 2041214, upload-time = "2025-08-27T13:40:25.051Z" }, + { url = "https://files.pythonhosted.org/packages/ec/db/6571a5bbba38255ede8098b3b45c007242788e5a5c3cdbe7f6f03dd6daed/rapidfuzz-3.14.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:99ed7a9e9ff798157caf3c3d96ca7da6560878902d8f70fa7731acc94e0d293c", size = 1501621, upload-time = "2025-08-27T13:40:26.881Z" }, + { url = "https://files.pythonhosted.org/packages/0b/85/efbae42fe8ca2bdb967751da1df2e3ebb5be9ea68f22f980731e5c18ce25/rapidfuzz-3.14.0-cp313-cp313t-win32.whl", hash = "sha256:c8e954dd59291ff0cd51b9c0f425e5dc84731bb006dbd5b7846746fe873a0452", size = 1887956, upload-time = "2025-08-27T13:40:29.143Z" }, + { url = "https://files.pythonhosted.org/packages/c8/60/2bb44b5ecb7151093ed7e2020156f260bdd9a221837f57a0bc5938b2b6d1/rapidfuzz-3.14.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5754e3ca259667c46a2b58ca7d7568251d6e23d2f0e354ac1cc5564557f4a32d", size = 1702542, upload-time = "2025-08-27T13:40:31.103Z" }, + { url = "https://files.pythonhosted.org/packages/6f/b7/688e9ab091545ff8eed564994a01309d8a52718211f27af94743d55b3c80/rapidfuzz-3.14.0-cp313-cp313t-win_arm64.whl", hash = "sha256:558865f6825d27006e6ae2e1635cfe236d736c8f2c5c82db6db4b1b6df4478bc", size = 912891, upload-time = "2025-08-27T13:40:33.263Z" }, + { url = "https://files.pythonhosted.org/packages/48/79/7fc4263d071c3cbd645f53084e3cebcae1207bf875798a26618c80c97b99/rapidfuzz-3.14.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:4c9a00ef2f684b1132aeb3c0737483dc8f85a725dbe792aee1d1c3cbcf329b34", size = 1876620, upload-time = "2025-08-27T13:41:17.526Z" }, + { url = "https://files.pythonhosted.org/packages/25/7b/9f0911600d6f8ab1ab03267792e0b60073602aa2fa8c5bf086f2b26a2dee/rapidfuzz-3.14.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:2e203d76b3dcd1b466ee196f7adb71009860906303db274ae20c7c5af62bc1a8", size = 1351893, upload-time = "2025-08-27T13:41:19.629Z" }, + { url = "https://files.pythonhosted.org/packages/5b/a0/70ce2c0ec683b15a6efb647012a6c98dcc66b658e16bb11ebb32cae625b9/rapidfuzz-3.14.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:2b317a71fd938348d8dbbe2f559cda58a67fdcafdd3107afca7ab0fb654efa86", size = 1554510, upload-time = "2025-08-27T13:41:22.217Z" }, + { url = 
"https://files.pythonhosted.org/packages/e2/ed/5b83587b6a6bfe7845ed36286fd5780c00ba93c56463bd501b44617f427b/rapidfuzz-3.14.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:e5d610a2c5efdb2a3f9eaecac4ecd6d849efb2522efa36000e006179062056dc", size = 1888611, upload-time = "2025-08-27T13:41:24.326Z" }, + { url = "https://files.pythonhosted.org/packages/e6/d9/9332a39587a2478470a54218d5f85b5a29b6b3eb02b2310689b59ad3da11/rapidfuzz-3.14.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:c053cad08ab872df4e201daacb66d7fd04b5b4c395baebb193b9910c63ed22ec", size = 1363908, upload-time = "2025-08-27T13:41:26.463Z" }, + { url = "https://files.pythonhosted.org/packages/21/7f/c90f55402b5b43fd5cff42a8dab60373345b8f2697a7b83515eb62666913/rapidfuzz-3.14.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:7e52ac8a458b2f09291fa968b23192d6664c7568a43607de2a51a088d016152d", size = 1555592, upload-time = "2025-08-27T13:41:28.583Z" }, ] [[package]] From 3482f353a9da314b61714ea8c7b7b360fcd69bbe Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 27 Aug 2025 18:02:57 +0200 Subject: [PATCH 12/73] chore: adds extract kg from events and changes temporal tasks call --- cognee/api/v1/cognify/cognify.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index aaf2939ba..dee4e79be 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -22,7 +22,7 @@ from cognee.tasks.graph import extract_graph_from_data from cognee.tasks.storage import add_data_points from cognee.tasks.summarization import summarize_text from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor -from cognee.tasks.temporal_graph import extract_events_and_entities +from cognee.tasks.temporal_graph import extract_events_and_timestamps, extract_knowledge_graph_from_events logger = get_logger("cognify") @@ -180,7 +180,7 @@ async def cognify( - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60) """ if temporal_cognify: - tasks = await get_temporal_tasks(user, graph_model, chunker, chunk_size, ontology_file_path) + tasks = await get_temporal_tasks(user, chunker, chunk_size) else: tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path) @@ -241,7 +241,8 @@ async def get_temporal_tasks( max_chunk_size=chunk_size or get_max_chunk_tokens(), chunker=chunker, ), - Task(extract_events_and_entities, task_config={"chunk_size": 10}), + Task(extract_events_and_timestamps, task_config={"chunk_size": 10}), + Task(extract_knowledge_graph_from_events), Task(add_data_points, task_config={"batch_size": 10}), ] From 7468ef6e538f5259ef2f6d87f256d8beb42f9a0d Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 27 Aug 2025 18:03:38 +0200 Subject: [PATCH 13/73] feat: adds event entity extraction --- cognee/infrastructure/llm/LLMGateway.py | 9 +++++ cognee/infrastructure/llm/config.py | 1 + .../prompts/generate_event_entity_prompt.txt | 25 ++++++++++++++ .../litellm_instructor/extraction/__init__.py | 1 + .../extraction/extract_event_entities.py | 33 +++++++++++++++++++ 5 files changed, 69 insertions(+) create mode 100644 cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt create mode 100644 cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py diff --git a/cognee/infrastructure/llm/LLMGateway.py 
b/cognee/infrastructure/llm/LLMGateway.py index d8364e9ef..2df1fe4f3 100644 --- a/cognee/infrastructure/llm/LLMGateway.py +++ b/cognee/infrastructure/llm/LLMGateway.py @@ -144,3 +144,12 @@ class LLMGateway: ) return extract_event_graph(content=content, response_model=response_model) + + @staticmethod + def extract_event_entities(content: str, response_model: Type[BaseModel]) -> Coroutine: + # TODO: Add BAML version of category and extraction and update function (consulted with Igor) + from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.extraction import ( + extract_event_entities, + ) + + return extract_event_entities(content=content, response_model=response_model) diff --git a/cognee/infrastructure/llm/config.py b/cognee/infrastructure/llm/config.py index 199ede986..7aa8f33f7 100644 --- a/cognee/infrastructure/llm/config.py +++ b/cognee/infrastructure/llm/config.py @@ -53,6 +53,7 @@ class LLMConfig(BaseSettings): transcription_model: str = "whisper-1" graph_prompt_path: str = "generate_graph_prompt.txt" temporal_graph_prompt_path: str = "generate_event_graph_prompt.txt" + event_entity_prompt_path: str = "generate_event_entity_prompt.txt" llm_rate_limit_enabled: bool = False llm_rate_limit_requests: int = 60 llm_rate_limit_interval: int = 60 # in seconds (default is 60 requests per minute) diff --git a/cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt b/cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt new file mode 100644 index 000000000..7a34ef25b --- /dev/null +++ b/cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt @@ -0,0 +1,25 @@ +For the purposes of building event-based knowledge graphs, you are tasked with extracting highly granular entities from events text. An entity is any distinct, identifiable thing, person, place, object, organization, concept, or phenomenon that can be named, referenced, or described in the event context. This includes but is not limited to: people, places, objects, organizations, concepts, events, processes, states, conditions, properties, attributes, roles, functions, and any other meaningful referents that contribute to understanding the event. +**Temporal Entity Exclusion**: Do not extract timestamp-like entities (dates, times, durations) as these are handled separately. However, extract named temporal periods, eras, historical epochs, and culturally significant time references +## Input Format +The input will be a list of dictionaries, each containing: +- `event_name`: The name of the event +- `description`: The description of the event +## Task +For each event, extract all entities mentioned in the event description and determine their relationship to the event. +## Output Format +Return the same enriched JSON with an additional key in each dictionary: `attributes`. +The `attributes` should be a list of dictionaries, each containing: +- `entity`: The name of the entity +- `entity_type`: The type/category of the entity (person, place, organization, object, concept, etc.) 
+- `relationship`: A concise description of how the entity relates to the event +## Requirements +- **Be extremely thorough** - extract EVERY non-temporal entity mentioned, no matter how small, obvious, or seemingly insignificant +- **After you are done with obvious entities, every noun, pronoun, proper noun, and named reference = one entity** +- We expect rich entity networks from any event, easily reaching dozens of entities per event +- Granularity and richness of the entity extraction is key to our success and is of utmost importance +- **Do not skip any entities** - if you're unsure whether something is an entity, extract it anyway +- Use the event name for context when determining relationships +- Relationships should be technical with one or at most two words. If two words, use underscore-separated (snake_case) style +- Relationships could imply general meaning like: subject, object, participant, recipient, agent, instrument, tool, source, cause, effect, purpose, manner, resource, etc. +- You can combine two words to form a relationship name: subject_role, previous_owner, etc. +- Focus on how the entity specifically relates to the event \ No newline at end of file diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py index 002246a77..24006c046 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py @@ -2,3 +2,4 @@ from .knowledge_graph.extract_content_graph import extract_content_graph from .knowledge_graph.extract_event_graph import extract_event_graph from .extract_categories import extract_categories from .extract_summary import extract_summary, extract_code_summary +from .extract_event_entities import extract_event_entities \ No newline at end of file diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py new file mode 100644 index 000000000..123c05269 --- /dev/null +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py @@ -0,0 +1,33 @@ +import os +from typing import List, Type +from pydantic import BaseModel +from cognee.infrastructure.llm.LLMGateway import LLMGateway +from cognee.infrastructure.llm.config import ( + get_llm_config, +) + + +async def extract_event_entities( + content: str, response_model: Type[BaseModel] +): + """Extract event entities from content using LLM.""" + llm_config = get_llm_config() + + prompt_path = llm_config.event_entity_prompt_path + + # Check if the prompt path is an absolute path or just a filename + if os.path.isabs(prompt_path): + # directory containing the file + base_directory = os.path.dirname(prompt_path) + # just the filename itself + prompt_path = os.path.basename(prompt_path) + else: + base_directory = None + + system_prompt = LLMGateway.render_prompt(prompt_path, {}, base_directory=base_directory) + + content_graph = await LLMGateway.acreate_structured_output( + content, system_prompt, response_model + ) + + return content_graph \ No newline at end of file From 97abdeeb2a81e72548fd01c2de918e98f6c9fb1b Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 27 Aug 2025 
18:04:10 +0200 Subject: [PATCH 14/73] feat: adds entity kg from events logic --- cognee/tasks/temporal_graph/__init__.py | 3 +- .../temporal_graph/add_entities_to_event.py | 55 +++++++++++++++++++ cognee/tasks/temporal_graph/enrich_events.py | 21 +++++++ .../extract_events_and_entities.py | 2 +- .../extract_knowledge_graph_from_events.py | 26 +++++++++ 5 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 cognee/tasks/temporal_graph/add_entities_to_event.py create mode 100644 cognee/tasks/temporal_graph/enrich_events.py create mode 100644 cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py diff --git a/cognee/tasks/temporal_graph/__init__.py b/cognee/tasks/temporal_graph/__init__.py index 163fb6840..991553605 100644 --- a/cognee/tasks/temporal_graph/__init__.py +++ b/cognee/tasks/temporal_graph/__init__.py @@ -1,2 +1,3 @@ -from .extract_events_and_entities import extract_events_and_entities +from .extract_events_and_entities import extract_events_and_timestamps +from .extract_knowledge_graph_from_events import extract_knowledge_graph_from_events diff --git a/cognee/tasks/temporal_graph/add_entities_to_event.py b/cognee/tasks/temporal_graph/add_entities_to_event.py new file mode 100644 index 000000000..5585a1b50 --- /dev/null +++ b/cognee/tasks/temporal_graph/add_entities_to_event.py @@ -0,0 +1,55 @@ +from cognee.modules.engine.models import Event +from cognee.tasks.temporal_graph.models import EventWithEntities +from cognee.modules.engine.models.Entity import Entity +from cognee.modules.engine.models.EntityType import EntityType +from cognee.infrastructure.engine.models.Edge import Edge +from cognee.modules.engine.utils import generate_node_id, generate_node_name + +def add_entities_to_event(event: Event, event_with_entities: EventWithEntities) -> None: + """Add entities to event via attributes field.""" + if not event_with_entities.attributes: + return + + # Create entity types cache + entity_types = {} + + # Process each attribute + for attribute in event_with_entities.attributes: + # Get or create entity type + entity_type = get_or_create_entity_type(entity_types, attribute.entity_type) + + # Create entity + entity_id = generate_node_id(attribute.entity) + entity_name = generate_node_name(attribute.entity) + entity = Entity( + id=entity_id, + name=entity_name, + is_a=entity_type, + description=f"Entity {attribute.entity} of type {attribute.entity_type}", + ontology_valid=False, + belongs_to_set=None, + ) + + # Create edge + edge = Edge(relationship_type=attribute.relationship) + + # Add to event attributes + if event.attributes is None: + event.attributes = [] + event.attributes.append((edge, [entity])) + +def get_or_create_entity_type(entity_types: dict, entity_type_name: str) -> EntityType: + """Get existing entity type or create new one.""" + if entity_type_name not in entity_types: + type_id = generate_node_id(entity_type_name) + type_name = generate_node_name(entity_type_name) + entity_type = EntityType( + id=type_id, + name=type_name, + type=type_name, + description=f"Type for {entity_type_name}", + ontology_valid=False, + ) + entity_types[entity_type_name] = entity_type + + return entity_types[entity_type_name] \ No newline at end of file diff --git a/cognee/tasks/temporal_graph/enrich_events.py b/cognee/tasks/temporal_graph/enrich_events.py new file mode 100644 index 000000000..4c9edb2bb --- /dev/null +++ b/cognee/tasks/temporal_graph/enrich_events.py @@ -0,0 +1,21 @@ +from typing import List + +from cognee.infrastructure.llm import LLMGateway 
+from cognee.modules.engine.models import Event +from cognee.tasks.temporal_graph.models import EventWithEntities,EventEntityList + +async def enrich_events(events: List[Event]) -> List[EventWithEntities]: + """Extract entities from events and return enriched events.""" + import json + + # Convert events to JSON format for LLM processing + events_json = [ + {"event_name": event.name, "description": event.description or ""} for event in events + ] + + events_json_str = json.dumps(events_json) + + # Extract entities from events + entity_result = await LLMGateway.extract_event_entities(events_json_str, EventEntityList) + + return entity_result.events \ No newline at end of file diff --git a/cognee/tasks/temporal_graph/extract_events_and_entities.py b/cognee/tasks/temporal_graph/extract_events_and_entities.py index 37e113d56..bf4367f6a 100644 --- a/cognee/tasks/temporal_graph/extract_events_and_entities.py +++ b/cognee/tasks/temporal_graph/extract_events_and_entities.py @@ -6,7 +6,7 @@ from cognee.tasks.temporal_graph.models import EventList from cognee.modules.engine.utils.generate_event_datapoint import generate_event_datapoint -async def extract_events_and_entities(data_chunks: List[DocumentChunk]) -> List[DocumentChunk]: +async def extract_events_and_timestamps(data_chunks: List[DocumentChunk]) -> List[DocumentChunk]: """Extracts events and entities from a chunk of documents.""" events = await asyncio.gather( *[LLMGateway.extract_event_graph(chunk.text, EventList) for chunk in data_chunks] diff --git a/cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py b/cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py new file mode 100644 index 000000000..0e49c5296 --- /dev/null +++ b/cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py @@ -0,0 +1,26 @@ +from typing import List +from cognee.modules.chunking.models import DocumentChunk +from cognee.modules.engine.models import Event +from cognee.tasks.temporal_graph.enrich_events import enrich_events +from cognee.tasks.temporal_graph.add_entities_to_event import add_entities_to_event + +async def extract_knowledge_graph_from_events(data_chunks: List[DocumentChunk]) -> List[DocumentChunk]: + """Extract events from chunks and enrich them with entities.""" + # Extract events from chunks + all_events = [] + for chunk in data_chunks: + for item in chunk.contains: + if isinstance(item, Event): + all_events.append(item) + + if not all_events: + return data_chunks + + # Enrich events with entities + enriched_events = await enrich_events(all_events) + + # Add entities to events + for event, enriched_event in zip(all_events, enriched_events): + add_entities_to_event(event, enriched_event) + + return data_chunks \ No newline at end of file From 8999f826c76d66899e6ad5a4c3c669b62b947bde Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 27 Aug 2025 18:04:21 +0200 Subject: [PATCH 15/73] feat: adds temporal example --- examples/python/temporal_example.py | 119 ++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 examples/python/temporal_example.py diff --git a/examples/python/temporal_example.py b/examples/python/temporal_example.py new file mode 100644 index 000000000..c61c80ac4 --- /dev/null +++ b/examples/python/temporal_example.py @@ -0,0 +1,119 @@ +import asyncio +import cognee +from cognee.shared.logging_utils import setup_logging, INFO + + +import json +from pathlib import Path + + +biography_1 = """ + Attaphol Buspakom Attaphol Buspakom ( ; ) 
, nicknamed Tak ( ; ) ; 1 October 1962 – 16 April 2015 ) was a Thai national and football coach . He was given the role at Muangthong United and Buriram United after TTM Samut Sakhon folded after the 2009 season . He played for the Thailand national football team , appearing in several FIFA World Cup qualifying matches . + + Club career . + Attaphol began his career as a player at Thai Port FC Authority of Thailand in 1985 . In his first year , he won his first championship with the club . He played for the club until 1989 and in 1987 also won the Queens Cup . He then moved to Malaysia for two seasons for Pahang FA , then return to Thailand to his former club . His time from 1991 to 1994 was marked by less success than in his first stay at Port Authority . From 1994 to 1996 he played for Pahang again and this time he was able to win with the club , the Malaysia Super League and also reached the final of the Malaysia Cup and the Malaysia FA Cup . Both cup finals but lost . Back in Thailand , he let end his playing career at FC Stock Exchange of Thailand , with which he once again runner-up in 1996-97 . In 1998 , he finished his career . + + International career . + For the Thailand national football team Attaphol played between 1985 and 1998 a total of 85 games and scored 13 results . In 1992 , he participated with the team in the finals of the Asian Cup . He also stood in various cadres to qualifications to FIFA World Cup . + + Coaching career . + Bec Tero Sasana . + In BEC Tero Sasana F.C . began his coaching career in 2001 for him , first as assistant coach . He took over the reigning champions of the Thai League T1 , after his predecessor Pichai Pituwong resigned from his post . It was his first coach station and he had the difficult task of leading the club through the new AFC Champions League . He could accomplish this task with flying colors and even led the club to the finals . The finale , then still played in home and away matches , was lost with 1:2 at the end against Al Ain FC . Attaphol is and was next to Charnwit Polcheewin the only coach who managed a club from Thailand to lead to the final of the AFC Champions League . 2002-03 and 2003-04 he won with the club also two runner-up . In his team , which reached the final of the Champions League , were a number of exceptional players like Therdsak Chaiman , Worrawoot Srimaka , Dusit Chalermsan and Anurak Srikerd . + + Geylang United / Krung Thai Bank . + In 2006 , he went to Singapore in the S-League to Geylang United He was released after a few months due to lack of success . In 2008 , he took over as coach at Krung Thai Bank F.C. , where he had almost a similar task , as a few years earlier by BEC-Tero . As vice-champion of the club was also qualified for the AFC Champions League . However , he failed to lead the team through the group stage of the season 2008 and beyond . With the Kashima Antlers of Japan and Beijing Guoan F.C . athletic competition was too great . One of the highlights was put under his leadership , yet the club . In the group match against the Vietnam club Nam Dinh F.C . his team won with 9-1 , but also lost four weeks later with 1-8 against Kashima Antlers . At the end of the National Football League season , he reached the Krung Thai 6th Table space . The Erstligalizenz the club was sold at the end of the season at the Bangkok Glass F.C. . Attaphol finished his coaching career with the club and accepted an offer of TTM Samutsakorn . After only a short time in office + + Muangthong United . 
+ In 2009 , he received an offer from Muangthong United F.C. , which he accepted and changed . He can champion Muang Thong United for 2009 Thai Premier League and Attaphol won Coach of The year for Thai Premier League and he was able to lead Muang Thong United to play AFC Champions League qualifying play-off for the first in the clubs history . + + Buriram United . + In 2010 Buspakom moved from Muangthong United to Buriram United F.C. . He received Coach of the Month in Thai Premier League 2 time in June and October . In 2011 , he led Buriram United win 2011 Thai Premier League second time for club and set a record with the most points in the Thai League T1 for 85 point and He led Buriram win 2011 Thai FA Cup by beat Muangthong United F.C . 1-0 and he led Buriram win 2011 Thai League Cup by beat Thai Port F.C . 2-0 . In 2012 , he led Buriram United to the 2012 AFC Champions League group stage . Buriram along with Guangzhou Evergrande F.C . from China , Kashiwa Reysol from Japan and Jeonbuk Hyundai Motors which are all champions from their country . In the first match of Buriram they beat Kashiwa 3-2 and Second Match they beat Guangzhou 1-2 at the Tianhe Stadium . Before losing to Jeonbuk 0-2 and 3-2 with lose Kashiwa and Guangzhou 1-0 and 1-2 respectively and Thai Premier League Attaphol lead Buriram end 4th for table with win 2012 Thai FA Cup and 2012 Thai League Cup . + + Bangkok Glass . + In 2013 , he moved from Buriram United to Bangkok Glass F.C. . + + Personal life . + Attaphols sons , Wannaphon Buspakom and Kanokpon Buspakom , are professional footballers . + + Honours . + Player . + Thai Port - Kor Royal Cup - Winners ( 2 ) : 1985 , 1990 + Pahang FA - Malaysia Super League - Champions ( 1 ) : 1995 + Thailand - Sea Games - Gold Medal ( 1 ) ; 1993 - Silver Medal ( 1 ) ; 1991 + + Manager . + BEC Tero Sasana - AFC Champions League - Runner-up ( 1 ) : 2002-03 + - ASEAN Club Championship - Runner-up ( 1 ) : 2003 + Muangthong United - Thai Premier League - Champions ( 1 ) : 2009 + Buriram United - Thai Premier League - Champions ( 1 ) : 2011 + - Thai FA Cup - Winners ( 2 ) : 2011 , 2012 + - Thai League Cup - Winners ( 2 ) : 2011 , 2012 + - Toyota Premier Cup - Winner ( 1 ) : 2011 + - Kor Royal Cup - Winner ( 1 ) : 2013 + + Individual + - Thai Premier League Coach of the Year ( 3 ) : 2001-02 , 2009 , 2013 + """ + +biography_2 = """ + Arnulf Øverland Ole Peter Arnulf Øverland ( 27 April 1889 – 25 March 1968 ) was a Norwegian poet and artist . He is principally known for his poetry which served to inspire the Norwegian resistance movement during the German occupation of Norway during World War II . + + Biography . + Øverland was born in Kristiansund and raised in Bergen . His parents were Peter Anton Øverland ( 1852–1906 ) and Hanna Hage ( 1854–1939 ) . The early death of his father , left the family economically stressed . He was able to attend Bergen Cathedral School and in 1904 Kristiania Cathedral School . He graduated in 1907 and for a time studied philology at University of Kristiania . Øverland published his first collection of poems ( 1911 ) . + + Øverland became a communist sympathizer from the early 1920s and became a member of Mot Dag . He also served as chairman of the Norwegian Students Society 1923–28 . He changed his stand in 1937 , partly as an expression of dissent against the ongoing Moscow Trials . He was an avid opponent of Nazism and in 1936 he wrote the poem Du må ikke sove which was printed in the journal Samtiden . It ends with . ( I thought: : Something is imminent . 
Our era is over – Europe’s on fire! ) . Probably the most famous line of the poem is ( You mustnt endure so well the injustice that doesnt affect you yourself! ) + + During the German occupation of Norway from 1940 in World War II , he wrote to inspire the Norwegian resistance movement . He wrote a series of poems which were clandestinely distributed , leading to the arrest of both him and his future wife Margrete Aamot Øverland in 1941 . Arnulf Øverland was held first in the prison camp of Grini before being transferred to Sachsenhausen concentration camp in Germany . He spent a four-year imprisonment until the liberation of Norway in 1945 . His poems were later collected in Vi overlever alt and published in 1945 . + + Øverland played an important role in the Norwegian language struggle in the post-war era . He became a noted supporter for the conservative written form of Norwegian called Riksmål , he was president of Riksmålsforbundet ( an organization in support of Riksmål ) from 1947 to 1956 . In addition , Øverland adhered to the traditionalist style of writing , criticising modernist poetry on several occasions . His speech Tungetale fra parnasset , published in Arbeiderbladet in 1954 , initiated the so-called Glossolalia debate . + + Personal life . + In 1918 he had married the singer Hildur Arntzen ( 1888–1957 ) . Their marriage was dissolved in 1939 . In 1940 , he married Bartholine Eufemia Leganger ( 1903–1995 ) . They separated shortly after , and were officially divorced in 1945 . Øverland was married to journalist Margrete Aamot Øverland ( 1913–1978 ) during June 1945 . In 1946 , the Norwegian Parliament arranged for Arnulf and Margrete Aamot Øverland to reside at the Grotten . He lived there until his death in 1968 and she lived there for another ten years until her death in 1978 . Arnulf Øverland was buried at Vår Frelsers Gravlund in Oslo . Joseph Grimeland designed the bust of Arnulf Øverland ( bronze , 1970 ) at his grave site . + + Famous Quotes . + - “For a “monotheistic” religion it should be sufficient with three gods.” + - “What is there to be said about a Church which certainly promises its believers eternal salvation , but at the same time condemns the non-believers , all those who think differently , to an eternal torment in hell ? – If that Church absolutely must talk about love , then it should do so very quietly.” + + Selected Works . + - Den ensomme fest ( 1911 ) + - Berget det blå ( 1927 ) + - En Hustavle ( 1929 ) + - Den røde front ( 1937 ) + - Vi overlever alt ( 1945 ) + - Sverdet bak døren ( 1956 ) + - Livets minutter ( 1965 ) + + Awards . + - Gyldendals Endowment ( 1935 ) + - Dobloug Prize ( 1951 ) + - Mads Wiel Nygaards legat ( 1961 ) + + Other sources . + - Hambro , Carl ( 1984 ) Arnulf Øverland : det brennende hjerte ( Oslo : Aschehoug ) + + External links . + - Du må ikke sove ! 
+ - Translation of Du må ikke sove by Lars-Toralf Storstrand + - Kristendommen , den tiende landeplage - Christianity , the tenth plague + """ + + +async def main(): + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + + await cognee.add([biography_1, biography_2]) + await cognee.cognify(temporal_cognify=True) + + print() + + +if __name__ == "__main__": + logger = setup_logging(log_level=INFO) + + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + loop.run_until_complete(main()) + finally: + loop.run_until_complete(loop.shutdown_asyncgens()) \ No newline at end of file From 58a3be7c126b2d3d14ae47e53d2891ae4d12cd5b Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 27 Aug 2025 18:04:58 +0200 Subject: [PATCH 16/73] ruff format --- cognee/api/v1/cognify/cognify.py | 6 +++++- .../litellm_instructor/extraction/__init__.py | 2 +- .../extraction/extract_event_entities.py | 6 ++---- .../extraction/knowledge_graph/extract_event_graph.py | 3 ++- cognee/modules/engine/models/Event.py | 2 +- cognee/modules/engine/models/Interval.py | 3 ++- cognee/modules/engine/models/Timestamp.py | 2 +- cognee/modules/engine/utils/generate_event_datapoint.py | 3 ++- .../modules/engine/utils/generate_timestamp_datapoint.py | 4 +++- cognee/tasks/temporal_graph/__init__.py | 1 - cognee/tasks/temporal_graph/add_entities_to_event.py | 4 +++- cognee/tasks/temporal_graph/enrich_events.py | 5 +++-- cognee/tasks/temporal_graph/extract_events_and_entities.py | 2 +- .../temporal_graph/extract_knowledge_graph_from_events.py | 7 +++++-- cognee/tasks/temporal_graph/models.py | 3 +-- examples/python/temporal_example.py | 2 +- 16 files changed, 33 insertions(+), 22 deletions(-) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index dee4e79be..a0803ff96 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -22,7 +22,10 @@ from cognee.tasks.graph import extract_graph_from_data from cognee.tasks.storage import add_data_points from cognee.tasks.summarization import summarize_text from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor -from cognee.tasks.temporal_graph import extract_events_and_timestamps, extract_knowledge_graph_from_events +from cognee.tasks.temporal_graph import ( + extract_events_and_timestamps, + extract_knowledge_graph_from_events, +) logger = get_logger("cognify") @@ -230,6 +233,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's return default_tasks + async def get_temporal_tasks( user: User = None, chunker=TextChunker, chunk_size: int = None ) -> list[Task]: diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py index 24006c046..72e3c755f 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py @@ -2,4 +2,4 @@ from .knowledge_graph.extract_content_graph import extract_content_graph from .knowledge_graph.extract_event_graph import extract_event_graph from .extract_categories import extract_categories from .extract_summary import extract_summary, extract_code_summary -from .extract_event_entities import extract_event_entities \ No newline at end of file +from .extract_event_entities 
import extract_event_entities diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py index 123c05269..ad33863b0 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py @@ -7,9 +7,7 @@ from cognee.infrastructure.llm.config import ( ) -async def extract_event_entities( - content: str, response_model: Type[BaseModel] -): +async def extract_event_entities(content: str, response_model: Type[BaseModel]): """Extract event entities from content using LLM.""" llm_config = get_llm_config() @@ -30,4 +28,4 @@ async def extract_event_entities( content, system_prompt, response_model ) - return content_graph \ No newline at end of file + return content_graph diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py index 2a0c0cab8..0373649f2 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py @@ -7,6 +7,7 @@ from cognee.infrastructure.llm.config import ( get_llm_config, ) + async def extract_event_graph( content: str, response_model: Type[BaseModel], system_prompt: str = None ): @@ -31,4 +32,4 @@ async def extract_event_graph( content, system_prompt, response_model ) - return content_graph \ No newline at end of file + return content_graph diff --git a/cognee/modules/engine/models/Event.py b/cognee/modules/engine/models/Event.py index 88141e602..4a0bab830 100644 --- a/cognee/modules/engine/models/Event.py +++ b/cognee/modules/engine/models/Event.py @@ -13,4 +13,4 @@ class Event(DataPoint): location: Optional[str] = None attributes: SkipValidation[Any] = None - metadata: dict = {"index_fields": ["name"]} \ No newline at end of file + metadata: dict = {"index_fields": ["name"]} diff --git a/cognee/modules/engine/models/Interval.py b/cognee/modules/engine/models/Interval.py index 3666bf69d..914bc62ea 100644 --- a/cognee/modules/engine/models/Interval.py +++ b/cognee/modules/engine/models/Interval.py @@ -2,6 +2,7 @@ from pydantic import Field from cognee.infrastructure.engine import DataPoint from cognee.modules.engine.models.Timestamp import Timestamp + class Interval(DataPoint): time_from: Timestamp = Field(...) - time_to: Timestamp = Field(...) \ No newline at end of file + time_to: Timestamp = Field(...) diff --git a/cognee/modules/engine/models/Timestamp.py b/cognee/modules/engine/models/Timestamp.py index 38977c348..31779683a 100644 --- a/cognee/modules/engine/models/Timestamp.py +++ b/cognee/modules/engine/models/Timestamp.py @@ -10,4 +10,4 @@ class Timestamp(DataPoint): hour: int = Field(...) minute: int = Field(...) second: int = Field(...) - timestamp_str: str = Field(...) \ No newline at end of file + timestamp_str: str = Field(...) 
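Note on the two hunks that follow: generate_timestamp_datapoint's date_to_int helper encodes a Timestamp as UTC epoch milliseconds. A minimal, self-contained sketch of that encoding is below; the helper name to_epoch_millis and the sample values are hypothetical illustrations, not code from this patch series:

    from datetime import datetime, timezone

    # Sketch of the UTC epoch-millisecond encoding used by date_to_int below.
    # to_epoch_millis is a hypothetical stand-in, not part of the patch.
    def to_epoch_millis(
        year: int, month: int, day: int, hour: int = 0, minute: int = 0, second: int = 0
    ) -> int:
        dt = datetime(year, month, day, hour, minute, second, tzinfo=timezone.utc)
        return int(dt.timestamp() * 1000)

    assert to_epoch_millis(1970, 1, 1) == 0
    assert to_epoch_millis(2011, 1, 1) == 1293840000000  # 2011-01-01T00:00:00Z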
diff --git a/cognee/modules/engine/utils/generate_event_datapoint.py b/cognee/modules/engine/utils/generate_event_datapoint.py index aeec325d9..cc56763ae 100644 --- a/cognee/modules/engine/utils/generate_event_datapoint.py +++ b/cognee/modules/engine/utils/generate_event_datapoint.py @@ -1,6 +1,7 @@ from cognee.modules.engine.models import Interval, Event from cognee.modules.engine.utils.generate_timestamp_datapoint import generate_timestamp_datapoint + def generate_event_datapoint(event) -> Event: """Create an Event datapoint from an event model.""" # Base event data @@ -27,4 +28,4 @@ def generate_event_datapoint(event) -> Event: temporal_info = f"\n---\nTime data: {timestamp.timestamp_str}" event_data["description"] = (event_data["description"] or "Event") + temporal_info - return Event(**event_data) \ No newline at end of file + return Event(**event_data) diff --git a/cognee/modules/engine/utils/generate_timestamp_datapoint.py b/cognee/modules/engine/utils/generate_timestamp_datapoint.py index cbef2d177..6f2cdf6d1 100644 --- a/cognee/modules/engine/utils/generate_timestamp_datapoint.py +++ b/cognee/modules/engine/utils/generate_timestamp_datapoint.py @@ -2,6 +2,7 @@ from datetime import datetime, timezone from cognee.modules.engine.models import Interval, Timestamp, Event from cognee.modules.engine.utils import generate_node_id + def generate_timestamp_datapoint(ts: Timestamp) -> Timestamp: """Create a Timestamp datapoint from a Timestamp model.""" time_at = date_to_int(ts) @@ -20,8 +21,9 @@ def generate_timestamp_datapoint(ts: Timestamp) -> Timestamp: timestamp_str=timestamp_str, ) + def date_to_int(ts: Timestamp) -> int: """Convert timestamp to integer milliseconds.""" dt = datetime(ts.year, ts.month, ts.day, ts.hour, ts.minute, ts.second, tzinfo=timezone.utc) time = int(dt.timestamp() * 1000) - return time \ No newline at end of file + return time diff --git a/cognee/tasks/temporal_graph/__init__.py b/cognee/tasks/temporal_graph/__init__.py index 991553605..11d812541 100644 --- a/cognee/tasks/temporal_graph/__init__.py +++ b/cognee/tasks/temporal_graph/__init__.py @@ -1,3 +1,2 @@ from .extract_events_and_entities import extract_events_and_timestamps from .extract_knowledge_graph_from_events import extract_knowledge_graph_from_events - diff --git a/cognee/tasks/temporal_graph/add_entities_to_event.py b/cognee/tasks/temporal_graph/add_entities_to_event.py index 5585a1b50..2cb4b1425 100644 --- a/cognee/tasks/temporal_graph/add_entities_to_event.py +++ b/cognee/tasks/temporal_graph/add_entities_to_event.py @@ -5,6 +5,7 @@ from cognee.modules.engine.models.EntityType import EntityType from cognee.infrastructure.engine.models.Edge import Edge from cognee.modules.engine.utils import generate_node_id, generate_node_name + def add_entities_to_event(event: Event, event_with_entities: EventWithEntities) -> None: """Add entities to event via attributes field.""" if not event_with_entities.attributes: @@ -38,6 +39,7 @@ def add_entities_to_event(event: Event, event_with_entities: EventWithEntities) event.attributes = [] event.attributes.append((edge, [entity])) + def get_or_create_entity_type(entity_types: dict, entity_type_name: str) -> EntityType: """Get existing entity type or create new one.""" if entity_type_name not in entity_types: @@ -52,4 +54,4 @@ def get_or_create_entity_type(entity_types: dict, entity_type_name: str) -> Enti ) entity_types[entity_type_name] = entity_type - return entity_types[entity_type_name] \ No newline at end of file + return entity_types[entity_type_name] diff 
--git a/cognee/tasks/temporal_graph/enrich_events.py b/cognee/tasks/temporal_graph/enrich_events.py index 4c9edb2bb..bedd642eb 100644 --- a/cognee/tasks/temporal_graph/enrich_events.py +++ b/cognee/tasks/temporal_graph/enrich_events.py @@ -2,7 +2,8 @@ from typing import List from cognee.infrastructure.llm import LLMGateway from cognee.modules.engine.models import Event -from cognee.tasks.temporal_graph.models import EventWithEntities,EventEntityList +from cognee.tasks.temporal_graph.models import EventWithEntities, EventEntityList + async def enrich_events(events: List[Event]) -> List[EventWithEntities]: """Extract entities from events and return enriched events.""" @@ -18,4 +19,4 @@ async def enrich_events(events: List[Event]) -> List[EventWithEntities]: # Extract entities from events entity_result = await LLMGateway.extract_event_entities(events_json_str, EventEntityList) - return entity_result.events \ No newline at end of file + return entity_result.events diff --git a/cognee/tasks/temporal_graph/extract_events_and_entities.py b/cognee/tasks/temporal_graph/extract_events_and_entities.py index bf4367f6a..de0cdd601 100644 --- a/cognee/tasks/temporal_graph/extract_events_and_entities.py +++ b/cognee/tasks/temporal_graph/extract_events_and_entities.py @@ -17,4 +17,4 @@ async def extract_events_and_timestamps(data_chunks: List[DocumentChunk]) -> Lis event_datapoint = generate_event_datapoint(event) data_chunk.contains.append(event_datapoint) - return data_chunks \ No newline at end of file + return data_chunks diff --git a/cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py b/cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py index 0e49c5296..8cbcc3c22 100644 --- a/cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py +++ b/cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py @@ -4,7 +4,10 @@ from cognee.modules.engine.models import Event from cognee.tasks.temporal_graph.enrich_events import enrich_events from cognee.tasks.temporal_graph.add_entities_to_event import add_entities_to_event -async def extract_knowledge_graph_from_events(data_chunks: List[DocumentChunk]) -> List[DocumentChunk]: + +async def extract_knowledge_graph_from_events( + data_chunks: List[DocumentChunk], +) -> List[DocumentChunk]: """Extract events from chunks and enrich them with entities.""" # Extract events from chunks all_events = [] @@ -23,4 +26,4 @@ async def extract_knowledge_graph_from_events(data_chunks: List[DocumentChunk]) for event, enriched_event in zip(all_events, enriched_events): add_entities_to_event(event, enriched_event) - return data_chunks \ No newline at end of file + return data_chunks diff --git a/cognee/tasks/temporal_graph/models.py b/cognee/tasks/temporal_graph/models.py index 3818110c5..ef5cd42c9 100644 --- a/cognee/tasks/temporal_graph/models.py +++ b/cognee/tasks/temporal_graph/models.py @@ -2,7 +2,6 @@ from typing import Optional, List from pydantic import BaseModel, Field - class Timestamp(BaseModel): year: int = Field(..., ge=1, le=9999) month: int = Field(..., ge=1, le=12) @@ -47,4 +46,4 @@ class EventWithEntities(BaseModel): class EventEntityList(BaseModel): - events: List[EventWithEntities] \ No newline at end of file + events: List[EventWithEntities] diff --git a/examples/python/temporal_example.py b/examples/python/temporal_example.py index c61c80ac4..c4c1c9875 100644 --- a/examples/python/temporal_example.py +++ b/examples/python/temporal_example.py @@ -116,4 +116,4 @@ if __name__ == "__main__": try: 
loop.run_until_complete(main()) finally: - loop.run_until_complete(loop.shutdown_asyncgens()) \ No newline at end of file + loop.run_until_complete(loop.shutdown_asyncgens()) From 70727332eecbbf9a6fa5d98d1a63205dd1cc68ea Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 27 Aug 2025 18:08:16 +0200 Subject: [PATCH 17/73] ruff format --- cognee/api/v1/cognify/cognify.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index cf3aa254a..42f1b51e3 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -190,7 +190,9 @@ async def cognify( if temporal_cognify: tasks = await get_temporal_tasks(user, chunker, chunk_size) else: - tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt) + tasks = await get_default_tasks( + user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt + ) # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background) From 2d2a7d69d35e241d228395ae590bcc396a3cf06f Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 27 Aug 2025 19:08:01 +0200 Subject: [PATCH 18/73] fix: adjusting test to the new Optional DocumentChunk property --- .../retrieval/chunks_retriever_test.py | 20 +++++++++++++++++-- .../rag_completion_retriever_test.py | 20 +++++++++++++++++-- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/cognee/tests/unit/modules/retrieval/chunks_retriever_test.py b/cognee/tests/unit/modules/retrieval/chunks_retriever_test.py index f763cafd6..44786f79d 100644 --- a/cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +++ b/cognee/tests/unit/modules/retrieval/chunks_retriever_test.py @@ -1,7 +1,7 @@ import os import pytest import pathlib - +from typing import List import cognee from cognee.low_level import setup from cognee.tasks.storage import add_data_points @@ -10,6 +10,20 @@ from cognee.modules.chunking.models import DocumentChunk from cognee.modules.data.processing.document_types import TextDocument from cognee.modules.retrieval.exceptions.exceptions import NoDataError from cognee.modules.retrieval.chunks_retriever import ChunksRetriever +from cognee.infrastructure.engine import DataPoint +from cognee.modules.data.processing.document_types import Document +from cognee.modules.engine.models import Entity + + +class DocumentChunkWithEntities(DataPoint): + text: str + chunk_size: int + chunk_index: int + cut_type: str + is_part_of: Document + contains: List[Entity] = None + + metadata: dict = {"index_fields": ["text"]} class TestChunksRetriever: @@ -179,7 +193,9 @@ class TestChunksRetriever: await retriever.get_context("Christina Mayer") vector_engine = get_vector_engine() - await vector_engine.create_collection("DocumentChunk_text", payload_schema=DocumentChunk) + await vector_engine.create_collection( + "DocumentChunk_text", payload_schema=DocumentChunkWithEntities + ) context = await retriever.get_context("Christina Mayer") assert len(context) == 0, "Found chunks when none should exist" diff --git a/cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py b/cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py index 356aed4d3..252af8352 100644 --- 
a/cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +++ b/cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py @@ -1,7 +1,7 @@ import os +from typing import List import pytest import pathlib - import cognee from cognee.low_level import setup from cognee.tasks.storage import add_data_points @@ -10,6 +10,20 @@ from cognee.modules.chunking.models import DocumentChunk from cognee.modules.data.processing.document_types import TextDocument from cognee.modules.retrieval.exceptions.exceptions import NoDataError from cognee.modules.retrieval.completion_retriever import CompletionRetriever +from cognee.infrastructure.engine import DataPoint +from cognee.modules.data.processing.document_types import Document +from cognee.modules.engine.models import Entity + + +class DocumentChunkWithEntities(DataPoint): + text: str + chunk_size: int + chunk_index: int + cut_type: str + is_part_of: Document + contains: List[Entity] = None + + metadata: dict = {"index_fields": ["text"]} class TestRAGCompletionRetriever: @@ -182,7 +196,9 @@ class TestRAGCompletionRetriever: await retriever.get_context("Christina Mayer") vector_engine = get_vector_engine() - await vector_engine.create_collection("DocumentChunk_text", payload_schema=DocumentChunk) + await vector_engine.create_collection( + "DocumentChunk_text", payload_schema=DocumentChunkWithEntities + ) context = await retriever.get_context("Christina Mayer") assert context == "", "Returned context should be empty on an empty graph" From 34ff4ad9daea8925ed781172908600299414688e Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 27 Aug 2025 19:21:49 +0200 Subject: [PATCH 19/73] fix: circular dep fix --- cognee/api/v1/cognify/cognify.py | 7 +++---- cognee/tasks/temporal_graph/__init__.py | 3 +-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 42f1b51e3..465453d04 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -22,10 +22,9 @@ from cognee.tasks.graph import extract_graph_from_data from cognee.tasks.storage import add_data_points from cognee.tasks.summarization import summarize_text from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor -from cognee.tasks.temporal_graph import ( - extract_events_and_timestamps, - extract_knowledge_graph_from_events, -) +from cognee.tasks.temporal_graph.extract_events_and_entities import extract_events_and_timestamps +from cognee.tasks.temporal_graph.extract_knowledge_graph_from_events import extract_knowledge_graph_from_events + logger = get_logger("cognify") diff --git a/cognee/tasks/temporal_graph/__init__.py b/cognee/tasks/temporal_graph/__init__.py index 11d812541..8b1378917 100644 --- a/cognee/tasks/temporal_graph/__init__.py +++ b/cognee/tasks/temporal_graph/__init__.py @@ -1,2 +1 @@ -from .extract_events_and_entities import extract_events_and_timestamps -from .extract_knowledge_graph_from_events import extract_knowledge_graph_from_events + From 140437acf13a89a4ba74a14500305d4d3200068d Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 27 Aug 2025 19:23:29 +0200 Subject: [PATCH 20/73] ruff fix --- cognee/api/v1/cognify/cognify.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 465453d04..31a357afa 100644 --- a/cognee/api/v1/cognify/cognify.py +++ 
b/cognee/api/v1/cognify/cognify.py @@ -23,7 +23,9 @@ from cognee.tasks.storage import add_data_points from cognee.tasks.summarization import summarize_text from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor from cognee.tasks.temporal_graph.extract_events_and_entities import extract_events_and_timestamps -from cognee.tasks.temporal_graph.extract_knowledge_graph_from_events import extract_knowledge_graph_from_events +from cognee.tasks.temporal_graph.extract_knowledge_graph_from_events import ( + extract_knowledge_graph_from_events, +) logger = get_logger("cognify") From ac87e62adb55803cc2335889b21bcc3777d3d833 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 28 Aug 2025 10:52:08 +0200 Subject: [PATCH 21/73] feat: Save search flag progress --- .../modules/retrieval/completion_retriever.py | 17 ++++++++++++-- ..._completion_context_extension_retriever.py | 13 ++++++++++- .../graph_completion_cot_retriever.py | 15 +++++++++++-- .../retrieval/graph_completion_retriever.py | 12 +++++++++- cognee/modules/retrieval/utils/completion.py | 22 +++++++++++++------ cognee/modules/search/methods/search.py | 7 +++++- 6 files changed, 72 insertions(+), 14 deletions(-) diff --git a/cognee/modules/retrieval/completion_retriever.py b/cognee/modules/retrieval/completion_retriever.py index 655a9010d..e9c8331a1 100644 --- a/cognee/modules/retrieval/completion_retriever.py +++ b/cognee/modules/retrieval/completion_retriever.py @@ -65,7 +65,14 @@ class CompletionRetriever(BaseRetriever): logger.error("DocumentChunk_text collection not found") raise NoDataError("No data found in the system, please add data first.") from error - async def get_completion(self, query: str, context: Optional[Any] = None) -> Any: + async def get_completion( + self, + query: str, + context: Optional[Any] = None, + user_prompt: str = None, + system_prompt: str = None, + only_context: bool = False, + ) -> Any: """ Generates an LLM completion using the context. 
@@ -88,6 +95,12 @@ class CompletionRetriever(BaseRetriever): context = await self.get_context(query) completion = await generate_completion( - query, context, self.user_prompt_path, self.system_prompt_path + query=query, + context=context, + user_prompt_path=self.user_prompt_path, + system_prompt_path=self.system_prompt_path, + user_prompt=user_prompt, + system_prompt=system_prompt, + only_context=only_context, ) return [completion] diff --git a/cognee/modules/retrieval/graph_completion_context_extension_retriever.py b/cognee/modules/retrieval/graph_completion_context_extension_retriever.py index d05e6b4fa..f25edb4a7 100644 --- a/cognee/modules/retrieval/graph_completion_context_extension_retriever.py +++ b/cognee/modules/retrieval/graph_completion_context_extension_retriever.py @@ -41,7 +41,13 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever): ) async def get_completion( - self, query: str, context: Optional[Any] = None, context_extension_rounds=4 + self, + query: str, + context: Optional[Any] = None, + user_prompt: str = None, + system_prompt: str = None, + only_context: bool = False, + context_extension_rounds=4, ) -> List[str]: """ Extends the context for a given query by retrieving related triplets and generating new @@ -86,6 +92,8 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever): context=context, user_prompt_path=self.user_prompt_path, system_prompt_path=self.system_prompt_path, + user_prompt=user_prompt, + system_prompt=system_prompt, ) triplets += await self.get_triplets(completion) @@ -112,6 +120,9 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever): context=context, user_prompt_path=self.user_prompt_path, system_prompt_path=self.system_prompt_path, + user_prompt=user_prompt, + system_prompt=system_prompt, + only_context=only_context, ) if self.save_interaction and context and triplets and completion: diff --git a/cognee/modules/retrieval/graph_completion_cot_retriever.py b/cognee/modules/retrieval/graph_completion_cot_retriever.py index 032dccf9e..63ab6b3b7 100644 --- a/cognee/modules/retrieval/graph_completion_cot_retriever.py +++ b/cognee/modules/retrieval/graph_completion_cot_retriever.py @@ -51,7 +51,13 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever): self.followup_user_prompt_path = followup_user_prompt_path async def get_completion( - self, query: str, context: Optional[Any] = None, max_iter=4 + self, + query: str, + context: Optional[Any] = None, + user_prompt: str = None, + system_prompt: str = None, + only_context: bool = False, + max_iter=4, ) -> List[str]: """ Generate completion responses based on a user query and contextual information. 
@@ -92,6 +98,8 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever): context=context, user_prompt_path=self.user_prompt_path, system_prompt_path=self.system_prompt_path, + user_prompt=user_prompt, + system_prompt=system_prompt, ) logger.info(f"Chain-of-thought: round {round_idx} - answer: {completion}") if round_idx < max_iter: @@ -128,4 +136,7 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever): question=query, answer=completion, context=context, triplets=triplets ) - return [completion] + if only_context: + return [context] + else: + return [completion] diff --git a/cognee/modules/retrieval/graph_completion_retriever.py b/cognee/modules/retrieval/graph_completion_retriever.py index fb3cf4885..d88252054 100644 --- a/cognee/modules/retrieval/graph_completion_retriever.py +++ b/cognee/modules/retrieval/graph_completion_retriever.py @@ -151,7 +151,14 @@ class GraphCompletionRetriever(BaseRetriever): return context, triplets - async def get_completion(self, query: str, context: Optional[Any] = None) -> Any: + async def get_completion( + self, + query: str, + context: Optional[Any] = None, + user_prompt: str = None, + system_prompt: str = None, + only_context: bool = False, + ) -> Any: """ Generates a completion using graph connections context based on a query. @@ -177,6 +184,9 @@ class GraphCompletionRetriever(BaseRetriever): context=context, user_prompt_path=self.user_prompt_path, system_prompt_path=self.system_prompt_path, + user_prompt=user_prompt, + system_prompt=system_prompt, + only_context=only_context, ) if self.save_interaction and context and triplets and completion: diff --git a/cognee/modules/retrieval/utils/completion.py b/cognee/modules/retrieval/utils/completion.py index ca0b30c18..69381d647 100644 --- a/cognee/modules/retrieval/utils/completion.py +++ b/cognee/modules/retrieval/utils/completion.py @@ -6,18 +6,26 @@ async def generate_completion( context: str, user_prompt_path: str, system_prompt_path: str, + user_prompt: str = None, + system_prompt: str = None, + only_context: bool = False, ) -> str: """Generates a completion using LLM with given context and prompts.""" args = {"question": query, "context": context} - user_prompt = LLMGateway.render_prompt(user_prompt_path, args) - system_prompt = LLMGateway.read_query_prompt(system_prompt_path) - - return await LLMGateway.acreate_structured_output( - text_input=user_prompt, - system_prompt=system_prompt, - response_model=str, + user_prompt = LLMGateway.render_prompt(user_prompt if user_prompt else user_prompt_path, args) + system_prompt = LLMGateway.read_query_prompt( + system_prompt if system_prompt else system_prompt_path ) + if only_context: + return context + else: + return await LLMGateway.acreate_structured_output( + text_input=user_prompt, + system_prompt=system_prompt, + response_model=str, + ) + async def summarize_text( text: str, diff --git a/cognee/modules/search/methods/search.py b/cognee/modules/search/methods/search.py index f5f2a793a..3e5d6ffcd 100644 --- a/cognee/modules/search/methods/search.py +++ b/cognee/modules/search/methods/search.py @@ -101,11 +101,14 @@ async def specific_search( query: str, user: User, system_prompt_path="answer_simple_question.txt", + user_prompt: str = None, + system_prompt: str = None, top_k: int = 10, node_type: Optional[Type] = None, node_name: Optional[List[str]] = None, save_interaction: Optional[bool] = False, last_k: Optional[int] = None, + only_context: bool = None, ) -> list: search_tasks: dict[SearchType, Callable] = { SearchType.SUMMARIES: 
SummariesRetriever(top_k=top_k).get_completion, @@ -159,7 +162,9 @@ async def specific_search( send_telemetry("cognee.search EXECUTION STARTED", user.id) - results = await search_task(query) + results = await search_task( + query=query, system_prompt=system_prompt, user_prompt=user_prompt, only_context=only_context + ) send_telemetry("cognee.search EXECUTION COMPLETED", user.id) From e4a5869a437eb7836a9484178470774e504cbcf4 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 28 Aug 2025 10:55:26 +0200 Subject: [PATCH 22/73] fix: fix graph promp path in event graph task --- .../extraction/knowledge_graph/extract_event_graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py index 0373649f2..667e2eb7d 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py @@ -15,7 +15,7 @@ async def extract_event_graph( llm_config = get_llm_config() - prompt_path = llm_config.graph_prompt_path + prompt_path = llm_config.temporal_graph_prompt_path # Check if the prompt path is an absolute path or just a filename if os.path.isabs(prompt_path): From 2915698d601f8ce84d5d63458d0e8da51794fa67 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 28 Aug 2025 13:43:37 +0200 Subject: [PATCH 23/73] feat: Add only_context and system prompt flags for search --- .../v1/search/routers/get_search_router.py | 6 + cognee/api/v1/search/search.py | 4 + .../modules/retrieval/completion_retriever.py | 18 ++- ..._completion_context_extension_retriever.py | 20 +-- .../graph_completion_cot_retriever.py | 12 +- .../retrieval/graph_completion_retriever.py | 12 +- .../graph_summary_completion_retriever.py | 4 +- .../modules/retrieval/summaries_retriever.py | 2 +- cognee/modules/retrieval/utils/completion.py | 18 +-- cognee/modules/search/methods/search.py | 117 +++++++++++++----- 10 files changed, 140 insertions(+), 73 deletions(-) diff --git a/cognee/api/v1/search/routers/get_search_router.py b/cognee/api/v1/search/routers/get_search_router.py index 0ceeb1abb..b141c6bdc 100644 --- a/cognee/api/v1/search/routers/get_search_router.py +++ b/cognee/api/v1/search/routers/get_search_router.py @@ -20,7 +20,9 @@ class SearchPayloadDTO(InDTO): datasets: Optional[list[str]] = Field(default=None) dataset_ids: Optional[list[UUID]] = Field(default=None, examples=[[]]) query: str = Field(default="What is in the document?") + system_prompt: Optional[str] = Field(default=None) top_k: Optional[int] = Field(default=10) + only_context: bool = Field(default=False) def get_search_router() -> APIRouter: @@ -102,7 +104,9 @@ def get_search_router() -> APIRouter: "datasets": payload.datasets, "dataset_ids": [str(dataset_id) for dataset_id in payload.dataset_ids or []], "query": payload.query, + "system_prompt": payload.system_prompt, "top_k": payload.top_k, + "only_context": payload.only_context, }, ) @@ -115,7 +119,9 @@ def get_search_router() -> APIRouter: user=user, datasets=payload.datasets, dataset_ids=payload.dataset_ids, + system_prompt=payload.system_prompt, top_k=payload.top_k, + only_context=payload.only_context, ) return results diff --git 
a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py index f37f8ba6d..113d33557 100644 --- a/cognee/api/v1/search/search.py +++ b/cognee/api/v1/search/search.py @@ -16,11 +16,13 @@ async def search( datasets: Optional[Union[list[str], str]] = None, dataset_ids: Optional[Union[list[UUID], UUID]] = None, system_prompt_path: str = "answer_simple_question.txt", + system_prompt: Optional[str] = None, top_k: int = 10, node_type: Optional[Type] = None, node_name: Optional[List[str]] = None, save_interaction: bool = False, last_k: Optional[int] = None, + only_context: bool = False, ) -> list: """ Search and query the knowledge graph for insights, information, and connections. @@ -183,11 +185,13 @@ async def search( dataset_ids=dataset_ids if dataset_ids else datasets, user=user, system_prompt_path=system_prompt_path, + system_prompt=system_prompt, top_k=top_k, node_type=node_type, node_name=node_name, save_interaction=save_interaction, last_k=last_k, + only_context=only_context, ) return filtered_search_results diff --git a/cognee/modules/retrieval/completion_retriever.py b/cognee/modules/retrieval/completion_retriever.py index e9c8331a1..4d34dfdbe 100644 --- a/cognee/modules/retrieval/completion_retriever.py +++ b/cognee/modules/retrieval/completion_retriever.py @@ -23,12 +23,16 @@ class CompletionRetriever(BaseRetriever): self, user_prompt_path: str = "context_for_question.txt", system_prompt_path: str = "answer_simple_question.txt", + system_prompt: str = None, top_k: Optional[int] = 1, + only_context: bool = False, ): """Initialize retriever with optional custom prompt paths.""" self.user_prompt_path = user_prompt_path self.system_prompt_path = system_prompt_path self.top_k = top_k if top_k is not None else 1 + self.system_prompt = system_prompt + self.only_context = only_context async def get_context(self, query: str) -> str: """ @@ -65,14 +69,7 @@ class CompletionRetriever(BaseRetriever): logger.error("DocumentChunk_text collection not found") raise NoDataError("No data found in the system, please add data first.") from error - async def get_completion( - self, - query: str, - context: Optional[Any] = None, - user_prompt: str = None, - system_prompt: str = None, - only_context: bool = False, - ) -> Any: + async def get_completion(self, query: str, context: Optional[Any] = None) -> Any: """ Generates an LLM completion using the context. 
@@ -99,8 +96,7 @@ class CompletionRetriever(BaseRetriever): context=context, user_prompt_path=self.user_prompt_path, system_prompt_path=self.system_prompt_path, - user_prompt=user_prompt, - system_prompt=system_prompt, - only_context=only_context, + system_prompt=self.system_prompt, + only_context=self.only_context, ) return [completion] diff --git a/cognee/modules/retrieval/graph_completion_context_extension_retriever.py b/cognee/modules/retrieval/graph_completion_context_extension_retriever.py index f25edb4a7..8bdf5f1a0 100644 --- a/cognee/modules/retrieval/graph_completion_context_extension_retriever.py +++ b/cognee/modules/retrieval/graph_completion_context_extension_retriever.py @@ -26,10 +26,12 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever): self, user_prompt_path: str = "graph_context_for_question.txt", system_prompt_path: str = "answer_simple_question.txt", + system_prompt: Optional[str] = None, top_k: Optional[int] = 5, node_type: Optional[Type] = None, node_name: Optional[List[str]] = None, save_interaction: bool = False, + only_context: bool = False, ): super().__init__( user_prompt_path=user_prompt_path, @@ -38,15 +40,14 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever): node_type=node_type, node_name=node_name, save_interaction=save_interaction, + system_prompt=system_prompt, + only_context=only_context, ) async def get_completion( self, query: str, context: Optional[Any] = None, - user_prompt: str = None, - system_prompt: str = None, - only_context: bool = False, context_extension_rounds=4, ) -> List[str]: """ @@ -92,8 +93,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever): context=context, user_prompt_path=self.user_prompt_path, system_prompt_path=self.system_prompt_path, - user_prompt=user_prompt, - system_prompt=system_prompt, + system_prompt=self.system_prompt, ) triplets += await self.get_triplets(completion) @@ -120,9 +120,8 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever): context=context, user_prompt_path=self.user_prompt_path, system_prompt_path=self.system_prompt_path, - user_prompt=user_prompt, - system_prompt=system_prompt, - only_context=only_context, + system_prompt=self.system_prompt, + only_context=self.only_context, ) if self.save_interaction and context and triplets and completion: @@ -130,4 +129,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever): question=query, answer=completion, context=context, triplets=triplets ) - return [completion] + if self.only_context: + return [context] + else: + return [completion] diff --git a/cognee/modules/retrieval/graph_completion_cot_retriever.py b/cognee/modules/retrieval/graph_completion_cot_retriever.py index 63ab6b3b7..86ff8555b 100644 --- a/cognee/modules/retrieval/graph_completion_cot_retriever.py +++ b/cognee/modules/retrieval/graph_completion_cot_retriever.py @@ -32,14 +32,18 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever): validation_system_prompt_path: str = "cot_validation_system_prompt.txt", followup_system_prompt_path: str = "cot_followup_system_prompt.txt", followup_user_prompt_path: str = "cot_followup_user_prompt.txt", + system_prompt: str = None, top_k: Optional[int] = 5, node_type: Optional[Type] = None, node_name: Optional[List[str]] = None, save_interaction: bool = False, + only_context: bool = False, ): super().__init__( user_prompt_path=user_prompt_path, system_prompt_path=system_prompt_path, + system_prompt=system_prompt, + only_context=only_context, 
top_k=top_k, node_type=node_type, node_name=node_name, @@ -54,9 +58,6 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever): self, query: str, context: Optional[Any] = None, - user_prompt: str = None, - system_prompt: str = None, - only_context: bool = False, max_iter=4, ) -> List[str]: """ @@ -98,8 +99,7 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever): context=context, user_prompt_path=self.user_prompt_path, system_prompt_path=self.system_prompt_path, - user_prompt=user_prompt, - system_prompt=system_prompt, + system_prompt=self.system_prompt, ) logger.info(f"Chain-of-thought: round {round_idx} - answer: {completion}") if round_idx < max_iter: @@ -136,7 +136,7 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever): question=query, answer=completion, context=context, triplets=triplets ) - if only_context: + if self.only_context: return [context] else: return [completion] diff --git a/cognee/modules/retrieval/graph_completion_retriever.py b/cognee/modules/retrieval/graph_completion_retriever.py index d88252054..6a5193c56 100644 --- a/cognee/modules/retrieval/graph_completion_retriever.py +++ b/cognee/modules/retrieval/graph_completion_retriever.py @@ -36,15 +36,19 @@ class GraphCompletionRetriever(BaseRetriever): self, user_prompt_path: str = "graph_context_for_question.txt", system_prompt_path: str = "answer_simple_question.txt", + system_prompt: str = None, top_k: Optional[int] = 5, node_type: Optional[Type] = None, node_name: Optional[List[str]] = None, save_interaction: bool = False, + only_context: bool = False, ): """Initialize retriever with prompt paths and search parameters.""" self.save_interaction = save_interaction self.user_prompt_path = user_prompt_path self.system_prompt_path = system_prompt_path + self.system_prompt = system_prompt + self.only_context = only_context self.top_k = top_k if top_k is not None else 5 self.node_type = node_type self.node_name = node_name @@ -155,9 +159,6 @@ class GraphCompletionRetriever(BaseRetriever): self, query: str, context: Optional[Any] = None, - user_prompt: str = None, - system_prompt: str = None, - only_context: bool = False, ) -> Any: """ Generates a completion using graph connections context based on a query. 
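Since `get_completion` no longer accepts per-call `user_prompt`/`system_prompt` arguments, direct callers configure these once at construction time. A sketch of that pattern; the prompt string is illustrative:

```python
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever


async def graph_answer(question: str) -> list:
    # Flags now live on the retriever instance instead of get_completion kwargs.
    retriever = GraphCompletionRetriever(
        top_k=5,
        system_prompt="Answer strictly from the supplied graph context.",
        only_context=False,
    )
    # With only_context=True the retriever would return the raw context
    # instead of an LLM-generated answer.
    return await retriever.get_completion(question)
```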
@@ -184,9 +185,8 @@ class GraphCompletionRetriever(BaseRetriever): context=context, user_prompt_path=self.user_prompt_path, system_prompt_path=self.system_prompt_path, - user_prompt=user_prompt, - system_prompt=system_prompt, - only_context=only_context, + system_prompt=self.system_prompt, + only_context=self.only_context, ) if self.save_interaction and context and triplets and completion: diff --git a/cognee/modules/retrieval/graph_summary_completion_retriever.py b/cognee/modules/retrieval/graph_summary_completion_retriever.py index d344ebd26..051f39b22 100644 --- a/cognee/modules/retrieval/graph_summary_completion_retriever.py +++ b/cognee/modules/retrieval/graph_summary_completion_retriever.py @@ -21,6 +21,7 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever): user_prompt_path: str = "graph_context_for_question.txt", system_prompt_path: str = "answer_simple_question.txt", summarize_prompt_path: str = "summarize_search_results.txt", + system_prompt: Optional[str] = None, top_k: Optional[int] = 5, node_type: Optional[Type] = None, node_name: Optional[List[str]] = None, @@ -34,6 +35,7 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever): node_type=node_type, node_name=node_name, save_interaction=save_interaction, + system_prompt=system_prompt, ) self.summarize_prompt_path = summarize_prompt_path @@ -57,4 +59,4 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever): - str: A summary string representing the content of the retrieved edges. """ direct_text = await super().resolve_edges_to_text(retrieved_edges) - return await summarize_text(direct_text, self.summarize_prompt_path) + return await summarize_text(direct_text, self.summarize_prompt_path, self.system_prompt) diff --git a/cognee/modules/retrieval/summaries_retriever.py b/cognee/modules/retrieval/summaries_retriever.py index 56f414013..df35cdc51 100644 --- a/cognee/modules/retrieval/summaries_retriever.py +++ b/cognee/modules/retrieval/summaries_retriever.py @@ -62,7 +62,7 @@ class SummariesRetriever(BaseRetriever): logger.info(f"Returning {len(summary_payloads)} summary payloads") return summary_payloads - async def get_completion(self, query: str, context: Optional[Any] = None) -> Any: + async def get_completion(self, query: str, context: Optional[Any] = None, **kwargs) -> Any: """ Generates a completion using summaries context. 
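The utils/completion.py hunk that follows changes prompt precedence: an inline prompt string, when provided, wins over the prompt-file path. A sketch of calling the helper directly under that rule, with the prompt file names used elsewhere in this patch and an illustrative inline prompt:

```python
from cognee.modules.retrieval.utils.completion import generate_completion


async def answer_with_inline_prompt(question: str, context: str) -> str:
    # system_prompt, when set, takes precedence over system_prompt_path;
    # user_prompt would override user_prompt_path the same way.
    return await generate_completion(
        query=question,
        context=context,
        user_prompt_path="context_for_question.txt",
        system_prompt_path="answer_simple_question.txt",
        system_prompt="You are a terse assistant. Use only the context.",
    )
```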
diff --git a/cognee/modules/retrieval/utils/completion.py b/cognee/modules/retrieval/utils/completion.py index 69381d647..4c2639517 100644 --- a/cognee/modules/retrieval/utils/completion.py +++ b/cognee/modules/retrieval/utils/completion.py @@ -1,3 +1,4 @@ +from typing import Optional from cognee.infrastructure.llm.LLMGateway import LLMGateway @@ -6,15 +7,15 @@ async def generate_completion( context: str, user_prompt_path: str, system_prompt_path: str, - user_prompt: str = None, - system_prompt: str = None, + user_prompt: Optional[str] = None, + system_prompt: Optional[str] = None, only_context: bool = False, ) -> str: """Generates a completion using LLM with given context and prompts.""" args = {"question": query, "context": context} - user_prompt = LLMGateway.render_prompt(user_prompt if user_prompt else user_prompt_path, args) - system_prompt = LLMGateway.read_query_prompt( - system_prompt if system_prompt else system_prompt_path + user_prompt = user_prompt if user_prompt else LLMGateway.render_prompt(user_prompt_path, args) + system_prompt = ( + system_prompt if system_prompt else LLMGateway.read_query_prompt(system_prompt_path) ) if only_context: @@ -29,10 +30,13 @@ async def generate_completion( async def summarize_text( text: str, - prompt_path: str = "summarize_search_results.txt", + system_prompt_path: str = "summarize_search_results.txt", + system_prompt: str = None, ) -> str: """Summarizes text using LLM with the specified prompt.""" - system_prompt = LLMGateway.read_query_prompt(prompt_path) + system_prompt = ( + system_prompt if system_prompt else LLMGateway.read_query_prompt(system_prompt_path) + ) return await LLMGateway.acreate_structured_output( text_input=text, diff --git a/cognee/modules/search/methods/search.py b/cognee/modules/search/methods/search.py index 3e5d6ffcd..465d0cbb3 100644 --- a/cognee/modules/search/methods/search.py +++ b/cognee/modules/search/methods/search.py @@ -37,11 +37,13 @@ async def search( dataset_ids: Union[list[UUID], None], user: User, system_prompt_path="answer_simple_question.txt", + system_prompt: Optional[str] = None, top_k: int = 10, node_type: Optional[Type] = None, node_name: Optional[List[str]] = None, save_interaction: Optional[bool] = False, last_k: Optional[int] = None, + only_context: bool = False, ): """ @@ -61,28 +63,34 @@ async def search( # Use search function filtered by permissions if access control is enabled if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true": return await authorized_search( - query_text=query_text, query_type=query_type, + query_text=query_text, user=user, dataset_ids=dataset_ids, system_prompt_path=system_prompt_path, + system_prompt=system_prompt, top_k=top_k, + node_type=node_type, + node_name=node_name, save_interaction=save_interaction, last_k=last_k, + only_context=only_context, ) query = await log_query(query_text, query_type.value, user.id) search_results = await specific_search( - query_type, - query_text, - user, + query_type=query_type, + query_text=query_text, + user=user, system_prompt_path=system_prompt_path, + system_prompt=system_prompt, top_k=top_k, node_type=node_type, node_name=node_name, save_interaction=save_interaction, last_k=last_k, + only_context=only_context, ) await log_result( @@ -98,11 +106,10 @@ async def search( async def specific_search( query_type: SearchType, - query: str, + query_text: str, user: User, - system_prompt_path="answer_simple_question.txt", - user_prompt: str = None, - system_prompt: str = None, + system_prompt_path: str = 
"answer_simple_question.txt", + system_prompt: Optional[str] = None, top_k: int = 10, node_type: Optional[Type] = None, node_name: Optional[List[str]] = None, @@ -115,7 +122,10 @@ async def specific_search( SearchType.INSIGHTS: InsightsRetriever(top_k=top_k).get_completion, SearchType.CHUNKS: ChunksRetriever(top_k=top_k).get_completion, SearchType.RAG_COMPLETION: CompletionRetriever( - system_prompt_path=system_prompt_path, top_k=top_k + system_prompt_path=system_prompt_path, + top_k=top_k, + system_prompt=system_prompt, + only_context=only_context, ).get_completion, SearchType.GRAPH_COMPLETION: GraphCompletionRetriever( system_prompt_path=system_prompt_path, @@ -123,6 +133,8 @@ async def specific_search( node_type=node_type, node_name=node_name, save_interaction=save_interaction, + system_prompt=system_prompt, + only_context=only_context, ).get_completion, SearchType.GRAPH_COMPLETION_COT: GraphCompletionCotRetriever( system_prompt_path=system_prompt_path, @@ -130,6 +142,8 @@ async def specific_search( node_type=node_type, node_name=node_name, save_interaction=save_interaction, + system_prompt=system_prompt, + only_context=only_context, ).get_completion, SearchType.GRAPH_COMPLETION_CONTEXT_EXTENSION: GraphCompletionContextExtensionRetriever( system_prompt_path=system_prompt_path, @@ -137,6 +151,8 @@ async def specific_search( node_type=node_type, node_name=node_name, save_interaction=save_interaction, + system_prompt=system_prompt, + only_context=only_context, ).get_completion, SearchType.GRAPH_SUMMARY_COMPLETION: GraphSummaryCompletionRetriever( system_prompt_path=system_prompt_path, @@ -144,6 +160,7 @@ async def specific_search( node_type=node_type, node_name=node_name, save_interaction=save_interaction, + system_prompt=system_prompt, ).get_completion, SearchType.CODE: CodeRetriever(top_k=top_k).get_completion, SearchType.CYPHER: CypherSearchRetriever().get_completion, @@ -153,7 +170,7 @@ async def specific_search( # If the query type is FEELING_LUCKY, select the search type intelligently if query_type is SearchType.FEELING_LUCKY: - query_type = await select_search_type(query) + query_type = await select_search_type(query_text) search_task = search_tasks.get(query_type) @@ -162,9 +179,7 @@ async def specific_search( send_telemetry("cognee.search EXECUTION STARTED", user.id) - results = await search_task( - query=query, system_prompt=system_prompt, user_prompt=user_prompt, only_context=only_context - ) + results = await search_task(query=query_text) send_telemetry("cognee.search EXECUTION COMPLETED", user.id) @@ -172,14 +187,18 @@ async def specific_search( async def authorized_search( - query_text: str, query_type: SearchType, - user: User = None, + query_text: str, + user: User, dataset_ids: Optional[list[UUID]] = None, system_prompt_path: str = "answer_simple_question.txt", + system_prompt: Optional[str] = None, top_k: int = 10, - save_interaction: bool = False, + node_type: Optional[Type] = None, + node_name: Optional[List[str]] = None, + save_interaction: Optional[bool] = False, last_k: Optional[int] = None, + only_context: bool = None, ) -> list: """ Verifies access for provided datasets or uses all datasets user has read access for and performs search per dataset. 
@@ -193,14 +212,18 @@ async def authorized_search(
 
     # Searches all provided datasets and handles setting up of appropriate database context based on permissions
     search_results = await specific_search_by_context(
-        search_datasets,
-        query_text,
-        query_type,
-        user,
-        system_prompt_path,
-        top_k,
-        save_interaction,
+        search_datasets=search_datasets,
+        query_type=query_type,
+        query_text=query_text,
+        user=user,
+        system_prompt_path=system_prompt_path,
+        system_prompt=system_prompt,
+        top_k=top_k,
+        node_type=node_type,
+        node_name=node_name,
+        save_interaction=save_interaction,
         last_k=last_k,
+        only_context=only_context,
     )
 
     await log_result(query.id, json.dumps(search_results, cls=JSONEncoder), user.id)
@@ -210,13 +233,17 @@ async def specific_search_by_context(
 
 async def specific_search_by_context(
     search_datasets: list[Dataset],
-    query_text: str,
     query_type: SearchType,
+    query_text: str,
     user: User,
-    system_prompt_path: str,
-    top_k: int,
-    save_interaction: bool = False,
+    system_prompt_path: str = "answer_simple_question.txt",
+    system_prompt: Optional[str] = None,
+    top_k: int = 10,
+    node_type: Optional[Type] = None,
+    node_name: Optional[List[str]] = None,
+    save_interaction: Optional[bool] = False,
     last_k: Optional[int] = None,
+    only_context: bool = None,
 ):
     """
     Searches all provided datasets and handles setting up of appropriate database context based on permissions.
@@ -224,18 +251,33 @@ async def specific_search_by_context(
     """
 
     async def _search_by_context(
-        dataset, user, query_type, query_text, system_prompt_path, top_k, last_k
+        dataset: Dataset,
+        query_type: SearchType,
+        query_text: str,
+        user: User,
+        system_prompt_path: str = "answer_simple_question.txt",
+        system_prompt: Optional[str] = None,
+        top_k: int = 10,
+        node_type: Optional[Type] = None,
+        node_name: Optional[List[str]] = None,
+        save_interaction: Optional[bool] = False,
+        last_k: Optional[int] = None,
+        only_context: bool = None,
     ):
         # Set database configuration in async context for each dataset user has access for
         await set_database_global_context_variables(dataset.id, dataset.owner_id)
         search_results = await specific_search(
-            query_type,
-            query_text,
-            user,
+            query_type=query_type,
+            query_text=query_text,
+            user=user,
             system_prompt_path=system_prompt_path,
+            system_prompt=system_prompt,
             top_k=top_k,
+            node_type=node_type,
+            node_name=node_name,
             save_interaction=save_interaction,
             last_k=last_k,
+            only_context=only_context,
         )
         return {
             "search_result": search_results,
@@ -248,7 +290,18 @@ async def specific_search_by_context(
     for dataset in search_datasets:
         tasks.append(
             _search_by_context(
-                dataset, user, query_type, query_text, system_prompt_path, top_k, last_k
+                dataset=dataset,
+                query_type=query_type,
+                query_text=query_text,
+                user=user,
+                system_prompt_path=system_prompt_path,
+                system_prompt=system_prompt,
+                top_k=top_k,
+                node_type=node_type,
+                node_name=node_name,
+                save_interaction=save_interaction,
+                last_k=last_k,
+                only_context=only_context,
             )
         )
 

From 7fd5e1e0104c061e056c5e97a4b0ea04effa45dd Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Thu, 28 Aug 2025 13:53:08 +0200
Subject: [PATCH 24/73] fix: Make custom_prompt default to an empty string

---
 cognee/api/v1/cognify/routers/get_cognify_router.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cognee/api/v1/cognify/routers/get_cognify_router.py b/cognee/api/v1/cognify/routers/get_cognify_router.py
index 6809f089a..d40345f8e 100644
--- a/cognee/api/v1/cognify/routers/get_cognify_router.py
+++
b/cognee/api/v1/cognify/routers/get_cognify_router.py @@ -38,7 +38,7 @@ class CognifyPayloadDTO(InDTO): dataset_ids: Optional[List[UUID]] = Field(default=None, examples=[[]]) run_in_background: Optional[bool] = Field(default=False) custom_prompt: Optional[str] = Field( - default=None, description="Custom prompt for entity extraction and graph generation" + default="", description="Custom prompt for entity extraction and graph generation" ) From 15155520dd8a83c1aa9b1fc630f418dd0043daf3 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 28 Aug 2025 17:03:47 +0200 Subject: [PATCH 25/73] feat: adds temporal retriever --- .../databases/graph/kuzu/adapter.py | 135 +++++++++++++++ .../databases/graph/neo4j_driver/adapter.py | 89 ++++++++++ .../llm/prompts/extract_query_time.txt | 15 ++ .../modules/retrieval/temporal_retriever.py | 156 ++++++++++++++++++ cognee/modules/search/methods/search.py | 2 + cognee/modules/search/types/SearchType.py | 1 + 6 files changed, 398 insertions(+) create mode 100644 cognee/infrastructure/llm/prompts/extract_query_time.txt create mode 100644 cognee/modules/retrieval/temporal_retriever.py diff --git a/cognee/infrastructure/databases/graph/kuzu/adapter.py b/cognee/infrastructure/databases/graph/kuzu/adapter.py index 70bcf2053..085d7cd00 100644 --- a/cognee/infrastructure/databases/graph/kuzu/adapter.py +++ b/cognee/infrastructure/databases/graph/kuzu/adapter.py @@ -21,6 +21,8 @@ from cognee.infrastructure.databases.graph.graph_db_interface import ( ) from cognee.infrastructure.engine import DataPoint from cognee.modules.storage.utils import JSONEncoder +from cognee.modules.engine.utils.generate_timestamp_datapoint import date_to_int +from cognee.tasks.temporal_graph.models import Timestamp logger = get_logger() @@ -106,6 +108,18 @@ class KuzuAdapter(GraphDBInterface): self.db.init_database() self.connection = Connection(self.db) + + try: + self.connection.execute("INSTALL JSON;") + except Exception as e: + logger.info(f"JSON extension already installed or not needed: {e}") + + try: + self.connection.execute("LOAD EXTENSION JSON;") + logger.info("Loaded JSON extension") + except Exception as e: + logger.info(f"JSON extension already loaded or unavailable: {e}") + # Create node table with essential fields and timestamp self.connection.execute(""" CREATE NODE TABLE IF NOT EXISTS Node( @@ -1693,3 +1707,124 @@ class KuzuAdapter(GraphDBInterface): SET r.properties = $props """ await self.query(update_query, {"node_id": node_id, "props": new_props}) + + async def collect_events(self, ids: List[str]) -> Any: + """ + Collect all Event-type nodes reachable within 1..2 hops + from the given node IDs. 
+
+        Args:
+            ids: Pre-quoted, comma-separated node IDs to expand from, as
+                produced by collect_time_ids
+
+        Returns:
+            A single-element list of the form [{"events": [...]}], where each
+            event carries id, name, description, and an optional location
+        """
+
+        event_collection_cypher = """UNWIND [{quoted}] AS uid
+        MATCH (start {{id: uid}})
+        MATCH (start)-[*1..2]-(event)
+        WHERE event.type = 'Event'
+        WITH DISTINCT event
+        RETURN collect(event) AS events;
+        """
+
+        query = event_collection_cypher.format(quoted=ids)
+        result = await self.query(query)
+        events = []
+        for node in result[0][0]:
+            props = json.loads(node["properties"])
+
+            event = {
+                "id": node["id"],
+                "name": node["name"],
+                "description": props.get("description"),
+            }
+
+            if props.get("location"):
+                event["location"] = props["location"]
+
+            events.append(event)
+
+        return [{"events": events}]
+
+    async def collect_time_ids(
+        self,
+        time_from: Optional[Timestamp] = None,
+        time_to: Optional[Timestamp] = None,
+    ) -> str:
+        """
+        Collect IDs of Timestamp nodes between time_from and time_to.
+
+        Args:
+            time_from: Inclusive lower bound (Timestamp), optional
+            time_to: Inclusive upper bound (Timestamp), optional
+
+        Returns:
+            A string of quoted IDs: "'id1', 'id2', 'id3'"
+            (ready for use in a Cypher UNWIND clause), or an empty string
+            when neither bound is provided.
+        """
+
+        if time_from and time_to:
+            time_from = date_to_int(time_from)
+            time_to = date_to_int(time_to)
+
+            cypher = f"""
+            MATCH (n:Node)
+            WHERE n.type = 'Timestamp'
+            // Extract time_at from the JSON string and cast to INT64
+            WITH n, json_extract(n.properties, '$.time_at') AS t_str
+            WITH n,
+                CASE
+                    WHEN t_str IS NULL OR t_str = '' THEN NULL
+                    ELSE CAST(t_str AS INT64)
+                END AS t
+            WHERE t >= {time_from}
+              AND t <= {time_to}
+            RETURN n.id as id
+            """
+
+        elif time_from:
+            time_from = date_to_int(time_from)
+
+            cypher = f"""
+            MATCH (n:Node)
+            WHERE n.type = 'Timestamp'
+            // Extract time_at from the JSON string and cast to INT64
+            WITH n, json_extract(n.properties, '$.time_at') AS t_str
+            WITH n,
+                CASE
+                    WHEN t_str IS NULL OR t_str = '' THEN NULL
+                    ELSE CAST(t_str AS INT64)
+                END AS t
+            WHERE t >= {time_from}
+            RETURN n.id as id
+            """
+
+        elif time_to:
+            time_to = date_to_int(time_to)
+
+            cypher = f"""
+            MATCH (n:Node)
+            WHERE n.type = 'Timestamp'
+            // Extract time_at from the JSON string and cast to INT64
+            WITH n, json_extract(n.properties, '$.time_at') AS t_str
+            WITH n,
+                CASE
+                    WHEN t_str IS NULL OR t_str = '' THEN NULL
+                    ELSE CAST(t_str AS INT64)
+                END AS t
+            WHERE t <= {time_to}
+            RETURN n.id as id
+            """
+
+        else:
+            return ""
+
+        time_nodes = await self.query(cypher)
+        time_ids_list = [item[0] for item in time_nodes]
+
+        return ", ".join(f"'{uid}'" for uid in time_ids_list)
diff --git a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py
index f36296970..03b16eb33 100644
--- a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py
+++ b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py
@@ -11,6 +11,8 @@ from contextlib import asynccontextmanager
 from typing import Optional, Any, List, Dict, Type, Tuple
 
 from cognee.infrastructure.engine import DataPoint
+from cognee.modules.engine.utils.generate_timestamp_datapoint import date_to_int
+from cognee.tasks.temporal_graph.models import Timestamp
 from cognee.shared.logging_utils import get_logger, ERROR
 from cognee.infrastructure.databases.graph.graph_db_interface import (
     GraphDBInterface,
@@ -1371,3 +1373,90 @@ class Neo4jAdapter(GraphDBInterface):
             query,
             params={"weight": float(weight),
"node_ids": list(node_ids)}, ) + + async def collect_events(self, ids: List[str]) -> Any: + """ + Collect all Event-type nodes reachable within 1..2 hops + from the given node IDs. + + Args: + graph_engine: Object exposing an async .query(str) -> Any + ids: List of node IDs (strings) + + Returns: + List of events + """ + + event_collection_cypher = """UNWIND [{quoted}] AS uid + MATCH (start {{id: uid}}) + MATCH (start)-[*1..2]-(event) + WHERE event.type = 'Event' + WITH DISTINCT event + RETURN collect(event) AS events; + """ + + query = event_collection_cypher.format(quoted=ids) + return await self.query(query) + + async def collect_time_ids( + self, + time_from: Optional[Timestamp] = None, + time_to: Optional[Timestamp] = None, + ) -> str: + """ + Collect IDs of Timestamp nodes between time_from and time_to. + + Args: + graph_engine: Object exposing an async .query(query, params) -> list[dict] + time_from: Lower bound int (inclusive), optional + time_to: Upper bound int (inclusive), optional + + Returns: + A string of quoted IDs: "'id1', 'id2', 'id3'" + (ready for use in a Cypher UNWIND clause). + """ + + ids: List[str] = [] + + if time_from and time_to: + time_from = date_to_int(time_from) + time_to = date_to_int(time_to) + + cypher = """ + MATCH (n) + WHERE n.type = 'Timestamp' + AND n.time_at >= $time_from + AND n.time_at <= $time_to + RETURN n.id AS id + """ + params = {"time_from": time_from, "time_to": time_to} + + elif time_from: + time_from = date_to_int(time_from) + + cypher = """ + MATCH (n) + WHERE n.type = 'Timestamp' + AND n.time_at >= $time_from + RETURN n.id AS id + """ + params = {"time_from": time_from} + + elif time_to: + time_to = date_to_int(time_to) + + cypher = """ + MATCH (n) + WHERE n.type = 'Timestamp' + AND n.time_at <= $time_to + RETURN n.id AS id + """ + params = {"time_to": time_to} + + else: + return ids + + time_nodes = await self.query(cypher, params) + time_ids_list = [item["id"] for item in time_nodes if "id" in item] + + return ", ".join(f"'{uid}'" for uid in time_ids_list) diff --git a/cognee/infrastructure/llm/prompts/extract_query_time.txt b/cognee/infrastructure/llm/prompts/extract_query_time.txt new file mode 100644 index 000000000..763d0e1c4 --- /dev/null +++ b/cognee/infrastructure/llm/prompts/extract_query_time.txt @@ -0,0 +1,15 @@ +For the purposes of identifying timestamps in a query, you are tasked with extracting relevant timestamps from the query. 
+## Timestamp requirements
+- If the query contains an interval, extract both the starts_at and ends_at properties
+- If the query contains an instantaneous timestamp, starts_at and ends_at should be the same
+- If the query is open-ended (before 2009 or after 2009), the corresponding undefined end of the interval should be None
+  - For example: "before 2009" -- starts_at: None, ends_at: 2009; "after 2009" -- starts_at: 2009, ends_at: None
+- Always put the timestamp that comes first in time as starts_at and the timestamp that comes second as ends_at
+- If no timestamps can be extracted, both starts_at and ends_at have to be None
+## Output Format
+Your reply should be JSON matching the following structure:
+```python
+class QueryInterval(BaseModel):
+    starts_at: Optional[Timestamp] = None
+    ends_at: Optional[Timestamp] = None
+```
\ No newline at end of file
diff --git a/cognee/modules/retrieval/temporal_retriever.py b/cognee/modules/retrieval/temporal_retriever.py
new file mode 100644
index 000000000..3ea402080
--- /dev/null
+++ b/cognee/modules/retrieval/temporal_retriever.py
@@ -0,0 +1,156 @@
+import os
+from typing import Any, Optional, List, Type
+
+from poetry.console.commands import self
+from operator import itemgetter
+from cognee.infrastructure.databases.vector import get_vector_engine
+from cognee.modules.retrieval.utils.completion import generate_completion
+from cognee.infrastructure.databases.graph import get_graph_engine
+from cognee.infrastructure.llm import LLMGateway
+from cognee.modules.engine.utils.generate_timestamp_datapoint import date_to_int
+from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
+from cognee.shared.logging_utils import get_logger
+
+
+from cognee.tasks.temporal_graph.models import QueryInterval
+from cognee.infrastructure.llm.config import (
+    get_llm_config,
+)
+
+logger = get_logger()
+
+
+class TemporalRetriever(GraphCompletionRetriever):
+    """
+    Retrieves context by filtering the knowledge graph on time. This class
+    extends GraphCompletionRetriever: it extracts a time interval from the
+    query, collects Timestamp nodes that fall inside that interval,
+    gathers the Event nodes connected to them, and re-ranks those events
+    against the query before generating a completion.
+
+    The public methods are:
+
+    - get_completion
+
+    Instance variables include:
+    - user_prompt_path
+    - system_prompt_path
+    - time_extraction_prompt_path
+    """
+
+    def __init__(
+        self,
+        user_prompt_path: str = "graph_context_for_question.txt",
+        system_prompt_path: str = "answer_simple_question.txt",
+        time_extraction_prompt_path: str = "extract_query_time.txt",
+        top_k: Optional[int] = 5,
+        node_type: Optional[Type] = None,
+        node_name: Optional[List[str]] = None,
+        save_interaction: bool = False,
+    ):
+        super().__init__(
+            user_prompt_path=user_prompt_path,
+            system_prompt_path=system_prompt_path,
+            top_k=top_k,
+            node_type=node_type,
+            node_name=node_name,
+        )
+        self.user_prompt_path = user_prompt_path
+        self.system_prompt_path = system_prompt_path
+        self.time_extraction_prompt_path = time_extraction_prompt_path
+        self.top_k = top_k if top_k is not None else 5
+        self.node_type = node_type
+        self.node_name = node_name
+
+    def descriptions_to_string(self, results):
+        descs = []
+        for entry in results:
+            d = entry.get("description")
+            if d:
+                descs.append(d.strip())
+        return "\n#####################\n".join(descs)
+
+    async def extract_time_from_query(self, query: str):
+        prompt_path = self.time_extraction_prompt_path
+
+        if os.path.isabs(prompt_path):
+            base_directory = os.path.dirname(prompt_path)
+            prompt_path = os.path.basename(prompt_path)
+        else:
+            base_directory = None
+
+        system_prompt = LLMGateway.render_prompt(prompt_path, {}, base_directory=base_directory)
+
+        interval = await LLMGateway.acreate_structured_output(query, system_prompt, QueryInterval)
+
+        time_from = interval.starts_at
+        time_to = interval.ends_at
+
+        return time_from, time_to
+
+    async def filter_top_k_events(self, relevant_events, scored_results):
+        # Build a score lookup from vector search results
+        score_lookup = {res.payload["id"]: res.score for res in scored_results}
+
+        events_with_scores = []
+        for event in relevant_events[0]["events"]:
+            score = score_lookup.get(event["id"], float("inf"))
+            events_with_scores.append({**event, "score": score})
+
+        events_with_scores.sort(key=itemgetter("score"))
+
+        top_events = events_with_scores[: self.top_k]
+
+        return events_with_scores[: self.top_k]
+
+    async def get_context(self, query: str) -> Any:
+        """Retrieves context based on the query."""
+
+        time_from, time_to = await self.extract_time_from_query(query)
+
+        graph_engine = await get_graph_engine()
+
+        if time_from and time_to:
+            ids = await graph_engine.collect_time_ids(time_from=time_from, time_to=time_to)
+        elif time_from:
+            ids = await graph_engine.collect_time_ids(time_from=time_from)
+        elif time_to:
+            ids = await graph_engine.collect_time_ids(time_to=time_to)
+        else:
+            logger.info(
+                "No timestamps identified based on the query, performing retrieval using triplet search on events and entities."
+            )
+            triplets = await self.get_triplets(query)
+            return await self.resolve_edges_to_text(triplets)
+
+        if ids:
+            relevant_events = await graph_engine.collect_events(ids=ids)
+        else:
+            logger.info(
+                "No events identified based on timestamp filtering, performing retrieval using triplet search on events and entities."
+ ) + triplets = await self.get_triplets(query) + return await self.resolve_edges_to_text(triplets) + + vector_engine = get_vector_engine() + query_vector = (await vector_engine.embedding_engine.embed_text([query]))[0] + + vector_search_results = await vector_engine.search( + collection_name="Event_name", query_vector=query_vector, limit=0 + ) + + top_k_events = await self.filter_top_k_events(relevant_events, vector_search_results) + + return self.descriptions_to_string(top_k_events) + + async def get_completion(self, query: str, context: Optional[Any] = None) -> Any: + """Generates a response using the query and optional context.""" + + context = await self.get_context(query=query) + + completion = await generate_completion( + query=query, + context=context, + user_prompt_path=self.user_prompt_path, + system_prompt_path=self.system_prompt_path, + ) + + return [completion] diff --git a/cognee/modules/search/methods/search.py b/cognee/modules/search/methods/search.py index f5f2a793a..6c0aa6a1d 100644 --- a/cognee/modules/search/methods/search.py +++ b/cognee/modules/search/methods/search.py @@ -12,6 +12,7 @@ from cognee.modules.retrieval.insights_retriever import InsightsRetriever from cognee.modules.retrieval.summaries_retriever import SummariesRetriever from cognee.modules.retrieval.completion_retriever import CompletionRetriever from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever +from cognee.modules.retrieval.temporal_retriever import TemporalRetriever from cognee.modules.retrieval.graph_summary_completion_retriever import ( GraphSummaryCompletionRetriever, ) @@ -146,6 +147,7 @@ async def specific_search( SearchType.CYPHER: CypherSearchRetriever().get_completion, SearchType.NATURAL_LANGUAGE: NaturalLanguageRetriever().get_completion, SearchType.FEEDBACK: UserQAFeedback(last_k=last_k).add_feedback, + SearchType.TEMPORAL: TemporalRetriever(top_k=top_k).get_completion, } # If the query type is FEELING_LUCKY, select the search type intelligently diff --git a/cognee/modules/search/types/SearchType.py b/cognee/modules/search/types/SearchType.py index c1f0521b2..a9b7989fe 100644 --- a/cognee/modules/search/types/SearchType.py +++ b/cognee/modules/search/types/SearchType.py @@ -15,3 +15,4 @@ class SearchType(Enum): GRAPH_COMPLETION_CONTEXT_EXTENSION = "GRAPH_COMPLETION_CONTEXT_EXTENSION" FEELING_LUCKY = "FEELING_LUCKY" FEEDBACK = "FEEDBACK" + TEMPORAL = "TEMPORAL" From 8747c0a2b029c1fa6fb981fff5601cd80e56d4f7 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 28 Aug 2025 17:04:20 +0200 Subject: [PATCH 26/73] feat: adds temporal example --- examples/python/temporal_example.py | 54 ++++++++++------------------- 1 file changed, 18 insertions(+), 36 deletions(-) diff --git a/examples/python/temporal_example.py b/examples/python/temporal_example.py index c4c1c9875..4b54b72ed 100644 --- a/examples/python/temporal_example.py +++ b/examples/python/temporal_example.py @@ -1,10 +1,7 @@ import asyncio import cognee from cognee.shared.logging_utils import setup_logging, INFO - - -import json -from pathlib import Path +from cognee.api.v1.search import SearchType biography_1 = """ @@ -32,25 +29,6 @@ biography_1 = """ Bangkok Glass . In 2013 , he moved from Buriram United to Bangkok Glass F.C. . - Personal life . - Attaphols sons , Wannaphon Buspakom and Kanokpon Buspakom , are professional footballers . - - Honours . - Player . 
- Thai Port - Kor Royal Cup - Winners ( 2 ) : 1985 , 1990 - Pahang FA - Malaysia Super League - Champions ( 1 ) : 1995 - Thailand - Sea Games - Gold Medal ( 1 ) ; 1993 - Silver Medal ( 1 ) ; 1991 - - Manager . - BEC Tero Sasana - AFC Champions League - Runner-up ( 1 ) : 2002-03 - - ASEAN Club Championship - Runner-up ( 1 ) : 2003 - Muangthong United - Thai Premier League - Champions ( 1 ) : 2009 - Buriram United - Thai Premier League - Champions ( 1 ) : 2011 - - Thai FA Cup - Winners ( 2 ) : 2011 , 2012 - - Thai League Cup - Winners ( 2 ) : 2011 , 2012 - - Toyota Premier Cup - Winner ( 1 ) : 2011 - - Kor Royal Cup - Winner ( 1 ) : 2013 - Individual - Thai Premier League Coach of the Year ( 3 ) : 2001-02 , 2009 , 2013 """ @@ -69,11 +47,7 @@ biography_2 = """ Personal life . In 1918 he had married the singer Hildur Arntzen ( 1888–1957 ) . Their marriage was dissolved in 1939 . In 1940 , he married Bartholine Eufemia Leganger ( 1903–1995 ) . They separated shortly after , and were officially divorced in 1945 . Øverland was married to journalist Margrete Aamot Øverland ( 1913–1978 ) during June 1945 . In 1946 , the Norwegian Parliament arranged for Arnulf and Margrete Aamot Øverland to reside at the Grotten . He lived there until his death in 1968 and she lived there for another ten years until her death in 1978 . Arnulf Øverland was buried at Vår Frelsers Gravlund in Oslo . Joseph Grimeland designed the bust of Arnulf Øverland ( bronze , 1970 ) at his grave site . - - Famous Quotes . - - “For a “monotheistic” religion it should be sufficient with three gods.” - - “What is there to be said about a Church which certainly promises its believers eternal salvation , but at the same time condemns the non-believers , all those who think differently , to an eternal torment in hell ? – If that Church absolutely must talk about love , then it should do so very quietly.” - + Selected Works . - Den ensomme fest ( 1911 ) - Berget det blå ( 1927 ) @@ -88,13 +62,6 @@ biography_2 = """ - Dobloug Prize ( 1951 ) - Mads Wiel Nygaards legat ( 1961 ) - Other sources . - - Hambro , Carl ( 1984 ) Arnulf Øverland : det brennende hjerte ( Oslo : Aschehoug ) - - External links . - - Du må ikke sove ! 
-    - Translation of Du må ikke sove by Lars-Toralf Storstrand
-    - Kristendommen , den tiende landeplage - Christianity , the tenth plague
 """
 
 
@@ -105,7 +72,22 @@ async def main():
     await cognee.add([biography_1, biography_2])
     await cognee.cognify(temporal_cognify=True)
 
-    print()
+    queries = [
+        "What happened before 1980?",
+        "What happened after 2010?",
+        "What happened between 2000 and 2006?",
+        "What happened between 1903 and 1995, I am interested in the Selected Works of Arnulf Øverland Ole Peter Arnulf Øverland?",
+        "Who is Attaphol Buspakom Attaphol Buspakom?",
+    ]
+
+    for query_text in queries:
+        search_results = await cognee.search(
+            query_type=SearchType.TEMPORAL,
+            query_text=query_text,
+            top_k=15,
+        )
+        print(f"Query: {query_text}")
+        print(f"Results: {search_results}\n")
 
 
 if __name__ == "__main__":

From 96a32dfc67a83e73229dcef31420f364a15e4460 Mon Sep 17 00:00:00 2001
From: hajdul88 <52442977+hajdul88@users.noreply.github.com>
Date: Thu, 28 Aug 2025 17:06:41 +0200
Subject: [PATCH 27/73] chore: removes auto-imported useless lib

---
 cognee/modules/retrieval/temporal_retriever.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/cognee/modules/retrieval/temporal_retriever.py b/cognee/modules/retrieval/temporal_retriever.py
index 3ea402080..63ad86d40 100644
--- a/cognee/modules/retrieval/temporal_retriever.py
+++ b/cognee/modules/retrieval/temporal_retriever.py
@@ -1,21 +1,17 @@
 import os
 from typing import Any, Optional, List, Type
 
-from poetry.console.commands import self
+
 from operator import itemgetter
 from cognee.infrastructure.databases.vector import get_vector_engine
 from cognee.modules.retrieval.utils.completion import generate_completion
 from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.infrastructure.llm import LLMGateway
-from cognee.modules.engine.utils.generate_timestamp_datapoint import date_to_int
 from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
 from cognee.shared.logging_utils import get_logger
 
 
 from cognee.tasks.temporal_graph.models import QueryInterval
-from cognee.infrastructure.llm.config import (
-    get_llm_config,
-)
 
 logger = get_logger()
 

From 90faf22dd0a7f2a7d17bc06ba9be5b44f1c1b17c Mon Sep 17 00:00:00 2001
From: hajdul88 <52442977+hajdul88@users.noreply.github.com>
Date: Thu, 28 Aug 2025 17:08:40 +0200
Subject: [PATCH 28/73] chore: removing unused var

---
 cognee/modules/retrieval/temporal_retriever.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/cognee/modules/retrieval/temporal_retriever.py b/cognee/modules/retrieval/temporal_retriever.py
index 63ad86d40..61881bf7e 100644
--- a/cognee/modules/retrieval/temporal_retriever.py
+++ b/cognee/modules/retrieval/temporal_retriever.py
@@ -93,8 +93,6 @@ class TemporalRetriever(GraphCompletionRetriever):
 
         events_with_scores.sort(key=itemgetter("score"))
 
-        top_events = events_with_scores[: self.top_k]
-
         return events_with_scores[: self.top_k]
 
     async def get_context(self, query: str) -> Any:

From 966e676d610a38b1607ce415ec8b9d620cf5cec2 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Thu, 28 Aug 2025 17:23:15 +0200
Subject: [PATCH 29/73] refactor: Have search prompt be empty string by
 default

---
 cognee/api/v1/search/routers/get_search_router.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cognee/api/v1/search/routers/get_search_router.py b/cognee/api/v1/search/routers/get_search_router.py
index b141c6bdc..39a896dd8 100644
--- a/cognee/api/v1/search/routers/get_search_router.py
+++
b/cognee/api/v1/search/routers/get_search_router.py @@ -20,7 +20,7 @@ class SearchPayloadDTO(InDTO): datasets: Optional[list[str]] = Field(default=None) dataset_ids: Optional[list[UUID]] = Field(default=None, examples=[[]]) query: str = Field(default="What is in the document?") - system_prompt: Optional[str] = Field(default=None) + system_prompt: Optional[str] = Field(default="") top_k: Optional[int] = Field(default=10) only_context: bool = Field(default=False) From cf636ba77f08665ce075235c5571eabc45c559be Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 28 Aug 2025 18:37:44 +0200 Subject: [PATCH 30/73] feat: Enable nodesets on backend --- cognee/api/v1/add/routers/get_add_router.py | 38 ++++--------------- .../v1/search/routers/get_search_router.py | 3 ++ 2 files changed, 11 insertions(+), 30 deletions(-) diff --git a/cognee/api/v1/add/routers/get_add_router.py b/cognee/api/v1/add/routers/get_add_router.py index 66b165a38..8424a4fb5 100644 --- a/cognee/api/v1/add/routers/get_add_router.py +++ b/cognee/api/v1/add/routers/get_add_router.py @@ -25,6 +25,7 @@ def get_add_router() -> APIRouter: data: List[UploadFile] = File(default=None), datasetName: Optional[str] = Form(default=None), datasetId: Union[UUID, Literal[""], None] = Form(default=None, examples=[""]), + node_set: Optional[List[str]] = Form(default=[""], example=[""]), user: User = Depends(get_authenticated_user), ): """ @@ -65,9 +66,7 @@ def get_add_router() -> APIRouter: send_telemetry( "Add API Endpoint Invoked", user.id, - additional_properties={ - "endpoint": "POST /v1/add", - }, + additional_properties={"endpoint": "POST /v1/add", "node_set": node_set}, ) from cognee.api.v1.add import add as cognee_add @@ -76,34 +75,13 @@ def get_add_router() -> APIRouter: raise ValueError("Either datasetId or datasetName must be provided.") try: - if ( - isinstance(data, str) - and data.startswith("http") - and (os.getenv("ALLOW_HTTP_REQUESTS", "true").lower() == "true") - ): - if "github" in data: - # Perform git clone if the URL is from GitHub - repo_name = data.split("/")[-1].replace(".git", "") - subprocess.run(["git", "clone", data, f".data/{repo_name}"], check=True) - # TODO: Update add call with dataset info - await cognee_add( - "data://.data/", - f"{repo_name}", - ) - else: - # Fetch and store the data from other types of URL using curl - response = requests.get(data) - response.raise_for_status() + add_run = await cognee_add( + data, datasetName, user=user, dataset_id=datasetId, node_set=node_set + ) - file_data = await response.content() - # TODO: Update add call with dataset info - return await cognee_add(file_data) - else: - add_run = await cognee_add(data, datasetName, user=user, dataset_id=datasetId) - - if isinstance(add_run, PipelineRunErrored): - return JSONResponse(status_code=420, content=add_run.model_dump(mode="json")) - return add_run.model_dump() + if isinstance(add_run, PipelineRunErrored): + return JSONResponse(status_code=420, content=add_run.model_dump(mode="json")) + return add_run.model_dump() except Exception as error: return JSONResponse(status_code=409, content={"error": str(error)}) diff --git a/cognee/api/v1/search/routers/get_search_router.py b/cognee/api/v1/search/routers/get_search_router.py index 0ceeb1abb..961532a06 100644 --- a/cognee/api/v1/search/routers/get_search_router.py +++ b/cognee/api/v1/search/routers/get_search_router.py @@ -20,6 +20,7 @@ class SearchPayloadDTO(InDTO): datasets: Optional[list[str]] = Field(default=None) dataset_ids: Optional[list[UUID]] = Field(default=None, 
examples=[[]]) query: str = Field(default="What is in the document?") + node_name: Optional[list[str]] = Field(default=None, example=[]) top_k: Optional[int] = Field(default=10) @@ -102,6 +103,7 @@ def get_search_router() -> APIRouter: "datasets": payload.datasets, "dataset_ids": [str(dataset_id) for dataset_id in payload.dataset_ids or []], "query": payload.query, + "node_name": payload.node_name, "top_k": payload.top_k, }, ) @@ -115,6 +117,7 @@ def get_search_router() -> APIRouter: user=user, datasets=payload.datasets, dataset_ids=payload.dataset_ids, + node_name=payload.node_name, top_k=payload.top_k, ) From 5bfae7a36b10b746c167a4895d108130f9a62a2a Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 29 Aug 2025 10:30:49 +0200 Subject: [PATCH 31/73] refactor: Resolve unit tests failing for search --- cognee/modules/search/methods/search.py | 2 +- .../unit/modules/search/search_methods_test.py | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/cognee/modules/search/methods/search.py b/cognee/modules/search/methods/search.py index 465d0cbb3..2db105d71 100644 --- a/cognee/modules/search/methods/search.py +++ b/cognee/modules/search/methods/search.py @@ -179,7 +179,7 @@ async def specific_search( send_telemetry("cognee.search EXECUTION STARTED", user.id) - results = await search_task(query=query_text) + results = await search_task(query_text) send_telemetry("cognee.search EXECUTION COMPLETED", user.id) diff --git a/cognee/tests/unit/modules/search/search_methods_test.py b/cognee/tests/unit/modules/search/search_methods_test.py index 46995d087..9833a770b 100644 --- a/cognee/tests/unit/modules/search/search_methods_test.py +++ b/cognee/tests/unit/modules/search/search_methods_test.py @@ -58,15 +58,17 @@ async def test_search( # Verify mock_log_query.assert_called_once_with(query_text, query_type.value, mock_user.id) mock_specific_search.assert_called_once_with( - query_type, - query_text, - mock_user, + query_type=query_type, + query_text=query_text, + user=mock_user, system_prompt_path="answer_simple_question.txt", + system_prompt=None, top_k=10, node_type=None, node_name=None, save_interaction=False, last_k=None, + only_context=False, ) # Verify result logging @@ -201,7 +203,10 @@ async def test_specific_search_feeling_lucky( if retriever_name == "CompletionRetriever": mock_retriever_class.assert_called_once_with( - system_prompt_path="answer_simple_question.txt", top_k=top_k + system_prompt_path="answer_simple_question.txt", + top_k=top_k, + system_prompt=None, + only_context=None, ) else: mock_retriever_class.assert_called_once_with(top_k=top_k) From b06fe395b32e55a7a70349e8740e5911e9442f83 Mon Sep 17 00:00:00 2001 From: gneeraj2001 Date: Fri, 29 Aug 2025 02:06:43 -0700 Subject: [PATCH 32/73] Fix path handling consistency Signed-off-by: gneeraj2001 --- cognee/base_config.py | 15 ++- .../infrastructure/databases/graph/config.py | 16 ++- .../infrastructure/databases/vector/config.py | 21 ++-- cognee/root_dir.py | 28 +++++ cognee/tests/test_path_config.py | 114 ++++++++++++++++++ 5 files changed, 182 insertions(+), 12 deletions(-) create mode 100644 cognee/tests/test_path_config.py diff --git a/cognee/base_config.py b/cognee/base_config.py index aa0b14008..d80e6197f 100644 --- a/cognee/base_config.py +++ b/cognee/base_config.py @@ -1,15 +1,28 @@ import os from typing import Optional from functools import lru_cache -from cognee.root_dir import get_absolute_path +from cognee.root_dir import get_absolute_path, ensure_absolute_path from 
cognee.modules.observability.observers import Observer from pydantic_settings import BaseSettings, SettingsConfigDict +import pydantic class BaseConfig(BaseSettings): data_root_directory: str = get_absolute_path(".data_storage") system_root_directory: str = get_absolute_path(".cognee_system") monitoring_tool: object = Observer.LANGFUSE + + @pydantic.model_validator(mode="after") + def validate_paths(cls, values): + # Require absolute paths for root directories + values.data_root_directory = ensure_absolute_path( + values.data_root_directory, allow_relative=False + ) + values.system_root_directory = ensure_absolute_path( + values.system_root_directory, allow_relative=False + ) + return values + langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY") langfuse_secret_key: Optional[str] = os.getenv("LANGFUSE_SECRET_KEY") langfuse_host: Optional[str] = os.getenv("LANGFUSE_HOST") diff --git a/cognee/infrastructure/databases/graph/config.py b/cognee/infrastructure/databases/graph/config.py index cdc001863..60c193d91 100644 --- a/cognee/infrastructure/databases/graph/config.py +++ b/cognee/infrastructure/databases/graph/config.py @@ -6,6 +6,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict import pydantic from pydantic import Field from cognee.base_config import get_base_config +from cognee.root_dir import ensure_absolute_path from cognee.shared.data_models import KnowledgeGraph @@ -51,15 +52,22 @@ class GraphConfig(BaseSettings): @pydantic.model_validator(mode="after") def fill_derived(cls, values): provider = values.graph_database_provider.lower() + base_config = get_base_config() # Set default filename if no filename is provided if not values.graph_filename: values.graph_filename = f"cognee_graph_{provider}" - # Set file path based on graph database provider if no file path is provided - if not values.graph_file_path: - base_config = get_base_config() - + # Handle graph file path + if values.graph_file_path: + # Convert relative paths to absolute using system_root_directory as base + values.graph_file_path = ensure_absolute_path( + values.graph_file_path, + base_path=base_config.system_root_directory, + allow_relative=True + ) + else: + # Default path databases_directory_path = os.path.join(base_config.system_root_directory, "databases") values.graph_file_path = os.path.join(databases_directory_path, values.graph_filename) diff --git a/cognee/infrastructure/databases/vector/config.py b/cognee/infrastructure/databases/vector/config.py index 07a3d1e05..ed846a54b 100644 --- a/cognee/infrastructure/databases/vector/config.py +++ b/cognee/infrastructure/databases/vector/config.py @@ -4,6 +4,7 @@ from functools import lru_cache from pydantic_settings import BaseSettings, SettingsConfigDict from cognee.base_config import get_base_config +from cognee.root_dir import ensure_absolute_path class VectorConfig(BaseSettings): @@ -11,12 +12,10 @@ class VectorConfig(BaseSettings): Manage the configuration settings for the vector database. Public methods: - - to_dict: Convert the configuration to a dictionary. Instance variables: - - - vector_db_url: The URL of the vector database. + - vector_db_url: The URL of the vector database. Can be relative to system_root_directory. - vector_db_port: The port for the vector database. - vector_db_key: The key for accessing the vector database. - vector_db_provider: The provider for the vector database. 
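The validator hunk below applies the same rule the graph config uses: absolute paths pass through unchanged, while relative ones resolve against system_root_directory. A sketch of the helper's intended behavior, with illustrative POSIX-style paths:

```python
from cognee.root_dir import ensure_absolute_path

# Root directories must be absolute up front.
system_root = ensure_absolute_path("/opt/cognee_system", allow_relative=False)

# Database locations may be relative; they resolve against the system root,
# yielding "/opt/cognee_system/databases/cognee.lancedb" here.
vector_db_url = ensure_absolute_path(
    "databases/cognee.lancedb",
    base_path=system_root,
    allow_relative=True,
)
```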
@@ -30,10 +29,18 @@ class VectorConfig(BaseSettings): model_config = SettingsConfigDict(env_file=".env", extra="allow") @pydantic.model_validator(mode="after") - def fill_derived(cls, values): - # Set file path based on graph database provider if no file path is provided - if not values.vector_db_url: - base_config = get_base_config() + def validate_paths(cls, values): + base_config = get_base_config() + + if values.vector_db_url: + # Convert relative paths to absolute using system_root_directory as base + values.vector_db_url = ensure_absolute_path( + values.vector_db_url, + base_path=base_config.system_root_directory, + allow_relative=True, + ) + else: + # Default path databases_directory_path = os.path.join(base_config.system_root_directory, "databases") values.vector_db_url = os.path.join(databases_directory_path, "cognee.lancedb") diff --git a/cognee/root_dir.py b/cognee/root_dir.py index 2e21d5ce3..73afd0c12 100644 --- a/cognee/root_dir.py +++ b/cognee/root_dir.py @@ -1,4 +1,5 @@ from pathlib import Path +from typing import Optional ROOT_DIR = Path(__file__).resolve().parent @@ -6,3 +7,30 @@ ROOT_DIR = Path(__file__).resolve().parent def get_absolute_path(path_from_root: str) -> str: absolute_path = ROOT_DIR / path_from_root return str(absolute_path.resolve()) + + +def ensure_absolute_path( + path: str, base_path: Optional[str] = None, allow_relative: bool = False +) -> str: + """Ensures a path is absolute, optionally converting relative paths. + + Args: + path: The path to validate/convert + base_path: Optional base path for relative paths. If None, uses ROOT_DIR + allow_relative: If False, raises error for relative paths instead of converting + + Returns: + Absolute path as string + + Raises: + ValueError: If path is relative and allow_relative is False + """ + path_obj = Path(path) + if path_obj.is_absolute(): + return str(path_obj.resolve()) + + if not allow_relative: + raise ValueError(f"Path must be absolute. Got relative path: {path}") + + base = Path(base_path) if base_path else ROOT_DIR + return str((base / path).resolve()) diff --git a/cognee/tests/test_path_config.py b/cognee/tests/test_path_config.py new file mode 100644 index 000000000..ff1905c5e --- /dev/null +++ b/cognee/tests/test_path_config.py @@ -0,0 +1,114 @@ +import os +from pathlib import Path + +def ensure_absolute_path(path: str, base_path: str = None, allow_relative: bool = False) -> str: + """Ensures a path is absolute, optionally converting relative paths.""" + if path is None: + raise ValueError("Path cannot be None") + + path_obj = Path(path) + if path_obj.is_absolute(): + return str(path_obj.resolve()) + + if not allow_relative: + raise ValueError(f"Path must be absolute. 
Got relative path: {path}") + + if base_path is None: + raise ValueError("base_path must be provided when converting relative paths") + + base = Path(base_path) + if not base.is_absolute(): + raise ValueError("base_path must be absolute when converting relative paths") + + return str((base / path).resolve()) + +def test_root_dir_absolute_paths(): + """Test absolute path handling in root_dir.py""" + # Test with absolute path + abs_path = "C:/absolute/path" if os.name == 'nt' else "/absolute/path" + result = ensure_absolute_path(abs_path, allow_relative=False) + assert result == str(Path(abs_path).resolve()) + + # Test with relative path (should fail) + rel_path = "relative/path" + try: + ensure_absolute_path(rel_path, allow_relative=False) + assert False, "Should fail with relative path when allow_relative=False" + except ValueError as e: + assert "must be absolute" in str(e) + + # Test with None path + try: + ensure_absolute_path(None) + assert False, "Should fail with None path" + except ValueError as e: + assert "cannot be None" in str(e) + +def test_database_relative_paths(): + """Test relative path handling for vector and graph databases""" + system_root = "C:/system/root" if os.name == 'nt' else "/system/root" + + # Test with absolute path + abs_path = "C:/data/vector.db" if os.name == 'nt' else "/data/vector.db" + result = ensure_absolute_path(abs_path, base_path=system_root, allow_relative=True) + assert result == str(Path(abs_path).resolve()) + + # Test with relative path (should convert to absolute) + rel_path = "data/vector.db" + result = ensure_absolute_path(rel_path, base_path=system_root, allow_relative=True) + expected = str((Path(system_root) / rel_path).resolve()) + assert result == expected + + # Test with relative base_path (should fail) + try: + ensure_absolute_path(rel_path, base_path="relative/base", allow_relative=True) + assert False, "Should fail when base_path is relative" + except ValueError as e: + assert "base_path must be absolute" in str(e) + + # Test without base_path for relative path + try: + ensure_absolute_path(rel_path, allow_relative=True) + assert False, "Should fail when base_path is not provided for relative path" + except ValueError as e: + assert "base_path must be provided" in str(e) + +def test_path_consistency(): + """Test that paths are handled consistently across configurations""" + system_root = "C:/system/root" if os.name == 'nt' else "/system/root" + + # Root directories must be absolute + data_root = "C:/data/root" if os.name == 'nt' else "/data/root" + assert ensure_absolute_path(data_root, allow_relative=False) == str(Path(data_root).resolve()) + + # Database paths can be relative but must resolve against system_root + db_paths = [ + # Vector DB paths + "vector.db", # Simple relative + "data/vector.db", # Nested relative + "../vector.db", # Parent relative + "./vector.db", # Current dir relative + # Graph DB paths + "graph.db", # Simple relative + "data/graph/db", # Nested relative + "../graph.db", # Parent relative + "./graph.db", # Current dir relative + # With different extensions + "data/vector.lancedb", # Vector DB with extension + "data/graph/kuzu", # Graph DB with extension + ] + + for rel_path in db_paths: + result = ensure_absolute_path(rel_path, base_path=system_root, allow_relative=True) + expected = str((Path(system_root) / rel_path).resolve()) + assert result == expected, f"Failed to resolve {rel_path} correctly" + +if __name__ == "__main__": + print("Running path configuration tests...") + test_root_dir_absolute_paths() + 
print("✓ Root directory absolute path tests passed") + test_database_relative_paths() + print("✓ Database relative path tests passed") + test_path_consistency() + print("✓ Path consistency tests passed") + print("All tests passed successfully!") From aa3d704adc6baa143309fc66fb0edc1450b0085a Mon Sep 17 00:00:00 2001 From: Neeraj Gopalakrishnan <91423180+gneeraj2001@users.noreply.github.com> Date: Fri, 29 Aug 2025 02:31:58 -0700 Subject: [PATCH 33/73] Update cognee/base_config.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- cognee/base_config.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cognee/base_config.py b/cognee/base_config.py index d80e6197f..b3258dba9 100644 --- a/cognee/base_config.py +++ b/cognee/base_config.py @@ -13,15 +13,15 @@ class BaseConfig(BaseSettings): monitoring_tool: object = Observer.LANGFUSE @pydantic.model_validator(mode="after") - def validate_paths(cls, values): + def validate_paths(self): # Require absolute paths for root directories - values.data_root_directory = ensure_absolute_path( - values.data_root_directory, allow_relative=False + self.data_root_directory = ensure_absolute_path( + self.data_root_directory, allow_relative=False ) - values.system_root_directory = ensure_absolute_path( - values.system_root_directory, allow_relative=False + self.system_root_directory = ensure_absolute_path( + self.system_root_directory, allow_relative=False ) - return values + return self langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY") langfuse_secret_key: Optional[str] = os.getenv("LANGFUSE_SECRET_KEY") From 6e262d5eb3902c6839f071f78784f37c32f6934a Mon Sep 17 00:00:00 2001 From: Neeraj Gopalakrishnan <91423180+gneeraj2001@users.noreply.github.com> Date: Fri, 29 Aug 2025 02:33:16 -0700 Subject: [PATCH 34/73] Update cognee/tests/test_path_config.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- cognee/tests/test_path_config.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/cognee/tests/test_path_config.py b/cognee/tests/test_path_config.py index ff1905c5e..600f04579 100644 --- a/cognee/tests/test_path_config.py +++ b/cognee/tests/test_path_config.py @@ -103,12 +103,3 @@ def test_path_consistency(): expected = str((Path(system_root) / rel_path).resolve()) assert result == expected, f"Failed to resolve {rel_path} correctly" -if __name__ == "__main__": - print("Running path configuration tests...") - test_root_dir_absolute_paths() - print("✓ Root directory absolute path tests passed") - test_database_relative_paths() - print("✓ Database relative path tests passed") - test_path_consistency() - print("✓ Path consistency tests passed") - print("All tests passed successfully!") From 19e5980b50310dfa1f331911440b7591c1431689 Mon Sep 17 00:00:00 2001 From: Neeraj Gopalakrishnan <91423180+gneeraj2001@users.noreply.github.com> Date: Fri, 29 Aug 2025 02:35:03 -0700 Subject: [PATCH 35/73] Update cognee/tests/test_path_config.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- cognee/tests/test_path_config.py | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/cognee/tests/test_path_config.py b/cognee/tests/test_path_config.py index 600f04579..7a3d57e5b 100644 --- a/cognee/tests/test_path_config.py +++ b/cognee/tests/test_path_config.py @@ -1,26 +1,11 @@ import os from pathlib import Path -def ensure_absolute_path(path: str, base_path: str = None, 
allow_relative: bool = False) -> str: - """Ensures a path is absolute, optionally converting relative paths.""" - if path is None: - raise ValueError("Path cannot be None") - - path_obj = Path(path) - if path_obj.is_absolute(): - return str(path_obj.resolve()) - - if not allow_relative: - raise ValueError(f"Path must be absolute. Got relative path: {path}") - - if base_path is None: - raise ValueError("base_path must be provided when converting relative paths") - - base = Path(base_path) - if not base.is_absolute(): - raise ValueError("base_path must be absolute when converting relative paths") - - return str((base / path).resolve()) +from pathlib import Path +import pytest +from cognee.root_dir import ensure_absolute_path + +# …rest of your test cases using ensure_absolute_path… def test_root_dir_absolute_paths(): """Test absolute path handling in root_dir.py""" From d385d7edba37fd7b8f177bc3ebe647a1d3aa2d17 Mon Sep 17 00:00:00 2001 From: Neeraj Gopalakrishnan <91423180+gneeraj2001@users.noreply.github.com> Date: Fri, 29 Aug 2025 02:35:48 -0700 Subject: [PATCH 36/73] Update cognee/tests/test_path_config.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- cognee/tests/test_path_config.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/cognee/tests/test_path_config.py b/cognee/tests/test_path_config.py index 7a3d57e5b..65201fc70 100644 --- a/cognee/tests/test_path_config.py +++ b/cognee/tests/test_path_config.py @@ -45,19 +45,12 @@ def test_database_relative_paths(): assert result == expected # Test with relative base_path (should fail) - try: + with pytest.raises(ValueError, match="base_path must be absolute"): ensure_absolute_path(rel_path, base_path="relative/base", allow_relative=True) - assert False, "Should fail when base_path is relative" - except ValueError as e: - assert "base_path must be absolute" in str(e) # Test without base_path for relative path - try: + with pytest.raises(ValueError, match="base_path must be provided"): ensure_absolute_path(rel_path, allow_relative=True) - assert False, "Should fail when base_path is not provided for relative path" - except ValueError as e: - assert "base_path must be provided" in str(e) - def test_path_consistency(): """Test that paths are handled consistently across configurations""" system_root = "C:/system/root" if os.name == 'nt' else "/system/root" From ded92862c7b5b21147bd344f5a2d254a4bab909b Mon Sep 17 00:00:00 2001 From: Neeraj Gopalakrishnan <91423180+gneeraj2001@users.noreply.github.com> Date: Fri, 29 Aug 2025 02:38:39 -0700 Subject: [PATCH 37/73] Update cognee/root_dir.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- cognee/root_dir.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/cognee/root_dir.py b/cognee/root_dir.py index 73afd0c12..4853acd02 100644 --- a/cognee/root_dir.py +++ b/cognee/root_dir.py @@ -15,22 +15,29 @@ def ensure_absolute_path( """Ensures a path is absolute, optionally converting relative paths. Args: - path: The path to validate/convert - base_path: Optional base path for relative paths. If None, uses ROOT_DIR - allow_relative: If False, raises error for relative paths instead of converting + path: The path to validate/convert. + base_path: Required base when converting relative paths (e.g., SYSTEM_ROOT_DIRECTORY). + allow_relative: If False, raises error for relative paths instead of converting. 
Returns: Absolute path as string Raises: - ValueError: If path is relative and allow_relative is False + ValueError: If path is None; or path is relative and allow_relative is False; + or base_path is missing/non-absolute when converting. """ - path_obj = Path(path) + if path is None: + raise ValueError("Path cannot be None") + path_obj = Path(path).expanduser() if path_obj.is_absolute(): return str(path_obj.resolve()) if not allow_relative: raise ValueError(f"Path must be absolute. Got relative path: {path}") - base = Path(base_path) if base_path else ROOT_DIR - return str((base / path).resolve()) + if base_path is None: + raise ValueError("base_path must be provided when converting relative paths") + base = Path(base_path).expanduser() + if not base.is_absolute(): + raise ValueError("base_path must be absolute when converting relative paths") + return str((base / path_obj).resolve()) From de939c154768e614022846d55977477f94e8b81e Mon Sep 17 00:00:00 2001 From: Neeraj Gopalakrishnan <91423180+gneeraj2001@users.noreply.github.com> Date: Fri, 29 Aug 2025 02:39:04 -0700 Subject: [PATCH 38/73] Update cognee/tests/test_path_config.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- cognee/tests/test_path_config.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/cognee/tests/test_path_config.py b/cognee/tests/test_path_config.py index 65201fc70..b90ce8cac 100644 --- a/cognee/tests/test_path_config.py +++ b/cognee/tests/test_path_config.py @@ -16,19 +16,12 @@ def test_root_dir_absolute_paths(): # Test with relative path (should fail) rel_path = "relative/path" - try: + with pytest.raises(ValueError, match="must be absolute"): ensure_absolute_path(rel_path, allow_relative=False) - assert False, "Should fail with relative path when allow_relative=False" - except ValueError as e: - assert "must be absolute" in str(e) - - # Test with None path - try: - ensure_absolute_path(None) - assert False, "Should fail with None path" - except ValueError as e: - assert "cannot be None" in str(e) + # Test with None path + with pytest.raises(ValueError, match="cannot be None"): + ensure_absolute_path(None) def test_database_relative_paths(): """Test relative path handling for vector and graph databases""" system_root = "C:/system/root" if os.name == 'nt' else "/system/root" From c3f5840bff1a9623066718d3a6ab14994bd4b0fe Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 29 Aug 2025 12:24:15 +0200 Subject: [PATCH 39/73] refactor: Remove unused argument --- cognee/modules/retrieval/utils/completion.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cognee/modules/retrieval/utils/completion.py b/cognee/modules/retrieval/utils/completion.py index 4c2639517..81e636aad 100644 --- a/cognee/modules/retrieval/utils/completion.py +++ b/cognee/modules/retrieval/utils/completion.py @@ -7,13 +7,12 @@ async def generate_completion( context: str, user_prompt_path: str, system_prompt_path: str, - user_prompt: Optional[str] = None, system_prompt: Optional[str] = None, only_context: bool = False, ) -> str: """Generates a completion using LLM with given context and prompts.""" args = {"question": query, "context": context} - user_prompt = user_prompt if user_prompt else LLMGateway.render_prompt(user_prompt_path, args) + user_prompt = LLMGateway.render_prompt(user_prompt_path, args) system_prompt = ( system_prompt if system_prompt else LLMGateway.read_query_prompt(system_prompt_path) ) From 21f688385b16cc3bc50d355b32eb4b7610df2053 Mon 
Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 29 Aug 2025 12:53:29 +0200 Subject: [PATCH 40/73] feat: Add nodeset as default node type --- cognee/api/v1/search/search.py | 3 ++- cognee/modules/search/methods/search.py | 27 ++++++++++++++++--- .../modules/search/search_methods_test.py | 4 +-- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py index f37f8ba6d..344e763ae 100644 --- a/cognee/api/v1/search/search.py +++ b/cognee/api/v1/search/search.py @@ -1,6 +1,7 @@ from uuid import UUID from typing import Union, Optional, List, Type +from cognee.modules.engine.models.node_set import NodeSet from cognee.modules.users.models import User from cognee.modules.search.types import SearchType from cognee.modules.users.methods import get_default_user @@ -17,7 +18,7 @@ async def search( dataset_ids: Optional[Union[list[UUID], UUID]] = None, system_prompt_path: str = "answer_simple_question.txt", top_k: int = 10, - node_type: Optional[Type] = None, + node_type: Optional[Type] = NodeSet, node_name: Optional[List[str]] = None, save_interaction: bool = False, last_k: Optional[int] = None, diff --git a/cognee/modules/search/methods/search.py b/cognee/modules/search/methods/search.py index f5f2a793a..8e38e63c3 100644 --- a/cognee/modules/search/methods/search.py +++ b/cognee/modules/search/methods/search.py @@ -4,6 +4,7 @@ import asyncio from uuid import UUID from typing import Callable, List, Optional, Type, Union +from cognee.modules.engine.models.node_set import NodeSet from cognee.modules.retrieval.user_qa_feedback import UserQAFeedback from cognee.modules.search.exceptions import UnsupportedSearchTypeError from cognee.context_global_variables import set_database_global_context_variables @@ -38,7 +39,7 @@ async def search( user: User, system_prompt_path="answer_simple_question.txt", top_k: int = 10, - node_type: Optional[Type] = None, + node_type: Optional[Type] = NodeSet, node_name: Optional[List[str]] = None, save_interaction: Optional[bool] = False, last_k: Optional[int] = None, @@ -67,6 +68,8 @@ async def search( dataset_ids=dataset_ids, system_prompt_path=system_prompt_path, top_k=top_k, + node_type=node_type, + node_name=node_name, save_interaction=save_interaction, last_k=last_k, ) @@ -102,7 +105,7 @@ async def specific_search( user: User, system_prompt_path="answer_simple_question.txt", top_k: int = 10, - node_type: Optional[Type] = None, + node_type: Optional[Type] = NodeSet, node_name: Optional[List[str]] = None, save_interaction: Optional[bool] = False, last_k: Optional[int] = None, @@ -173,6 +176,8 @@ async def authorized_search( dataset_ids: Optional[list[UUID]] = None, system_prompt_path: str = "answer_simple_question.txt", top_k: int = 10, + node_type: Optional[Type] = NodeSet, + node_name: Optional[List[str]] = None, save_interaction: bool = False, last_k: Optional[int] = None, ) -> list: @@ -194,7 +199,9 @@ async def authorized_search( user, system_prompt_path, top_k, - save_interaction, + node_type=node_type, + node_name=node_name, + save_interaction=save_interaction, last_k=last_k, ) @@ -210,6 +217,8 @@ async def specific_search_by_context( user: User, system_prompt_path: str, top_k: int, + node_type: Optional[Type] = NodeSet, + node_name: Optional[List[str]] = None, save_interaction: bool = False, last_k: Optional[int] = None, ): @@ -229,6 +238,8 @@ async def specific_search_by_context( user, system_prompt_path=system_prompt_path, top_k=top_k, + node_type=node_type, + node_name=node_name, 
save_interaction=save_interaction, last_k=last_k, ) @@ -243,7 +254,15 @@ async def specific_search_by_context( for dataset in search_datasets: tasks.append( _search_by_context( - dataset, user, query_type, query_text, system_prompt_path, top_k, last_k + dataset, + user, + query_type, + query_text, + system_prompt_path, + top_k, + node_type=node_type, + node_name=node_name, + last_k=last_k, ) ) diff --git a/cognee/tests/unit/modules/search/search_methods_test.py b/cognee/tests/unit/modules/search/search_methods_test.py index 46995d087..004e1fca3 100644 --- a/cognee/tests/unit/modules/search/search_methods_test.py +++ b/cognee/tests/unit/modules/search/search_methods_test.py @@ -3,8 +3,8 @@ import uuid from unittest.mock import AsyncMock, MagicMock, patch import pytest -from pylint.checkers.utils import node_type +from cognee.modules.engine.models.node_set import NodeSet from cognee.modules.search.exceptions import UnsupportedSearchTypeError from cognee.modules.search.methods.search import search, specific_search from cognee.modules.search.types import SearchType @@ -63,7 +63,7 @@ async def test_search( mock_user, system_prompt_path="answer_simple_question.txt", top_k=10, - node_type=None, + node_type=NodeSet, node_name=None, save_interaction=False, last_k=None, From e6ee182d789b43e056ce71400367c04683fc2e8a Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 29 Aug 2025 13:03:06 +0200 Subject: [PATCH 41/73] fix: Handle [] node_name case --- cognee/modules/graph/cognee_graph/CogneeGraph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/modules/graph/cognee_graph/CogneeGraph.py b/cognee/modules/graph/cognee_graph/CogneeGraph.py index ed867ae24..924532ce0 100644 --- a/cognee/modules/graph/cognee_graph/CogneeGraph.py +++ b/cognee/modules/graph/cognee_graph/CogneeGraph.py @@ -76,7 +76,7 @@ class CogneeGraph(CogneeAbstractGraph): start_time = time.time() # Determine projection strategy - if node_type is not None and node_name is not None: + if node_type is not None and node_name not in [None, []]: nodes_data, edges_data = await adapter.get_nodeset_subgraph( node_type=node_type, node_name=node_name ) From b9fa285c1ac9a1c98dac414a3f8dc62e57305c42 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 29 Aug 2025 13:38:52 +0200 Subject: [PATCH 42/73] fix: Add node_name and node_type to context search --- cognee/modules/search/methods/search.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cognee/modules/search/methods/search.py b/cognee/modules/search/methods/search.py index 8e38e63c3..74ef2a6ad 100644 --- a/cognee/modules/search/methods/search.py +++ b/cognee/modules/search/methods/search.py @@ -228,7 +228,15 @@ async def specific_search_by_context( """ async def _search_by_context( - dataset, user, query_type, query_text, system_prompt_path, top_k, last_k + dataset, + user, + query_type, + query_text, + system_prompt_path, + top_k, + node_type: Optional[Type] = NodeSet, + node_name: Optional[List[str]] = None, + last_k: Optional[int] = None, ): # Set database configuration in async context for each dataset user has access for await set_database_global_context_variables(dataset.id, dataset.owner_id) From 614055c850661fcbb816a9bf77b2e61324a83f69 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 29 Aug 2025 14:16:18 +0200 Subject: [PATCH 43/73] refactor: Add docs for new search arguments --- cognee/api/v1/search/routers/get_search_router.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git 
a/cognee/api/v1/search/routers/get_search_router.py b/cognee/api/v1/search/routers/get_search_router.py
index 39a896dd8..f9f4e4764 100644
--- a/cognee/api/v1/search/routers/get_search_router.py
+++ b/cognee/api/v1/search/routers/get_search_router.py
@@ -1,9 +1,11 @@
 from uuid import UUID
+import pathlib
 from typing import Optional
 from datetime import datetime
 from pydantic import Field
 from fastapi import Depends, APIRouter
 from fastapi.responses import JSONResponse
+
 from cognee.modules.search.types import SearchType
 from cognee.api.DTO import InDTO, OutDTO
 from cognee.modules.users.exceptions.exceptions import PermissionDeniedError
@@ -20,7 +22,9 @@ class SearchPayloadDTO(InDTO):
     datasets: Optional[list[str]] = Field(default=None)
     dataset_ids: Optional[list[UUID]] = Field(default=None, examples=[[]])
     query: str = Field(default="What is in the document?")
-    system_prompt: Optional[str] = Field(default="")
+    system_prompt: Optional[str] = Field(
+        default="Answer the question using the provided context. Be as brief as possible."
+    )
     top_k: Optional[int] = Field(default=10)
     only_context: bool = Field(default=False)

@@ -81,7 +85,9 @@ def get_search_router() -> APIRouter:
         - **datasets** (Optional[List[str]]): List of dataset names to search within
         - **dataset_ids** (Optional[List[UUID]]): List of dataset UUIDs to search within
         - **query** (str): The search query string
+        - **system_prompt** (Optional[str]): System prompt used for completion-type searches in Cognee
         - **top_k** (Optional[int]): Maximum number of results to return (default: 10)
+        - **only_context** (bool): If true, return only the context Cognee would send to the LLM for completion-type searches, instead of the LLM-generated completion.

         ## Response
         Returns a list of search results containing relevant nodes from the graph.
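
For reference, a minimal sketch of how a client might exercise the two payload fields documented above. The endpoint path, the camelCase field aliases, and the searchType value are assumptions inferred from the DTO conventions visible in this series, not confirmed API details:

    import requests  # assumes a cognee API server listening locally

    payload = {
        "searchType": "GRAPH_COMPLETION",  # assumption: name of a SearchType member
        "query": "What is in the document?",
        "systemPrompt": "Answer briefly, using only the provided context.",
        "topK": 5,
        "onlyContext": True,  # return the assembled context instead of an LLM answer
    }
    response = requests.post("http://localhost:8000/api/v1/search", json=payload)
    print(response.json())

From 978815586cee1c0809c4fc3df57b88cebfc8c2e0 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Fri, 29 Aug 2025 14:21:42 +0200
Subject: [PATCH 44/73] docs: Add docstring for node usage in backend

---
 cognee/api/v1/add/routers/get_add_router.py       | 2 ++
 cognee/api/v1/search/routers/get_search_router.py | 1 +
 2 files changed, 3 insertions(+)

diff --git a/cognee/api/v1/add/routers/get_add_router.py b/cognee/api/v1/add/routers/get_add_router.py
index 8424a4fb5..1703d9931 100644
--- a/cognee/api/v1/add/routers/get_add_router.py
+++ b/cognee/api/v1/add/routers/get_add_router.py
@@ -42,6 +42,8 @@ def get_add_router() -> APIRouter:
             - Regular file uploads
         - **datasetName** (Optional[str]): Name of the dataset to add data to
         - **datasetId** (Optional[UUID]): UUID of an already existing dataset
+        - **node_set** (Optional[list[str]]): List of node identifiers for graph organization and access control.
+          Used for grouping related data points in the knowledge graph.

         Either datasetName or datasetId must be provided.

diff --git a/cognee/api/v1/search/routers/get_search_router.py b/cognee/api/v1/search/routers/get_search_router.py
index 961532a06..003df7cd4 100644
--- a/cognee/api/v1/search/routers/get_search_router.py
+++ b/cognee/api/v1/search/routers/get_search_router.py
@@ -80,6 +80,7 @@ def get_search_router() -> APIRouter:
         - **datasets** (Optional[List[str]]): List of dataset names to search within
         - **dataset_ids** (Optional[List[UUID]]): List of dataset UUIDs to search within
        - **query** (str): The search query string
+        - **node_name** (Optional[list[str]]): Filter results to specific node_sets defined in the add pipeline (for targeted search).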
         - **top_k** (Optional[int]): Maximum number of results to return (default: 10)

         ## Response
         Returns a list of search results containing relevant nodes from the graph.

From 14e07bc650803a18b37085e7e1ed0e4189bae46a Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Fri, 29 Aug 2025 14:41:57 +0200
Subject: [PATCH 45/73] fix: Make metadata prune true by default

---
 cognee/modules/data/deletion/prune_system.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cognee/modules/data/deletion/prune_system.py b/cognee/modules/data/deletion/prune_system.py
index 055d69b55..5bbd7c22f 100644
--- a/cognee/modules/data/deletion/prune_system.py
+++ b/cognee/modules/data/deletion/prune_system.py
@@ -3,7 +3,7 @@ from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_eng
 from cognee.infrastructure.databases.relational import get_relational_engine


-async def prune_system(graph=True, vector=True, metadata=False):
+async def prune_system(graph=True, vector=True, metadata=True):
     if graph:
         graph_engine = await get_graph_engine()
         await graph_engine.delete_graph()
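
The default flip above silently extends pruning to relational metadata, so callers that depended on metadata surviving prune_system now need an explicit opt-out. A minimal sketch, assuming the module is importable under its file path:

    import asyncio

    from cognee.modules.data.deletion.prune_system import prune_system

    async def prune_keeping_metadata():
        # metadata now defaults to True; pass False to keep the pre-patch behavior
        await prune_system(graph=True, vector=True, metadata=False)

    asyncio.run(prune_keeping_metadata())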
From 4159846bb39c2197b460f28d28b205953bf8ed39 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Fri, 29 Aug 2025 16:04:14 +0200
Subject: [PATCH 46/73] fix: Make excluded paths use absolute path

---
 cognee/api/v1/cognify/code_graph_pipeline.py  | 12 ++++++++----
 cognee/modules/retrieval/code_retriever.py    |  8 ++++++++
 .../get_repo_file_dependencies.py             | 17 +++++++++++++----
 3 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/cognee/api/v1/cognify/code_graph_pipeline.py b/cognee/api/v1/cognify/code_graph_pipeline.py
index 66b8568fa..fb3612857 100644
--- a/cognee/api/v1/cognify/code_graph_pipeline.py
+++ b/cognee/api/v1/cognify/code_graph_pipeline.py
@@ -1,6 +1,7 @@
 import os
 import pathlib
 import asyncio
+from typing import Optional

 from cognee.shared.logging_utils import get_logger, setup_logging
 from cognee.modules.observability.get_observe import get_observe
@@ -28,7 +29,12 @@ logger = get_logger("code_graph_pipeline")


 @observe
-async def run_code_graph_pipeline(repo_path, include_docs=False, excluded_paths=None):
+async def run_code_graph_pipeline(
+    repo_path,
+    include_docs=False,
+    excluded_paths: Optional[list[str]] = None,
+    supported_languages: Optional[list[str]] = None,
+):
     import cognee
     from cognee.low_level import setup

@@ -40,8 +46,6 @@ async def run_code_graph_pipeline(repo_path, include_docs=False, excluded_paths=
     user = await get_default_user()

     detailed_extraction = True
-    # Multi-language support: allow passing supported_languages
-    supported_languages = None  # defer to task defaults

     tasks = [
         Task(
             get_repo_file_dependencies,
@@ -95,7 +99,7 @@ async def run_code_graph_pipeline(repo_path, include_docs=False, excluded_paths=
 if __name__ == "__main__":

     async def main():
-        async for run_status in run_code_graph_pipeline("/Users/igorilic/Desktop/cognee/examples"):
+        async for run_status in run_code_graph_pipeline("REPO_PATH"):
             print(f"{run_status.pipeline_run_id}: {run_status.status}")

             file_path = os.path.join(

diff --git a/cognee/modules/retrieval/code_retriever.py b/cognee/modules/retrieval/code_retriever.py
index 6e819d8a7..76b5e758c 100644
--- a/cognee/modules/retrieval/code_retriever.py
+++ b/cognee/modules/retrieval/code_retriever.py
@@ -94,7 +94,15 @@ class CodeRetriever(BaseRetriever):
                         {"id": res.id, "score": res.score, "payload": res.payload}
                     )

+        existing_collection = []
         for collection in self.classes_and_functions_collections:
+            if await vector_engine.has_collection(collection):
+                existing_collection.append(collection)
+
+        if not existing_collection:
+            raise RuntimeError("No collection found for code retriever")
+
+        for collection in existing_collection:
             logger.debug(f"Searching {collection} collection with general query")
             search_results_code = await vector_engine.search(
                 collection, query, limit=self.top_k

diff --git a/cognee/tasks/repo_processor/get_repo_file_dependencies.py b/cognee/tasks/repo_processor/get_repo_file_dependencies.py
index 3ebf1fcb1..06cc3bddb 100644
--- a/cognee/tasks/repo_processor/get_repo_file_dependencies.py
+++ b/cognee/tasks/repo_processor/get_repo_file_dependencies.py
@@ -1,6 +1,7 @@
 import asyncio
 import math
 import os
+from pathlib import Path
 from typing import Set
 from typing import AsyncGenerator, Optional, List
 from uuid import NAMESPACE_OID, uuid5
@@ -78,15 +79,22 @@ async def get_source_code_files(
         if lang is None:
             continue
         # Exclude tests, common build/venv directories and files provided in exclude_paths
-        excluded_dirs = EXCLUDED_DIRS | set(excluded_paths or [])
-        root_parts = set(os.path.normpath(root).split(os.sep))
+        excluded_dirs = EXCLUDED_DIRS
+        excluded_paths = {Path(p).resolve() for p in (excluded_paths or [])}  # full paths
+
+        root_path = Path(root).resolve()
+        root_parts = set(root_path.parts)  # directory name components
         base_name, _ext = os.path.splitext(file)
         if (
             base_name.startswith("test_")
-            or base_name.endswith("_test")  # catches Go's *_test.go and similar
+            or base_name.endswith("_test")
             or ".test." in file
             or ".spec." in file
-            or (excluded_dirs & root_parts)
+            or (excluded_dirs & root_parts)  # name match
+            or any(
+                root_path.is_relative_to(p)  # full-path match
+                for p in excluded_paths
+            )
         ):
             continue
         file_path = os.path.abspath(os.path.join(root, file))
@@ -164,6 +172,7 @@ async def get_repo_file_dependencies(
         "go": [".go"],
         "rust": [".rs"],
         "cpp": [".cpp", ".c", ".h", ".hpp"],
+        "c": [".c", ".h"],
     }
     if supported_languages is not None:
         language_config = {
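
With the change above, entries in excluded_paths are resolved to absolute paths and matched against every directory the walker visits. A minimal sketch of driving the pipeline with exclusions; the repository path, the exclusion entries, and the language names are illustrative values, with the signature taken from this patch:

    import asyncio

    from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline

    async def main():
        # Relative exclusion entries resolve against the current working
        # directory, so absolute entries are the safer choice.
        async for run_status in run_code_graph_pipeline(
            "/path/to/repo",
            excluded_paths=["/path/to/repo/vendor", "/path/to/repo/generated"],
            supported_languages=["python", "go"],  # assumption: keys of the task's language_config
        ):
            print(f"{run_status.pipeline_run_id}: {run_status.status}")

    asyncio.run(main())

From 1970106f7e21db97c8ba952e807b986086f56f Mon Sep 17 00:00:00 2001
From: hajdul88 <52442977+hajdul88@users.noreply.github.com>
Date: Fri, 29 Aug 2025 16:07:18 +0200
Subject: [PATCH 47/73] chore: adds docstrings

---
 cognee/api/v1/cognify/cognify.py              | 19 +++++++++++
 .../extraction/extract_event_entities.py      | 15 ++++++++-
 .../knowledge_graph/extract_event_graph.py    | 19 ++++++++---
 .../engine/utils/generate_event_datapoint.py  | 17 +++++++++-
 .../utils/generate_timestamp_datapoint.py     | 26 +++++++++++++--
 .../temporal_graph/add_entities_to_event.py   | 32 +++++++++++++++++--
 cognee/tasks/temporal_graph/enrich_events.py  | 14 +++++++-
 .../extract_events_and_entities.py            | 14 +++++++-
 .../extract_knowledge_graph_from_events.py    | 14 +++++++-
 examples/python/temporal_example.py           |  1 -
 10 files changed, 157 insertions(+), 14 deletions(-)

diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py
index 31a357afa..e4f91b44c 100644
--- a/cognee/api/v1/cognify/cognify.py
+++ b/cognee/api/v1/cognify/cognify.py
@@ -247,6 +247,25 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
 async def get_temporal_tasks(
     user: User = None, chunker=TextChunker, chunk_size: int = None
 ) -> list[Task]:
+    """
+    Builds and returns a list of temporal processing tasks to be executed in sequence.
+
+    The pipeline includes:
+    1. Document classification.
+    2. Dataset permission checks (requires "write" access).
+    3. Document chunking with a specified or default chunk size.
+    4. Event and timestamp extraction from chunks.
+    5. Knowledge graph extraction from events.
+    6. Batched insertion of data points.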
+ + Args: + user (User, optional): The user requesting task execution, used for permission checks. + chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker. + chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default. + + Returns: + list[Task]: A list of Task objects representing the temporal processing pipeline. + """ temporal_tasks = [ Task(classify_documents), Task(check_permissions_on_dataset, user=user, permissions=["write"]), diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py index ad33863b0..b1dd6910d 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py @@ -8,7 +8,20 @@ from cognee.infrastructure.llm.config import ( async def extract_event_entities(content: str, response_model: Type[BaseModel]): - """Extract event entities from content using LLM.""" + """ + Extracts event-related entities from the given content using an LLM with structured output. + + This function loads an event entity extraction prompt from the LLM configuration, + renders it into a system prompt, and queries the LLM to produce structured entities + that conform to the specified response model. + + Args: + content (str): The input text from which to extract event entities. + response_model (Type[BaseModel]): A Pydantic model defining the structure of the expected output. + + Returns: + BaseModel: An instance of the response_model populated with extracted event entities. + """ llm_config = get_llm_config() prompt_path = llm_config.event_entity_prompt_path diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py index 667e2eb7d..9a40ea855 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py @@ -8,10 +8,21 @@ from cognee.infrastructure.llm.config import ( ) -async def extract_event_graph( - content: str, response_model: Type[BaseModel], system_prompt: str = None -): - """Extract event graph from content using LLM.""" +async def extract_event_graph(content: str, response_model: Type[BaseModel]): + """ + Extracts an event graph from the given content using an LLM with a structured output format. + + This function loads a temporal graph extraction prompt from the LLM configuration, + renders it as a system prompt, and queries the LLM to produce a structured event + graph matching the specified response model. + + Args: + content (str): The input text from which to extract the event graph. + response_model (Type[BaseModel]): A Pydantic model defining the structure of the expected output. + + Returns: + BaseModel: An instance of the response_model populated with the extracted event graph. 
+ """ llm_config = get_llm_config() diff --git a/cognee/modules/engine/utils/generate_event_datapoint.py b/cognee/modules/engine/utils/generate_event_datapoint.py index cc56763ae..7768b06ac 100644 --- a/cognee/modules/engine/utils/generate_event_datapoint.py +++ b/cognee/modules/engine/utils/generate_event_datapoint.py @@ -3,7 +3,22 @@ from cognee.modules.engine.utils.generate_timestamp_datapoint import generate_ti def generate_event_datapoint(event) -> Event: - """Create an Event datapoint from an event model.""" + """ + Generates an Event datapoint from a given event model, including temporal metadata if available. + + The function maps the basic attributes (name, description, location) from the input event + and enriches them with temporal information. If start and end times are provided, an + Interval is created. If only one timestamp is available, it is added directly. Temporal + information is also appended to the event description for context. + + Args: + event: An event model instance containing attributes such as name, description, + location, time_from, and time_to. + + Returns: + Event: A structured Event object with name, description, location, and enriched + temporal details. + """ # Base event data event_data = { "name": event.name, diff --git a/cognee/modules/engine/utils/generate_timestamp_datapoint.py b/cognee/modules/engine/utils/generate_timestamp_datapoint.py index 6f2cdf6d1..b078e161e 100644 --- a/cognee/modules/engine/utils/generate_timestamp_datapoint.py +++ b/cognee/modules/engine/utils/generate_timestamp_datapoint.py @@ -4,7 +4,21 @@ from cognee.modules.engine.utils import generate_node_id def generate_timestamp_datapoint(ts: Timestamp) -> Timestamp: - """Create a Timestamp datapoint from a Timestamp model.""" + """ + Generates a normalized Timestamp datapoint from a given Timestamp model. + + The function converts the provided timestamp into an integer representation, + constructs a human-readable string format, and creates a new Timestamp object + with a unique identifier. + + Args: + ts (Timestamp): The input Timestamp model containing date and time components. + + Returns: + Timestamp: A new Timestamp object with a generated ID, integer representation, + original components, and formatted string. + """ + time_at = date_to_int(ts) timestamp_str = ( f"{ts.year:04d}-{ts.month:02d}-{ts.day:02d} {ts.hour:02d}:{ts.minute:02d}:{ts.second:02d}" @@ -23,7 +37,15 @@ def generate_timestamp_datapoint(ts: Timestamp) -> Timestamp: def date_to_int(ts: Timestamp) -> int: - """Convert timestamp to integer milliseconds.""" + """ + Converts a Timestamp model into an integer representation in milliseconds since the Unix epoch (UTC). + + Args: + ts (Timestamp): The input Timestamp model containing year, month, day, hour, minute, and second. + + Returns: + int: The UTC timestamp in milliseconds since January 1, 1970. 
+ """ dt = datetime(ts.year, ts.month, ts.day, ts.hour, ts.minute, ts.second, tzinfo=timezone.utc) time = int(dt.timestamp() * 1000) return time diff --git a/cognee/tasks/temporal_graph/add_entities_to_event.py b/cognee/tasks/temporal_graph/add_entities_to_event.py index 2cb4b1425..8c1146a9e 100644 --- a/cognee/tasks/temporal_graph/add_entities_to_event.py +++ b/cognee/tasks/temporal_graph/add_entities_to_event.py @@ -7,7 +7,23 @@ from cognee.modules.engine.utils import generate_node_id, generate_node_name def add_entities_to_event(event: Event, event_with_entities: EventWithEntities) -> None: - """Add entities to event via attributes field.""" + """ + Adds extracted entities to an Event object by populating its attributes field. + + For each attribute in the provided EventWithEntities, the function ensures that + the corresponding entity type exists, creates an Entity node with metadata, and + links it to the event via an Edge representing the relationship. Entities are + cached by type to avoid duplication. + + Args: + event (Event): The target Event object to enrich with entities. + event_with_entities (EventWithEntities): An event model containing extracted + attributes with entity, type, and relationship metadata. + + Returns: + None + """ + if not event_with_entities.attributes: return @@ -41,7 +57,19 @@ def add_entities_to_event(event: Event, event_with_entities: EventWithEntities) def get_or_create_entity_type(entity_types: dict, entity_type_name: str) -> EntityType: - """Get existing entity type or create new one.""" + """ + Retrieves an existing EntityType from the cache or creates a new one if it does not exist. + + If the given entity type name is not already in the cache, a new EntityType is generated + with a unique ID, normalized name, and description, then added to the cache. + + Args: + entity_types (dict): A cache mapping entity type names to EntityType objects. + entity_type_name (str): The name of the entity type to retrieve or create. + + Returns: + EntityType: The existing or newly created EntityType object. + """ if entity_type_name not in entity_types: type_id = generate_node_id(entity_type_name) type_name = generate_node_name(entity_type_name) diff --git a/cognee/tasks/temporal_graph/enrich_events.py b/cognee/tasks/temporal_graph/enrich_events.py index bedd642eb..ef93da462 100644 --- a/cognee/tasks/temporal_graph/enrich_events.py +++ b/cognee/tasks/temporal_graph/enrich_events.py @@ -6,7 +6,19 @@ from cognee.tasks.temporal_graph.models import EventWithEntities, EventEntityLis async def enrich_events(events: List[Event]) -> List[EventWithEntities]: - """Extract entities from events and return enriched events.""" + """ + Enriches a list of events by extracting entities using an LLM. + + The function serializes event data into JSON, sends it to the LLM for + entity extraction, and returns enriched events with associated entities. + + Args: + events (List[Event]): A list of Event objects to be enriched. + + Returns: + List[EventWithEntities]: A list of events augmented with extracted entities. 
+ """ + import json # Convert events to JSON format for LLM processing diff --git a/cognee/tasks/temporal_graph/extract_events_and_entities.py b/cognee/tasks/temporal_graph/extract_events_and_entities.py index de0cdd601..8babc0ee5 100644 --- a/cognee/tasks/temporal_graph/extract_events_and_entities.py +++ b/cognee/tasks/temporal_graph/extract_events_and_entities.py @@ -7,7 +7,19 @@ from cognee.modules.engine.utils.generate_event_datapoint import generate_event_ async def extract_events_and_timestamps(data_chunks: List[DocumentChunk]) -> List[DocumentChunk]: - """Extracts events and entities from a chunk of documents.""" + """ + Extracts events and their timestamps from document chunks using an LLM. + + Each document chunk is processed with the event graph extractor to identify events. + The extracted events are converted into Event datapoints and appended to the + chunk's `contains` list. + + Args: + data_chunks (List[DocumentChunk]): A list of document chunks containing text to process. + + Returns: + List[DocumentChunk]: The same list of document chunks, enriched with extracted Event datapoints. + """ events = await asyncio.gather( *[LLMGateway.extract_event_graph(chunk.text, EventList) for chunk in data_chunks] ) diff --git a/cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py b/cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py index 8cbcc3c22..e50fa4ae2 100644 --- a/cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py +++ b/cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py @@ -8,7 +8,19 @@ from cognee.tasks.temporal_graph.add_entities_to_event import add_entities_to_ev async def extract_knowledge_graph_from_events( data_chunks: List[DocumentChunk], ) -> List[DocumentChunk]: - """Extract events from chunks and enrich them with entities.""" + """ + Extracts events from document chunks and enriches them with entities to form a knowledge graph. + + The function collects all Event objects from the given document chunks, + uses an LLM to extract and attach related entities, and updates the events + with these enriched attributes. + + Args: + data_chunks (List[DocumentChunk]): A list of document chunks containing extracted events. + + Returns: + List[DocumentChunk]: The same list of document chunks, with their events enriched by entities. 
+ """ # Extract events from chunks all_events = [] for chunk in data_chunks: diff --git a/examples/python/temporal_example.py b/examples/python/temporal_example.py index 4b54b72ed..c79e3c1db 100644 --- a/examples/python/temporal_example.py +++ b/examples/python/temporal_example.py @@ -61,7 +61,6 @@ biography_2 = """ - Gyldendals Endowment ( 1935 ) - Dobloug Prize ( 1951 ) - Mads Wiel Nygaards legat ( 1961 ) - """ From 0ecea42c2ccc0a12cf69b5dc23b51ae5196f0da5 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 29 Aug 2025 16:12:25 +0200 Subject: [PATCH 48/73] test: Remove repo path test --- cognee/tests/test_repo_processor.py | 46 ----------------------------- 1 file changed, 46 deletions(-) delete mode 100644 cognee/tests/test_repo_processor.py diff --git a/cognee/tests/test_repo_processor.py b/cognee/tests/test_repo_processor.py deleted file mode 100644 index 2d5868f36..000000000 --- a/cognee/tests/test_repo_processor.py +++ /dev/null @@ -1,46 +0,0 @@ -import os -import shutil -import tempfile -from cognee.tasks.repo_processor.get_repo_file_dependencies import get_source_code_files - - -def test_get_source_code_files_excludes_common_dirs_and_files(): - # Create a temporary test directory - test_repo = tempfile.mkdtemp() - - # Create files and folders to include/exclude - included_file = os.path.join(test_repo, "main.py") - excluded_dirs = [".venv", "node_modules", "__pycache__", ".git"] - excluded_files = ["ignore.pyc", "temp.log", "junk.tmp"] - - # Create included file - with open(included_file, "w") as f: - f.write("print('Hello world')") - - # Create excluded directories and files inside them - for folder in excluded_dirs: - folder_path = os.path.join(test_repo, folder) - os.makedirs(folder_path) - file_path = os.path.join(folder_path, "ignored.js") - with open(file_path, "w") as f: - f.write("// ignore this") - - # Create excluded files in root - for file_name in excluded_files: - file_path = os.path.join(test_repo, file_name) - with open(file_path, "w") as f: - f.write("dummy") - - # Run function - results = get_source_code_files(test_repo) - - # Assert only included file is present - assert included_file in results - for root, dirs, files in os.walk(test_repo): - for name in files: - full_path = os.path.join(root, name) - if full_path != included_file: - assert full_path not in results, f"{full_path} should have been excluded" - - # Cleanup - shutil.rmtree(test_repo) From fc06256b7e5fc740d90ebf5224f36e1e0cf49cfe Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 29 Aug 2025 17:59:29 +0200 Subject: [PATCH 49/73] feat: Add low reasoning for gpt5 model --- .../litellm_instructor/llm/openai/adapter.py | 26 ++++++++++++- poetry.lock | 2 +- pyproject.toml | 2 +- uv.lock | 38 +++++++++---------- 4 files changed, 46 insertions(+), 22 deletions(-) diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py index 95c14f1bc..ee6c1a8dd 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py @@ -23,9 +23,12 @@ from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.ll sleep_and_retry_sync, ) from cognee.modules.observability.get_observe import get_observe +from cognee.shared.logging_utils import get_logger observe = get_observe() +logger = get_logger() + class 
OpenAIAdapter(LLMInterface): """ @@ -129,6 +132,7 @@ class OpenAIAdapter(LLMInterface): api_version=self.api_version, response_model=response_model, max_retries=self.MAX_RETRIES, + extra_body={"reasoning_effort": "low"}, ) except ( ContentFilterFinishReasonError, @@ -139,7 +143,27 @@ class OpenAIAdapter(LLMInterface): isinstance(error, InstructorRetryException) and "content management policy" not in str(error).lower() ): - raise error + logger.debug( + "LLM Model does not support reasoning_effort parameter, trying call without the parameter." + ) + return await self.aclient.chat.completions.create( + model=self.model, + messages=[ + { + "role": "user", + "content": f"""{text_input}""", + }, + { + "role": "system", + "content": system_prompt, + }, + ], + api_key=self.api_key, + api_base=self.endpoint, + api_version=self.api_version, + response_model=response_model, + max_retries=self.MAX_RETRIES, + ) if not (self.fallback_model and self.fallback_api_key): raise ContentPolicyFilterError( diff --git a/poetry.lock b/poetry.lock index 0a336adcb..64c1bb050 100644 --- a/poetry.lock +++ b/poetry.lock @@ -11728,4 +11728,4 @@ posthog = ["posthog"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<=3.13" -content-hash = "7743005314483d6cc76febb7970c8af9a3d2a63e76247505e33b20fdc974aca1" +content-hash = "576318d370b89d128a7c3e755fe3c898fef4e359acdd3f05f952ae497751fb04" diff --git a/pyproject.toml b/pyproject.toml index 272c8e929..ece238338 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ classifiers = [ "Operating System :: Microsoft :: Windows", ] dependencies = [ - "openai>=1.80.1,<1.99.9", + "openai>=1.80.1,<2.0.0", "python-dotenv>=1.0.1,<2.0.0", "pydantic>=2.10.5,<3.0.0", "pydantic-settings>=2.2.1,<3", diff --git a/uv.lock b/uv.lock index 694d772f4..fb8ecd9bd 100644 --- a/uv.lock +++ b/uv.lock @@ -1015,7 +1015,7 @@ requires-dist = [ { name = "notebook", marker = "extra == 'notebook'", specifier = ">=7.1.0,<8" }, { name = "numpy", specifier = ">=1.26.4,<=4.0.0" }, { name = "onnxruntime", specifier = ">=1.0.0,<2.0.0" }, - { name = "openai", specifier = ">=1.80.1,<1.99.9" }, + { name = "openai", specifier = ">=1.80.1,<2.0.0" }, { name = "pandas", specifier = ">=2.2.2,<3.0.0" }, { name = "pgvector", marker = "extra == 'postgres'", specifier = ">=0.3.5,<0.4" }, { name = "pgvector", marker = "extra == 'postgres-binary'", specifier = ">=0.3.5,<0.4" }, @@ -1791,17 +1791,17 @@ name = "fastembed" version = "0.6.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "huggingface-hub" }, - { name = "loguru" }, - { name = "mmh3" }, + { name = "huggingface-hub", marker = "python_full_version < '3.13'" }, + { name = "loguru", marker = "python_full_version < '3.13'" }, + { name = "mmh3", marker = "python_full_version < '3.13'" }, { name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, - { name = "numpy", version = "2.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, - { name = "onnxruntime" }, - { name = "pillow" }, - { name = "py-rust-stemmers" }, - { name = "requests" }, - { name = "tokenizers" }, - { name = "tqdm" }, + { name = "numpy", version = "2.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.12.*'" }, + { name = "onnxruntime", marker = "python_full_version < '3.13'" }, + { name = "pillow", marker = "python_full_version < '3.13'" }, + { name = "py-rust-stemmers", marker = 
"python_full_version < '3.13'" }, + { name = "requests", marker = "python_full_version < '3.13'" }, + { name = "tokenizers", marker = "python_full_version < '3.13'" }, + { name = "tqdm", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/c6/f4/036a656c605f63dc25f11284f60f69900a54a19c513e1ae60d21d6977e75/fastembed-0.6.0.tar.gz", hash = "sha256:5c9ead25f23449535b07243bbe1f370b820dcc77ec2931e61674e3fe7ff24733", size = 50731, upload-time = "2025-02-26T13:50:33.031Z" } wheels = [ @@ -2617,7 +2617,7 @@ wheels = [ [[package]] name = "instructor" -version = "1.10.0" +version = "1.11.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -2633,9 +2633,9 @@ dependencies = [ { name = "tenacity" }, { name = "typer" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a5/67/63c4b4d2cc3c7b4238920ad3388a6f5d67265ab7c09ee34012d6b591130e/instructor-1.10.0.tar.gz", hash = "sha256:887d33e058b913290dbf526b0096b1bb8d7ea1a07d75afecbf716161f959697b", size = 69388981, upload-time = "2025-07-18T15:28:52.386Z" } +sdist = { url = "https://files.pythonhosted.org/packages/64/17/802d1dc4484410b65249e9d3c95a751b9c05dc106f1dff2e4a601c063ecd/instructor-1.11.2.tar.gz", hash = "sha256:e9ad4e2e0450a0840720bd2be034ffdfd7a65262ebdb854e7b2969886e1a2576", size = 69867645, upload-time = "2025-08-27T22:20:40.207Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2c/fb/ffc1ade9779795a8dc8e2379b1bfb522161ee7df8df12722f50d348fb4ea/instructor-1.10.0-py3-none-any.whl", hash = "sha256:9c789f0fce915d5498059afb5314530c8a5b22b0283302679148ddae98f732b0", size = 119455, upload-time = "2025-07-18T15:28:48.785Z" }, + { url = "https://files.pythonhosted.org/packages/25/93/d514a35d01db8461a56798c53f715ee1c956e72ec8885de88779b1244f2c/instructor-1.11.2-py3-none-any.whl", hash = "sha256:f7bc1094bcb7c6494d53ff284fe6a6737eb5e343945693c198e253ee7496fe82", size = 148884, upload-time = "2025-08-27T22:20:36.579Z" }, ] [[package]] @@ -3464,8 +3464,8 @@ name = "loguru" version = "0.7.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "win32-setctime", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "python_full_version < '3.13' and sys_platform == 'win32'" }, + { name = "win32-setctime", marker = "python_full_version < '3.13' and sys_platform == 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/3a/05/a1dae3dffd1116099471c643b8924f5aa6524411dc6c63fdae648c4f1aca/loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6", size = 63559, upload-time = "2024-12-06T11:20:56.608Z" } wheels = [ @@ -4604,7 +4604,7 @@ wheels = [ [[package]] name = "openai" -version = "1.99.8" +version = "1.102.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -4616,9 +4616,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/4b/81/288157471c43975cc849bc8779b8c7209aec6da5d7cbcd87a982912a19e5/openai-1.99.8.tar.gz", hash = "sha256:4b49845983eb4d5ffae9bae5d98bd5c0bd3a709a30f8b994fc8f316961b6d566", size = 506953, upload-time = "2025-08-11T20:19:02.312Z" } +sdist = { url = "https://files.pythonhosted.org/packages/07/55/da5598ed5c6bdd9939633854049cddc5cbac0da938dfcfcb3c6b119c16c0/openai-1.102.0.tar.gz", hash = 
"sha256:2e0153bcd64a6523071e90211cbfca1f2bbc5ceedd0993ba932a5869f93b7fc9", size = 519027, upload-time = "2025-08-26T20:50:29.397Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/36/b6/3940f037aa33e6d5aa00707fd02843a1cac06ee0e106f39cfb71d0653d23/openai-1.99.8-py3-none-any.whl", hash = "sha256:426b981079cffde6dd54868b9b84761ffa291cde77010f051b96433e1835b47d", size = 786821, upload-time = "2025-08-11T20:18:59.943Z" }, + { url = "https://files.pythonhosted.org/packages/bd/0d/c9e7016d82c53c5b5e23e2bad36daebb8921ed44f69c0a985c6529a35106/openai-1.102.0-py3-none-any.whl", hash = "sha256:d751a7e95e222b5325306362ad02a7aa96e1fab3ed05b5888ce1c7ca63451345", size = 812015, upload-time = "2025-08-26T20:50:27.219Z" }, ] [[package]] From bf1d4b915730d664b7b6f7e000ae83be4a2c8f9a Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 29 Aug 2025 18:15:35 +0200 Subject: [PATCH 50/73] refactor: Change reasoning from low to minimal --- .../litellm_instructor/llm/openai/adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py index ee6c1a8dd..7e8d75d10 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py @@ -132,7 +132,7 @@ class OpenAIAdapter(LLMInterface): api_version=self.api_version, response_model=response_model, max_retries=self.MAX_RETRIES, - extra_body={"reasoning_effort": "low"}, + extra_body={"reasoning_effort": "minimal"}, ) except ( ContentFilterFinishReasonError, From 0fac4da2d0d3418e6d24e074ee82979abcef5e92 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Fri, 29 Aug 2025 18:21:24 +0200 Subject: [PATCH 51/73] feat: adds temporal graph integration and structural tests --- .github/workflows/temporal_graph_tests.yml | 224 +++++++++++++++++++++ .github/workflows/test_suites.yml | 6 + cognee/tests/test_temporal_graph.py | 149 ++++++++++++++ 3 files changed, 379 insertions(+) create mode 100644 .github/workflows/temporal_graph_tests.yml create mode 100644 cognee/tests/test_temporal_graph.py diff --git a/.github/workflows/temporal_graph_tests.yml b/.github/workflows/temporal_graph_tests.yml new file mode 100644 index 000000000..4156dffff --- /dev/null +++ b/.github/workflows/temporal_graph_tests.yml @@ -0,0 +1,224 @@ +name: Temporal Graph Tests + +permissions: + contents: read + +on: + workflow_call: + inputs: + databases: + required: false + type: string + default: "all" + description: "Which vector databases to test (comma-separated list or 'all')" + +jobs: + run_temporal_graph_kuzu_lance_sqlite: + name: Temporal Graph test Kuzu (lancedb + sqlite) + runs-on: ubuntu-22.04 + if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'kuzu/lance/sqlite') }} + steps: + - name: Check out + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: ${{ inputs.python-version }} + + - name: Dependencies already installed + run: echo "Dependencies already installed in setup" + + - name: Run Temporal Graph with Kuzu (lancedb + sqlite) + env: + ENV: 'dev' + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ 
secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + GRAPH_DATABASE_PROVIDER: 'kuzu' + VECTOR_DB_PROVIDER: 'lancedb' + DB_PROVIDER: 'sqlite' + run: uv run python ./cognee/tests/test_temporal_graph.py + + run_temporal_graph_neo4j_lance_sqlite: + name: Temporal Graph test Neo4j (lancedb + sqlite) + runs-on: ubuntu-22.04 + if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'neo4j/lance/sqlite') }} + services: + neo4j: + image: neo4j:5.11 + env: + NEO4J_AUTH: neo4j/pleaseletmein + NEO4J_PLUGINS: '["apoc","graph-data-science"]' + ports: + - 7474:7474 + - 7687:7687 + options: >- + --health-cmd="cypher-shell -u neo4j -p pleaseletmein 'RETURN 1'" + --health-interval=10s + --health-timeout=5s + --health-retries=5 + + steps: + - name: Check out + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: ${{ inputs.python-version }} + + - name: Dependencies already installed + run: echo "Dependencies already installed in setup" + + - name: Run Temporal Graph with Neo4j (lancedb + sqlite) + env: + ENV: 'dev' + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + GRAPH_DATABASE_PROVIDER: 'neo4j' + VECTOR_DB_PROVIDER: 'lancedb' + DB_PROVIDER: 'sqlite' + GRAPH_DATABASE_URL: bolt://localhost:7687 + GRAPH_DATABASE_USERNAME: neo4j + GRAPH_DATABASE_PASSWORD: pleaseletmein + run: uv run python ./cognee/tests/test_temporal_graph.py + + run_temporal_graph_kuzu_postgres_pgvector: + name: Temporal Graph test Kuzu (postgres + pgvector) + runs-on: ubuntu-22.04 + if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'kuzu/pgvector/postgres') }} + services: + postgres: + image: pgvector/pgvector:pg17 + env: + POSTGRES_USER: cognee + POSTGRES_PASSWORD: cognee + POSTGRES_DB: cognee_db + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 5432:5432 + steps: + - name: Check out + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: ${{ inputs.python-version }} + extra-dependencies: "postgres" + + - name: Dependencies already installed + run: echo "Dependencies already installed in setup" + + - name: Run Temporal Graph with Kuzu (postgres + pgvector) + env: + ENV: dev + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + GRAPH_DATABASE_PROVIDER: 'kuzu' + VECTOR_DB_PROVIDER: 'pgvector' + DB_PROVIDER: 'postgres' + DB_NAME: 'cognee_db' + DB_HOST: '127.0.0.1' + DB_PORT: 5432 + DB_USERNAME: cognee + DB_PASSWORD: cognee + run: uv run python ./cognee/tests/test_temporal_graph.py + + 
run_temporal_graph_neo4j_postgres_pgvector: + name: Temporal Graph test Neo4j (postgres + pgvector) + runs-on: ubuntu-22.04 + if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'neo4j/pgvector/postgres') }} + services: + neo4j: + image: neo4j:5.11 + env: + NEO4J_AUTH: neo4j/pleaseletmein + NEO4J_PLUGINS: '["apoc","graph-data-science"]' + ports: + - 7474:7474 + - 7687:7687 + options: >- + --health-cmd="cypher-shell -u neo4j -p pleaseletmein 'RETURN 1'" + --health-interval=10s + --health-timeout=5s + --health-retries=5 + postgres: + image: pgvector/pgvector:pg17 + env: + POSTGRES_USER: cognee + POSTGRES_PASSWORD: cognee + POSTGRES_DB: cognee_db + ports: + - 5432:5432 + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries=5 + steps: + - name: Check out + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: ${{ inputs.python-version }} + extra-dependencies: "postgres" + + - name: Dependencies already installed + run: echo "Dependencies already installed in setup" + + - name: Run Temporal Graph with Neo4j (postgres + pgvector) + env: + ENV: dev + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + GRAPH_DATABASE_PROVIDER: 'neo4j' + VECTOR_DB_PROVIDER: 'pgvector' + DB_PROVIDER: 'postgres' + GRAPH_DATABASE_URL: bolt://localhost:7687 + GRAPH_DATABASE_USERNAME: neo4j + GRAPH_DATABASE_PASSWORD: pleaseletmein + DB_NAME: cognee_db + DB_HOST: 127.0.0.1 + DB_PORT: 5432 + DB_USERNAME: cognee + DB_PASSWORD: cognee + run: uv run python ./cognee/tests/test_temporal_graph.py diff --git a/.github/workflows/test_suites.yml b/.github/workflows/test_suites.yml index f4e86d544..5b953413c 100644 --- a/.github/workflows/test_suites.yml +++ b/.github/workflows/test_suites.yml @@ -50,6 +50,12 @@ jobs: uses: ./.github/workflows/graph_db_tests.yml secrets: inherit + temporal-graph-tests: + name: Temporal Graph Test + needs: [ basic-tests, e2e-tests, cli-tests, graph-db-tests ] + uses: ./.github/workflows/temporal_graph_tests.yml + secrets: inherit + search-db-tests: name: Search Test on Different DBs needs: [basic-tests, e2e-tests, cli-tests, graph-db-tests] diff --git a/cognee/tests/test_temporal_graph.py b/cognee/tests/test_temporal_graph.py new file mode 100644 index 000000000..ff52d2ed1 --- /dev/null +++ b/cognee/tests/test_temporal_graph.py @@ -0,0 +1,149 @@ +import asyncio +import cognee + +from cognee.shared.logging_utils import setup_logging, INFO +from cognee.api.v1.search import SearchType +from cognee.shared.logging_utils import get_logger +from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine +from collections import Counter + +logger = get_logger() + +biography_1 = """ + Attaphol Buspakom Attaphol Buspakom ( ; ) , nicknamed Tak ( ; ) ; 1 October 1962 – 16 April 2015 ) was a Thai national and football coach . He was given the role at Muangthong United and Buriram United after TTM Samut Sakhon folded after the 2009 season . He played for the Thailand national football team , appearing in several FIFA World Cup qualifying matches . + + Club career . 
+ Attaphol began his career as a player at Thai Port FC Authority of Thailand in 1985 . In his first year , he won his first championship with the club . He played for the club until 1989 and in 1987 also won the Queens Cup . He then moved to Malaysia for two seasons for Pahang FA , then return to Thailand to his former club . His time from 1991 to 1994 was marked by less success than in his first stay at Port Authority . From 1994 to 1996 he played for Pahang again and this time he was able to win with the club , the Malaysia Super League and also reached the final of the Malaysia Cup and the Malaysia FA Cup . Both cup finals but lost . Back in Thailand , he let end his playing career at FC Stock Exchange of Thailand , with which he once again runner‑up in 1996-97 . In 1998 , he finished his career . + + International career . + For the Thailand national football team Attaphol played between 1985 and 1998 a total of 85 games and scored 13 results . In 1992 , he participated with the team in the finals of the Asian Cup . He also stood in various cadres to qualifications to FIFA World Cup . + + Coaching career . + Bec Tero Sasana . + In BEC Tero Sasana F.C . began his coaching career in 2001 for him , first as assistant coach . He took over the reigning champions of the Thai League T1 , after his predecessor Pichai Pituwong resigned from his post . It was his first coach station and he had the difficult task of leading the club through the new AFC Champions League . He could accomplish this task with flying colors and even led the club to the finals . The finale , then still played in home and away matches , was lost with 1:2 at the end against Al Ain FC . Attaphol is and was next to Charnwit Polcheewin the only coach who managed a club from Thailand to lead to the final of the AFC Champions League . 2002-03 and 2003-04 he won with the club also two runner‑up . In his team , which reached the final of the Champions League , were a number of exceptional players like Therdsak Chaiman , Worrawoot Srimaka , Dusit Chalermsan and Anurak Srikerd . + + Geylang United / Krung Thai Bank . + In 2006 , he went to Singapore in the S‑League to Geylang United He was released after a few months due to lack of success . In 2008 , he took over as coach at Krung Thai Bank F.C. , where he had almost a similar task , as a few years earlier by BEC‑Tero . As vice‑champion of the club was also qualified for the AFC Champions League . However , he failed to lead the team through the group stage of the season 2008 and beyond . With the Kashima Antlers of Japan and Beijing Guoan F.C . athletic competition was too great . One of the highlights was put under his leadership , yet the club . In the group match against the Vietnam club Nam Dinh F.C . his team won with 9-1 , but also lost four weeks later with 1-8 against Kashima Antlers . At the end of the National Football League season , he reached the Krung Thai 6th Table space . The Erstligalizenz the club was sold at the end of the season at the Bangkok Glass F.C. . Attaphol finished his coaching career with the club and accepted an offer of TTM Samutsakorn . After only a short time in office + + Muangthong United . + In 2009 , he received an offer from Muangthong United F.C. , which he accepted and changed . He can champion Muang Thong United for 2009 Thai Premier League and Attaphol won Coach of The year for Thai Premier League and he was able to lead Muang Thong United to play AFC Champions League qualifying play‑off for the first in the clubs history . 
+ + Buriram United . + In 2010 Buspakom moved from Muangthong United to Buriram United F.C. . He received Coach of the Month in Thai Premier League 2 time in June and October . In 2011 , he led Buriram United win 2011 Thai Premier League second time for club and set a record with the most points in the Thai League T1 for 85 point and He led Buriram win 2011 Thai FA Cup by beat Muangthong United F.C . 1‑0 and he led Buriram win 2011 Thai League Cup by beat Thai Port F.C . 2‑0 . In 2012 , he led Buriram United to the 2012 AFC Champions League group stage . Buriram along with Guangzhou Evergrande F.C . from China , Kashiwa Reysol from Japan and Jeonbuk Hyundai Motors which are all champions from their country . In the first match of Buriram they beat Kashiwa 3‑2 and Second Match they beat Guangzhou 1‑2 at the Tianhe Stadium . Before losing to Jeonbuk 0‑2 and 3‑2 with lose Kashiwa and Guangzhou 1‑0 and 1‑2 respectively and Thai Premier League Attaphol lead Buriram end 4th for table with win 2012 Thai FA Cup and 2012 Thai League Cup . + + Bangkok Glass . + In 2013 , he moved from Buriram United to Bangkok Glass F.C. . + + Individual + - Thai Premier League Coach of the Year ( 3 ) : 2001-02 , 2009 , 2013 + """ + + +biography_2 = """ + Arnulf Øverland Ole Peter Arnulf Øverland ( 27 April 1889 – 25 March 1968 ) was a Norwegian poet and artist . He is principally known for his poetry which served to inspire the Norwegian resistance movement during the German occupation of Norway during World War II . + + Biography . + Øverland was born in Kristiansund and raised in Bergen . His parents were Peter Anton Øverland ( 1852–1906 ) and Hanna Hage ( 1854–1939 ) . The early death of his father , left the family economically stressed . He was able to attend Bergen Cathedral School and in 1904 Kristiania Cathedral School . He graduated in 1907 and for a time studied philology at University of Kristiania . Øverland published his first collection of poems ( 1911 ) . + + Øverland became a communist sympathizer from the early 1920s and became a member of Mot Dag . He also served as chairman of the Norwegian Students Society 1923–28 . He changed his stand in 1937 , partly as an expression of dissent against the ongoing Moscow Trials . He was an avid opponent of Nazism and in 1936 he wrote the poem Du må ikke sove which was printed in the journal Samtiden . It ends with . ( I thought: : Something is imminent . Our era is over – Europe’s on fire! ) . Probably the most famous line of the poem is ( You mustnt endure so well the injustice that doesnt affect you yourself! ) + + During the German occupation of Norway from 1940 in World War II , he wrote to inspire the Norwegian resistance movement . He wrote a series of poems which were clandestinely distributed , leading to the arrest of both him and his future wife Margrete Aamot Øverland in 1941 . Arnulf Øverland was held first in the prison camp of Grini before being transferred to Sachsenhausen concentration camp in Germany . He spent a four‑year imprisonment until the liberation of Norway in 1945 . His poems were later collected in Vi overlever alt and published in 1945 . + + Øverland played an important role in the Norwegian language struggle in the post‑war era . He became a noted supporter for the conservative written form of Norwegian called Riksmål , he was president of Riksmålsforbundet ( an organization in support of Riksmål ) from 1947 to 1956 . 
In addition , Øverland adhered to the traditionalist style of writing , criticising modernist poetry on several occasions . His speech Tungetale fra parnasset , published in Arbeiderbladet in 1954 , initiated the so‑called Glossolalia debate . + + Personal life . + In 1918 he had married the singer Hildur Arntzen ( 1888–1957 ) . Their marriage was dissolved in 1939 . In 1940 , he married Bartholine Eufemia Leganger ( 1903–1995 ) . They separated shortly after , and were officially divorced in 1945 . Øverland was married to journalist Margrete Aamot Øverland ( 1913–1978 ) during June 1945 . In 1946 , the Norwegian Parliament arranged for Arnulf and Margrete Aamot Øverland to reside at the Grotten . He lived there until his death in 1968 and she lived there for another ten years until her death in 1978 . Arnulf Øverland was buried at Vår Frelsers Gravlund in Oslo . Joseph Grimeland designed the bust of Arnulf Øverland ( bronze , 1970 ) at his grave site . + + Selected Works . + - Den ensomme fest ( 1911 ) + - Berget det blå ( 1927 ) + - En Hustavle ( 1929 ) + - Den røde front ( 1937 ) + - Vi overlever alt ( 1945 ) + - Sverdet bak døren ( 1956 ) + - Livets minutter ( 1965 ) + + Awards . + - Gyldendals Endowment ( 1935 ) + - Dobloug Prize ( 1951 ) + - Mads Wiel Nygaards legat ( 1961 ) + """ + + +async def main(): + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + + await cognee.add([biography_1, biography_2]) + + await cognee.cognify(temporal_cognify=True) + + graph_engine = await get_graph_engine() + graph = await graph_engine.get_graph_data() + + type_counts = Counter(node_data[1].get("type", {}) for node_data in graph[0]) + + edge_type_counts = Counter(edge_type[2] for edge_type in graph[1]) + + # Graph structure test + assert type_counts.get("TextDocument", 0) == 2, ( + f"Expected exactly one TextDocument, but found {type_counts.get('TextDocument', 0)}" + ) + + assert type_counts.get("DocumentChunk", 0) == 2, ( + f"Expected exactly one DocumentChunk, but found {type_counts.get('DocumentChunk', 0)}" + ) + + assert type_counts.get("Entity", 0) >= 20, ( + f"Expected multiple entities (assert is set to 20), but found {type_counts.get('Entity', 0)}" + ) + + assert type_counts.get("EntityType", 0) >= 2, ( + f"Expected multiple entity types, but found {type_counts.get('EntityType', 0)}" + ) + + assert type_counts.get("Event", 0) >= 20, ( + f"Expected multiple events (assert is set to 20), but found {type_counts.get('Event', 0)}" + ) + + assert type_counts.get("Timestamp", 0) >= 20, ( + f"Expected multiple timestamps (assert is set to 20), but found {type_counts.get('Timestamp', 0)}" + ) + + assert type_counts.get("Interval", 0) >= 2, ( + f"Expected multiple intervals, but found {type_counts.get('Interval', 0)}" + ) + + assert edge_type_counts.get("contains", 0) >= 20, ( + f"Expected multiple 'contains' edge, but found {edge_type_counts.get('contains', 0)}" + ) + + assert edge_type_counts.get("is_a", 0) >= 20, ( + f"Expected multiple 'is_a' edge, but found {edge_type_counts.get('is_a', 0)}" + ) + + assert (edge_type_counts.get("during", 0) == type_counts.get("Interval", 0))( + "Expected the same amount of during and interval objects in the graph" + ) + + assert (edge_type_counts.get("during", 0) == type_counts.get("Interval", 0))( + "Expected the same amount of during and interval objects in the graph" + ) + + assert (edge_type_counts.get("time_from", 0) == type_counts.get("Interval", 0))( + "Expected the same amount of time_from and interval objects in the graph" 
+ ) + + assert (edge_type_counts.get("time_to", 0) == type_counts.get("Interval", 0))( + "Expected the same amount of time_to and interval objects in the graph" + ) + + +if __name__ == "__main__": + logger = setup_logging(log_level=INFO) + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + loop.run_until_complete(main()) + finally: + loop.run_until_complete(loop.shutdown_asyncgens()) From b3853f7454f1f0f127ac0de509af5529ff69cf41 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Fri, 29 Aug 2025 18:40:18 +0200 Subject: [PATCH 52/73] fix: fixes asserts --- cognee/tests/test_temporal_graph.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cognee/tests/test_temporal_graph.py b/cognee/tests/test_temporal_graph.py index ff52d2ed1..8ab76bf28 100644 --- a/cognee/tests/test_temporal_graph.py +++ b/cognee/tests/test_temporal_graph.py @@ -122,19 +122,19 @@ async def main(): f"Expected multiple 'is_a' edge, but found {edge_type_counts.get('is_a', 0)}" ) - assert (edge_type_counts.get("during", 0) == type_counts.get("Interval", 0))( + assert (edge_type_counts.get("during", 0) == type_counts.get("Interval", 0)), ( "Expected the same amount of during and interval objects in the graph" ) - assert (edge_type_counts.get("during", 0) == type_counts.get("Interval", 0))( + assert (edge_type_counts.get("during", 0) == type_counts.get("Interval", 0)), ( "Expected the same amount of during and interval objects in the graph" ) - assert (edge_type_counts.get("time_from", 0) == type_counts.get("Interval", 0))( + assert (edge_type_counts.get("time_from", 0) == type_counts.get("Interval", 0)), ( "Expected the same amount of time_from and interval objects in the graph" ) - assert (edge_type_counts.get("time_to", 0) == type_counts.get("Interval", 0))( + assert (edge_type_counts.get("time_to", 0) == type_counts.get("Interval", 0)), ( "Expected the same amount of time_to and interval objects in the graph" ) From f276c2aa2a81ebfa9f31040ad0bc274f1eef2f1a Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Fri, 29 Aug 2025 18:41:16 +0200 Subject: [PATCH 53/73] ruff fix --- cognee/tests/test_temporal_graph.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cognee/tests/test_temporal_graph.py b/cognee/tests/test_temporal_graph.py index 8ab76bf28..998b780f7 100644 --- a/cognee/tests/test_temporal_graph.py +++ b/cognee/tests/test_temporal_graph.py @@ -122,19 +122,19 @@ async def main(): f"Expected multiple 'is_a' edge, but found {edge_type_counts.get('is_a', 0)}" ) - assert (edge_type_counts.get("during", 0) == type_counts.get("Interval", 0)), ( + assert edge_type_counts.get("during", 0) == type_counts.get("Interval", 0), ( "Expected the same amount of during and interval objects in the graph" ) - assert (edge_type_counts.get("during", 0) == type_counts.get("Interval", 0)), ( + assert edge_type_counts.get("during", 0) == type_counts.get("Interval", 0), ( "Expected the same amount of during and interval objects in the graph" ) - assert (edge_type_counts.get("time_from", 0) == type_counts.get("Interval", 0)), ( + assert edge_type_counts.get("time_from", 0) == type_counts.get("Interval", 0), ( "Expected the same amount of time_from and interval objects in the graph" ) - assert (edge_type_counts.get("time_to", 0) == type_counts.get("Interval", 0)), ( + assert edge_type_counts.get("time_to", 0) == type_counts.get("Interval", 0), ( "Expected the same amount of time_to and interval 
objects in the graph" ) From a3da74a01d633b48d2cc74a25f7369db5b812eeb Mon Sep 17 00:00:00 2001 From: vasilije Date: Fri, 29 Aug 2025 21:49:28 +0200 Subject: [PATCH 54/73] add open router --- .github/workflows/test_openrouter.yml | 30 +++++++++++++++++++++++++++ .github/workflows/test_suites.yml | 9 ++++++++ 2 files changed, 39 insertions(+) create mode 100644 .github/workflows/test_openrouter.yml diff --git a/.github/workflows/test_openrouter.yml b/.github/workflows/test_openrouter.yml new file mode 100644 index 000000000..9c2dcdebe --- /dev/null +++ b/.github/workflows/test_openrouter.yml @@ -0,0 +1,30 @@ +name: test | openrouter + +on: + workflow_call: + +jobs: + test-openrouter: + name: Run OpenRouter Test + runs-on: ubuntu-22.04 + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' + + - name: Run OpenRouter Simple Example + env: + LLM_PROVIDER: "custom" + LLM_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} + LLM_MODEL: "openrouter/x-ai/grok-code-fast-1" + LLM_ENDPOINT: "https://openrouter.ai/api/v1" + EMBEDDING_PROVIDER: "openai" + EMBEDDING_API_KEY: ${{ secrets.OPENAI_API_KEY }} + EMBEDDING_MODEL: "openai/text-embedding-3-large" + EMBEDDING_DIMENSIONS: "3072" + EMBEDDING_MAX_TOKENS: "8191" + run: uv run python ./examples/python/simple_example.py diff --git a/.github/workflows/test_suites.yml b/.github/workflows/test_suites.yml index f4e86d544..34a2c2e02 100644 --- a/.github/workflows/test_suites.yml +++ b/.github/workflows/test_suites.yml @@ -115,6 +115,12 @@ jobs: uses: ./.github/workflows/test_gemini.yml secrets: inherit + openrouter-tests: + name: OpenRouter Tests + needs: [basic-tests, e2e-tests, cli-tests] + uses: ./.github/workflows/test_openrouter.yml + secrets: inherit + # Ollama tests moved to the end ollama-tests: name: Ollama Tests @@ -128,6 +134,7 @@ jobs: vector-db-tests, example-tests, gemini-tests, + openrouter-tests, mcp-test, relational-db-migration-tests, docker-compose-test, @@ -150,6 +157,7 @@ jobs: db-examples-tests, mcp-test, gemini-tests, + openrouter-tests, ollama-tests, relational-db-migration-tests, docker-compose-test, @@ -171,6 +179,7 @@ jobs: "${{ needs.db-examples-tests.result }}" == "success" && "${{ needs.relational-db-migration-tests.result }}" == "success" && "${{ needs.gemini-tests.result }}" == "success" && + "${{ needs.openrouter-tests.result }}" == "success" && "${{ needs.docker-compose-test.result }}" == "success" && "${{ needs.docker-ci-test.result }}" == "success" && "${{ needs.ollama-tests.result }}" == "success" ]]; then From 377c0d3973a760dc9163d4f6a0a6bbfeccb7dbef Mon Sep 17 00:00:00 2001 From: vasilije Date: Sat, 30 Aug 2025 10:38:46 +0200 Subject: [PATCH 55/73] added fix to embedding engine --- .../databases/vector/embeddings/get_embedding_engine.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py b/cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py index ae15b6c6e..192f1958c 100644 --- a/cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +++ b/cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py @@ -33,6 +33,7 @@ def get_embedding_engine() -> EmbeddingEngine: config.embedding_api_version, config.huggingface_tokenizer, llm_config.llm_api_key, + llm_config.llm_provider ) @@ -47,6 +48,7 @@ def create_embedding_engine( embedding_api_version, huggingface_tokenizer, llm_api_key, 
+ llm_provider ): """ Create and return an embedding engine based on the specified provider. @@ -99,7 +101,7 @@ def create_embedding_engine( return LiteLLMEmbeddingEngine( provider=embedding_provider, - api_key=embedding_api_key or llm_api_key, + api_key=embedding_api_key or (embedding_api_key if llm_provider == 'custom' else llm_api_key), endpoint=embedding_endpoint, api_version=embedding_api_version, model=embedding_model, From 9df440c02040f0b18a6b8df420168dcc42e31feb Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Mon, 1 Sep 2025 15:18:29 +0200 Subject: [PATCH 56/73] feat: adds time extraction + unit tests for temporal retriever --- .../modules/retrieval/temporal_retriever.py | 1 - cognee/tests/test_temporal_graph.py | 18 ++ .../retrieval/temporal_retriever_test.py | 223 ++++++++++++++++++ 3 files changed, 241 insertions(+), 1 deletion(-) create mode 100644 cognee/tests/unit/modules/retrieval/temporal_retriever_test.py diff --git a/cognee/modules/retrieval/temporal_retriever.py b/cognee/modules/retrieval/temporal_retriever.py index 61881bf7e..edd38489c 100644 --- a/cognee/modules/retrieval/temporal_retriever.py +++ b/cognee/modules/retrieval/temporal_retriever.py @@ -40,7 +40,6 @@ class TemporalRetriever(GraphCompletionRetriever): top_k: Optional[int] = 5, node_type: Optional[Type] = None, node_name: Optional[List[str]] = None, - save_interaction: bool = False, ): super().__init__( user_prompt_path=user_prompt_path, diff --git a/cognee/tests/test_temporal_graph.py b/cognee/tests/test_temporal_graph.py index 998b780f7..9a9b2a93e 100644 --- a/cognee/tests/test_temporal_graph.py +++ b/cognee/tests/test_temporal_graph.py @@ -1,11 +1,14 @@ import asyncio import cognee +from cognee.modules.retrieval.temporal_retriever import TemporalRetriever from cognee.shared.logging_utils import setup_logging, INFO +from cognee.tasks.temporal_graph.models import Timestamp from cognee.api.v1.search import SearchType from cognee.shared.logging_utils import get_logger from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine from collections import Counter +from cognee.modules.engine.utils.generate_timestamp_datapoint import date_to_int logger = get_logger() @@ -138,6 +141,21 @@ async def main(): "Expected the same amount of time_to and interval objects in the graph" ) + retriever = TemporalRetriever() + + result_before = await retriever.extract_time_from_query("What happened before 1890?") + + assert result_before[0] == None + + result_after = await retriever.extract_time_from_query("What happened after 1891?") + + assert result_after[1] == None + + result_between = await retriever.extract_time_from_query("What happened between 1890 and 1900?") + + assert result_between[1] + assert result_between[0] + if __name__ == "__main__": logger = setup_logging(log_level=INFO) diff --git a/cognee/tests/unit/modules/retrieval/temporal_retriever_test.py b/cognee/tests/unit/modules/retrieval/temporal_retriever_test.py new file mode 100644 index 000000000..954dc398e --- /dev/null +++ b/cognee/tests/unit/modules/retrieval/temporal_retriever_test.py @@ -0,0 +1,223 @@ +import asyncio +from types import SimpleNamespace +import pytest + +from cognee.modules.retrieval.temporal_retriever import TemporalRetriever + + +# Test TemporalRetriever initialization defaults and overrides +def test_init_defaults_and_overrides(): + tr = TemporalRetriever() + assert tr.top_k == 5 + assert tr.user_prompt_path == "graph_context_for_question.txt" + assert tr.system_prompt_path 
== "answer_simple_question.txt" + assert tr.time_extraction_prompt_path == "extract_query_time.txt" + + tr2 = TemporalRetriever( + top_k=3, + user_prompt_path="u.txt", + system_prompt_path="s.txt", + time_extraction_prompt_path="t.txt", + ) + assert tr2.top_k == 3 + assert tr2.user_prompt_path == "u.txt" + assert tr2.system_prompt_path == "s.txt" + assert tr2.time_extraction_prompt_path == "t.txt" + + +# Test descriptions_to_string with basic and empty results +def test_descriptions_to_string_basic_and_empty(): + tr = TemporalRetriever() + + results = [ + {"description": " First "}, + {"nope": "no description"}, + {"description": "Second"}, + {"description": ""}, + {"description": " Third line "}, + ] + + s = tr.descriptions_to_string(results) + assert s == "First\n#####################\nSecond\n#####################\nThird line" + + assert tr.descriptions_to_string([]) == "" + + +# Test filter_top_k_events sorts and limits correctly +@pytest.mark.asyncio +async def test_filter_top_k_events_sorts_and_limits(): + tr = TemporalRetriever(top_k=2) + + relevant_events = [ + { + "events": [ + {"id": "e1", "description": "E1"}, + {"id": "e2", "description": "E2"}, + {"id": "e3", "description": "E3 - not in vector results"}, + ] + } + ] + + scored_results = [ + SimpleNamespace(payload={"id": "e2"}, score=0.10), + SimpleNamespace(payload={"id": "e1"}, score=0.20), + ] + + top = await tr.filter_top_k_events(relevant_events, scored_results) + + assert [e["id"] for e in top] == ["e2", "e1"] + assert all("score" in e for e in top) + assert top[0]["score"] == 0.10 + assert top[1]["score"] == 0.20 + + +# Test filter_top_k_events handles unknown ids as infinite scores +@pytest.mark.asyncio +async def test_filter_top_k_events_includes_unknown_as_infinite_but_not_in_top_k(): + tr = TemporalRetriever(top_k=2) + + relevant_events = [ + { + "events": [ + {"id": "known1", "description": "Known 1"}, + {"id": "unknown", "description": "Unknown"}, + {"id": "known2", "description": "Known 2"}, + ] + } + ] + + scored_results = [ + SimpleNamespace(payload={"id": "known2"}, score=0.05), + SimpleNamespace(payload={"id": "known1"}, score=0.50), + ] + + top = await tr.filter_top_k_events(relevant_events, scored_results) + assert [e["id"] for e in top] == ["known2", "known1"] + assert all(e["score"] != float("inf") for e in top) + + +# Test descriptions_to_string with unicode and newlines +def test_descriptions_to_string_unicode_and_newlines(): + tr = TemporalRetriever() + results = [ + {"description": "Line A\nwith newline"}, + {"description": "This is a description"}, + ] + s = tr.descriptions_to_string(results) + assert "Line A\nwith newline" in s + assert "This is a description" in s + assert s.count("#####################") == 1 + + +# Test filter_top_k_events when top_k is larger than available events +@pytest.mark.asyncio +async def test_filter_top_k_events_limits_when_top_k_exceeds_events(): + tr = TemporalRetriever(top_k=10) + relevant_events = [{"events": [{"id": "a"}, {"id": "b"}]}] + scored_results = [ + SimpleNamespace(payload={"id": "a"}, score=0.1), + SimpleNamespace(payload={"id": "b"}, score=0.2), + ] + out = await tr.filter_top_k_events(relevant_events, scored_results) + assert [e["id"] for e in out] == ["a", "b"] + + +# Test filter_top_k_events when scored_results is empty +@pytest.mark.asyncio +async def test_filter_top_k_events_handles_empty_scored_results(): + tr = TemporalRetriever(top_k=2) + relevant_events = [{"events": [{"id": "x"}, {"id": "y"}]}] + scored_results = [] + out = await 
tr.filter_top_k_events(relevant_events, scored_results) + assert [e["id"] for e in out] == ["x", "y"] + assert all(e["score"] == float("inf") for e in out) + + +# Test filter_top_k_events error handling for missing structure +@pytest.mark.asyncio +async def test_filter_top_k_events_error_handling(): + tr = TemporalRetriever(top_k=2) + with pytest.raises((KeyError, TypeError)): + await tr.filter_top_k_events([{}], []) + + +class _FakeRetriever(TemporalRetriever): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._calls = [] + + async def extract_time_from_query(self, query: str): + if "both" in query: + return "2024-01-01", "2024-12-31" + if "from_only" in query: + return "2024-01-01", None + if "to_only" in query: + return None, "2024-12-31" + return None, None + + async def get_triplets(self, query: str): + self._calls.append(("get_triplets", query)) + return [{"s": "a", "p": "b", "o": "c"}] + + async def resolve_edges_to_text(self, triplets): + self._calls.append(("resolve_edges_to_text", len(triplets))) + return "edges->text" + + async def _fake_graph_collect_ids(self, **kwargs): + return ["e1", "e2"] + + async def _fake_graph_collect_events(self, ids): + return [{"events": [ + {"id": "e1", "description": "E1"}, + {"id": "e2", "description": "E2"}, + {"id": "e3", "description": "E3"}, + ]}] + + async def _fake_vector_embed(self, texts): + assert isinstance(texts, list) and texts + return [[0.0, 1.0, 2.0]] + + async def _fake_vector_search(self, **kwargs): + return [ + SimpleNamespace(payload={"id": "e2"}, score=0.05), + SimpleNamespace(payload={"id": "e1"}, score=0.10), + ] + + async def get_context(self, query: str): + time_from, time_to = await self.extract_time_from_query(query) + + if not (time_from or time_to): + triplets = await self.get_triplets(query) + return await self.resolve_edges_to_text(triplets) + + ids = await self._fake_graph_collect_ids( + time_from=time_from, time_to=time_to + ) + relevant_events = await self._fake_graph_collect_events(ids) + + _ = await self._fake_vector_embed([query]) + vector_search_results = await self._fake_vector_search( + collection_name="Event_name", query_vector=[0.0], limit=0 + ) + top_k_events = await self.filter_top_k_events(relevant_events, vector_search_results) + return self.descriptions_to_string(top_k_events) + + +# Test get_context fallback to triplets when no time is extracted +@pytest.mark.asyncio +async def test_fake_get_context_falls_back_to_triplets_when_no_time(): + tr = _FakeRetriever(top_k=2) + ctx = await tr.get_context("no_time") + assert ctx == "edges->text" + assert tr._calls[0][0] == "get_triplets" + assert tr._calls[1][0] == "resolve_edges_to_text" + + +# Test get_context when time is extracted and vector ranking is applied +@pytest.mark.asyncio +async def test_fake_get_context_with_time_filters_and_vector_ranking(): + tr = _FakeRetriever(top_k=2) + ctx = await tr.get_context("both time") + assert ctx.startswith("E2") + assert "#####################" in ctx + assert "E1" in ctx and "E3" not in ctx From d336511c57cab0e2726673ed11e581e7a7cdc709 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Mon, 1 Sep 2025 15:31:30 +0200 Subject: [PATCH 57/73] ruff fix --- .../retrieval/temporal_retriever_test.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/cognee/tests/unit/modules/retrieval/temporal_retriever_test.py b/cognee/tests/unit/modules/retrieval/temporal_retriever_test.py index 954dc398e..a322cb237 100644 --- 
a/cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +++ b/cognee/tests/unit/modules/retrieval/temporal_retriever_test.py @@ -167,11 +167,15 @@ class _FakeRetriever(TemporalRetriever): return ["e1", "e2"] async def _fake_graph_collect_events(self, ids): - return [{"events": [ - {"id": "e1", "description": "E1"}, - {"id": "e2", "description": "E2"}, - {"id": "e3", "description": "E3"}, - ]}] + return [ + { + "events": [ + {"id": "e1", "description": "E1"}, + {"id": "e2", "description": "E2"}, + {"id": "e3", "description": "E3"}, + ] + } + ] async def _fake_vector_embed(self, texts): assert isinstance(texts, list) and texts @@ -190,9 +194,7 @@ class _FakeRetriever(TemporalRetriever): triplets = await self.get_triplets(query) return await self.resolve_edges_to_text(triplets) - ids = await self._fake_graph_collect_ids( - time_from=time_from, time_to=time_to - ) + ids = await self._fake_graph_collect_ids(time_from=time_from, time_to=time_to) relevant_events = await self._fake_graph_collect_events(ids) _ = await self._fake_vector_embed([query]) From 60b09182cfcf00adb0a87395c607bc7970367397 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Mon, 1 Sep 2025 15:33:11 +0200 Subject: [PATCH 58/73] fixes linting --- cognee/tests/test_temporal_graph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cognee/tests/test_temporal_graph.py b/cognee/tests/test_temporal_graph.py index 9a9b2a93e..675a01689 100644 --- a/cognee/tests/test_temporal_graph.py +++ b/cognee/tests/test_temporal_graph.py @@ -145,11 +145,11 @@ async def main(): result_before = await retriever.extract_time_from_query("What happened before 1890?") - assert result_before[0] == None + assert result_before[0] is None result_after = await retriever.extract_time_from_query("What happened after 1891?") - assert result_after[1] == None + assert result_after[1] is None result_between = await retriever.extract_time_from_query("What happened between 1890 and 1900?") From e29c16edc515d81f82e95bac5e4b44dbc18cafda Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 1 Sep 2025 16:31:10 +0200 Subject: [PATCH 59/73] fix: Return coding rules to MCP --- cognee-mcp/pyproject.toml | 2 +- cognee-mcp/src/server.py | 28 +++++++-------- cognee-mcp/uv.lock | 71 +++++++++++++++++++++++++++++++++------ 3 files changed, 75 insertions(+), 26 deletions(-) diff --git a/cognee-mcp/pyproject.toml b/cognee-mcp/pyproject.toml index a8596615b..8bde50841 100644 --- a/cognee-mcp/pyproject.toml +++ b/cognee-mcp/pyproject.toml @@ -8,7 +8,7 @@ requires-python = ">=3.10" dependencies = [ # For local cognee repo usage remove comment bellow and add absolute path to cognee. Then run `uv sync --reinstall` in the mcp folder on local cognee changes. 
# "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j] @ file:/Users/vasilije/Projects/tiktok/cognee", - "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]==0.2.3", + "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]==0.2.4", "fastmcp>=2.10.0,<3.0.0", "mcp>=1.12.0,<2.0.0", "uv>=0.6.3,<1.0.0", diff --git a/cognee-mcp/src/server.py b/cognee-mcp/src/server.py index 5d11e0ce5..9e55b9707 100755 --- a/cognee-mcp/src/server.py +++ b/cognee-mcp/src/server.py @@ -21,16 +21,16 @@ from cognee.shared.data_models import KnowledgeGraph from cognee.modules.storage.utils import JSONEncoder -# try: -# from codingagents.coding_rule_associations import ( -# add_rule_associations, -# get_existing_rules, -# ) -# except ModuleNotFoundError: -# from .codingagents.coding_rule_associations import ( -# add_rule_associations, -# get_existing_rules, -# ) +try: + from codingagents.coding_rule_associations import ( + add_rule_associations, + get_existing_rules, + ) +except ModuleNotFoundError: + from .codingagents.coding_rule_associations import ( + add_rule_associations, + get_existing_rules, + ) mcp = FastMCP("Cognee") @@ -310,7 +310,7 @@ async def save_interaction(data: str) -> list: logger.info("Save interaction process finished.") logger.info("Generating associated rules from interaction data.") - # await add_rule_associations(data=data, rules_nodeset_name="coding_agent_rules") + await add_rule_associations(data=data, rules_nodeset_name="coding_agent_rules") logger.info("Associated rules generated from interaction data.") @@ -572,10 +572,8 @@ async def get_developer_rules() -> list: async def fetch_rules_from_cognee() -> str: """Collect all developer rules from Cognee""" with redirect_stdout(sys.stderr): - note = "This is broken in 0.2.2" - return note - # developer_rules = await get_existing_rules(rules_nodeset_name="coding_agent_rules") - # return developer_rules + developer_rules = await get_existing_rules(rules_nodeset_name="coding_agent_rules") + return developer_rules rules_text = await fetch_rules_from_cognee() diff --git a/cognee-mcp/uv.lock b/cognee-mcp/uv.lock index bfa434b4f..dd2797519 100644 --- a/cognee-mcp/uv.lock +++ b/cognee-mcp/uv.lock @@ -332,6 +332,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148, upload-time = "2022-10-05T19:19:30.546Z" }, ] +[[package]] +name = "baml-py" +version = "0.201.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/54/2b0edb3d22e95ce56f36610391c11108a4ef26ba2837736a32001687ae34/baml_py-0.201.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:83228d2af2b0e845bbbb4e14f7cbd3376cec385aee01210ac522ab6076e07bec", size = 17387971, upload-time = "2025-07-03T19:29:05.844Z" }, + { url = "https://files.pythonhosted.org/packages/c9/08/1d48c28c63eadea2c04360cbb7f64968599e99cd6b8fc0ec0bd4424d3cf1/baml_py-0.201.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:2a9d016139e3ae5b5ce98c7b05b5fbd53d5d38f04dc810ec4d70fb17dd6c10e4", size = 16191010, upload-time = "2025-07-03T19:29:09.323Z" }, + { url = "https://files.pythonhosted.org/packages/73/1a/20b2d46501e3dd0648af339825106a6ac5eeb5d22d7e6a10cf16b9aa1cb8/baml_py-0.201.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b5058505b1a3c5f04fc1679aec4d730fa9bef2cbd96209b3ed50152f60b96baf", size = 19950249, upload-time = 
"2025-07-03T19:29:11.974Z" }, + { url = "https://files.pythonhosted.org/packages/38/24/bc871059e905159ae1913c2e3032dd6ef2f5c3d0983999d2c2f1eebb65a4/baml_py-0.201.0-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:36289d548581ba4accd5eaaab3246872542dd32dc6717e537654fa0cad884071", size = 19231310, upload-time = "2025-07-03T19:29:14.857Z" }, + { url = "https://files.pythonhosted.org/packages/0e/11/4268a0b82b02c7202fe5aa0d7175712158d998c491cac723b2bac3d5d495/baml_py-0.201.0-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:5ab70e7bd6481d71edca8a33313347b29faccec78b9960138aa437522813ac9a", size = 19490012, upload-time = "2025-07-03T19:29:18.512Z" }, + { url = "https://files.pythonhosted.org/packages/31/21/c9f9aea1adba2a5978ffab11ba0948a9f3f81ec6ed3056067713260e93a1/baml_py-0.201.0-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:7efc5c693a7142c230a4f3d6700415127fee0b9f5fdbb36db63e04e27ac4c0f1", size = 20090620, upload-time = "2025-07-03T19:29:21.072Z" }, + { url = "https://files.pythonhosted.org/packages/99/cf/92123d8d753f1d1473e080c4c182139bfe3b9a6418e891cf1d96b6c33848/baml_py-0.201.0-cp38-abi3-win_amd64.whl", hash = "sha256:56499857b7a27ae61a661c8ce0dddd0fb567a45c0b826157e44048a14cf586f9", size = 17253005, upload-time = "2025-07-03T19:29:23.722Z" }, + { url = "https://files.pythonhosted.org/packages/59/88/5056aa1bc9480f758cd6e210d63bd1f9ad90b44c87f4121285906526495e/baml_py-0.201.0-cp38-abi3-win_arm64.whl", hash = "sha256:1e52dc1151db84a302b746590fe2bc484bdd794f83fa5da7216d9394c559f33a", size = 15612701, upload-time = "2025-07-03T19:29:26.712Z" }, +] + [[package]] name = "bcrypt" version = "4.3.0" @@ -590,13 +605,14 @@ wheels = [ [[package]] name = "cognee" -version = "0.2.1" +version = "0.2.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiofiles" }, { name = "aiohttp" }, { name = "aiosqlite" }, { name = "alembic" }, + { name = "baml-py" }, { name = "dlt", extra = ["sqlalchemy"] }, { name = "fastapi" }, { name = "fastapi-users", extra = ["sqlalchemy"] }, @@ -624,6 +640,7 @@ dependencies = [ { name = "pympler" }, { name = "pypdf" }, { name = "python-dotenv" }, + { name = "python-magic-bin", marker = "sys_platform == 'win32'" }, { name = "python-multipart" }, { name = "rdflib" }, { name = "s3fs", extra = ["boto3"] }, @@ -634,9 +651,9 @@ dependencies = [ { name = "tiktoken" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/41/46/e7df1faebc92fa31ef8e33faf81feb435782727a789de5532d178e047224/cognee-0.2.1.tar.gz", hash = "sha256:bf5208383fc841981641c040e5b6588e58111af4d771f9eab6552f441e6a8e6c", size = 15497626, upload-time = "2025-07-25T15:53:57.009Z" } +sdist = { url = "https://files.pythonhosted.org/packages/da/b1/99c7f0c20cae101d4777bdc17b466bab58d0b4abfbb5d62c54d3babcc3ec/cognee-0.2.4.tar.gz", hash = "sha256:e8ac1c60cabb2e1d41db4f337a4dca3c7aa0c54d605d32e6087dba1c02b3beba", size = 13955686, upload-time = "2025-08-27T14:39:05.532Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b3/0e/b705c6eeb538dcdd8fbbb331be25fe8e0bbc1af7d76e61566ec9845b29d3/cognee-0.2.1-py3-none-any.whl", hash = "sha256:6e9d437e0c58a16233841ebf19b1a3d8b67da069460a4f08d0c0e00301b1d36d", size = 1019851, upload-time = "2025-07-25T15:53:53.488Z" }, + { url = "https://files.pythonhosted.org/packages/e8/78/24df77b88d719ba308281412ebeb17c37867333e16bd2d1da7e192c1dc5d/cognee-0.2.4-py3-none-any.whl", hash = "sha256:56ab83c18ec9d7b307dfa206fcef39bc036e893d13e5390212f730b5204e3ae1", size = 1433548, upload-time = 
"2025-08-27T14:38:56.986Z" }, ] [package.optional-dependencies] @@ -682,7 +699,7 @@ dev = [ [package.metadata] requires-dist = [ - { name = "cognee", extras = ["postgres", "codegraph", "gemini", "huggingface", "docs", "neo4j"], specifier = "==0.2.1" }, + { name = "cognee", extras = ["postgres", "codegraph", "gemini", "huggingface", "docs", "neo4j"], specifier = "==0.2.4" }, { name = "fastmcp", specifier = ">=2.10.0,<3.0.0" }, { name = "mcp", specifier = ">=1.12.0,<2.0.0" }, { name = "uv", specifier = ">=0.6.3,<1.0.0" }, @@ -1258,6 +1275,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/9a/51108b68e77650a7289b5f1ceff8dc0929ab48a26d1d2015f22121a9d183/fastmcp-2.11.0-py3-none-any.whl", hash = "sha256:8709a04522e66fda407b469fbe4d3290651aa7b06097b91c097e9a973c9b9bb3", size = 256193, upload-time = "2025-08-01T21:30:09.905Z" }, ] +[[package]] +name = "fastuuid" +version = "0.12.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/19/17/13146a1e916bd2971d0a58db5e0a4ad23efdd49f78f33ac871c161f8007b/fastuuid-0.12.0.tar.gz", hash = "sha256:d0bd4e5b35aad2826403f4411937c89e7c88857b1513fe10f696544c03e9bd8e", size = 19180, upload-time = "2025-01-27T18:04:14.387Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/97/c3/9db9aee6f34e6dfd1f909d3d7432ac26e491a0471f8bb8b676c44b625b3f/fastuuid-0.12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:22a900ef0956aacf862b460e20541fdae2d7c340594fe1bd6fdcb10d5f0791a9", size = 247356, upload-time = "2025-01-27T18:04:45.397Z" }, + { url = "https://files.pythonhosted.org/packages/14/a5/999e6e017af3d85841ce1e172d32fd27c8700804c125f496f71bfddc1a9f/fastuuid-0.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0302f5acf54dc75de30103025c5a95db06d6c2be36829043a0aa16fc170076bc", size = 258384, upload-time = "2025-01-27T18:04:03.562Z" }, + { url = "https://files.pythonhosted.org/packages/c4/e6/beae8411cac5b3b0b9d59ee08405eb39c3abe81dad459114363eff55c14a/fastuuid-0.12.0-cp310-cp310-manylinux_2_34_x86_64.whl", hash = "sha256:7946b4a310cfc2d597dcba658019d72a2851612a2cebb949d809c0e2474cf0a6", size = 278480, upload-time = "2025-01-27T18:04:05.663Z" }, + { url = "https://files.pythonhosted.org/packages/f1/f6/c598b9a052435716fc5a084ef17049edd35ca2c8241161269bfea4905ab4/fastuuid-0.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:a1b6764dd42bf0c46c858fb5ade7b7a3d93b7a27485a7a5c184909026694cd88", size = 156799, upload-time = "2025-01-27T18:05:41.867Z" }, + { url = "https://files.pythonhosted.org/packages/d4/99/555eab31381c7912103d4c8654082611e5e82a7bb88ad5ab067e36b622d7/fastuuid-0.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2bced35269315d16fe0c41003f8c9d63f2ee16a59295d90922cad5e6a67d0418", size = 247249, upload-time = "2025-01-27T18:03:23.092Z" }, + { url = "https://files.pythonhosted.org/packages/6d/3b/d62ce7f2af3d50a8e787603d44809770f43a3f2ff708bf10c252bf479109/fastuuid-0.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82106e4b0a24f4f2f73c88f89dadbc1533bb808900740ca5db9bbb17d3b0c824", size = 258369, upload-time = "2025-01-27T18:04:08.903Z" }, + { url = "https://files.pythonhosted.org/packages/86/23/33ec5355036745cf83ea9ca7576d2e0750ff8d268c03b4af40ed26f1a303/fastuuid-0.12.0-cp311-cp311-manylinux_2_34_x86_64.whl", hash = "sha256:4db1bc7b8caa1d7412e1bea29b016d23a8d219131cff825b933eb3428f044dca", size = 278316, upload-time = "2025-01-27T18:04:12.74Z" }, + { url = 
"https://files.pythonhosted.org/packages/40/91/32ce82a14650148b6979ccd1a0089fd63d92505a90fb7156d2acc3245cbd/fastuuid-0.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:07afc8e674e67ac3d35a608c68f6809da5fab470fb4ef4469094fdb32ba36c51", size = 156643, upload-time = "2025-01-27T18:05:59.266Z" }, + { url = "https://files.pythonhosted.org/packages/f6/28/442e79d6219b90208cb243ac01db05d89cc4fdf8ecd563fb89476baf7122/fastuuid-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:328694a573fe9dce556b0b70c9d03776786801e028d82f0b6d9db1cb0521b4d1", size = 247372, upload-time = "2025-01-27T18:03:40.967Z" }, + { url = "https://files.pythonhosted.org/packages/40/eb/e0fd56890970ca7a9ec0d116844580988b692b1a749ac38e0c39e1dbdf23/fastuuid-0.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02acaea2c955bb2035a7d8e7b3fba8bd623b03746ae278e5fa932ef54c702f9f", size = 258200, upload-time = "2025-01-27T18:04:12.138Z" }, + { url = "https://files.pythonhosted.org/packages/f5/3c/4b30e376e65597a51a3dc929461a0dec77c8aec5d41d930f482b8f43e781/fastuuid-0.12.0-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:ed9f449cba8cf16cced252521aee06e633d50ec48c807683f21cc1d89e193eb0", size = 278446, upload-time = "2025-01-27T18:04:15.877Z" }, + { url = "https://files.pythonhosted.org/packages/fe/96/cc5975fd23d2197b3e29f650a7a9beddce8993eaf934fa4ac595b77bb71f/fastuuid-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:0df2ea4c9db96fd8f4fa38d0e88e309b3e56f8fd03675a2f6958a5b082a0c1e4", size = 157185, upload-time = "2025-01-27T18:06:19.21Z" }, + { url = "https://files.pythonhosted.org/packages/a9/e8/d2bb4f19e5ee15f6f8e3192a54a897678314151aa17d0fb766d2c2cbc03d/fastuuid-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7fe2407316a04ee8f06d3dbc7eae396d0a86591d92bafe2ca32fce23b1145786", size = 247512, upload-time = "2025-01-27T18:04:08.115Z" }, + { url = "https://files.pythonhosted.org/packages/bc/53/25e811d92fd60f5c65e098c3b68bd8f1a35e4abb6b77a153025115b680de/fastuuid-0.12.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9b31dd488d0778c36f8279b306dc92a42f16904cba54acca71e107d65b60b0c", size = 258257, upload-time = "2025-01-27T18:03:56.408Z" }, + { url = "https://files.pythonhosted.org/packages/10/23/73618e7793ea0b619caae2accd9e93e60da38dd78dd425002d319152ef2f/fastuuid-0.12.0-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:b19361ee649365eefc717ec08005972d3d1eb9ee39908022d98e3bfa9da59e37", size = 278559, upload-time = "2025-01-27T18:03:58.661Z" }, + { url = "https://files.pythonhosted.org/packages/e4/41/6317ecfc4757d5f2a604e5d3993f353ba7aee85fa75ad8b86fce6fc2fa40/fastuuid-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:8fc66b11423e6f3e1937385f655bedd67aebe56a3dcec0cb835351cfe7d358c9", size = 157276, upload-time = "2025-01-27T18:06:39.245Z" }, +] + [[package]] name = "filelock" version = "3.18.0" @@ -2253,11 +2294,12 @@ wheels = [ [[package]] name = "litellm" -version = "1.70.4" +version = "1.76.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, { name = "click" }, + { name = "fastuuid" }, { name = "httpx" }, { name = "importlib-metadata" }, { name = "jinja2" }, @@ -2268,9 +2310,9 @@ dependencies = [ { name = "tiktoken" }, { name = "tokenizers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/60/d7/d0d76ba896a1e8978550dcc76157d1c50910ba9ade4ef3981a34f01f4fa6/litellm-1.70.4.tar.gz", hash = "sha256:ef6749a091faaaf88313afe4111cdd95736e1e60f21ba894e74f7c5bab2870bd", size = 7813817, upload-time = 
"2025-05-23T00:05:24.47Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/fd/aa87c0a598377786521bee585f4d525e846f5339b816903298bfbb9daef5/litellm-1.76.1.tar.gz", hash = "sha256:d5a3a3efda04999b60ec0d1c29c1eaaa12f89a7b29db4bda691c7fb55b4fa6ad", size = 10178100, upload-time = "2025-08-30T21:05:48.578Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cc/8f/0b26ecb08b8282ae0fdfa2223b5df8263579c9e3c75ca96bb7fb7cbc632c/litellm-1.70.4-py3-none-any.whl", hash = "sha256:4d14d04bf5e2bd49336b4abc59193352c731ff371022e4fcf590208f41f644f7", size = 7903749, upload-time = "2025-05-23T00:05:21.017Z" }, + { url = "https://files.pythonhosted.org/packages/d9/d3/16423b6d399540eeff357f00abc85f62dc337d347a0c98ccadc448a61df5/litellm-1.76.1-py3-none-any.whl", hash = "sha256:938f05075372f26098211ea9b3cb0a6bb7b46111330226b70d42d40bd307812f", size = 8965465, upload-time = "2025-08-30T21:05:46.068Z" }, ] [[package]] @@ -3117,7 +3159,7 @@ wheels = [ [[package]] name = "openai" -version = "1.98.0" +version = "1.99.8" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -3129,9 +3171,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d8/9d/52eadb15c92802711d6b6cf00df3a6d0d18b588f4c5ba5ff210c6419fc03/openai-1.98.0.tar.gz", hash = "sha256:3ee0fcc50ae95267fd22bd1ad095ba5402098f3df2162592e68109999f685427", size = 496695, upload-time = "2025-07-30T12:48:03.701Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4b/81/288157471c43975cc849bc8779b8c7209aec6da5d7cbcd87a982912a19e5/openai-1.99.8.tar.gz", hash = "sha256:4b49845983eb4d5ffae9bae5d98bd5c0bd3a709a30f8b994fc8f316961b6d566", size = 506953, upload-time = "2025-08-11T20:19:02.312Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a8/fe/f64631075b3d63a613c0d8ab761d5941631a470f6fa87eaaee1aa2b4ec0c/openai-1.98.0-py3-none-any.whl", hash = "sha256:b99b794ef92196829120e2df37647722104772d2a74d08305df9ced5f26eae34", size = 767713, upload-time = "2025-07-30T12:48:01.264Z" }, + { url = "https://files.pythonhosted.org/packages/36/b6/3940f037aa33e6d5aa00707fd02843a1cac06ee0e106f39cfb71d0653d23/openai-1.99.8-py3-none-any.whl", hash = "sha256:426b981079cffde6dd54868b9b84761ffa291cde77010f051b96433e1835b47d", size = 786821, upload-time = "2025-08-11T20:18:59.943Z" }, ] [[package]] @@ -4163,6 +4205,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6c/73/9f872cb81fc5c3bb48f7227872c28975f998f3e7c2b1c16e95e6432bbb90/python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3", size = 13840, upload-time = "2022-06-07T20:16:57.763Z" }, ] +[[package]] +name = "python-magic-bin" +version = "0.4.14" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/5d/10b9ac745d9fd2f7151a2ab901e6bb6983dbd70e87c71111f54859d1ca2e/python_magic_bin-0.4.14-py2.py3-none-win32.whl", hash = "sha256:34a788c03adde7608028203e2dbb208f1f62225ad91518787ae26d603ae68892", size = 397784, upload-time = "2017-10-02T16:30:15.806Z" }, + { url = "https://files.pythonhosted.org/packages/07/c2/094e3d62b906d952537196603a23aec4bcd7c6126bf80eb14e6f9f4be3a2/python_magic_bin-0.4.14-py2.py3-none-win_amd64.whl", hash = "sha256:90be6206ad31071a36065a2fc169c5afb5e0355cbe6030e87641c6c62edc2b69", size = 409299, upload-time = "2017-10-02T16:30:18.545Z" }, +] + [[package]] name = "python-multipart" version = "0.0.20" From 
74cf56e1ce6db668f4019282c722965e7925d428 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 1 Sep 2025 16:31:10 +0200 Subject: [PATCH 60/73] fix: Return coding rules to MCP --- cognee-mcp/pyproject.toml | 2 +- cognee-mcp/src/server.py | 28 +++++++-------- cognee-mcp/uv.lock | 71 +++++++++++++++++++++++++++++++++------ 3 files changed, 75 insertions(+), 26 deletions(-) diff --git a/cognee-mcp/pyproject.toml b/cognee-mcp/pyproject.toml index a8596615b..8bde50841 100644 --- a/cognee-mcp/pyproject.toml +++ b/cognee-mcp/pyproject.toml @@ -8,7 +8,7 @@ requires-python = ">=3.10" dependencies = [ # For local cognee repo usage remove comment bellow and add absolute path to cognee. Then run `uv sync --reinstall` in the mcp folder on local cognee changes. # "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j] @ file:/Users/vasilije/Projects/tiktok/cognee", - "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]==0.2.3", + "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]==0.2.4", "fastmcp>=2.10.0,<3.0.0", "mcp>=1.12.0,<2.0.0", "uv>=0.6.3,<1.0.0", diff --git a/cognee-mcp/src/server.py b/cognee-mcp/src/server.py index 5d11e0ce5..9e55b9707 100755 --- a/cognee-mcp/src/server.py +++ b/cognee-mcp/src/server.py @@ -21,16 +21,16 @@ from cognee.shared.data_models import KnowledgeGraph from cognee.modules.storage.utils import JSONEncoder -# try: -# from codingagents.coding_rule_associations import ( -# add_rule_associations, -# get_existing_rules, -# ) -# except ModuleNotFoundError: -# from .codingagents.coding_rule_associations import ( -# add_rule_associations, -# get_existing_rules, -# ) +try: + from codingagents.coding_rule_associations import ( + add_rule_associations, + get_existing_rules, + ) +except ModuleNotFoundError: + from .codingagents.coding_rule_associations import ( + add_rule_associations, + get_existing_rules, + ) mcp = FastMCP("Cognee") @@ -310,7 +310,7 @@ async def save_interaction(data: str) -> list: logger.info("Save interaction process finished.") logger.info("Generating associated rules from interaction data.") - # await add_rule_associations(data=data, rules_nodeset_name="coding_agent_rules") + await add_rule_associations(data=data, rules_nodeset_name="coding_agent_rules") logger.info("Associated rules generated from interaction data.") @@ -572,10 +572,8 @@ async def get_developer_rules() -> list: async def fetch_rules_from_cognee() -> str: """Collect all developer rules from Cognee""" with redirect_stdout(sys.stderr): - note = "This is broken in 0.2.2" - return note - # developer_rules = await get_existing_rules(rules_nodeset_name="coding_agent_rules") - # return developer_rules + developer_rules = await get_existing_rules(rules_nodeset_name="coding_agent_rules") + return developer_rules rules_text = await fetch_rules_from_cognee() diff --git a/cognee-mcp/uv.lock b/cognee-mcp/uv.lock index bfa434b4f..dd2797519 100644 --- a/cognee-mcp/uv.lock +++ b/cognee-mcp/uv.lock @@ -332,6 +332,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148, upload-time = "2022-10-05T19:19:30.546Z" }, ] +[[package]] +name = "baml-py" +version = "0.201.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/54/2b0edb3d22e95ce56f36610391c11108a4ef26ba2837736a32001687ae34/baml_py-0.201.0-cp38-abi3-macosx_10_12_x86_64.whl", hash 
= "sha256:83228d2af2b0e845bbbb4e14f7cbd3376cec385aee01210ac522ab6076e07bec", size = 17387971, upload-time = "2025-07-03T19:29:05.844Z" }, + { url = "https://files.pythonhosted.org/packages/c9/08/1d48c28c63eadea2c04360cbb7f64968599e99cd6b8fc0ec0bd4424d3cf1/baml_py-0.201.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:2a9d016139e3ae5b5ce98c7b05b5fbd53d5d38f04dc810ec4d70fb17dd6c10e4", size = 16191010, upload-time = "2025-07-03T19:29:09.323Z" }, + { url = "https://files.pythonhosted.org/packages/73/1a/20b2d46501e3dd0648af339825106a6ac5eeb5d22d7e6a10cf16b9aa1cb8/baml_py-0.201.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b5058505b1a3c5f04fc1679aec4d730fa9bef2cbd96209b3ed50152f60b96baf", size = 19950249, upload-time = "2025-07-03T19:29:11.974Z" }, + { url = "https://files.pythonhosted.org/packages/38/24/bc871059e905159ae1913c2e3032dd6ef2f5c3d0983999d2c2f1eebb65a4/baml_py-0.201.0-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:36289d548581ba4accd5eaaab3246872542dd32dc6717e537654fa0cad884071", size = 19231310, upload-time = "2025-07-03T19:29:14.857Z" }, + { url = "https://files.pythonhosted.org/packages/0e/11/4268a0b82b02c7202fe5aa0d7175712158d998c491cac723b2bac3d5d495/baml_py-0.201.0-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:5ab70e7bd6481d71edca8a33313347b29faccec78b9960138aa437522813ac9a", size = 19490012, upload-time = "2025-07-03T19:29:18.512Z" }, + { url = "https://files.pythonhosted.org/packages/31/21/c9f9aea1adba2a5978ffab11ba0948a9f3f81ec6ed3056067713260e93a1/baml_py-0.201.0-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:7efc5c693a7142c230a4f3d6700415127fee0b9f5fdbb36db63e04e27ac4c0f1", size = 20090620, upload-time = "2025-07-03T19:29:21.072Z" }, + { url = "https://files.pythonhosted.org/packages/99/cf/92123d8d753f1d1473e080c4c182139bfe3b9a6418e891cf1d96b6c33848/baml_py-0.201.0-cp38-abi3-win_amd64.whl", hash = "sha256:56499857b7a27ae61a661c8ce0dddd0fb567a45c0b826157e44048a14cf586f9", size = 17253005, upload-time = "2025-07-03T19:29:23.722Z" }, + { url = "https://files.pythonhosted.org/packages/59/88/5056aa1bc9480f758cd6e210d63bd1f9ad90b44c87f4121285906526495e/baml_py-0.201.0-cp38-abi3-win_arm64.whl", hash = "sha256:1e52dc1151db84a302b746590fe2bc484bdd794f83fa5da7216d9394c559f33a", size = 15612701, upload-time = "2025-07-03T19:29:26.712Z" }, +] + [[package]] name = "bcrypt" version = "4.3.0" @@ -590,13 +605,14 @@ wheels = [ [[package]] name = "cognee" -version = "0.2.1" +version = "0.2.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiofiles" }, { name = "aiohttp" }, { name = "aiosqlite" }, { name = "alembic" }, + { name = "baml-py" }, { name = "dlt", extra = ["sqlalchemy"] }, { name = "fastapi" }, { name = "fastapi-users", extra = ["sqlalchemy"] }, @@ -624,6 +640,7 @@ dependencies = [ { name = "pympler" }, { name = "pypdf" }, { name = "python-dotenv" }, + { name = "python-magic-bin", marker = "sys_platform == 'win32'" }, { name = "python-multipart" }, { name = "rdflib" }, { name = "s3fs", extra = ["boto3"] }, @@ -634,9 +651,9 @@ dependencies = [ { name = "tiktoken" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/41/46/e7df1faebc92fa31ef8e33faf81feb435782727a789de5532d178e047224/cognee-0.2.1.tar.gz", hash = "sha256:bf5208383fc841981641c040e5b6588e58111af4d771f9eab6552f441e6a8e6c", size = 15497626, upload-time = "2025-07-25T15:53:57.009Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/da/b1/99c7f0c20cae101d4777bdc17b466bab58d0b4abfbb5d62c54d3babcc3ec/cognee-0.2.4.tar.gz", hash = "sha256:e8ac1c60cabb2e1d41db4f337a4dca3c7aa0c54d605d32e6087dba1c02b3beba", size = 13955686, upload-time = "2025-08-27T14:39:05.532Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b3/0e/b705c6eeb538dcdd8fbbb331be25fe8e0bbc1af7d76e61566ec9845b29d3/cognee-0.2.1-py3-none-any.whl", hash = "sha256:6e9d437e0c58a16233841ebf19b1a3d8b67da069460a4f08d0c0e00301b1d36d", size = 1019851, upload-time = "2025-07-25T15:53:53.488Z" }, + { url = "https://files.pythonhosted.org/packages/e8/78/24df77b88d719ba308281412ebeb17c37867333e16bd2d1da7e192c1dc5d/cognee-0.2.4-py3-none-any.whl", hash = "sha256:56ab83c18ec9d7b307dfa206fcef39bc036e893d13e5390212f730b5204e3ae1", size = 1433548, upload-time = "2025-08-27T14:38:56.986Z" }, ] [package.optional-dependencies] @@ -682,7 +699,7 @@ dev = [ [package.metadata] requires-dist = [ - { name = "cognee", extras = ["postgres", "codegraph", "gemini", "huggingface", "docs", "neo4j"], specifier = "==0.2.1" }, + { name = "cognee", extras = ["postgres", "codegraph", "gemini", "huggingface", "docs", "neo4j"], specifier = "==0.2.4" }, { name = "fastmcp", specifier = ">=2.10.0,<3.0.0" }, { name = "mcp", specifier = ">=1.12.0,<2.0.0" }, { name = "uv", specifier = ">=0.6.3,<1.0.0" }, @@ -1258,6 +1275,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/9a/51108b68e77650a7289b5f1ceff8dc0929ab48a26d1d2015f22121a9d183/fastmcp-2.11.0-py3-none-any.whl", hash = "sha256:8709a04522e66fda407b469fbe4d3290651aa7b06097b91c097e9a973c9b9bb3", size = 256193, upload-time = "2025-08-01T21:30:09.905Z" }, ] +[[package]] +name = "fastuuid" +version = "0.12.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/19/17/13146a1e916bd2971d0a58db5e0a4ad23efdd49f78f33ac871c161f8007b/fastuuid-0.12.0.tar.gz", hash = "sha256:d0bd4e5b35aad2826403f4411937c89e7c88857b1513fe10f696544c03e9bd8e", size = 19180, upload-time = "2025-01-27T18:04:14.387Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/97/c3/9db9aee6f34e6dfd1f909d3d7432ac26e491a0471f8bb8b676c44b625b3f/fastuuid-0.12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:22a900ef0956aacf862b460e20541fdae2d7c340594fe1bd6fdcb10d5f0791a9", size = 247356, upload-time = "2025-01-27T18:04:45.397Z" }, + { url = "https://files.pythonhosted.org/packages/14/a5/999e6e017af3d85841ce1e172d32fd27c8700804c125f496f71bfddc1a9f/fastuuid-0.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0302f5acf54dc75de30103025c5a95db06d6c2be36829043a0aa16fc170076bc", size = 258384, upload-time = "2025-01-27T18:04:03.562Z" }, + { url = "https://files.pythonhosted.org/packages/c4/e6/beae8411cac5b3b0b9d59ee08405eb39c3abe81dad459114363eff55c14a/fastuuid-0.12.0-cp310-cp310-manylinux_2_34_x86_64.whl", hash = "sha256:7946b4a310cfc2d597dcba658019d72a2851612a2cebb949d809c0e2474cf0a6", size = 278480, upload-time = "2025-01-27T18:04:05.663Z" }, + { url = "https://files.pythonhosted.org/packages/f1/f6/c598b9a052435716fc5a084ef17049edd35ca2c8241161269bfea4905ab4/fastuuid-0.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:a1b6764dd42bf0c46c858fb5ade7b7a3d93b7a27485a7a5c184909026694cd88", size = 156799, upload-time = "2025-01-27T18:05:41.867Z" }, + { url = "https://files.pythonhosted.org/packages/d4/99/555eab31381c7912103d4c8654082611e5e82a7bb88ad5ab067e36b622d7/fastuuid-0.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:2bced35269315d16fe0c41003f8c9d63f2ee16a59295d90922cad5e6a67d0418", size = 247249, upload-time = "2025-01-27T18:03:23.092Z" }, + { url = "https://files.pythonhosted.org/packages/6d/3b/d62ce7f2af3d50a8e787603d44809770f43a3f2ff708bf10c252bf479109/fastuuid-0.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82106e4b0a24f4f2f73c88f89dadbc1533bb808900740ca5db9bbb17d3b0c824", size = 258369, upload-time = "2025-01-27T18:04:08.903Z" }, + { url = "https://files.pythonhosted.org/packages/86/23/33ec5355036745cf83ea9ca7576d2e0750ff8d268c03b4af40ed26f1a303/fastuuid-0.12.0-cp311-cp311-manylinux_2_34_x86_64.whl", hash = "sha256:4db1bc7b8caa1d7412e1bea29b016d23a8d219131cff825b933eb3428f044dca", size = 278316, upload-time = "2025-01-27T18:04:12.74Z" }, + { url = "https://files.pythonhosted.org/packages/40/91/32ce82a14650148b6979ccd1a0089fd63d92505a90fb7156d2acc3245cbd/fastuuid-0.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:07afc8e674e67ac3d35a608c68f6809da5fab470fb4ef4469094fdb32ba36c51", size = 156643, upload-time = "2025-01-27T18:05:59.266Z" }, + { url = "https://files.pythonhosted.org/packages/f6/28/442e79d6219b90208cb243ac01db05d89cc4fdf8ecd563fb89476baf7122/fastuuid-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:328694a573fe9dce556b0b70c9d03776786801e028d82f0b6d9db1cb0521b4d1", size = 247372, upload-time = "2025-01-27T18:03:40.967Z" }, + { url = "https://files.pythonhosted.org/packages/40/eb/e0fd56890970ca7a9ec0d116844580988b692b1a749ac38e0c39e1dbdf23/fastuuid-0.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02acaea2c955bb2035a7d8e7b3fba8bd623b03746ae278e5fa932ef54c702f9f", size = 258200, upload-time = "2025-01-27T18:04:12.138Z" }, + { url = "https://files.pythonhosted.org/packages/f5/3c/4b30e376e65597a51a3dc929461a0dec77c8aec5d41d930f482b8f43e781/fastuuid-0.12.0-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:ed9f449cba8cf16cced252521aee06e633d50ec48c807683f21cc1d89e193eb0", size = 278446, upload-time = "2025-01-27T18:04:15.877Z" }, + { url = "https://files.pythonhosted.org/packages/fe/96/cc5975fd23d2197b3e29f650a7a9beddce8993eaf934fa4ac595b77bb71f/fastuuid-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:0df2ea4c9db96fd8f4fa38d0e88e309b3e56f8fd03675a2f6958a5b082a0c1e4", size = 157185, upload-time = "2025-01-27T18:06:19.21Z" }, + { url = "https://files.pythonhosted.org/packages/a9/e8/d2bb4f19e5ee15f6f8e3192a54a897678314151aa17d0fb766d2c2cbc03d/fastuuid-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7fe2407316a04ee8f06d3dbc7eae396d0a86591d92bafe2ca32fce23b1145786", size = 247512, upload-time = "2025-01-27T18:04:08.115Z" }, + { url = "https://files.pythonhosted.org/packages/bc/53/25e811d92fd60f5c65e098c3b68bd8f1a35e4abb6b77a153025115b680de/fastuuid-0.12.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9b31dd488d0778c36f8279b306dc92a42f16904cba54acca71e107d65b60b0c", size = 258257, upload-time = "2025-01-27T18:03:56.408Z" }, + { url = "https://files.pythonhosted.org/packages/10/23/73618e7793ea0b619caae2accd9e93e60da38dd78dd425002d319152ef2f/fastuuid-0.12.0-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:b19361ee649365eefc717ec08005972d3d1eb9ee39908022d98e3bfa9da59e37", size = 278559, upload-time = "2025-01-27T18:03:58.661Z" }, + { url = "https://files.pythonhosted.org/packages/e4/41/6317ecfc4757d5f2a604e5d3993f353ba7aee85fa75ad8b86fce6fc2fa40/fastuuid-0.12.0-cp313-cp313-win_amd64.whl", hash = 
"sha256:8fc66b11423e6f3e1937385f655bedd67aebe56a3dcec0cb835351cfe7d358c9", size = 157276, upload-time = "2025-01-27T18:06:39.245Z" }, +] + [[package]] name = "filelock" version = "3.18.0" @@ -2253,11 +2294,12 @@ wheels = [ [[package]] name = "litellm" -version = "1.70.4" +version = "1.76.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, { name = "click" }, + { name = "fastuuid" }, { name = "httpx" }, { name = "importlib-metadata" }, { name = "jinja2" }, @@ -2268,9 +2310,9 @@ dependencies = [ { name = "tiktoken" }, { name = "tokenizers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/60/d7/d0d76ba896a1e8978550dcc76157d1c50910ba9ade4ef3981a34f01f4fa6/litellm-1.70.4.tar.gz", hash = "sha256:ef6749a091faaaf88313afe4111cdd95736e1e60f21ba894e74f7c5bab2870bd", size = 7813817, upload-time = "2025-05-23T00:05:24.47Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/fd/aa87c0a598377786521bee585f4d525e846f5339b816903298bfbb9daef5/litellm-1.76.1.tar.gz", hash = "sha256:d5a3a3efda04999b60ec0d1c29c1eaaa12f89a7b29db4bda691c7fb55b4fa6ad", size = 10178100, upload-time = "2025-08-30T21:05:48.578Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cc/8f/0b26ecb08b8282ae0fdfa2223b5df8263579c9e3c75ca96bb7fb7cbc632c/litellm-1.70.4-py3-none-any.whl", hash = "sha256:4d14d04bf5e2bd49336b4abc59193352c731ff371022e4fcf590208f41f644f7", size = 7903749, upload-time = "2025-05-23T00:05:21.017Z" }, + { url = "https://files.pythonhosted.org/packages/d9/d3/16423b6d399540eeff357f00abc85f62dc337d347a0c98ccadc448a61df5/litellm-1.76.1-py3-none-any.whl", hash = "sha256:938f05075372f26098211ea9b3cb0a6bb7b46111330226b70d42d40bd307812f", size = 8965465, upload-time = "2025-08-30T21:05:46.068Z" }, ] [[package]] @@ -3117,7 +3159,7 @@ wheels = [ [[package]] name = "openai" -version = "1.98.0" +version = "1.99.8" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -3129,9 +3171,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d8/9d/52eadb15c92802711d6b6cf00df3a6d0d18b588f4c5ba5ff210c6419fc03/openai-1.98.0.tar.gz", hash = "sha256:3ee0fcc50ae95267fd22bd1ad095ba5402098f3df2162592e68109999f685427", size = 496695, upload-time = "2025-07-30T12:48:03.701Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4b/81/288157471c43975cc849bc8779b8c7209aec6da5d7cbcd87a982912a19e5/openai-1.99.8.tar.gz", hash = "sha256:4b49845983eb4d5ffae9bae5d98bd5c0bd3a709a30f8b994fc8f316961b6d566", size = 506953, upload-time = "2025-08-11T20:19:02.312Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a8/fe/f64631075b3d63a613c0d8ab761d5941631a470f6fa87eaaee1aa2b4ec0c/openai-1.98.0-py3-none-any.whl", hash = "sha256:b99b794ef92196829120e2df37647722104772d2a74d08305df9ced5f26eae34", size = 767713, upload-time = "2025-07-30T12:48:01.264Z" }, + { url = "https://files.pythonhosted.org/packages/36/b6/3940f037aa33e6d5aa00707fd02843a1cac06ee0e106f39cfb71d0653d23/openai-1.99.8-py3-none-any.whl", hash = "sha256:426b981079cffde6dd54868b9b84761ffa291cde77010f051b96433e1835b47d", size = 786821, upload-time = "2025-08-11T20:18:59.943Z" }, ] [[package]] @@ -4163,6 +4205,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6c/73/9f872cb81fc5c3bb48f7227872c28975f998f3e7c2b1c16e95e6432bbb90/python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3", size = 13840, upload-time = 
"2022-06-07T20:16:57.763Z" }, ] +[[package]] +name = "python-magic-bin" +version = "0.4.14" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/5d/10b9ac745d9fd2f7151a2ab901e6bb6983dbd70e87c71111f54859d1ca2e/python_magic_bin-0.4.14-py2.py3-none-win32.whl", hash = "sha256:34a788c03adde7608028203e2dbb208f1f62225ad91518787ae26d603ae68892", size = 397784, upload-time = "2017-10-02T16:30:15.806Z" }, + { url = "https://files.pythonhosted.org/packages/07/c2/094e3d62b906d952537196603a23aec4bcd7c6126bf80eb14e6f9f4be3a2/python_magic_bin-0.4.14-py2.py3-none-win_amd64.whl", hash = "sha256:90be6206ad31071a36065a2fc169c5afb5e0355cbe6030e87641c6c62edc2b69", size = 409299, upload-time = "2017-10-02T16:30:18.545Z" }, +] + [[package]] name = "python-multipart" version = "0.0.20" From 940d4797bc0d56ef855b85ce76dd8758cfc24135 Mon Sep 17 00:00:00 2001 From: vasilije Date: Mon, 1 Sep 2025 17:17:48 +0200 Subject: [PATCH 61/73] added fix to embeddings format --- .../databases/vector/embeddings/get_embedding_engine.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py b/cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py index 192f1958c..e7fcf4e94 100644 --- a/cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +++ b/cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py @@ -33,7 +33,7 @@ def get_embedding_engine() -> EmbeddingEngine: config.embedding_api_version, config.huggingface_tokenizer, llm_config.llm_api_key, - llm_config.llm_provider + llm_config.llm_provider, ) @@ -48,7 +48,7 @@ def create_embedding_engine( embedding_api_version, huggingface_tokenizer, llm_api_key, - llm_provider + llm_provider, ): """ Create and return an embedding engine based on the specified provider. 
@@ -101,7 +101,8 @@ def create_embedding_engine( return LiteLLMEmbeddingEngine( provider=embedding_provider, - api_key=embedding_api_key or (embedding_api_key if llm_provider == 'custom' else llm_api_key), + api_key=embedding_api_key + or (embedding_api_key if llm_provider == "custom" else llm_api_key), endpoint=embedding_endpoint, api_version=embedding_api_version, model=embedding_model, From e25ac2785c80ef2d502e1bee5f4550b303d5a60c Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Tue, 2 Sep 2025 08:51:26 +0200 Subject: [PATCH 62/73] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a7e7f1e05..e618d5bf9 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,9 @@ More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github ## Get Started -Get started quickly with a Google Colab notebook , Deepnote notebook or starter repo +Get started quickly with a Google Colab notebook , Deepnote notebook or starter repo + + ## Contributing From d8326a7e3aad95d090739777d126b6cf4008a784 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 2 Sep 2025 11:21:05 +0200 Subject: [PATCH 63/73] feat: path handling has to be absolute by gneeraj2001 --- cognee/base_config.py | 8 +-- .../infrastructure/databases/graph/config.py | 6 +- .../infrastructure/databases/vector/config.py | 8 +-- cognee/root_dir.py | 24 ++------ cognee/tests/test_path_config.py | 59 +------------------ 5 files changed, 14 insertions(+), 91 deletions(-) diff --git a/cognee/base_config.py b/cognee/base_config.py index b3258dba9..940846128 100644 --- a/cognee/base_config.py +++ b/cognee/base_config.py @@ -15,12 +15,8 @@ class BaseConfig(BaseSettings): @pydantic.model_validator(mode="after") def validate_paths(self): # Require absolute paths for root directories - self.data_root_directory = ensure_absolute_path( - self.data_root_directory, allow_relative=False - ) - self.system_root_directory = ensure_absolute_path( - self.system_root_directory, allow_relative=False - ) + self.data_root_directory = ensure_absolute_path(self.data_root_directory) + self.system_root_directory = ensure_absolute_path(self.system_root_directory) return self langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY") diff --git a/cognee/infrastructure/databases/graph/config.py b/cognee/infrastructure/databases/graph/config.py index 60c193d91..d96de4520 100644 --- a/cognee/infrastructure/databases/graph/config.py +++ b/cognee/infrastructure/databases/graph/config.py @@ -60,11 +60,9 @@ class GraphConfig(BaseSettings): # Handle graph file path if values.graph_file_path: - # Convert relative paths to absolute using system_root_directory as base + # Check if absolute path is provided values.graph_file_path = ensure_absolute_path( - values.graph_file_path, - base_path=base_config.system_root_directory, - allow_relative=True + os.path.join(values.graph_file_path, values.graph_filename) ) else: # Default path diff --git a/cognee/infrastructure/databases/vector/config.py b/cognee/infrastructure/databases/vector/config.py index ed846a54b..7a20130bd 100644 --- a/cognee/infrastructure/databases/vector/config.py +++ b/cognee/infrastructure/databases/vector/config.py @@ -1,5 +1,6 @@ import os import pydantic +from pathlib import Path from functools import lru_cache from pydantic_settings import BaseSettings, SettingsConfigDict @@ -32,12 +33,11 @@ class VectorConfig(BaseSettings): def validate_paths(cls, values): base_config = 
get_base_config() - if values.vector_db_url: - # Convert relative paths to absolute using system_root_directory as base + # If vector_db_url is provided and is not a path skip checking if path is absolute (as it can also be a url) + if values.vector_db_url and Path(values.vector_db_url).exists(): + # Relative path to absolute values.vector_db_url = ensure_absolute_path( values.vector_db_url, - base_path=base_config.system_root_directory, - allow_relative=True, ) else: # Default path diff --git a/cognee/root_dir.py b/cognee/root_dir.py index 4853acd02..46d8fcb69 100644 --- a/cognee/root_dir.py +++ b/cognee/root_dir.py @@ -9,22 +9,14 @@ def get_absolute_path(path_from_root: str) -> str: return str(absolute_path.resolve()) -def ensure_absolute_path( - path: str, base_path: Optional[str] = None, allow_relative: bool = False -) -> str: - """Ensures a path is absolute, optionally converting relative paths. +def ensure_absolute_path(path: str) -> str: + """Ensures a path is absolute. Args: - path: The path to validate/convert. - base_path: Required base when converting relative paths (e.g., SYSTEM_ROOT_DIRECTORY). - allow_relative: If False, raises error for relative paths instead of converting. + path: The path to validate. Returns: Absolute path as string - - Raises: - ValueError: If path is None; or path is relative and allow_relative is False; - or base_path is missing/non-absolute when converting. """ if path is None: raise ValueError("Path cannot be None") @@ -32,12 +24,4 @@ def ensure_absolute_path( if path_obj.is_absolute(): return str(path_obj.resolve()) - if not allow_relative: - raise ValueError(f"Path must be absolute. Got relative path: {path}") - - if base_path is None: - raise ValueError("base_path must be provided when converting relative paths") - base = Path(base_path).expanduser() - if not base.is_absolute(): - raise ValueError("base_path must be absolute when converting relative paths") - return str((base / path_obj).resolve()) + raise ValueError(f"Path must be absolute. 
Got relative path: {path}") diff --git a/cognee/tests/test_path_config.py b/cognee/tests/test_path_config.py index b90ce8cac..55f641479 100644 --- a/cognee/tests/test_path_config.py +++ b/cognee/tests/test_path_config.py @@ -1,19 +1,16 @@ import os -from pathlib import Path - from pathlib import Path import pytest from cognee.root_dir import ensure_absolute_path -# …rest of your test cases using ensure_absolute_path… def test_root_dir_absolute_paths(): """Test absolute path handling in root_dir.py""" # Test with absolute path - abs_path = "C:/absolute/path" if os.name == 'nt' else "/absolute/path" + abs_path = "C:/absolute/path" if os.name == "nt" else "/absolute/path" result = ensure_absolute_path(abs_path, allow_relative=False) assert result == str(Path(abs_path).resolve()) - + # Test with relative path (should fail) rel_path = "relative/path" with pytest.raises(ValueError, match="must be absolute"): @@ -22,55 +19,3 @@ def test_root_dir_absolute_paths(): # Test with None path with pytest.raises(ValueError, match="cannot be None"): ensure_absolute_path(None) -def test_database_relative_paths(): - """Test relative path handling for vector and graph databases""" - system_root = "C:/system/root" if os.name == 'nt' else "/system/root" - - # Test with absolute path - abs_path = "C:/data/vector.db" if os.name == 'nt' else "/data/vector.db" - result = ensure_absolute_path(abs_path, base_path=system_root, allow_relative=True) - assert result == str(Path(abs_path).resolve()) - - # Test with relative path (should convert to absolute) - rel_path = "data/vector.db" - result = ensure_absolute_path(rel_path, base_path=system_root, allow_relative=True) - expected = str((Path(system_root) / rel_path).resolve()) - assert result == expected - - # Test with relative base_path (should fail) - with pytest.raises(ValueError, match="base_path must be absolute"): - ensure_absolute_path(rel_path, base_path="relative/base", allow_relative=True) - - # Test without base_path for relative path - with pytest.raises(ValueError, match="base_path must be provided"): - ensure_absolute_path(rel_path, allow_relative=True) -def test_path_consistency(): - """Test that paths are handled consistently across configurations""" - system_root = "C:/system/root" if os.name == 'nt' else "/system/root" - - # Root directories must be absolute - data_root = "C:/data/root" if os.name == 'nt' else "/data/root" - assert ensure_absolute_path(data_root, allow_relative=False) == str(Path(data_root).resolve()) - - # Database paths can be relative but must resolve against system_root - db_paths = [ - # Vector DB paths - "vector.db", # Simple relative - "data/vector.db", # Nested relative - "../vector.db", # Parent relative - "./vector.db", # Current dir relative - # Graph DB paths - "graph.db", # Simple relative - "data/graph/db", # Nested relative - "../graph.db", # Parent relative - "./graph.db", # Current dir relative - # With different extensions - "data/vector.lancedb", # Vector DB with extension - "data/graph/kuzu", # Graph DB with extension - ] - - for rel_path in db_paths: - result = ensure_absolute_path(rel_path, base_path=system_root, allow_relative=True) - expected = str((Path(system_root) / rel_path).resolve()) - assert result == expected, f"Failed to resolve {rel_path} correctly" - From cb6651a6e9925e48e6f270b610e05f6a0298eae6 Mon Sep 17 00:00:00 2001 From: gneeraj2001 Date: Fri, 29 Aug 2025 02:06:43 -0700 Subject: [PATCH 64/73] Fix path handling consistency Signed-off-by: gneeraj2001 --- cognee/base_config.py | 15 ++- 
.../infrastructure/databases/graph/config.py | 16 ++- .../infrastructure/databases/vector/config.py | 21 ++-- cognee/root_dir.py | 28 +++++ cognee/tests/test_path_config.py | 114 ++++++++++++++++++ 5 files changed, 182 insertions(+), 12 deletions(-) create mode 100644 cognee/tests/test_path_config.py diff --git a/cognee/base_config.py b/cognee/base_config.py index aa0b14008..d80e6197f 100644 --- a/cognee/base_config.py +++ b/cognee/base_config.py @@ -1,15 +1,28 @@ import os from typing import Optional from functools import lru_cache -from cognee.root_dir import get_absolute_path +from cognee.root_dir import get_absolute_path, ensure_absolute_path from cognee.modules.observability.observers import Observer from pydantic_settings import BaseSettings, SettingsConfigDict +import pydantic class BaseConfig(BaseSettings): data_root_directory: str = get_absolute_path(".data_storage") system_root_directory: str = get_absolute_path(".cognee_system") monitoring_tool: object = Observer.LANGFUSE + + @pydantic.model_validator(mode="after") + def validate_paths(cls, values): + # Require absolute paths for root directories + values.data_root_directory = ensure_absolute_path( + values.data_root_directory, allow_relative=False + ) + values.system_root_directory = ensure_absolute_path( + values.system_root_directory, allow_relative=False + ) + return values + langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY") langfuse_secret_key: Optional[str] = os.getenv("LANGFUSE_SECRET_KEY") langfuse_host: Optional[str] = os.getenv("LANGFUSE_HOST") diff --git a/cognee/infrastructure/databases/graph/config.py b/cognee/infrastructure/databases/graph/config.py index cdc001863..60c193d91 100644 --- a/cognee/infrastructure/databases/graph/config.py +++ b/cognee/infrastructure/databases/graph/config.py @@ -6,6 +6,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict import pydantic from pydantic import Field from cognee.base_config import get_base_config +from cognee.root_dir import ensure_absolute_path from cognee.shared.data_models import KnowledgeGraph @@ -51,15 +52,22 @@ class GraphConfig(BaseSettings): @pydantic.model_validator(mode="after") def fill_derived(cls, values): provider = values.graph_database_provider.lower() + base_config = get_base_config() # Set default filename if no filename is provided if not values.graph_filename: values.graph_filename = f"cognee_graph_{provider}" - # Set file path based on graph database provider if no file path is provided - if not values.graph_file_path: - base_config = get_base_config() - + # Handle graph file path + if values.graph_file_path: + # Convert relative paths to absolute using system_root_directory as base + values.graph_file_path = ensure_absolute_path( + values.graph_file_path, + base_path=base_config.system_root_directory, + allow_relative=True + ) + else: + # Default path databases_directory_path = os.path.join(base_config.system_root_directory, "databases") values.graph_file_path = os.path.join(databases_directory_path, values.graph_filename) diff --git a/cognee/infrastructure/databases/vector/config.py b/cognee/infrastructure/databases/vector/config.py index 07a3d1e05..ed846a54b 100644 --- a/cognee/infrastructure/databases/vector/config.py +++ b/cognee/infrastructure/databases/vector/config.py @@ -4,6 +4,7 @@ from functools import lru_cache from pydantic_settings import BaseSettings, SettingsConfigDict from cognee.base_config import get_base_config +from cognee.root_dir import ensure_absolute_path class VectorConfig(BaseSettings): @@ 
-11,12 +12,10 @@ class VectorConfig(BaseSettings): Manage the configuration settings for the vector database. Public methods: - - to_dict: Convert the configuration to a dictionary. Instance variables: - - - vector_db_url: The URL of the vector database. + - vector_db_url: The URL of the vector database. Can be relative to system_root_directory. - vector_db_port: The port for the vector database. - vector_db_key: The key for accessing the vector database. - vector_db_provider: The provider for the vector database. @@ -30,10 +29,18 @@ class VectorConfig(BaseSettings): model_config = SettingsConfigDict(env_file=".env", extra="allow") @pydantic.model_validator(mode="after") - def fill_derived(cls, values): - # Set file path based on graph database provider if no file path is provided - if not values.vector_db_url: - base_config = get_base_config() + def validate_paths(cls, values): + base_config = get_base_config() + + if values.vector_db_url: + # Convert relative paths to absolute using system_root_directory as base + values.vector_db_url = ensure_absolute_path( + values.vector_db_url, + base_path=base_config.system_root_directory, + allow_relative=True, + ) + else: + # Default path databases_directory_path = os.path.join(base_config.system_root_directory, "databases") values.vector_db_url = os.path.join(databases_directory_path, "cognee.lancedb") diff --git a/cognee/root_dir.py b/cognee/root_dir.py index 2e21d5ce3..73afd0c12 100644 --- a/cognee/root_dir.py +++ b/cognee/root_dir.py @@ -1,4 +1,5 @@ from pathlib import Path +from typing import Optional ROOT_DIR = Path(__file__).resolve().parent @@ -6,3 +7,30 @@ ROOT_DIR = Path(__file__).resolve().parent def get_absolute_path(path_from_root: str) -> str: absolute_path = ROOT_DIR / path_from_root return str(absolute_path.resolve()) + + +def ensure_absolute_path( + path: str, base_path: Optional[str] = None, allow_relative: bool = False +) -> str: + """Ensures a path is absolute, optionally converting relative paths. + + Args: + path: The path to validate/convert + base_path: Optional base path for relative paths. If None, uses ROOT_DIR + allow_relative: If False, raises error for relative paths instead of converting + + Returns: + Absolute path as string + + Raises: + ValueError: If path is relative and allow_relative is False + """ + path_obj = Path(path) + if path_obj.is_absolute(): + return str(path_obj.resolve()) + + if not allow_relative: + raise ValueError(f"Path must be absolute. Got relative path: {path}") + + base = Path(base_path) if base_path else ROOT_DIR + return str((base / path).resolve()) diff --git a/cognee/tests/test_path_config.py b/cognee/tests/test_path_config.py new file mode 100644 index 000000000..ff1905c5e --- /dev/null +++ b/cognee/tests/test_path_config.py @@ -0,0 +1,114 @@ +import os +from pathlib import Path + +def ensure_absolute_path(path: str, base_path: str = None, allow_relative: bool = False) -> str: + """Ensures a path is absolute, optionally converting relative paths.""" + if path is None: + raise ValueError("Path cannot be None") + + path_obj = Path(path) + if path_obj.is_absolute(): + return str(path_obj.resolve()) + + if not allow_relative: + raise ValueError(f"Path must be absolute. 
Got relative path: {path}") + + if base_path is None: + raise ValueError("base_path must be provided when converting relative paths") + + base = Path(base_path) + if not base.is_absolute(): + raise ValueError("base_path must be absolute when converting relative paths") + + return str((base / path).resolve()) + +def test_root_dir_absolute_paths(): + """Test absolute path handling in root_dir.py""" + # Test with absolute path + abs_path = "C:/absolute/path" if os.name == 'nt' else "/absolute/path" + result = ensure_absolute_path(abs_path, allow_relative=False) + assert result == str(Path(abs_path).resolve()) + + # Test with relative path (should fail) + rel_path = "relative/path" + try: + ensure_absolute_path(rel_path, allow_relative=False) + assert False, "Should fail with relative path when allow_relative=False" + except ValueError as e: + assert "must be absolute" in str(e) + + # Test with None path + try: + ensure_absolute_path(None) + assert False, "Should fail with None path" + except ValueError as e: + assert "cannot be None" in str(e) + +def test_database_relative_paths(): + """Test relative path handling for vector and graph databases""" + system_root = "C:/system/root" if os.name == 'nt' else "/system/root" + + # Test with absolute path + abs_path = "C:/data/vector.db" if os.name == 'nt' else "/data/vector.db" + result = ensure_absolute_path(abs_path, base_path=system_root, allow_relative=True) + assert result == str(Path(abs_path).resolve()) + + # Test with relative path (should convert to absolute) + rel_path = "data/vector.db" + result = ensure_absolute_path(rel_path, base_path=system_root, allow_relative=True) + expected = str((Path(system_root) / rel_path).resolve()) + assert result == expected + + # Test with relative base_path (should fail) + try: + ensure_absolute_path(rel_path, base_path="relative/base", allow_relative=True) + assert False, "Should fail when base_path is relative" + except ValueError as e: + assert "base_path must be absolute" in str(e) + + # Test without base_path for relative path + try: + ensure_absolute_path(rel_path, allow_relative=True) + assert False, "Should fail when base_path is not provided for relative path" + except ValueError as e: + assert "base_path must be provided" in str(e) + +def test_path_consistency(): + """Test that paths are handled consistently across configurations""" + system_root = "C:/system/root" if os.name == 'nt' else "/system/root" + + # Root directories must be absolute + data_root = "C:/data/root" if os.name == 'nt' else "/data/root" + assert ensure_absolute_path(data_root, allow_relative=False) == str(Path(data_root).resolve()) + + # Database paths can be relative but must resolve against system_root + db_paths = [ + # Vector DB paths + "vector.db", # Simple relative + "data/vector.db", # Nested relative + "../vector.db", # Parent relative + "./vector.db", # Current dir relative + # Graph DB paths + "graph.db", # Simple relative + "data/graph/db", # Nested relative + "../graph.db", # Parent relative + "./graph.db", # Current dir relative + # With different extensions + "data/vector.lancedb", # Vector DB with extension + "data/graph/kuzu", # Graph DB with extension + ] + + for rel_path in db_paths: + result = ensure_absolute_path(rel_path, base_path=system_root, allow_relative=True) + expected = str((Path(system_root) / rel_path).resolve()) + assert result == expected, f"Failed to resolve {rel_path} correctly" + +if __name__ == "__main__": + print("Running path configuration tests...") + test_root_dir_absolute_paths() + 
print("✓ Root directory absolute path tests passed") + test_database_relative_paths() + print("✓ Database relative path tests passed") + test_path_consistency() + print("✓ Path consistency tests passed") + print("All tests passed successfully!") From 0e1e14b7c1698c263f1130563712990704811f16 Mon Sep 17 00:00:00 2001 From: Neeraj Gopalakrishnan <91423180+gneeraj2001@users.noreply.github.com> Date: Fri, 29 Aug 2025 02:31:58 -0700 Subject: [PATCH 65/73] Update cognee/base_config.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- cognee/base_config.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cognee/base_config.py b/cognee/base_config.py index d80e6197f..b3258dba9 100644 --- a/cognee/base_config.py +++ b/cognee/base_config.py @@ -13,15 +13,15 @@ class BaseConfig(BaseSettings): monitoring_tool: object = Observer.LANGFUSE @pydantic.model_validator(mode="after") - def validate_paths(cls, values): + def validate_paths(self): # Require absolute paths for root directories - values.data_root_directory = ensure_absolute_path( - values.data_root_directory, allow_relative=False + self.data_root_directory = ensure_absolute_path( + self.data_root_directory, allow_relative=False ) - values.system_root_directory = ensure_absolute_path( - values.system_root_directory, allow_relative=False + self.system_root_directory = ensure_absolute_path( + self.system_root_directory, allow_relative=False ) - return values + return self langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY") langfuse_secret_key: Optional[str] = os.getenv("LANGFUSE_SECRET_KEY") From d3dd87d90ecbba788c4f866c78806d3bfe672d01 Mon Sep 17 00:00:00 2001 From: Neeraj Gopalakrishnan <91423180+gneeraj2001@users.noreply.github.com> Date: Fri, 29 Aug 2025 02:33:16 -0700 Subject: [PATCH 66/73] Update cognee/tests/test_path_config.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- cognee/tests/test_path_config.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/cognee/tests/test_path_config.py b/cognee/tests/test_path_config.py index ff1905c5e..600f04579 100644 --- a/cognee/tests/test_path_config.py +++ b/cognee/tests/test_path_config.py @@ -103,12 +103,3 @@ def test_path_consistency(): expected = str((Path(system_root) / rel_path).resolve()) assert result == expected, f"Failed to resolve {rel_path} correctly" -if __name__ == "__main__": - print("Running path configuration tests...") - test_root_dir_absolute_paths() - print("✓ Root directory absolute path tests passed") - test_database_relative_paths() - print("✓ Database relative path tests passed") - test_path_consistency() - print("✓ Path consistency tests passed") - print("All tests passed successfully!") From c9e4e6e6f4141a212ff5b180c227922844716296 Mon Sep 17 00:00:00 2001 From: Neeraj Gopalakrishnan <91423180+gneeraj2001@users.noreply.github.com> Date: Fri, 29 Aug 2025 02:35:03 -0700 Subject: [PATCH 67/73] Update cognee/tests/test_path_config.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- cognee/tests/test_path_config.py | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/cognee/tests/test_path_config.py b/cognee/tests/test_path_config.py index 600f04579..7a3d57e5b 100644 --- a/cognee/tests/test_path_config.py +++ b/cognee/tests/test_path_config.py @@ -1,26 +1,11 @@ import os from pathlib import Path -def ensure_absolute_path(path: str, base_path: str = None, 
allow_relative: bool = False) -> str: - """Ensures a path is absolute, optionally converting relative paths.""" - if path is None: - raise ValueError("Path cannot be None") - - path_obj = Path(path) - if path_obj.is_absolute(): - return str(path_obj.resolve()) - - if not allow_relative: - raise ValueError(f"Path must be absolute. Got relative path: {path}") - - if base_path is None: - raise ValueError("base_path must be provided when converting relative paths") - - base = Path(base_path) - if not base.is_absolute(): - raise ValueError("base_path must be absolute when converting relative paths") - - return str((base / path).resolve()) +from pathlib import Path +import pytest +from cognee.root_dir import ensure_absolute_path + +# …rest of your test cases using ensure_absolute_path… def test_root_dir_absolute_paths(): """Test absolute path handling in root_dir.py""" From fde28725a4b287e1e98c7ba9a3a5c1dc1e8edbb8 Mon Sep 17 00:00:00 2001 From: Neeraj Gopalakrishnan <91423180+gneeraj2001@users.noreply.github.com> Date: Fri, 29 Aug 2025 02:35:48 -0700 Subject: [PATCH 68/73] Update cognee/tests/test_path_config.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- cognee/tests/test_path_config.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/cognee/tests/test_path_config.py b/cognee/tests/test_path_config.py index 7a3d57e5b..65201fc70 100644 --- a/cognee/tests/test_path_config.py +++ b/cognee/tests/test_path_config.py @@ -45,19 +45,12 @@ def test_database_relative_paths(): assert result == expected # Test with relative base_path (should fail) - try: + with pytest.raises(ValueError, match="base_path must be absolute"): ensure_absolute_path(rel_path, base_path="relative/base", allow_relative=True) - assert False, "Should fail when base_path is relative" - except ValueError as e: - assert "base_path must be absolute" in str(e) # Test without base_path for relative path - try: + with pytest.raises(ValueError, match="base_path must be provided"): ensure_absolute_path(rel_path, allow_relative=True) - assert False, "Should fail when base_path is not provided for relative path" - except ValueError as e: - assert "base_path must be provided" in str(e) - def test_path_consistency(): """Test that paths are handled consistently across configurations""" system_root = "C:/system/root" if os.name == 'nt' else "/system/root" From e063c4908944b0aa4b24527f3517614d99c8b86f Mon Sep 17 00:00:00 2001 From: Neeraj Gopalakrishnan <91423180+gneeraj2001@users.noreply.github.com> Date: Fri, 29 Aug 2025 02:38:39 -0700 Subject: [PATCH 69/73] Update cognee/root_dir.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- cognee/root_dir.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/cognee/root_dir.py b/cognee/root_dir.py index 73afd0c12..4853acd02 100644 --- a/cognee/root_dir.py +++ b/cognee/root_dir.py @@ -15,22 +15,29 @@ def ensure_absolute_path( """Ensures a path is absolute, optionally converting relative paths. Args: - path: The path to validate/convert - base_path: Optional base path for relative paths. If None, uses ROOT_DIR - allow_relative: If False, raises error for relative paths instead of converting + path: The path to validate/convert. + base_path: Required base when converting relative paths (e.g., SYSTEM_ROOT_DIRECTORY). + allow_relative: If False, raises error for relative paths instead of converting. 
Returns: Absolute path as string Raises: - ValueError: If path is relative and allow_relative is False + ValueError: If path is None; or path is relative and allow_relative is False; + or base_path is missing/non-absolute when converting. """ - path_obj = Path(path) + if path is None: + raise ValueError("Path cannot be None") + path_obj = Path(path).expanduser() if path_obj.is_absolute(): return str(path_obj.resolve()) if not allow_relative: raise ValueError(f"Path must be absolute. Got relative path: {path}") - base = Path(base_path) if base_path else ROOT_DIR - return str((base / path).resolve()) + if base_path is None: + raise ValueError("base_path must be provided when converting relative paths") + base = Path(base_path).expanduser() + if not base.is_absolute(): + raise ValueError("base_path must be absolute when converting relative paths") + return str((base / path_obj).resolve()) From 3027b01701d266aeb637e3c68734a91eff0c8986 Mon Sep 17 00:00:00 2001 From: Neeraj Gopalakrishnan <91423180+gneeraj2001@users.noreply.github.com> Date: Fri, 29 Aug 2025 02:39:04 -0700 Subject: [PATCH 70/73] Update cognee/tests/test_path_config.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- cognee/tests/test_path_config.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/cognee/tests/test_path_config.py b/cognee/tests/test_path_config.py index 65201fc70..b90ce8cac 100644 --- a/cognee/tests/test_path_config.py +++ b/cognee/tests/test_path_config.py @@ -16,19 +16,12 @@ def test_root_dir_absolute_paths(): # Test with relative path (should fail) rel_path = "relative/path" - try: + with pytest.raises(ValueError, match="must be absolute"): ensure_absolute_path(rel_path, allow_relative=False) - assert False, "Should fail with relative path when allow_relative=False" - except ValueError as e: - assert "must be absolute" in str(e) - - # Test with None path - try: - ensure_absolute_path(None) - assert False, "Should fail with None path" - except ValueError as e: - assert "cannot be None" in str(e) + # Test with None path + with pytest.raises(ValueError, match="cannot be None"): + ensure_absolute_path(None) def test_database_relative_paths(): """Test relative path handling for vector and graph databases""" system_root = "C:/system/root" if os.name == 'nt' else "/system/root" From f36357acd8826ec8d84d3459d729fc6b44026ad7 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 2 Sep 2025 11:21:05 +0200 Subject: [PATCH 71/73] feat: path handling has to be absolute by gneeraj2001 --- cognee/base_config.py | 8 +-- .../infrastructure/databases/graph/config.py | 6 +- .../infrastructure/databases/vector/config.py | 8 +-- cognee/root_dir.py | 24 ++------ cognee/tests/test_path_config.py | 59 +------------------ 5 files changed, 14 insertions(+), 91 deletions(-) diff --git a/cognee/base_config.py b/cognee/base_config.py index b3258dba9..940846128 100644 --- a/cognee/base_config.py +++ b/cognee/base_config.py @@ -15,12 +15,8 @@ class BaseConfig(BaseSettings): @pydantic.model_validator(mode="after") def validate_paths(self): # Require absolute paths for root directories - self.data_root_directory = ensure_absolute_path( - self.data_root_directory, allow_relative=False - ) - self.system_root_directory = ensure_absolute_path( - self.system_root_directory, allow_relative=False - ) + self.data_root_directory = ensure_absolute_path(self.data_root_directory) + self.system_root_directory = ensure_absolute_path(self.system_root_directory) return self 
langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY") diff --git a/cognee/infrastructure/databases/graph/config.py b/cognee/infrastructure/databases/graph/config.py index 60c193d91..d96de4520 100644 --- a/cognee/infrastructure/databases/graph/config.py +++ b/cognee/infrastructure/databases/graph/config.py @@ -60,11 +60,9 @@ class GraphConfig(BaseSettings): # Handle graph file path if values.graph_file_path: - # Convert relative paths to absolute using system_root_directory as base + # Check if absolute path is provided values.graph_file_path = ensure_absolute_path( - values.graph_file_path, - base_path=base_config.system_root_directory, - allow_relative=True + os.path.join(values.graph_file_path, values.graph_filename) ) else: # Default path diff --git a/cognee/infrastructure/databases/vector/config.py b/cognee/infrastructure/databases/vector/config.py index ed846a54b..7a20130bd 100644 --- a/cognee/infrastructure/databases/vector/config.py +++ b/cognee/infrastructure/databases/vector/config.py @@ -1,5 +1,6 @@ import os import pydantic +from pathlib import Path from functools import lru_cache from pydantic_settings import BaseSettings, SettingsConfigDict @@ -32,12 +33,11 @@ class VectorConfig(BaseSettings): def validate_paths(cls, values): base_config = get_base_config() - if values.vector_db_url: - # Convert relative paths to absolute using system_root_directory as base + # If vector_db_url is provided and is not a path skip checking if path is absolute (as it can also be a url) + if values.vector_db_url and Path(values.vector_db_url).exists(): + # Relative path to absolute values.vector_db_url = ensure_absolute_path( values.vector_db_url, - base_path=base_config.system_root_directory, - allow_relative=True, ) else: # Default path diff --git a/cognee/root_dir.py b/cognee/root_dir.py index 4853acd02..46d8fcb69 100644 --- a/cognee/root_dir.py +++ b/cognee/root_dir.py @@ -9,22 +9,14 @@ def get_absolute_path(path_from_root: str) -> str: return str(absolute_path.resolve()) -def ensure_absolute_path( - path: str, base_path: Optional[str] = None, allow_relative: bool = False -) -> str: - """Ensures a path is absolute, optionally converting relative paths. +def ensure_absolute_path(path: str) -> str: + """Ensures a path is absolute. Args: - path: The path to validate/convert. - base_path: Required base when converting relative paths (e.g., SYSTEM_ROOT_DIRECTORY). - allow_relative: If False, raises error for relative paths instead of converting. + path: The path to validate. Returns: Absolute path as string - - Raises: - ValueError: If path is None; or path is relative and allow_relative is False; - or base_path is missing/non-absolute when converting. """ if path is None: raise ValueError("Path cannot be None") @@ -32,12 +24,4 @@ def ensure_absolute_path( if path_obj.is_absolute(): return str(path_obj.resolve()) - if not allow_relative: - raise ValueError(f"Path must be absolute. Got relative path: {path}") - - if base_path is None: - raise ValueError("base_path must be provided when converting relative paths") - base = Path(base_path).expanduser() - if not base.is_absolute(): - raise ValueError("base_path must be absolute when converting relative paths") - return str((base / path_obj).resolve()) + raise ValueError(f"Path must be absolute. 
Got relative path: {path}") diff --git a/cognee/tests/test_path_config.py b/cognee/tests/test_path_config.py index b90ce8cac..55f641479 100644 --- a/cognee/tests/test_path_config.py +++ b/cognee/tests/test_path_config.py @@ -1,19 +1,16 @@ import os -from pathlib import Path - from pathlib import Path import pytest from cognee.root_dir import ensure_absolute_path -# …rest of your test cases using ensure_absolute_path… def test_root_dir_absolute_paths(): """Test absolute path handling in root_dir.py""" # Test with absolute path - abs_path = "C:/absolute/path" if os.name == 'nt' else "/absolute/path" + abs_path = "C:/absolute/path" if os.name == "nt" else "/absolute/path" result = ensure_absolute_path(abs_path, allow_relative=False) assert result == str(Path(abs_path).resolve()) - + # Test with relative path (should fail) rel_path = "relative/path" with pytest.raises(ValueError, match="must be absolute"): @@ -22,55 +19,3 @@ def test_root_dir_absolute_paths(): # Test with None path with pytest.raises(ValueError, match="cannot be None"): ensure_absolute_path(None) -def test_database_relative_paths(): - """Test relative path handling for vector and graph databases""" - system_root = "C:/system/root" if os.name == 'nt' else "/system/root" - - # Test with absolute path - abs_path = "C:/data/vector.db" if os.name == 'nt' else "/data/vector.db" - result = ensure_absolute_path(abs_path, base_path=system_root, allow_relative=True) - assert result == str(Path(abs_path).resolve()) - - # Test with relative path (should convert to absolute) - rel_path = "data/vector.db" - result = ensure_absolute_path(rel_path, base_path=system_root, allow_relative=True) - expected = str((Path(system_root) / rel_path).resolve()) - assert result == expected - - # Test with relative base_path (should fail) - with pytest.raises(ValueError, match="base_path must be absolute"): - ensure_absolute_path(rel_path, base_path="relative/base", allow_relative=True) - - # Test without base_path for relative path - with pytest.raises(ValueError, match="base_path must be provided"): - ensure_absolute_path(rel_path, allow_relative=True) -def test_path_consistency(): - """Test that paths are handled consistently across configurations""" - system_root = "C:/system/root" if os.name == 'nt' else "/system/root" - - # Root directories must be absolute - data_root = "C:/data/root" if os.name == 'nt' else "/data/root" - assert ensure_absolute_path(data_root, allow_relative=False) == str(Path(data_root).resolve()) - - # Database paths can be relative but must resolve against system_root - db_paths = [ - # Vector DB paths - "vector.db", # Simple relative - "data/vector.db", # Nested relative - "../vector.db", # Parent relative - "./vector.db", # Current dir relative - # Graph DB paths - "graph.db", # Simple relative - "data/graph/db", # Nested relative - "../graph.db", # Parent relative - "./graph.db", # Current dir relative - # With different extensions - "data/vector.lancedb", # Vector DB with extension - "data/graph/kuzu", # Graph DB with extension - ] - - for rel_path in db_paths: - result = ensure_absolute_path(rel_path, base_path=system_root, allow_relative=True) - expected = str((Path(system_root) / rel_path).resolve()) - assert result == expected, f"Failed to resolve {rel_path} correctly" - From 3069870a12c58b6e0cf2a1e341eea95a22b9d06b Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 2 Sep 2025 11:27:59 +0200 Subject: [PATCH 72/73] chore: Remove docstring regarding relative path --- cognee/infrastructure/databases/vector/config.py | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/infrastructure/databases/vector/config.py b/cognee/infrastructure/databases/vector/config.py index 7a20130bd..f8fad473e 100644 --- a/cognee/infrastructure/databases/vector/config.py +++ b/cognee/infrastructure/databases/vector/config.py @@ -16,7 +16,7 @@ class VectorConfig(BaseSettings): - to_dict: Convert the configuration to a dictionary. Instance variables: - - vector_db_url: The URL of the vector database. Can be relative to system_root_directory. + - vector_db_url: The URL of the vector database. - vector_db_port: The port for the vector database. - vector_db_key: The key for accessing the vector database. - vector_db_provider: The provider for the vector database. From 405b7d80c6e117fb07d2a4bb7ef091d5d875557f Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 2 Sep 2025 11:54:15 +0200 Subject: [PATCH 73/73] refactor: move config path test to unit tests --- cognee/tests/test_path_config.py | 21 ------------------- .../tests/unit/processing/utils/utils_test.py | 21 ++++++++++++++++++- 2 files changed, 20 insertions(+), 22 deletions(-) delete mode 100644 cognee/tests/test_path_config.py diff --git a/cognee/tests/test_path_config.py b/cognee/tests/test_path_config.py deleted file mode 100644 index 55f641479..000000000 --- a/cognee/tests/test_path_config.py +++ /dev/null @@ -1,21 +0,0 @@ -import os -from pathlib import Path -import pytest -from cognee.root_dir import ensure_absolute_path - - -def test_root_dir_absolute_paths(): - """Test absolute path handling in root_dir.py""" - # Test with absolute path - abs_path = "C:/absolute/path" if os.name == "nt" else "/absolute/path" - result = ensure_absolute_path(abs_path, allow_relative=False) - assert result == str(Path(abs_path).resolve()) - - # Test with relative path (should fail) - rel_path = "relative/path" - with pytest.raises(ValueError, match="must be absolute"): - ensure_absolute_path(rel_path, allow_relative=False) - - # Test with None path - with pytest.raises(ValueError, match="cannot be None"): - ensure_absolute_path(None) diff --git a/cognee/tests/unit/processing/utils/utils_test.py b/cognee/tests/unit/processing/utils/utils_test.py index a684df8ed..ca9f8f065 100644 --- a/cognee/tests/unit/processing/utils/utils_test.py +++ b/cognee/tests/unit/processing/utils/utils_test.py @@ -4,8 +4,9 @@ import pytest from unittest.mock import patch, mock_open from io import BytesIO from uuid import uuid4 +from pathlib import Path - +from cognee.root_dir import ensure_absolute_path from cognee.infrastructure.files.utils.get_file_content_hash import get_file_content_hash from cognee.shared.utils import get_anonymous_id @@ -52,3 +53,21 @@ async def test_get_file_content_hash_stream(): expected_hash = hashlib.md5(b"test_data").hexdigest() result = await get_file_content_hash(stream) assert result == expected_hash + + +@pytest.mark.asyncio +async def test_root_dir_absolute_paths(): + """Test absolute path handling in root_dir.py""" + # Test with absolute path + abs_path = "C:/absolute/path" if os.name == "nt" else "/absolute/path" + result = ensure_absolute_path(abs_path) + assert result == str(Path(abs_path).resolve()) + + # Test with relative path (should fail) + rel_path = "relative/path" + with pytest.raises(ValueError, match="must be absolute"): + ensure_absolute_path(rel_path) + + # Test with None path + with pytest.raises(ValueError, match="cannot be None"): + ensure_absolute_path(None)
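The vector-config change in PATCH 71 above guards the absolute-path check on vector_db_url with Path(values.vector_db_url).exists(), since that setting may hold a remote URL or connection string rather than a local file path. A minimal sketch of that guard follows; is_local_db_path is a hypothetical helper name used for illustration and does not appear in the patches.

# Sketch of the URL-vs-path guard used by VectorConfig.validate_paths
# (hypothetical helper name; the real validator inlines this condition).
from pathlib import Path

def is_local_db_path(vector_db_url: str) -> bool:
    # Only values that resolve to an existing filesystem entry are treated as
    # local paths (and therefore required to be absolute); anything else, such
    # as an http(s) URL, falls through to the validator's default-path branch.
    return bool(vector_db_url) and Path(vector_db_url).exists()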
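After PATCH 73, ensure_absolute_path takes a single argument and admits only absolute paths. A short usage sketch of the final contract; the paths shown are examples only.

# Usage sketch for the final ensure_absolute_path contract (example paths only).
import os
from cognee.root_dir import ensure_absolute_path

# Absolute inputs are resolved and returned as strings.
resolved = ensure_absolute_path(os.path.abspath(".cognee_system"))

# Relative inputs are rejected instead of being resolved against a base path.
try:
    ensure_absolute_path("data/graph.db")
except ValueError as error:
    print(error)  # Path must be absolute. Got relative path: data/graph.db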