From 2a462085695a127a61467d1d14d8b2a86333be3a Mon Sep 17 00:00:00 2001 From: xdurawa Date: Mon, 1 Sep 2025 20:47:26 -0400 Subject: [PATCH 001/284] Adding AWS Bedrock support as a LLM provider Signed-off-by: xdurawa --- .github/workflows/test_bedrock_api_key.yml | 28 +++ .../test_bedrock_aws_credentials.yml | 29 ++++ .../workflows/test_bedrock_aws_profile.yml | 37 ++++ .github/workflows/test_suites.yml | 24 +++ README.md | 10 ++ cognee-starter-kit/README.md | 38 +++++ cognee/api/v1/add/add.py | 2 +- cognee/api/v1/search/search.py | 2 +- cognee/infrastructure/llm/config.py | 14 ++ .../llm/bedrock/__init__.py | 6 + .../litellm_instructor/llm/bedrock/adapter.py | 161 ++++++++++++++++++ .../litellm_instructor/llm/get_llm_client.py | 20 +++ cognee/modules/settings/get_settings.py | 21 ++- pyproject.toml | 2 +- uv.lock | 8 +- 15 files changed, 394 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/test_bedrock_api_key.yml create mode 100644 .github/workflows/test_bedrock_aws_credentials.yml create mode 100644 .github/workflows/test_bedrock_aws_profile.yml create mode 100644 cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py create mode 100644 cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py diff --git a/.github/workflows/test_bedrock_api_key.yml b/.github/workflows/test_bedrock_api_key.yml new file mode 100644 index 000000000..3f5ea94b3 --- /dev/null +++ b/.github/workflows/test_bedrock_api_key.yml @@ -0,0 +1,28 @@ +name: test | bedrock | api key + +on: + workflow_call: + +jobs: + test-bedrock-api-key: + name: Run Bedrock API Key Test + runs-on: ubuntu-22.04 + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' + + - name: Run Bedrock API Key Test + env: + LLM_PROVIDER: "bedrock" + LLM_API_KEY: ${{ secrets.BEDROCK_API_KEY }} + LLM_MODEL: 
"us.anthropic.claude-3-5-sonnet-20241022-v2:0" + AWS_REGION_NAME: "us-east-1" + EMBEDDING_PROVIDER: "bedrock" + EMBEDDING_MODEL: "amazon.titan-embed-text-v1" + EMBEDDING_DIMENSIONS: "1536" + run: poetry run python ./examples/python/simple_example.py diff --git a/.github/workflows/test_bedrock_aws_credentials.yml b/.github/workflows/test_bedrock_aws_credentials.yml new file mode 100644 index 000000000..c086dceb3 --- /dev/null +++ b/.github/workflows/test_bedrock_aws_credentials.yml @@ -0,0 +1,29 @@ +name: test | bedrock | aws credentials + +on: + workflow_call: + +jobs: + test-bedrock-aws-credentials: + name: Run Bedrock AWS Credentials Test + runs-on: ubuntu-22.04 + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' + + - name: Run Bedrock AWS Credentials Test + env: + LLM_PROVIDER: "bedrock" + LLM_MODEL: "us.anthropic.claude-3-5-sonnet-20240620-v1:0" + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_REGION_NAME: "us-east-1" + EMBEDDING_PROVIDER: "cohere" + EMBEDDING_MODEL: "cohere.embed-english-v3" + EMBEDDING_DIMENSIONS: "1024" + run: poetry run python ./examples/python/simple_example.py diff --git a/.github/workflows/test_bedrock_aws_profile.yml b/.github/workflows/test_bedrock_aws_profile.yml new file mode 100644 index 000000000..aa15074e1 --- /dev/null +++ b/.github/workflows/test_bedrock_aws_profile.yml @@ -0,0 +1,37 @@ +name: test | bedrock | aws profile + +on: + workflow_call: + +jobs: + test-bedrock-aws-profile: + name: Run Bedrock AWS Profile Test + runs-on: ubuntu-22.04 + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' + + - name: Configure AWS Profile + run: | + mkdir -p ~/.aws + cat > ~/.aws/credentials << EOF + [bedrock-test] + aws_access_key_id = ${{ 
secrets.AWS_ACCESS_KEY_ID }} + aws_secret_access_key = ${{ secrets.AWS_SECRET_ACCESS_KEY }} + EOF + + - name: Run Bedrock AWS Profile Test + env: + LLM_PROVIDER: "bedrock" + LLM_MODEL: "us.anthropic.claude-3-5-haiku-20241022-v1:0" + AWS_PROFILE_NAME: "bedrock-test" + AWS_REGION_NAME: "us-east-1" + EMBEDDING_PROVIDER: "bedrock" + EMBEDDING_MODEL: "amazon.titan-embed-text-v2:0" + EMBEDDING_DIMENSIONS: "1024" + run: poetry run python ./examples/python/simple_example.py diff --git a/.github/workflows/test_suites.yml b/.github/workflows/test_suites.yml index d34523ce1..78b878931 100644 --- a/.github/workflows/test_suites.yml +++ b/.github/workflows/test_suites.yml @@ -110,6 +110,24 @@ jobs: uses: ./.github/workflows/test_gemini.yml secrets: inherit + bedrock-tests: + name: Bedrock Tests + needs: [basic-tests, e2e-tests] + uses: ./.github/workflows/test_bedrock_api_key.yml + secrets: inherit + + bedrock-aws-credentials-tests: + name: Bedrock AWS Credentials Tests + needs: [basic-tests, e2e-tests] + uses: ./.github/workflows/test_bedrock_aws_credentials.yml + secrets: inherit + + bedrock-aws-profile-tests: + name: Bedrock AWS Profile Tests + needs: [basic-tests, e2e-tests] + uses: ./.github/workflows/test_bedrock_aws_profile.yml + secrets: inherit + # Ollama tests moved to the end ollama-tests: name: Ollama Tests @@ -143,6 +161,9 @@ jobs: db-examples-tests, mcp-test, gemini-tests, + bedrock-tests, + bedrock-aws-credentials-tests, + bedrock-aws-profile-tests, ollama-tests, relational-db-migration-tests, docker-compose-test, @@ -163,6 +184,9 @@ jobs: "${{ needs.db-examples-tests.result }}" == "success" && "${{ needs.relational-db-migration-tests.result }}" == "success" && "${{ needs.gemini-tests.result }}" == "success" && + "${{ needs.bedrock-tests.result }}" == "success" && + "${{ needs.bedrock-aws-credentials-tests.result }}" == "success" && + "${{ needs.bedrock-aws-profile-tests.result }}" == "success" && "${{ needs.docker-compose-test.result }}" == "success" && "${{ 
needs.docker-ci-test.result }}" == "success" && "${{ needs.ollama-tests.result }}" == "success" ]]; then diff --git a/README.md b/README.md index 3486d2ce9..70422bc6a 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,16 @@ os.environ["LLM_API_KEY"] = "YOUR OPENAI_API_KEY" ``` You can also set the variables by creating .env file, using our template. + +**Supported LLM Providers:** OpenAI (default), Anthropic, Gemini, Ollama, AWS Bedrock + +**For AWS Bedrock:** Set `LLM_PROVIDER="bedrock"` and use one of three authentication methods: +- API Key: `LLM_API_KEY="your_bedrock_api_key"` +- AWS Credentials: `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` (+ `AWS_SESSION_TOKEN` if needed) +- AWS Profile: `AWS_PROFILE_NAME="your_profile"` + +Use an [inference profile](https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_InvokeModel.html#API_runtime_InvokeModel_Example_5:~:text=Use%20an%20inference%20profile%20in%20model%20invocation) for the model IDs. This usually means appending `us.*` (or other region) to the model ID (e.g., `us.anthropic.claude-3-5-sonnet-20241022-v2:0`). See [AWS Bedrock models](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html). + To use different LLM providers, for more info check out our documentation diff --git a/cognee-starter-kit/README.md b/cognee-starter-kit/README.md index c265e278e..5a9369b89 100644 --- a/cognee-starter-kit/README.md +++ b/cognee-starter-kit/README.md @@ -25,6 +25,14 @@ uv sync ## Setup LLM Add environment variables to `.env` file. In case you choose to use OpenAI provider, add just the model and api_key. + +**Supported LLM Providers:** +- OpenAI (default) +- Anthropic +- Gemini +- Ollama +- AWS Bedrock + ``` LLM_PROVIDER="" LLM_MODEL="" @@ -39,6 +47,36 @@ EMBEDDING_API_KEY="" EMBEDDING_API_VERSION="" ``` +**For AWS Bedrock, you have three authentication options:** + +1. 
**API Key (Bearer Token):** +``` +LLM_PROVIDER="bedrock" +LLM_API_KEY="your_bedrock_api_key" +LLM_MODEL="us.anthropic.claude-3-5-sonnet-20241022-v2:0" +AWS_REGION_NAME="us-east-1" +``` + +2. **AWS Credentials:** +``` +LLM_PROVIDER="bedrock" +LLM_MODEL="us.anthropic.claude-3-5-sonnet-20241022-v2:0" +AWS_ACCESS_KEY_ID="your_aws_access_key" +AWS_SECRET_ACCESS_KEY="your_aws_secret_key" +[if needed] AWS_SESSION_TOKEN="your_session_token" +AWS_REGION_NAME="us-east-1" +``` + +3. **AWS Profile:** +``` +LLM_PROVIDER="bedrock" +LLM_MODEL="us.anthropic.claude-3-5-sonnet-20241022-v2:0" +AWS_PROFILE_NAME="your_aws_profile" +AWS_REGION_NAME="us-east-1" +``` + +**Note:** For Bedrock models, use an [inference profile](https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_InvokeModel.html#API_runtime_InvokeModel_Example_5:~:text=Use%20an%20inference%20profile%20in%20model%20invocation) for `LLM_MODEL`. This usually means appending `us.*` (or other region) to the model ID (e.g., `us.anthropic.claude-3-5-sonnet-20241022-v2:0`). See [AWS Bedrock models](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html) for available models. + Activate the Python environment: ``` source .venv/bin/activate diff --git a/cognee/api/v1/add/add.py b/cognee/api/v1/add/add.py index 7daaaf1dd..01c58a134 100644 --- a/cognee/api/v1/add/add.py +++ b/cognee/api/v1/add/add.py @@ -127,7 +127,7 @@ async def add( - LLM_API_KEY: API key for your LLM provider (OpenAI, Anthropic, etc.) 
Optional: - - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama" + - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama", "bedrock" - LLM_MODEL: Model name (default: "gpt-4o-mini") - DEFAULT_USER_EMAIL: Custom default user email - DEFAULT_USER_PASSWORD: Custom default user password diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py index 66ce48cc2..c13d1c366 100644 --- a/cognee/api/v1/search/search.py +++ b/cognee/api/v1/search/search.py @@ -154,7 +154,7 @@ async def search( - LLM_API_KEY: API key for your LLM provider Optional: - - LLM_PROVIDER, LLM_MODEL: Configure LLM for search responses + - LLM_PROVIDER, LLM_MODEL: Configure LLM for search responses (supports: openai, anthropic, gemini, ollama, bedrock) - VECTOR_DB_PROVIDER: Must match what was used during cognify - GRAPH_DATABASE_PROVIDER: Must match what was used during cognify diff --git a/cognee/infrastructure/llm/config.py b/cognee/infrastructure/llm/config.py index f31aada33..a6acb647d 100644 --- a/cognee/infrastructure/llm/config.py +++ b/cognee/infrastructure/llm/config.py @@ -27,6 +27,12 @@ class LLMConfig(BaseSettings): - embedding_rate_limit_enabled - embedding_rate_limit_requests - embedding_rate_limit_interval + - aws_access_key_id (Bedrock) + - aws_secret_access_key (Bedrock) + - aws_session_token (Bedrock) + - aws_region_name (Bedrock) + - aws_profile_name (Bedrock) + - aws_bedrock_runtime_endpoint (Bedrock) Public methods include: - ensure_env_vars_for_ollama @@ -63,6 +69,14 @@ class LLMConfig(BaseSettings): fallback_endpoint: str = "" fallback_model: str = "" + # AWS Bedrock configuration + aws_access_key_id: Optional[str] = None + aws_secret_access_key: Optional[str] = None + aws_session_token: Optional[str] = None + aws_region_name: str = "us-east-1" + aws_profile_name: Optional[str] = None + aws_bedrock_runtime_endpoint: Optional[str] = None + baml_registry: ClassVar[ClientRegistry] = ClientRegistry() model_config = 
SettingsConfigDict(env_file=".env", extra="allow") diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py new file mode 100644 index 000000000..6fb964a82 --- /dev/null +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py @@ -0,0 +1,6 @@ +"""Bedrock LLM adapter module.""" + +from .adapter import BedrockAdapter + +__all__ = ["BedrockAdapter"] + diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py new file mode 100644 index 000000000..868fe51b8 --- /dev/null +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py @@ -0,0 +1,161 @@ +import litellm +import instructor +from typing import Type, Optional +from pydantic import BaseModel +from litellm.exceptions import ContentPolicyViolationError +from instructor.exceptions import InstructorRetryException + +from cognee.exceptions import InvalidValueError +from cognee.infrastructure.llm.LLMGateway import LLMGateway +from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( + LLMInterface, +) +from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError +from cognee.infrastructure.files.utils.open_data_file import open_data_file +from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import ( + rate_limit_async, + rate_limit_sync, + sleep_and_retry_async, + sleep_and_retry_sync, +) +from cognee.modules.observability.get_observe import get_observe + +observe = get_observe() + + +class BedrockAdapter(LLMInterface): + """ + Adapter for AWS Bedrock API with support for three authentication methods: + 1. API Key (Bearer Token) + 2. 
AWS Credentials (access key + secret key) + 3. AWS Profile (boto3 credential chain) + """ + + name = "Bedrock" + model: str + api_key: str + aws_access_key_id: str + aws_secret_access_key: str + aws_region_name: str + aws_profile_name: str + + MAX_RETRIES = 5 + + def __init__( + self, + model: str, + api_key: str = None, + aws_access_key_id: str = None, + aws_secret_access_key: str = None, + aws_session_token: str = None, + aws_region_name: str = "us-east-1", + aws_profile_name: str = None, + aws_bedrock_runtime_endpoint: str = None, + max_tokens: int = 16384, + streaming: bool = False, + ): + self.aclient = instructor.from_litellm(litellm.acompletion) + self.client = instructor.from_litellm(litellm.completion) + self.model = model + self.api_key = api_key + self.aws_access_key_id = aws_access_key_id + self.aws_secret_access_key = aws_secret_access_key + self.aws_session_token = aws_session_token + self.aws_region_name = aws_region_name + self.aws_profile_name = aws_profile_name + self.aws_bedrock_runtime_endpoint = aws_bedrock_runtime_endpoint + self.max_tokens = max_tokens + self.streaming = streaming + + def _create_bedrock_request( + self, text_input: str, system_prompt: str, response_model: Type[BaseModel] + ) -> dict: + """Create Bedrock request with authentication and enhanced JSON formatting.""" + enhanced_system_prompt = f"""{system_prompt} + +IMPORTANT: You must respond with valid JSON only. Do not include any text before or after the JSON. 
The response must be a valid JSON object that can be parsed directly.""" + + request_params = { + "model": self.model, + "custom_llm_provider": "bedrock", + "drop_params": True, + "messages": [ + {"role": "user", "content": text_input}, + {"role": "system", "content": enhanced_system_prompt}, + ], + "response_model": response_model, + "max_retries": self.MAX_RETRIES, + "max_tokens": self.max_tokens, + "stream": self.streaming, + } + + # Add authentication parameters + if self.api_key: + request_params["api_key"] = self.api_key + elif self.aws_access_key_id and self.aws_secret_access_key: + request_params["aws_access_key_id"] = self.aws_access_key_id + request_params["aws_secret_access_key"] = self.aws_secret_access_key + if self.aws_session_token: + request_params["aws_session_token"] = self.aws_session_token + elif self.aws_profile_name: + request_params["aws_profile_name"] = self.aws_profile_name + + # Add optional parameters + if self.aws_region_name: + request_params["aws_region_name"] = self.aws_region_name + if self.aws_bedrock_runtime_endpoint: + request_params["aws_bedrock_runtime_endpoint"] = self.aws_bedrock_runtime_endpoint + + return request_params + + @observe(as_type="generation") + @sleep_and_retry_async() + @rate_limit_async + async def acreate_structured_output( + self, text_input: str, system_prompt: str, response_model: Type[BaseModel] + ) -> BaseModel: + """Generate structured output from AWS Bedrock API.""" + + try: + request_params = self._create_bedrock_request(text_input, system_prompt, response_model) + return await self.aclient.chat.completions.create(**request_params) + + except ( + ContentPolicyViolationError, + InstructorRetryException, + ) as error: + if ( + isinstance(error, InstructorRetryException) + and "content management policy" not in str(error).lower() + ): + raise error + + raise ContentPolicyFilterError( + f"The provided input contains content that is not aligned with our content policy: {text_input}" + ) + + @observe + 
@sleep_and_retry_sync() + @rate_limit_sync + def create_structured_output( + self, text_input: str, system_prompt: str, response_model: Type[BaseModel] + ) -> BaseModel: + """Generate structured output from AWS Bedrock API (synchronous).""" + + request_params = self._create_bedrock_request(text_input, system_prompt, response_model) + return self.client.chat.completions.create(**request_params) + + def show_prompt(self, text_input: str, system_prompt: str) -> str: + """Format and display the prompt for a user query.""" + if not text_input: + text_input = "No user input provided." + if not system_prompt: + raise InvalidValueError(message="No system prompt path provided.") + system_prompt = LLMGateway.read_query_prompt(system_prompt) + + formatted_prompt = ( + f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n""" + if system_prompt + else None + ) + return formatted_prompt diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py index 22d101077..0ade7a292 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py @@ -20,6 +20,7 @@ class LLMProvider(Enum): - ANTHROPIC: Represents the Anthropic provider. - CUSTOM: Represents a custom provider option. - GEMINI: Represents the Gemini provider. + - BEDROCK: Represents the AWS Bedrock provider. 
""" OPENAI = "openai" @@ -27,6 +28,7 @@ class LLMProvider(Enum): ANTHROPIC = "anthropic" CUSTOM = "custom" GEMINI = "gemini" + BEDROCK = "bedrock" def get_llm_client(): @@ -137,5 +139,23 @@ def get_llm_client(): streaming=llm_config.llm_streaming, ) + elif provider == LLMProvider.BEDROCK: + from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.bedrock.adapter import ( + BedrockAdapter, + ) + + return BedrockAdapter( + model=llm_config.llm_model, + api_key=llm_config.llm_api_key, + aws_access_key_id=llm_config.aws_access_key_id, + aws_secret_access_key=llm_config.aws_secret_access_key, + aws_session_token=llm_config.aws_session_token, + aws_region_name=llm_config.aws_region_name, + aws_profile_name=llm_config.aws_profile_name, + aws_bedrock_runtime_endpoint=llm_config.aws_bedrock_runtime_endpoint, + max_tokens=max_tokens, + streaming=llm_config.llm_streaming, + ) + else: raise InvalidValueError(message=f"Unsupported LLM provider: {provider}") diff --git a/cognee/modules/settings/get_settings.py b/cognee/modules/settings/get_settings.py index fa7dfc2df..71017350f 100644 --- a/cognee/modules/settings/get_settings.py +++ b/cognee/modules/settings/get_settings.py @@ -15,10 +15,11 @@ class ModelName(Enum): ollama = "ollama" anthropic = "anthropic" gemini = "gemini" + bedrock = "bedrock" class LLMConfig(BaseModel): - api_key: str + api_key: Optional[str] model: str provider: str endpoint: Optional[str] @@ -72,6 +73,10 @@ def get_settings() -> SettingsDict: "value": "gemini", "label": "Gemini", }, + { + "value": "bedrock", + "label": "AWS Bedrock", + }, ] return SettingsDict.model_validate( @@ -134,6 +139,20 @@ def get_settings() -> SettingsDict: "label": "Gemini 2.0 Flash", }, ], + "bedrock": [ + { + "value": "us.anthropic.claude-3-5-sonnet-20241022-v2:0", + "label": "Claude 3.5 Sonnet", + }, + { + "value": "us.anthropic.claude-3-5-haiku-20241022-v1:0", + "label": "Claude 3.5 Haiku", + }, + { + "value": 
"us.anthropic.claude-3-5-sonnet-20240620-v1:0", + "label": "Claude 3.5 Sonnet (June)", + }, + ], }, }, vector_db={ diff --git a/pyproject.toml b/pyproject.toml index 61076e86b..e6ad0eff4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ dependencies = [ "sqlalchemy>=2.0.39,<3.0.0", "aiosqlite>=0.20.0,<1.0.0", "tiktoken>=0.8.0,<1.0.0", - "litellm>=1.71.0, <2.0.0", + "litellm>=1.76.0, <2.0.0", "instructor>=1.9.1,<2.0.0", "langfuse>=2.32.0,<3", "filetype>=1.2.0,<2.0.0", diff --git a/uv.lock b/uv.lock index 137263188..963c6805e 100644 --- a/uv.lock +++ b/uv.lock @@ -1060,7 +1060,7 @@ requires-dist = [ { name = "langfuse", specifier = ">=2.32.0,<3" }, { name = "langsmith", marker = "extra == 'langchain'", specifier = ">=0.2.3,<1.0.0" }, { name = "limits", specifier = ">=4.4.1,<5" }, - { name = "litellm", specifier = ">=1.71.0,<2.0.0" }, + { name = "litellm", specifier = ">=1.76.0,<2.0.0" }, { name = "llama-index-core", marker = "extra == 'llama-index'", specifier = ">=0.12.11,<0.13" }, { name = "matplotlib", specifier = ">=3.8.3,<4" }, { name = "mistral-common", marker = "extra == 'mistral'", specifier = ">=1.5.2,<2" }, @@ -3552,7 +3552,7 @@ wheels = [ [[package]] name = "litellm" -version = "1.75.8" +version = "1.76.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -3567,9 +3567,9 @@ dependencies = [ { name = "tiktoken" }, { name = "tokenizers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8d/4e/48e3d6de19afe713223e3bc7009a2003501420de2a5d823c569cefbd9731/litellm-1.75.8.tar.gz", hash = "sha256:92061bd263ff8c33c8fff70ba92cd046adb7ea041a605826a915d108742fe59e", size = 10140384, upload-time = "2025-08-16T21:42:24.23Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c9/e8/47c791c3d2cb4397ddece90840aecfc6cdc4a003f039dde42d7c861f4709/litellm-1.76.0.tar.gz", hash = "sha256:d26d12333135edd72af60e0e310284dac3b079f4d7c47c79dfbb2430b9b4b421", size = 10170569, upload-time = 
"2025-08-24T05:14:01.176Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5e/82/c4d00fbeafd93c00dab6ea03f33cadd6a97adeb720ba1d89fc319e5cb10b/litellm-1.75.8-py3-none-any.whl", hash = "sha256:0bf004488df8506381ec6e35e1486e2870e8d578a7c3f2427cd497558ce07a2e", size = 8916305, upload-time = "2025-08-16T21:42:21.387Z" }, + { url = "https://files.pythonhosted.org/packages/86/f2/891b4b6c09021046d7f5bcff57178b18f352a67b032d35cc693d79b38620/litellm-1.76.0-py3-none-any.whl", hash = "sha256:357464242fc1eeda384810c9e334e48ad67a50ecd30cf61e86c15f89e2f2e0b4", size = 8953112, upload-time = "2025-08-24T05:13:58.642Z" }, ] [[package]] From c91d1ff0aed90e66073f5a2a284cb2d21237eb23 Mon Sep 17 00:00:00 2001 From: xdurawa Date: Wed, 3 Sep 2025 01:34:21 -0400 Subject: [PATCH 002/284] Remove documentation changes as requested by reviewers - Reverted README.md to original state - Reverted cognee-starter-kit/README.md to original state - Documentation will be updated separately by maintainers --- README.md | 10 ---------- cognee-starter-kit/README.md | 38 ------------------------------------ 2 files changed, 48 deletions(-) diff --git a/README.md b/README.md index 73c6aa898..e618d5bf9 100644 --- a/README.md +++ b/README.md @@ -125,16 +125,6 @@ os.environ["LLM_API_KEY"] = "YOUR OPENAI_API_KEY" ``` You can also set the variables by creating .env file, using our template. 
- -**Supported LLM Providers:** OpenAI (default), Anthropic, Gemini, Ollama, AWS Bedrock - -**For AWS Bedrock:** Set `LLM_PROVIDER="bedrock"` and use one of three authentication methods: -- API Key: `LLM_API_KEY="your_bedrock_api_key"` -- AWS Credentials: `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` (+ `AWS_SESSION_TOKEN` if needed) -- AWS Profile: `AWS_PROFILE_NAME="your_profile"` - -Use an [inference profile](https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_InvokeModel.html#API_runtime_InvokeModel_Example_5:~:text=Use%20an%20inference%20profile%20in%20model%20invocation) for the model IDs. This usually means appending `us.*` (or other region) to the model ID (e.g., `us.anthropic.claude-3-5-sonnet-20241022-v2:0`). See [AWS Bedrock models](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html). - To use different LLM providers, for more info check out our documentation diff --git a/cognee-starter-kit/README.md b/cognee-starter-kit/README.md index 5a9369b89..c265e278e 100644 --- a/cognee-starter-kit/README.md +++ b/cognee-starter-kit/README.md @@ -25,14 +25,6 @@ uv sync ## Setup LLM Add environment variables to `.env` file. In case you choose to use OpenAI provider, add just the model and api_key. - -**Supported LLM Providers:** -- OpenAI (default) -- Anthropic -- Gemini -- Ollama -- AWS Bedrock - ``` LLM_PROVIDER="" LLM_MODEL="" @@ -47,36 +39,6 @@ EMBEDDING_API_KEY="" EMBEDDING_API_VERSION="" ``` -**For AWS Bedrock, you have three authentication options:** - -1. **API Key (Bearer Token):** -``` -LLM_PROVIDER="bedrock" -LLM_API_KEY="your_bedrock_api_key" -LLM_MODEL="us.anthropic.claude-3-5-sonnet-20241022-v2:0" -AWS_REGION_NAME="us-east-1" -``` - -2. 
**AWS Credentials:** -``` -LLM_PROVIDER="bedrock" -LLM_MODEL="us.anthropic.claude-3-5-sonnet-20241022-v2:0" -AWS_ACCESS_KEY_ID="your_aws_access_key" -AWS_SECRET_ACCESS_KEY="your_aws_secret_key" -[if needed] AWS_SESSION_TOKEN="your_session_token" -AWS_REGION_NAME="us-east-1" -``` - -3. **AWS Profile:** -``` -LLM_PROVIDER="bedrock" -LLM_MODEL="us.anthropic.claude-3-5-sonnet-20241022-v2:0" -AWS_PROFILE_NAME="your_aws_profile" -AWS_REGION_NAME="us-east-1" -``` - -**Note:** For Bedrock models, use an [inference profile](https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_InvokeModel.html#API_runtime_InvokeModel_Example_5:~:text=Use%20an%20inference%20profile%20in%20model%20invocation) for `LLM_MODEL`. This usually means appending `us.*` (or other region) to the model ID (e.g., `us.anthropic.claude-3-5-sonnet-20241022-v2:0`). See [AWS Bedrock models](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html) for available models. - Activate the Python environment: ``` source .venv/bin/activate From ac5118ee34c4bd149ac26d042e2ffe5292ee3459 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Wed, 15 Oct 2025 17:28:51 +0200 Subject: [PATCH 003/284] test:Add load test --- cognee/tests/load_test.py | 61 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 cognee/tests/load_test.py diff --git a/cognee/tests/load_test.py b/cognee/tests/load_test.py new file mode 100644 index 000000000..da9b74ab9 --- /dev/null +++ b/cognee/tests/load_test.py @@ -0,0 +1,61 @@ +import os +import pathlib +import asyncio +import time + +import cognee +from cognee.modules.search.types import SearchType +from cognee.shared.logging_utils import get_logger + +logger = get_logger() + +async def helper_func(num_of_searches): + + start_time = time.time() + + await cognee.cognify() + + await asyncio.gather( + *[ + cognee.search(query_text="Tell me about AI", query_type=SearchType.GRAPH_COMPLETION) + for _ in range(num_of_searches) + ] 
+ ) + + end_time = time.time() + + return end_time - start_time + +async def main(): + + file_path = os.path.join( + pathlib.Path(__file__).resolve().parent, "test_data/artificial-intelligence.pdf" + ) + + num_of_pdfs = 10 + num_of_reps = 5 + upper_boundary_minutes = 3 + average_minutes = 1.5 + + await asyncio.gather( + *[ + cognee.add(file_path, dataset_name=f"dataset_{i}") + for i in range(num_of_pdfs) + ] + ) + + recorded_times = await asyncio.gather( + *[helper_func(num_of_pdfs) for _ in range(num_of_reps)] + ) + + average_recorded_time = sum(recorded_times) / len(recorded_times) + + assert average_recorded_time <= average_minutes * 60 + + assert all(rec_time <= upper_boundary_minutes * 60 for rec_time in recorded_times) + + return + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file From c16459d236a6e07b9267d323387b2be217fd5b46 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Wed, 15 Oct 2025 17:58:05 +0200 Subject: [PATCH 004/284] test: Add prune step to the test --- cognee/tests/load_test.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cognee/tests/load_test.py b/cognee/tests/load_test.py index da9b74ab9..c44efad00 100644 --- a/cognee/tests/load_test.py +++ b/cognee/tests/load_test.py @@ -9,7 +9,7 @@ from cognee.shared.logging_utils import get_logger logger = get_logger() -async def helper_func(num_of_searches): +async def process_and_search(num_of_searches): start_time = time.time() @@ -37,6 +37,9 @@ async def main(): upper_boundary_minutes = 3 average_minutes = 1.5 + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + await asyncio.gather( *[ cognee.add(file_path, dataset_name=f"dataset_{i}") @@ -45,7 +48,7 @@ async def main(): ) recorded_times = await asyncio.gather( - *[helper_func(num_of_pdfs) for _ in range(num_of_reps)] + *[process_and_search(num_of_pdfs) for _ in range(num_of_reps)] ) average_recorded_time = sum(recorded_times) / len(recorded_times) @@ 
-54,8 +57,6 @@ async def main(): assert all(rec_time <= upper_boundary_minutes * 60 for rec_time in recorded_times) - return - if __name__ == "__main__": asyncio.run(main()) \ No newline at end of file From a8ff50ceae262868cb303707f5396abe07cfed38 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 17 Oct 2025 18:09:01 +0200 Subject: [PATCH 005/284] feat: Initial multi-tenancy commit --- cognee/modules/data/methods/create_dataset.py | 5 +-- cognee/modules/data/models/Dataset.py | 1 + cognee/modules/users/methods/create_user.py | 35 +++++-------------- .../modules/users/methods/get_default_user.py | 7 +--- cognee/modules/users/methods/get_user.py | 2 +- .../users/methods/get_user_by_email.py | 2 +- cognee/modules/users/models/Tenant.py | 13 ++++--- cognee/modules/users/models/User.py | 11 +++--- cognee/modules/users/models/UserTenant.py | 12 +++++++ cognee/modules/users/models/__init__.py | 1 + .../get_all_user_permission_datasets.py | 20 +++++------ .../tenants/methods/add_user_to_tenant.py | 25 +++++++++---- .../users/tenants/methods/create_tenant.py | 16 ++++++--- examples/python/permissions_example.py | 4 ++- 14 files changed, 82 insertions(+), 72 deletions(-) create mode 100644 cognee/modules/users/models/UserTenant.py diff --git a/cognee/modules/data/methods/create_dataset.py b/cognee/modules/data/methods/create_dataset.py index c080de0e8..280c9e105 100644 --- a/cognee/modules/data/methods/create_dataset.py +++ b/cognee/modules/data/methods/create_dataset.py @@ -22,8 +22,9 @@ async def create_dataset(dataset_name: str, user: User, session: AsyncSession) - if dataset is None: # Dataset id should be generated based on dataset_name and owner_id/user so multiple users can use the same dataset_name dataset_id = await get_unique_dataset_id(dataset_name=dataset_name, user=user) - dataset = Dataset(id=dataset_id, name=dataset_name, data=[]) - dataset.owner_id = owner_id + dataset = Dataset( + id=dataset_id, name=dataset_name, data=[], owner_id=owner_id, 
tenant_id=user.tenant_id + ) session.add(dataset) diff --git a/cognee/modules/data/models/Dataset.py b/cognee/modules/data/models/Dataset.py index 797401d5a..00ed4da96 100644 --- a/cognee/modules/data/models/Dataset.py +++ b/cognee/modules/data/models/Dataset.py @@ -18,6 +18,7 @@ class Dataset(Base): updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc)) owner_id = Column(UUID, index=True) + tenant_id = Column(UUID, index=True, nullable=True) acls = relationship("ACL", back_populates="dataset", cascade="all, delete-orphan") diff --git a/cognee/modules/users/methods/create_user.py b/cognee/modules/users/methods/create_user.py index 1b303bd36..953c70cd6 100644 --- a/cognee/modules/users/methods/create_user.py +++ b/cognee/modules/users/methods/create_user.py @@ -18,7 +18,6 @@ from typing import Optional async def create_user( email: str, password: str, - tenant_id: Optional[str] = None, is_superuser: bool = False, is_active: bool = True, is_verified: bool = False, @@ -30,33 +29,15 @@ async def create_user( async with relational_engine.get_async_session() as session: async with get_user_db_context(session) as user_db: async with get_user_manager_context(user_db) as user_manager: - if tenant_id: - # Check if the tenant already exists - result = await session.execute(select(Tenant).where(Tenant.id == tenant_id)) - tenant = result.scalars().first() - if not tenant: - raise TenantNotFoundError - - user = await user_manager.create( - UserCreate( - email=email, - password=password, - tenant_id=tenant.id, - is_superuser=is_superuser, - is_active=is_active, - is_verified=is_verified, - ) - ) - else: - user = await user_manager.create( - UserCreate( - email=email, - password=password, - is_superuser=is_superuser, - is_active=is_active, - is_verified=is_verified, - ) + user = await user_manager.create( + UserCreate( + email=email, + password=password, + is_superuser=is_superuser, + is_active=is_active, + is_verified=is_verified, ) + ) if 
auto_login: await session.refresh(user) diff --git a/cognee/modules/users/methods/get_default_user.py b/cognee/modules/users/methods/get_default_user.py index 48073a884..773545f8e 100644 --- a/cognee/modules/users/methods/get_default_user.py +++ b/cognee/modules/users/methods/get_default_user.py @@ -27,12 +27,7 @@ async def get_default_user() -> SimpleNamespace: if user is None: return await create_default_user() - # We return a SimpleNamespace to have the same user type as our SaaS - # SimpleNamespace is just a dictionary which can be accessed through attributes - auth_data = SimpleNamespace( - id=user.id, email=user.email, tenant_id=user.tenant_id, roles=[] - ) - return auth_data + return user except Exception as error: if "principals" in str(error.args): raise DatabaseNotCreatedError() from error diff --git a/cognee/modules/users/methods/get_user.py b/cognee/modules/users/methods/get_user.py index 2678a5a01..a1c87aab7 100644 --- a/cognee/modules/users/methods/get_user.py +++ b/cognee/modules/users/methods/get_user.py @@ -14,7 +14,7 @@ async def get_user(user_id: UUID): user = ( await session.execute( select(User) - .options(selectinload(User.roles), selectinload(User.tenant)) + .options(selectinload(User.roles), selectinload(User.tenants)) .where(User.id == user_id) ) ).scalar() diff --git a/cognee/modules/users/methods/get_user_by_email.py b/cognee/modules/users/methods/get_user_by_email.py index c4bd5b48e..6df989251 100644 --- a/cognee/modules/users/methods/get_user_by_email.py +++ b/cognee/modules/users/methods/get_user_by_email.py @@ -13,7 +13,7 @@ async def get_user_by_email(user_email: str): user = ( await session.execute( select(User) - .options(joinedload(User.roles), joinedload(User.tenant)) + .options(joinedload(User.roles), joinedload(User.tenants)) .where(User.email == user_email) ) ).scalar() diff --git a/cognee/modules/users/models/Tenant.py b/cognee/modules/users/models/Tenant.py index 95023a6ee..b8fa158c5 100644 --- 
a/cognee/modules/users/models/Tenant.py +++ b/cognee/modules/users/models/Tenant.py @@ -1,7 +1,7 @@ -from sqlalchemy.orm import relationship +from sqlalchemy.orm import relationship, Mapped from sqlalchemy import Column, String, ForeignKey, UUID from .Principal import Principal -from .User import User +from .UserTenant import UserTenant from .Role import Role @@ -13,14 +13,13 @@ class Tenant(Principal): owner_id = Column(UUID, index=True) - # One-to-Many relationship with User; specify the join via User.tenant_id - users = relationship( + users: Mapped[list["User"]] = relationship( # noqa: F821 "User", - back_populates="tenant", - foreign_keys=lambda: [User.tenant_id], + secondary=UserTenant.__tablename__, + back_populates="tenants", ) - # One-to-Many relationship with Role (if needed; similar fix) + # One-to-Many relationship with Role roles = relationship( "Role", back_populates="tenant", diff --git a/cognee/modules/users/models/User.py b/cognee/modules/users/models/User.py index 8972a5932..a98abd3bc 100644 --- a/cognee/modules/users/models/User.py +++ b/cognee/modules/users/models/User.py @@ -6,8 +6,10 @@ from sqlalchemy import ForeignKey, Column, UUID from sqlalchemy.orm import relationship, Mapped from .Principal import Principal +from .UserTenant import UserTenant from .UserRole import UserRole from .Role import Role +from .Tenant import Tenant class User(SQLAlchemyBaseUserTableUUID, Principal): @@ -15,7 +17,7 @@ class User(SQLAlchemyBaseUserTableUUID, Principal): id = Column(UUID, ForeignKey("principals.id", ondelete="CASCADE"), primary_key=True) - # Foreign key to Tenant (Many-to-One relationship) + # Foreign key to current Tenant (Many-to-One relationship) tenant_id = Column(UUID, ForeignKey("tenants.id")) # Many-to-Many Relationship with Roles @@ -25,11 +27,11 @@ class User(SQLAlchemyBaseUserTableUUID, Principal): back_populates="users", ) - # Relationship to Tenant - tenant = relationship( + # Many-to-Many Relationship with Tenants user is a part of + 
tenants: Mapped[list["Tenant"]] = relationship( "Tenant", + secondary=UserTenant.__tablename__, back_populates="users", - foreign_keys=[tenant_id], ) # ACL Relationship (One-to-Many) @@ -46,7 +48,6 @@ class UserRead(schemas.BaseUser[uuid_UUID]): class UserCreate(schemas.BaseUserCreate): - tenant_id: Optional[uuid_UUID] = None is_verified: bool = True diff --git a/cognee/modules/users/models/UserTenant.py b/cognee/modules/users/models/UserTenant.py new file mode 100644 index 000000000..bfb852aa5 --- /dev/null +++ b/cognee/modules/users/models/UserTenant.py @@ -0,0 +1,12 @@ +from datetime import datetime, timezone +from sqlalchemy import Column, ForeignKey, DateTime, UUID +from cognee.infrastructure.databases.relational import Base + + +class UserTenant(Base): + __tablename__ = "user_tenants" + + created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)) + + user_id = Column(UUID, ForeignKey("users.id"), primary_key=True) + tenant_id = Column(UUID, ForeignKey("tenants.id"), primary_key=True) diff --git a/cognee/modules/users/models/__init__.py b/cognee/modules/users/models/__init__.py index ba2f40e49..5114cc45a 100644 --- a/cognee/modules/users/models/__init__.py +++ b/cognee/modules/users/models/__init__.py @@ -1,6 +1,7 @@ from .User import User from .Role import Role from .UserRole import UserRole +from .UserTenant import UserTenant from .DatasetDatabase import DatasetDatabase from .RoleDefaultPermissions import RoleDefaultPermissions from .UserDefaultPermissions import UserDefaultPermissions diff --git a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py index 1185dd7ad..a4f538259 100644 --- a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +++ b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py @@ -1,11 +1,8 @@ -from types import SimpleNamespace - from 
cognee.shared.logging_utils import get_logger from ...models.User import User from cognee.modules.data.models.Dataset import Dataset from cognee.modules.users.permissions.methods import get_principal_datasets -from cognee.modules.users.permissions.methods import get_role, get_tenant logger = get_logger() @@ -25,17 +22,15 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) -> # Get all datasets User has explicit access to datasets.extend(await get_principal_datasets(user, permission_type)) - if user.tenant_id: - # Get all datasets all tenants have access to - tenant = await get_tenant(user.tenant_id) + # Get all tenants user is a part of + tenants = await user.awaitable_attrs.tenants + + for tenant in tenants: + # Get all datasets all tenant members have access to datasets.extend(await get_principal_datasets(tenant, permission_type)) - # Get all datasets Users roles have access to - if isinstance(user, SimpleNamespace): - # If simple namespace use roles defined in user - roles = user.roles - else: - roles = await user.awaitable_attrs.roles + # Get all datasets accessible by roles user is a part of + roles = await user.awaitable_attrs.roles for role in roles: datasets.extend(await get_principal_datasets(role, permission_type)) @@ -45,4 +40,5 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) -> # If the dataset id key already exists, leave the dictionary unchanged. 
unique.setdefault(dataset.id, dataset) + # TODO: Add filtering out of datasets that aren't currently selected tenant of user return list(unique.values()) diff --git a/cognee/modules/users/tenants/methods/add_user_to_tenant.py b/cognee/modules/users/tenants/methods/add_user_to_tenant.py index 1374067a7..b9f5898d0 100644 --- a/cognee/modules/users/tenants/methods/add_user_to_tenant.py +++ b/cognee/modules/users/tenants/methods/add_user_to_tenant.py @@ -1,8 +1,11 @@ +from typing import Optional from uuid import UUID from sqlalchemy.exc import IntegrityError +from sqlalchemy import insert from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError from cognee.infrastructure.databases.relational import get_relational_engine +from cognee.modules.users.models.UserTenant import UserTenant from cognee.modules.users.methods import get_user from cognee.modules.users.permissions.methods import get_tenant from cognee.modules.users.exceptions import ( @@ -12,14 +15,19 @@ from cognee.modules.users.exceptions import ( ) -async def add_user_to_tenant(user_id: UUID, tenant_id: UUID, owner_id: UUID): +async def add_user_to_tenant( + user_id: UUID, tenant_id: UUID, owner_id: UUID, set_active_tenant: Optional[bool] = True +): """ Add a user with the given id to the tenant with the given id. This can only be successful if the request owner with the given id is the tenant owner. + + If set_active_tenant is true it will automatically set the users active tenant to provided tenant. Args: user_id: Id of the user. tenant_id: Id of the tenant. owner_id: Id of the request owner. + set_active_tenant: If set_active_tenant is true it will automatically set the users active tenant to provided tenant. 
Returns: None @@ -41,12 +49,17 @@ async def add_user_to_tenant(user_id: UUID, tenant_id: UUID, owner_id: UUID): ) try: - if user.tenant_id is None: + try: + # Add association directly to the association table + create_user_tenant_statement = insert(UserTenant).values( + user_id=user_id, tenant_id=tenant_id + ) + await session.execute(create_user_tenant_statement) + except IntegrityError: + raise EntityAlreadyExistsError(message="User is already part of group.") + + if set_active_tenant: user.tenant_id = tenant_id - elif user.tenant_id == tenant_id: - return - else: - raise IntegrityError await session.merge(user) await session.commit() diff --git a/cognee/modules/users/tenants/methods/create_tenant.py b/cognee/modules/users/tenants/methods/create_tenant.py index bfd23e08f..665e3cc18 100644 --- a/cognee/modules/users/tenants/methods/create_tenant.py +++ b/cognee/modules/users/tenants/methods/create_tenant.py @@ -1,6 +1,8 @@ from uuid import UUID +from sqlalchemy import insert from sqlalchemy.exc import IntegrityError +from cognee.modules.users.models.UserTenant import UserTenant from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError from cognee.infrastructure.databases.relational import get_relational_engine from cognee.modules.users.models import Tenant @@ -22,16 +24,22 @@ async def create_tenant(tenant_name: str, user_id: UUID) -> UUID: async with db_engine.get_async_session() as session: try: user = await get_user(user_id) - if user.tenant_id: - raise EntityAlreadyExistsError( - message="User already has a tenant. New tenant cannot be created." 
- ) tenant = Tenant(name=tenant_name, owner_id=user_id) session.add(tenant) await session.flush() user.tenant_id = tenant.id + + try: + # Add association directly to the association table + create_user_tenant_statement = insert(UserTenant).values( + user_id=user_id, tenant_id=tenant.id + ) + await session.execute(create_user_tenant_statement) + except IntegrityError: + raise EntityAlreadyExistsError(message="User is already part of group.") + await session.merge(user) await session.commit() return tenant.id diff --git a/examples/python/permissions_example.py b/examples/python/permissions_example.py index 4f51b660f..7c140845c 100644 --- a/examples/python/permissions_example.py +++ b/examples/python/permissions_example.py @@ -150,7 +150,9 @@ async def main(): # To add a user to a role he must be part of the same tenant/organization print("\nOperation started as user_2 to add user_3 to CogneeLab tenant/organization") - await add_user_to_tenant(user_id=user_3.id, tenant_id=tenant_id, owner_id=user_2.id) + await add_user_to_tenant( + user_id=user_3.id, tenant_id=tenant_id, owner_id=user_2.id, set_active_tenant=True + ) print( "\nOperation started by user_2, as tenant owner, to add user_3 to Researcher role inside the tenant/organization" From 0c4e3e1f5295746db287eff5101d60c4cf89c1df Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Sun, 19 Oct 2025 20:13:22 +0200 Subject: [PATCH 006/284] fix: Load tenants to default user --- cognee/modules/users/methods/get_default_user.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cognee/modules/users/methods/get_default_user.py b/cognee/modules/users/methods/get_default_user.py index 773545f8e..a48bd8928 100644 --- a/cognee/modules/users/methods/get_default_user.py +++ b/cognee/modules/users/methods/get_default_user.py @@ -18,7 +18,9 @@ async def get_default_user() -> SimpleNamespace: try: async with db_engine.get_async_session() as session: query = ( - 
select(User).options(selectinload(User.roles)).where(User.email == default_email) + select(User) + .options(selectinload(User.roles), selectinload(User.tenants)) + .where(User.email == default_email) ) result = await session.execute(query) From 12785e31ea327135a4e1968c90f1cb1e5891fad3 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Sun, 19 Oct 2025 21:11:14 +0200 Subject: [PATCH 007/284] fix: Resolve issue with adding user to tenants --- .../tenants/methods/add_user_to_tenant.py | 28 ++++++++----------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/cognee/modules/users/tenants/methods/add_user_to_tenant.py b/cognee/modules/users/tenants/methods/add_user_to_tenant.py index b9f5898d0..dabab6b6b 100644 --- a/cognee/modules/users/tenants/methods/add_user_to_tenant.py +++ b/cognee/modules/users/tenants/methods/add_user_to_tenant.py @@ -48,22 +48,18 @@ async def add_user_to_tenant( message="Only tenant owner can add other users to organization." ) - try: - try: - # Add association directly to the association table - create_user_tenant_statement = insert(UserTenant).values( - user_id=user_id, tenant_id=tenant_id - ) - await session.execute(create_user_tenant_statement) - except IntegrityError: - raise EntityAlreadyExistsError(message="User is already part of group.") - - if set_active_tenant: - user.tenant_id = tenant_id - + if set_active_tenant: + user.tenant_id = tenant_id await session.merge(user) await session.commit() - except IntegrityError: - raise EntityAlreadyExistsError( - message="User is already part of a tenant. Only one tenant can be assigned to user." 
+ + try: + # Add association directly to the association table + create_user_tenant_statement = insert(UserTenant).values( + user_id=user_id, tenant_id=tenant_id ) + await session.execute(create_user_tenant_statement) + await session.commit() + + except IntegrityError: + raise EntityAlreadyExistsError(message="User is already part of group.") From 13f0423a55720debc1d04fbe9f005855c008ae53 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Sun, 19 Oct 2025 21:35:50 +0200 Subject: [PATCH 008/284] refactor: Add better TODO message --- .../permissions/methods/get_all_user_permission_datasets.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py index a4f538259..ff0f52d27 100644 --- a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +++ b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py @@ -40,5 +40,6 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) -> # If the dataset id key already exists, leave the dictionary unchanged. 
unique.setdefault(dataset.id, dataset) - # TODO: Add filtering out of datasets that aren't currently selected tenant of user + # TODO: Add filtering out of datasets that aren't currently selected tenant of user (currently selected tenant is the tenant_id value in the User model) + # TODO: Add endpoint/method to select current Tenant for a user (This UUID value should be stored in tenant_id of User model) return list(unique.values()) From d6bb95e3798984bee76a0e5cd92308097f153649 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Sun, 19 Oct 2025 21:57:39 +0200 Subject: [PATCH 009/284] fix: load tenants and roles when creating user --- cognee/modules/users/methods/create_user.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cognee/modules/users/methods/create_user.py b/cognee/modules/users/methods/create_user.py index 953c70cd6..ef325fb6f 100644 --- a/cognee/modules/users/methods/create_user.py +++ b/cognee/modules/users/methods/create_user.py @@ -42,6 +42,10 @@ async def create_user( if auto_login: await session.refresh(user) + # Update tenants and roles information for User object + _ = await user.awaitable_attrs.tenants + _ = await user.awaitable_attrs.roles + return user except UserAlreadyExists as error: print(f"User {email} already exists") From 4f874deace3b55072bf97c35b45e158d03c5d844 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Sun, 19 Oct 2025 23:50:17 +0200 Subject: [PATCH 010/284] feat: Add tenant select method/endpoint for users --- .../routers/get_permissions_router.py | 32 ++++++++++++ .../get_all_user_permission_datasets.py | 1 - .../modules/users/tenants/methods/__init__.py | 1 + .../users/tenants/methods/create_tenant.py | 7 +-- .../users/tenants/methods/select_tenant.py | 50 +++++++++++++++++++ 5 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 cognee/modules/users/tenants/methods/select_tenant.py diff --git a/cognee/api/v1/permissions/routers/get_permissions_router.py 
b/cognee/api/v1/permissions/routers/get_permissions_router.py index 637293268..7959415da 100644 --- a/cognee/api/v1/permissions/routers/get_permissions_router.py +++ b/cognee/api/v1/permissions/routers/get_permissions_router.py @@ -220,4 +220,36 @@ def get_permissions_router() -> APIRouter: status_code=200, content={"message": "Tenant created.", "tenant_id": str(tenant_id)} ) + @permissions_router.post("/tenants/{tenant_id}") + async def select_tenant(tenant_id: UUID, user: User = Depends(get_authenticated_user)): + """ + Select current tenant. + + This endpoint selects a tenant with the specified UUID. Tenants are used + to organize users and resources in multi-tenant environments, providing + isolation and access control between different groups or organizations. + + ## Request Parameters + - **tenant_id** (UUID): UUID of the tenant to create + + ## Response + Returns a success message indicating the tenant was created. + """ + send_telemetry( + "Permissions API Endpoint Invoked", + user.id, + additional_properties={ + "endpoint": f"POST /v1/permissions/tenants/{str(tenant_id)}", + "tenant_id": tenant_id, + }, + ) + + from cognee.modules.users.tenants.methods import select_tenant as select_tenant_method + + await select_tenant_method(user_id=user.id, tenant_id=tenant_id) + + return JSONResponse( + status_code=200, content={"message": "Tenant selected.", "tenant_id": str(tenant_id)} + ) + return permissions_router diff --git a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py index ff0f52d27..e5dbb0e4b 100644 --- a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +++ b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py @@ -41,5 +41,4 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) -> unique.setdefault(dataset.id, dataset) # TODO: Add filtering out of datasets that aren't 
currently selected tenant of user (currently selected tenant is the tenant_id value in the User model) - # TODO: Add endpoint/method to select current Tenant for a user (This UUID value should be stored in tenant_id of User model) return list(unique.values()) diff --git a/cognee/modules/users/tenants/methods/__init__.py b/cognee/modules/users/tenants/methods/__init__.py index 9a052e9c6..39e2b31bb 100644 --- a/cognee/modules/users/tenants/methods/__init__.py +++ b/cognee/modules/users/tenants/methods/__init__.py @@ -1,2 +1,3 @@ from .create_tenant import create_tenant from .add_user_to_tenant import add_user_to_tenant +from .select_tenant import select_tenant diff --git a/cognee/modules/users/tenants/methods/create_tenant.py b/cognee/modules/users/tenants/methods/create_tenant.py index 665e3cc18..60e10db5c 100644 --- a/cognee/modules/users/tenants/methods/create_tenant.py +++ b/cognee/modules/users/tenants/methods/create_tenant.py @@ -30,6 +30,8 @@ async def create_tenant(tenant_name: str, user_id: UUID) -> UUID: await session.flush() user.tenant_id = tenant.id + await session.merge(user) + await session.commit() try: # Add association directly to the association table @@ -37,11 +39,10 @@ async def create_tenant(tenant_name: str, user_id: UUID) -> UUID: user_id=user_id, tenant_id=tenant.id ) await session.execute(create_user_tenant_statement) + await session.commit() except IntegrityError: - raise EntityAlreadyExistsError(message="User is already part of group.") + raise EntityAlreadyExistsError(message="User is already part of tenant.") - await session.merge(user) - await session.commit() return tenant.id except IntegrityError as e: raise EntityAlreadyExistsError(message="Tenant already exists.") from e diff --git a/cognee/modules/users/tenants/methods/select_tenant.py b/cognee/modules/users/tenants/methods/select_tenant.py new file mode 100644 index 000000000..709e46bf2 --- /dev/null +++ b/cognee/modules/users/tenants/methods/select_tenant.py @@ -0,0 +1,50 @@ 
+from uuid import UUID + +import sqlalchemy.exc +from sqlalchemy import select + +from cognee.infrastructure.databases.relational import get_relational_engine +from cognee.modules.users.models.UserTenant import UserTenant +from cognee.modules.users.methods import get_user +from cognee.modules.users.permissions.methods import get_tenant +from cognee.modules.users.exceptions import UserNotFoundError, TenantNotFoundError + + +async def select_tenant(user_id: UUID, tenant_id: UUID): + """ + Set the users active tenant to provided tenant. + Args: + user_id: Id of the user. + tenant_id: Id of the tenant. + + Returns: + None + + """ + db_engine = get_relational_engine() + async with db_engine.get_async_session() as session: + user = await get_user(user_id) + tenant = await get_tenant(tenant_id) + + if not user: + raise UserNotFoundError + elif not tenant: + raise TenantNotFoundError + + # Check if User is part of Tenant + result = await session.execute( + select(UserTenant) + .where(UserTenant.user_id == user_id) + .where(UserTenant.tenant_id == tenant_id) + ) + + try: + result = result.scalar_one() + except sqlalchemy.exc.NoResultFound as e: + raise TenantNotFoundError("User Tenant relationship not found.") from e + + if result: + # If user is part of tenant update current tenant of user + user.tenant_id = tenant_id + await session.merge(user) + await session.commit() From 6934692e1b7646a493f47ec6a189e041db6cb14a Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 20 Oct 2025 15:07:13 +0200 Subject: [PATCH 011/284] refactor: Enable selection of default single user tenant --- .../routers/get_permissions_router.py | 24 ++++++++++++------- .../users/tenants/methods/select_tenant.py | 13 +++++++++- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/cognee/api/v1/permissions/routers/get_permissions_router.py b/cognee/api/v1/permissions/routers/get_permissions_router.py index 7959415da..eeea9b653 100644 --- 
a/cognee/api/v1/permissions/routers/get_permissions_router.py +++ b/cognee/api/v1/permissions/routers/get_permissions_router.py @@ -1,14 +1,19 @@ from uuid import UUID -from typing import List +from typing import List, Union from fastapi import APIRouter, Depends from fastapi.responses import JSONResponse from cognee.modules.users.models import User +from cognee.api.DTO import InDTO from cognee.modules.users.methods import get_authenticated_user from cognee.shared.utils import send_telemetry +class SelectTenantDTO(InDTO): + tenant_id: UUID | None = None + + def get_permissions_router() -> APIRouter: permissions_router = APIRouter() @@ -220,8 +225,8 @@ def get_permissions_router() -> APIRouter: status_code=200, content={"message": "Tenant created.", "tenant_id": str(tenant_id)} ) - @permissions_router.post("/tenants/{tenant_id}") - async def select_tenant(tenant_id: UUID, user: User = Depends(get_authenticated_user)): + @permissions_router.post("/tenants/select") + async def select_tenant(payload: SelectTenantDTO, user: User = Depends(get_authenticated_user)): """ Select current tenant. @@ -229,8 +234,10 @@ def get_permissions_router() -> APIRouter: to organize users and resources in multi-tenant environments, providing isolation and access control between different groups or organizations. + Sending a null/None value as tenant_id selects his default single user tenant + ## Request Parameters - - **tenant_id** (UUID): UUID of the tenant to create + - **tenant_id** (Union[UUID, None]): UUID of the tenant to select, If null/None is provided use the default single user tenant ## Response Returns a success message indicating the tenant was created. 
@@ -239,17 +246,18 @@ def get_permissions_router() -> APIRouter: "Permissions API Endpoint Invoked", user.id, additional_properties={ - "endpoint": f"POST /v1/permissions/tenants/{str(tenant_id)}", - "tenant_id": tenant_id, + "endpoint": f"POST /v1/permissions/tenants/{str(payload.tenant_id)}", + "tenant_id": str(payload.tenant_id), }, ) from cognee.modules.users.tenants.methods import select_tenant as select_tenant_method - await select_tenant_method(user_id=user.id, tenant_id=tenant_id) + await select_tenant_method(user_id=user.id, tenant_id=payload.tenant_id) return JSONResponse( - status_code=200, content={"message": "Tenant selected.", "tenant_id": str(tenant_id)} + status_code=200, + content={"message": "Tenant selected.", "tenant_id": str(payload.tenant_id)}, ) return permissions_router diff --git a/cognee/modules/users/tenants/methods/select_tenant.py b/cognee/modules/users/tenants/methods/select_tenant.py index 709e46bf2..732b24858 100644 --- a/cognee/modules/users/tenants/methods/select_tenant.py +++ b/cognee/modules/users/tenants/methods/select_tenant.py @@ -1,4 +1,5 @@ from uuid import UUID +from typing import Union import sqlalchemy.exc from sqlalchemy import select @@ -10,9 +11,11 @@ from cognee.modules.users.permissions.methods import get_tenant from cognee.modules.users.exceptions import UserNotFoundError, TenantNotFoundError -async def select_tenant(user_id: UUID, tenant_id: UUID): +async def select_tenant(user_id: UUID, tenant_id: Union[UUID, None]): """ Set the users active tenant to provided tenant. + + If None tenant_id is provided set current Tenant to the default single user-tenant Args: user_id: Id of the user. tenant_id: Id of the tenant. 
@@ -24,6 +27,14 @@ async def select_tenant(user_id: UUID, tenant_id: UUID): db_engine = get_relational_engine() async with db_engine.get_async_session() as session: user = await get_user(user_id) + + if tenant_id is None: + # If no tenant_id is provided set current Tenant to the single user-tenant + user.tenant_id = None + await session.merge(user) + await session.commit() + return + tenant = await get_tenant(tenant_id) if not user: From c5648e63375d9eb1520f5a007dda520f70c9c145 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Wed, 22 Oct 2025 09:22:11 +0200 Subject: [PATCH 012/284] test: Add load test. --- .github/workflows/e2e_tests.yml | 31 ++++++++++++++++++++- cognee/tests/{load_test.py => test_load.py} | 30 ++++++++++++-------- 2 files changed, 49 insertions(+), 12 deletions(-) rename cognee/tests/{load_test.py => test_load.py} (65%) diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index 9582a3f3b..5f66e71d2 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -330,4 +330,33 @@ jobs: DB_PORT: 5432 DB_USERNAME: cognee DB_PASSWORD: cognee - run: uv run python ./cognee/tests/test_concurrent_subprocess_access.py \ No newline at end of file + run: uv run python ./cognee/tests/test_concurrent_subprocess_access.py + + test-load: + name: Test Load + runs-on: ubuntu-22.04 + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' + + - name: Dependencies already installed + run: echo "Dependencies already installed in setup" + + - name: Run Load Test + env: + ENV: 'dev' + ENABLE_BACKEND_ACCESS_CONTROL: True + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + 
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + run: uv run python ./cognee/tests/test_load.py \ No newline at end of file diff --git a/cognee/tests/load_test.py b/cognee/tests/test_load.py similarity index 65% rename from cognee/tests/load_test.py rename to cognee/tests/test_load.py index c44efad00..09e2db084 100644 --- a/cognee/tests/load_test.py +++ b/cognee/tests/test_load.py @@ -9,8 +9,8 @@ from cognee.shared.logging_utils import get_logger logger = get_logger() -async def process_and_search(num_of_searches): +async def process_and_search(num_of_searches): start_time = time.time() await cognee.cognify() @@ -26,26 +26,34 @@ async def process_and_search(num_of_searches): return end_time - start_time -async def main(): +async def main(): file_path = os.path.join( pathlib.Path(__file__).resolve().parent, "test_data/artificial-intelligence.pdf" ) + data_directory_path = str( + pathlib.Path( + os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_load") + ).resolve() + ) + cognee.config.data_root_directory(data_directory_path) + cognee_directory_path = str( + pathlib.Path( + os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_load") + ).resolve() + ) + cognee.config.system_root_directory(cognee_directory_path) num_of_pdfs = 10 num_of_reps = 5 - upper_boundary_minutes = 3 - average_minutes = 1.5 + upper_boundary_minutes = 10 + average_minutes = 8 await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) - await asyncio.gather( - *[ - cognee.add(file_path, dataset_name=f"dataset_{i}") - for i in range(num_of_pdfs) - ] - ) + for i in range(num_of_pdfs): + await cognee.add(file_path, dataset_name=f"dataset_{i}") recorded_times = await asyncio.gather( *[process_and_search(num_of_pdfs) for _ in range(num_of_reps)] @@ -59,4 +67,4 @@ async def main(): if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file + asyncio.run(main()) From 
742866b4c9f1d4aa53ab60fb54b79474fbfea0d2 Mon Sep 17 00:00:00 2001 From: EricXiao Date: Wed, 22 Oct 2025 16:56:46 +0800 Subject: [PATCH 013/284] feat: csv ingestion loader & chunk Signed-off-by: EricXiao --- cognee/cli/commands/cognify_command.py | 9 +- cognee/cli/config.py | 2 +- .../files/utils/guess_file_type.py | 43 +++++ .../files/utils/is_csv_content.py | 181 ++++++++++++++++++ cognee/infrastructure/loaders/LoaderEngine.py | 1 + .../infrastructure/loaders/core/__init__.py | 3 +- .../infrastructure/loaders/core/csv_loader.py | 93 +++++++++ .../loaders/core/text_loader.py | 3 +- .../loaders/supported_loaders.py | 3 +- cognee/modules/chunking/CsvChunker.py | 35 ++++ .../processing/document_types/CsvDocument.py | 33 ++++ .../processing/document_types/__init__.py | 1 + cognee/tasks/chunks/__init__.py | 1 + cognee/tasks/chunks/chunk_by_row.py | 94 +++++++++ cognee/tasks/documents/classify_documents.py | 2 + .../integration/documents/CsvDocument_test.py | 70 +++++++ .../tests/test_data/example_with_header.csv | 3 + .../processing/chunks/chunk_by_row_test.py | 52 +++++ 18 files changed, 623 insertions(+), 6 deletions(-) create mode 100644 cognee/infrastructure/files/utils/is_csv_content.py create mode 100644 cognee/infrastructure/loaders/core/csv_loader.py create mode 100644 cognee/modules/chunking/CsvChunker.py create mode 100644 cognee/modules/data/processing/document_types/CsvDocument.py create mode 100644 cognee/tasks/chunks/chunk_by_row.py create mode 100644 cognee/tests/integration/documents/CsvDocument_test.py create mode 100644 cognee/tests/test_data/example_with_header.csv create mode 100644 cognee/tests/unit/processing/chunks/chunk_by_row_test.py diff --git a/cognee/cli/commands/cognify_command.py b/cognee/cli/commands/cognify_command.py index 16eaf0454..b89c1f70e 100644 --- a/cognee/cli/commands/cognify_command.py +++ b/cognee/cli/commands/cognify_command.py @@ -22,7 +22,7 @@ relationships, and creates semantic connections for enhanced search and reasonin 
Processing Pipeline: 1. **Document Classification**: Identifies document types and structures -2. **Permission Validation**: Ensures user has processing rights +2. **Permission Validation**: Ensures user has processing rights 3. **Text Chunking**: Breaks content into semantically meaningful segments 4. **Entity Extraction**: Identifies key concepts, people, places, organizations 5. **Relationship Detection**: Discovers connections between entities @@ -97,6 +97,13 @@ After successful cognify processing, use `cognee search` to query the knowledge chunker_class = LangchainChunker except ImportError: fmt.warning("LangchainChunker not available, using TextChunker") + elif args.chunker == "CsvChunker": + try: + from cognee.modules.chunking.CsvChunker import CsvChunker + + chunker_class = CsvChunker + except ImportError: + fmt.warning("CsvChunker not available, using TextChunker") result = await cognee.cognify( datasets=datasets, diff --git a/cognee/cli/config.py b/cognee/cli/config.py index d016608c1..082adbaec 100644 --- a/cognee/cli/config.py +++ b/cognee/cli/config.py @@ -26,7 +26,7 @@ SEARCH_TYPE_CHOICES = [ ] # Chunker choices -CHUNKER_CHOICES = ["TextChunker", "LangchainChunker"] +CHUNKER_CHOICES = ["TextChunker", "LangchainChunker", "CsvChunker"] # Output format choices OUTPUT_FORMAT_CHOICES = ["json", "pretty", "simple"] diff --git a/cognee/infrastructure/files/utils/guess_file_type.py b/cognee/infrastructure/files/utils/guess_file_type.py index dcdd68cad..10f59a400 100644 --- a/cognee/infrastructure/files/utils/guess_file_type.py +++ b/cognee/infrastructure/files/utils/guess_file_type.py @@ -1,6 +1,8 @@ from typing import BinaryIO import filetype + from .is_text_content import is_text_content +from .is_csv_content import is_csv_content class FileTypeException(Exception): @@ -134,3 +136,44 @@ def guess_file_type(file: BinaryIO) -> filetype.Type: raise FileTypeException(f"Unknown file detected: {file.name}.") return file_type + + +class CsvFileType(filetype.Type): 
+ """ + Match CSV file types based on MIME type and extension. + + Public methods: + - match + + Instance variables: + - MIME: The MIME type of the CSV. + - EXTENSION: The file extension of the CSV. + """ + + MIME = "text/csv" + EXTENSION = "csv" + + def __init__(self): + super().__init__(mime=self.MIME, extension=self.EXTENSION) + + def match(self, buf): + """ + Determine if the given buffer contains csv content. + + Parameters: + ----------- + + - buf: The buffer to check for csv content. + + Returns: + -------- + + Returns True if the buffer is identified as csv content, otherwise False. + """ + + return is_csv_content(buf) + + +csv_file_type = CsvFileType() + +filetype.add_type(csv_file_type) diff --git a/cognee/infrastructure/files/utils/is_csv_content.py b/cognee/infrastructure/files/utils/is_csv_content.py new file mode 100644 index 000000000..07b7ea69b --- /dev/null +++ b/cognee/infrastructure/files/utils/is_csv_content.py @@ -0,0 +1,181 @@ +import csv +from collections import Counter + + +def is_csv_content(content): + """ + Heuristically determine whether a bytes-like object is CSV text. + + Strategy (fail-fast and cheap to expensive): + 1) Decode: Try a small ordered list of common encodings with strict errors. + 2) Line sampling: require >= 2 non-empty lines; sample up to 50 lines. + 3) Delimiter detection: + - Prefer csv.Sniffer() with common delimiters. + - Fallback to a lightweight consistency heuristic. + 4) Lightweight parse check: + - Parse a few lines with the delimiter. + - Ensure at least 2 valid rows and relatively stable column counts. + + Returns: + bool: True if the buffer looks like CSV; False otherwise. + """ + try: + encoding_list = [ + "utf-8", + "utf-8-sig", + "utf-32-le", + "utf-32-be", + "utf-16-le", + "utf-16-be", + "gb18030", + "shift_jis", + "cp949", + "cp1252", + "iso-8859-1", + ] + + # Try to decode strictly—if decoding fails for all encodings, it's not text/CSV. 
+ text = None + for enc in encoding_list: + try: + text = content.decode(enc, errors="strict") + break + except UnicodeDecodeError: + continue + if text is None: + return False + + # Reject empty/whitespace-only payloads. + stripped = text.strip() + if not stripped: + return False + + # Split into logical lines and drop empty ones. Require at least two lines. + lines = [ln for ln in text.splitlines() if ln.strip()] + if len(lines) < 2: + return False + + # Take a small sample to keep sniffing cheap and predictable. + sample_lines = lines[:50] + + # Detect delimiter using csv.Sniffer first; if that fails, use our heuristic. + delimiter = _sniff_delimiter(sample_lines) or _heuristic_delimiter(sample_lines) + if not delimiter: + return False + + # Finally, do a lightweight parse sanity check with the chosen delimiter. + return _lightweight_parse_check(sample_lines, delimiter) + except Exception: + return False + + +def _sniff_delimiter(lines): + """ + Try Python's built-in csv.Sniffer on a sample. + + Args: + lines (list[str]): Sample lines (already decoded). + + Returns: + str | None: The detected delimiter if sniffing succeeds; otherwise None. + """ + # Join up to 50 lines to form the sample string Sniffer will inspect. + sample = "\n".join(lines[:50]) + try: + dialect = csv.Sniffer().sniff(sample, delimiters=",\t;|") + return dialect.delimiter + except Exception: + # Sniffer is known to be brittle on small/dirty samples—silently fallback. + return None + + +def _heuristic_delimiter(lines): + """ + Fallback delimiter detection based on count consistency per line. + + Heuristic: + - For each candidate delimiter, count occurrences per line. + - Keep only lines with count > 0 (line must contain the delimiter). + - Require at least half of lines to contain the delimiter (min 2). + - Compute the mode (most common count). If the proportion of lines that + exhibit the modal count is >= 80%, accept that delimiter. + + Args: + lines (list[str]): Sample lines. 
+ + Returns: + str | None: Best delimiter if one meets the consistency threshold; else None. + """ + candidates = [",", "\t", ";", "|"] + best = None + best_score = 0.0 + + for d in candidates: + # Count how many times the delimiter appears in each line. + counts = [ln.count(d) for ln in lines] + # Consider only lines that actually contain the delimiter at least once. + nonzero = [c for c in counts if c > 0] + + # Require that more than half of lines (and at least 2) contain the delimiter. + if len(nonzero) < max(2, int(0.5 * len(lines))): + continue + + # Find the modal count and its frequency. + cnt = Counter(nonzero) + pairs = cnt.most_common(1) + if not pairs: + continue + + mode, mode_freq = pairs[0] + # Consistency ratio: lines with the modal count / total lines in the sample. + consistency = mode_freq / len(lines) + # Accept if consistent enough and better than any previous candidate. + if mode >= 1 and consistency >= 0.80 and consistency > best_score: + best = d + best_score = consistency + + return best + + +def _lightweight_parse_check(lines, delimiter): + """ + Parse a few lines with csv.reader and check structural stability. + + Heuristic: + - Parse up to 5 lines with the given delimiter. + - Count column widths per parsed row. + - Require at least 2 non-empty rows. + - Allow at most 1 row whose width deviates by >2 columns from the first row. + + Args: + lines (list[str]): Sample lines (decoded). + delimiter (str): Delimiter chosen by sniffing/heuristics. + + Returns: + bool: True if parsing looks stable; False otherwise. + """ + try: + # csv.reader accepts any iterable of strings; feeding the first 10 lines is fine. + reader = csv.reader(lines[:10], delimiter=delimiter) + widths = [] + valid_rows = 0 + for row in reader: + if not row: + continue + + widths.append(len(row)) + valid_rows += 1 + + # Need at least two meaningful rows to make a judgment. 
+ if valid_rows < 2: + return False + + if widths: + first = widths[0] + # Count rows whose width deviates significantly (>2) from the first row. + unstable = sum(1 for w in widths if abs(w - first) > 2) + # Permit at most 1 unstable row among the parsed sample. + return unstable <= 1 + return False + except Exception: + return False diff --git a/cognee/infrastructure/loaders/LoaderEngine.py b/cognee/infrastructure/loaders/LoaderEngine.py index 6b62f7641..37e63c9fc 100644 --- a/cognee/infrastructure/loaders/LoaderEngine.py +++ b/cognee/infrastructure/loaders/LoaderEngine.py @@ -30,6 +30,7 @@ class LoaderEngine: "pypdf_loader", "image_loader", "audio_loader", + "csv_loader", "unstructured_loader", "advanced_pdf_loader", ] diff --git a/cognee/infrastructure/loaders/core/__init__.py b/cognee/infrastructure/loaders/core/__init__.py index 8a2df80f9..09819fbd2 100644 --- a/cognee/infrastructure/loaders/core/__init__.py +++ b/cognee/infrastructure/loaders/core/__init__.py @@ -3,5 +3,6 @@ from .text_loader import TextLoader from .audio_loader import AudioLoader from .image_loader import ImageLoader +from .csv_loader import CsvLoader -__all__ = ["TextLoader", "AudioLoader", "ImageLoader"] +__all__ = ["TextLoader", "AudioLoader", "ImageLoader", "CsvLoader"] diff --git a/cognee/infrastructure/loaders/core/csv_loader.py b/cognee/infrastructure/loaders/core/csv_loader.py new file mode 100644 index 000000000..a314a7a24 --- /dev/null +++ b/cognee/infrastructure/loaders/core/csv_loader.py @@ -0,0 +1,93 @@ +import os +from typing import List +import csv +from cognee.infrastructure.loaders.LoaderInterface import LoaderInterface +from cognee.infrastructure.files.storage import get_file_storage, get_storage_config +from cognee.infrastructure.files.utils.get_file_metadata import get_file_metadata + + +class CsvLoader(LoaderInterface): + """ + Core CSV file loader that handles basic CSV file formats. 
+ """ + + @property + def supported_extensions(self) -> List[str]: + """Supported text file extensions.""" + return [ + "csv", + ] + + @property + def supported_mime_types(self) -> List[str]: + """Supported MIME types for text content.""" + return [ + "text/csv", + ] + + @property + def loader_name(self) -> str: + """Unique identifier for this loader.""" + return "csv_loader" + + def can_handle(self, extension: str, mime_type: str) -> bool: + """ + Check if this loader can handle the given file. + + Args: + extension: File extension + mime_type: Optional MIME type + + Returns: + True if file can be handled, False otherwise + """ + if extension in self.supported_extensions and mime_type in self.supported_mime_types: + return True + + return False + + async def load(self, file_path: str, encoding: str = "utf-8", **kwargs): + """ + Load and process the csv file. + + Args: + file_path: Path to the file to load + encoding: Text encoding to use (default: utf-8) + **kwargs: Additional configuration (unused) + + Returns: + LoaderResult containing the file content and metadata + + Raises: + FileNotFoundError: If file doesn't exist + UnicodeDecodeError: If file cannot be decoded with specified encoding + OSError: If file cannot be read + """ + if not os.path.exists(file_path): + raise FileNotFoundError(f"File not found: {file_path}") + + with open(file_path, "rb") as f: + file_metadata = await get_file_metadata(f) + # Name ingested file of current loader based on original file content hash + storage_file_name = "text_" + file_metadata["content_hash"] + ".txt" + + row_texts = [] + row_index = 1 + + with open(file_path, "r", encoding=encoding, newline="") as file: + reader = csv.DictReader(file) + for row in reader: + pairs = [f"{str(k)}: {str(v)}" for k, v in row.items()] + row_text = ", ".join(pairs) + row_texts.append(f"Row {row_index}:\n{row_text}\n") + row_index += 1 + + content = "\n".join(row_texts) + + storage_config = get_storage_config() + data_root_directory = 
storage_config["data_root_directory"] + storage = get_file_storage(data_root_directory) + + full_file_path = await storage.store(storage_file_name, content) + + return full_file_path diff --git a/cognee/infrastructure/loaders/core/text_loader.py b/cognee/infrastructure/loaders/core/text_loader.py index a6f94be9b..e478edb22 100644 --- a/cognee/infrastructure/loaders/core/text_loader.py +++ b/cognee/infrastructure/loaders/core/text_loader.py @@ -16,7 +16,7 @@ class TextLoader(LoaderInterface): @property def supported_extensions(self) -> List[str]: """Supported text file extensions.""" - return ["txt", "md", "csv", "json", "xml", "yaml", "yml", "log"] + return ["txt", "md", "json", "xml", "yaml", "yml", "log"] @property def supported_mime_types(self) -> List[str]: @@ -24,7 +24,6 @@ class TextLoader(LoaderInterface): return [ "text/plain", "text/markdown", - "text/csv", "application/json", "text/xml", "application/xml", diff --git a/cognee/infrastructure/loaders/supported_loaders.py b/cognee/infrastructure/loaders/supported_loaders.py index d103babe3..b506df5f3 100644 --- a/cognee/infrastructure/loaders/supported_loaders.py +++ b/cognee/infrastructure/loaders/supported_loaders.py @@ -1,5 +1,5 @@ from cognee.infrastructure.loaders.external import PyPdfLoader -from cognee.infrastructure.loaders.core import TextLoader, AudioLoader, ImageLoader +from cognee.infrastructure.loaders.core import TextLoader, AudioLoader, ImageLoader, CsvLoader # Registry for loader implementations supported_loaders = { @@ -7,6 +7,7 @@ supported_loaders = { TextLoader.loader_name: TextLoader, ImageLoader.loader_name: ImageLoader, AudioLoader.loader_name: AudioLoader, + CsvLoader.loader_name: CsvLoader, } # Try adding optional loaders diff --git a/cognee/modules/chunking/CsvChunker.py b/cognee/modules/chunking/CsvChunker.py new file mode 100644 index 000000000..4ba4a969e --- /dev/null +++ b/cognee/modules/chunking/CsvChunker.py @@ -0,0 +1,35 @@ +from cognee.shared.logging_utils import get_logger 
+ + +from cognee.tasks.chunks import chunk_by_row +from cognee.modules.chunking.Chunker import Chunker +from .models.DocumentChunk import DocumentChunk + +logger = get_logger() + + +class CsvChunker(Chunker): + async def read(self): + async for content_text in self.get_text(): + if content_text is None: + continue + + for chunk_data in chunk_by_row(content_text, self.max_chunk_size): + if chunk_data["chunk_size"] <= self.max_chunk_size: + yield DocumentChunk( + id=chunk_data["chunk_id"], + text=chunk_data["text"], + chunk_size=chunk_data["chunk_size"], + is_part_of=self.document, + chunk_index=self.chunk_index, + cut_type=chunk_data["cut_type"], + contains=[], + metadata={ + "index_fields": ["text"], + }, + ) + self.chunk_index += 1 + else: + raise ValueError( + f"Chunk size is larger than the maximum chunk size {self.max_chunk_size}" + ) diff --git a/cognee/modules/data/processing/document_types/CsvDocument.py b/cognee/modules/data/processing/document_types/CsvDocument.py new file mode 100644 index 000000000..3381275bd --- /dev/null +++ b/cognee/modules/data/processing/document_types/CsvDocument.py @@ -0,0 +1,33 @@ +import io +import csv +from typing import Type + +from cognee.modules.chunking.Chunker import Chunker +from cognee.infrastructure.files.utils.open_data_file import open_data_file +from .Document import Document + + +class CsvDocument(Document): + type: str = "csv" + mime_type: str = "text/csv" + + async def read(self, chunker_cls: Type[Chunker], max_chunk_size: int): + async def get_text(): + async with open_data_file( + self.raw_data_location, mode="r", encoding="utf-8", newline="" + ) as file: + content = file.read() + file_like_obj = io.StringIO(content) + reader = csv.DictReader(file_like_obj) + + for row in reader: + pairs = [f"{str(k)}: {str(v)}" for k, v in row.items()] + row_text = ", ".join(pairs) + if not row_text.strip(): + break + yield row_text + + chunker = chunker_cls(self, max_chunk_size=max_chunk_size, get_text=get_text) + + async for 
chunk in chunker.read(): yield chunk diff --git a/cognee/modules/data/processing/document_types/__init__.py b/cognee/modules/data/processing/document_types/__init__.py index 2e862f4ba..133dd53f8 100644 --- a/cognee/modules/data/processing/document_types/__init__.py +++ b/cognee/modules/data/processing/document_types/__init__.py @@ -4,3 +4,4 @@ from .TextDocument import TextDocument from .ImageDocument import ImageDocument from .AudioDocument import AudioDocument from .UnstructuredDocument import UnstructuredDocument +from .CsvDocument import CsvDocument diff --git a/cognee/tasks/chunks/__init__.py b/cognee/tasks/chunks/__init__.py index 22ce96be8..37d4de73e 100644 --- a/cognee/tasks/chunks/__init__.py +++ b/cognee/tasks/chunks/__init__.py @@ -1,4 +1,5 @@ from .chunk_by_word import chunk_by_word from .chunk_by_sentence import chunk_by_sentence from .chunk_by_paragraph import chunk_by_paragraph +from .chunk_by_row import chunk_by_row from .remove_disconnected_chunks import remove_disconnected_chunks diff --git a/cognee/tasks/chunks/chunk_by_row.py b/cognee/tasks/chunks/chunk_by_row.py new file mode 100644 index 000000000..8daf13689 --- /dev/null +++ b/cognee/tasks/chunks/chunk_by_row.py @@ -0,0 +1,94 @@ +from typing import Any, Dict, Iterator +from uuid import NAMESPACE_OID, uuid5 + +from cognee.infrastructure.databases.vector.embeddings import get_embedding_engine + + +def _get_pair_size(pair_text: str) -> int: + """ + Calculate the size of a given text in terms of tokens. + + If an embedding engine's tokenizer is available, count the tokens for the provided pair text. + If the tokenizer is not available, fall back to a fixed estimate of 3 tokens per pair. + + Parameters: + ----------- + + - pair_text (str): The key:value pair text for which the token size is to be calculated. + + Returns: + -------- + + - int: The number of tokens representing the text, typically an integer, depending + on the tokenizer's output. 
+ """ + embedding_engine = get_embedding_engine() + if embedding_engine.tokenizer: + return embedding_engine.tokenizer.count_tokens(pair_text) + else: + return 3 + + +def chunk_by_row( + data: str, + max_chunk_size, +) -> Iterator[Dict[str, Any]]: + """ + Chunk the input text by row while enabling exact text reconstruction. + + This function divides the given text data into smaller chunks on a line-by-line basis, + ensuring that the size of each chunk is less than or equal to the specified maximum + chunk size. It guarantees that when the generated chunks are concatenated, they + reproduce the original text accurately. The tokenization process is handled by + adapters compatible with the vector engine's embedding model. + + Parameters: + ----------- + + - data (str): The input text to be chunked. + - max_chunk_size: The maximum allowed size for each chunk, in terms of tokens or + words. + """ + current_chunk_list = [] + chunk_index = 0 + current_chunk_size = 0 + + lines = data.split("\n\n") + for line in lines: + pairs_text = line.split(", ") + + for pair_text in pairs_text: + pair_size = _get_pair_size(pair_text) + if current_chunk_size > 0 and (current_chunk_size + pair_size > max_chunk_size): + # Yield current cut chunk + current_chunk = ", ".join(current_chunk_list) + chunk_dict = { + "text": current_chunk, + "chunk_size": current_chunk_size, + "chunk_id": uuid5(NAMESPACE_OID, current_chunk), + "chunk_index": chunk_index, + "cut_type": "row_cut", + } + + yield chunk_dict + + # Start new chunk with current pair text + current_chunk_list = [] + current_chunk_size = 0 + chunk_index += 1 + + current_chunk_list.append(pair_text) + current_chunk_size += pair_size + + # Yield row chunk + current_chunk = ", ".join(current_chunk_list) + if current_chunk: + chunk_dict = { + "text": current_chunk, + "chunk_size": current_chunk_size, + "chunk_id": uuid5(NAMESPACE_OID, current_chunk), + "chunk_index": chunk_index, + "cut_type": "row_end", + } + + yield chunk_dict diff --git 
a/cognee/tasks/documents/classify_documents.py b/cognee/tasks/documents/classify_documents.py index 9fa512906..e4f13ebd1 100644 --- a/cognee/tasks/documents/classify_documents.py +++ b/cognee/tasks/documents/classify_documents.py @@ -7,6 +7,7 @@ from cognee.modules.data.processing.document_types import ( ImageDocument, TextDocument, UnstructuredDocument, + CsvDocument, ) from cognee.modules.engine.models.node_set import NodeSet from cognee.modules.engine.utils.generate_node_id import generate_node_id @@ -15,6 +16,7 @@ from cognee.tasks.documents.exceptions import WrongDataDocumentInputError EXTENSION_TO_DOCUMENT_CLASS = { "pdf": PdfDocument, # Text documents "txt": TextDocument, + "csv": CsvDocument, "docx": UnstructuredDocument, "doc": UnstructuredDocument, "odt": UnstructuredDocument, diff --git a/cognee/tests/integration/documents/CsvDocument_test.py b/cognee/tests/integration/documents/CsvDocument_test.py new file mode 100644 index 000000000..421bb81bd --- /dev/null +++ b/cognee/tests/integration/documents/CsvDocument_test.py @@ -0,0 +1,70 @@ +import os +import sys +import uuid +import pytest +import pathlib +from unittest.mock import patch + +from cognee.modules.chunking.CsvChunker import CsvChunker +from cognee.modules.data.processing.document_types.CsvDocument import CsvDocument +from cognee.tests.integration.documents.AudioDocument_test import mock_get_embedding_engine +from cognee.tests.integration.documents.async_gen_zip import async_gen_zip + +chunk_by_row_module = sys.modules.get("cognee.tasks.chunks.chunk_by_row") + + +GROUND_TRUTH = { + "chunk_size_10": [ + {"token_count": 9, "len_text": 26, "cut_type": "row_cut", "chunk_index": 0}, + {"token_count": 6, "len_text": 29, "cut_type": "row_end", "chunk_index": 1}, + {"token_count": 9, "len_text": 25, "cut_type": "row_cut", "chunk_index": 2}, + {"token_count": 6, "len_text": 30, "cut_type": "row_end", "chunk_index": 3}, + ], + "chunk_size_128": [ + {"token_count": 15, "len_text": 57, "cut_type": "row_end", 
"chunk_index": 0}, + {"token_count": 15, "len_text": 57, "cut_type": "row_end", "chunk_index": 1}, + ], +} + + +@pytest.mark.parametrize( + "input_file,chunk_size", + [("example_with_header.csv", 10), ("example_with_header.csv", 128)], +) +@patch.object(chunk_by_row_module, "get_embedding_engine", side_effect=mock_get_embedding_engine) +@pytest.mark.asyncio +async def test_CsvDocument(mock_engine, input_file, chunk_size): + # Define file paths of test data + csv_file_path = os.path.join( + pathlib.Path(__file__).parent.parent.parent, + "test_data", + input_file, + ) + + # Define test documents + csv_document = CsvDocument( + id=uuid.uuid4(), + name="example_with_header.csv", + raw_data_location=csv_file_path, + external_metadata="", + mime_type="text/csv", + ) + + # TEST CSV + ground_truth_key = f"chunk_size_{chunk_size}" + async for ground_truth, row_data in async_gen_zip( + GROUND_TRUTH[ground_truth_key], + csv_document.read(chunker_cls=CsvChunker, max_chunk_size=chunk_size), + ): + assert ground_truth["token_count"] == row_data.chunk_size, ( + f'{ground_truth["token_count"] = } != {row_data.chunk_size = }' + ) + assert ground_truth["len_text"] == len(row_data.text), ( + f'{ground_truth["len_text"] = } != {len(row_data.text) = }' + ) + assert ground_truth["cut_type"] == row_data.cut_type, ( + f'{ground_truth["cut_type"] = } != {row_data.cut_type = }' + ) + assert ground_truth["chunk_index"] == row_data.chunk_index, ( + f'{ground_truth["chunk_index"] = } != {row_data.chunk_index = }' + ) diff --git a/cognee/tests/test_data/example_with_header.csv b/cognee/tests/test_data/example_with_header.csv new file mode 100644 index 000000000..dc900e5ef --- /dev/null +++ b/cognee/tests/test_data/example_with_header.csv @@ -0,0 +1,3 @@ +id,name,age,city,country +1,Eric,30,Beijing,China +2,Joe,35,Berlin,Germany diff --git a/cognee/tests/unit/processing/chunks/chunk_by_row_test.py b/cognee/tests/unit/processing/chunks/chunk_by_row_test.py new file mode 100644 index 
000000000..7d6a73a06 --- /dev/null +++ b/cognee/tests/unit/processing/chunks/chunk_by_row_test.py @@ -0,0 +1,52 @@ +from itertools import product + +import numpy as np +import pytest + +from cognee.infrastructure.databases.vector.embeddings import get_embedding_engine +from cognee.tasks.chunks import chunk_by_row + +INPUT_TEXTS = "name: John, age: 30, city: New York, country: USA" +max_chunk_size_vals = [8, 32] + + +@pytest.mark.parametrize( + "input_text,max_chunk_size", + list(product([INPUT_TEXTS], max_chunk_size_vals)), +) +def test_chunk_by_row_isomorphism(input_text, max_chunk_size): + chunks = chunk_by_row(input_text, max_chunk_size) + reconstructed_text = ", ".join([chunk["text"] for chunk in chunks]) + assert reconstructed_text == input_text, ( + f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }" + ) + + +@pytest.mark.parametrize( + "input_text,max_chunk_size", + list(product([INPUT_TEXTS], max_chunk_size_vals)), +) +def test_row_chunk_length(input_text, max_chunk_size): + chunks = list(chunk_by_row(data=input_text, max_chunk_size=max_chunk_size)) + embedding_engine = get_embedding_engine() + + chunk_lengths = np.array( + [embedding_engine.tokenizer.count_tokens(chunk["text"]) for chunk in chunks] + ) + + larger_chunks = chunk_lengths[chunk_lengths > max_chunk_size] + assert np.all(chunk_lengths <= max_chunk_size), ( + f"{max_chunk_size = }: {larger_chunks} are too large" + ) + + +@pytest.mark.parametrize( + "input_text,max_chunk_size", + list(product([INPUT_TEXTS], max_chunk_size_vals)), +) +def test_chunk_by_row_chunk_numbering(input_text, max_chunk_size): + chunks = chunk_by_row(data=input_text, max_chunk_size=max_chunk_size) + chunk_indices = np.array([chunk["chunk_index"] for chunk in chunks]) + assert np.all(chunk_indices == np.arange(len(chunk_indices))), ( + f"{chunk_indices = } are not monotonically increasing" + ) From 8566516ceca89a0e85db6aa5ba967f5d8070b2c7 Mon Sep 17 00:00:00 2001 From: EricXiao Date: Wed, 22 Oct 
2025 16:59:07 +0800 Subject: [PATCH 014/284] chore: Remove local test code Signed-off-by: EricXiao --- .../loaders/external/advanced_pdf_loader.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/cognee/infrastructure/loaders/external/advanced_pdf_loader.py b/cognee/infrastructure/loaders/external/advanced_pdf_loader.py index 6d1412b77..4b3ba296a 100644 --- a/cognee/infrastructure/loaders/external/advanced_pdf_loader.py +++ b/cognee/infrastructure/loaders/external/advanced_pdf_loader.py @@ -227,12 +227,3 @@ class AdvancedPdfLoader(LoaderInterface): if value is None: return "" return str(value).replace("\xa0", " ").strip() - - -if __name__ == "__main__": - loader = AdvancedPdfLoader() - asyncio.run( - loader.load( - "/Users/xiaotao/work/cognee/cognee/infrastructure/loaders/external/attention_is_all_you_need.pdf" - ) - ) From eb40945c6d7394ebb9e997b9bb19631411c2a3a1 Mon Sep 17 00:00:00 2001 From: vasilije Date: Sat, 25 Oct 2025 10:26:46 +0200 Subject: [PATCH 015/284] added logs --- cognee/api/client.py | 5 +++++ docker-compose.yml | 2 +- entrypoint.sh | 6 +++--- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/cognee/api/client.py b/cognee/api/client.py index 6766c12de..19a607ff0 100644 --- a/cognee/api/client.py +++ b/cognee/api/client.py @@ -39,6 +39,8 @@ from cognee.api.v1.users.routers import ( ) from cognee.modules.users.methods.get_authenticated_user import REQUIRE_AUTHENTICATION +# Ensure application logging is configured for container stdout/stderr +setup_logging() logger = get_logger() if os.getenv("ENV", "prod") == "prod": @@ -74,6 +76,9 @@ async def lifespan(app: FastAPI): await get_default_user() + # Emit a clear startup message for docker logs + logger.info("Backend server has started") + yield diff --git a/docker-compose.yml b/docker-compose.yml index 43d9b2607..472f24c21 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,7 +13,7 @@ services: - DEBUG=false # Change to true if debugging - HOST=0.0.0.0 - 
ENVIRONMENT=local - - LOG_LEVEL=ERROR + - LOG_LEVEL=INFO extra_hosts: # Allows the container to reach your local machine using "host.docker.internal" instead of "localhost" - "host.docker.internal:host-gateway" diff --git a/entrypoint.sh b/entrypoint.sh index bad9b7aa3..496825408 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -43,10 +43,10 @@ sleep 2 if [ "$ENVIRONMENT" = "dev" ] || [ "$ENVIRONMENT" = "local" ]; then if [ "$DEBUG" = "true" ]; then echo "Waiting for the debugger to attach..." - debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m gunicorn -w 1 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:$HTTP_PORT --log-level debug --reload cognee.api.client:app + exec debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m gunicorn -w 1 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:$HTTP_PORT --log-level debug --reload --access-logfile - --error-logfile - cognee.api.client:app else - gunicorn -w 1 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:$HTTP_PORT --log-level debug --reload cognee.api.client:app + exec gunicorn -w 1 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:$HTTP_PORT --log-level debug --reload --access-logfile - --error-logfile - cognee.api.client:app fi else - gunicorn -w 1 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:$HTTP_PORT --log-level error cognee.api.client:app + exec gunicorn -w 1 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:$HTTP_PORT --log-level error --access-logfile - --error-logfile - cognee.api.client:app fi From 813ee9483691db21efce1cddf88107f7f36b1b88 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Mon, 27 Oct 2025 08:12:37 +0100 Subject: [PATCH 016/284] Initial commit, still wip --- cognee/context_global_variables.py | 27 ++++++++++++++----- .../databases/graph/get_graph_engine.py | 1 + .../utils/get_or_create_dataset_database.py | 21 ++++++++++++++- .../databases/vector/create_vector_engine.py | 2 ++ .../modules/users/models/DatasetDatabase.py | 7 
+++++ 5 files changed, 51 insertions(+), 7 deletions(-) diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py index d52de4b4e..9a4f49763 100644 --- a/cognee/context_global_variables.py +++ b/cognee/context_global_variables.py @@ -57,19 +57,34 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_ # Set vector and graph database configuration based on dataset database information vector_config = { - "vector_db_url": os.path.join( - databases_directory_path, dataset_database.vector_database_name - ), - "vector_db_key": "", - "vector_db_provider": "lancedb", + "vector_db_provider": dataset_database.vector_database_provider, + "vector_db_url": dataset_database.vector_database_url, + # TODO: Maybe add key to dataset_database, and put it here?? + "vector_db_key": "" } + # vector_config = { + # "vector_db_url": os.path.join( + # databases_directory_path, dataset_database.vector_database_name + # ), + # "vector_db_key": "", + # "vector_db_provider": "lancedb", + # } + graph_config = { - "graph_database_provider": "kuzu", + "graph_database_provider": dataset_database.graph_database_provider, + "graph_database_url": dataset_database.graph_database_url, + "graph_database_name": dataset_database.graph_database_name, "graph_file_path": os.path.join( databases_directory_path, dataset_database.graph_database_name ), } + # graph_config = { + # "graph_database_provider": "kuzu", + # "graph_file_path": os.path.join( + # databases_directory_path, dataset_database.graph_database_name + # ), + # } storage_config = { "data_root_directory": data_root_directory, diff --git a/cognee/infrastructure/databases/graph/get_graph_engine.py b/cognee/infrastructure/databases/graph/get_graph_engine.py index 1ea61d29f..217f63070 100644 --- a/cognee/infrastructure/databases/graph/get_graph_engine.py +++ b/cognee/infrastructure/databases/graph/get_graph_engine.py @@ -69,6 +69,7 @@ def create_graph_engine( graph_database_url=graph_database_url, 
graph_database_username=graph_database_username, graph_database_password=graph_database_password, + graph_name=graph_database_name, ) if graph_database_provider == "neo4j": diff --git a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py index 29156025d..2b9b00569 100644 --- a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +++ b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py @@ -6,11 +6,20 @@ from sqlalchemy.exc import IntegrityError from cognee.modules.data.methods import create_dataset from cognee.infrastructure.databases.relational import get_relational_engine +from cognee.infrastructure.databases.vector import get_vectordb_config +from cognee.infrastructure.databases.graph.config import get_graph_config from cognee.modules.data.methods import get_unique_dataset_id from cognee.modules.users.models import DatasetDatabase from cognee.modules.users.models import User +# TODO: Find a better place to define these +default_vector_db_name = "lance.db" +default_vector_db_provider = "lancedb" +default_graph_db_provider = "kuzu" +default_vector_db_url = None +default_graph_db_url = None + async def get_or_create_dataset_database( dataset: Union[str, UUID], user: User, @@ -32,9 +41,12 @@ async def get_or_create_dataset_database( dataset_id = await get_unique_dataset_id(dataset, user) - vector_db_name = f"{dataset_id}.lance.db" + vector_db_name = f"{dataset_id}.db" graph_db_name = f"{dataset_id}.pkl" + vector_config = get_vectordb_config() + graph_config = get_graph_config() + async with db_engine.get_async_session() as session: # Create dataset if it doesn't exist if isinstance(dataset, str): @@ -49,12 +61,19 @@ async def get_or_create_dataset_database( if existing: return existing + # TODO: Set the vector and graph database stuff (name, provider, etc.) based on the whether or + # TODO: not we support multi user for that db. 
If not, set to default, which is lance and/or kuzu. + # If there are no existing rows build a new row record = DatasetDatabase( owner_id=user.id, dataset_id=dataset_id, vector_database_name=vector_db_name, graph_database_name=graph_db_name, + vector_database_provider=vector_config.vector_db_provider, + graph_database_provider=graph_config.graph_database_provider, + vector_database_url=vector_config.vector_db_url, + graph_database_url=graph_config.graph_database_url, ) try: diff --git a/cognee/infrastructure/databases/vector/create_vector_engine.py b/cognee/infrastructure/databases/vector/create_vector_engine.py index 639bbb9f6..7e3fb367f 100644 --- a/cognee/infrastructure/databases/vector/create_vector_engine.py +++ b/cognee/infrastructure/databases/vector/create_vector_engine.py @@ -1,5 +1,6 @@ from .supported_databases import supported_databases from .embeddings import get_embedding_engine +from cognee.infrastructure.databases.graph.config import get_graph_config from functools import lru_cache @@ -45,6 +46,7 @@ def create_vector_engine( url=vector_db_url, api_key=vector_db_key, embedding_engine=embedding_engine, + graph_name=get_graph_config().graph_database_name ) if vector_db_provider == "pgvector": diff --git a/cognee/modules/users/models/DatasetDatabase.py b/cognee/modules/users/models/DatasetDatabase.py index 0d71d8413..3d3899f4c 100644 --- a/cognee/modules/users/models/DatasetDatabase.py +++ b/cognee/modules/users/models/DatasetDatabase.py @@ -12,8 +12,15 @@ class DatasetDatabase(Base): UUID, ForeignKey("datasets.id", ondelete="CASCADE"), primary_key=True, index=True ) + # TODO: Why is this unique? Isn't it fact that two or more datasets can have the same vector and graph store? 
vector_database_name = Column(String, unique=True, nullable=False) graph_database_name = Column(String, unique=True, nullable=False) + vector_database_provider = Column(String, unique=True, nullable=False) + graph_database_provider = Column(String, unique=True, nullable=False) + + vector_database_url = Column(String, unique=True, nullable=True) + graph_database_url = Column(String, unique=True, nullable=True) + created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)) updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc)) From 897fbd2f09abfc1c3c5cc30fc2fcf17ed549ae80 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Mon, 27 Oct 2025 15:42:09 +0100 Subject: [PATCH 017/284] load test now uses s3 bucket --- cognee/tests/test_load.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/cognee/tests/test_load.py b/cognee/tests/test_load.py index 09e2db084..f8d007d28 100644 --- a/cognee/tests/test_load.py +++ b/cognee/tests/test_load.py @@ -17,7 +17,9 @@ async def process_and_search(num_of_searches): await asyncio.gather( *[ - cognee.search(query_text="Tell me about AI", query_type=SearchType.GRAPH_COMPLETION) + cognee.search( + query_text="Tell me about the document", query_type=SearchType.GRAPH_COMPLETION + ) for _ in range(num_of_searches) ] ) @@ -28,9 +30,6 @@ async def process_and_search(num_of_searches): async def main(): - file_path = os.path.join( - pathlib.Path(__file__).resolve().parent, "test_data/artificial-intelligence.pdf" - ) data_directory_path = str( pathlib.Path( os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_load") @@ -52,8 +51,8 @@ async def main(): await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) - for i in range(num_of_pdfs): - await cognee.add(file_path, dataset_name=f"dataset_{i}") + s3_input = "s3://cognee-load-test-s3-bucket" + await cognee.add(s3_input) recorded_times = await asyncio.gather( 
*[process_and_search(num_of_pdfs) for _ in range(num_of_reps)] From 2b083dd0f110e44341d30a6228abb18591cfabac Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Tue, 28 Oct 2025 09:27:33 +0100 Subject: [PATCH 018/284] small changes to load test --- cognee/tests/test_load.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cognee/tests/test_load.py b/cognee/tests/test_load.py index f8d007d28..a09ce053d 100644 --- a/cognee/tests/test_load.py +++ b/cognee/tests/test_load.py @@ -48,15 +48,15 @@ async def main(): upper_boundary_minutes = 10 average_minutes = 8 - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) + recorded_times = [] + for _ in range(num_of_reps): + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) - s3_input = "s3://cognee-load-test-s3-bucket" - await cognee.add(s3_input) + s3_input = "s3://cognee-test-load-s3-bucket" + await cognee.add(s3_input) - recorded_times = await asyncio.gather( - *[process_and_search(num_of_pdfs) for _ in range(num_of_reps)] - ) + recorded_times.append(await process_and_search(num_of_pdfs)) average_recorded_time = sum(recorded_times) / len(recorded_times) From bbcd8baf3a0b0b6ddd6cac94e12977c301ab0cd5 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Tue, 28 Oct 2025 17:56:32 +0100 Subject: [PATCH 019/284] feature: add multi-user for Falkor db --- .../infrastructure/databases/graph/config.py | 4 +++ .../databases/graph/get_graph_engine.py | 1 + .../utils/get_or_create_dataset_database.py | 35 +++++++++++++++---- .../databases/vector/create_vector_engine.py | 4 +-- .../modules/users/models/DatasetDatabase.py | 12 ++++--- 5 files changed, 42 insertions(+), 14 deletions(-) diff --git a/cognee/infrastructure/databases/graph/config.py b/cognee/infrastructure/databases/graph/config.py index b7907313c..23687b359 100644 --- a/cognee/infrastructure/databases/graph/config.py +++ b/cognee/infrastructure/databases/graph/config.py @@ -26,6 
+26,7 @@ class GraphConfig(BaseSettings): - graph_database_username - graph_database_password - graph_database_port + - graph_database_key - graph_file_path - graph_model - graph_topology @@ -41,6 +42,7 @@ class GraphConfig(BaseSettings): graph_database_username: str = "" graph_database_password: str = "" graph_database_port: int = 123 + graph_database_key: str = "" graph_file_path: str = "" graph_filename: str = "" graph_model: object = KnowledgeGraph @@ -90,6 +92,7 @@ class GraphConfig(BaseSettings): "graph_database_username": self.graph_database_username, "graph_database_password": self.graph_database_password, "graph_database_port": self.graph_database_port, + "graph_database_key": self.graph_database_key, "graph_file_path": self.graph_file_path, "graph_model": self.graph_model, "graph_topology": self.graph_topology, @@ -116,6 +119,7 @@ class GraphConfig(BaseSettings): "graph_database_username": self.graph_database_username, "graph_database_password": self.graph_database_password, "graph_database_port": self.graph_database_port, + "graph_database_key": self.graph_database_key, "graph_file_path": self.graph_file_path, } diff --git a/cognee/infrastructure/databases/graph/get_graph_engine.py b/cognee/infrastructure/databases/graph/get_graph_engine.py index 217f63070..70c27aab3 100644 --- a/cognee/infrastructure/databases/graph/get_graph_engine.py +++ b/cognee/infrastructure/databases/graph/get_graph_engine.py @@ -33,6 +33,7 @@ def create_graph_engine( graph_database_username="", graph_database_password="", graph_database_port="", + graph_database_key="", ): """ Create a graph engine based on the specified provider type. 
diff --git a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py index 2b9b00569..0af94fd3a 100644 --- a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +++ b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py @@ -14,11 +14,14 @@ from cognee.modules.users.models import User # TODO: Find a better place to define these -default_vector_db_name = "lance.db" default_vector_db_provider = "lancedb" default_graph_db_provider = "kuzu" default_vector_db_url = None default_graph_db_url = None +default_vector_db_key = None +default_graph_db_key = None +vector_dbs_with_multi_user_support = ["lancedb", "falkor"] +graph_dbs_with_multi_user_support = ["kuzu", "falkor"] async def get_or_create_dataset_database( dataset: Union[str, UUID], @@ -61,8 +64,24 @@ async def get_or_create_dataset_database( if existing: return existing - # TODO: Set the vector and graph database stuff (name, provider, etc.) based on the whether or - # TODO: not we support multi user for that db. If not, set to default, which is lance and/or kuzu. + # Check if we support multi-user for this provider. 
If not, use default + if graph_config.graph_database_provider in graph_dbs_with_multi_user_support: + graph_provider = graph_config.graph_database_provider + graph_url = graph_config.graph_database_url + graph_key = graph_config.graph_database_key + else: + graph_provider = default_graph_db_provider + graph_url = default_graph_db_url + graph_key = default_graph_db_key + + if vector_config.vector_db_provider in vector_dbs_with_multi_user_support: + vector_provider = vector_config.vector_db_provider + vector_url = vector_config.vector_db_url + vector_key = vector_config.vector_db_key + else: + vector_provider = default_vector_db_provider + vector_url = default_vector_db_url + vector_key = default_vector_db_key # If there are no existing rows build a new row record = DatasetDatabase( @@ -70,10 +89,12 @@ async def get_or_create_dataset_database( dataset_id=dataset_id, vector_database_name=vector_db_name, graph_database_name=graph_db_name, - vector_database_provider=vector_config.vector_db_provider, - graph_database_provider=graph_config.graph_database_provider, - vector_database_url=vector_config.vector_db_url, - graph_database_url=graph_config.graph_database_url, + vector_database_provider=vector_provider, + graph_database_provider=graph_provider, + vector_database_url=vector_url, + graph_database_url=graph_url, + vector_database_key=vector_key, + graph_database_key=graph_key, ) try: diff --git a/cognee/infrastructure/databases/vector/create_vector_engine.py b/cognee/infrastructure/databases/vector/create_vector_engine.py index 7e3fb367f..35bbc110a 100644 --- a/cognee/infrastructure/databases/vector/create_vector_engine.py +++ b/cognee/infrastructure/databases/vector/create_vector_engine.py @@ -1,6 +1,6 @@ from .supported_databases import supported_databases from .embeddings import get_embedding_engine -from cognee.infrastructure.databases.graph.config import get_graph_config +from cognee.infrastructure.databases.graph.config import get_graph_context_config from 
functools import lru_cache @@ -46,7 +46,7 @@ def create_vector_engine( url=vector_db_url, api_key=vector_db_key, embedding_engine=embedding_engine, - graph_name=get_graph_config().graph_database_name + graph_name=get_graph_context_config()["graph_database_name"], ) if vector_db_provider == "pgvector": diff --git a/cognee/modules/users/models/DatasetDatabase.py b/cognee/modules/users/models/DatasetDatabase.py index 3d3899f4c..25d610ab9 100644 --- a/cognee/modules/users/models/DatasetDatabase.py +++ b/cognee/modules/users/models/DatasetDatabase.py @@ -12,15 +12,17 @@ class DatasetDatabase(Base): UUID, ForeignKey("datasets.id", ondelete="CASCADE"), primary_key=True, index=True ) - # TODO: Why is this unique? Isn't it fact that two or more datasets can have the same vector and graph store? vector_database_name = Column(String, unique=True, nullable=False) graph_database_name = Column(String, unique=True, nullable=False) - vector_database_provider = Column(String, unique=True, nullable=False) - graph_database_provider = Column(String, unique=True, nullable=False) + vector_database_provider = Column(String, unique=False, nullable=False) + graph_database_provider = Column(String, unique=False, nullable=False) - vector_database_url = Column(String, unique=True, nullable=True) - graph_database_url = Column(String, unique=True, nullable=True) + vector_database_url = Column(String, unique=False, nullable=True) + graph_database_url = Column(String, unique=False, nullable=True) + + vector_database_key = Column(String, unique=False, nullable=True) + graph_database_key = Column(String, unique=False, nullable=True) created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)) updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc)) From 9c9395851c4084b0240ad328cab077e04c4bdcce Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Tue, 28 Oct 2025 18:01:32 +0100 Subject: [PATCH 020/284] chore: ruff formatting --- 
cognee/context_global_variables.py | 2 +- .../databases/utils/get_or_create_dataset_database.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py index 9a4f49763..ee2e37030 100644 --- a/cognee/context_global_variables.py +++ b/cognee/context_global_variables.py @@ -60,7 +60,7 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_ "vector_db_provider": dataset_database.vector_database_provider, "vector_db_url": dataset_database.vector_database_url, # TODO: Maybe add key to dataset_database, and put it here?? - "vector_db_key": "" + "vector_db_key": "", } # vector_config = { diff --git a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py index 0af94fd3a..1552a7bbc 100644 --- a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +++ b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py @@ -23,6 +23,7 @@ default_graph_db_key = None vector_dbs_with_multi_user_support = ["lancedb", "falkor"] graph_dbs_with_multi_user_support = ["kuzu", "falkor"] + async def get_or_create_dataset_database( dataset: Union[str, UUID], user: User, From c3f0cb95da2d61dd9523079c1977a67dd031c7c6 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Tue, 28 Oct 2025 18:06:04 +0100 Subject: [PATCH 021/284] fix: delete unnecessary comments, add to config --- cognee/context_global_variables.py | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py index ee2e37030..09a351f15 100644 --- a/cognee/context_global_variables.py +++ b/cognee/context_global_variables.py @@ -59,32 +59,19 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_ vector_config = { "vector_db_provider": dataset_database.vector_database_provider, 
"vector_db_url": dataset_database.vector_database_url, - # TODO: Maybe add key to dataset_database, and put it here?? - "vector_db_key": "", + "vector_db_key": dataset_database.vector_database_key, + "vector_db_name": dataset_database.vector_database_name, } - # vector_config = { - # "vector_db_url": os.path.join( - # databases_directory_path, dataset_database.vector_database_name - # ), - # "vector_db_key": "", - # "vector_db_provider": "lancedb", - # } - graph_config = { "graph_database_provider": dataset_database.graph_database_provider, "graph_database_url": dataset_database.graph_database_url, "graph_database_name": dataset_database.graph_database_name, + "graph_database_key": dataset_database.graph_database_key, "graph_file_path": os.path.join( databases_directory_path, dataset_database.graph_database_name ), } - # graph_config = { - # "graph_database_provider": "kuzu", - # "graph_file_path": os.path.join( - # databases_directory_path, dataset_database.graph_database_name - # ), - # } storage_config = { "data_root_directory": data_root_directory, From fb7e74eaa8d8bdae50021e0a24e7e6bf6bfc2a09 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 29 Oct 2025 16:28:09 +0100 Subject: [PATCH 022/284] refactor: Enable multi user mode by default if graph and vector db providers support it --- .env.template | 5 ++- cognee/context_global_variables.py | 43 ++++++++++++++++++- cognee/modules/search/methods/search.py | 6 +-- .../users/methods/get_authenticated_user.py | 3 +- .../relational_database_migration_example.py | 3 ++ logs/.gitkeep | 0 logs/README.md | 31 ------------- 7 files changed, 53 insertions(+), 38 deletions(-) delete mode 100644 logs/.gitkeep delete mode 100644 logs/README.md diff --git a/.env.template b/.env.template index 89ac06830..8e1bdd23f 100644 --- a/.env.template +++ b/.env.template @@ -169,8 +169,9 @@ REQUIRE_AUTHENTICATION=False # Vector: LanceDB # Graph: KuzuDB # -# It enforces LanceDB and KuzuDB use and uses them to create databases per Cognee 
user + dataset -ENABLE_BACKEND_ACCESS_CONTROL=False +# It enforces creation of databases per Cognee user + dataset. Does not work with some graph and database providers. +# Disable mode when using not supported graph/vector databases. +ENABLE_BACKEND_ACCESS_CONTROL=True ################################################################################ # ☁️ Cloud Sync Settings diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py index d52de4b4e..8ad855724 100644 --- a/cognee/context_global_variables.py +++ b/cognee/context_global_variables.py @@ -4,6 +4,8 @@ from typing import Union from uuid import UUID from cognee.base_config import get_base_config +from cognee.infrastructure.databases.vector.config import get_vectordb_context_config +from cognee.infrastructure.databases.graph.config import get_graph_context_config from cognee.infrastructure.databases.utils import get_or_create_dataset_database from cognee.infrastructure.files.storage.config import file_storage_config from cognee.modules.users.methods import get_user @@ -14,11 +16,50 @@ vector_db_config = ContextVar("vector_db_config", default=None) graph_db_config = ContextVar("graph_db_config", default=None) session_user = ContextVar("session_user", default=None) +vector_dbs_with_multi_user_support = ["lancedb"] +graph_dbs_with_multi_user_support = ["kuzu"] + async def set_session_user_context_variable(user): session_user.set(user) +def check_multi_user_support(): + graph_db_config = get_graph_context_config() + vector_db_config = get_vectordb_context_config() + if ( + graph_db_config["graph_database_provider"] in graph_dbs_with_multi_user_support + and vector_db_config["vector_db_provider"] in vector_dbs_with_multi_user_support + ): + return True + else: + return False + + +def check_backend_access_control_mode(): + backend_access_control = os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL", None) + if backend_access_control is None: + # If backend access control is not defined in 
environment variables, + # enable it by default if graph and vector DBs can support it, otherwise disable it + multi_user_support = check_multi_user_support() + if multi_user_support: + return "true" + else: + return "false" + elif backend_access_control.lower() == "true": + # If enabled, ensure that the current graph and vector DBs can support it + multi_user_support = check_multi_user_support() + if not multi_user_support: + raise EnvironmentError( + "ENABLE_BACKEND_ACCESS_CONTROL is set to true but the current graph and/or vector databases do not support multi-user access control. Please use supported databases or disable backend access control." + ) + else: + return "true" + else: + # If explicitly disabled, return false + return "false" + + async def set_database_global_context_variables(dataset: Union[str, UUID], user_id: UUID): """ If backend access control is enabled this function will ensure all datasets have their own databases, @@ -40,7 +81,7 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_ base_config = get_base_config() - if not os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true": + if not check_backend_access_control_mode() == "true": return user = await get_user(user_id) diff --git a/cognee/modules/search/methods/search.py b/cognee/modules/search/methods/search.py index aab004924..e3d7c220e 100644 --- a/cognee/modules/search/methods/search.py +++ b/cognee/modules/search/methods/search.py @@ -1,4 +1,3 @@ -import os import json import asyncio from uuid import UUID @@ -9,6 +8,7 @@ from cognee.infrastructure.databases.graph import get_graph_engine from cognee.shared.logging_utils import get_logger from cognee.shared.utils import send_telemetry from cognee.context_global_variables import set_database_global_context_variables +from cognee.context_global_variables import check_backend_access_control_mode from cognee.modules.engine.models.node_set import NodeSet from 
cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge @@ -74,7 +74,7 @@ async def search( ) # Use search function filtered by permissions if access control is enabled - if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true": + if check_backend_access_control_mode() == "true": search_results = await authorized_search( query_type=query_type, query_text=query_text, @@ -156,7 +156,7 @@ async def search( ) else: # This is for maintaining backwards compatibility - if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true": + if check_backend_access_control_mode() == "true": return_value = [] for search_result in search_results: prepared_search_results = await prepare_search_result(search_result) diff --git a/cognee/modules/users/methods/get_authenticated_user.py b/cognee/modules/users/methods/get_authenticated_user.py index d78215892..34d82586e 100644 --- a/cognee/modules/users/methods/get_authenticated_user.py +++ b/cognee/modules/users/methods/get_authenticated_user.py @@ -5,6 +5,7 @@ from ..models import User from ..get_fastapi_users import get_fastapi_users from .get_default_user import get_default_user from cognee.shared.logging_utils import get_logger +from cognee.context_global_variables import check_backend_access_control_mode logger = get_logger("get_authenticated_user") @@ -12,7 +13,7 @@ logger = get_logger("get_authenticated_user") # Check environment variable to determine authentication requirement REQUIRE_AUTHENTICATION = ( os.getenv("REQUIRE_AUTHENTICATION", "false").lower() == "true" - or os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true" + or check_backend_access_control_mode() == "true" ) fastapi_users = get_fastapi_users() diff --git a/examples/python/relational_database_migration_example.py b/examples/python/relational_database_migration_example.py index 7e87347bc..98482cb4b 100644 --- a/examples/python/relational_database_migration_example.py +++ 
b/examples/python/relational_database_migration_example.py @@ -31,6 +31,9 @@ from cognee.infrastructure.databases.vector.pgvector import ( async def main(): + # Disable backend access control to migrate relational data + os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "false" + # Clean all data stored in Cognee await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) diff --git a/logs/.gitkeep b/logs/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/logs/README.md b/logs/README.md deleted file mode 100644 index 96ef613b5..000000000 --- a/logs/README.md +++ /dev/null @@ -1,31 +0,0 @@ -# Logs Directory - -This directory contains the application logs for Cognee. - -## Log Files - -- Log files are named by date in the format `YYYY-MM-DD_HH-MM-SS.log` -- Logs are stored in plain text format with a consistent structure -- Each log entry includes: - - Timestamp (ISO format) - - Log level (padded to consistent width) - - Message - - Additional context (if any) - - Logger name (in square brackets) -- Exception tracebacks are included for error logs - -## Sample Log Entry - -``` -2025-03-27T13:05:27.481446Z [INFO ] Structured log message user_id=user123 action=login status=success [TestLogger] -``` - -## Retention Policy - -The system automatically keeps only the 10 most recent log files. Older log files are automatically deleted when new log files are created. This prevents excessive disk usage in long-running deployments. - -## Usage - -Logs are automatically generated by the application's logging mechanism. No manual actions are required to use this feature. - -The logs directory structure is preserved in version control, but the log files themselves are gitignored. 
From 70f3ced15af7f4f778de94769fa5afa6405d1772 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Wed, 29 Oct 2025 16:30:13 +0100 Subject: [PATCH 023/284] fix: PR comment fixes --- .../databases/graph/get_graph_engine.py | 2 +- .../databases/utils/constants.py | 4 ++ .../utils/get_or_create_dataset_database.py | 70 +++++++++---------- .../infrastructure/databases/vector/config.py | 3 + .../databases/vector/create_vector_engine.py | 4 +- 5 files changed, 44 insertions(+), 39 deletions(-) create mode 100644 cognee/infrastructure/databases/utils/constants.py diff --git a/cognee/infrastructure/databases/graph/get_graph_engine.py b/cognee/infrastructure/databases/graph/get_graph_engine.py index 70c27aab3..82e3cad6e 100644 --- a/cognee/infrastructure/databases/graph/get_graph_engine.py +++ b/cognee/infrastructure/databases/graph/get_graph_engine.py @@ -70,7 +70,7 @@ def create_graph_engine( graph_database_url=graph_database_url, graph_database_username=graph_database_username, graph_database_password=graph_database_password, - graph_name=graph_database_name, + database_name=graph_database_name, ) if graph_database_provider == "neo4j": diff --git a/cognee/infrastructure/databases/utils/constants.py b/cognee/infrastructure/databases/utils/constants.py new file mode 100644 index 000000000..fe6390a07 --- /dev/null +++ b/cognee/infrastructure/databases/utils/constants.py @@ -0,0 +1,4 @@ +VECTOR_DBS_WITH_MULTI_USER_SUPPORT = ["lancedb", "falkor"] +GRAPH_DBS_WITH_MULTI_USER_SUPPORT = ["kuzu", "falkor"] + +HYBRID_DBS = ["falkor"] diff --git a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py index 1552a7bbc..deea46541 100644 --- a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +++ b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py @@ -11,17 +11,11 @@ from cognee.infrastructure.databases.graph.config import get_graph_config from 
cognee.modules.data.methods import get_unique_dataset_id from cognee.modules.users.models import DatasetDatabase from cognee.modules.users.models import User - - -# TODO: Find a better place to define these -default_vector_db_provider = "lancedb" -default_graph_db_provider = "kuzu" -default_vector_db_url = None -default_graph_db_url = None -default_vector_db_key = None -default_graph_db_key = None -vector_dbs_with_multi_user_support = ["lancedb", "falkor"] -graph_dbs_with_multi_user_support = ["kuzu", "falkor"] +from .constants import ( + GRAPH_DBS_WITH_MULTI_USER_SUPPORT, + VECTOR_DBS_WITH_MULTI_USER_SUPPORT, + HYBRID_DBS, +) async def get_or_create_dataset_database( @@ -45,12 +39,19 @@ async def get_or_create_dataset_database( dataset_id = await get_unique_dataset_id(dataset, user) - vector_db_name = f"{dataset_id}.db" - graph_db_name = f"{dataset_id}.pkl" - vector_config = get_vectordb_config() graph_config = get_graph_config() + graph_db_name = f"{dataset_id}.pkl" + + if graph_config.graph_database_provider in HYBRID_DBS: + vector_db_name = graph_db_name + else: + if vector_config.vector_database_provider == "lancedb": + vector_db_name = f"{dataset_id}.lance.db" + else: + vector_db_name = f"{dataset_id}.db" + async with db_engine.get_async_session() as session: # Create dataset if it doesn't exist if isinstance(dataset, str): @@ -66,23 +67,18 @@ async def get_or_create_dataset_database( return existing # Check if we support multi-user for this provider. 
If not, use default - if graph_config.graph_database_provider in graph_dbs_with_multi_user_support: - graph_provider = graph_config.graph_database_provider - graph_url = graph_config.graph_database_url - graph_key = graph_config.graph_database_key - else: - graph_provider = default_graph_db_provider - graph_url = default_graph_db_url - graph_key = default_graph_db_key - - if vector_config.vector_db_provider in vector_dbs_with_multi_user_support: - vector_provider = vector_config.vector_db_provider - vector_url = vector_config.vector_db_url - vector_key = vector_config.vector_db_key - else: - vector_provider = default_vector_db_provider - vector_url = default_vector_db_url - vector_key = default_vector_db_key + if graph_config.graph_database_provider not in GRAPH_DBS_WITH_MULTI_USER_SUPPORT: + raise EnvironmentError( + f"Multi-user is currently not supported for the graph database provider: {graph_config.graph_database_provider}. " + f"Supported providers are: {', '.join(GRAPH_DBS_WITH_MULTI_USER_SUPPORT)}. Either use one of these" + f"providers, or disable BACKEND_ACCESS_CONTROL" + ) + if vector_config.vector_db_provider not in VECTOR_DBS_WITH_MULTI_USER_SUPPORT: + raise EnvironmentError( + f"Multi-user is currently not supported for the vector database provider: {vector_config.vector_db_provider}. " + f"Supported providers are: {', '.join(VECTOR_DBS_WITH_MULTI_USER_SUPPORT)}. 
Either use one of these" + f"providers, or disable BACKEND_ACCESS_CONTROL" + ) # If there are no existing rows build a new row record = DatasetDatabase( @@ -90,12 +86,12 @@ async def get_or_create_dataset_database( dataset_id=dataset_id, vector_database_name=vector_db_name, graph_database_name=graph_db_name, - vector_database_provider=vector_provider, - graph_database_provider=graph_provider, - vector_database_url=vector_url, - graph_database_url=graph_url, - vector_database_key=vector_key, - graph_database_key=graph_key, + vector_database_provider=vector_config.vector_db_provider, + graph_database_provider=graph_config.graph_database_provider, + vector_database_url=vector_config.vector_db_url, + graph_database_url=graph_config.graph_database_url, + vector_database_key=vector_config.vector_db_key, + graph_database_key=graph_config.graph_database_key, ) try: diff --git a/cognee/infrastructure/databases/vector/config.py b/cognee/infrastructure/databases/vector/config.py index b6d3ae644..7d28f1668 100644 --- a/cognee/infrastructure/databases/vector/config.py +++ b/cognee/infrastructure/databases/vector/config.py @@ -18,12 +18,14 @@ class VectorConfig(BaseSettings): Instance variables: - vector_db_url: The URL of the vector database. - vector_db_port: The port for the vector database. + - vector_db_name: The name of the vector database. - vector_db_key: The key for accessing the vector database. - vector_db_provider: The provider for the vector database. 
""" vector_db_url: str = "" vector_db_port: int = 1234 + vector_db_name: str = "" vector_db_key: str = "" vector_db_provider: str = "lancedb" @@ -58,6 +60,7 @@ class VectorConfig(BaseSettings): return { "vector_db_url": self.vector_db_url, "vector_db_port": self.vector_db_port, + "vector_db_name": self.vector_db_name, "vector_db_key": self.vector_db_key, "vector_db_provider": self.vector_db_provider, } diff --git a/cognee/infrastructure/databases/vector/create_vector_engine.py b/cognee/infrastructure/databases/vector/create_vector_engine.py index 35bbc110a..3fe926978 100644 --- a/cognee/infrastructure/databases/vector/create_vector_engine.py +++ b/cognee/infrastructure/databases/vector/create_vector_engine.py @@ -9,6 +9,7 @@ from functools import lru_cache def create_vector_engine( vector_db_provider: str, vector_db_url: str, + vector_db_name: str, vector_db_port: str = "", vector_db_key: str = "", ): @@ -28,6 +29,7 @@ def create_vector_engine( - vector_db_url (str): The URL for the vector database instance. - vector_db_port (str): The port for the vector database instance. Required for some providers. + - vector_db_name (str): The name of the vector database instance. - vector_db_key (str): The API key or access token for the vector database instance. - vector_db_provider (str): The name of the vector database provider to use (e.g., 'pgvector'). 
@@ -46,7 +48,7 @@ def create_vector_engine( url=vector_db_url, api_key=vector_db_key, embedding_engine=embedding_engine, - graph_name=get_graph_context_config()["graph_database_name"], + database_name=vector_db_name, ) if vector_db_provider == "pgvector": From 6a7660a7c10892657422307b90788d0d6f80b8ab Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 29 Oct 2025 16:31:42 +0100 Subject: [PATCH 024/284] refactor: Return logs folder --- logs/.gitkeep | 0 logs/README.md | 31 +++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 logs/.gitkeep create mode 100644 logs/README.md diff --git a/logs/.gitkeep b/logs/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/logs/README.md b/logs/README.md new file mode 100644 index 000000000..96ef613b5 --- /dev/null +++ b/logs/README.md @@ -0,0 +1,31 @@ +# Logs Directory + +This directory contains the application logs for Cognee. + +## Log Files + +- Log files are named by date in the format `YYYY-MM-DD_HH-MM-SS.log` +- Logs are stored in plain text format with a consistent structure +- Each log entry includes: + - Timestamp (ISO format) + - Log level (padded to consistent width) + - Message + - Additional context (if any) + - Logger name (in square brackets) +- Exception tracebacks are included for error logs + +## Sample Log Entry + +``` +2025-03-27T13:05:27.481446Z [INFO ] Structured log message user_id=user123 action=login status=success [TestLogger] +``` + +## Retention Policy + +The system automatically keeps only the 10 most recent log files. Older log files are automatically deleted when new log files are created. This prevents excessive disk usage in long-running deployments. + +## Usage + +Logs are automatically generated by the application's logging mechanism. No manual actions are required to use this feature. + +The logs directory structure is preserved in version control, but the log files themselves are gitignored. 
From 6572cf5cb9bcd7bc3906dc9149a22759069e79b2 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 29 Oct 2025 16:35:44 +0100 Subject: [PATCH 025/284] refactor: use boolean instead of string --- cognee/context_global_variables.py | 10 +++++----- cognee/modules/search/methods/search.py | 4 ++-- cognee/modules/users/methods/get_authenticated_user.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py index 8ad855724..b4b848192 100644 --- a/cognee/context_global_variables.py +++ b/cognee/context_global_variables.py @@ -43,9 +43,9 @@ def check_backend_access_control_mode(): # enable it by default if graph and vector DBs can support it, otherwise disable it multi_user_support = check_multi_user_support() if multi_user_support: - return "true" + return True else: - return "false" + return False elif backend_access_control.lower() == "true": # If enabled, ensure that the current graph and vector DBs can support it multi_user_support = check_multi_user_support() @@ -54,10 +54,10 @@ def check_backend_access_control_mode(): "ENABLE_BACKEND_ACCESS_CONTROL is set to true but the current graph and/or vector databases do not support multi-user access control. Please use supported databases or disable backend access control." 
) else: - return "true" + return True else: # If explicitly disabled, return false - return "false" + return False async def set_database_global_context_variables(dataset: Union[str, UUID], user_id: UUID): @@ -81,7 +81,7 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_ base_config = get_base_config() - if not check_backend_access_control_mode() == "true": + if not check_backend_access_control_mode(): return user = await get_user(user_id) diff --git a/cognee/modules/search/methods/search.py b/cognee/modules/search/methods/search.py index e3d7c220e..4a67093e8 100644 --- a/cognee/modules/search/methods/search.py +++ b/cognee/modules/search/methods/search.py @@ -74,7 +74,7 @@ async def search( ) # Use search function filtered by permissions if access control is enabled - if check_backend_access_control_mode() == "true": + if check_backend_access_control_mode(): search_results = await authorized_search( query_type=query_type, query_text=query_text, @@ -156,7 +156,7 @@ async def search( ) else: # This is for maintaining backwards compatibility - if check_backend_access_control_mode() == "true": + if check_backend_access_control_mode(): return_value = [] for search_result in search_results: prepared_search_results = await prepare_search_result(search_result) diff --git a/cognee/modules/users/methods/get_authenticated_user.py b/cognee/modules/users/methods/get_authenticated_user.py index 34d82586e..3cc16f3a8 100644 --- a/cognee/modules/users/methods/get_authenticated_user.py +++ b/cognee/modules/users/methods/get_authenticated_user.py @@ -13,7 +13,7 @@ logger = get_logger("get_authenticated_user") # Check environment variable to determine authentication requirement REQUIRE_AUTHENTICATION = ( os.getenv("REQUIRE_AUTHENTICATION", "false").lower() == "true" - or check_backend_access_control_mode() == "true" + or check_backend_access_control_mode() ) fastapi_users = get_fastapi_users() From d1581e9ebab143930acf1cec404dda083fb06a9f Mon Sep 17 
00:00:00 2001 From: Igor Ilic Date: Wed, 29 Oct 2025 17:36:56 +0100 Subject: [PATCH 026/284] refactor: disable permissions for code graph example --- examples/python/code_graph_example.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/examples/python/code_graph_example.py b/examples/python/code_graph_example.py index 431069050..1b476a2c3 100644 --- a/examples/python/code_graph_example.py +++ b/examples/python/code_graph_example.py @@ -1,5 +1,7 @@ import argparse import asyncio +import os + import cognee from cognee import SearchType from cognee.shared.logging_utils import setup_logging, ERROR @@ -8,6 +10,9 @@ from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline async def main(repo_path, include_docs): + # Disable permissions feature for this example + os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "false" + run_status = False async for run_status in run_code_graph_pipeline(repo_path, include_docs=include_docs): run_status = run_status From eec96e4f1fb30e692b6e448aa9b1e553c0fead98 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 29 Oct 2025 19:14:53 +0100 Subject: [PATCH 027/284] refactor: fix search result for library test --- cognee/tests/test_library.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/cognee/tests/test_library.py b/cognee/tests/test_library.py index 81f81ee61..893b836c0 100755 --- a/cognee/tests/test_library.py +++ b/cognee/tests/test_library.py @@ -90,15 +90,17 @@ async def main(): ) search_results = await cognee.search( - query_type=SearchType.GRAPH_COMPLETION, query_text="What information do you contain?" 
+ query_type=SearchType.GRAPH_COMPLETION, + query_text="What information do you contain?", + dataset_ids=[pipeline_run_obj.dataset_id], ) - assert "Mark" in search_results[0], ( + assert "Mark" in search_results[0]["search_result"][0], ( "Failed to update document, no mention of Mark in search results" ) - assert "Cindy" in search_results[0], ( + assert "Cindy" in search_results[0]["search_result"][0], ( "Failed to update document, no mention of Cindy in search results" ) - assert "Artificial intelligence" not in search_results[0], ( + assert "Artificial intelligence" not in search_results[0]["search_result"][0], ( "Failed to update document, Artificial intelligence still mentioned in search results" ) From e5df629ff32d84a240b7a6473b3add1b95f91691 Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Thu, 30 Oct 2025 12:51:43 +0300 Subject: [PATCH 028/284] CI: Extract Windows and MacOS tests to separate job --- .../test_different_operating_systems.yml | 16 ++++++++----- .github/workflows/test_suites.yml | 23 +++++++++++++++---- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/.github/workflows/test_different_operating_systems.yml b/.github/workflows/test_different_operating_systems.yml index 64f1a14f9..53745ebee 100644 --- a/.github/workflows/test_different_operating_systems.yml +++ b/.github/workflows/test_different_operating_systems.yml @@ -10,6 +10,10 @@ on: required: false type: string default: '["3.10.x", "3.12.x", "3.13.x"]' + os: + required: false + type: string + default: '["ubuntu-22.04", "macos-15", "windows-latest"]' secrets: LLM_PROVIDER: required: true @@ -43,7 +47,7 @@ jobs: strategy: matrix: python-version: ${{ fromJSON(inputs.python-versions) }} - os: [ubuntu-22.04, macos-15, windows-latest] + os: ${{ fromJSON(inputs.os) }} fail-fast: false steps: - name: Check out @@ -79,7 +83,7 @@ jobs: strategy: matrix: python-version: ${{ fromJSON(inputs.python-versions) }} - os: [ ubuntu-22.04, macos-15, windows-latest ] + os: ${{ fromJSON(inputs.os) 
}} fail-fast: false steps: - name: Check out @@ -115,7 +119,7 @@ jobs: strategy: matrix: python-version: ${{ fromJSON(inputs.python-versions) }} - os: [ ubuntu-22.04, macos-15, windows-latest ] + os: ${{ fromJSON(inputs.os) }} fail-fast: false steps: - name: Check out @@ -151,7 +155,7 @@ jobs: strategy: matrix: python-version: ${{ fromJSON(inputs.python-versions) }} - os: [ ubuntu-22.04, macos-15, windows-latest ] + os: ${{ fromJSON(inputs.os) }} fail-fast: false steps: - name: Check out @@ -180,7 +184,7 @@ jobs: strategy: matrix: python-version: ${{ fromJSON(inputs.python-versions) }} - os: [ ubuntu-22.04, macos-15, windows-latest ] + os: ${{ fromJSON(inputs.os) }} fail-fast: false steps: - name: Check out @@ -217,7 +221,7 @@ jobs: strategy: matrix: python-version: ${{ fromJSON(inputs.python-versions) }} - os: [ ubuntu-22.04, macos-15, windows-latest ] + os: ${{ fromJSON(inputs.os) }} fail-fast: false steps: - name: Check out diff --git a/.github/workflows/test_suites.yml b/.github/workflows/test_suites.yml index 5c1597a93..43099c03b 100644 --- a/.github/workflows/test_suites.yml +++ b/.github/workflows/test_suites.yml @@ -80,14 +80,24 @@ jobs: uses: ./.github/workflows/notebooks_tests.yml secrets: inherit - different-operating-systems-tests: - name: Operating System and Python Tests + different-os-tests-basic: + name: OS and Python Tests Ubuntu needs: [basic-tests, e2e-tests] uses: ./.github/workflows/test_different_operating_systems.yml with: python-versions: '["3.10.x", "3.11.x", "3.12.x", "3.13.x"]' + os: '["ubuntu-22.04"]' secrets: inherit + different-os-tests-extended: + name: OS and Python Tests Extended + needs: [basic-tests, e2e-tests] + uses: ./.github/workflows/test_different_operating_systems.yml + with: + python-versions: '["3.13.x"]' + os: '["macos-15", "windows-latest"]' + secrets: inherit + # Matrix-based vector database tests vector-db-tests: name: Vector DB Tests @@ -135,7 +145,8 @@ jobs: e2e-tests, graph-db-tests, notebook-tests, - 
different-operating-systems-tests, + different-os-tests-basic, + different-os-tests-extended, vector-db-tests, example-tests, llm-tests, @@ -155,7 +166,8 @@ jobs: cli-tests, graph-db-tests, notebook-tests, - different-operating-systems-tests, + different-os-tests-basic, + different-os-tests-extended, vector-db-tests, example-tests, db-examples-tests, @@ -176,7 +188,8 @@ jobs: "${{ needs.cli-tests.result }}" == "success" && "${{ needs.graph-db-tests.result }}" == "success" && "${{ needs.notebook-tests.result }}" == "success" && - "${{ needs.different-operating-systems-tests.result }}" == "success" && + "${{ needs.different-os-tests-basic.result }}" == "success" && + "${{ needs.different-os-tests-extended.result }}" == "success" && "${{ needs.vector-db-tests.result }}" == "success" && "${{ needs.example-tests.result }}" == "success" && "${{ needs.db-examples-tests.result }}" == "success" && From b30b52921a41132980f77f140e19519d0de23f4a Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Thu, 30 Oct 2025 13:08:09 +0300 Subject: [PATCH 029/284] Remove trailing whitespaces --- .github/workflows/test_different_operating_systems.yml | 4 ++-- .github/workflows/test_suites.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test_different_operating_systems.yml b/.github/workflows/test_different_operating_systems.yml index 53745ebee..b6b3f7b3c 100644 --- a/.github/workflows/test_different_operating_systems.yml +++ b/.github/workflows/test_different_operating_systems.yml @@ -13,7 +13,7 @@ on: os: required: false type: string - default: '["ubuntu-22.04", "macos-15", "windows-latest"]' + default: '["ubuntu-22.04", "macos-15", "windows-latest"]' secrets: LLM_PROVIDER: required: true @@ -119,7 +119,7 @@ jobs: strategy: matrix: python-version: ${{ fromJSON(inputs.python-versions) }} - os: ${{ fromJSON(inputs.os) }} + os: ${{ fromJSON(inputs.os) }} fail-fast: false steps: - name: Check out diff --git a/.github/workflows/test_suites.yml 
b/.github/workflows/test_suites.yml index 43099c03b..44cfb03cf 100644 --- a/.github/workflows/test_suites.yml +++ b/.github/workflows/test_suites.yml @@ -96,7 +96,7 @@ jobs: with: python-versions: '["3.13.x"]' os: '["macos-15", "windows-latest"]' - secrets: inherit + secrets: inherit # Matrix-based vector database tests vector-db-tests: From 6223ecf05ba662f08b7d560196f43df248114adb Mon Sep 17 00:00:00 2001 From: lxobr <122801072+lxobr@users.noreply.github.com> Date: Thu, 30 Oct 2025 13:56:06 +0100 Subject: [PATCH 030/284] feat: optimize repeated entity extraction (#1682) ## Description - Added an `edge_text` field to edges that auto-fills from `relationship_type` if not provided. - Containts edges now store descriptions for better embedding - Updated and refactored indexing so that edge_text gets embedded and exposed - Updated retrieval to use the new embeddings - Added a test to verify edge_text exists in the graph with the correct format. ## Type of Change - [ ] Bug fix (non-breaking change that fixes an issue) - [x] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [x] Code refactoring - [x] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [x] **I have tested my changes thoroughly before submitting this PR** - [x] **This PR contains minimal changes necessary to address the issue/feature** - [x] My code follows the project's coding standards and style guidelines - [x] I have added tests that prove my fix is effective or that my feature works - [ ] I have added necessary documentation (if applicable) - [ ] All new and existing tests pass - [x] I have searched existing PRs to ensure this change hasn't been submitted already - [ ] I have linked any relevant issues in the description - [ ] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all 
code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. --- cognee/infrastructure/engine/models/Edge.py | 14 ++- .../modules/chunking/models/DocumentChunk.py | 3 +- .../modules/graph/cognee_graph/CogneeGraph.py | 6 +- .../utils/expand_with_nodes_and_edges.py | 21 +++- .../graph/utils/resolve_edges_to_text.py | 119 +++++++++--------- .../utils/brute_force_triplet_search.py | 2 +- cognee/tasks/storage/index_data_points.py | 55 ++++---- cognee/tasks/storage/index_graph_edges.py | 94 ++++++-------- cognee/tests/test_edge_ingestion.py | 27 ++++ .../databases/test_index_data_points.py | 27 ++++ .../databases/test_index_graph_edges.py | 30 +++-- 11 files changed, 236 insertions(+), 162 deletions(-) create mode 100644 cognee/tests/unit/infrastructure/databases/test_index_data_points.py diff --git a/cognee/infrastructure/engine/models/Edge.py b/cognee/infrastructure/engine/models/Edge.py index 5ad9c84dd..59f01a9ab 100644 --- a/cognee/infrastructure/engine/models/Edge.py +++ b/cognee/infrastructure/engine/models/Edge.py @@ -1,4 +1,4 @@ -from pydantic import BaseModel +from pydantic import BaseModel, field_validator from typing import Optional, Any, Dict @@ -18,9 +18,21 @@ class Edge(BaseModel): # Mixed usage has_items: (Edge(weight=0.5, weights={"confidence": 0.9}), list[Item]) + + # With edge_text for rich embedding representation + contains: (Edge(relationship_type="contains", edge_text="relationship_name: contains; entity_description: Alice"), Entity) """ weight: Optional[float] = None weights: Optional[Dict[str, float]] = None relationship_type: Optional[str] = None properties: Optional[Dict[str, Any]] = None + edge_text: Optional[str] = None + + @field_validator("edge_text", mode="before") + @classmethod + def ensure_edge_text(cls, v, info): + """Auto-populate edge_text from relationship_type if not explicitly provided.""" + if v is None and info.data.get("relationship_type"): + return 
info.data["relationship_type"] + return v diff --git a/cognee/modules/chunking/models/DocumentChunk.py b/cognee/modules/chunking/models/DocumentChunk.py index 9f8c57486..e024bf00b 100644 --- a/cognee/modules/chunking/models/DocumentChunk.py +++ b/cognee/modules/chunking/models/DocumentChunk.py @@ -1,6 +1,7 @@ from typing import List, Union from cognee.infrastructure.engine import DataPoint +from cognee.infrastructure.engine.models.Edge import Edge from cognee.modules.data.processing.document_types import Document from cognee.modules.engine.models import Entity from cognee.tasks.temporal_graph.models import Event @@ -31,6 +32,6 @@ class DocumentChunk(DataPoint): chunk_index: int cut_type: str is_part_of: Document - contains: List[Union[Entity, Event]] = None + contains: List[Union[Entity, Event, tuple[Edge, Entity]]] = None metadata: dict = {"index_fields": ["text"]} diff --git a/cognee/modules/graph/cognee_graph/CogneeGraph.py b/cognee/modules/graph/cognee_graph/CogneeGraph.py index 9703928f0..cb7562422 100644 --- a/cognee/modules/graph/cognee_graph/CogneeGraph.py +++ b/cognee/modules/graph/cognee_graph/CogneeGraph.py @@ -171,8 +171,10 @@ class CogneeGraph(CogneeAbstractGraph): embedding_map = {result.payload["text"]: result.score for result in edge_distances} for edge in self.edges: - relationship_type = edge.attributes.get("relationship_type") - distance = embedding_map.get(relationship_type, None) + edge_key = edge.attributes.get("edge_text") or edge.attributes.get( + "relationship_type" + ) + distance = embedding_map.get(edge_key, None) if distance is not None: edge.attributes["vector_distance"] = distance diff --git a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py index 3b01f5af4..c68eb494d 100644 --- a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py +++ b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py @@ -1,5 +1,6 @@ from typing import Optional +from 
cognee.infrastructure.engine.models.Edge import Edge from cognee.modules.chunking.models import DocumentChunk from cognee.modules.engine.models import Entity, EntityType from cognee.modules.engine.utils import ( @@ -243,10 +244,26 @@ def _process_graph_nodes( ontology_relationships, ) - # Add entity to data chunk if data_chunk.contains is None: data_chunk.contains = [] - data_chunk.contains.append(entity_node) + + edge_text = "; ".join( + [ + "relationship_name: contains", + f"entity_name: {entity_node.name}", + f"entity_description: {entity_node.description}", + ] + ) + + data_chunk.contains.append( + ( + Edge( + relationship_type="contains", + edge_text=edge_text, + ), + entity_node, + ) + ) def _process_graph_edges( diff --git a/cognee/modules/graph/utils/resolve_edges_to_text.py b/cognee/modules/graph/utils/resolve_edges_to_text.py index eb5bedd2c..5deb13ba8 100644 --- a/cognee/modules/graph/utils/resolve_edges_to_text.py +++ b/cognee/modules/graph/utils/resolve_edges_to_text.py @@ -1,71 +1,70 @@ +import string from typing import List +from collections import Counter + from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge +from cognee.modules.retrieval.utils.stop_words import DEFAULT_STOP_WORDS + + +def _get_top_n_frequent_words( + text: str, stop_words: set = None, top_n: int = 3, separator: str = ", " +) -> str: + """Concatenates the top N frequent words in text.""" + if stop_words is None: + stop_words = DEFAULT_STOP_WORDS + + words = [word.lower().strip(string.punctuation) for word in text.split()] + words = [word for word in words if word and word not in stop_words] + + top_words = [word for word, freq in Counter(words).most_common(top_n)] + return separator.join(top_words) + + +def _create_title_from_text(text: str, first_n_words: int = 7, top_n_words: int = 3) -> str: + """Creates a title by combining first words with most frequent words from the text.""" + first_words = text.split()[:first_n_words] + top_words = 
_get_top_n_frequent_words(text, top_n=top_n_words) + return f"{' '.join(first_words)}... [{top_words}]" + + +def _extract_nodes_from_edges(retrieved_edges: List[Edge]) -> dict: + """Creates a dictionary of nodes with their names and content.""" + nodes = {} + + for edge in retrieved_edges: + for node in (edge.node1, edge.node2): + if node.id in nodes: + continue + + text = node.attributes.get("text") + if text: + name = _create_title_from_text(text) + content = text + else: + name = node.attributes.get("name", "Unnamed Node") + content = node.attributes.get("description", name) + + nodes[node.id] = {"node": node, "name": name, "content": content} + + return nodes async def resolve_edges_to_text(retrieved_edges: List[Edge]) -> str: - """ - Converts retrieved graph edges into a human-readable string format. + """Converts retrieved graph edges into a human-readable string format.""" + nodes = _extract_nodes_from_edges(retrieved_edges) - Parameters: - ----------- - - - retrieved_edges (list): A list of edges retrieved from the graph. - - Returns: - -------- - - - str: A formatted string representation of the nodes and their connections. 
- """ - - def _get_nodes(retrieved_edges: List[Edge]) -> dict: - def _get_title(text: str, first_n_words: int = 7, top_n_words: int = 3) -> str: - def _top_n_words(text, stop_words=None, top_n=3, separator=", "): - """Concatenates the top N frequent words in text.""" - if stop_words is None: - from cognee.modules.retrieval.utils.stop_words import DEFAULT_STOP_WORDS - - stop_words = DEFAULT_STOP_WORDS - - import string - - words = [word.lower().strip(string.punctuation) for word in text.split()] - - if stop_words: - words = [word for word in words if word and word not in stop_words] - - from collections import Counter - - top_words = [word for word, freq in Counter(words).most_common(top_n)] - - return separator.join(top_words) - - """Creates a title, by combining first words with most frequent words from the text.""" - first_words = text.split()[:first_n_words] - top_words = _top_n_words(text, top_n=first_n_words) - return f"{' '.join(first_words)}... [{top_words}]" - - """Creates a dictionary of nodes with their names and content.""" - nodes = {} - for edge in retrieved_edges: - for node in (edge.node1, edge.node2): - if node.id not in nodes: - text = node.attributes.get("text") - if text: - name = _get_title(text) - content = text - else: - name = node.attributes.get("name", "Unnamed Node") - content = node.attributes.get("description", name) - nodes[node.id] = {"node": node, "name": name, "content": content} - return nodes - - nodes = _get_nodes(retrieved_edges) node_section = "\n".join( f"Node: {info['name']}\n__node_content_start__\n{info['content']}\n__node_content_end__\n" for info in nodes.values() ) - connection_section = "\n".join( - f"{nodes[edge.node1.id]['name']} --[{edge.attributes['relationship_type']}]--> {nodes[edge.node2.id]['name']}" - for edge in retrieved_edges - ) + + connections = [] + for edge in retrieved_edges: + source_name = nodes[edge.node1.id]["name"] + target_name = nodes[edge.node2.id]["name"] + edge_label = 
edge.attributes.get("edge_text") or edge.attributes.get("relationship_type") + connections.append(f"{source_name} --[{edge_label}]--> {target_name}") + + connection_section = "\n".join(connections) + return f"Nodes:\n{node_section}\n\nConnections:\n{connection_section}" diff --git a/cognee/modules/retrieval/utils/brute_force_triplet_search.py b/cognee/modules/retrieval/utils/brute_force_triplet_search.py index 1ef7545c2..f8bdbb97d 100644 --- a/cognee/modules/retrieval/utils/brute_force_triplet_search.py +++ b/cognee/modules/retrieval/utils/brute_force_triplet_search.py @@ -71,7 +71,7 @@ async def get_memory_fragment( await memory_fragment.project_graph_from_db( graph_engine, node_properties_to_project=properties_to_project, - edge_properties_to_project=["relationship_name"], + edge_properties_to_project=["relationship_name", "edge_text"], node_type=node_type, node_name=node_name, ) diff --git a/cognee/tasks/storage/index_data_points.py b/cognee/tasks/storage/index_data_points.py index 902789c80..b0ec3a5b4 100644 --- a/cognee/tasks/storage/index_data_points.py +++ b/cognee/tasks/storage/index_data_points.py @@ -8,47 +8,58 @@ logger = get_logger("index_data_points") async def index_data_points(data_points: list[DataPoint]): - created_indexes = {} - index_points = {} + """Index data points in the vector engine by creating embeddings for specified fields. + + Process: + 1. Groups data points into a nested dict: {type_name: {field_name: [points]}} + 2. Creates vector indexes for each (type, field) combination on first encounter + 3. Batches points per (type, field) and creates async indexing tasks + 4. Executes all indexing tasks in parallel for efficient embedding generation + + Args: + data_points: List of DataPoint objects to index. Each DataPoint's metadata must + contain an 'index_fields' list specifying which fields to embed. + + Returns: + The original data_points list. 
+ """ + data_points_by_type = {} vector_engine = get_vector_engine() for data_point in data_points: data_point_type = type(data_point) + type_name = data_point_type.__name__ for field_name in data_point.metadata["index_fields"]: if getattr(data_point, field_name, None) is None: continue - index_name = f"{data_point_type.__name__}_{field_name}" + if type_name not in data_points_by_type: + data_points_by_type[type_name] = {} - if index_name not in created_indexes: - await vector_engine.create_vector_index(data_point_type.__name__, field_name) - created_indexes[index_name] = True - - if index_name not in index_points: - index_points[index_name] = [] + if field_name not in data_points_by_type[type_name]: + await vector_engine.create_vector_index(type_name, field_name) + data_points_by_type[type_name][field_name] = [] indexed_data_point = data_point.model_copy() indexed_data_point.metadata["index_fields"] = [field_name] - index_points[index_name].append(indexed_data_point) + data_points_by_type[type_name][field_name].append(indexed_data_point) - tasks: list[asyncio.Task] = [] batch_size = vector_engine.embedding_engine.get_batch_size() - for index_name_and_field, points in index_points.items(): - first = index_name_and_field.index("_") - index_name = index_name_and_field[:first] - field_name = index_name_and_field[first + 1 :] + batches = ( + (type_name, field_name, points[i : i + batch_size]) + for type_name, fields in data_points_by_type.items() + for field_name, points in fields.items() + for i in range(0, len(points), batch_size) + ) - # Create embedding requests per batch to run in parallel later - for i in range(0, len(points), batch_size): - batch = points[i : i + batch_size] - tasks.append( - asyncio.create_task(vector_engine.index_data_points(index_name, field_name, batch)) - ) + tasks = [ + asyncio.create_task(vector_engine.index_data_points(type_name, field_name, batch_points)) + for type_name, field_name, batch_points in batches + ] - # Run all embedding 
requests in parallel await asyncio.gather(*tasks) return data_points diff --git a/cognee/tasks/storage/index_graph_edges.py b/cognee/tasks/storage/index_graph_edges.py index 4fa8cfc75..03b5a25a5 100644 --- a/cognee/tasks/storage/index_graph_edges.py +++ b/cognee/tasks/storage/index_graph_edges.py @@ -1,17 +1,44 @@ -import asyncio +from collections import Counter +from typing import Optional, Dict, Any, List, Tuple, Union from cognee.modules.engine.utils.generate_edge_id import generate_edge_id from cognee.shared.logging_utils import get_logger -from collections import Counter -from typing import Optional, Dict, Any, List, Tuple, Union -from cognee.infrastructure.databases.vector import get_vector_engine from cognee.infrastructure.databases.graph import get_graph_engine from cognee.modules.graph.models.EdgeType import EdgeType from cognee.infrastructure.databases.graph.graph_db_interface import EdgeData +from cognee.tasks.storage.index_data_points import index_data_points logger = get_logger() +def _get_edge_text(item: dict) -> str: + """Extract edge text for embedding - prefers edge_text field with fallback.""" + if "edge_text" in item: + return item["edge_text"] + + if "relationship_name" in item: + return item["relationship_name"] + + return "" + + +def create_edge_type_datapoints(edges_data) -> list[EdgeType]: + """Transform raw edge data into EdgeType datapoints.""" + edge_texts = [ + _get_edge_text(item) + for edge in edges_data + for item in edge + if isinstance(item, dict) and "relationship_name" in item + ] + + edge_types = Counter(edge_texts) + + return [ + EdgeType(id=generate_edge_id(edge_id=text), relationship_name=text, number_of_edges=count) + for text, count in edge_types.items() + ] + + async def index_graph_edges( edges_data: Union[List[EdgeData], List[Tuple[str, str, str, Optional[Dict[str, Any]]]]] = None, ): @@ -23,24 +50,17 @@ async def index_graph_edges( the `relationship_name` field. Steps: - 1. Initialize the vector engine and graph engine. 
- 2. Retrieve graph edge data and count relationship types (`relationship_name`). - 3. Create vector indexes for `relationship_name` if they don't exist. - 4. Transform the counted relationships into `EdgeType` objects. - 5. Index the transformed data points in the vector engine. + 1. Initialize the graph engine if needed and retrieve edge data. + 2. Transform edge data into EdgeType datapoints. + 3. Index the EdgeType datapoints using the standard indexing function. Raises: - RuntimeError: If initialization of the vector engine or graph engine fails. + RuntimeError: If initialization of the graph engine fails. Returns: None """ try: - created_indexes = {} - index_points = {} - - vector_engine = get_vector_engine() - if edges_data is None: graph_engine = await get_graph_engine() _, edges_data = await graph_engine.get_graph_data() @@ -51,47 +71,7 @@ async def index_graph_edges( logger.error("Failed to initialize engines: %s", e) raise RuntimeError("Initialization error") from e - edge_types = Counter( - item.get("relationship_name") - for edge in edges_data - for item in edge - if isinstance(item, dict) and "relationship_name" in item - ) - - for text, count in edge_types.items(): - edge = EdgeType( - id=generate_edge_id(edge_id=text), relationship_name=text, number_of_edges=count - ) - data_point_type = type(edge) - - for field_name in edge.metadata["index_fields"]: - index_name = f"{data_point_type.__name__}.{field_name}" - - if index_name not in created_indexes: - await vector_engine.create_vector_index(data_point_type.__name__, field_name) - created_indexes[index_name] = True - - if index_name not in index_points: - index_points[index_name] = [] - - indexed_data_point = edge.model_copy() - indexed_data_point.metadata["index_fields"] = [field_name] - index_points[index_name].append(indexed_data_point) - - # Get maximum batch size for embedding model - batch_size = vector_engine.embedding_engine.get_batch_size() - tasks: list[asyncio.Task] = [] - - for index_name, 
indexable_points in index_points.items(): - index_name, field_name = index_name.split(".") - - # Create embedding tasks to run in parallel later - for start in range(0, len(indexable_points), batch_size): - batch = indexable_points[start : start + batch_size] - - tasks.append(vector_engine.index_data_points(index_name, field_name, batch)) - - # Start all embedding tasks and wait for completion - await asyncio.gather(*tasks) + edge_type_datapoints = create_edge_type_datapoints(edges_data) + await index_data_points(edge_type_datapoints) return None diff --git a/cognee/tests/test_edge_ingestion.py b/cognee/tests/test_edge_ingestion.py index 5b23f7819..0d1407fab 100755 --- a/cognee/tests/test_edge_ingestion.py +++ b/cognee/tests/test_edge_ingestion.py @@ -52,6 +52,33 @@ async def test_edge_ingestion(): edge_type_counts = Counter(edge_type[2] for edge_type in graph[1]) + "Tests edge_text presence and format" + contains_edges = [edge for edge in graph[1] if edge[2] == "contains"] + assert len(contains_edges) > 0, "Expected at least one contains edge for edge_text verification" + + edge_properties = contains_edges[0][3] + assert "edge_text" in edge_properties, "Expected edge_text in edge properties" + + edge_text = edge_properties["edge_text"] + assert "relationship_name: contains" in edge_text, ( + f"Expected 'relationship_name: contains' in edge_text, got: {edge_text}" + ) + assert "entity_name:" in edge_text, f"Expected 'entity_name:' in edge_text, got: {edge_text}" + assert "entity_description:" in edge_text, ( + f"Expected 'entity_description:' in edge_text, got: {edge_text}" + ) + + all_edge_texts = [ + edge[3].get("edge_text", "") for edge in contains_edges if "edge_text" in edge[3] + ] + expected_entities = ["dave", "ana", "bob", "dexter", "apples", "cognee"] + found_entity = any( + any(entity in text.lower() for entity in expected_entities) for text in all_edge_texts + ) + assert found_entity, ( + f"Expected to find at least one entity name in edge_text: 
{all_edge_texts[:3]}" + ) + "Tests the presence of basic nested edges" for basic_nested_edge in basic_nested_edges: assert edge_type_counts.get(basic_nested_edge, 0) >= 1, ( diff --git a/cognee/tests/unit/infrastructure/databases/test_index_data_points.py b/cognee/tests/unit/infrastructure/databases/test_index_data_points.py new file mode 100644 index 000000000..21a5695de --- /dev/null +++ b/cognee/tests/unit/infrastructure/databases/test_index_data_points.py @@ -0,0 +1,27 @@ +import pytest +from unittest.mock import AsyncMock, patch, MagicMock +from cognee.tasks.storage.index_data_points import index_data_points +from cognee.infrastructure.engine import DataPoint + + +class TestDataPoint(DataPoint): + name: str + metadata: dict = {"index_fields": ["name"]} + + +@pytest.mark.asyncio +async def test_index_data_points_calls_vector_engine(): + """Test that index_data_points creates vector index and indexes data.""" + data_points = [TestDataPoint(name="test1")] + + mock_vector_engine = AsyncMock() + mock_vector_engine.embedding_engine.get_batch_size = MagicMock(return_value=100) + + with patch.dict( + index_data_points.__globals__, + {"get_vector_engine": lambda: mock_vector_engine}, + ): + await index_data_points(data_points) + + assert mock_vector_engine.create_vector_index.await_count >= 1 + assert mock_vector_engine.index_data_points.await_count >= 1 diff --git a/cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py b/cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py index 48bbc53e3..cee0896c2 100644 --- a/cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +++ b/cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py @@ -5,8 +5,7 @@ from cognee.tasks.storage.index_graph_edges import index_graph_edges @pytest.mark.asyncio async def test_index_graph_edges_success(): - """Test that index_graph_edges uses the index datapoints and creates vector index.""" - # Create the mocks for the graph and vector 
engines. + """Test that index_graph_edges retrieves edges and delegates to index_data_points.""" mock_graph_engine = AsyncMock() mock_graph_engine.get_graph_data.return_value = ( None, @@ -15,26 +14,23 @@ async def test_index_graph_edges_success(): [{"relationship_name": "rel2"}], ], ) - mock_vector_engine = AsyncMock() - mock_vector_engine.embedding_engine.get_batch_size = MagicMock(return_value=100) + mock_index_data_points = AsyncMock() - # Patch the globals of the function so that when it does: - # vector_engine = get_vector_engine() - # graph_engine = await get_graph_engine() - # it uses the mocked versions. with patch.dict( index_graph_edges.__globals__, { "get_graph_engine": AsyncMock(return_value=mock_graph_engine), - "get_vector_engine": lambda: mock_vector_engine, + "index_data_points": mock_index_data_points, }, ): await index_graph_edges() - # Assertions on the mock calls. mock_graph_engine.get_graph_data.assert_awaited_once() - assert mock_vector_engine.create_vector_index.await_count == 1 - assert mock_vector_engine.index_data_points.await_count == 1 + mock_index_data_points.assert_awaited_once() + + call_args = mock_index_data_points.call_args[0][0] + assert len(call_args) == 2 + assert all(hasattr(item, "relationship_name") for item in call_args) @pytest.mark.asyncio @@ -42,20 +38,22 @@ async def test_index_graph_edges_no_relationships(): """Test that index_graph_edges handles empty relationships correctly.""" mock_graph_engine = AsyncMock() mock_graph_engine.get_graph_data.return_value = (None, []) - mock_vector_engine = AsyncMock() + mock_index_data_points = AsyncMock() with patch.dict( index_graph_edges.__globals__, { "get_graph_engine": AsyncMock(return_value=mock_graph_engine), - "get_vector_engine": lambda: mock_vector_engine, + "index_data_points": mock_index_data_points, }, ): await index_graph_edges() mock_graph_engine.get_graph_data.assert_awaited_once() - mock_vector_engine.create_vector_index.assert_not_awaited() - 
mock_vector_engine.index_data_points.assert_not_awaited() + mock_index_data_points.assert_awaited_once() + + call_args = mock_index_data_points.call_args[0][0] + assert len(call_args) == 0 @pytest.mark.asyncio From 908d32912766331476e0159f96950a30bfda1ef0 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 30 Oct 2025 15:15:41 +0100 Subject: [PATCH 031/284] feat: add alembic migrations --- ..._expand_dataset_database_for_multi_user.py | 122 ++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py diff --git a/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py b/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py new file mode 100644 index 000000000..cd19d09c8 --- /dev/null +++ b/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py @@ -0,0 +1,122 @@ +"""Expand dataset database for multi user + +Revision ID: 76625596c5c3 +Revises: 211ab850ef3d +Create Date: 2025-10-30 12:55:20.239562 + +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision: str = "76625596c5c3" +down_revision: Union[str, None] = "211ab850ef3d" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def _get_column(inspector, table, name, schema=None): + for col in inspector.get_columns(table, schema=schema): + if col["name"] == name: + return col + return None + + +def upgrade() -> None: + conn = op.get_bind() + insp = sa.inspect(conn) + + data = sa.table( + "dataset_database", + sa.Column("dataset_id", sa.UUID, primary_key=True, index=True), # Critical for SQLite + sa.Column("owner_id", sa.UUID, index=True), + sa.Column("vector_database_name", sa.String(), unique=True, nullable=False), + sa.Column("graph_database_name", sa.String(), unique=True, nullable=False), + sa.Column("vector_database_provider", sa.String(), unique=False, nullable=False), + sa.Column("graph_database_provider", sa.String(), unique=False, nullable=False), + sa.Column("vector_database_url", sa.String(), unique=False, nullable=True), + sa.Column("graph_database_url", sa.String(), unique=False, nullable=True), + sa.Column("vector_database_key", sa.String(), unique=False, nullable=True), + sa.Column("graph_database_key", sa.String(), unique=False, nullable=True), + sa.Column("created_at", sa.DateTime(timezone=True)), + sa.Column("updated_at", sa.DateTime(timezone=True)), + ) + + vector_database_provider_column = _get_column( + insp, "dataset_database", "vector_database_provider" + ) + if not vector_database_provider_column: + op.add_column( + "dataset_database", + sa.Column("vector_database_provider", sa.String(), unique=False, nullable=False), + ) + if op.get_context().dialect.name == "sqlite": + with op.batch_alter_table("dataset_database") as batch_op: + batch_op.execute( + data.update().values( + vector_database_provider="lancedb", + ) + ) + else: + conn = op.get_bind() + conn.execute(data.update().values(vector_database_provider="lancedb")) + + graph_database_provider_column = _get_column( + 
insp, "dataset_database", "graph_database_provider" + ) + if not graph_database_provider_column: + op.add_column( + "dataset_database", + sa.Column("graph_database_provider", sa.String(), unique=False, nullable=False), + ) + if op.get_context().dialect.name == "sqlite": + with op.batch_alter_table("dataset_database") as batch_op: + batch_op.execute( + data.update().values( + graph_database_provider="kuzu", + ) + ) + else: + conn = op.get_bind() + conn.execute(data.update().values(graph_database_provider="kuzu")) + + vector_database_url_column = _get_column(insp, "dataset_database", "vector_database_url") + if not vector_database_url_column: + op.add_column( + "dataset_database", + sa.Column("vector_database_url", sa.String(), unique=False, nullable=True), + ) + + graph_database_url_column = _get_column(insp, "dataset_database", "graph_database_url") + if not graph_database_url_column: + op.add_column( + "dataset_database", + sa.Column("graph_database_url", sa.String(), unique=False, nullable=True), + ) + + vector_database_key_column = _get_column(insp, "dataset_database", "vector_database_key") + if not vector_database_key_column: + op.add_column( + "dataset_database", + sa.Column("vector_database_key", sa.String(), unique=False, nullable=True), + ) + + graph_database_key_column = _get_column(insp, "dataset_database", "graph_database_key") + if not graph_database_key_column: + op.add_column( + "dataset_database", + sa.Column("graph_database_key", sa.String(), unique=False, nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("dataset_database", "vector_database_provider") + op.drop_column("dataset_database", "graph_database_provider") + op.drop_column("dataset_database", "vector_database_url") + op.drop_column("dataset_database", "graph_database_url") + op.drop_column("dataset_database", "vector_database_key") + op.drop_column("dataset_database", "graph_database_key") From ce925615fe843dac238aca74c4b81615fa6beb65 Mon Sep 17 00:00:00 2001 From: Andrej 
Milicevic Date: Thu, 30 Oct 2025 15:32:27 +0100 Subject: [PATCH 032/284] fix: fix small naming error --- .../databases/utils/get_or_create_dataset_database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py index deea46541..a4e50f665 100644 --- a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +++ b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py @@ -47,7 +47,7 @@ async def get_or_create_dataset_database( if graph_config.graph_database_provider in HYBRID_DBS: vector_db_name = graph_db_name else: - if vector_config.vector_database_provider == "lancedb": + if vector_config.vector_db_provider == "lancedb": vector_db_name = f"{dataset_id}.lance.db" else: vector_db_name = f"{dataset_id}.db" From 28f28f06dd34eec88c0443d780010b451e389674 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 30 Oct 2025 16:04:33 +0100 Subject: [PATCH 033/284] fix: added vector db name to test configs --- cognee/tests/test_parallel_databases.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cognee/tests/test_parallel_databases.py b/cognee/tests/test_parallel_databases.py index 9a590921a..51eb7d3cf 100755 --- a/cognee/tests/test_parallel_databases.py +++ b/cognee/tests/test_parallel_databases.py @@ -33,11 +33,13 @@ async def main(): "vector_db_url": "cognee1.test", "vector_db_key": "", "vector_db_provider": "lancedb", + "vector_db_name": "" } task_2_config = { "vector_db_url": "cognee2.test", "vector_db_key": "", "vector_db_provider": "lancedb", + "vector_db_name": "" } task_1_graph_config = { From 4b0b9bfc539ef448779eb52ce546444dc7780149 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 30 Oct 2025 16:06:15 +0100 Subject: [PATCH 034/284] chore: ruff format --- cognee/tests/test_parallel_databases.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff 
--git a/cognee/tests/test_parallel_databases.py b/cognee/tests/test_parallel_databases.py index 51eb7d3cf..3164206ed 100755 --- a/cognee/tests/test_parallel_databases.py +++ b/cognee/tests/test_parallel_databases.py @@ -33,13 +33,13 @@ async def main(): "vector_db_url": "cognee1.test", "vector_db_key": "", "vector_db_provider": "lancedb", - "vector_db_name": "" + "vector_db_name": "", } task_2_config = { "vector_db_url": "cognee2.test", "vector_db_key": "", "vector_db_provider": "lancedb", - "vector_db_name": "" + "vector_db_name": "", } task_1_graph_config = { From ee0ecd52d8115396523c1a62b2c99b3178f5d182 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 30 Oct 2025 16:25:34 +0100 Subject: [PATCH 035/284] refactor: Rewrite tests to work with multi-user mode by default --- .../users/test_conditional_authentication.py | 63 ------------------- 1 file changed, 63 deletions(-) diff --git a/cognee/tests/unit/modules/users/test_conditional_authentication.py b/cognee/tests/unit/modules/users/test_conditional_authentication.py index c4368d796..6568c3cb0 100644 --- a/cognee/tests/unit/modules/users/test_conditional_authentication.py +++ b/cognee/tests/unit/modules/users/test_conditional_authentication.py @@ -107,29 +107,10 @@ class TestConditionalAuthenticationIntegration: # REQUIRE_AUTHENTICATION should be a boolean assert isinstance(REQUIRE_AUTHENTICATION, bool) - # Currently should be False (optional authentication) - assert not REQUIRE_AUTHENTICATION - class TestConditionalAuthenticationEnvironmentVariables: """Test environment variable handling.""" - def test_require_authentication_default_false(self): - """Test that REQUIRE_AUTHENTICATION defaults to false when imported with no env vars.""" - with patch.dict(os.environ, {}, clear=True): - # Remove module from cache to force fresh import - module_name = "cognee.modules.users.methods.get_authenticated_user" - if module_name in sys.modules: - del sys.modules[module_name] - - # Import after patching environment - module 
will see empty environment - from cognee.modules.users.methods.get_authenticated_user import ( - REQUIRE_AUTHENTICATION, - ) - - importlib.invalidate_caches() - assert not REQUIRE_AUTHENTICATION - def test_require_authentication_true(self): """Test that REQUIRE_AUTHENTICATION=true is parsed correctly when imported.""" with patch.dict(os.environ, {"REQUIRE_AUTHENTICATION": "true"}): @@ -145,50 +126,6 @@ class TestConditionalAuthenticationEnvironmentVariables: assert REQUIRE_AUTHENTICATION - def test_require_authentication_false_explicit(self): - """Test that REQUIRE_AUTHENTICATION=false is parsed correctly when imported.""" - with patch.dict(os.environ, {"REQUIRE_AUTHENTICATION": "false"}): - # Remove module from cache to force fresh import - module_name = "cognee.modules.users.methods.get_authenticated_user" - if module_name in sys.modules: - del sys.modules[module_name] - - # Import after patching environment - module will see REQUIRE_AUTHENTICATION=false - from cognee.modules.users.methods.get_authenticated_user import ( - REQUIRE_AUTHENTICATION, - ) - - assert not REQUIRE_AUTHENTICATION - - def test_require_authentication_case_insensitive(self): - """Test that environment variable parsing is case insensitive when imported.""" - test_cases = ["TRUE", "True", "tRuE", "FALSE", "False", "fAlSe"] - - for case in test_cases: - with patch.dict(os.environ, {"REQUIRE_AUTHENTICATION": case}): - # Remove module from cache to force fresh import - module_name = "cognee.modules.users.methods.get_authenticated_user" - if module_name in sys.modules: - del sys.modules[module_name] - - # Import after patching environment - from cognee.modules.users.methods.get_authenticated_user import ( - REQUIRE_AUTHENTICATION, - ) - - expected = case.lower() == "true" - assert REQUIRE_AUTHENTICATION == expected, f"Failed for case: {case}" - - def test_current_require_authentication_value(self): - """Test that the current REQUIRE_AUTHENTICATION module value is as expected.""" - from 
cognee.modules.users.methods.get_authenticated_user import ( - REQUIRE_AUTHENTICATION, - ) - - # The module-level variable should currently be False (set at import time) - assert isinstance(REQUIRE_AUTHENTICATION, bool) - assert not REQUIRE_AUTHENTICATION - class TestConditionalAuthenticationEdgeCases: """Test edge cases and error scenarios.""" From 9d8430cfb08e17467390ff53e57d330885c6c7d9 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 30 Oct 2025 16:52:04 +0100 Subject: [PATCH 036/284] refactor: Update unit tests for require auth --- .../test_conditional_authentication_endpoints.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/cognee/tests/unit/api/test_conditional_authentication_endpoints.py b/cognee/tests/unit/api/test_conditional_authentication_endpoints.py index 2eabee91a..6cc37ef38 100644 --- a/cognee/tests/unit/api/test_conditional_authentication_endpoints.py +++ b/cognee/tests/unit/api/test_conditional_authentication_endpoints.py @@ -1,3 +1,4 @@ +import os import pytest from unittest.mock import patch, AsyncMock, MagicMock from uuid import uuid4 @@ -5,8 +6,6 @@ from fastapi.testclient import TestClient from types import SimpleNamespace import importlib -from cognee.api.client import app - # Fixtures for reuse across test classes @pytest.fixture @@ -32,6 +31,10 @@ def mock_authenticated_user(): ) +# To turn off authentication we need to set the environment variable before importing the module +# Also both require_authentication and backend access control must be false +os.environ["REQUIRE_AUTHENTICATION"] = "false" +os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "false" gau_mod = importlib.import_module("cognee.modules.users.methods.get_authenticated_user") @@ -40,6 +43,8 @@ class TestConditionalAuthenticationEndpoints: @pytest.fixture def client(self): + from cognee.api.client import app + """Create a test client.""" return TestClient(app) @@ -133,6 +138,8 @@ class TestConditionalAuthenticationBehavior: 
@pytest.fixture def client(self): + from cognee.api.client import app + return TestClient(app) @pytest.mark.parametrize( @@ -209,6 +216,8 @@ class TestConditionalAuthenticationErrorHandling: @pytest.fixture def client(self): + from cognee.api.client import app + return TestClient(app) @patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) @@ -232,7 +241,7 @@ class TestConditionalAuthenticationErrorHandling: # The exact error message may vary depending on the actual database connection # The important thing is that we get a 500 error when user creation fails - def test_current_environment_configuration(self): + def test_current_environment_configuration(self, client): """Test that current environment configuration is working properly.""" # This tests the actual module state without trying to change it from cognee.modules.users.methods.get_authenticated_user import ( From e061f34a28bf9cd27453ea6aac8abf215c7bde8f Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 30 Oct 2025 17:13:10 +0100 Subject: [PATCH 037/284] fix: Resolve issue with dataset names for example --- examples/python/feedback_enrichment_minimal_example.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/python/feedback_enrichment_minimal_example.py b/examples/python/feedback_enrichment_minimal_example.py index 11ef20830..8954bd5f6 100644 --- a/examples/python/feedback_enrichment_minimal_example.py +++ b/examples/python/feedback_enrichment_minimal_example.py @@ -67,7 +67,6 @@ async def run_feedback_enrichment_memify(last_n: int = 5): extraction_tasks=extraction_tasks, enrichment_tasks=enrichment_tasks, data=[{}], # A placeholder to prevent fetching the entire graph - dataset="feedback_enrichment_minimal", ) From 995e7aa4834bd3967342fded1b7ad6af10b4da0e Mon Sep 17 00:00:00 2001 From: Hande <159312713+hande-k@users.noreply.github.com> Date: Thu, 30 Oct 2025 17:38:28 +0100 Subject: [PATCH 038/284] fix: update unsupported vector db log (#1708) ## Description ## Type of Change - [ ] Bug 
fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [ ] **I have tested my changes thoroughly before submitting this PR** - [ ] **This PR contains minimal changes necessary to address the issue/feature** - [ ] My code follows the project's coding standards and style guidelines - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have added necessary documentation (if applicable) - [ ] All new and existing tests pass - [ ] I have searched existing PRs to ensure this change hasn't been submitted already - [ ] I have linked any relevant issues in the description - [ ] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. --- cognee/infrastructure/databases/vector/create_vector_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/infrastructure/databases/vector/create_vector_engine.py b/cognee/infrastructure/databases/vector/create_vector_engine.py index d1cf855d7..c54d94f6c 100644 --- a/cognee/infrastructure/databases/vector/create_vector_engine.py +++ b/cognee/infrastructure/databases/vector/create_vector_engine.py @@ -133,6 +133,6 @@ def create_vector_engine( else: raise EnvironmentError( - f"Unsupported graph database provider: {vector_db_provider}. " + f"Unsupported vector database provider: {vector_db_provider}. 
" f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['LanceDB', 'PGVector', 'neptune_analytics', 'ChromaDB'])}" ) From 45bb3130c695260af9ccb00e756a0b4d22f0a85b Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 30 Oct 2025 17:40:00 +0100 Subject: [PATCH 039/284] fix: Use same dataset name accross cognee calls --- cognee/tests/test_feedback_enrichment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/tests/test_feedback_enrichment.py b/cognee/tests/test_feedback_enrichment.py index 02d90db32..378cb0e45 100644 --- a/cognee/tests/test_feedback_enrichment.py +++ b/cognee/tests/test_feedback_enrichment.py @@ -133,7 +133,7 @@ async def main(): extraction_tasks=extraction_tasks, enrichment_tasks=enrichment_tasks, data=[{}], - dataset="feedback_enrichment_test_memify", + dataset=dataset_name, ) nodes_after, edges_after = await graph_engine.get_graph_data() From 1b483276b0077c57eefdc7e7aa93d58501cf7840 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 30 Oct 2025 18:04:27 +0100 Subject: [PATCH 040/284] fix: disable backend access control for rel db test --- cognee/tests/test_relational_db_migration.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cognee/tests/test_relational_db_migration.py b/cognee/tests/test_relational_db_migration.py index 2b69ce854..4557e9e2f 100644 --- a/cognee/tests/test_relational_db_migration.py +++ b/cognee/tests/test_relational_db_migration.py @@ -27,6 +27,9 @@ def normalize_node_name(node_name: str) -> str: async def setup_test_db(): + # Disable backend access control to migrate relational data + os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "false" + await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) From a60e53964c7eaea79680508bf33422dc35bd9056 Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Fri, 31 Oct 2025 12:37:38 +0100 Subject: [PATCH 041/284] Potential fix for code scanning alert no. 
399: Workflow does not contain permissions Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- .github/workflows/test_suites.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test_suites.yml b/.github/workflows/test_suites.yml index 44cfb03cf..be1e354fc 100644 --- a/.github/workflows/test_suites.yml +++ b/.github/workflows/test_suites.yml @@ -1,4 +1,6 @@ name: Test Suites +permissions: + contents: read on: push: From 3c09433adead92f8a09093069a6d88de63c28409 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 31 Oct 2025 13:57:12 +0100 Subject: [PATCH 042/284] fix: Resolve docling test --- cognee/tests/test_add_docling_document.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cognee/tests/test_add_docling_document.py b/cognee/tests/test_add_docling_document.py index 2c82af66f..c5aa4e9d1 100644 --- a/cognee/tests/test_add_docling_document.py +++ b/cognee/tests/test_add_docling_document.py @@ -39,12 +39,12 @@ async def main(): answer = await cognee.search("Do programmers change light bulbs?") assert len(answer) != 0 - lowercase_answer = answer[0].lower() + lowercase_answer = answer[0]["search_result"][0].lower() assert ("no" in lowercase_answer) or ("none" in lowercase_answer) answer = await cognee.search("What colours are there in the presentation table?") assert len(answer) != 0 - lowercase_answer = answer[0].lower() + lowercase_answer = answer[0]["search_result"][0].lower() assert ( ("red" in lowercase_answer) and ("blue" in lowercase_answer) From 00a1fe71d76ae62deac5832751366061f21bc96f Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 31 Oct 2025 14:33:07 +0100 Subject: [PATCH 043/284] fix: Use multi-user mode search --- examples/python/agentic_reasoning_procurement_example.py | 2 +- examples/python/memify_coding_agent_example.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/examples/python/agentic_reasoning_procurement_example.py b/examples/python/agentic_reasoning_procurement_example.py index 5aa3caa70..4e9d2d7e4 100644 --- a/examples/python/agentic_reasoning_procurement_example.py +++ b/examples/python/agentic_reasoning_procurement_example.py @@ -168,7 +168,7 @@ async def run_procurement_example(): for q in questions: print(f"Question: \n{q}") results = await procurement_system.search_memory(q, search_categories=[category]) - top_answer = results[category][0] + top_answer = results[category][0]["search_result"][0] print(f"Answer: \n{top_answer.strip()}\n") research_notes[category].append({"question": q, "answer": top_answer}) diff --git a/examples/python/memify_coding_agent_example.py b/examples/python/memify_coding_agent_example.py index 1fd3b1528..4a087ba61 100644 --- a/examples/python/memify_coding_agent_example.py +++ b/examples/python/memify_coding_agent_example.py @@ -89,7 +89,7 @@ async def main(): ) print("Coding rules created by memify:") - for coding_rule in coding_rules: + for coding_rule in coding_rules[0]["search_result"][0]: print("- " + coding_rule) # Visualize new graph with added memify context From 4c8b8211979fc12b6e46a911eba1c2e2610187d8 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 31 Oct 2025 14:55:52 +0100 Subject: [PATCH 044/284] fix: resolve test failing --- cognee/tests/test_search_db.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/tests/test_search_db.py b/cognee/tests/test_search_db.py index e24abd0f5..bcc4529a9 100644 --- a/cognee/tests/test_search_db.py +++ b/cognee/tests/test_search_db.py @@ -146,7 +146,7 @@ async def main(): assert len(search_results) == 1, ( f"{name}: expected single-element list, got {len(search_results)}" ) - text = search_results[0] + text = search_results[0]["search_result"][0] assert isinstance(text, str), f"{name}: element should be a string" assert text.strip(), f"{name}: string should not be empty" assert "netherlands" in text.lower(), ( 
From f368a1a4d5d79a4485bc52ff5d9f16fc909942d2 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 31 Oct 2025 20:10:05 +0100 Subject: [PATCH 045/284] fix: set tests to not use multi-user mode --- .github/workflows/search_db_tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/search_db_tests.yml b/.github/workflows/search_db_tests.yml index e3e46dd97..118c1c06c 100644 --- a/.github/workflows/search_db_tests.yml +++ b/.github/workflows/search_db_tests.yml @@ -84,6 +84,7 @@ jobs: GRAPH_DATABASE_PROVIDER: 'neo4j' VECTOR_DB_PROVIDER: 'lancedb' DB_PROVIDER: 'sqlite' + ENABLE_BACKEND_ACCESS_CONTROL: 'false' GRAPH_DATABASE_URL: ${{ steps.neo4j.outputs.neo4j-url }} GRAPH_DATABASE_USERNAME: ${{ steps.neo4j.outputs.neo4j-username }} GRAPH_DATABASE_PASSWORD: ${{ steps.neo4j.outputs.neo4j-password }} @@ -135,6 +136,7 @@ jobs: EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} GRAPH_DATABASE_PROVIDER: 'kuzu' VECTOR_DB_PROVIDER: 'pgvector' + ENABLE_BACKEND_ACCESS_CONTROL: 'false' DB_PROVIDER: 'postgres' DB_NAME: 'cognee_db' DB_HOST: '127.0.0.1' @@ -197,6 +199,7 @@ jobs: GRAPH_DATABASE_URL: ${{ steps.neo4j.outputs.neo4j-url }} GRAPH_DATABASE_USERNAME: ${{ steps.neo4j.outputs.neo4j-username }} GRAPH_DATABASE_PASSWORD: ${{ steps.neo4j.outputs.neo4j-password }} + ENABLE_BACKEND_ACCESS_CONTROL: 'false' DB_NAME: cognee_db DB_HOST: 127.0.0.1 DB_PORT: 5432 From a4a9e762465ecaf0dcdb9b0132db7951d11b437c Mon Sep 17 00:00:00 2001 From: Fahad Shoaib Date: Sun, 2 Nov 2025 17:05:03 +0500 Subject: [PATCH 046/284] feat: add ontology endpoint in REST API - Add POST /api/v1/ontologies endpoint for file upload - Add GET /api/v1/ontologies endpoint for listing ontologies - Implement OntologyService for file management and metadata - Integrate ontology_key parameter in cognify endpoint - Update RDFLibOntologyResolver to support file-like objects - Add essential test suite for ontology endpoints --- cognee/api/client.py | 3 + 
.../v1/cognify/routers/get_cognify_router.py | 31 +++++- cognee/api/v1/ontologies/__init__.py | 4 + cognee/api/v1/ontologies/ontologies.py | 101 ++++++++++++++++++ cognee/api/v1/ontologies/routers/__init__.py | 0 .../ontologies/routers/get_ontology_router.py | 89 +++++++++++++++ .../rdf_xml/RDFLibOntologyResolver.py | 69 +++++++----- cognee/tests/test_ontology_endpoint.py | 89 +++++++++++++++ 8 files changed, 356 insertions(+), 30 deletions(-) create mode 100644 cognee/api/v1/ontologies/__init__.py create mode 100644 cognee/api/v1/ontologies/ontologies.py create mode 100644 cognee/api/v1/ontologies/routers/__init__.py create mode 100644 cognee/api/v1/ontologies/routers/get_ontology_router.py create mode 100644 cognee/tests/test_ontology_endpoint.py diff --git a/cognee/api/client.py b/cognee/api/client.py index 6766c12de..89e9eb2f5 100644 --- a/cognee/api/client.py +++ b/cognee/api/client.py @@ -23,6 +23,7 @@ from cognee.api.v1.settings.routers import get_settings_router from cognee.api.v1.datasets.routers import get_datasets_router from cognee.api.v1.cognify.routers import get_code_pipeline_router, get_cognify_router from cognee.api.v1.search.routers import get_search_router +from cognee.api.v1.ontologies.routers.get_ontology_router import get_ontology_router from cognee.api.v1.memify.routers import get_memify_router from cognee.api.v1.add.routers import get_add_router from cognee.api.v1.delete.routers import get_delete_router @@ -258,6 +259,8 @@ app.include_router( app.include_router(get_datasets_router(), prefix="/api/v1/datasets", tags=["datasets"]) +app.include_router(get_ontology_router(), prefix="/api/v1/ontologies", tags=["ontologies"]) + app.include_router(get_settings_router(), prefix="/api/v1/settings", tags=["settings"]) app.include_router(get_visualize_router(), prefix="/api/v1/visualize", tags=["visualize"]) diff --git a/cognee/api/v1/cognify/routers/get_cognify_router.py b/cognee/api/v1/cognify/routers/get_cognify_router.py index 231bbcd11..246cc6c56 
100644 --- a/cognee/api/v1/cognify/routers/get_cognify_router.py +++ b/cognee/api/v1/cognify/routers/get_cognify_router.py @@ -41,6 +41,10 @@ class CognifyPayloadDTO(InDTO): custom_prompt: Optional[str] = Field( default="", description="Custom prompt for entity extraction and graph generation" ) + ontology_key: Optional[str] = Field( + default=None, + description="Reference to previously uploaded ontology" + ) def get_cognify_router() -> APIRouter: @@ -68,6 +72,7 @@ def get_cognify_router() -> APIRouter: - **dataset_ids** (Optional[List[UUID]]): List of existing dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted). - **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking). - **custom_prompt** (Optional[str]): Custom prompt for entity extraction and graph generation. If provided, this prompt will be used instead of the default prompts for knowledge graph extraction. + - **ontology_key** (Optional[str]): Reference to a previously uploaded ontology file to use for knowledge graph construction. ## Response - **Blocking execution**: Complete pipeline run information with entity counts, processing duration, and success/failure status @@ -82,7 +87,8 @@ def get_cognify_router() -> APIRouter: { "datasets": ["research_papers", "documentation"], "run_in_background": false, - "custom_prompt": "Extract entities focusing on technical concepts and their relationships. Identify key technologies, methodologies, and their interconnections." + "custom_prompt": "Extract entities focusing on technical concepts and their relationships. 
Identify key technologies, methodologies, and their interconnections.", + "ontology_key": "medical_ontology_v1" } ``` @@ -108,13 +114,36 @@ def get_cognify_router() -> APIRouter: ) from cognee.api.v1.cognify import cognify as cognee_cognify + from cognee.api.v1.ontologies.ontologies import OntologyService try: datasets = payload.dataset_ids if payload.dataset_ids else payload.datasets + config_to_use = None + + if payload.ontology_key: + ontology_service = OntologyService() + try: + ontology_content = ontology_service.get_ontology_content(payload.ontology_key, user) + + from cognee.modules.ontology.ontology_config import Config + from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver + from io import StringIO + + ontology_stream = StringIO(ontology_content) + config_to_use: Config = { + "ontology_config": { + "ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_stream) + } + } + except ValueError as e: + return JSONResponse( + status_code=400, content={"error": f"Ontology error: {str(e)}"} + ) cognify_run = await cognee_cognify( datasets, user, + config=config_to_use, run_in_background=payload.run_in_background, custom_prompt=payload.custom_prompt, ) diff --git a/cognee/api/v1/ontologies/__init__.py b/cognee/api/v1/ontologies/__init__.py new file mode 100644 index 000000000..c25064edc --- /dev/null +++ b/cognee/api/v1/ontologies/__init__.py @@ -0,0 +1,4 @@ +from .ontologies import OntologyService +from .routers.get_ontology_router import get_ontology_router + +__all__ = ["OntologyService", "get_ontology_router"] \ No newline at end of file diff --git a/cognee/api/v1/ontologies/ontologies.py b/cognee/api/v1/ontologies/ontologies.py new file mode 100644 index 000000000..fb7f3cd9a --- /dev/null +++ b/cognee/api/v1/ontologies/ontologies.py @@ -0,0 +1,101 @@ +import os +import json +import tempfile +from pathlib import Path +from datetime import datetime, timezone +from typing import Optional +from dataclasses import 
dataclass + +@dataclass +class OntologyMetadata: + ontology_key: str + filename: str + size_bytes: int + uploaded_at: str + description: Optional[str] = None + +class OntologyService: + def __init__(self): + pass + + @property + def base_dir(self) -> Path: + return Path(tempfile.gettempdir()) / "ontologies" + + def _get_user_dir(self, user_id: str) -> Path: + user_dir = self.base_dir / str(user_id) + user_dir.mkdir(parents=True, exist_ok=True) + return user_dir + + def _get_metadata_path(self, user_dir: Path) -> Path: + return user_dir / "metadata.json" + + def _load_metadata(self, user_dir: Path) -> dict: + metadata_path = self._get_metadata_path(user_dir) + if metadata_path.exists(): + with open(metadata_path, 'r') as f: + return json.load(f) + return {} + + def _save_metadata(self, user_dir: Path, metadata: dict): + metadata_path = self._get_metadata_path(user_dir) + with open(metadata_path, 'w') as f: + json.dump(metadata, f, indent=2) + + async def upload_ontology(self, ontology_key: str, file, user, description: Optional[str] = None) -> OntologyMetadata: + # Validate file format + if not file.filename.lower().endswith('.owl'): + raise ValueError("File must be in .owl format") + + user_dir = self._get_user_dir(str(user.id)) + metadata = self._load_metadata(user_dir) + + # Check for duplicate key + if ontology_key in metadata: + raise ValueError(f"Ontology key '{ontology_key}' already exists") + + # Read file content + content = await file.read() + if len(content) > 10 * 1024 * 1024: # 10MB limit + raise ValueError("File size exceeds 10MB limit") + + # Save file + file_path = user_dir / f"{ontology_key}.owl" + with open(file_path, 'wb') as f: + f.write(content) + + # Update metadata + ontology_metadata = { + "filename": file.filename, + "size_bytes": len(content), + "uploaded_at": datetime.now(timezone.utc).isoformat(), + "description": description + } + metadata[ontology_key] = ontology_metadata + self._save_metadata(user_dir, metadata) + + return 
OntologyMetadata( + ontology_key=ontology_key, + filename=file.filename, + size_bytes=len(content), + uploaded_at=ontology_metadata["uploaded_at"], + description=description + ) + + def get_ontology_content(self, ontology_key: str, user) -> str: + user_dir = self._get_user_dir(str(user.id)) + metadata = self._load_metadata(user_dir) + + if ontology_key not in metadata: + raise ValueError(f"Ontology key '{ontology_key}' not found") + + file_path = user_dir / f"{ontology_key}.owl" + if not file_path.exists(): + raise ValueError(f"Ontology file for key '{ontology_key}' not found") + + with open(file_path, 'r', encoding='utf-8') as f: + return f.read() + + def list_ontologies(self, user) -> dict: + user_dir = self._get_user_dir(str(user.id)) + return self._load_metadata(user_dir) \ No newline at end of file diff --git a/cognee/api/v1/ontologies/routers/__init__.py b/cognee/api/v1/ontologies/routers/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cognee/api/v1/ontologies/routers/get_ontology_router.py b/cognee/api/v1/ontologies/routers/get_ontology_router.py new file mode 100644 index 000000000..c171fa7bb --- /dev/null +++ b/cognee/api/v1/ontologies/routers/get_ontology_router.py @@ -0,0 +1,89 @@ +from fastapi import APIRouter, File, Form, UploadFile, Depends, HTTPException +from fastapi.responses import JSONResponse +from typing import Optional + +from cognee.modules.users.models import User +from cognee.modules.users.methods import get_authenticated_user +from cognee.shared.utils import send_telemetry +from cognee import __version__ as cognee_version +from ..ontologies import OntologyService + +def get_ontology_router() -> APIRouter: + router = APIRouter() + ontology_service = OntologyService() + + @router.post("", response_model=dict) + async def upload_ontology( + ontology_key: str = Form(...), + ontology_file: UploadFile = File(...), + description: Optional[str] = Form(None), + user: User = Depends(get_authenticated_user) + ): + """ + 
Upload an ontology file with a named key for later use in cognify operations. + + ## Request Parameters + - **ontology_key** (str): User-defined identifier for the ontology + - **ontology_file** (UploadFile): OWL format ontology file + - **description** (Optional[str]): Optional description of the ontology + + ## Response + Returns metadata about the uploaded ontology including key, filename, size, and upload timestamp. + + ## Error Codes + - **400 Bad Request**: Invalid file format, duplicate key, file size exceeded + - **500 Internal Server Error**: File system or processing errors + """ + send_telemetry( + "Ontology Upload API Endpoint Invoked", + user.id, + additional_properties={ + "endpoint": "POST /api/v1/ontologies", + "cognee_version": cognee_version, + }, + ) + + try: + result = await ontology_service.upload_ontology( + ontology_key, ontology_file, user, description + ) + return { + "ontology_key": result.ontology_key, + "filename": result.filename, + "size_bytes": result.size_bytes, + "uploaded_at": result.uploaded_at + } + except ValueError as e: + return JSONResponse(status_code=400, content={"error": str(e)}) + except Exception as e: + return JSONResponse(status_code=500, content={"error": str(e)}) + + @router.get("", response_model=dict) + async def list_ontologies( + user: User = Depends(get_authenticated_user) + ): + """ + List all uploaded ontologies for the authenticated user. + + ## Response + Returns a dictionary mapping ontology keys to their metadata including filename, size, and upload timestamp. 
+ + ## Error Codes + - **500 Internal Server Error**: File system or processing errors + """ + send_telemetry( + "Ontology List API Endpoint Invoked", + user.id, + additional_properties={ + "endpoint": "GET /api/v1/ontologies", + "cognee_version": cognee_version, + }, + ) + + try: + metadata = ontology_service.list_ontologies(user) + return metadata + except Exception as e: + return JSONResponse(status_code=500, content={"error": str(e)}) + + return router \ No newline at end of file diff --git a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py index 45e32936a..4acc8861b 100644 --- a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +++ b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py @@ -2,7 +2,7 @@ import os import difflib from cognee.shared.logging_utils import get_logger from collections import deque -from typing import List, Tuple, Dict, Optional, Any, Union +from typing import List, Tuple, Dict, Optional, Any, Union, IO from rdflib import Graph, URIRef, RDF, RDFS, OWL from cognee.modules.ontology.exceptions import ( @@ -26,44 +26,55 @@ class RDFLibOntologyResolver(BaseOntologyResolver): def __init__( self, - ontology_file: Optional[Union[str, List[str]]] = None, + ontology_file: Optional[Union[str, List[str], IO]] = None, matching_strategy: Optional[MatchingStrategy] = None, ) -> None: super().__init__(matching_strategy) self.ontology_file = ontology_file try: - files_to_load = [] + self.graph = None if ontology_file is not None: - if isinstance(ontology_file, str): - files_to_load = [ontology_file] - elif isinstance(ontology_file, list): - files_to_load = ontology_file + if hasattr(ontology_file, "read"): + self.graph = Graph() + content = ontology_file.read() + self.graph.parse(data=content, format="xml") + logger.info("Ontology loaded successfully from file object") else: - raise ValueError( - f"ontology_file must be a string, list of strings, or None. 
Got: {type(ontology_file)}" - ) - - if files_to_load: - self.graph = Graph() - loaded_files = [] - for file_path in files_to_load: - if os.path.exists(file_path): - self.graph.parse(file_path) - loaded_files.append(file_path) - logger.info("Ontology loaded successfully from file: %s", file_path) + files_to_load = [] + if isinstance(ontology_file, str): + files_to_load = [ontology_file] + elif isinstance(ontology_file, list): + files_to_load = ontology_file else: - logger.warning( - "Ontology file '%s' not found. Skipping this file.", - file_path, + raise ValueError( + f"ontology_file must be a string, list of strings, file-like object, or None. Got: {type(ontology_file)}" ) - if not loaded_files: - logger.info( - "No valid ontology files found. No owl ontology will be attached to the graph." - ) - self.graph = None - else: - logger.info("Total ontology files loaded: %d", len(loaded_files)) + if files_to_load: + self.graph = Graph() + loaded_files = [] + for file_path in files_to_load: + if os.path.exists(file_path): + self.graph.parse(file_path) + loaded_files.append(file_path) + logger.info("Ontology loaded successfully from file: %s", file_path) + else: + logger.warning( + "Ontology file '%s' not found. Skipping this file.", + file_path, + ) + + if not loaded_files: + logger.info( + "No valid ontology files found. No owl ontology will be attached to the graph." + ) + self.graph = None + else: + logger.info("Total ontology files loaded: %d", len(loaded_files)) + else: + logger.info( + "No ontology file provided. No owl ontology will be attached to the graph." + ) else: logger.info( "No ontology file provided. No owl ontology will be attached to the graph." 
diff --git a/cognee/tests/test_ontology_endpoint.py b/cognee/tests/test_ontology_endpoint.py new file mode 100644 index 000000000..4849f8649 --- /dev/null +++ b/cognee/tests/test_ontology_endpoint.py @@ -0,0 +1,89 @@ +import pytest +import uuid +from fastapi.testclient import TestClient +from unittest.mock import patch, Mock, AsyncMock +from types import SimpleNamespace +import importlib +from cognee.api.client import app + +gau_mod = importlib.import_module("cognee.modules.users.methods.get_authenticated_user") + +@pytest.fixture +def client(): + return TestClient(app) + +@pytest.fixture +def mock_user(): + user = Mock() + user.id = "test-user-123" + return user + +@pytest.fixture +def mock_default_user(): + """Mock default user for testing.""" + return SimpleNamespace( + id=uuid.uuid4(), + email="default@example.com", + is_active=True, + tenant_id=uuid.uuid4() + ) + +@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) +def test_upload_ontology_success(mock_get_default_user, client, mock_default_user): + """Test successful ontology upload""" + mock_get_default_user.return_value = mock_default_user + ontology_content = b"" + unique_key = f"test_ontology_{uuid.uuid4().hex[:8]}" + + response = client.post( + "/api/v1/ontologies", + files={"ontology_file": ("test.owl", ontology_content)}, + data={"ontology_key": unique_key, "description": "Test"} + ) + + assert response.status_code == 200 + data = response.json() + assert data["ontology_key"] == unique_key + assert "uploaded_at" in data + +@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) +def test_upload_ontology_invalid_file(mock_get_default_user, client, mock_default_user): + """Test 400 response for non-.owl files""" + mock_get_default_user.return_value = mock_default_user + unique_key = f"test_ontology_{uuid.uuid4().hex[:8]}" + response = client.post( + "/api/v1/ontologies", + files={"ontology_file": ("test.txt", b"not xml")}, + data={"ontology_key": unique_key} + ) + assert 
response.status_code == 400 + +@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) +def test_upload_ontology_missing_data(mock_get_default_user, client, mock_default_user): + """Test 400 response for missing file or key""" + mock_get_default_user.return_value = mock_default_user + # Missing file + response = client.post("/api/v1/ontologies", data={"ontology_key": "test"}) + assert response.status_code == 400 + + # Missing key + response = client.post("/api/v1/ontologies", files={"ontology_file": ("test.owl", b"xml")}) + assert response.status_code == 400 + +@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) +def test_upload_ontology_unauthorized(mock_get_default_user, client, mock_default_user): + """Test behavior when default user is provided (no explicit authentication)""" + unique_key = f"test_ontology_{uuid.uuid4().hex[:8]}" + mock_get_default_user.return_value = mock_default_user + response = client.post( + "/api/v1/ontologies", + files={"ontology_file": ("test.owl", b"")}, + data={"ontology_key": unique_key} + ) + + # The current system provides a default user when no explicit authentication is given + # This test verifies the system works with conditional authentication + assert response.status_code == 200 + data = response.json() + assert data["ontology_key"] == unique_key + assert "uploaded_at" in data \ No newline at end of file From b8241e58e5794f006122d4b61c4805a669fc217f Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Mon, 3 Nov 2025 16:20:03 +0100 Subject: [PATCH 047/284] CI: Limit deletion integration tests to 60 minutes --- .github/workflows/test_different_operating_systems.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test_different_operating_systems.yml b/.github/workflows/test_different_operating_systems.yml index b6b3f7b3c..7a5288cf6 100644 --- a/.github/workflows/test_different_operating_systems.yml +++ b/.github/workflows/test_different_operating_systems.yml @@ -181,6 +181,7 @@ jobs: 
run-soft-deletion-test: name: Soft Delete test ${{ matrix.python-version }} on ${{ matrix.os }} runs-on: ${{ matrix.os }} + timeout-minutes: 60 strategy: matrix: python-version: ${{ fromJSON(inputs.python-versions) }} @@ -218,6 +219,7 @@ jobs: run-hard-deletion-test: name: Hard Delete test ${{ matrix.python-version }} on ${{ matrix.os }} runs-on: ${{ matrix.os }} + timeout-minutes: 60 strategy: matrix: python-version: ${{ fromJSON(inputs.python-versions) }} From 2ab2cffd07ed35a268362af48c2fb668674509f1 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 3 Nov 2025 16:37:03 +0100 Subject: [PATCH 048/284] chore: update test_search_db to work with all graph providers --- cognee/tests/test_search_db.py | 8 +++++++- examples/python/simple_example.py | 8 -------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/cognee/tests/test_search_db.py b/cognee/tests/test_search_db.py index bcc4529a9..ea3f0ea44 100644 --- a/cognee/tests/test_search_db.py +++ b/cognee/tests/test_search_db.py @@ -146,7 +146,13 @@ async def main(): assert len(search_results) == 1, ( f"{name}: expected single-element list, got {len(search_results)}" ) - text = search_results[0]["search_result"][0] + + from cognee.context_global_variables import check_backend_access_control_mode + + if check_backend_access_control_mode(): + text = search_results[0]["search_result"][0] + else: + text = search_results[0] assert isinstance(text, str), f"{name}: element should be a string" assert text.strip(), f"{name}: string should not be empty" assert "netherlands" in text.lower(), ( diff --git a/examples/python/simple_example.py b/examples/python/simple_example.py index c13e48f85..237a8295e 100644 --- a/examples/python/simple_example.py +++ b/examples/python/simple_example.py @@ -59,14 +59,6 @@ async def main(): for result_text in search_results: print(result_text) - # Example output: - # ({'id': UUID('bc338a39-64d6-549a-acec-da60846dd90d'), 'updated_at': datetime.datetime(2024, 11, 21, 12, 23, 1, 211808, 
tzinfo=datetime.timezone.utc), 'name': 'natural language processing', 'description': 'An interdisciplinary subfield of computer science and information retrieval.'}, {'relationship_name': 'is_a_subfield_of', 'source_node_id': UUID('bc338a39-64d6-549a-acec-da60846dd90d'), 'target_node_id': UUID('6218dbab-eb6a-5759-a864-b3419755ffe0'), 'updated_at': datetime.datetime(2024, 11, 21, 12, 23, 15, 473137, tzinfo=datetime.timezone.utc)}, {'id': UUID('6218dbab-eb6a-5759-a864-b3419755ffe0'), 'updated_at': datetime.datetime(2024, 11, 21, 12, 23, 1, 211808, tzinfo=datetime.timezone.utc), 'name': 'computer science', 'description': 'The study of computation and information processing.'}) - # (...) - # It represents nodes and relationships in the knowledge graph: - # - The first element is the source node (e.g., 'natural language processing'). - # - The second element is the relationship between nodes (e.g., 'is_a_subfield_of'). - # - The third element is the target node (e.g., 'computer science'). - if __name__ == "__main__": logger = setup_logging(log_level=ERROR) From 4424bdc76471342119d59943c38d392dac9d72b0 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Mon, 3 Nov 2025 17:06:51 +0100 Subject: [PATCH 049/284] test: fix path based on pr comment --- cognee/tests/test_load.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/cognee/tests/test_load.py b/cognee/tests/test_load.py index a09ce053d..b38466bc7 100644 --- a/cognee/tests/test_load.py +++ b/cognee/tests/test_load.py @@ -30,17 +30,10 @@ async def process_and_search(num_of_searches): async def main(): - data_directory_path = str( - pathlib.Path( - os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_load") - ).resolve() - ) + data_directory_path = os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_load") cognee.config.data_root_directory(data_directory_path) - cognee_directory_path = str( - pathlib.Path( - os.path.join(pathlib.Path(__file__).parent, 
".cognee_system/test_load") - ).resolve() - ) + + cognee_directory_path = os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_load") cognee.config.system_root_directory(cognee_directory_path) num_of_pdfs = 10 From eb8df45dab2cb7a07de7eb97570d364790f80080 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Mon, 3 Nov 2025 18:10:19 +0100 Subject: [PATCH 050/284] test: increase file descriptor limit on workflow load test --- .github/workflows/e2e_tests.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index cf704c76a..79df3ff6b 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -463,6 +463,12 @@ jobs: with: python-version: '3.11.x' + - name: Set File Descriptor Limit + run: sudo prlimit --pid $$ --nofile=4096:4096 + + - name: Verify File Descriptor Limit + run: ulimit -n + - name: Dependencies already installed run: echo "Dependencies already installed in setup" @@ -478,4 +484,9 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + STORAGE_BACKEND: s3 + AWS_REGION: eu-west-1 + AWS_ENDPOINT_URL: https://s3-eu-west-1.amazonaws.com + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_DEV_USER_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_DEV_USER_SECRET_KEY }} run: uv run python ./cognee/tests/test_load.py \ No newline at end of file From a7d63df98c7ad21a40679c1b821acb30690c65d8 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Mon, 3 Nov 2025 18:15:18 +0100 Subject: [PATCH 051/284] test: add extra aws dependency to load test --- .github/workflows/e2e_tests.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index 79df3ff6b..0596f22d3 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -267,8 +267,6 @@ 
jobs: EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} run: uv run python ./cognee/tests/test_edge_ingestion.py - - run_concurrent_subprocess_access_test: name: Concurrent Subprocess access test runs-on: ubuntu-latest @@ -450,7 +448,6 @@ jobs: DB_PASSWORD: cognee run: uv run python ./cognee/tests/test_conversation_history.py - test-load: name: Test Load runs-on: ubuntu-22.04 @@ -462,6 +459,7 @@ jobs: uses: ./.github/actions/cognee_setup with: python-version: '3.11.x' + extra-dependencies: "aws" - name: Set File Descriptor Limit run: sudo prlimit --pid $$ --nofile=4096:4096 From c81d06d364d3b1f114fb2e7e81db856e7389386d Mon Sep 17 00:00:00 2001 From: Igor Ilic <30923996+dexters1@users.noreply.github.com> Date: Mon, 3 Nov 2025 19:37:52 +0100 Subject: [PATCH 052/284] Update cognee/context_global_variables.py Co-authored-by: Pavel Zorin --- cognee/context_global_variables.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py index b4b848192..6f3965441 100644 --- a/cognee/context_global_variables.py +++ b/cognee/context_global_variables.py @@ -41,11 +41,7 @@ def check_backend_access_control_mode(): if backend_access_control is None: # If backend access control is not defined in environment variables, # enable it by default if graph and vector DBs can support it, otherwise disable it - multi_user_support = check_multi_user_support() - if multi_user_support: - return True - else: - return False + return check_multi_user_support() elif backend_access_control.lower() == "true": # If enabled, ensure that the current graph and vector DBs can support it multi_user_support = check_multi_user_support() From 53521c2068319d340c3f2b396dbbc7f3f9c80523 Mon Sep 17 00:00:00 2001 From: Igor Ilic <30923996+dexters1@users.noreply.github.com> Date: Mon, 3 Nov 2025 19:42:51 +0100 Subject: [PATCH 053/284] Update cognee/context_global_variables.py Co-authored-by: Pavel Zorin --- 
cognee/context_global_variables.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py index 6f3965441..3afbf6ff2 100644 --- a/cognee/context_global_variables.py +++ b/cognee/context_global_variables.py @@ -27,13 +27,10 @@ async def set_session_user_context_variable(user): def check_multi_user_support(): graph_db_config = get_graph_context_config() vector_db_config = get_vectordb_context_config() - if ( + return ( graph_db_config["graph_database_provider"] in graph_dbs_with_multi_user_support and vector_db_config["vector_db_provider"] in vector_dbs_with_multi_user_support - ): - return True - else: - return False + ) def check_backend_access_control_mode(): From 46c509778f89d5bebbcbe5f7578159e45841ca2d Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 12:06:16 +0100 Subject: [PATCH 054/284] refactor: Rename access control functions --- cognee/context_global_variables.py | 17 +++++++---------- cognee/modules/search/methods/search.py | 6 +++--- .../users/methods/get_authenticated_user.py | 4 ++-- cognee/tests/test_search_db.py | 4 ++-- 4 files changed, 14 insertions(+), 17 deletions(-) diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py index 3afbf6ff2..f17c9187a 100644 --- a/cognee/context_global_variables.py +++ b/cognee/context_global_variables.py @@ -24,7 +24,7 @@ async def set_session_user_context_variable(user): session_user.set(user) -def check_multi_user_support(): +def multi_user_support_possible(): graph_db_config = get_graph_context_config() vector_db_config = get_vectordb_context_config() return ( @@ -33,24 +33,21 @@ def check_multi_user_support(): ) -def check_backend_access_control_mode(): +def backend_access_control_enabled(): backend_access_control = os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL", None) if backend_access_control is None: # If backend access control is not defined in environment variables, # enable 
it by default if graph and vector DBs can support it, otherwise disable it - return check_multi_user_support() + return multi_user_support_possible() elif backend_access_control.lower() == "true": # If enabled, ensure that the current graph and vector DBs can support it - multi_user_support = check_multi_user_support() + multi_user_support = multi_user_support_possible() if not multi_user_support: raise EnvironmentError( "ENABLE_BACKEND_ACCESS_CONTROL is set to true but the current graph and/or vector databases do not support multi-user access control. Please use supported databases or disable backend access control." ) - else: - return True - else: - # If explicitly disabled, return false - return False + return True + return False async def set_database_global_context_variables(dataset: Union[str, UUID], user_id: UUID): @@ -74,7 +71,7 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_ base_config = get_base_config() - if not check_backend_access_control_mode(): + if not backend_access_control_enabled(): return user = await get_user(user_id) diff --git a/cognee/modules/search/methods/search.py b/cognee/modules/search/methods/search.py index 4a67093e8..5e465b239 100644 --- a/cognee/modules/search/methods/search.py +++ b/cognee/modules/search/methods/search.py @@ -8,7 +8,7 @@ from cognee.infrastructure.databases.graph import get_graph_engine from cognee.shared.logging_utils import get_logger from cognee.shared.utils import send_telemetry from cognee.context_global_variables import set_database_global_context_variables -from cognee.context_global_variables import check_backend_access_control_mode +from cognee.context_global_variables import backend_access_control_enabled from cognee.modules.engine.models.node_set import NodeSet from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge @@ -74,7 +74,7 @@ async def search( ) # Use search function filtered by permissions if access control is enabled - if 
check_backend_access_control_mode(): + if backend_access_control_enabled(): search_results = await authorized_search( query_type=query_type, query_text=query_text, @@ -156,7 +156,7 @@ async def search( ) else: # This is for maintaining backwards compatibility - if check_backend_access_control_mode(): + if backend_access_control_enabled(): return_value = [] for search_result in search_results: prepared_search_results = await prepare_search_result(search_result) diff --git a/cognee/modules/users/methods/get_authenticated_user.py b/cognee/modules/users/methods/get_authenticated_user.py index 3cc16f3a8..d6d701737 100644 --- a/cognee/modules/users/methods/get_authenticated_user.py +++ b/cognee/modules/users/methods/get_authenticated_user.py @@ -5,7 +5,7 @@ from ..models import User from ..get_fastapi_users import get_fastapi_users from .get_default_user import get_default_user from cognee.shared.logging_utils import get_logger -from cognee.context_global_variables import check_backend_access_control_mode +from cognee.context_global_variables import backend_access_control_enabled logger = get_logger("get_authenticated_user") @@ -13,7 +13,7 @@ logger = get_logger("get_authenticated_user") # Check environment variable to determine authentication requirement REQUIRE_AUTHENTICATION = ( os.getenv("REQUIRE_AUTHENTICATION", "false").lower() == "true" - or check_backend_access_control_mode() + or backend_access_control_enabled() ) fastapi_users = get_fastapi_users() diff --git a/cognee/tests/test_search_db.py b/cognee/tests/test_search_db.py index ea3f0ea44..bd11dc62e 100644 --- a/cognee/tests/test_search_db.py +++ b/cognee/tests/test_search_db.py @@ -147,9 +147,9 @@ async def main(): f"{name}: expected single-element list, got {len(search_results)}" ) - from cognee.context_global_variables import check_backend_access_control_mode + from cognee.context_global_variables import backend_access_control_enabled - if check_backend_access_control_mode(): + if 
backend_access_control_enabled(): text = search_results[0]["search_result"][0] else: text = search_results[0] From e11a8b9a51fa4c4f3fea43cd8b2f9585a2a27997 Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Tue, 4 Nov 2025 12:13:48 +0100 Subject: [PATCH 055/284] CI: Added timeouts for all OS tests --- .github/workflows/test_different_operating_systems.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/test_different_operating_systems.yml b/.github/workflows/test_different_operating_systems.yml index 7a5288cf6..02651b474 100644 --- a/.github/workflows/test_different_operating_systems.yml +++ b/.github/workflows/test_different_operating_systems.yml @@ -44,6 +44,7 @@ jobs: run-unit-tests: name: Unit tests ${{ matrix.python-version }} on ${{ matrix.os }} runs-on: ${{ matrix.os }} + timeout-minutes: 60 strategy: matrix: python-version: ${{ fromJSON(inputs.python-versions) }} @@ -80,6 +81,7 @@ jobs: run-integration-tests: name: Integration tests ${{ matrix.python-version }} on ${{ matrix.os }} runs-on: ${{ matrix.os }} + timeout-minutes: 60 strategy: matrix: python-version: ${{ fromJSON(inputs.python-versions) }} @@ -116,6 +118,7 @@ jobs: run-library-test: name: Library test ${{ matrix.python-version }} on ${{ matrix.os }} runs-on: ${{ matrix.os }} + timeout-minutes: 60 strategy: matrix: python-version: ${{ fromJSON(inputs.python-versions) }} @@ -152,6 +155,7 @@ jobs: run-build-test: name: Build test ${{ matrix.python-version }} on ${{ matrix.os }} runs-on: ${{ matrix.os }} + timeout-minutes: 60 strategy: matrix: python-version: ${{ fromJSON(inputs.python-versions) }} From e3b707a0c242fb7268d56d2b485990f120fe0462 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 12:20:17 +0100 Subject: [PATCH 056/284] refactor: Change variable names, add setting of current tenant to be optional for tenant creation --- .../users/tenants/methods/add_user_to_tenant.py | 8 ++++---- .../modules/users/tenants/methods/create_tenant.py | 13 +++++++++---- 
.../modules/users/tenants/methods/select_tenant.py | 2 +- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/cognee/modules/users/tenants/methods/add_user_to_tenant.py b/cognee/modules/users/tenants/methods/add_user_to_tenant.py index dabab6b6b..edadfe66b 100644 --- a/cognee/modules/users/tenants/methods/add_user_to_tenant.py +++ b/cognee/modules/users/tenants/methods/add_user_to_tenant.py @@ -16,18 +16,18 @@ from cognee.modules.users.exceptions import ( async def add_user_to_tenant( - user_id: UUID, tenant_id: UUID, owner_id: UUID, set_active_tenant: Optional[bool] = True + user_id: UUID, tenant_id: UUID, owner_id: UUID, set_as_active_tenant: Optional[bool] = True ): """ Add a user with the given id to the tenant with the given id. This can only be successful if the request owner with the given id is the tenant owner. - If set_active_tenant is true it will automatically set the users active tenant to provided tenant. + If set_as_active_tenant is true it will automatically set the users active tenant to provided tenant. Args: user_id: Id of the user. tenant_id: Id of the tenant. owner_id: Id of the request owner. - set_active_tenant: If set_active_tenant is true it will automatically set the users active tenant to provided tenant. + set_as_active_tenant: If set_as_active_tenant is true it will automatically set the users active tenant to provided tenant. Returns: None @@ -48,7 +48,7 @@ async def add_user_to_tenant( message="Only tenant owner can add other users to organization." 
) - if set_active_tenant: + if set_as_active_tenant: user.tenant_id = tenant_id await session.merge(user) await session.commit() diff --git a/cognee/modules/users/tenants/methods/create_tenant.py b/cognee/modules/users/tenants/methods/create_tenant.py index 60e10db5c..32baa05fd 100644 --- a/cognee/modules/users/tenants/methods/create_tenant.py +++ b/cognee/modules/users/tenants/methods/create_tenant.py @@ -1,6 +1,7 @@ from uuid import UUID from sqlalchemy import insert from sqlalchemy.exc import IntegrityError +from typing import Optional from cognee.modules.users.models.UserTenant import UserTenant from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError @@ -9,13 +10,16 @@ from cognee.modules.users.models import Tenant from cognee.modules.users.methods import get_user -async def create_tenant(tenant_name: str, user_id: UUID) -> UUID: +async def create_tenant( + tenant_name: str, user_id: UUID, set_as_active_tenant: Optional[bool] = True +) -> UUID: """ Create a new tenant with the given name, for the user with the given id. This user is the owner of the tenant. Args: tenant_name: Name of the new tenant. user_id: Id of the user. + set_as_active_tenant: If true, set the newly created tenant as the active tenant for the user. 
Returns: None @@ -29,9 +33,10 @@ async def create_tenant(tenant_name: str, user_id: UUID) -> UUID: session.add(tenant) await session.flush() - user.tenant_id = tenant.id - await session.merge(user) - await session.commit() + if set_as_active_tenant: + user.tenant_id = tenant.id + await session.merge(user) + await session.commit() try: # Add association directly to the association table diff --git a/cognee/modules/users/tenants/methods/select_tenant.py b/cognee/modules/users/tenants/methods/select_tenant.py index 732b24858..6e72fea2f 100644 --- a/cognee/modules/users/tenants/methods/select_tenant.py +++ b/cognee/modules/users/tenants/methods/select_tenant.py @@ -52,7 +52,7 @@ async def select_tenant(user_id: UUID, tenant_id: Union[UUID, None]): try: result = result.scalar_one() except sqlalchemy.exc.NoResultFound as e: - raise TenantNotFoundError("User Tenant relationship not found.") from e + raise TenantNotFoundError("User is not part of the tenant.") from e if result: # If user is part of tenant update current tenant of user From b0f85c9e990f8dd20e6fce8dcd6f29c4050050e8 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 13:01:10 +0100 Subject: [PATCH 057/284] feat: add legacy and modern data_id calculating --- cognee/modules/data/methods/__init__.py | 1 + .../data/methods/get_unique_data_id.py | 71 +++++++++++++++++++ cognee/modules/ingestion/identify.py | 10 +-- 3 files changed, 78 insertions(+), 4 deletions(-) create mode 100644 cognee/modules/data/methods/get_unique_data_id.py diff --git a/cognee/modules/data/methods/__init__.py b/cognee/modules/data/methods/__init__.py index 83913085c..7936a9afd 100644 --- a/cognee/modules/data/methods/__init__.py +++ b/cognee/modules/data/methods/__init__.py @@ -10,6 +10,7 @@ from .get_authorized_dataset import get_authorized_dataset from .get_authorized_dataset_by_name import get_authorized_dataset_by_name from .get_data import get_data from .get_unique_dataset_id import get_unique_dataset_id +from 
.get_unique_data_id import get_unique_data_id from .get_authorized_existing_datasets import get_authorized_existing_datasets from .get_dataset_ids import get_dataset_ids diff --git a/cognee/modules/data/methods/get_unique_data_id.py b/cognee/modules/data/methods/get_unique_data_id.py new file mode 100644 index 000000000..3fc184ce4 --- /dev/null +++ b/cognee/modules/data/methods/get_unique_data_id.py @@ -0,0 +1,71 @@ +from uuid import uuid5, NAMESPACE_OID, UUID +from typing import Optional +from sqlalchemy import select + +from cognee.modules.data.models.Data import Data +from cognee.infrastructure.databases.relational import get_relational_engine +from cognee.modules.users.models import User + + +async def get_unique_data_id(data_identifier: str, user: User, tenant_id: Optional[UUID]) -> UUID: + """ + Function returns a unique UUID for data based on data identifier, user id and tenant id. + If data with legacy ID exists, return that ID to maintain compatibility. + + Args: + data_identifier: A way to uniquely identify data (e.g. file hash, data name, etc.) + user: User object adding the data + tenant_id: UUID of the tenant for which data is being added + + Returns: + UUID: Unique identifier for the data + """ + + def _get_deprecated_unique_data_id(data_identifier: str, user: User) -> UUID: + """ + Deprecated function, returns a unique UUID for data based on data identifier and user id. + Needed to support legacy data without tenant information. + Args: + data_identifier: A way to uniquely identify data (e.g. file hash, data name, etc.) + user: User object adding the data + + Returns: + UUID: Unique identifier for the data + """ + # return UUID hash of file contents + owner id + tenant_id + return uuid5(NAMESPACE_OID, f"{data_identifier}{str(user.id)}") + + def _get_modern_unique_data_id(data_identifier: str, user: User, tenant_id: UUID) -> UUID: + """ + Function returns a unique UUID for data based on data identifier, user id and tenant id. 
+ Args: + data_identifier: A way to uniquely identify data (e.g. file hash, data name, etc.) + user: User object adding the data + tenant_id: UUID of the tenant for which data is being added + + Returns: + UUID: Unique identifier for the data + """ + # return UUID hash of file contents + owner id + tenant_id + return uuid5(NAMESPACE_OID, f"{data_identifier}{str(user.id)}{str(tenant_id)}") + + # Get all possible data_id values + data_id = { + "modern_data_id": _get_modern_unique_data_id( + data_identifier=data_identifier, user=user, tenant_id=tenant_id + ), + "legacy_data_id": _get_deprecated_unique_data_id( + data_identifier=data_identifier, user=user + ), + } + + # Check if data item with legacy_data_id exists, if so use that one, else use modern_data_id + db_engine = get_relational_engine() + async with db_engine.get_async_session() as session: + legacy_data_point = ( + await session.execute(select(Data).filter(Data.id == data_id["legacy_data_id"])) + ).scalar_one_or_none() + + if not legacy_data_point: + return data_id["modern_data_id"] + return data_id["legacy_data_id"] diff --git a/cognee/modules/ingestion/identify.py b/cognee/modules/ingestion/identify.py index 977ff3f0b..5a0fe379e 100644 --- a/cognee/modules/ingestion/identify.py +++ b/cognee/modules/ingestion/identify.py @@ -1,11 +1,13 @@ -from uuid import uuid5, NAMESPACE_OID +from uuid import UUID from .data_types import IngestionData from cognee.modules.users.models import User +from cognee.modules.data.methods import get_unique_data_id -def identify(data: IngestionData, user: User) -> str: +async def identify(data: IngestionData, user: User) -> UUID: data_content_hash: str = data.get_identifier() - # return UUID hash of file contents + owner id - return uuid5(NAMESPACE_OID, f"{data_content_hash}{user.id}") + return await get_unique_data_id( + data_identifier=data_content_hash, user=user, tenant_id=user.tenant_id + ) From ff388179fb38cd82a59b3a45ea3f343b16c56c86 Mon Sep 17 00:00:00 2001 From: Igor Ilic 
Date: Tue, 4 Nov 2025 13:11:57 +0100 Subject: [PATCH 058/284] feat: Add dataset_id calculation that handles legacy dataset_id --- .../data/methods/get_unique_data_id.py | 11 ++- .../data/methods/get_unique_dataset_id.py | 70 +++++++++++++++++-- cognee/modules/ingestion/identify.py | 4 +- 3 files changed, 71 insertions(+), 14 deletions(-) diff --git a/cognee/modules/data/methods/get_unique_data_id.py b/cognee/modules/data/methods/get_unique_data_id.py index 3fc184ce4..877b5930c 100644 --- a/cognee/modules/data/methods/get_unique_data_id.py +++ b/cognee/modules/data/methods/get_unique_data_id.py @@ -1,5 +1,4 @@ from uuid import uuid5, NAMESPACE_OID, UUID -from typing import Optional from sqlalchemy import select from cognee.modules.data.models.Data import Data @@ -7,7 +6,7 @@ from cognee.infrastructure.databases.relational import get_relational_engine from cognee.modules.users.models import User -async def get_unique_data_id(data_identifier: str, user: User, tenant_id: Optional[UUID]) -> UUID: +async def get_unique_data_id(data_identifier: str, user: User) -> UUID: """ Function returns a unique UUID for data based on data identifier, user id and tenant id. If data with legacy ID exists, return that ID to maintain compatibility. @@ -35,7 +34,7 @@ async def get_unique_data_id(data_identifier: str, user: User, tenant_id: Option # return UUID hash of file contents + owner id + tenant_id return uuid5(NAMESPACE_OID, f"{data_identifier}{str(user.id)}") - def _get_modern_unique_data_id(data_identifier: str, user: User, tenant_id: UUID) -> UUID: + def _get_modern_unique_data_id(data_identifier: str, user: User) -> UUID: """ Function returns a unique UUID for data based on data identifier, user id and tenant id. 
Args: @@ -47,13 +46,11 @@ async def get_unique_data_id(data_identifier: str, user: User, tenant_id: Option UUID: Unique identifier for the data """ # return UUID hash of file contents + owner id + tenant_id - return uuid5(NAMESPACE_OID, f"{data_identifier}{str(user.id)}{str(tenant_id)}") + return uuid5(NAMESPACE_OID, f"{data_identifier}{str(user.id)}{str(user.tenant_id)}") # Get all possible data_id values data_id = { - "modern_data_id": _get_modern_unique_data_id( - data_identifier=data_identifier, user=user, tenant_id=tenant_id - ), + "modern_data_id": _get_modern_unique_data_id(data_identifier=data_identifier, user=user), "legacy_data_id": _get_deprecated_unique_data_id( data_identifier=data_identifier, user=user ), diff --git a/cognee/modules/data/methods/get_unique_dataset_id.py b/cognee/modules/data/methods/get_unique_dataset_id.py index 2caf5fb55..274f24d1a 100644 --- a/cognee/modules/data/methods/get_unique_dataset_id.py +++ b/cognee/modules/data/methods/get_unique_dataset_id.py @@ -1,9 +1,71 @@ from uuid import UUID, uuid5, NAMESPACE_OID -from cognee.modules.users.models import User from typing import Union +from sqlalchemy import select + +from cognee.modules.data.models.Dataset import Dataset +from cognee.modules.users.models import User +from cognee.infrastructure.databases.relational import get_relational_engine async def get_unique_dataset_id(dataset_name: Union[str, UUID], user: User) -> UUID: - if isinstance(dataset_name, UUID): - return dataset_name - return uuid5(NAMESPACE_OID, f"{dataset_name}{str(user.id)}") + """ + Function returns a unique UUID for dataset based on dataset name, user id and tenant id. + If dataset with legacy ID exists, return that ID to maintain compatibility. 
+ + Args: + dataset_name: string representing the dataset name + user: User object adding the dataset + tenant_id: UUID of the tenant for which dataset is being added + + Returns: + UUID: Unique identifier for the dataset + """ + + def _get_legacy_unique_dataset_id(dataset_name: Union[str, UUID], user: User) -> UUID: + """ + Legacy function, returns a unique UUID for dataset based on dataset name and user id. + Needed to support legacy datasets without tenant information. + Args: + dataset_name: string representing the dataset name + user: Current User object adding the dataset + + Returns: + UUID: Unique identifier for the dataset + """ + if isinstance(dataset_name, UUID): + return dataset_name + return uuid5(NAMESPACE_OID, f"{dataset_name}{str(user.id)}") + + def _get_modern_unique_dataset_id(dataset_name: Union[str, UUID], user: User) -> UUID: + """ + Returns a unique UUID for dataset based on dataset name, user id and tenant_id. + Args: + dataset_name: string representing the dataset name + user: Current User object adding the dataset + tenant_id: UUID of the tenant for which dataset is being added + + Returns: + UUID: Unique identifier for the dataset + """ + if isinstance(dataset_name, UUID): + return dataset_name + return uuid5(NAMESPACE_OID, f"{dataset_name}{str(user.id)}{str(user.tenant_id)}") + + # Get all possible dataset_id values + dataset_id = { + "modern_dataset_id": _get_modern_unique_dataset_id(dataset_name=dataset_name, user=user), + "legacy_dataset_id": _get_legacy_unique_dataset_id(dataset_name=dataset_name, user=user), + } + + # Check if dataset with legacy_dataset_id exists, if so use that one, else use modern_dataset_id + db_engine = get_relational_engine() + async with db_engine.get_async_session() as session: + legacy_dataset = ( + await session.execute( + select(Dataset).filter(Dataset.id == dataset_id["legacy_data_id"]) + ) + ).scalar_one_or_none() + + if not legacy_dataset: + return dataset_id["modern_dataset_id"] + return 
dataset_id["legacy_dataset_id"] diff --git a/cognee/modules/ingestion/identify.py b/cognee/modules/ingestion/identify.py index 5a0fe379e..640fce4a2 100644 --- a/cognee/modules/ingestion/identify.py +++ b/cognee/modules/ingestion/identify.py @@ -8,6 +8,4 @@ from cognee.modules.data.methods import get_unique_data_id async def identify(data: IngestionData, user: User) -> UUID: data_content_hash: str = data.get_identifier() - return await get_unique_data_id( - data_identifier=data_content_hash, user=user, tenant_id=user.tenant_id - ) + return await get_unique_data_id(data_identifier=data_content_hash, user=user) From ac257dca1db4123cf97abacf32e1ecd85dab9afd Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 13:13:42 +0100 Subject: [PATCH 059/284] refactor: Account for async change for identify function --- cognee/modules/pipelines/operations/run_tasks_data_item.py | 2 +- cognee/tasks/ingestion/ingest_data.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cognee/modules/pipelines/operations/run_tasks_data_item.py b/cognee/modules/pipelines/operations/run_tasks_data_item.py index 152e72d7f..2cc449df6 100644 --- a/cognee/modules/pipelines/operations/run_tasks_data_item.py +++ b/cognee/modules/pipelines/operations/run_tasks_data_item.py @@ -69,7 +69,7 @@ async def run_tasks_data_item_incremental( async with open_data_file(file_path) as file: classified_data = ingestion.classify(file) # data_id is the hash of file contents + owner id to avoid duplicate data - data_id = ingestion.identify(classified_data, user) + data_id = await ingestion.identify(classified_data, user) else: # If data was already processed by Cognee get data id data_id = data_item.id diff --git a/cognee/tasks/ingestion/ingest_data.py b/cognee/tasks/ingestion/ingest_data.py index 0572d0f1e..5987f38d5 100644 --- a/cognee/tasks/ingestion/ingest_data.py +++ b/cognee/tasks/ingestion/ingest_data.py @@ -99,7 +99,7 @@ async def ingest_data( # data_id is the hash of original file 
contents + owner id to avoid duplicate data - data_id = ingestion.identify(classified_data, user) + data_id = await ingestion.identify(classified_data, user) original_file_metadata = classified_data.get_metadata() # Find metadata from Cognee data storage text file From ea675f29d65dcf354d8999106ff3b8db3a8149f2 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 13:15:49 +0100 Subject: [PATCH 060/284] fix: Resolve typo in accessing dictionary for dataset_id --- cognee/modules/data/methods/get_unique_dataset_id.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/modules/data/methods/get_unique_dataset_id.py b/cognee/modules/data/methods/get_unique_dataset_id.py index 274f24d1a..2b765ec78 100644 --- a/cognee/modules/data/methods/get_unique_dataset_id.py +++ b/cognee/modules/data/methods/get_unique_dataset_id.py @@ -62,7 +62,7 @@ async def get_unique_dataset_id(dataset_name: Union[str, UUID], user: User) -> U async with db_engine.get_async_session() as session: legacy_dataset = ( await session.execute( - select(Dataset).filter(Dataset.id == dataset_id["legacy_data_id"]) + select(Dataset).filter(Dataset.id == dataset_id["legacy_dataset_id"]) ) ).scalar_one_or_none() From 9d771acc2427592f40caf0e9727c8e8151c5af64 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 13:35:50 +0100 Subject: [PATCH 061/284] refactor: filter out search results --- .../methods/get_all_user_permission_datasets.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py index e5dbb0e4b..a8cb96fbb 100644 --- a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +++ b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py @@ -26,13 +26,16 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) -> tenants = 
await user.awaitable_attrs.tenants for tenant in tenants: - # Get all datasets all tenant members have access to - datasets.extend(await get_principal_datasets(tenant, permission_type)) + # If tenant is the user's selected tenant add datasets that users roles in the tenant and the tenant itself + # have access for + if tenant.id == user.tenant_id: + # Get all datasets all tenant members have access to + datasets.extend(await get_principal_datasets(tenant, permission_type)) - # Get all datasets accessible by roles user is a part of - roles = await user.awaitable_attrs.roles - for role in roles: - datasets.extend(await get_principal_datasets(role, permission_type)) + # Get all datasets accessible by roles user is a part of + roles = await user.awaitable_attrs.roles + for role in roles: + datasets.extend(await get_principal_datasets(role, permission_type)) # Deduplicate datasets with same ID unique = {} From 7e3c24100b0606ecf7d9b71ab76bba9be9c64e68 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Tue, 4 Nov 2025 15:09:33 +0100 Subject: [PATCH 062/284] refactor: add structured output to completion retrievers --- .../retrieval/EntityCompletionRetriever.py | 11 +++- .../modules/retrieval/completion_retriever.py | 13 +++- ..._completion_context_extension_retriever.py | 4 ++ .../graph_completion_cot_retriever.py | 65 ++++--------------- .../retrieval/graph_completion_retriever.py | 3 + .../modules/retrieval/temporal_retriever.py | 9 ++- cognee/modules/retrieval/utils/completion.py | 26 +------- .../feedback/generate_improved_answers.py | 6 +- .../graph_completion_retriever_cot_test.py | 20 ++++-- 9 files changed, 67 insertions(+), 90 deletions(-) diff --git a/cognee/modules/retrieval/EntityCompletionRetriever.py b/cognee/modules/retrieval/EntityCompletionRetriever.py index 6086977ce..1f1ddad0a 100644 --- a/cognee/modules/retrieval/EntityCompletionRetriever.py +++ b/cognee/modules/retrieval/EntityCompletionRetriever.py @@ -1,5 +1,5 @@ import asyncio -from typing import 
Any, Optional, List +from typing import Any, Optional, List, Type from cognee.shared.logging_utils import get_logger from cognee.infrastructure.entities.BaseEntityExtractor import BaseEntityExtractor @@ -85,7 +85,11 @@ class EntityCompletionRetriever(BaseRetriever): return None async def get_completion( - self, query: str, context: Optional[Any] = None, session_id: Optional[str] = None + self, + query: str, + context: Optional[Any] = None, + session_id: Optional[str] = None, + response_model: Type = str, ) -> List[str]: """ Generate completion using provided context or fetch new context. @@ -102,6 +106,7 @@ class EntityCompletionRetriever(BaseRetriever): fetched if not provided. (default None) - session_id (Optional[str]): Optional session identifier for caching. If None, defaults to 'default_session'. (default None) + - response_model (Type): The Pydantic model type for structured output. (default str) Returns: -------- @@ -133,6 +138,7 @@ class EntityCompletionRetriever(BaseRetriever): user_prompt_path=self.user_prompt_path, system_prompt_path=self.system_prompt_path, conversation_history=conversation_history, + response_model=response_model, ), ) else: @@ -141,6 +147,7 @@ class EntityCompletionRetriever(BaseRetriever): context=context, user_prompt_path=self.user_prompt_path, system_prompt_path=self.system_prompt_path, + response_model=response_model, ) if session_save: diff --git a/cognee/modules/retrieval/completion_retriever.py b/cognee/modules/retrieval/completion_retriever.py index bb568924d..f071e41de 100644 --- a/cognee/modules/retrieval/completion_retriever.py +++ b/cognee/modules/retrieval/completion_retriever.py @@ -1,5 +1,5 @@ import asyncio -from typing import Any, Optional +from typing import Any, Optional, Type from cognee.shared.logging_utils import get_logger from cognee.infrastructure.databases.vector import get_vector_engine @@ -75,7 +75,11 @@ class CompletionRetriever(BaseRetriever): raise NoDataError("No data found in the system, please add 
data first.") from error async def get_completion( - self, query: str, context: Optional[Any] = None, session_id: Optional[str] = None + self, + query: str, + context: Optional[Any] = None, + session_id: Optional[str] = None, + response_model: Type = str, ) -> str: """ Generates an LLM completion using the context. @@ -91,6 +95,7 @@ class CompletionRetriever(BaseRetriever): completion; if None, it retrieves the context for the query. (default None) - session_id (Optional[str]): Optional session identifier for caching. If None, defaults to 'default_session'. (default None) + - response_model (Type): The Pydantic model type for structured output. (default str) Returns: -------- @@ -118,6 +123,7 @@ class CompletionRetriever(BaseRetriever): system_prompt_path=self.system_prompt_path, system_prompt=self.system_prompt, conversation_history=conversation_history, + response_model=response_model, ), ) else: @@ -127,6 +133,7 @@ class CompletionRetriever(BaseRetriever): user_prompt_path=self.user_prompt_path, system_prompt_path=self.system_prompt_path, system_prompt=self.system_prompt, + response_model=response_model, ) if session_save: @@ -137,4 +144,4 @@ class CompletionRetriever(BaseRetriever): session_id=session_id, ) - return completion + return [completion] diff --git a/cognee/modules/retrieval/graph_completion_context_extension_retriever.py b/cognee/modules/retrieval/graph_completion_context_extension_retriever.py index 58b6b586f..6b2c6a9e6 100644 --- a/cognee/modules/retrieval/graph_completion_context_extension_retriever.py +++ b/cognee/modules/retrieval/graph_completion_context_extension_retriever.py @@ -56,6 +56,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever): context: Optional[List[Edge]] = None, session_id: Optional[str] = None, context_extension_rounds=4, + response_model: Type = str, ) -> List[str]: """ Extends the context for a given query by retrieving related triplets and generating new @@ -76,6 +77,7 @@ class 
GraphCompletionContextExtensionRetriever(GraphCompletionRetriever): defaults to 'default_session'. (default None) - context_extension_rounds: The maximum number of rounds to extend the context with new triplets before halting. (default 4) + - response_model (Type): The Pydantic model type for structured output. (default str) Returns: -------- @@ -143,6 +145,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever): system_prompt_path=self.system_prompt_path, system_prompt=self.system_prompt, conversation_history=conversation_history, + response_model=response_model, ), ) else: @@ -152,6 +155,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever): user_prompt_path=self.user_prompt_path, system_prompt_path=self.system_prompt_path, system_prompt=self.system_prompt, + response_model=response_model, ) if self.save_interaction and context_text and triplets and completion: diff --git a/cognee/modules/retrieval/graph_completion_cot_retriever.py b/cognee/modules/retrieval/graph_completion_cot_retriever.py index 299db6855..39255fe68 100644 --- a/cognee/modules/retrieval/graph_completion_cot_retriever.py +++ b/cognee/modules/retrieval/graph_completion_cot_retriever.py @@ -7,7 +7,7 @@ from cognee.shared.logging_utils import get_logger from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever from cognee.modules.retrieval.utils.completion import ( - generate_structured_completion, + generate_completion, summarize_text, ) from cognee.modules.retrieval.utils.session_cache import ( @@ -44,7 +44,6 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever): questions based on reasoning. 
The public methods are: - get_completion - - get_structured_completion Instance variables include: - validation_system_prompt_path @@ -121,7 +120,7 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever): triplets += await self.get_context(followup_question) context_text = await self.resolve_edges_to_text(list(set(triplets))) - completion = await generate_structured_completion( + completion = await generate_completion( query=query, context=context_text, user_prompt_path=self.user_prompt_path, @@ -165,24 +164,28 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever): return completion, context_text, triplets - async def get_structured_completion( + async def get_completion( self, query: str, context: Optional[List[Edge]] = None, session_id: Optional[str] = None, - max_iter: int = 4, + max_iter=4, response_model: Type = str, - ) -> Any: + ) -> List[str]: """ - Generate structured completion responses based on a user query and contextual information. + Generate completion responses based on a user query and contextual information. - This method applies the same chain-of-thought logic as get_completion but returns + This method interacts with a language model client to retrieve a structured response, + using a series of iterations to refine the answers and generate follow-up questions + based on reasoning derived from previous outputs. It raises exceptions if the context + retrieval fails or if the model encounters issues in generating outputs. It returns structured output using the provided response model. Parameters: ----------- + - query (str): The user's query to be processed and answered. - - context (Optional[List[Edge]]): Optional context that may assist in answering the query. + - context (Optional[Any]): Optional context that may assist in answering the query. If not provided, it will be fetched based on the query. (default None) - session_id (Optional[str]): Optional session identifier for caching. If None, defaults to 'default_session'. 
(default None) @@ -192,7 +195,8 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever): Returns: -------- - - Any: The generated structured completion based on the response model. + + - List[str]: A list containing the generated answer to the user's query. """ # Check if session saving is enabled cache_config = CacheConfig() @@ -228,45 +232,4 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever): session_id=session_id, ) - return completion - - async def get_completion( - self, - query: str, - context: Optional[List[Edge]] = None, - session_id: Optional[str] = None, - max_iter=4, - ) -> List[str]: - """ - Generate completion responses based on a user query and contextual information. - - This method interacts with a language model client to retrieve a structured response, - using a series of iterations to refine the answers and generate follow-up questions - based on reasoning derived from previous outputs. It raises exceptions if the context - retrieval fails or if the model encounters issues in generating outputs. - - Parameters: - ----------- - - - query (str): The user's query to be processed and answered. - - context (Optional[Any]): Optional context that may assist in answering the query. - If not provided, it will be fetched based on the query. (default None) - - session_id (Optional[str]): Optional session identifier for caching. If None, - defaults to 'default_session'. (default None) - - max_iter: The maximum number of iterations to refine the answer and generate - follow-up questions. (default 4) - - Returns: - -------- - - - List[str]: A list containing the generated answer to the user's query. 
- """ - completion = await self.get_structured_completion( - query=query, - context=context, - session_id=session_id, - max_iter=max_iter, - response_model=str, - ) - return [completion] diff --git a/cognee/modules/retrieval/graph_completion_retriever.py b/cognee/modules/retrieval/graph_completion_retriever.py index b7ab4edae..b544e8ead 100644 --- a/cognee/modules/retrieval/graph_completion_retriever.py +++ b/cognee/modules/retrieval/graph_completion_retriever.py @@ -146,6 +146,7 @@ class GraphCompletionRetriever(BaseGraphRetriever): query: str, context: Optional[List[Edge]] = None, session_id: Optional[str] = None, + response_model: Type = str, ) -> List[str]: """ Generates a completion using graph connections context based on a query. @@ -188,6 +189,7 @@ class GraphCompletionRetriever(BaseGraphRetriever): system_prompt_path=self.system_prompt_path, system_prompt=self.system_prompt, conversation_history=conversation_history, + response_model=response_model, ), ) else: @@ -197,6 +199,7 @@ class GraphCompletionRetriever(BaseGraphRetriever): user_prompt_path=self.user_prompt_path, system_prompt_path=self.system_prompt_path, system_prompt=self.system_prompt, + response_model=response_model, ) if self.save_interaction and context and triplets and completion: diff --git a/cognee/modules/retrieval/temporal_retriever.py b/cognee/modules/retrieval/temporal_retriever.py index ec68d37bb..38d69ec80 100644 --- a/cognee/modules/retrieval/temporal_retriever.py +++ b/cognee/modules/retrieval/temporal_retriever.py @@ -146,7 +146,11 @@ class TemporalRetriever(GraphCompletionRetriever): return self.descriptions_to_string(top_k_events) async def get_completion( - self, query: str, context: Optional[str] = None, session_id: Optional[str] = None + self, + query: str, + context: Optional[str] = None, + session_id: Optional[str] = None, + response_model: Type = str, ) -> List[str]: """ Generates a response using the query and optional context. 
@@ -159,6 +163,7 @@ class TemporalRetriever(GraphCompletionRetriever): retrieved based on the query. (default None) - session_id (Optional[str]): Optional session identifier for caching. If None, defaults to 'default_session'. (default None) + - response_model (Type): The Pydantic model type for structured output. (default str) Returns: -------- @@ -186,6 +191,7 @@ class TemporalRetriever(GraphCompletionRetriever): user_prompt_path=self.user_prompt_path, system_prompt_path=self.system_prompt_path, conversation_history=conversation_history, + response_model=response_model, ), ) else: @@ -194,6 +200,7 @@ class TemporalRetriever(GraphCompletionRetriever): context=context, user_prompt_path=self.user_prompt_path, system_prompt_path=self.system_prompt_path, + response_model=response_model, ) if session_save: diff --git a/cognee/modules/retrieval/utils/completion.py b/cognee/modules/retrieval/utils/completion.py index db7a10252..b77d7ef90 100644 --- a/cognee/modules/retrieval/utils/completion.py +++ b/cognee/modules/retrieval/utils/completion.py @@ -3,7 +3,7 @@ from cognee.infrastructure.llm.LLMGateway import LLMGateway from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt -async def generate_structured_completion( +async def generate_completion( query: str, context: str, user_prompt_path: str, @@ -11,8 +11,8 @@ async def generate_structured_completion( system_prompt: Optional[str] = None, conversation_history: Optional[str] = None, response_model: Type = str, -) -> Any: - """Generates a structured completion using LLM with given context and prompts.""" +) -> str: + """Generates a completion using LLM with given context and prompts.""" args = {"question": query, "context": context} user_prompt = render_prompt(user_prompt_path, args) system_prompt = system_prompt if system_prompt else read_query_prompt(system_prompt_path) @@ -28,26 +28,6 @@ async def generate_structured_completion( ) -async def generate_completion( - query: str, - context: str, - 
user_prompt_path: str, - system_prompt_path: str, - system_prompt: Optional[str] = None, - conversation_history: Optional[str] = None, -) -> str: - """Generates a completion using LLM with given context and prompts.""" - return await generate_structured_completion( - query=query, - context=context, - user_prompt_path=user_prompt_path, - system_prompt_path=system_prompt_path, - system_prompt=system_prompt, - conversation_history=conversation_history, - response_model=str, - ) - - async def summarize_text( text: str, system_prompt_path: str = "summarize_search_results.txt", diff --git a/cognee/tasks/feedback/generate_improved_answers.py b/cognee/tasks/feedback/generate_improved_answers.py index e439cf9e5..d2b143d29 100644 --- a/cognee/tasks/feedback/generate_improved_answers.py +++ b/cognee/tasks/feedback/generate_improved_answers.py @@ -61,7 +61,7 @@ async def _generate_improved_answer_for_single_interaction( ) retrieved_context = await retriever.get_context(query_text) - completion = await retriever.get_structured_completion( + completion = await retriever.get_completion( query=query_text, context=retrieved_context, response_model=ImprovedAnswerResponse, @@ -70,9 +70,9 @@ async def _generate_improved_answer_for_single_interaction( new_context_text = await retriever.resolve_edges_to_text(retrieved_context) if completion: - enrichment.improved_answer = completion.answer + enrichment.improved_answer = completion[0].answer enrichment.new_context = new_context_text - enrichment.explanation = completion.explanation + enrichment.explanation = completion[0].explanation return enrichment else: logger.warning( diff --git a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py index 7fcfe0d6b..bf10dc023 100644 --- a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +++ b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py @@ -206,16 
+206,22 @@ class TestGraphCompletionCoTRetriever: retriever = GraphCompletionCotRetriever() # Test with string response model (default) - string_answer = await retriever.get_structured_completion("Who works at Figma?") - assert isinstance(string_answer, str), f"Expected str, got {type(string_answer).__name__}" - assert string_answer.strip(), "Answer should not be empty" + string_answer = await retriever.get_completion("Who works at Figma?") + assert isinstance(string_answer, list), f"Expected str, got {type(string_answer).__name__}" + assert all(isinstance(item, str) and item.strip() for item in string_answer), ( + "Answer should not be empty" + ) # Test with structured response model - structured_answer = await retriever.get_structured_completion( + structured_answer = await retriever.get_completion( "Who works at Figma?", response_model=TestAnswer ) - assert isinstance(structured_answer, TestAnswer), ( + assert isinstance(structured_answer, list), ( + f"Expected list, got {type(structured_answer).__name__}" + ) + assert all(isinstance(item, TestAnswer) for item in string_answer), ( f"Expected TestAnswer, got {type(structured_answer).__name__}" ) - assert structured_answer.answer.strip(), "Answer field should not be empty" - assert structured_answer.explanation.strip(), "Explanation field should not be empty" + + assert structured_answer[0].answer.strip(), "Answer field should not be empty" + assert structured_answer[0].explanation.strip(), "Explanation field should not be empty" From 33b05163811c7f7f74db39abfea711103b823132 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Tue, 4 Nov 2025 15:27:03 +0100 Subject: [PATCH 063/284] test: fix completion tests --- ...letion_retriever_context_extension_test.py | 59 +++++++++++++++++++ .../graph_completion_retriever_cot_test.py | 2 +- 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py 
b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py index 0e21fe351..5335a3ca7 100644 --- a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +++ b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py @@ -2,6 +2,7 @@ import os import pytest import pathlib from typing import Optional, Union +from pydantic import BaseModel import cognee from cognee.low_level import setup, DataPoint @@ -12,6 +13,11 @@ from cognee.modules.retrieval.graph_completion_context_extension_retriever impor ) +class TestAnswer(BaseModel): + answer: str + explanation: str + + class TestGraphCompletionWithContextExtensionRetriever: @pytest.mark.asyncio async def test_graph_completion_extension_context_simple(self): @@ -175,3 +181,56 @@ class TestGraphCompletionWithContextExtensionRetriever: assert all(isinstance(item, str) and item.strip() for item in answer), ( "Answer must contain only non-empty strings" ) + + @pytest.mark.asyncio + async def test_get_structured_completion_extension_context(self): + system_directory_path = os.path.join( + pathlib.Path(__file__).parent, + ".cognee_system/test_get_structured_completion_extension_context", + ) + cognee.config.system_root_directory(system_directory_path) + data_directory_path = os.path.join( + pathlib.Path(__file__).parent, + ".data_storage/test_get_structured_completion_extension_context", + ) + cognee.config.data_root_directory(data_directory_path) + + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + await setup() + + class Company(DataPoint): + name: str + + class Person(DataPoint): + name: str + works_for: Company + + company1 = Company(name="Figma") + person1 = Person(name="Steve Rodger", works_for=company1) + + entities = [company1, person1] + await add_data_points(entities) + + retriever = GraphCompletionContextExtensionRetriever() + + # Test with string response model (default) + string_answer = 
await retriever.get_completion("Who works at Figma?") + assert isinstance(string_answer, list), f"Expected str, got {type(string_answer).__name__}" + assert all(isinstance(item, str) and item.strip() for item in string_answer), ( + "Answer should not be empty" + ) + + # Test with structured response model + structured_answer = await retriever.get_completion( + "Who works at Figma?", response_model=TestAnswer + ) + assert isinstance(structured_answer, list), ( + f"Expected list, got {type(structured_answer).__name__}" + ) + assert all(isinstance(item, TestAnswer) for item in structured_answer), ( + f"Expected TestAnswer, got {type(structured_answer).__name__}" + ) + + assert structured_answer[0].answer.strip(), "Answer field should not be empty" + assert structured_answer[0].explanation.strip(), "Explanation field should not be empty" diff --git a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py index bf10dc023..731e9fccf 100644 --- a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +++ b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py @@ -219,7 +219,7 @@ class TestGraphCompletionCoTRetriever: assert isinstance(structured_answer, list), ( f"Expected list, got {type(structured_answer).__name__}" ) - assert all(isinstance(item, TestAnswer) for item in string_answer), ( + assert all(isinstance(item, TestAnswer) for item in structured_answer), ( f"Expected TestAnswer, got {type(structured_answer).__name__}" ) From f4117c42e9c1bd0630a333bb789faba8686ba5b0 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 16:43:41 +0100 Subject: [PATCH 064/284] fix: Resolve issue with entity extraction test --- cognee/tests/tasks/entity_extraction/entity_extraction_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/tests/tasks/entity_extraction/entity_extraction_test.py 
b/cognee/tests/tasks/entity_extraction/entity_extraction_test.py index 39e883e09..41a9254ca 100644 --- a/cognee/tests/tasks/entity_extraction/entity_extraction_test.py +++ b/cognee/tests/tasks/entity_extraction/entity_extraction_test.py @@ -55,7 +55,7 @@ async def main(): classified_data = ingestion.classify(file) # data_id is the hash of original file contents + owner id to avoid duplicate data - data_id = ingestion.identify(classified_data, await get_default_user()) + data_id = await ingestion.identify(classified_data, await get_default_user()) await cognee.add(file_path) From cd32b492a469c9bfac14d4b3f20ed99a727a9460 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 17:56:01 +0100 Subject: [PATCH 065/284] refactor: Add filtering of non current tenant results when authorizing dataset --- .../get_all_user_permission_datasets.py | 25 ++++++++++--------- .../users/roles/methods/add_user_to_role.py | 4 ++- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py index a8cb96fbb..ee1de3c72 100644 --- a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +++ b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py @@ -24,18 +24,14 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) -> # Get all tenants user is a part of tenants = await user.awaitable_attrs.tenants - for tenant in tenants: - # If tenant is the user's selected tenant add datasets that users roles in the tenant and the tenant itself - # have access for - if tenant.id == user.tenant_id: - # Get all datasets all tenant members have access to - datasets.extend(await get_principal_datasets(tenant, permission_type)) + # Get all datasets all tenant members have access to + datasets.extend(await get_principal_datasets(tenant, permission_type)) - # Get all datasets 
accessible by roles user is a part of - roles = await user.awaitable_attrs.roles - for role in roles: - datasets.extend(await get_principal_datasets(role, permission_type)) + # Get all datasets accessible by roles user is a part of + roles = await user.awaitable_attrs.roles + for role in roles: + datasets.extend(await get_principal_datasets(role, permission_type)) # Deduplicate datasets with same ID unique = {} @@ -43,5 +39,10 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) -> # If the dataset id key already exists, leave the dictionary unchanged. unique.setdefault(dataset.id, dataset) - # TODO: Add filtering out of datasets that aren't currently selected tenant of user (currently selected tenant is the tenant_id value in the User model) - return list(unique.values()) + # Filter out dataset that aren't part of the current user's tenant + filtered_datasets = [] + for dataset in list(unique.values()): + if dataset.tenant_id == user.tenant_id: + filtered_datasets.append(dataset) + + return filtered_datasets diff --git a/cognee/modules/users/roles/methods/add_user_to_role.py b/cognee/modules/users/roles/methods/add_user_to_role.py index de5e47775..d764ac900 100644 --- a/cognee/modules/users/roles/methods/add_user_to_role.py +++ b/cognee/modules/users/roles/methods/add_user_to_role.py @@ -42,11 +42,13 @@ async def add_user_to_role(user_id: UUID, role_id: UUID, owner_id: UUID): .first() ) + user_tenants = await user.awaitable_attrs.tenants + if not user: raise UserNotFoundError elif not role: raise RoleNotFoundError - elif user.tenant_id != role.tenant_id: + elif role.tenant_id not in [tenant.id for tenant in user_tenants]: # TESTME raise TenantNotFoundError( message="User tenant does not match role tenant. User cannot be added to role." 
) From bee2fe3ba70307c4a5ea03a5cdc1e07e927708b4 Mon Sep 17 00:00:00 2001 From: Igor Ilic <30923996+dexters1@users.noreply.github.com> Date: Tue, 4 Nov 2025 17:58:34 +0100 Subject: [PATCH 066/284] feat: Add initial custom pipeline (#1716) ## Description Add run_custom_pipeline to have a way to execute a custom collection of tasks in Cognee ## Type of Change - [ ] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [ ] **I have tested my changes thoroughly before submitting this PR** - [ ] **This PR contains minimal changes necessary to address the issue/feature** - [ ] My code follows the project's coding standards and style guidelines - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have added necessary documentation (if applicable) - [ ] All new and existing tests pass - [ ] I have searched existing PRs to ensure this change hasn't been submitted already - [ ] I have linked any relevant issues in the description - [ ] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. 
--- .github/workflows/examples_tests.yml | 25 ++++++ cognee/__init__.py | 1 + .../modules/run_custom_pipeline/__init__.py | 1 + .../run_custom_pipeline.py | 69 +++++++++++++++ .../modules/users/methods/get_default_user.py | 2 +- .../python/run_custom_pipeline_example.py | 84 +++++++++++++++++++ 6 files changed, 181 insertions(+), 1 deletion(-) create mode 100644 cognee/modules/run_custom_pipeline/__init__.py create mode 100644 cognee/modules/run_custom_pipeline/run_custom_pipeline.py create mode 100644 examples/python/run_custom_pipeline_example.py diff --git a/.github/workflows/examples_tests.yml b/.github/workflows/examples_tests.yml index 57bc88157..36953e259 100644 --- a/.github/workflows/examples_tests.yml +++ b/.github/workflows/examples_tests.yml @@ -210,6 +210,31 @@ jobs: EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} run: uv run python ./examples/python/memify_coding_agent_example.py + test-custom-pipeline: + name: Run Custom Pipeline Example + runs-on: ubuntu-22.04 + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' + + - name: Run Custom Pipeline Example + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + run: uv run python ./examples/python/run_custom_pipeline_example.py + test-permissions-example: name: Run Permissions Example runs-on: ubuntu-22.04 diff --git a/cognee/__init__.py b/cognee/__init__.py index 6e4d2a903..4d150ce4e 100644 --- a/cognee/__init__.py +++ b/cognee/__init__.py @@ -19,6 +19,7 @@ from .api.v1.add import add from 
.api.v1.delete import delete from .api.v1.cognify import cognify from .modules.memify import memify +from .modules.run_custom_pipeline import run_custom_pipeline from .api.v1.update import update from .api.v1.config.config import config from .api.v1.datasets.datasets import datasets diff --git a/cognee/modules/run_custom_pipeline/__init__.py b/cognee/modules/run_custom_pipeline/__init__.py new file mode 100644 index 000000000..2d30e2e0c --- /dev/null +++ b/cognee/modules/run_custom_pipeline/__init__.py @@ -0,0 +1 @@ +from .run_custom_pipeline import run_custom_pipeline diff --git a/cognee/modules/run_custom_pipeline/run_custom_pipeline.py b/cognee/modules/run_custom_pipeline/run_custom_pipeline.py new file mode 100644 index 000000000..d3df1c060 --- /dev/null +++ b/cognee/modules/run_custom_pipeline/run_custom_pipeline.py @@ -0,0 +1,69 @@ +from typing import Union, Optional, List, Type, Any +from uuid import UUID + +from cognee.shared.logging_utils import get_logger + +from cognee.modules.pipelines import run_pipeline +from cognee.modules.pipelines.tasks.task import Task +from cognee.modules.users.models import User +from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor + +logger = get_logger() + + +async def run_custom_pipeline( + tasks: Union[List[Task], List[str]] = None, + data: Any = None, + dataset: Union[str, UUID] = "main_dataset", + user: User = None, + vector_db_config: Optional[dict] = None, + graph_db_config: Optional[dict] = None, + data_per_batch: int = 20, + run_in_background: bool = False, + pipeline_name: str = "custom_pipeline", +): + """ + Custom pipeline in Cognee, can work with already built graphs. Data needs to be provided which can be processed + with provided tasks. + + Provided tasks and data will be arranged to run the Cognee pipeline and execute graph enrichment/creation. + + This is the core processing step in Cognee that converts raw text and documents + into an intelligent knowledge graph. 
It analyzes content, extracts entities and + relationships, and creates semantic connections for enhanced search and reasoning. + + Args: + tasks: List of Cognee Tasks to execute. + data: The data to ingest. Can be anything when custom extraction and enrichment tasks are used. + Data provided here will be forwarded to the first extraction task in the pipeline as input. + dataset: Dataset name or dataset uuid to process. + user: User context for authentication and data access. Uses default if None. + vector_db_config: Custom vector database configuration for embeddings storage. + graph_db_config: Custom graph database configuration for relationship storage. + data_per_batch: Number of data items to be processed in parallel. + run_in_background: If True, starts processing asynchronously and returns immediately. + If False, waits for completion before returning. + Background mode recommended for large datasets (>100MB). + Use pipeline_run_id from return value to monitor progress. + """ + + custom_tasks = [ + *tasks, + ] + + # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for + pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background) + + # Run the run_pipeline in the background or blocking based on executor + return await pipeline_executor_func( + pipeline=run_pipeline, + tasks=custom_tasks, + user=user, + data=data, + datasets=dataset, + vector_db_config=vector_db_config, + graph_db_config=graph_db_config, + incremental_loading=False, + data_per_batch=data_per_batch, + pipeline_name=pipeline_name, + ) diff --git a/cognee/modules/users/methods/get_default_user.py b/cognee/modules/users/methods/get_default_user.py index 773545f8e..9e3940617 100644 --- a/cognee/modules/users/methods/get_default_user.py +++ b/cognee/modules/users/methods/get_default_user.py @@ -10,7 +10,7 @@ from cognee.infrastructure.databases.relational import 
get_relational_engine from cognee.modules.users.methods.create_default_user import create_default_user -async def get_default_user() -> SimpleNamespace: +async def get_default_user() -> User: db_engine = get_relational_engine() base_config = get_base_config() default_email = base_config.default_user_email or "default_user@example.com" diff --git a/examples/python/run_custom_pipeline_example.py b/examples/python/run_custom_pipeline_example.py new file mode 100644 index 000000000..1ca1b4402 --- /dev/null +++ b/examples/python/run_custom_pipeline_example.py @@ -0,0 +1,84 @@ +import asyncio +import cognee +from cognee.modules.engine.operations.setup import setup +from cognee.modules.users.methods import get_default_user +from cognee.shared.logging_utils import setup_logging, INFO +from cognee.modules.pipelines import Task +from cognee.api.v1.search import SearchType + +# Prerequisites: +# 1. Copy `.env.template` and rename it to `.env`. +# 2. Add your OpenAI API key to the `.env` file in the `LLM_API_KEY` field: +# LLM_API_KEY = "your_key_here" + + +async def main(): + # Create a clean slate for cognee -- reset data and system state + print("Resetting cognee data...") + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + print("Data reset complete.\n") + + # Create relational database and tables + await setup() + + # cognee knowledge graph will be created based on this text + text = """ + Natural language processing (NLP) is an interdisciplinary + subfield of computer science and information retrieval. 
+ """ + + print("Adding text to cognee:") + print(text.strip()) + + # Let's recreate the cognee add pipeline through the custom pipeline framework + from cognee.tasks.ingestion import ingest_data, resolve_data_directories + + user = await get_default_user() + + # Values for tasks need to be filled before calling the pipeline + add_tasks = [ + Task(resolve_data_directories, include_subdirectories=True), + Task( + ingest_data, + "main_dataset", + user, + ), + ] + # Forward tasks to custom pipeline along with data and user information + await cognee.run_custom_pipeline( + tasks=add_tasks, data=text, user=user, dataset="main_dataset", pipeline_name="add_pipeline" + ) + print("Text added successfully.\n") + + # Use LLMs and cognee to create knowledge graph + from cognee.api.v1.cognify.cognify import get_default_tasks + + cognify_tasks = await get_default_tasks(user=user) + print("Recreating existing cognify pipeline in custom pipeline to create knowledge graph...\n") + await cognee.run_custom_pipeline( + tasks=cognify_tasks, user=user, dataset="main_dataset", pipeline_name="cognify_pipeline" + ) + print("Cognify process complete.\n") + + query_text = "Tell me about NLP" + print(f"Searching cognee for insights with query: '{query_text}'") + # Query cognee for insights on the added text + search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, query_text=query_text + ) + + print("Search results:") + # Display results + for result_text in search_results: + print(result_text) + + +if __name__ == "__main__": + logger = setup_logging(log_level=INFO) + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + loop.run_until_complete(main()) + finally: + loop.run_until_complete(loop.shutdown_asyncgens()) From fb102f29a8fbbfa941641208f41a55e1eb370fb5 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 19:03:56 +0100 Subject: [PATCH 067/284] chore: Add alembic migration for multi-tenant system --- 
.../c946955da633_multi_tenant_support.py | 113 ++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 alembic/versions/c946955da633_multi_tenant_support.py diff --git a/alembic/versions/c946955da633_multi_tenant_support.py b/alembic/versions/c946955da633_multi_tenant_support.py new file mode 100644 index 000000000..2ad230974 --- /dev/null +++ b/alembic/versions/c946955da633_multi_tenant_support.py @@ -0,0 +1,113 @@ +"""Multi Tenant Support + +Revision ID: c946955da633 +Revises: 211ab850ef3d +Create Date: 2025-11-04 18:11:09.325158 + +""" + +from typing import Sequence, Union +from datetime import datetime, timezone +from uuid import uuid4 + +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision: str = "c946955da633" +down_revision: Union[str, None] = "211ab850ef3d" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def _define_user_table() -> sa.Table: + table = sa.Table( + "users", + sa.MetaData(), + sa.Column( + "id", + sa.UUID, + sa.ForeignKey("principals.id", ondelete="CASCADE"), + primary_key=True, + nullable=False, + ), + sa.Column("tenant_id", sa.UUID, sa.ForeignKey("tenants.id"), index=True, nullable=True), + ) + return table + + +def _define_dataset_table() -> sa.Table: + # Note: We can't use any Cognee model info to gather data (as it can change) in database so we must use our own table + # definition or load what is in the database + table = sa.Table( + "datasets", + sa.MetaData(), + sa.Column("id", sa.UUID, primary_key=True, default=uuid4), + sa.Column("name", sa.Text), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + default=lambda: datetime.now(timezone.utc), + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + onupdate=lambda: datetime.now(timezone.utc), + ), + sa.Column("owner_id", sa.UUID(), sa.ForeignKey("principals.id"), index=True), + sa.Column("tenant_id", sa.UUID(), sa.ForeignKey("tenants.id"), 
index=True, nullable=True), + ) + + return table + + +def _get_column(inspector, table, name, schema=None): + for col in inspector.get_columns(table, schema=schema): + if col["name"] == name: + return col + return None + + +def upgrade() -> None: + conn = op.get_bind() + insp = sa.inspect(conn) + + dataset = _define_dataset_table() + user = _define_user_table() + + tenant_id_column = _get_column(insp, "datasets", "tenant_id") + if not tenant_id_column: + op.add_column("datasets", sa.Column("tenant_id", sa.UUID(), nullable=True)) + + # Build correlated subquery: select users.tenant_id for each dataset.owner_id + tenant_id_from_dataset_owner = ( + sa.select(user.c.tenant_id).where(user.c.id == dataset.c.owner_id).scalar_subquery() + ) + + # Update statement; restrict to rows where tenant_id is currently NULL + # update_stmt = ( + # sa.update(dataset) + # .values(tenant_id=subq) + # ) + + user = _define_user_table() + if op.get_context().dialect.name == "sqlite": + # If column doesn't exist create new original_extension column and update from values of extension column + with op.batch_alter_table("datasets") as batch_op: + batch_op.execute( + dataset.update().values( + tenant_id=tenant_id_from_dataset_owner, + ) + ) + else: + conn = op.get_bind() + conn.execute(dataset.update().values(tenant_id=tenant_id_from_dataset_owner)) + + op.create_index(op.f("ix_datasets_tenant_id"), "datasets", ["tenant_id"], unique=False) + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + + op.drop_column("datasets", "tenant_id") + # ### end Alembic commands ### From db2a32dd171a7db53487bec4c29474d1f36d1aa2 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 19:17:02 +0100 Subject: [PATCH 068/284] test: Resolve issue permission example --- alembic/versions/c946955da633_multi_tenant_support.py | 9 +-------- examples/python/permissions_example.py | 2 +- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/alembic/versions/c946955da633_multi_tenant_support.py b/alembic/versions/c946955da633_multi_tenant_support.py index 2ad230974..09781c85c 100644 --- a/alembic/versions/c946955da633_multi_tenant_support.py +++ b/alembic/versions/c946955da633_multi_tenant_support.py @@ -79,18 +79,11 @@ def upgrade() -> None: if not tenant_id_column: op.add_column("datasets", sa.Column("tenant_id", sa.UUID(), nullable=True)) - # Build correlated subquery: select users.tenant_id for each dataset.owner_id + # Build subquery, select users.tenant_id for each dataset.owner_id tenant_id_from_dataset_owner = ( sa.select(user.c.tenant_id).where(user.c.id == dataset.c.owner_id).scalar_subquery() ) - # Update statement; restrict to rows where tenant_id is currently NULL - # update_stmt = ( - # sa.update(dataset) - # .values(tenant_id=subq) - # ) - - user = _define_user_table() if op.get_context().dialect.name == "sqlite": # If column doesn't exist create new original_extension column and update from values of extension column with op.batch_alter_table("datasets") as batch_op: diff --git a/examples/python/permissions_example.py b/examples/python/permissions_example.py index 7c140845c..5d1195a11 100644 --- a/examples/python/permissions_example.py +++ b/examples/python/permissions_example.py @@ -151,7 +151,7 @@ async def main(): # To add a user to a role he must be part of the same tenant/organization print("\nOperation started as user_2 to add user_3 to CogneeLab tenant/organization") await add_user_to_tenant( - user_id=user_3.id, tenant_id=tenant_id, 
owner_id=user_2.id, set_active_tenant=True + user_id=user_3.id, tenant_id=tenant_id, owner_id=user_2.id, set_as_active_tenant=True ) print( From f002d3bf0ef24e8db113625971bfb98e6473e6b7 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 20:24:16 +0100 Subject: [PATCH 069/284] refactor: Update permissions example --- .../tenants/methods/add_user_to_tenant.py | 2 +- .../users/tenants/methods/select_tenant.py | 6 ++- examples/python/permissions_example.py | 45 ++++++++++++++++--- 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/cognee/modules/users/tenants/methods/add_user_to_tenant.py b/cognee/modules/users/tenants/methods/add_user_to_tenant.py index edadfe66b..eecc49f6f 100644 --- a/cognee/modules/users/tenants/methods/add_user_to_tenant.py +++ b/cognee/modules/users/tenants/methods/add_user_to_tenant.py @@ -16,7 +16,7 @@ from cognee.modules.users.exceptions import ( async def add_user_to_tenant( - user_id: UUID, tenant_id: UUID, owner_id: UUID, set_as_active_tenant: Optional[bool] = True + user_id: UUID, tenant_id: UUID, owner_id: UUID, set_as_active_tenant: Optional[bool] = False ): """ Add a user with the given id to the tenant with the given id. 
diff --git a/cognee/modules/users/tenants/methods/select_tenant.py b/cognee/modules/users/tenants/methods/select_tenant.py index 6e72fea2f..b444e9b1e 100644 --- a/cognee/modules/users/tenants/methods/select_tenant.py +++ b/cognee/modules/users/tenants/methods/select_tenant.py @@ -7,11 +7,12 @@ from sqlalchemy import select from cognee.infrastructure.databases.relational import get_relational_engine from cognee.modules.users.models.UserTenant import UserTenant from cognee.modules.users.methods import get_user +from cognee.modules.users.models.User import User from cognee.modules.users.permissions.methods import get_tenant from cognee.modules.users.exceptions import UserNotFoundError, TenantNotFoundError -async def select_tenant(user_id: UUID, tenant_id: Union[UUID, None]): +async def select_tenant(user_id: UUID, tenant_id: Union[UUID, None]) -> User: """ Set the users active tenant to provided tenant. @@ -33,7 +34,7 @@ async def select_tenant(user_id: UUID, tenant_id: Union[UUID, None]): user.tenant_id = None await session.merge(user) await session.commit() - return + return user tenant = await get_tenant(tenant_id) @@ -59,3 +60,4 @@ async def select_tenant(user_id: UUID, tenant_id: Union[UUID, None]): user.tenant_id = tenant_id await session.merge(user) await session.commit() + return user diff --git a/examples/python/permissions_example.py b/examples/python/permissions_example.py index 5d1195a11..fdbde00f0 100644 --- a/examples/python/permissions_example.py +++ b/examples/python/permissions_example.py @@ -3,6 +3,7 @@ import cognee import pathlib from cognee.modules.users.exceptions import PermissionDeniedError +from cognee.modules.users.tenants.methods import select_tenant from cognee.shared.logging_utils import get_logger from cognee.modules.search.types import SearchType from cognee.modules.users.methods import create_user @@ -116,6 +117,7 @@ async def main(): print( "\nOperation started as user_2 to give read permission to user_1 for the dataset owned by 
user_2" ) + await authorized_give_permission_on_datasets( user_1.id, [quantum_dataset_id], @@ -142,6 +144,9 @@ async def main(): print("User 2 is creating CogneeLab tenant/organization") tenant_id = await create_tenant("CogneeLab", user_2.id) + print("User 2 is selecting CogneeLab tenant/organization as active tenant") + await select_tenant(user_id=user_2.id, tenant_id=tenant_id) + print("\nUser 2 is creating Researcher role") role_id = await create_role(role_name="Researcher", owner_id=user_2.id) @@ -150,27 +155,55 @@ async def main(): # To add a user to a role he must be part of the same tenant/organization print("\nOperation started as user_2 to add user_3 to CogneeLab tenant/organization") - await add_user_to_tenant( - user_id=user_3.id, tenant_id=tenant_id, owner_id=user_2.id, set_as_active_tenant=True - ) + await add_user_to_tenant(user_id=user_3.id, tenant_id=tenant_id, owner_id=user_2.id) print( "\nOperation started by user_2, as tenant owner, to add user_3 to Researcher role inside the tenant/organization" ) await add_user_to_role(user_id=user_3.id, role_id=role_id, owner_id=user_2.id) + print("\nOperation as user_3 to select CogneeLab tenant/organization as active tenant") + await select_tenant(user_id=user_3.id, tenant_id=tenant_id) + print( - "\nOperation started as user_2 to give read permission to Researcher role for the dataset owned by user_2" + "\nOperation started as user_2, with CogneeLab as its active tenant, to give read permission to Researcher role for the dataset QUANTUM owned by user_2" + ) + # Even though the dataset owner is user_2, the dataset doesn't belong to the tenant/organization CogneeLab. + # So we can't assign permissions to it when we're acting in the CogneeLab tenant. 
+ try: + await authorized_give_permission_on_datasets( + role_id, + [quantum_dataset_id], + "read", + user_2.id, + ) + except PermissionDeniedError: + print( + "User 2 could not give permission to the role as the QUANTUM dataset is not part of the CogneeLab tenant" + ) + + print( + "We will now create a new QUANTUM dataset in the CogneeLab tenant so that permissions can be assigned to the Researcher role inside the tenant/organization" + ) + # Re-create the QUANTUM dataset in the CogneeLab tenant. The old QUANTUM dataset is still owned by user_2 personally + # and can still be accessed by selecting the personal tenant for user 2. + await cognee.add([text], dataset_name="QUANTUM", user=user_2) + quantum_cognify_result = await cognee.cognify(["QUANTUM"], user=user_2) + + # The recreated Quantum dataset will now have a different dataset_id as it's a new dataset in a different organization + quantum_dataset_id_cognee_lab_tenant = extract_dataset_id_from_cognify(quantum_cognify_result) + print( + "\nOperation started as user_2, with CogneeLab as its active tenant, to give read permission to Researcher role for the dataset QUANTUM owned by the CogneeLab tenant" ) await authorized_give_permission_on_datasets( role_id, - [quantum_dataset_id], + [quantum_dataset_id_cognee_lab_tenant], "read", user_2.id, ) # Now user_3 can read from QUANTUM dataset as part of the Researcher role after proper permissions have been assigned by the QUANTUM dataset owner, user_2. 
- print("\nSearch result as user_3 on the dataset owned by user_2:") + print("\nSearch result as user_3 on the QUANTUM dataset owned by the CogneeLab organization:") search_results = await cognee.search( query_type=SearchType.GRAPH_COMPLETION, query_text="What is in the document?", From 7782f246d30f159e23c4ae46afa7896936a8a677 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 20:54:00 +0100 Subject: [PATCH 070/284] refactor: Update permissions example to work with new changes --- .../routers/get_permissions_router.py | 2 +- .../users/tenants/methods/select_tenant.py | 9 +++----- examples/python/permissions_example.py | 22 ++++++++++--------- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/cognee/api/v1/permissions/routers/get_permissions_router.py b/cognee/api/v1/permissions/routers/get_permissions_router.py index 20d35e748..db2c72705 100644 --- a/cognee/api/v1/permissions/routers/get_permissions_router.py +++ b/cognee/api/v1/permissions/routers/get_permissions_router.py @@ -259,7 +259,7 @@ def get_permissions_router() -> APIRouter: from cognee.modules.users.tenants.methods import select_tenant as select_tenant_method - await select_tenant_method(user_id=user.id, tenant_id=payload.tenant_id) + await select_tenant_method(user=user, tenant_id=payload.tenant_id) return JSONResponse( status_code=200, diff --git a/cognee/modules/users/tenants/methods/select_tenant.py b/cognee/modules/users/tenants/methods/select_tenant.py index b444e9b1e..cb291d5f2 100644 --- a/cognee/modules/users/tenants/methods/select_tenant.py +++ b/cognee/modules/users/tenants/methods/select_tenant.py @@ -6,19 +6,18 @@ from sqlalchemy import select from cognee.infrastructure.databases.relational import get_relational_engine from cognee.modules.users.models.UserTenant import UserTenant -from cognee.modules.users.methods import get_user from cognee.modules.users.models.User import User from cognee.modules.users.permissions.methods import get_tenant from 
cognee.modules.users.exceptions import UserNotFoundError, TenantNotFoundError -async def select_tenant(user_id: UUID, tenant_id: Union[UUID, None]) -> User: +async def select_tenant(user: User, tenant_id: Union[UUID, None]) -> User: """ Set the users active tenant to provided tenant. If None tenant_id is provided set current Tenant to the default single user-tenant Args: - user_id: Id of the user. + user: User object. tenant_id: Id of the tenant. Returns: @@ -27,8 +26,6 @@ async def select_tenant(user_id: UUID, tenant_id: Union[UUID, None]) -> User: """ db_engine = get_relational_engine() async with db_engine.get_async_session() as session: - user = await get_user(user_id) - if tenant_id is None: # If no tenant_id is provided set current Tenant to the single user-tenant user.tenant_id = None @@ -46,7 +43,7 @@ async def select_tenant(user_id: UUID, tenant_id: Union[UUID, None]) -> User: # Check if User is part of Tenant result = await session.execute( select(UserTenant) - .where(UserTenant.user_id == user_id) + .where(UserTenant.user_id == user.id) .where(UserTenant.tenant_id == tenant_id) ) diff --git a/examples/python/permissions_example.py b/examples/python/permissions_example.py index fdbde00f0..4bbd30bea 100644 --- a/examples/python/permissions_example.py +++ b/examples/python/permissions_example.py @@ -145,7 +145,7 @@ async def main(): tenant_id = await create_tenant("CogneeLab", user_2.id) print("User 2 is selecting CogneeLab tenant/organization as active tenant") - await select_tenant(user_id=user_2.id, tenant_id=tenant_id) + await select_tenant(user=user_2, tenant_id=tenant_id) print("\nUser 2 is creating Researcher role") role_id = await create_role(role_name="Researcher", owner_id=user_2.id) @@ -163,7 +163,7 @@ async def main(): await add_user_to_role(user_id=user_3.id, role_id=role_id, owner_id=user_2.id) print("\nOperation as user_3 to select CogneeLab tenant/organization as active tenant") - await select_tenant(user_id=user_3.id, tenant_id=tenant_id) + 
await select_tenant(user=user_3, tenant_id=tenant_id) print( "\nOperation started as user_2, with CogneeLab as its active tenant, to give read permission to Researcher role for the dataset QUANTUM owned by user_2" @@ -183,21 +183,23 @@ async def main(): ) print( - "We will now create a new QUANTUM dataset in the CogneeLab tenant so that permissions can be assigned to the Researcher role inside the tenant/organization" + "We will now create a new QUANTUM dataset with the QUANTUM_COGNEE_LAB name in the CogneeLab tenant so that permissions can be assigned to the Researcher role inside the tenant/organization" ) - # Re-create the QUANTUM dataset in the CogneeLab tenant. The old QUANTUM dataset is still owned by user_2 personally + # We can re-create the QUANTUM dataset in the CogneeLab tenant. The old QUANTUM dataset is still owned by user_2 personally # and can still be accessed by selecting the personal tenant for user 2. - await cognee.add([text], dataset_name="QUANTUM", user=user_2) - quantum_cognify_result = await cognee.cognify(["QUANTUM"], user=user_2) + await cognee.add([text], dataset_name="QUANTUM_COGNEE_LAB", user=user_2) + quantum_cognee_lab_cognify_result = await cognee.cognify(["QUANTUM_COGNEE_LAB"], user=user_2) # The recreated Quantum dataset will now have a different dataset_id as it's a new dataset in a different organization - quantum_dataset_id_cognee_lab_tenant = extract_dataset_id_from_cognify(quantum_cognify_result) + quantum_cognee_lab_dataset_id = extract_dataset_id_from_cognify( + quantum_cognee_lab_cognify_result + ) print( "\nOperation started as user_2, with CogneeLab as its active tenant, to give read permission to Researcher role for the dataset QUANTUM owned by the CogneeLab tenant" ) await authorized_give_permission_on_datasets( role_id, - [quantum_dataset_id_cognee_lab_tenant], + [quantum_cognee_lab_dataset_id], "read", user_2.id, ) @@ -207,8 +209,8 @@ async def main(): search_results = await cognee.search( 
query_type=SearchType.GRAPH_COMPLETION, query_text="What is in the document?", - user=user_1, - dataset_ids=[quantum_dataset_id], + user=user_3, + dataset_ids=[quantum_cognee_lab_dataset_id], ) for result in search_results: print(f"{result}\n") From c2aaec2a827fbdd8f91747989753b4f62a41fa38 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 4 Nov 2025 23:34:51 +0100 Subject: [PATCH 071/284] refactor: Resolve issue with permissions example --- .../api/v1/permissions/routers/get_permissions_router.py | 2 +- cognee/modules/users/tenants/methods/select_tenant.py | 6 ++++-- examples/python/permissions_example.py | 8 ++++++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/cognee/api/v1/permissions/routers/get_permissions_router.py b/cognee/api/v1/permissions/routers/get_permissions_router.py index db2c72705..20d35e748 100644 --- a/cognee/api/v1/permissions/routers/get_permissions_router.py +++ b/cognee/api/v1/permissions/routers/get_permissions_router.py @@ -259,7 +259,7 @@ def get_permissions_router() -> APIRouter: from cognee.modules.users.tenants.methods import select_tenant as select_tenant_method - await select_tenant_method(user=user, tenant_id=payload.tenant_id) + await select_tenant_method(user_id=user.id, tenant_id=payload.tenant_id) return JSONResponse( status_code=200, diff --git a/cognee/modules/users/tenants/methods/select_tenant.py b/cognee/modules/users/tenants/methods/select_tenant.py index cb291d5f2..83c11dc91 100644 --- a/cognee/modules/users/tenants/methods/select_tenant.py +++ b/cognee/modules/users/tenants/methods/select_tenant.py @@ -5,19 +5,20 @@ import sqlalchemy.exc from sqlalchemy import select from cognee.infrastructure.databases.relational import get_relational_engine +from cognee.modules.users.methods.get_user import get_user from cognee.modules.users.models.UserTenant import UserTenant from cognee.modules.users.models.User import User from cognee.modules.users.permissions.methods import get_tenant from 
cognee.modules.users.exceptions import UserNotFoundError, TenantNotFoundError -async def select_tenant(user: User, tenant_id: Union[UUID, None]) -> User: +async def select_tenant(user_id: UUID, tenant_id: Union[UUID, None]) -> User: """ Set the users active tenant to provided tenant. If None tenant_id is provided set current Tenant to the default single user-tenant Args: - user: User object. + user_id: UUID of the user. tenant_id: Id of the tenant. Returns: @@ -26,6 +27,7 @@ async def select_tenant(user: User, tenant_id: Union[UUID, None]) -> User: """ db_engine = get_relational_engine() async with db_engine.get_async_session() as session: + user = await get_user(user_id) if tenant_id is None: # If no tenant_id is provided set current Tenant to the single user-tenant user.tenant_id = None diff --git a/examples/python/permissions_example.py b/examples/python/permissions_example.py index 4bbd30bea..c0b104023 100644 --- a/examples/python/permissions_example.py +++ b/examples/python/permissions_example.py @@ -145,7 +145,7 @@ async def main(): tenant_id = await create_tenant("CogneeLab", user_2.id) print("User 2 is selecting CogneeLab tenant/organization as active tenant") - await select_tenant(user=user_2, tenant_id=tenant_id) + await select_tenant(user_id=user_2.id, tenant_id=tenant_id) print("\nUser 2 is creating Researcher role") role_id = await create_role(role_name="Researcher", owner_id=user_2.id) @@ -163,7 +163,7 @@ async def main(): await add_user_to_role(user_id=user_3.id, role_id=role_id, owner_id=user_2.id) print("\nOperation as user_3 to select CogneeLab tenant/organization as active tenant") - await select_tenant(user=user_3, tenant_id=tenant_id) + await select_tenant(user_id=user_3.id, tenant_id=tenant_id) print( "\nOperation started as user_2, with CogneeLab as its active tenant, to give read permission to Researcher role for the dataset QUANTUM owned by user_2" @@ -187,6 +187,10 @@ async def main(): ) # We can re-create the QUANTUM dataset in the 
CogneeLab tenant. The old QUANTUM dataset is still owned by user_2 personally # and can still be accessed by selecting the personal tenant for user 2. + from cognee.modules.users.methods import get_user + + # Note: We need to update user_2 from the database to refresh its tenant context changes + user_2 = await get_user(user_2.id) await cognee.add([text], dataset_name="QUANTUM_COGNEE_LAB", user=user_2) quantum_cognee_lab_cognify_result = await cognee.cognify(["QUANTUM_COGNEE_LAB"], user=user_2) From eaf8d718b0fc52ebc2aefaee9e8fa524abb96125 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 5 Nov 2025 10:27:54 +0100 Subject: [PATCH 072/284] feat: introduces memify pipeline to save cache sessions into cognee (#1731) ## Description This PR introduces a new memify pipeline to save cache sessions in cognee. The QA sessions are added to the main knowledge base as separate documents. ## Type of Change - [ ] Bug fix (non-breaking change that fixes an issue) - [x] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) None ## Pre-submission Checklist - [x] **I have tested my changes thoroughly before submitting this PR** - [x] **This PR contains minimal changes necessary to address the issue/feature** - [x] My code follows the project's coding standards and style guidelines - [x] I have added tests that prove my fix is effective or that my feature works - [x] I have added necessary documentation (if applicable) - [x] All new and existing tests pass - [x] I have searched existing PRs to ensure this change hasn't been submitted already - [x] I have linked any relevant issues in the description - [x] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every 
commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. --- .../persist_sessions_in_knowledge_graph.py | 55 ++++++ cognee/tasks/memify/__init__.py | 2 + cognee/tasks/memify/cognify_session.py | 40 ++++ cognee/tasks/memify/extract_user_sessions.py | 73 ++++++++ cognee/tests/test_conversation_history.py | 43 ++++- .../memify_tasks/test_cognify_session.py | 107 +++++++++++ .../test_extract_user_sessions.py | 175 ++++++++++++++++++ ...onversation_session_persistence_example.py | 98 ++++++++++ 8 files changed, 592 insertions(+), 1 deletion(-) create mode 100644 cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py create mode 100644 cognee/tasks/memify/cognify_session.py create mode 100644 cognee/tasks/memify/extract_user_sessions.py create mode 100644 cognee/tests/unit/modules/memify_tasks/test_cognify_session.py create mode 100644 cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py create mode 100644 examples/python/conversation_session_persistence_example.py diff --git a/cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py b/cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py new file mode 100644 index 000000000..c0ba0a4d9 --- /dev/null +++ b/cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py @@ -0,0 +1,55 @@ +from typing import Optional, List + +from cognee import memify +from cognee.context_global_variables import ( + set_database_global_context_variables, + set_session_user_context_variable, +) +from cognee.exceptions import CogneeValidationError +from cognee.modules.data.methods import get_authorized_existing_datasets +from cognee.shared.logging_utils import get_logger +from cognee.modules.pipelines.tasks.task import Task +from cognee.modules.users.models import User +from cognee.tasks.memify import extract_user_sessions, cognify_session + + +logger = get_logger("persist_sessions_in_knowledge_graph") + + +async def persist_sessions_in_knowledge_graph_pipeline( 
+ user: User, + session_ids: Optional[List[str]] = None, + dataset: str = "main_dataset", + run_in_background: bool = False, +): + await set_session_user_context_variable(user) + dataset_to_write = await get_authorized_existing_datasets( + user=user, datasets=[dataset], permission_type="write" + ) + + if not dataset_to_write: + raise CogneeValidationError( + message=f"User (id: {str(user.id)}) does not have write access to dataset: {dataset}", + log=False, + ) + + await set_database_global_context_variables( + dataset_to_write[0].id, dataset_to_write[0].owner_id + ) + + extraction_tasks = [Task(extract_user_sessions, session_ids=session_ids)] + + enrichment_tasks = [ + Task(cognify_session), + ] + + result = await memify( + extraction_tasks=extraction_tasks, + enrichment_tasks=enrichment_tasks, + dataset=dataset_to_write[0].id, + data=[{}], + run_in_background=run_in_background, + ) + + logger.info("Session persistence pipeline completed") + return result diff --git a/cognee/tasks/memify/__init__.py b/cognee/tasks/memify/__init__.py index 692bac443..7e590ed47 100644 --- a/cognee/tasks/memify/__init__.py +++ b/cognee/tasks/memify/__init__.py @@ -1,2 +1,4 @@ from .extract_subgraph import extract_subgraph from .extract_subgraph_chunks import extract_subgraph_chunks +from .cognify_session import cognify_session +from .extract_user_sessions import extract_user_sessions diff --git a/cognee/tasks/memify/cognify_session.py b/cognee/tasks/memify/cognify_session.py new file mode 100644 index 000000000..7c276169a --- /dev/null +++ b/cognee/tasks/memify/cognify_session.py @@ -0,0 +1,40 @@ +import cognee + +from cognee.exceptions import CogneeValidationError, CogneeSystemError +from cognee.shared.logging_utils import get_logger + +logger = get_logger("cognify_session") + + +async def cognify_session(data): + """ + Process and cognify session data into the knowledge graph. 
+ + Adds session content to cognee with a dedicated "user_sessions_from_cache" node set, + then triggers the cognify pipeline to extract entities and relationships + from the session data. + + Args: + data: Session string containing Question, Context, and Answer information. + + Raises: + CogneeValidationError: If data is None or empty. + CogneeSystemError: If cognee operations fail. + """ + try: + if not data or (isinstance(data, str) and not data.strip()): + logger.warning("Empty session data provided to cognify_session task, skipping") + raise CogneeValidationError(message="Session data cannot be empty", log=False) + + logger.info("Processing session data for cognification") + + await cognee.add(data, node_set=["user_sessions_from_cache"]) + logger.debug("Session data added to cognee with node_set: user_sessions") + await cognee.cognify() + logger.info("Session data successfully cognified") + + except CogneeValidationError: + raise + except Exception as e: + logger.error(f"Error cognifying session data: {str(e)}") + raise CogneeSystemError(message=f"Failed to cognify session data: {str(e)}", log=False) diff --git a/cognee/tasks/memify/extract_user_sessions.py b/cognee/tasks/memify/extract_user_sessions.py new file mode 100644 index 000000000..9779a363e --- /dev/null +++ b/cognee/tasks/memify/extract_user_sessions.py @@ -0,0 +1,73 @@ +from typing import Optional, List + +from cognee.context_global_variables import session_user +from cognee.exceptions import CogneeSystemError +from cognee.infrastructure.databases.cache.get_cache_engine import get_cache_engine +from cognee.shared.logging_utils import get_logger +from cognee.modules.users.models import User + +logger = get_logger("extract_user_sessions") + + +async def extract_user_sessions( + data, + session_ids: Optional[List[str]] = None, +): + """ + Extract Q&A sessions for the current user from cache. 
+ + Retrieves all Q&A triplets from specified session IDs and yields them + as formatted strings combining question and answer. + + Args: + data: Data passed from memify. If empty dict ({}), no external data is provided. + session_ids: Optional list of specific session IDs to extract. + + Yields: + String containing session ID and all Q&A pairs formatted. + + Raises: + CogneeSystemError: If cache engine is unavailable or extraction fails. + """ + try: + if not data or data == [{}]: + logger.info("Fetching session metadata for current user") + + user: User = session_user.get() + if not user: + raise CogneeSystemError(message="No authenticated user found in context", log=False) + + user_id = str(user.id) + + cache_engine = get_cache_engine() + if cache_engine is None: + raise CogneeSystemError( + message="Cache engine not available for session extraction, please enable caching in order to have sessions to save", + log=False, + ) + + if session_ids: + for session_id in session_ids: + try: + qa_data = await cache_engine.get_all_qas(user_id, session_id) + if qa_data: + logger.info(f"Extracted session {session_id} with {len(qa_data)} Q&A pairs") + session_string = f"Session ID: {session_id}\n\n" + for qa_pair in qa_data: + question = qa_pair.get("question", "") + answer = qa_pair.get("answer", "") + session_string += f"Question: {question}\n\nAnswer: {answer}\n\n" + yield session_string + except Exception as e: + logger.warning(f"Failed to extract session {session_id}: {str(e)}") + continue + else: + logger.info( + "No specific session_ids provided. Please specify which sessions to extract." 
+ ) + + except CogneeSystemError: + raise + except Exception as e: + logger.error(f"Error extracting user sessions: {str(e)}") + raise CogneeSystemError(message=f"Failed to extract user sessions: {str(e)}", log=False) diff --git a/cognee/tests/test_conversation_history.py b/cognee/tests/test_conversation_history.py index 30bb54ef1..6b5b737f1 100644 --- a/cognee/tests/test_conversation_history.py +++ b/cognee/tests/test_conversation_history.py @@ -16,9 +16,11 @@ import cognee import pathlib from cognee.infrastructure.databases.cache import get_cache_engine +from cognee.infrastructure.databases.graph import get_graph_engine from cognee.modules.search.types import SearchType from cognee.shared.logging_utils import get_logger from cognee.modules.users.methods import get_default_user +from collections import Counter logger = get_logger() @@ -188,7 +190,6 @@ async def main(): f"GRAPH_SUMMARY_COMPLETION should return non-empty list, got: {result_summary!r}" ) - # Verify saved history_summary = await cache_engine.get_latest_qa(str(user.id), session_id_summary, last_n=10) our_qa_summary = [ h for h in history_summary if h["question"] == "What are the key points about TechCorp?" 
@@ -228,6 +229,46 @@ async def main(): assert "CONTEXT:" in formatted_history, "Formatted history should contain 'CONTEXT:' prefix" assert "ANSWER:" in formatted_history, "Formatted history should contain 'ANSWER:' prefix" + from cognee.memify_pipelines.persist_sessions_in_knowledge_graph import ( + persist_sessions_in_knowledge_graph_pipeline, + ) + + logger.info("Starting persist_sessions_in_knowledge_graph tests") + + await persist_sessions_in_knowledge_graph_pipeline( + user=user, + session_ids=[session_id_1, session_id_2], + dataset=dataset_name, + run_in_background=False, + ) + + graph_engine = await get_graph_engine() + graph = await graph_engine.get_graph_data() + + type_counts = Counter(node_data[1].get("type", {}) for node_data in graph[0]) + + "Tests the correct number of NodeSet nodes after session persistence" + assert type_counts.get("NodeSet", 0) == 1, ( + f"Number of NodeSets in the graph is incorrect, found {type_counts.get('NodeSet', 0)} but there should be exactly 1." + ) + + "Tests the correct number of DocumentChunk nodes after session persistence" + assert type_counts.get("DocumentChunk", 0) == 4, ( + f"Number of DocumentChunk nodes in the graph is incorrect, found {type_counts.get('DocumentChunk', 0)} but there should be exactly 4 (2 original documents, 2 sessions)." 
+ ) + + from cognee.infrastructure.databases.vector.get_vector_engine import get_vector_engine + + vector_engine = get_vector_engine() + collection_size = await vector_engine.search( + collection_name="DocumentChunk_text", + query_text="test", + limit=1000, + ) + assert len(collection_size) == 4, ( + f"DocumentChunk_text collection should have exactly 4 embeddings, found {len(collection_size)}" + ) + await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) diff --git a/cognee/tests/unit/modules/memify_tasks/test_cognify_session.py b/cognee/tests/unit/modules/memify_tasks/test_cognify_session.py new file mode 100644 index 000000000..c23640fbd --- /dev/null +++ b/cognee/tests/unit/modules/memify_tasks/test_cognify_session.py @@ -0,0 +1,107 @@ +import pytest +from unittest.mock import AsyncMock, patch + +from cognee.tasks.memify.cognify_session import cognify_session +from cognee.exceptions import CogneeValidationError, CogneeSystemError + + +@pytest.mark.asyncio +async def test_cognify_session_success(): + """Test successful cognification of session data.""" + session_data = ( + "Session ID: test_session\n\nQuestion: What is AI?\n\nAnswer: AI is artificial intelligence" + ) + + with ( + patch("cognee.add", new_callable=AsyncMock) as mock_add, + patch("cognee.cognify", new_callable=AsyncMock) as mock_cognify, + ): + await cognify_session(session_data) + + mock_add.assert_called_once_with(session_data, node_set=["user_sessions_from_cache"]) + mock_cognify.assert_called_once() + + +@pytest.mark.asyncio +async def test_cognify_session_empty_string(): + """Test cognification fails with empty string.""" + with pytest.raises(CogneeValidationError) as exc_info: + await cognify_session("") + + assert "Session data cannot be empty" in str(exc_info.value) + + +@pytest.mark.asyncio +async def test_cognify_session_whitespace_string(): + """Test cognification fails with whitespace-only string.""" + with pytest.raises(CogneeValidationError) as exc_info: + 
await cognify_session(" \n\t ") + + assert "Session data cannot be empty" in str(exc_info.value) + + +@pytest.mark.asyncio +async def test_cognify_session_none_data(): + """Test cognification fails with None data.""" + with pytest.raises(CogneeValidationError) as exc_info: + await cognify_session(None) + + assert "Session data cannot be empty" in str(exc_info.value) + + +@pytest.mark.asyncio +async def test_cognify_session_add_failure(): + """Test cognification handles cognee.add failure.""" + session_data = "Session ID: test\n\nQuestion: test?" + + with ( + patch("cognee.add", new_callable=AsyncMock) as mock_add, + patch("cognee.cognify", new_callable=AsyncMock), + ): + mock_add.side_effect = Exception("Add operation failed") + + with pytest.raises(CogneeSystemError) as exc_info: + await cognify_session(session_data) + + assert "Failed to cognify session data" in str(exc_info.value) + assert "Add operation failed" in str(exc_info.value) + + +@pytest.mark.asyncio +async def test_cognify_session_cognify_failure(): + """Test cognification handles cognify failure.""" + session_data = "Session ID: test\n\nQuestion: test?" + + with ( + patch("cognee.add", new_callable=AsyncMock), + patch("cognee.cognify", new_callable=AsyncMock) as mock_cognify, + ): + mock_cognify.side_effect = Exception("Cognify operation failed") + + with pytest.raises(CogneeSystemError) as exc_info: + await cognify_session(session_data) + + assert "Failed to cognify session data" in str(exc_info.value) + assert "Cognify operation failed" in str(exc_info.value) + + +@pytest.mark.asyncio +async def test_cognify_session_re_raises_validation_error(): + """Test that CogneeValidationError is re-raised as-is.""" + with pytest.raises(CogneeValidationError): + await cognify_session("") + + +@pytest.mark.asyncio +async def test_cognify_session_with_special_characters(): + """Test cognification with special characters.""" + session_data = "Session: test™ © Question: What's special? Answer: Cognee is special!" 
+ + with ( + patch("cognee.add", new_callable=AsyncMock) as mock_add, + patch("cognee.cognify", new_callable=AsyncMock) as mock_cognify, + ): + await cognify_session(session_data) + + mock_add.assert_called_once_with(session_data, node_set=["user_sessions_from_cache"]) + mock_cognify.assert_called_once() diff --git a/cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py b/cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py new file mode 100644 index 000000000..8cb27fef3 --- /dev/null +++ b/cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py @@ -0,0 +1,175 @@ +import sys +import pytest +from unittest.mock import AsyncMock, MagicMock, patch + +from cognee.tasks.memify.extract_user_sessions import extract_user_sessions +from cognee.exceptions import CogneeSystemError +from cognee.modules.users.models import User + +# Get the actual module object (not the function) for patching +extract_user_sessions_module = sys.modules["cognee.tasks.memify.extract_user_sessions"] + + +@pytest.fixture +def mock_user(): + """Create a mock user.""" + user = MagicMock(spec=User) + user.id = "test-user-123" + return user + + +@pytest.fixture +def mock_qa_data(): + """Create mock Q&A data.""" + return [ + { + "question": "What is cognee?", + "context": "context about cognee", + "answer": "Cognee is a knowledge graph solution", + "time": "2025-01-01T12:00:00", + }, + { + "question": "How does it work?", + "context": "how it works context", + "answer": "It processes data and creates graphs", + "time": "2025-01-01T12:05:00", + }, + ] + + +@pytest.mark.asyncio +async def test_extract_user_sessions_success(mock_user, mock_qa_data): + """Test successful extraction of sessions.""" + mock_cache_engine = AsyncMock() + mock_cache_engine.get_all_qas.return_value = mock_qa_data + + with ( + patch.object(extract_user_sessions_module, "session_user") as mock_session_user, + patch.object( + extract_user_sessions_module, "get_cache_engine", 
return_value=mock_cache_engine + ), + ): + mock_session_user.get.return_value = mock_user + + sessions = [] + async for session in extract_user_sessions([{}], session_ids=["test_session"]): + sessions.append(session) + + assert len(sessions) == 1 + assert "Session ID: test_session" in sessions[0] + assert "Question: What is cognee?" in sessions[0] + assert "Answer: Cognee is a knowledge graph solution" in sessions[0] + assert "Question: How does it work?" in sessions[0] + assert "Answer: It processes data and creates graphs" in sessions[0] + + +@pytest.mark.asyncio +async def test_extract_user_sessions_multiple_sessions(mock_user, mock_qa_data): + """Test extraction of multiple sessions.""" + mock_cache_engine = AsyncMock() + mock_cache_engine.get_all_qas.return_value = mock_qa_data + + with ( + patch.object(extract_user_sessions_module, "session_user") as mock_session_user, + patch.object( + extract_user_sessions_module, "get_cache_engine", return_value=mock_cache_engine + ), + ): + mock_session_user.get.return_value = mock_user + + sessions = [] + async for session in extract_user_sessions([{}], session_ids=["session1", "session2"]): + sessions.append(session) + + assert len(sessions) == 2 + assert mock_cache_engine.get_all_qas.call_count == 2 + + +@pytest.mark.asyncio +async def test_extract_user_sessions_no_data(mock_user, mock_qa_data): + """Test extraction handles empty data parameter.""" + mock_cache_engine = AsyncMock() + mock_cache_engine.get_all_qas.return_value = mock_qa_data + + with ( + patch.object(extract_user_sessions_module, "session_user") as mock_session_user, + patch.object( + extract_user_sessions_module, "get_cache_engine", return_value=mock_cache_engine + ), + ): + mock_session_user.get.return_value = mock_user + + sessions = [] + async for session in extract_user_sessions(None, session_ids=["test_session"]): + sessions.append(session) + + assert len(sessions) == 1 + + +@pytest.mark.asyncio +async def 
test_extract_user_sessions_no_session_ids(mock_user): + """Test extraction handles no session IDs provided.""" + mock_cache_engine = AsyncMock() + + with ( + patch.object(extract_user_sessions_module, "session_user") as mock_session_user, + patch.object( + extract_user_sessions_module, "get_cache_engine", return_value=mock_cache_engine + ), + ): + mock_session_user.get.return_value = mock_user + + sessions = [] + async for session in extract_user_sessions([{}], session_ids=None): + sessions.append(session) + + assert len(sessions) == 0 + mock_cache_engine.get_all_qas.assert_not_called() + + +@pytest.mark.asyncio +async def test_extract_user_sessions_empty_qa_data(mock_user): + """Test extraction handles empty Q&A data.""" + mock_cache_engine = AsyncMock() + mock_cache_engine.get_all_qas.return_value = [] + + with ( + patch.object(extract_user_sessions_module, "session_user") as mock_session_user, + patch.object( + extract_user_sessions_module, "get_cache_engine", return_value=mock_cache_engine + ), + ): + mock_session_user.get.return_value = mock_user + + sessions = [] + async for session in extract_user_sessions([{}], session_ids=["empty_session"]): + sessions.append(session) + + assert len(sessions) == 0 + + +@pytest.mark.asyncio +async def test_extract_user_sessions_cache_error_handling(mock_user, mock_qa_data): + """Test extraction continues on cache error for specific session.""" + mock_cache_engine = AsyncMock() + mock_cache_engine.get_all_qas.side_effect = [ + mock_qa_data, + Exception("Cache error"), + mock_qa_data, + ] + + with ( + patch.object(extract_user_sessions_module, "session_user") as mock_session_user, + patch.object( + extract_user_sessions_module, "get_cache_engine", return_value=mock_cache_engine + ), + ): + mock_session_user.get.return_value = mock_user + + sessions = [] + async for session in extract_user_sessions( + [{}], session_ids=["session1", "session2", "session3"] + ): + sessions.append(session) + + assert len(sessions) == 2 diff --git 
a/examples/python/conversation_session_persistence_example.py b/examples/python/conversation_session_persistence_example.py new file mode 100644 index 000000000..5346f5012 --- /dev/null +++ b/examples/python/conversation_session_persistence_example.py @@ -0,0 +1,98 @@ +import asyncio + +import cognee +from cognee import visualize_graph +from cognee.memify_pipelines.persist_sessions_in_knowledge_graph import ( + persist_sessions_in_knowledge_graph_pipeline, +) +from cognee.modules.search.types import SearchType +from cognee.modules.users.methods import get_default_user +from cognee.shared.logging_utils import get_logger + +logger = get_logger("conversation_session_persistence_example") + + +async def main(): + # NOTE: CACHING has to be enabled for this example to work + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + + text_1 = "Cognee is a solution that can build knowledge graph from text, creating an AI memory system" + text_2 = "Germany is a country located next to the Netherlands" + + await cognee.add([text_1, text_2]) + await cognee.cognify() + + question = "What can I use to create a knowledge graph?" + search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, + query_text=question, + ) + print("\nSession ID: default_session") + print(f"Question: {question}") + print(f"Answer: {search_results}\n") + + question = "You sure about that?" + search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, query_text=question + ) + print("\nSession ID: default_session") + print(f"Question: {question}") + print(f"Answer: {search_results}\n") + + question = "This is awesome!" + search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, query_text=question + ) + print("\nSession ID: default_session") + print(f"Question: {question}") + print(f"Answer: {search_results}\n") + + question = "Where is Germany?" 
+ search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, + query_text=question, + session_id="different_session", + ) + print("\nSession ID: different_session") + print(f"Question: {question}") + print(f"Answer: {search_results}\n") + + question = "Next to which country again?" + search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, + query_text=question, + session_id="different_session", + ) + print("\nSession ID: different_session") + print(f"Question: {question}") + print(f"Answer: {search_results}\n") + + question = "So you remember everything I asked from you?" + search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, + query_text=question, + session_id="different_session", + ) + print("\nSession ID: different_session") + print(f"Question: {question}") + print(f"Answer: {search_results}\n") + + session_ids_to_persist = ["default_session", "different_session"] + default_user = await get_default_user() + + await persist_sessions_in_knowledge_graph_pipeline( + user=default_user, + session_ids=session_ids_to_persist, + ) + + await visualize_graph() + + +if __name__ == "__main__": + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + loop.run_until_complete(main()) + finally: + loop.run_until_complete(loop.shutdown_asyncgens()) From 1643b13c95ba83b08abb0d1afeec80767049db26 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 5 Nov 2025 12:43:01 +0100 Subject: [PATCH 073/284] chore: add table creation for multi-tenancy to migration --- .../c946955da633_multi_tenant_support.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/alembic/versions/c946955da633_multi_tenant_support.py b/alembic/versions/c946955da633_multi_tenant_support.py index 09781c85c..fc45644d0 100644 --- a/alembic/versions/c946955da633_multi_tenant_support.py +++ b/alembic/versions/c946955da633_multi_tenant_support.py @@ -75,6 +75,28 @@ def upgrade() -> None: dataset = 
_define_dataset_table() user = _define_user_table() + if "user_tenants" not in insp.get_table_names(): + tenant_id_from_user = sa.select(user.c.tenant_id).scalar_subquery() + # Define table with all necessary columns including primary key + user_tenants = op.create_table( + "user_tenants", + sa.Column("user_id", sa.UUID, sa.ForeignKey("users.id"), primary_key=True), + sa.Column("tenant_id", sa.UUID, sa.ForeignKey("tenants.id"), primary_key=True), + sa.Column("created_at", sa.DateTime(), default=lambda: datetime.now(timezone.utc)), + ) + if op.get_context().dialect.name == "sqlite": + # If column doesn't exist create new original_extension column and update from values of extension column + with op.batch_alter_table("user_tenants") as batch_op: + batch_op.execute( + user_tenants.update().values( + tenant_id=tenant_id_from_user, + user_id=user.c.id, + ) + ) + else: + conn = op.get_bind() + conn.execute(dataset.update().values(tenant_id=tenant_id_from_user, user_id=user.c.id)) + tenant_id_column = _get_column(insp, "datasets", "tenant_id") if not tenant_id_column: op.add_column("datasets", sa.Column("tenant_id", sa.UUID(), nullable=True)) From c481b87d58415c89a94db63aab90499a64278605 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Wed, 5 Nov 2025 12:56:17 +0100 Subject: [PATCH 074/284] refactor: Remove codify and code_graph pipeline from main repo --- .github/workflows/basic_tests.yml | 29 ----- cognee-mcp/src/server.py | 111 ---------------- cognee/api/client.py | 6 +- cognee/api/v1/cognify/code_graph_pipeline.py | 119 ------------------ cognee/api/v1/cognify/routers/__init__.py | 1 - .../routers/get_code_pipeline_router.py | 90 ------------- cognee/modules/pipelines/__init__.py | 1 + .../pipelines/custom_pipeline_interface.py | 12 ++ examples/python/code_graph_example.py | 58 --------- 9 files changed, 14 insertions(+), 413 deletions(-) delete mode 100644 cognee/api/v1/cognify/code_graph_pipeline.py delete mode 100644 
cognee/api/v1/cognify/routers/get_code_pipeline_router.py create mode 100644 cognee/modules/pipelines/custom_pipeline_interface.py delete mode 100644 examples/python/code_graph_example.py diff --git a/.github/workflows/basic_tests.yml b/.github/workflows/basic_tests.yml index b7f324310..a93e8dffe 100644 --- a/.github/workflows/basic_tests.yml +++ b/.github/workflows/basic_tests.yml @@ -193,32 +193,3 @@ jobs: - name: Run Simple Examples run: uv run python ./examples/python/simple_example.py - - graph-tests: - name: Run Basic Graph Tests - runs-on: ubuntu-22.04 - env: - LLM_PROVIDER: openai - LLM_MODEL: ${{ secrets.LLM_MODEL }} - LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} - LLM_API_KEY: ${{ secrets.LLM_API_KEY }} - LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} - - EMBEDDING_PROVIDER: openai - EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} - EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} - EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} - EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - steps: - - name: Check out repository - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Cognee Setup - uses: ./.github/actions/cognee_setup - with: - python-version: ${{ inputs.python-version }} - - - name: Run Graph Tests - run: uv run python ./examples/python/code_graph_example.py --repo_path ./cognee/tasks/graph diff --git a/cognee-mcp/src/server.py b/cognee-mcp/src/server.py index ce6dad88a..3a64ba65a 100755 --- a/cognee-mcp/src/server.py +++ b/cognee-mcp/src/server.py @@ -407,75 +407,6 @@ async def save_interaction(data: str) -> list: ] -@mcp.tool() -async def codify(repo_path: str) -> list: - """ - Analyze and generate a code-specific knowledge graph from a software repository. - - This function launches a background task that processes the provided repository - and builds a code knowledge graph. The function returns immediately while - the processing continues in the background due to MCP timeout constraints. 
- - Parameters - ---------- - repo_path : str - Path to the code repository to analyze. This can be a local file path or a - relative path to a repository. The path should point to the root of the - repository or a specific directory within it. - - Returns - ------- - list - A list containing a single TextContent object with information about the - background task launch and how to check its status. - - Notes - ----- - - The function launches a background task and returns immediately - - The code graph generation may take significant time for larger repositories - - Use the codify_status tool to check the progress of the operation - - Process results are logged to the standard Cognee log file - - All stdout is redirected to stderr to maintain MCP communication integrity - """ - - if cognee_client.use_api: - error_msg = "❌ Codify operation is not available in API mode. Please use direct mode for code graph pipeline." - logger.error(error_msg) - return [types.TextContent(type="text", text=error_msg)] - - async def codify_task(repo_path: str): - # NOTE: MCP uses stdout to communicate, we must redirect all output - # going to stdout ( like the print function ) to stderr. 
- with redirect_stdout(sys.stderr): - logger.info("Codify process starting.") - from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline - - results = [] - async for result in run_code_graph_pipeline(repo_path, False): - results.append(result) - logger.info(result) - if all(results): - logger.info("Codify process finished succesfully.") - else: - logger.info("Codify process failed.") - - asyncio.create_task(codify_task(repo_path)) - - log_file = get_log_file_location() - text = ( - f"Background process launched due to MCP timeout limitations.\n" - f"To check current codify status use the codify_status tool\n" - f"or you can check the log file at: {log_file}" - ) - - return [ - types.TextContent( - type="text", - text=text, - ) - ] - - @mcp.tool() async def search(search_query: str, search_type: str) -> list: """ @@ -954,48 +885,6 @@ async def cognify_status(): return [types.TextContent(type="text", text=error_msg)] -@mcp.tool() -async def codify_status(): - """ - Get the current status of the codify pipeline. - - This function retrieves information about current and recently completed codify operations - in the codebase dataset. It provides details on progress, success/failure status, and statistics - about the processed code repositories. - - Returns - ------- - list - A list containing a single TextContent object with the status information as a string. - The status includes information about active and completed jobs for the cognify_code_pipeline. 
- - Notes - ----- - - The function retrieves pipeline status specifically for the "cognify_code_pipeline" on the "codebase" dataset - - Status information includes job progress, execution time, and completion status - - The status is returned in string format for easy reading - - This operation is not available in API mode - """ - with redirect_stdout(sys.stderr): - try: - from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id - from cognee.modules.users.methods import get_default_user - - user = await get_default_user() - status = await cognee_client.get_pipeline_status( - [await get_unique_dataset_id("codebase", user)], "cognify_code_pipeline" - ) - return [types.TextContent(type="text", text=str(status))] - except NotImplementedError: - error_msg = "❌ Pipeline status is not available in API mode" - logger.error(error_msg) - return [types.TextContent(type="text", text=error_msg)] - except Exception as e: - error_msg = f"❌ Failed to get codify status: {str(e)}" - logger.error(error_msg) - return [types.TextContent(type="text", text=error_msg)] - - def node_to_string(node): node_data = ", ".join( [f'{key}: "{value}"' for key, value in node.items() if key in ["id", "name"]] diff --git a/cognee/api/client.py b/cognee/api/client.py index 6766c12de..ab64f3489 100644 --- a/cognee/api/client.py +++ b/cognee/api/client.py @@ -21,7 +21,7 @@ from cognee.api.v1.notebooks.routers import get_notebooks_router from cognee.api.v1.permissions.routers import get_permissions_router from cognee.api.v1.settings.routers import get_settings_router from cognee.api.v1.datasets.routers import get_datasets_router -from cognee.api.v1.cognify.routers import get_code_pipeline_router, get_cognify_router +from cognee.api.v1.cognify.routers import get_cognify_router from cognee.api.v1.search.routers import get_search_router from cognee.api.v1.memify.routers import get_memify_router from cognee.api.v1.add.routers import get_add_router @@ -270,10 +270,6 @@ 
app.include_router(get_responses_router(), prefix="/api/v1/responses", tags=["re app.include_router(get_sync_router(), prefix="/api/v1/sync", tags=["sync"]) -codegraph_routes = get_code_pipeline_router() -if codegraph_routes: - app.include_router(codegraph_routes, prefix="/api/v1/code-pipeline", tags=["code-pipeline"]) - app.include_router( get_users_router(), prefix="/api/v1/users", diff --git a/cognee/api/v1/cognify/code_graph_pipeline.py b/cognee/api/v1/cognify/code_graph_pipeline.py deleted file mode 100644 index fb3612857..000000000 --- a/cognee/api/v1/cognify/code_graph_pipeline.py +++ /dev/null @@ -1,119 +0,0 @@ -import os -import pathlib -import asyncio -from typing import Optional -from cognee.shared.logging_utils import get_logger, setup_logging -from cognee.modules.observability.get_observe import get_observe - -from cognee.api.v1.search import SearchType, search -from cognee.api.v1.visualize.visualize import visualize_graph -from cognee.modules.cognify.config import get_cognify_config -from cognee.modules.pipelines import run_tasks -from cognee.modules.pipelines.tasks.task import Task -from cognee.modules.users.methods import get_default_user -from cognee.shared.data_models import KnowledgeGraph -from cognee.modules.data.methods import create_dataset -from cognee.tasks.documents import classify_documents, extract_chunks_from_documents -from cognee.tasks.graph import extract_graph_from_data -from cognee.tasks.ingestion import ingest_data -from cognee.tasks.repo_processor import get_non_py_files, get_repo_file_dependencies - -from cognee.tasks.storage import add_data_points -from cognee.tasks.summarization import summarize_text -from cognee.infrastructure.llm import get_max_chunk_tokens -from cognee.infrastructure.databases.relational import get_relational_engine - -observe = get_observe() - -logger = get_logger("code_graph_pipeline") - - -@observe -async def run_code_graph_pipeline( - repo_path, - include_docs=False, - excluded_paths: Optional[list[str]] 
= None, - supported_languages: Optional[list[str]] = None, -): - import cognee - from cognee.low_level import setup - - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) - await setup() - - cognee_config = get_cognify_config() - user = await get_default_user() - detailed_extraction = True - - tasks = [ - Task( - get_repo_file_dependencies, - detailed_extraction=detailed_extraction, - supported_languages=supported_languages, - excluded_paths=excluded_paths, - ), - # Task(summarize_code, task_config={"batch_size": 500}), # This task takes a long time to complete - Task(add_data_points, task_config={"batch_size": 30}), - ] - - if include_docs: - # This tasks take a long time to complete - non_code_tasks = [ - Task(get_non_py_files, task_config={"batch_size": 50}), - Task(ingest_data, dataset_name="repo_docs", user=user), - Task(classify_documents), - Task(extract_chunks_from_documents, max_chunk_size=get_max_chunk_tokens()), - Task( - extract_graph_from_data, - graph_model=KnowledgeGraph, - task_config={"batch_size": 50}, - ), - Task( - summarize_text, - summarization_model=cognee_config.summarization_model, - task_config={"batch_size": 50}, - ), - ] - - dataset_name = "codebase" - - # Save dataset to database - db_engine = get_relational_engine() - async with db_engine.get_async_session() as session: - dataset = await create_dataset(dataset_name, user, session) - - if include_docs: - non_code_pipeline_run = run_tasks( - non_code_tasks, dataset.id, repo_path, user, "cognify_pipeline" - ) - async for run_status in non_code_pipeline_run: - yield run_status - - async for run_status in run_tasks( - tasks, dataset.id, repo_path, user, "cognify_code_pipeline", incremental_loading=False - ): - yield run_status - - -if __name__ == "__main__": - - async def main(): - async for run_status in run_code_graph_pipeline("REPO_PATH"): - print(f"{run_status.pipeline_run_id}: {run_status.status}") - - file_path = os.path.join( - 
pathlib.Path(__file__).parent, ".artifacts", "graph_visualization.html" - ) - await visualize_graph(file_path) - - search_results = await search( - query_type=SearchType.CODE, - query_text="How is Relationship weight calculated?", - ) - - for file in search_results: - print(file["name"]) - - logger = setup_logging(name="code_graph_pipeline") - asyncio.run(main()) diff --git a/cognee/api/v1/cognify/routers/__init__.py b/cognee/api/v1/cognify/routers/__init__.py index a6da4a179..6e5f9cc9d 100644 --- a/cognee/api/v1/cognify/routers/__init__.py +++ b/cognee/api/v1/cognify/routers/__init__.py @@ -1,2 +1 @@ from .get_cognify_router import get_cognify_router -from .get_code_pipeline_router import get_code_pipeline_router diff --git a/cognee/api/v1/cognify/routers/get_code_pipeline_router.py b/cognee/api/v1/cognify/routers/get_code_pipeline_router.py deleted file mode 100644 index e016c60f9..000000000 --- a/cognee/api/v1/cognify/routers/get_code_pipeline_router.py +++ /dev/null @@ -1,90 +0,0 @@ -import json -from cognee.shared.logging_utils import get_logger -from fastapi import APIRouter -from fastapi.responses import JSONResponse -from cognee.api.DTO import InDTO -from cognee.modules.retrieval.code_retriever import CodeRetriever -from cognee.modules.storage.utils import JSONEncoder - - -logger = get_logger() - - -class CodePipelineIndexPayloadDTO(InDTO): - repo_path: str - include_docs: bool = False - - -class CodePipelineRetrievePayloadDTO(InDTO): - query: str - full_input: str - - -def get_code_pipeline_router() -> APIRouter: - try: - import cognee.api.v1.cognify.code_graph_pipeline - except ModuleNotFoundError: - logger.error("codegraph dependencies not found. Skipping codegraph API routes.") - return None - - router = APIRouter() - - @router.post("/index", response_model=None) - async def code_pipeline_index(payload: CodePipelineIndexPayloadDTO): - """ - Run indexation on a code repository. 
- - This endpoint processes a code repository to create a knowledge graph - of the codebase structure, dependencies, and relationships. - - ## Request Parameters - - **repo_path** (str): Path to the code repository - - **include_docs** (bool): Whether to include documentation files (default: false) - - ## Response - No content returned. Processing results are logged. - - ## Error Codes - - **409 Conflict**: Error during indexation process - """ - from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline - - try: - async for result in run_code_graph_pipeline(payload.repo_path, payload.include_docs): - logger.info(result) - except Exception as error: - return JSONResponse(status_code=409, content={"error": str(error)}) - - @router.post("/retrieve", response_model=list[dict]) - async def code_pipeline_retrieve(payload: CodePipelineRetrievePayloadDTO): - """ - Retrieve context from the code knowledge graph. - - This endpoint searches the indexed code repository to find relevant - context based on the provided query. - - ## Request Parameters - - **query** (str): Search query for code context - - **full_input** (str): Full input text for processing - - ## Response - Returns a list of relevant code files and context as JSON. 
- - ## Error Codes - - **409 Conflict**: Error during retrieval process - """ - try: - query = ( - payload.full_input.replace("cognee ", "") - if payload.full_input.startswith("cognee ") - else payload.full_input - ) - - retriever = CodeRetriever() - retrieved_files = await retriever.get_context(query) - - return json.dumps(retrieved_files, cls=JSONEncoder) - except Exception as error: - return JSONResponse(status_code=409, content={"error": str(error)}) - - return router diff --git a/cognee/modules/pipelines/__init__.py b/cognee/modules/pipelines/__init__.py index 6fca237ca..a0accaeed 100644 --- a/cognee/modules/pipelines/__init__.py +++ b/cognee/modules/pipelines/__init__.py @@ -2,3 +2,4 @@ from .tasks.task import Task from .operations.run_tasks import run_tasks from .operations.run_parallel import run_tasks_parallel from .operations.pipeline import run_pipeline +from .custom_pipeline_interface import CustomPipelineInterface diff --git a/cognee/modules/pipelines/custom_pipeline_interface.py b/cognee/modules/pipelines/custom_pipeline_interface.py new file mode 100644 index 000000000..04c3d113a --- /dev/null +++ b/cognee/modules/pipelines/custom_pipeline_interface.py @@ -0,0 +1,12 @@ +from typing import Protocol, Any +from abc import abstractmethod + + +class CustomPipelineInterface(Protocol): + """ + Defines an interface for creating and running a custom pipeline. 
+ """ + + @abstractmethod + async def run_pipeline(self) -> Any: + raise NotImplementedError diff --git a/examples/python/code_graph_example.py b/examples/python/code_graph_example.py deleted file mode 100644 index 431069050..000000000 --- a/examples/python/code_graph_example.py +++ /dev/null @@ -1,58 +0,0 @@ -import argparse -import asyncio -import cognee -from cognee import SearchType -from cognee.shared.logging_utils import setup_logging, ERROR - -from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline - - -async def main(repo_path, include_docs): - run_status = False - async for run_status in run_code_graph_pipeline(repo_path, include_docs=include_docs): - run_status = run_status - - # Test CODE search - search_results = await cognee.search(query_type=SearchType.CODE, query_text="test") - assert len(search_results) != 0, "The search results list is empty." - print("\n\nSearch results are:\n") - for result in search_results: - print(f"{result}\n") - - return run_status - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--repo_path", type=str, required=True, help="Path to the repository") - parser.add_argument( - "--include_docs", - type=lambda x: x.lower() in ("true", "1"), - default=False, - help="Whether or not to process non-code files", - ) - parser.add_argument( - "--time", - type=lambda x: x.lower() in ("true", "1"), - default=True, - help="Whether or not to time the pipeline run", - ) - return parser.parse_args() - - -if __name__ == "__main__": - logger = setup_logging(log_level=ERROR) - - args = parse_args() - - if args.time: - import time - - start_time = time.time() - asyncio.run(main(args.repo_path, args.include_docs)) - end_time = time.time() - print("\n" + "=" * 50) - print(f"Pipeline Execution Time: {end_time - start_time:.2f} seconds") - print("=" * 50 + "\n") - else: - asyncio.run(main(args.repo_path, args.include_docs)) From 18e4bb48fdedd0b7efc1e22d78b618d3294c5c9a Mon Sep 17 00:00:00 2001 
From: Andrej Milicevic Date: Wed, 5 Nov 2025 13:02:56 +0100 Subject: [PATCH 075/284] refactor: remove code and repository related tasks --- .../code/enrich_dependency_graph_checker.py | 35 -- .../code/get_local_dependencies_checker.py | 20 -- .../code/get_repo_dependency_graph_checker.py | 35 -- cognee/tasks/repo_processor/__init__.py | 2 - .../repo_processor/get_local_dependencies.py | 335 ------------------ .../repo_processor/get_non_code_files.py | 158 --------- .../get_repo_file_dependencies.py | 243 ------------- 7 files changed, 828 deletions(-) delete mode 100644 cognee/tasks/code/enrich_dependency_graph_checker.py delete mode 100644 cognee/tasks/code/get_local_dependencies_checker.py delete mode 100644 cognee/tasks/code/get_repo_dependency_graph_checker.py delete mode 100644 cognee/tasks/repo_processor/__init__.py delete mode 100644 cognee/tasks/repo_processor/get_local_dependencies.py delete mode 100644 cognee/tasks/repo_processor/get_non_code_files.py delete mode 100644 cognee/tasks/repo_processor/get_repo_file_dependencies.py diff --git a/cognee/tasks/code/enrich_dependency_graph_checker.py b/cognee/tasks/code/enrich_dependency_graph_checker.py deleted file mode 100644 index 1b3a80210..000000000 --- a/cognee/tasks/code/enrich_dependency_graph_checker.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -import asyncio -import argparse -from cognee.tasks.repo_processor.get_repo_file_dependencies import get_repo_file_dependencies -from cognee.tasks.repo_processor.enrich_dependency_graph import enrich_dependency_graph - - -def main(): - """ - Execute the main logic of the dependency graph processor. - - This function sets up argument parsing to retrieve the repository path, checks the - existence of the specified path, and processes the repository to produce a dependency - graph. If the repository path does not exist, it logs an error message and terminates - without further execution. 
- """ - parser = argparse.ArgumentParser() - parser.add_argument("repo_path", help="Path to the repository") - args = parser.parse_args() - - repo_path = args.repo_path - if not os.path.exists(repo_path): - print(f"Error: The provided repository path does not exist: {repo_path}") - return - - graph = asyncio.run(get_repo_file_dependencies(repo_path)) - graph = asyncio.run(enrich_dependency_graph(graph)) - for node in graph.nodes: - print(f"Node: {node}") - for _, target, data in graph.out_edges(node, data=True): - print(f" Edge to {target}, data: {data}") - - -if __name__ == "__main__": - main() diff --git a/cognee/tasks/code/get_local_dependencies_checker.py b/cognee/tasks/code/get_local_dependencies_checker.py deleted file mode 100644 index 5d465254a..000000000 --- a/cognee/tasks/code/get_local_dependencies_checker.py +++ /dev/null @@ -1,20 +0,0 @@ -import argparse -import asyncio -from cognee.tasks.repo_processor.get_local_dependencies import get_local_script_dependencies - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Get local script dependencies.") - - # Suggested path: .../cognee/examples/python/simple_example.py - parser.add_argument("script_path", type=str, help="Absolute path to the Python script file") - - # Suggested path: .../cognee - parser.add_argument("repo_path", type=str, help="Absolute path to the repository root") - - args = parser.parse_args() - - dependencies = asyncio.run(get_local_script_dependencies(args.script_path, args.repo_path)) - - print("Dependencies:") - for dependency in dependencies: - print(dependency) diff --git a/cognee/tasks/code/get_repo_dependency_graph_checker.py b/cognee/tasks/code/get_repo_dependency_graph_checker.py deleted file mode 100644 index 0e68cf7fe..000000000 --- a/cognee/tasks/code/get_repo_dependency_graph_checker.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -import asyncio -import argparse -from cognee.tasks.repo_processor.get_repo_file_dependencies import 
get_repo_file_dependencies - - -def main(): - """ - Parse the command line arguments and print the repository file dependencies. - - This function sets up an argument parser to retrieve the path of a repository. It checks - if the provided path exists and if it doesn’t, it prints an error message and exits. If - the path is valid, it calls an asynchronous function to get the dependencies and prints - the nodes and their relations in the dependency graph. - """ - parser = argparse.ArgumentParser() - parser.add_argument("repo_path", help="Path to the repository") - args = parser.parse_args() - - repo_path = args.repo_path - if not os.path.exists(repo_path): - print(f"Error: The provided repository path does not exist: {repo_path}") - return - - graph = asyncio.run(get_repo_file_dependencies(repo_path)) - - for node in graph.nodes: - print(f"Node: {node}") - edges = graph.edges(node, data=True) - for _, target, data in edges: - print(f" Edge to {target}, Relation: {data.get('relation')}") - - -if __name__ == "__main__": - main() diff --git a/cognee/tasks/repo_processor/__init__.py b/cognee/tasks/repo_processor/__init__.py deleted file mode 100644 index 71ef3c287..000000000 --- a/cognee/tasks/repo_processor/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .get_non_code_files import get_non_py_files -from .get_repo_file_dependencies import get_repo_file_dependencies diff --git a/cognee/tasks/repo_processor/get_local_dependencies.py b/cognee/tasks/repo_processor/get_local_dependencies.py deleted file mode 100644 index f691d4a3e..000000000 --- a/cognee/tasks/repo_processor/get_local_dependencies.py +++ /dev/null @@ -1,335 +0,0 @@ -import os -import aiofiles -import importlib -from typing import AsyncGenerator, Optional -from uuid import NAMESPACE_OID, uuid5 -import tree_sitter_python as tspython -from tree_sitter import Language, Node, Parser, Tree -from cognee.shared.logging_utils import get_logger - -from cognee.low_level import DataPoint -from 
cognee.shared.CodeGraphEntities import ( - CodeFile, - ImportStatement, - FunctionDefinition, - ClassDefinition, -) - -logger = get_logger() - - -class FileParser: - """ - Handles the parsing of files into source code and an abstract syntax tree - representation. Public methods include: - - - parse_file: Parses a file and returns its source code and syntax tree representation. - """ - - def __init__(self): - self.parsed_files = {} - - async def parse_file(self, file_path: str) -> tuple[str, Tree]: - """ - Parse a file and return its source code along with its syntax tree representation. - - If the file has already been parsed, retrieve the result from memory instead of reading - the file again. - - Parameters: - ----------- - - - file_path (str): The path of the file to parse. - - Returns: - -------- - - - tuple[str, Tree]: A tuple containing the source code of the file and its - corresponding syntax tree representation. - """ - PY_LANGUAGE = Language(tspython.language()) - source_code_parser = Parser(PY_LANGUAGE) - - if file_path not in self.parsed_files: - source_code = await get_source_code(file_path) - source_code_tree = source_code_parser.parse(bytes(source_code, "utf-8")) - self.parsed_files[file_path] = (source_code, source_code_tree) - - return self.parsed_files[file_path] - - -async def get_source_code(file_path: str): - """ - Read source code from a file asynchronously. - - This function attempts to open a file specified by the given file path, read its - contents, and return the source code. In case of any errors during the file reading - process, it logs an error message and returns None. - - Parameters: - ----------- - - - file_path (str): The path to the file from which to read the source code. - - Returns: - -------- - - Returns the contents of the file as a string if successful, or None if an error - occurs. 
- """ - try: - async with aiofiles.open(file_path, "r", encoding="utf-8") as f: - source_code = await f.read() - return source_code - except Exception as error: - logger.error(f"Error reading file {file_path}: {str(error)}") - return None - - -def resolve_module_path(module_name): - """ - Find the file path of a module. - - Return the file path of the specified module if found, or return None if the module does - not exist or cannot be located. - - Parameters: - ----------- - - - module_name: The name of the module whose file path is to be resolved. - - Returns: - -------- - - The file path of the module as a string or None if the module is not found. - """ - try: - spec = importlib.util.find_spec(module_name) - if spec and spec.origin: - return spec.origin - except ModuleNotFoundError: - return None - return None - - -def find_function_location( - module_path: str, function_name: str, parser: FileParser -) -> Optional[tuple[str, str]]: - """ - Find the location of a function definition in a specified module. - - Parameters: - ----------- - - - module_path (str): The path to the module where the function is defined. - - function_name (str): The name of the function whose location is to be found. - - parser (FileParser): An instance of FileParser used to parse the module's source - code. - - Returns: - -------- - - - Optional[tuple[str, str]]: Returns a tuple containing the module path and the - start point of the function if found; otherwise, returns None. 
- """ - if not module_path or not os.path.exists(module_path): - return None - - source_code, tree = parser.parse_file(module_path) - root_node: Node = tree.root_node - - for node in root_node.children: - if node.type == "function_definition": - func_name_node = node.child_by_field_name("name") - - if func_name_node and func_name_node.text.decode() == function_name: - return (module_path, node.start_point) # (line, column) - - return None - - -async def get_local_script_dependencies( - repo_path: str, script_path: str, detailed_extraction: bool = False -) -> CodeFile: - """ - Retrieve local script dependencies and create a CodeFile object. - - Parameters: - ----------- - - - repo_path (str): The path to the repository that contains the script. - - script_path (str): The path of the script for which dependencies are being - extracted. - - detailed_extraction (bool): A flag indicating whether to perform a detailed - extraction of code components. - - Returns: - -------- - - - CodeFile: Returns a CodeFile object containing information about the script, - including its dependencies and definitions. 
- """ - code_file_parser = FileParser() - source_code, source_code_tree = await code_file_parser.parse_file(script_path) - - file_path_relative_to_repo = script_path[len(repo_path) + 1 :] - - if not detailed_extraction: - code_file_node = CodeFile( - id=uuid5(NAMESPACE_OID, script_path), - name=file_path_relative_to_repo, - source_code=source_code, - file_path=script_path, - language="python", - ) - return code_file_node - - code_file_node = CodeFile( - id=uuid5(NAMESPACE_OID, script_path), - name=file_path_relative_to_repo, - source_code=None, - file_path=script_path, - language="python", - ) - - async for part in extract_code_parts(source_code_tree.root_node, script_path=script_path): - part.file_path = script_path - - if isinstance(part, FunctionDefinition): - code_file_node.provides_function_definition.append(part) - if isinstance(part, ClassDefinition): - code_file_node.provides_class_definition.append(part) - if isinstance(part, ImportStatement): - code_file_node.depends_on.append(part) - - return code_file_node - - -def find_node(nodes: list[Node], condition: callable) -> Node: - """ - Find and return the first node that satisfies the given condition. - - Iterate through the provided list of nodes and return the first node for which the - condition callable returns True. If no such node is found, return None. - - Parameters: - ----------- - - - nodes (list[Node]): A list of Node objects to search through. - - condition (callable): A callable that takes a Node and returns a boolean - indicating if the node meets specified criteria. - - Returns: - -------- - - - Node: The first Node that matches the condition, or None if no such node exists. - """ - for node in nodes: - if condition(node): - return node - - return None - - -async def extract_code_parts( - tree_root: Node, script_path: str, existing_nodes: list[DataPoint] = {} -) -> AsyncGenerator[DataPoint, None]: - """ - Extract code parts from a given AST node tree asynchronously. 
- - Iteratively yields DataPoint nodes representing import statements, function definitions, - and class definitions found in the children of the specified tree root. The function - checks - if nodes are already present in the existing_nodes dictionary to prevent duplicates. - This function has to be used in an asynchronous context, and it requires a valid - tree_root - and proper initialization of existing_nodes. - - Parameters: - ----------- - - - tree_root (Node): The root node of the AST tree containing code parts to extract. - - script_path (str): The file path of the script from which the AST was generated. - - existing_nodes (list[DataPoint]): A dictionary that holds already extracted - DataPoint nodes to avoid duplicates. (default {}) - - Returns: - -------- - - Yields DataPoint nodes representing imported modules, functions, and classes. - """ - for child_node in tree_root.children: - if child_node.type == "import_statement" or child_node.type == "import_from_statement": - parts = child_node.text.decode("utf-8").split() - - if parts[0] == "import": - module_name = parts[1] - function_name = None - elif parts[0] == "from": - module_name = parts[1] - function_name = parts[3] - - if " as " in function_name: - function_name = function_name.split(" as ")[0] - - if " as " in module_name: - module_name = module_name.split(" as ")[0] - - if function_name and "import " + function_name not in existing_nodes: - import_statement_node = ImportStatement( - name=function_name, - module=module_name, - start_point=child_node.start_point, - end_point=child_node.end_point, - file_path=script_path, - source_code=child_node.text, - ) - existing_nodes["import " + function_name] = import_statement_node - - if function_name: - yield existing_nodes["import " + function_name] - - if module_name not in existing_nodes: - import_statement_node = ImportStatement( - name=module_name, - module=module_name, - start_point=child_node.start_point, - end_point=child_node.end_point, - 
file_path=script_path, - source_code=child_node.text, - ) - existing_nodes[module_name] = import_statement_node - - yield existing_nodes[module_name] - - if child_node.type == "function_definition": - function_node = find_node(child_node.children, lambda node: node.type == "identifier") - function_node_name = function_node.text - - if function_node_name not in existing_nodes: - function_definition_node = FunctionDefinition( - name=function_node_name, - start_point=child_node.start_point, - end_point=child_node.end_point, - file_path=script_path, - source_code=child_node.text, - ) - existing_nodes[function_node_name] = function_definition_node - - yield existing_nodes[function_node_name] - - if child_node.type == "class_definition": - class_name_node = find_node(child_node.children, lambda node: node.type == "identifier") - class_name_node_name = class_name_node.text - - if class_name_node_name not in existing_nodes: - class_definition_node = ClassDefinition( - name=class_name_node_name, - start_point=child_node.start_point, - end_point=child_node.end_point, - file_path=script_path, - source_code=child_node.text, - ) - existing_nodes[class_name_node_name] = class_definition_node - - yield existing_nodes[class_name_node_name] diff --git a/cognee/tasks/repo_processor/get_non_code_files.py b/cognee/tasks/repo_processor/get_non_code_files.py deleted file mode 100644 index b9ab1d4c6..000000000 --- a/cognee/tasks/repo_processor/get_non_code_files.py +++ /dev/null @@ -1,158 +0,0 @@ -import os - - -async def get_non_py_files(repo_path): - """ - Get files that are not .py files and their contents. - - Check if the specified repository path exists and if so, traverse the directory, - collecting the paths of files that do not have a .py extension and meet the - criteria set in the allowed and ignored patterns. Return a list of paths to - those files. - - Parameters: - ----------- - - - repo_path: The file system path to the repository to scan for non-Python files. 
- - Returns: - -------- - - A list of file paths that are not Python files and meet the specified criteria. - """ - if not os.path.exists(repo_path): - return {} - - IGNORED_PATTERNS = { - ".git", - "__pycache__", - "*.pyc", - "*.pyo", - "*.pyd", - "node_modules", - "*.egg-info", - } - - ALLOWED_EXTENSIONS = { - ".txt", - ".md", - ".csv", - ".json", - ".xml", - ".yaml", - ".yml", - ".html", - ".css", - ".js", - ".ts", - ".jsx", - ".tsx", - ".sql", - ".log", - ".ini", - ".toml", - ".properties", - ".sh", - ".bash", - ".dockerfile", - ".gitignore", - ".gitattributes", - ".makefile", - ".pyproject", - ".requirements", - ".env", - ".pdf", - ".doc", - ".docx", - ".dot", - ".dotx", - ".rtf", - ".wps", - ".wpd", - ".odt", - ".ott", - ".ottx", - ".txt", - ".wp", - ".sdw", - ".sdx", - ".docm", - ".dotm", - # Additional extensions for other programming languages - ".java", - ".c", - ".cpp", - ".h", - ".cs", - ".go", - ".php", - ".rb", - ".swift", - ".pl", - ".lua", - ".rs", - ".scala", - ".kt", - ".sh", - ".sql", - ".v", - ".asm", - ".pas", - ".d", - ".ml", - ".clj", - ".cljs", - ".erl", - ".ex", - ".exs", - ".f", - ".fs", - ".r", - ".pyi", - ".pdb", - ".ipynb", - ".rmd", - ".cabal", - ".hs", - ".nim", - ".vhdl", - ".verilog", - ".svelte", - ".html", - ".css", - ".scss", - ".less", - ".json5", - ".yaml", - ".yml", - } - - def should_process(path): - """ - Determine if a file should be processed based on its extension and path patterns. - - This function checks if the file extension is in the allowed list and ensures that none - of the ignored patterns are present in the provided file path. - - Parameters: - ----------- - - - path: The file path to check for processing eligibility. - - Returns: - -------- - - Returns True if the file should be processed; otherwise, False. 
- """ - _, ext = os.path.splitext(path) - return ext in ALLOWED_EXTENSIONS and not any( - pattern in path for pattern in IGNORED_PATTERNS - ) - - non_py_files_paths = [ - os.path.join(root, file) - for root, _, files in os.walk(repo_path) - for file in files - if not file.endswith(".py") and should_process(os.path.join(root, file)) - ] - return non_py_files_paths diff --git a/cognee/tasks/repo_processor/get_repo_file_dependencies.py b/cognee/tasks/repo_processor/get_repo_file_dependencies.py deleted file mode 100644 index 06cc3bddb..000000000 --- a/cognee/tasks/repo_processor/get_repo_file_dependencies.py +++ /dev/null @@ -1,243 +0,0 @@ -import asyncio -import math -import os -from pathlib import Path -from typing import Set -from typing import AsyncGenerator, Optional, List -from uuid import NAMESPACE_OID, uuid5 - -from cognee.infrastructure.engine import DataPoint -from cognee.shared.CodeGraphEntities import CodeFile, Repository - -# constant, declared only once -EXCLUDED_DIRS: Set[str] = { - ".venv", - "venv", - "env", - ".env", - "site-packages", - "node_modules", - "dist", - "build", - ".git", - "tests", - "test", -} - - -async def get_source_code_files( - repo_path, - language_config: dict[str, list[str]] | None = None, - excluded_paths: Optional[List[str]] = None, -): - """ - Retrieve Python source code files from the specified repository path. - - This function scans the given repository path for files that have the .py extension - while excluding test files and files within a virtual environment. It returns a list of - absolute paths to the source code files that are not empty. 
- - Parameters: - ----------- - - repo_path: Root path of the repository to search - - language_config: dict mapping language names to file extensions, e.g., - {'python': ['.py'], 'javascript': ['.js', '.jsx'], ...} - - excluded_paths: Optional list of path fragments or glob patterns to exclude - - Returns: - -------- - A list of (absolute_path, language) tuples for source code files. - """ - - def _get_language_from_extension(file, language_config): - for lang, exts in language_config.items(): - for ext in exts: - if file.endswith(ext): - return lang - return None - - # Default config if not provided - if language_config is None: - language_config = { - "python": [".py"], - "javascript": [".js", ".jsx"], - "typescript": [".ts", ".tsx"], - "java": [".java"], - "csharp": [".cs"], - "go": [".go"], - "rust": [".rs"], - "cpp": [".cpp", ".c", ".h", ".hpp"], - } - - if not os.path.exists(repo_path): - return [] - - source_code_files = set() - for root, _, files in os.walk(repo_path): - for file in files: - lang = _get_language_from_extension(file, language_config) - if lang is None: - continue - # Exclude tests, common build/venv directories and files provided in exclude_paths - excluded_dirs = EXCLUDED_DIRS - excluded_paths = {Path(p).resolve() for p in (excluded_paths or [])} # full paths - - root_path = Path(root).resolve() - root_parts = set(root_path.parts) # same as before - base_name, _ext = os.path.splitext(file) - if ( - base_name.startswith("test_") - or base_name.endswith("_test") - or ".test." in file - or ".spec." 
in file - or (excluded_dirs & root_parts) # name match - or any( - root_path.is_relative_to(p) # full-path match - for p in excluded_paths - ) - ): - continue - file_path = os.path.abspath(os.path.join(root, file)) - if os.path.getsize(file_path) == 0: - continue - source_code_files.add((file_path, lang)) - - return sorted(list(source_code_files)) - - -def run_coroutine(coroutine_func, *args, **kwargs): - """ - Run a coroutine function until it completes. - - This function creates a new asyncio event loop, sets it as the current loop, and - executes the given coroutine function with the provided arguments. Once the coroutine - completes, the loop is closed. Intended for use in environments where an existing event - loop is not available or desirable. - - Parameters: - ----------- - - - coroutine_func: The coroutine function to be run. - - *args: Positional arguments to pass to the coroutine function. - - **kwargs: Keyword arguments to pass to the coroutine function. - - Returns: - -------- - - The result returned by the coroutine after completion. - """ - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - result = loop.run_until_complete(coroutine_func(*args, **kwargs)) - loop.close() - return result - - -async def get_repo_file_dependencies( - repo_path: str, - detailed_extraction: bool = False, - supported_languages: list = None, - excluded_paths: Optional[List[str]] = None, -) -> AsyncGenerator[DataPoint, None]: - """ - Generate a dependency graph for source files (multi-language) in the given repository path. - - Check the validity of the repository path and yield a repository object followed by the - dependencies of source files within that repository. Raise a FileNotFoundError if the - provided path does not exist. The extraction of detailed dependencies can be controlled - via the `detailed_extraction` argument. Languages considered can be restricted via - the `supported_languages` argument. 
- - Parameters: - ----------- - - - repo_path (str): The file path to the repository to process. - - detailed_extraction (bool): Whether to perform a detailed extraction of code parts. - - supported_languages (list | None): Subset of languages to include; if None, use defaults. - """ - - if isinstance(repo_path, list) and len(repo_path) == 1: - repo_path = repo_path[0] - - if not os.path.exists(repo_path): - raise FileNotFoundError(f"Repository path {repo_path} does not exist.") - - # Build language config from supported_languages - default_language_config = { - "python": [".py"], - "javascript": [".js", ".jsx"], - "typescript": [".ts", ".tsx"], - "java": [".java"], - "csharp": [".cs"], - "go": [".go"], - "rust": [".rs"], - "cpp": [".cpp", ".c", ".h", ".hpp"], - "c": [".c", ".h"], - } - if supported_languages is not None: - language_config = { - k: v for k, v in default_language_config.items() if k in supported_languages - } - else: - language_config = default_language_config - - source_code_files = await get_source_code_files( - repo_path, language_config=language_config, excluded_paths=excluded_paths - ) - - repo = Repository( - id=uuid5(NAMESPACE_OID, repo_path), - path=repo_path, - ) - - yield repo - - chunk_size = 100 - number_of_chunks = math.ceil(len(source_code_files) / chunk_size) - chunk_ranges = [ - ( - chunk_number * chunk_size, - min((chunk_number + 1) * chunk_size, len(source_code_files)) - 1, - ) - for chunk_number in range(number_of_chunks) - ] - - # Import dependency extractors for each language (Python for now, extend later) - from cognee.tasks.repo_processor.get_local_dependencies import get_local_script_dependencies - import aiofiles - # TODO: Add other language extractors here - - for start_range, end_range in chunk_ranges: - tasks = [] - for file_path, lang in source_code_files[start_range : end_range + 1]: - # For now, only Python is supported; extend with other languages - if lang == "python": - tasks.append( - 
get_local_script_dependencies(repo_path, file_path, detailed_extraction) - ) - else: - # Placeholder: create a minimal CodeFile for other languages - async def make_codefile_stub(file_path=file_path, lang=lang): - async with aiofiles.open( - file_path, "r", encoding="utf-8", errors="replace" - ) as f: - source = await f.read() - return CodeFile( - id=uuid5(NAMESPACE_OID, file_path), - name=os.path.relpath(file_path, repo_path), - file_path=file_path, - language=lang, - source_code=source, - ) - - tasks.append(make_codefile_stub()) - - results: list[CodeFile] = await asyncio.gather(*tasks) - - for source_code_file in results: - source_code_file.part_of = repo - if getattr( - source_code_file, "language", None - ) is None and source_code_file.file_path.endswith(".py"): - source_code_file.language = "python" - yield source_code_file From 9fc4199958045cb5ed06cfcaf783baae247760d6 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 5 Nov 2025 13:18:47 +0100 Subject: [PATCH 076/284] fix: Resolve issue with cleaning acl table --- .../ab7e313804ae_permission_system_rework.py | 72 +++++++++++-------- 1 file changed, 43 insertions(+), 29 deletions(-) diff --git a/alembic/versions/ab7e313804ae_permission_system_rework.py b/alembic/versions/ab7e313804ae_permission_system_rework.py index bd69b9b41..d83f946a6 100644 --- a/alembic/versions/ab7e313804ae_permission_system_rework.py +++ b/alembic/versions/ab7e313804ae_permission_system_rework.py @@ -144,44 +144,58 @@ def _create_data_permission(conn, user_id, data_id, permission_name): ) +def _get_column(inspector, table, name, schema=None): + for col in inspector.get_columns(table, schema=schema): + if col["name"] == name: + return col + return None + + def upgrade() -> None: conn = op.get_bind() + insp = sa.inspect(conn) - # Recreate ACLs table with default permissions set to datasets instead of documents - op.drop_table("acls") + dataset_id_column = _get_column(insp, "acls", "dataset_id") + if not dataset_id_column: + # Recreate 
ACLs table with default permissions set to datasets instead of documents + op.drop_table("acls") - acls_table = op.create_table( - "acls", - sa.Column("id", UUID, primary_key=True, default=uuid4), - sa.Column( - "created_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc) - ), - sa.Column( - "updated_at", sa.DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc) - ), - sa.Column("principal_id", UUID, sa.ForeignKey("principals.id")), - sa.Column("permission_id", UUID, sa.ForeignKey("permissions.id")), - sa.Column("dataset_id", UUID, sa.ForeignKey("datasets.id", ondelete="CASCADE")), - ) + acls_table = op.create_table( + "acls", + sa.Column("id", UUID, primary_key=True, default=uuid4), + sa.Column( + "created_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc) + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + onupdate=lambda: datetime.now(timezone.utc), + ), + sa.Column("principal_id", UUID, sa.ForeignKey("principals.id")), + sa.Column("permission_id", UUID, sa.ForeignKey("permissions.id")), + sa.Column("dataset_id", UUID, sa.ForeignKey("datasets.id", ondelete="CASCADE")), + ) - # Note: We can't use any Cognee model info to gather data (as it can change) in database so we must use our own table - # definition or load what is in the database - dataset_table = _define_dataset_table() - datasets = conn.execute(sa.select(dataset_table)).fetchall() + # Note: We can't use any Cognee model info to gather data (as it can change) in database so we must use our own table + # definition or load what is in the database + dataset_table = _define_dataset_table() + datasets = conn.execute(sa.select(dataset_table)).fetchall() - if not datasets: - return + if not datasets: + return - acl_list = [] + acl_list = [] - for dataset in datasets: - acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "read")) - acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, 
"write")) - acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "share")) - acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "delete")) + for dataset in datasets: + acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "read")) + acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "write")) + acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "share")) + acl_list.append( + _create_dataset_permission(conn, dataset.owner_id, dataset.id, "delete") + ) - if acl_list: - op.bulk_insert(acls_table, acl_list) + if acl_list: + op.bulk_insert(acls_table, acl_list) def downgrade() -> None: From 1b4aa5c67b0412b134e903855f1ad3d12c7eab0e Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 5 Nov 2025 15:15:15 +0100 Subject: [PATCH 077/284] fix: resolve issue with cypher search --- cognee/modules/retrieval/cypher_search_retriever.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cognee/modules/retrieval/cypher_search_retriever.py b/cognee/modules/retrieval/cypher_search_retriever.py index 9978f2536..b8cbce558 100644 --- a/cognee/modules/retrieval/cypher_search_retriever.py +++ b/cognee/modules/retrieval/cypher_search_retriever.py @@ -49,8 +49,9 @@ class CypherSearchRetriever(BaseRetriever): if is_empty: logger.warning("Search attempt on an empty knowledge graph") return [] + from fastapi.encoders import jsonable_encoder - result = await graph_engine.query(query) + result = jsonable_encoder(await graph_engine.query(query)) except Exception as e: logger.error("Failed to execture cypher search retrieval: %s", str(e)) raise CypherSearchError() from e From 1c142734b4f031e466d5a7b111f3ed2e55e338c3 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 5 Nov 2025 15:22:23 +0100 Subject: [PATCH 078/284] refactor: change import to top of file --- cognee/modules/retrieval/cypher_search_retriever.py | 3 ++- 1 file changed, 2 insertions(+), 
1 deletion(-) diff --git a/cognee/modules/retrieval/cypher_search_retriever.py b/cognee/modules/retrieval/cypher_search_retriever.py index b8cbce558..01816f3df 100644 --- a/cognee/modules/retrieval/cypher_search_retriever.py +++ b/cognee/modules/retrieval/cypher_search_retriever.py @@ -1,4 +1,6 @@ from typing import Any, Optional +from fastapi.encoders import jsonable_encoder + from cognee.infrastructure.databases.graph import get_graph_engine from cognee.modules.retrieval.base_retriever import BaseRetriever from cognee.modules.retrieval.utils.completion import generate_completion @@ -49,7 +51,6 @@ class CypherSearchRetriever(BaseRetriever): if is_empty: logger.warning("Search attempt on an empty knowledge graph") return [] - from fastapi.encoders import jsonable_encoder result = jsonable_encoder(await graph_engine.query(query)) except Exception as e: From fa4c50f972e27190fb97d20b2e61726b52bb3f2a Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 5 Nov 2025 16:05:33 +0100 Subject: [PATCH 079/284] fix: Resolve issue with sync migration not working for postgresql --- .../211ab850ef3d_add_sync_operations_table.py | 32 +++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/alembic/versions/211ab850ef3d_add_sync_operations_table.py b/alembic/versions/211ab850ef3d_add_sync_operations_table.py index 370aab1a4..9c6e81f12 100644 --- a/alembic/versions/211ab850ef3d_add_sync_operations_table.py +++ b/alembic/versions/211ab850ef3d_add_sync_operations_table.py @@ -10,6 +10,7 @@ from typing import Sequence, Union from alembic import op import sqlalchemy as sa +from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. 
@@ -27,6 +28,27 @@ def upgrade() -> None: inspector = sa.inspect(connection) if "sync_operations" not in inspector.get_table_names(): + if op.get_context().dialect.name == "postgresql": + syncstatus = postgresql.ENUM( + "STARTED", + "IN_PROGRESS", + "COMPLETED", + "FAILED", + "CANCELLED", + name="syncstatus", + create_type=False, + ) + else: + syncstatus = sa.Enum( + "STARTED", + "IN_PROGRESS", + "COMPLETED", + "FAILED", + "CANCELLED", + name="syncstatus", + create_type=False, + ) + # Table doesn't exist, create it normally op.create_table( "sync_operations", @@ -34,15 +56,7 @@ def upgrade() -> None: sa.Column("run_id", sa.Text(), nullable=True), sa.Column( "status", - sa.Enum( - "STARTED", - "IN_PROGRESS", - "COMPLETED", - "FAILED", - "CANCELLED", - name="syncstatus", - create_type=False, - ), + syncstatus, nullable=True, ), sa.Column("progress_percentage", sa.Integer(), nullable=True), From c4807a0c6751e05a4fb04439afa183fa7620c8f5 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 5 Nov 2025 16:14:37 +0100 Subject: [PATCH 080/284] refactor: Use user_tenants table to update --- alembic/versions/c946955da633_multi_tenant_support.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/alembic/versions/c946955da633_multi_tenant_support.py b/alembic/versions/c946955da633_multi_tenant_support.py index fc45644d0..3f7bde5a2 100644 --- a/alembic/versions/c946955da633_multi_tenant_support.py +++ b/alembic/versions/c946955da633_multi_tenant_support.py @@ -95,7 +95,9 @@ def upgrade() -> None: ) else: conn = op.get_bind() - conn.execute(dataset.update().values(tenant_id=tenant_id_from_user, user_id=user.c.id)) + conn.execute( + user_tenants.update().values(tenant_id=tenant_id_from_user, user_id=user.c.id) + ) tenant_id_column = _get_column(insp, "datasets", "tenant_id") if not tenant_id_column: From 9b6cbaf389b172fb86da42ac1c9c8fe544202aae Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 5 Nov 2025 17:24:11 +0100 Subject: [PATCH 081/284] chore: Add multi 
tenant migration --- .../211ab850ef3d_add_sync_operations_table.py | 6 ++- .../c946955da633_multi_tenant_support.py | 38 +++++++++++-------- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/alembic/versions/211ab850ef3d_add_sync_operations_table.py b/alembic/versions/211ab850ef3d_add_sync_operations_table.py index 9c6e81f12..976439a32 100644 --- a/alembic/versions/211ab850ef3d_add_sync_operations_table.py +++ b/alembic/versions/211ab850ef3d_add_sync_operations_table.py @@ -36,7 +36,8 @@ def upgrade() -> None: "FAILED", "CANCELLED", name="syncstatus", - create_type=False, + create_type=True, + checkfirst=True, ) else: syncstatus = sa.Enum( @@ -46,7 +47,8 @@ def upgrade() -> None: "FAILED", "CANCELLED", name="syncstatus", - create_type=False, + create_type=True, + checkfirst=True, ) # Table doesn't exist, create it normally diff --git a/alembic/versions/c946955da633_multi_tenant_support.py b/alembic/versions/c946955da633_multi_tenant_support.py index 3f7bde5a2..6d21f8fc7 100644 --- a/alembic/versions/c946955da633_multi_tenant_support.py +++ b/alembic/versions/c946955da633_multi_tenant_support.py @@ -20,6 +20,10 @@ branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None +def _now(): + return datetime.now(timezone.utc) + + def _define_user_table() -> sa.Table: table = sa.Table( "users", @@ -76,27 +80,29 @@ def upgrade() -> None: user = _define_user_table() if "user_tenants" not in insp.get_table_names(): - tenant_id_from_user = sa.select(user.c.tenant_id).scalar_subquery() # Define table with all necessary columns including primary key user_tenants = op.create_table( "user_tenants", sa.Column("user_id", sa.UUID, sa.ForeignKey("users.id"), primary_key=True), sa.Column("tenant_id", sa.UUID, sa.ForeignKey("tenants.id"), primary_key=True), - sa.Column("created_at", sa.DateTime(), default=lambda: datetime.now(timezone.utc)), + sa.Column( + "created_at", sa.DateTime(timezone=True), default=lambda: 
datetime.now(timezone.utc) + ), ) - if op.get_context().dialect.name == "sqlite": - # If column doesn't exist create new original_extension column and update from values of extension column - with op.batch_alter_table("user_tenants") as batch_op: - batch_op.execute( - user_tenants.update().values( - tenant_id=tenant_id_from_user, - user_id=user.c.id, - ) - ) - else: - conn = op.get_bind() - conn.execute( - user_tenants.update().values(tenant_id=tenant_id_from_user, user_id=user.c.id) + + # Get all users with their tenant_id + user_data = conn.execute( + sa.select(user.c.id, user.c.tenant_id).where(user.c.tenant_id.isnot(None)) + ).fetchall() + + # Insert into user_tenants table + if user_data: + op.bulk_insert( + user_tenants, + [ + {"user_id": user_id, "tenant_id": tenant_id, "created_at": _now()} + for user_id, tenant_id in user_data + ], ) tenant_id_column = _get_column(insp, "datasets", "tenant_id") @@ -125,6 +131,6 @@ def upgrade() -> None: def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! 
### - + op.drop_table("user_tenants") op.drop_column("datasets", "tenant_id") # ### end Alembic commands ### From 215ef7f3c213ea0be6b0a295be8069ba0364878d Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Wed, 5 Nov 2025 17:29:40 +0100 Subject: [PATCH 082/284] test: add retriever tests --- .../entity_completion_retriever_test.py | 65 +++++++++++++++ ...letion_retriever_context_extension_test.py | 6 +- .../graph_completion_retriever_cot_test.py | 6 +- .../graph_completion_retriever_test.py | 57 +++++++++++++ .../rag_completion_retriever_test.py | 79 +++++++++++++++++++ .../retrieval/temporal_retriever_test.py | 64 +++++++++++++++ 6 files changed, 271 insertions(+), 6 deletions(-) create mode 100644 cognee/tests/unit/modules/retrieval/entity_completion_retriever_test.py diff --git a/cognee/tests/unit/modules/retrieval/entity_completion_retriever_test.py b/cognee/tests/unit/modules/retrieval/entity_completion_retriever_test.py new file mode 100644 index 000000000..064f4a31a --- /dev/null +++ b/cognee/tests/unit/modules/retrieval/entity_completion_retriever_test.py @@ -0,0 +1,65 @@ +import os +import pytest +import pathlib +from pydantic import BaseModel + +import cognee +from cognee.low_level import setup +from cognee.tasks.storage import add_data_points +from cognee.modules.engine.models import Entity, EntityType +from cognee.modules.retrieval.EntityCompletionRetriever import EntityCompletionRetriever +from cognee.modules.retrieval.entity_extractors.DummyEntityExtractor import DummyEntityExtractor +from cognee.modules.retrieval.context_providers.DummyContextProvider import DummyContextProvider + + +class TestAnswer(BaseModel): + answer: str + explanation: str + + +# TODO: Add more tests, similar to other retrievers. +# TODO: For the tests, one needs to define an Entity Extractor and a Context Provider. 
+class TestEntityCompletionRetriever: + @pytest.mark.asyncio + async def test_get_entity_structured_completion(self): + system_directory_path = os.path.join( + pathlib.Path(__file__).parent, ".cognee_system/test_get_entity_structured_completion" + ) + cognee.config.system_root_directory(system_directory_path) + data_directory_path = os.path.join( + pathlib.Path(__file__).parent, ".data_storage/test_get_entity_structured_completion" + ) + cognee.config.data_root_directory(data_directory_path) + + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + await setup() + + entity_type = EntityType(name="Person", description="A human individual") + entity = Entity(name="Albert Einstein", is_a=entity_type, description="A famous physicist") + + entities = [entity] + await add_data_points(entities) + + retriever = EntityCompletionRetriever(DummyEntityExtractor(), DummyContextProvider()) + + # Test with string response model (default) + string_answer = await retriever.get_completion("Who is Albert Einstein?") + assert isinstance(string_answer, list), f"Expected str, got {type(string_answer).__name__}" + assert all(isinstance(item, str) and item.strip() for item in string_answer), ( + "Answer should not be empty" + ) + + # Test with structured response model + structured_answer = await retriever.get_completion( + "Who is Albert Einstein?", response_model=TestAnswer + ) + assert isinstance(structured_answer, list), ( + f"Expected list, got {type(structured_answer).__name__}" + ) + assert all(isinstance(item, TestAnswer) for item in structured_answer), ( + f"Expected TestAnswer, got {type(structured_answer).__name__}" + ) + + assert structured_answer[0].answer.strip(), "Answer field should not be empty" + assert structured_answer[0].explanation.strip(), "Explanation field should not be empty" diff --git a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py 
b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py index 5335a3ca7..d15e55c23 100644 --- a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +++ b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py @@ -183,15 +183,15 @@ class TestGraphCompletionWithContextExtensionRetriever: ) @pytest.mark.asyncio - async def test_get_structured_completion_extension_context(self): + async def test_get_graph_structured_completion_extension_context(self): system_directory_path = os.path.join( pathlib.Path(__file__).parent, - ".cognee_system/test_get_structured_completion_extension_context", + ".cognee_system/test_get_graph_structured_completion_extension_context", ) cognee.config.system_root_directory(system_directory_path) data_directory_path = os.path.join( pathlib.Path(__file__).parent, - ".data_storage/test_get_structured_completion_extension_context", + ".data_storage/test_get_graph_structured_completion_extension_context", ) cognee.config.data_root_directory(data_directory_path) diff --git a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py index 731e9fccf..79e4bcec3 100644 --- a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +++ b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py @@ -176,13 +176,13 @@ class TestGraphCompletionCoTRetriever: ) @pytest.mark.asyncio - async def test_get_structured_completion(self): + async def test_get_graph_structured_completion_cot(self): system_directory_path = os.path.join( - pathlib.Path(__file__).parent, ".cognee_system/test_get_structured_completion" + pathlib.Path(__file__).parent, ".cognee_system/test_get_graph_structured_completion_cot" ) cognee.config.system_root_directory(system_directory_path) data_directory_path = os.path.join( - pathlib.Path(__file__).parent, 
".data_storage/test_get_structured_completion" + pathlib.Path(__file__).parent, ".data_storage/test_get_graph_structured_completion_cot" ) cognee.config.data_root_directory(data_directory_path) diff --git a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py index f462baced..e320fcef1 100644 --- a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +++ b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py @@ -2,6 +2,7 @@ import os import pytest import pathlib from typing import Optional, Union +from pydantic import BaseModel import cognee from cognee.low_level import setup, DataPoint @@ -10,6 +11,11 @@ from cognee.tasks.storage import add_data_points from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever +class TestAnswer(BaseModel): + answer: str + explanation: str + + class TestGraphCompletionRetriever: @pytest.mark.asyncio async def test_graph_completion_context_simple(self): @@ -221,3 +227,54 @@ class TestGraphCompletionRetriever: context = await retriever.get_context("Who works at Figma?") assert context == [], "Context should be empty on an empty graph" + + @pytest.mark.asyncio + async def test_get_graph_structured_completion(self): + system_directory_path = os.path.join( + pathlib.Path(__file__).parent, ".cognee_system/test_get_graph_structured_completion" + ) + cognee.config.system_root_directory(system_directory_path) + data_directory_path = os.path.join( + pathlib.Path(__file__).parent, ".data_storage/test_get_graph_structured_completion" + ) + cognee.config.data_root_directory(data_directory_path) + + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + await setup() + + class Company(DataPoint): + name: str + + class Person(DataPoint): + name: str + works_for: Company + + company1 = Company(name="Figma") + person1 = Person(name="Steve Rodger", works_for=company1) 
+ + entities = [company1, person1] + await add_data_points(entities) + + retriever = GraphCompletionRetriever() + + # Test with string response model (default) + string_answer = await retriever.get_completion("Who works at Figma?") + assert isinstance(string_answer, list), f"Expected str, got {type(string_answer).__name__}" + assert all(isinstance(item, str) and item.strip() for item in string_answer), ( + "Answer should not be empty" + ) + + # Test with structured response model + structured_answer = await retriever.get_completion( + "Who works at Figma?", response_model=TestAnswer + ) + assert isinstance(structured_answer, list), ( + f"Expected list, got {type(structured_answer).__name__}" + ) + assert all(isinstance(item, TestAnswer) for item in structured_answer), ( + f"Expected TestAnswer, got {type(structured_answer).__name__}" + ) + + assert structured_answer[0].answer.strip(), "Answer field should not be empty" + assert structured_answer[0].explanation.strip(), "Explanation field should not be empty" diff --git a/cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py b/cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py index 252af8352..248ecc047 100644 --- a/cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +++ b/cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py @@ -3,6 +3,7 @@ from typing import List import pytest import pathlib import cognee +from pydantic import BaseModel from cognee.low_level import setup from cognee.tasks.storage import add_data_points from cognee.infrastructure.databases.vector import get_vector_engine @@ -26,6 +27,11 @@ class DocumentChunkWithEntities(DataPoint): metadata: dict = {"index_fields": ["text"]} +class TestAnswer(BaseModel): + answer: str + explanation: str + + class TestRAGCompletionRetriever: @pytest.mark.asyncio async def test_rag_completion_context_simple(self): @@ -202,3 +208,76 @@ class TestRAGCompletionRetriever: context = await 
retriever.get_context("Christina Mayer") assert context == "", "Returned context should be empty on an empty graph" + + @pytest.mark.asyncio + async def test_get_rag_structured_completion(self): + system_directory_path = os.path.join( + pathlib.Path(__file__).parent, ".cognee_system/test_get_rag_structured_completion" + ) + cognee.config.system_root_directory(system_directory_path) + data_directory_path = os.path.join( + pathlib.Path(__file__).parent, ".data_storage/test_get_rag_structured_completion" + ) + cognee.config.data_root_directory(data_directory_path) + + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + await setup() + + document = TextDocument( + name="Steve Rodger's career", + raw_data_location="somewhere", + external_metadata="", + mime_type="text/plain", + ) + + chunk1 = DocumentChunk( + text="Steve Rodger", + chunk_size=2, + chunk_index=0, + cut_type="sentence_end", + is_part_of=document, + contains=[], + ) + chunk2 = DocumentChunk( + text="Mike Broski", + chunk_size=2, + chunk_index=1, + cut_type="sentence_end", + is_part_of=document, + contains=[], + ) + chunk3 = DocumentChunk( + text="Christina Mayer", + chunk_size=2, + chunk_index=2, + cut_type="sentence_end", + is_part_of=document, + contains=[], + ) + + entities = [chunk1, chunk2, chunk3] + await add_data_points(entities) + + retriever = CompletionRetriever() + + # Test with string response model (default) + string_answer = await retriever.get_completion("Where does Steve work?") + assert isinstance(string_answer, list), f"Expected str, got {type(string_answer).__name__}" + assert all(isinstance(item, str) and item.strip() for item in string_answer), ( + "Answer should not be empty" + ) + + # Test with structured response model + structured_answer = await retriever.get_completion( + "Where does Steve work?", response_model=TestAnswer + ) + assert isinstance(structured_answer, list), ( + f"Expected list, got {type(structured_answer).__name__}" + ) + assert 
all(isinstance(item, TestAnswer) for item in structured_answer), ( + f"Expected TestAnswer, got {type(structured_answer).__name__}" + ) + + assert structured_answer[0].answer.strip(), "Answer field should not be empty" + assert structured_answer[0].explanation.strip(), "Explanation field should not be empty" diff --git a/cognee/tests/unit/modules/retrieval/temporal_retriever_test.py b/cognee/tests/unit/modules/retrieval/temporal_retriever_test.py index a322cb237..5b274c822 100644 --- a/cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +++ b/cognee/tests/unit/modules/retrieval/temporal_retriever_test.py @@ -1,7 +1,13 @@ import asyncio +import os +import pathlib +import cognee from types import SimpleNamespace import pytest +from pydantic import BaseModel +from cognee.low_level import setup, DataPoint +from cognee.tasks.storage import add_data_points from cognee.modules.retrieval.temporal_retriever import TemporalRetriever @@ -141,6 +147,64 @@ async def test_filter_top_k_events_error_handling(): await tr.filter_top_k_events([{}], []) +class TestAnswer(BaseModel): + answer: str + explanation: str + + +@pytest.mark.asyncio +async def test_get_temporal_structured_completion(): + system_directory_path = os.path.join( + pathlib.Path(__file__).parent, ".cognee_system/test_get_temporal_structured_completion" + ) + cognee.config.system_root_directory(system_directory_path) + data_directory_path = os.path.join( + pathlib.Path(__file__).parent, ".data_storage/test_get_temporal_structured_completion" + ) + cognee.config.data_root_directory(data_directory_path) + + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + await setup() + + class Company(DataPoint): + name: str + + class Person(DataPoint): + name: str + works_for: Company + works_since: int + + company1 = Company(name="Figma") + person1 = Person(name="Steve Rodger", works_for=company1, works_since=2015) + + entities = [company1, person1] + await add_data_points(entities) + + 
retriever = TemporalRetriever() + + # Test with string response model (default) + string_answer = await retriever.get_completion("When did Steve start working at Figma?") + assert isinstance(string_answer, list), f"Expected str, got {type(string_answer).__name__}" + assert all(isinstance(item, str) and item.strip() for item in string_answer), ( + "Answer should not be empty" + ) + + # Test with structured response model + structured_answer = await retriever.get_completion( + "When did Steve start working at Figma??", response_model=TestAnswer + ) + assert isinstance(structured_answer, list), ( + f"Expected list, got {type(structured_answer).__name__}" + ) + assert all(isinstance(item, TestAnswer) for item in structured_answer), ( + f"Expected TestAnswer, got {type(structured_answer).__name__}" + ) + + assert structured_answer[0].answer.strip(), "Answer field should not be empty" + assert structured_answer[0].explanation.strip(), "Explanation field should not be empty" + + class _FakeRetriever(TemporalRetriever): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) From 1ef5805c5708ae82eed17335e891eddafd794cf4 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 5 Nov 2025 17:50:13 +0100 Subject: [PATCH 083/284] fix: Resolve issue with sync migration --- .../211ab850ef3d_add_sync_operations_table.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/alembic/versions/211ab850ef3d_add_sync_operations_table.py b/alembic/versions/211ab850ef3d_add_sync_operations_table.py index 976439a32..30049b44b 100644 --- a/alembic/versions/211ab850ef3d_add_sync_operations_table.py +++ b/alembic/versions/211ab850ef3d_add_sync_operations_table.py @@ -27,6 +27,12 @@ def upgrade() -> None: connection = op.get_bind() inspector = sa.inspect(connection) + if op.get_context().dialect.name == "postgresql": + syncstatus_enum = postgresql.ENUM( + "STARTED", "IN_PROGRESS", "COMPLETED", "FAILED", "CANCELLED", name="syncstatus" + ) + 
syncstatus_enum.create(op.get_bind(), checkfirst=True) + if "sync_operations" not in inspector.get_table_names(): if op.get_context().dialect.name == "postgresql": syncstatus = postgresql.ENUM( @@ -36,8 +42,7 @@ def upgrade() -> None: "FAILED", "CANCELLED", name="syncstatus", - create_type=True, - checkfirst=True, + create_type=False, ) else: syncstatus = sa.Enum( @@ -47,8 +52,7 @@ def upgrade() -> None: "FAILED", "CANCELLED", name="syncstatus", - create_type=True, - checkfirst=True, + create_type=False, ) # Table doesn't exist, create it normally From ce64f242b7a5480bbe9763bbb523fafb7b10e9fb Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 5 Nov 2025 18:04:05 +0100 Subject: [PATCH 084/284] refactor: add droping of index as well --- alembic/versions/c946955da633_multi_tenant_support.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/alembic/versions/c946955da633_multi_tenant_support.py b/alembic/versions/c946955da633_multi_tenant_support.py index 6d21f8fc7..ba451fc03 100644 --- a/alembic/versions/c946955da633_multi_tenant_support.py +++ b/alembic/versions/c946955da633_multi_tenant_support.py @@ -126,11 +126,10 @@ def upgrade() -> None: conn = op.get_bind() conn.execute(dataset.update().values(tenant_id=tenant_id_from_dataset_owner)) - op.create_index(op.f("ix_datasets_tenant_id"), "datasets", ["tenant_id"], unique=False) - def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! 
### - op.drop_table("user_tenants") + op.drop_index(op.f("ix_datasets_tenant_id"), table_name="datasets") op.drop_column("datasets", "tenant_id") + op.drop_table("user_tenants") # ### end Alembic commands ### From 79bd2b2576b913528feb92ca6832242133bf9822 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 6 Nov 2025 09:48:01 +0100 Subject: [PATCH 085/284] chore: fixes ruff formatting --- .../v1/cognify/routers/get_cognify_router.py | 15 ++++++++---- cognee/api/v1/ontologies/__init__.py | 2 +- cognee/api/v1/ontologies/ontologies.py | 22 ++++++++++------- .../ontologies/routers/get_ontology_router.py | 11 ++++----- cognee/tests/test_ontology_endpoint.py | 24 ++++++++++++------- 5 files changed, 44 insertions(+), 30 deletions(-) diff --git a/cognee/api/v1/cognify/routers/get_cognify_router.py b/cognee/api/v1/cognify/routers/get_cognify_router.py index 246cc6c56..252ffe7bf 100644 --- a/cognee/api/v1/cognify/routers/get_cognify_router.py +++ b/cognee/api/v1/cognify/routers/get_cognify_router.py @@ -42,8 +42,7 @@ class CognifyPayloadDTO(InDTO): default="", description="Custom prompt for entity extraction and graph generation" ) ontology_key: Optional[str] = Field( - default=None, - description="Reference to previously uploaded ontology" + default=None, description="Reference to previously uploaded ontology" ) @@ -123,16 +122,22 @@ def get_cognify_router() -> APIRouter: if payload.ontology_key: ontology_service = OntologyService() try: - ontology_content = ontology_service.get_ontology_content(payload.ontology_key, user) + ontology_content = ontology_service.get_ontology_content( + payload.ontology_key, user + ) from cognee.modules.ontology.ontology_config import Config - from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver + from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import ( + RDFLibOntologyResolver, + ) from io import StringIO ontology_stream = StringIO(ontology_content) 
config_to_use: Config = { "ontology_config": { - "ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_stream) + "ontology_resolver": RDFLibOntologyResolver( + ontology_file=ontology_stream + ) } } except ValueError as e: diff --git a/cognee/api/v1/ontologies/__init__.py b/cognee/api/v1/ontologies/__init__.py index c25064edc..b90d46c3d 100644 --- a/cognee/api/v1/ontologies/__init__.py +++ b/cognee/api/v1/ontologies/__init__.py @@ -1,4 +1,4 @@ from .ontologies import OntologyService from .routers.get_ontology_router import get_ontology_router -__all__ = ["OntologyService", "get_ontology_router"] \ No newline at end of file +__all__ = ["OntologyService", "get_ontology_router"] diff --git a/cognee/api/v1/ontologies/ontologies.py b/cognee/api/v1/ontologies/ontologies.py index fb7f3cd9a..6bfb7658e 100644 --- a/cognee/api/v1/ontologies/ontologies.py +++ b/cognee/api/v1/ontologies/ontologies.py @@ -6,6 +6,7 @@ from datetime import datetime, timezone from typing import Optional from dataclasses import dataclass + @dataclass class OntologyMetadata: ontology_key: str @@ -14,6 +15,7 @@ class OntologyMetadata: uploaded_at: str description: Optional[str] = None + class OntologyService: def __init__(self): pass @@ -33,18 +35,20 @@ class OntologyService: def _load_metadata(self, user_dir: Path) -> dict: metadata_path = self._get_metadata_path(user_dir) if metadata_path.exists(): - with open(metadata_path, 'r') as f: + with open(metadata_path, "r") as f: return json.load(f) return {} def _save_metadata(self, user_dir: Path, metadata: dict): metadata_path = self._get_metadata_path(user_dir) - with open(metadata_path, 'w') as f: + with open(metadata_path, "w") as f: json.dump(metadata, f, indent=2) - async def upload_ontology(self, ontology_key: str, file, user, description: Optional[str] = None) -> OntologyMetadata: + async def upload_ontology( + self, ontology_key: str, file, user, description: Optional[str] = None + ) -> OntologyMetadata: # Validate file format - if 
not file.filename.lower().endswith('.owl'): + if not file.filename.lower().endswith(".owl"): raise ValueError("File must be in .owl format") user_dir = self._get_user_dir(str(user.id)) @@ -61,7 +65,7 @@ class OntologyService: # Save file file_path = user_dir / f"{ontology_key}.owl" - with open(file_path, 'wb') as f: + with open(file_path, "wb") as f: f.write(content) # Update metadata @@ -69,7 +73,7 @@ class OntologyService: "filename": file.filename, "size_bytes": len(content), "uploaded_at": datetime.now(timezone.utc).isoformat(), - "description": description + "description": description, } metadata[ontology_key] = ontology_metadata self._save_metadata(user_dir, metadata) @@ -79,7 +83,7 @@ class OntologyService: filename=file.filename, size_bytes=len(content), uploaded_at=ontology_metadata["uploaded_at"], - description=description + description=description, ) def get_ontology_content(self, ontology_key: str, user) -> str: @@ -93,9 +97,9 @@ class OntologyService: if not file_path.exists(): raise ValueError(f"Ontology file for key '{ontology_key}' not found") - with open(file_path, 'r', encoding='utf-8') as f: + with open(file_path, "r", encoding="utf-8") as f: return f.read() def list_ontologies(self, user) -> dict: user_dir = self._get_user_dir(str(user.id)) - return self._load_metadata(user_dir) \ No newline at end of file + return self._load_metadata(user_dir) diff --git a/cognee/api/v1/ontologies/routers/get_ontology_router.py b/cognee/api/v1/ontologies/routers/get_ontology_router.py index c171fa7bb..f5c51ba21 100644 --- a/cognee/api/v1/ontologies/routers/get_ontology_router.py +++ b/cognee/api/v1/ontologies/routers/get_ontology_router.py @@ -8,6 +8,7 @@ from cognee.shared.utils import send_telemetry from cognee import __version__ as cognee_version from ..ontologies import OntologyService + def get_ontology_router() -> APIRouter: router = APIRouter() ontology_service = OntologyService() @@ -17,7 +18,7 @@ def get_ontology_router() -> APIRouter: ontology_key: 
str = Form(...), ontology_file: UploadFile = File(...), description: Optional[str] = Form(None), - user: User = Depends(get_authenticated_user) + user: User = Depends(get_authenticated_user), ): """ Upload an ontology file with a named key for later use in cognify operations. @@ -51,7 +52,7 @@ def get_ontology_router() -> APIRouter: "ontology_key": result.ontology_key, "filename": result.filename, "size_bytes": result.size_bytes, - "uploaded_at": result.uploaded_at + "uploaded_at": result.uploaded_at, } except ValueError as e: return JSONResponse(status_code=400, content={"error": str(e)}) @@ -59,9 +60,7 @@ def get_ontology_router() -> APIRouter: return JSONResponse(status_code=500, content={"error": str(e)}) @router.get("", response_model=dict) - async def list_ontologies( - user: User = Depends(get_authenticated_user) - ): + async def list_ontologies(user: User = Depends(get_authenticated_user)): """ List all uploaded ontologies for the authenticated user. @@ -86,4 +85,4 @@ def get_ontology_router() -> APIRouter: except Exception as e: return JSONResponse(status_code=500, content={"error": str(e)}) - return router \ No newline at end of file + return router diff --git a/cognee/tests/test_ontology_endpoint.py b/cognee/tests/test_ontology_endpoint.py index 4849f8649..b5cedfafe 100644 --- a/cognee/tests/test_ontology_endpoint.py +++ b/cognee/tests/test_ontology_endpoint.py @@ -8,37 +8,40 @@ from cognee.api.client import app gau_mod = importlib.import_module("cognee.modules.users.methods.get_authenticated_user") + @pytest.fixture def client(): return TestClient(app) + @pytest.fixture def mock_user(): user = Mock() user.id = "test-user-123" return user + @pytest.fixture def mock_default_user(): """Mock default user for testing.""" return SimpleNamespace( - id=uuid.uuid4(), - email="default@example.com", - is_active=True, - tenant_id=uuid.uuid4() + id=uuid.uuid4(), email="default@example.com", is_active=True, tenant_id=uuid.uuid4() ) + @patch.object(gau_mod, 
"get_default_user", new_callable=AsyncMock) def test_upload_ontology_success(mock_get_default_user, client, mock_default_user): """Test successful ontology upload""" mock_get_default_user.return_value = mock_default_user - ontology_content = b"" + ontology_content = ( + b"" + ) unique_key = f"test_ontology_{uuid.uuid4().hex[:8]}" response = client.post( "/api/v1/ontologies", files={"ontology_file": ("test.owl", ontology_content)}, - data={"ontology_key": unique_key, "description": "Test"} + data={"ontology_key": unique_key, "description": "Test"}, ) assert response.status_code == 200 @@ -46,6 +49,7 @@ def test_upload_ontology_success(mock_get_default_user, client, mock_default_use assert data["ontology_key"] == unique_key assert "uploaded_at" in data + @patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) def test_upload_ontology_invalid_file(mock_get_default_user, client, mock_default_user): """Test 400 response for non-.owl files""" @@ -54,10 +58,11 @@ def test_upload_ontology_invalid_file(mock_get_default_user, client, mock_defaul response = client.post( "/api/v1/ontologies", files={"ontology_file": ("test.txt", b"not xml")}, - data={"ontology_key": unique_key} + data={"ontology_key": unique_key}, ) assert response.status_code == 400 + @patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) def test_upload_ontology_missing_data(mock_get_default_user, client, mock_default_user): """Test 400 response for missing file or key""" @@ -70,6 +75,7 @@ def test_upload_ontology_missing_data(mock_get_default_user, client, mock_defaul response = client.post("/api/v1/ontologies", files={"ontology_file": ("test.owl", b"xml")}) assert response.status_code == 400 + @patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) def test_upload_ontology_unauthorized(mock_get_default_user, client, mock_default_user): """Test behavior when default user is provided (no explicit authentication)""" @@ -78,7 +84,7 @@ def 
test_upload_ontology_unauthorized(mock_get_default_user, client, mock_defaul response = client.post( "/api/v1/ontologies", files={"ontology_file": ("test.owl", b"")}, - data={"ontology_key": unique_key} + data={"ontology_key": unique_key}, ) # The current system provides a default user when no explicit authentication is given @@ -86,4 +92,4 @@ def test_upload_ontology_unauthorized(mock_get_default_user, client, mock_defaul assert response.status_code == 200 data = response.json() assert data["ontology_key"] == unique_key - assert "uploaded_at" in data \ No newline at end of file + assert "uploaded_at" in data From 5271ee49019f75196adc3f3fe3069efd7bc72e14 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 6 Nov 2025 11:12:12 +0100 Subject: [PATCH 086/284] fix: Resolve issue with empty node set --- cognee/api/v1/add/routers/get_add_router.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cognee/api/v1/add/routers/get_add_router.py b/cognee/api/v1/add/routers/get_add_router.py index b2e7068b0..39dc1a3e6 100644 --- a/cognee/api/v1/add/routers/get_add_router.py +++ b/cognee/api/v1/add/routers/get_add_router.py @@ -82,7 +82,9 @@ def get_add_router() -> APIRouter: datasetName, user=user, dataset_id=datasetId, - node_set=node_set if node_set else None, + node_set=node_set + if node_set != [""] + else None, # Transform default node_set endpoint value to None ) if isinstance(add_run, PipelineRunErrored): From cc60141c8dec285e48a05e8550782cb4708eb6ad Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 6 Nov 2025 11:46:34 +0100 Subject: [PATCH 087/284] test: change key for wighted edges example --- .github/workflows/weighted_edges_tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/weighted_edges_tests.yml b/.github/workflows/weighted_edges_tests.yml index 874ef6ea4..0425ac797 100644 --- a/.github/workflows/weighted_edges_tests.yml +++ b/.github/workflows/weighted_edges_tests.yml @@ -109,12 +109,12 @@ 
jobs: LLM_PROVIDER: openai LLM_MODEL: gpt-5-mini LLM_ENDPOINT: https://api.openai.com/v1/ - LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} LLM_API_VERSION: "2024-02-01" EMBEDDING_PROVIDER: openai EMBEDDING_MODEL: text-embedding-3-small EMBEDDING_ENDPOINT: https://api.openai.com/v1/ - EMBEDDING_API_KEY: ${{ secrets.LLM_API_KEY }} + EMBEDDING_API_KEY: ${{ secrets.OPENAI_API_KEY }} EMBEDDING_API_VERSION: "2024-02-01" steps: - name: Check out repository From cdb06d2157d2602bd0e3f9c696d38368e119a8e4 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 6 Nov 2025 11:50:58 +0100 Subject: [PATCH 088/284] test: add workflow_dispatch for test purposes --- .github/workflows/weighted_edges_tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/weighted_edges_tests.yml b/.github/workflows/weighted_edges_tests.yml index 0425ac797..10778d1a0 100644 --- a/.github/workflows/weighted_edges_tests.yml +++ b/.github/workflows/weighted_edges_tests.yml @@ -17,6 +17,7 @@ on: - 'cognee/tests/unit/interfaces/graph/test_weighted_edges.py' - 'examples/python/weighted_edges_example.py' - '.github/workflows/weighted_edges_tests.yml' + workflow_dispatch: env: RUNTIME__LOG_LEVEL: ERROR From 5dc8d4e8a1206e9ce4fbf4df157ac945b83283c7 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 6 Nov 2025 11:52:39 +0100 Subject: [PATCH 089/284] test: remove workflow_dispatch --- .github/workflows/weighted_edges_tests.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/weighted_edges_tests.yml b/.github/workflows/weighted_edges_tests.yml index 10778d1a0..0425ac797 100644 --- a/.github/workflows/weighted_edges_tests.yml +++ b/.github/workflows/weighted_edges_tests.yml @@ -17,7 +17,6 @@ on: - 'cognee/tests/unit/interfaces/graph/test_weighted_edges.py' - 'examples/python/weighted_edges_example.py' - '.github/workflows/weighted_edges_tests.yml' - workflow_dispatch: env: RUNTIME__LOG_LEVEL: ERROR From 
4567ffcfff6ca850252fcced54c9324698484f9b Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 6 Nov 2025 12:27:10 +0100 Subject: [PATCH 090/284] test: change endpoint --- .github/workflows/weighted_edges_tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/weighted_edges_tests.yml b/.github/workflows/weighted_edges_tests.yml index 0425ac797..d571d354f 100644 --- a/.github/workflows/weighted_edges_tests.yml +++ b/.github/workflows/weighted_edges_tests.yml @@ -108,12 +108,12 @@ jobs: env: LLM_PROVIDER: openai LLM_MODEL: gpt-5-mini - LLM_ENDPOINT: https://api.openai.com/v1/ + LLM_ENDPOINT: https://api.openai.com/v1 LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} LLM_API_VERSION: "2024-02-01" EMBEDDING_PROVIDER: openai EMBEDDING_MODEL: text-embedding-3-small - EMBEDDING_ENDPOINT: https://api.openai.com/v1/ + EMBEDDING_ENDPOINT: https://api.openai.com/v1/embeddings EMBEDDING_API_KEY: ${{ secrets.OPENAI_API_KEY }} EMBEDDING_API_VERSION: "2024-02-01" steps: From 286584eef0039686ac9ad62f0be0a0a7a9637edc Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 6 Nov 2025 12:35:37 +0100 Subject: [PATCH 091/284] test: change embedding config --- .github/workflows/weighted_edges_tests.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/weighted_edges_tests.yml b/.github/workflows/weighted_edges_tests.yml index d571d354f..b722c5e7c 100644 --- a/.github/workflows/weighted_edges_tests.yml +++ b/.github/workflows/weighted_edges_tests.yml @@ -111,11 +111,11 @@ jobs: LLM_ENDPOINT: https://api.openai.com/v1 LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} LLM_API_VERSION: "2024-02-01" - EMBEDDING_PROVIDER: openai - EMBEDDING_MODEL: text-embedding-3-small - EMBEDDING_ENDPOINT: https://api.openai.com/v1/embeddings - EMBEDDING_API_KEY: ${{ secrets.OPENAI_API_KEY }} - EMBEDDING_API_VERSION: "2024-02-01" + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ 
secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + steps: - name: Check out repository uses: actions/checkout@v4 From a058250c95390df84f1c44419cff8adf3a4e8269 Mon Sep 17 00:00:00 2001 From: Boris Arzentar Date: Thu, 6 Nov 2025 13:03:11 +0100 Subject: [PATCH 092/284] fix: add cognee to the local run environment --- cognee/modules/notebooks/operations/run_in_local_sandbox.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cognee/modules/notebooks/operations/run_in_local_sandbox.py b/cognee/modules/notebooks/operations/run_in_local_sandbox.py index 071deafb7..46499186e 100644 --- a/cognee/modules/notebooks/operations/run_in_local_sandbox.py +++ b/cognee/modules/notebooks/operations/run_in_local_sandbox.py @@ -2,6 +2,8 @@ import io import sys import traceback +import cognee + def wrap_in_async_handler(user_code: str) -> str: return ( @@ -34,6 +36,7 @@ def run_in_local_sandbox(code, environment=None, loop=None): environment["print"] = customPrintFunction environment["running_loop"] = loop + environment["cognee"] = cognee try: exec(code, environment) From 6b81559bb65d8501dc698761ed2fb51ee7c0d839 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 6 Nov 2025 13:04:01 +0100 Subject: [PATCH 093/284] test: changed endpoints in other tests --- .github/workflows/weighted_edges_tests.yml | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/.github/workflows/weighted_edges_tests.yml b/.github/workflows/weighted_edges_tests.yml index b722c5e7c..0b263cdcf 100644 --- a/.github/workflows/weighted_edges_tests.yml +++ b/.github/workflows/weighted_edges_tests.yml @@ -32,7 +32,7 @@ jobs: env: LLM_PROVIDER: openai LLM_MODEL: gpt-5-mini - LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} steps: - name: Check out repository @@ -67,14 +67,13 @@ jobs: env: LLM_PROVIDER: openai LLM_MODEL: gpt-5-mini - LLM_ENDPOINT: 
https://api.openai.com/v1/ - LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_ENDPOINT: https://api.openai.com/v1 + LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} LLM_API_VERSION: "2024-02-01" - EMBEDDING_PROVIDER: openai - EMBEDDING_MODEL: text-embedding-3-small - EMBEDDING_ENDPOINT: https://api.openai.com/v1/ - EMBEDDING_API_KEY: ${{ secrets.LLM_API_KEY }} - EMBEDDING_API_VERSION: "2024-02-01" + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} steps: - name: Check out repository uses: actions/checkout@v4 @@ -115,7 +114,7 @@ jobs: EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - + steps: - name: Check out repository uses: actions/checkout@v4 From 62c599a4994c4e78c33c66925fb879b72c4ff338 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 6 Nov 2025 13:13:35 +0100 Subject: [PATCH 094/284] test: add dev to CI config for weighted edges tests --- .github/workflows/weighted_edges_tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/weighted_edges_tests.yml b/.github/workflows/weighted_edges_tests.yml index 0b263cdcf..2b4a043bf 100644 --- a/.github/workflows/weighted_edges_tests.yml +++ b/.github/workflows/weighted_edges_tests.yml @@ -2,7 +2,7 @@ name: Weighted Edges Tests on: push: - branches: [ main, weighted_edges ] + branches: [ main, dev, weighted_edges ] paths: - 'cognee/modules/graph/utils/get_graph_from_model.py' - 'cognee/infrastructure/engine/models/Edge.py' @@ -10,7 +10,7 @@ on: - 'examples/python/weighted_edges_example.py' - '.github/workflows/weighted_edges_tests.yml' pull_request: - branches: [ main ] + branches: [ main, dev ] paths: - 'cognee/modules/graph/utils/get_graph_from_model.py' - 
'cognee/infrastructure/engine/models/Edge.py' From c0e5ce04cedbf3bc4511e73ed2e2276e21c2f978 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 6 Nov 2025 14:13:55 +0100 Subject: [PATCH 095/284] Fix: fixes session history test for multiuser mode (#1746) ## Description Fixes failing session history test ## Type of Change - [x] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [x] **I have tested my changes thoroughly before submitting this PR** - [x] **This PR contains minimal changes necessary to address the issue/feature** - [x] My code follows the project's coding standards and style guidelines - [x] I have added tests that prove my fix is effective or that my feature works - [x] I have added necessary documentation (if applicable) - [x] All new and existing tests pass - [x] I have searched existing PRs to ensure this change hasn't been submitted already - [x] I have linked any relevant issues in the description - [x] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. 
--- .../persist_sessions_in_knowledge_graph.py | 2 +- cognee/tasks/memify/cognify_session.py | 7 ++++--- cognee/tests/test_conversation_history.py | 6 +++--- .../modules/memify_tasks/test_cognify_session.py | 12 ++++++++---- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py b/cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py index c0ba0a4d9..92d64c156 100644 --- a/cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +++ b/cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py @@ -40,7 +40,7 @@ async def persist_sessions_in_knowledge_graph_pipeline( extraction_tasks = [Task(extract_user_sessions, session_ids=session_ids)] enrichment_tasks = [ - Task(cognify_session), + Task(cognify_session, dataset_id=dataset_to_write[0].id), ] result = await memify( diff --git a/cognee/tasks/memify/cognify_session.py b/cognee/tasks/memify/cognify_session.py index 7c276169a..f53f9afb1 100644 --- a/cognee/tasks/memify/cognify_session.py +++ b/cognee/tasks/memify/cognify_session.py @@ -6,7 +6,7 @@ from cognee.shared.logging_utils import get_logger logger = get_logger("cognify_session") -async def cognify_session(data): +async def cognify_session(data, dataset_id=None): """ Process and cognify session data into the knowledge graph. @@ -16,6 +16,7 @@ async def cognify_session(data): Args: data: Session string containing Question, Context, and Answer information. + dataset_name: Name of dataset. Raises: CogneeValidationError: If data is None or empty. 
@@ -28,9 +29,9 @@ async def cognify_session(data): logger.info("Processing session data for cognification") - await cognee.add(data, node_set=["user_sessions_from_cache"]) + await cognee.add(data, dataset_id=dataset_id, node_set=["user_sessions_from_cache"]) logger.debug("Session data added to cognee with node_set: user_sessions") - await cognee.cognify() + await cognee.cognify(datasets=[dataset_id]) logger.info("Session data successfully cognified") except CogneeValidationError: diff --git a/cognee/tests/test_conversation_history.py b/cognee/tests/test_conversation_history.py index 6b5b737f1..783baf563 100644 --- a/cognee/tests/test_conversation_history.py +++ b/cognee/tests/test_conversation_history.py @@ -56,10 +56,10 @@ async def main(): """DataCo is a data analytics company. They help businesses make sense of their data.""" ) - await cognee.add(text_1, dataset_name) - await cognee.add(text_2, dataset_name) + await cognee.add(data=text_1, dataset_name=dataset_name) + await cognee.add(data=text_2, dataset_name=dataset_name) - await cognee.cognify([dataset_name]) + await cognee.cognify(datasets=[dataset_name]) user = await get_default_user() diff --git a/cognee/tests/unit/modules/memify_tasks/test_cognify_session.py b/cognee/tests/unit/modules/memify_tasks/test_cognify_session.py index c23640fbd..8c2448287 100644 --- a/cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +++ b/cognee/tests/unit/modules/memify_tasks/test_cognify_session.py @@ -16,9 +16,11 @@ async def test_cognify_session_success(): patch("cognee.add", new_callable=AsyncMock) as mock_add, patch("cognee.cognify", new_callable=AsyncMock) as mock_cognify, ): - await cognify_session(session_data) + await cognify_session(session_data, dataset_id="123") - mock_add.assert_called_once_with(session_data, node_set=["user_sessions_from_cache"]) + mock_add.assert_called_once_with( + session_data, dataset_id="123", node_set=["user_sessions_from_cache"] + ) mock_cognify.assert_called_once() @@ -101,7 
+103,9 @@ async def test_cognify_session_with_special_characters(): patch("cognee.add", new_callable=AsyncMock) as mock_add, patch("cognee.cognify", new_callable=AsyncMock) as mock_cognify, ): - await cognify_session(session_data) + await cognify_session(session_data, dataset_id="123") - mock_add.assert_called_once_with(session_data, node_set=["user_sessions_from_cache"]) + mock_add.assert_called_once_with( + session_data, dataset_id="123", node_set=["user_sessions_from_cache"] + ) mock_cognify.assert_called_once() From ac6dd08855e30349b0666e6af48da3e829079948 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 6 Nov 2025 14:35:26 +0100 Subject: [PATCH 096/284] fix: Resolve issue with sqlite index creation --- alembic/versions/c946955da633_multi_tenant_support.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/alembic/versions/c946955da633_multi_tenant_support.py b/alembic/versions/c946955da633_multi_tenant_support.py index ba451fc03..c87500907 100644 --- a/alembic/versions/c946955da633_multi_tenant_support.py +++ b/alembic/versions/c946955da633_multi_tenant_support.py @@ -126,6 +126,8 @@ def upgrade() -> None: conn = op.get_bind() conn.execute(dataset.update().values(tenant_id=tenant_id_from_dataset_owner)) + op.create_index(op.f("ix_datasets_tenant_id"), "datasets", ["tenant_id"]) + def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! 
### From ac751bacf09e26b851b5829d46330a2f7ee7f25e Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 6 Nov 2025 14:51:25 +0100 Subject: [PATCH 097/284] fix: Resolve SQLite migration issue --- .../c946955da633_multi_tenant_support.py | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/alembic/versions/c946955da633_multi_tenant_support.py b/alembic/versions/c946955da633_multi_tenant_support.py index c87500907..a87989d9b 100644 --- a/alembic/versions/c946955da633_multi_tenant_support.py +++ b/alembic/versions/c946955da633_multi_tenant_support.py @@ -97,13 +97,23 @@ def upgrade() -> None: # Insert into user_tenants table if user_data: - op.bulk_insert( - user_tenants, - [ - {"user_id": user_id, "tenant_id": tenant_id, "created_at": _now()} - for user_id, tenant_id in user_data - ], - ) + if op.get_context().dialect.name == "sqlite": + insert_stmt = user_tenants.insert().values( + [ + {"user_id": user_id, "tenant_id": tenant_id, "created_at": _now()} + for user_id, tenant_id in user_data + ] + ) + conn.execute(insert_stmt) + conn.commit() + else: + op.bulk_insert( + user_tenants, + [ + {"user_id": user_id, "tenant_id": tenant_id, "created_at": _now()} + for user_id, tenant_id in user_data + ], + ) tenant_id_column = _get_column(insp, "datasets", "tenant_id") if not tenant_id_column: From 5bc83968f81982ad90f37e41c205c90bb1b9b3d5 Mon Sep 17 00:00:00 2001 From: lxobr <122801072+lxobr@users.noreply.github.com> Date: Thu, 6 Nov 2025 15:22:48 +0100 Subject: [PATCH 098/284] feature: text chunker with overlap (#1732) ## Description - Implements `TextChunkerWithOverlap` with configurable `chunk_overlap_ratio` - Abstracts chunk_data generation via `get_chunk_data` callable (defaults to `chunk_by_paragraph`) - Parametrized tests verify `TextChunker` and `TextChunkerWithOverlap` (0% overlap) produce identical output for all edge cases. 
- Overlap-specific tests validate `TextChunkerWithOverlap` behavior ## Type of Change - [ ] Bug fix (non-breaking change that fixes an issue) - [x] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [x] **I have tested my changes thoroughly before submitting this PR** - [x] **This PR contains minimal changes necessary to address the issue/feature** - [x] My code follows the project's coding standards and style guidelines - [x] I have added tests that prove my fix is effective or that my feature works - [ ] I have added necessary documentation (if applicable) - [x] All new and existing tests pass - [x] I have searched existing PRs to ensure this change hasn't been submitted already - [ ] I have linked any relevant issues in the description - [x] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. 
--------- Co-authored-by: hajdul88 <52442977+hajdul88@users.noreply.github.com> --- .../chunking/text_chunker_with_overlap.py | 124 +++++++ .../modules/chunking/test_text_chunker.py | 248 ++++++++++++++ .../test_text_chunker_with_overlap.py | 324 ++++++++++++++++++ 3 files changed, 696 insertions(+) create mode 100644 cognee/modules/chunking/text_chunker_with_overlap.py create mode 100644 cognee/tests/unit/modules/chunking/test_text_chunker.py create mode 100644 cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py diff --git a/cognee/modules/chunking/text_chunker_with_overlap.py b/cognee/modules/chunking/text_chunker_with_overlap.py new file mode 100644 index 000000000..4b9c23079 --- /dev/null +++ b/cognee/modules/chunking/text_chunker_with_overlap.py @@ -0,0 +1,124 @@ +from cognee.shared.logging_utils import get_logger +from uuid import NAMESPACE_OID, uuid5 + +from cognee.tasks.chunks import chunk_by_paragraph +from cognee.modules.chunking.Chunker import Chunker +from .models.DocumentChunk import DocumentChunk + +logger = get_logger() + + +class TextChunkerWithOverlap(Chunker): + def __init__( + self, + document, + get_text: callable, + max_chunk_size: int, + chunk_overlap_ratio: float = 0.0, + get_chunk_data: callable = None, + ): + super().__init__(document, get_text, max_chunk_size) + self._accumulated_chunk_data = [] + self._accumulated_size = 0 + self.chunk_overlap_ratio = chunk_overlap_ratio + self.chunk_overlap = int(max_chunk_size * chunk_overlap_ratio) + + if get_chunk_data is not None: + self.get_chunk_data = get_chunk_data + elif chunk_overlap_ratio > 0: + paragraph_max_size = int(0.5 * chunk_overlap_ratio * max_chunk_size) + self.get_chunk_data = lambda text: chunk_by_paragraph( + text, paragraph_max_size, batch_paragraphs=True + ) + else: + self.get_chunk_data = lambda text: chunk_by_paragraph( + text, self.max_chunk_size, batch_paragraphs=True + ) + + def _accumulation_overflows(self, chunk_data): + """Check if adding chunk_data would 
exceed max_chunk_size.""" + return self._accumulated_size + chunk_data["chunk_size"] > self.max_chunk_size + + def _accumulate_chunk_data(self, chunk_data): + """Add chunk_data to the current accumulation.""" + self._accumulated_chunk_data.append(chunk_data) + self._accumulated_size += chunk_data["chunk_size"] + + def _clear_accumulation(self): + """Reset accumulation, keeping overlap chunk_data based on chunk_overlap_ratio.""" + if self.chunk_overlap == 0: + self._accumulated_chunk_data = [] + self._accumulated_size = 0 + return + + # Keep chunk_data from the end that fit in overlap + overlap_chunk_data = [] + overlap_size = 0 + + for chunk_data in reversed(self._accumulated_chunk_data): + if overlap_size + chunk_data["chunk_size"] <= self.chunk_overlap: + overlap_chunk_data.insert(0, chunk_data) + overlap_size += chunk_data["chunk_size"] + else: + break + + self._accumulated_chunk_data = overlap_chunk_data + self._accumulated_size = overlap_size + + def _create_chunk(self, text, size, cut_type, chunk_id=None): + """Create a DocumentChunk with standard metadata.""" + try: + return DocumentChunk( + id=chunk_id or uuid5(NAMESPACE_OID, f"{str(self.document.id)}-{self.chunk_index}"), + text=text, + chunk_size=size, + is_part_of=self.document, + chunk_index=self.chunk_index, + cut_type=cut_type, + contains=[], + metadata={"index_fields": ["text"]}, + ) + except Exception as e: + logger.error(e) + raise e + + def _create_chunk_from_accumulation(self): + """Create a DocumentChunk from current accumulated chunk_data.""" + chunk_text = " ".join(chunk["text"] for chunk in self._accumulated_chunk_data) + return self._create_chunk( + text=chunk_text, + size=self._accumulated_size, + cut_type=self._accumulated_chunk_data[-1]["cut_type"], + ) + + def _emit_chunk(self, chunk_data): + """Emit a chunk when accumulation overflows.""" + if len(self._accumulated_chunk_data) > 0: + chunk = self._create_chunk_from_accumulation() + self._clear_accumulation() + 
self._accumulate_chunk_data(chunk_data) + else: + # Handle single chunk_data exceeding max_chunk_size + chunk = self._create_chunk( + text=chunk_data["text"], + size=chunk_data["chunk_size"], + cut_type=chunk_data["cut_type"], + chunk_id=chunk_data["chunk_id"], + ) + + self.chunk_index += 1 + return chunk + + async def read(self): + async for content_text in self.get_text(): + for chunk_data in self.get_chunk_data(content_text): + if not self._accumulation_overflows(chunk_data): + self._accumulate_chunk_data(chunk_data) + continue + + yield self._emit_chunk(chunk_data) + + if len(self._accumulated_chunk_data) == 0: + return + + yield self._create_chunk_from_accumulation() diff --git a/cognee/tests/unit/modules/chunking/test_text_chunker.py b/cognee/tests/unit/modules/chunking/test_text_chunker.py new file mode 100644 index 000000000..d535f74b0 --- /dev/null +++ b/cognee/tests/unit/modules/chunking/test_text_chunker.py @@ -0,0 +1,248 @@ +"""Unit tests for TextChunker and TextChunkerWithOverlap behavioral equivalence.""" + +import pytest +from uuid import uuid4 + +from cognee.modules.chunking.TextChunker import TextChunker +from cognee.modules.chunking.text_chunker_with_overlap import TextChunkerWithOverlap +from cognee.modules.data.processing.document_types import Document + + +@pytest.fixture(params=["TextChunker", "TextChunkerWithOverlap"]) +def chunker_class(request): + """Parametrize tests to run against both implementations.""" + return TextChunker if request.param == "TextChunker" else TextChunkerWithOverlap + + +@pytest.fixture +def make_text_generator(): + """Factory for async text generators.""" + + def _factory(*texts): + async def gen(): + for text in texts: + yield text + + return gen + + return _factory + + +async def collect_chunks(chunker): + """Consume async generator and return list of chunks.""" + chunks = [] + async for chunk in chunker.read(): + chunks.append(chunk) + return chunks + + +@pytest.mark.asyncio +async def 
test_empty_input_produces_no_chunks(chunker_class, make_text_generator): + """Empty input should yield no chunks.""" + document = Document( + id=uuid4(), + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + get_text = make_text_generator("") + chunker = chunker_class(document, get_text, max_chunk_size=512) + chunks = await collect_chunks(chunker) + + assert len(chunks) == 0, "Empty input should produce no chunks" + + +@pytest.mark.asyncio +async def test_whitespace_only_input_emits_single_chunk(chunker_class, make_text_generator): + """Whitespace-only input should produce exactly one chunk with unchanged text.""" + whitespace_text = " \n\t \r\n " + document = Document( + id=uuid4(), + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + get_text = make_text_generator(whitespace_text) + chunker = chunker_class(document, get_text, max_chunk_size=512) + chunks = await collect_chunks(chunker) + + assert len(chunks) == 1, "Whitespace-only input should produce exactly one chunk" + assert chunks[0].text == whitespace_text, "Chunk text should equal input (whitespace preserved)" + assert chunks[0].chunk_index == 0, "First chunk should have index 0" + + +@pytest.mark.asyncio +async def test_single_paragraph_below_limit_emits_one_chunk(chunker_class, make_text_generator): + """Single paragraph below limit should emit exactly one chunk.""" + text = "This is a short paragraph." 
+ document = Document( + id=uuid4(), + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + get_text = make_text_generator(text) + chunker = chunker_class(document, get_text, max_chunk_size=512) + chunks = await collect_chunks(chunker) + + assert len(chunks) == 1, "Single short paragraph should produce exactly one chunk" + assert chunks[0].text == text, "Chunk text should match input" + assert chunks[0].chunk_index == 0, "First chunk should have index 0" + assert chunks[0].chunk_size > 0, "Chunk should have positive size" + + +@pytest.mark.asyncio +async def test_oversized_paragraph_gets_emitted_as_a_single_chunk( + chunker_class, make_text_generator +): + """Oversized paragraph from chunk_by_paragraph should be emitted as single chunk.""" + text = ("A" * 1500) + ". Next sentence." + document = Document( + id=uuid4(), + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + get_text = make_text_generator(text) + chunker = chunker_class(document, get_text, max_chunk_size=50) + chunks = await collect_chunks(chunker) + + assert len(chunks) == 2, "Should produce 2 chunks (oversized paragraph + next sentence)" + assert chunks[0].chunk_size > 50, "First chunk should be oversized" + assert chunks[0].chunk_index == 0, "First chunk should have index 0" + assert chunks[1].chunk_index == 1, "Second chunk should have index 1" + + +@pytest.mark.asyncio +async def test_overflow_on_next_paragraph_emits_separate_chunk(chunker_class, make_text_generator): + """First paragraph near limit plus small paragraph should produce two separate chunks.""" + first_para = " ".join(["word"] * 5) + second_para = "Short text." 
+ text = first_para + " " + second_para + document = Document( + id=uuid4(), + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + get_text = make_text_generator(text) + chunker = chunker_class(document, get_text, max_chunk_size=10) + chunks = await collect_chunks(chunker) + + assert len(chunks) == 2, "Should produce 2 chunks due to overflow" + assert chunks[0].text.strip() == first_para, "First chunk should contain only first paragraph" + assert chunks[1].text.strip() == second_para, ( + "Second chunk should contain only second paragraph" + ) + assert chunks[0].chunk_index == 0, "First chunk should have index 0" + assert chunks[1].chunk_index == 1, "Second chunk should have index 1" + + +@pytest.mark.asyncio +async def test_small_paragraphs_batch_correctly(chunker_class, make_text_generator): + """Multiple small paragraphs should batch together with joiner spaces counted.""" + paragraphs = [" ".join(["word"] * 12) for _ in range(40)] + text = " ".join(paragraphs) + document = Document( + id=uuid4(), + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + get_text = make_text_generator(text) + chunker = chunker_class(document, get_text, max_chunk_size=49) + chunks = await collect_chunks(chunker) + + assert len(chunks) == 20, ( + "Should batch paragraphs (2 per chunk: 12 words × 2 tokens = 24, 24 + 1 joiner + 24 = 49)" + ) + assert all(c.chunk_index == i for i, c in enumerate(chunks)), ( + "Chunk indices should be sequential" + ) + all_text = " ".join(chunk.text.strip() for chunk in chunks) + expected_text = " ".join(paragraphs) + assert all_text == expected_text, "All paragraph text should be preserved with correct spacing" + + +@pytest.mark.asyncio +async def test_alternating_large_and_small_paragraphs_dont_batch( + chunker_class, make_text_generator +): + """Alternating near-max and small paragraphs should each become separate chunks.""" + 
large1 = "word" * 15 + "." + small1 = "Short." + large2 = "word" * 15 + "." + small2 = "Tiny." + text = large1 + " " + small1 + " " + large2 + " " + small2 + document = Document( + id=uuid4(), + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + max_chunk_size = 10 + get_text = make_text_generator(text) + chunker = chunker_class(document, get_text, max_chunk_size=max_chunk_size) + chunks = await collect_chunks(chunker) + + assert len(chunks) == 4, "Should produce multiple chunks" + assert all(c.chunk_index == i for i, c in enumerate(chunks)), ( + "Chunk indices should be sequential" + ) + assert chunks[0].chunk_size > max_chunk_size, ( + "First chunk should be oversized (large paragraph)" + ) + assert chunks[1].chunk_size <= max_chunk_size, "Second chunk should be small (small paragraph)" + assert chunks[2].chunk_size > max_chunk_size, ( + "Third chunk should be oversized (large paragraph)" + ) + assert chunks[3].chunk_size <= max_chunk_size, "Fourth chunk should be small (small paragraph)" + + +@pytest.mark.asyncio +async def test_chunk_indices_and_ids_are_deterministic(chunker_class, make_text_generator): + """Running chunker twice on identical input should produce identical indices and IDs.""" + sentence1 = "one " * 4 + ". " + sentence2 = "two " * 4 + ". " + sentence3 = "one " * 4 + ". " + sentence4 = "two " * 4 + ". 
" + text = sentence1 + sentence2 + sentence3 + sentence4 + doc_id = uuid4() + max_chunk_size = 20 + + document1 = Document( + id=doc_id, + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + get_text1 = make_text_generator(text) + chunker1 = chunker_class(document1, get_text1, max_chunk_size=max_chunk_size) + chunks1 = await collect_chunks(chunker1) + + document2 = Document( + id=doc_id, + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + get_text2 = make_text_generator(text) + chunker2 = chunker_class(document2, get_text2, max_chunk_size=max_chunk_size) + chunks2 = await collect_chunks(chunker2) + + assert len(chunks1) == 2, "Should produce exactly 2 chunks (4 sentences, 2 per chunk)" + assert len(chunks2) == 2, "Should produce exactly 2 chunks (4 sentences, 2 per chunk)" + assert [c.chunk_index for c in chunks1] == [0, 1], "First run indices should be [0, 1]" + assert [c.chunk_index for c in chunks2] == [0, 1], "Second run indices should be [0, 1]" + assert chunks1[0].id == chunks2[0].id, "First chunk ID should be deterministic" + assert chunks1[1].id == chunks2[1].id, "Second chunk ID should be deterministic" + assert chunks1[0].id != chunks1[1].id, "Chunk IDs should be unique within a run" diff --git a/cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py b/cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py new file mode 100644 index 000000000..9d7be6936 --- /dev/null +++ b/cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py @@ -0,0 +1,324 @@ +"""Unit tests for TextChunkerWithOverlap overlap behavior.""" + +import sys +import pytest +from uuid import uuid4 +from unittest.mock import patch + +from cognee.modules.chunking.text_chunker_with_overlap import TextChunkerWithOverlap +from cognee.modules.data.processing.document_types import Document +from cognee.tasks.chunks import 
chunk_by_paragraph + + +@pytest.fixture +def make_text_generator(): + """Factory for async text generators.""" + + def _factory(*texts): + async def gen(): + for text in texts: + yield text + + return gen + + return _factory + + +@pytest.fixture +def make_controlled_chunk_data(): + """Factory for controlled chunk_data generators.""" + + def _factory(*sentences, chunk_size_per_sentence=10): + def _chunk_data(text): + return [ + { + "text": sentence, + "chunk_size": chunk_size_per_sentence, + "cut_type": "sentence", + "chunk_id": uuid4(), + } + for sentence in sentences + ] + + return _chunk_data + + return _factory + + +@pytest.mark.asyncio +async def test_half_overlap_preserves_content_across_chunks( + make_text_generator, make_controlled_chunk_data +): + """With 50% overlap, consecutive chunks should share half their content.""" + s1 = "one" + s2 = "two" + s3 = "three" + s4 = "four" + text = "dummy" + document = Document( + id=uuid4(), + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + get_text = make_text_generator(text) + get_chunk_data = make_controlled_chunk_data(s1, s2, s3, s4, chunk_size_per_sentence=10) + chunker = TextChunkerWithOverlap( + document, + get_text, + max_chunk_size=20, + chunk_overlap_ratio=0.5, + get_chunk_data=get_chunk_data, + ) + chunks = [chunk async for chunk in chunker.read()] + + assert len(chunks) == 3, "Should produce exactly 3 chunks (s1+s2, s2+s3, s3+s4)" + assert [c.chunk_index for c in chunks] == [0, 1, 2], "Chunk indices should be [0, 1, 2]" + assert "one" in chunks[0].text and "two" in chunks[0].text, "Chunk 0 should contain s1 and s2" + assert "two" in chunks[1].text and "three" in chunks[1].text, ( + "Chunk 1 should contain s2 (overlap) and s3" + ) + assert "three" in chunks[2].text and "four" in chunks[2].text, ( + "Chunk 2 should contain s3 (overlap) and s4" + ) + + +@pytest.mark.asyncio +async def test_zero_overlap_produces_no_duplicate_content( + 
make_text_generator, make_controlled_chunk_data +): + """With 0% overlap, no content should appear in multiple chunks.""" + s1 = "one" + s2 = "two" + s3 = "three" + s4 = "four" + text = "dummy" + document = Document( + id=uuid4(), + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + get_text = make_text_generator(text) + get_chunk_data = make_controlled_chunk_data(s1, s2, s3, s4, chunk_size_per_sentence=10) + chunker = TextChunkerWithOverlap( + document, + get_text, + max_chunk_size=20, + chunk_overlap_ratio=0.0, + get_chunk_data=get_chunk_data, + ) + chunks = [chunk async for chunk in chunker.read()] + + assert len(chunks) == 2, "Should produce exactly 2 chunks (s1+s2, s3+s4)" + assert "one" in chunks[0].text and "two" in chunks[0].text, ( + "First chunk should contain s1 and s2" + ) + assert "three" in chunks[1].text and "four" in chunks[1].text, ( + "Second chunk should contain s3 and s4" + ) + assert "two" not in chunks[1].text and "three" not in chunks[0].text, ( + "No overlap: end of chunk 0 should not appear in chunk 1" + ) + + +@pytest.mark.asyncio +async def test_small_overlap_ratio_creates_minimal_overlap( + make_text_generator, make_controlled_chunk_data +): + """With 25% overlap ratio, chunks should have minimal overlap.""" + s1 = "alpha" + s2 = "beta" + s3 = "gamma" + s4 = "delta" + s5 = "epsilon" + text = "dummy" + document = Document( + id=uuid4(), + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + get_text = make_text_generator(text) + get_chunk_data = make_controlled_chunk_data(s1, s2, s3, s4, s5, chunk_size_per_sentence=10) + chunker = TextChunkerWithOverlap( + document, + get_text, + max_chunk_size=40, + chunk_overlap_ratio=0.25, + get_chunk_data=get_chunk_data, + ) + chunks = [chunk async for chunk in chunker.read()] + + assert len(chunks) == 2, "Should produce exactly 2 chunks" + assert [c.chunk_index for c in chunks] 
== [0, 1], "Chunk indices should be [0, 1]" + assert all(token in chunks[0].text for token in [s1, s2, s3, s4]), ( + "Chunk 0 should contain s1 through s4" + ) + assert s4 in chunks[1].text and s5 in chunks[1].text, ( + "Chunk 1 should contain overlap s4 and new content s5" + ) + + +@pytest.mark.asyncio +async def test_high_overlap_ratio_creates_significant_overlap( + make_text_generator, make_controlled_chunk_data +): + """With 75% overlap ratio, consecutive chunks should share most content.""" + s1 = "red" + s2 = "blue" + s3 = "green" + s4 = "yellow" + s5 = "purple" + text = "dummy" + document = Document( + id=uuid4(), + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + get_text = make_text_generator(text) + get_chunk_data = make_controlled_chunk_data(s1, s2, s3, s4, s5, chunk_size_per_sentence=5) + chunker = TextChunkerWithOverlap( + document, + get_text, + max_chunk_size=20, + chunk_overlap_ratio=0.75, + get_chunk_data=get_chunk_data, + ) + chunks = [chunk async for chunk in chunker.read()] + + assert len(chunks) == 2, "Should produce exactly 2 chunks with 75% overlap" + assert [c.chunk_index for c in chunks] == [0, 1], "Chunk indices should be [0, 1]" + assert all(token in chunks[0].text for token in [s1, s2, s3, s4]), ( + "Chunk 0 should contain s1, s2, s3, s4" + ) + assert all(token in chunks[1].text for token in [s2, s3, s4, s5]), ( + "Chunk 1 should contain s2, s3, s4 (overlap) and s5" + ) + + +@pytest.mark.asyncio +async def test_single_chunk_no_dangling_overlap(make_text_generator, make_controlled_chunk_data): + """Text that fits in one chunk should produce exactly one chunk, no overlap artifact.""" + s1 = "alpha" + s2 = "beta" + text = "dummy" + document = Document( + id=uuid4(), + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + get_text = make_text_generator(text) + get_chunk_data = make_controlled_chunk_data(s1, s2, 
chunk_size_per_sentence=10) + chunker = TextChunkerWithOverlap( + document, + get_text, + max_chunk_size=20, + chunk_overlap_ratio=0.5, + get_chunk_data=get_chunk_data, + ) + chunks = [chunk async for chunk in chunker.read()] + + assert len(chunks) == 1, ( + "Should produce exactly 1 chunk when content fits within max_chunk_size" + ) + assert chunks[0].chunk_index == 0, "Single chunk should have index 0" + assert "alpha" in chunks[0].text and "beta" in chunks[0].text, ( + "Single chunk should contain all content" + ) + + +@pytest.mark.asyncio +async def test_paragraph_chunking_with_overlap(make_text_generator): + """Test that chunk_by_paragraph integration produces 25% overlap between chunks.""" + + def mock_get_embedding_engine(): + class MockEngine: + tokenizer = None + + return MockEngine() + + chunk_by_sentence_module = sys.modules.get("cognee.tasks.chunks.chunk_by_sentence") + + max_chunk_size = 20 + overlap_ratio = 0.25 # 5 token overlap + paragraph_max_size = int(0.5 * overlap_ratio * max_chunk_size) # = 2 + + text = ( + "A0 A1. A2 A3. A4 A5. A6 A7. A8 A9. " # 10 tokens (0-9) + "B0 B1. B2 B3. B4 B5. B6 B7. B8 B9. " # 10 tokens (10-19) + "C0 C1. C2 C3. C4 C5. C6 C7. C8 C9. " # 10 tokens (20-29) + "D0 D1. D2 D3. D4 D5. D6 D7. D8 D9. " # 10 tokens (30-39) + "E0 E1. E2 E3. E4 E5. E6 E7. E8 E9." 
# 10 tokens (40-49) + ) + + document = Document( + id=uuid4(), + name="test_document", + raw_data_location="/test/path", + external_metadata=None, + mime_type="text/plain", + ) + + get_text = make_text_generator(text) + + def get_chunk_data(text_input): + return chunk_by_paragraph( + text_input, max_chunk_size=paragraph_max_size, batch_paragraphs=True + ) + + with patch.object( + chunk_by_sentence_module, "get_embedding_engine", side_effect=mock_get_embedding_engine + ): + chunker = TextChunkerWithOverlap( + document, + get_text, + max_chunk_size=max_chunk_size, + chunk_overlap_ratio=overlap_ratio, + get_chunk_data=get_chunk_data, + ) + chunks = [chunk async for chunk in chunker.read()] + + assert len(chunks) == 3, f"Should produce exactly 3 chunks, got {len(chunks)}" + + assert chunks[0].chunk_index == 0, "First chunk should have index 0" + assert chunks[1].chunk_index == 1, "Second chunk should have index 1" + assert chunks[2].chunk_index == 2, "Third chunk should have index 2" + + assert "A0" in chunks[0].text, "Chunk 0 should start with A0" + assert "A9" in chunks[0].text, "Chunk 0 should contain A9" + assert "B0" in chunks[0].text, "Chunk 0 should contain B0" + assert "B9" in chunks[0].text, "Chunk 0 should contain up to B9 (20 tokens)" + + assert "B" in chunks[1].text, "Chunk 1 should have overlap from B section" + assert "C" in chunks[1].text, "Chunk 1 should contain C section" + assert "D" in chunks[1].text, "Chunk 1 should contain D section" + + assert "D" in chunks[2].text, "Chunk 2 should have overlap from D section" + assert "E0" in chunks[2].text, "Chunk 2 should contain E0" + assert "E9" in chunks[2].text, "Chunk 2 should end with E9" + + chunk_0_end_words = chunks[0].text.split()[-4:] + chunk_1_words = chunks[1].text.split() + overlap_0_1 = any(word in chunk_1_words for word in chunk_0_end_words) + assert overlap_0_1, ( + f"No overlap detected between chunks 0 and 1. 
" + f"Chunk 0 ends with: {chunk_0_end_words}, " + f"Chunk 1 starts with: {chunk_1_words[:6]}" + ) + + chunk_1_end_words = chunks[1].text.split()[-4:] + chunk_2_words = chunks[2].text.split() + overlap_1_2 = any(word in chunk_2_words for word in chunk_1_end_words) + assert overlap_1_2, ( + f"No overlap detected between chunks 1 and 2. " + f"Chunk 1 ends with: {chunk_1_end_words}, " + f"Chunk 2 starts with: {chunk_2_words[:6]}" + ) From d5caba144c603c59b2960e6c6c88028553db0b57 Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Thu, 6 Nov 2025 15:57:52 +0100 Subject: [PATCH 099/284] CI: Initial Release test workflow --- .github/workflows/e2e_tests.yml | 41 ---------------- .github/workflows/load_tests.yml | 76 ++++++++++++++++++++++++++++++ .github/workflows/release_test.yml | 14 ++++++ 3 files changed, 90 insertions(+), 41 deletions(-) create mode 100644 .github/workflows/load_tests.yml create mode 100644 .github/workflows/release_test.yml diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index 0596f22d3..70a4b56e6 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -447,44 +447,3 @@ jobs: DB_USERNAME: cognee DB_PASSWORD: cognee run: uv run python ./cognee/tests/test_conversation_history.py - - test-load: - name: Test Load - runs-on: ubuntu-22.04 - steps: - - name: Check out repository - uses: actions/checkout@v4 - - - name: Cognee Setup - uses: ./.github/actions/cognee_setup - with: - python-version: '3.11.x' - extra-dependencies: "aws" - - - name: Set File Descriptor Limit - run: sudo prlimit --pid $$ --nofile=4096:4096 - - - name: Verify File Descriptor Limit - run: ulimit -n - - - name: Dependencies already installed - run: echo "Dependencies already installed in setup" - - - name: Run Load Test - env: - ENV: 'dev' - ENABLE_BACKEND_ACCESS_CONTROL: True - LLM_MODEL: ${{ secrets.LLM_MODEL }} - LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} - LLM_API_KEY: ${{ secrets.LLM_API_KEY }} - LLM_API_VERSION: ${{ 
secrets.LLM_API_VERSION }} - EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} - EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} - EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} - EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - STORAGE_BACKEND: s3 - AWS_REGION: eu-west-1 - AWS_ENDPOINT_URL: https://s3-eu-west-1.amazonaws.com - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_DEV_USER_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_DEV_USER_SECRET_KEY }} - run: uv run python ./cognee/tests/test_load.py \ No newline at end of file diff --git a/.github/workflows/load_tests.yml b/.github/workflows/load_tests.yml new file mode 100644 index 000000000..ff11bb88b --- /dev/null +++ b/.github/workflows/load_tests.yml @@ -0,0 +1,76 @@ +name: Load tests + +permissions: + contents: read + +on: + workflow_dispatch: + workflow_call: + secrets: + LLM_MODEL: + required: true + LLM_ENDPOINT: + required: true + LLM_API_KEY: + required: true + LLM_API_VERSION: + required: true + EMBEDDING_MODEL: + required: true + EMBEDDING_ENDPOINT: + required: true + EMBEDDING_API_KEY: + required: true + EMBEDDING_API_VERSION: + required: true + OPENAI_API_KEY: + required: true + AWS_ACCESS_KEY_ID: + required: true + AWS_SECRET_ACCESS_KEY: + required: true + +jobs: + test-load: + name: Test Load + runs-on: ubuntu-22.04 + timeout-minutes: 60 + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' + extra-dependencies: "aws" + + - name: Set File Descriptor Limit + run: sudo prlimit --pid $$ --nofile=4096:4096 + + - name: Verify File Descriptor Limit + run: ulimit -n + + - name: Dependencies already installed + run: echo "Dependencies already installed in setup" + + - name: Run Load Test + env: + ENV: 'dev' + ENABLE_BACKEND_ACCESS_CONTROL: True + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + 
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + STORAGE_BACKEND: s3 + AWS_REGION: eu-west-1 + AWS_ENDPOINT_URL: https://s3-eu-west-1.amazonaws.com + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_DEV_USER_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_DEV_USER_SECRET_KEY }} + run: uv run python ./cognee/tests/test_load.py + + diff --git a/.github/workflows/release_test.yml b/.github/workflows/release_test.yml new file mode 100644 index 000000000..23679bef4 --- /dev/null +++ b/.github/workflows/release_test.yml @@ -0,0 +1,14 @@ +# Long-running, heavy and resource-consuming tests for release validation +name: Release Test Workflow + +permissions: + contents: read + +on: + workflow_dispatch: + +jobs: + load-tests: + name: Load Tests + uses: ./.github/workflows/load_tests.yml + secrets: inherit \ No newline at end of file From ef3a3826698d89cfdf5bed62b6ea9f93576122d8 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 6 Nov 2025 16:23:54 +0100 Subject: [PATCH 100/284] refactor: use batch insert for SQLite as well --- .../c946955da633_multi_tenant_support.py | 26 ++++++------------- 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/alembic/versions/c946955da633_multi_tenant_support.py b/alembic/versions/c946955da633_multi_tenant_support.py index a87989d9b..d8fccdfbf 100644 --- a/alembic/versions/c946955da633_multi_tenant_support.py +++ b/alembic/versions/c946955da633_multi_tenant_support.py @@ -97,23 +97,13 @@ def upgrade() -> None: # Insert into user_tenants table if user_data: - if op.get_context().dialect.name == "sqlite": - insert_stmt = user_tenants.insert().values( - [ - {"user_id": user_id, "tenant_id": tenant_id, "created_at": _now()} - for user_id, tenant_id in user_data - ] - ) - conn.execute(insert_stmt) - conn.commit() - else: - 
op.bulk_insert( - user_tenants, - [ - {"user_id": user_id, "tenant_id": tenant_id, "created_at": _now()} - for user_id, tenant_id in user_data - ], - ) + op.bulk_insert( + user_tenants, + [ + {"user_id": user_id, "tenant_id": tenant_id, "created_at": _now()} + for user_id, tenant_id in user_data + ], + ) tenant_id_column = _get_column(insp, "datasets", "tenant_id") if not tenant_id_column: @@ -141,7 +131,7 @@ def upgrade() -> None: def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### + op.drop_table("user_tenants") op.drop_index(op.f("ix_datasets_tenant_id"), table_name="datasets") op.drop_column("datasets", "tenant_id") - op.drop_table("user_tenants") # ### end Alembic commands ### From c146de3a4d2f5327f4cffd347a8a782e39906da0 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 6 Nov 2025 16:41:00 +0100 Subject: [PATCH 101/284] fix: Remove creation of database and db tables from env.py --- alembic/env.py | 5 ----- alembic/versions/c946955da633_multi_tenant_support.py | 4 ++++ 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/alembic/env.py b/alembic/env.py index 1cbef65f7..8ca09968d 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -87,11 +87,6 @@ db_engine = get_relational_engine() print("Using database:", db_engine.db_uri) -if "sqlite" in db_engine.db_uri: - from cognee.infrastructure.utils.run_sync import run_sync - - run_sync(db_engine.create_database()) - config.set_section_option( config.config_ini_section, "SQLALCHEMY_DATABASE_URI", diff --git a/alembic/versions/c946955da633_multi_tenant_support.py b/alembic/versions/c946955da633_multi_tenant_support.py index d8fccdfbf..7806fdde8 100644 --- a/alembic/versions/c946955da633_multi_tenant_support.py +++ b/alembic/versions/c946955da633_multi_tenant_support.py @@ -79,6 +79,10 @@ def upgrade() -> None: dataset = _define_dataset_table() user = _define_user_table() + print(insp.get_table_names()) + + print(_get_column(insp, "user_tenants", "tenant_id")) + if 
"user_tenants" not in insp.get_table_names(): # Define table with all necessary columns including primary key user_tenants = op.create_table( From 49fbad9ec0cee49048c97ee6e80c78963a656042 Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Thu, 6 Nov 2025 16:44:43 +0100 Subject: [PATCH 102/284] CI: Run release workflow on PR to main --- .github/workflows/release_test.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/release_test.yml b/.github/workflows/release_test.yml index 23679bef4..6ac3ca515 100644 --- a/.github/workflows/release_test.yml +++ b/.github/workflows/release_test.yml @@ -6,6 +6,9 @@ permissions: on: workflow_dispatch: + pull_request: + branches: + - main jobs: load-tests: From efb46c99f9d0d5ac426540b95aadd0a1bfd3e5de Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 6 Nov 2025 16:47:42 +0100 Subject: [PATCH 103/284] fix: resolve issue with sqlite migration --- alembic/versions/c946955da633_multi_tenant_support.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/alembic/versions/c946955da633_multi_tenant_support.py b/alembic/versions/c946955da633_multi_tenant_support.py index 7806fdde8..d8fccdfbf 100644 --- a/alembic/versions/c946955da633_multi_tenant_support.py +++ b/alembic/versions/c946955da633_multi_tenant_support.py @@ -79,10 +79,6 @@ def upgrade() -> None: dataset = _define_dataset_table() user = _define_user_table() - print(insp.get_table_names()) - - print(_get_column(insp, "user_tenants", "tenant_id")) - if "user_tenants" not in insp.get_table_names(): # Define table with all necessary columns including primary key user_tenants = op.create_table( From 0d68175167af5da9b69e4024f434cbf0bd64b2ae Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 6 Nov 2025 16:53:22 +0100 Subject: [PATCH 104/284] fix: remove database creation from migrations --- alembic/versions/8057ae7329c2_initial_migration.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/alembic/versions/8057ae7329c2_initial_migration.py 
b/alembic/versions/8057ae7329c2_initial_migration.py index aa0ecd4b8..42e9904a8 100644 --- a/alembic/versions/8057ae7329c2_initial_migration.py +++ b/alembic/versions/8057ae7329c2_initial_migration.py @@ -18,11 +18,8 @@ depends_on: Union[str, Sequence[str], None] = None def upgrade() -> None: - db_engine = get_relational_engine() - # we might want to delete this - await_only(db_engine.create_database()) + pass def downgrade() -> None: - db_engine = get_relational_engine() - await_only(db_engine.delete_database()) + pass From bcc59cf9a0b6f5f765269a7ea2725fbd27e971f5 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 6 Nov 2025 16:57:59 +0100 Subject: [PATCH 105/284] fix: Remove default user creation --- alembic/versions/482cd6517ce4_add_default_user.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/alembic/versions/482cd6517ce4_add_default_user.py b/alembic/versions/482cd6517ce4_add_default_user.py index d85f0f146..fafa111f9 100644 --- a/alembic/versions/482cd6517ce4_add_default_user.py +++ b/alembic/versions/482cd6517ce4_add_default_user.py @@ -23,11 +23,8 @@ depends_on: Union[str, Sequence[str], None] = "8057ae7329c2" def upgrade() -> None: - try: - await_only(create_default_user()) - except UserAlreadyExists: - pass # It's fine if the default user already exists + pass # It's fine if the default user already exists def downgrade() -> None: - await_only(delete_user("default_user@example.com")) + pass From 61e1c2903f5f7372e5281a3c7126cc2bcb71bde5 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 6 Nov 2025 17:00:46 +0100 Subject: [PATCH 106/284] fix: Remove issue with default user creation --- alembic/versions/482cd6517ce4_add_default_user.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alembic/versions/482cd6517ce4_add_default_user.py b/alembic/versions/482cd6517ce4_add_default_user.py index fafa111f9..c8a3dc5d5 100644 --- a/alembic/versions/482cd6517ce4_add_default_user.py +++ 
b/alembic/versions/482cd6517ce4_add_default_user.py @@ -23,7 +23,7 @@ depends_on: Union[str, Sequence[str], None] = "8057ae7329c2" def upgrade() -> None: - pass # It's fine if the default user already exists + pass def downgrade() -> None: From da5055a0a96ce9647fef7245ab312301e4237165 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 6 Nov 2025 17:11:15 +0100 Subject: [PATCH 107/284] test: add one test that covers all retrievers. delete others --- .../retrieval/EntityCompletionRetriever.py | 2 +- .../modules/retrieval/completion_retriever.py | 4 +- ..._completion_context_extension_retriever.py | 4 +- .../graph_completion_cot_retriever.py | 2 +- .../retrieval/graph_completion_retriever.py | 2 +- .../modules/retrieval/temporal_retriever.py | 2 +- cognee/modules/retrieval/utils/completion.py | 2 +- .../entity_completion_retriever_test.py | 65 ------ ...letion_retriever_context_extension_test.py | 60 ----- .../graph_completion_retriever_cot_test.py | 58 ----- .../graph_completion_retriever_test.py | 58 ----- .../rag_completion_retriever_test.py | 81 +------ .../retrieval/structured_output_tests.py | 206 ++++++++++++++++++ .../retrieval/temporal_retriever_test.py | 66 ------ 14 files changed, 216 insertions(+), 396 deletions(-) delete mode 100644 cognee/tests/unit/modules/retrieval/entity_completion_retriever_test.py create mode 100644 cognee/tests/unit/modules/retrieval/structured_output_tests.py diff --git a/cognee/modules/retrieval/EntityCompletionRetriever.py b/cognee/modules/retrieval/EntityCompletionRetriever.py index 1f1ddad0a..14996f902 100644 --- a/cognee/modules/retrieval/EntityCompletionRetriever.py +++ b/cognee/modules/retrieval/EntityCompletionRetriever.py @@ -90,7 +90,7 @@ class EntityCompletionRetriever(BaseRetriever): context: Optional[Any] = None, session_id: Optional[str] = None, response_model: Type = str, - ) -> List[str]: + ) -> List[Any]: """ Generate completion using provided context or fetch new context. 
diff --git a/cognee/modules/retrieval/completion_retriever.py b/cognee/modules/retrieval/completion_retriever.py index f071e41de..126ebcab8 100644 --- a/cognee/modules/retrieval/completion_retriever.py +++ b/cognee/modules/retrieval/completion_retriever.py @@ -1,5 +1,5 @@ import asyncio -from typing import Any, Optional, Type +from typing import Any, Optional, Type, List from cognee.shared.logging_utils import get_logger from cognee.infrastructure.databases.vector import get_vector_engine @@ -80,7 +80,7 @@ class CompletionRetriever(BaseRetriever): context: Optional[Any] = None, session_id: Optional[str] = None, response_model: Type = str, - ) -> str: + ) -> List[Any]: """ Generates an LLM completion using the context. diff --git a/cognee/modules/retrieval/graph_completion_context_extension_retriever.py b/cognee/modules/retrieval/graph_completion_context_extension_retriever.py index 6b2c6a9e6..b07d11fd2 100644 --- a/cognee/modules/retrieval/graph_completion_context_extension_retriever.py +++ b/cognee/modules/retrieval/graph_completion_context_extension_retriever.py @@ -1,5 +1,5 @@ import asyncio -from typing import Optional, List, Type +from typing import Optional, List, Type, Any from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge from cognee.shared.logging_utils import get_logger from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever @@ -57,7 +57,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever): session_id: Optional[str] = None, context_extension_rounds=4, response_model: Type = str, - ) -> List[str]: + ) -> List[Any]: """ Extends the context for a given query by retrieving related triplets and generating new completions based on them. 
diff --git a/cognee/modules/retrieval/graph_completion_cot_retriever.py b/cognee/modules/retrieval/graph_completion_cot_retriever.py index 39255fe68..eb8f502cb 100644 --- a/cognee/modules/retrieval/graph_completion_cot_retriever.py +++ b/cognee/modules/retrieval/graph_completion_cot_retriever.py @@ -171,7 +171,7 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever): session_id: Optional[str] = None, max_iter=4, response_model: Type = str, - ) -> List[str]: + ) -> List[Any]: """ Generate completion responses based on a user query and contextual information. diff --git a/cognee/modules/retrieval/graph_completion_retriever.py b/cognee/modules/retrieval/graph_completion_retriever.py index b544e8ead..df77a11ac 100644 --- a/cognee/modules/retrieval/graph_completion_retriever.py +++ b/cognee/modules/retrieval/graph_completion_retriever.py @@ -147,7 +147,7 @@ class GraphCompletionRetriever(BaseGraphRetriever): context: Optional[List[Edge]] = None, session_id: Optional[str] = None, response_model: Type = str, - ) -> List[str]: + ) -> List[Any]: """ Generates a completion using graph connections context based on a query. diff --git a/cognee/modules/retrieval/temporal_retriever.py b/cognee/modules/retrieval/temporal_retriever.py index 38d69ec80..f3da02c15 100644 --- a/cognee/modules/retrieval/temporal_retriever.py +++ b/cognee/modules/retrieval/temporal_retriever.py @@ -151,7 +151,7 @@ class TemporalRetriever(GraphCompletionRetriever): context: Optional[str] = None, session_id: Optional[str] = None, response_model: Type = str, - ) -> List[str]: + ) -> List[Any]: """ Generates a response using the query and optional context. 
diff --git a/cognee/modules/retrieval/utils/completion.py b/cognee/modules/retrieval/utils/completion.py index b77d7ef90..c90ce77f4 100644 --- a/cognee/modules/retrieval/utils/completion.py +++ b/cognee/modules/retrieval/utils/completion.py @@ -11,7 +11,7 @@ async def generate_completion( system_prompt: Optional[str] = None, conversation_history: Optional[str] = None, response_model: Type = str, -) -> str: +) -> Any: """Generates a completion using LLM with given context and prompts.""" args = {"question": query, "context": context} user_prompt = render_prompt(user_prompt_path, args) diff --git a/cognee/tests/unit/modules/retrieval/entity_completion_retriever_test.py b/cognee/tests/unit/modules/retrieval/entity_completion_retriever_test.py deleted file mode 100644 index 064f4a31a..000000000 --- a/cognee/tests/unit/modules/retrieval/entity_completion_retriever_test.py +++ /dev/null @@ -1,65 +0,0 @@ -import os -import pytest -import pathlib -from pydantic import BaseModel - -import cognee -from cognee.low_level import setup -from cognee.tasks.storage import add_data_points -from cognee.modules.engine.models import Entity, EntityType -from cognee.modules.retrieval.EntityCompletionRetriever import EntityCompletionRetriever -from cognee.modules.retrieval.entity_extractors.DummyEntityExtractor import DummyEntityExtractor -from cognee.modules.retrieval.context_providers.DummyContextProvider import DummyContextProvider - - -class TestAnswer(BaseModel): - answer: str - explanation: str - - -# TODO: Add more tests, similar to other retrievers. -# TODO: For the tests, one needs to define an Entity Extractor and a Context Provider. 
-class TestEntityCompletionRetriever: - @pytest.mark.asyncio - async def test_get_entity_structured_completion(self): - system_directory_path = os.path.join( - pathlib.Path(__file__).parent, ".cognee_system/test_get_entity_structured_completion" - ) - cognee.config.system_root_directory(system_directory_path) - data_directory_path = os.path.join( - pathlib.Path(__file__).parent, ".data_storage/test_get_entity_structured_completion" - ) - cognee.config.data_root_directory(data_directory_path) - - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) - await setup() - - entity_type = EntityType(name="Person", description="A human individual") - entity = Entity(name="Albert Einstein", is_a=entity_type, description="A famous physicist") - - entities = [entity] - await add_data_points(entities) - - retriever = EntityCompletionRetriever(DummyEntityExtractor(), DummyContextProvider()) - - # Test with string response model (default) - string_answer = await retriever.get_completion("Who is Albert Einstein?") - assert isinstance(string_answer, list), f"Expected str, got {type(string_answer).__name__}" - assert all(isinstance(item, str) and item.strip() for item in string_answer), ( - "Answer should not be empty" - ) - - # Test with structured response model - structured_answer = await retriever.get_completion( - "Who is Albert Einstein?", response_model=TestAnswer - ) - assert isinstance(structured_answer, list), ( - f"Expected list, got {type(structured_answer).__name__}" - ) - assert all(isinstance(item, TestAnswer) for item in structured_answer), ( - f"Expected TestAnswer, got {type(structured_answer).__name__}" - ) - - assert structured_answer[0].answer.strip(), "Answer field should not be empty" - assert structured_answer[0].explanation.strip(), "Explanation field should not be empty" diff --git a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py 
b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py index d15e55c23..29c8b7c95 100644 --- a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +++ b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py @@ -2,7 +2,6 @@ import os import pytest import pathlib from typing import Optional, Union -from pydantic import BaseModel import cognee from cognee.low_level import setup, DataPoint @@ -12,12 +11,6 @@ from cognee.modules.retrieval.graph_completion_context_extension_retriever impor GraphCompletionContextExtensionRetriever, ) - -class TestAnswer(BaseModel): - answer: str - explanation: str - - class TestGraphCompletionWithContextExtensionRetriever: @pytest.mark.asyncio async def test_graph_completion_extension_context_simple(self): @@ -181,56 +174,3 @@ class TestGraphCompletionWithContextExtensionRetriever: assert all(isinstance(item, str) and item.strip() for item in answer), ( "Answer must contain only non-empty strings" ) - - @pytest.mark.asyncio - async def test_get_graph_structured_completion_extension_context(self): - system_directory_path = os.path.join( - pathlib.Path(__file__).parent, - ".cognee_system/test_get_graph_structured_completion_extension_context", - ) - cognee.config.system_root_directory(system_directory_path) - data_directory_path = os.path.join( - pathlib.Path(__file__).parent, - ".data_storage/test_get_graph_structured_completion_extension_context", - ) - cognee.config.data_root_directory(data_directory_path) - - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) - await setup() - - class Company(DataPoint): - name: str - - class Person(DataPoint): - name: str - works_for: Company - - company1 = Company(name="Figma") - person1 = Person(name="Steve Rodger", works_for=company1) - - entities = [company1, person1] - await add_data_points(entities) - - retriever = GraphCompletionContextExtensionRetriever() - - 
# Test with string response model (default) - string_answer = await retriever.get_completion("Who works at Figma?") - assert isinstance(string_answer, list), f"Expected str, got {type(string_answer).__name__}" - assert all(isinstance(item, str) and item.strip() for item in string_answer), ( - "Answer should not be empty" - ) - - # Test with structured response model - structured_answer = await retriever.get_completion( - "Who works at Figma?", response_model=TestAnswer - ) - assert isinstance(structured_answer, list), ( - f"Expected list, got {type(structured_answer).__name__}" - ) - assert all(isinstance(item, TestAnswer) for item in structured_answer), ( - f"Expected TestAnswer, got {type(structured_answer).__name__}" - ) - - assert structured_answer[0].answer.strip(), "Answer field should not be empty" - assert structured_answer[0].explanation.strip(), "Explanation field should not be empty" diff --git a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py index 79e4bcec3..ac58793be 100644 --- a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +++ b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py @@ -2,7 +2,6 @@ import os import pytest import pathlib from typing import Optional, Union -from pydantic import BaseModel import cognee from cognee.low_level import setup, DataPoint @@ -10,12 +9,6 @@ from cognee.modules.graph.utils import resolve_edges_to_text from cognee.tasks.storage import add_data_points from cognee.modules.retrieval.graph_completion_cot_retriever import GraphCompletionCotRetriever - -class TestAnswer(BaseModel): - answer: str - explanation: str - - class TestGraphCompletionCoTRetriever: @pytest.mark.asyncio async def test_graph_completion_cot_context_simple(self): @@ -174,54 +167,3 @@ class TestGraphCompletionCoTRetriever: assert all(isinstance(item, str) and item.strip() for item in answer), ( 
"Answer must contain only non-empty strings" ) - - @pytest.mark.asyncio - async def test_get_graph_structured_completion_cot(self): - system_directory_path = os.path.join( - pathlib.Path(__file__).parent, ".cognee_system/test_get_graph_structured_completion_cot" - ) - cognee.config.system_root_directory(system_directory_path) - data_directory_path = os.path.join( - pathlib.Path(__file__).parent, ".data_storage/test_get_graph_structured_completion_cot" - ) - cognee.config.data_root_directory(data_directory_path) - - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) - await setup() - - class Company(DataPoint): - name: str - - class Person(DataPoint): - name: str - works_for: Company - - company1 = Company(name="Figma") - person1 = Person(name="Steve Rodger", works_for=company1) - - entities = [company1, person1] - await add_data_points(entities) - - retriever = GraphCompletionCotRetriever() - - # Test with string response model (default) - string_answer = await retriever.get_completion("Who works at Figma?") - assert isinstance(string_answer, list), f"Expected str, got {type(string_answer).__name__}" - assert all(isinstance(item, str) and item.strip() for item in string_answer), ( - "Answer should not be empty" - ) - - # Test with structured response model - structured_answer = await retriever.get_completion( - "Who works at Figma?", response_model=TestAnswer - ) - assert isinstance(structured_answer, list), ( - f"Expected list, got {type(structured_answer).__name__}" - ) - assert all(isinstance(item, TestAnswer) for item in structured_answer), ( - f"Expected TestAnswer, got {type(structured_answer).__name__}" - ) - - assert structured_answer[0].answer.strip(), "Answer field should not be empty" - assert structured_answer[0].explanation.strip(), "Explanation field should not be empty" diff --git a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py 
b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py index e320fcef1..21e2af199 100644 --- a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +++ b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py @@ -2,7 +2,6 @@ import os import pytest import pathlib from typing import Optional, Union -from pydantic import BaseModel import cognee from cognee.low_level import setup, DataPoint @@ -10,12 +9,6 @@ from cognee.modules.graph.utils import resolve_edges_to_text from cognee.tasks.storage import add_data_points from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever - -class TestAnswer(BaseModel): - answer: str - explanation: str - - class TestGraphCompletionRetriever: @pytest.mark.asyncio async def test_graph_completion_context_simple(self): @@ -227,54 +220,3 @@ class TestGraphCompletionRetriever: context = await retriever.get_context("Who works at Figma?") assert context == [], "Context should be empty on an empty graph" - - @pytest.mark.asyncio - async def test_get_graph_structured_completion(self): - system_directory_path = os.path.join( - pathlib.Path(__file__).parent, ".cognee_system/test_get_graph_structured_completion" - ) - cognee.config.system_root_directory(system_directory_path) - data_directory_path = os.path.join( - pathlib.Path(__file__).parent, ".data_storage/test_get_graph_structured_completion" - ) - cognee.config.data_root_directory(data_directory_path) - - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) - await setup() - - class Company(DataPoint): - name: str - - class Person(DataPoint): - name: str - works_for: Company - - company1 = Company(name="Figma") - person1 = Person(name="Steve Rodger", works_for=company1) - - entities = [company1, person1] - await add_data_points(entities) - - retriever = GraphCompletionRetriever() - - # Test with string response model (default) - string_answer = await retriever.get_completion("Who 
works at Figma?") - assert isinstance(string_answer, list), f"Expected str, got {type(string_answer).__name__}" - assert all(isinstance(item, str) and item.strip() for item in string_answer), ( - "Answer should not be empty" - ) - - # Test with structured response model - structured_answer = await retriever.get_completion( - "Who works at Figma?", response_model=TestAnswer - ) - assert isinstance(structured_answer, list), ( - f"Expected list, got {type(structured_answer).__name__}" - ) - assert all(isinstance(item, TestAnswer) for item in structured_answer), ( - f"Expected TestAnswer, got {type(structured_answer).__name__}" - ) - - assert structured_answer[0].answer.strip(), "Answer field should not be empty" - assert structured_answer[0].explanation.strip(), "Explanation field should not be empty" diff --git a/cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py b/cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py index 248ecc047..37876794f 100644 --- a/cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +++ b/cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py @@ -3,7 +3,7 @@ from typing import List import pytest import pathlib import cognee -from pydantic import BaseModel + from cognee.low_level import setup from cognee.tasks.storage import add_data_points from cognee.infrastructure.databases.vector import get_vector_engine @@ -26,12 +26,6 @@ class DocumentChunkWithEntities(DataPoint): metadata: dict = {"index_fields": ["text"]} - -class TestAnswer(BaseModel): - answer: str - explanation: str - - class TestRAGCompletionRetriever: @pytest.mark.asyncio async def test_rag_completion_context_simple(self): @@ -208,76 +202,3 @@ class TestRAGCompletionRetriever: context = await retriever.get_context("Christina Mayer") assert context == "", "Returned context should be empty on an empty graph" - - @pytest.mark.asyncio - async def test_get_rag_structured_completion(self): - system_directory_path = 
os.path.join( - pathlib.Path(__file__).parent, ".cognee_system/test_get_rag_structured_completion" - ) - cognee.config.system_root_directory(system_directory_path) - data_directory_path = os.path.join( - pathlib.Path(__file__).parent, ".data_storage/test_get_rag_structured_completion" - ) - cognee.config.data_root_directory(data_directory_path) - - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) - await setup() - - document = TextDocument( - name="Steve Rodger's career", - raw_data_location="somewhere", - external_metadata="", - mime_type="text/plain", - ) - - chunk1 = DocumentChunk( - text="Steve Rodger", - chunk_size=2, - chunk_index=0, - cut_type="sentence_end", - is_part_of=document, - contains=[], - ) - chunk2 = DocumentChunk( - text="Mike Broski", - chunk_size=2, - chunk_index=1, - cut_type="sentence_end", - is_part_of=document, - contains=[], - ) - chunk3 = DocumentChunk( - text="Christina Mayer", - chunk_size=2, - chunk_index=2, - cut_type="sentence_end", - is_part_of=document, - contains=[], - ) - - entities = [chunk1, chunk2, chunk3] - await add_data_points(entities) - - retriever = CompletionRetriever() - - # Test with string response model (default) - string_answer = await retriever.get_completion("Where does Steve work?") - assert isinstance(string_answer, list), f"Expected str, got {type(string_answer).__name__}" - assert all(isinstance(item, str) and item.strip() for item in string_answer), ( - "Answer should not be empty" - ) - - # Test with structured response model - structured_answer = await retriever.get_completion( - "Where does Steve work?", response_model=TestAnswer - ) - assert isinstance(structured_answer, list), ( - f"Expected list, got {type(structured_answer).__name__}" - ) - assert all(isinstance(item, TestAnswer) for item in structured_answer), ( - f"Expected TestAnswer, got {type(structured_answer).__name__}" - ) - - assert structured_answer[0].answer.strip(), "Answer field should not be empty" - 
assert structured_answer[0].explanation.strip(), "Explanation field should not be empty" diff --git a/cognee/tests/unit/modules/retrieval/structured_output_tests.py b/cognee/tests/unit/modules/retrieval/structured_output_tests.py new file mode 100644 index 000000000..95b4b9c20 --- /dev/null +++ b/cognee/tests/unit/modules/retrieval/structured_output_tests.py @@ -0,0 +1,206 @@ +import asyncio + +import pytest +import cognee +import pathlib +import os + +from pydantic import BaseModel +from cognee.low_level import setup, DataPoint +from cognee.tasks.storage import add_data_points +from cognee.modules.chunking.models import DocumentChunk +from cognee.modules.data.processing.document_types import TextDocument +from cognee.modules.engine.models import Entity, EntityType +from cognee.modules.retrieval.entity_extractors.DummyEntityExtractor import DummyEntityExtractor +from cognee.modules.retrieval.context_providers.DummyContextProvider import DummyContextProvider +from cognee.modules.retrieval.graph_completion_cot_retriever import GraphCompletionCotRetriever +from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever +from cognee.modules.retrieval.graph_completion_context_extension_retriever import ( + GraphCompletionContextExtensionRetriever, +) +from cognee.modules.retrieval.EntityCompletionRetriever import EntityCompletionRetriever +from cognee.modules.retrieval.temporal_retriever import TemporalRetriever +from cognee.modules.retrieval.completion_retriever import CompletionRetriever + + +class TestAnswer(BaseModel): + answer: str + explanation: str + + +def _assert_string_answer(answer: list[str]): + assert isinstance(answer, list), f"Expected str, got {type(answer).__name__}" + assert all(isinstance(item, str) and item.strip() for item in answer), "Items should be strings" + assert all(item.strip() for item in answer), "Items should not be empty" + + +def _assert_structured_answer(answer: list[TestAnswer]): + assert isinstance(answer, 
list), f"Expected list, got {type(answer).__name__}" + assert all(isinstance(x, TestAnswer) for x in answer), "Items should be TestAnswer" + assert all(x.answer.strip() for x in answer), "Answer text should not be empty" + assert all(x.explanation.strip() for x in answer), "Explanation should not be empty" + + +async def _test_get_structured_graph_completion_cot(): + retriever = GraphCompletionCotRetriever() + + # Test with string response model (default) + string_answer = await retriever.get_completion("Who works at Figma?") + _assert_string_answer(string_answer) + + # Test with structured response model + structured_answer = await retriever.get_completion( + "Who works at Figma?", response_model=TestAnswer + ) + _assert_structured_answer(structured_answer) + + +async def _test_get_structured_graph_completion(): + retriever = GraphCompletionRetriever() + + # Test with string response model (default) + string_answer = await retriever.get_completion("Who works at Figma?") + _assert_string_answer(string_answer) + + # Test with structured response model + structured_answer = await retriever.get_completion( + "Who works at Figma?", response_model=TestAnswer + ) + _assert_structured_answer(structured_answer) + + +async def _test_get_structured_graph_completion_temporal(): + retriever = TemporalRetriever() + + # Test with string response model (default) + string_answer = await retriever.get_completion("When did Steve start working at Figma?") + _assert_string_answer(string_answer) + + # Test with structured response model + structured_answer = await retriever.get_completion( + "When did Steve start working at Figma??", response_model=TestAnswer + ) + _assert_structured_answer(structured_answer) + + +async def _test_get_structured_graph_completion_rag(): + retriever = CompletionRetriever() + + # Test with string response model (default) + string_answer = await retriever.get_completion("Where does Steve work?") + _assert_string_answer(string_answer) + + # Test with 
structured response model + structured_answer = await retriever.get_completion( + "Where does Steve work?", response_model=TestAnswer + ) + _assert_structured_answer(structured_answer) + + +async def _test_get_structured_graph_completion_context_extension(): + retriever = GraphCompletionContextExtensionRetriever() + + # Test with string response model (default) + string_answer = await retriever.get_completion("Who works at Figma?") + _assert_string_answer(string_answer) + + # Test with structured response model + structured_answer = await retriever.get_completion( + "Who works at Figma?", response_model=TestAnswer + ) + _assert_structured_answer(structured_answer) + + +async def _test_get_structured_entity_completion(): + retriever = EntityCompletionRetriever(DummyEntityExtractor(), DummyContextProvider()) + + # Test with string response model (default) + string_answer = await retriever.get_completion("Who is Albert Einstein?") + _assert_string_answer(string_answer) + + # Test with structured response model + structured_answer = await retriever.get_completion( + "Who is Albert Einstein?", response_model=TestAnswer + ) + _assert_structured_answer(structured_answer) + + +class TestStructuredOutputCompletion: + @pytest.mark.asyncio + async def test_get_structured_completion(self): + system_directory_path = os.path.join( + pathlib.Path(__file__).parent, ".cognee_system/test_get_structured_completion" + ) + cognee.config.system_root_directory(system_directory_path) + data_directory_path = os.path.join( + pathlib.Path(__file__).parent, ".data_storage/test_get_structured_completion" + ) + cognee.config.data_root_directory(data_directory_path) + + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + await setup() + + class Company(DataPoint): + name: str + + class Person(DataPoint): + name: str + works_for: Company + works_since: int + + company1 = Company(name="Figma") + person1 = Person(name="Steve Rodger", works_for=company1, 
works_since=2015) + + entities = [company1, person1] + await add_data_points(entities) + + document = TextDocument( + name="Steve Rodger's career", + raw_data_location="somewhere", + external_metadata="", + mime_type="text/plain", + ) + + chunk1 = DocumentChunk( + text="Steve Rodger", + chunk_size=2, + chunk_index=0, + cut_type="sentence_end", + is_part_of=document, + contains=[], + ) + chunk2 = DocumentChunk( + text="Mike Broski", + chunk_size=2, + chunk_index=1, + cut_type="sentence_end", + is_part_of=document, + contains=[], + ) + chunk3 = DocumentChunk( + text="Christina Mayer", + chunk_size=2, + chunk_index=2, + cut_type="sentence_end", + is_part_of=document, + contains=[], + ) + + entities = [chunk1, chunk2, chunk3] + await add_data_points(entities) + + entity_type = EntityType(name="Person", description="A human individual") + entity = Entity(name="Albert Einstein", is_a=entity_type, description="A famous physicist") + + entities = [entity] + await add_data_points(entities) + + await asyncio.gather( + _test_get_structured_graph_completion_cot(), + _test_get_structured_graph_completion(), + _test_get_structured_graph_completion_temporal(), + _test_get_structured_graph_completion_rag(), + _test_get_structured_graph_completion_context_extension(), + _test_get_structured_entity_completion(), + ) diff --git a/cognee/tests/unit/modules/retrieval/temporal_retriever_test.py b/cognee/tests/unit/modules/retrieval/temporal_retriever_test.py index 5b274c822..22b2d3fe9 100644 --- a/cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +++ b/cognee/tests/unit/modules/retrieval/temporal_retriever_test.py @@ -1,13 +1,6 @@ -import asyncio -import os -import pathlib -import cognee from types import SimpleNamespace import pytest -from pydantic import BaseModel -from cognee.low_level import setup, DataPoint -from cognee.tasks.storage import add_data_points from cognee.modules.retrieval.temporal_retriever import TemporalRetriever @@ -146,65 +139,6 @@ async def 
test_filter_top_k_events_error_handling(): with pytest.raises((KeyError, TypeError)): await tr.filter_top_k_events([{}], []) - -class TestAnswer(BaseModel): - answer: str - explanation: str - - -@pytest.mark.asyncio -async def test_get_temporal_structured_completion(): - system_directory_path = os.path.join( - pathlib.Path(__file__).parent, ".cognee_system/test_get_temporal_structured_completion" - ) - cognee.config.system_root_directory(system_directory_path) - data_directory_path = os.path.join( - pathlib.Path(__file__).parent, ".data_storage/test_get_temporal_structured_completion" - ) - cognee.config.data_root_directory(data_directory_path) - - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) - await setup() - - class Company(DataPoint): - name: str - - class Person(DataPoint): - name: str - works_for: Company - works_since: int - - company1 = Company(name="Figma") - person1 = Person(name="Steve Rodger", works_for=company1, works_since=2015) - - entities = [company1, person1] - await add_data_points(entities) - - retriever = TemporalRetriever() - - # Test with string response model (default) - string_answer = await retriever.get_completion("When did Steve start working at Figma?") - assert isinstance(string_answer, list), f"Expected str, got {type(string_answer).__name__}" - assert all(isinstance(item, str) and item.strip() for item in string_answer), ( - "Answer should not be empty" - ) - - # Test with structured response model - structured_answer = await retriever.get_completion( - "When did Steve start working at Figma??", response_model=TestAnswer - ) - assert isinstance(structured_answer, list), ( - f"Expected list, got {type(structured_answer).__name__}" - ) - assert all(isinstance(item, TestAnswer) for item in structured_answer), ( - f"Expected TestAnswer, got {type(structured_answer).__name__}" - ) - - assert structured_answer[0].answer.strip(), "Answer field should not be empty" - assert 
structured_answer[0].explanation.strip(), "Explanation field should not be empty" - - class _FakeRetriever(TemporalRetriever): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) From 72ba8d0dcb0306cfc5c618c854089bd68f4b9d3f Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 6 Nov 2025 17:12:33 +0100 Subject: [PATCH 108/284] chore: ruff format --- .../graph_completion_retriever_context_extension_test.py | 1 + .../modules/retrieval/graph_completion_retriever_cot_test.py | 1 + .../unit/modules/retrieval/graph_completion_retriever_test.py | 1 + .../unit/modules/retrieval/rag_completion_retriever_test.py | 1 + cognee/tests/unit/modules/retrieval/temporal_retriever_test.py | 1 + 5 files changed, 5 insertions(+) diff --git a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py index 29c8b7c95..0e21fe351 100644 --- a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +++ b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py @@ -11,6 +11,7 @@ from cognee.modules.retrieval.graph_completion_context_extension_retriever impor GraphCompletionContextExtensionRetriever, ) + class TestGraphCompletionWithContextExtensionRetriever: @pytest.mark.asyncio async def test_graph_completion_extension_context_simple(self): diff --git a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py index ac58793be..206cfaf84 100644 --- a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +++ b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py @@ -9,6 +9,7 @@ from cognee.modules.graph.utils import resolve_edges_to_text from cognee.tasks.storage import add_data_points from cognee.modules.retrieval.graph_completion_cot_retriever 
import GraphCompletionCotRetriever + class TestGraphCompletionCoTRetriever: @pytest.mark.asyncio async def test_graph_completion_cot_context_simple(self): diff --git a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py index 21e2af199..f462baced 100644 --- a/cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +++ b/cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py @@ -9,6 +9,7 @@ from cognee.modules.graph.utils import resolve_edges_to_text from cognee.tasks.storage import add_data_points from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever + class TestGraphCompletionRetriever: @pytest.mark.asyncio async def test_graph_completion_context_simple(self): diff --git a/cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py b/cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py index 37876794f..9bfed68f3 100644 --- a/cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +++ b/cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py @@ -26,6 +26,7 @@ class DocumentChunkWithEntities(DataPoint): metadata: dict = {"index_fields": ["text"]} + class TestRAGCompletionRetriever: @pytest.mark.asyncio async def test_rag_completion_context_simple(self): diff --git a/cognee/tests/unit/modules/retrieval/temporal_retriever_test.py b/cognee/tests/unit/modules/retrieval/temporal_retriever_test.py index 22b2d3fe9..c3c6a47f6 100644 --- a/cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +++ b/cognee/tests/unit/modules/retrieval/temporal_retriever_test.py @@ -139,6 +139,7 @@ async def test_filter_top_k_events_error_handling(): with pytest.raises((KeyError, TypeError)): await tr.filter_top_k_events([{}], []) + class _FakeRetriever(TemporalRetriever): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) From 
7dec6bfdedf30149113a25aa66bf6c21980b605b Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 6 Nov 2025 18:10:04 +0100 Subject: [PATCH 109/284] refactor: Add migrations as part of python package --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5f0aef1d8..8af35113c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -167,7 +167,6 @@ exclude = [ "/dist", "/.data", "/.github", - "/alembic", "/deployment", "/cognee-mcp", "/cognee-frontend", From 96c8bba5807e13cf376802da28817788ae4d6dbd Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 6 Nov 2025 19:12:09 +0100 Subject: [PATCH 110/284] refactor: Add db creation as step in MCP creation --- cognee-mcp/src/server.py | 4 ++++ cognee/modules/data/models/Dataset.py | 1 + 2 files changed, 5 insertions(+) diff --git a/cognee-mcp/src/server.py b/cognee-mcp/src/server.py index ce6dad88a..7c708638c 100755 --- a/cognee-mcp/src/server.py +++ b/cognee-mcp/src/server.py @@ -1096,6 +1096,10 @@ async def main(): # Skip migrations when in API mode (the API server handles its own database) if not args.no_migration and not args.api_url: + from cognee.modules.engine.operations.setup import setup + + await setup() + # Run Alembic migrations from the main cognee directory where alembic.ini is located logger.info("Running database migrations...") migration_result = subprocess.run( diff --git a/cognee/modules/data/models/Dataset.py b/cognee/modules/data/models/Dataset.py index 00ed4da96..fba065253 100644 --- a/cognee/modules/data/models/Dataset.py +++ b/cognee/modules/data/models/Dataset.py @@ -37,5 +37,6 @@ class Dataset(Base): "createdAt": self.created_at.isoformat(), "updatedAt": self.updated_at.isoformat() if self.updated_at else None, "ownerId": str(self.owner_id), + "tenantId": str(self.tenant_id), "data": [data.to_json() for data in self.data], } From 4ab53c9d64a1cde20c6b38e78eb2583bb43fbf65 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Fri, 7 Nov 2025 10:00:17 +0100 
Subject: [PATCH 111/284] changes based on PR comments --- cognee/modules/retrieval/base_graph_retriever.py | 10 +++++++--- cognee/modules/retrieval/base_retriever.py | 10 +++++++--- ...d_output_tests.py => structured_output_test.py} | 14 ++++++-------- .../modules/retrieval/summaries_retriever_test.py | 2 +- 4 files changed, 21 insertions(+), 15 deletions(-) rename cognee/tests/unit/modules/retrieval/{structured_output_tests.py => structured_output_test.py} (94%) diff --git a/cognee/modules/retrieval/base_graph_retriever.py b/cognee/modules/retrieval/base_graph_retriever.py index b0abc2991..b203309ba 100644 --- a/cognee/modules/retrieval/base_graph_retriever.py +++ b/cognee/modules/retrieval/base_graph_retriever.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Any, List, Optional, Type from abc import ABC, abstractmethod from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge @@ -14,7 +14,11 @@ class BaseGraphRetriever(ABC): @abstractmethod async def get_completion( - self, query: str, context: Optional[List[Edge]] = None, session_id: Optional[str] = None - ) -> str: + self, + query: str, + context: Optional[List[Edge]] = None, + session_id: Optional[str] = None, + response_model: Type = str, + ) -> List[Any]: """Generates a response using the query and optional context (triplets).""" pass diff --git a/cognee/modules/retrieval/base_retriever.py b/cognee/modules/retrieval/base_retriever.py index 1533dd44f..b88c741b8 100644 --- a/cognee/modules/retrieval/base_retriever.py +++ b/cognee/modules/retrieval/base_retriever.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Any, Optional +from typing import Any, Optional, Type, List class BaseRetriever(ABC): @@ -12,7 +12,11 @@ class BaseRetriever(ABC): @abstractmethod async def get_completion( - self, query: str, context: Optional[Any] = None, session_id: Optional[str] = None - ) -> Any: + self, + query: str, + context: Optional[Any] = None, + session_id: 
Optional[str] = None, + response_model: Type = str, + ) -> List[Any]: """Generates a response using the query and optional context.""" pass diff --git a/cognee/tests/unit/modules/retrieval/structured_output_tests.py b/cognee/tests/unit/modules/retrieval/structured_output_test.py similarity index 94% rename from cognee/tests/unit/modules/retrieval/structured_output_tests.py rename to cognee/tests/unit/modules/retrieval/structured_output_test.py index 95b4b9c20..4ad3019ff 100644 --- a/cognee/tests/unit/modules/retrieval/structured_output_tests.py +++ b/cognee/tests/unit/modules/retrieval/structured_output_test.py @@ -196,11 +196,9 @@ class TestStructuredOutputCompletion: entities = [entity] await add_data_points(entities) - await asyncio.gather( - _test_get_structured_graph_completion_cot(), - _test_get_structured_graph_completion(), - _test_get_structured_graph_completion_temporal(), - _test_get_structured_graph_completion_rag(), - _test_get_structured_graph_completion_context_extension(), - _test_get_structured_entity_completion(), - ) + await _test_get_structured_graph_completion_cot() + await _test_get_structured_graph_completion() + await _test_get_structured_graph_completion_temporal() + await _test_get_structured_graph_completion_rag() + await _test_get_structured_graph_completion_context_extension() + await _test_get_structured_entity_completion() diff --git a/cognee/tests/unit/modules/retrieval/summaries_retriever_test.py b/cognee/tests/unit/modules/retrieval/summaries_retriever_test.py index fc96081bf..5f4b93425 100644 --- a/cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +++ b/cognee/tests/unit/modules/retrieval/summaries_retriever_test.py @@ -13,7 +13,7 @@ from cognee.modules.retrieval.exceptions.exceptions import NoDataError from cognee.modules.retrieval.summaries_retriever import SummariesRetriever -class TextSummariesRetriever: +class TestSummariesRetriever: @pytest.mark.asyncio async def test_chunk_context(self): system_directory_path 
= os.path.join( From 7d5d19347a4ccd44eb3bbbe23c821c4c600b7989 Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Fri, 7 Nov 2025 11:11:05 +0100 Subject: [PATCH 112/284] CI: removed unnecessary ulimit --- .github/workflows/load_tests.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/load_tests.yml b/.github/workflows/load_tests.yml index ff11bb88b..f5b64d8ce 100644 --- a/.github/workflows/load_tests.yml +++ b/.github/workflows/load_tests.yml @@ -45,15 +45,9 @@ jobs: python-version: '3.11.x' extra-dependencies: "aws" - - name: Set File Descriptor Limit - run: sudo prlimit --pid $$ --nofile=4096:4096 - - name: Verify File Descriptor Limit run: ulimit -n - - name: Dependencies already installed - run: echo "Dependencies already installed in setup" - - name: Run Load Test env: ENV: 'dev' From 59f758d5c227b04f91e8086915fde078be3089db Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 7 Nov 2025 15:50:49 +0100 Subject: [PATCH 113/284] feat: Add test for multi tenancy, add ability to share name for dataset across tenants for one user --- .github/workflows/e2e_tests.yml | 29 ++- cognee/modules/data/methods/create_dataset.py | 1 + .../modules/data/methods/get_dataset_ids.py | 6 +- cognee/modules/search/methods/search.py | 2 + cognee/tests/test_multi_tenancy.py | 165 ++++++++++++++++++ 5 files changed, 200 insertions(+), 3 deletions(-) create mode 100644 cognee/tests/test_multi_tenancy.py diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index 0596f22d3..715487372 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -226,7 +226,7 @@ jobs: - name: Dependencies already installed run: echo "Dependencies already installed in setup" - - name: Run parallel databases test + - name: Run permissions test env: ENV: 'dev' LLM_MODEL: ${{ secrets.LLM_MODEL }} @@ -239,6 +239,31 @@ jobs: EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} run: uv run python ./cognee/tests/test_permissions.py + 
test-multi-tenancy: + name: Test multi tenancy with different situations in Cognee + runs-on: ubuntu-22.04 + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' + + - name: Run multi tenancy test + env: + ENV: 'dev' + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + run: uv run python ./cognee/tests/test_multi_tenancy.py + test-graph-edges: name: Test graph edge ingestion runs-on: ubuntu-22.04 @@ -487,4 +512,4 @@ jobs: AWS_ENDPOINT_URL: https://s3-eu-west-1.amazonaws.com AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_DEV_USER_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_DEV_USER_SECRET_KEY }} - run: uv run python ./cognee/tests/test_load.py \ No newline at end of file + run: uv run python ./cognee/tests/test_load.py diff --git a/cognee/modules/data/methods/create_dataset.py b/cognee/modules/data/methods/create_dataset.py index 280c9e105..7e28a8255 100644 --- a/cognee/modules/data/methods/create_dataset.py +++ b/cognee/modules/data/methods/create_dataset.py @@ -16,6 +16,7 @@ async def create_dataset(dataset_name: str, user: User, session: AsyncSession) - .options(joinedload(Dataset.data)) .filter(Dataset.name == dataset_name) .filter(Dataset.owner_id == owner_id) + .filter(Dataset.tenant_id == user.tenant_id) ) ).first() diff --git a/cognee/modules/data/methods/get_dataset_ids.py b/cognee/modules/data/methods/get_dataset_ids.py index d4402ff36..a61e85310 100644 --- a/cognee/modules/data/methods/get_dataset_ids.py +++ b/cognee/modules/data/methods/get_dataset_ids.py @@ -27,7 +27,11 @@ async def 
get_dataset_ids(datasets: Union[list[str], list[UUID]], user): # Get all user owned dataset objects (If a user wants to write to a dataset he is not the owner of it must be provided through UUID.) user_datasets = await get_datasets(user.id) # Filter out non name mentioned datasets - dataset_ids = [dataset.id for dataset in user_datasets if dataset.name in datasets] + dataset_ids = [dataset for dataset in user_datasets if dataset.name in datasets] + # Filter out non current tenant datasets + dataset_ids = [ + dataset.id for dataset in dataset_ids if dataset.tenant_id == user.tenant_id + ] else: raise DatasetTypeError( f"One or more of the provided dataset types is not handled: f{datasets}" diff --git a/cognee/modules/search/methods/search.py b/cognee/modules/search/methods/search.py index 5e465b239..b4278424b 100644 --- a/cognee/modules/search/methods/search.py +++ b/cognee/modules/search/methods/search.py @@ -172,6 +172,7 @@ async def search( "search_result": [context] if context else None, "dataset_id": datasets[0].id, "dataset_name": datasets[0].name, + "dataset_tenant_id": datasets[0].tenant_id, "graphs": graphs, } ) @@ -181,6 +182,7 @@ async def search( "search_result": [result] if result else None, "dataset_id": datasets[0].id, "dataset_name": datasets[0].name, + "dataset_tenant_id": datasets[0].tenant_id, "graphs": graphs, } ) diff --git a/cognee/tests/test_multi_tenancy.py b/cognee/tests/test_multi_tenancy.py new file mode 100644 index 000000000..7cdcda8d8 --- /dev/null +++ b/cognee/tests/test_multi_tenancy.py @@ -0,0 +1,165 @@ +import cognee +import pytest + +from cognee.modules.users.exceptions import PermissionDeniedError +from cognee.modules.users.tenants.methods import select_tenant +from cognee.modules.users.methods import get_user +from cognee.shared.logging_utils import get_logger +from cognee.modules.search.types import SearchType +from cognee.modules.users.methods import create_user +from cognee.modules.users.permissions.methods import 
authorized_give_permission_on_datasets +from cognee.modules.users.roles.methods import add_user_to_role +from cognee.modules.users.roles.methods import create_role +from cognee.modules.users.tenants.methods import create_tenant +from cognee.modules.users.tenants.methods import add_user_to_tenant +from cognee.modules.engine.operations.setup import setup +from cognee.shared.logging_utils import setup_logging, CRITICAL + +logger = get_logger() + + +async def main(): + # Create a clean slate for cognee -- reset data and system state + print("Resetting cognee data...") + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + print("Data reset complete.\n") + + # Set up the necessary databases and tables for user management. + await setup() + + # Add document for user_1, add it under dataset name AI + text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena. + At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages + this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the + preparation and manipulation of quantum state""" + + print("Creating user_1: user_1@example.com") + user_1 = await create_user("user_1@example.com", "example") + await cognee.add([text], dataset_name="AI", user=user_1) + + print("\nCreating user_2: user_2@example.com") + user_2 = await create_user("user_2@example.com", "example") + + # Run cognify for both datasets as the appropriate user/owner + print("\nCreating different datasets for user_1 (AI dataset) and user_2 (QUANTUM dataset)") + ai_cognify_result = await cognee.cognify(["AI"], user=user_1) + + # Extract dataset_ids from cognify results + def extract_dataset_id_from_cognify(cognify_result): + """Extract dataset_id from cognify output dictionary""" + for dataset_id, pipeline_result in cognify_result.items(): + return dataset_id # Return the first dataset_id + 
return None + + # Get dataset IDs from cognify results + # Note: When we want to work with datasets from other users (search, add, cognify and etc.) we must supply dataset + # information through dataset_id using dataset name only looks for datasets owned by current user + ai_dataset_id = extract_dataset_id_from_cognify(ai_cognify_result) + + # We can see here that user_1 can read his own dataset (AI dataset) + search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, + query_text="What is in the document?", + user=user_1, + datasets=[ai_dataset_id], + ) + + # Verify that user_2 cannot access user_1's dataset without permission + with pytest.raises(PermissionDeniedError): + search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, + query_text="What is in the document?", + user=user_2, + datasets=[ai_dataset_id], + ) + + # Create new tenant and role, add user_2 to tenant and role + tenant_id = await create_tenant("CogneeLab", user_1.id) + await select_tenant(user_id=user_1.id, tenant_id=tenant_id) + role_id = await create_role(role_name="Researcher", owner_id=user_1.id) + await add_user_to_tenant( + user_id=user_2.id, tenant_id=tenant_id, owner_id=user_1.id, set_as_active_tenant=True + ) + await add_user_to_role(user_id=user_2.id, role_id=role_id, owner_id=user_1.id) + + # Assert that user_1 cannot give permissions on his dataset to role before switching to the correct tenant + # AI dataset was made with default tenant and not CogneeLab tenant + with pytest.raises(PermissionDeniedError): + await authorized_give_permission_on_datasets( + role_id, + [ai_dataset_id], + "read", + user_1.id, + ) + + # We need to refresh the user object with changes made when switching tenants + user_1 = await get_user(user_1.id) + await cognee.add([text], dataset_name="AI_COGNEE_LAB", user=user_1) + ai_cognee_lab_cognify_result = await cognee.cognify(["AI_COGNEE_LAB"], user=user_1) + + ai_cognee_lab_dataset_id = 
extract_dataset_id_from_cognify(ai_cognee_lab_cognify_result) + + await authorized_give_permission_on_datasets( + role_id, + [ai_cognee_lab_dataset_id], + "read", + user_1.id, + ) + + search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, + query_text="What is in the document?", + user=user_2, + dataset_ids=[ai_cognee_lab_dataset_id], + ) + for result in search_results: + print(f"{result}\n") + + # Let's test changing tenants + tenant_id = await create_tenant("CogneeLab2", user_1.id) + await select_tenant(user_id=user_1.id, tenant_id=tenant_id) + + user_1 = await get_user(user_1.id) + await cognee.add([text], dataset_name="AI_COGNEE_LAB", user=user_1) + await cognee.cognify(["AI_COGNEE_LAB"], user=user_1) + + search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, + query_text="What is in the document?", + user=user_1, + ) + + # Assert only AI_COGNEE_LAB dataset from CogneeLab2 tenant is visible as the currently selected tenant + assert len(search_results) == 1, ( + f"Search results must only contain one dataset from current tenant: {search_results}" + ) + assert search_results[0]["dataset_name"] == "AI_COGNEE_LAB", ( + f"Dict must contain dataset name 'AI_COGNEE_LAB': {search_results[0]}" + ) + assert search_results[0]["dataset_tenant_id"] == user_1.tenant_id, ( + f"Dataset tenant_id must be same as user_1 tenant_id: {search_results[0]}" + ) + + # Switch back to no tenant (default tenant) + await select_tenant(user_id=user_1.id, tenant_id=None) + # Refresh user_1 object + user_1 = await get_user(user_1.id) + search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, + query_text="What is in the document?", + user=user_1, + ) + assert len(search_results) == 1, ( + f"Search results must only contain one dataset from default tenant: {search_results}" + ) + assert search_results[0]["dataset_name"] == "AI", ( + f"Dict must contain dataset name 'AI': {search_results[0]}" + ) + + +if __name__ == 
"__main__": + import asyncio + + logger = setup_logging(log_level=CRITICAL) + asyncio.run(main()) From d6e2bd132b85d9e038475ec4adf87140f69e53ce Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 7 Nov 2025 16:37:37 +0100 Subject: [PATCH 114/284] refactor: Remove testme comment --- cognee/modules/users/roles/methods/add_user_to_role.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/modules/users/roles/methods/add_user_to_role.py b/cognee/modules/users/roles/methods/add_user_to_role.py index d764ac900..23bb947f0 100644 --- a/cognee/modules/users/roles/methods/add_user_to_role.py +++ b/cognee/modules/users/roles/methods/add_user_to_role.py @@ -48,7 +48,7 @@ async def add_user_to_role(user_id: UUID, role_id: UUID, owner_id: UUID): raise UserNotFoundError elif not role: raise RoleNotFoundError - elif role.tenant_id not in [tenant.id for tenant in user_tenants]: # TESTME + elif role.tenant_id not in [tenant.id for tenant in user_tenants]: raise TenantNotFoundError( message="User tenant does not match role tenant. User cannot be added to role." 
) From 234b13a8c9a62ecd7e85c5d250c37b13bd8dace3 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Fri, 7 Nov 2025 17:39:42 +0100 Subject: [PATCH 115/284] feat:add emptiness check to neptune adapter --- .../hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py b/cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py index 5357f3d7c..1e16642b5 100644 --- a/cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +++ b/cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py @@ -416,6 +416,15 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface): self._client.query(f"MATCH (n :{self._VECTOR_NODE_LABEL}) DETACH DELETE n") pass + async def is_empty(self) -> bool: + query = """ + MATCH (n) + RETURN true + LIMIT 1; + """ + query_result = await self._client.query(query) + return len(query_result) == 0 + @staticmethod def _get_scored_result( item: dict, with_vector: bool = False, with_score: bool = False From c069dd276e419caa5b662390cdf3c3192f278af5 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Fri, 7 Nov 2025 16:51:11 +0000 Subject: [PATCH 116/284] feat: add model validator to strip quotes from string fields in LLMConfig --- cognee/infrastructure/llm/config.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/cognee/infrastructure/llm/config.py b/cognee/infrastructure/llm/config.py index 8fd196eaf..dab2fa6c0 100644 --- a/cognee/infrastructure/llm/config.py +++ b/cognee/infrastructure/llm/config.py @@ -73,6 +73,26 @@ class LLMConfig(BaseSettings): model_config = SettingsConfigDict(env_file=".env", extra="allow") + @model_validator(mode="after") + def strip_quotes_from_strings(self) -> "LLMConfig": + """ + Strip surrounding quotes from all string fields in the model. 
+ + This handles cases where Docker's --env-file or shell quoting + accidentally includes quotes in the value (e.g., LLM_API_KEY="value"). + + Returns: + LLMConfig: The instance with quotes stripped from all string fields. + """ + for field_name, _ in self.__class__.model_fields.items(): + value = getattr(self, field_name, None) + if isinstance(value, str) and len(value) >= 2: + if (value.startswith('"') and value.endswith('"')) or ( + value.startswith("'") and value.endswith("'") + ): + setattr(self, field_name, value[1:-1]) + return self + def model_post_init(self, __context) -> None: """Initialize the BAML registry after the model is created.""" # Check if BAML is selected as structured output framework but not available From 05c984f98f490e3a8c2ec2a2b643293f724eaf0f Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Fri, 7 Nov 2025 16:55:49 +0000 Subject: [PATCH 117/284] feat: enhance LLMConfig to selectively strip quotes from specific string fields --- cognee/infrastructure/llm/config.py | 35 ++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/cognee/infrastructure/llm/config.py b/cognee/infrastructure/llm/config.py index dab2fa6c0..b72c557c9 100644 --- a/cognee/infrastructure/llm/config.py +++ b/cognee/infrastructure/llm/config.py @@ -76,21 +76,36 @@ class LLMConfig(BaseSettings): @model_validator(mode="after") def strip_quotes_from_strings(self) -> "LLMConfig": """ - Strip surrounding quotes from all string fields in the model. + Strip surrounding quotes from specific string fields that often come from + environment variables with extra quotes (e.g., via Docker's --env-file). - This handles cases where Docker's --env-file or shell quoting - accidentally includes quotes in the value (e.g., LLM_API_KEY="value"). - - Returns: - LLMConfig: The instance with quotes stripped from all string fields. + Only applies to known config keys where quotes are invalid or cause issues. 
""" - for field_name, _ in self.__class__.model_fields.items(): + string_fields_to_strip = [ + "llm_api_key", + "llm_endpoint", + "llm_api_version", + "baml_llm_api_key", + "baml_llm_endpoint", + "baml_llm_api_version", + "fallback_api_key", + "fallback_endpoint", + "fallback_model", + "llm_provider", + "llm_model", + "baml_llm_provider", + "baml_llm_model", + ] + + cls = self.__class__ + for field_name in string_fields_to_strip: + if field_name not in cls.model_fields: + continue value = getattr(self, field_name, None) if isinstance(value, str) and len(value) >= 2: - if (value.startswith('"') and value.endswith('"')) or ( - value.startswith("'") and value.endswith("'") - ): + if value[0] == value[-1] and value[0] in ("'", '"'): setattr(self, field_name, value[1:-1]) + return self def model_post_init(self, __context) -> None: From 21b1f6b39c58d9547f3f14459015ccef46338303 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Fri, 7 Nov 2025 18:28:30 +0100 Subject: [PATCH 118/284] fix: remove add_ and get_developer_rules --- cognee-mcp/src/server.py | 130 --------------------------------------- 1 file changed, 130 deletions(-) diff --git a/cognee-mcp/src/server.py b/cognee-mcp/src/server.py index 3a64ba65a..7acf16848 100755 --- a/cognee-mcp/src/server.py +++ b/cognee-mcp/src/server.py @@ -90,97 +90,6 @@ async def health_check(request): return JSONResponse({"status": "ok"}) -@mcp.tool() -async def cognee_add_developer_rules( - base_path: str = ".", graph_model_file: str = None, graph_model_name: str = None -) -> list: - """ - Ingest core developer rule files into Cognee's memory layer. - - This function loads a predefined set of developer-related configuration, - rule, and documentation files from the base repository and assigns them - to the special 'developer_rules' node set in Cognee. It ensures these - foundational files are always part of the structured memory graph. - - Parameters - ---------- - base_path : str - Root path to resolve relative file paths. 
Defaults to current directory. - - graph_model_file : str, optional - Optional path to a custom schema file for knowledge graph generation. - - graph_model_name : str, optional - Optional class name to use from the graph_model_file schema. - - Returns - ------- - list - A message indicating how many rule files were scheduled for ingestion, - and how to check their processing status. - - Notes - ----- - - Each file is processed asynchronously in the background. - - Files are attached to the 'developer_rules' node set. - - Missing files are skipped with a logged warning. - """ - - developer_rule_paths = [ - ".cursorrules", - ".cursor/rules", - ".same/todos.md", - ".windsurfrules", - ".clinerules", - "CLAUDE.md", - ".sourcegraph/memory.md", - "AGENT.md", - "AGENTS.md", - ] - - async def cognify_task(file_path: str) -> None: - with redirect_stdout(sys.stderr): - logger.info(f"Starting cognify for: {file_path}") - try: - await cognee_client.add(file_path, node_set=["developer_rules"]) - - model = None - if graph_model_file and graph_model_name: - if cognee_client.use_api: - logger.warning( - "Custom graph models are not supported in API mode, ignoring." 
- ) - else: - from cognee.shared.data_models import KnowledgeGraph - - model = load_class(graph_model_file, graph_model_name) - - await cognee_client.cognify(graph_model=model) - logger.info(f"Cognify finished for: {file_path}") - except Exception as e: - logger.error(f"Cognify failed for {file_path}: {str(e)}") - raise ValueError(f"Failed to cognify: {str(e)}") - - tasks = [] - for rel_path in developer_rule_paths: - abs_path = os.path.join(base_path, rel_path) - if os.path.isfile(abs_path): - tasks.append(asyncio.create_task(cognify_task(abs_path))) - else: - logger.warning(f"Skipped missing developer rule file: {abs_path}") - log_file = get_log_file_location() - return [ - types.TextContent( - type="text", - text=( - f"Started cognify for {len(tasks)} developer rule files in background.\n" - f"All are added to the `developer_rules` node set.\n" - f"Use `cognify_status` or check logs at {log_file} to monitor progress." - ), - ) - ] - - @mcp.tool() async def cognify( data: str, graph_model_file: str = None, graph_model_name: str = None, custom_prompt: str = None @@ -561,45 +470,6 @@ async def search(search_query: str, search_type: str) -> list: return [types.TextContent(type="text", text=search_results)] -@mcp.tool() -async def get_developer_rules() -> list: - """ - Retrieve all developer rules that were generated based on previous interactions. - - This tool queries the Cognee knowledge graph and returns a list of developer - rules. - - Parameters - ---------- - None - - Returns - ------- - list - A list containing a single TextContent object with the retrieved developer rules. - The format is plain text containing the developer rules in bulletpoints. - - Notes - ----- - - The specific logic for fetching rules is handled internally. - - This tool does not accept any parameters and is intended for simple rule inspection use cases. 
- """ - - async def fetch_rules_from_cognee() -> str: - """Collect all developer rules from Cognee""" - with redirect_stdout(sys.stderr): - if cognee_client.use_api: - logger.warning("Developer rules retrieval is not available in API mode") - return "Developer rules retrieval is not available in API mode" - - developer_rules = await get_existing_rules(rules_nodeset_name="coding_agent_rules") - return developer_rules - - rules_text = await fetch_rules_from_cognee() - - return [types.TextContent(type="text", text=rules_text)] - - @mcp.tool() async def list_data(dataset_id: str = None) -> list: """ From 3710eec94ff547fb9fb80f3b3b35223098269ffc Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 10 Nov 2025 16:23:34 +0100 Subject: [PATCH 119/284] refactor: update docstring message --- cognee/api/v1/permissions/routers/get_permissions_router.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/api/v1/permissions/routers/get_permissions_router.py b/cognee/api/v1/permissions/routers/get_permissions_router.py index 20d35e748..63de97eaa 100644 --- a/cognee/api/v1/permissions/routers/get_permissions_router.py +++ b/cognee/api/v1/permissions/routers/get_permissions_router.py @@ -246,7 +246,7 @@ def get_permissions_router() -> APIRouter: - **tenant_id** (Union[UUID, None]): UUID of the tenant to select, If null/None is provided use the default single user tenant ## Response - Returns a success message indicating the tenant was created. + Returns a success message along with selected tenant id. 
""" send_telemetry( "Permissions API Endpoint Invoked", From b5f94c889d00e4043f9f7373b449d4dd165e2391 Mon Sep 17 00:00:00 2001 From: Igor Ilic <30923996+dexters1@users.noreply.github.com> Date: Tue, 11 Nov 2025 12:51:09 +0100 Subject: [PATCH 120/284] Update cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py Co-authored-by: Boris --- .../permissions/methods/get_all_user_permission_datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py index ee1de3c72..5eed992db 100644 --- a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +++ b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py @@ -39,7 +39,7 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) -> # If the dataset id key already exists, leave the dictionary unchanged. 
unique.setdefault(dataset.id, dataset) - # Filter out dataset that aren't part of the current user's tenant + # Filter out dataset that aren't part of the selected user's tenant filtered_datasets = [] for dataset in list(unique.values()): if dataset.tenant_id == user.tenant_id: From ed2d6871356ea94a5d95f9fcbeefc9c2ecd67348 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Tue, 11 Nov 2025 13:52:34 +0100 Subject: [PATCH 121/284] fix: changes based on PR comments --- cognee/context_global_variables.py | 8 ++--- .../databases/utils/constants.py | 4 --- .../utils/get_or_create_dataset_database.py | 36 +++++-------------- 3 files changed, 12 insertions(+), 36 deletions(-) delete mode 100644 cognee/infrastructure/databases/utils/constants.py diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py index 6ec467ed9..2d711a8b2 100644 --- a/cognee/context_global_variables.py +++ b/cognee/context_global_variables.py @@ -16,8 +16,8 @@ vector_db_config = ContextVar("vector_db_config", default=None) graph_db_config = ContextVar("graph_db_config", default=None) session_user = ContextVar("session_user", default=None) -vector_dbs_with_multi_user_support = ["lancedb"] -graph_dbs_with_multi_user_support = ["kuzu"] +VECTOR_DBS_WITH_MULTI_USER_SUPPORT = ["lancedb"] +GRAPH_DBS_WITH_MULTI_USER_SUPPORT = ["kuzu"] async def set_session_user_context_variable(user): @@ -28,8 +28,8 @@ def multi_user_support_possible(): graph_db_config = get_graph_context_config() vector_db_config = get_vectordb_context_config() return ( - graph_db_config["graph_database_provider"] in graph_dbs_with_multi_user_support - and vector_db_config["vector_db_provider"] in vector_dbs_with_multi_user_support + graph_db_config["graph_database_provider"] in GRAPH_DBS_WITH_MULTI_USER_SUPPORT + and vector_db_config["vector_db_provider"] in VECTOR_DBS_WITH_MULTI_USER_SUPPORT ) diff --git a/cognee/infrastructure/databases/utils/constants.py b/cognee/infrastructure/databases/utils/constants.py 
deleted file mode 100644 index fe6390a07..000000000 --- a/cognee/infrastructure/databases/utils/constants.py +++ /dev/null @@ -1,4 +0,0 @@ -VECTOR_DBS_WITH_MULTI_USER_SUPPORT = ["lancedb", "falkor"] -GRAPH_DBS_WITH_MULTI_USER_SUPPORT = ["kuzu", "falkor"] - -HYBRID_DBS = ["falkor"] diff --git a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py index a4e50f665..61d7840c0 100644 --- a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +++ b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py @@ -11,12 +11,6 @@ from cognee.infrastructure.databases.graph.config import get_graph_config from cognee.modules.data.methods import get_unique_dataset_id from cognee.modules.users.models import DatasetDatabase from cognee.modules.users.models import User -from .constants import ( - GRAPH_DBS_WITH_MULTI_USER_SUPPORT, - VECTOR_DBS_WITH_MULTI_USER_SUPPORT, - HYBRID_DBS, -) - async def get_or_create_dataset_database( dataset: Union[str, UUID], @@ -42,15 +36,15 @@ async def get_or_create_dataset_database( vector_config = get_vectordb_config() graph_config = get_graph_config() - graph_db_name = f"{dataset_id}.pkl" - - if graph_config.graph_database_provider in HYBRID_DBS: - vector_db_name = graph_db_name + if graph_config.graph_database_provider == "kuzu": + graph_db_name = f"{dataset_id}.pkl" else: - if vector_config.vector_db_provider == "lancedb": - vector_db_name = f"{dataset_id}.lance.db" - else: - vector_db_name = f"{dataset_id}.db" + graph_db_name = dataset_id + + if vector_config.vector_db_provider == "lancedb": + vector_db_name = f"{dataset_id}.lance.db" + else: + vector_db_name = dataset_id async with db_engine.get_async_session() as session: # Create dataset if it doesn't exist @@ -66,20 +60,6 @@ async def get_or_create_dataset_database( if existing: return existing - # Check if we support multi-user for this provider. 
If not, use default - if graph_config.graph_database_provider not in GRAPH_DBS_WITH_MULTI_USER_SUPPORT: - raise EnvironmentError( - f"Multi-user is currently not supported for the graph database provider: {graph_config.graph_database_provider}. " - f"Supported providers are: {', '.join(GRAPH_DBS_WITH_MULTI_USER_SUPPORT)}. Either use one of these" - f"providers, or disable BACKEND_ACCESS_CONTROL" - ) - if vector_config.vector_db_provider not in VECTOR_DBS_WITH_MULTI_USER_SUPPORT: - raise EnvironmentError( - f"Multi-user is currently not supported for the vector database provider: {vector_config.vector_db_provider}. " - f"Supported providers are: {', '.join(VECTOR_DBS_WITH_MULTI_USER_SUPPORT)}. Either use one of these" - f"providers, or disable BACKEND_ACCESS_CONTROL" - ) - # If there are no existing rows build a new row record = DatasetDatabase( owner_id=user.id, From 011a7fb60bd016047da2618bbb61ffa85eb52028 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 11 Nov 2025 13:53:19 +0100 Subject: [PATCH 122/284] fix: Resolve multi user migration --- ..._expand_dataset_database_for_multi_user.py | 54 ++++++------------- 1 file changed, 15 insertions(+), 39 deletions(-) diff --git a/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py b/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py index cd19d09c8..7e13898ae 100644 --- a/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +++ b/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py @@ -14,7 +14,7 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. 
revision: str = "76625596c5c3" -down_revision: Union[str, None] = "211ab850ef3d" +down_revision: Union[str, None] = "c946955da633" branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None @@ -30,40 +30,20 @@ def upgrade() -> None: conn = op.get_bind() insp = sa.inspect(conn) - data = sa.table( - "dataset_database", - sa.Column("dataset_id", sa.UUID, primary_key=True, index=True), # Critical for SQLite - sa.Column("owner_id", sa.UUID, index=True), - sa.Column("vector_database_name", sa.String(), unique=True, nullable=False), - sa.Column("graph_database_name", sa.String(), unique=True, nullable=False), - sa.Column("vector_database_provider", sa.String(), unique=False, nullable=False), - sa.Column("graph_database_provider", sa.String(), unique=False, nullable=False), - sa.Column("vector_database_url", sa.String(), unique=False, nullable=True), - sa.Column("graph_database_url", sa.String(), unique=False, nullable=True), - sa.Column("vector_database_key", sa.String(), unique=False, nullable=True), - sa.Column("graph_database_key", sa.String(), unique=False, nullable=True), - sa.Column("created_at", sa.DateTime(timezone=True)), - sa.Column("updated_at", sa.DateTime(timezone=True)), - ) - vector_database_provider_column = _get_column( insp, "dataset_database", "vector_database_provider" ) if not vector_database_provider_column: op.add_column( "dataset_database", - sa.Column("vector_database_provider", sa.String(), unique=False, nullable=False), + sa.Column( + "vector_database_provider", + sa.String(), + unique=False, + nullable=False, + server_default="lancedb", + ), ) - if op.get_context().dialect.name == "sqlite": - with op.batch_alter_table("dataset_database") as batch_op: - batch_op.execute( - data.update().values( - vector_database_provider="lancedb", - ) - ) - else: - conn = op.get_bind() - conn.execute(data.update().values(vector_database_provider="lancedb")) graph_database_provider_column = _get_column( insp, 
"dataset_database", "graph_database_provider" @@ -71,18 +51,14 @@ def upgrade() -> None: if not graph_database_provider_column: op.add_column( "dataset_database", - sa.Column("graph_database_provider", sa.String(), unique=False, nullable=False), + sa.Column( + "graph_database_provider", + sa.String(), + unique=False, + nullable=False, + server_default="kuzu", + ), ) - if op.get_context().dialect.name == "sqlite": - with op.batch_alter_table("dataset_database") as batch_op: - batch_op.execute( - data.update().values( - graph_database_provider="kuzu", - ) - ) - else: - conn = op.get_bind() - conn.execute(data.update().values(graph_database_provider="kuzu")) vector_database_url_column = _get_column(insp, "dataset_database", "vector_database_url") if not vector_database_url_column: From bb8de7b336a23b6053de3459764d71d7e45b40be Mon Sep 17 00:00:00 2001 From: Igor Ilic <30923996+dexters1@users.noreply.github.com> Date: Tue, 11 Nov 2025 13:56:16 +0100 Subject: [PATCH 123/284] Apply suggestion from @dexters1 --- .../databases/utils/get_or_create_dataset_database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py index 61d7840c0..0aa836174 100644 --- a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +++ b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py @@ -35,7 +35,7 @@ async def get_or_create_dataset_database( vector_config = get_vectordb_config() graph_config = get_graph_config() - +Note: for hybrid databases both graph and vector DB name have to be the same if graph_config.graph_database_provider == "kuzu": graph_db_name = f"{dataset_id}.pkl" else: From 20d49eeb76ca81057e902fad67658d5db2dcb0a0 Mon Sep 17 00:00:00 2001 From: Igor Ilic <30923996+dexters1@users.noreply.github.com> Date: Tue, 11 Nov 2025 13:56:35 +0100 Subject: [PATCH 124/284] Apply suggestion from 
@dexters1 --- .../databases/utils/get_or_create_dataset_database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py index 0aa836174..635734d89 100644 --- a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +++ b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py @@ -35,7 +35,7 @@ async def get_or_create_dataset_database( vector_config = get_vectordb_config() graph_config = get_graph_config() -Note: for hybrid databases both graph and vector DB name have to be the same +# Note: for hybrid databases both graph and vector DB name have to be the same if graph_config.graph_database_provider == "kuzu": graph_db_name = f"{dataset_id}.pkl" else: From 41b844a31c3b938a09a07462e59b70b39593313f Mon Sep 17 00:00:00 2001 From: Igor Ilic <30923996+dexters1@users.noreply.github.com> Date: Tue, 11 Nov 2025 13:56:59 +0100 Subject: [PATCH 125/284] Apply suggestion from @dexters1 --- .../databases/utils/get_or_create_dataset_database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py index 635734d89..1822221cb 100644 --- a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +++ b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py @@ -35,7 +35,7 @@ async def get_or_create_dataset_database( vector_config = get_vectordb_config() graph_config = get_graph_config() -# Note: for hybrid databases both graph and vector DB name have to be the same + # Note: for hybrid databases both graph and vector DB name have to be the same if graph_config.graph_database_provider == "kuzu": graph_db_name = f"{dataset_id}.pkl" else: From 4f5771230e6177db1f87b448abdc851067317608 Mon Sep 17 00:00:00 2001 
From: Andrej Milicevic Date: Tue, 11 Nov 2025 14:22:42 +0100 Subject: [PATCH 126/284] fix: PR comment changes --- cognee/context_global_variables.py | 9 ++++++++- .../databases/utils/get_or_create_dataset_database.py | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py index 2d711a8b2..6a0f767ff 100644 --- a/cognee/context_global_variables.py +++ b/cognee/context_global_variables.py @@ -86,10 +86,17 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_ base_config.system_root_directory, "databases", str(user.id) ) + if dataset_database.vector_database_provider == "lancedb": + vector_db_url = os.path.join( + databases_directory_path, dataset_database.vector_database_name + ) + else: + vector_db_url = dataset_database.vector_database_url + # Set vector and graph database configuration based on dataset database information vector_config = { "vector_db_provider": dataset_database.vector_database_provider, - "vector_db_url": dataset_database.vector_database_url, + "vector_db_url": vector_db_url, "vector_db_key": dataset_database.vector_database_key, "vector_db_name": dataset_database.vector_database_name, } diff --git a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py index 61d7840c0..b00616671 100644 --- a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +++ b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py @@ -12,6 +12,7 @@ from cognee.modules.data.methods import get_unique_dataset_id from cognee.modules.users.models import DatasetDatabase from cognee.modules.users.models import User + async def get_or_create_dataset_database( dataset: Union[str, UUID], user: User, From ac6c3ef9deeef9ef8069ba1aecdce8791987bd1e Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Tue, 11 Nov 2025 15:07:59 +0100 Subject: [PATCH 
127/284] fix: fix names, add falkor to constants --- cognee/context_global_variables.py | 4 ++-- .../databases/utils/get_or_create_dataset_database.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py index 6a0f767ff..d4cedc187 100644 --- a/cognee/context_global_variables.py +++ b/cognee/context_global_variables.py @@ -16,8 +16,8 @@ vector_db_config = ContextVar("vector_db_config", default=None) graph_db_config = ContextVar("graph_db_config", default=None) session_user = ContextVar("session_user", default=None) -VECTOR_DBS_WITH_MULTI_USER_SUPPORT = ["lancedb"] -GRAPH_DBS_WITH_MULTI_USER_SUPPORT = ["kuzu"] +VECTOR_DBS_WITH_MULTI_USER_SUPPORT = ["lancedb", "falkor"] +GRAPH_DBS_WITH_MULTI_USER_SUPPORT = ["kuzu", "falkor"] async def set_session_user_context_variable(user): diff --git a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py index 311f89ad7..a2e053b3d 100644 --- a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +++ b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py @@ -40,12 +40,12 @@ async def get_or_create_dataset_database( if graph_config.graph_database_provider == "kuzu": graph_db_name = f"{dataset_id}.pkl" else: - graph_db_name = dataset_id + graph_db_name = f"{dataset_id}" if vector_config.vector_db_provider == "lancedb": vector_db_name = f"{dataset_id}.lance.db" else: - vector_db_name = dataset_id + vector_db_name = f"{dataset_id}" async with db_engine.get_async_session() as session: # Create dataset if it doesn't exist From 6a640238760192358220a431db734d19476bb434 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 11 Nov 2025 15:12:58 +0100 Subject: [PATCH 128/284] fix: Update vector db url properly --- cognee/context_global_variables.py | 12 ++--------- .../utils/get_or_create_dataset_database.py | 20 
+++++++++++++++++-- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py index 6a0f767ff..c2e9e82a9 100644 --- a/cognee/context_global_variables.py +++ b/cognee/context_global_variables.py @@ -69,8 +69,6 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_ """ - base_config = get_base_config() - if not backend_access_control_enabled(): return @@ -79,6 +77,7 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_ # To ensure permissions are enforced properly all datasets will have their own databases dataset_database = await get_or_create_dataset_database(dataset, user) + base_config = get_base_config() data_root_directory = os.path.join( base_config.data_root_directory, str(user.tenant_id or user.id) ) @@ -86,17 +85,10 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_ base_config.system_root_directory, "databases", str(user.id) ) - if dataset_database.vector_database_provider == "lancedb": - vector_db_url = os.path.join( - databases_directory_path, dataset_database.vector_database_name - ) - else: - vector_db_url = dataset_database.vector_database_url - # Set vector and graph database configuration based on dataset database information vector_config = { "vector_db_provider": dataset_database.vector_database_provider, - "vector_db_url": vector_db_url, + "vector_db_url": dataset_database.vector_database_url, "vector_db_key": dataset_database.vector_database_key, "vector_db_name": dataset_database.vector_database_name, } diff --git a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py index 311f89ad7..0df3502ba 100644 --- a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +++ b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py @@ -1,10 +1,12 @@ +import os 
from uuid import UUID from typing import Union from sqlalchemy import select from sqlalchemy.exc import IntegrityError -from cognee.modules.data.methods import create_dataset +from cognee.base_config import get_base_config +from cognee.modules.data.methods import create_dataset from cognee.infrastructure.databases.relational import get_relational_engine from cognee.infrastructure.databases.vector import get_vectordb_config from cognee.infrastructure.databases.graph.config import get_graph_config @@ -36,6 +38,7 @@ async def get_or_create_dataset_database( vector_config = get_vectordb_config() graph_config = get_graph_config() + # Note: for hybrid databases both graph and vector DB name have to be the same if graph_config.graph_database_provider == "kuzu": graph_db_name = f"{dataset_id}.pkl" @@ -47,6 +50,19 @@ async def get_or_create_dataset_database( else: vector_db_name = dataset_id + base_config = get_base_config() + databases_directory_path = os.path.join( + base_config.system_root_directory, "databases", str(user.id) + ) + + # Determine vector database URL + if vector_config.vector_db_provider == "lancedb": + vector_db_url = os.path.join(databases_directory_path, vector_config.vector_db_name) + else: + vector_db_url = vector_config.vector_database_url + + # Determine graph database URL + async with db_engine.get_async_session() as session: # Create dataset if it doesn't exist if isinstance(dataset, str): @@ -69,7 +85,7 @@ async def get_or_create_dataset_database( graph_database_name=graph_db_name, vector_database_provider=vector_config.vector_db_provider, graph_database_provider=graph_config.graph_database_provider, - vector_database_url=vector_config.vector_db_url, + vector_database_url=vector_db_url, graph_database_url=graph_config.graph_database_url, vector_database_key=vector_config.vector_db_key, graph_database_key=graph_config.graph_database_key, From 8e8aecb76ff66a48e4b5fe18a7ffaac48434f89a Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Andrej=20Mili=C4=87evi=C4=87?= <85933103+siillee@users.noreply.github.com> Date: Tue, 11 Nov 2025 17:03:48 +0100 Subject: [PATCH 129/284] feat: enable multi user for falkor (#1689) ## Description Added multi-user support for Falkor. Adding support for the rest of the graph dbs should be a bit easier after this first one, especially since Falkor is hybrid. There are a few things code quality wise that might need changing, I am open to suggestions. ## Type of Change - [ ] Bug fix (non-breaking change that fixes an issue) - [x] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [x] **I have tested my changes thoroughly before submitting this PR** - [x] **This PR contains minimal changes necessary to address the issue/feature** - [ ] My code follows the project's coding standards and style guidelines - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have added necessary documentation (if applicable) - [ ] All new and existing tests pass - [ ] I have searched existing PRs to ensure this change hasn't been submitted already - [ ] I have linked any relevant issues in the description - [ ] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. 
--------- Co-authored-by: Andrej Milicevic Co-authored-by: Igor Ilic <30923996+dexters1@users.noreply.github.com> Co-authored-by: Igor Ilic --- ..._expand_dataset_database_for_multi_user.py | 98 +++++++++++++++++++ cognee/context_global_variables.py | 25 ++--- .../infrastructure/databases/graph/config.py | 4 + .../databases/graph/get_graph_engine.py | 2 + .../utils/get_or_create_dataset_database.py | 40 +++++++- .../infrastructure/databases/vector/config.py | 3 + .../databases/vector/create_vector_engine.py | 4 + .../modules/users/models/DatasetDatabase.py | 9 ++ cognee/tests/test_parallel_databases.py | 2 + 9 files changed, 172 insertions(+), 15 deletions(-) create mode 100644 alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py diff --git a/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py b/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py new file mode 100644 index 000000000..7e13898ae --- /dev/null +++ b/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py @@ -0,0 +1,98 @@ +"""Expand dataset database for multi user + +Revision ID: 76625596c5c3 +Revises: 211ab850ef3d +Create Date: 2025-10-30 12:55:20.239562 + +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision: str = "76625596c5c3" +down_revision: Union[str, None] = "c946955da633" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def _get_column(inspector, table, name, schema=None): + for col in inspector.get_columns(table, schema=schema): + if col["name"] == name: + return col + return None + + +def upgrade() -> None: + conn = op.get_bind() + insp = sa.inspect(conn) + + vector_database_provider_column = _get_column( + insp, "dataset_database", "vector_database_provider" + ) + if not vector_database_provider_column: + op.add_column( + "dataset_database", + sa.Column( + "vector_database_provider", + sa.String(), + unique=False, + nullable=False, + server_default="lancedb", + ), + ) + + graph_database_provider_column = _get_column( + insp, "dataset_database", "graph_database_provider" + ) + if not graph_database_provider_column: + op.add_column( + "dataset_database", + sa.Column( + "graph_database_provider", + sa.String(), + unique=False, + nullable=False, + server_default="kuzu", + ), + ) + + vector_database_url_column = _get_column(insp, "dataset_database", "vector_database_url") + if not vector_database_url_column: + op.add_column( + "dataset_database", + sa.Column("vector_database_url", sa.String(), unique=False, nullable=True), + ) + + graph_database_url_column = _get_column(insp, "dataset_database", "graph_database_url") + if not graph_database_url_column: + op.add_column( + "dataset_database", + sa.Column("graph_database_url", sa.String(), unique=False, nullable=True), + ) + + vector_database_key_column = _get_column(insp, "dataset_database", "vector_database_key") + if not vector_database_key_column: + op.add_column( + "dataset_database", + sa.Column("vector_database_key", sa.String(), unique=False, nullable=True), + ) + + graph_database_key_column = _get_column(insp, "dataset_database", "graph_database_key") + if not graph_database_key_column: + op.add_column( + "dataset_database", + 
sa.Column("graph_database_key", sa.String(), unique=False, nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("dataset_database", "vector_database_provider") + op.drop_column("dataset_database", "graph_database_provider") + op.drop_column("dataset_database", "vector_database_url") + op.drop_column("dataset_database", "graph_database_url") + op.drop_column("dataset_database", "vector_database_key") + op.drop_column("dataset_database", "graph_database_key") diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py index f17c9187a..62e06fc64 100644 --- a/cognee/context_global_variables.py +++ b/cognee/context_global_variables.py @@ -16,8 +16,8 @@ vector_db_config = ContextVar("vector_db_config", default=None) graph_db_config = ContextVar("graph_db_config", default=None) session_user = ContextVar("session_user", default=None) -vector_dbs_with_multi_user_support = ["lancedb"] -graph_dbs_with_multi_user_support = ["kuzu"] +VECTOR_DBS_WITH_MULTI_USER_SUPPORT = ["lancedb", "falkor"] +GRAPH_DBS_WITH_MULTI_USER_SUPPORT = ["kuzu", "falkor"] async def set_session_user_context_variable(user): @@ -28,8 +28,8 @@ def multi_user_support_possible(): graph_db_config = get_graph_context_config() vector_db_config = get_vectordb_context_config() return ( - graph_db_config["graph_database_provider"] in graph_dbs_with_multi_user_support - and vector_db_config["vector_db_provider"] in vector_dbs_with_multi_user_support + graph_db_config["graph_database_provider"] in GRAPH_DBS_WITH_MULTI_USER_SUPPORT + and vector_db_config["vector_db_provider"] in VECTOR_DBS_WITH_MULTI_USER_SUPPORT ) @@ -69,8 +69,6 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_ """ - base_config = get_base_config() - if not backend_access_control_enabled(): return @@ -79,6 +77,7 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_ # To ensure permissions are enforced properly all datasets will have their own 
databases dataset_database = await get_or_create_dataset_database(dataset, user) + base_config = get_base_config() data_root_directory = os.path.join( base_config.data_root_directory, str(user.tenant_id or user.id) ) @@ -88,15 +87,17 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_ # Set vector and graph database configuration based on dataset database information vector_config = { - "vector_db_url": os.path.join( - databases_directory_path, dataset_database.vector_database_name - ), - "vector_db_key": "", - "vector_db_provider": "lancedb", + "vector_db_provider": dataset_database.vector_database_provider, + "vector_db_url": dataset_database.vector_database_url, + "vector_db_key": dataset_database.vector_database_key, + "vector_db_name": dataset_database.vector_database_name, } graph_config = { - "graph_database_provider": "kuzu", + "graph_database_provider": dataset_database.graph_database_provider, + "graph_database_url": dataset_database.graph_database_url, + "graph_database_name": dataset_database.graph_database_name, + "graph_database_key": dataset_database.graph_database_key, "graph_file_path": os.path.join( databases_directory_path, dataset_database.graph_database_name ), diff --git a/cognee/infrastructure/databases/graph/config.py b/cognee/infrastructure/databases/graph/config.py index b7907313c..23687b359 100644 --- a/cognee/infrastructure/databases/graph/config.py +++ b/cognee/infrastructure/databases/graph/config.py @@ -26,6 +26,7 @@ class GraphConfig(BaseSettings): - graph_database_username - graph_database_password - graph_database_port + - graph_database_key - graph_file_path - graph_model - graph_topology @@ -41,6 +42,7 @@ class GraphConfig(BaseSettings): graph_database_username: str = "" graph_database_password: str = "" graph_database_port: int = 123 + graph_database_key: str = "" graph_file_path: str = "" graph_filename: str = "" graph_model: object = KnowledgeGraph @@ -90,6 +92,7 @@ class 
GraphConfig(BaseSettings): "graph_database_username": self.graph_database_username, "graph_database_password": self.graph_database_password, "graph_database_port": self.graph_database_port, + "graph_database_key": self.graph_database_key, "graph_file_path": self.graph_file_path, "graph_model": self.graph_model, "graph_topology": self.graph_topology, @@ -116,6 +119,7 @@ class GraphConfig(BaseSettings): "graph_database_username": self.graph_database_username, "graph_database_password": self.graph_database_password, "graph_database_port": self.graph_database_port, + "graph_database_key": self.graph_database_key, "graph_file_path": self.graph_file_path, } diff --git a/cognee/infrastructure/databases/graph/get_graph_engine.py b/cognee/infrastructure/databases/graph/get_graph_engine.py index 1ea61d29f..82e3cad6e 100644 --- a/cognee/infrastructure/databases/graph/get_graph_engine.py +++ b/cognee/infrastructure/databases/graph/get_graph_engine.py @@ -33,6 +33,7 @@ def create_graph_engine( graph_database_username="", graph_database_password="", graph_database_port="", + graph_database_key="", ): """ Create a graph engine based on the specified provider type. 
@@ -69,6 +70,7 @@ def create_graph_engine( graph_database_url=graph_database_url, graph_database_username=graph_database_username, graph_database_password=graph_database_password, + database_name=graph_database_name, ) if graph_database_provider == "neo4j": diff --git a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py index 29156025d..3684bb100 100644 --- a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +++ b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py @@ -1,11 +1,15 @@ +import os from uuid import UUID from typing import Union from sqlalchemy import select from sqlalchemy.exc import IntegrityError -from cognee.modules.data.methods import create_dataset +from cognee.base_config import get_base_config +from cognee.modules.data.methods import create_dataset from cognee.infrastructure.databases.relational import get_relational_engine +from cognee.infrastructure.databases.vector import get_vectordb_config +from cognee.infrastructure.databases.graph.config import get_graph_config from cognee.modules.data.methods import get_unique_dataset_id from cognee.modules.users.models import DatasetDatabase from cognee.modules.users.models import User @@ -32,8 +36,32 @@ async def get_or_create_dataset_database( dataset_id = await get_unique_dataset_id(dataset, user) - vector_db_name = f"{dataset_id}.lance.db" - graph_db_name = f"{dataset_id}.pkl" + vector_config = get_vectordb_config() + graph_config = get_graph_config() + + # Note: for hybrid databases both graph and vector DB name have to be the same + if graph_config.graph_database_provider == "kuzu": + graph_db_name = f"{dataset_id}.pkl" + else: + graph_db_name = f"{dataset_id}" + + if vector_config.vector_db_provider == "lancedb": + vector_db_name = f"{dataset_id}.lance.db" + else: + vector_db_name = f"{dataset_id}" + + base_config = get_base_config() + databases_directory_path = 
os.path.join( + base_config.system_root_directory, "databases", str(user.id) + ) + + # Determine vector database URL + if vector_config.vector_db_provider == "lancedb": + vector_db_url = os.path.join(databases_directory_path, vector_config.vector_db_name) + else: + vector_db_url = vector_config.vector_database_url + + # Determine graph database URL async with db_engine.get_async_session() as session: # Create dataset if it doesn't exist @@ -55,6 +83,12 @@ async def get_or_create_dataset_database( dataset_id=dataset_id, vector_database_name=vector_db_name, graph_database_name=graph_db_name, + vector_database_provider=vector_config.vector_db_provider, + graph_database_provider=graph_config.graph_database_provider, + vector_database_url=vector_db_url, + graph_database_url=graph_config.graph_database_url, + vector_database_key=vector_config.vector_db_key, + graph_database_key=graph_config.graph_database_key, ) try: diff --git a/cognee/infrastructure/databases/vector/config.py b/cognee/infrastructure/databases/vector/config.py index b6d3ae644..7d28f1668 100644 --- a/cognee/infrastructure/databases/vector/config.py +++ b/cognee/infrastructure/databases/vector/config.py @@ -18,12 +18,14 @@ class VectorConfig(BaseSettings): Instance variables: - vector_db_url: The URL of the vector database. - vector_db_port: The port for the vector database. + - vector_db_name: The name of the vector database. - vector_db_key: The key for accessing the vector database. - vector_db_provider: The provider for the vector database. 
""" vector_db_url: str = "" vector_db_port: int = 1234 + vector_db_name: str = "" vector_db_key: str = "" vector_db_provider: str = "lancedb" @@ -58,6 +60,7 @@ class VectorConfig(BaseSettings): return { "vector_db_url": self.vector_db_url, "vector_db_port": self.vector_db_port, + "vector_db_name": self.vector_db_name, "vector_db_key": self.vector_db_key, "vector_db_provider": self.vector_db_provider, } diff --git a/cognee/infrastructure/databases/vector/create_vector_engine.py b/cognee/infrastructure/databases/vector/create_vector_engine.py index c54d94f6c..b182f084b 100644 --- a/cognee/infrastructure/databases/vector/create_vector_engine.py +++ b/cognee/infrastructure/databases/vector/create_vector_engine.py @@ -1,5 +1,6 @@ from .supported_databases import supported_databases from .embeddings import get_embedding_engine +from cognee.infrastructure.databases.graph.config import get_graph_context_config from functools import lru_cache @@ -8,6 +9,7 @@ from functools import lru_cache def create_vector_engine( vector_db_provider: str, vector_db_url: str, + vector_db_name: str, vector_db_port: str = "", vector_db_key: str = "", ): @@ -27,6 +29,7 @@ def create_vector_engine( - vector_db_url (str): The URL for the vector database instance. - vector_db_port (str): The port for the vector database instance. Required for some providers. + - vector_db_name (str): The name of the vector database instance. - vector_db_key (str): The API key or access token for the vector database instance. - vector_db_provider (str): The name of the vector database provider to use (e.g., 'pgvector'). 
@@ -45,6 +48,7 @@ def create_vector_engine( url=vector_db_url, api_key=vector_db_key, embedding_engine=embedding_engine, + database_name=vector_db_name, ) if vector_db_provider.lower() == "pgvector": diff --git a/cognee/modules/users/models/DatasetDatabase.py b/cognee/modules/users/models/DatasetDatabase.py index 0d71d8413..25d610ab9 100644 --- a/cognee/modules/users/models/DatasetDatabase.py +++ b/cognee/modules/users/models/DatasetDatabase.py @@ -15,5 +15,14 @@ class DatasetDatabase(Base): vector_database_name = Column(String, unique=True, nullable=False) graph_database_name = Column(String, unique=True, nullable=False) + vector_database_provider = Column(String, unique=False, nullable=False) + graph_database_provider = Column(String, unique=False, nullable=False) + + vector_database_url = Column(String, unique=False, nullable=True) + graph_database_url = Column(String, unique=False, nullable=True) + + vector_database_key = Column(String, unique=False, nullable=True) + graph_database_key = Column(String, unique=False, nullable=True) + created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)) updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc)) diff --git a/cognee/tests/test_parallel_databases.py b/cognee/tests/test_parallel_databases.py index 9a590921a..3164206ed 100755 --- a/cognee/tests/test_parallel_databases.py +++ b/cognee/tests/test_parallel_databases.py @@ -33,11 +33,13 @@ async def main(): "vector_db_url": "cognee1.test", "vector_db_key": "", "vector_db_provider": "lancedb", + "vector_db_name": "", } task_2_config = { "vector_db_url": "cognee2.test", "vector_db_key": "", "vector_db_provider": "lancedb", + "vector_db_name": "", } task_1_graph_config = { From 432d4a15782d64ca785854c6aa3db109b17d6f62 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 11 Nov 2025 19:44:34 +0100 Subject: [PATCH 130/284] feat: Add initial multi tenant neo4j support --- cognee/context_global_variables.py | 4 +- 
.../utils/get_or_create_dataset_database.py | 119 ++++++++++++++---- .../modules/users/models/DatasetDatabase.py | 5 +- 3 files changed, 100 insertions(+), 28 deletions(-) diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py index 62e06fc64..44ead95af 100644 --- a/cognee/context_global_variables.py +++ b/cognee/context_global_variables.py @@ -17,7 +17,7 @@ graph_db_config = ContextVar("graph_db_config", default=None) session_user = ContextVar("session_user", default=None) VECTOR_DBS_WITH_MULTI_USER_SUPPORT = ["lancedb", "falkor"] -GRAPH_DBS_WITH_MULTI_USER_SUPPORT = ["kuzu", "falkor"] +GRAPH_DBS_WITH_MULTI_USER_SUPPORT = ["kuzu", "falkor", "neo4j"] async def set_session_user_context_variable(user): @@ -101,6 +101,8 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_ "graph_file_path": os.path.join( databases_directory_path, dataset_database.graph_database_name ), + "graph_database_username": dataset_database.graph_database_username, + "graph_database_password": dataset_database.graph_database_password, } storage_config = { diff --git a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py index 3684bb100..0a2638dc5 100644 --- a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +++ b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py @@ -39,30 +39,6 @@ async def get_or_create_dataset_database( vector_config = get_vectordb_config() graph_config = get_graph_config() - # Note: for hybrid databases both graph and vector DB name have to be the same - if graph_config.graph_database_provider == "kuzu": - graph_db_name = f"{dataset_id}.pkl" - else: - graph_db_name = f"{dataset_id}" - - if vector_config.vector_db_provider == "lancedb": - vector_db_name = f"{dataset_id}.lance.db" - else: - vector_db_name = f"{dataset_id}" - - base_config = get_base_config() - 
databases_directory_path = os.path.join( - base_config.system_root_directory, "databases", str(user.id) - ) - - # Determine vector database URL - if vector_config.vector_db_provider == "lancedb": - vector_db_url = os.path.join(databases_directory_path, vector_config.vector_db_name) - else: - vector_db_url = vector_config.vector_database_url - - # Determine graph database URL - async with db_engine.get_async_session() as session: # Create dataset if it doesn't exist if isinstance(dataset, str): @@ -77,7 +53,96 @@ async def get_or_create_dataset_database( if existing: return existing + # Note: for hybrid databases both graph and vector DB name have to be the same + if graph_config.graph_database_provider == "kuzu": + graph_db_name = f"{dataset_id}.pkl" + else: + graph_db_name = f"{dataset_id}" + + if vector_config.vector_db_provider == "lancedb": + vector_db_name = f"{dataset_id}.lance.db" + else: + vector_db_name = f"{dataset_id}" + + base_config = get_base_config() + databases_directory_path = os.path.join( + base_config.system_root_directory, "databases", str(user.id) + ) + + # Determine vector database URL + if vector_config.vector_db_provider == "lancedb": + vector_db_url = os.path.join(databases_directory_path, vector_config.vector_db_name) + else: + vector_db_url = vector_config.vector_database_url + + # Determine graph database URL + if graph_config.graph_database_provider == "neo4j": + # Auto deploy instance to Aura DB + # OAuth2 token endpoint + + # Your client credentials + client_id = os.environ.get("NEO4J_CLIENT_ID", None) + client_secret = os.environ.get("NEO4J_CLIENT_SECRET", None) + tenant_id = os.environ.get("NEO4J_TENANT_ID", None) + + # Make the request with HTTP Basic Auth + import requests + + def get_aura_token(client_id: str, client_secret: str) -> dict: + url = "https://api.neo4j.io/oauth/token" + data = { + "grant_type": "client_credentials" + } # sent as application/x-www-form-urlencoded + + resp = requests.post(url, data=data, 
auth=(client_id, client_secret)) + resp.raise_for_status() # raises if the request failed + return resp.json() + + resp = get_aura_token(client_id, client_secret) + + url = "https://api.neo4j.io/v1/instances" + + headers = { + "accept": "application/json", + "Authorization": f"Bearer {resp['access_token']}", + "Content-Type": "application/json", + } + + payload = { + "version": "5", + "region": "europe-west1", + "memory": "1GB", + "name": graph_db_name[0:29], + "type": "professional-db", + "tenant_id": tenant_id, + "cloud_provider": "gcp", + } + + response = requests.post(url, headers=headers, json=payload) + + # Wait for instance to be provisioned + # TODO: Find better way to check when instance is ready + import asyncio + + await asyncio.sleep(180) + + print(response.status_code) + print(response.text) + # TODO: Find better name to name Neo4j instance within 30 character limit + print(graph_db_name[0:29]) + graph_db_name = "neo4j" + graph_db_url = response.json()["data"]["connection_url"] + graph_db_key = resp["access_token"] + graph_db_username = response.json()["data"]["username"] + graph_db_password = response.json()["data"]["password"] + else: + graph_db_url = graph_config.graph_database_url + graph_db_key = graph_config.graph_database_key + graph_db_username = graph_config.graph_database_username + graph_db_password = graph_config.graph_database_password + # If there are no existing rows build a new row + # TODO: Update Dataset Database migrations, also make sure database_name is not unique anymore record = DatasetDatabase( owner_id=user.id, dataset_id=dataset_id, @@ -86,9 +151,11 @@ async def get_or_create_dataset_database( vector_database_provider=vector_config.vector_db_provider, graph_database_provider=graph_config.graph_database_provider, vector_database_url=vector_db_url, - graph_database_url=graph_config.graph_database_url, + graph_database_url=graph_db_url, vector_database_key=vector_config.vector_db_key, - 
graph_database_key=graph_config.graph_database_key, + graph_database_key=graph_db_key, + graph_database_username=graph_db_username, + graph_database_password=graph_db_password, ) try: diff --git a/cognee/modules/users/models/DatasetDatabase.py b/cognee/modules/users/models/DatasetDatabase.py index 25d610ab9..5d2e4fcd5 100644 --- a/cognee/modules/users/models/DatasetDatabase.py +++ b/cognee/modules/users/models/DatasetDatabase.py @@ -13,7 +13,7 @@ class DatasetDatabase(Base): ) vector_database_name = Column(String, unique=True, nullable=False) - graph_database_name = Column(String, unique=True, nullable=False) + graph_database_name = Column(String, unique=False, nullable=False) vector_database_provider = Column(String, unique=False, nullable=False) graph_database_provider = Column(String, unique=False, nullable=False) @@ -24,5 +24,8 @@ class DatasetDatabase(Base): vector_database_key = Column(String, unique=False, nullable=True) graph_database_key = Column(String, unique=False, nullable=True) + graph_database_username = Column(String, unique=False, nullable=True) + graph_database_password = Column(String, unique=False, nullable=True) + created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)) updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc)) From cc0e1a83ab71f4bb47d1ef0d6308eca185294c86 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 11 Nov 2025 19:55:29 +0100 Subject: [PATCH 131/284] refactor: Disable telemetry for all non telemetry tests --- .github/actions/cognee_setup/action.yml | 4 ++++ .github/workflows/basic_tests.yml | 5 +++++ .github/workflows/cli_tests.yml | 3 +++ .github/workflows/db_examples_tests.yml | 6 +++--- .github/workflows/e2e_tests.yml | 2 +- .github/workflows/examples_tests.yml | 11 +++++++++++ 6 files changed, 27 insertions(+), 4 deletions(-) diff --git a/.github/actions/cognee_setup/action.yml b/.github/actions/cognee_setup/action.yml index 4017d524b..bdc0ae690 100644 
--- a/.github/actions/cognee_setup/action.yml +++ b/.github/actions/cognee_setup/action.yml @@ -42,3 +42,7 @@ runs: done fi uv sync --extra api --extra docs --extra evals --extra codegraph --extra ollama --extra dev --extra neo4j --extra redis $EXTRA_ARGS + + - name: Add telemetry identifier for telemetry test and in case telemetry is enabled by accident + run: | + echo "test-machine" > .anon_id diff --git a/.github/workflows/basic_tests.yml b/.github/workflows/basic_tests.yml index b7f324310..98ced21dc 100644 --- a/.github/workflows/basic_tests.yml +++ b/.github/workflows/basic_tests.yml @@ -75,6 +75,7 @@ jobs: name: Run Unit Tests runs-on: ubuntu-22.04 env: + ENV: 'dev' LLM_PROVIDER: openai LLM_MODEL: ${{ secrets.LLM_MODEL }} LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} @@ -104,6 +105,7 @@ jobs: name: Run Integration Tests runs-on: ubuntu-22.04 env: + ENV: 'dev' LLM_PROVIDER: openai LLM_MODEL: ${{ secrets.LLM_MODEL }} LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} @@ -132,6 +134,7 @@ jobs: name: Run Simple Examples runs-on: ubuntu-22.04 env: + ENV: 'dev' LLM_PROVIDER: openai LLM_MODEL: ${{ secrets.LLM_MODEL }} LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} @@ -161,6 +164,7 @@ jobs: name: Run Simple Examples BAML runs-on: ubuntu-22.04 env: + ENV: 'dev' STRUCTURED_OUTPUT_FRAMEWORK: "BAML" BAML_LLM_PROVIDER: openai BAML_LLM_MODEL: ${{ secrets.OPENAI_MODEL }} @@ -198,6 +202,7 @@ jobs: name: Run Basic Graph Tests runs-on: ubuntu-22.04 env: + ENV: 'dev' LLM_PROVIDER: openai LLM_MODEL: ${{ secrets.LLM_MODEL }} LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} diff --git a/.github/workflows/cli_tests.yml b/.github/workflows/cli_tests.yml index 958d341ae..d4f8e5ac0 100644 --- a/.github/workflows/cli_tests.yml +++ b/.github/workflows/cli_tests.yml @@ -39,6 +39,7 @@ jobs: name: CLI Unit Tests runs-on: ubuntu-22.04 env: + ENV: 'dev' LLM_PROVIDER: openai LLM_MODEL: ${{ secrets.LLM_MODEL }} LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} @@ -66,6 +67,7 @@ jobs: name: CLI Integration Tests runs-on: 
ubuntu-22.04 env: + ENV: 'dev' LLM_PROVIDER: openai LLM_MODEL: ${{ secrets.LLM_MODEL }} LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} @@ -93,6 +95,7 @@ jobs: name: CLI Functionality Tests runs-on: ubuntu-22.04 env: + ENV: 'dev' LLM_PROVIDER: openai LLM_MODEL: ${{ secrets.LLM_MODEL }} LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} diff --git a/.github/workflows/db_examples_tests.yml b/.github/workflows/db_examples_tests.yml index 51ac9a82a..c58bc48ef 100644 --- a/.github/workflows/db_examples_tests.yml +++ b/.github/workflows/db_examples_tests.yml @@ -60,7 +60,7 @@ jobs: - name: Run Neo4j Example env: - ENV: dev + ENV: 'dev' LLM_MODEL: ${{ secrets.LLM_MODEL }} LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} LLM_API_KEY: ${{ secrets.LLM_API_KEY }} @@ -95,7 +95,7 @@ jobs: - name: Run Kuzu Example env: - ENV: dev + ENV: 'dev' LLM_MODEL: ${{ secrets.LLM_MODEL }} LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} LLM_API_KEY: ${{ secrets.LLM_API_KEY }} @@ -141,7 +141,7 @@ jobs: - name: Run PGVector Example env: - ENV: dev + ENV: 'dev' LLM_MODEL: ${{ secrets.LLM_MODEL }} LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} LLM_API_KEY: ${{ secrets.LLM_API_KEY }} diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index bfa75f693..584225afe 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -454,7 +454,7 @@ jobs: - name: Run Conversation session tests env: - ENV: dev + ENV: 'dev' LLM_MODEL: ${{ secrets.LLM_MODEL }} LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} LLM_API_KEY: ${{ secrets.LLM_API_KEY }} diff --git a/.github/workflows/examples_tests.yml b/.github/workflows/examples_tests.yml index 36953e259..f7cc278cb 100644 --- a/.github/workflows/examples_tests.yml +++ b/.github/workflows/examples_tests.yml @@ -21,6 +21,7 @@ jobs: - name: Run Multimedia Example env: + ENV: 'dev' LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: uv run python ./examples/python/multimedia_example.py @@ -40,6 +41,7 @@ jobs: - 
name: Run Evaluation Framework Example env: + ENV: 'dev' LLM_MODEL: ${{ secrets.LLM_MODEL }} LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} LLM_API_KEY: ${{ secrets.LLM_API_KEY }} @@ -69,6 +71,7 @@ jobs: - name: Run Descriptive Graph Metrics Example env: + ENV: 'dev' LLM_MODEL: ${{ secrets.LLM_MODEL }} LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} LLM_API_KEY: ${{ secrets.LLM_API_KEY }} @@ -99,6 +102,7 @@ jobs: - name: Run Dynamic Steps Tests env: + ENV: 'dev' OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} LLM_MODEL: ${{ secrets.LLM_MODEL }} LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} @@ -124,6 +128,7 @@ jobs: - name: Run Temporal Example env: + ENV: 'dev' OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} LLM_MODEL: ${{ secrets.LLM_MODEL }} LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} @@ -149,6 +154,7 @@ jobs: - name: Run Ontology Demo Example env: + ENV: 'dev' OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} LLM_MODEL: ${{ secrets.LLM_MODEL }} LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} @@ -174,6 +180,7 @@ jobs: - name: Run Agentic Reasoning Example env: + ENV: 'dev' OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} LLM_MODEL: ${{ secrets.LLM_MODEL }} LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} @@ -199,6 +206,7 @@ jobs: - name: Run Memify Tests env: + ENV: 'dev' OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} LLM_MODEL: ${{ secrets.LLM_MODEL }} LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} @@ -224,6 +232,7 @@ jobs: - name: Run Custom Pipeline Example env: + ENV: 'dev' OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} LLM_MODEL: ${{ secrets.LLM_MODEL }} LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} @@ -249,6 +258,7 @@ jobs: - name: Run Memify Tests env: + ENV: 'dev' OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} LLM_MODEL: ${{ secrets.LLM_MODEL }} LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} @@ -274,6 +284,7 @@ jobs: - name: Run Docling Test env: + ENV: 'dev' OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} LLM_MODEL: ${{ secrets.LLM_MODEL }} LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} From 
ba693b7ef46b617a5aa069ad7bb7bee00fb04586 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 11 Nov 2025 19:58:30 +0100 Subject: [PATCH 132/284] chore: add shell to setting of anon_id in gh action --- .github/actions/cognee_setup/action.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/actions/cognee_setup/action.yml b/.github/actions/cognee_setup/action.yml index bdc0ae690..3f5726015 100644 --- a/.github/actions/cognee_setup/action.yml +++ b/.github/actions/cognee_setup/action.yml @@ -44,5 +44,6 @@ runs: uv sync --extra api --extra docs --extra evals --extra codegraph --extra ollama --extra dev --extra neo4j --extra redis $EXTRA_ARGS - name: Add telemetry identifier for telemetry test and in case telemetry is enabled by accident + shell: bash run: | echo "test-machine" > .anon_id From a0a14e7ccccde894798603947db4f18c06dfc154 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 11 Nov 2025 20:05:47 +0100 Subject: [PATCH 133/284] refactor: Update dataset database class --- cognee/modules/users/models/DatasetDatabase.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/cognee/modules/users/models/DatasetDatabase.py b/cognee/modules/users/models/DatasetDatabase.py index f4b7c2aed..4bbfffe4c 100644 --- a/cognee/modules/users/models/DatasetDatabase.py +++ b/cognee/modules/users/models/DatasetDatabase.py @@ -12,7 +12,7 @@ class DatasetDatabase(Base): UUID, ForeignKey("datasets.id", ondelete="CASCADE"), primary_key=True, index=True ) - vector_database_name = Column(String, unique=True, nullable=False) + vector_database_name = Column(String, unique=False, nullable=False) graph_database_name = Column(String, unique=False, nullable=False) vector_database_provider = Column(String, unique=False, nullable=False) @@ -27,14 +27,5 @@ class DatasetDatabase(Base): graph_database_username = Column(String, unique=False, nullable=True) graph_database_password = Column(String, unique=False, nullable=True) - vector_database_provider = Column(String, 
unique=False, nullable=False) - graph_database_provider = Column(String, unique=False, nullable=False) - - vector_database_url = Column(String, unique=False, nullable=True) - graph_database_url = Column(String, unique=False, nullable=True) - - vector_database_key = Column(String, unique=False, nullable=True) - graph_database_key = Column(String, unique=False, nullable=True) - created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)) updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc)) From b716c2e3c431ce10679a30150afb554d32557212 Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Wed, 12 Nov 2025 13:35:04 +0100 Subject: [PATCH 134/284] Chore: Acceptance Criteria for PRs --- .github/pull_request_template.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 0e6f74188..be9d219c1 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -6,6 +6,14 @@ Please provide a clear, human-generated description of the changes in this PR. DO NOT use AI-generated descriptions. We want to understand your thought process and reasoning. --> +## Acceptance Criteria + + ## Type of Change - [ ] Bug fix (non-breaking change that fixes an issue) From 056424f244fe029cc3acdd0127f70796bac25377 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Wed, 12 Nov 2025 14:34:30 +0000 Subject: [PATCH 135/284] feat: fs-cache (#1645) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Description Implement File-Based Version of the Redis Cache Adapter Description and acceptance criteria: This PR introduces a file-based cache adapter as an alternative to the existing Redis-based adapter. It provides the same core functionality for caching session data and maintaining context across multiple user interactions but stores data locally in files instead of Redis. 
Because the shared Kùzu lock mechanism relies on Redis, it is not supported in this implementation. If a lock is configured, the adapter will raise an error to prevent misconfiguration. You can test this adapter by enabling caching with the following settings: caching=True cache_backend="fs" When running multiple searches in a session, the system should correctly maintain conversational context. For example: - What is XY? - Are you sure? - What was my first question? In this case, the adapter should preserve previous user–Cognee interactions within the cache file so that follow-up queries remain context-aware. ## Type of Change - [ ] Bug fix (non-breaking change that fixes an issue) - [x] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [x] **I have tested my changes thoroughly before submitting this PR** - [x] **This PR contains minimal changes necessary to address the issue/feature** - [x] My code follows the project's coding standards and style guidelines - [x] I have added tests that prove my fix is effective or that my feature works - [x] I have added necessary documentation (if applicable) - [x] All new and existing tests pass - [x] I have searched existing PRs to ensure this change hasn't been submitted already - [x] I have linked any relevant issues in the description - [x] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. 
--------- Co-authored-by: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Co-authored-by: hajdul88 <52442977+hajdul88@users.noreply.github.com> --- .github/workflows/e2e_tests.yml | 63 +++++- .../infrastructure/databases/cache/config.py | 4 +- .../databases/cache/fscache/FsCacheAdapter.py | 151 +++++++++++++ .../databases/cache/get_cache_engine.py | 30 ++- .../databases/exceptions/exceptions.py | 16 ++ cognee/shared/logging_utils.py | 2 + .../databases/cache/test_cache_config.py | 5 + poetry.lock | 206 +++++++++++++++--- pyproject.toml | 7 + uv.lock | 104 +++++++++ 10 files changed, 543 insertions(+), 45 deletions(-) create mode 100644 cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index 584225afe..3dea2548c 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -333,7 +333,7 @@ jobs: python-version: '3.11.x' extra-dependencies: "postgres redis" - - name: Run Concurrent subprocess access test (Kuzu/Lancedb/Postgres) + - name: Run Concurrent subprocess access test (Kuzu/Lancedb/Postgres/Redis) env: ENV: dev LLM_MODEL: ${{ secrets.LLM_MODEL }} @@ -346,6 +346,7 @@ jobs: EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} GRAPH_DATABASE_PROVIDER: 'kuzu' CACHING: true + CACHE_BACKEND: 'redis' SHARED_KUZU_LOCK: true DB_PROVIDER: 'postgres' DB_NAME: 'cognee_db' @@ -411,8 +412,8 @@ jobs: EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} run: uv run python ./cognee/tests/test_feedback_enrichment.py - run_conversation_sessions_test: - name: Conversation sessions test + run_conversation_sessions_test_redis: + name: Conversation sessions test (Redis) runs-on: ubuntu-latest defaults: run: @@ -452,7 +453,7 @@ jobs: python-version: '3.11.x' extra-dependencies: "postgres redis" - - name: Run Conversation session tests + - name: Run Conversation session tests (Redis) env: ENV: 'dev' LLM_MODEL: ${{ secrets.LLM_MODEL }} @@ -465,6 
+466,60 @@ jobs: EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} GRAPH_DATABASE_PROVIDER: 'kuzu' CACHING: true + CACHE_BACKEND: 'redis' + DB_PROVIDER: 'postgres' + DB_NAME: 'cognee_db' + DB_HOST: '127.0.0.1' + DB_PORT: 5432 + DB_USERNAME: cognee + DB_PASSWORD: cognee + run: uv run python ./cognee/tests/test_conversation_history.py + + run_conversation_sessions_test_fs: + name: Conversation sessions test (FS) + runs-on: ubuntu-latest + defaults: + run: + shell: bash + services: + postgres: + image: pgvector/pgvector:pg17 + env: + POSTGRES_USER: cognee + POSTGRES_PASSWORD: cognee + POSTGRES_DB: cognee_db + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 5432:5432 + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' + extra-dependencies: "postgres" + + - name: Run Conversation session tests (FS) + env: + ENV: dev + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + GRAPH_DATABASE_PROVIDER: 'kuzu' + CACHING: true + CACHE_BACKEND: 'fs' DB_PROVIDER: 'postgres' DB_NAME: 'cognee_db' DB_HOST: '127.0.0.1' diff --git a/cognee/infrastructure/databases/cache/config.py b/cognee/infrastructure/databases/cache/config.py index 3a28827fe..88ac05885 100644 --- a/cognee/infrastructure/databases/cache/config.py +++ b/cognee/infrastructure/databases/cache/config.py @@ -1,6 +1,6 @@ from pydantic_settings import BaseSettings, SettingsConfigDict from functools import lru_cache -from typing import Optional +from typing import Optional, Literal class 
CacheConfig(BaseSettings): @@ -15,6 +15,7 @@ class CacheConfig(BaseSettings): - agentic_lock_timeout: Maximum time (in seconds) to wait for the lock release. """ + cache_backend: Literal["redis", "fs"] = "fs" caching: bool = False shared_kuzu_lock: bool = False cache_host: str = "localhost" @@ -28,6 +29,7 @@ class CacheConfig(BaseSettings): def to_dict(self) -> dict: return { + "cache_backend": self.cache_backend, "caching": self.caching, "shared_kuzu_lock": self.shared_kuzu_lock, "cache_host": self.cache_host, diff --git a/cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py b/cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py new file mode 100644 index 000000000..497e6afec --- /dev/null +++ b/cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py @@ -0,0 +1,151 @@ +import asyncio +import json +import os +from datetime import datetime +import time +import threading +import diskcache as dc + +from cognee.infrastructure.databases.cache.cache_db_interface import CacheDBInterface +from cognee.infrastructure.databases.exceptions.exceptions import ( + CacheConnectionError, + SharedKuzuLockRequiresRedisError, +) +from cognee.infrastructure.files.storage.get_storage_config import get_storage_config +from cognee.shared.logging_utils import get_logger + +logger = get_logger("FSCacheAdapter") + + +class FSCacheAdapter(CacheDBInterface): + def __init__(self): + default_key = "sessions_db" + + storage_config = get_storage_config() + data_root_directory = storage_config["data_root_directory"] + cache_directory = os.path.join(data_root_directory, ".cognee_fs_cache", default_key) + os.makedirs(cache_directory, exist_ok=True) + self.cache = dc.Cache(directory=cache_directory) + self.cache.expire() + + logger.debug(f"FSCacheAdapter initialized with cache directory: {cache_directory}") + + def acquire_lock(self): + """Lock acquisition is not available for filesystem cache backend.""" + message = "Shared Kuzu lock requires Redis cache backend." 
+ logger.error(message) + raise SharedKuzuLockRequiresRedisError() + + def release_lock(self): + """Lock release is not available for filesystem cache backend.""" + message = "Shared Kuzu lock requires Redis cache backend." + logger.error(message) + raise SharedKuzuLockRequiresRedisError() + + async def add_qa( + self, + user_id: str, + session_id: str, + question: str, + context: str, + answer: str, + ttl: int | None = 86400, + ): + try: + session_key = f"agent_sessions:{user_id}:{session_id}" + + qa_entry = { + "time": datetime.utcnow().isoformat(), + "question": question, + "context": context, + "answer": answer, + } + + existing_value = self.cache.get(session_key) + if existing_value is not None: + value: list = json.loads(existing_value) + value.append(qa_entry) + else: + value = [qa_entry] + + self.cache.set(session_key, json.dumps(value), expire=ttl) + except Exception as e: + error_msg = f"Unexpected error while adding Q&A to diskcache: {str(e)}" + logger.error(error_msg) + raise CacheConnectionError(error_msg) from e + + async def get_latest_qa(self, user_id: str, session_id: str, last_n: int = 5): + session_key = f"agent_sessions:{user_id}:{session_id}" + value = self.cache.get(session_key) + if value is None: + return None + entries = json.loads(value) + return entries[-last_n:] if len(entries) > last_n else entries + + async def get_all_qas(self, user_id: str, session_id: str): + session_key = f"agent_sessions:{user_id}:{session_id}" + value = self.cache.get(session_key) + if value is None: + return None + return json.loads(value) + + async def close(self): + if self.cache is not None: + self.cache.expire() + self.cache.close() + + +async def main(): + adapter = FSCacheAdapter() + session_id = "demo_session" + user_id = "demo_user_id" + + print("\nAdding sample Q/A pairs...") + await adapter.add_qa( + user_id, + session_id, + "What is Redis?", + "Basic DB context", + "Redis is an in-memory data store.", + ) + await adapter.add_qa( + user_id, + 
session_id, + "Who created Redis?", + "Historical context", + "Salvatore Sanfilippo (antirez).", + ) + + print("\nLatest QA:") + latest = await adapter.get_latest_qa(user_id, session_id) + print(json.dumps(latest, indent=2)) + + print("\nLast 2 QAs:") + last_two = await adapter.get_latest_qa(user_id, session_id, last_n=2) + print(json.dumps(last_two, indent=2)) + + session_id = "session_expire_demo" + + await adapter.add_qa( + user_id, + session_id, + "What is Redis?", + "Database context", + "Redis is an in-memory data store.", + ) + + await adapter.add_qa( + user_id, + session_id, + "Who created Redis?", + "History context", + "Salvatore Sanfilippo (antirez).", + ) + + print(await adapter.get_all_qas(user_id, session_id)) + + await adapter.close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/cognee/infrastructure/databases/cache/get_cache_engine.py b/cognee/infrastructure/databases/cache/get_cache_engine.py index c1fa3311c..f70358607 100644 --- a/cognee/infrastructure/databases/cache/get_cache_engine.py +++ b/cognee/infrastructure/databases/cache/get_cache_engine.py @@ -1,9 +1,11 @@ """Factory to get the appropriate cache coordination engine (e.g., Redis).""" from functools import lru_cache +import os from typing import Optional from cognee.infrastructure.databases.cache.config import get_cache_config from cognee.infrastructure.databases.cache.cache_db_interface import CacheDBInterface +from cognee.infrastructure.databases.cache.fscache.FsCacheAdapter import FSCacheAdapter config = get_cache_config() @@ -33,20 +35,28 @@ def create_cache_engine( Returns: -------- - - CacheDBInterface: An instance of the appropriate cache adapter. :TODO: Now we support only Redis. later if we add more here we can split the logic + - CacheDBInterface: An instance of the appropriate cache adapter. 
""" if config.caching: from cognee.infrastructure.databases.cache.redis.RedisAdapter import RedisAdapter - return RedisAdapter( - host=cache_host, - port=cache_port, - username=cache_username, - password=cache_password, - lock_name=lock_key, - timeout=agentic_lock_expire, - blocking_timeout=agentic_lock_timeout, - ) + if config.cache_backend == "redis": + return RedisAdapter( + host=cache_host, + port=cache_port, + username=cache_username, + password=cache_password, + lock_name=lock_key, + timeout=agentic_lock_expire, + blocking_timeout=agentic_lock_timeout, + ) + elif config.cache_backend == "fs": + return FSCacheAdapter() + else: + raise ValueError( + f"Unsupported cache backend: '{config.cache_backend}'. " + f"Supported backends are: 'redis', 'fs'" + ) else: return None diff --git a/cognee/infrastructure/databases/exceptions/exceptions.py b/cognee/infrastructure/databases/exceptions/exceptions.py index 72b13e3a2..d8dd99c17 100644 --- a/cognee/infrastructure/databases/exceptions/exceptions.py +++ b/cognee/infrastructure/databases/exceptions/exceptions.py @@ -148,3 +148,19 @@ class CacheConnectionError(CogneeConfigurationError): status_code: int = status.HTTP_503_SERVICE_UNAVAILABLE, ): super().__init__(message, name, status_code) + + +class SharedKuzuLockRequiresRedisError(CogneeConfigurationError): + """ + Raised when shared Kuzu locking is requested without configuring the Redis backend. + """ + + def __init__( + self, + message: str = ( + "Shared Kuzu lock requires Redis cache backend. Configure Redis to enable shared Kuzu locking." 
+ ), + name: str = "SharedKuzuLockRequiresRedisError", + status_code: int = status.HTTP_400_BAD_REQUEST, + ): + super().__init__(message, name, status_code) diff --git a/cognee/shared/logging_utils.py b/cognee/shared/logging_utils.py index 0e5120b1d..e8efde72c 100644 --- a/cognee/shared/logging_utils.py +++ b/cognee/shared/logging_utils.py @@ -450,6 +450,8 @@ def setup_logging(log_level=None, name=None): try: msg = self.format(record) stream = self.stream + if hasattr(stream, "closed") and stream.closed: + return stream.write("\n" + msg + self.terminator) self.flush() except Exception: diff --git a/cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py b/cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py index a8d3bda82..837a9955c 100644 --- a/cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +++ b/cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py @@ -8,6 +8,7 @@ def test_cache_config_defaults(): """Test that CacheConfig has the correct default values.""" config = CacheConfig() + assert config.cache_backend == "fs" assert config.caching is False assert config.shared_kuzu_lock is False assert config.cache_host == "localhost" @@ -19,6 +20,7 @@ def test_cache_config_defaults(): def test_cache_config_custom_values(): """Test that CacheConfig accepts custom values.""" config = CacheConfig( + cache_backend="redis", caching=True, shared_kuzu_lock=True, cache_host="redis.example.com", @@ -27,6 +29,7 @@ def test_cache_config_custom_values(): agentic_lock_timeout=180, ) + assert config.cache_backend == "redis" assert config.caching is True assert config.shared_kuzu_lock is True assert config.cache_host == "redis.example.com" @@ -38,6 +41,7 @@ def test_cache_config_custom_values(): def test_cache_config_to_dict(): """Test the to_dict method returns all configuration values.""" config = CacheConfig( + cache_backend="fs", caching=True, shared_kuzu_lock=True, cache_host="test-host", @@ -49,6 +53,7 @@ def 
test_cache_config_to_dict(): config_dict = config.to_dict() assert config_dict == { + "cache_backend": "fs", "caching": True, "shared_kuzu_lock": True, "cache_host": "test-host", diff --git a/poetry.lock b/poetry.lock index 08fd42660..67de51633 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. [[package]] name = "accelerate" @@ -539,7 +539,7 @@ description = "Timeout context manager for asyncio programs" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"redis\" and python_full_version < \"3.11.3\" or python_version == \"3.10\"" +markers = "python_full_version < \"3.11.3\"" files = [ {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"}, {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, @@ -1231,12 +1231,12 @@ version = "0.4.6" description = "Cross-platform colored terminal text." 
optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -groups = ["main"] -markers = "(platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"dev\" or extra == \"chromadb\" or sys_platform == \"win32\") and (platform_system == \"Windows\" or os_name == \"nt\" or extra == \"llama-index\" or extra == \"dev\" or sys_platform == \"win32\")" +groups = ["main", "dev"] files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +markers = {main = "(platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"dev\" or extra == \"chromadb\" or sys_platform == \"win32\") and (platform_system == \"Windows\" or os_name == \"nt\" or extra == \"llama-index\" or extra == \"dev\" or sys_platform == \"win32\")", dev = "sys_platform == \"win32\""} [[package]] name = "coloredlogs" @@ -2347,7 +2347,7 @@ version = "1.3.0" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" -groups = ["main"] +groups = ["main", "dev"] markers = "python_version == \"3.10\"" files = [ {file = "exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10"}, @@ -2408,6 +2408,32 @@ files = [ [package.dependencies] tzdata = "*" +[[package]] +name = "fakeredis" +version = "2.32.0" +description = "Python implementation of redis API, can be used for testing purposes." 
+optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "fakeredis-2.32.0-py3-none-any.whl", hash = "sha256:c9da8228de84060cfdb72c3cf4555c18c59ba7a5ae4d273f75e4822d6f01ecf8"}, + {file = "fakeredis-2.32.0.tar.gz", hash = "sha256:63d745b40eb6c8be4899cf2a53187c097ccca3afbca04fdbc5edc8b936cd1d59"}, +] + +[package.dependencies] +lupa = {version = ">=2.1,<3.0", optional = true, markers = "extra == \"lua\""} +redis = {version = ">=4.3", markers = "python_version > \"3.8\""} +sortedcontainers = ">=2,<3" +typing-extensions = {version = ">=4.7,<5.0", markers = "python_version < \"3.11\""} + +[package.extras] +bf = ["pyprobables (>=0.6)"] +cf = ["pyprobables (>=0.6)"] +json = ["jsonpath-ng (>=1.6,<2.0)"] +lua = ["lupa (>=2.1,<3.0)"] +probabilistic = ["pyprobables (>=0.6)"] +valkey = ["valkey (>=6) ; python_version >= \"3.8\""] + [[package]] name = "fastapi" version = "0.117.1" @@ -2543,6 +2569,7 @@ files = [ {file = "fastuuid-0.12.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9b31dd488d0778c36f8279b306dc92a42f16904cba54acca71e107d65b60b0c"}, {file = "fastuuid-0.12.0-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:b19361ee649365eefc717ec08005972d3d1eb9ee39908022d98e3bfa9da59e37"}, {file = "fastuuid-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:8fc66b11423e6f3e1937385f655bedd67aebe56a3dcec0cb835351cfe7d358c9"}, + {file = "fastuuid-0.12.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:2925f67b88d47cb16aa3eb1ab20fdcf21b94d74490e0818c91ea41434b987493"}, {file = "fastuuid-0.12.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7b15c54d300279ab20a9cc0579ada9c9f80d1bc92997fc61fb7bf3103d7cb26b"}, {file = "fastuuid-0.12.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:458f1bc3ebbd76fdb89ad83e6b81ccd3b2a99fa6707cd3650b27606745cfb170"}, {file = "fastuuid-0.12.0-cp38-cp38-manylinux_2_34_x86_64.whl", hash = "sha256:a8f0f83fbba6dc44271a11b22e15838641b8c45612cdf541b4822a5930f6893c"}, @@ 
-3705,14 +3732,14 @@ type = ["pytest-mypy"] name = "iniconfig" version = "2.1.0" description = "brain-dead simple config-ini parsing" -optional = true +optional = false python-versions = ">=3.8" -groups = ["main"] -markers = "extra == \"deepeval\" or extra == \"dev\"" +groups = ["main", "dev"] files = [ {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"}, {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, ] +markers = {main = "extra == \"deepeval\" or extra == \"dev\""} [[package]] name = "instructor" @@ -4169,6 +4196,8 @@ groups = ["main"] markers = "extra == \"dlt\"" files = [ {file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"}, + {file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"}, + {file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"}, ] [package.dependencies] @@ -5082,6 +5111,104 @@ win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""} [package.extras] dev = ["Sphinx (==8.1.3) ; python_version >= \"3.11\"", "build (==1.2.2) ; python_version >= \"3.11\"", "colorama (==0.4.5) ; python_version < \"3.8\"", "colorama (==0.4.6) ; python_version >= \"3.8\"", "exceptiongroup (==1.1.3) ; python_version >= \"3.7\" and python_version < \"3.11\"", "freezegun (==1.1.0) ; python_version < \"3.8\"", "freezegun (==1.5.0) ; python_version >= \"3.8\"", "mypy (==v0.910) ; python_version < \"3.6\"", "mypy (==v0.971) ; python_version == \"3.6\"", "mypy (==v1.13.0) ; python_version >= \"3.8\"", "mypy (==v1.4.1) ; python_version == \"3.7\"", "myst-parser (==4.0.0) ; python_version >= \"3.11\"", "pre-commit (==4.0.1) ; python_version >= \"3.9\"", "pytest (==6.1.2) ; python_version < \"3.8\"", "pytest (==8.3.2) 
; python_version >= \"3.8\"", "pytest-cov (==2.12.1) ; python_version < \"3.8\"", "pytest-cov (==5.0.0) ; python_version == \"3.8\"", "pytest-cov (==6.0.0) ; python_version >= \"3.9\"", "pytest-mypy-plugins (==1.9.3) ; python_version >= \"3.6\" and python_version < \"3.8\"", "pytest-mypy-plugins (==3.1.0) ; python_version >= \"3.8\"", "sphinx-rtd-theme (==3.0.2) ; python_version >= \"3.11\"", "tox (==3.27.1) ; python_version < \"3.8\"", "tox (==4.23.2) ; python_version >= \"3.8\"", "twine (==6.0.1) ; python_version >= \"3.11\""] +[[package]] +name = "lupa" +version = "2.6" +description = "Python wrapper around Lua and LuaJIT" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "lupa-2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6b3dabda836317e63c5ad052826e156610f356a04b3003dfa0dbe66b5d54d671"}, + {file = "lupa-2.6-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:8726d1c123bbe9fbb974ce29825e94121824e66003038ff4532c14cc2ed0c51c"}, + {file = "lupa-2.6-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:f4e159e7d814171199b246f9235ca8961f6461ea8c1165ab428afa13c9289a94"}, + {file = "lupa-2.6-cp310-cp310-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:202160e80dbfddfb79316692a563d843b767e0f6787bbd1c455f9d54052efa6c"}, + {file = "lupa-2.6-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5deede7c5b36ab64f869dae4831720428b67955b0bb186c8349cf6ea121c852b"}, + {file = "lupa-2.6-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:86f04901f920bbf7c0cac56807dc9597e42347123e6f1f3ca920f15f54188ce5"}, + {file = "lupa-2.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:6deef8f851d6afb965c84849aa5b8c38856942df54597a811ce0369ced678610"}, + {file = "lupa-2.6-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:21f2b5549681c2a13b1170a26159d30875d367d28f0247b81ca347222c755038"}, + {file = 
"lupa-2.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:66eea57630eab5e6f49fdc5d7811c0a2a41f2011be4ea56a087ea76112011eb7"}, + {file = "lupa-2.6-cp310-cp310-win32.whl", hash = "sha256:60a403de8cab262a4fe813085dd77010effa6e2eb1886db2181df803140533b1"}, + {file = "lupa-2.6-cp310-cp310-win_amd64.whl", hash = "sha256:e4656a39d93dfa947cf3db56dc16c7916cb0cc8024acd3a952071263f675df64"}, + {file = "lupa-2.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6d988c0f9331b9f2a5a55186701a25444ab10a1432a1021ee58011499ecbbdd5"}, + {file = "lupa-2.6-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:ebe1bbf48259382c72a6fe363dea61a0fd6fe19eab95e2ae881e20f3654587bf"}, + {file = "lupa-2.6-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:a8fcee258487cf77cdd41560046843bb38c2e18989cd19671dd1e2596f798306"}, + {file = "lupa-2.6-cp311-cp311-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:561a8e3be800827884e767a694727ed8482d066e0d6edfcbf423b05e63b05535"}, + {file = "lupa-2.6-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:af880a62d47991cae78b8e9905c008cbfdc4a3a9723a66310c2634fc7644578c"}, + {file = "lupa-2.6-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:80b22923aa4023c86c0097b235615f89d469a0c4eee0489699c494d3367c4c85"}, + {file = "lupa-2.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:153d2cc6b643f7efb9cfc0c6bb55ec784d5bac1a3660cfc5b958a7b8f38f4a75"}, + {file = "lupa-2.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:3fa8777e16f3ded50b72967dc17e23f5a08e4f1e2c9456aff2ebdb57f5b2869f"}, + {file = "lupa-2.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:8dbdcbe818c02a2f56f5ab5ce2de374dab03e84b25266cfbaef237829bc09b3f"}, + {file = "lupa-2.6-cp311-cp311-win32.whl", hash = "sha256:defaf188fde8f7a1e5ce3a5e6d945e533b8b8d547c11e43b96c9b7fe527f56dc"}, + {file = "lupa-2.6-cp311-cp311-win_amd64.whl", hash = 
"sha256:9505ae600b5c14f3e17e70f87f88d333717f60411faca1ddc6f3e61dce85fa9e"}, + {file = "lupa-2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:47ce718817ef1cc0c40d87c3d5ae56a800d61af00fbc0fad1ca9be12df2f3b56"}, + {file = "lupa-2.6-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:7aba985b15b101495aa4b07112cdc08baa0c545390d560ad5cfde2e9e34f4d58"}, + {file = "lupa-2.6-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:b766f62f95b2739f2248977d29b0722e589dcf4f0ccfa827ccbd29f0148bd2e5"}, + {file = "lupa-2.6-cp312-cp312-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:00a934c23331f94cb51760097ebfab14b005d55a6b30a2b480e3c53dd2fa290d"}, + {file = "lupa-2.6-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:21de9f38bd475303e34a042b7081aabdf50bd9bafd36ce4faea2f90fd9f15c31"}, + {file = "lupa-2.6-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cf3bda96d3fc41237e964a69c23647d50d4e28421111360274d4799832c560e9"}, + {file = "lupa-2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5a76ead245da54801a81053794aa3975f213221f6542d14ec4b859ee2e7e0323"}, + {file = "lupa-2.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:8dd0861741caa20886ddbda0a121d8e52fb9b5bb153d82fa9bba796962bf30e8"}, + {file = "lupa-2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:239e63948b0b23023f81d9a19a395e768ed3da6a299f84e7963b8f813f6e3f9c"}, + {file = "lupa-2.6-cp312-cp312-win32.whl", hash = "sha256:325894e1099499e7a6f9c351147661a2011887603c71086d36fe0f964d52d1ce"}, + {file = "lupa-2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c735a1ce8ee60edb0fe71d665f1e6b7c55c6021f1d340eb8c865952c602cd36f"}, + {file = "lupa-2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:663a6e58a0f60e7d212017d6678639ac8df0119bc13c2145029dcba084391310"}, + {file = "lupa-2.6-cp313-cp313-macosx_11_0_universal2.whl", hash = 
"sha256:d1f5afda5c20b1f3217a80e9bc1b77037f8a6eb11612fd3ada19065303c8f380"}, + {file = "lupa-2.6-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:26f2b3c085fe76e9119e48c1013c1cccdc1f51585d456858290475aa38e7089e"}, + {file = "lupa-2.6-cp313-cp313-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:60d2f902c7b96fb8ab98493dcff315e7bb4d0b44dc9dd76eb37de575025d5685"}, + {file = "lupa-2.6-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a02d25dee3a3250967c36590128d9220ae02f2eda166a24279da0b481519cbff"}, + {file = "lupa-2.6-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6eae1ee16b886b8914ff292dbefbf2f48abfbdee94b33a88d1d5475e02423203"}, + {file = "lupa-2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b0edd5073a4ee74ab36f74fe61450148e6044f3952b8d21248581f3c5d1a58be"}, + {file = "lupa-2.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:0c53ee9f22a8a17e7d4266ad48e86f43771951797042dd51d1494aaa4f5f3f0a"}, + {file = "lupa-2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:de7c0f157a9064a400d828789191a96da7f4ce889969a588b87ec80de9b14772"}, + {file = "lupa-2.6-cp313-cp313-win32.whl", hash = "sha256:ee9523941ae0a87b5b703417720c5d78f72d2f5bc23883a2ea80a949a3ed9e75"}, + {file = "lupa-2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b1335a5835b0a25ebdbc75cf0bda195e54d133e4d994877ef025e218c2e59db9"}, + {file = "lupa-2.6-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:dcb6d0a3264873e1653bc188499f48c1fb4b41a779e315eba45256cfe7bc33c1"}, + {file = "lupa-2.6-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:a37e01f2128f8c36106726cb9d360bac087d58c54b4522b033cc5691c584db18"}, + {file = "lupa-2.6-cp314-cp314-macosx_11_0_x86_64.whl", hash = "sha256:458bd7e9ff3c150b245b0fcfbb9bd2593d1152ea7f0a7b91c1d185846da033fe"}, + {file = "lupa-2.6-cp314-cp314-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = 
"sha256:052ee82cac5206a02df77119c325339acbc09f5ce66967f66a2e12a0f3211cad"}, + {file = "lupa-2.6-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96594eca3c87dd07938009e95e591e43d554c1dbd0385be03c100367141db5a8"}, + {file = "lupa-2.6-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e8faddd9d198688c8884091173a088a8e920ecc96cda2ffed576a23574c4b3f6"}, + {file = "lupa-2.6-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:daebb3a6b58095c917e76ba727ab37b27477fb926957c825205fbda431552134"}, + {file = "lupa-2.6-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:f3154e68972befe0f81564e37d8142b5d5d79931a18309226a04ec92487d4ea3"}, + {file = "lupa-2.6-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e4dadf77b9fedc0bfa53417cc28dc2278a26d4cbd95c29f8927ad4d8fe0a7ef9"}, + {file = "lupa-2.6-cp314-cp314-win32.whl", hash = "sha256:cb34169c6fa3bab3e8ac58ca21b8a7102f6a94b6a5d08d3636312f3f02fafd8f"}, + {file = "lupa-2.6-cp314-cp314-win_amd64.whl", hash = "sha256:b74f944fe46c421e25d0f8692aef1e842192f6f7f68034201382ac440ef9ea67"}, + {file = "lupa-2.6-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0e21b716408a21ab65723f8841cf7f2f37a844b7a965eeabb785e27fca4099cf"}, + {file = "lupa-2.6-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:589db872a141bfff828340079bbdf3e9a31f2689f4ca0d88f97d9e8c2eae6142"}, + {file = "lupa-2.6-cp314-cp314t-macosx_11_0_x86_64.whl", hash = "sha256:cd852a91a4a9d4dcbb9a58100f820a75a425703ec3e3f049055f60b8533b7953"}, + {file = "lupa-2.6-cp314-cp314t-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:0334753be028358922415ca97a64a3048e4ed155413fc4eaf87dd0a7e2752983"}, + {file = "lupa-2.6-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:661d895cd38c87658a34780fac54a690ec036ead743e41b74c3fb81a9e65a6aa"}, + {file = 
"lupa-2.6-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6aa58454ccc13878cc177c62529a2056be734da16369e451987ff92784994ca7"}, + {file = "lupa-2.6-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1425017264e470c98022bba8cff5bd46d054a827f5df6b80274f9cc71dafd24f"}, + {file = "lupa-2.6-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:224af0532d216e3105f0a127410f12320f7c5f1aa0300bdf9646b8d9afb0048c"}, + {file = "lupa-2.6-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9abb98d5a8fd27c8285302e82199f0e56e463066f88f619d6594a450bf269d80"}, + {file = "lupa-2.6-cp314-cp314t-win32.whl", hash = "sha256:1849efeba7a8f6fb8aa2c13790bee988fd242ae404bd459509640eeea3d1e291"}, + {file = "lupa-2.6-cp314-cp314t-win_amd64.whl", hash = "sha256:fc1498d1a4fc028bc521c26d0fad4ca00ed63b952e32fb95949bda76a04bad52"}, + {file = "lupa-2.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9591700991e333b70dd92b48f152eb4731b8b24af671a9f6f721b74d68ed4499"}, + {file = "lupa-2.6-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:ef8dfa7fe08bc3f4591411b8945bbeb15af8512c3e7ad5e9b1e3a9036cdbbce7"}, + {file = "lupa-2.6-cp38-cp38-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:728c466e91174dad238f8a9c1cbdb8e69ffe559df85f87ee76edac3395300949"}, + {file = "lupa-2.6-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c781170bc7134704ae317a66204d30688b41d3e471e17e659987ea4947e11f20"}, + {file = "lupa-2.6-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:241f4ddab33b9a686fc76667241bebc39a06b74ec40d79ec222f5add9000fe57"}, + {file = "lupa-2.6-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:c17f6b6193ced33cc7ca0b2b08b319a1b3501b014a3a3f9999c01cafc04c40f5"}, + {file = "lupa-2.6-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:fa6c1379e83d4104065c151736250a09f3a99e368423c7a20f9c59b15945e9fc"}, + {file = 
"lupa-2.6-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:aef1a8bc10c50695e1a33a07dbef803b93eb97fc150fdb19858d704a603a67dd"}, + {file = "lupa-2.6-cp38-cp38-win32.whl", hash = "sha256:10c191bc1d5565e4360d884bea58320975ddb33270cdf9a9f55d1a1efe79aa03"}, + {file = "lupa-2.6-cp38-cp38-win_amd64.whl", hash = "sha256:05681f8ffb41f0c7fbb9ca859cc3a7e4006e9c6350d25358b535c5295c6a9928"}, + {file = "lupa-2.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8897dc6c3249786b2cdf2f83324febb436193d4581b6a71dea49f77bf8b19bb0"}, + {file = "lupa-2.6-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:4446396ca3830be0c106c70db4b4f622c37b2d447874c07952cafb9c57949a4a"}, + {file = "lupa-2.6-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:5826e687c89995a6eaafeae242071ba16448eec1a9ee8e17ed48551b5d1e21c2"}, + {file = "lupa-2.6-cp39-cp39-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:5871935cb36d1d22f9c04ac0db75c06751bd95edcfa0d9309f732de908e297a9"}, + {file = "lupa-2.6-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:43eb6e43ea8512d0d65b995d36dd9d77aa02598035e25b84c23a1b58700c9fb2"}, + {file = "lupa-2.6-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:559714053018d9885cc8c36a33c5b7eb9aad30fb6357719cac3ce4dc6b39157e"}, + {file = "lupa-2.6-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:57ac88a00ce59bd9d4ddcd4fca8e02564765725f5068786b011c9d1be3de20c5"}, + {file = "lupa-2.6-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:b683fbd867c2e54c44a686361b75eee7e7a790da55afdbe89f1f23b106de0274"}, + {file = "lupa-2.6-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d2f656903a2ed2e074bf2b7d300968028dfa327a45b055be8e3b51ef0b82f9bf"}, + {file = "lupa-2.6-cp39-cp39-win32.whl", hash = "sha256:bf28f68ae231b72008523ab5ac23835ba0f76e0e99ec38b59766080a84eb596a"}, + {file = "lupa-2.6-cp39-cp39-win_amd64.whl", hash = 
"sha256:b4b2e9b3795a9897cf6cfcc58d08210fdc0d13ab47c9a0e13858c68932d8353c"}, + {file = "lupa-2.6.tar.gz", hash = "sha256:9a770a6e89576be3447668d7ced312cd6fd41d3c13c2462c9dc2c2ab570e45d9"}, +] + [[package]] name = "lxml" version = "4.9.4" @@ -7507,7 +7634,7 @@ version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, @@ -8162,14 +8289,14 @@ kaleido = ["kaleido (>=1.0.0)"] name = "pluggy" version = "1.6.0" description = "plugin and hook calling mechanisms for python" -optional = true +optional = false python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"deepeval\" or extra == \"dev\" or extra == \"dlt\" or extra == \"docling\"" +groups = ["main", "dev"] files = [ {file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"}, {file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"}, ] +markers = {main = "extra == \"deepeval\" or extra == \"dev\" or extra == \"dlt\" or extra == \"docling\""} [package.extras] dev = ["pre-commit", "tox"] @@ -8529,6 +8656,7 @@ files = [ {file = "psycopg2-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:0435034157049f6846e95103bd8f5a668788dd913a7c30162ca9503fdf542cb4"}, {file = "psycopg2-2.9.10-cp312-cp312-win32.whl", hash = "sha256:65a63d7ab0e067e2cdb3cf266de39663203d38d6a8ed97f5ca0cb315c73fe067"}, {file = "psycopg2-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:4a579d6243da40a7b3182e0430493dbd55950c493d8c68f4eec0b302f6bbf20e"}, + {file = "psycopg2-2.9.10-cp313-cp313-win_amd64.whl", hash = 
"sha256:91fd603a2155da8d0cfcdbf8ab24a2d54bca72795b90d2a3ed2b6da8d979dee2"}, {file = "psycopg2-2.9.10-cp39-cp39-win32.whl", hash = "sha256:9d5b3b94b79a844a986d029eee38998232451119ad653aea42bb9220a8c5066b"}, {file = "psycopg2-2.9.10-cp39-cp39-win_amd64.whl", hash = "sha256:88138c8dedcbfa96408023ea2b0c369eda40fe5d75002c0964c78f46f11fa442"}, {file = "psycopg2-2.9.10.tar.gz", hash = "sha256:12ec0b40b0273f95296233e8750441339298e6a572f7039da5b260e3c8b60e11"}, @@ -8590,6 +8718,7 @@ files = [ {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"}, {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"}, {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"}, + {file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"}, @@ -9569,14 +9698,14 @@ files = [ name = "pytest" version = "7.4.4" description = "pytest: simple powerful testing with Python" -optional = true +optional = false python-versions = ">=3.7" -groups = ["main"] -markers = "extra == \"deepeval\" or extra == \"dev\"" +groups = ["main", "dev"] files = [ {file = "pytest-7.4.4-py3-none-any.whl", hash = 
"sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, ] +markers = {main = "extra == \"deepeval\" or extra == \"dev\""} [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} @@ -9663,6 +9792,21 @@ files = [ packaging = ">=17.1" pytest = ">=6.2" +[[package]] +name = "pytest-timeout" +version = "2.4.0" +description = "pytest plugin to abort hanging tests" +optional = false +python-versions = ">=3.7" +groups = ["dev"] +files = [ + {file = "pytest_timeout-2.4.0-py3-none-any.whl", hash = "sha256:c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2"}, + {file = "pytest_timeout-2.4.0.tar.gz", hash = "sha256:7e68e90b01f9eff71332b25001f85c75495fc4e3a836701876183c4bcfd0540a"}, +] + +[package.dependencies] +pytest = ">=7.0.0" + [[package]] name = "pytest-xdist" version = "3.8.0" @@ -10245,10 +10389,9 @@ orjson = ["orjson (>=3.9.14,<4)"] name = "redis" version = "5.3.1" description = "Python client for Redis database and key-value store" -optional = true +optional = false python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"redis\"" files = [ {file = "redis-5.3.1-py3-none-any.whl", hash = "sha256:dc1909bd24669cc31b5f67a039700b16ec30571096c5f1f0d9d2324bff31af97"}, {file = "redis-5.3.1.tar.gz", hash = "sha256:ca49577a531ea64039b5a36db3d6cd1a0c7a60c34124d46924a45b956e8cf14c"}, @@ -11478,6 +11621,18 @@ files = [ {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, ] +[[package]] +name = "sortedcontainers" +version = "2.4.0" +description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"}, + {file = 
"sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, +] + [[package]] name = "soupsieve" version = "2.8" @@ -11501,9 +11656,7 @@ groups = ["main"] files = [ {file = "SQLAlchemy-2.0.43-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:21ba7a08a4253c5825d1db389d4299f64a100ef9800e4624c8bf70d8f136e6ed"}, {file = "SQLAlchemy-2.0.43-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:11b9503fa6f8721bef9b8567730f664c5a5153d25e247aadc69247c4bc605227"}, - {file = "SQLAlchemy-2.0.43-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07097c0a1886c150ef2adba2ff7437e84d40c0f7dcb44a2c2b9c905ccfc6361c"}, {file = "SQLAlchemy-2.0.43-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:cdeff998cb294896a34e5b2f00e383e7c5c4ef3b4bfa375d9104723f15186443"}, - {file = "SQLAlchemy-2.0.43-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:bcf0724a62a5670e5718957e05c56ec2d6850267ea859f8ad2481838f889b42c"}, {file = "SQLAlchemy-2.0.43-cp37-cp37m-win32.whl", hash = "sha256:c697575d0e2b0a5f0433f679bda22f63873821d991e95a90e9e52aae517b2e32"}, {file = "SQLAlchemy-2.0.43-cp37-cp37m-win_amd64.whl", hash = "sha256:d34c0f6dbefd2e816e8f341d0df7d4763d382e3f452423e752ffd1e213da2512"}, {file = "sqlalchemy-2.0.43-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:70322986c0c699dca241418fcf18e637a4369e0ec50540a2b907b184c8bca069"}, @@ -11538,20 +11691,12 @@ files = [ {file = "sqlalchemy-2.0.43-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9df7126fd9db49e3a5a3999442cc67e9ee8971f3cb9644250107d7296cb2a164"}, {file = "sqlalchemy-2.0.43-cp313-cp313-win32.whl", hash = "sha256:7f1ac7828857fcedb0361b48b9ac4821469f7694089d15550bbcf9ab22564a1d"}, {file = "sqlalchemy-2.0.43-cp313-cp313-win_amd64.whl", hash = "sha256:971ba928fcde01869361f504fcff3b7143b47d30de188b11c6357c0505824197"}, - {file = "sqlalchemy-2.0.43-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:4e6aeb2e0932f32950cf56a8b4813cb15ff792fc0c9b3752eaf067cfe298496a"}, - {file = "sqlalchemy-2.0.43-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:61f964a05356f4bca4112e6334ed7c208174511bd56e6b8fc86dad4d024d4185"}, {file = "sqlalchemy-2.0.43-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46293c39252f93ea0910aababa8752ad628bcce3a10d3f260648dd472256983f"}, - {file = "sqlalchemy-2.0.43-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:136063a68644eca9339d02e6693932116f6a8591ac013b0014479a1de664e40a"}, {file = "sqlalchemy-2.0.43-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:6e2bf13d9256398d037fef09fd8bf9b0bf77876e22647d10761d35593b9ac547"}, - {file = "sqlalchemy-2.0.43-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:44337823462291f17f994d64282a71c51d738fc9ef561bf265f1d0fd9116a782"}, {file = "sqlalchemy-2.0.43-cp38-cp38-win32.whl", hash = "sha256:13194276e69bb2af56198fef7909d48fd34820de01d9c92711a5fa45497cc7ed"}, {file = "sqlalchemy-2.0.43-cp38-cp38-win_amd64.whl", hash = "sha256:334f41fa28de9f9be4b78445e68530da3c5fa054c907176460c81494f4ae1f5e"}, - {file = "sqlalchemy-2.0.43-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ceb5c832cc30663aeaf5e39657712f4c4241ad1f638d487ef7216258f6d41fe7"}, - {file = "sqlalchemy-2.0.43-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:11f43c39b4b2ec755573952bbcc58d976779d482f6f832d7f33a8d869ae891bf"}, {file = "sqlalchemy-2.0.43-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:413391b2239db55be14fa4223034d7e13325a1812c8396ecd4f2c08696d5ccad"}, - {file = "sqlalchemy-2.0.43-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c379e37b08c6c527181a397212346be39319fb64323741d23e46abd97a400d34"}, {file = "sqlalchemy-2.0.43-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:03d73ab2a37d9e40dec4984d1813d7878e01dbdc742448d44a7341b7a9f408c7"}, - {file = "sqlalchemy-2.0.43-cp39-cp39-musllinux_1_2_x86_64.whl", hash = 
"sha256:8cee08f15d9e238ede42e9bbc1d6e7158d0ca4f176e4eab21f88ac819ae3bd7b"}, {file = "sqlalchemy-2.0.43-cp39-cp39-win32.whl", hash = "sha256:b3edaec7e8b6dc5cd94523c6df4f294014df67097c8217a89929c99975811414"}, {file = "sqlalchemy-2.0.43-cp39-cp39-win_amd64.whl", hash = "sha256:227119ce0a89e762ecd882dc661e0aa677a690c914e358f0dd8932a2e8b2765b"}, {file = "sqlalchemy-2.0.43-py3-none-any.whl", hash = "sha256:1681c21dd2ccee222c2fe0bef671d1aef7c504087c9c4e800371cfcc8ac966fc"}, @@ -11920,7 +12065,7 @@ version = "2.2.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" -groups = ["main"] +groups = ["main", "dev"] markers = "python_version == \"3.10\"" files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, @@ -12392,11 +12537,12 @@ version = "4.15.0" description = "Backported and Experimental Type Hints for Python 3.9+" optional = false python-versions = ">=3.9" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, ] +markers = {dev = "python_version == \"3.10\""} [[package]] name = "typing-inspect" @@ -13527,4 +13673,4 @@ scraping = ["APScheduler", "beautifulsoup4", "lxml", "playwright", "protego", "t [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.14" -content-hash = "9490de8c950400c004a87333eda35311109bc1708a98e053bc2f66d883f4f702" +content-hash = "b6ede4c196d086f7159f84142c16d16fcc19bc73fcb9ab274a3b6351e6fcbb7e" diff --git a/pyproject.toml b/pyproject.toml index 8af35113c..13266f83e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,8 @@ dependencies = [ "websockets>=15.0.1,<16.0.0", "mistralai>=1.9.10", "tenacity>=9.0.0", + "fakeredis[lua]>=2.32.0", + 
"diskcache>=5.6.3", ] [project.optional-dependencies] @@ -198,3 +200,8 @@ exclude = [ [tool.ruff.lint] ignore = ["F401"] + +[dependency-groups] +dev = [ + "pytest-timeout>=2.4.0", +] diff --git a/uv.lock b/uv.lock index e2fc1df83..8c35a3366 100644 --- a/uv.lock +++ b/uv.lock @@ -936,6 +936,8 @@ dependencies = [ { name = "aiohttp" }, { name = "aiosqlite" }, { name = "alembic" }, + { name = "diskcache" }, + { name = "fakeredis", extra = ["lua"] }, { name = "fastapi" }, { name = "fastapi-users", extra = ["sqlalchemy"] }, { name = "fastembed" }, @@ -1097,6 +1099,11 @@ scraping = [ { name = "tavily-python" }, ] +[package.dev-dependencies] +dev = [ + { name = "pytest-timeout" }, +] + [package.metadata] requires-dist = [ { name = "aiofiles", specifier = ">=23.2.1,<24.0.0" }, @@ -1114,8 +1121,10 @@ requires-dist = [ { name = "debugpy", marker = "extra == 'debug'", specifier = ">=1.8.9,<2.0.0" }, { name = "deepeval", marker = "extra == 'deepeval'", specifier = ">=3.0.1,<4" }, { name = "deptry", marker = "extra == 'dev'", specifier = ">=0.20.0,<0.21" }, + { name = "diskcache", specifier = ">=5.6.3" }, { name = "dlt", extras = ["sqlalchemy"], marker = "extra == 'dlt'", specifier = ">=1.9.0,<2" }, { name = "docling", marker = "extra == 'docling'", specifier = ">=2.54" }, + { name = "fakeredis", extras = ["lua"], specifier = ">=2.32.0" }, { name = "fastapi", specifier = ">=0.116.2,<1.0.0" }, { name = "fastapi-users", extras = ["sqlalchemy"], specifier = ">=14.0.1,<15.0.0" }, { name = "fastembed", specifier = "<=0.6.0" }, @@ -1203,6 +1212,9 @@ requires-dist = [ ] provides-extras = ["api", "distributed", "scraping", "neo4j", "neptune", "postgres", "postgres-binary", "notebook", "langchain", "llama-index", "huggingface", "ollama", "mistral", "anthropic", "deepeval", "posthog", "groq", "chromadb", "docs", "codegraph", "evals", "graphiti", "aws", "dlt", "baml", "dev", "debug", "redis", "monitoring", "docling"] +[package.metadata.requires-dev] +dev = [{ name = "pytest-timeout", 
specifier = ">=2.4.0" }] + [[package]] name = "colorama" version = "0.4.6" @@ -2047,6 +2059,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a3/46/8f4097b55e43af39e8e71e1f7aec59ff7398bca54d975c30889bc844719d/faker-37.11.0-py3-none-any.whl", hash = "sha256:1508d2da94dfd1e0087b36f386126d84f8583b3de19ac18e392a2831a6676c57", size = 1975525, upload-time = "2025-10-07T14:48:58.29Z" }, ] +[[package]] +name = "fakeredis" +version = "2.32.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "redis" }, + { name = "sortedcontainers" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e1/2e/94ca3f2ff35f086d7d3eeb924054e328b2ac851f0a20302d942c8d29726c/fakeredis-2.32.0.tar.gz", hash = "sha256:63d745b40eb6c8be4899cf2a53187c097ccca3afbca04fdbc5edc8b936cd1d59", size = 171097, upload-time = "2025-10-07T10:46:58.876Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/1b/84ab7fd197eba5243b6625c78fbcffaa4cf6ac7dda42f95d22165f52187e/fakeredis-2.32.0-py3-none-any.whl", hash = "sha256:c9da8228de84060cfdb72c3cf4555c18c59ba7a5ae4d273f75e4822d6f01ecf8", size = 118422, upload-time = "2025-10-07T10:46:57.643Z" }, +] + +[package.optional-dependencies] +lua = [ + { name = "lupa" }, +] + [[package]] name = "fastapi" version = "0.119.0" @@ -3880,6 +3911,58 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" }, ] +[[package]] +name = "lupa" +version = "2.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b8/1c/191c3e6ec6502e3dbe25a53e27f69a5daeac3e56de1f73c0138224171ead/lupa-2.6.tar.gz", hash = 
"sha256:9a770a6e89576be3447668d7ced312cd6fd41d3c13c2462c9dc2c2ab570e45d9", size = 7240282, upload-time = "2025-10-24T07:20:29.738Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a1/15/713cab5d0dfa4858f83b99b3e0329072df33dc14fc3ebbaa017e0f9755c4/lupa-2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6b3dabda836317e63c5ad052826e156610f356a04b3003dfa0dbe66b5d54d671", size = 954828, upload-time = "2025-10-24T07:17:15.726Z" }, + { url = "https://files.pythonhosted.org/packages/2e/71/704740cbc6e587dd6cc8dabf2f04820ac6a671784e57cc3c29db795476db/lupa-2.6-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:8726d1c123bbe9fbb974ce29825e94121824e66003038ff4532c14cc2ed0c51c", size = 1919259, upload-time = "2025-10-24T07:17:18.586Z" }, + { url = "https://files.pythonhosted.org/packages/eb/18/f248341c423c5d48837e35584c6c3eb4acab7e722b6057d7b3e28e42dae8/lupa-2.6-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:f4e159e7d814171199b246f9235ca8961f6461ea8c1165ab428afa13c9289a94", size = 984998, upload-time = "2025-10-24T07:17:20.428Z" }, + { url = "https://files.pythonhosted.org/packages/44/1e/8a4bd471e018aad76bcb9455d298c2c96d82eced20f2ae8fcec8cd800948/lupa-2.6-cp310-cp310-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:202160e80dbfddfb79316692a563d843b767e0f6787bbd1c455f9d54052efa6c", size = 1174871, upload-time = "2025-10-24T07:17:22.755Z" }, + { url = "https://files.pythonhosted.org/packages/2a/5c/3a3f23fd6a91b0986eea1ceaf82ad3f9b958fe3515a9981fb9c4eb046c8b/lupa-2.6-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5deede7c5b36ab64f869dae4831720428b67955b0bb186c8349cf6ea121c852b", size = 1057471, upload-time = "2025-10-24T07:17:24.908Z" }, + { url = "https://files.pythonhosted.org/packages/45/ac/01be1fed778fb0c8f46ee8cbe344e4d782f6806fac12717f08af87aa4355/lupa-2.6-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:86f04901f920bbf7c0cac56807dc9597e42347123e6f1f3ca920f15f54188ce5", size = 2100592, upload-time = "2025-10-24T07:17:27.089Z" }, + { url = "https://files.pythonhosted.org/packages/3f/6c/1a05bb873e30830f8574e10cd0b4cdbc72e9dbad2a09e25810b5e3b1f75d/lupa-2.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:6deef8f851d6afb965c84849aa5b8c38856942df54597a811ce0369ced678610", size = 1081396, upload-time = "2025-10-24T07:17:29.064Z" }, + { url = "https://files.pythonhosted.org/packages/a2/c2/a19dd80d6dc98b39bbf8135b8198e38aa7ca3360b720eac68d1d7e9286b5/lupa-2.6-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:21f2b5549681c2a13b1170a26159d30875d367d28f0247b81ca347222c755038", size = 1192007, upload-time = "2025-10-24T07:17:31.362Z" }, + { url = "https://files.pythonhosted.org/packages/4f/43/e1b297225c827f55752e46fdbfb021c8982081b0f24490e42776ea69ae3b/lupa-2.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:66eea57630eab5e6f49fdc5d7811c0a2a41f2011be4ea56a087ea76112011eb7", size = 2196661, upload-time = "2025-10-24T07:17:33.484Z" }, + { url = "https://files.pythonhosted.org/packages/2e/8f/2272d429a7fa9dc8dbd6e9c5c9073a03af6007eb22a4c78829fec6a34b80/lupa-2.6-cp310-cp310-win32.whl", hash = "sha256:60a403de8cab262a4fe813085dd77010effa6e2eb1886db2181df803140533b1", size = 1412738, upload-time = "2025-10-24T07:17:35.11Z" }, + { url = "https://files.pythonhosted.org/packages/35/2a/1708911271dd49ad87b4b373b5a4b0e0a0516d3d2af7b76355946c7ee171/lupa-2.6-cp310-cp310-win_amd64.whl", hash = "sha256:e4656a39d93dfa947cf3db56dc16c7916cb0cc8024acd3a952071263f675df64", size = 1656898, upload-time = "2025-10-24T07:17:36.949Z" }, + { url = "https://files.pythonhosted.org/packages/ca/29/1f66907c1ebf1881735afa695e646762c674f00738ebf66d795d59fc0665/lupa-2.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6d988c0f9331b9f2a5a55186701a25444ab10a1432a1021ee58011499ecbbdd5", size = 962875, upload-time = "2025-10-24T07:17:39.107Z" }, + { url = 
"https://files.pythonhosted.org/packages/e6/67/4a748604be360eb9c1c215f6a0da921cd1a2b44b2c5951aae6fb83019d3a/lupa-2.6-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:ebe1bbf48259382c72a6fe363dea61a0fd6fe19eab95e2ae881e20f3654587bf", size = 1935390, upload-time = "2025-10-24T07:17:41.427Z" }, + { url = "https://files.pythonhosted.org/packages/ac/0c/8ef9ee933a350428b7bdb8335a37ef170ab0bb008bbf9ca8f4f4310116b6/lupa-2.6-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:a8fcee258487cf77cdd41560046843bb38c2e18989cd19671dd1e2596f798306", size = 992193, upload-time = "2025-10-24T07:17:43.231Z" }, + { url = "https://files.pythonhosted.org/packages/65/46/e6c7facebdb438db8a65ed247e56908818389c1a5abbf6a36aab14f1057d/lupa-2.6-cp311-cp311-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:561a8e3be800827884e767a694727ed8482d066e0d6edfcbf423b05e63b05535", size = 1165844, upload-time = "2025-10-24T07:17:45.437Z" }, + { url = "https://files.pythonhosted.org/packages/1c/26/9f1154c6c95f175ccbf96aa96c8f569c87f64f463b32473e839137601a8b/lupa-2.6-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:af880a62d47991cae78b8e9905c008cbfdc4a3a9723a66310c2634fc7644578c", size = 1048069, upload-time = "2025-10-24T07:17:47.181Z" }, + { url = "https://files.pythonhosted.org/packages/68/67/2cc52ab73d6af81612b2ea24c870d3fa398443af8e2875e5befe142398b1/lupa-2.6-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:80b22923aa4023c86c0097b235615f89d469a0c4eee0489699c494d3367c4c85", size = 2079079, upload-time = "2025-10-24T07:17:49.755Z" }, + { url = "https://files.pythonhosted.org/packages/2e/dc/f843f09bbf325f6e5ee61730cf6c3409fc78c010d968c7c78acba3019ca7/lupa-2.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:153d2cc6b643f7efb9cfc0c6bb55ec784d5bac1a3660cfc5b958a7b8f38f4a75", size = 1071428, upload-time = "2025-10-24T07:17:51.991Z" }, + { url = 
"https://files.pythonhosted.org/packages/2e/60/37533a8d85bf004697449acb97ecdacea851acad28f2ad3803662487dd2a/lupa-2.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:3fa8777e16f3ded50b72967dc17e23f5a08e4f1e2c9456aff2ebdb57f5b2869f", size = 1181756, upload-time = "2025-10-24T07:17:53.752Z" }, + { url = "https://files.pythonhosted.org/packages/e4/f2/cf29b20dbb4927b6a3d27c339ac5d73e74306ecc28c8e2c900b2794142ba/lupa-2.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:8dbdcbe818c02a2f56f5ab5ce2de374dab03e84b25266cfbaef237829bc09b3f", size = 2175687, upload-time = "2025-10-24T07:17:56.228Z" }, + { url = "https://files.pythonhosted.org/packages/94/7c/050e02f80c7131b63db1474bff511e63c545b5a8636a24cbef3fc4da20b6/lupa-2.6-cp311-cp311-win32.whl", hash = "sha256:defaf188fde8f7a1e5ce3a5e6d945e533b8b8d547c11e43b96c9b7fe527f56dc", size = 1412592, upload-time = "2025-10-24T07:17:59.062Z" }, + { url = "https://files.pythonhosted.org/packages/6f/9a/6f2af98aa5d771cea661f66c8eb8f53772ec1ab1dfbce24126cfcd189436/lupa-2.6-cp311-cp311-win_amd64.whl", hash = "sha256:9505ae600b5c14f3e17e70f87f88d333717f60411faca1ddc6f3e61dce85fa9e", size = 1669194, upload-time = "2025-10-24T07:18:01.647Z" }, + { url = "https://files.pythonhosted.org/packages/94/86/ce243390535c39d53ea17ccf0240815e6e457e413e40428a658ea4ee4b8d/lupa-2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:47ce718817ef1cc0c40d87c3d5ae56a800d61af00fbc0fad1ca9be12df2f3b56", size = 951707, upload-time = "2025-10-24T07:18:03.884Z" }, + { url = "https://files.pythonhosted.org/packages/86/85/cedea5e6cbeb54396fdcc55f6b741696f3f036d23cfaf986d50d680446da/lupa-2.6-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:7aba985b15b101495aa4b07112cdc08baa0c545390d560ad5cfde2e9e34f4d58", size = 1916703, upload-time = "2025-10-24T07:18:05.6Z" }, + { url = "https://files.pythonhosted.org/packages/24/be/3d6b5f9a8588c01a4d88129284c726017b2089f3a3fd3ba8bd977292fea0/lupa-2.6-cp312-cp312-macosx_11_0_x86_64.whl", hash = 
"sha256:b766f62f95b2739f2248977d29b0722e589dcf4f0ccfa827ccbd29f0148bd2e5", size = 985152, upload-time = "2025-10-24T07:18:08.561Z" }, + { url = "https://files.pythonhosted.org/packages/eb/23/9f9a05beee5d5dce9deca4cb07c91c40a90541fc0a8e09db4ee670da550f/lupa-2.6-cp312-cp312-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:00a934c23331f94cb51760097ebfab14b005d55a6b30a2b480e3c53dd2fa290d", size = 1159599, upload-time = "2025-10-24T07:18:10.346Z" }, + { url = "https://files.pythonhosted.org/packages/40/4e/e7c0583083db9d7f1fd023800a9767d8e4391e8330d56c2373d890ac971b/lupa-2.6-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:21de9f38bd475303e34a042b7081aabdf50bd9bafd36ce4faea2f90fd9f15c31", size = 1038686, upload-time = "2025-10-24T07:18:12.112Z" }, + { url = "https://files.pythonhosted.org/packages/1c/9f/5a4f7d959d4feba5e203ff0c31889e74d1ca3153122be4a46dca7d92bf7c/lupa-2.6-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cf3bda96d3fc41237e964a69c23647d50d4e28421111360274d4799832c560e9", size = 2071956, upload-time = "2025-10-24T07:18:14.572Z" }, + { url = "https://files.pythonhosted.org/packages/92/34/2f4f13ca65d01169b1720176aedc4af17bc19ee834598c7292db232cb6dc/lupa-2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5a76ead245da54801a81053794aa3975f213221f6542d14ec4b859ee2e7e0323", size = 1057199, upload-time = "2025-10-24T07:18:16.379Z" }, + { url = "https://files.pythonhosted.org/packages/35/2a/5f7d2eebec6993b0dcd428e0184ad71afb06a45ba13e717f6501bfed1da3/lupa-2.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:8dd0861741caa20886ddbda0a121d8e52fb9b5bb153d82fa9bba796962bf30e8", size = 1173693, upload-time = "2025-10-24T07:18:18.153Z" }, + { url = "https://files.pythonhosted.org/packages/e4/29/089b4d2f8e34417349af3904bb40bec40b65c8731f45e3fd8d497ca573e5/lupa-2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:239e63948b0b23023f81d9a19a395e768ed3da6a299f84e7963b8f813f6e3f9c", size = 2164394, upload-time = "2025-10-24T07:18:20.403Z" }, + { url = "https://files.pythonhosted.org/packages/f3/1b/79c17b23c921f81468a111cad843b076a17ef4b684c4a8dff32a7969c3f0/lupa-2.6-cp312-cp312-win32.whl", hash = "sha256:325894e1099499e7a6f9c351147661a2011887603c71086d36fe0f964d52d1ce", size = 1420647, upload-time = "2025-10-24T07:18:23.368Z" }, + { url = "https://files.pythonhosted.org/packages/b8/15/5121e68aad3584e26e1425a5c9a79cd898f8a152292059e128c206ee817c/lupa-2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c735a1ce8ee60edb0fe71d665f1e6b7c55c6021f1d340eb8c865952c602cd36f", size = 1688529, upload-time = "2025-10-24T07:18:25.523Z" }, + { url = "https://files.pythonhosted.org/packages/28/1d/21176b682ca5469001199d8b95fa1737e29957a3d185186e7a8b55345f2e/lupa-2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:663a6e58a0f60e7d212017d6678639ac8df0119bc13c2145029dcba084391310", size = 947232, upload-time = "2025-10-24T07:18:27.878Z" }, + { url = "https://files.pythonhosted.org/packages/ce/4c/d327befb684660ca13cf79cd1f1d604331808f9f1b6fb6bf57832f8edf80/lupa-2.6-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:d1f5afda5c20b1f3217a80e9bc1b77037f8a6eb11612fd3ada19065303c8f380", size = 1908625, upload-time = "2025-10-24T07:18:29.944Z" }, + { url = "https://files.pythonhosted.org/packages/66/8e/ad22b0a19454dfd08662237a84c792d6d420d36b061f239e084f29d1a4f3/lupa-2.6-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:26f2b3c085fe76e9119e48c1013c1cccdc1f51585d456858290475aa38e7089e", size = 981057, upload-time = "2025-10-24T07:18:31.553Z" }, + { url = "https://files.pythonhosted.org/packages/5c/48/74859073ab276bd0566c719f9ca0108b0cfc1956ca0d68678d117d47d155/lupa-2.6-cp313-cp313-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:60d2f902c7b96fb8ab98493dcff315e7bb4d0b44dc9dd76eb37de575025d5685", size = 1156227, upload-time = "2025-10-24T07:18:33.981Z" }, + { url = 
"https://files.pythonhosted.org/packages/09/6c/0e9ded061916877253c2266074060eb71ed99fb21d73c8c114a76725bce2/lupa-2.6-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a02d25dee3a3250967c36590128d9220ae02f2eda166a24279da0b481519cbff", size = 1035752, upload-time = "2025-10-24T07:18:36.32Z" }, + { url = "https://files.pythonhosted.org/packages/dd/ef/f8c32e454ef9f3fe909f6c7d57a39f950996c37a3deb7b391fec7903dab7/lupa-2.6-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6eae1ee16b886b8914ff292dbefbf2f48abfbdee94b33a88d1d5475e02423203", size = 2069009, upload-time = "2025-10-24T07:18:38.072Z" }, + { url = "https://files.pythonhosted.org/packages/53/dc/15b80c226a5225815a890ee1c11f07968e0aba7a852df41e8ae6fe285063/lupa-2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b0edd5073a4ee74ab36f74fe61450148e6044f3952b8d21248581f3c5d1a58be", size = 1056301, upload-time = "2025-10-24T07:18:40.165Z" }, + { url = "https://files.pythonhosted.org/packages/31/14/2086c1425c985acfb30997a67e90c39457122df41324d3c179d6ee2292c6/lupa-2.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:0c53ee9f22a8a17e7d4266ad48e86f43771951797042dd51d1494aaa4f5f3f0a", size = 1170673, upload-time = "2025-10-24T07:18:42.426Z" }, + { url = "https://files.pythonhosted.org/packages/10/e5/b216c054cf86576c0191bf9a9f05de6f7e8e07164897d95eea0078dca9b2/lupa-2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:de7c0f157a9064a400d828789191a96da7f4ce889969a588b87ec80de9b14772", size = 2162227, upload-time = "2025-10-24T07:18:46.112Z" }, + { url = "https://files.pythonhosted.org/packages/59/2f/33ecb5bedf4f3bc297ceacb7f016ff951331d352f58e7e791589609ea306/lupa-2.6-cp313-cp313-win32.whl", hash = "sha256:ee9523941ae0a87b5b703417720c5d78f72d2f5bc23883a2ea80a949a3ed9e75", size = 1419558, upload-time = "2025-10-24T07:18:48.371Z" }, + { url = 
"https://files.pythonhosted.org/packages/f9/b4/55e885834c847ea610e111d87b9ed4768f0afdaeebc00cd46810f25029f6/lupa-2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b1335a5835b0a25ebdbc75cf0bda195e54d133e4d994877ef025e218c2e59db9", size = 1683424, upload-time = "2025-10-24T07:18:50.976Z" }, +] + [[package]] name = "lxml" version = "4.9.4" @@ -6996,6 +7079,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/25/14/e02206388902a828cc26894996dfc68eec50f7583bcddc4b5605d0c18b51/pytest_rerunfailures-12.0-py3-none-any.whl", hash = "sha256:9a1afd04e21b8177faf08a9bbbf44de7a0fe3fc29f8ddbe83b9684bd5f8f92a9", size = 12977, upload-time = "2023-07-05T05:53:43.909Z" }, ] +[[package]] +name = "pytest-timeout" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ac/82/4c9ecabab13363e72d880f2fb504c5f750433b2b6f16e99f4ec21ada284c/pytest_timeout-2.4.0.tar.gz", hash = "sha256:7e68e90b01f9eff71332b25001f85c75495fc4e3a836701876183c4bcfd0540a", size = 17973, upload-time = "2025-05-05T19:44:34.99Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/b6/3127540ecdf1464a00e5a01ee60a1b09175f6913f0644ac748494d9c4b21/pytest_timeout-2.4.0-py3-none-any.whl", hash = "sha256:c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2", size = 14382, upload-time = "2025-05-05T19:44:33.502Z" }, +] + [[package]] name = "pytest-xdist" version = "3.8.0" @@ -8234,6 +8329,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, ] +[[package]] +name = "sortedcontainers" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594, upload-time = "2021-05-16T22:03:42.897Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575, upload-time = "2021-05-16T22:03:41.177Z" }, +] + [[package]] name = "soupsieve" version = "2.8" From b017fcc8d0030ee2fef9929d020c1bb3d8d15f12 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 12 Nov 2025 17:58:27 +0100 Subject: [PATCH 136/284] refactor: Make neo4j auto scaling more readable --- .../utils/get_or_create_dataset_database.py | 302 ++++++++++-------- 1 file changed, 166 insertions(+), 136 deletions(-) diff --git a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py index 27c0d62a3..84742748d 100644 --- a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +++ b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py @@ -1,6 +1,8 @@ import os +import asyncio +import requests from uuid import UUID -from typing import Union +from typing import Union, Optional from sqlalchemy import select from sqlalchemy.exc import IntegrityError @@ -15,6 +17,157 @@ from cognee.modules.users.models import DatasetDatabase from cognee.modules.users.models import User +async def _get_vector_db_info(dataset_id: UUID, user: User) -> dict: + vector_config = get_vectordb_config() + + base_config = get_base_config() + databases_directory_path = os.path.join( + base_config.system_root_directory, "databases", str(user.id) + ) + + # Determine vector configuration + if vector_config.vector_db_provider == "lancedb": 
+ vector_db_name = f"{dataset_id}.lance.db" + vector_db_url = os.path.join(databases_directory_path, vector_db_name) + else: + # Note: for hybrid databases both graph and vector DB name have to be the same + vector_db_name = vector_config.vector_db_name + vector_db_url = vector_config.vector_database_url + + return { + "vector_database_name": vector_db_name, + "vector_database_url": vector_db_url, + "vector_database_provider": vector_config.vector_db_provider, + "vector_database_key": vector_config.vector_db_key, + } + + +async def _get_graph_db_info(dataset_id: UUID, user: User) -> dict: + graph_config = get_graph_config() + + # Determine graph database URL + if graph_config.graph_database_provider == "neo4j": + graph_db_name = f"{dataset_id}" + # Auto deploy instance to Aura DB + # OAuth2 token endpoint + + # Your client credentials + client_id = os.environ.get("NEO4J_CLIENT_ID", None) + client_secret = os.environ.get("NEO4J_CLIENT_SECRET", None) + tenant_id = os.environ.get("NEO4J_TENANT_ID", None) + + # Make the request with HTTP Basic Auth + def get_aura_token(client_id: str, client_secret: str) -> dict: + url = "https://api.neo4j.io/oauth/token" + data = {"grant_type": "client_credentials"} # sent as application/x-www-form-urlencoded + + resp = requests.post(url, data=data, auth=(client_id, client_secret)) + resp.raise_for_status() # raises if the request failed + return resp.json() + + resp = get_aura_token(client_id, client_secret) + + url = "https://api.neo4j.io/v1/instances" + + headers = { + "accept": "application/json", + "Authorization": f"Bearer {resp['access_token']}", + "Content-Type": "application/json", + } + + payload = { + "version": "5", + "region": "europe-west1", + "memory": "1GB", + "name": graph_db_name[0:29], + "type": "professional-db", + "tenant_id": tenant_id, + "cloud_provider": "gcp", + } + + response = requests.post(url, headers=headers, json=payload) + + print(response.status_code) + print(response.text) + # TODO: Find better name 
to name Neo4j instance within 30 character limit + print(graph_db_name[0:29]) + graph_db_name = "neo4j" + graph_db_url = response.json()["data"]["connection_url"] + graph_db_key = resp["access_token"] + graph_db_username = response.json()["data"]["username"] + graph_db_password = response.json()["data"]["password"] + + async def _wait_for_neo4j_instance_provisioning(instance_id: str, headers: dict): + # Poll until the instance is running + status_url = f"https://api.neo4j.io/v1/instances/{instance_id}" + status = "" + for attempt in range(30): # Try for up to ~5 minutes + status_resp = requests.get(status_url, headers=headers) + status = status_resp.json()["data"]["status"] + if status.lower() == "running": + return + await asyncio.sleep(10) + raise TimeoutError( + f"Neo4j instance '{graph_db_name}' did not become ready within 5 minutes. Status: {status}" + ) + + instance_id = response.json()["data"]["id"] + await _wait_for_neo4j_instance_provisioning(instance_id, headers) + + elif graph_config.graph_database_provider == "kuzu": + # TODO: Add graph file path info for kuzu (also in DatasetDatabase model) + graph_db_name = f"{dataset_id}.pkl" + graph_db_url = graph_config.graph_database_url + graph_db_key = graph_config.graph_database_key + graph_db_username = graph_config.graph_database_username + graph_db_password = graph_config.graph_database_password + elif graph_config.graph_database_provider == "falkor": + # Note: for hybrid databases both graph and vector DB name have to be the same + graph_db_name = f"{dataset_id}" + graph_db_url = graph_config.graph_database_url + graph_db_key = graph_config.graph_database_key + graph_db_username = graph_config.graph_database_username + graph_db_password = graph_config.graph_database_password + else: + raise EnvironmentError( + f"Unsupported graph database provider for backend access control: {graph_config.graph_database_provider}" + ) + + return { + "graph_database_name": graph_db_name, + "graph_database_url": graph_db_url, 
+ "graph_database_provider": graph_config.graph_database_provider, + "graph_database_key": graph_db_key, + "graph_database_username": graph_db_username, + "graph_database_password": graph_db_password, + } + + +async def _existing_dataset_database( + dataset_id: UUID, + user: User, +) -> Optional[DatasetDatabase]: + """ + Check if a DatasetDatabase row already exists for the given owner + dataset. + Return None if it doesn't exist, return the row if it does. + Args: + dataset_id: + user: + + Returns: + DatasetDatabase or None + """ + db_engine = get_relational_engine() + + async with db_engine.get_async_session() as session: + stmt = select(DatasetDatabase).where( + DatasetDatabase.owner_id == user.id, + DatasetDatabase.dataset_id == dataset_id, + ) + existing: DatasetDatabase = await session.scalar(stmt) + return existing + + async def get_or_create_dataset_database( dataset: Union[str, UUID], user: User, @@ -36,150 +189,27 @@ async def get_or_create_dataset_database( dataset_id = await get_unique_dataset_id(dataset, user) - vector_config = get_vectordb_config() - graph_config = get_graph_config() + # If dataset is given as name make sure the dataset is created first + if isinstance(dataset, str): + async with db_engine.get_async_session() as session: + await create_dataset(dataset, user, session) - # Note: for hybrid databases both graph and vector DB name have to be the same - if graph_config.graph_database_provider == "kuzu": - graph_db_name = f"{dataset_id}.pkl" - else: - graph_db_name = f"{dataset_id}" + # If dataset database already exists return it + existing_dataset_database = await _existing_dataset_database(dataset_id, user) + if existing_dataset_database: + return existing_dataset_database - if vector_config.vector_db_provider == "lancedb": - vector_db_name = f"{dataset_id}.lance.db" - else: - vector_db_name = f"{dataset_id}" - - base_config = get_base_config() - databases_directory_path = os.path.join( - base_config.system_root_directory, "databases", 
str(user.id) - ) - - # Determine vector database URL - if vector_config.vector_db_provider == "lancedb": - vector_db_url = os.path.join(databases_directory_path, vector_config.vector_db_name) - else: - vector_db_url = vector_config.vector_database_url - - # Determine graph database URL + graph_config_dict = await _get_graph_db_info(dataset_id, user) + vector_config_dict = await _get_vector_db_info(dataset_id, user) async with db_engine.get_async_session() as session: - # Create dataset if it doesn't exist - if isinstance(dataset, str): - dataset = await create_dataset(dataset, user, session) - - # Try to fetch an existing row first - stmt = select(DatasetDatabase).where( - DatasetDatabase.owner_id == user.id, - DatasetDatabase.dataset_id == dataset_id, - ) - existing: DatasetDatabase = await session.scalar(stmt) - if existing: - return existing - - # Note: for hybrid databases both graph and vector DB name have to be the same - if graph_config.graph_database_provider == "kuzu": - graph_db_name = f"{dataset_id}.pkl" - else: - graph_db_name = f"{dataset_id}" - - if vector_config.vector_db_provider == "lancedb": - vector_db_name = f"{dataset_id}.lance.db" - else: - vector_db_name = f"{dataset_id}" - - base_config = get_base_config() - databases_directory_path = os.path.join( - base_config.system_root_directory, "databases", str(user.id) - ) - - # Determine vector database URL - if vector_config.vector_db_provider == "lancedb": - vector_db_url = os.path.join(databases_directory_path, vector_config.vector_db_name) - else: - vector_db_url = vector_config.vector_database_url - - # Determine graph database URL - if graph_config.graph_database_provider == "neo4j": - # Auto deploy instance to Aura DB - # OAuth2 token endpoint - - # Your client credentials - client_id = os.environ.get("NEO4J_CLIENT_ID", None) - client_secret = os.environ.get("NEO4J_CLIENT_SECRET", None) - tenant_id = os.environ.get("NEO4J_TENANT_ID", None) - - # Make the request with HTTP Basic Auth - import 
requests - - def get_aura_token(client_id: str, client_secret: str) -> dict: - url = "https://api.neo4j.io/oauth/token" - data = { - "grant_type": "client_credentials" - } # sent as application/x-www-form-urlencoded - - resp = requests.post(url, data=data, auth=(client_id, client_secret)) - resp.raise_for_status() # raises if the request failed - return resp.json() - - resp = get_aura_token(client_id, client_secret) - - url = "https://api.neo4j.io/v1/instances" - - headers = { - "accept": "application/json", - "Authorization": f"Bearer {resp['access_token']}", - "Content-Type": "application/json", - } - - payload = { - "version": "5", - "region": "europe-west1", - "memory": "1GB", - "name": graph_db_name[0:29], - "type": "professional-db", - "tenant_id": tenant_id, - "cloud_provider": "gcp", - } - - response = requests.post(url, headers=headers, json=payload) - - # Wait for instance to be provisioned - # TODO: Find better way to check when instance is ready - import asyncio - - await asyncio.sleep(180) - - print(response.status_code) - print(response.text) - # TODO: Find better name to name Neo4j instance within 30 character limit - print(graph_db_name[0:29]) - graph_db_name = "neo4j" - graph_db_url = response.json()["data"]["connection_url"] - graph_db_key = resp["access_token"] - graph_db_username = response.json()["data"]["username"] - graph_db_password = response.json()["data"]["password"] - else: - graph_db_url = graph_config.graph_database_url - graph_db_key = graph_config.graph_database_key - graph_db_username = graph_config.graph_database_username - graph_db_password = graph_config.graph_database_password - # If there are no existing rows build a new row # TODO: Update Dataset Database migrations, also make sure database_name is not unique anymore record = DatasetDatabase( owner_id=user.id, dataset_id=dataset_id, - vector_database_name=vector_db_name, - graph_database_name=graph_db_name, - vector_database_provider=vector_config.vector_db_provider, - 
graph_database_provider=graph_config.graph_database_provider, - vector_database_url=vector_db_url, - graph_database_url=graph_db_url, - vector_database_key=vector_config.vector_db_key, - graph_database_key=graph_db_key, - graph_database_username=graph_db_username, - graph_database_password=graph_db_password, + **graph_config_dict, # Unpack graph db config + **vector_config_dict, # Unpack vector db config ) try: From 0176cd5a6890f7f2e8271ca6baee566fc987fd99 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 12 Nov 2025 18:01:44 +0100 Subject: [PATCH 137/284] refactor: Add todo point --- .../databases/utils/get_or_create_dataset_database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py index 84742748d..ab56df787 100644 --- a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +++ b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py @@ -137,7 +137,7 @@ async def _get_graph_db_info(dataset_id: UUID, user: User) -> dict: "graph_database_name": graph_db_name, "graph_database_url": graph_db_url, "graph_database_provider": graph_config.graph_database_provider, - "graph_database_key": graph_db_key, + "graph_database_key": graph_db_key, # TODO: Hashing of keys/passwords in relational DB "graph_database_username": graph_db_username, "graph_database_password": graph_db_password, } From 6bb642d6b828a39c97689ce2b5199dfc1b3f1a81 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 12 Nov 2025 21:24:40 +0100 Subject: [PATCH 138/284] refactor: Start adding multi-user functions to db interfaces --- .../utils/get_or_create_dataset_database.py | 12 +++---- .../vector/lancedb/LanceDBAdapter.py | 22 ++++++++++++ .../databases/vector/vector_db_interface.py | 34 +++++++++++++++++++ 3 files changed, 61 insertions(+), 7 deletions(-) diff --git 
a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py index ab56df787..a292d2f5b 100644 --- a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +++ b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py @@ -20,15 +20,13 @@ from cognee.modules.users.models import User async def _get_vector_db_info(dataset_id: UUID, user: User) -> dict: vector_config = get_vectordb_config() - base_config = get_base_config() - databases_directory_path = os.path.join( - base_config.system_root_directory, "databases", str(user.id) - ) - # Determine vector configuration if vector_config.vector_db_provider == "lancedb": - vector_db_name = f"{dataset_id}.lance.db" - vector_db_url = os.path.join(databases_directory_path, vector_db_name) + # TODO: Have the create_database method be called from interface adapter automatically for all providers instead of specifically here + from cognee.infrastructure.databases.vector.lancedb.LanceDBAdapter import LanceDBAdapter + + return await LanceDBAdapter.create_database(dataset_id, user) + else: # Note: for hybrid databases both graph and vector DB name have to be the same vector_db_name = vector_config.vector_db_name diff --git a/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py b/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py index 30631ac4c..f2d8fcc09 100644 --- a/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +++ b/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py @@ -1,10 +1,15 @@ import asyncio from os import path +import os +from uuid import UUID import lancedb from pydantic import BaseModel from lancedb.pydantic import LanceModel, Vector from typing import Generic, List, Optional, TypeVar, Union, get_args, get_origin, get_type_hints +from cognee.base_config import get_base_config +from cognee.infrastructure.databases.vector import 
get_vectordb_config +from cognee.modules.users.models import User from cognee.infrastructure.databases.exceptions import MissingQueryParameterError from cognee.infrastructure.engine import DataPoint from cognee.infrastructure.engine.utils import parse_id @@ -357,3 +362,20 @@ class LanceDBAdapter(VectorDBInterface): }, exclude_fields=["metadata"] + related_models_fields, ) + + @classmethod + async def create_database(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict: + vector_config = get_vectordb_config() + base_config = get_base_config() + databases_directory_path = os.path.join( + base_config.system_root_directory, "databases", str(user.id) + ) + + vector_db_name = f"{dataset_id}.lance.db" + + return { + "vector_database_name": vector_db_name, + "vector_database_url": os.path.join(databases_directory_path, vector_db_name), + "vector_database_provider": vector_config.vector_db_provider, + "vector_database_key": vector_config.vector_db_key, + } diff --git a/cognee/infrastructure/databases/vector/vector_db_interface.py b/cognee/infrastructure/databases/vector/vector_db_interface.py index 3a3df62eb..b89818275 100644 --- a/cognee/infrastructure/databases/vector/vector_db_interface.py +++ b/cognee/infrastructure/databases/vector/vector_db_interface.py @@ -2,6 +2,8 @@ from typing import List, Protocol, Optional, Union, Any from abc import abstractmethod from cognee.infrastructure.engine import DataPoint from .models.PayloadSchema import PayloadSchema +from uuid import UUID +from cognee.modules.users.models import User class VectorDBInterface(Protocol): @@ -217,3 +219,35 @@ class VectorDBInterface(Protocol): - Any: The schema object suitable for this vector database """ return model_type + + @classmethod + async def create_database(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict: + """ + Return a dictionary with connection info for a vector database for the given dataset and user. 
+ Function should auto handle deploying of the actual database if needed. + Needed for Cognee multi-tenant/multi-user and backend access control support. + + Dictionary returned from this function will be used to create a DatasetDatabase row in the relational database. + From which internal mapping of dataset -> database connection info will be done. + + Each dataset needs to map to a unique vector database instance when backend access control is enabled. + + Args: + dataset_id: UUID of the dataset if needed by the database creation logic + user: User object if needed by the database creation logic + Returns: + dict: Connection info for the created vector database instance. + """ + pass + + async def delete_database(self, dataset_id: UUID, user: User) -> None: + """ + Delete the vector database instance for the given dataset and user. + Function should auto handle deleting of the actual database. + Needed for maintaining a database for Cognee multi-tenant/multi-user and backend access control. + + Args: + dataset_id: UUID of the dataset + user: User object + """ + pass From a5bd504daa688efcbf7358ba2fce74a4da359ce4 Mon Sep 17 00:00:00 2001 From: Igor Ilic <30923996+dexters1@users.noreply.github.com> Date: Wed, 12 Nov 2025 21:32:22 +0100 Subject: [PATCH 139/284] Relational DB migration test search (#1752) ## Description Add deterministic Cognee search test after rel DB migration. Test gathers all relevant relationships regarding Customers and their Invoices from relational DB that was migrated and then tries to get the same results with Cognee search. 
## Type of Change - [ ] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [ ] **I have tested my changes thoroughly before submitting this PR** - [ ] **This PR contains minimal changes necessary to address the issue/feature** - [ ] My code follows the project's coding standards and style guidelines - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have added necessary documentation (if applicable) - [ ] All new and existing tests pass - [ ] I have searched existing PRs to ensure this change hasn't been submitted already - [ ] I have linked any relevant issues in the description - [ ] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. 
--- cognee/tests/test_relational_db_migration.py | 53 +++++++++++++++++++- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/cognee/tests/test_relational_db_migration.py b/cognee/tests/test_relational_db_migration.py index 4557e9e2f..ae06e7c5d 100644 --- a/cognee/tests/test_relational_db_migration.py +++ b/cognee/tests/test_relational_db_migration.py @@ -1,6 +1,5 @@ import pathlib import os -from typing import List from cognee.infrastructure.databases.graph import get_graph_engine from cognee.infrastructure.databases.relational import ( get_migration_relational_engine, @@ -10,7 +9,7 @@ from cognee.infrastructure.databases.vector.pgvector import ( create_db_and_tables as create_pgvector_db_and_tables, ) from cognee.tasks.ingestion import migrate_relational_database -from cognee.modules.search.types import SearchResult, SearchType +from cognee.modules.search.types import SearchType import cognee @@ -274,6 +273,55 @@ async def test_schema_only_migration(): print(f"Edge counts: {edge_counts}") +async def test_search_result_quality(): + from cognee.infrastructure.databases.relational import ( + get_migration_relational_engine, + ) + + # Get relational database with original data + migration_engine = get_migration_relational_engine() + from sqlalchemy import text + + async with migration_engine.engine.connect() as conn: + result = await conn.execute( + text(""" + SELECT + c.CustomerId, + c.FirstName, + c.LastName, + GROUP_CONCAT(i.InvoiceId, ',') AS invoice_ids + FROM Customer AS c + LEFT JOIN Invoice AS i ON c.CustomerId = i.CustomerId + GROUP BY c.CustomerId, c.FirstName, c.LastName + """) + ) + + for row in result: + # Get expected invoice IDs from relational DB for each Customer + customer_id = row.CustomerId + invoice_ids = row.invoice_ids.split(",") if row.invoice_ids else [] + print(f"Relational DB Customer {customer_id}: {invoice_ids}") + + # Use Cognee search to get invoice IDs for the same Customer but by providing Customer name + search_results = 
await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, + query_text=f"List me all the invoices of Customer:{row.FirstName} {row.LastName}.", + top_k=50, + system_prompt="Just return me the invoiceID as a number without any text. This is an example output: ['1', '2', '3']. Where 1, 2, 3 are invoiceIDs of an invoice", + ) + print(f"Cognee search result: {search_results}") + + import ast + + lst = ast.literal_eval(search_results[0]) # converts string -> Python list + # Transfrom both lists to int for comparison, sorting and type consistency + lst = sorted([int(x) for x in lst]) + invoice_ids = sorted([int(x) for x in invoice_ids]) + assert lst == invoice_ids, ( + f"Search results {lst} do not match expected invoice IDs {invoice_ids} for Customer:{customer_id}" + ) + + async def test_migration_sqlite(): database_to_migrate_path = os.path.join(pathlib.Path(__file__).parent, "test_data/") @@ -286,6 +334,7 @@ async def test_migration_sqlite(): ) await relational_db_migration() + await test_search_result_quality() await test_schema_only_migration() From f9cde2f375be2accf6c9bd7fb5f5c681971f692a Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Thu, 13 Nov 2025 13:35:07 +0100 Subject: [PATCH 140/284] Fix: Remove cognee script from pyproject.toml --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 13266f83e..2436911e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -156,7 +156,6 @@ Homepage = "https://www.cognee.ai" Repository = "https://github.com/topoteretes/cognee" [project.scripts] -cognee = "cognee.cli._cognee:main" cognee-cli = "cognee.cli._cognee:main" [build-system] From 3acb581bd03e507885556b6709d3465671b2497c Mon Sep 17 00:00:00 2001 From: martin0731 Date: Thu, 13 Nov 2025 08:31:15 -0500 Subject: [PATCH 141/284] Removed check_permissions_on_dataset.py and related references --- cognee/api/v1/cognify/cognify.py | 25 +++++++----------- .../task_getters/get_cascade_graph_tasks.py | 2 -- 
.../get_default_tasks_by_indices.py | 8 +++--- cognee/tasks/documents/__init__.py | 1 - .../documents/check_permissions_on_dataset.py | 26 ------------------- examples/python/simple_example.py | 11 +++----- notebooks/cognee_demo.ipynb | 4 +-- 7 files changed, 19 insertions(+), 58 deletions(-) delete mode 100644 cognee/tasks/documents/check_permissions_on_dataset.py diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 0fa345176..4efec365a 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -19,7 +19,6 @@ from cognee.modules.ontology.get_default_ontology_resolver import ( from cognee.modules.users.models import User from cognee.tasks.documents import ( - check_permissions_on_dataset, classify_documents, extract_chunks_from_documents, ) @@ -78,12 +77,11 @@ async def cognify( Processing Pipeline: 1. **Document Classification**: Identifies document types and structures - 2. **Permission Validation**: Ensures user has processing rights - 3. **Text Chunking**: Breaks content into semantically meaningful segments - 4. **Entity Extraction**: Identifies key concepts, people, places, organizations - 5. **Relationship Detection**: Discovers connections between entities - 6. **Graph Construction**: Builds semantic knowledge graph with embeddings - 7. **Content Summarization**: Creates hierarchical summaries for navigation + 2. **Text Chunking**: Breaks content into semantically meaningful segments + 3. **Entity Extraction**: Identifies key concepts, people, places, organizations + 4. **Relationship Detection**: Discovers connections between entities + 5. **Graph Construction**: Builds semantic knowledge graph with embeddings + 6. 
**Content Summarization**: Creates hierarchical summaries for navigation Graph Model Customization: The `graph_model` parameter allows custom knowledge structures: @@ -274,7 +272,6 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's default_tasks = [ Task(classify_documents), - Task(check_permissions_on_dataset, user=user, permissions=["write"]), Task( extract_chunks_from_documents, max_chunk_size=chunk_size or get_max_chunk_tokens(), @@ -305,14 +302,13 @@ async def get_temporal_tasks( The pipeline includes: 1. Document classification. - 2. Dataset permission checks (requires "write" access). - 3. Document chunking with a specified or default chunk size. - 4. Event and timestamp extraction from chunks. - 5. Knowledge graph extraction from events. - 6. Batched insertion of data points. + 2. Document chunking with a specified or default chunk size. + 3. Event and timestamp extraction from chunks. + 4. Knowledge graph extraction from events. + 5. Batched insertion of data points. Args: - user (User, optional): The user requesting task execution, used for permission checks. + user (User, optional): The user requesting task execution. chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker. chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default. 
chunks_per_batch (int, optional): Number of chunks to process in a single batch in Cognify @@ -325,7 +321,6 @@ async def get_temporal_tasks( temporal_tasks = [ Task(classify_documents), - Task(check_permissions_on_dataset, user=user, permissions=["write"]), Task( extract_chunks_from_documents, max_chunk_size=chunk_size or get_max_chunk_tokens(), diff --git a/cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py b/cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py index edac15015..1fbc31c02 100644 --- a/cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +++ b/cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py @@ -8,7 +8,6 @@ from cognee.modules.users.models import User from cognee.shared.data_models import KnowledgeGraph from cognee.shared.utils import send_telemetry from cognee.tasks.documents import ( - check_permissions_on_dataset, classify_documents, extract_chunks_from_documents, ) @@ -31,7 +30,6 @@ async def get_cascade_graph_tasks( cognee_config = get_cognify_config() default_tasks = [ Task(classify_documents), - Task(check_permissions_on_dataset, user=user, permissions=["write"]), Task( extract_chunks_from_documents, max_chunk_tokens=get_max_chunk_tokens() ), # Extract text chunks based on the document type. 
diff --git a/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py b/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py index fb10c7eed..6a39a67cf 100644 --- a/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +++ b/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py @@ -30,8 +30,8 @@ async def get_no_summary_tasks( ontology_file_path=None, ) -> List[Task]: """Returns default tasks without summarization tasks.""" - # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks) - base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker) + # Get base tasks (0=classify, 1=extract_chunks) + base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker) ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path) @@ -51,8 +51,8 @@ async def get_just_chunks_tasks( chunk_size: int = None, chunker=TextChunker, user=None ) -> List[Task]: """Returns default tasks with only chunk extraction and data points addition.""" - # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks) - base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker) + # Get base tasks (0=classify, 1=extract_chunks) + base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker) add_data_points_task = Task(add_data_points, task_config={"batch_size": 10}) diff --git a/cognee/tasks/documents/__init__.py b/cognee/tasks/documents/__init__.py index f4582fbe0..043625f35 100644 --- a/cognee/tasks/documents/__init__.py +++ b/cognee/tasks/documents/__init__.py @@ -1,3 +1,2 @@ from .classify_documents import classify_documents from .extract_chunks_from_documents import extract_chunks_from_documents -from .check_permissions_on_dataset import check_permissions_on_dataset diff --git a/cognee/tasks/documents/check_permissions_on_dataset.py b/cognee/tasks/documents/check_permissions_on_dataset.py 
deleted file mode 100644 index 01a03de5f..000000000 --- a/cognee/tasks/documents/check_permissions_on_dataset.py +++ /dev/null @@ -1,26 +0,0 @@ -from cognee.modules.data.processing.document_types import Document -from cognee.modules.users.permissions.methods import check_permission_on_dataset -from typing import List - - -async def check_permissions_on_dataset( - documents: List[Document], context: dict, user, permissions -) -> List[Document]: - """ - Validates a user's permissions on a list of documents. - - Notes: - - This function assumes that `check_permission_on_documents` raises an exception if the permission check fails. - - It is designed to validate multiple permissions in a sequential manner for the same set of documents. - - Ensure that the `Document` and `user` objects conform to the expected structure and interfaces. - """ - - for permission in permissions: - await check_permission_on_dataset( - user, - permission, - # TODO: pass dataset through argument instead of context - context["dataset"].id, - ) - - return documents diff --git a/examples/python/simple_example.py b/examples/python/simple_example.py index c13e48f85..347ace365 100644 --- a/examples/python/simple_example.py +++ b/examples/python/simple_example.py @@ -32,16 +32,13 @@ async def main(): print("Cognify process steps:") print("1. Classifying the document: Determining the type and category of the input text.") print( - "2. Checking permissions: Ensuring the user has the necessary rights to process the text." + "2. Extracting text chunks: Breaking down the text into sentences or phrases for analysis." ) print( - "3. Extracting text chunks: Breaking down the text into sentences or phrases for analysis." + "3. Generating knowledge graph: Extracting entities and relationships to form a knowledge graph." ) - print("4. Adding data points: Storing the extracted chunks for processing.") - print( - "5. Generating knowledge graph: Extracting entities and relationships to form a knowledge graph." 
- ) - print("6. Summarizing text: Creating concise summaries of the content for quick insights.\n") + print("4. Summarizing text: Creating concise summaries of the content for quick insights.") + print("5. Adding data points: Storing the extracted chunks for processing.\n") # Use LLMs and cognee to create knowledge graph await cognee.cognify() diff --git a/notebooks/cognee_demo.ipynb b/notebooks/cognee_demo.ipynb index 09c4c89be..fe6ae50ae 100644 --- a/notebooks/cognee_demo.ipynb +++ b/notebooks/cognee_demo.ipynb @@ -591,7 +591,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "7c431fdef4921ae0", "metadata": { "ExecuteTime": { @@ -609,7 +609,6 @@ "from cognee.modules.pipelines import run_tasks\n", "from cognee.modules.users.models import User\n", "from cognee.tasks.documents import (\n", - " check_permissions_on_dataset,\n", " classify_documents,\n", " extract_chunks_from_documents,\n", ")\n", @@ -627,7 +626,6 @@ "\n", " tasks = [\n", " Task(classify_documents),\n", - " Task(check_permissions_on_dataset, user=user, permissions=[\"write\"]),\n", " Task(\n", " extract_chunks_from_documents, max_chunk_size=get_max_chunk_tokens()\n", " ), # Extract text chunks based on the document type.\n", From 3b7d030817cea67f08af121d936a9e31312ae38c Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 13 Nov 2025 16:06:07 +0100 Subject: [PATCH 142/284] fix: remove duplicate mistral adapter creation --- .../litellm_instructor/llm/get_llm_client.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py index c7dcecc56..bbdfe49e9 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py @@ 
-162,20 +162,5 @@ def get_llm_client(raise_api_key_error: bool = True): endpoint=llm_config.llm_endpoint, ) - elif provider == LLMProvider.MISTRAL: - if llm_config.llm_api_key is None: - raise LLMAPIKeyNotSetError() - - from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.mistral.adapter import ( - MistralAdapter, - ) - - return MistralAdapter( - api_key=llm_config.llm_api_key, - model=llm_config.llm_model, - max_completion_tokens=max_completion_tokens, - endpoint=llm_config.llm_endpoint, - ) - else: raise UnsupportedLLMProviderError(provider) From c6454338f9374c0e871938523eca237d6e5a1d16 Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Thu, 13 Nov 2025 17:35:16 +0100 Subject: [PATCH 143/284] Fix: MCP remove cognee.add() preprequisite from the doc --- cognee-mcp/src/server.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cognee-mcp/src/server.py b/cognee-mcp/src/server.py index 7c708638c..4131be988 100755 --- a/cognee-mcp/src/server.py +++ b/cognee-mcp/src/server.py @@ -194,7 +194,6 @@ async def cognify( Prerequisites: - **LLM_API_KEY**: Must be configured (required for entity extraction and graph generation) - - **Data Added**: Must have data previously added via `cognee.add()` - **Vector Database**: Must be accessible for embeddings storage - **Graph Database**: Must be accessible for relationship storage From 2337d36f7b3968cfeff06b00613f7464c8d0ca93 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 13 Nov 2025 18:25:07 +0100 Subject: [PATCH 144/284] feat: add variable to control instructor mode --- cognee/infrastructure/llm/config.py | 2 ++ .../litellm_instructor/llm/anthropic/adapter.py | 8 +++++++- .../litellm_instructor/llm/gemini/adapter.py | 12 +++++++++++- .../llm/generic_llm_api/adapter.py | 12 +++++++++++- .../litellm_instructor/llm/mistral/adapter.py | 8 +++++++- .../litellm_instructor/llm/ollama/adapter.py | 12 +++++++++++- .../litellm_instructor/llm/openai/adapter.py | 11 +++++++++-- 7 files changed, 58 
insertions(+), 7 deletions(-) diff --git a/cognee/infrastructure/llm/config.py b/cognee/infrastructure/llm/config.py index 8fd196eaf..c87054ff6 100644 --- a/cognee/infrastructure/llm/config.py +++ b/cognee/infrastructure/llm/config.py @@ -38,6 +38,7 @@ class LLMConfig(BaseSettings): """ structured_output_framework: str = "instructor" + llm_instructor_mode: Optional[str] = None llm_provider: str = "openai" llm_model: str = "openai/gpt-5-mini" llm_endpoint: str = "" @@ -181,6 +182,7 @@ class LLMConfig(BaseSettings): instance. """ return { + "llm_instructor_mode": self.llm_instructor_mode, "provider": self.llm_provider, "model": self.llm_model, "endpoint": self.llm_endpoint, diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py index bf19d6e86..6fb78718e 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py @@ -28,13 +28,19 @@ class AnthropicAdapter(LLMInterface): name = "Anthropic" model: str + default_instructor_mode = "anthropic_tools" def __init__(self, max_completion_tokens: int, model: str = None): import anthropic + config_instructor_mode = get_llm_config().llm_instructor_mode + instructor_mode = ( + config_instructor_mode if config_instructor_mode else self.default_instructor_mode + ) + self.aclient = instructor.patch( create=anthropic.AsyncAnthropic(api_key=get_llm_config().llm_api_key).messages.create, - mode=instructor.Mode.ANTHROPIC_TOOLS, + mode=instructor.Mode(instructor_mode), ) self.model = model diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py index 1187e0cad..68dddc7b7 100644 --- 
a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py @@ -41,6 +41,7 @@ class GeminiAdapter(LLMInterface): name: str model: str api_key: str + default_instructor_mode = "json_mode" def __init__( self, @@ -63,7 +64,16 @@ class GeminiAdapter(LLMInterface): self.fallback_api_key = fallback_api_key self.fallback_endpoint = fallback_endpoint - self.aclient = instructor.from_litellm(litellm.acompletion, mode=instructor.Mode.JSON) + from cognee.infrastructure.llm.config import get_llm_config + + config_instructor_mode = get_llm_config().llm_instructor_mode + instructor_mode = ( + config_instructor_mode if config_instructor_mode else self.default_instructor_mode + ) + + self.aclient = instructor.from_litellm( + litellm.acompletion, mode=instructor.Mode(instructor_mode) + ) @retry( stop=stop_after_delay(128), diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py index 8bbbaa2cc..ea32dced1 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py @@ -41,6 +41,7 @@ class GenericAPIAdapter(LLMInterface): name: str model: str api_key: str + default_instructor_mode = "json_mode" def __init__( self, @@ -63,7 +64,16 @@ class GenericAPIAdapter(LLMInterface): self.fallback_api_key = fallback_api_key self.fallback_endpoint = fallback_endpoint - self.aclient = instructor.from_litellm(litellm.acompletion, mode=instructor.Mode.JSON) + from cognee.infrastructure.llm.config import get_llm_config + + config_instructor_mode = get_llm_config().llm_instructor_mode + instructor_mode = ( + config_instructor_mode if 
config_instructor_mode else self.default_instructor_mode + ) + + self.aclient = instructor.from_litellm( + litellm.acompletion, mode=instructor.Mode(instructor_mode) + ) @retry( stop=stop_after_delay(128), diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py index 78a3cbff5..bed88ce3c 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py @@ -37,6 +37,7 @@ class MistralAdapter(LLMInterface): model: str api_key: str max_completion_tokens: int + default_instructor_mode = "mistral_tools" def __init__(self, api_key: str, model: str, max_completion_tokens: int, endpoint: str = None): from mistralai import Mistral @@ -44,9 +45,14 @@ class MistralAdapter(LLMInterface): self.model = model self.max_completion_tokens = max_completion_tokens + config_instructor_mode = get_llm_config().llm_instructor_mode + instructor_mode = ( + config_instructor_mode if config_instructor_mode else self.default_instructor_mode + ) + self.aclient = instructor.from_litellm( litellm.acompletion, - mode=instructor.Mode.MISTRAL_TOOLS, + mode=instructor.Mode(instructor_mode), api_key=get_llm_config().llm_api_key, ) diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py index 9c3d185aa..aa24a7911 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py @@ -42,6 +42,8 @@ class OllamaAPIAdapter(LLMInterface): - aclient """ + default_instructor_mode = "json_mode" + def __init__( self, endpoint: str, 
api_key: str, model: str, name: str, max_completion_tokens: int ): @@ -51,8 +53,16 @@ class OllamaAPIAdapter(LLMInterface): self.endpoint = endpoint self.max_completion_tokens = max_completion_tokens + from cognee.infrastructure.llm.config import get_llm_config + + config_instructor_mode = get_llm_config().llm_instructor_mode + instructor_mode = ( + config_instructor_mode if config_instructor_mode else self.default_instructor_mode + ) + self.aclient = instructor.from_openai( - OpenAI(base_url=self.endpoint, api_key=self.api_key), mode=instructor.Mode.JSON + OpenAI(base_url=self.endpoint, api_key=self.api_key), + mode=instructor.Mode(instructor_mode), ) @retry( diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py index 305b426b8..69367602d 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py @@ -56,6 +56,7 @@ class OpenAIAdapter(LLMInterface): model: str api_key: str api_version: str + default_instructor_mode = "json_schema_mode" MAX_RETRIES = 5 @@ -74,14 +75,20 @@ class OpenAIAdapter(LLMInterface): fallback_api_key: str = None, fallback_endpoint: str = None, ): + from cognee.infrastructure.llm.config import get_llm_config + + config_instructor_mode = get_llm_config().llm_instructor_mode + instructor_mode = ( + config_instructor_mode if config_instructor_mode else self.default_instructor_mode + ) # TODO: With gpt5 series models OpenAI expects JSON_SCHEMA as a mode for structured outputs. # Make sure all new gpt models will work with this mode as well. 
if "gpt-5" in model: self.aclient = instructor.from_litellm( - litellm.acompletion, mode=instructor.Mode.JSON_SCHEMA + litellm.acompletion, mode=instructor.Mode(instructor_mode) ) self.client = instructor.from_litellm( - litellm.completion, mode=instructor.Mode.JSON_SCHEMA + litellm.completion, mode=instructor.Mode(instructor_mode) ) else: self.aclient = instructor.from_litellm(litellm.acompletion) From 661c194f97df5053f70a52d1638c77c23e0d50e3 Mon Sep 17 00:00:00 2001 From: EricXiao Date: Fri, 14 Nov 2025 15:21:47 +0800 Subject: [PATCH 145/284] fix: Resolve issue with csv suffix classification Signed-off-by: EricXiao --- cognee/infrastructure/files/utils/guess_file_type.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cognee/infrastructure/files/utils/guess_file_type.py b/cognee/infrastructure/files/utils/guess_file_type.py index 78b20c93d..4bc96fe80 100644 --- a/cognee/infrastructure/files/utils/guess_file_type.py +++ b/cognee/infrastructure/files/utils/guess_file_type.py @@ -55,6 +55,10 @@ def guess_file_type(file: BinaryIO, name: Optional[str] = None) -> filetype.Type file_type = Type("text/plain", "txt") return file_type + if ext in [".csv"]: + file_type = Type("text/csv", "csv") + return file_type + file_type = filetype.guess(file) # If file type could not be determined consider it a plain text file as they don't have magic number encoding From 205f5a9e0c6d0fb72cc94fa20ce2ba814ebef0d5 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Fri, 14 Nov 2025 11:05:39 +0100 Subject: [PATCH 146/284] fix: Fix based on PR comments --- .../litellm_instructor/llm/anthropic/adapter.py | 9 +++------ .../litellm_instructor/llm/gemini/adapter.py | 10 +++------- .../llm/generic_llm_api/adapter.py | 10 +++------- .../litellm_instructor/llm/get_llm_client.py | 9 ++++++++- .../litellm_instructor/llm/mistral/adapter.py | 16 ++++++++++------ .../litellm_instructor/llm/ollama/adapter.py | 17 +++++++++-------- .../litellm_instructor/llm/openai/adapter.py | 12 ++++-------- 7 
files changed, 40 insertions(+), 43 deletions(-) diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py index 6fb78718e..dbf0dfbea 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py @@ -30,17 +30,14 @@ class AnthropicAdapter(LLMInterface): model: str default_instructor_mode = "anthropic_tools" - def __init__(self, max_completion_tokens: int, model: str = None): + def __init__(self, max_completion_tokens: int, model: str = None, instructor_mode: str = None): import anthropic - config_instructor_mode = get_llm_config().llm_instructor_mode - instructor_mode = ( - config_instructor_mode if config_instructor_mode else self.default_instructor_mode - ) + self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode self.aclient = instructor.patch( create=anthropic.AsyncAnthropic(api_key=get_llm_config().llm_api_key).messages.create, - mode=instructor.Mode(instructor_mode), + mode=instructor.Mode(self.instructor_mode), ) self.model = model diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py index 68dddc7b7..226f291d7 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py @@ -50,6 +50,7 @@ class GeminiAdapter(LLMInterface): model: str, api_version: str, max_completion_tokens: int, + instructor_mode: str = None, fallback_model: str = None, fallback_api_key: str = None, fallback_endpoint: str = None, @@ -64,15 +65,10 @@ class 
GeminiAdapter(LLMInterface): self.fallback_api_key = fallback_api_key self.fallback_endpoint = fallback_endpoint - from cognee.infrastructure.llm.config import get_llm_config - - config_instructor_mode = get_llm_config().llm_instructor_mode - instructor_mode = ( - config_instructor_mode if config_instructor_mode else self.default_instructor_mode - ) + self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode self.aclient = instructor.from_litellm( - litellm.acompletion, mode=instructor.Mode(instructor_mode) + litellm.acompletion, mode=instructor.Mode(self.instructor_mode) ) @retry( diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py index ea32dced1..9d7f25fc5 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py @@ -50,6 +50,7 @@ class GenericAPIAdapter(LLMInterface): model: str, name: str, max_completion_tokens: int, + instructor_mode: str = None, fallback_model: str = None, fallback_api_key: str = None, fallback_endpoint: str = None, @@ -64,15 +65,10 @@ class GenericAPIAdapter(LLMInterface): self.fallback_api_key = fallback_api_key self.fallback_endpoint = fallback_endpoint - from cognee.infrastructure.llm.config import get_llm_config - - config_instructor_mode = get_llm_config().llm_instructor_mode - instructor_mode = ( - config_instructor_mode if config_instructor_mode else self.default_instructor_mode - ) + self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode self.aclient = instructor.from_litellm( - litellm.acompletion, mode=instructor.Mode(instructor_mode) + litellm.acompletion, mode=instructor.Mode(self.instructor_mode) ) @retry( diff --git 
a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py index c7dcecc56..537eda1b2 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py @@ -81,6 +81,7 @@ def get_llm_client(raise_api_key_error: bool = True): model=llm_config.llm_model, transcription_model=llm_config.transcription_model, max_completion_tokens=max_completion_tokens, + instructor_mode=llm_config.llm_instructor_mode, streaming=llm_config.llm_streaming, fallback_api_key=llm_config.fallback_api_key, fallback_endpoint=llm_config.fallback_endpoint, @@ -101,6 +102,7 @@ def get_llm_client(raise_api_key_error: bool = True): llm_config.llm_model, "Ollama", max_completion_tokens=max_completion_tokens, + instructor_mode=llm_config.llm_instructor_mode, ) elif provider == LLMProvider.ANTHROPIC: @@ -109,7 +111,9 @@ def get_llm_client(raise_api_key_error: bool = True): ) return AnthropicAdapter( - max_completion_tokens=max_completion_tokens, model=llm_config.llm_model + max_completion_tokens=max_completion_tokens, + model=llm_config.llm_model, + instructor_mode=llm_config.llm_instructor_mode, ) elif provider == LLMProvider.CUSTOM: @@ -126,6 +130,7 @@ def get_llm_client(raise_api_key_error: bool = True): llm_config.llm_model, "Custom", max_completion_tokens=max_completion_tokens, + instructor_mode=llm_config.llm_instructor_mode, fallback_api_key=llm_config.fallback_api_key, fallback_endpoint=llm_config.fallback_endpoint, fallback_model=llm_config.fallback_model, @@ -145,6 +150,7 @@ def get_llm_client(raise_api_key_error: bool = True): max_completion_tokens=max_completion_tokens, endpoint=llm_config.llm_endpoint, api_version=llm_config.llm_api_version, + instructor_mode=llm_config.llm_instructor_mode, ) elif provider == 
LLMProvider.MISTRAL: @@ -160,6 +166,7 @@ def get_llm_client(raise_api_key_error: bool = True): model=llm_config.llm_model, max_completion_tokens=max_completion_tokens, endpoint=llm_config.llm_endpoint, + instructor_mode=llm_config.llm_instructor_mode, ) elif provider == LLMProvider.MISTRAL: diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py index bed88ce3c..355cdae0b 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py @@ -39,20 +39,24 @@ class MistralAdapter(LLMInterface): max_completion_tokens: int default_instructor_mode = "mistral_tools" - def __init__(self, api_key: str, model: str, max_completion_tokens: int, endpoint: str = None): + def __init__( + self, + api_key: str, + model: str, + max_completion_tokens: int, + endpoint: str = None, + instructor_mode: str = None, + ): from mistralai import Mistral self.model = model self.max_completion_tokens = max_completion_tokens - config_instructor_mode = get_llm_config().llm_instructor_mode - instructor_mode = ( - config_instructor_mode if config_instructor_mode else self.default_instructor_mode - ) + self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode self.aclient = instructor.from_litellm( litellm.acompletion, - mode=instructor.Mode(instructor_mode), + mode=instructor.Mode(self.instructor_mode), api_key=get_llm_config().llm_api_key, ) diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py index aa24a7911..aabd19867 100644 --- 
a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py @@ -45,7 +45,13 @@ class OllamaAPIAdapter(LLMInterface): default_instructor_mode = "json_mode" def __init__( - self, endpoint: str, api_key: str, model: str, name: str, max_completion_tokens: int + self, + endpoint: str, + api_key: str, + model: str, + name: str, + max_completion_tokens: int, + instructor_mode: str = None, ): self.name = name self.model = model @@ -53,16 +59,11 @@ class OllamaAPIAdapter(LLMInterface): self.endpoint = endpoint self.max_completion_tokens = max_completion_tokens - from cognee.infrastructure.llm.config import get_llm_config - - config_instructor_mode = get_llm_config().llm_instructor_mode - instructor_mode = ( - config_instructor_mode if config_instructor_mode else self.default_instructor_mode - ) + self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode self.aclient = instructor.from_openai( OpenAI(base_url=self.endpoint, api_key=self.api_key), - mode=instructor.Mode(instructor_mode), + mode=instructor.Mode(self.instructor_mode), ) @retry( diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py index 69367602d..778c8eec7 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py @@ -70,25 +70,21 @@ class OpenAIAdapter(LLMInterface): model: str, transcription_model: str, max_completion_tokens: int, + instructor_mode: str = None, streaming: bool = False, fallback_model: str = None, fallback_api_key: str = None, fallback_endpoint: str = None, ): - from cognee.infrastructure.llm.config import get_llm_config - - 
config_instructor_mode = get_llm_config().llm_instructor_mode - instructor_mode = ( - config_instructor_mode if config_instructor_mode else self.default_instructor_mode - ) + self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode # TODO: With gpt5 series models OpenAI expects JSON_SCHEMA as a mode for structured outputs. # Make sure all new gpt models will work with this mode as well. if "gpt-5" in model: self.aclient = instructor.from_litellm( - litellm.acompletion, mode=instructor.Mode(instructor_mode) + litellm.acompletion, mode=instructor.Mode(self.instructor_mode) ) self.client = instructor.from_litellm( - litellm.completion, mode=instructor.Mode(instructor_mode) + litellm.completion, mode=instructor.Mode(self.instructor_mode) ) else: self.aclient = instructor.from_litellm(litellm.acompletion) From 844b8d635a7646750dc63dd4be13de07f2996940 Mon Sep 17 00:00:00 2001 From: Fahad Shoaib Date: Fri, 14 Nov 2025 22:13:00 +0500 Subject: [PATCH 147/284] feat: enhance ontology handling to support multiple uploads and retrievals --- .../v1/cognify/routers/get_cognify_router.py | 41 +++---- cognee/api/v1/ontologies/ontologies.py | 108 +++++++++++++++--- .../ontologies/routers/get_ontology_router.py | 51 ++++++--- .../rdf_xml/RDFLibOntologyResolver.py | 93 +++++++++------ 4 files changed, 202 insertions(+), 91 deletions(-) diff --git a/cognee/api/v1/cognify/routers/get_cognify_router.py b/cognee/api/v1/cognify/routers/get_cognify_router.py index 252ffe7bf..4f1497e3c 100644 --- a/cognee/api/v1/cognify/routers/get_cognify_router.py +++ b/cognee/api/v1/cognify/routers/get_cognify_router.py @@ -41,8 +41,8 @@ class CognifyPayloadDTO(InDTO): custom_prompt: Optional[str] = Field( default="", description="Custom prompt for entity extraction and graph generation" ) - ontology_key: Optional[str] = Field( - default=None, description="Reference to previously uploaded ontology" + ontology_key: Optional[List[str]] = Field( + default=None, 
description="Reference to one or more previously uploaded ontologies" ) @@ -71,7 +71,7 @@ def get_cognify_router() -> APIRouter: - **dataset_ids** (Optional[List[UUID]]): List of existing dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted). - **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking). - **custom_prompt** (Optional[str]): Custom prompt for entity extraction and graph generation. If provided, this prompt will be used instead of the default prompts for knowledge graph extraction. - - **ontology_key** (Optional[str]): Reference to a previously uploaded ontology file to use for knowledge graph construction. + - **ontology_key** (Optional[List[str]]): Reference to one or more previously uploaded ontology files to use for knowledge graph construction. ## Response - **Blocking execution**: Complete pipeline run information with entity counts, processing duration, and success/failure status @@ -87,7 +87,7 @@ def get_cognify_router() -> APIRouter: "datasets": ["research_papers", "documentation"], "run_in_background": false, "custom_prompt": "Extract entities focusing on technical concepts and their relationships. 
Identify key technologies, methodologies, and their interconnections.", - "ontology_key": "medical_ontology_v1" + "ontology_key": ["medical_ontology_v1"] } ``` @@ -121,29 +121,22 @@ def get_cognify_router() -> APIRouter: if payload.ontology_key: ontology_service = OntologyService() - try: - ontology_content = ontology_service.get_ontology_content( - payload.ontology_key, user - ) + ontology_contents = ontology_service.get_ontology_contents( + payload.ontology_key, user + ) - from cognee.modules.ontology.ontology_config import Config - from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import ( - RDFLibOntologyResolver, - ) - from io import StringIO + from cognee.modules.ontology.ontology_config import Config + from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import ( + RDFLibOntologyResolver, + ) + from io import StringIO - ontology_stream = StringIO(ontology_content) - config_to_use: Config = { - "ontology_config": { - "ontology_resolver": RDFLibOntologyResolver( - ontology_file=ontology_stream - ) - } + ontology_streams = [StringIO(content) for content in ontology_contents] + config_to_use: Config = { + "ontology_config": { + "ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_streams) } - except ValueError as e: - return JSONResponse( - status_code=400, content={"error": f"Ontology error: {str(e)}"} - ) + } cognify_run = await cognee_cognify( datasets, diff --git a/cognee/api/v1/ontologies/ontologies.py b/cognee/api/v1/ontologies/ontologies.py index 6bfb7658e..130b4a862 100644 --- a/cognee/api/v1/ontologies/ontologies.py +++ b/cognee/api/v1/ontologies/ontologies.py @@ -3,7 +3,7 @@ import json import tempfile from pathlib import Path from datetime import datetime, timezone -from typing import Optional +from typing import Optional, List from dataclasses import dataclass @@ -47,28 +47,23 @@ class OntologyService: async def upload_ontology( self, ontology_key: str, file, user, description: Optional[str] = None ) -> 
OntologyMetadata: - # Validate file format if not file.filename.lower().endswith(".owl"): raise ValueError("File must be in .owl format") user_dir = self._get_user_dir(str(user.id)) metadata = self._load_metadata(user_dir) - # Check for duplicate key if ontology_key in metadata: raise ValueError(f"Ontology key '{ontology_key}' already exists") - # Read file content content = await file.read() - if len(content) > 10 * 1024 * 1024: # 10MB limit + if len(content) > 10 * 1024 * 1024: raise ValueError("File size exceeds 10MB limit") - # Save file file_path = user_dir / f"{ontology_key}.owl" with open(file_path, "wb") as f: f.write(content) - # Update metadata ontology_metadata = { "filename": file.filename, "size_bytes": len(content), @@ -86,19 +81,102 @@ class OntologyService: description=description, ) - def get_ontology_content(self, ontology_key: str, user) -> str: + async def upload_ontologies( + self, ontology_key: List[str], files: List, user, descriptions: Optional[List[str]] = None + ) -> List[OntologyMetadata]: + """ + Upload ontology files with their respective keys. 
+ + Args: + ontology_key: List of unique keys for each ontology + files: List of UploadFile objects (same length as keys) + user: Authenticated user + descriptions: Optional list of descriptions for each file + + Returns: + List of OntologyMetadata objects for uploaded files + + Raises: + ValueError: If keys duplicate, file format invalid, or array lengths don't match + """ + if len(ontology_key) != len(files): + raise ValueError("Number of keys must match number of files") + + if len(set(ontology_key)) != len(ontology_key): + raise ValueError("Duplicate ontology keys not allowed") + + if descriptions and len(descriptions) != len(files): + raise ValueError("Number of descriptions must match number of files") + + results = [] user_dir = self._get_user_dir(str(user.id)) metadata = self._load_metadata(user_dir) - if ontology_key not in metadata: - raise ValueError(f"Ontology key '{ontology_key}' not found") + for i, (key, file) in enumerate(zip(ontology_key, files)): + if key in metadata: + raise ValueError(f"Ontology key '{key}' already exists") - file_path = user_dir / f"{ontology_key}.owl" - if not file_path.exists(): - raise ValueError(f"Ontology file for key '{ontology_key}' not found") + if not file.filename.lower().endswith(".owl"): + raise ValueError(f"File '{file.filename}' must be in .owl format") - with open(file_path, "r", encoding="utf-8") as f: - return f.read() + content = await file.read() + if len(content) > 10 * 1024 * 1024: + raise ValueError(f"File '{file.filename}' exceeds 10MB limit") + + file_path = user_dir / f"{key}.owl" + with open(file_path, "wb") as f: + f.write(content) + + ontology_metadata = { + "filename": file.filename, + "size_bytes": len(content), + "uploaded_at": datetime.now(timezone.utc).isoformat(), + "description": descriptions[i] if descriptions else None, + } + metadata[key] = ontology_metadata + + results.append( + OntologyMetadata( + ontology_key=key, + filename=file.filename, + size_bytes=len(content), + 
uploaded_at=ontology_metadata["uploaded_at"], + description=descriptions[i] if descriptions else None, + ) + ) + + self._save_metadata(user_dir, metadata) + return results + + def get_ontology_contents(self, ontology_key: List[str], user) -> List[str]: + """ + Retrieve ontology content for one or more keys. + + Args: + ontology_key: List of ontology keys to retrieve (can contain single item) + user: Authenticated user + + Returns: + List of ontology content strings + + Raises: + ValueError: If any ontology key not found + """ + user_dir = self._get_user_dir(str(user.id)) + metadata = self._load_metadata(user_dir) + + contents = [] + for key in ontology_key: + if key not in metadata: + raise ValueError(f"Ontology key '{key}' not found") + + file_path = user_dir / f"{key}.owl" + if not file_path.exists(): + raise ValueError(f"Ontology file for key '{key}' not found") + + with open(file_path, "r", encoding="utf-8") as f: + contents.append(f.read()) + return contents def list_ontologies(self, user) -> dict: user_dir = self._get_user_dir(str(user.id)) diff --git a/cognee/api/v1/ontologies/routers/get_ontology_router.py b/cognee/api/v1/ontologies/routers/get_ontology_router.py index f5c51ba21..ee31c683f 100644 --- a/cognee/api/v1/ontologies/routers/get_ontology_router.py +++ b/cognee/api/v1/ontologies/routers/get_ontology_router.py @@ -1,6 +1,6 @@ from fastapi import APIRouter, File, Form, UploadFile, Depends, HTTPException from fastapi.responses import JSONResponse -from typing import Optional +from typing import Optional, List from cognee.modules.users.models import User from cognee.modules.users.methods import get_authenticated_user @@ -16,23 +16,27 @@ def get_ontology_router() -> APIRouter: @router.post("", response_model=dict) async def upload_ontology( ontology_key: str = Form(...), - ontology_file: UploadFile = File(...), - description: Optional[str] = Form(None), + ontology_file: List[UploadFile] = File(...), + descriptions: Optional[str] = Form(None), user: User 
= Depends(get_authenticated_user), ): """ - Upload an ontology file with a named key for later use in cognify operations. + Upload ontology files with their respective keys for later use in cognify operations. + + Supports both single and multiple file uploads: + - Single file: ontology_key=["key"], ontology_file=[file] + - Multiple files: ontology_key=["key1", "key2"], ontology_file=[file1, file2] ## Request Parameters - - **ontology_key** (str): User-defined identifier for the ontology - - **ontology_file** (UploadFile): OWL format ontology file - - **description** (Optional[str]): Optional description of the ontology + - **ontology_key** (str): JSON array string of user-defined identifiers for the ontologies + - **ontology_file** (List[UploadFile]): OWL format ontology files + - **descriptions** (Optional[str]): JSON array string of optional descriptions ## Response - Returns metadata about the uploaded ontology including key, filename, size, and upload timestamp. + Returns metadata about uploaded ontologies including keys, filenames, sizes, and upload timestamps. 
## Error Codes - - **400 Bad Request**: Invalid file format, duplicate key, file size exceeded + - **400 Bad Request**: Invalid file format, duplicate keys, array length mismatches, file size exceeded - **500 Internal Server Error**: File system or processing errors """ send_telemetry( @@ -45,16 +49,31 @@ def get_ontology_router() -> APIRouter: ) try: - result = await ontology_service.upload_ontology( - ontology_key, ontology_file, user, description + import json + + ontology_keys = json.loads(ontology_key) + description_list = json.loads(descriptions) if descriptions else None + + if not isinstance(ontology_keys, list): + raise ValueError("ontology_key must be a JSON array") + + results = await ontology_service.upload_ontologies( + ontology_keys, ontology_file, user, description_list ) + return { - "ontology_key": result.ontology_key, - "filename": result.filename, - "size_bytes": result.size_bytes, - "uploaded_at": result.uploaded_at, + "uploaded_ontologies": [ + { + "ontology_key": result.ontology_key, + "filename": result.filename, + "size_bytes": result.size_bytes, + "uploaded_at": result.uploaded_at, + "description": result.description, + } + for result in results + ] } - except ValueError as e: + except (json.JSONDecodeError, ValueError) as e: return JSONResponse(status_code=400, content={"error": str(e)}) except Exception as e: return JSONResponse(status_code=500, content={"error": str(e)}) diff --git a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py index 4acc8861b..34d7a946a 100644 --- a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +++ b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py @@ -26,7 +26,7 @@ class RDFLibOntologyResolver(BaseOntologyResolver): def __init__( self, - ontology_file: Optional[Union[str, List[str], IO]] = None, + ontology_file: Optional[Union[str, List[str], IO, List[IO]]] = None, matching_strategy: Optional[MatchingStrategy] = None, ) -> 
None: super().__init__(matching_strategy) @@ -34,47 +34,68 @@ class RDFLibOntologyResolver(BaseOntologyResolver): try: self.graph = None if ontology_file is not None: + files_to_load = [] + file_objects = [] + if hasattr(ontology_file, "read"): - self.graph = Graph() - content = ontology_file.read() - self.graph.parse(data=content, format="xml") - logger.info("Ontology loaded successfully from file object") - else: - files_to_load = [] - if isinstance(ontology_file, str): - files_to_load = [ontology_file] - elif isinstance(ontology_file, list): + file_objects = [ontology_file] + elif isinstance(ontology_file, str): + files_to_load = [ontology_file] + elif isinstance(ontology_file, list): + if all(hasattr(item, "read") for item in ontology_file): + file_objects = ontology_file + else: files_to_load = ontology_file - else: - raise ValueError( - f"ontology_file must be a string, list of strings, file-like object, or None. Got: {type(ontology_file)}" - ) + else: + raise ValueError( + f"ontology_file must be a string, list of strings, file-like object, list of file-like objects, or None. Got: {type(ontology_file)}" + ) - if files_to_load: - self.graph = Graph() - loaded_files = [] - for file_path in files_to_load: - if os.path.exists(file_path): - self.graph.parse(file_path) - loaded_files.append(file_path) - logger.info("Ontology loaded successfully from file: %s", file_path) - else: - logger.warning( - "Ontology file '%s' not found. Skipping this file.", - file_path, - ) + if file_objects: + self.graph = Graph() + loaded_objects = [] + for file_obj in file_objects: + try: + content = file_obj.read() + self.graph.parse(data=content, format="xml") + loaded_objects.append(file_obj) + logger.info("Ontology loaded successfully from file object") + except Exception as e: + logger.warning("Failed to parse ontology file object: %s", str(e)) - if not loaded_files: - logger.info( - "No valid ontology files found. No owl ontology will be attached to the graph." 
- ) - self.graph = None - else: - logger.info("Total ontology files loaded: %d", len(loaded_files)) - else: + if not loaded_objects: logger.info( - "No ontology file provided. No owl ontology will be attached to the graph." + "No valid ontology file objects found. No owl ontology will be attached to the graph." ) + self.graph = None + else: + logger.info("Total ontology file objects loaded: %d", len(loaded_objects)) + + elif files_to_load: + self.graph = Graph() + loaded_files = [] + for file_path in files_to_load: + if os.path.exists(file_path): + self.graph.parse(file_path) + loaded_files.append(file_path) + logger.info("Ontology loaded successfully from file: %s", file_path) + else: + logger.warning( + "Ontology file '%s' not found. Skipping this file.", + file_path, + ) + + if not loaded_files: + logger.info( + "No valid ontology files found. No owl ontology will be attached to the graph." + ) + self.graph = None + else: + logger.info("Total ontology files loaded: %d", len(loaded_files)) + else: + logger.info( + "No ontology file provided. No owl ontology will be attached to the graph." + ) else: logger.info( "No ontology file provided. No owl ontology will be attached to the graph." 
From 01f1c099cc972e2222f1174c515e1baf87fbb9d6 Mon Sep 17 00:00:00 2001 From: Fahad Shoaib Date: Fri, 14 Nov 2025 22:20:54 +0500 Subject: [PATCH 148/284] test: enhance server start test with ontology upload verification - Extend test_cognee_server_start to upload ontology and verify integration - Move test_ontology_endpoint from tests/ to tests/unit/api/ --- cognee/tests/test_cognee_server_start.py | 45 ++++++++++++++++++- .../{ => unit/api}/test_ontology_endpoint.py | 0 2 files changed, 44 insertions(+), 1 deletion(-) rename cognee/tests/{ => unit/api}/test_ontology_endpoint.py (100%) diff --git a/cognee/tests/test_cognee_server_start.py b/cognee/tests/test_cognee_server_start.py index ab68a8ef1..d6aa55a98 100644 --- a/cognee/tests/test_cognee_server_start.py +++ b/cognee/tests/test_cognee_server_start.py @@ -7,6 +7,7 @@ import requests from pathlib import Path import sys import uuid +import json class TestCogneeServerStart(unittest.TestCase): @@ -90,12 +91,31 @@ class TestCogneeServerStart(unittest.TestCase): ) } - payload = {"datasets": [dataset_name]} + ontology_key = f"test_ontology_{uuid.uuid4().hex[:8]}" + payload = {"datasets": [dataset_name], "ontology_key": [ontology_key]} add_response = requests.post(url, headers=headers, data=form_data, files=file, timeout=50) if add_response.status_code not in [200, 201]: add_response.raise_for_status() + ontology_content = b""" + + + + + """ + + ontology_response = requests.post( + "http://127.0.0.1:8000/api/v1/ontologies", + files=[("ontology_file", ("test.owl", ontology_content, "application/xml"))], + data={ + "ontology_key": json.dumps([ontology_key]), + "description": json.dumps(["Test ontology"]), + }, + ) + self.assertEqual(ontology_response.status_code, 200) + # Cognify request url = "http://127.0.0.1:8000/api/v1/cognify" headers = { @@ -107,6 +127,29 @@ class TestCogneeServerStart(unittest.TestCase): if cognify_response.status_code not in [200, 201]: cognify_response.raise_for_status() + datasets_response = 
requests.get("http://127.0.0.1:8000/api/v1/datasets", headers=headers) + + datasets = datasets_response.json() + dataset_id = None + for dataset in datasets: + if dataset["name"] == dataset_name: + dataset_id = dataset["id"] + break + + graph_response = requests.get( + f"http://127.0.0.1:8000/api/v1/datasets/{dataset_id}/graph", headers=headers + ) + self.assertEqual(graph_response.status_code, 200) + + graph_data = graph_response.json() + ontology_nodes = [ + node for node in graph_data.get("nodes") if node.get("properties").get("ontology_valid") + ] + + self.assertGreater( + len(ontology_nodes), 0, "No ontology nodes found - ontology was not integrated" + ) + # TODO: Add test to verify cognify pipeline is complete before testing search # Search request diff --git a/cognee/tests/test_ontology_endpoint.py b/cognee/tests/unit/api/test_ontology_endpoint.py similarity index 100% rename from cognee/tests/test_ontology_endpoint.py rename to cognee/tests/unit/api/test_ontology_endpoint.py From 1ded09d0f995fa57c9eaa3feafbe64089525a92f Mon Sep 17 00:00:00 2001 From: Fahad Shoaib Date: Sat, 15 Nov 2025 00:06:55 +0500 Subject: [PATCH 149/284] fix: fixed test ontology file content. Added tests to support multiple files and improved validation. 
--- cognee/tests/test_cognee_server_start.py | 13 +- .../tests/unit/api/test_ontology_endpoint.py | 189 +++++++++++++++++- 2 files changed, 185 insertions(+), 17 deletions(-) diff --git a/cognee/tests/test_cognee_server_start.py b/cognee/tests/test_cognee_server_start.py index d6aa55a98..b266fc7bf 100644 --- a/cognee/tests/test_cognee_server_start.py +++ b/cognee/tests/test_cognee_server_start.py @@ -98,13 +98,12 @@ class TestCogneeServerStart(unittest.TestCase): if add_response.status_code not in [200, 201]: add_response.raise_for_status() - ontology_content = b""" - - - - - """ + ontology_content = b""" + + + + + """ ontology_response = requests.post( "http://127.0.0.1:8000/api/v1/ontologies", diff --git a/cognee/tests/unit/api/test_ontology_endpoint.py b/cognee/tests/unit/api/test_ontology_endpoint.py index b5cedfafe..c04959998 100644 --- a/cognee/tests/unit/api/test_ontology_endpoint.py +++ b/cognee/tests/unit/api/test_ontology_endpoint.py @@ -32,6 +32,8 @@ def mock_default_user(): @patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) def test_upload_ontology_success(mock_get_default_user, client, mock_default_user): """Test successful ontology upload""" + import json + mock_get_default_user.return_value = mock_default_user ontology_content = ( b"" @@ -40,14 +42,14 @@ def test_upload_ontology_success(mock_get_default_user, client, mock_default_use response = client.post( "/api/v1/ontologies", - files={"ontology_file": ("test.owl", ontology_content)}, - data={"ontology_key": unique_key, "description": "Test"}, + files=[("ontology_file", ("test.owl", ontology_content, "application/xml"))], + data={"ontology_key": json.dumps([unique_key]), "description": json.dumps(["Test"])}, ) assert response.status_code == 200 data = response.json() - assert data["ontology_key"] == unique_key - assert "uploaded_at" in data + assert data["uploaded_ontologies"][0]["ontology_key"] == unique_key + assert "uploaded_at" in data["uploaded_ontologies"][0] 
@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) @@ -66,30 +68,197 @@ def test_upload_ontology_invalid_file(mock_get_default_user, client, mock_defaul @patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) def test_upload_ontology_missing_data(mock_get_default_user, client, mock_default_user): """Test 400 response for missing file or key""" + import json + mock_get_default_user.return_value = mock_default_user # Missing file - response = client.post("/api/v1/ontologies", data={"ontology_key": "test"}) + response = client.post("/api/v1/ontologies", data={"ontology_key": json.dumps(["test"])}) assert response.status_code == 400 # Missing key - response = client.post("/api/v1/ontologies", files={"ontology_file": ("test.owl", b"xml")}) + response = client.post( + "/api/v1/ontologies", files=[("ontology_file", ("test.owl", b"xml", "application/xml"))] + ) assert response.status_code == 400 @patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) def test_upload_ontology_unauthorized(mock_get_default_user, client, mock_default_user): """Test behavior when default user is provided (no explicit authentication)""" + import json + unique_key = f"test_ontology_{uuid.uuid4().hex[:8]}" mock_get_default_user.return_value = mock_default_user response = client.post( "/api/v1/ontologies", - files={"ontology_file": ("test.owl", b"")}, - data={"ontology_key": unique_key}, + files=[("ontology_file", ("test.owl", b"", "application/xml"))], + data={"ontology_key": json.dumps([unique_key])}, ) # The current system provides a default user when no explicit authentication is given # This test verifies the system works with conditional authentication assert response.status_code == 200 data = response.json() - assert data["ontology_key"] == unique_key - assert "uploaded_at" in data + assert data["uploaded_ontologies"][0]["ontology_key"] == unique_key + assert "uploaded_at" in data["uploaded_ontologies"][0] + + +@patch.object(gau_mod, "get_default_user", 
new_callable=AsyncMock) +async def test_upload_multiple_ontologies(mock_get_default_user, client, mock_default_user): + """Test uploading multiple ontology files in single request""" + import io + + # Create mock files + file1_content = b"" + file2_content = b"" + + files = [ + ("ontology_file", ("vehicles.owl", io.BytesIO(file1_content), "application/xml")), + ("ontology_file", ("manufacturers.owl", io.BytesIO(file2_content), "application/xml")), + ] + data = { + "ontology_key": '["vehicles", "manufacturers"]', + "descriptions": '["Base vehicles", "Car manufacturers"]', + } + + response = client.post("/api/v1/ontologies", files=files, data=data) + + assert response.status_code == 200 + result = response.json() + assert "uploaded_ontologies" in result + assert len(result["uploaded_ontologies"]) == 2 + assert result["uploaded_ontologies"][0]["ontology_key"] == "vehicles" + assert result["uploaded_ontologies"][1]["ontology_key"] == "manufacturers" + + +@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) +async def test_upload_endpoint_accepts_arrays(mock_get_default_user, client, mock_default_user): + """Test that upload endpoint accepts array parameters""" + import io + import json + + file_content = b"" + + files = [("ontology_file", ("single.owl", io.BytesIO(file_content), "application/xml"))] + data = { + "ontology_key": json.dumps(["single_key"]), + "descriptions": json.dumps(["Single ontology"]), + } + + response = client.post("/api/v1/ontologies", files=files, data=data) + + assert response.status_code == 200 + result = response.json() + assert result["uploaded_ontologies"][0]["ontology_key"] == "single_key" + + +@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) +async def test_cognify_with_multiple_ontologies(mock_get_default_user, client, mock_default_user): + """Test cognify endpoint accepts multiple ontology keys""" + payload = { + "datasets": ["test_dataset"], + "ontology_key": ["ontology1", "ontology2"], # Array instead of 
string + "run_in_background": False, + } + + response = client.post("/api/v1/cognify", json=payload) + + # Should not fail due to ontology_key type + assert response.status_code in [200, 400, 409] # May fail for other reasons, not type + + +@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) +async def test_complete_multifile_workflow(mock_get_default_user, client, mock_default_user): + """Test complete workflow: upload multiple ontologies → cognify with multiple keys""" + import io + import json + + # Step 1: Upload multiple ontologies + file1_content = b""" + + + """ + + file2_content = b""" + + + """ + + files = [ + ("ontology_file", ("vehicles.owl", io.BytesIO(file1_content), "application/xml")), + ("ontology_file", ("manufacturers.owl", io.BytesIO(file2_content), "application/xml")), + ] + data = { + "ontology_key": json.dumps(["vehicles", "manufacturers"]), + "descriptions": json.dumps(["Vehicle ontology", "Manufacturer ontology"]), + } + + upload_response = client.post("/api/v1/ontologies", files=files, data=data) + assert upload_response.status_code == 200 + + # Step 2: Verify ontologies are listed + list_response = client.get("/api/v1/ontologies") + assert list_response.status_code == 200 + ontologies = list_response.json() + assert "vehicles" in ontologies + assert "manufacturers" in ontologies + + # Step 3: Test cognify with multiple ontologies + cognify_payload = { + "datasets": ["test_dataset"], + "ontology_key": ["vehicles", "manufacturers"], + "run_in_background": False, + } + + cognify_response = client.post("/api/v1/cognify", json=cognify_payload) + # Should not fail due to ontology handling (may fail for dataset reasons) + assert cognify_response.status_code != 400 # Not a validation error + + +@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) +async def test_multifile_error_handling(mock_get_default_user, client, mock_default_user): + """Test error handling for invalid multifile uploads""" + import io + import json 
+ + # Test mismatched array lengths + file_content = b"" + files = [("ontology_file", ("test.owl", io.BytesIO(file_content), "application/xml"))] + data = { + "ontology_key": json.dumps(["key1", "key2"]), # 2 keys, 1 file + "descriptions": json.dumps(["desc1"]), + } + + response = client.post("/api/v1/ontologies", files=files, data=data) + assert response.status_code == 400 + assert "Number of keys must match number of files" in response.json()["error"] + + # Test duplicate keys + files = [ + ("ontology_file", ("test1.owl", io.BytesIO(file_content), "application/xml")), + ("ontology_file", ("test2.owl", io.BytesIO(file_content), "application/xml")), + ] + data = { + "ontology_key": json.dumps(["duplicate", "duplicate"]), + "descriptions": json.dumps(["desc1", "desc2"]), + } + + response = client.post("/api/v1/ontologies", files=files, data=data) + assert response.status_code == 400 + assert "Duplicate ontology keys not allowed" in response.json()["error"] + + +@patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) +async def test_cognify_missing_ontology_key(mock_get_default_user, client, mock_default_user): + """Test cognify with non-existent ontology key""" + payload = { + "datasets": ["test_dataset"], + "ontology_key": ["nonexistent_key"], + "run_in_background": False, + } + + response = client.post("/api/v1/cognify", json=payload) + assert response.status_code == 409 + assert "Ontology key 'nonexistent_key' not found" in response.json()["error"] From 983bfae4fcc9046fd520f0c24733e491679d54cf Mon Sep 17 00:00:00 2001 From: EricXiao Date: Mon, 17 Nov 2025 14:41:55 +0800 Subject: [PATCH 150/284] chore: remove unnecessary csv file type Signed-off-by: EricXiao --- .../files/utils/is_csv_content.py | 181 ------------------ 1 file changed, 181 deletions(-) delete mode 100644 cognee/infrastructure/files/utils/is_csv_content.py diff --git a/cognee/infrastructure/files/utils/is_csv_content.py b/cognee/infrastructure/files/utils/is_csv_content.py deleted file 
mode 100644 index 07b7ea69b..000000000 --- a/cognee/infrastructure/files/utils/is_csv_content.py +++ /dev/null @@ -1,181 +0,0 @@ -import csv -from collections import Counter - - -def is_csv_content(content): - """ - Heuristically determine whether a bytes-like object is CSV text. - - Strategy (fail-fast and cheap to expensive): - 1) Decode: Try a small ordered list of common encodings with strict errors. - 2) Line sampling: require >= 2 non-empty lines; sample up to 50 lines. - 3) Delimiter detection: - - Prefer csv.Sniffer() with common delimiters. - - Fallback to a lightweight consistency heuristic. - 4) Lightweight parse check: - - Parse a few lines with the delimiter. - - Ensure at least 2 valid rows and relatively stable column counts. - - Returns: - bool: True if the buffer looks like CSV; False otherwise. - """ - try: - encoding_list = [ - "utf-8", - "utf-8-sig", - "utf-32-le", - "utf-32-be", - "utf-16-le", - "utf-16-be", - "gb18030", - "shift_jis", - "cp949", - "cp1252", - "iso-8859-1", - ] - - # Try to decode strictly—if decoding fails for all encodings, it's not text/CSV. - text = None - for enc in encoding_list: - try: - text = content.decode(enc, errors="strict") - break - except UnicodeDecodeError: - continue - if text is None: - return False - - # Reject empty/whitespace-only payloads. - stripped = text.strip() - if not stripped: - return False - - # Split into logical lines and drop empty ones. Require at least two lines. - lines = [ln for ln in text.splitlines() if ln.strip()] - if len(lines) < 2: - return False - - # Take a small sample to keep sniffing cheap and predictable. - sample_lines = lines[:50] - - # Detect delimiter using csv.Sniffer first; if that fails, use our heuristic. - delimiter = _sniff_delimiter(sample_lines) or _heuristic_delimiter(sample_lines) - if not delimiter: - return False - - # Finally, do a lightweight parse sanity check with the chosen delimiter. 
- return _lightweight_parse_check(sample_lines, delimiter) - except Exception: - return False - - -def _sniff_delimiter(lines): - """ - Try Python's built-in csv.Sniffer on a sample. - - Args: - lines (list[str]): Sample lines (already decoded). - - Returns: - str | None: The detected delimiter if sniffing succeeds; otherwise None. - """ - # Join up to 50 lines to form the sample string Sniffer will inspect. - sample = "\n".join(lines[:50]) - try: - dialect = csv.Sniffer().sniff(sample, delimiters=",\t;|") - return dialect.delimiter - except Exception: - # Sniffer is known to be brittle on small/dirty samples—silently fallback. - return None - - -def _heuristic_delimiter(lines): - """ - Fallback delimiter detection based on count consistency per line. - - Heuristic: - - For each candidate delimiter, count occurrences per line. - - Keep only lines with count > 0 (line must contain the delimiter). - - Require at least half of lines to contain the delimiter (min 2). - - Compute the mode (most common count). If the proportion of lines that - exhibit the modal count is >= 80%, accept that delimiter. - - Args: - lines (list[str]): Sample lines. - - Returns: - str | None: Best delimiter if one meets the consistency threshold; else None. - """ - candidates = [",", "\t", ";", "|"] - best = None - best_score = 0.0 - - for d in candidates: - # Count how many times the delimiter appears in each line. - counts = [ln.count(d) for ln in lines] - # Consider only lines that actually contain the delimiter at least once. - nonzero = [c for c in counts if c > 0] - - # Require that more than half of lines (and at least 2) contain the delimiter. - if len(nonzero) < max(2, int(0.5 * len(lines))): - continue - - # Find the modal count and its frequency. - cnt = Counter(nonzero) - pairs = cnt.most_common(1) - if not pairs: - continue - - mode, mode_freq = pairs[0] - # Consistency ratio: lines with the modal count / total lines in the sample. 
- consistency = mode_freq / len(lines) - # Accept if consistent enough and better than any previous candidate. - if mode >= 1 and consistency >= 0.80 and consistency > best_score: - best = d - best_score = consistency - - return best - - -def _lightweight_parse_check(lines, delimiter): - """ - Parse a few lines with csv.reader and check structural stability. - - Heuristic: - - Parse up to 5 lines with the given delimiter. - - Count column widths per parsed row. - - Require at least 2 non-empty rows. - - Allow at most 1 row whose width deviates by >2 columns from the first row. - - Args: - lines (list[str]): Sample lines (decoded). - delimiter (str): Delimiter chosen by sniffing/heuristics. - - Returns: - bool: True if parsing looks stable; False otherwise. - """ - try: - # csv.reader accepts any iterable of strings; feeding the first 10 lines is fine. - reader = csv.reader(lines[:10], delimiter=delimiter) - widths = [] - valid_rows = 0 - for row in reader: - if not row: - continue - - widths.append(len(row)) - valid_rows += 1 - - # Need at least two meaningful rows to make a judgment. - if valid_rows < 2: - return False - - if widths: - first = widths[0] - # Count rows whose width deviates significantly (>2) from the first row. - unstable = sum(1 for w in widths if abs(w - first) > 2) - # Permit at most 1 unstable row among the parsed sample. 
- return unstable <= 1 - return False - except Exception: - return False From 0a4b1068a253df8fb4e39a93ee18a73c911ee49e Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Mon, 17 Nov 2025 17:42:22 +0100 Subject: [PATCH 151/284] feat: add kwargs to openai adapter functions --- .../litellm_instructor/llm/openai/adapter.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py index 305b426b8..152f43e33 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py @@ -108,7 +108,7 @@ class OpenAIAdapter(LLMInterface): reraise=True, ) async def acreate_structured_output( - self, text_input: str, system_prompt: str, response_model: Type[BaseModel] + self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs ) -> BaseModel: """ Generate a response from a user query. @@ -149,6 +149,7 @@ class OpenAIAdapter(LLMInterface): api_version=self.api_version, response_model=response_model, max_retries=self.MAX_RETRIES, + **kwargs, ) except ( ContentFilterFinishReasonError, @@ -174,6 +175,7 @@ class OpenAIAdapter(LLMInterface): # api_base=self.fallback_endpoint, response_model=response_model, max_retries=self.MAX_RETRIES, + **kwargs, ) except ( ContentFilterFinishReasonError, @@ -199,7 +201,7 @@ class OpenAIAdapter(LLMInterface): reraise=True, ) def create_structured_output( - self, text_input: str, system_prompt: str, response_model: Type[BaseModel] + self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs ) -> BaseModel: """ Generate a response from a user query. 
@@ -239,6 +241,7 @@ class OpenAIAdapter(LLMInterface): api_version=self.api_version, response_model=response_model, max_retries=self.MAX_RETRIES, + **kwargs, ) @retry( @@ -248,7 +251,7 @@ class OpenAIAdapter(LLMInterface): before_sleep=before_sleep_log(logger, logging.DEBUG), reraise=True, ) - async def create_transcript(self, input): + async def create_transcript(self, input, **kwargs): """ Generate an audio transcript from a user query. @@ -275,6 +278,7 @@ class OpenAIAdapter(LLMInterface): api_base=self.endpoint, api_version=self.api_version, max_retries=self.MAX_RETRIES, + **kwargs, ) return transcription @@ -286,7 +290,7 @@ class OpenAIAdapter(LLMInterface): before_sleep=before_sleep_log(logger, logging.DEBUG), reraise=True, ) - async def transcribe_image(self, input) -> BaseModel: + async def transcribe_image(self, input, **kwargs) -> BaseModel: """ Generate a transcription of an image from a user query. @@ -331,4 +335,5 @@ class OpenAIAdapter(LLMInterface): api_version=self.api_version, max_completion_tokens=300, max_retries=self.MAX_RETRIES, + **kwargs, ) From fe55071849c06ecd3fe70602d56bb1f2904538a4 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Wed, 19 Nov 2025 15:33:50 +0100 Subject: [PATCH 152/284] Feature/cog 3407 fixing integration test in ci (#1810) ## Description This PR should fix the web crawler integration test issue in our CI ## Type of Change - [x] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [ ] **I have tested my changes thoroughly before submitting this PR** - [ ] **This PR contains minimal changes necessary to address the issue/feature** - [ ] My code follows the 
project's coding standards and style guidelines - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have added necessary documentation (if applicable) - [ ] All new and existing tests pass - [ ] I have searched existing PRs to ensure this change hasn't been submitted already - [ ] I have linked any relevant issues in the description - [ ] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. --- .../test_default_url_crawler.py | 2 +- .../web_url_crawler/test_tavily_crawler.py | 2 +- .../web_url_crawler/test_url_adding_e2e.py | 40 ++++++------------- 3 files changed, 15 insertions(+), 29 deletions(-) diff --git a/cognee/tests/integration/web_url_crawler/test_default_url_crawler.py b/cognee/tests/integration/web_url_crawler/test_default_url_crawler.py index 156cc87a4..f48c1cedc 100644 --- a/cognee/tests/integration/web_url_crawler/test_default_url_crawler.py +++ b/cognee/tests/integration/web_url_crawler/test_default_url_crawler.py @@ -5,7 +5,7 @@ from cognee.tasks.web_scraper import DefaultUrlCrawler @pytest.mark.asyncio async def test_fetch(): crawler = DefaultUrlCrawler() - url = "https://en.wikipedia.org/wiki/Large_language_model" + url = "https://httpbin.org/html" results = await crawler.fetch_urls(url) assert len(results) == 1 assert isinstance(results, dict) diff --git a/cognee/tests/integration/web_url_crawler/test_tavily_crawler.py b/cognee/tests/integration/web_url_crawler/test_tavily_crawler.py index 946ce8378..19ffdc4ea 100644 --- a/cognee/tests/integration/web_url_crawler/test_tavily_crawler.py +++ b/cognee/tests/integration/web_url_crawler/test_tavily_crawler.py @@ -11,7 +11,7 @@ skip_in_ci = pytest.mark.skipif( @skip_in_ci @pytest.mark.asyncio async def test_fetch(): - url = "https://en.wikipedia.org/wiki/Large_language_model" + url = "https://httpbin.org/html" results = 
await fetch_with_tavily(url) assert isinstance(results, dict) assert len(results) == 1 diff --git a/cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py b/cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py index d91b075aa..cc8ae24d0 100644 --- a/cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py +++ b/cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py @@ -14,9 +14,7 @@ async def test_url_saves_as_html_file(): await cognee.prune.prune_system(metadata=True) try: - original_file_path = await save_data_item_to_storage( - "https://en.wikipedia.org/wiki/Large_language_model" - ) + original_file_path = await save_data_item_to_storage("https://httpbin.org/html") file_path = get_data_file_path(original_file_path) assert file_path.endswith(".html") file = Path(file_path) @@ -44,9 +42,7 @@ async def test_saved_html_is_valid(): await cognee.prune.prune_system(metadata=True) try: - original_file_path = await save_data_item_to_storage( - "https://en.wikipedia.org/wiki/Large_language_model" - ) + original_file_path = await save_data_item_to_storage("https://httpbin.org/html") file_path = get_data_file_path(original_file_path) content = Path(file_path).read_text() @@ -72,7 +68,7 @@ async def test_add_url(): await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) - await cognee.add("https://en.wikipedia.org/wiki/Large_language_model") + await cognee.add("https://httpbin.org/html") skip_in_ci = pytest.mark.skipif( @@ -88,7 +84,7 @@ async def test_add_url_with_tavily(): await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) - await cognee.add("https://en.wikipedia.org/wiki/Large_language_model") + await cognee.add("https://httpbin.org/html") @pytest.mark.asyncio @@ -98,7 +94,7 @@ async def test_add_url_without_incremental_loading(): try: await cognee.add( - "https://en.wikipedia.org/wiki/Large_language_model", + "https://httpbin.org/html", incremental_loading=False, ) except Exception as 
e: @@ -112,7 +108,7 @@ async def test_add_url_with_incremental_loading(): try: await cognee.add( - "https://en.wikipedia.org/wiki/Large_language_model", + "https://httpbin.org/html", incremental_loading=True, ) except Exception as e: @@ -125,7 +121,7 @@ async def test_add_url_can_define_preferred_loader_as_list_of_str(): await cognee.prune.prune_system(metadata=True) await cognee.add( - "https://en.wikipedia.org/wiki/Large_language_model", + "https://httpbin.org/html", preferred_loaders=["beautiful_soup_loader"], ) @@ -144,7 +140,7 @@ async def test_add_url_with_extraction_rules(): try: await cognee.add( - "https://en.wikipedia.org/wiki/Large_language_model", + "https://httpbin.org/html", preferred_loaders={"beautiful_soup_loader": {"extraction_rules": extraction_rules}}, ) except Exception as e: @@ -163,9 +159,7 @@ async def test_loader_is_none_by_default(): } try: - original_file_path = await save_data_item_to_storage( - "https://en.wikipedia.org/wiki/Large_language_model" - ) + original_file_path = await save_data_item_to_storage("https://httpbin.org/html") file_path = get_data_file_path(original_file_path) assert file_path.endswith(".html") file = Path(file_path) @@ -196,9 +190,7 @@ async def test_beautiful_soup_loader_is_selected_loader_if_preferred_loader_prov } try: - original_file_path = await save_data_item_to_storage( - "https://en.wikipedia.org/wiki/Large_language_model" - ) + original_file_path = await save_data_item_to_storage("https://httpbin.org/html") file_path = get_data_file_path(original_file_path) assert file_path.endswith(".html") file = Path(file_path) @@ -225,9 +217,7 @@ async def test_beautiful_soup_loader_works_with_and_without_arguments(): await cognee.prune.prune_system(metadata=True) try: - original_file_path = await save_data_item_to_storage( - "https://en.wikipedia.org/wiki/Large_language_model" - ) + original_file_path = await save_data_item_to_storage("https://httpbin.org/html") file_path = get_data_file_path(original_file_path) 
assert file_path.endswith(".html") file = Path(file_path) @@ -263,9 +253,7 @@ async def test_beautiful_soup_loader_successfully_loads_file_if_required_args_pr await cognee.prune.prune_system(metadata=True) try: - original_file_path = await save_data_item_to_storage( - "https://en.wikipedia.org/wiki/Large_language_model" - ) + original_file_path = await save_data_item_to_storage("https://httpbin.org/html") file_path = get_data_file_path(original_file_path) assert file_path.endswith(".html") file = Path(file_path) @@ -302,9 +290,7 @@ async def test_beautiful_soup_loads_file_successfully(): } try: - original_file_path = await save_data_item_to_storage( - "https://en.wikipedia.org/wiki/Large_language_model" - ) + original_file_path = await save_data_item_to_storage("https://httpbin.org/html") file_path = get_data_file_path(original_file_path) assert file_path.endswith(".html") original_file = Path(file_path) From 7360729db12f12f1f8794b819b2c0f402b7e5e00 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 19 Nov 2025 23:04:19 +0100 Subject: [PATCH 153/284] fix: Resolve issue with BAML rate limit handling --- .../extraction/acreate_structured_output.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py index 6ef27e51d..3069993e9 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py @@ -1,7 +1,15 @@ import asyncio from typing import Type -from cognee.shared.logging_utils import get_logger +from pydantic import BaseModel +from tenacity import ( + retry, + stop_after_delay, + wait_exponential_jitter, + retry_if_not_exception_type, + 
before_sleep_log, +) +from cognee.shared.logging_utils import get_logger from cognee.infrastructure.llm.config import get_llm_config from cognee.infrastructure.llm.structured_output_framework.baml.baml_src.extraction.create_dynamic_baml_type import ( create_dynamic_baml_type, @@ -10,12 +18,17 @@ from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.type TypeBuilder, ) from cognee.infrastructure.llm.structured_output_framework.baml.baml_client import b -from pydantic import BaseModel - +import logging logger = get_logger() +@retry( + stop=stop_after_delay(128), + wait=wait_exponential_jitter(2, 128), + before_sleep=before_sleep_log(logger, logging.DEBUG), + reraise=True, +) async def acreate_structured_output( text_input: str, system_prompt: str, response_model: Type[BaseModel] ): From 30e3971d44816db50d9e83eee39bf6d69b98a328 Mon Sep 17 00:00:00 2001 From: Fahad Shoaib Date: Thu, 20 Nov 2025 15:36:15 +0500 Subject: [PATCH 154/284] fix: add auth headers to ontology upload request and enhance ontology content --- cognee/tests/test_cognee_server_start.py | 53 +++++++++++++++++++++--- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/cognee/tests/test_cognee_server_start.py b/cognee/tests/test_cognee_server_start.py index b266fc7bf..ddffe53a4 100644 --- a/cognee/tests/test_cognee_server_start.py +++ b/cognee/tests/test_cognee_server_start.py @@ -98,15 +98,56 @@ class TestCogneeServerStart(unittest.TestCase): if add_response.status_code not in [200, 201]: add_response.raise_for_status() - ontology_content = b""" - - - - - """ + ontology_content = b""" + + + + + + + + + + + + + + + + A failure caused by physical components. + + + + + An error caused by software logic or configuration. + + + + A human being or individual. 
+ + + + + Programmers + + + + Light Bulb + + + + Hardware Problem + + + """ ontology_response = requests.post( "http://127.0.0.1:8000/api/v1/ontologies", + headers=headers, files=[("ontology_file", ("test.owl", ontology_content, "application/xml"))], data={ "ontology_key": json.dumps([ontology_key]), From 8cfb6c41eeca3b2ad0e34fb4b6043f4b5f6a8c00 Mon Sep 17 00:00:00 2001 From: Fahad Shoaib Date: Thu, 20 Nov 2025 15:54:09 +0500 Subject: [PATCH 155/284] fix: remove async from ontology endpoint test functions --- cognee/tests/unit/api/test_ontology_endpoint.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cognee/tests/unit/api/test_ontology_endpoint.py b/cognee/tests/unit/api/test_ontology_endpoint.py index c04959998..d53c5ab44 100644 --- a/cognee/tests/unit/api/test_ontology_endpoint.py +++ b/cognee/tests/unit/api/test_ontology_endpoint.py @@ -104,7 +104,7 @@ def test_upload_ontology_unauthorized(mock_get_default_user, client, mock_defaul @patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) -async def test_upload_multiple_ontologies(mock_get_default_user, client, mock_default_user): +def test_upload_multiple_ontologies(mock_get_default_user, client, mock_default_user): """Test uploading multiple ontology files in single request""" import io @@ -132,7 +132,7 @@ async def test_upload_multiple_ontologies(mock_get_default_user, client, mock_de @patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) -async def test_upload_endpoint_accepts_arrays(mock_get_default_user, client, mock_default_user): +def test_upload_endpoint_accepts_arrays(mock_get_default_user, client, mock_default_user): """Test that upload endpoint accepts array parameters""" import io import json @@ -153,7 +153,7 @@ async def test_upload_endpoint_accepts_arrays(mock_get_default_user, client, moc @patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) -async def test_cognify_with_multiple_ontologies(mock_get_default_user, client, 
mock_default_user): +def test_cognify_with_multiple_ontologies(mock_get_default_user, client, mock_default_user): """Test cognify endpoint accepts multiple ontology keys""" payload = { "datasets": ["test_dataset"], @@ -168,7 +168,7 @@ async def test_cognify_with_multiple_ontologies(mock_get_default_user, client, m @patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) -async def test_complete_multifile_workflow(mock_get_default_user, client, mock_default_user): +def test_complete_multifile_workflow(mock_get_default_user, client, mock_default_user): """Test complete workflow: upload multiple ontologies → cognify with multiple keys""" import io import json @@ -218,7 +218,7 @@ async def test_complete_multifile_workflow(mock_get_default_user, client, mock_d @patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) -async def test_multifile_error_handling(mock_get_default_user, client, mock_default_user): +def test_multifile_error_handling(mock_get_default_user, client, mock_default_user): """Test error handling for invalid multifile uploads""" import io import json @@ -251,7 +251,7 @@ async def test_multifile_error_handling(mock_get_default_user, client, mock_defa @patch.object(gau_mod, "get_default_user", new_callable=AsyncMock) -async def test_cognify_missing_ontology_key(mock_get_default_user, client, mock_default_user): +def test_cognify_missing_ontology_key(mock_get_default_user, client, mock_default_user): """Test cognify with non-existent ontology key""" payload = { "datasets": ["test_dataset"], From 4e880eca8422872b26a568b18b9f1339ce362c18 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Thu, 20 Nov 2025 15:47:22 +0100 Subject: [PATCH 156/284] chore: update env template --- .env.template | 1 + 1 file changed, 1 insertion(+) diff --git a/.env.template b/.env.template index ae2cb1338..376233b1f 100644 --- a/.env.template +++ b/.env.template @@ -21,6 +21,7 @@ LLM_PROVIDER="openai" LLM_ENDPOINT="" LLM_API_VERSION="" LLM_MAX_TOKENS="16384" 
+LLM_INSTRUCTOR_MODE="json_schema_mode" # this mode is used for gpt-5 models EMBEDDING_PROVIDER="openai" EMBEDDING_MODEL="openai/text-embedding-3-large" From 2176ec16b8e440087f96410fed979528e8159ca2 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 20 Nov 2025 17:03:36 +0100 Subject: [PATCH 157/284] chore: changes url for crawler tests (#1816) Updates crawler test url to avoid blocking and unavailable sites in CI. ## Description ## Type of Change - [ ] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [ ] **I have tested my changes thoroughly before submitting this PR** - [ ] **This PR contains minimal changes necessary to address the issue/feature** - [ ] My code follows the project's coding standards and style guidelines - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have added necessary documentation (if applicable) - [ ] All new and existing tests pass - [ ] I have searched existing PRs to ensure this change hasn't been submitted already - [ ] I have linked any relevant issues in the description - [ ] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. 
--- .../test_default_url_crawler.py | 2 +- .../web_url_crawler/test_tavily_crawler.py | 2 +- .../web_url_crawler/test_url_adding_e2e.py | 26 +++++++++---------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/cognee/tests/integration/web_url_crawler/test_default_url_crawler.py b/cognee/tests/integration/web_url_crawler/test_default_url_crawler.py index f48c1cedc..af2595b14 100644 --- a/cognee/tests/integration/web_url_crawler/test_default_url_crawler.py +++ b/cognee/tests/integration/web_url_crawler/test_default_url_crawler.py @@ -5,7 +5,7 @@ from cognee.tasks.web_scraper import DefaultUrlCrawler @pytest.mark.asyncio async def test_fetch(): crawler = DefaultUrlCrawler() - url = "https://httpbin.org/html" + url = "http://example.com/" results = await crawler.fetch_urls(url) assert len(results) == 1 assert isinstance(results, dict) diff --git a/cognee/tests/integration/web_url_crawler/test_tavily_crawler.py b/cognee/tests/integration/web_url_crawler/test_tavily_crawler.py index 19ffdc4ea..5db9b58ce 100644 --- a/cognee/tests/integration/web_url_crawler/test_tavily_crawler.py +++ b/cognee/tests/integration/web_url_crawler/test_tavily_crawler.py @@ -11,7 +11,7 @@ skip_in_ci = pytest.mark.skipif( @skip_in_ci @pytest.mark.asyncio async def test_fetch(): - url = "https://httpbin.org/html" + url = "http://example.com/" results = await fetch_with_tavily(url) assert isinstance(results, dict) assert len(results) == 1 diff --git a/cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py b/cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py index cc8ae24d0..200f40a94 100644 --- a/cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py +++ b/cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py @@ -14,7 +14,7 @@ async def test_url_saves_as_html_file(): await cognee.prune.prune_system(metadata=True) try: - original_file_path = await save_data_item_to_storage("https://httpbin.org/html") + original_file_path = await 
save_data_item_to_storage("http://example.com/") file_path = get_data_file_path(original_file_path) assert file_path.endswith(".html") file = Path(file_path) @@ -42,7 +42,7 @@ async def test_saved_html_is_valid(): await cognee.prune.prune_system(metadata=True) try: - original_file_path = await save_data_item_to_storage("https://httpbin.org/html") + original_file_path = await save_data_item_to_storage("http://example.com/") file_path = get_data_file_path(original_file_path) content = Path(file_path).read_text() @@ -68,7 +68,7 @@ async def test_add_url(): await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) - await cognee.add("https://httpbin.org/html") + await cognee.add("http://example.com/") skip_in_ci = pytest.mark.skipif( @@ -84,7 +84,7 @@ async def test_add_url_with_tavily(): await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) - await cognee.add("https://httpbin.org/html") + await cognee.add("http://example.com/") @pytest.mark.asyncio @@ -94,7 +94,7 @@ async def test_add_url_without_incremental_loading(): try: await cognee.add( - "https://httpbin.org/html", + "http://example.com/", incremental_loading=False, ) except Exception as e: @@ -108,7 +108,7 @@ async def test_add_url_with_incremental_loading(): try: await cognee.add( - "https://httpbin.org/html", + "http://example.com/", incremental_loading=True, ) except Exception as e: @@ -121,7 +121,7 @@ async def test_add_url_can_define_preferred_loader_as_list_of_str(): await cognee.prune.prune_system(metadata=True) await cognee.add( - "https://httpbin.org/html", + "http://example.com/", preferred_loaders=["beautiful_soup_loader"], ) @@ -140,7 +140,7 @@ async def test_add_url_with_extraction_rules(): try: await cognee.add( - "https://httpbin.org/html", + "http://example.com/", preferred_loaders={"beautiful_soup_loader": {"extraction_rules": extraction_rules}}, ) except Exception as e: @@ -159,7 +159,7 @@ async def test_loader_is_none_by_default(): } try: - 
original_file_path = await save_data_item_to_storage("https://httpbin.org/html") + original_file_path = await save_data_item_to_storage("http://example.com/") file_path = get_data_file_path(original_file_path) assert file_path.endswith(".html") file = Path(file_path) @@ -190,7 +190,7 @@ async def test_beautiful_soup_loader_is_selected_loader_if_preferred_loader_prov } try: - original_file_path = await save_data_item_to_storage("https://httpbin.org/html") + original_file_path = await save_data_item_to_storage("http://example.com/") file_path = get_data_file_path(original_file_path) assert file_path.endswith(".html") file = Path(file_path) @@ -217,7 +217,7 @@ async def test_beautiful_soup_loader_works_with_and_without_arguments(): await cognee.prune.prune_system(metadata=True) try: - original_file_path = await save_data_item_to_storage("https://httpbin.org/html") + original_file_path = await save_data_item_to_storage("http://example.com/") file_path = get_data_file_path(original_file_path) assert file_path.endswith(".html") file = Path(file_path) @@ -253,7 +253,7 @@ async def test_beautiful_soup_loader_successfully_loads_file_if_required_args_pr await cognee.prune.prune_system(metadata=True) try: - original_file_path = await save_data_item_to_storage("https://httpbin.org/html") + original_file_path = await save_data_item_to_storage("http://example.com/") file_path = get_data_file_path(original_file_path) assert file_path.endswith(".html") file = Path(file_path) @@ -290,7 +290,7 @@ async def test_beautiful_soup_loads_file_successfully(): } try: - original_file_path = await save_data_item_to_storage("https://httpbin.org/html") + original_file_path = await save_data_item_to_storage("http://example.com/") file_path = get_data_file_path(original_file_path) assert file_path.endswith(".html") original_file = Path(file_path) From 68d81a912519e8e19eda6a2657c14853d72234f2 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 20 Nov 2025 18:37:15 +0100 Subject: [PATCH 158/284] 
refactor: Update multi-user database dataset creation mechanism --- .../databases/graph/graph_db_interface.py | 34 +++++++ .../databases/graph/neo4j_driver/adapter.py | 91 +++++++++++++++++++ .../utils/get_or_create_dataset_database.py | 72 +-------------- .../vector/lancedb/LanceDBAdapter.py | 2 +- .../databases/vector/vector_db_interface.py | 15 +-- 5 files changed, 139 insertions(+), 75 deletions(-) diff --git a/cognee/infrastructure/databases/graph/graph_db_interface.py b/cognee/infrastructure/databases/graph/graph_db_interface.py index 67df1a27c..6d323764b 100644 --- a/cognee/infrastructure/databases/graph/graph_db_interface.py +++ b/cognee/infrastructure/databases/graph/graph_db_interface.py @@ -6,6 +6,7 @@ from typing import Optional, Dict, Any, List, Tuple, Type, Union from uuid import NAMESPACE_OID, UUID, uuid5 from cognee.shared.logging_utils import get_logger from cognee.infrastructure.engine import DataPoint +from cognee.modules.users.models.User import User from cognee.modules.data.models.graph_relationship_ledger import GraphRelationshipLedger from cognee.infrastructure.databases.relational.get_relational_engine import get_relational_engine @@ -398,3 +399,36 @@ class GraphDBInterface(ABC): - node_id (Union[str, UUID]): Unique identifier of the node for which to retrieve connections. """ raise NotImplementedError + + @classmethod + async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict: + """ + Return a dictionary with connection info for a graph database for the given dataset. + Function can auto handle deploying of the actual database if needed, but is not necessary. + Only providing connection info is sufficient, this info will be mapped when trying to connect to the provided dataset in the future. + Needed for Cognee multi-tenant/multi-user and backend access control support. + + Dictionary returned from this function will be used to create a DatasetDatabase row in the relational database. 
+ From which internal mapping of dataset -> database connection info will be done. + + Each dataset needs to map to a unique graph database when backend access control is enabled to facilitate a separation of concern for data. + + Args: + dataset_id: UUID of the dataset if needed by the database creation logic + user: User object if needed by the database creation logic + Returns: + dict: Connection info for the created graph database instance. + """ + pass + + async def delete_dataset(self, dataset_id: UUID, user: User) -> None: + """ + Delete the graph database for the given dataset. + Function should auto handle deleting of the actual database or send a request to the proper service to delete the database. + Needed for maintaining a database for Cognee multi-tenant/multi-user and backend access control. + + Args: + dataset_id: UUID of the dataset + user: User object + """ + pass diff --git a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py index 6216e107e..dfcf36499 100644 --- a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +++ b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py @@ -1,7 +1,9 @@ """Neo4j Adapter for Graph Database""" +import os import json import asyncio +import requests from uuid import UUID from textwrap import dedent from neo4j import AsyncSession @@ -12,6 +14,7 @@ from typing import Optional, Any, List, Dict, Type, Tuple from cognee.infrastructure.engine import DataPoint from cognee.modules.engine.utils.generate_timestamp_datapoint import date_to_int +from cognee.modules.users.models import User from cognee.tasks.temporal_graph.models import Timestamp from cognee.shared.logging_utils import get_logger, ERROR from cognee.infrastructure.databases.graph.graph_db_interface import ( @@ -1470,3 +1473,91 @@ class Neo4jAdapter(GraphDBInterface): time_ids_list = [item["id"] for item in time_nodes if "id" in item] return ", ".join(f"'{uid}'" for uid 
in time_ids_list) + + @classmethod + async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict: + """ + Create a new Neo4j Aura instance for the dataset. Return connection info that will be mapped to the dataset. + + Args: + dataset_id: Dataset UUID + user: User object who owns the dataset and is making the request + + Returns: + dict: Connection details for the created Neo4j instance + + """ + graph_db_name = f"{dataset_id}" + + # Client credentials + client_id = os.environ.get("NEO4J_CLIENT_ID", None) + client_secret = os.environ.get("NEO4J_CLIENT_SECRET", None) + tenant_id = os.environ.get("NEO4J_TENANT_ID", None) + + # Make the request with HTTP Basic Auth + def get_aura_token(client_id: str, client_secret: str) -> dict: + url = "https://api.neo4j.io/oauth/token" + data = {"grant_type": "client_credentials"} # sent as application/x-www-form-urlencoded + + resp = requests.post(url, data=data, auth=(client_id, client_secret)) + resp.raise_for_status() # raises if the request failed + return resp.json() + + resp = get_aura_token(client_id, client_secret) + + url = "https://api.neo4j.io/v1/instances" + + headers = { + "accept": "application/json", + "Authorization": f"Bearer {resp['access_token']}", + "Content-Type": "application/json", + } + + # TODO: Maybe we can allow **kwargs parameter forwarding for cases like these + # Too allow different configurations between datasets + payload = { + "version": "5", + "region": "europe-west1", + "memory": "1GB", + "name": graph_db_name[0:29], + "type": "professional-db", + "tenant_id": tenant_id, + "cloud_provider": "gcp", + } + + response = requests.post(url, headers=headers, json=payload) + + print(response.status_code) + print(response.text) + # TODO: Find better name to name Neo4j instance within 30 character limit + print(graph_db_name[0:29]) + graph_db_name = "neo4j" + graph_db_url = response.json()["data"]["connection_url"] + graph_db_key = resp["access_token"] + graph_db_username = 
response.json()["data"]["username"] + graph_db_password = response.json()["data"]["password"] + + async def _wait_for_neo4j_instance_provisioning(instance_id: str, headers: dict): + # Poll until the instance is running + status_url = f"https://api.neo4j.io/v1/instances/{instance_id}" + status = "" + for attempt in range(30): # Try for up to ~5 minutes + status_resp = requests.get(status_url, headers=headers) + status = status_resp.json()["data"]["status"] + if status.lower() == "running": + return + await asyncio.sleep(10) + raise TimeoutError( + f"Neo4j instance '{graph_db_name}' did not become ready within 5 minutes. Status: {status}" + ) + + instance_id = response.json()["data"]["id"] + await _wait_for_neo4j_instance_provisioning(instance_id, headers) + return { + "graph_database_name": graph_db_name, + "graph_database_url": graph_db_url, + "graph_database_provider": "neo4j", + "graph_database_key": graph_db_key, # TODO: Hashing of keys/passwords in relational DB + "graph_database_username": graph_db_username, + "graph_database_password": graph_db_password, + } diff --git a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py index a292d2f5b..b60640d4c 100644 --- a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +++ b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py @@ -25,7 +25,7 @@ async def _get_vector_db_info(dataset_id: UUID, user: User) -> dict: # TODO: Have the create_database method be called from interface adapter automatically for all providers instead of specifically here from cognee.infrastructure.databases.vector.lancedb.LanceDBAdapter import LanceDBAdapter - return await LanceDBAdapter.create_database(dataset_id, user) + return await LanceDBAdapter.create_dataset(dataset_id, user) else: # Note: for hybrid databases both graph and vector DB name have to be the same @@ -42,75 +42,11 @@ async def 
_get_vector_db_info(dataset_id: UUID, user: User) -> dict: async def _get_graph_db_info(dataset_id: UUID, user: User) -> dict: graph_config = get_graph_config() - # Determine graph database URL if graph_config.graph_database_provider == "neo4j": - graph_db_name = f"{dataset_id}" - # Auto deploy instance to Aura DB - # OAuth2 token endpoint + from cognee.infrastructure.databases.graph.neo4j_driver.adapter import Neo4jAdapter - # Your client credentials - client_id = os.environ.get("NEO4J_CLIENT_ID", None) - client_secret = os.environ.get("NEO4J_CLIENT_SECRET", None) - tenant_id = os.environ.get("NEO4J_TENANT_ID", None) - - # Make the request with HTTP Basic Auth - def get_aura_token(client_id: str, client_secret: str) -> dict: - url = "https://api.neo4j.io/oauth/token" - data = {"grant_type": "client_credentials"} # sent as application/x-www-form-urlencoded - - resp = requests.post(url, data=data, auth=(client_id, client_secret)) - resp.raise_for_status() # raises if the request failed - return resp.json() - - resp = get_aura_token(client_id, client_secret) - - url = "https://api.neo4j.io/v1/instances" - - headers = { - "accept": "application/json", - "Authorization": f"Bearer {resp['access_token']}", - "Content-Type": "application/json", - } - - payload = { - "version": "5", - "region": "europe-west1", - "memory": "1GB", - "name": graph_db_name[0:29], - "type": "professional-db", - "tenant_id": tenant_id, - "cloud_provider": "gcp", - } - - response = requests.post(url, headers=headers, json=payload) - - print(response.status_code) - print(response.text) - # TODO: Find better name to name Neo4j instance within 30 character limit - print(graph_db_name[0:29]) - graph_db_name = "neo4j" - graph_db_url = response.json()["data"]["connection_url"] - graph_db_key = resp["access_token"] - graph_db_username = response.json()["data"]["username"] - graph_db_password = response.json()["data"]["password"] - - async def _wait_for_neo4j_instance_provisioning(instance_id: str, 
headers: dict): - # Poll until the instance is running - status_url = f"https://api.neo4j.io/v1/instances/{instance_id}" - status = "" - for attempt in range(30): # Try for up to ~5 minutes - status_resp = requests.get(status_url, headers=headers) - status = status_resp.json()["data"]["status"] - if status.lower() == "running": - return - await asyncio.sleep(10) - raise TimeoutError( - f"Neo4j instance '{graph_db_name}' did not become ready within 5 minutes. Status: {status}" - ) - - instance_id = response.json()["data"]["id"] - await _wait_for_neo4j_instance_provisioning(instance_id, headers) + return await Neo4jAdapter.create_dataset(dataset_id, user) elif graph_config.graph_database_provider == "kuzu": # TODO: Add graph file path info for kuzu (also in DatasetDatabase model) @@ -176,6 +112,8 @@ async def get_or_create_dataset_database( • If the row already exists, it is fetched and returned. • Otherwise a new one is created atomically and returned. + DatasetDatabase row contains connection and provider info for vector and graph databases. 
+ Parameters ---------- user : User diff --git a/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py b/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py index f2d8fcc09..a93fbc818 100644 --- a/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +++ b/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py @@ -364,7 +364,7 @@ class LanceDBAdapter(VectorDBInterface): ) @classmethod - async def create_database(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict: + async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict: vector_config = get_vectordb_config() base_config = get_base_config() databases_directory_path = os.path.join( diff --git a/cognee/infrastructure/databases/vector/vector_db_interface.py b/cognee/infrastructure/databases/vector/vector_db_interface.py index b89818275..12ace1a6c 100644 --- a/cognee/infrastructure/databases/vector/vector_db_interface.py +++ b/cognee/infrastructure/databases/vector/vector_db_interface.py @@ -221,16 +221,17 @@ class VectorDBInterface(Protocol): return model_type @classmethod - async def create_database(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict: + async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict: """ - Return a dictionary with connection info for a vector database for the given dataset and user. - Function should auto handle deploying of the actual database if needed. + Return a dictionary with connection info for a vector database for the given dataset. + Function can auto handle deploying of the actual database if needed, but is not necessary. + Only providing connection info is sufficient, this info will be mapped when trying to connect to the provided dataset in the future. Needed for Cognee multi-tenant/multi-user and backend access control support. Dictionary returned from this function will be used to create a DatasetDatabase row in the relational database. 
From which internal mapping of dataset -> database connection info will be done. - Each dataset needs to map to a unique vector database instance when backend access control is enabled. + Each dataset needs to map to a unique vector database when backend access control is enabled to facilitate a separation of concern for data. Args: dataset_id: UUID of the dataset if needed by the database creation logic @@ -240,10 +241,10 @@ class VectorDBInterface(Protocol): """ pass - async def delete_database(self, dataset_id: UUID, user: User) -> None: + async def delete_dataset(self, dataset_id: UUID, user: User) -> None: """ - Delete the vector database instance for the given dataset and user. - Function should auto handle deleting of the actual database. + Delete the vector database for the given dataset. + Function should auto handle deleting of the actual database or send a request to the proper service to delete the database. Needed for maintaining a database for Cognee multi-tenant/multi-user and backend access control. 
Args: From 080081071337cdf4c12ef7837942b0b0a335722e Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 20 Nov 2025 18:46:02 +0100 Subject: [PATCH 159/284] refactor: remove print statements --- .../databases/graph/neo4j_driver/adapter.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py index dfcf36499..43e5ea654 100644 --- a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +++ b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py @@ -1519,7 +1519,9 @@ class Neo4jAdapter(GraphDBInterface): "version": "5", "region": "europe-west1", "memory": "1GB", - "name": graph_db_name[0:29], + "name": graph_db_name[ + 0:29 + ], # TODO: Find better name to name Neo4j instance within 30 character limit "type": "professional-db", "tenant_id": tenant_id, "cloud_provider": "gcp", @@ -1527,10 +1529,6 @@ class Neo4jAdapter(GraphDBInterface): response = requests.post(url, headers=headers, json=payload) - print(response.status_code) - print(response.text) - # TODO: Find better name to name Neo4j instance within 30 character limit - print(graph_db_name[0:29]) graph_db_name = "neo4j" graph_db_url = response.json()["data"]["connection_url"] graph_db_key = resp["access_token"] From 204f9c2e4ad6dc706c03deed68adfbc4744ae6df Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Fri, 21 Nov 2025 16:20:19 +0100 Subject: [PATCH 160/284] fix: PR comment changes --- .env.template | 5 ++++- cognee/infrastructure/llm/config.py | 4 ++-- .../litellm_instructor/llm/get_llm_client.py | 12 ++++++------ 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/.env.template b/.env.template index 376233b1f..61853b983 100644 --- a/.env.template +++ b/.env.template @@ -21,7 +21,10 @@ LLM_PROVIDER="openai" LLM_ENDPOINT="" LLM_API_VERSION="" LLM_MAX_TOKENS="16384" -LLM_INSTRUCTOR_MODE="json_schema_mode" # this mode is used for gpt-5 
models +# Instructor's modes determine how structured data is requested from and extracted from LLM responses +# You can change this type (i.e. mode) via this env variable +# Each LLM has its own default value, e.g. gpt-5 models have "json_schema_mode" +LLM_INSTRUCTOR_MODE="" EMBEDDING_PROVIDER="openai" EMBEDDING_MODEL="openai/text-embedding-3-large" diff --git a/cognee/infrastructure/llm/config.py b/cognee/infrastructure/llm/config.py index c87054ff6..2e300dc0c 100644 --- a/cognee/infrastructure/llm/config.py +++ b/cognee/infrastructure/llm/config.py @@ -38,7 +38,7 @@ class LLMConfig(BaseSettings): """ structured_output_framework: str = "instructor" - llm_instructor_mode: Optional[str] = None + llm_instructor_mode: str = "" llm_provider: str = "openai" llm_model: str = "openai/gpt-5-mini" llm_endpoint: str = "" @@ -182,7 +182,7 @@ class LLMConfig(BaseSettings): instance. """ return { - "llm_instructor_mode": self.llm_instructor_mode, + "llm_instructor_mode": self.llm_instructor_mode.lower(), "provider": self.llm_provider, "model": self.llm_model, "endpoint": self.llm_endpoint, diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py index 537eda1b2..6ab3b91ad 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py @@ -81,7 +81,7 @@ def get_llm_client(raise_api_key_error: bool = True): model=llm_config.llm_model, transcription_model=llm_config.transcription_model, max_completion_tokens=max_completion_tokens, - instructor_mode=llm_config.llm_instructor_mode, + instructor_mode=llm_config.llm_instructor_mode.lower(), streaming=llm_config.llm_streaming, fallback_api_key=llm_config.fallback_api_key, fallback_endpoint=llm_config.fallback_endpoint, @@ -102,7 +102,7 @@ def 
get_llm_client(raise_api_key_error: bool = True): llm_config.llm_model, "Ollama", max_completion_tokens=max_completion_tokens, - instructor_mode=llm_config.llm_instructor_mode, + instructor_mode=llm_config.llm_instructor_mode.lower(), ) elif provider == LLMProvider.ANTHROPIC: @@ -113,7 +113,7 @@ def get_llm_client(raise_api_key_error: bool = True): return AnthropicAdapter( max_completion_tokens=max_completion_tokens, model=llm_config.llm_model, - instructor_mode=llm_config.llm_instructor_mode, + instructor_mode=llm_config.llm_instructor_mode.lower(), ) elif provider == LLMProvider.CUSTOM: @@ -130,7 +130,7 @@ def get_llm_client(raise_api_key_error: bool = True): llm_config.llm_model, "Custom", max_completion_tokens=max_completion_tokens, - instructor_mode=llm_config.llm_instructor_mode, + instructor_mode=llm_config.llm_instructor_mode.lower(), fallback_api_key=llm_config.fallback_api_key, fallback_endpoint=llm_config.fallback_endpoint, fallback_model=llm_config.fallback_model, @@ -150,7 +150,7 @@ def get_llm_client(raise_api_key_error: bool = True): max_completion_tokens=max_completion_tokens, endpoint=llm_config.llm_endpoint, api_version=llm_config.llm_api_version, - instructor_mode=llm_config.llm_instructor_mode, + instructor_mode=llm_config.llm_instructor_mode.lower(), ) elif provider == LLMProvider.MISTRAL: @@ -166,7 +166,7 @@ def get_llm_client(raise_api_key_error: bool = True): model=llm_config.llm_model, max_completion_tokens=max_completion_tokens, endpoint=llm_config.llm_endpoint, - instructor_mode=llm_config.llm_instructor_mode, + instructor_mode=llm_config.llm_instructor_mode.lower(), ) elif provider == LLMProvider.MISTRAL: From af8c55e82bd5dcefb526ffc106f3dcbc1a881a24 Mon Sep 17 00:00:00 2001 From: Boris Arzentar Date: Mon, 24 Nov 2025 16:16:47 +0100 Subject: [PATCH 161/284] version: 0.5.0.dev0 --- poetry.lock | 62 +++++++++++++++++++++----------------------------- pyproject.toml | 2 +- uv.lock | 10 +++++++- 3 files changed, 36 insertions(+), 38 
deletions(-) diff --git a/poetry.lock b/poetry.lock index 67de51633..0736a7bb7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. [[package]] name = "accelerate" @@ -1231,12 +1231,12 @@ version = "0.4.6" description = "Cross-platform colored terminal text." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -groups = ["main", "dev"] +groups = ["main"] +markers = "(platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"dev\" or extra == \"chromadb\" or sys_platform == \"win32\") and (platform_system == \"Windows\" or os_name == \"nt\" or extra == \"llama-index\" or extra == \"dev\" or sys_platform == \"win32\")" files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -markers = {main = "(platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"dev\" or extra == \"chromadb\" or sys_platform == \"win32\") and (platform_system == \"Windows\" or os_name == \"nt\" or extra == \"llama-index\" or extra == \"dev\" or sys_platform == \"win32\")", dev = "sys_platform == \"win32\""} [[package]] name = "coloredlogs" @@ -2347,7 +2347,7 @@ version = "1.3.0" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" -groups = ["main", "dev"] +groups = ["main"] markers = "python_version == \"3.10\"" files = [ {file = "exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10"}, @@ -3732,14 +3732,14 @@ type = ["pytest-mypy"] name = "iniconfig" version = "2.1.0" description = "brain-dead simple config-ini parsing" 
-optional = false +optional = true python-versions = ">=3.8" -groups = ["main", "dev"] +groups = ["main"] +markers = "extra == \"deepeval\" or extra == \"dev\"" files = [ {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"}, {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, ] -markers = {main = "extra == \"deepeval\" or extra == \"dev\""} [[package]] name = "instructor" @@ -4196,8 +4196,6 @@ groups = ["main"] markers = "extra == \"dlt\"" files = [ {file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"}, - {file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"}, - {file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"}, ] [package.dependencies] @@ -7634,7 +7632,7 @@ version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" -groups = ["main", "dev"] +groups = ["main"] files = [ {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, @@ -8289,14 +8287,14 @@ kaleido = ["kaleido (>=1.0.0)"] name = "pluggy" version = "1.6.0" description = "plugin and hook calling mechanisms for python" -optional = false +optional = true python-versions = ">=3.9" -groups = ["main", "dev"] +groups = ["main"] +markers = "extra == \"deepeval\" or extra == \"dev\" or extra == \"dlt\" or extra == \"docling\"" files = [ {file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"}, {file = "pluggy-1.6.0.tar.gz", hash = 
"sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"}, ] -markers = {main = "extra == \"deepeval\" or extra == \"dev\" or extra == \"dlt\" or extra == \"docling\""} [package.extras] dev = ["pre-commit", "tox"] @@ -8656,7 +8654,6 @@ files = [ {file = "psycopg2-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:0435034157049f6846e95103bd8f5a668788dd913a7c30162ca9503fdf542cb4"}, {file = "psycopg2-2.9.10-cp312-cp312-win32.whl", hash = "sha256:65a63d7ab0e067e2cdb3cf266de39663203d38d6a8ed97f5ca0cb315c73fe067"}, {file = "psycopg2-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:4a579d6243da40a7b3182e0430493dbd55950c493d8c68f4eec0b302f6bbf20e"}, - {file = "psycopg2-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:91fd603a2155da8d0cfcdbf8ab24a2d54bca72795b90d2a3ed2b6da8d979dee2"}, {file = "psycopg2-2.9.10-cp39-cp39-win32.whl", hash = "sha256:9d5b3b94b79a844a986d029eee38998232451119ad653aea42bb9220a8c5066b"}, {file = "psycopg2-2.9.10-cp39-cp39-win_amd64.whl", hash = "sha256:88138c8dedcbfa96408023ea2b0c369eda40fe5d75002c0964c78f46f11fa442"}, {file = "psycopg2-2.9.10.tar.gz", hash = "sha256:12ec0b40b0273f95296233e8750441339298e6a572f7039da5b260e3c8b60e11"}, @@ -8718,7 +8715,6 @@ files = [ {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"}, {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"}, {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"}, - {file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"}, {file = 
"psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"}, @@ -9698,14 +9694,14 @@ files = [ name = "pytest" version = "7.4.4" description = "pytest: simple powerful testing with Python" -optional = false +optional = true python-versions = ">=3.7" -groups = ["main", "dev"] +groups = ["main"] +markers = "extra == \"deepeval\" or extra == \"dev\"" files = [ {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, ] -markers = {main = "extra == \"deepeval\" or extra == \"dev\""} [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} @@ -9792,21 +9788,6 @@ files = [ packaging = ">=17.1" pytest = ">=6.2" -[[package]] -name = "pytest-timeout" -version = "2.4.0" -description = "pytest plugin to abort hanging tests" -optional = false -python-versions = ">=3.7" -groups = ["dev"] -files = [ - {file = "pytest_timeout-2.4.0-py3-none-any.whl", hash = "sha256:c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2"}, - {file = "pytest_timeout-2.4.0.tar.gz", hash = "sha256:7e68e90b01f9eff71332b25001f85c75495fc4e3a836701876183c4bcfd0540a"}, -] - -[package.dependencies] -pytest = ">=7.0.0" - [[package]] name = "pytest-xdist" version = "3.8.0" @@ -11656,7 +11637,9 @@ groups = ["main"] files = [ {file = "SQLAlchemy-2.0.43-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:21ba7a08a4253c5825d1db389d4299f64a100ef9800e4624c8bf70d8f136e6ed"}, {file = "SQLAlchemy-2.0.43-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:11b9503fa6f8721bef9b8567730f664c5a5153d25e247aadc69247c4bc605227"}, + {file = "SQLAlchemy-2.0.43-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07097c0a1886c150ef2adba2ff7437e84d40c0f7dcb44a2c2b9c905ccfc6361c"}, {file = "SQLAlchemy-2.0.43-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:cdeff998cb294896a34e5b2f00e383e7c5c4ef3b4bfa375d9104723f15186443"}, + {file = "SQLAlchemy-2.0.43-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:bcf0724a62a5670e5718957e05c56ec2d6850267ea859f8ad2481838f889b42c"}, {file = "SQLAlchemy-2.0.43-cp37-cp37m-win32.whl", hash = "sha256:c697575d0e2b0a5f0433f679bda22f63873821d991e95a90e9e52aae517b2e32"}, {file = "SQLAlchemy-2.0.43-cp37-cp37m-win_amd64.whl", hash = "sha256:d34c0f6dbefd2e816e8f341d0df7d4763d382e3f452423e752ffd1e213da2512"}, {file = "sqlalchemy-2.0.43-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:70322986c0c699dca241418fcf18e637a4369e0ec50540a2b907b184c8bca069"}, @@ -11691,12 +11674,20 @@ files = [ {file = "sqlalchemy-2.0.43-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9df7126fd9db49e3a5a3999442cc67e9ee8971f3cb9644250107d7296cb2a164"}, {file = "sqlalchemy-2.0.43-cp313-cp313-win32.whl", hash = "sha256:7f1ac7828857fcedb0361b48b9ac4821469f7694089d15550bbcf9ab22564a1d"}, {file = "sqlalchemy-2.0.43-cp313-cp313-win_amd64.whl", hash = "sha256:971ba928fcde01869361f504fcff3b7143b47d30de188b11c6357c0505824197"}, + {file = "sqlalchemy-2.0.43-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4e6aeb2e0932f32950cf56a8b4813cb15ff792fc0c9b3752eaf067cfe298496a"}, + {file = "sqlalchemy-2.0.43-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:61f964a05356f4bca4112e6334ed7c208174511bd56e6b8fc86dad4d024d4185"}, {file = "sqlalchemy-2.0.43-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46293c39252f93ea0910aababa8752ad628bcce3a10d3f260648dd472256983f"}, + {file = "sqlalchemy-2.0.43-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:136063a68644eca9339d02e6693932116f6a8591ac013b0014479a1de664e40a"}, {file = "sqlalchemy-2.0.43-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:6e2bf13d9256398d037fef09fd8bf9b0bf77876e22647d10761d35593b9ac547"}, + {file = "sqlalchemy-2.0.43-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:44337823462291f17f994d64282a71c51d738fc9ef561bf265f1d0fd9116a782"}, {file = "sqlalchemy-2.0.43-cp38-cp38-win32.whl", hash = "sha256:13194276e69bb2af56198fef7909d48fd34820de01d9c92711a5fa45497cc7ed"}, {file = "sqlalchemy-2.0.43-cp38-cp38-win_amd64.whl", hash = "sha256:334f41fa28de9f9be4b78445e68530da3c5fa054c907176460c81494f4ae1f5e"}, + {file = "sqlalchemy-2.0.43-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ceb5c832cc30663aeaf5e39657712f4c4241ad1f638d487ef7216258f6d41fe7"}, + {file = "sqlalchemy-2.0.43-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:11f43c39b4b2ec755573952bbcc58d976779d482f6f832d7f33a8d869ae891bf"}, {file = "sqlalchemy-2.0.43-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:413391b2239db55be14fa4223034d7e13325a1812c8396ecd4f2c08696d5ccad"}, + {file = "sqlalchemy-2.0.43-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c379e37b08c6c527181a397212346be39319fb64323741d23e46abd97a400d34"}, {file = "sqlalchemy-2.0.43-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:03d73ab2a37d9e40dec4984d1813d7878e01dbdc742448d44a7341b7a9f408c7"}, + {file = "sqlalchemy-2.0.43-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:8cee08f15d9e238ede42e9bbc1d6e7158d0ca4f176e4eab21f88ac819ae3bd7b"}, {file = "sqlalchemy-2.0.43-cp39-cp39-win32.whl", hash = "sha256:b3edaec7e8b6dc5cd94523c6df4f294014df67097c8217a89929c99975811414"}, {file = "sqlalchemy-2.0.43-cp39-cp39-win_amd64.whl", hash = "sha256:227119ce0a89e762ecd882dc661e0aa677a690c914e358f0dd8932a2e8b2765b"}, {file = "sqlalchemy-2.0.43-py3-none-any.whl", hash = "sha256:1681c21dd2ccee222c2fe0bef671d1aef7c504087c9c4e800371cfcc8ac966fc"}, @@ -12065,7 +12056,7 @@ 
version = "2.2.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" -groups = ["main", "dev"] +groups = ["main"] markers = "python_version == \"3.10\"" files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, @@ -12537,12 +12528,11 @@ version = "4.15.0" description = "Backported and Experimental Type Hints for Python 3.9+" optional = false python-versions = ">=3.9" -groups = ["main", "dev"] +groups = ["main"] files = [ {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, ] -markers = {dev = "python_version == \"3.10\""} [[package]] name = "typing-inspect" diff --git a/pyproject.toml b/pyproject.toml index 2436911e8..a9b895dfb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "cognee" -version = "0.3.9" +version = "0.5.0.dev0" description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning." authors = [ { name = "Vasilije Markovic" }, diff --git a/uv.lock b/uv.lock index 8c35a3366..cc66c3d7e 100644 --- a/uv.lock +++ b/uv.lock @@ -929,7 +929,7 @@ wheels = [ [[package]] name = "cognee" -version = "0.3.9" +version = "0.5.0.dev0" source = { editable = "." 
} dependencies = [ { name = "aiofiles" }, @@ -2560,6 +2560,8 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7f/91/ae2eb6b7979e2f9b035a9f612cf70f1bf54aad4e1d125129bef1eae96f19/greenlet-3.2.4-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c2ca18a03a8cfb5b25bc1cbe20f3d9a4c80d8c3b13ba3df49ac3961af0b1018d", size = 584358, upload-time = "2025-08-07T13:18:23.708Z" }, { url = "https://files.pythonhosted.org/packages/f7/85/433de0c9c0252b22b16d413c9407e6cb3b41df7389afc366ca204dbc1393/greenlet-3.2.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9fe0a28a7b952a21e2c062cd5756d34354117796c6d9215a87f55e38d15402c5", size = 1113550, upload-time = "2025-08-07T13:42:37.467Z" }, { url = "https://files.pythonhosted.org/packages/a1/8d/88f3ebd2bc96bf7747093696f4335a0a8a4c5acfcf1b757717c0d2474ba3/greenlet-3.2.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8854167e06950ca75b898b104b63cc646573aa5fef1353d4508ecdd1ee76254f", size = 1137126, upload-time = "2025-08-07T13:18:20.239Z" }, + { url = "https://files.pythonhosted.org/packages/f1/29/74242b7d72385e29bcc5563fba67dad94943d7cd03552bac320d597f29b2/greenlet-3.2.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f47617f698838ba98f4ff4189aef02e7343952df3a615f847bb575c3feb177a7", size = 1544904, upload-time = "2025-11-04T12:42:04.763Z" }, + { url = "https://files.pythonhosted.org/packages/c8/e2/1572b8eeab0f77df5f6729d6ab6b141e4a84ee8eb9bc8c1e7918f94eda6d/greenlet-3.2.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:af41be48a4f60429d5cad9d22175217805098a9ef7c40bfef44f7669fb9d74d8", size = 1611228, upload-time = "2025-11-04T12:42:08.423Z" }, { url = "https://files.pythonhosted.org/packages/d6/6f/b60b0291d9623c496638c582297ead61f43c4b72eef5e9c926ef4565ec13/greenlet-3.2.4-cp310-cp310-win_amd64.whl", hash = "sha256:73f49b5368b5359d04e18d15828eecc1806033db5233397748f4ca813ff1056c", size = 298654, upload-time = "2025-08-07T13:50:00.469Z" }, { url = 
"https://files.pythonhosted.org/packages/a4/de/f28ced0a67749cac23fecb02b694f6473f47686dff6afaa211d186e2ef9c/greenlet-3.2.4-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:96378df1de302bc38e99c3a9aa311967b7dc80ced1dcc6f171e99842987882a2", size = 272305, upload-time = "2025-08-07T13:15:41.288Z" }, { url = "https://files.pythonhosted.org/packages/09/16/2c3792cba130000bf2a31c5272999113f4764fd9d874fb257ff588ac779a/greenlet-3.2.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1ee8fae0519a337f2329cb78bd7a8e128ec0f881073d43f023c7b8d4831d5246", size = 632472, upload-time = "2025-08-07T13:42:55.044Z" }, @@ -2569,6 +2571,8 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1f/8e/abdd3f14d735b2929290a018ecf133c901be4874b858dd1c604b9319f064/greenlet-3.2.4-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2523e5246274f54fdadbce8494458a2ebdcdbc7b802318466ac5606d3cded1f8", size = 587684, upload-time = "2025-08-07T13:18:25.164Z" }, { url = "https://files.pythonhosted.org/packages/5d/65/deb2a69c3e5996439b0176f6651e0052542bb6c8f8ec2e3fba97c9768805/greenlet-3.2.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1987de92fec508535687fb807a5cea1560f6196285a4cde35c100b8cd632cc52", size = 1116647, upload-time = "2025-08-07T13:42:38.655Z" }, { url = "https://files.pythonhosted.org/packages/3f/cc/b07000438a29ac5cfb2194bfc128151d52f333cee74dd7dfe3fb733fc16c/greenlet-3.2.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:55e9c5affaa6775e2c6b67659f3a71684de4c549b3dd9afca3bc773533d284fa", size = 1142073, upload-time = "2025-08-07T13:18:21.737Z" }, + { url = "https://files.pythonhosted.org/packages/67/24/28a5b2fa42d12b3d7e5614145f0bd89714c34c08be6aabe39c14dd52db34/greenlet-3.2.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c9c6de1940a7d828635fbd254d69db79e54619f165ee7ce32fda763a9cb6a58c", size = 1548385, upload-time = "2025-11-04T12:42:11.067Z" }, + { url = 
"https://files.pythonhosted.org/packages/6a/05/03f2f0bdd0b0ff9a4f7b99333d57b53a7709c27723ec8123056b084e69cd/greenlet-3.2.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03c5136e7be905045160b1b9fdca93dd6727b180feeafda6818e6496434ed8c5", size = 1613329, upload-time = "2025-11-04T12:42:12.928Z" }, { url = "https://files.pythonhosted.org/packages/d8/0f/30aef242fcab550b0b3520b8e3561156857c94288f0332a79928c31a52cf/greenlet-3.2.4-cp311-cp311-win_amd64.whl", hash = "sha256:9c40adce87eaa9ddb593ccb0fa6a07caf34015a29bf8d344811665b573138db9", size = 299100, upload-time = "2025-08-07T13:44:12.287Z" }, { url = "https://files.pythonhosted.org/packages/44/69/9b804adb5fd0671f367781560eb5eb586c4d495277c93bde4307b9e28068/greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd", size = 274079, upload-time = "2025-08-07T13:15:45.033Z" }, { url = "https://files.pythonhosted.org/packages/46/e9/d2a80c99f19a153eff70bc451ab78615583b8dac0754cfb942223d2c1a0d/greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb", size = 640997, upload-time = "2025-08-07T13:42:56.234Z" }, @@ -2578,6 +2582,8 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586, upload-time = "2025-08-07T13:18:28.544Z" }, { url = "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281, upload-time = "2025-08-07T13:42:39.858Z" }, { url = 
"https://files.pythonhosted.org/packages/3f/c7/12381b18e21aef2c6bd3a636da1088b888b97b7a0362fac2e4de92405f97/greenlet-3.2.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:20fb936b4652b6e307b8f347665e2c615540d4b42b3b4c8a321d8286da7e520f", size = 1151142, upload-time = "2025-08-07T13:18:22.981Z" }, + { url = "https://files.pythonhosted.org/packages/27/45/80935968b53cfd3f33cf99ea5f08227f2646e044568c9b1555b58ffd61c2/greenlet-3.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ee7a6ec486883397d70eec05059353b8e83eca9168b9f3f9a361971e77e0bcd0", size = 1564846, upload-time = "2025-11-04T12:42:15.191Z" }, + { url = "https://files.pythonhosted.org/packages/69/02/b7c30e5e04752cb4db6202a3858b149c0710e5453b71a3b2aec5d78a1aab/greenlet-3.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:326d234cbf337c9c3def0676412eb7040a35a768efc92504b947b3e9cfc7543d", size = 1633814, upload-time = "2025-11-04T12:42:17.175Z" }, { url = "https://files.pythonhosted.org/packages/e9/08/b0814846b79399e585f974bbeebf5580fbe59e258ea7be64d9dfb253c84f/greenlet-3.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:a7d4e128405eea3814a12cc2605e0e6aedb4035bf32697f72deca74de4105e02", size = 299899, upload-time = "2025-08-07T13:38:53.448Z" }, { url = "https://files.pythonhosted.org/packages/49/e8/58c7f85958bda41dafea50497cbd59738c5c43dbbea5ee83d651234398f4/greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31", size = 272814, upload-time = "2025-08-07T13:15:50.011Z" }, { url = "https://files.pythonhosted.org/packages/62/dd/b9f59862e9e257a16e4e610480cfffd29e3fae018a68c2332090b53aac3d/greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945", size = 641073, upload-time = "2025-08-07T13:42:57.23Z" }, @@ -2587,6 +2593,8 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" }, { url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" }, { url = "https://files.pythonhosted.org/packages/a2/15/0d5e4e1a66fab130d98168fe984c509249c833c1a3c16806b90f253ce7b9/greenlet-3.2.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d25c5091190f2dc0eaa3f950252122edbbadbb682aa7b1ef2f8af0f8c0afefae", size = 1149210, upload-time = "2025-08-07T13:18:24.072Z" }, + { url = "https://files.pythonhosted.org/packages/1c/53/f9c440463b3057485b8594d7a638bed53ba531165ef0ca0e6c364b5cc807/greenlet-3.2.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e343822feb58ac4d0a1211bd9399de2b3a04963ddeec21530fc426cc121f19b", size = 1564759, upload-time = "2025-11-04T12:42:19.395Z" }, + { url = "https://files.pythonhosted.org/packages/47/e4/3bb4240abdd0a8d23f4f88adec746a3099f0d86bfedb623f063b2e3b4df0/greenlet-3.2.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca7f6f1f2649b89ce02f6f229d7c19f680a6238af656f61e0115b24857917929", size = 1634288, upload-time = "2025-11-04T12:42:21.174Z" }, { url = "https://files.pythonhosted.org/packages/0b/55/2321e43595e6801e105fcfdee02b34c0f996eb71e6ddffca6b10b7e1d771/greenlet-3.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:554b03b6e73aaabec3745364d6239e9e012d64c68ccd0b8430c64ccc14939a8b", size = 299685, upload-time = "2025-08-07T13:24:38.824Z" }, ] From 3b78eb88bd4bc778089f1061408cb413b5e7ff20 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Mon, 24 Nov 2025 
16:38:23 +0100 Subject: [PATCH 162/284] fix: use s3 config --- cognee/api/v1/search/search.py | 2 +- .../infrastructure/files/storage/s3_config.py | 3 ++ cognee/infrastructure/llm/config.py | 14 ------- .../litellm_instructor/llm/bedrock/adapter.py | 41 +++++++------------ .../litellm_instructor/llm/get_llm_client.py | 8 +--- 5 files changed, 19 insertions(+), 49 deletions(-) diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py index e64bcb848..49f7aee51 100644 --- a/cognee/api/v1/search/search.py +++ b/cognee/api/v1/search/search.py @@ -161,7 +161,7 @@ async def search( - LLM_API_KEY: API key for your LLM provider Optional: - - LLM_PROVIDER, LLM_MODEL: Configure LLM for search responses (supports: openai, anthropic, gemini, ollama, bedrock) + - LLM_PROVIDER, LLM_MODEL: Configure LLM for search responses - VECTOR_DB_PROVIDER: Must match what was used during cognify - GRAPH_DATABASE_PROVIDER: Must match what was used during cognify diff --git a/cognee/infrastructure/files/storage/s3_config.py b/cognee/infrastructure/files/storage/s3_config.py index 0b9372b7e..4cc6b1d63 100644 --- a/cognee/infrastructure/files/storage/s3_config.py +++ b/cognee/infrastructure/files/storage/s3_config.py @@ -8,6 +8,9 @@ class S3Config(BaseSettings): aws_endpoint_url: Optional[str] = None aws_access_key_id: Optional[str] = None aws_secret_access_key: Optional[str] = None + aws_session_token: Optional[str] = None + aws_profile_name: Optional[str] = None + aws_bedrock_runtime_endpoint: Optional[str] = None model_config = SettingsConfigDict(env_file=".env", extra="allow") diff --git a/cognee/infrastructure/llm/config.py b/cognee/infrastructure/llm/config.py index 091f8e6ea..7aa8f33f7 100644 --- a/cognee/infrastructure/llm/config.py +++ b/cognee/infrastructure/llm/config.py @@ -27,12 +27,6 @@ class LLMConfig(BaseSettings): - embedding_rate_limit_enabled - embedding_rate_limit_requests - embedding_rate_limit_interval - - aws_access_key_id (Bedrock) - - 
aws_secret_access_key (Bedrock) - - aws_session_token (Bedrock) - - aws_region_name (Bedrock) - - aws_profile_name (Bedrock) - - aws_bedrock_runtime_endpoint (Bedrock) Public methods include: - ensure_env_vars_for_ollama @@ -71,14 +65,6 @@ class LLMConfig(BaseSettings): fallback_endpoint: str = "" fallback_model: str = "" - # AWS Bedrock configuration - aws_access_key_id: Optional[str] = None - aws_secret_access_key: Optional[str] = None - aws_session_token: Optional[str] = None - aws_region_name: str = "us-east-1" - aws_profile_name: Optional[str] = None - aws_bedrock_runtime_endpoint: Optional[str] = None - baml_registry: ClassVar[ClientRegistry] = ClientRegistry() model_config = SettingsConfigDict(env_file=".env", extra="allow") diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py index 868fe51b8..66f484164 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py @@ -11,6 +11,7 @@ from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.ll LLMInterface, ) from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError +from cognee.infrastructure.files.storage.s3_config import get_s3_config from cognee.infrastructure.files.utils.open_data_file import open_data_file from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import ( rate_limit_async, @@ -34,10 +35,6 @@ class BedrockAdapter(LLMInterface): name = "Bedrock" model: str api_key: str - aws_access_key_id: str - aws_secret_access_key: str - aws_region_name: str - aws_profile_name: str MAX_RETRIES = 5 @@ -45,12 +42,6 @@ class BedrockAdapter(LLMInterface): self, model: str, api_key: str = None, - aws_access_key_id: str = None, - 
aws_secret_access_key: str = None, - aws_session_token: str = None, - aws_region_name: str = "us-east-1", - aws_profile_name: str = None, - aws_bedrock_runtime_endpoint: str = None, max_tokens: int = 16384, streaming: bool = False, ): @@ -58,12 +49,6 @@ class BedrockAdapter(LLMInterface): self.client = instructor.from_litellm(litellm.completion) self.model = model self.api_key = api_key - self.aws_access_key_id = aws_access_key_id - self.aws_secret_access_key = aws_secret_access_key - self.aws_session_token = aws_session_token - self.aws_region_name = aws_region_name - self.aws_profile_name = aws_profile_name - self.aws_bedrock_runtime_endpoint = aws_bedrock_runtime_endpoint self.max_tokens = max_tokens self.streaming = streaming @@ -89,22 +74,24 @@ IMPORTANT: You must respond with valid JSON only. Do not include any text before "stream": self.streaming, } + s3_config = get_s3_config() + # Add authentication parameters if self.api_key: request_params["api_key"] = self.api_key - elif self.aws_access_key_id and self.aws_secret_access_key: - request_params["aws_access_key_id"] = self.aws_access_key_id - request_params["aws_secret_access_key"] = self.aws_secret_access_key - if self.aws_session_token: - request_params["aws_session_token"] = self.aws_session_token - elif self.aws_profile_name: - request_params["aws_profile_name"] = self.aws_profile_name + elif s3_config.aws_access_key_id and s3_config.aws_secret_access_key: + request_params["aws_access_key_id"] = s3_config.aws_access_key_id + request_params["aws_secret_access_key"] = s3_config.aws_secret_access_key + if s3_config.aws_session_token: + request_params["aws_session_token"] = s3_config.aws_session_token + elif s3_config.aws_profile_name: + request_params["aws_profile_name"] = s3_config.aws_profile_name # Add optional parameters - if self.aws_region_name: - request_params["aws_region_name"] = self.aws_region_name - if self.aws_bedrock_runtime_endpoint: - request_params["aws_bedrock_runtime_endpoint"] = 
self.aws_bedrock_runtime_endpoint + if s3_config.aws_region_name: + request_params["aws_region_name"] = s3_config.aws_region_name + if s3_config.aws_bedrock_runtime_endpoint: + request_params["aws_bedrock_runtime_endpoint"] = s3_config.aws_bedrock_runtime_endpoint return request_params diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py index 946698a95..489f7ae8e 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py @@ -156,13 +156,7 @@ def get_llm_client(): return BedrockAdapter( model=llm_config.llm_model, api_key=llm_config.llm_api_key, - aws_access_key_id=llm_config.aws_access_key_id, - aws_secret_access_key=llm_config.aws_secret_access_key, - aws_session_token=llm_config.aws_session_token, - aws_region_name=llm_config.aws_region_name, - aws_profile_name=llm_config.aws_profile_name, - aws_bedrock_runtime_endpoint=llm_config.aws_bedrock_runtime_endpoint, - max_tokens=max_tokens, + max_tokens=max_completion_tokens, streaming=llm_config.llm_streaming, ) From c2c64a417c4805c5d5aeb59b4e5f9519b729ee85 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Mon, 24 Nov 2025 17:44:51 +0100 Subject: [PATCH 163/284] fix: fixes ontology api endpoint tests + poetry lock(#1824) ## Description This PR fixes the failing CI tests related to the new ontology api endpoint. 
## Type of Change - [x] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) ## Pre-submission Checklist - [x] **I have tested my changes thoroughly before submitting this PR** - [x] **This PR contains minimal changes necessary to address the issue/feature** - [x] My code follows the project's coding standards and style guidelines - [x] I have added tests that prove my fix is effective or that my feature works - [x] I have added necessary documentation (if applicable) - [x] All new and existing tests pass - [x] I have searched existing PRs to ensure this change hasn't been submitted already - [x] I have linked any relevant issues in the description - [x] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. 
--- .../tests/unit/api/test_ontology_endpoint.py | 10 +++- poetry.lock | 52 +++++++++++++------ 2 files changed, 45 insertions(+), 17 deletions(-) diff --git a/cognee/tests/unit/api/test_ontology_endpoint.py b/cognee/tests/unit/api/test_ontology_endpoint.py index d53c5ab44..af3a4d90e 100644 --- a/cognee/tests/unit/api/test_ontology_endpoint.py +++ b/cognee/tests/unit/api/test_ontology_endpoint.py @@ -25,7 +25,10 @@ def mock_user(): def mock_default_user(): """Mock default user for testing.""" return SimpleNamespace( - id=uuid.uuid4(), email="default@example.com", is_active=True, tenant_id=uuid.uuid4() + id=str(uuid.uuid4()), + email="default@example.com", + is_active=True, + tenant_id=str(uuid.uuid4()), ) @@ -108,6 +111,7 @@ def test_upload_multiple_ontologies(mock_get_default_user, client, mock_default_ """Test uploading multiple ontology files in single request""" import io + mock_get_default_user.return_value = mock_default_user # Create mock files file1_content = b"" file2_content = b"" @@ -137,6 +141,7 @@ def test_upload_endpoint_accepts_arrays(mock_get_default_user, client, mock_defa import io import json + mock_get_default_user.return_value = mock_default_user file_content = b"" files = [("ontology_file", ("single.owl", io.BytesIO(file_content), "application/xml"))] @@ -173,6 +178,7 @@ def test_complete_multifile_workflow(mock_get_default_user, client, mock_default import io import json + mock_get_default_user.return_value = mock_default_user # Step 1: Upload multiple ontologies file1_content = b""" =1.0.0)"] name = "pluggy" version = "1.6.0" description = "plugin and hook calling mechanisms for python" -optional = true +optional = false python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"deepeval\" or extra == \"dev\" or extra == \"dlt\" or extra == \"docling\"" +groups = ["main", "dev"] files = [ {file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"}, {file = 
"pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"}, ] +markers = {main = "extra == \"deepeval\" or extra == \"dev\" or extra == \"dlt\" or extra == \"docling\""} [package.extras] dev = ["pre-commit", "tox"] @@ -8654,6 +8656,7 @@ files = [ {file = "psycopg2-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:0435034157049f6846e95103bd8f5a668788dd913a7c30162ca9503fdf542cb4"}, {file = "psycopg2-2.9.10-cp312-cp312-win32.whl", hash = "sha256:65a63d7ab0e067e2cdb3cf266de39663203d38d6a8ed97f5ca0cb315c73fe067"}, {file = "psycopg2-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:4a579d6243da40a7b3182e0430493dbd55950c493d8c68f4eec0b302f6bbf20e"}, + {file = "psycopg2-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:91fd603a2155da8d0cfcdbf8ab24a2d54bca72795b90d2a3ed2b6da8d979dee2"}, {file = "psycopg2-2.9.10-cp39-cp39-win32.whl", hash = "sha256:9d5b3b94b79a844a986d029eee38998232451119ad653aea42bb9220a8c5066b"}, {file = "psycopg2-2.9.10-cp39-cp39-win_amd64.whl", hash = "sha256:88138c8dedcbfa96408023ea2b0c369eda40fe5d75002c0964c78f46f11fa442"}, {file = "psycopg2-2.9.10.tar.gz", hash = "sha256:12ec0b40b0273f95296233e8750441339298e6a572f7039da5b260e3c8b60e11"}, @@ -8715,6 +8718,7 @@ files = [ {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"}, {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"}, {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"}, + {file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = 
"sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"}, @@ -9694,14 +9698,14 @@ files = [ name = "pytest" version = "7.4.4" description = "pytest: simple powerful testing with Python" -optional = true +optional = false python-versions = ">=3.7" -groups = ["main"] -markers = "extra == \"deepeval\" or extra == \"dev\"" +groups = ["main", "dev"] files = [ {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, ] +markers = {main = "extra == \"deepeval\" or extra == \"dev\""} [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} @@ -9788,6 +9792,21 @@ files = [ packaging = ">=17.1" pytest = ">=6.2" +[[package]] +name = "pytest-timeout" +version = "2.4.0" +description = "pytest plugin to abort hanging tests" +optional = false +python-versions = ">=3.7" +groups = ["dev"] +files = [ + {file = "pytest_timeout-2.4.0-py3-none-any.whl", hash = "sha256:c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2"}, + {file = "pytest_timeout-2.4.0.tar.gz", hash = "sha256:7e68e90b01f9eff71332b25001f85c75495fc4e3a836701876183c4bcfd0540a"}, +] + +[package.dependencies] +pytest = ">=7.0.0" + [[package]] name = "pytest-xdist" version = "3.8.0" @@ -12056,7 +12075,7 @@ version = "2.2.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" -groups = ["main"] +groups = ["main", "dev"] markers = "python_version == \"3.10\"" files = [ {file = 
"tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, @@ -12528,11 +12547,12 @@ version = "4.15.0" description = "Backported and Experimental Type Hints for Python 3.9+" optional = false python-versions = ">=3.9" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, ] +markers = {dev = "python_version == \"3.10\""} [[package]] name = "typing-inspect" From 64a3ee96c45c14ce4c4ad3d16d0edbea204e2d26 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 24 Nov 2025 20:31:28 +0100 Subject: [PATCH 164/284] refactor: Create new abstraction for dataset database mapping and handling --- cognee/context_global_variables.py | 1 + .../dataset_database_handler/__init__.py | 3 + .../dataset_database_handler_interface.py | 43 +++++++ .../supported_dataset_database_handlers.py | 15 +++ .../use_dataset_database_handler.py | 5 + .../infrastructure/databases/graph/config.py | 3 + .../databases/graph/get_graph_engine.py | 1 + .../databases/graph/graph_db_interface.py | 34 ----- .../graph/kuzu/KuzuDatasetDatabaseHandler.py | 57 +++++++++ .../Neo4jAuraDatasetDatabaseHandler.py | 118 ++++++++++++++++++ .../databases/graph/neo4j_driver/adapter.py | 89 ------------- .../utils/get_or_create_dataset_database.py | 58 ++------- .../infrastructure/databases/vector/config.py | 2 + .../databases/vector/create_vector_engine.py | 1 + .../vector/lancedb/LanceDBAdapter.py | 17 --- .../lancedb/LanceDBDatasetDatabaseHandler.py | 41 ++++++ 16 files changed, 300 insertions(+), 188 deletions(-) create mode 100644 cognee/infrastructure/databases/dataset_database_handler/__init__.py create mode 100644 
cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py create mode 100644 cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py create mode 100644 cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py create mode 100644 cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py create mode 100644 cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDatasetDatabaseHandler.py create mode 100644 cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py index 44ead95af..2b6ffa058 100644 --- a/cognee/context_global_variables.py +++ b/cognee/context_global_variables.py @@ -27,6 +27,7 @@ async def set_session_user_context_variable(user): def multi_user_support_possible(): graph_db_config = get_graph_context_config() vector_db_config = get_vectordb_context_config() + # TODO: Make sure dataset database handler and provider match, remove multi_user support check, add error if no dataset database handler exists for provider return ( graph_db_config["graph_database_provider"] in GRAPH_DBS_WITH_MULTI_USER_SUPPORT and vector_db_config["vector_db_provider"] in VECTOR_DBS_WITH_MULTI_USER_SUPPORT diff --git a/cognee/infrastructure/databases/dataset_database_handler/__init__.py b/cognee/infrastructure/databases/dataset_database_handler/__init__.py new file mode 100644 index 000000000..a74017113 --- /dev/null +++ b/cognee/infrastructure/databases/dataset_database_handler/__init__.py @@ -0,0 +1,3 @@ +from .dataset_database_handler_interface import DatasetDatabaseHandlerInterface +from .supported_dataset_database_handlers import supported_dataset_database_handlers +from .use_dataset_database_handler import use_dataset_database_handler diff --git a/cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py 
b/cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py new file mode 100644 index 000000000..6dadee6cf --- /dev/null +++ b/cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py @@ -0,0 +1,43 @@ +from typing import Optional +from uuid import UUID +from abc import ABC, abstractmethod + +from cognee.modules.users.models.User import User + + +class DatasetDatabaseHandlerInterface(ABC): + @classmethod + @abstractmethod + async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict: + """ + Return a dictionary with connection info for a graph or vector database for the given dataset. + Function can auto handle deploying of the actual database if needed, but is not necessary. + Only providing connection info is sufficient, this info will be mapped when trying to connect to the provided dataset in the future. + Needed for Cognee multi-tenant/multi-user and backend access control support. + + Dictionary returned from this function will be used to create a DatasetDatabase row in the relational database. + From which internal mapping of dataset -> database connection info will be done. + + Each dataset needs to map to a unique graph or vector database when backend access control is enabled to facilitate a separation of concern for data. + + Args: + dataset_id: UUID of the dataset if needed by the database creation logic + user: User object if needed by the database creation logic + Returns: + dict: Connection info for the created graph or vector database instance. + """ + pass + + @classmethod + @abstractmethod + async def delete_dataset(cls, dataset_id: UUID, user: User) -> None: + """ + Delete the graph or vector database for the given dataset. + Function should auto handle deleting of the actual database or send a request to the proper service to delete/mark the database as not needed for the given dataset. 
+ Needed for maintaining a database for Cognee multi-tenant/multi-user and backend access control. + + Args: + dataset_id: UUID of the dataset + user: User object + """ + pass diff --git a/cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py b/cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py new file mode 100644 index 000000000..9cc7d9f93 --- /dev/null +++ b/cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py @@ -0,0 +1,15 @@ +from cognee.infrastructure.databases.graph.neo4j_driver.Neo4jAuraDatasetDatabaseHandler import ( + Neo4jAuraDatasetDatabaseHandler, +) +from cognee.infrastructure.databases.vector.lancedb.LanceDBDatasetDatabaseHandler import ( + LanceDBDatasetDatabaseHandler, +) +from cognee.infrastructure.databases.graph.kuzu.KuzuDatasetDatabaseHandler import ( + KuzuDatasetDatabaseHandler, +) + +supported_dataset_database_handlers = { + "neo4j_aura": Neo4jAuraDatasetDatabaseHandler, + "lancedb": LanceDBDatasetDatabaseHandler, + "kuzu": KuzuDatasetDatabaseHandler, +} diff --git a/cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py b/cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py new file mode 100644 index 000000000..a583de354 --- /dev/null +++ b/cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py @@ -0,0 +1,5 @@ +from .supported_dataset_database_handlers import supported_dataset_database_handlers + + +def use_dataset_database_handler(dataset_database_handler_name, dataset_database_handler): + supported_dataset_database_handlers[dataset_database_handler_name] = dataset_database_handler diff --git a/cognee/infrastructure/databases/graph/config.py b/cognee/infrastructure/databases/graph/config.py index 23687b359..bcf97ebfa 100644 --- a/cognee/infrastructure/databases/graph/config.py +++ 
b/cognee/infrastructure/databases/graph/config.py @@ -47,6 +47,7 @@ class GraphConfig(BaseSettings): graph_filename: str = "" graph_model: object = KnowledgeGraph graph_topology: object = KnowledgeGraph + graph_dataset_database_handler: str = "kuzu" model_config = SettingsConfigDict(env_file=".env", extra="allow", populate_by_name=True) # Model validator updates graph_filename and path dynamically after class creation based on current database provider @@ -97,6 +98,7 @@ class GraphConfig(BaseSettings): "graph_model": self.graph_model, "graph_topology": self.graph_topology, "model_config": self.model_config, + "graph_dataset_database_handler": self.graph_dataset_database_handler, } def to_hashable_dict(self) -> dict: @@ -121,6 +123,7 @@ class GraphConfig(BaseSettings): "graph_database_port": self.graph_database_port, "graph_database_key": self.graph_database_key, "graph_file_path": self.graph_file_path, + "graph_dataset_database_handler": self.graph_dataset_database_handler, } diff --git a/cognee/infrastructure/databases/graph/get_graph_engine.py b/cognee/infrastructure/databases/graph/get_graph_engine.py index 82e3cad6e..c37af2102 100644 --- a/cognee/infrastructure/databases/graph/get_graph_engine.py +++ b/cognee/infrastructure/databases/graph/get_graph_engine.py @@ -34,6 +34,7 @@ def create_graph_engine( graph_database_password="", graph_database_port="", graph_database_key="", + graph_dataset_database_handler="", ): """ Create a graph engine based on the specified provider type. 
diff --git a/cognee/infrastructure/databases/graph/graph_db_interface.py b/cognee/infrastructure/databases/graph/graph_db_interface.py index 6d323764b..67df1a27c 100644 --- a/cognee/infrastructure/databases/graph/graph_db_interface.py +++ b/cognee/infrastructure/databases/graph/graph_db_interface.py @@ -6,7 +6,6 @@ from typing import Optional, Dict, Any, List, Tuple, Type, Union from uuid import NAMESPACE_OID, UUID, uuid5 from cognee.shared.logging_utils import get_logger from cognee.infrastructure.engine import DataPoint -from cognee.modules.users.models.User import User from cognee.modules.data.models.graph_relationship_ledger import GraphRelationshipLedger from cognee.infrastructure.databases.relational.get_relational_engine import get_relational_engine @@ -399,36 +398,3 @@ class GraphDBInterface(ABC): - node_id (Union[str, UUID]): Unique identifier of the node for which to retrieve connections. """ raise NotImplementedError - - @classmethod - async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict: - """ - Return a dictionary with connection info for a graph database for the given dataset. - Function can auto handle deploying of the actual database if needed, but is not necessary. - Only providing connection info is sufficient, this info will be mapped when trying to connect to the provided dataset in the future. - Needed for Cognee multi-tenant/multi-user and backend access control support. - - Dictionary returned from this function will be used to create a DatasetDatabase row in the relational database. - From which internal mapping of dataset -> database connection info will be done. - - Each dataset needs to map to a unique graph database when backend access control is enabled to facilitate a separation of concern for data. 
- - Args: - dataset_id: UUID of the dataset if needed by the database creation logic - user: User object if needed by the database creation logic - Returns: - dict: Connection info for the created graph database instance. - """ - pass - - async def delete_dataset(self, dataset_id: UUID, user: User) -> None: - """ - Delete the graph database for the given dataset. - Function should auto handle deleting of the actual database or send a request to the proper service to delete the database. - Needed for maintaining a database for Cognee multi-tenant/multi-user and backend access control. - - Args: - dataset_id: UUID of the dataset - user: User object - """ - pass diff --git a/cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py b/cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py new file mode 100644 index 000000000..8859422f9 --- /dev/null +++ b/cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py @@ -0,0 +1,57 @@ +import os +import asyncio +import requests +from uuid import UUID +from typing import Optional + +from cognee.modules.users.models import User + +from cognee.infrastructure.databases.dataset_database_handler import DatasetDatabaseHandlerInterface + + +class KuzuDatasetDatabaseHandler(DatasetDatabaseHandlerInterface): + """ + Handler for interacting with Kuzu Dataset databases. + """ + + @classmethod + async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict: + """ + Create a new Kuzu instance for the dataset. Return connection info that will be mapped to the dataset. 
+ + Args: + dataset_id: Dataset UUID + user: User object who owns the dataset and is making the request + + Returns: + dict: Connection details for the created Kuzu instance + + """ + from cognee.infrastructure.databases.graph.config import get_graph_config + + graph_config = get_graph_config() + + if graph_config.graph_database_provider != "kuzu": + raise ValueError( + "KuzuDatasetDatabaseHandler can only be used with Kuzu graph database provider." + ) + + # TODO: Add graph file path info for kuzu (also in DatasetDatabase model) + graph_db_name = f"{dataset_id}.pkl" + graph_db_url = graph_config.graph_database_url + graph_db_key = graph_config.graph_database_key + graph_db_username = graph_config.graph_database_username + graph_db_password = graph_config.graph_database_password + + return { + "graph_database_name": graph_db_name, + "graph_database_url": graph_db_url, + "graph_database_provider": graph_config.graph_database_provider, + "graph_database_key": graph_db_key, + "graph_database_username": graph_db_username, + "graph_database_password": graph_db_password, + } + + @classmethod + async def delete_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]): + pass diff --git a/cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDatasetDatabaseHandler.py b/cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDatasetDatabaseHandler.py new file mode 100644 index 000000000..cc38abed0 --- /dev/null +++ b/cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDatasetDatabaseHandler.py @@ -0,0 +1,118 @@ +import os +import asyncio +import requests +from uuid import UUID +from typing import Optional + +from cognee.infrastructure.databases.graph import get_graph_config +from cognee.modules.users.models import User + +from cognee.infrastructure.databases.dataset_database_handler import DatasetDatabaseHandlerInterface + + +class Neo4jAuraDatasetDatabaseHandler(DatasetDatabaseHandlerInterface): + """ + Handler for interacting with Neo4j Aura 
Dataset databases. + """ + + @classmethod + async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict: + """ + Create a new Neo4j Aura instance for the dataset. Return connection info that will be mapped to the dataset. + + Args: + dataset_id: Dataset UUID + user: User object who owns the dataset and is making the request + + Returns: + dict: Connection details for the created Neo4j instance + + """ + graph_config = get_graph_config() + + if graph_config.graph_database_provider != "neo4j": + raise ValueError( + "Neo4jAuraDatasetDatabaseHandler can only be used with Neo4j graph database provider." + ) + + graph_db_name = f"{dataset_id}" + + # Client credentials + client_id = os.environ.get("NEO4J_CLIENT_ID", None) + client_secret = os.environ.get("NEO4J_CLIENT_SECRET", None) + tenant_id = os.environ.get("NEO4J_TENANT_ID", None) + + if client_id is None or client_secret is None or tenant_id is None: + raise ValueError( + "NEO4J_CLIENT_ID, NEO4J_CLIENT_SECRET, and NEO4J_TENANT_ID environment variables must be set to use Neo4j Aura DatasetDatabase Handling." 
+ ) + + # Make the request with HTTP Basic Auth + def get_aura_token(client_id: str, client_secret: str) -> dict: + url = "https://api.neo4j.io/oauth/token" + data = {"grant_type": "client_credentials"} # sent as application/x-www-form-urlencoded + + resp = requests.post(url, data=data, auth=(client_id, client_secret)) + resp.raise_for_status() # raises if the request failed + return resp.json() + + resp = get_aura_token(client_id, client_secret) + + url = "https://api.neo4j.io/v1/instances" + + headers = { + "accept": "application/json", + "Authorization": f"Bearer {resp['access_token']}", + "Content-Type": "application/json", + } + + # TODO: Maybe we can allow **kwargs parameter forwarding for cases like these + # Too allow different configurations between datasets + payload = { + "version": "5", + "region": "europe-west1", + "memory": "1GB", + "name": graph_db_name[ + 0:29 + ], # TODO: Find better name to name Neo4j instance within 30 character limit + "type": "professional-db", + "tenant_id": tenant_id, + "cloud_provider": "gcp", + } + + response = requests.post(url, headers=headers, json=payload) + + graph_db_name = "neo4j" # Has to be 'neo4j' for Aura + graph_db_url = response.json()["data"]["connection_url"] + graph_db_key = resp["access_token"] + graph_db_username = response.json()["data"]["username"] + graph_db_password = response.json()["data"]["password"] + + async def _wait_for_neo4j_instance_provisioning(instance_id: str, headers: dict): + # Poll until the instance is running + status_url = f"https://api.neo4j.io/v1/instances/{instance_id}" + status = "" + for attempt in range(30): # Try for up to ~5 minutes + status_resp = requests.get(status_url, headers=headers) + status = status_resp.json()["data"]["status"] + if status.lower() == "running": + return + await asyncio.sleep(10) + raise TimeoutError( + f"Neo4j instance '{graph_db_name}' did not become ready within 5 minutes. 
Status: {status}" + ) + + instance_id = response.json()["data"]["id"] + await _wait_for_neo4j_instance_provisioning(instance_id, headers) + return { + "graph_database_name": graph_db_name, + "graph_database_url": graph_db_url, + "graph_database_provider": "neo4j", + "graph_database_key": graph_db_key, # TODO: Hashing of keys/passwords in relational DB + "graph_database_username": graph_db_username, + "graph_database_password": graph_db_password, + } + + @classmethod + async def delete_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]): + pass diff --git a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py index 43e5ea654..6216e107e 100644 --- a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +++ b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py @@ -1,9 +1,7 @@ """Neo4j Adapter for Graph Database""" -import os import json import asyncio -import requests from uuid import UUID from textwrap import dedent from neo4j import AsyncSession @@ -14,7 +12,6 @@ from typing import Optional, Any, List, Dict, Type, Tuple from cognee.infrastructure.engine import DataPoint from cognee.modules.engine.utils.generate_timestamp_datapoint import date_to_int -from cognee.modules.users.models import User from cognee.tasks.temporal_graph.models import Timestamp from cognee.shared.logging_utils import get_logger, ERROR from cognee.infrastructure.databases.graph.graph_db_interface import ( @@ -1473,89 +1470,3 @@ class Neo4jAdapter(GraphDBInterface): time_ids_list = [item["id"] for item in time_nodes if "id" in item] return ", ".join(f"'{uid}'" for uid in time_ids_list) - - @classmethod - async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict: - """ - Create a new Neo4j Aura instance for the dataset. Return connection info that will be mapped to the dataset. 
- - Args: - dataset_id: Dataset UUID - user: User object who owns the dataset and is making the request - - Returns: - dict: Connection details for the created Neo4j instance - - """ - graph_db_name = f"{dataset_id}" - - # Client credentials - client_id = os.environ.get("NEO4J_CLIENT_ID", None) - client_secret = os.environ.get("NEO4J_CLIENT_SECRET", None) - tenant_id = os.environ.get("NEO4J_TENANT_ID", None) - - # Make the request with HTTP Basic Auth - def get_aura_token(client_id: str, client_secret: str) -> dict: - url = "https://api.neo4j.io/oauth/token" - data = {"grant_type": "client_credentials"} # sent as application/x-www-form-urlencoded - - resp = requests.post(url, data=data, auth=(client_id, client_secret)) - resp.raise_for_status() # raises if the request failed - return resp.json() - - resp = get_aura_token(client_id, client_secret) - - url = "https://api.neo4j.io/v1/instances" - - headers = { - "accept": "application/json", - "Authorization": f"Bearer {resp['access_token']}", - "Content-Type": "application/json", - } - - # TODO: Maybe we can allow **kwargs parameter forwarding for cases like these - # Too allow different configurations between datasets - payload = { - "version": "5", - "region": "europe-west1", - "memory": "1GB", - "name": graph_db_name[ - 0:29 - ], # TODO: Find better name to name Neo4j instance within 30 character limit - "type": "professional-db", - "tenant_id": tenant_id, - "cloud_provider": "gcp", - } - - response = requests.post(url, headers=headers, json=payload) - - graph_db_name = "neo4j" - graph_db_url = response.json()["data"]["connection_url"] - graph_db_key = resp["access_token"] - graph_db_username = response.json()["data"]["username"] - graph_db_password = response.json()["data"]["password"] - - async def _wait_for_neo4j_instance_provisioning(instance_id: str, headers: dict): - # Poll until the instance is running - status_url = f"https://api.neo4j.io/v1/instances/{instance_id}" - status = "" - for attempt in 
range(30): # Try for up to ~5 minutes - status_resp = requests.get(status_url, headers=headers) - status = status_resp.json()["data"]["status"] - if status.lower() == "running": - return - await asyncio.sleep(10) - raise TimeoutError( - f"Neo4j instance '{graph_db_name}' did not become ready within 5 minutes. Status: {status}" - ) - - instance_id = response.json()["data"]["id"] - await _wait_for_neo4j_instance_provisioning(instance_id, headers) - return { - "graph_database_name": graph_db_name, - "graph_database_url": graph_db_url, - "graph_database_provider": "neo4j", - "graph_database_key": graph_db_key, # TODO: Hashing of keys/passwords in relational DB - "graph_database_username": graph_db_username, - "graph_database_password": graph_db_password, - } diff --git a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py index b60640d4c..f4bacca7e 100644 --- a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +++ b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py @@ -20,61 +20,23 @@ from cognee.modules.users.models import User async def _get_vector_db_info(dataset_id: UUID, user: User) -> dict: vector_config = get_vectordb_config() - # Determine vector configuration - if vector_config.vector_db_provider == "lancedb": - # TODO: Have the create_database method be called from interface adapter automatically for all providers instead of specifically here - from cognee.infrastructure.databases.vector.lancedb.LanceDBAdapter import LanceDBAdapter + from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import ( + supported_dataset_database_handlers, + ) - return await LanceDBAdapter.create_dataset(dataset_id, user) - - else: - # Note: for hybrid databases both graph and vector DB name have to be the same - vector_db_name = vector_config.vector_db_name - vector_db_url = 
vector_config.vector_database_url - - return { - "vector_database_name": vector_db_name, - "vector_database_url": vector_db_url, - "vector_database_provider": vector_config.vector_db_provider, - "vector_database_key": vector_config.vector_db_key, - } + handler = supported_dataset_database_handlers[vector_config.vector_dataset_database_handler] + return await handler.create_dataset(dataset_id, user) async def _get_graph_db_info(dataset_id: UUID, user: User) -> dict: graph_config = get_graph_config() - # Determine graph database URL - if graph_config.graph_database_provider == "neo4j": - from cognee.infrastructure.databases.graph.neo4j_driver.adapter import Neo4jAdapter - return await Neo4jAdapter.create_dataset(dataset_id, user) + from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import ( + supported_dataset_database_handlers, + ) - elif graph_config.graph_database_provider == "kuzu": - # TODO: Add graph file path info for kuzu (also in DatasetDatabase model) - graph_db_name = f"{dataset_id}.pkl" - graph_db_url = graph_config.graph_database_url - graph_db_key = graph_config.graph_database_key - graph_db_username = graph_config.graph_database_username - graph_db_password = graph_config.graph_database_password - elif graph_config.graph_database_provider == "falkor": - # Note: for hybrid databases both graph and vector DB name have to be the same - graph_db_name = f"{dataset_id}" - graph_db_url = graph_config.graph_database_url - graph_db_key = graph_config.graph_database_key - graph_db_username = graph_config.graph_database_username - graph_db_password = graph_config.graph_database_password - else: - raise EnvironmentError( - f"Unsupported graph database provider for backend access control: {graph_config.graph_database_provider}" - ) - - return { - "graph_database_name": graph_db_name, - "graph_database_url": graph_db_url, - "graph_database_provider": graph_config.graph_database_provider, - "graph_database_key": 
graph_db_key, # TODO: Hashing of keys/passwords in relational DB - "graph_database_username": graph_db_username, - "graph_database_password": graph_db_password, - } + handler = supported_dataset_database_handlers[graph_config.graph_dataset_database_handler] + return await handler.create_dataset(dataset_id, user) async def _existing_dataset_database( diff --git a/cognee/infrastructure/databases/vector/config.py b/cognee/infrastructure/databases/vector/config.py index 7d28f1668..86b2a0fce 100644 --- a/cognee/infrastructure/databases/vector/config.py +++ b/cognee/infrastructure/databases/vector/config.py @@ -28,6 +28,7 @@ class VectorConfig(BaseSettings): vector_db_name: str = "" vector_db_key: str = "" vector_db_provider: str = "lancedb" + vector_dataset_database_handler: str = "lancedb" model_config = SettingsConfigDict(env_file=".env", extra="allow") @@ -63,6 +64,7 @@ class VectorConfig(BaseSettings): "vector_db_name": self.vector_db_name, "vector_db_key": self.vector_db_key, "vector_db_provider": self.vector_db_provider, + "vector_dataset_database_handler": self.vector_dataset_database_handler, } diff --git a/cognee/infrastructure/databases/vector/create_vector_engine.py b/cognee/infrastructure/databases/vector/create_vector_engine.py index b182f084b..02e01e288 100644 --- a/cognee/infrastructure/databases/vector/create_vector_engine.py +++ b/cognee/infrastructure/databases/vector/create_vector_engine.py @@ -12,6 +12,7 @@ def create_vector_engine( vector_db_name: str, vector_db_port: str = "", vector_db_key: str = "", + vector_dataset_database_handler: str = "", ): """ Create a vector database engine based on the specified provider. 
diff --git a/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py b/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py index a93fbc818..b52f78517 100644 --- a/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +++ b/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py @@ -362,20 +362,3 @@ class LanceDBAdapter(VectorDBInterface): }, exclude_fields=["metadata"] + related_models_fields, ) - - @classmethod - async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict: - vector_config = get_vectordb_config() - base_config = get_base_config() - databases_directory_path = os.path.join( - base_config.system_root_directory, "databases", str(user.id) - ) - - vector_db_name = f"{dataset_id}.lance.db" - - return { - "vector_database_name": vector_db_name, - "vector_database_url": os.path.join(databases_directory_path, vector_db_name), - "vector_database_provider": vector_config.vector_db_provider, - "vector_database_key": vector_config.vector_db_key, - } diff --git a/cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py b/cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py new file mode 100644 index 000000000..8a80dddcf --- /dev/null +++ b/cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py @@ -0,0 +1,41 @@ +import os +from uuid import UUID +from typing import Optional + +from cognee.modules.users.models import User +from cognee.base_config import get_base_config +from cognee.infrastructure.databases.vector import get_vectordb_config +from cognee.infrastructure.databases.dataset_database_handler import DatasetDatabaseHandlerInterface + + +class LanceDBDatasetDatabaseHandler(DatasetDatabaseHandlerInterface): + """ + Handler for interacting with LanceDB Dataset databases. 
+ """ + + @classmethod + async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict: + vector_config = get_vectordb_config() + base_config = get_base_config() + + if vector_config.vector_db_provider != "lancedb": + raise ValueError( + "LanceDBDatasetDatabaseHandler can only be used with LanceDB vector database provider." + ) + + databases_directory_path = os.path.join( + base_config.system_root_directory, "databases", str(user.id) + ) + + vector_db_name = f"{dataset_id}.lance.db" + + return { + "vector_database_name": vector_db_name, + "vector_database_url": os.path.join(databases_directory_path, vector_db_name), + "vector_database_provider": vector_config.vector_db_provider, + "vector_database_key": vector_config.vector_db_key, + } + + @classmethod + async def delete_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]): + pass From 02b17786588b7be4c582a2fdfe93a5412c074cda Mon Sep 17 00:00:00 2001 From: rajeevrajeshuni Date: Tue, 25 Nov 2025 12:22:15 +0530 Subject: [PATCH 165/284] Adding support for audio/image transcription for all other providers --- cognee/infrastructure/llm/LLMGateway.py | 13 -- .../llm/anthropic/adapter.py | 27 ++-- .../litellm_instructor/llm/gemini/adapter.py | 59 +++---- .../llm/generic_llm_api/adapter.py | 132 +++++++++++++++- .../litellm_instructor/llm/get_llm_client.py | 13 +- .../litellm_instructor/llm/llm_interface.py | 47 +++++- .../litellm_instructor/llm/mistral/adapter.py | 66 ++++++-- .../litellm_instructor/llm/ollama/adapter.py | 118 ++------------ .../litellm_instructor/llm/openai/adapter.py | 148 +++--------------- uv.lock | 2 +- 10 files changed, 313 insertions(+), 312 deletions(-) diff --git a/cognee/infrastructure/llm/LLMGateway.py b/cognee/infrastructure/llm/LLMGateway.py index ab5bb35d7..66a364110 100644 --- a/cognee/infrastructure/llm/LLMGateway.py +++ b/cognee/infrastructure/llm/LLMGateway.py @@ -34,19 +34,6 @@ class LLMGateway: text_input=text_input, system_prompt=system_prompt, 
response_model=response_model ) - @staticmethod - def create_structured_output( - text_input: str, system_prompt: str, response_model: Type[BaseModel] - ) -> BaseModel: - from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.get_llm_client import ( - get_llm_client, - ) - - llm_client = get_llm_client() - return llm_client.create_structured_output( - text_input=text_input, system_prompt=system_prompt, response_model=response_model - ) - @staticmethod def create_transcript(input) -> Coroutine: from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.get_llm_client import ( diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py index dbf0dfbea..818d3adb7 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py @@ -3,7 +3,9 @@ from typing import Type from pydantic import BaseModel import litellm import instructor +import anthropic from cognee.shared.logging_utils import get_logger +from cognee.modules.observability.get_observe import get_observe from tenacity import ( retry, stop_after_delay, @@ -12,27 +14,32 @@ from tenacity import ( before_sleep_log, ) -from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( - LLMInterface, +from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import ( + GenericAPIAdapter, ) from cognee.infrastructure.llm.config import get_llm_config logger = get_logger() +observe = get_observe() -class AnthropicAdapter(LLMInterface): +class AnthropicAdapter(GenericAPIAdapter): """ Adapter for interfacing with the Anthropic API, enabling structured output generation and prompt display. 
""" - name = "Anthropic" - model: str default_instructor_mode = "anthropic_tools" - def __init__(self, max_completion_tokens: int, model: str = None, instructor_mode: str = None): - import anthropic - + def __init__( + self, api_key: str, model: str, max_completion_tokens: int, instructor_mode: str = None + ): + super().__init__( + api_key=api_key, + model=model, + max_completion_tokens=max_completion_tokens, + name="Anthropic", + ) self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode self.aclient = instructor.patch( @@ -40,9 +47,7 @@ class AnthropicAdapter(LLMInterface): mode=instructor.Mode(self.instructor_mode), ) - self.model = model - self.max_completion_tokens = max_completion_tokens - + @observe(as_type="generation") @retry( stop=stop_after_delay(128), wait=wait_exponential_jitter(2, 128), diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py index 226f291d7..bae665052 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py @@ -1,4 +1,4 @@ -"""Adapter for Generic API LLM provider API""" +"""Adapter for Gemini API LLM provider""" import litellm import instructor @@ -8,12 +8,7 @@ from openai import ContentFilterFinishReasonError from litellm.exceptions import ContentPolicyViolationError from instructor.core import InstructorRetryException -from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError -from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( - LLMInterface, -) import logging -from cognee.shared.logging_utils import get_logger from tenacity import ( retry, stop_after_delay, @@ -22,55 +17,65 @@ from tenacity import ( before_sleep_log, ) +from 
cognee.infrastructure.llm.exceptions import ContentPolicyFilterError +from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import ( + GenericAPIAdapter, +) +from cognee.shared.logging_utils import get_logger +from cognee.modules.observability.get_observe import get_observe + logger = get_logger() +observe = get_observe() -class GeminiAdapter(LLMInterface): +class GeminiAdapter(GenericAPIAdapter): """ Adapter for Gemini API LLM provider. This class initializes the API adapter with necessary credentials and configurations for interacting with the gemini LLM models. It provides methods for creating structured outputs - based on user input and system prompts. + based on user input and system prompts, as well as multimodal processing capabilities. Public methods: - - acreate_structured_output(text_input: str, system_prompt: str, response_model: - Type[BaseModel]) -> BaseModel + - acreate_structured_output(text_input: str, system_prompt: str, response_model: Type[BaseModel]) -> BaseModel + - create_transcript(input) -> BaseModel: Transcribe audio files to text + - transcribe_image(input) -> BaseModel: Inherited from GenericAPIAdapter """ - name: str - model: str - api_key: str default_instructor_mode = "json_mode" def __init__( self, - endpoint, api_key: str, model: str, - api_version: str, max_completion_tokens: int, + endpoint: str = None, + api_version: str = None, + transcription_model: str = None, instructor_mode: str = None, fallback_model: str = None, fallback_api_key: str = None, fallback_endpoint: str = None, ): - self.model = model - self.api_key = api_key - self.endpoint = endpoint - self.api_version = api_version - self.max_completion_tokens = max_completion_tokens - - self.fallback_model = fallback_model - self.fallback_api_key = fallback_api_key - self.fallback_endpoint = fallback_endpoint - + super().__init__( + api_key=api_key, + model=model, + max_completion_tokens=max_completion_tokens, + 
name="Gemini", + endpoint=endpoint, + api_version=api_version, + transcription_model=transcription_model, + fallback_model=fallback_model, + fallback_api_key=fallback_api_key, + fallback_endpoint=fallback_endpoint, + ) self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode self.aclient = instructor.from_litellm( litellm.acompletion, mode=instructor.Mode(self.instructor_mode) ) + @observe(as_type="generation") @retry( stop=stop_after_delay(128), wait=wait_exponential_jitter(2, 128), @@ -118,7 +123,7 @@ class GeminiAdapter(LLMInterface): }, ], api_key=self.api_key, - max_retries=5, + max_retries=self.MAX_RETRIES, api_base=self.endpoint, api_version=self.api_version, response_model=response_model, @@ -152,7 +157,7 @@ class GeminiAdapter(LLMInterface): "content": system_prompt, }, ], - max_retries=5, + max_retries=self.MAX_RETRIES, api_key=self.fallback_api_key, api_base=self.fallback_endpoint, response_model=response_model, diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py index 9d7f25fc5..9987711b9 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py @@ -1,8 +1,10 @@ """Adapter for Generic API LLM provider API""" +import base64 +import mimetypes import litellm import instructor -from typing import Type +from typing import Type, Optional from pydantic import BaseModel from openai import ContentFilterFinishReasonError from litellm.exceptions import ContentPolicyViolationError @@ -12,6 +14,8 @@ from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( LLMInterface, ) +from 
cognee.infrastructure.files.utils.open_data_file import open_data_file +from cognee.modules.observability.get_observe import get_observe import logging from cognee.shared.logging_utils import get_logger from tenacity import ( @@ -23,6 +27,7 @@ from tenacity import ( ) logger = get_logger() +observe = get_observe() class GenericAPIAdapter(LLMInterface): @@ -38,18 +43,19 @@ class GenericAPIAdapter(LLMInterface): Type[BaseModel]) -> BaseModel """ - name: str - model: str - api_key: str + MAX_RETRIES = 5 default_instructor_mode = "json_mode" def __init__( self, - endpoint, api_key: str, model: str, - name: str, max_completion_tokens: int, + name: str, + endpoint: str = None, + api_version: str = None, + transcription_model: str = None, + image_transcribe_model: str = None, instructor_mode: str = None, fallback_model: str = None, fallback_api_key: str = None, @@ -58,9 +64,11 @@ class GenericAPIAdapter(LLMInterface): self.name = name self.model = model self.api_key = api_key + self.api_version = api_version self.endpoint = endpoint self.max_completion_tokens = max_completion_tokens - + self.transcription_model = transcription_model or model + self.image_transcribe_model = image_transcribe_model or model self.fallback_model = fallback_model self.fallback_api_key = fallback_api_key self.fallback_endpoint = fallback_endpoint @@ -71,6 +79,7 @@ class GenericAPIAdapter(LLMInterface): litellm.acompletion, mode=instructor.Mode(self.instructor_mode) ) + @observe(as_type="generation") @retry( stop=stop_after_delay(128), wait=wait_exponential_jitter(2, 128), @@ -170,3 +179,112 @@ class GenericAPIAdapter(LLMInterface): raise ContentPolicyFilterError( f"The provided input contains content that is not aligned with our content policy: {text_input}" ) from error + + @observe(as_type="transcription") + @retry( + stop=stop_after_delay(128), + wait=wait_exponential_jitter(2, 128), + retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), + before_sleep=before_sleep_log(logger, 
logging.DEBUG), + reraise=True, + ) + async def create_transcript(self, input) -> Optional[BaseModel]: + """ + Generate an audio transcript from a user query. + + This method creates a transcript from the specified audio file, raising a + FileNotFoundError if the file does not exist. The audio file is processed and the + transcription is retrieved from the API. + + Parameters: + ----------- + - input: The path to the audio file that needs to be transcribed. + + Returns: + -------- + The generated transcription of the audio file. + """ + async with open_data_file(input, mode="rb") as audio_file: + encoded_string = base64.b64encode(audio_file.read()).decode("utf-8") + mime_type, _ = mimetypes.guess_type(input) + if not mime_type or not mime_type.startswith("audio/"): + raise ValueError( + f"Could not determine MIME type for audio file: {input}. Is the extension correct?" + ) + return litellm.completion( + model=self.transcription_model, + messages=[ + { + "role": "user", + "content": [ + { + "type": "file", + "file": {"file_data": f"data:{mime_type};base64,{encoded_string}"}, + }, + {"type": "text", "text": "Transcribe the following audio precisely."}, + ], + } + ], + api_key=self.api_key, + api_version=self.api_version, + max_completion_tokens=self.max_completion_tokens, + api_base=self.endpoint, + max_retries=self.MAX_RETRIES, + ) + + @observe(as_type="transcribe_image") + @retry( + stop=stop_after_delay(128), + wait=wait_exponential_jitter(2, 128), + retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), + before_sleep=before_sleep_log(logger, logging.DEBUG), + reraise=True, + ) + async def transcribe_image(self, input) -> Optional[BaseModel]: + """ + Generate a transcription of an image from a user query. + + This method encodes the image and sends a request to the API to obtain a + description of the contents of the image. + + Parameters: + ----------- + - input: The path to the image file that needs to be transcribed. 
+ + Returns: + -------- + - BaseModel: A structured output generated by the model, returned as an instance of + BaseModel. + """ + async with open_data_file(input, mode="rb") as image_file: + encoded_image = base64.b64encode(image_file.read()).decode("utf-8") + mime_type, _ = mimetypes.guess_type(input) + if not mime_type or not mime_type.startswith("image/"): + raise ValueError( + f"Could not determine MIME type for image file: {input}. Is the extension correct?" + ) + return litellm.completion( + model=self.image_transcribe_model, + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What's in this image?", + }, + { + "type": "image_url", + "image_url": { + "url": f"data:{mime_type};base64,{encoded_image}", + }, + }, + ], + } + ], + api_key=self.api_key, + api_base=self.endpoint, + api_version=self.api_version, + max_completion_tokens=300, + max_retries=self.MAX_RETRIES, + ) diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py index 39558f36d..de6cfaf19 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py @@ -97,11 +97,10 @@ def get_llm_client(raise_api_key_error: bool = True): ) return OllamaAPIAdapter( - llm_config.llm_endpoint, llm_config.llm_api_key, llm_config.llm_model, - "Ollama", - max_completion_tokens=max_completion_tokens, + max_completion_tokens, + llm_config.llm_endpoint, instructor_mode=llm_config.llm_instructor_mode.lower(), ) @@ -111,8 +110,9 @@ def get_llm_client(raise_api_key_error: bool = True): ) return AnthropicAdapter( - max_completion_tokens=max_completion_tokens, - model=llm_config.llm_model, + llm_config.llm_api_key, + llm_config.llm_model, + max_completion_tokens, 
instructor_mode=llm_config.llm_instructor_mode.lower(), ) @@ -125,11 +125,10 @@ def get_llm_client(raise_api_key_error: bool = True): ) return GenericAPIAdapter( - llm_config.llm_endpoint, llm_config.llm_api_key, llm_config.llm_model, + max_completion_tokens, "Custom", - max_completion_tokens=max_completion_tokens, instructor_mode=llm_config.llm_instructor_mode.lower(), fallback_api_key=llm_config.fallback_api_key, fallback_endpoint=llm_config.fallback_endpoint, diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py index b02105484..f8352737d 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py @@ -1,6 +1,6 @@ """LLM Interface""" -from typing import Type, Protocol +from typing import Type, Protocol, Optional from abc import abstractmethod from pydantic import BaseModel from cognee.infrastructure.llm.LLMGateway import LLMGateway @@ -8,13 +8,12 @@ from cognee.infrastructure.llm.LLMGateway import LLMGateway class LLMInterface(Protocol): """ - Define an interface for LLM models with methods for structured output and prompt - display. + Define an interface for LLM models with methods for structured output, multimodal processing, and prompt display. Methods: - - acreate_structured_output(text_input: str, system_prompt: str, response_model: - Type[BaseModel]) - - show_prompt(text_input: str, system_prompt: str) + - acreate_structured_output(text_input: str, system_prompt: str, response_model: Type[BaseModel]) + - create_transcript(input): Transcribe audio files to text + - transcribe_image(input): Analyze image files and return text description """ @abstractmethod @@ -36,3 +35,39 @@ class LLMInterface(Protocol): output. 
""" raise NotImplementedError + + @abstractmethod + async def create_transcript(self, input) -> Optional[BaseModel]: + """ + Transcribe audio content to text. + + This method should be implemented by subclasses that support audio transcription. + If not implemented, returns None and should be handled gracefully by callers. + + Parameters: + ----------- + - input: The path to the audio file that needs to be transcribed. + + Returns: + -------- + - BaseModel: A structured output containing the transcription, or None if not supported. + """ + raise NotImplementedError + + @abstractmethod + async def transcribe_image(self, input) -> Optional[BaseModel]: + """ + Analyze image content and return text description. + + This method should be implemented by subclasses that support image analysis. + If not implemented, returns None and should be handled gracefully by callers. + + Parameters: + ----------- + - input: The path to the image file that needs to be analyzed. + + Returns: + -------- + - BaseModel: A structured output containing the image description, or None if not supported. 
+ """ + raise NotImplementedError diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py index 355cdae0b..0fa35923f 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py @@ -2,12 +2,12 @@ import litellm import instructor from pydantic import BaseModel from typing import Type -from litellm import JSONSchemaValidationError +from litellm import JSONSchemaValidationError, transcription from cognee.shared.logging_utils import get_logger from cognee.modules.observability.get_observe import get_observe -from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( - LLMInterface, +from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import ( + GenericAPIAdapter, ) from cognee.infrastructure.llm.config import get_llm_config @@ -19,12 +19,13 @@ from tenacity import ( retry_if_not_exception_type, before_sleep_log, ) +from mistralai import Mistral logger = get_logger() observe = get_observe() -class MistralAdapter(LLMInterface): +class MistralAdapter(GenericAPIAdapter): """ Adapter for Mistral AI API, for structured output generation and prompt display. 
@@ -33,10 +34,6 @@ class MistralAdapter(LLMInterface): - show_prompt """ - name = "Mistral" - model: str - api_key: str - max_completion_tokens: int default_instructor_mode = "mistral_tools" def __init__( @@ -45,12 +42,21 @@ class MistralAdapter(LLMInterface): model: str, max_completion_tokens: int, endpoint: str = None, + transcription_model: str = None, + image_transcribe_model: str = None, instructor_mode: str = None, ): from mistralai import Mistral - self.model = model - self.max_completion_tokens = max_completion_tokens + super().__init__( + api_key=api_key, + model=model, + max_completion_tokens=max_completion_tokens, + name="Mistral", + endpoint=endpoint, + transcription_model=transcription_model, + image_transcribe_model=image_transcribe_model, + ) self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode @@ -60,6 +66,7 @@ class MistralAdapter(LLMInterface): api_key=get_llm_config().llm_api_key, ) + @observe(as_type="generation") @retry( stop=stop_after_delay(128), wait=wait_exponential_jitter(2, 128), @@ -117,3 +124,42 @@ class MistralAdapter(LLMInterface): logger.error(f"Schema validation failed: {str(e)}") logger.debug(f"Raw response: {e.raw_response}") raise ValueError(f"Response failed schema validation: {str(e)}") + + @observe(as_type="transcription") + @retry( + stop=stop_after_delay(128), + wait=wait_exponential_jitter(2, 128), + retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), + before_sleep=before_sleep_log(logger, logging.DEBUG), + reraise=True, + ) + async def create_transcript(self, input): + """ + Generate an audio transcript from a user query. + + This method creates a transcript from the specified audio file. + The audio file is processed and the transcription is retrieved from the API. + + Parameters: + ----------- + - input: The path to the audio file that needs to be transcribed. + + Returns: + -------- + The generated transcription of the audio file. 
+ """ + transcription_model = self.transcription_model + if self.transcription_model.startswith("mistral"): + transcription_model = self.transcription_model.split("/")[-1] + file_name = input.split("/")[-1] + client = Mistral(api_key=self.api_key) + with open(input, "rb") as f: + transcription_response = client.audio.transcriptions.complete( + model=transcription_model, + file={ + "content": f, + "file_name": file_name, + }, + ) + # TODO: We need to standardize return type of create_transcript across different models. + return transcription_response diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py index aabd19867..163637a95 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py @@ -5,12 +5,12 @@ import instructor from typing import Type from openai import OpenAI from pydantic import BaseModel - -from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( - LLMInterface, -) from cognee.infrastructure.files.utils.open_data_file import open_data_file from cognee.shared.logging_utils import get_logger +from cognee.modules.observability.get_observe import get_observe +from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import ( + GenericAPIAdapter, +) from tenacity import ( retry, stop_after_delay, @@ -20,9 +20,10 @@ from tenacity import ( ) logger = get_logger() +observe = get_observe() -class OllamaAPIAdapter(LLMInterface): +class OllamaAPIAdapter(GenericAPIAdapter): """ Adapter for a Generic API LLM provider using instructor with an OpenAI backend. 
@@ -46,18 +47,20 @@ class OllamaAPIAdapter(LLMInterface): def __init__( self, - endpoint: str, api_key: str, model: str, name: str, max_completion_tokens: int, + endpoint: str, instructor_mode: str = None, ): - self.name = name - self.model = model - self.api_key = api_key - self.endpoint = endpoint - self.max_completion_tokens = max_completion_tokens + super().__init__( + api_key=api_key, + model=model, + max_completion_tokens=max_completion_tokens, + name="Ollama", + endpoint=endpoint, + ) self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode @@ -66,6 +69,7 @@ class OllamaAPIAdapter(LLMInterface): mode=instructor.Mode(self.instructor_mode), ) + @observe(as_type="generation") @retry( stop=stop_after_delay(128), wait=wait_exponential_jitter(2, 128), @@ -113,95 +117,3 @@ class OllamaAPIAdapter(LLMInterface): ) return response - - @retry( - stop=stop_after_delay(128), - wait=wait_exponential_jitter(2, 128), - retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), - before_sleep=before_sleep_log(logger, logging.DEBUG), - reraise=True, - ) - async def create_transcript(self, input_file: str) -> str: - """ - Generate an audio transcript from a user query. - - This synchronous method takes an input audio file and returns its transcription. Raises - a FileNotFoundError if the input file does not exist, and raises a ValueError if - transcription fails or returns no text. - - Parameters: - ----------- - - - input_file (str): The path to the audio file to be transcribed. - - Returns: - -------- - - - str: The transcription of the audio as a string. - """ - - async with open_data_file(input_file, mode="rb") as audio_file: - transcription = self.aclient.audio.transcriptions.create( - model="whisper-1", # Ensure the correct model for transcription - file=audio_file, - language="en", - ) - - # Ensure the response contains a valid transcript - if not hasattr(transcription, "text"): - raise ValueError("Transcription failed. 
No text returned.") - - return transcription.text - - @retry( - stop=stop_after_delay(128), - wait=wait_exponential_jitter(2, 128), - retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), - before_sleep=before_sleep_log(logger, logging.DEBUG), - reraise=True, - ) - async def transcribe_image(self, input_file: str) -> str: - """ - Transcribe content from an image using base64 encoding. - - This synchronous method takes an input image file, encodes it as base64, and returns the - transcription of its content. Raises a FileNotFoundError if the input file does not - exist, and raises a ValueError if the transcription fails or no valid response is - received. - - Parameters: - ----------- - - - input_file (str): The path to the image file to be transcribed. - - Returns: - -------- - - - str: The transcription of the image's content as a string. - """ - - async with open_data_file(input_file, mode="rb") as image_file: - encoded_image = base64.b64encode(image_file.read()).decode("utf-8") - - response = self.aclient.chat.completions.create( - model=self.model, - messages=[ - { - "role": "user", - "content": [ - {"type": "text", "text": "What's in this image?"}, - { - "type": "image_url", - "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}, - }, - ], - } - ], - max_completion_tokens=300, - ) - - # Ensure response is valid before accessing .choices[0].message.content - if not hasattr(response, "choices") or not response.choices: - raise ValueError("Image transcription failed. 
No response received.") - - return response.choices[0].message.content diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py index 778c8eec7..e9943c335 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py @@ -1,4 +1,3 @@ -import base64 import litellm import instructor from typing import Type @@ -16,8 +15,8 @@ from tenacity import ( before_sleep_log, ) -from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( - LLMInterface, +from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import ( + GenericAPIAdapter, ) from cognee.infrastructure.llm.exceptions import ( ContentPolicyFilterError, @@ -31,7 +30,7 @@ logger = get_logger() observe = get_observe() -class OpenAIAdapter(LLMInterface): +class OpenAIAdapter(GenericAPIAdapter): """ Adapter for OpenAI's GPT-3, GPT-4 API. 
@@ -52,12 +51,7 @@ class OpenAIAdapter(LLMInterface): - MAX_RETRIES """ - name = "OpenAI" - model: str - api_key: str - api_version: str default_instructor_mode = "json_schema_mode" - MAX_RETRIES = 5 """Adapter for OpenAI's GPT-3, GPT=4 API""" @@ -65,17 +59,29 @@ class OpenAIAdapter(LLMInterface): def __init__( self, api_key: str, - endpoint: str, - api_version: str, model: str, - transcription_model: str, max_completion_tokens: int, + endpoint: str = None, + api_version: str = None, + transcription_model: str = None, instructor_mode: str = None, streaming: bool = False, fallback_model: str = None, fallback_api_key: str = None, fallback_endpoint: str = None, ): + super().__init__( + api_key=api_key, + model=model, + max_completion_tokens=max_completion_tokens, + name="OpenAI", + endpoint=endpoint, + api_version=api_version, + transcription_model=transcription_model, + fallback_model=fallback_model, + fallback_api_key=fallback_api_key, + fallback_endpoint=fallback_endpoint, + ) self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode # TODO: With gpt5 series models OpenAI expects JSON_SCHEMA as a mode for structured outputs. # Make sure all new gpt models will work with this mode as well. 
@@ -90,18 +96,8 @@ class OpenAIAdapter(LLMInterface): self.aclient = instructor.from_litellm(litellm.acompletion) self.client = instructor.from_litellm(litellm.completion) - self.transcription_model = transcription_model - self.model = model - self.api_key = api_key - self.endpoint = endpoint - self.api_version = api_version - self.max_completion_tokens = max_completion_tokens self.streaming = streaming - self.fallback_model = fallback_model - self.fallback_api_key = fallback_api_key - self.fallback_endpoint = fallback_endpoint - @observe(as_type="generation") @retry( stop=stop_after_delay(128), @@ -174,7 +170,7 @@ class OpenAIAdapter(LLMInterface): }, ], api_key=self.fallback_api_key, - # api_base=self.fallback_endpoint, + api_base=self.fallback_endpoint, response_model=response_model, max_retries=self.MAX_RETRIES, ) @@ -193,57 +189,7 @@ class OpenAIAdapter(LLMInterface): f"The provided input contains content that is not aligned with our content policy: {text_input}" ) from error - @observe - @retry( - stop=stop_after_delay(128), - wait=wait_exponential_jitter(2, 128), - retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), - before_sleep=before_sleep_log(logger, logging.DEBUG), - reraise=True, - ) - def create_structured_output( - self, text_input: str, system_prompt: str, response_model: Type[BaseModel] - ) -> BaseModel: - """ - Generate a response from a user query. - - This method creates structured output by sending a synchronous request to the OpenAI API - using the provided parameters to generate a completion based on the user input and - system prompt. - - Parameters: - ----------- - - - text_input (str): The input text provided by the user for generating a response. - - system_prompt (str): The system's prompt to guide the model's response. - - response_model (Type[BaseModel]): The expected model type for the response. 
- - Returns: - -------- - - - BaseModel: A structured output generated by the model, returned as an instance of - BaseModel. - """ - - return self.client.chat.completions.create( - model=self.model, - messages=[ - { - "role": "user", - "content": f"""{text_input}""", - }, - { - "role": "system", - "content": system_prompt, - }, - ], - api_key=self.api_key, - api_base=self.endpoint, - api_version=self.api_version, - response_model=response_model, - max_retries=self.MAX_RETRIES, - ) - + @observe(as_type="transcription") @retry( stop=stop_after_delay(128), wait=wait_exponential_jitter(2, 128), @@ -282,56 +228,4 @@ class OpenAIAdapter(LLMInterface): return transcription - @retry( - stop=stop_after_delay(128), - wait=wait_exponential_jitter(2, 128), - retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError), - before_sleep=before_sleep_log(logger, logging.DEBUG), - reraise=True, - ) - async def transcribe_image(self, input) -> BaseModel: - """ - Generate a transcription of an image from a user query. - - This method encodes the image and sends a request to the OpenAI API to obtain a - description of the contents of the image. - - Parameters: - ----------- - - - input: The path to the image file that needs to be transcribed. - - Returns: - -------- - - - BaseModel: A structured output generated by the model, returned as an instance of - BaseModel. 
- """ - async with open_data_file(input, mode="rb") as image_file: - encoded_image = base64.b64encode(image_file.read()).decode("utf-8") - - return litellm.completion( - model=self.model, - messages=[ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What's in this image?", - }, - { - "type": "image_url", - "image_url": { - "url": f"data:image/jpeg;base64,{encoded_image}", - }, - }, - ], - } - ], - api_key=self.api_key, - api_base=self.endpoint, - api_version=self.api_version, - max_completion_tokens=300, - max_retries=self.MAX_RETRIES, - ) + # transcribe image inherited from GenericAdapter diff --git a/uv.lock b/uv.lock index cc66c3d7e..d8fb3805b 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.10, <3.14" resolution-markers = [ "python_full_version >= '3.13' and platform_python_implementation != 'PyPy' and sys_platform == 'darwin'", From 09fbf2276828043b8ed1458f50b3ab7efcaa04d2 Mon Sep 17 00:00:00 2001 From: rajeevrajeshuni Date: Tue, 25 Nov 2025 12:24:30 +0530 Subject: [PATCH 166/284] uv lock version revert --- uv.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uv.lock b/uv.lock index d8fb3805b..cc66c3d7e 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.10, <3.14" resolution-markers = [ "python_full_version >= '3.13' and platform_python_implementation != 'PyPy' and sys_platform == 'darwin'", From e0d48c043a1594c567135a1e73cd209c1d07eba1 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Tue, 25 Nov 2025 12:58:07 +0100 Subject: [PATCH 167/284] fix: fixes to adapter and tests --- .github/workflows/test_bedrock_api_key.yml | 28 ------ .../test_bedrock_aws_credentials.yml | 29 ------ .../workflows/test_bedrock_aws_profile.yml | 37 -------- .github/workflows/test_llms.yml | 88 +++++++++++++++++++ .github/workflows/test_suites.yml | 24 ----- .../llm/bedrock/__init__.py | 1 - 
.../litellm_instructor/llm/bedrock/adapter.py | 35 ++++---- .../litellm_instructor/llm/get_llm_client.py | 7 +- cognee/modules/settings/get_settings.py | 12 +-- 9 files changed, 117 insertions(+), 144 deletions(-) delete mode 100644 .github/workflows/test_bedrock_api_key.yml delete mode 100644 .github/workflows/test_bedrock_aws_credentials.yml delete mode 100644 .github/workflows/test_bedrock_aws_profile.yml diff --git a/.github/workflows/test_bedrock_api_key.yml b/.github/workflows/test_bedrock_api_key.yml deleted file mode 100644 index 3f5ea94b3..000000000 --- a/.github/workflows/test_bedrock_api_key.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: test | bedrock | api key - -on: - workflow_call: - -jobs: - test-bedrock-api-key: - name: Run Bedrock API Key Test - runs-on: ubuntu-22.04 - steps: - - name: Check out repository - uses: actions/checkout@v4 - - - name: Cognee Setup - uses: ./.github/actions/cognee_setup - with: - python-version: '3.11.x' - - - name: Run Bedrock API Key Test - env: - LLM_PROVIDER: "bedrock" - LLM_API_KEY: ${{ secrets.BEDROCK_API_KEY }} - LLM_MODEL: "us.anthropic.claude-3-5-sonnet-20241022-v2:0" - AWS_REGION_NAME: "us-east-1" - EMBEDDING_PROVIDER: "bedrock" - EMBEDDING_MODEL: "amazon.titan-embed-text-v1" - EMBEDDING_DIMENSIONS: "1536" - run: poetry run python ./examples/python/simple_example.py diff --git a/.github/workflows/test_bedrock_aws_credentials.yml b/.github/workflows/test_bedrock_aws_credentials.yml deleted file mode 100644 index c086dceb3..000000000 --- a/.github/workflows/test_bedrock_aws_credentials.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: test | bedrock | aws credentials - -on: - workflow_call: - -jobs: - test-bedrock-aws-credentials: - name: Run Bedrock AWS Credentials Test - runs-on: ubuntu-22.04 - steps: - - name: Check out repository - uses: actions/checkout@v4 - - - name: Cognee Setup - uses: ./.github/actions/cognee_setup - with: - python-version: '3.11.x' - - - name: Run Bedrock AWS Credentials Test - env: - LLM_PROVIDER: 
"bedrock" - LLM_MODEL: "us.anthropic.claude-3-5-sonnet-20240620-v1:0" - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - AWS_REGION_NAME: "us-east-1" - EMBEDDING_PROVIDER: "cohere" - EMBEDDING_MODEL: "cohere.embed-english-v3" - EMBEDDING_DIMENSIONS: "1024" - run: poetry run python ./examples/python/simple_example.py diff --git a/.github/workflows/test_bedrock_aws_profile.yml b/.github/workflows/test_bedrock_aws_profile.yml deleted file mode 100644 index aa15074e1..000000000 --- a/.github/workflows/test_bedrock_aws_profile.yml +++ /dev/null @@ -1,37 +0,0 @@ -name: test | bedrock | aws profile - -on: - workflow_call: - -jobs: - test-bedrock-aws-profile: - name: Run Bedrock AWS Profile Test - runs-on: ubuntu-22.04 - steps: - - name: Check out repository - uses: actions/checkout@v4 - - - name: Cognee Setup - uses: ./.github/actions/cognee_setup - with: - python-version: '3.11.x' - - - name: Configure AWS Profile - run: | - mkdir -p ~/.aws - cat > ~/.aws/credentials << EOF - [bedrock-test] - aws_access_key_id = ${{ secrets.AWS_ACCESS_KEY_ID }} - aws_secret_access_key = ${{ secrets.AWS_SECRET_ACCESS_KEY }} - EOF - - - name: Run Bedrock AWS Profile Test - env: - LLM_PROVIDER: "bedrock" - LLM_MODEL: "us.anthropic.claude-3-5-haiku-20241022-v1:0" - AWS_PROFILE_NAME: "bedrock-test" - AWS_REGION_NAME: "us-east-1" - EMBEDDING_PROVIDER: "bedrock" - EMBEDDING_MODEL: "amazon.titan-embed-text-v2:0" - EMBEDDING_DIMENSIONS: "1024" - run: poetry run python ./examples/python/simple_example.py diff --git a/.github/workflows/test_llms.yml b/.github/workflows/test_llms.yml index 6b0221309..0cbbc7b3a 100644 --- a/.github/workflows/test_llms.yml +++ b/.github/workflows/test_llms.yml @@ -84,3 +84,91 @@ jobs: EMBEDDING_DIMENSIONS: "3072" EMBEDDING_MAX_TOKENS: "8191" run: uv run python ./examples/python/simple_example.py + + test-bedrock-api-key: + name: Run Bedrock API Key Test + runs-on: ubuntu-22.04 + steps: + - name: Check 
out repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' + extra-dependencies: "aws" + + - name: Run Bedrock API Key Test + env: + LLM_PROVIDER: "bedrock" + LLM_API_KEY: ${{ secrets.BEDROCK_API_KEY }} + LLM_MODEL: "eu.amazon.nova-lite-v1:0" + LLM_MAX_TOKENS: "16384" + AWS_REGION_NAME: "eu-west-1" + EMBEDDING_PROVIDER: "bedrock" + EMBEDDING_API_KEY: ${{ secrets.BEDROCK_API_KEY }} + EMBEDDING_MODEL: "amazon.titan-embed-text-v2:0" + EMBEDDING_DIMENSIONS: "1024" + EMBEDDING_MAX_TOKENS: "8191" + run: poetry run python ./examples/python/simple_example.py + + test-bedrock-aws-credentials: + name: Run Bedrock AWS Credentials Test + runs-on: ubuntu-22.04 + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' + extra-dependencies: "aws" + + - name: Run Bedrock API Key Test + env: + LLM_PROVIDER: "bedrock" + LLM_MODEL: "eu.amazon.nova-lite-v1:0" + LLM_MAX_TOKENS: "16384" + AWS_REGION_NAME: "eu-west-1" + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + EMBEDDING_PROVIDER: "bedrock" + EMBEDDING_API_KEY: ${{ secrets.BEDROCK_API_KEY }} + EMBEDDING_MODEL: "amazon.titan-embed-text-v2:0" + EMBEDDING_DIMENSIONS: "1024" + EMBEDDING_MAX_TOKENS: "8191" + run: poetry run python ./examples/python/simple_example.py + + test-bedrock-aws-profile: + name: Run Bedrock AWS Profile Test + runs-on: ubuntu-22.04 + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' + + - name: Configure AWS Profile + run: | + mkdir -p ~/.aws + cat > ~/.aws/credentials << EOF + [bedrock-test] + aws_access_key_id = ${{ secrets.AWS_ACCESS_KEY_ID }} + aws_secret_access_key = ${{ secrets.AWS_SECRET_ACCESS_KEY }} + EOF + + - name: Run Bedrock AWS 
Profile Test + env: + LLM_PROVIDER: "bedrock" + LLM_MODEL: "eu.amazon.nova-lite-v1:0" + AWS_PROFILE_NAME: "bedrock-test" + AWS_REGION_NAME: "eu-west-1" + EMBEDDING_PROVIDER: "bedrock" + EMBEDDING_MODEL: "amazon.titan-embed-text-v2:0" + EMBEDDING_DIMENSIONS: "1024" + EMBEDDING_MAX_TOKENS: "8191" + run: poetry run python ./examples/python/simple_example.py \ No newline at end of file diff --git a/.github/workflows/test_suites.yml b/.github/workflows/test_suites.yml index 9f2767faf..be1e354fc 100644 --- a/.github/workflows/test_suites.yml +++ b/.github/workflows/test_suites.yml @@ -139,24 +139,6 @@ jobs: uses: ./.github/workflows/test_llms.yml secrets: inherit - bedrock-tests: - name: Bedrock Tests - needs: [basic-tests, e2e-tests] - uses: ./.github/workflows/test_bedrock_api_key.yml - secrets: inherit - - bedrock-aws-credentials-tests: - name: Bedrock AWS Credentials Tests - needs: [basic-tests, e2e-tests] - uses: ./.github/workflows/test_bedrock_aws_credentials.yml - secrets: inherit - - bedrock-aws-profile-tests: - name: Bedrock AWS Profile Tests - needs: [basic-tests, e2e-tests] - uses: ./.github/workflows/test_bedrock_aws_profile.yml - secrets: inherit - # Ollama tests moved to the end ollama-tests: name: Ollama Tests @@ -193,9 +175,6 @@ jobs: db-examples-tests, mcp-test, llm-tests, - bedrock-tests, - bedrock-aws-credentials-tests, - bedrock-aws-profile-tests, ollama-tests, relational-db-migration-tests, docker-compose-test, @@ -218,9 +197,6 @@ jobs: "${{ needs.db-examples-tests.result }}" == "success" && "${{ needs.relational-db-migration-tests.result }}" == "success" && "${{ needs.llm-tests.result }}" == "success" && - "${{ needs.bedrock-tests.result }}" == "success" && - "${{ needs.bedrock-aws-credentials-tests.result }}" == "success" && - "${{ needs.bedrock-aws-profile-tests.result }}" == "success" && "${{ needs.docker-compose-test.result }}" == "success" && "${{ needs.docker-ci-test.result }}" == "success" && "${{ needs.ollama-tests.result }}" == "success" 
]]; then diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py index 6fb964a82..ad7cdf994 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py @@ -3,4 +3,3 @@ from .adapter import BedrockAdapter __all__ = ["BedrockAdapter"] - diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py index 66f484164..c461a0886 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py @@ -1,18 +1,19 @@ import litellm import instructor -from typing import Type, Optional +from typing import Type from pydantic import BaseModel from litellm.exceptions import ContentPolicyViolationError from instructor.exceptions import InstructorRetryException -from cognee.exceptions import InvalidValueError from cognee.infrastructure.llm.LLMGateway import LLMGateway from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import ( LLMInterface, ) -from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError +from cognee.infrastructure.llm.exceptions import ( + ContentPolicyFilterError, + MissingSystemPromptPathError, +) from cognee.infrastructure.files.storage.s3_config import get_s3_config -from cognee.infrastructure.files.utils.open_data_file import open_data_file from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import ( rate_limit_async, rate_limit_sync, @@ -35,6 +36,7 @@ class 
BedrockAdapter(LLMInterface): name = "Bedrock" model: str api_key: str + default_instructor_mode = "json_schema_mode" MAX_RETRIES = 5 @@ -42,23 +44,23 @@ class BedrockAdapter(LLMInterface): self, model: str, api_key: str = None, - max_tokens: int = 16384, + max_completion_tokens: int = 16384, streaming: bool = False, + instructor_mode: str = None, ): - self.aclient = instructor.from_litellm(litellm.acompletion) + self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode + + self.aclient = instructor.from_litellm(litellm.acompletion, mode=instructor.Mode(self.instructor_mode)) self.client = instructor.from_litellm(litellm.completion) self.model = model self.api_key = api_key - self.max_tokens = max_tokens + self.max_completion_tokens = max_completion_tokens self.streaming = streaming def _create_bedrock_request( self, text_input: str, system_prompt: str, response_model: Type[BaseModel] ) -> dict: - """Create Bedrock request with authentication and enhanced JSON formatting.""" - enhanced_system_prompt = f"""{system_prompt} - -IMPORTANT: You must respond with valid JSON only. Do not include any text before or after the JSON. The response must be a valid JSON object that can be parsed directly.""" + """Create Bedrock request with authentication.""" request_params = { "model": self.model, @@ -66,11 +68,11 @@ IMPORTANT: You must respond with valid JSON only. Do not include any text before "drop_params": True, "messages": [ {"role": "user", "content": text_input}, - {"role": "system", "content": enhanced_system_prompt}, + {"role": "system", "content": system_prompt}, ], "response_model": response_model, "max_retries": self.MAX_RETRIES, - "max_tokens": self.max_tokens, + "max_completion_tokens": self.max_completion_tokens, "stream": self.streaming, } @@ -87,9 +89,10 @@ IMPORTANT: You must respond with valid JSON only. 
Do not include any text before elif s3_config.aws_profile_name: request_params["aws_profile_name"] = s3_config.aws_profile_name + if s3_config.aws_region: + request_params["aws_region_name"] = s3_config.aws_region + # Add optional parameters - if s3_config.aws_region_name: - request_params["aws_region_name"] = s3_config.aws_region_name if s3_config.aws_bedrock_runtime_endpoint: request_params["aws_bedrock_runtime_endpoint"] = s3_config.aws_bedrock_runtime_endpoint @@ -137,7 +140,7 @@ IMPORTANT: You must respond with valid JSON only. Do not include any text before if not text_input: text_input = "No user input provided." if not system_prompt: - raise InvalidValueError(message="No system prompt path provided.") + raise MissingSystemPromptPathError() system_prompt = LLMGateway.read_query_prompt(system_prompt) formatted_prompt = ( diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py index 086fd84de..954d85c1d 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py @@ -172,8 +172,8 @@ def get_llm_client(raise_api_key_error: bool = True): ) elif provider == LLMProvider.BEDROCK: - if llm_config.llm_api_key is None and raise_api_key_error: - raise LLMAPIKeyNotSetError() + # if llm_config.llm_api_key is None and raise_api_key_error: + # raise LLMAPIKeyNotSetError() from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.bedrock.adapter import ( BedrockAdapter, @@ -182,8 +182,9 @@ def get_llm_client(raise_api_key_error: bool = True): return BedrockAdapter( model=llm_config.llm_model, api_key=llm_config.llm_api_key, - max_tokens=max_completion_tokens, + max_completion_tokens=max_completion_tokens, streaming=llm_config.llm_streaming, + 
instructor_mode=llm_config.llm_instructor_mode.lower(), ) else: diff --git a/cognee/modules/settings/get_settings.py b/cognee/modules/settings/get_settings.py index 7e58e981f..37093bb35 100644 --- a/cognee/modules/settings/get_settings.py +++ b/cognee/modules/settings/get_settings.py @@ -164,16 +164,16 @@ def get_settings() -> SettingsDict: ], "bedrock": [ { - "value": "us.anthropic.claude-3-5-sonnet-20241022-v2:0", - "label": "Claude 3.5 Sonnet", + "value": "eu.anthropic.claude-haiku-4-5-20251001-v1:0", + "label": "Claude 4.5 Sonnet", }, { - "value": "us.anthropic.claude-3-5-haiku-20241022-v1:0", - "label": "Claude 3.5 Haiku", + "value": "eu.anthropic.claude-haiku-4-5-20251001-v1:0", + "label": "Claude 4.5 Haiku", }, { - "value": "us.anthropic.claude-3-5-sonnet-20240620-v1:0", - "label": "Claude 3.5 Sonnet (June)", + "value": "eu.amazon.nova-lite-v1:0", + "label": "Amazon Nova Lite", }, ], }, From 4c6bed885e04d1a97367493de276cc781bbfe8f4 Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Tue, 25 Nov 2025 13:02:26 +0100 Subject: [PATCH 168/284] chore: ruff format --- .../litellm_instructor/llm/bedrock/adapter.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py index c461a0886..1faec2d0b 100644 --- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py @@ -50,7 +50,9 @@ class BedrockAdapter(LLMInterface): ): self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode - self.aclient = instructor.from_litellm(litellm.acompletion, mode=instructor.Mode(self.instructor_mode)) + self.aclient = instructor.from_litellm( + litellm.acompletion, mode=instructor.Mode(self.instructor_mode) + 
) self.client = instructor.from_litellm(litellm.completion) self.model = model self.api_key = api_key From 9e652a3a935fecf7d973e9478bff2237df1cf09d Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Tue, 25 Nov 2025 13:34:18 +0100 Subject: [PATCH 169/284] fix: use uv instead of poetry in CI tests --- .github/workflows/test_llms.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test_llms.yml b/.github/workflows/test_llms.yml index 0cbbc7b3a..cc21dc97b 100644 --- a/.github/workflows/test_llms.yml +++ b/.github/workflows/test_llms.yml @@ -110,7 +110,7 @@ jobs: EMBEDDING_MODEL: "amazon.titan-embed-text-v2:0" EMBEDDING_DIMENSIONS: "1024" EMBEDDING_MAX_TOKENS: "8191" - run: poetry run python ./examples/python/simple_example.py + run: uv run python ./examples/python/simple_example.py test-bedrock-aws-credentials: name: Run Bedrock AWS Credentials Test @@ -138,7 +138,7 @@ jobs: EMBEDDING_MODEL: "amazon.titan-embed-text-v2:0" EMBEDDING_DIMENSIONS: "1024" EMBEDDING_MAX_TOKENS: "8191" - run: poetry run python ./examples/python/simple_example.py + run: uv run python ./examples/python/simple_example.py test-bedrock-aws-profile: name: Run Bedrock AWS Profile Test @@ -171,4 +171,4 @@ jobs: EMBEDDING_MODEL: "amazon.titan-embed-text-v2:0" EMBEDDING_DIMENSIONS: "1024" EMBEDDING_MAX_TOKENS: "8191" - run: poetry run python ./examples/python/simple_example.py \ No newline at end of file + run: uv run python ./examples/python/simple_example.py \ No newline at end of file From 593f17fcdcbc7f064c5bf25371bb371fc629eeed Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 25 Nov 2025 15:41:01 +0100 Subject: [PATCH 170/284] refactor: Add better handling of configuration for dataset to database handler --- .env.template | 4 ++ cognee/context_global_variables.py | 65 ++++++++++++++----- .../supported_dataset_database_handlers.py | 9 ++- .../use_dataset_database_handler.py | 9 ++- .../utils/get_or_create_dataset_database.py | 8 +-- 
cognee/shared/logging_utils.py | 4 ++ 6 files changed, 71 insertions(+), 28 deletions(-) diff --git a/.env.template b/.env.template index ae2cb1338..d178965e8 100644 --- a/.env.template +++ b/.env.template @@ -93,6 +93,8 @@ DB_NAME=cognee_db # Default (local file-based) GRAPH_DATABASE_PROVIDER="kuzu" +# Handler for multi-user access control mode, it handles how should the mapping/creation of separate DBs be handled per Cognee dataset +GRAPH_DATASET_DATABASE_HANDLER="kuzu" # -- To switch to Remote Kuzu uncomment and fill these: ------------------------------------------------------------- #GRAPH_DATABASE_PROVIDER="kuzu" @@ -117,6 +119,8 @@ VECTOR_DB_PROVIDER="lancedb" # Not needed if a cloud vector database is not used VECTOR_DB_URL= VECTOR_DB_KEY= +# Handler for multi-user access control mode, it handles how should the mapping/creation of separate DBs be handled per Cognee dataset +VECTOR_DATASET_DATABASE_HANDLER="lancedb" ################################################################################ # 🧩 Ontology resolver settings diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py index 2b6ffa058..0e7e16178 100644 --- a/cognee/context_global_variables.py +++ b/cognee/context_global_variables.py @@ -4,8 +4,8 @@ from typing import Union from uuid import UUID from cognee.base_config import get_base_config -from cognee.infrastructure.databases.vector.config import get_vectordb_context_config -from cognee.infrastructure.databases.graph.config import get_graph_context_config +from cognee.infrastructure.databases.vector.config import get_vectordb_config +from cognee.infrastructure.databases.graph.config import get_graph_config from cognee.infrastructure.databases.utils import get_or_create_dataset_database from cognee.infrastructure.files.storage.config import file_storage_config from cognee.modules.users.methods import get_user @@ -16,23 +16,59 @@ vector_db_config = ContextVar("vector_db_config", default=None) graph_db_config = 
ContextVar("graph_db_config", default=None) session_user = ContextVar("session_user", default=None) -VECTOR_DBS_WITH_MULTI_USER_SUPPORT = ["lancedb", "falkor"] -GRAPH_DBS_WITH_MULTI_USER_SUPPORT = ["kuzu", "falkor", "neo4j"] - async def set_session_user_context_variable(user): session_user.set(user) def multi_user_support_possible(): - graph_db_config = get_graph_context_config() - vector_db_config = get_vectordb_context_config() - # TODO: Make sure dataset database handler and provider match, remove multi_user support check, add error if no dataset database handler exists for provider - return ( - graph_db_config["graph_database_provider"] in GRAPH_DBS_WITH_MULTI_USER_SUPPORT - and vector_db_config["vector_db_provider"] in VECTOR_DBS_WITH_MULTI_USER_SUPPORT + graph_db_config = get_graph_config() + vector_db_config = get_vectordb_config() + + graph_handler = graph_db_config.graph_dataset_database_handler + vector_handler = vector_db_config.vector_dataset_database_handler + from cognee.infrastructure.databases.dataset_database_handler import ( + supported_dataset_database_handlers, ) + if graph_handler not in supported_dataset_database_handlers: + raise EnvironmentError( + "Unsupported graph dataset to database handler configured. Cannot add support for multi-user access control mode. Please use a supported graph dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n" + f"Selected graph dataset to database handler: {graph_handler}\n" + f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n" + ) + + if vector_handler not in supported_dataset_database_handlers: + raise EnvironmentError( + "Unsupported vector dataset to database handler configured. Cannot add support for multi-user access control mode. 
Please use a supported vector dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n" + f"Selected vector dataset to database handler: {vector_handler}\n" + f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n" + ) + + if ( + supported_dataset_database_handlers[graph_handler]["handler_provider"] + != graph_db_config.graph_database_provider + ): + raise EnvironmentError( + "The selected graph dataset to database handler does not work with the configured graph database provider. Cannot add support for multi-user access control mode. Please use a supported graph dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n" + f"Selected graph database provider: {graph_db_config.graph_database_provider}\n" + f"Selected graph dataset to database handler: {graph_handler}\n" + f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n" + ) + + if ( + supported_dataset_database_handlers[vector_handler]["handler_provider"] + != vector_db_config.vector_db_provider + ): + raise EnvironmentError( + "The selected vector dataset to database handler does not work with the configured vector database provider. Cannot add support for multi-user access control mode. 
Please use a supported vector dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n" + f"Selected vector database provider: {vector_db_config.vector_db_provider}\n" + f"Selected vector dataset to database handler: {vector_handler}\n" + f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n" + ) + + return True + def backend_access_control_enabled(): backend_access_control = os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL", None) @@ -42,12 +78,7 @@ def backend_access_control_enabled(): return multi_user_support_possible() elif backend_access_control.lower() == "true": # If enabled, ensure that the current graph and vector DBs can support it - multi_user_support = multi_user_support_possible() - if not multi_user_support: - raise EnvironmentError( - "ENABLE_BACKEND_ACCESS_CONTROL is set to true but the current graph and/or vector databases do not support multi-user access control. Please use supported databases or disable backend access control." 
- ) - return True + return multi_user_support_possible() return False diff --git a/cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py b/cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py index 9cc7d9f93..adaa45e33 100644 --- a/cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +++ b/cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py @@ -9,7 +9,10 @@ from cognee.infrastructure.databases.graph.kuzu.KuzuDatasetDatabaseHandler impor ) supported_dataset_database_handlers = { - "neo4j_aura": Neo4jAuraDatasetDatabaseHandler, - "lancedb": LanceDBDatasetDatabaseHandler, - "kuzu": KuzuDatasetDatabaseHandler, + "neo4j_aura": { + "handler_instance": Neo4jAuraDatasetDatabaseHandler, + "handler_provider": "neo4j", + }, + "lancedb": {"handler_instance": LanceDBDatasetDatabaseHandler, "handler_provider": "lancedb"}, + "kuzu": {"handler_instance": KuzuDatasetDatabaseHandler, "handler_provider": "kuzu"}, } diff --git a/cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py b/cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py index a583de354..bca2128ee 100644 --- a/cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +++ b/cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py @@ -1,5 +1,10 @@ from .supported_dataset_database_handlers import supported_dataset_database_handlers -def use_dataset_database_handler(dataset_database_handler_name, dataset_database_handler): - supported_dataset_database_handlers[dataset_database_handler_name] = dataset_database_handler +def use_dataset_database_handler( + dataset_database_handler_name, dataset_database_handler, dataset_database_provider +): + supported_dataset_database_handlers[dataset_database_handler_name] = { + 
"handler_instance": dataset_database_handler, + "handler_provider": dataset_database_provider, + } diff --git a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py index f4bacca7e..665355e30 100644 --- a/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +++ b/cognee/infrastructure/databases/utils/get_or_create_dataset_database.py @@ -1,13 +1,9 @@ -import os -import asyncio -import requests from uuid import UUID from typing import Union, Optional from sqlalchemy import select from sqlalchemy.exc import IntegrityError -from cognee.base_config import get_base_config from cognee.modules.data.methods import create_dataset from cognee.infrastructure.databases.relational import get_relational_engine from cognee.infrastructure.databases.vector import get_vectordb_config @@ -25,7 +21,7 @@ async def _get_vector_db_info(dataset_id: UUID, user: User) -> dict: ) handler = supported_dataset_database_handlers[vector_config.vector_dataset_database_handler] - return await handler.create_dataset(dataset_id, user) + return await handler["handler_instance"].create_dataset(dataset_id, user) async def _get_graph_db_info(dataset_id: UUID, user: User) -> dict: @@ -36,7 +32,7 @@ async def _get_graph_db_info(dataset_id: UUID, user: User) -> dict: ) handler = supported_dataset_database_handlers[graph_config.graph_dataset_database_handler] - return await handler.create_dataset(dataset_id, user) + return await handler["handler_instance"].create_dataset(dataset_id, user) async def _existing_dataset_database( diff --git a/cognee/shared/logging_utils.py b/cognee/shared/logging_utils.py index e8efde72c..70a0bd37e 100644 --- a/cognee/shared/logging_utils.py +++ b/cognee/shared/logging_utils.py @@ -534,6 +534,10 @@ def setup_logging(log_level=None, name=None): # Get a configured logger and log system information logger = structlog.get_logger(name if name else __name__) + 
logger.warning( + "From version 0.5.0 onwards, Cognee will run with multi-user access control mode set to on by default. Data isolation between different users and datasets will be enforced and data created before multi-user access control mode was turned on won't be accessible by default. To disable multi-user access control mode and regain access to old data set the environment variable ENABLE_BACKEND_ACCESS_CONTROL to false before starting Cognee. For more information, please refer to the Cognee documentation." + ) + if logs_dir is not None: logger.info(f"Log file created at: {log_file_path}", log_file=log_file_path) From 2e02aafbaed6caa38c86b343b6fa4a1cef51683a Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 25 Nov 2025 15:55:36 +0100 Subject: [PATCH 171/284] refactor: Remove unused imports --- .../databases/vector/lancedb/LanceDBAdapter.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py b/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py index b52f78517..30631ac4c 100644 --- a/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +++ b/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py @@ -1,15 +1,10 @@ import asyncio from os import path -import os -from uuid import UUID import lancedb from pydantic import BaseModel from lancedb.pydantic import LanceModel, Vector from typing import Generic, List, Optional, TypeVar, Union, get_args, get_origin, get_type_hints -from cognee.base_config import get_base_config -from cognee.infrastructure.databases.vector import get_vectordb_config -from cognee.modules.users.models import User from cognee.infrastructure.databases.exceptions import MissingQueryParameterError from cognee.infrastructure.engine import DataPoint from cognee.infrastructure.engine.utils import parse_id From 5f3b7764068ea7c31013b42d8133f2375930d4d0 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 25 Nov 2025 16:38:34 +0100 Subject: [PATCH 
172/284] chore: add todo for enhancing db connections --- cognee/modules/users/models/DatasetDatabase.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cognee/modules/users/models/DatasetDatabase.py b/cognee/modules/users/models/DatasetDatabase.py index 4bbfffe4c..75e650bcd 100644 --- a/cognee/modules/users/models/DatasetDatabase.py +++ b/cognee/modules/users/models/DatasetDatabase.py @@ -24,6 +24,9 @@ class DatasetDatabase(Base): vector_database_key = Column(String, unique=False, nullable=True) graph_database_key = Column(String, unique=False, nullable=True) + # TODO: Instead of specifying and forwawrding all these individual fields, consider using a JSON field to store + # configuration details for different database types. This would make it more flexible to add new database types + # without changing the database schema. graph_database_username = Column(String, unique=False, nullable=True) graph_database_password = Column(String, unique=False, nullable=True) From e46c0c4f6c4bdd2acf1ff64bce9c147602038fba Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Tue, 25 Nov 2025 17:37:19 +0100 Subject: [PATCH 173/284] CodeRabbit config --- .coderabbit.yaml | 154 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 .coderabbit.yaml diff --git a/.coderabbit.yaml b/.coderabbit.yaml new file mode 100644 index 000000000..7240d9edf --- /dev/null +++ b/.coderabbit.yaml @@ -0,0 +1,154 @@ +# yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json +# .coderabbit.yaml +language: en +early_access: false +enable_free_tier: true +reviews: + enabled: true + profile: chill + instructions: >- + # Code Review Instructions + + - Ensure the code follows best practices and coding standards. + - For **Python** code, follow + [PEP 20](https://www.python.org/dev/peps/pep-0020/) and + [CEP-8](https://gist.github.com/reactive-firewall/b7ee98df9e636a51806e62ef9c4ab161) + standards. 
+ + # Documentation Review Instructions + - Verify that documentation and comments are clear and comprehensive. + - Verify that documentation and comments are free of spelling mistakes. + + # Test Code Review Instructions + - Ensure that test code is automated, comprehensive, and follows testing best practices. + - Verify that all critical functionality is covered by tests. + - Ensure that test code follow + [CEP-8](https://gist.github.com/reactive-firewall/d840ee9990e65f302ce2a8d78ebe73f6) + + # Misc. + - Confirm that the code meets the project's requirements and objectives. + - Confirm that copyright years are up-to date whenever a file is changed. + request_changes_workflow: true + high_level_summary: true + high_level_summary_placeholder: '@coderabbitai summary' + auto_title_placeholder: '@coderabbitai' + review_status: true + poem: false + collapse_walkthrough: false + sequence_diagrams: false + changed_files_summary: true + path_filters: ['!*.xc*/**', '!node_modules/**', '!dist/**', '!build/**', '!.git/**', '!venv/**', '!__pycache__/**'] + path_instructions: + - path: README.md + instructions: >- + 1. Consider the file 'README.md' the overview/introduction of the project. + Also consider the 'README.md' file the first place to look for project documentation. + + 2. When reviewing the file 'README.md' it should be linted with help + from the tools `markdownlint` and `languagetool`, pointing out any issues. + + 3. You may assume the file 'README.md' will contain GitHub flavor Markdown. + - path: '**/*.py' + instructions: >- + When reviewing Python code for this project: + + 1. Prioritize portability over clarity, especially when dealing with cross-Python compatibility. However, with the priority in mind, do still consider improvements to clarity when relevant. + + 2. As a general guideline, consider the code style advocated in the PEP 8 standard (excluding the use of spaces for indentation) and evaluate suggested changes for code style compliance. + + 3. 
As a style convention, consider the code style advocated in [CEP-8](https://gist.github.com/reactive-firewall/b7ee98df9e636a51806e62ef9c4ab161) and evaluate suggested changes for code style compliance. + + 4. As a general guideline, try to provide any relevant, official, and supporting documentation links to any tool's suggestions in review comments. This guideline is important for posterity. + + 5. As a general rule, undocumented function definitions and class definitions in the project's Python code are assumed incomplete. Please consider suggesting a short summary of the code for any of these incomplete definitions as docstrings when reviewing. + - path: cognee/tests/* + instructions: >- + When reviewing test code: + + 1. Prioritize portability over clarity, especially when dealing with cross-Python compatibility. However, with the priority in mind, do still consider improvements to clarity when relevant. + + 2. As a general guideline, consider the code style advocated in the PEP 8 standard (excluding the use of spaces for indentation) and evaluate suggested changes for code style compliance. + + 3. As a style convention, consider the code style advocated in [CEP-8](https://gist.github.com/reactive-firewall/b7ee98df9e636a51806e62ef9c4ab161) and evaluate suggested changes for code style compliance, pointing out any violations discovered. + + 4. As a general guideline, try to provide any relevant, official, and supporting documentation links to any tool's suggestions in review comments. This guideline is important for posterity. + + 5. As a project rule, Python source files with names prefixed by the string "test_" and located in the project's "tests" directory are the project's unit-testing code. It is safe, albeit a heuristic, to assume these are considered part of the project's minimal acceptance testing unless a justifying exception to this assumption is documented. + + 6. 
As a project rule, any files without extensions and with names prefixed by either the string "check_" or the string "test_", and located in the project's "tests" directory, are the project's non-unit test code. "Non-unit test" in this context refers to any type of testing other than unit testing, such as (but not limited to) functional testing, style linting, regression testing, etc. It can also be assumed that non-unit testing code is usually written as Bash shell scripts. + - path: requirements.txt + instructions: >- + * The project's own Python dependencies are recorded in 'requirements.txt' for production code. + + * The project's testing-specific Python dependencies are recorded in 'tests/requirements.txt' and are used for testing the project. + + * The project's documentation-specific Python dependencies are recorded in 'docs/requirements.txt' and are used only for generating Python-focused documentation for the project. 'docs/requirements.txt' may be absent if not applicable. + + Consider these 'requirements.txt' files the records of truth regarding project dependencies. + - path: .github/** + instructions: >- + * When the project is hosted on GitHub: All GitHub-specific configurations, templates, and tools should be found in the '.github' directory tree. + + * 'actionlint' erroneously generates false positives when dealing with GitHub's `${{ ... }}` syntax in conditionals. + + * 'actionlint' erroneously generates incorrect solutions when suggesting the removal of valid `${{ ... }}` syntax. 
+ abort_on_close: true + auto_review: + enabled: true + auto_incremental_review: true + ignore_title_keywords: [] + labels: [] + drafts: false + base_branches: + - dev + - main + tools: + languagetool: + enabled: true + language: en-US + configuration: + level: picky + mother_tongue: en + dictionary: + - 'reactive-firewall' + - 'CEP-9' + - 'CEP-8' + - 'CEP-7' + - 'CEP-5' + - 'Shellscript' + - 'bash' + disabled_rules: + - EN_QUOTES + - CONSECUTIVE_SPACES + enabled_rules: + - STYLE + - EN_CONTRACTION_SPELLING + - EN_WORD_COHERENCY + - IT_IS_OBVIOUS + - TWELFTH_OF_NEVER + - OXFORD_SPELLING + - PASSIVE_VOICE + shellcheck: + enabled: true + ruff: + enabled: true + configuration: + extend_select: + - E # Pycodestyle errors (style issues) + - F # PyFlakes codes (logical errors) + - W # Pycodestyle warnings + - N # PEP 8 naming conventions + ignore: + - W191 + - W391 + - E117 + - D208 + line_length: 100 + dummy_variable_rgx: '^(_.*|junk|extra)$' # Variables starting with '_' or named 'junk' or 'extras', are considered dummy variables + markdownlint: + enabled: true + yamllint: + enabled: true + configuration_file: ".yamllint.conf" +chat: + auto_reply: true \ No newline at end of file From ff20f021cc998966bdec1bc2b2c7a3cc4bd0b85a Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Tue, 25 Nov 2025 17:49:56 +0100 Subject: [PATCH 174/284] fix comments --- .coderabbit.yaml | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/.coderabbit.yaml b/.coderabbit.yaml index 7240d9edf..ffd7f1563 100644 --- a/.coderabbit.yaml +++ b/.coderabbit.yaml @@ -103,31 +103,6 @@ reviews: - dev - main tools: - languagetool: - enabled: true - language: en-US - configuration: - level: picky - mother_tongue: en - dictionary: - - 'reactive-firewall' - - 'CEP-9' - - 'CEP-8' - - 'CEP-7' - - 'CEP-5' - - 'Shellscript' - - 'bash' - disabled_rules: - - EN_QUOTES - - CONSECUTIVE_SPACES - enabled_rules: - - STYLE - - EN_CONTRACTION_SPELLING - - EN_WORD_COHERENCY - - 
IT_IS_OBVIOUS - - TWELFTH_OF_NEVER - - OXFORD_SPELLING - - PASSIVE_VOICE shellcheck: enabled: true ruff: @@ -148,7 +123,6 @@ reviews: markdownlint: enabled: true yamllint: - enabled: true - configuration_file: ".yamllint.conf" + enabled: true chat: auto_reply: true \ No newline at end of file From 0f8cec64d51580c29665ebe94089e32fc9639f3c Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Tue, 25 Nov 2025 17:51:32 +0100 Subject: [PATCH 175/284] fix comments --- .coderabbit.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.coderabbit.yaml b/.coderabbit.yaml index ffd7f1563..6373cd2f3 100644 --- a/.coderabbit.yaml +++ b/.coderabbit.yaml @@ -123,6 +123,6 @@ reviews: markdownlint: enabled: true yamllint: - enabled: true + enabled: true chat: auto_reply: true \ No newline at end of file From 69777ef0a5d80b3a2a10d91d59a9e4f051d019ca Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 25 Nov 2025 17:53:21 +0100 Subject: [PATCH 176/284] feat: Add ability to handle custom connection resolution to avoid storing security critical data in rel dbx --- cognee/context_global_variables.py | 11 ++++- .../dataset_database_handler_interface.py | 40 +++++++++++++++++- .../graph/kuzu/KuzuDatasetDatabaseHandler.py | 6 ++- .../Neo4jAuraDatasetDatabaseHandler.py | 8 ++-- .../databases/utils/__init__.py | 1 + ...esolve_dataset_database_connection_info.py | 42 +++++++++++++++++++ .../modules/users/models/DatasetDatabase.py | 6 +-- 7 files changed, 103 insertions(+), 11 deletions(-) create mode 100644 cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py diff --git a/cognee/context_global_variables.py b/cognee/context_global_variables.py index 0e7e16178..58fff2dff 100644 --- a/cognee/context_global_variables.py +++ b/cognee/context_global_variables.py @@ -7,6 +7,7 @@ from cognee.base_config import get_base_config from cognee.infrastructure.databases.vector.config import get_vectordb_config from cognee.infrastructure.databases.graph.config import 
get_graph_config from cognee.infrastructure.databases.utils import get_or_create_dataset_database +from cognee.infrastructure.databases.utils import resolve_dataset_database_connection_info from cognee.infrastructure.files.storage.config import file_storage_config from cognee.modules.users.methods import get_user @@ -108,6 +109,8 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_ # To ensure permissions are enforced properly all datasets will have their own databases dataset_database = await get_or_create_dataset_database(dataset, user) + # Ensure that all connection info is resolved properly + dataset_database = await resolve_dataset_database_connection_info(dataset_database) base_config = get_base_config() data_root_directory = os.path.join( @@ -133,8 +136,12 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_ "graph_file_path": os.path.join( databases_directory_path, dataset_database.graph_database_name ), - "graph_database_username": dataset_database.graph_database_username, - "graph_database_password": dataset_database.graph_database_password, + "graph_database_username": dataset_database.graph_database_connection_info.get( + "graph_database_username", "" + ), + "graph_database_password": dataset_database.graph_database_connection_info.get( + "graph_database_password", "" + ), } storage_config = { diff --git a/cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py b/cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py index 6dadee6cf..01ee46c48 100644 --- a/cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +++ b/cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py @@ -3,6 +3,7 @@ from uuid import UUID from abc import ABC, abstractmethod from cognee.modules.users.models.User import User +from 
cognee.modules.users.models.DatasetDatabase import DatasetDatabase class DatasetDatabaseHandlerInterface(ABC): @@ -10,7 +11,7 @@ class DatasetDatabaseHandlerInterface(ABC): @abstractmethod async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict: """ - Return a dictionary with connection info for a graph or vector database for the given dataset. + Return a dictionary with database connection/resolution info for a graph or vector database for the given dataset. Function can auto handle deploying of the actual database if needed, but is not necessary. Only providing connection info is sufficient, this info will be mapped when trying to connect to the provided dataset in the future. Needed for Cognee multi-tenant/multi-user and backend access control support. @@ -18,6 +19,10 @@ class DatasetDatabaseHandlerInterface(ABC): Dictionary returned from this function will be used to create a DatasetDatabase row in the relational database. From which internal mapping of dataset -> database connection info will be done. + The returned dictionary is stored verbatim in the relational database and is later passed to + resolve_dataset_connection_info() at connection time. For safe credential handling, prefer + returning only references to secrets or role identifiers, not plaintext credentials. + Each dataset needs to map to a unique graph or vector database when backend access control is enabled to facilitate a separation of concern for data. Args: @@ -28,6 +33,39 @@ class DatasetDatabaseHandlerInterface(ABC): """ pass + @classmethod + async def resolve_dataset_connection_info( + cls, dataset_database: DatasetDatabase + ) -> DatasetDatabase: + """ + Resolve runtime connection details for a dataset’s backing graph/vector database. + Function is intended to be overwritten to implement custom logic for resolving connection info. + + This method is invoked right before the application opens a connection for a given dataset. 
+ It receives the DatasetDatabase row that was persisted when create_dataset() ran and must + return a modified instance of DatasetDatabase with concrete connection parameters that the client/driver can use. + Do not update these new DatasetDatabase values in the relational database to avoid storing secure credentials. + + In case of separate graph and vector database handlers, each handler should implement its own logic for resolving + connection info and only change parameters related to its appropriate database, the resolution function will then + be called one after another with the updated DatasetDatabase value from the previous function as the input. + + Typical behavior: + - If the DatasetDatabase row already contains raw connection fields (e.g., host/port/db/user/password + or api_url/api_key), return them as-is. + - If the row stores only references (e.g., secret IDs, vault paths, cloud resource ARNs/IDs, IAM + roles, SSO tokens), resolve those references by calling the appropriate secret manager or provider + API to obtain short-lived credentials and assemble the final connection DatasetDatabase object. + - Do not persist any resolved or decrypted secrets back to the relational database. Return them only + to the caller. 
+ + Args: + dataset_database: DatasetDatabase row from the relational database + Returns: + DatasetDatabase: Updated instance with resolved connection info + """ + return dataset_database + @classmethod @abstractmethod async def delete_dataset(cls, dataset_id: UUID, user: User) -> None: diff --git a/cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py b/cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py index 8859422f9..a2b2da8f4 100644 --- a/cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +++ b/cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py @@ -48,8 +48,10 @@ class KuzuDatasetDatabaseHandler(DatasetDatabaseHandlerInterface): "graph_database_url": graph_db_url, "graph_database_provider": graph_config.graph_database_provider, "graph_database_key": graph_db_key, - "graph_database_username": graph_db_username, - "graph_database_password": graph_db_password, + "graph_database_connection_info": { + "graph_database_username": graph_db_username, + "graph_database_password": graph_db_password, + }, } @classmethod diff --git a/cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDatasetDatabaseHandler.py b/cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDatasetDatabaseHandler.py index cc38abed0..d1e5eee6f 100644 --- a/cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDatasetDatabaseHandler.py +++ b/cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDatasetDatabaseHandler.py @@ -108,9 +108,11 @@ class Neo4jAuraDatasetDatabaseHandler(DatasetDatabaseHandlerInterface): "graph_database_name": graph_db_name, "graph_database_url": graph_db_url, "graph_database_provider": "neo4j", - "graph_database_key": graph_db_key, # TODO: Hashing of keys/passwords in relational DB - "graph_database_username": graph_db_username, - "graph_database_password": graph_db_password, + "graph_database_key": graph_db_key, + "graph_database_connection_info": { # TODO: Hashing 
of keys/passwords in relational DB + "graph_database_username": graph_db_username, + "graph_database_password": graph_db_password, + }, } @classmethod diff --git a/cognee/infrastructure/databases/utils/__init__.py b/cognee/infrastructure/databases/utils/__init__.py index 1dfa15640..f31d1e0dc 100644 --- a/cognee/infrastructure/databases/utils/__init__.py +++ b/cognee/infrastructure/databases/utils/__init__.py @@ -1 +1,2 @@ from .get_or_create_dataset_database import get_or_create_dataset_database +from .resolve_dataset_database_connection_info import resolve_dataset_database_connection_info diff --git a/cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py b/cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py new file mode 100644 index 000000000..4d8c19403 --- /dev/null +++ b/cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py @@ -0,0 +1,42 @@ +from cognee.infrastructure.databases.vector import get_vectordb_config +from cognee.infrastructure.databases.graph.config import get_graph_config +from cognee.modules.users.models.DatasetDatabase import DatasetDatabase + + +async def _get_vector_db_connection_info(dataset_database: DatasetDatabase) -> DatasetDatabase: + vector_config = get_vectordb_config() + + from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import ( + supported_dataset_database_handlers, + ) + + handler = supported_dataset_database_handlers[vector_config.vector_dataset_database_handler] + return await handler["handler_instance"].resolve_dataset_connection_info(dataset_database) + + +async def _get_graph_db_connection_info(dataset_database: DatasetDatabase) -> DatasetDatabase: + graph_config = get_graph_config() + + from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import ( + supported_dataset_database_handlers, + ) + + handler = 
supported_dataset_database_handlers[graph_config.graph_dataset_database_handler] + return await handler["handler_instance"].resolve_dataset_connection_info(dataset_database) + + +async def resolve_dataset_database_connection_info( + dataset_database: DatasetDatabase, +) -> DatasetDatabase: + """ + Resolve the connection info for the given DatasetDatabase instance. + Resolve both vector and graph database connection info and return the updated DatasetDatabase instance. + + Args: + dataset_database: DatasetDatabase instance + Returns: + DatasetDatabase instance with resolved connection info + """ + dataset_database = await _get_vector_db_connection_info(dataset_database) + dataset_database = await _get_graph_db_connection_info(dataset_database) + return dataset_database diff --git a/cognee/modules/users/models/DatasetDatabase.py b/cognee/modules/users/models/DatasetDatabase.py index 75e650bcd..b864fb951 100644 --- a/cognee/modules/users/models/DatasetDatabase.py +++ b/cognee/modules/users/models/DatasetDatabase.py @@ -1,6 +1,6 @@ from datetime import datetime, timezone -from sqlalchemy import Column, DateTime, String, UUID, ForeignKey +from sqlalchemy import Column, DateTime, String, UUID, ForeignKey, JSON from cognee.infrastructure.databases.relational import Base @@ -27,8 +27,8 @@ class DatasetDatabase(Base): # TODO: Instead of specifying and forwawrding all these individual fields, consider using a JSON field to store # configuration details for different database types. This would make it more flexible to add new database types # without changing the database schema. 
- graph_database_username = Column(String, unique=False, nullable=True) - graph_database_password = Column(String, unique=False, nullable=True) + graph_database_connection_info = Column(JSON, unique=False, nullable=True) + vector_database_connection_info = Column(JSON, unique=False, nullable=True) created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)) updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc)) From cf9edf2663e87cfa9a77972015f1f39beb4a462f Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 25 Nov 2025 18:03:35 +0100 Subject: [PATCH 177/284] chore: Add migration for new dataset database model field --- ...d2b2_expand_dataset_database_with_json_.py | 66 +++++++++++++++++++ .../modules/users/models/DatasetDatabase.py | 4 +- 2 files changed, 68 insertions(+), 2 deletions(-) create mode 100644 alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py diff --git a/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py b/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py new file mode 100644 index 000000000..becd29226 --- /dev/null +++ b/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py @@ -0,0 +1,66 @@ +"""Expand dataset database with json connection field + +Revision ID: 46a6ce2bd2b2 +Revises: 76625596c5c3 +Create Date: 2025-11-25 17:56:28.938931 + +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision: str = "46a6ce2bd2b2" +down_revision: Union[str, None] = "76625596c5c3" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def _get_column(inspector, table, name, schema=None): + for col in inspector.get_columns(table, schema=schema): + if col["name"] == name: + return col + return None + + +def upgrade() -> None: + conn = op.get_bind() + insp = sa.inspect(conn) + + vector_database_connection_info_column = _get_column( + insp, "dataset_database", "vector_database_connection_info" + ) + if not vector_database_connection_info_column: + op.add_column( + "dataset_database", + sa.Column( + "vector_database_connection_info", + sa.JSON(), + unique=False, + nullable=False, + default={}, + ), + ) + + graph_database_connection_info_column = _get_column( + insp, "dataset_database", "graph_database_connection_info" + ) + if not graph_database_connection_info_column: + op.add_column( + "dataset_database", + sa.Column( + "graph_database_connection_info", + sa.JSON(), + unique=False, + nullable=False, + default={}, + ), + ) + + +def downgrade() -> None: + op.drop_column("dataset_database", "vector_database_connection_info") + op.drop_column("dataset_database", "graph_database_connection_info") diff --git a/cognee/modules/users/models/DatasetDatabase.py b/cognee/modules/users/models/DatasetDatabase.py index b864fb951..fee323d2f 100644 --- a/cognee/modules/users/models/DatasetDatabase.py +++ b/cognee/modules/users/models/DatasetDatabase.py @@ -27,8 +27,8 @@ class DatasetDatabase(Base): # TODO: Instead of specifying and forwawrding all these individual fields, consider using a JSON field to store # configuration details for different database types. This would make it more flexible to add new database types # without changing the database schema. 
- graph_database_connection_info = Column(JSON, unique=False, nullable=True) - vector_database_connection_info = Column(JSON, unique=False, nullable=True) + graph_database_connection_info = Column(JSON, unique=False, nullable=False, default={}) + vector_database_connection_info = Column(JSON, unique=False, nullable=False, default={}) created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)) updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc)) From 39c6eba571474c8cb9e64a4d12de5cef20400080 Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Tue, 25 Nov 2025 18:09:43 +0100 Subject: [PATCH 178/284] coderabbit fix --- .coderabbit.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.coderabbit.yaml b/.coderabbit.yaml index 6373cd2f3..0cad35c84 100644 --- a/.coderabbit.yaml +++ b/.coderabbit.yaml @@ -3,8 +3,7 @@ language: en early_access: false enable_free_tier: true -reviews: - enabled: true +reviews: profile: chill instructions: >- # Code Review Instructions @@ -28,7 +27,7 @@ reviews: # Misc. - Confirm that the code meets the project's requirements and objectives. - Confirm that copyright years are up-to date whenever a file is changed. 
- request_changes_workflow: true + request_changes_workflow: false high_level_summary: true high_level_summary_placeholder: '@coderabbitai summary' auto_title_placeholder: '@coderabbitai' From 7c5a17ecb5b54a2ac591e254c4a64f74113d7e5a Mon Sep 17 00:00:00 2001 From: Andrej Milicevic Date: Wed, 26 Nov 2025 11:02:36 +0100 Subject: [PATCH 179/284] test: add extra dependency to bedrock ci test --- .github/workflows/test_llms.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test_llms.yml b/.github/workflows/test_llms.yml index cc21dc97b..bb9d1792a 100644 --- a/.github/workflows/test_llms.yml +++ b/.github/workflows/test_llms.yml @@ -151,6 +151,7 @@ jobs: uses: ./.github/actions/cognee_setup with: python-version: '3.11.x' + extra-dependencies: "aws" - name: Configure AWS Profile run: | From 2f06c3a97eff930ce89363c497d5f101082423ee Mon Sep 17 00:00:00 2001 From: Boris Arzentar Date: Wed, 26 Nov 2025 12:24:14 +0100 Subject: [PATCH 180/284] fix: install nvm and node for -ui cli command --- cognee-frontend/package-lock.json | 2475 +++++++++++------ cognee-frontend/package.json | 14 +- .../src/app/(graph)/CrewAITrigger.tsx | 119 - .../src/app/(graph)/GraphControls.tsx | 9 +- cognee-frontend/src/app/(graph)/GraphView.tsx | 19 +- .../src/app/(graph)/GraphVisualization.tsx | 11 +- cognee-frontend/src/app/dashboard/page.tsx | 4 +- cognee-frontend/src/app/page.tsx | 2 +- .../src/modules/ingestion/useDatasets.ts | 1 - .../src/{middleware.ts => proxy.ts} | 2 +- .../src/ui/Partials/FeedbackForm.tsx | 69 - cognee-frontend/src/ui/Partials/index.ts | 1 - .../src/ui/elements/Notebook/Notebook.tsx | 10 +- .../elements/Notebook/NotebookCellHeader.tsx | 2 +- cognee-frontend/tsconfig.json | 5 +- cognee/api/v1/ui/node_setup.py | 305 ++ cognee/api/v1/ui/npm_utils.py | 45 + cognee/api/v1/ui/ui.py | 106 +- 18 files changed, 2081 insertions(+), 1118 deletions(-) delete mode 100644 cognee-frontend/src/app/(graph)/CrewAITrigger.tsx rename cognee-frontend/src/{middleware.ts => 
proxy.ts} (93%) delete mode 100644 cognee-frontend/src/ui/Partials/FeedbackForm.tsx create mode 100644 cognee/api/v1/ui/node_setup.py create mode 100644 cognee/api/v1/ui/npm_utils.py diff --git a/cognee-frontend/package-lock.json b/cognee-frontend/package-lock.json index eb5560bd7..29826027a 100644 --- a/cognee-frontend/package-lock.json +++ b/cognee-frontend/package-lock.json @@ -8,13 +8,13 @@ "name": "cognee-frontend", "version": "1.0.0", "dependencies": { - "@auth0/nextjs-auth0": "^4.6.0", + "@auth0/nextjs-auth0": "^4.13.1", "classnames": "^2.5.1", "culori": "^4.0.1", "d3-force-3d": "^3.0.6", - "next": "15.3.3", - "react": "^19.0.0", - "react-dom": "^19.0.0", + "next": "16.0.4", + "react": "^19.2.0", + "react-dom": "^19.2.0", "react-force-graph-2d": "^1.27.1", "uuid": "^9.0.1" }, @@ -23,11 +23,11 @@ "@tailwindcss/postcss": "^4.1.7", "@types/culori": "^4.0.0", "@types/node": "^20", - "@types/react": "^18", - "@types/react-dom": "^18", + "@types/react": "^19", + "@types/react-dom": "^19", "@types/uuid": "^9.0.8", "eslint": "^9", - "eslint-config-next": "^15.3.3", + "eslint-config-next": "^16.0.4", "eslint-config-prettier": "^10.1.5", "tailwindcss": "^4.1.7", "typescript": "^5" @@ -38,6 +38,7 @@ "resolved": "https://registry.npmjs.org/@alloc/quick-lru/-/quick-lru-5.2.0.tgz", "integrity": "sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw==", "dev": true, + "license": "MIT", "engines": { "node": ">=10" }, @@ -45,79 +46,314 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/@ampproject/remapping": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/@ampproject/remapping/-/remapping-2.3.0.tgz", - "integrity": "sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw==", - "dev": true, + "node_modules/@auth0/nextjs-auth0": { + "version": "4.13.1", + "resolved": "https://registry.npmjs.org/@auth0/nextjs-auth0/-/nextjs-auth0-4.13.1.tgz", + "integrity": 
"sha512-RGZKddUa/kNdudhxYsJ0xSPtx10qr3qQ6i1aq2PY5x+JJ7f1Ifkb6QwuwREzxJlWh80YQ0S5Ef26JWHCPN2PkQ==", + "license": "MIT", "dependencies": { - "@jridgewell/gen-mapping": "^0.3.5", - "@jridgewell/trace-mapping": "^0.3.24" + "@edge-runtime/cookies": "^5.0.1", + "@panva/hkdf": "^1.2.1", + "jose": "^6.0.11", + "oauth4webapi": "^3.8.2", + "openid-client": "^6.8.0", + "swr": "^2.2.5" + }, + "peerDependencies": { + "next": "^14.2.25 || ^15.2.3 || ^16.0.0", + "react": "^18.0.0 || ^19.0.0 || ^19.0.0-0", + "react-dom": "^18.0.0 || ^19.0.0 || ^19.0.0-0" + } + }, + "node_modules/@babel/code-frame": { + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.27.1.tgz", + "integrity": "sha512-cjQ7ZlQ0Mv3b47hABuTevyTuYN4i+loJKGeV9flcCgIK37cCXRh+L1bd3iBHlynerhQ7BhCkn2BPbQUL+rGqFg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-validator-identifier": "^7.27.1", + "js-tokens": "^4.0.0", + "picocolors": "^1.1.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/compat-data": { + "version": "7.28.5", + "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.28.5.tgz", + "integrity": "sha512-6uFXyCayocRbqhZOB+6XcuZbkMNimwfVGFji8CTZnCzOHVGvDqzvitu1re2AU5LROliz7eQPhB8CpAMvnx9EjA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/core": { + "version": "7.28.5", + "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.28.5.tgz", + "integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==", + "dev": true, + "license": "MIT", + "peer": true, + "dependencies": { + "@babel/code-frame": "^7.27.1", + "@babel/generator": "^7.28.5", + "@babel/helper-compilation-targets": "^7.27.2", + "@babel/helper-module-transforms": "^7.28.3", + "@babel/helpers": "^7.28.4", + "@babel/parser": "^7.28.5", + "@babel/template": "^7.27.2", + "@babel/traverse": "^7.28.5", + "@babel/types": "^7.28.5", + 
"@jridgewell/remapping": "^2.3.5", + "convert-source-map": "^2.0.0", + "debug": "^4.1.0", + "gensync": "^1.0.0-beta.2", + "json5": "^2.2.3", + "semver": "^6.3.1" + }, + "engines": { + "node": ">=6.9.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/babel" + } + }, + "node_modules/@babel/generator": { + "version": "7.28.5", + "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.28.5.tgz", + "integrity": "sha512-3EwLFhZ38J4VyIP6WNtt2kUdW9dokXA9Cr4IVIFHuCpZ3H8/YFOl5JjZHisrn1fATPBmKKqXzDFvh9fUwHz6CQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.28.5", + "@babel/types": "^7.28.5", + "@jridgewell/gen-mapping": "^0.3.12", + "@jridgewell/trace-mapping": "^0.3.28", + "jsesc": "^3.0.2" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-compilation-targets": { + "version": "7.27.2", + "resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.27.2.tgz", + "integrity": "sha512-2+1thGUUWWjLTYTHZWK1n8Yga0ijBz1XAhUXcKy81rd5g6yh7hGqMp45v7cadSbEHc9G3OTv45SyneRN3ps4DQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/compat-data": "^7.27.2", + "@babel/helper-validator-option": "^7.27.1", + "browserslist": "^4.24.0", + "lru-cache": "^5.1.1", + "semver": "^6.3.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-globals": { + "version": "7.28.0", + "resolved": "https://registry.npmjs.org/@babel/helper-globals/-/helper-globals-7.28.0.tgz", + "integrity": "sha512-+W6cISkXFa1jXsDEdYA8HeevQT/FULhxzR99pxphltZcVaugps53THCeiWA8SguxxpSp3gKPiuYfSWopkLQ4hw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-module-imports": { + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.27.1.tgz", + "integrity": 
"sha512-0gSFWUPNXNopqtIPQvlD5WgXYI5GY2kP2cCvoT8kczjbfcfuIljTbcWrulD1CIPIX2gt1wghbDy08yE1p+/r3w==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/traverse": "^7.27.1", + "@babel/types": "^7.27.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-module-transforms": { + "version": "7.28.3", + "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.28.3.tgz", + "integrity": "sha512-gytXUbs8k2sXS9PnQptz5o0QnpLL51SwASIORY6XaBKF88nsOT0Zw9szLqlSGQDP/4TljBAD5y98p2U1fqkdsw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-module-imports": "^7.27.1", + "@babel/helper-validator-identifier": "^7.27.1", + "@babel/traverse": "^7.28.3" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0" + } + }, + "node_modules/@babel/helper-string-parser": { + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.27.1.tgz", + "integrity": "sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-identifier": { + "version": "7.28.5", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.28.5.tgz", + "integrity": "sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-option": { + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-option/-/helper-validator-option-7.27.1.tgz", + "integrity": "sha512-YvjJow9FxbhFFKDSuFnVCe2WxXk1zWc22fFePVNEaWJEu8IrZVlda6N0uHwzZrUM1il7NC9Mlp4MaJYbYd9JSg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + 
"node_modules/@babel/helpers": { + "version": "7.28.4", + "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.28.4.tgz", + "integrity": "sha512-HFN59MmQXGHVyYadKLVumYsA9dBFun/ldYxipEjzA4196jpLZd8UjEEBLkbEkvfYreDqJhZxYAWFPtrfhNpj4w==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/template": "^7.27.2", + "@babel/types": "^7.28.4" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/parser": { + "version": "7.28.5", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.28.5.tgz", + "integrity": "sha512-KKBU1VGYR7ORr3At5HAtUQ+TV3SzRCXmA/8OdDZiLDBIZxVyzXuztPjfLd3BV1PRAQGCMWWSHYhL0F8d5uHBDQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/types": "^7.28.5" + }, + "bin": { + "parser": "bin/babel-parser.js" }, "engines": { "node": ">=6.0.0" } }, - "node_modules/@auth0/nextjs-auth0": { - "version": "4.8.0", - "resolved": "https://registry.npmjs.org/@auth0/nextjs-auth0/-/nextjs-auth0-4.8.0.tgz", - "integrity": "sha512-tgo4f6u1Ac4MqjwiYtllr9DzcNK+ThDoU4VYj6uTTqebwGGSdhXZWAkHFwWgabgWdeYHXpKYZ0xS9dQZv/PNAQ==", + "node_modules/@babel/template": { + "version": "7.27.2", + "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.27.2.tgz", + "integrity": "sha512-LPDZ85aEJyYSd18/DkjNh4/y1ntkE5KwUHWTiqgRxruuZL2F1yuHligVHLvcHY2vMHXttKFpJn6LwfI7cw7ODw==", + "dev": true, + "license": "MIT", "dependencies": { - "@edge-runtime/cookies": "^5.0.1", - "@panva/hkdf": "^1.2.1", - "jose": "^5.9.6", - "oauth4webapi": "^3.1.2", - "swr": "^2.2.5" + "@babel/code-frame": "^7.27.1", + "@babel/parser": "^7.27.2", + "@babel/types": "^7.27.1" }, - "peerDependencies": { - "next": "^14.2.25 || ^15.2.3", - "react": "^18.0.0 || ^19.0.0 || ^19.0.0-0", - "react-dom": "^18.0.0 || ^19.0.0 || ^19.0.0-0" + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/traverse": { + "version": "7.28.5", + "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.28.5.tgz", + "integrity": 
"sha512-TCCj4t55U90khlYkVV/0TfkJkAkUg3jZFA3Neb7unZT8CPok7iiRfaX0F+WnqWqt7OxhOn0uBKXCw4lbL8W0aQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.27.1", + "@babel/generator": "^7.28.5", + "@babel/helper-globals": "^7.28.0", + "@babel/parser": "^7.28.5", + "@babel/template": "^7.27.2", + "@babel/types": "^7.28.5", + "debug": "^4.3.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/types": { + "version": "7.28.5", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.28.5.tgz", + "integrity": "sha512-qQ5m48eI/MFLQ5PxQj4PFaprjyCTLI37ElWMmNs0K8Lk3dVeOdNpB3ks8jc7yM5CDmVC73eMVk/trk3fgmrUpA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-string-parser": "^7.27.1", + "@babel/helper-validator-identifier": "^7.28.5" + }, + "engines": { + "node": ">=6.9.0" } }, "node_modules/@edge-runtime/cookies": { "version": "5.0.2", "resolved": "https://registry.npmjs.org/@edge-runtime/cookies/-/cookies-5.0.2.tgz", "integrity": "sha512-Sd8LcWpZk/SWEeKGE8LT6gMm5MGfX/wm+GPnh1eBEtCpya3vYqn37wYknwAHw92ONoyyREl1hJwxV/Qx2DWNOg==", + "license": "MIT", "engines": { "node": ">=16" } }, "node_modules/@emnapi/core": { - "version": "1.4.5", - "resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.4.5.tgz", - "integrity": "sha512-XsLw1dEOpkSX/WucdqUhPWP7hDxSvZiY+fsUC14h+FtQ2Ifni4znbBt8punRX+Uj2JG/uDb8nEHVKvrVlvdZ5Q==", + "version": "1.7.1", + "resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.7.1.tgz", + "integrity": "sha512-o1uhUASyo921r2XtHYOHy7gdkGLge8ghBEQHMWmyJFoXlpU58kIrhhN3w26lpQb6dspetweapMn2CSNwQ8I4wg==", "dev": true, + "license": "MIT", "optional": true, "dependencies": { - "@emnapi/wasi-threads": "1.0.4", + "@emnapi/wasi-threads": "1.1.0", "tslib": "^2.4.0" } }, "node_modules/@emnapi/runtime": { - "version": "1.4.5", - "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.4.5.tgz", - "integrity": 
"sha512-++LApOtY0pEEz1zrd9vy1/zXVaVJJ/EbAF3u0fXIzPJEDtnITsBGbbK0EkM72amhl/R5b+5xx0Y/QhcVOpuulg==", + "version": "1.7.1", + "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.7.1.tgz", + "integrity": "sha512-PVtJr5CmLwYAU9PZDMITZoR5iAOShYREoR45EyyLrbntV50mdePTgUn4AmOw90Ifcj+x2kRjdzr1HP3RrNiHGA==", + "license": "MIT", "optional": true, "dependencies": { "tslib": "^2.4.0" } }, "node_modules/@emnapi/wasi-threads": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@emnapi/wasi-threads/-/wasi-threads-1.0.4.tgz", - "integrity": "sha512-PJR+bOmMOPH8AtcTGAyYNiuJ3/Fcoj2XN/gBEWzDIKh254XO+mM9XoXHk5GNEhodxeMznbg7BlRojVbKN+gC6g==", + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@emnapi/wasi-threads/-/wasi-threads-1.1.0.tgz", + "integrity": "sha512-WI0DdZ8xFSbgMjR1sFsKABJ/C5OnRrjT06JXbZKexJGrDuPTzZdDYfFlsgcCXCyf+suG5QU2e/y1Wo2V/OapLQ==", "dev": true, + "license": "MIT", "optional": true, "dependencies": { "tslib": "^2.4.0" } }, "node_modules/@eslint-community/eslint-utils": { - "version": "4.7.0", - "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.7.0.tgz", - "integrity": "sha512-dyybb3AcajC7uha6CvhdVRJqaKyn7w2YKqKyAN37NKYgZT36w+iRb0Dymmc5qEJ549c/S31cMMSFd75bteCpCw==", + "version": "4.9.0", + "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.9.0.tgz", + "integrity": "sha512-ayVFHdtZ+hsq1t2Dy24wCmGXGe4q9Gu3smhLYALJrr473ZH27MsnSL+LKUlimp4BWJqMDMLmPpx/Q9R3OAlL4g==", "dev": true, + "license": "MIT", "dependencies": { "eslint-visitor-keys": "^3.4.3" }, @@ -136,6 +372,7 @@ "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-3.4.3.tgz", "integrity": "sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag==", "dev": true, + "license": "Apache-2.0", "engines": { "node": "^12.22.0 || ^14.17.0 || >=16.0.0" }, @@ -144,21 +381,23 @@ } }, "node_modules/@eslint-community/regexpp": { - "version": 
"4.12.1", - "resolved": "https://registry.npmjs.org/@eslint-community/regexpp/-/regexpp-4.12.1.tgz", - "integrity": "sha512-CCZCDJuduB9OUkFkY2IgppNZMi2lBQgD2qzwXkEia16cge2pijY/aXi96CJMquDMn3nJdlPV1A5KrJEXwfLNzQ==", + "version": "4.12.2", + "resolved": "https://registry.npmjs.org/@eslint-community/regexpp/-/regexpp-4.12.2.tgz", + "integrity": "sha512-EriSTlt5OC9/7SXkRSCAhfSxxoSUgBm33OH+IkwbdpgoqsSsUg7y3uh+IICI/Qg4BBWr3U2i39RpmycbxMq4ew==", "dev": true, + "license": "MIT", "engines": { "node": "^12.0.0 || ^14.0.0 || >=16.0.0" } }, "node_modules/@eslint/config-array": { - "version": "0.21.0", - "resolved": "https://registry.npmjs.org/@eslint/config-array/-/config-array-0.21.0.tgz", - "integrity": "sha512-ENIdc4iLu0d93HeYirvKmrzshzofPw6VkZRKQGe9Nv46ZnWUzcF1xV01dcvEg/1wXUR61OmmlSfyeyO7EvjLxQ==", + "version": "0.21.1", + "resolved": "https://registry.npmjs.org/@eslint/config-array/-/config-array-0.21.1.tgz", + "integrity": "sha512-aw1gNayWpdI/jSYVgzN5pL0cfzU02GT3NBpeT/DXbx1/1x7ZKxFPd9bwrzygx/qiwIQiJ1sw/zD8qY/kRvlGHA==", "dev": true, + "license": "Apache-2.0", "dependencies": { - "@eslint/object-schema": "^2.1.6", + "@eslint/object-schema": "^2.1.7", "debug": "^4.3.1", "minimatch": "^3.1.2" }, @@ -167,19 +406,24 @@ } }, "node_modules/@eslint/config-helpers": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/@eslint/config-helpers/-/config-helpers-0.3.0.tgz", - "integrity": "sha512-ViuymvFmcJi04qdZeDc2whTHryouGcDlaxPqarTD0ZE10ISpxGUVZGZDx4w01upyIynL3iu6IXH2bS1NhclQMw==", + "version": "0.4.2", + "resolved": "https://registry.npmjs.org/@eslint/config-helpers/-/config-helpers-0.4.2.tgz", + "integrity": "sha512-gBrxN88gOIf3R7ja5K9slwNayVcZgK6SOUORm2uBzTeIEfeVaIhOpCtTox3P6R7o2jLFwLFTLnC7kU/RGcYEgw==", "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@eslint/core": "^0.17.0" + }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" } }, "node_modules/@eslint/core": { - "version": "0.15.1", - "resolved": 
"https://registry.npmjs.org/@eslint/core/-/core-0.15.1.tgz", - "integrity": "sha512-bkOp+iumZCCbt1K1CmWf0R9pM5yKpDv+ZXtvSyQpudrI9kuFLp+bM2WOPXImuD/ceQuaa8f5pj93Y7zyECIGNA==", + "version": "0.17.0", + "resolved": "https://registry.npmjs.org/@eslint/core/-/core-0.17.0.tgz", + "integrity": "sha512-yL/sLrpmtDaFEiUj1osRP4TI2MDz1AddJL+jZ7KSqvBuliN4xqYY54IfdN8qD8Toa6g1iloph1fxQNkjOxrrpQ==", "dev": true, + "license": "Apache-2.0", "dependencies": { "@types/json-schema": "^7.0.15" }, @@ -192,6 +436,7 @@ "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-3.3.1.tgz", "integrity": "sha512-gtF186CXhIl1p4pJNGZw8Yc6RlshoePRvE0X91oPGb3vZ8pM3qOS9W9NGPat9LziaBV7XrJWGylNQXkGcnM3IQ==", "dev": true, + "license": "MIT", "dependencies": { "ajv": "^6.12.4", "debug": "^4.3.2", @@ -211,10 +456,11 @@ } }, "node_modules/@eslint/js": { - "version": "9.31.0", - "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.31.0.tgz", - "integrity": "sha512-LOm5OVt7D4qiKCqoiPbA7LWmI+tbw1VbTUowBcUMgQSuM6poJufkFkYDcQpo5KfgD39TnNySV26QjOh7VFpSyw==", + "version": "9.39.1", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.1.tgz", + "integrity": "sha512-S26Stp4zCy88tH94QbBv3XCuzRQiZ9yXofEILmglYTh/Ug/a9/umqvgFtYBAo3Lp0nsI/5/qH1CCrbdK3AP1Tw==", "dev": true, + "license": "MIT", "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" }, @@ -223,21 +469,23 @@ } }, "node_modules/@eslint/object-schema": { - "version": "2.1.6", - "resolved": "https://registry.npmjs.org/@eslint/object-schema/-/object-schema-2.1.6.tgz", - "integrity": "sha512-RBMg5FRL0I0gs51M/guSAj5/e14VQ4tpZnQNWwuDT66P14I43ItmPfIZRhO9fUVIPOAQXU47atlywZ/czoqFPA==", + "version": "2.1.7", + "resolved": "https://registry.npmjs.org/@eslint/object-schema/-/object-schema-2.1.7.tgz", + "integrity": "sha512-VtAOaymWVfZcmZbp6E2mympDIHvyjXs/12LqWYjVw6qjrfF+VK+fyG33kChz3nnK+SU5/NeHOqrTEHS8sXO3OA==", "dev": true, + "license": "Apache-2.0", "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" } }, 
"node_modules/@eslint/plugin-kit": { - "version": "0.3.4", - "resolved": "https://registry.npmjs.org/@eslint/plugin-kit/-/plugin-kit-0.3.4.tgz", - "integrity": "sha512-Ul5l+lHEcw3L5+k8POx6r74mxEYKG5kOb6Xpy2gCRW6zweT6TEhAf8vhxGgjhqrd/VO/Dirhsb+1hNpD1ue9hw==", + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@eslint/plugin-kit/-/plugin-kit-0.4.1.tgz", + "integrity": "sha512-43/qtrDUokr7LJqoF2c3+RInu/t4zfrpYdoSDfYyhg52rwLV6TnOvdG4fXm7IkSB3wErkcmJS9iEhjVtOSEjjA==", "dev": true, + "license": "Apache-2.0", "dependencies": { - "@eslint/core": "^0.15.1", + "@eslint/core": "^0.17.0", "levn": "^0.4.1" }, "engines": { @@ -249,41 +497,31 @@ "resolved": "https://registry.npmjs.org/@humanfs/core/-/core-0.19.1.tgz", "integrity": "sha512-5DyQ4+1JEUzejeK1JGICcideyfUbGixgS9jNgex5nqkW+cY7WZhxBigmieN5Qnw9ZosSNVC9KQKyb+GUaGyKUA==", "dev": true, + "license": "Apache-2.0", "engines": { "node": ">=18.18.0" } }, "node_modules/@humanfs/node": { - "version": "0.16.6", - "resolved": "https://registry.npmjs.org/@humanfs/node/-/node-0.16.6.tgz", - "integrity": "sha512-YuI2ZHQL78Q5HbhDiBA1X4LmYdXCKCMQIfw0pw7piHJwyREFebJUvrQN4cMssyES6x+vfUbx1CIpaQUKYdQZOw==", + "version": "0.16.7", + "resolved": "https://registry.npmjs.org/@humanfs/node/-/node-0.16.7.tgz", + "integrity": "sha512-/zUx+yOsIrG4Y43Eh2peDeKCxlRt/gET6aHfaKpuq267qXdYDFViVHfMaLyygZOnl0kGWxFIgsBy8QFuTLUXEQ==", "dev": true, + "license": "Apache-2.0", "dependencies": { "@humanfs/core": "^0.19.1", - "@humanwhocodes/retry": "^0.3.0" + "@humanwhocodes/retry": "^0.4.0" }, "engines": { "node": ">=18.18.0" } }, - "node_modules/@humanfs/node/node_modules/@humanwhocodes/retry": { - "version": "0.3.1", - "resolved": "https://registry.npmjs.org/@humanwhocodes/retry/-/retry-0.3.1.tgz", - "integrity": "sha512-JBxkERygn7Bv/GbN5Rv8Ul6LVknS+5Bp6RgDC/O8gEBU/yeH5Ui5C/OlWrTb6qct7LjjfT6Re2NxB0ln0yYybA==", - "dev": true, - "engines": { - "node": ">=18.18" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/nzakas" - } - 
}, "node_modules/@humanwhocodes/module-importer": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/@humanwhocodes/module-importer/-/module-importer-1.0.1.tgz", "integrity": "sha512-bxveV4V8v5Yb4ncFTT3rPSgZBOpCkjfK0y4oVVVJwIuDVBRMDXrPyXRL988i5ap9m9bnyEEjWfm5WkBmtffLfA==", "dev": true, + "license": "Apache-2.0", "engines": { "node": ">=12.22" }, @@ -297,6 +535,7 @@ "resolved": "https://registry.npmjs.org/@humanwhocodes/retry/-/retry-0.4.3.tgz", "integrity": "sha512-bV0Tgo9K4hfPCek+aMAn81RppFKv2ySDQeMoSZuvTASywNTnVJCArCZE2FWqpvIatKu7VMRLWlR1EazvVhDyhQ==", "dev": true, + "license": "Apache-2.0", "engines": { "node": ">=18.18" }, @@ -305,13 +544,24 @@ "url": "https://github.com/sponsors/nzakas" } }, + "node_modules/@img/colour": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.0.0.tgz", + "integrity": "sha512-A5P/LfWGFSl6nsckYtjw9da+19jB8hkJ6ACTGcDfEJ0aE+l2n2El7dsVM7UVHZQ9s2lmYMWlrS21YLy2IR1LUw==", + "license": "MIT", + "optional": true, + "engines": { + "node": ">=18" + } + }, "node_modules/@img/sharp-darwin-arm64": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.34.3.tgz", - "integrity": "sha512-ryFMfvxxpQRsgZJqBd4wsttYQbCxsJksrv9Lw/v798JcQ8+w84mBWuXwl+TT0WJ/WrYOLaYpwQXi3sA9nTIaIg==", + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.34.5.tgz", + "integrity": "sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w==", "cpu": [ "arm64" ], + "license": "Apache-2.0", "optional": true, "os": [ "darwin" @@ -323,16 +573,17 @@ "url": "https://opencollective.com/libvips" }, "optionalDependencies": { - "@img/sharp-libvips-darwin-arm64": "1.2.0" + "@img/sharp-libvips-darwin-arm64": "1.2.4" } }, "node_modules/@img/sharp-darwin-x64": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.34.3.tgz", - 
"integrity": "sha512-yHpJYynROAj12TA6qil58hmPmAwxKKC7reUqtGLzsOHfP7/rniNGTL8tjWX6L3CTV4+5P4ypcS7Pp+7OB+8ihA==", + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.34.5.tgz", + "integrity": "sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw==", "cpu": [ "x64" ], + "license": "Apache-2.0", "optional": true, "os": [ "darwin" @@ -344,16 +595,17 @@ "url": "https://opencollective.com/libvips" }, "optionalDependencies": { - "@img/sharp-libvips-darwin-x64": "1.2.0" + "@img/sharp-libvips-darwin-x64": "1.2.4" } }, "node_modules/@img/sharp-libvips-darwin-arm64": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.2.0.tgz", - "integrity": "sha512-sBZmpwmxqwlqG9ueWFXtockhsxefaV6O84BMOrhtg/YqbTaRdqDE7hxraVE3y6gVM4eExmfzW4a8el9ArLeEiQ==", + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.2.4.tgz", + "integrity": "sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g==", "cpu": [ "arm64" ], + "license": "LGPL-3.0-or-later", "optional": true, "os": [ "darwin" @@ -363,12 +615,13 @@ } }, "node_modules/@img/sharp-libvips-darwin-x64": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.2.0.tgz", - "integrity": "sha512-M64XVuL94OgiNHa5/m2YvEQI5q2cl9d/wk0qFTDVXcYzi43lxuiFTftMR1tOnFQovVXNZJ5TURSDK2pNe9Yzqg==", + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.2.4.tgz", + "integrity": "sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg==", "cpu": [ "x64" ], + "license": "LGPL-3.0-or-later", "optional": true, "os": [ "darwin" @@ -378,12 +631,13 @@ } }, "node_modules/@img/sharp-libvips-linux-arm": { - "version": 
"1.2.0", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.2.0.tgz", - "integrity": "sha512-mWd2uWvDtL/nvIzThLq3fr2nnGfyr/XMXlq8ZJ9WMR6PXijHlC3ksp0IpuhK6bougvQrchUAfzRLnbsen0Cqvw==", + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.2.4.tgz", + "integrity": "sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A==", "cpu": [ "arm" ], + "license": "LGPL-3.0-or-later", "optional": true, "os": [ "linux" @@ -393,12 +647,13 @@ } }, "node_modules/@img/sharp-libvips-linux-arm64": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.2.0.tgz", - "integrity": "sha512-RXwd0CgG+uPRX5YYrkzKyalt2OJYRiJQ8ED/fi1tq9WQW2jsQIn0tqrlR5l5dr/rjqq6AHAxURhj2DVjyQWSOA==", + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.2.4.tgz", + "integrity": "sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw==", "cpu": [ "arm64" ], + "license": "LGPL-3.0-or-later", "optional": true, "os": [ "linux" @@ -408,12 +663,29 @@ } }, "node_modules/@img/sharp-libvips-linux-ppc64": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-ppc64/-/sharp-libvips-linux-ppc64-1.2.0.tgz", - "integrity": "sha512-Xod/7KaDDHkYu2phxxfeEPXfVXFKx70EAFZ0qyUdOjCcxbjqyJOEUpDe6RIyaunGxT34Anf9ue/wuWOqBW2WcQ==", + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-ppc64/-/sharp-libvips-linux-ppc64-1.2.4.tgz", + "integrity": "sha512-FMuvGijLDYG6lW+b/UvyilUWu5Ayu+3r2d1S8notiGCIyYU/76eig1UfMmkZ7vwgOrzKzlQbFSuQfgm7GYUPpA==", "cpu": [ "ppc64" ], + "license": "LGPL-3.0-or-later", + "optional": true, + "os": [ + "linux" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + 
"node_modules/@img/sharp-libvips-linux-riscv64": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-riscv64/-/sharp-libvips-linux-riscv64-1.2.4.tgz", + "integrity": "sha512-oVDbcR4zUC0ce82teubSm+x6ETixtKZBh/qbREIOcI3cULzDyb18Sr/Wcyx7NRQeQzOiHTNbZFF1UwPS2scyGA==", + "cpu": [ + "riscv64" + ], + "license": "LGPL-3.0-or-later", "optional": true, "os": [ "linux" @@ -423,12 +695,13 @@ } }, "node_modules/@img/sharp-libvips-linux-s390x": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.2.0.tgz", - "integrity": "sha512-eMKfzDxLGT8mnmPJTNMcjfO33fLiTDsrMlUVcp6b96ETbnJmd4uvZxVJSKPQfS+odwfVaGifhsB07J1LynFehw==", + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.2.4.tgz", + "integrity": "sha512-qmp9VrzgPgMoGZyPvrQHqk02uyjA0/QrTO26Tqk6l4ZV0MPWIW6LTkqOIov+J1yEu7MbFQaDpwdwJKhbJvuRxQ==", "cpu": [ "s390x" ], + "license": "LGPL-3.0-or-later", "optional": true, "os": [ "linux" @@ -438,12 +711,13 @@ } }, "node_modules/@img/sharp-libvips-linux-x64": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.2.0.tgz", - "integrity": "sha512-ZW3FPWIc7K1sH9E3nxIGB3y3dZkpJlMnkk7z5tu1nSkBoCgw2nSRTFHI5pB/3CQaJM0pdzMF3paf9ckKMSE9Tg==", + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.2.4.tgz", + "integrity": "sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw==", "cpu": [ "x64" ], + "license": "LGPL-3.0-or-later", "optional": true, "os": [ "linux" @@ -453,12 +727,13 @@ } }, "node_modules/@img/sharp-libvips-linuxmusl-arm64": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.2.0.tgz", - "integrity": 
"sha512-UG+LqQJbf5VJ8NWJ5Z3tdIe/HXjuIdo4JeVNADXBFuG7z9zjoegpzzGIyV5zQKi4zaJjnAd2+g2nna8TZvuW9Q==", + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.2.4.tgz", + "integrity": "sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw==", "cpu": [ "arm64" ], + "license": "LGPL-3.0-or-later", "optional": true, "os": [ "linux" @@ -468,12 +743,13 @@ } }, "node_modules/@img/sharp-libvips-linuxmusl-x64": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.2.0.tgz", - "integrity": "sha512-SRYOLR7CXPgNze8akZwjoGBoN1ThNZoqpOgfnOxmWsklTGVfJiGJoC/Lod7aNMGA1jSsKWM1+HRX43OP6p9+6Q==", + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.2.4.tgz", + "integrity": "sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg==", "cpu": [ "x64" ], + "license": "LGPL-3.0-or-later", "optional": true, "os": [ "linux" @@ -483,12 +759,13 @@ } }, "node_modules/@img/sharp-linux-arm": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.34.3.tgz", - "integrity": "sha512-oBK9l+h6KBN0i3dC8rYntLiVfW8D8wH+NPNT3O/WBHeW0OQWCjfWksLUaPidsrDKpJgXp3G3/hkmhptAW0I3+A==", + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.34.5.tgz", + "integrity": "sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw==", "cpu": [ "arm" ], + "license": "Apache-2.0", "optional": true, "os": [ "linux" @@ -500,16 +777,17 @@ "url": "https://opencollective.com/libvips" }, "optionalDependencies": { - "@img/sharp-libvips-linux-arm": "1.2.0" + "@img/sharp-libvips-linux-arm": "1.2.4" } }, "node_modules/@img/sharp-linux-arm64": { - "version": "0.34.3", - "resolved": 
"https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.34.3.tgz", - "integrity": "sha512-QdrKe3EvQrqwkDrtuTIjI0bu6YEJHTgEeqdzI3uWJOH6G1O8Nl1iEeVYRGdj1h5I21CqxSvQp1Yv7xeU3ZewbA==", + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.34.5.tgz", + "integrity": "sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg==", "cpu": [ "arm64" ], + "license": "Apache-2.0", "optional": true, "os": [ "linux" @@ -521,16 +799,17 @@ "url": "https://opencollective.com/libvips" }, "optionalDependencies": { - "@img/sharp-libvips-linux-arm64": "1.2.0" + "@img/sharp-libvips-linux-arm64": "1.2.4" } }, "node_modules/@img/sharp-linux-ppc64": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-ppc64/-/sharp-linux-ppc64-0.34.3.tgz", - "integrity": "sha512-GLtbLQMCNC5nxuImPR2+RgrviwKwVql28FWZIW1zWruy6zLgA5/x2ZXk3mxj58X/tszVF69KK0Is83V8YgWhLA==", + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linux-ppc64/-/sharp-linux-ppc64-0.34.5.tgz", + "integrity": "sha512-7zznwNaqW6YtsfrGGDA6BRkISKAAE1Jo0QdpNYXNMHu2+0dTrPflTLNkpc8l7MUP5M16ZJcUvysVWWrMefZquA==", "cpu": [ "ppc64" ], + "license": "Apache-2.0", "optional": true, "os": [ "linux" @@ -542,16 +821,39 @@ "url": "https://opencollective.com/libvips" }, "optionalDependencies": { - "@img/sharp-libvips-linux-ppc64": "1.2.0" + "@img/sharp-libvips-linux-ppc64": "1.2.4" + } + }, + "node_modules/@img/sharp-linux-riscv64": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linux-riscv64/-/sharp-linux-riscv64-0.34.5.tgz", + "integrity": "sha512-51gJuLPTKa7piYPaVs8GmByo7/U7/7TZOq+cnXJIHZKavIRHAP77e3N2HEl3dgiqdD/w0yUfiJnII77PuDDFdw==", + "cpu": [ + "riscv64" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, 
+ "optionalDependencies": { + "@img/sharp-libvips-linux-riscv64": "1.2.4" } }, "node_modules/@img/sharp-linux-s390x": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.34.3.tgz", - "integrity": "sha512-3gahT+A6c4cdc2edhsLHmIOXMb17ltffJlxR0aC2VPZfwKoTGZec6u5GrFgdR7ciJSsHT27BD3TIuGcuRT0KmQ==", + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.34.5.tgz", + "integrity": "sha512-nQtCk0PdKfho3eC5MrbQoigJ2gd1CgddUMkabUj+rBevs8tZ2cULOx46E7oyX+04WGfABgIwmMC0VqieTiR4jg==", "cpu": [ "s390x" ], + "license": "Apache-2.0", "optional": true, "os": [ "linux" @@ -563,16 +865,17 @@ "url": "https://opencollective.com/libvips" }, "optionalDependencies": { - "@img/sharp-libvips-linux-s390x": "1.2.0" + "@img/sharp-libvips-linux-s390x": "1.2.4" } }, "node_modules/@img/sharp-linux-x64": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.34.3.tgz", - "integrity": "sha512-8kYso8d806ypnSq3/Ly0QEw90V5ZoHh10yH0HnrzOCr6DKAPI6QVHvwleqMkVQ0m+fc7EH8ah0BB0QPuWY6zJQ==", + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.34.5.tgz", + "integrity": "sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ==", "cpu": [ "x64" ], + "license": "Apache-2.0", "optional": true, "os": [ "linux" @@ -584,16 +887,17 @@ "url": "https://opencollective.com/libvips" }, "optionalDependencies": { - "@img/sharp-libvips-linux-x64": "1.2.0" + "@img/sharp-libvips-linux-x64": "1.2.4" } }, "node_modules/@img/sharp-linuxmusl-arm64": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.34.3.tgz", - "integrity": "sha512-vAjbHDlr4izEiXM1OTggpCcPg9tn4YriK5vAjowJsHwdBIdx0fYRsURkxLG2RLm9gyBq66gwtWI8Gx0/ov+JKQ==", + "version": "0.34.5", + "resolved": 
"https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.34.5.tgz", + "integrity": "sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg==", "cpu": [ "arm64" ], + "license": "Apache-2.0", "optional": true, "os": [ "linux" @@ -605,16 +909,17 @@ "url": "https://opencollective.com/libvips" }, "optionalDependencies": { - "@img/sharp-libvips-linuxmusl-arm64": "1.2.0" + "@img/sharp-libvips-linuxmusl-arm64": "1.2.4" } }, "node_modules/@img/sharp-linuxmusl-x64": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.34.3.tgz", - "integrity": "sha512-gCWUn9547K5bwvOn9l5XGAEjVTTRji4aPTqLzGXHvIr6bIDZKNTA34seMPgM0WmSf+RYBH411VavCejp3PkOeQ==", + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.34.5.tgz", + "integrity": "sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q==", "cpu": [ "x64" ], + "license": "Apache-2.0", "optional": true, "os": [ "linux" @@ -626,19 +931,20 @@ "url": "https://opencollective.com/libvips" }, "optionalDependencies": { - "@img/sharp-libvips-linuxmusl-x64": "1.2.0" + "@img/sharp-libvips-linuxmusl-x64": "1.2.4" } }, "node_modules/@img/sharp-wasm32": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.34.3.tgz", - "integrity": "sha512-+CyRcpagHMGteySaWos8IbnXcHgfDn7pO2fiC2slJxvNq9gDipYBN42/RagzctVRKgxATmfqOSulgZv5e1RdMg==", + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.34.5.tgz", + "integrity": "sha512-OdWTEiVkY2PHwqkbBI8frFxQQFekHaSSkUIJkwzclWZe64O1X4UlUjqqqLaPbUpMOQk6FBu/HtlGXNblIs0huw==", "cpu": [ "wasm32" ], + "license": "Apache-2.0 AND LGPL-3.0-or-later AND MIT", "optional": true, "dependencies": { - "@emnapi/runtime": "^1.4.4" + "@emnapi/runtime": "^1.7.0" }, "engines": { "node": "^18.17.0 || ^20.3.0 || >=21.0.0" @@ 
-648,12 +954,13 @@ } }, "node_modules/@img/sharp-win32-arm64": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-win32-arm64/-/sharp-win32-arm64-0.34.3.tgz", - "integrity": "sha512-MjnHPnbqMXNC2UgeLJtX4XqoVHHlZNd+nPt1kRPmj63wURegwBhZlApELdtxM2OIZDRv/DFtLcNhVbd1z8GYXQ==", + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-win32-arm64/-/sharp-win32-arm64-0.34.5.tgz", + "integrity": "sha512-WQ3AgWCWYSb2yt+IG8mnC6Jdk9Whs7O0gxphblsLvdhSpSTtmu69ZG1Gkb6NuvxsNACwiPV6cNSZNzt0KPsw7g==", "cpu": [ "arm64" ], + "license": "Apache-2.0 AND LGPL-3.0-or-later", "optional": true, "os": [ "win32" @@ -666,12 +973,13 @@ } }, "node_modules/@img/sharp-win32-ia32": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.34.3.tgz", - "integrity": "sha512-xuCdhH44WxuXgOM714hn4amodJMZl3OEvf0GVTm0BEyMeA2to+8HEdRPShH0SLYptJY1uBw+SCFP9WVQi1Q/cw==", + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.34.5.tgz", + "integrity": "sha512-FV9m/7NmeCmSHDD5j4+4pNI8Cp3aW+JvLoXcTUo0IqyjSfAZJ8dIUmijx1qaJsIiU+Hosw6xM5KijAWRJCSgNg==", "cpu": [ "ia32" ], + "license": "Apache-2.0 AND LGPL-3.0-or-later", "optional": true, "os": [ "win32" @@ -684,12 +992,13 @@ } }, "node_modules/@img/sharp-win32-x64": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.34.3.tgz", - "integrity": "sha512-OWwz05d++TxzLEv4VnsTz5CmZ6mI6S05sfQGEMrNrQcOEERbX46332IvE7pO/EUiw7jUrrS40z/M7kPyjfl04g==", + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.34.5.tgz", + "integrity": "sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw==", "cpu": [ "x64" ], + "license": "Apache-2.0 AND LGPL-3.0-or-later", "optional": true, "os": [ "win32" @@ -701,48 +1010,51 @@ "url": "https://opencollective.com/libvips" } }, - 
"node_modules/@isaacs/fs-minipass": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz", - "integrity": "sha512-wgm9Ehl2jpeqP3zw/7mo3kRHFp5MEDhqAdwy1fTGkHAwnkGOVsgpvQhL8B5n1qlb01jV3n/bI0ZfZp5lWA1k4w==", - "dev": true, - "dependencies": { - "minipass": "^7.0.4" - }, - "engines": { - "node": ">=18.0.0" - } - }, "node_modules/@jridgewell/gen-mapping": { - "version": "0.3.12", - "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.12.tgz", - "integrity": "sha512-OuLGC46TjB5BbN1dH8JULVVZY4WTdkF7tV9Ys6wLL1rubZnCMstOhNHueU5bLCrnRuDhKPDM4g6sw4Bel5Gzqg==", + "version": "0.3.13", + "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz", + "integrity": "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA==", "dev": true, + "license": "MIT", "dependencies": { "@jridgewell/sourcemap-codec": "^1.5.0", "@jridgewell/trace-mapping": "^0.3.24" } }, + "node_modules/@jridgewell/remapping": { + "version": "2.3.5", + "resolved": "https://registry.npmjs.org/@jridgewell/remapping/-/remapping-2.3.5.tgz", + "integrity": "sha512-LI9u/+laYG4Ds1TDKSJW2YPrIlcVYOwi2fUC6xB43lueCjgxV4lffOCZCtYFiH6TNOX+tQKXx97T4IKHbhyHEQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.24" + } + }, "node_modules/@jridgewell/resolve-uri": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", "dev": true, + "license": "MIT", "engines": { "node": ">=6.0.0" } }, "node_modules/@jridgewell/sourcemap-codec": { - "version": "1.5.4", - "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.4.tgz", - "integrity": 
"sha512-VT2+G1VQs/9oz078bLrYbecdZKs912zQlkelYpuf+SXF+QvZDYJlbx/LSx+meSAwdDFnF8FVXW92AVjjkVmgFw==", - "dev": true + "version": "1.5.5", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", + "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", + "dev": true, + "license": "MIT" }, "node_modules/@jridgewell/trace-mapping": { - "version": "0.3.29", - "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.29.tgz", - "integrity": "sha512-uw6guiW/gcAGPDhLmd77/6lW8QLeiV5RUTsAX46Db6oLhGaVj4lhnPwb184s1bkc8kdVg/+h988dro8GRDpmYQ==", + "version": "0.3.31", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.31.tgz", + "integrity": "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==", "dev": true, + "license": "MIT", "dependencies": { "@jridgewell/resolve-uri": "^3.1.0", "@jridgewell/sourcemap-codec": "^1.4.14" @@ -753,6 +1065,7 @@ "resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-0.2.12.tgz", "integrity": "sha512-ZVWUcfwY4E/yPitQJl481FjFo3K22D6qF0DuFH6Y/nbnE11GY5uguDxZMGXPQ8WQ0128MXQD7TnfHyK4oWoIJQ==", "dev": true, + "license": "MIT", "optional": true, "dependencies": { "@emnapi/core": "^1.4.3", @@ -761,26 +1074,29 @@ } }, "node_modules/@next/env": { - "version": "15.3.3", - "resolved": "https://registry.npmjs.org/@next/env/-/env-15.3.3.tgz", - "integrity": "sha512-OdiMrzCl2Xi0VTjiQQUK0Xh7bJHnOuET2s+3V+Y40WJBAXrJeGA3f+I8MZJ/YQ3mVGi5XGR1L66oFlgqXhQ4Vw==" + "version": "16.0.4", + "resolved": "https://registry.npmjs.org/@next/env/-/env-16.0.4.tgz", + "integrity": "sha512-FDPaVoB1kYhtOz6Le0Jn2QV7RZJ3Ngxzqri7YX4yu3Ini+l5lciR7nA9eNDpKTmDm7LWZtxSju+/CQnwRBn2pA==", + "license": "MIT" }, "node_modules/@next/eslint-plugin-next": { - "version": "15.4.3", - "resolved": 
"https://registry.npmjs.org/@next/eslint-plugin-next/-/eslint-plugin-next-15.4.3.tgz", - "integrity": "sha512-wYYbP29uZlm9lqD1C6HDgW9WNNt6AlTogYKYpDyATs0QrKYIv/rPueoIDRH6qttXGCe3zNrb7hxfQx4w8OSkLA==", + "version": "16.0.4", + "resolved": "https://registry.npmjs.org/@next/eslint-plugin-next/-/eslint-plugin-next-16.0.4.tgz", + "integrity": "sha512-0emoVyL4Z5NEkRNb63ko/BqLC9OFULcY7mJ3lSerBCqgh/UFcjnvodyikV2bTl7XygwcamJxJAfxCo1oAVfH6g==", "dev": true, + "license": "MIT", "dependencies": { "fast-glob": "3.3.1" } }, "node_modules/@next/swc-darwin-arm64": { - "version": "15.3.3", - "resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-15.3.3.tgz", - "integrity": "sha512-WRJERLuH+O3oYB4yZNVahSVFmtxRNjNF1I1c34tYMoJb0Pve+7/RaLAJJizyYiFhjYNGHRAE1Ri2Fd23zgDqhg==", + "version": "16.0.4", + "resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.0.4.tgz", + "integrity": "sha512-TN0cfB4HT2YyEio9fLwZY33J+s+vMIgC84gQCOLZOYusW7ptgjIn8RwxQt0BUpoo9XRRVVWEHLld0uhyux1ZcA==", "cpu": [ "arm64" ], + "license": "MIT", "optional": true, "os": [ "darwin" @@ -790,12 +1106,13 @@ } }, "node_modules/@next/swc-darwin-x64": { - "version": "15.3.3", - "resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-15.3.3.tgz", - "integrity": "sha512-XHdzH/yBc55lu78k/XwtuFR/ZXUTcflpRXcsu0nKmF45U96jt1tsOZhVrn5YH+paw66zOANpOnFQ9i6/j+UYvw==", + "version": "16.0.4", + "resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.0.4.tgz", + "integrity": "sha512-XsfI23jvimCaA7e+9f3yMCoVjrny2D11G6H8NCcgv+Ina/TQhKPXB9P4q0WjTuEoyZmcNvPdrZ+XtTh3uPfH7Q==", "cpu": [ "x64" ], + "license": "MIT", "optional": true, "os": [ "darwin" @@ -805,12 +1122,13 @@ } }, "node_modules/@next/swc-linux-arm64-gnu": { - "version": "15.3.3", - "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-15.3.3.tgz", - "integrity": 
"sha512-VZ3sYL2LXB8znNGcjhocikEkag/8xiLgnvQts41tq6i+wql63SMS1Q6N8RVXHw5pEUjiof+II3HkDd7GFcgkzw==", + "version": "16.0.4", + "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.0.4.tgz", + "integrity": "sha512-uo8X7qHDy4YdJUhaoJDMAbL8VT5Ed3lijip2DdBHIB4tfKAvB1XBih6INH2L4qIi4jA0Qq1J0ErxcOocBmUSwg==", "cpu": [ "arm64" ], + "license": "MIT", "optional": true, "os": [ "linux" @@ -820,12 +1138,13 @@ } }, "node_modules/@next/swc-linux-arm64-musl": { - "version": "15.3.3", - "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-15.3.3.tgz", - "integrity": "sha512-h6Y1fLU4RWAp1HPNJWDYBQ+e3G7sLckyBXhmH9ajn8l/RSMnhbuPBV/fXmy3muMcVwoJdHL+UtzRzs0nXOf9SA==", + "version": "16.0.4", + "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.0.4.tgz", + "integrity": "sha512-pvR/AjNIAxsIz0PCNcZYpH+WmNIKNLcL4XYEfo+ArDi7GsxKWFO5BvVBLXbhti8Coyv3DE983NsitzUsGH5yTw==", "cpu": [ "arm64" ], + "license": "MIT", "optional": true, "os": [ "linux" @@ -835,12 +1154,13 @@ } }, "node_modules/@next/swc-linux-x64-gnu": { - "version": "15.3.3", - "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-15.3.3.tgz", - "integrity": "sha512-jJ8HRiF3N8Zw6hGlytCj5BiHyG/K+fnTKVDEKvUCyiQ/0r5tgwO7OgaRiOjjRoIx2vwLR+Rz8hQoPrnmFbJdfw==", + "version": "16.0.4", + "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.0.4.tgz", + "integrity": "sha512-2hebpsd5MRRtgqmT7Jj/Wze+wG+ZEXUK2KFFL4IlZ0amEEFADo4ywsifJNeFTQGsamH3/aXkKWymDvgEi+pc2Q==", "cpu": [ "x64" ], + "license": "MIT", "optional": true, "os": [ "linux" @@ -850,12 +1170,13 @@ } }, "node_modules/@next/swc-linux-x64-musl": { - "version": "15.3.3", - "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-15.3.3.tgz", - "integrity": "sha512-HrUcTr4N+RgiiGn3jjeT6Oo208UT/7BuTr7K0mdKRBtTbT4v9zJqCDKO97DUqqoBK1qyzP1RwvrWTvU6EPh/Cw==", + "version": "16.0.4", + 
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.0.4.tgz", + "integrity": "sha512-pzRXf0LZZ8zMljH78j8SeLncg9ifIOp3ugAFka+Bq8qMzw6hPXOc7wydY7ardIELlczzzreahyTpwsim/WL3Sg==", "cpu": [ "x64" ], + "license": "MIT", "optional": true, "os": [ "linux" @@ -865,12 +1186,13 @@ } }, "node_modules/@next/swc-win32-arm64-msvc": { - "version": "15.3.3", - "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-15.3.3.tgz", - "integrity": "sha512-SxorONgi6K7ZUysMtRF3mIeHC5aA3IQLmKFQzU0OuhuUYwpOBc1ypaLJLP5Bf3M9k53KUUUj4vTPwzGvl/NwlQ==", + "version": "16.0.4", + "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.0.4.tgz", + "integrity": "sha512-7G/yJVzum52B5HOqqbQYX9bJHkN+c4YyZ2AIvEssMHQlbAWOn3iIJjD4sM6ihWsBxuljiTKJovEYlD1K8lCUHw==", "cpu": [ "arm64" ], + "license": "MIT", "optional": true, "os": [ "win32" @@ -880,12 +1202,13 @@ } }, "node_modules/@next/swc-win32-x64-msvc": { - "version": "15.3.3", - "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-15.3.3.tgz", - "integrity": "sha512-4QZG6F8enl9/S2+yIiOiju0iCTFd93d8VC1q9LZS4p/Xuk81W2QDjCFeoogmrWWkAD59z8ZxepBQap2dKS5ruw==", + "version": "16.0.4", + "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.0.4.tgz", + "integrity": "sha512-0Vy4g8SSeVkuU89g2OFHqGKM4rxsQtihGfenjx2tRckPrge5+gtFnRWGAAwvGXr0ty3twQvcnYjEyOrLHJ4JWA==", "cpu": [ "x64" ], + "license": "MIT", "optional": true, "os": [ "win32" @@ -899,6 +1222,7 @@ "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==", "dev": true, + "license": "MIT", "dependencies": { "@nodelib/fs.stat": "2.0.5", "run-parallel": "^1.1.9" @@ -912,6 +1236,7 @@ "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz", "integrity": 
"sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==", "dev": true, + "license": "MIT", "engines": { "node": ">= 8" } @@ -921,6 +1246,7 @@ "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz", "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==", "dev": true, + "license": "MIT", "dependencies": { "@nodelib/fs.scandir": "2.1.5", "fastq": "^1.6.0" @@ -934,6 +1260,7 @@ "resolved": "https://registry.npmjs.org/@nolyfill/is-core-module/-/is-core-module-1.0.39.tgz", "integrity": "sha512-nn5ozdjYQpUCZlWGuxcJY/KpxkWQs4DcbMCmKojjyrYDEAGy4Ce19NN4v5MduafTwJlbKc99UA8YhSVqq9yPZA==", "dev": true, + "license": "MIT", "engines": { "node": ">=12.4.0" } @@ -942,6 +1269,7 @@ "version": "1.2.1", "resolved": "https://registry.npmjs.org/@panva/hkdf/-/hkdf-1.2.1.tgz", "integrity": "sha512-6oclG6Y3PiDFcoyk8srjLfVKyMfVCKJ27JwNPViuXziFpmdz+MZnZN/aKY0JGXgYuO/VghU0jcOAZgWXZ1Dmrw==", + "license": "MIT", "funding": { "url": "https://github.com/sponsors/panva" } @@ -950,78 +1278,67 @@ "version": "1.1.0", "resolved": "https://registry.npmjs.org/@rtsao/scc/-/scc-1.1.0.tgz", "integrity": "sha512-zt6OdqaDoOnJ1ZYsCYGt9YmWzDXl4vQdKTyJev62gFhRGKdx7mcT54V9KIjg+d2wi9EXsPvAPKe7i7WjfVWB8g==", - "dev": true - }, - "node_modules/@rushstack/eslint-patch": { - "version": "1.12.0", - "resolved": "https://registry.npmjs.org/@rushstack/eslint-patch/-/eslint-patch-1.12.0.tgz", - "integrity": "sha512-5EwMtOqvJMMa3HbmxLlF74e+3/HhwBTMcvt3nqVJgGCozO6hzIPOBlwm8mGVNR9SN2IJpxSnlxczyDjcn7qIyw==", - "dev": true - }, - "node_modules/@swc/counter": { - "version": "0.1.3", - "resolved": "https://registry.npmjs.org/@swc/counter/-/counter-0.1.3.tgz", - "integrity": "sha512-e2BR4lsJkkRlKZ/qCHPw9ZaSxc0MVUd7gtbtaB7aMvHeJVYe8sOB8DBZkP2DtISHGSku9sCK6T6cnY0CtXrOCQ==" + "dev": true, + "license": "MIT" }, "node_modules/@swc/helpers": { "version": "0.5.15", "resolved": 
"https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.15.tgz", "integrity": "sha512-JQ5TuMi45Owi4/BIMAJBoSQoOJu12oOk/gADqlcUL9JEdHB8vyjUSsxqeNXnmXHjYKMi2WcYtezGEEhqUI/E2g==", + "license": "Apache-2.0", "dependencies": { "tslib": "^2.8.0" } }, "node_modules/@tailwindcss/node": { - "version": "4.1.11", - "resolved": "https://registry.npmjs.org/@tailwindcss/node/-/node-4.1.11.tgz", - "integrity": "sha512-yzhzuGRmv5QyU9qLNg4GTlYI6STedBWRE7NjxP45CsFYYq9taI0zJXZBMqIC/c8fViNLhmrbpSFS57EoxUmD6Q==", + "version": "4.1.17", + "resolved": "https://registry.npmjs.org/@tailwindcss/node/-/node-4.1.17.tgz", + "integrity": "sha512-csIkHIgLb3JisEFQ0vxr2Y57GUNYh447C8xzwj89U/8fdW8LhProdxvnVH6U8M2Y73QKiTIH+LWbK3V2BBZsAg==", "dev": true, + "license": "MIT", "dependencies": { - "@ampproject/remapping": "^2.3.0", - "enhanced-resolve": "^5.18.1", - "jiti": "^2.4.2", - "lightningcss": "1.30.1", - "magic-string": "^0.30.17", + "@jridgewell/remapping": "^2.3.4", + "enhanced-resolve": "^5.18.3", + "jiti": "^2.6.1", + "lightningcss": "1.30.2", + "magic-string": "^0.30.21", "source-map-js": "^1.2.1", - "tailwindcss": "4.1.11" + "tailwindcss": "4.1.17" } }, "node_modules/@tailwindcss/oxide": { - "version": "4.1.11", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.1.11.tgz", - "integrity": "sha512-Q69XzrtAhuyfHo+5/HMgr1lAiPP/G40OMFAnws7xcFEYqcypZmdW8eGXaOUIeOl1dzPJBPENXgbjsOyhg2nkrg==", + "version": "4.1.17", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.1.17.tgz", + "integrity": "sha512-F0F7d01fmkQhsTjXezGBLdrl1KresJTcI3DB8EkScCldyKp3Msz4hub4uyYaVnk88BAS1g5DQjjF6F5qczheLA==", "dev": true, - "hasInstallScript": true, - "dependencies": { - "detect-libc": "^2.0.4", - "tar": "^7.4.3" - }, + "license": "MIT", "engines": { "node": ">= 10" }, "optionalDependencies": { - "@tailwindcss/oxide-android-arm64": "4.1.11", - "@tailwindcss/oxide-darwin-arm64": "4.1.11", - "@tailwindcss/oxide-darwin-x64": "4.1.11", - "@tailwindcss/oxide-freebsd-x64": "4.1.11", - 
"@tailwindcss/oxide-linux-arm-gnueabihf": "4.1.11", - "@tailwindcss/oxide-linux-arm64-gnu": "4.1.11", - "@tailwindcss/oxide-linux-arm64-musl": "4.1.11", - "@tailwindcss/oxide-linux-x64-gnu": "4.1.11", - "@tailwindcss/oxide-linux-x64-musl": "4.1.11", - "@tailwindcss/oxide-wasm32-wasi": "4.1.11", - "@tailwindcss/oxide-win32-arm64-msvc": "4.1.11", - "@tailwindcss/oxide-win32-x64-msvc": "4.1.11" + "@tailwindcss/oxide-android-arm64": "4.1.17", + "@tailwindcss/oxide-darwin-arm64": "4.1.17", + "@tailwindcss/oxide-darwin-x64": "4.1.17", + "@tailwindcss/oxide-freebsd-x64": "4.1.17", + "@tailwindcss/oxide-linux-arm-gnueabihf": "4.1.17", + "@tailwindcss/oxide-linux-arm64-gnu": "4.1.17", + "@tailwindcss/oxide-linux-arm64-musl": "4.1.17", + "@tailwindcss/oxide-linux-x64-gnu": "4.1.17", + "@tailwindcss/oxide-linux-x64-musl": "4.1.17", + "@tailwindcss/oxide-wasm32-wasi": "4.1.17", + "@tailwindcss/oxide-win32-arm64-msvc": "4.1.17", + "@tailwindcss/oxide-win32-x64-msvc": "4.1.17" } }, "node_modules/@tailwindcss/oxide-android-arm64": { - "version": "4.1.11", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-android-arm64/-/oxide-android-arm64-4.1.11.tgz", - "integrity": "sha512-3IfFuATVRUMZZprEIx9OGDjG3Ou3jG4xQzNTvjDoKmU9JdmoCohQJ83MYd0GPnQIu89YoJqvMM0G3uqLRFtetg==", + "version": "4.1.17", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-android-arm64/-/oxide-android-arm64-4.1.17.tgz", + "integrity": "sha512-BMqpkJHgOZ5z78qqiGE6ZIRExyaHyuxjgrJ6eBO5+hfrfGkuya0lYfw8fRHG77gdTjWkNWEEm+qeG2cDMxArLQ==", "cpu": [ "arm64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "android" @@ -1031,13 +1348,14 @@ } }, "node_modules/@tailwindcss/oxide-darwin-arm64": { - "version": "4.1.11", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-arm64/-/oxide-darwin-arm64-4.1.11.tgz", - "integrity": "sha512-ESgStEOEsyg8J5YcMb1xl8WFOXfeBmrhAwGsFxxB2CxY9evy63+AtpbDLAyRkJnxLy2WsD1qF13E97uQyP1lfQ==", + "version": "4.1.17", + "resolved": 
"https://registry.npmjs.org/@tailwindcss/oxide-darwin-arm64/-/oxide-darwin-arm64-4.1.17.tgz", + "integrity": "sha512-EquyumkQweUBNk1zGEU/wfZo2qkp/nQKRZM8bUYO0J+Lums5+wl2CcG1f9BgAjn/u9pJzdYddHWBiFXJTcxmOg==", "cpu": [ "arm64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "darwin" @@ -1047,13 +1365,14 @@ } }, "node_modules/@tailwindcss/oxide-darwin-x64": { - "version": "4.1.11", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-x64/-/oxide-darwin-x64-4.1.11.tgz", - "integrity": "sha512-EgnK8kRchgmgzG6jE10UQNaH9Mwi2n+yw1jWmof9Vyg2lpKNX2ioe7CJdf9M5f8V9uaQxInenZkOxnTVL3fhAw==", + "version": "4.1.17", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-x64/-/oxide-darwin-x64-4.1.17.tgz", + "integrity": "sha512-gdhEPLzke2Pog8s12oADwYu0IAw04Y2tlmgVzIN0+046ytcgx8uZmCzEg4VcQh+AHKiS7xaL8kGo/QTiNEGRog==", "cpu": [ "x64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "darwin" @@ -1063,13 +1382,14 @@ } }, "node_modules/@tailwindcss/oxide-freebsd-x64": { - "version": "4.1.11", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-freebsd-x64/-/oxide-freebsd-x64-4.1.11.tgz", - "integrity": "sha512-xdqKtbpHs7pQhIKmqVpxStnY1skuNh4CtbcyOHeX1YBE0hArj2romsFGb6yUmzkq/6M24nkxDqU8GYrKrz+UcA==", + "version": "4.1.17", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-freebsd-x64/-/oxide-freebsd-x64-4.1.17.tgz", + "integrity": "sha512-hxGS81KskMxML9DXsaXT1H0DyA+ZBIbyG/sSAjWNe2EDl7TkPOBI42GBV3u38itzGUOmFfCzk1iAjDXds8Oh0g==", "cpu": [ "x64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "freebsd" @@ -1079,13 +1399,14 @@ } }, "node_modules/@tailwindcss/oxide-linux-arm-gnueabihf": { - "version": "4.1.11", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm-gnueabihf/-/oxide-linux-arm-gnueabihf-4.1.11.tgz", - "integrity": "sha512-ryHQK2eyDYYMwB5wZL46uoxz2zzDZsFBwfjssgB7pzytAeCCa6glsiJGjhTEddq/4OsIjsLNMAiMlHNYnkEEeg==", + "version": "4.1.17", + "resolved": 
"https://registry.npmjs.org/@tailwindcss/oxide-linux-arm-gnueabihf/-/oxide-linux-arm-gnueabihf-4.1.17.tgz", + "integrity": "sha512-k7jWk5E3ldAdw0cNglhjSgv501u7yrMf8oeZ0cElhxU6Y2o7f8yqelOp3fhf7evjIS6ujTI3U8pKUXV2I4iXHQ==", "cpu": [ "arm" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "linux" @@ -1095,13 +1416,14 @@ } }, "node_modules/@tailwindcss/oxide-linux-arm64-gnu": { - "version": "4.1.11", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-gnu/-/oxide-linux-arm64-gnu-4.1.11.tgz", - "integrity": "sha512-mYwqheq4BXF83j/w75ewkPJmPZIqqP1nhoghS9D57CLjsh3Nfq0m4ftTotRYtGnZd3eCztgbSPJ9QhfC91gDZQ==", + "version": "4.1.17", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-gnu/-/oxide-linux-arm64-gnu-4.1.17.tgz", + "integrity": "sha512-HVDOm/mxK6+TbARwdW17WrgDYEGzmoYayrCgmLEw7FxTPLcp/glBisuyWkFz/jb7ZfiAXAXUACfyItn+nTgsdQ==", "cpu": [ "arm64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "linux" @@ -1111,13 +1433,14 @@ } }, "node_modules/@tailwindcss/oxide-linux-arm64-musl": { - "version": "4.1.11", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-musl/-/oxide-linux-arm64-musl-4.1.11.tgz", - "integrity": "sha512-m/NVRFNGlEHJrNVk3O6I9ggVuNjXHIPoD6bqay/pubtYC9QIdAMpS+cswZQPBLvVvEF6GtSNONbDkZrjWZXYNQ==", + "version": "4.1.17", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-musl/-/oxide-linux-arm64-musl-4.1.17.tgz", + "integrity": "sha512-HvZLfGr42i5anKtIeQzxdkw/wPqIbpeZqe7vd3V9vI3RQxe3xU1fLjss0TjyhxWcBaipk7NYwSrwTwK1hJARMg==", "cpu": [ "arm64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "linux" @@ -1127,13 +1450,14 @@ } }, "node_modules/@tailwindcss/oxide-linux-x64-gnu": { - "version": "4.1.11", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-gnu/-/oxide-linux-x64-gnu-4.1.11.tgz", - "integrity": "sha512-YW6sblI7xukSD2TdbbaeQVDysIm/UPJtObHJHKxDEcW2exAtY47j52f8jZXkqE1krdnkhCMGqP3dbniu1Te2Fg==", + "version": 
"4.1.17", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-gnu/-/oxide-linux-x64-gnu-4.1.17.tgz", + "integrity": "sha512-M3XZuORCGB7VPOEDH+nzpJ21XPvK5PyjlkSFkFziNHGLc5d6g3di2McAAblmaSUNl8IOmzYwLx9NsE7bplNkwQ==", "cpu": [ "x64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "linux" @@ -1143,13 +1467,14 @@ } }, "node_modules/@tailwindcss/oxide-linux-x64-musl": { - "version": "4.1.11", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-musl/-/oxide-linux-x64-musl-4.1.11.tgz", - "integrity": "sha512-e3C/RRhGunWYNC3aSF7exsQkdXzQ/M+aYuZHKnw4U7KQwTJotnWsGOIVih0s2qQzmEzOFIJ3+xt7iq67K/p56Q==", + "version": "4.1.17", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-musl/-/oxide-linux-x64-musl-4.1.17.tgz", + "integrity": "sha512-k7f+pf9eXLEey4pBlw+8dgfJHY4PZ5qOUFDyNf7SI6lHjQ9Zt7+NcscjpwdCEbYi6FI5c2KDTDWyf2iHcCSyyQ==", "cpu": [ "x64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "linux" @@ -1159,9 +1484,9 @@ } }, "node_modules/@tailwindcss/oxide-wasm32-wasi": { - "version": "4.1.11", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-wasm32-wasi/-/oxide-wasm32-wasi-4.1.11.tgz", - "integrity": "sha512-Xo1+/GU0JEN/C/dvcammKHzeM6NqKovG+6921MR6oadee5XPBaKOumrJCXvopJ/Qb5TH7LX/UAywbqrP4lax0g==", + "version": "4.1.17", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-wasm32-wasi/-/oxide-wasm32-wasi-4.1.17.tgz", + "integrity": "sha512-cEytGqSSoy7zK4JRWiTCx43FsKP/zGr0CsuMawhH67ONlH+T79VteQeJQRO/X7L0juEUA8ZyuYikcRBf0vsxhg==", "bundleDependencies": [ "@napi-rs/wasm-runtime", "@emnapi/core", @@ -1174,27 +1499,29 @@ "wasm32" ], "dev": true, + "license": "MIT", "optional": true, "dependencies": { - "@emnapi/core": "^1.4.3", - "@emnapi/runtime": "^1.4.3", - "@emnapi/wasi-threads": "^1.0.2", - "@napi-rs/wasm-runtime": "^0.2.11", - "@tybys/wasm-util": "^0.9.0", - "tslib": "^2.8.0" + "@emnapi/core": "^1.6.0", + "@emnapi/runtime": "^1.6.0", + "@emnapi/wasi-threads": "^1.1.0", + 
"@napi-rs/wasm-runtime": "^1.0.7", + "@tybys/wasm-util": "^0.10.1", + "tslib": "^2.4.0" }, "engines": { "node": ">=14.0.0" } }, "node_modules/@tailwindcss/oxide-win32-arm64-msvc": { - "version": "4.1.11", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.11.tgz", - "integrity": "sha512-UgKYx5PwEKrac3GPNPf6HVMNhUIGuUh4wlDFR2jYYdkX6pL/rn73zTq/4pzUm8fOjAn5L8zDeHp9iXmUGOXZ+w==", + "version": "4.1.17", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.17.tgz", + "integrity": "sha512-JU5AHr7gKbZlOGvMdb4722/0aYbU+tN6lv1kONx0JK2cGsh7g148zVWLM0IKR3NeKLv+L90chBVYcJ8uJWbC9A==", "cpu": [ "arm64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "win32" @@ -1204,13 +1531,14 @@ } }, "node_modules/@tailwindcss/oxide-win32-x64-msvc": { - "version": "4.1.11", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-x64-msvc/-/oxide-win32-x64-msvc-4.1.11.tgz", - "integrity": "sha512-YfHoggn1j0LK7wR82TOucWc5LDCguHnoS879idHekmmiR7g9HUtMw9MI0NHatS28u/Xlkfi9w5RJWgz2Dl+5Qg==", + "version": "4.1.17", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-x64-msvc/-/oxide-win32-x64-msvc-4.1.17.tgz", + "integrity": "sha512-SKWM4waLuqx0IH+FMDUw6R66Hu4OuTALFgnleKbqhgGU30DY20NORZMZUKgLRjQXNN2TLzKvh48QXTig4h4bGw==", "cpu": [ "x64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "win32" @@ -1220,108 +1548,114 @@ } }, "node_modules/@tailwindcss/postcss": { - "version": "4.1.11", - "resolved": "https://registry.npmjs.org/@tailwindcss/postcss/-/postcss-4.1.11.tgz", - "integrity": "sha512-q/EAIIpF6WpLhKEuQSEVMZNMIY8KhWoAemZ9eylNAih9jxMGAYPPWBn3I9QL/2jZ+e7OEz/tZkX5HwbBR4HohA==", + "version": "4.1.17", + "resolved": "https://registry.npmjs.org/@tailwindcss/postcss/-/postcss-4.1.17.tgz", + "integrity": "sha512-+nKl9N9mN5uJ+M7dBOOCzINw94MPstNR/GtIhz1fpZysxL/4a+No64jCBD6CPN+bIHWFx3KWuu8XJRrj/572Dw==", "dev": true, + "license": "MIT", 
"dependencies": { "@alloc/quick-lru": "^5.2.0", - "@tailwindcss/node": "4.1.11", - "@tailwindcss/oxide": "4.1.11", + "@tailwindcss/node": "4.1.17", + "@tailwindcss/oxide": "4.1.17", "postcss": "^8.4.41", - "tailwindcss": "4.1.11" + "tailwindcss": "4.1.17" } }, "node_modules/@tweenjs/tween.js": { "version": "25.0.0", "resolved": "https://registry.npmjs.org/@tweenjs/tween.js/-/tween.js-25.0.0.tgz", - "integrity": "sha512-XKLA6syeBUaPzx4j3qwMqzzq+V4uo72BnlbOjmuljLrRqdsd3qnzvZZoxvMHZ23ndsRS4aufU6JOZYpCbU6T1A==" + "integrity": "sha512-XKLA6syeBUaPzx4j3qwMqzzq+V4uo72BnlbOjmuljLrRqdsd3qnzvZZoxvMHZ23ndsRS4aufU6JOZYpCbU6T1A==", + "license": "MIT" }, "node_modules/@tybys/wasm-util": { - "version": "0.10.0", - "resolved": "https://registry.npmjs.org/@tybys/wasm-util/-/wasm-util-0.10.0.tgz", - "integrity": "sha512-VyyPYFlOMNylG45GoAe0xDoLwWuowvf92F9kySqzYh8vmYm7D2u4iUJKa1tOUpS70Ku13ASrOkS4ScXFsTaCNQ==", + "version": "0.10.1", + "resolved": "https://registry.npmjs.org/@tybys/wasm-util/-/wasm-util-0.10.1.tgz", + "integrity": "sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg==", "dev": true, + "license": "MIT", "optional": true, "dependencies": { "tslib": "^2.4.0" } }, "node_modules/@types/culori": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/@types/culori/-/culori-4.0.0.tgz", - "integrity": "sha512-aFljQwjb++sl6TAyEXeHTiK/fk9epZOQ+nMmadjnAvzZFIvNoQ0x8XQYfcOaRTBwmDUPUlghhZCJ66MTcqQAsg==", - "dev": true + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/@types/culori/-/culori-4.0.1.tgz", + "integrity": "sha512-43M51r/22CjhbOXyGT361GZ9vncSVQ39u62x5eJdBQFviI8zWp2X5jzqg7k4M6PVgDQAClpy2bUe2dtwEgEDVQ==", + "dev": true, + "license": "MIT" }, "node_modules/@types/estree": { "version": "1.0.8", "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz", "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==", - "dev": true + "dev": true, + 
"license": "MIT" }, "node_modules/@types/json-schema": { "version": "7.0.15", "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", "integrity": "sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/@types/json5": { "version": "0.0.29", "resolved": "https://registry.npmjs.org/@types/json5/-/json5-0.0.29.tgz", "integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/@types/node": { - "version": "20.19.9", - "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.9.tgz", - "integrity": "sha512-cuVNgarYWZqxRJDQHEB58GEONhOK79QVR/qYx4S7kcUObQvUwvFnYxJuuHUKm2aieN9X3yZB4LZsuYNU1Qphsw==", + "version": "20.19.25", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz", + "integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==", "dev": true, + "license": "MIT", "dependencies": { "undici-types": "~6.21.0" } }, - "node_modules/@types/prop-types": { - "version": "15.7.15", - "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.15.tgz", - "integrity": "sha512-F6bEyamV9jKGAFBEmlQnesRPGOQqS2+Uwi0Em15xenOxHaf2hv6L8YCVn3rPdPJOiJfPiCnLIRyvwVaqMY3MIw==", - "dev": true - }, "node_modules/@types/react": { - "version": "18.3.23", - "resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.23.tgz", - "integrity": "sha512-/LDXMQh55EzZQ0uVAZmKKhfENivEvWz6E+EYzh+/MCjMhNsotd+ZHhBGIjFDTi6+fz0OhQQQLbTgdQIxxCsC0w==", + "version": "19.2.7", + "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.7.tgz", + "integrity": "sha512-MWtvHrGZLFttgeEj28VXHxpmwYbor/ATPYbBfSFZEIRK0ecCFLl2Qo55z52Hss+UV9CRN7trSeq1zbgx7YDWWg==", "dev": true, + "license": "MIT", + "peer": true, "dependencies": { - "@types/prop-types": "*", - 
"csstype": "^3.0.2" + "csstype": "^3.2.2" } }, "node_modules/@types/react-dom": { - "version": "18.3.7", - "resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-18.3.7.tgz", - "integrity": "sha512-MEe3UeoENYVFXzoXEWsvcpg6ZvlrFNlOQ7EOsvhI3CfAXwzPfO8Qwuxd40nepsYKqyyVQnTdEfv68q91yLcKrQ==", + "version": "19.2.3", + "resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-19.2.3.tgz", + "integrity": "sha512-jp2L/eY6fn+KgVVQAOqYItbF0VY/YApe5Mz2F0aykSO8gx31bYCZyvSeYxCHKvzHG5eZjc+zyaS5BrBWya2+kQ==", "dev": true, + "license": "MIT", "peerDependencies": { - "@types/react": "^18.0.0" + "@types/react": "^19.2.0" } }, "node_modules/@types/uuid": { "version": "9.0.8", "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz", "integrity": "sha512-jg+97EGIcY9AGHJJRaaPVgetKDsrTgbRjQ5Msgjh/DQKEFl0DtyRr/VCOyD1T2R1MNeWPK/u7JoGhlDZnKBAfA==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/@typescript-eslint/eslint-plugin": { - "version": "8.38.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.38.0.tgz", - "integrity": "sha512-CPoznzpuAnIOl4nhj4tRr4gIPj5AfKgkiJmGQDaq+fQnRJTYlcBjbX3wbciGmpoPf8DREufuPRe1tNMZnGdanA==", + "version": "8.48.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.48.0.tgz", + "integrity": "sha512-XxXP5tL1txl13YFtrECECQYeZjBZad4fyd3cFV4a19LkAY/bIp9fev3US4S5fDVV2JaYFiKAZ/GRTOLer+mbyQ==", "dev": true, + "license": "MIT", "dependencies": { "@eslint-community/regexpp": "^4.10.0", - "@typescript-eslint/scope-manager": "8.38.0", - "@typescript-eslint/type-utils": "8.38.0", - "@typescript-eslint/utils": "8.38.0", - "@typescript-eslint/visitor-keys": "8.38.0", + "@typescript-eslint/scope-manager": "8.48.0", + "@typescript-eslint/type-utils": "8.48.0", + "@typescript-eslint/utils": "8.48.0", + "@typescript-eslint/visitor-keys": "8.48.0", "graphemer": "^1.4.0", "ignore": "^7.0.0", "natural-compare": "^1.4.0", @@ -1335,9 +1669,9 
@@ "url": "https://opencollective.com/typescript-eslint" }, "peerDependencies": { - "@typescript-eslint/parser": "^8.38.0", + "@typescript-eslint/parser": "^8.48.0", "eslint": "^8.57.0 || ^9.0.0", - "typescript": ">=4.8.4 <5.9.0" + "typescript": ">=4.8.4 <6.0.0" } }, "node_modules/@typescript-eslint/eslint-plugin/node_modules/ignore": { @@ -1345,20 +1679,23 @@ "resolved": "https://registry.npmjs.org/ignore/-/ignore-7.0.5.tgz", "integrity": "sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg==", "dev": true, + "license": "MIT", "engines": { "node": ">= 4" } }, "node_modules/@typescript-eslint/parser": { - "version": "8.38.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.38.0.tgz", - "integrity": "sha512-Zhy8HCvBUEfBECzIl1PKqF4p11+d0aUJS1GeUiuqK9WmOug8YCmC4h4bjyBvMyAMI9sbRczmrYL5lKg/YMbrcQ==", + "version": "8.48.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.48.0.tgz", + "integrity": "sha512-jCzKdm/QK0Kg4V4IK/oMlRZlY+QOcdjv89U2NgKHZk1CYTj82/RVSx1mV/0gqCVMJ/DA+Zf/S4NBWNF8GQ+eqQ==", "dev": true, + "license": "MIT", + "peer": true, "dependencies": { - "@typescript-eslint/scope-manager": "8.38.0", - "@typescript-eslint/types": "8.38.0", - "@typescript-eslint/typescript-estree": "8.38.0", - "@typescript-eslint/visitor-keys": "8.38.0", + "@typescript-eslint/scope-manager": "8.48.0", + "@typescript-eslint/types": "8.48.0", + "@typescript-eslint/typescript-estree": "8.48.0", + "@typescript-eslint/visitor-keys": "8.48.0", "debug": "^4.3.4" }, "engines": { @@ -1370,17 +1707,18 @@ }, "peerDependencies": { "eslint": "^8.57.0 || ^9.0.0", - "typescript": ">=4.8.4 <5.9.0" + "typescript": ">=4.8.4 <6.0.0" } }, "node_modules/@typescript-eslint/project-service": { - "version": "8.38.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.38.0.tgz", - "integrity": 
"sha512-dbK7Jvqcb8c9QfH01YB6pORpqX1mn5gDZc9n63Ak/+jD67oWXn3Gs0M6vddAN+eDXBCS5EmNWzbSxsn9SzFWWg==", + "version": "8.48.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.48.0.tgz", + "integrity": "sha512-Ne4CTZyRh1BecBf84siv42wv5vQvVmgtk8AuiEffKTUo3DrBaGYZueJSxxBZ8fjk/N3DrgChH4TOdIOwOwiqqw==", "dev": true, + "license": "MIT", "dependencies": { - "@typescript-eslint/tsconfig-utils": "^8.38.0", - "@typescript-eslint/types": "^8.38.0", + "@typescript-eslint/tsconfig-utils": "^8.48.0", + "@typescript-eslint/types": "^8.48.0", "debug": "^4.3.4" }, "engines": { @@ -1391,17 +1729,18 @@ "url": "https://opencollective.com/typescript-eslint" }, "peerDependencies": { - "typescript": ">=4.8.4 <5.9.0" + "typescript": ">=4.8.4 <6.0.0" } }, "node_modules/@typescript-eslint/scope-manager": { - "version": "8.38.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.38.0.tgz", - "integrity": "sha512-WJw3AVlFFcdT9Ri1xs/lg8LwDqgekWXWhH3iAF+1ZM+QPd7oxQ6jvtW/JPwzAScxitILUIFs0/AnQ/UWHzbATQ==", + "version": "8.48.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.48.0.tgz", + "integrity": "sha512-uGSSsbrtJrLduti0Q1Q9+BF1/iFKaxGoQwjWOIVNJv0o6omrdyR8ct37m4xIl5Zzpkp69Kkmvom7QFTtue89YQ==", "dev": true, + "license": "MIT", "dependencies": { - "@typescript-eslint/types": "8.38.0", - "@typescript-eslint/visitor-keys": "8.38.0" + "@typescript-eslint/types": "8.48.0", + "@typescript-eslint/visitor-keys": "8.48.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -1412,10 +1751,11 @@ } }, "node_modules/@typescript-eslint/tsconfig-utils": { - "version": "8.38.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.38.0.tgz", - "integrity": "sha512-Lum9RtSE3EroKk/bYns+sPOodqb2Fv50XOl/gMviMKNvanETUuUcC9ObRbzrJ4VSd2JalPqgSAavwrPiPvnAiQ==", + "version": "8.48.0", + "resolved": 
"https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.48.0.tgz", + "integrity": "sha512-WNebjBdFdyu10sR1M4OXTt2OkMd5KWIL+LLfeH9KhgP+jzfDV/LI3eXzwJ1s9+Yc0Kzo2fQCdY/OpdusCMmh6w==", "dev": true, + "license": "MIT", "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" }, @@ -1424,18 +1764,19 @@ "url": "https://opencollective.com/typescript-eslint" }, "peerDependencies": { - "typescript": ">=4.8.4 <5.9.0" + "typescript": ">=4.8.4 <6.0.0" } }, "node_modules/@typescript-eslint/type-utils": { - "version": "8.38.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.38.0.tgz", - "integrity": "sha512-c7jAvGEZVf0ao2z+nnz8BUaHZD09Agbh+DY7qvBQqLiz8uJzRgVPj5YvOh8I8uEiH8oIUGIfHzMwUcGVco/SJg==", + "version": "8.48.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.48.0.tgz", + "integrity": "sha512-zbeVaVqeXhhab6QNEKfK96Xyc7UQuoFWERhEnj3mLVnUWrQnv15cJNseUni7f3g557gm0e46LZ6IJ4NJVOgOpw==", "dev": true, + "license": "MIT", "dependencies": { - "@typescript-eslint/types": "8.38.0", - "@typescript-eslint/typescript-estree": "8.38.0", - "@typescript-eslint/utils": "8.38.0", + "@typescript-eslint/types": "8.48.0", + "@typescript-eslint/typescript-estree": "8.48.0", + "@typescript-eslint/utils": "8.48.0", "debug": "^4.3.4", "ts-api-utils": "^2.1.0" }, @@ -1448,14 +1789,15 @@ }, "peerDependencies": { "eslint": "^8.57.0 || ^9.0.0", - "typescript": ">=4.8.4 <5.9.0" + "typescript": ">=4.8.4 <6.0.0" } }, "node_modules/@typescript-eslint/types": { - "version": "8.38.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.38.0.tgz", - "integrity": "sha512-wzkUfX3plUqij4YwWaJyqhiPE5UCRVlFpKn1oCRn2O1bJ592XxWJj8ROQ3JD5MYXLORW84063z3tZTb/cs4Tyw==", + "version": "8.48.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.48.0.tgz", + "integrity": "sha512-cQMcGQQH7kwKoVswD1xdOytxQR60MWKM1di26xSUtxehaDs/32Zpqsu5WJlXTtTTqyAVK8R7hvsUnIXRS+bjvA==", 
"dev": true, + "license": "MIT", "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" }, @@ -1465,20 +1807,20 @@ } }, "node_modules/@typescript-eslint/typescript-estree": { - "version": "8.38.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.38.0.tgz", - "integrity": "sha512-fooELKcAKzxux6fA6pxOflpNS0jc+nOQEEOipXFNjSlBS6fqrJOVY/whSn70SScHrcJ2LDsxWrneFoWYSVfqhQ==", + "version": "8.48.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.48.0.tgz", + "integrity": "sha512-ljHab1CSO4rGrQIAyizUS6UGHHCiAYhbfcIZ1zVJr5nMryxlXMVWS3duFPSKvSUbFPwkXMFk1k0EMIjub4sRRQ==", "dev": true, + "license": "MIT", "dependencies": { - "@typescript-eslint/project-service": "8.38.0", - "@typescript-eslint/tsconfig-utils": "8.38.0", - "@typescript-eslint/types": "8.38.0", - "@typescript-eslint/visitor-keys": "8.38.0", + "@typescript-eslint/project-service": "8.48.0", + "@typescript-eslint/tsconfig-utils": "8.48.0", + "@typescript-eslint/types": "8.48.0", + "@typescript-eslint/visitor-keys": "8.48.0", "debug": "^4.3.4", - "fast-glob": "^3.3.2", - "is-glob": "^4.0.3", "minimatch": "^9.0.4", "semver": "^7.6.0", + "tinyglobby": "^0.2.15", "ts-api-utils": "^2.1.0" }, "engines": { @@ -1489,7 +1831,7 @@ "url": "https://opencollective.com/typescript-eslint" }, "peerDependencies": { - "typescript": ">=4.8.4 <5.9.0" + "typescript": ">=4.8.4 <6.0.0" } }, "node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": { @@ -1497,43 +1839,17 @@ "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", "dev": true, + "license": "MIT", "dependencies": { "balanced-match": "^1.0.0" } }, - "node_modules/@typescript-eslint/typescript-estree/node_modules/fast-glob": { - "version": "3.3.3", - "resolved": 
"https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.3.tgz", - "integrity": "sha512-7MptL8U0cqcFdzIzwOTHoilX9x5BrNqye7Z/LuC7kCMRio1EMSyqRK3BEAUD7sXRq4iT4AzTVuZdhgQ2TCvYLg==", - "dev": true, - "dependencies": { - "@nodelib/fs.stat": "^2.0.2", - "@nodelib/fs.walk": "^1.2.3", - "glob-parent": "^5.1.2", - "merge2": "^1.3.0", - "micromatch": "^4.0.8" - }, - "engines": { - "node": ">=8.6.0" - } - }, - "node_modules/@typescript-eslint/typescript-estree/node_modules/glob-parent": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", - "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", - "dev": true, - "dependencies": { - "is-glob": "^4.0.1" - }, - "engines": { - "node": ">= 6" - } - }, "node_modules/@typescript-eslint/typescript-estree/node_modules/minimatch": { "version": "9.0.5", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", "dev": true, + "license": "ISC", "dependencies": { "brace-expansion": "^2.0.1" }, @@ -1544,16 +1860,30 @@ "url": "https://github.com/sponsors/isaacs" } }, - "node_modules/@typescript-eslint/utils": { - "version": "8.38.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.38.0.tgz", - "integrity": "sha512-hHcMA86Hgt+ijJlrD8fX0j1j8w4C92zue/8LOPAFioIno+W0+L7KqE8QZKCcPGc/92Vs9x36w/4MPTJhqXdyvg==", + "node_modules/@typescript-eslint/typescript-estree/node_modules/semver": { + "version": "7.7.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz", + "integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==", "dev": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/@typescript-eslint/utils": { + "version": "8.48.0", + "resolved": 
"https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.48.0.tgz", + "integrity": "sha512-yTJO1XuGxCsSfIVt1+1UrLHtue8xz16V8apzPYI06W0HbEbEWHxHXgZaAgavIkoh+GeV6hKKd5jm0sS6OYxWXQ==", + "dev": true, + "license": "MIT", "dependencies": { "@eslint-community/eslint-utils": "^4.7.0", - "@typescript-eslint/scope-manager": "8.38.0", - "@typescript-eslint/types": "8.38.0", - "@typescript-eslint/typescript-estree": "8.38.0" + "@typescript-eslint/scope-manager": "8.48.0", + "@typescript-eslint/types": "8.48.0", + "@typescript-eslint/typescript-estree": "8.48.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -1564,16 +1894,17 @@ }, "peerDependencies": { "eslint": "^8.57.0 || ^9.0.0", - "typescript": ">=4.8.4 <5.9.0" + "typescript": ">=4.8.4 <6.0.0" } }, "node_modules/@typescript-eslint/visitor-keys": { - "version": "8.38.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.38.0.tgz", - "integrity": "sha512-pWrTcoFNWuwHlA9CvlfSsGWs14JxfN1TH25zM5L7o0pRLhsoZkDnTsXfQRJBEWJoV5DL0jf+Z+sxiud+K0mq1g==", + "version": "8.48.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.48.0.tgz", + "integrity": "sha512-T0XJMaRPOH3+LBbAfzR2jalckP1MSG/L9eUtY0DEzUyVaXJ/t6zN0nR7co5kz0Jko/nkSYCBRkz1djvjajVTTg==", "dev": true, + "license": "MIT", "dependencies": { - "@typescript-eslint/types": "8.38.0", + "@typescript-eslint/types": "8.48.0", "eslint-visitor-keys": "^4.2.1" }, "engines": { @@ -1592,6 +1923,7 @@ "arm" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "android" @@ -1605,6 +1937,7 @@ "arm64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "android" @@ -1618,6 +1951,7 @@ "arm64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "darwin" @@ -1631,6 +1965,7 @@ "x64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "darwin" @@ -1644,6 +1979,7 @@ "x64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "freebsd" @@ -1657,6 +1993,7 
@@ "arm" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "linux" @@ -1670,6 +2007,7 @@ "arm" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "linux" @@ -1683,6 +2021,7 @@ "arm64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "linux" @@ -1696,6 +2035,7 @@ "arm64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "linux" @@ -1709,6 +2049,7 @@ "ppc64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "linux" @@ -1722,6 +2063,7 @@ "riscv64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "linux" @@ -1735,6 +2077,7 @@ "riscv64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "linux" @@ -1748,6 +2091,7 @@ "s390x" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "linux" @@ -1761,6 +2105,7 @@ "x64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "linux" @@ -1774,6 +2119,7 @@ "x64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "linux" @@ -1787,6 +2133,7 @@ "wasm32" ], "dev": true, + "license": "MIT", "optional": true, "dependencies": { "@napi-rs/wasm-runtime": "^0.2.11" @@ -1803,6 +2150,7 @@ "arm64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "win32" @@ -1816,6 +2164,7 @@ "ia32" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "win32" @@ -1829,6 +2178,7 @@ "x64" ], "dev": true, + "license": "MIT", "optional": true, "os": [ "win32" @@ -1838,6 +2188,7 @@ "version": "1.5.3", "resolved": "https://registry.npmjs.org/accessor-fn/-/accessor-fn-1.5.3.tgz", "integrity": "sha512-rkAofCwe/FvYFUlMB0v0gWmhqtfAtV1IUkdPbfhTUyYniu5LrC0A0UJkTH0Jv3S8SvwkmfuAlY+mQIJATdocMA==", + "license": "MIT", "engines": { "node": ">=12" } @@ -1847,6 +2198,8 @@ "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "dev": true, + "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -1859,6 +2212,7 @@ 
"resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.2.tgz", "integrity": "sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==", "dev": true, + "license": "MIT", "peerDependencies": { "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" } @@ -1868,6 +2222,7 @@ "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", "dev": true, + "license": "MIT", "dependencies": { "fast-deep-equal": "^3.1.1", "fast-json-stable-stringify": "^2.0.0", @@ -1884,6 +2239,7 @@ "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", "dev": true, + "license": "MIT", "dependencies": { "color-convert": "^2.0.1" }, @@ -1898,13 +2254,15 @@ "version": "2.0.1", "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", - "dev": true + "dev": true, + "license": "Python-2.0" }, "node_modules/aria-query": { "version": "5.3.2", "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-5.3.2.tgz", "integrity": "sha512-COROpnaoap1E2F000S62r6A60uHZnmlvomhfyT2DlTcrY1OrBKn2UhH7qn5wTC9zMvD0AY7csdPSNwKP+7WiQw==", "dev": true, + "license": "Apache-2.0", "engines": { "node": ">= 0.4" } @@ -1914,6 +2272,7 @@ "resolved": "https://registry.npmjs.org/array-buffer-byte-length/-/array-buffer-byte-length-1.0.2.tgz", "integrity": "sha512-LHE+8BuR7RYGDKvnrmcuSq3tDcKv9OFEXQt/HpbZhY7V6h0zlUXutnAD82GiFx9rdieCMjkvtcsPqBwgUl1Iiw==", "dev": true, + "license": "MIT", "dependencies": { "call-bound": "^1.0.3", "is-array-buffer": "^3.0.5" @@ -1930,6 +2289,7 @@ "resolved": "https://registry.npmjs.org/array-includes/-/array-includes-3.1.9.tgz", "integrity": 
"sha512-FmeCCAenzH0KH381SPT5FZmiA/TmpndpcaShhfgEN9eCVjnFBqq3l1xrI42y8+PPLI6hypzou4GXw00WHmPBLQ==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.4", @@ -1952,6 +2312,7 @@ "resolved": "https://registry.npmjs.org/array.prototype.findlast/-/array.prototype.findlast-1.2.5.tgz", "integrity": "sha512-CVvd6FHg1Z3POpBLxO6E6zr+rSKEQ9L6rZHAaY7lLfhKsWYUBBOuMs0e9o24oopj6H+geRCX0YJ+TJLBK2eHyQ==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.7", "define-properties": "^1.2.1", @@ -1972,6 +2333,7 @@ "resolved": "https://registry.npmjs.org/array.prototype.findlastindex/-/array.prototype.findlastindex-1.2.6.tgz", "integrity": "sha512-F/TKATkzseUExPlfvmwQKGITM3DGTK+vkAsCZoDc5daVygbJBnjEUCbgkAvVFsgfXfX4YIqZ/27G3k3tdXrTxQ==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.4", @@ -1993,6 +2355,7 @@ "resolved": "https://registry.npmjs.org/array.prototype.flat/-/array.prototype.flat-1.3.3.tgz", "integrity": "sha512-rwG/ja1neyLqCuGZ5YYrznA62D4mZXg0i1cIskIUKSiqF3Cje9/wXAls9B9s1Wa2fomMsIv8czB8jZcPmxCXFg==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.8", "define-properties": "^1.2.1", @@ -2011,6 +2374,7 @@ "resolved": "https://registry.npmjs.org/array.prototype.flatmap/-/array.prototype.flatmap-1.3.3.tgz", "integrity": "sha512-Y7Wt51eKJSyi80hFrJCePGGNo5ktJCslFuboqJsbf57CCPcm5zztluPlc4/aD8sWsKvlwatezpV4U1efk8kpjg==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.8", "define-properties": "^1.2.1", @@ -2029,6 +2393,7 @@ "resolved": "https://registry.npmjs.org/array.prototype.tosorted/-/array.prototype.tosorted-1.1.4.tgz", "integrity": "sha512-p6Fx8B7b7ZhL/gmUsAy0D15WhvDccw3mnGNbZpi3pmeJdxtWsj2jEaI4Y6oo3XiHfzuSgPwKc04MYt6KgvC/wA==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.7", "define-properties": "^1.2.1", @@ -2045,6 +2410,7 @@ "resolved": 
"https://registry.npmjs.org/arraybuffer.prototype.slice/-/arraybuffer.prototype.slice-1.0.4.tgz", "integrity": "sha512-BNoCY6SXXPQ7gF2opIP4GBE+Xw7U+pHMYKuzjgCN3GwiaIR09UUeKfheyIry77QtrCBlC0KK0q5/TER/tYh3PQ==", "dev": true, + "license": "MIT", "dependencies": { "array-buffer-byte-length": "^1.0.1", "call-bind": "^1.0.8", @@ -2065,13 +2431,15 @@ "version": "0.0.8", "resolved": "https://registry.npmjs.org/ast-types-flow/-/ast-types-flow-0.0.8.tgz", "integrity": "sha512-OH/2E5Fg20h2aPrbe+QL8JZQFko0YZaF+j4mnQ7BGhfavO7OpSLa8a0y9sBwomHdSbkhTS8TQNayBfnW5DwbvQ==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/async-function": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/async-function/-/async-function-1.0.0.tgz", "integrity": "sha512-hsU18Ae8CDTR6Kgu9DYf0EbCr/a5iGL0rytQDobUcdpYOKokk8LEjVphnXkDkgpi0wYVsqrXuP0bZxJaTqdgoA==", "dev": true, + "license": "MIT", "engines": { "node": ">= 0.4" } @@ -2081,6 +2449,7 @@ "resolved": "https://registry.npmjs.org/available-typed-arrays/-/available-typed-arrays-1.0.7.tgz", "integrity": "sha512-wvUjBtSGN7+7SjNpq/9M2Tg350UZD3q62IFZLbRAR1bSMlCo1ZaeW+BJ+D090e4hIIZLBcTDWe4Mh4jvUDajzQ==", "dev": true, + "license": "MIT", "dependencies": { "possible-typed-array-names": "^1.0.0" }, @@ -2092,10 +2461,11 @@ } }, "node_modules/axe-core": { - "version": "4.10.3", - "resolved": "https://registry.npmjs.org/axe-core/-/axe-core-4.10.3.tgz", - "integrity": "sha512-Xm7bpRXnDSX2YE2YFfBk2FnF0ep6tmG7xPh8iHee8MIcrgq762Nkce856dYtJYLkuIoYZvGfTs/PbZhideTcEg==", + "version": "4.11.0", + "resolved": "https://registry.npmjs.org/axe-core/-/axe-core-4.11.0.tgz", + "integrity": "sha512-ilYanEU8vxxBexpJd8cWM4ElSQq4QctCLKih0TSfjIfCQTeyH/6zVrmIJfLPrKTKJRbiG+cfnZbQIjAlJmF1jQ==", "dev": true, + "license": "MPL-2.0", "engines": { "node": ">=4" } @@ -2105,6 +2475,7 @@ "resolved": "https://registry.npmjs.org/axobject-query/-/axobject-query-4.1.0.tgz", "integrity": 
"sha512-qIj0G9wZbMGNLjLmg1PT6v2mE9AH2zlnADJD/2tC6E00hgmhUOfEB6greHPAfLRSufHqROIUTkw6E+M3lH0PTQ==", "dev": true, + "license": "Apache-2.0", "engines": { "node": ">= 0.4" } @@ -2113,12 +2484,24 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", - "dev": true + "dev": true, + "license": "MIT" + }, + "node_modules/baseline-browser-mapping": { + "version": "2.8.31", + "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.8.31.tgz", + "integrity": "sha512-a28v2eWrrRWPpJSzxc+mKwm0ZtVx/G8SepdQZDArnXYU/XS+IF6mp8aB/4E+hH1tyGCoDo3KlUCdlSxGDsRkAw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "baseline-browser-mapping": "dist/cli.js" + } }, "node_modules/bezier-js": { "version": "6.1.4", "resolved": "https://registry.npmjs.org/bezier-js/-/bezier-js-6.1.4.tgz", "integrity": "sha512-PA0FW9ZpcHbojUCMu28z9Vg/fNkwTj5YhusSAjHHDfHDGLxJ6YUKrAN2vk1fP2MMOxVw4Oko16FMlRGVBGqLKg==", + "license": "MIT", "funding": { "type": "individual", "url": "https://github.com/Pomax/bezierjs/blob/master/FUNDING.md" @@ -2129,6 +2512,7 @@ "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", "dev": true, + "license": "MIT", "dependencies": { "balanced-match": "^1.0.0", "concat-map": "0.0.1" @@ -2139,6 +2523,7 @@ "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", "dev": true, + "license": "MIT", "dependencies": { "fill-range": "^7.1.1" }, @@ -2146,15 +2531,39 @@ "node": ">=8" } }, - "node_modules/busboy": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/busboy/-/busboy-1.6.0.tgz", - "integrity": 
"sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA==", + "node_modules/browserslist": { + "version": "4.28.0", + "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.28.0.tgz", + "integrity": "sha512-tbydkR/CxfMwelN0vwdP/pLkDwyAASZ+VfWm4EOwlB6SWhx1sYnWLqo8N5j0rAzPfzfRaxt0mM/4wPU/Su84RQ==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "peer": true, "dependencies": { - "streamsearch": "^1.1.0" + "baseline-browser-mapping": "^2.8.25", + "caniuse-lite": "^1.0.30001754", + "electron-to-chromium": "^1.5.249", + "node-releases": "^2.0.27", + "update-browserslist-db": "^1.1.4" + }, + "bin": { + "browserslist": "cli.js" }, "engines": { - "node": ">=10.16.0" + "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" } }, "node_modules/call-bind": { @@ -2162,6 +2571,7 @@ "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.8.tgz", "integrity": "sha512-oKlSFMcMwpUg2ednkhQ454wfWiU/ul3CkJe/PEHcTKuiX6RpbehUiFMXu13HalGZxfUwCQzZG747YXBn1im9ww==", "dev": true, + "license": "MIT", "dependencies": { "call-bind-apply-helpers": "^1.0.0", "es-define-property": "^1.0.0", @@ -2180,6 +2590,7 @@ "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", "dev": true, + "license": "MIT", "dependencies": { "es-errors": "^1.3.0", "function-bind": "^1.1.2" @@ -2193,6 +2604,7 @@ "resolved": "https://registry.npmjs.org/call-bound/-/call-bound-1.0.4.tgz", "integrity": "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==", "dev": true, + "license": "MIT", 
"dependencies": { "call-bind-apply-helpers": "^1.0.2", "get-intrinsic": "^1.3.0" @@ -2209,14 +2621,15 @@ "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", "dev": true, + "license": "MIT", "engines": { "node": ">=6" } }, "node_modules/caniuse-lite": { - "version": "1.0.30001727", - "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001727.tgz", - "integrity": "sha512-pB68nIHmbN6L/4C6MH1DokyR3bYqFwjaSs/sWDHGj4CTcFtQUQMuJftVwWkXq7mNWOybD3KhUv3oWHoGxgP14Q==", + "version": "1.0.30001757", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001757.tgz", + "integrity": "sha512-r0nnL/I28Zi/yjk1el6ilj27tKcdjLsNqAOZr0yVjWPrSQyHgKI2INaEWw21bAQSv2LXRt1XuCS/GomNpWOxsQ==", "funding": [ { "type": "opencollective", @@ -2230,12 +2643,14 @@ "type": "github", "url": "https://github.com/sponsors/ai" } - ] + ], + "license": "CC-BY-4.0" }, "node_modules/canvas-color-tracker": { "version": "1.3.2", "resolved": "https://registry.npmjs.org/canvas-color-tracker/-/canvas-color-tracker-1.3.2.tgz", "integrity": "sha512-ryQkDX26yJ3CXzb3hxUVNlg1NKE4REc5crLBq661Nxzr8TNd236SaEf2ffYLXyI5tSABSeguHLqcVq4vf9L3Zg==", + "license": "MIT", "dependencies": { "tinycolor2": "^1.6.0" }, @@ -2248,6 +2663,7 @@ "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", "dev": true, + "license": "MIT", "dependencies": { "ansi-styles": "^4.1.0", "supports-color": "^7.1.0" @@ -2259,43 +2675,24 @@ "url": "https://github.com/chalk/chalk?sponsor=1" } }, - "node_modules/chownr": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/chownr/-/chownr-3.0.0.tgz", - "integrity": "sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g==", - "dev": true, - "engines": { - "node": 
">=18" - } - }, "node_modules/classnames": { "version": "2.5.1", "resolved": "https://registry.npmjs.org/classnames/-/classnames-2.5.1.tgz", - "integrity": "sha512-saHYOzhIQs6wy2sVxTM6bUDsQO4F50V9RQ22qBpEdCW+I+/Wmke2HOl6lS6dTpdxVhb88/I6+Hs+438c3lfUow==" + "integrity": "sha512-saHYOzhIQs6wy2sVxTM6bUDsQO4F50V9RQ22qBpEdCW+I+/Wmke2HOl6lS6dTpdxVhb88/I6+Hs+438c3lfUow==", + "license": "MIT" }, "node_modules/client-only": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/client-only/-/client-only-0.0.1.tgz", - "integrity": "sha512-IV3Ou0jSMzZrd3pZ48nLkT9DA7Ag1pnPzaiQhpW7c3RbcqqzvzzVu+L8gfqMp/8IM2MQtSiqaCxrrcfu8I8rMA==" - }, - "node_modules/color": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/color/-/color-4.2.3.tgz", - "integrity": "sha512-1rXeuUUiGGrykh+CeBdu5Ie7OJwinCgQY0bc7GCRxy5xVHy+moaqkpL/jqQq0MtQOeYcrqEz4abc5f0KtU7W4A==", - "optional": true, - "dependencies": { - "color-convert": "^2.0.1", - "color-string": "^1.9.0" - }, - "engines": { - "node": ">=12.5.0" - } + "integrity": "sha512-IV3Ou0jSMzZrd3pZ48nLkT9DA7Ag1pnPzaiQhpW7c3RbcqqzvzzVu+L8gfqMp/8IM2MQtSiqaCxrrcfu8I8rMA==", + "license": "MIT" }, "node_modules/color-convert": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "devOptional": true, + "dev": true, + "license": "MIT", "dependencies": { "color-name": "~1.1.4" }, @@ -2307,29 +2704,29 @@ "version": "1.1.4", "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "devOptional": true - }, - "node_modules/color-string": { - "version": "1.9.1", - "resolved": "https://registry.npmjs.org/color-string/-/color-string-1.9.1.tgz", - "integrity": 
"sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg==", - "optional": true, - "dependencies": { - "color-name": "^1.0.0", - "simple-swizzle": "^0.2.2" - } + "dev": true, + "license": "MIT" }, "node_modules/concat-map": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", - "dev": true + "dev": true, + "license": "MIT" + }, + "node_modules/convert-source-map": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz", + "integrity": "sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==", + "dev": true, + "license": "MIT" }, "node_modules/cross-spawn": { "version": "7.0.6", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", "dev": true, + "license": "MIT", "dependencies": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", @@ -2340,15 +2737,17 @@ } }, "node_modules/csstype": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz", - "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==", - "dev": true + "version": "3.2.3", + "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz", + "integrity": "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==", + "dev": true, + "license": "MIT" }, "node_modules/culori": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/culori/-/culori-4.0.2.tgz", "integrity": "sha512-1+BhOB8ahCn4O0cep0Sh2l9KCOfOdY+BXJnKMHFFzDEouSr/el18QwXEMRlOj9UY5nCeA8UN3a/82rUWRBeyBw==", + "license": "MIT", "engines": { "node": "^12.20.0 || ^14.13.1 || >=16.0.0" } 
@@ -2357,6 +2756,7 @@ "version": "3.2.4", "resolved": "https://registry.npmjs.org/d3-array/-/d3-array-3.2.4.tgz", "integrity": "sha512-tdQAmyA18i4J7wprpYq8ClcxZy3SC31QMeByyCFyRt7BVHdREQZ5lpzoe5mFEYZUWe+oq8HBvk9JjpibyEV4Jg==", + "license": "ISC", "dependencies": { "internmap": "1 - 2" }, @@ -2367,12 +2767,14 @@ "node_modules/d3-binarytree": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/d3-binarytree/-/d3-binarytree-1.0.2.tgz", - "integrity": "sha512-cElUNH+sHu95L04m92pG73t2MEJXKu+GeKUN1TJkFsu93E5W8E9Sc3kHEGJKgenGvj19m6upSn2EunvMgMD2Yw==" + "integrity": "sha512-cElUNH+sHu95L04m92pG73t2MEJXKu+GeKUN1TJkFsu93E5W8E9Sc3kHEGJKgenGvj19m6upSn2EunvMgMD2Yw==", + "license": "MIT" }, "node_modules/d3-color": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/d3-color/-/d3-color-3.1.0.tgz", "integrity": "sha512-zg/chbXyeBtMQ1LbD/WSoW2DpC3I0mpmPdW+ynRTj/x2DAWYrIY7qeZIHidozwV24m4iavr15lNwIwLxRmOxhA==", + "license": "ISC", "engines": { "node": ">=12" } @@ -2381,6 +2783,7 @@ "version": "3.0.1", "resolved": "https://registry.npmjs.org/d3-dispatch/-/d3-dispatch-3.0.1.tgz", "integrity": "sha512-rzUyPU/S7rwUflMyLc1ETDeBj0NRuHKKAcvukozwhshr6g6c5d8zh4c2gQjY2bZ0dXeGLWc1PF174P2tVvKhfg==", + "license": "ISC", "engines": { "node": ">=12" } @@ -2389,6 +2792,7 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/d3-drag/-/d3-drag-3.0.0.tgz", "integrity": "sha512-pWbUJLdETVA8lQNJecMxoXfH6x+mO2UQo8rSmZ+QqxcbyA3hfeprFgIT//HW2nlHChWeIIMwS2Fq+gEARkhTkg==", + "license": "ISC", "dependencies": { "d3-dispatch": "1 - 3", "d3-selection": "3" @@ -2401,6 +2805,7 @@ "version": "3.0.1", "resolved": "https://registry.npmjs.org/d3-ease/-/d3-ease-3.0.1.tgz", "integrity": "sha512-wR/XK3D3XcLIZwpbvQwQ5fK+8Ykds1ip7A2Txe0yxncXSdq1L9skcG7blcedkOX+ZcgxGAmLX1FrRGbADwzi0w==", + "license": "BSD-3-Clause", "engines": { "node": ">=12" } @@ -2409,6 +2814,7 @@ "version": "3.0.6", "resolved": "https://registry.npmjs.org/d3-force-3d/-/d3-force-3d-3.0.6.tgz", "integrity": 
"sha512-4tsKHUPLOVkyfEffZo1v6sFHvGFwAIIjt/W8IThbp08DYAsXZck+2pSHEG5W1+gQgEvFLdZkYvmJAbRM2EzMnA==", + "license": "MIT", "dependencies": { "d3-binarytree": "1", "d3-dispatch": "1 - 3", @@ -2424,6 +2830,7 @@ "version": "3.1.0", "resolved": "https://registry.npmjs.org/d3-format/-/d3-format-3.1.0.tgz", "integrity": "sha512-YyUI6AEuY/Wpt8KWLgZHsIU86atmikuoOmCfommt0LYHiQSPjvX2AcFc38PX0CBpr2RCyZhjex+NS/LPOv6YqA==", + "license": "ISC", "engines": { "node": ">=12" } @@ -2432,6 +2839,7 @@ "version": "3.0.1", "resolved": "https://registry.npmjs.org/d3-interpolate/-/d3-interpolate-3.0.1.tgz", "integrity": "sha512-3bYs1rOD33uo8aqJfKP3JWPAibgw8Zm2+L9vBKEHJ2Rg+viTR7o5Mmv5mZcieN+FRYaAOWX5SJATX6k1PWz72g==", + "license": "ISC", "dependencies": { "d3-color": "1 - 3" }, @@ -2442,12 +2850,14 @@ "node_modules/d3-octree": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/d3-octree/-/d3-octree-1.1.0.tgz", - "integrity": "sha512-F8gPlqpP+HwRPMO/8uOu5wjH110+6q4cgJvgJT6vlpy3BEaDIKlTZrgHKZSp/i1InRpVfh4puY/kvL6MxK930A==" + "integrity": "sha512-F8gPlqpP+HwRPMO/8uOu5wjH110+6q4cgJvgJT6vlpy3BEaDIKlTZrgHKZSp/i1InRpVfh4puY/kvL6MxK930A==", + "license": "MIT" }, "node_modules/d3-quadtree": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/d3-quadtree/-/d3-quadtree-3.0.1.tgz", "integrity": "sha512-04xDrxQTDTCFwP5H6hRhsRcb9xxv2RzkcsygFzmkSIOJy3PeRJP7sNk3VRIbKXcog561P9oU0/rVH6vDROAgUw==", + "license": "ISC", "engines": { "node": ">=12" } @@ -2456,6 +2866,7 @@ "version": "4.0.2", "resolved": "https://registry.npmjs.org/d3-scale/-/d3-scale-4.0.2.tgz", "integrity": "sha512-GZW464g1SH7ag3Y7hXjf8RoUuAFIqklOAq3MRl4OaWabTFJY9PN/E1YklhXLh+OQ3fM9yS2nOkCoS+WLZ6kvxQ==", + "license": "ISC", "dependencies": { "d3-array": "2.10.0 - 3", "d3-format": "1 - 3", @@ -2471,6 +2882,7 @@ "version": "3.1.0", "resolved": "https://registry.npmjs.org/d3-scale-chromatic/-/d3-scale-chromatic-3.1.0.tgz", "integrity": 
"sha512-A3s5PWiZ9YCXFye1o246KoscMWqf8BsD9eRiJ3He7C9OBaxKhAd5TFCdEx/7VbKtxxTsu//1mMJFrEt572cEyQ==", + "license": "ISC", "dependencies": { "d3-color": "1 - 3", "d3-interpolate": "1 - 3" @@ -2483,6 +2895,8 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", + "license": "ISC", + "peer": true, "engines": { "node": ">=12" } @@ -2491,6 +2905,7 @@ "version": "3.1.0", "resolved": "https://registry.npmjs.org/d3-time/-/d3-time-3.1.0.tgz", "integrity": "sha512-VqKjzBLejbSMT4IgbmVgDjpkYrNWUYJnbCGo874u7MMKIWsILRX+OpX/gTk8MqjpT1A/c6HY2dCA77ZN0lkQ2Q==", + "license": "ISC", "dependencies": { "d3-array": "2 - 3" }, @@ -2502,6 +2917,7 @@ "version": "4.1.0", "resolved": "https://registry.npmjs.org/d3-time-format/-/d3-time-format-4.1.0.tgz", "integrity": "sha512-dJxPBlzC7NugB2PDLwo9Q8JiTR3M3e4/XANkreKSUxF8vvXKqm1Yfq4Q5dl8budlunRVlUUaDUgFt7eA8D6NLg==", + "license": "ISC", "dependencies": { "d3-time": "1 - 3" }, @@ -2513,6 +2929,7 @@ "version": "3.0.1", "resolved": "https://registry.npmjs.org/d3-timer/-/d3-timer-3.0.1.tgz", "integrity": "sha512-ndfJ/JxxMd3nw31uyKoY2naivF+r29V+Lc0svZxe1JvvIRmi8hUsrMvdOwgS1o6uBHmiz91geQ0ylPP0aj1VUA==", + "license": "ISC", "engines": { "node": ">=12" } @@ -2521,6 +2938,7 @@ "version": "3.0.1", "resolved": "https://registry.npmjs.org/d3-transition/-/d3-transition-3.0.1.tgz", "integrity": "sha512-ApKvfjsSR6tg06xrL434C0WydLr7JewBB3V+/39RMHsaXTOG0zmt/OAXeng5M5LBm0ojmxJrpomQVZ1aPvBL4w==", + "license": "ISC", "dependencies": { "d3-color": "1 - 3", "d3-dispatch": "1 - 3", @@ -2539,6 +2957,7 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/d3-zoom/-/d3-zoom-3.0.0.tgz", "integrity": "sha512-b8AmV3kfQaqWAuacbPuNbL6vahnOJflOhexLzMMNLga62+/nh0JzvJ0aO/5a5MVgUFGS7Hu1P9P03o3fJkDCyw==", + "license": "ISC", "dependencies": { "d3-dispatch": "1 - 3", "d3-drag": "2 - 3", @@ -2554,13 +2973,15 @@ 
"version": "1.0.8", "resolved": "https://registry.npmjs.org/damerau-levenshtein/-/damerau-levenshtein-1.0.8.tgz", "integrity": "sha512-sdQSFB7+llfUcQHUQO3+B8ERRj0Oa4w9POWMI/puGtuf7gFywGmkaLCElnudfTiKZV+NvHqL0ifzdrI8Ro7ESA==", - "dev": true + "dev": true, + "license": "BSD-2-Clause" }, "node_modules/data-view-buffer": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/data-view-buffer/-/data-view-buffer-1.0.2.tgz", "integrity": "sha512-EmKO5V3OLXh1rtK2wgXRansaK1/mtVdTUEiEI0W8RkvgT05kfxaH29PliLnpLP73yYO6142Q72QNa8Wx/A5CqQ==", "dev": true, + "license": "MIT", "dependencies": { "call-bound": "^1.0.3", "es-errors": "^1.3.0", @@ -2578,6 +2999,7 @@ "resolved": "https://registry.npmjs.org/data-view-byte-length/-/data-view-byte-length-1.0.2.tgz", "integrity": "sha512-tuhGbE6CfTM9+5ANGf+oQb72Ky/0+s3xKUpHvShfiz2RxMFgFPjsXuRLBVMtvMs15awe45SRb83D6wH4ew6wlQ==", "dev": true, + "license": "MIT", "dependencies": { "call-bound": "^1.0.3", "es-errors": "^1.3.0", @@ -2595,6 +3017,7 @@ "resolved": "https://registry.npmjs.org/data-view-byte-offset/-/data-view-byte-offset-1.0.1.tgz", "integrity": "sha512-BS8PfmtDGnrgYdOonGZQdLZslWIeCGFP9tpan0hi1Co2Zr2NKADsvGYA8XxuG/4UWgJ6Cjtv+YJnB6MM69QGlQ==", "dev": true, + "license": "MIT", "dependencies": { "call-bound": "^1.0.2", "es-errors": "^1.3.0", @@ -2608,10 +3031,11 @@ } }, "node_modules/debug": { - "version": "4.4.1", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.1.tgz", - "integrity": "sha512-KcKCqiftBJcZr++7ykoDIEwSa3XWowTfNPo92BYxjXiyYEVrUQh2aLyhxBCwww+heortUFxEJYcRzosstTEBYQ==", + "version": "4.4.3", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", "dev": true, + "license": "MIT", "dependencies": { "ms": "^2.1.3" }, @@ -2628,13 +3052,15 @@ "version": "0.1.4", "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", "integrity": 
"sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/define-data-property": { "version": "1.1.4", "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz", "integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==", "dev": true, + "license": "MIT", "dependencies": { "es-define-property": "^1.0.0", "es-errors": "^1.3.0", @@ -2652,6 +3078,7 @@ "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.2.1.tgz", "integrity": "sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg==", "dev": true, + "license": "MIT", "dependencies": { "define-data-property": "^1.0.1", "has-property-descriptors": "^1.0.0", @@ -2668,15 +3095,17 @@ "version": "2.0.3", "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz", "integrity": "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==", + "license": "MIT", "engines": { "node": ">=6" } }, "node_modules/detect-libc": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.4.tgz", - "integrity": "sha512-3UDv+G9CsCKO1WKMGw9fwq/SWJYbI0c5Y7LU1AXYoDdbhE2AHQ6N6Nb34sG8Fj7T5APy8qXDCKuuIHd1BR0tVA==", + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", + "integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==", "devOptional": true, + "license": "Apache-2.0", "engines": { "node": ">=8" } @@ -2686,6 +3115,7 @@ "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-2.1.0.tgz", "integrity": "sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==", "dev": true, + "license": "Apache-2.0", "dependencies": { "esutils": "^2.0.2" }, @@ -2698,6 +3128,7 @@ 
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", "dev": true, + "license": "MIT", "dependencies": { "call-bind-apply-helpers": "^1.0.1", "es-errors": "^1.3.0", @@ -2707,17 +3138,26 @@ "node": ">= 0.4" } }, + "node_modules/electron-to-chromium": { + "version": "1.5.260", + "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.260.tgz", + "integrity": "sha512-ov8rBoOBhVawpzdre+Cmz4FB+y66Eqrk6Gwqd8NGxuhv99GQ8XqMAr351KEkOt7gukXWDg6gJWEMKgL2RLMPtA==", + "dev": true, + "license": "ISC" + }, "node_modules/emoji-regex": { "version": "9.2.2", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/enhanced-resolve": { - "version": "5.18.2", - "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.18.2.tgz", - "integrity": "sha512-6Jw4sE1maoRJo3q8MsSIn2onJFbLTOjY9hlx4DZXmOKvLRd1Ok2kXmAGXaafL2+ijsJZ1ClYbl/pmqr9+k4iUQ==", + "version": "5.18.3", + "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.18.3.tgz", + "integrity": "sha512-d4lC8xfavMeBjzGr2vECC3fsGXziXZQyJxD868h2M/mBI3PwAuODxAkLkq5HYuvrPYcUtiLzsTo8U3PgX3Ocww==", "dev": true, + "license": "MIT", "dependencies": { "graceful-fs": "^4.2.4", "tapable": "^2.2.0" @@ -2731,6 +3171,7 @@ "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.24.0.tgz", "integrity": "sha512-WSzPgsdLtTcQwm4CROfS5ju2Wa1QQcVeT37jFjYzdFz1r9ahadC8B8/a4qxJxM+09F18iumCdRmlr96ZYkQvEg==", "dev": true, + "license": "MIT", "dependencies": { "array-buffer-byte-length": "^1.0.2", "arraybuffer.prototype.slice": "^1.0.4", @@ -2799,6 +3240,7 @@ "resolved": 
"https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", "dev": true, + "license": "MIT", "engines": { "node": ">= 0.4" } @@ -2808,6 +3250,7 @@ "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", "dev": true, + "license": "MIT", "engines": { "node": ">= 0.4" } @@ -2817,6 +3260,7 @@ "resolved": "https://registry.npmjs.org/es-iterator-helpers/-/es-iterator-helpers-1.2.1.tgz", "integrity": "sha512-uDn+FE1yrDzyC0pCo961B2IHbdM8y/ACZsKD4dG6WqrjV53BADjwa7D+1aom2rsNVfLyDgU/eigvlJGJ08OQ4w==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.3", @@ -2844,6 +3288,7 @@ "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", "dev": true, + "license": "MIT", "dependencies": { "es-errors": "^1.3.0" }, @@ -2856,6 +3301,7 @@ "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", "dev": true, + "license": "MIT", "dependencies": { "es-errors": "^1.3.0", "get-intrinsic": "^1.2.6", @@ -2871,6 +3317,7 @@ "resolved": "https://registry.npmjs.org/es-shim-unscopables/-/es-shim-unscopables-1.1.0.tgz", "integrity": "sha512-d9T8ucsEhh8Bi1woXCf+TIKDIROLG5WCkxg8geBCbvk22kzwC5G2OnXVMO6FUsvQlgUUXQ2itephWDLqDzbeCw==", "dev": true, + "license": "MIT", "dependencies": { "hasown": "^2.0.2" }, @@ -2883,6 +3330,7 @@ "resolved": "https://registry.npmjs.org/es-to-primitive/-/es-to-primitive-1.3.0.tgz", "integrity": "sha512-w+5mJ3GuFL+NjVtJlvydShqE1eN3h3PbI7/5LAsYJP/2qtuMXjfL2LpHSRqo4b4eSF5K/DH1JXKUAHSB2UW50g==", 
"dev": true, + "license": "MIT", "dependencies": { "is-callable": "^1.2.7", "is-date-object": "^1.0.5", @@ -2895,11 +3343,22 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/escalade": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", + "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, "node_modules/escape-string-regexp": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", "integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==", "dev": true, + "license": "MIT", "engines": { "node": ">=10" }, @@ -2908,24 +3367,25 @@ } }, "node_modules/eslint": { - "version": "9.31.0", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.31.0.tgz", - "integrity": "sha512-QldCVh/ztyKJJZLr4jXNUByx3gR+TDYZCRXEktiZoUR3PGy4qCmSbkxcIle8GEwGpb5JBZazlaJ/CxLidXdEbQ==", + "version": "9.39.1", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.1.tgz", + "integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==", "dev": true, + "license": "MIT", + "peer": true, "dependencies": { - "@eslint-community/eslint-utils": "^4.2.0", + "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", - "@eslint/config-array": "^0.21.0", - "@eslint/config-helpers": "^0.3.0", - "@eslint/core": "^0.15.0", + "@eslint/config-array": "^0.21.1", + "@eslint/config-helpers": "^0.4.2", + "@eslint/core": "^0.17.0", "@eslint/eslintrc": "^3.3.1", - "@eslint/js": "9.31.0", - "@eslint/plugin-kit": "^0.3.1", + "@eslint/js": "9.39.1", + "@eslint/plugin-kit": "^0.4.1", "@humanfs/node": "^0.16.6", "@humanwhocodes/module-importer": "^1.0.1", "@humanwhocodes/retry": "^0.4.2", "@types/estree": "^1.0.6", - 
"@types/json-schema": "^7.0.15", "ajv": "^6.12.4", "chalk": "^4.0.0", "cross-spawn": "^7.0.6", @@ -2968,24 +3428,24 @@ } }, "node_modules/eslint-config-next": { - "version": "15.4.3", - "resolved": "https://registry.npmjs.org/eslint-config-next/-/eslint-config-next-15.4.3.tgz", - "integrity": "sha512-blytVMTpdqqlLBvYOvwT51m5eqRHNofKR/pfBSeeHiQMSY33kCph31hAK3DiAsL/RamVJRQzHwTRbbNr+7c/sw==", + "version": "16.0.4", + "resolved": "https://registry.npmjs.org/eslint-config-next/-/eslint-config-next-16.0.4.tgz", + "integrity": "sha512-FknAsm/uexYriO6UXzV2QEm4Yz/5DVQCtMUHx0FRYAKqqf5ia8xPqdyoqXzoCc45nRF5brkFpBYMvtciavzD4g==", "dev": true, + "license": "MIT", "dependencies": { - "@next/eslint-plugin-next": "15.4.3", - "@rushstack/eslint-patch": "^1.10.3", - "@typescript-eslint/eslint-plugin": "^5.4.2 || ^6.0.0 || ^7.0.0 || ^8.0.0", - "@typescript-eslint/parser": "^5.4.2 || ^6.0.0 || ^7.0.0 || ^8.0.0", + "@next/eslint-plugin-next": "16.0.4", "eslint-import-resolver-node": "^0.3.6", "eslint-import-resolver-typescript": "^3.5.2", - "eslint-plugin-import": "^2.31.0", + "eslint-plugin-import": "^2.32.0", "eslint-plugin-jsx-a11y": "^6.10.0", "eslint-plugin-react": "^7.37.0", - "eslint-plugin-react-hooks": "^5.0.0" + "eslint-plugin-react-hooks": "^7.0.0", + "globals": "16.4.0", + "typescript-eslint": "^8.46.0" }, "peerDependencies": { - "eslint": "^7.23.0 || ^8.0.0 || ^9.0.0", + "eslint": ">=9.0.0", "typescript": ">=3.3.1" }, "peerDependenciesMeta": { @@ -2994,11 +3454,25 @@ } } }, + "node_modules/eslint-config-next/node_modules/globals": { + "version": "16.4.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-16.4.0.tgz", + "integrity": "sha512-ob/2LcVVaVGCYN+r14cnwnoDPUufjiYgSqRhiFD0Q1iI4Odora5RE8Iv1D24hAz5oMophRGkGz+yuvQmmUMnMw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/eslint-config-prettier": { "version": "10.1.8", "resolved": 
"https://registry.npmjs.org/eslint-config-prettier/-/eslint-config-prettier-10.1.8.tgz", "integrity": "sha512-82GZUjRS0p/jganf6q1rEO25VSoHH0hKPCTrgillPjdI/3bgBhAE1QzHrHTizjpRvy6pGAvKjDJtk2pF9NDq8w==", "dev": true, + "license": "MIT", "bin": { "eslint-config-prettier": "bin/cli.js" }, @@ -3014,6 +3488,7 @@ "resolved": "https://registry.npmjs.org/eslint-import-resolver-node/-/eslint-import-resolver-node-0.3.9.tgz", "integrity": "sha512-WFj2isz22JahUv+B788TlO3N6zL3nNJGU8CcZbPZvVEkBPaJdCV4vy5wyghty5ROFbCRnm132v8BScu5/1BQ8g==", "dev": true, + "license": "MIT", "dependencies": { "debug": "^3.2.7", "is-core-module": "^2.13.0", @@ -3025,6 +3500,7 @@ "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz", "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==", "dev": true, + "license": "MIT", "dependencies": { "ms": "^2.1.1" } @@ -3034,6 +3510,7 @@ "resolved": "https://registry.npmjs.org/eslint-import-resolver-typescript/-/eslint-import-resolver-typescript-3.10.1.tgz", "integrity": "sha512-A1rHYb06zjMGAxdLSkN2fXPBwuSaQ0iO5M/hdyS0Ajj1VBaRp0sPD3dn1FhME3c/JluGFbwSxyCfqdSbtQLAHQ==", "dev": true, + "license": "ISC", "dependencies": { "@nolyfill/is-core-module": "1.0.39", "debug": "^4.4.0", @@ -3068,6 +3545,7 @@ "resolved": "https://registry.npmjs.org/eslint-module-utils/-/eslint-module-utils-2.12.1.tgz", "integrity": "sha512-L8jSWTze7K2mTg0vos/RuLRS5soomksDPoJLXIslC7c8Wmut3bx7CPpJijDcBZtxQ5lrbUdM+s0OlNbz0DCDNw==", "dev": true, + "license": "MIT", "dependencies": { "debug": "^3.2.7" }, @@ -3085,6 +3563,7 @@ "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz", "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==", "dev": true, + "license": "MIT", "dependencies": { "ms": "^2.1.1" } @@ -3094,6 +3573,7 @@ "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.32.0.tgz", "integrity": 
"sha512-whOE1HFo/qJDyX4SnXzP4N6zOWn79WhnCUY/iDR0mPfQZO8wcYE4JClzI2oZrhBnnMUCBCHZhO6VQyoBU95mZA==", "dev": true, + "license": "MIT", "dependencies": { "@rtsao/scc": "^1.1.0", "array-includes": "^3.1.9", @@ -3127,24 +3607,17 @@ "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz", "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==", "dev": true, + "license": "MIT", "dependencies": { "ms": "^2.1.1" } }, - "node_modules/eslint-plugin-import/node_modules/semver": { - "version": "6.3.1", - "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", - "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", - "dev": true, - "bin": { - "semver": "bin/semver.js" - } - }, "node_modules/eslint-plugin-jsx-a11y": { "version": "6.10.2", "resolved": "https://registry.npmjs.org/eslint-plugin-jsx-a11y/-/eslint-plugin-jsx-a11y-6.10.2.tgz", "integrity": "sha512-scB3nz4WmG75pV8+3eRUQOHZlNSUhFNq37xnpgRkCCELU3XMvXAxLk1eqWWyE22Ki4Q01Fnsw9BA3cJHDPgn2Q==", "dev": true, + "license": "MIT", "dependencies": { "aria-query": "^5.3.2", "array-includes": "^3.1.8", @@ -3174,6 +3647,7 @@ "resolved": "https://registry.npmjs.org/eslint-plugin-react/-/eslint-plugin-react-7.37.5.tgz", "integrity": "sha512-Qteup0SqU15kdocexFNAJMvCJEfa2xUKNV4CC1xsVMrIIqEy3SQ/rqyxCWNzfrd3/ldy6HMlD2e0JDVpDg2qIA==", "dev": true, + "license": "MIT", "dependencies": { "array-includes": "^3.1.8", "array.prototype.findlast": "^1.2.5", @@ -3202,12 +3676,20 @@ } }, "node_modules/eslint-plugin-react-hooks": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/eslint-plugin-react-hooks/-/eslint-plugin-react-hooks-5.2.0.tgz", - "integrity": "sha512-+f15FfK64YQwZdJNELETdn5ibXEUQmW1DZL6KXhNnc2heoy/sg9VJJeT7n8TlMWouzWqSWavFkIhHyIbIAEapg==", + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/eslint-plugin-react-hooks/-/eslint-plugin-react-hooks-7.0.1.tgz", + "integrity": 
"sha512-O0d0m04evaNzEPoSW+59Mezf8Qt0InfgGIBJnpC0h3NH/WjUAR7BIKUfysC6todmtiZ/A0oUVS8Gce0WhBrHsA==", "dev": true, + "license": "MIT", + "dependencies": { + "@babel/core": "^7.24.4", + "@babel/parser": "^7.24.4", + "hermes-parser": "^0.25.1", + "zod": "^3.25.0 || ^4.0.0", + "zod-validation-error": "^3.5.0 || ^4.0.0" + }, "engines": { - "node": ">=10" + "node": ">=18" }, "peerDependencies": { "eslint": "^3.0.0 || ^4.0.0 || ^5.0.0 || ^6.0.0 || ^7.0.0 || ^8.0.0-0 || ^9.0.0" @@ -3218,6 +3700,7 @@ "resolved": "https://registry.npmjs.org/resolve/-/resolve-2.0.0-next.5.tgz", "integrity": "sha512-U7WjGVG9sH8tvjW5SmGbQuui75FiyjAX72HX15DwBBwF9dNiQZRQAg9nnPhYy+TUnE0+VcrttuvNI8oSxZcocA==", "dev": true, + "license": "MIT", "dependencies": { "is-core-module": "^2.13.0", "path-parse": "^1.0.7", @@ -3230,20 +3713,12 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/eslint-plugin-react/node_modules/semver": { - "version": "6.3.1", - "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", - "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", - "dev": true, - "bin": { - "semver": "bin/semver.js" - } - }, "node_modules/eslint-scope": { "version": "8.4.0", "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-8.4.0.tgz", "integrity": "sha512-sNXOfKCn74rt8RICKMvJS7XKV/Xk9kA7DyJr8mJik3S7Cwgy3qlkkmyS2uQB3jiJg6VNdZd/pDBJu0nvG2NlTg==", "dev": true, + "license": "BSD-2-Clause", "dependencies": { "esrecurse": "^4.3.0", "estraverse": "^5.2.0" @@ -3260,6 +3735,7 @@ "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-4.2.1.tgz", "integrity": "sha512-Uhdk5sfqcee/9H/rCOJikYz67o0a2Tw2hGRPOG2Y1R2dg7brRe1uG0yaNQDHu+TO/uQPF/5eCapvYSmHUjt7JQ==", "dev": true, + "license": "Apache-2.0", "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" }, @@ -3272,6 +3748,7 @@ "resolved": "https://registry.npmjs.org/espree/-/espree-10.4.0.tgz", "integrity": 
"sha512-j6PAQ2uUr79PZhBjP5C5fhl8e39FmRnOjsD5lGnWrFU8i2G776tBK7+nP8KuQUTTyAZUwfQqXAgrVH5MbH9CYQ==", "dev": true, + "license": "BSD-2-Clause", "dependencies": { "acorn": "^8.15.0", "acorn-jsx": "^5.3.2", @@ -3289,6 +3766,7 @@ "resolved": "https://registry.npmjs.org/esquery/-/esquery-1.6.0.tgz", "integrity": "sha512-ca9pw9fomFcKPvFLXhBKUK90ZvGibiGOvRJNbjljY7s7uq/5YO4BOzcYtJqExdx99rF6aAcnRxHmcUHcz6sQsg==", "dev": true, + "license": "BSD-3-Clause", "dependencies": { "estraverse": "^5.1.0" }, @@ -3301,6 +3779,7 @@ "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.3.0.tgz", "integrity": "sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==", "dev": true, + "license": "BSD-2-Clause", "dependencies": { "estraverse": "^5.2.0" }, @@ -3313,6 +3792,7 @@ "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz", "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", "dev": true, + "license": "BSD-2-Clause", "engines": { "node": ">=4.0" } @@ -3322,6 +3802,7 @@ "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", "dev": true, + "license": "BSD-2-Clause", "engines": { "node": ">=0.10.0" } @@ -3330,13 +3811,15 @@ "version": "3.1.3", "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/fast-glob": { "version": "3.3.1", "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.1.tgz", "integrity": "sha512-kNFPyjhh5cKjrUltxs+wFx+ZkbRaxxmZ+X0ZU31SOsxCEtP9VPgtq2teZw1DebupL5GmDaNQ6yKMMVcM41iqDg==", "dev": true, + "license": "MIT", "dependencies": { "@nodelib/fs.stat": "^2.0.2", "@nodelib/fs.walk": "^1.2.3", @@ -3353,6 
+3836,7 @@ "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", "dev": true, + "license": "ISC", "dependencies": { "is-glob": "^4.0.1" }, @@ -3364,19 +3848,22 @@ "version": "2.1.0", "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/fast-levenshtein": { "version": "2.0.6", "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", "integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/fastq": { "version": "1.19.1", "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.19.1.tgz", "integrity": "sha512-GwLTyxkCXjXbxqIhTsMI2Nui8huMPtnxg7krajPJAjnEG/iiOS7i+zCtWGZR9G0NBKbXKh6X9m9UIsYX/N6vvQ==", "dev": true, + "license": "ISC", "dependencies": { "reusify": "^1.0.4" } @@ -3386,6 +3873,7 @@ "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-8.0.0.tgz", "integrity": "sha512-XXTUwCvisa5oacNGRP9SfNtYBNAMi+RPwBFmblZEF7N7swHYQS6/Zfk7SRwx4D5j3CH211YNRco1DEMNVfZCnQ==", "dev": true, + "license": "MIT", "dependencies": { "flat-cache": "^4.0.0" }, @@ -3398,6 +3886,7 @@ "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", "dev": true, + "license": "MIT", "dependencies": { "to-regex-range": "^5.0.1" }, @@ -3410,6 +3899,7 @@ "resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz", "integrity": "sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng==", "dev": true, 
+ "license": "MIT", "dependencies": { "locate-path": "^6.0.0", "path-exists": "^4.0.0" @@ -3426,6 +3916,7 @@ "resolved": "https://registry.npmjs.org/flat-cache/-/flat-cache-4.0.1.tgz", "integrity": "sha512-f7ccFPK3SXFHpx15UIGyRJ/FJQctuKZ0zVuN3frBo4HnK3cay9VEW0R6yPYFHC0AgqhukPzKjq22t5DmAyqGyw==", "dev": true, + "license": "MIT", "dependencies": { "flatted": "^3.2.9", "keyv": "^4.5.4" @@ -3438,12 +3929,14 @@ "version": "3.3.3", "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.3.tgz", "integrity": "sha512-GX+ysw4PBCz0PzosHDepZGANEuFCMLrnRTiEy9McGjmkCQYwRq4A/X786G/fjM/+OjsWSU1ZrY5qyARZmO/uwg==", - "dev": true + "dev": true, + "license": "ISC" }, "node_modules/float-tooltip": { "version": "1.7.5", "resolved": "https://registry.npmjs.org/float-tooltip/-/float-tooltip-1.7.5.tgz", "integrity": "sha512-/kXzuDnnBqyyWyhDMH7+PfP8J/oXiAavGzcRxASOMRHFuReDtofizLLJsf7nnDLAfEaMW4pVWaXrAjtnglpEkg==", + "license": "MIT", "dependencies": { "d3-selection": "2 - 3", "kapsule": "^1.16", @@ -3458,6 +3951,7 @@ "resolved": "https://registry.npmjs.org/for-each/-/for-each-0.3.5.tgz", "integrity": "sha512-dKx12eRCVIzqCxFGplyFKJMPvLEWgmNtUrpTiJIR5u97zEhRG8ySrtboPHZXx7daLxQVrl643cTzbab2tkQjxg==", "dev": true, + "license": "MIT", "dependencies": { "is-callable": "^1.2.7" }, @@ -3469,9 +3963,10 @@ } }, "node_modules/force-graph": { - "version": "1.50.1", - "resolved": "https://registry.npmjs.org/force-graph/-/force-graph-1.50.1.tgz", - "integrity": "sha512-CtldBdsUHLmlnerVYe09V9Bxi5iz8GZce1WdBSkwGAFgNFTYn6cW90NQ1lOh/UVm0NhktMRHKugXrS9Sl8Bl3A==", + "version": "1.51.0", + "resolved": "https://registry.npmjs.org/force-graph/-/force-graph-1.51.0.tgz", + "integrity": "sha512-aTnihCmiMA0ItLJLCbrQYS9mzriopW24goFPgUnKAAmAlPogTSmFWqoBPMXzIfPb7bs04Hur5zEI4WYgLW3Sig==", + "license": "MIT", "dependencies": { "@tweenjs/tween.js": "18 - 25", "accessor-fn": "1", @@ -3498,6 +3993,7 @@ "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", "integrity": 
"sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", "dev": true, + "license": "MIT", "funding": { "url": "https://github.com/sponsors/ljharb" } @@ -3507,6 +4003,7 @@ "resolved": "https://registry.npmjs.org/function.prototype.name/-/function.prototype.name-1.1.8.tgz", "integrity": "sha512-e5iwyodOHhbMr/yNrc7fDYG4qlbIvI5gajyzPnb5TCwyhjApznQh1BMFou9b30SevY43gCJKXycoCBjMbsuW0Q==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.3", @@ -3527,15 +4024,37 @@ "resolved": "https://registry.npmjs.org/functions-have-names/-/functions-have-names-1.2.3.tgz", "integrity": "sha512-xckBUXyTIqT97tq2x2AMb+g163b5JFysYk0x4qxNFwbfQkmNZoiRHb6sPzI9/QV33WeuvVYBUIiD4NzNIyqaRQ==", "dev": true, + "license": "MIT", "funding": { "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/generator-function": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/generator-function/-/generator-function-2.0.1.tgz", + "integrity": "sha512-SFdFmIJi+ybC0vjlHN0ZGVGHc3lgE0DxPAT0djjVg+kjOnSqclqmj0KQ7ykTOLP6YxoqOvuAODGdcHJn+43q3g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/gensync": { + "version": "1.0.0-beta.2", + "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", + "integrity": "sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, "node_modules/get-intrinsic": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", "dev": true, + "license": "MIT", "dependencies": { "call-bind-apply-helpers": "^1.0.2", "es-define-property": "^1.0.1", @@ -3560,6 +4079,7 @@ "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", 
"integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", "dev": true, + "license": "MIT", "dependencies": { "dunder-proto": "^1.0.1", "es-object-atoms": "^1.0.0" @@ -3573,6 +4093,7 @@ "resolved": "https://registry.npmjs.org/get-symbol-description/-/get-symbol-description-1.1.0.tgz", "integrity": "sha512-w9UMqWwJxHNOvoNzSJ2oPF5wvYcvP7jUvYzhp67yEhTi17ZDBBC1z9pTdGuzjD+EFIqLSYRweZjqfiPzQ06Ebg==", "dev": true, + "license": "MIT", "dependencies": { "call-bound": "^1.0.3", "es-errors": "^1.3.0", @@ -3586,10 +4107,11 @@ } }, "node_modules/get-tsconfig": { - "version": "4.10.1", - "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.10.1.tgz", - "integrity": "sha512-auHyJ4AgMz7vgS8Hp3N6HXSmlMdUyhSUrfBF16w153rxtLIEOE+HGqaBppczZvnHLqQJfiHotCYpNhl0lUROFQ==", + "version": "4.13.0", + "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.13.0.tgz", + "integrity": "sha512-1VKTZJCwBrvbd+Wn3AOgQP/2Av+TfTCOlE4AcRJE72W1ksZXbAx8PPBR9RzgTeSPzlPMHrbANMH3LbltH73wxQ==", "dev": true, + "license": "MIT", "dependencies": { "resolve-pkg-maps": "^1.0.0" }, @@ -3602,6 +4124,7 @@ "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz", "integrity": "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==", "dev": true, + "license": "ISC", "dependencies": { "is-glob": "^4.0.3" }, @@ -3614,6 +4137,7 @@ "resolved": "https://registry.npmjs.org/globals/-/globals-14.0.0.tgz", "integrity": "sha512-oahGvuMGQlPw/ivIYBjVSrWAfWLBeku5tpPE2fOPLi+WHffIWbuh2tCjhyQhTBPMf5E9jDEH4FOmTYgYwbKwtQ==", "dev": true, + "license": "MIT", "engines": { "node": ">=18" }, @@ -3626,6 +4150,7 @@ "resolved": "https://registry.npmjs.org/globalthis/-/globalthis-1.0.4.tgz", "integrity": "sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ==", "dev": true, + "license": "MIT", "dependencies": { "define-properties": "^1.2.1", "gopd": "^1.0.1" @@ 
-3642,6 +4167,7 @@ "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", "dev": true, + "license": "MIT", "engines": { "node": ">= 0.4" }, @@ -3653,19 +4179,22 @@ "version": "4.2.11", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", - "dev": true + "dev": true, + "license": "ISC" }, "node_modules/graphemer": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/graphemer/-/graphemer-1.4.0.tgz", "integrity": "sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/has-bigints": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/has-bigints/-/has-bigints-1.1.0.tgz", "integrity": "sha512-R3pbpkcIqv2Pm3dUwgjclDRVmWpTJW2DcMzcIhEXEx1oh/CEMObMm3KLmRJOdvhM7o4uQBnwr8pzRK2sJWIqfg==", "dev": true, + "license": "MIT", "engines": { "node": ">= 0.4" }, @@ -3678,6 +4207,7 @@ "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", "dev": true, + "license": "MIT", "engines": { "node": ">=8" } @@ -3687,6 +4217,7 @@ "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz", "integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==", "dev": true, + "license": "MIT", "dependencies": { "es-define-property": "^1.0.0" }, @@ -3699,6 +4230,7 @@ "resolved": "https://registry.npmjs.org/has-proto/-/has-proto-1.2.0.tgz", "integrity": "sha512-KIL7eQPfHQRC8+XluaIw7BHUwwqL19bQn4hzNgdr+1wXoU0KKj6rufu47lhY7KbJR2C6T6+PfyN0Ea7wkSS+qQ==", "dev": true, + "license": "MIT", "dependencies": { 
"dunder-proto": "^1.0.0" }, @@ -3714,6 +4246,7 @@ "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", "dev": true, + "license": "MIT", "engines": { "node": ">= 0.4" }, @@ -3726,6 +4259,7 @@ "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", "dev": true, + "license": "MIT", "dependencies": { "has-symbols": "^1.0.3" }, @@ -3741,6 +4275,7 @@ "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", "dev": true, + "license": "MIT", "dependencies": { "function-bind": "^1.1.2" }, @@ -3748,11 +4283,29 @@ "node": ">= 0.4" } }, + "node_modules/hermes-estree": { + "version": "0.25.1", + "resolved": "https://registry.npmjs.org/hermes-estree/-/hermes-estree-0.25.1.tgz", + "integrity": "sha512-0wUoCcLp+5Ev5pDW2OriHC2MJCbwLwuRx+gAqMTOkGKJJiBCLjtrvy4PWUGn6MIVefecRpzoOZ/UV6iGdOr+Cw==", + "dev": true, + "license": "MIT" + }, + "node_modules/hermes-parser": { + "version": "0.25.1", + "resolved": "https://registry.npmjs.org/hermes-parser/-/hermes-parser-0.25.1.tgz", + "integrity": "sha512-6pEjquH3rqaI6cYAXYPcz9MS4rY6R4ngRgrgfDshRptUZIc3lw0MCIJIGDj9++mfySOuPTHB4nrSW99BCvOPIA==", + "dev": true, + "license": "MIT", + "dependencies": { + "hermes-estree": "0.25.1" + } + }, "node_modules/ignore": { "version": "5.3.2", "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz", "integrity": "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g==", "dev": true, + "license": "MIT", "engines": { "node": ">= 4" } @@ -3762,6 +4315,7 @@ "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.1.tgz", "integrity": 
"sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ==", "dev": true, + "license": "MIT", "dependencies": { "parent-module": "^1.0.0", "resolve-from": "^4.0.0" @@ -3778,6 +4332,7 @@ "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", "integrity": "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==", "dev": true, + "license": "MIT", "engines": { "node": ">=0.8.19" } @@ -3786,6 +4341,7 @@ "version": "1.4.2", "resolved": "https://registry.npmjs.org/index-array-by/-/index-array-by-1.4.2.tgz", "integrity": "sha512-SP23P27OUKzXWEC/TOyWlwLviofQkCSCKONnc62eItjp69yCZZPqDQtr3Pw5gJDnPeUMqExmKydNZaJO0FU9pw==", + "license": "MIT", "engines": { "node": ">=12" } @@ -3795,6 +4351,7 @@ "resolved": "https://registry.npmjs.org/internal-slot/-/internal-slot-1.1.0.tgz", "integrity": "sha512-4gd7VpWNQNB4UKKCFFVcp1AVv+FMOgs9NKzjHKusc8jTMhd5eL1NqQqOpE0KzMds804/yHlglp3uxgluOqAPLw==", "dev": true, + "license": "MIT", "dependencies": { "es-errors": "^1.3.0", "hasown": "^2.0.2", @@ -3808,6 +4365,7 @@ "version": "2.0.3", "resolved": "https://registry.npmjs.org/internmap/-/internmap-2.0.3.tgz", "integrity": "sha512-5Hh7Y1wQbvY5ooGgPbDaL5iYLAPzMTUrjMulskHLH6wnv/A+1q5rgEaiuqEjB+oxGXIVZs1FF+R/KPN3ZSQYYg==", + "license": "ISC", "engines": { "node": ">=12" } @@ -3817,6 +4375,7 @@ "resolved": "https://registry.npmjs.org/is-array-buffer/-/is-array-buffer-3.0.5.tgz", "integrity": "sha512-DDfANUiiG2wC1qawP66qlTugJeL5HyzMpfr8lLK+jMQirGzNod0B12cFB/9q838Ru27sBwfw78/rdoU7RERz6A==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.3", @@ -3829,17 +4388,12 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/is-arrayish": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.2.tgz", - "integrity": "sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ==", - 
"optional": true - }, "node_modules/is-async-function": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/is-async-function/-/is-async-function-2.1.1.tgz", "integrity": "sha512-9dgM/cZBnNvjzaMYHVoxxfPj2QXt22Ev7SuuPrs+xav0ukGB0S6d4ydZdEiM48kLx5kDV+QBPrpVnFyefL8kkQ==", "dev": true, + "license": "MIT", "dependencies": { "async-function": "^1.0.0", "call-bound": "^1.0.3", @@ -3859,6 +4413,7 @@ "resolved": "https://registry.npmjs.org/is-bigint/-/is-bigint-1.1.0.tgz", "integrity": "sha512-n4ZT37wG78iz03xPRKJrHTdZbe3IicyucEtdRsV5yglwc3GyUfbAfpSeD0FJ41NbUNSt5wbhqfp1fS+BgnvDFQ==", "dev": true, + "license": "MIT", "dependencies": { "has-bigints": "^1.0.2" }, @@ -3874,6 +4429,7 @@ "resolved": "https://registry.npmjs.org/is-boolean-object/-/is-boolean-object-1.2.2.tgz", "integrity": "sha512-wa56o2/ElJMYqjCjGkXri7it5FbebW5usLw/nPmCMs5DeZ7eziSYZhSmPRn0txqeW4LnAmQQU7FgqLpsEFKM4A==", "dev": true, + "license": "MIT", "dependencies": { "call-bound": "^1.0.3", "has-tostringtag": "^1.0.2" @@ -3890,15 +4446,30 @@ "resolved": "https://registry.npmjs.org/is-bun-module/-/is-bun-module-2.0.0.tgz", "integrity": "sha512-gNCGbnnnnFAUGKeZ9PdbyeGYJqewpmc2aKHUEMO5nQPWU9lOmv7jcmQIv+qHD8fXW6W7qfuCwX4rY9LNRjXrkQ==", "dev": true, + "license": "MIT", "dependencies": { "semver": "^7.7.1" } }, + "node_modules/is-bun-module/node_modules/semver": { + "version": "7.7.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz", + "integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==", + "dev": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/is-callable": { "version": "1.2.7", "resolved": "https://registry.npmjs.org/is-callable/-/is-callable-1.2.7.tgz", "integrity": "sha512-1BC0BVFhS/p0qtw6enp8e+8OD0UrK0oFLztSjNzhcKA3WDuJxxAPXzPuPtKkjEY9UUoEWlX/8fgKeu2S8i9JTA==", "dev": true, + "license": "MIT", "engines": { "node": ">= 0.4" }, @@ -3911,6 +4482,7 @@ 
"resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.16.1.tgz", "integrity": "sha512-UfoeMA6fIJ8wTYFEUjelnaGI67v6+N7qXJEvQuIGa99l4xsCruSYOVSQ0uPANn4dAzm8lkYPaKLrrijLq7x23w==", "dev": true, + "license": "MIT", "dependencies": { "hasown": "^2.0.2" }, @@ -3926,6 +4498,7 @@ "resolved": "https://registry.npmjs.org/is-data-view/-/is-data-view-1.0.2.tgz", "integrity": "sha512-RKtWF8pGmS87i2D6gqQu/l7EYRlVdfzemCJN/P3UOs//x1QE7mfhvzHIApBTRf7axvT6DMGwSwBXYCT0nfB9xw==", "dev": true, + "license": "MIT", "dependencies": { "call-bound": "^1.0.2", "get-intrinsic": "^1.2.6", @@ -3943,6 +4516,7 @@ "resolved": "https://registry.npmjs.org/is-date-object/-/is-date-object-1.1.0.tgz", "integrity": "sha512-PwwhEakHVKTdRNVOw+/Gyh0+MzlCl4R6qKvkhuvLtPMggI1WAHt9sOwZxQLSGpUaDnrdyDsomoRgNnCfKNSXXg==", "dev": true, + "license": "MIT", "dependencies": { "call-bound": "^1.0.2", "has-tostringtag": "^1.0.2" @@ -3959,6 +4533,7 @@ "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", "dev": true, + "license": "MIT", "engines": { "node": ">=0.10.0" } @@ -3968,6 +4543,7 @@ "resolved": "https://registry.npmjs.org/is-finalizationregistry/-/is-finalizationregistry-1.1.1.tgz", "integrity": "sha512-1pC6N8qWJbWoPtEjgcL2xyhQOP491EQjeUo3qTKcmV8YSDDJrOepfG8pcC7h/QgnQHYSv0mJ3Z/ZWxmatVrysg==", "dev": true, + "license": "MIT", "dependencies": { "call-bound": "^1.0.3" }, @@ -3979,13 +4555,15 @@ } }, "node_modules/is-generator-function": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/is-generator-function/-/is-generator-function-1.1.0.tgz", - "integrity": "sha512-nPUB5km40q9e8UfN/Zc24eLlzdSf9OfKByBw9CIdw4H1giPMeA0OIJvbchsCu4npfI2QcMVBsGEBHKZ7wLTWmQ==", + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/is-generator-function/-/is-generator-function-1.1.2.tgz", + "integrity": 
"sha512-upqt1SkGkODW9tsGNG5mtXTXtECizwtS2kA161M+gJPc1xdb/Ax629af6YrTwcOeQHbewrPNlE5Dx7kzvXTizA==", "dev": true, + "license": "MIT", "dependencies": { - "call-bound": "^1.0.3", - "get-proto": "^1.0.0", + "call-bound": "^1.0.4", + "generator-function": "^2.0.0", + "get-proto": "^1.0.1", "has-tostringtag": "^1.0.2", "safe-regex-test": "^1.1.0" }, @@ -4001,6 +4579,7 @@ "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", "dev": true, + "license": "MIT", "dependencies": { "is-extglob": "^2.1.1" }, @@ -4013,6 +4592,7 @@ "resolved": "https://registry.npmjs.org/is-map/-/is-map-2.0.3.tgz", "integrity": "sha512-1Qed0/Hr2m+YqxnM09CjA2d/i6YZNfF6R2oRAOj36eUdS6qIV/huPJNSEpKbupewFs+ZsJlxsjjPbc0/afW6Lw==", "dev": true, + "license": "MIT", "engines": { "node": ">= 0.4" }, @@ -4025,6 +4605,7 @@ "resolved": "https://registry.npmjs.org/is-negative-zero/-/is-negative-zero-2.0.3.tgz", "integrity": "sha512-5KoIu2Ngpyek75jXodFvnafB6DJgr3u8uuK0LEZJjrU19DrMD3EVERaR8sjz8CCGgpZvxPl9SuE1GMVPFHx1mw==", "dev": true, + "license": "MIT", "engines": { "node": ">= 0.4" }, @@ -4037,6 +4618,7 @@ "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", "dev": true, + "license": "MIT", "engines": { "node": ">=0.12.0" } @@ -4046,6 +4628,7 @@ "resolved": "https://registry.npmjs.org/is-number-object/-/is-number-object-1.1.1.tgz", "integrity": "sha512-lZhclumE1G6VYD8VHe35wFaIif+CTy5SJIi5+3y4psDgWu4wPDoBhF8NxUOinEc7pHgiTsT6MaBb92rKhhD+Xw==", "dev": true, + "license": "MIT", "dependencies": { "call-bound": "^1.0.3", "has-tostringtag": "^1.0.2" @@ -4062,6 +4645,7 @@ "resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.2.1.tgz", "integrity": "sha512-MjYsKHO5O7mCsmRGxWcLWheFqN9DJ/2TmngvjKXihe6efViPqc274+Fx/4fYj/r03+ESvBdTXK0V6tA3rgez1g==", "dev": 
true, + "license": "MIT", "dependencies": { "call-bound": "^1.0.2", "gopd": "^1.2.0", @@ -4080,6 +4664,7 @@ "resolved": "https://registry.npmjs.org/is-set/-/is-set-2.0.3.tgz", "integrity": "sha512-iPAjerrse27/ygGLxw+EBR9agv9Y6uLeYVJMu+QNCoouJ1/1ri0mGrcWpfCqFZuzzx3WjtwxG098X+n4OuRkPg==", "dev": true, + "license": "MIT", "engines": { "node": ">= 0.4" }, @@ -4092,6 +4677,7 @@ "resolved": "https://registry.npmjs.org/is-shared-array-buffer/-/is-shared-array-buffer-1.0.4.tgz", "integrity": "sha512-ISWac8drv4ZGfwKl5slpHG9OwPNty4jOWPRIhBpxOoD+hqITiwuipOQ2bNthAzwA3B4fIjO4Nln74N0S9byq8A==", "dev": true, + "license": "MIT", "dependencies": { "call-bound": "^1.0.3" }, @@ -4107,6 +4693,7 @@ "resolved": "https://registry.npmjs.org/is-string/-/is-string-1.1.1.tgz", "integrity": "sha512-BtEeSsoaQjlSPBemMQIrY1MY0uM6vnS1g5fmufYOtnxLGUZM2178PKbhsk7Ffv58IX+ZtcvoGwccYsh0PglkAA==", "dev": true, + "license": "MIT", "dependencies": { "call-bound": "^1.0.3", "has-tostringtag": "^1.0.2" @@ -4123,6 +4710,7 @@ "resolved": "https://registry.npmjs.org/is-symbol/-/is-symbol-1.1.1.tgz", "integrity": "sha512-9gGx6GTtCQM73BgmHQXfDmLtfjjTUDSyoxTCbp5WtoixAhfgsDirWIcVQ/IHpvI5Vgd5i/J5F7B9cN/WlVbC/w==", "dev": true, + "license": "MIT", "dependencies": { "call-bound": "^1.0.2", "has-symbols": "^1.1.0", @@ -4140,6 +4728,7 @@ "resolved": "https://registry.npmjs.org/is-typed-array/-/is-typed-array-1.1.15.tgz", "integrity": "sha512-p3EcsicXjit7SaskXHs1hA91QxgTw46Fv6EFKKGS5DRFLD8yKnohjF3hxoju94b/OcMZoQukzpPpBE9uLVKzgQ==", "dev": true, + "license": "MIT", "dependencies": { "which-typed-array": "^1.1.16" }, @@ -4155,6 +4744,7 @@ "resolved": "https://registry.npmjs.org/is-weakmap/-/is-weakmap-2.0.2.tgz", "integrity": "sha512-K5pXYOm9wqY1RgjpL3YTkF39tni1XajUIkawTLUo9EZEVUFga5gSQJF8nNS7ZwJQ02y+1YCNYcMh+HIf1ZqE+w==", "dev": true, + "license": "MIT", "engines": { "node": ">= 0.4" }, @@ -4167,6 +4757,7 @@ "resolved": "https://registry.npmjs.org/is-weakref/-/is-weakref-1.1.1.tgz", "integrity": 
"sha512-6i9mGWSlqzNMEqpCp93KwRS1uUOodk2OJ6b+sq7ZPDSy2WuI5NFIxp/254TytR8ftefexkWn5xNiHUNpPOfSew==", "dev": true, + "license": "MIT", "dependencies": { "call-bound": "^1.0.3" }, @@ -4182,6 +4773,7 @@ "resolved": "https://registry.npmjs.org/is-weakset/-/is-weakset-2.0.4.tgz", "integrity": "sha512-mfcwb6IzQyOKTs84CQMrOwW4gQcaTOAWJ0zzJCl2WSPDrWk/OzDaImWFH3djXhb24g4eudZfLRozAvPGw4d9hQ==", "dev": true, + "license": "MIT", "dependencies": { "call-bound": "^1.0.3", "get-intrinsic": "^1.2.6" @@ -4197,19 +4789,22 @@ "version": "2.0.5", "resolved": "https://registry.npmjs.org/isarray/-/isarray-2.0.5.tgz", "integrity": "sha512-xHjhDr3cNBK0BzdUJSPXZntQUx/mwMS5Rw4A7lPJ90XGAO6ISP/ePDNuo0vhqOZU+UD5JoodwCAAoZQd3FeAKw==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/isexe": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", - "dev": true + "dev": true, + "license": "ISC" }, "node_modules/iterator.prototype": { "version": "1.1.5", "resolved": "https://registry.npmjs.org/iterator.prototype/-/iterator.prototype-1.1.5.tgz", "integrity": "sha512-H0dkQoCa3b2VEeKQBOxFph+JAbcrQdE7KC0UkqwpLmv2EC4P41QXP+rqo9wYodACiG5/WM5s9oDApTU8utwj9g==", "dev": true, + "license": "MIT", "dependencies": { "define-data-property": "^1.1.4", "es-object-atoms": "^1.0.0", @@ -4226,23 +4821,26 @@ "version": "1.1.2", "resolved": "https://registry.npmjs.org/jerrypick/-/jerrypick-1.1.2.tgz", "integrity": "sha512-YKnxXEekXKzhpf7CLYA0A+oDP8V0OhICNCr5lv96FvSsDEmrb0GKM776JgQvHTMjr7DTTPEVv/1Ciaw0uEWzBA==", + "license": "MIT", "engines": { "node": ">=12" } }, "node_modules/jiti": { - "version": "2.4.2", - "resolved": "https://registry.npmjs.org/jiti/-/jiti-2.4.2.tgz", - "integrity": "sha512-rg9zJN+G4n2nfJl5MW3BMygZX56zKPNVEYYqq7adpmMh4Jn2QNEwhvQlFy6jPVdcod7txZtKHWnyZiA3a0zP7A==", + "version": "2.6.1", + "resolved": 
"https://registry.npmjs.org/jiti/-/jiti-2.6.1.tgz", + "integrity": "sha512-ekilCSN1jwRvIbgeg/57YFh8qQDNbwDb9xT/qu2DAHbFFZUicIl4ygVaAvzveMhMVr3LnpSKTNnwt8PoOfmKhQ==", "dev": true, + "license": "MIT", "bin": { "jiti": "lib/jiti-cli.mjs" } }, "node_modules/jose": { - "version": "5.10.0", - "resolved": "https://registry.npmjs.org/jose/-/jose-5.10.0.tgz", - "integrity": "sha512-s+3Al/p9g32Iq+oqXxkW//7jk2Vig6FF1CFqzVXoTUXt2qz89YWbL+OwS17NFYEvxC35n0FKeGO2LGYSxeM2Gg==", + "version": "6.1.2", + "resolved": "https://registry.npmjs.org/jose/-/jose-6.1.2.tgz", + "integrity": "sha512-MpcPtHLE5EmztuFIqB0vzHAWJPpmN1E6L4oo+kze56LIs3MyXIj9ZHMDxqOvkP38gBR7K1v3jqd4WU2+nrfONQ==", + "license": "MIT", "funding": { "url": "https://github.com/sponsors/panva" } @@ -4250,13 +4848,15 @@ "node_modules/js-tokens": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", - "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==" + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", + "license": "MIT" }, "node_modules/js-yaml": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", - "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz", + "integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==", "dev": true, + "license": "MIT", "dependencies": { "argparse": "^2.0.1" }, @@ -4264,34 +4864,51 @@ "js-yaml": "bin/js-yaml.js" } }, + "node_modules/jsesc": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/jsesc/-/jsesc-3.1.0.tgz", + "integrity": "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA==", + "dev": true, + "license": "MIT", + "bin": { + 
"jsesc": "bin/jsesc" + }, + "engines": { + "node": ">=6" + } + }, "node_modules/json-buffer": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.1.tgz", "integrity": "sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/json-schema-traverse": { "version": "0.4.1", "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/json-stable-stringify-without-jsonify": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz", "integrity": "sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/json5": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.2.tgz", - "integrity": "sha512-g1MWMLBiz8FKi1e4w0UyVL3w+iJceWAFBAaBnnGKOpNa5f8TLktkbre1+s6oICydWAm+HRUGTmI+//xv2hvXYA==", + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", "dev": true, - "dependencies": { - "minimist": "^1.2.0" - }, + "license": "MIT", "bin": { "json5": "lib/cli.js" + }, + "engines": { + "node": ">=6" } }, "node_modules/jsx-ast-utils": { @@ -4299,6 +4916,7 @@ "resolved": "https://registry.npmjs.org/jsx-ast-utils/-/jsx-ast-utils-3.3.5.tgz", "integrity": "sha512-ZZow9HBI5O6EPgSJLUb8n2NKgmVWTwCvHGwFuJlMjvLFqlGG6pjirPhtdsseaLZjSibD8eegzmYpUZwoIlj2cQ==", "dev": true, + "license": "MIT", "dependencies": { "array-includes": "^3.1.6", "array.prototype.flat": "^1.3.1", @@ 
-4313,6 +4931,7 @@ "version": "1.16.3", "resolved": "https://registry.npmjs.org/kapsule/-/kapsule-1.16.3.tgz", "integrity": "sha512-4+5mNNf4vZDSwPhKprKwz3330iisPrb08JyMgbsdFrimBCKNHecua/WBwvVg3n7vwx0C1ARjfhwIpbrbd9n5wg==", + "license": "MIT", "dependencies": { "lodash-es": "4" }, @@ -4325,6 +4944,7 @@ "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz", "integrity": "sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw==", "dev": true, + "license": "MIT", "dependencies": { "json-buffer": "3.0.1" } @@ -4333,13 +4953,15 @@ "version": "0.3.23", "resolved": "https://registry.npmjs.org/language-subtag-registry/-/language-subtag-registry-0.3.23.tgz", "integrity": "sha512-0K65Lea881pHotoGEa5gDlMxt3pctLi2RplBb7Ezh4rRdLEOtgi7n4EwK9lamnUCkKBqaeKRVebTq6BAxSkpXQ==", - "dev": true + "dev": true, + "license": "CC0-1.0" }, "node_modules/language-tags": { "version": "1.0.9", "resolved": "https://registry.npmjs.org/language-tags/-/language-tags-1.0.9.tgz", "integrity": "sha512-MbjN408fEndfiQXbFQ1vnd+1NoLDsnQW41410oQBXiyXDMYH5z505juWa4KUE1LqxRC7DgOgZDbKLxHIwm27hA==", "dev": true, + "license": "MIT", "dependencies": { "language-subtag-registry": "^0.3.20" }, @@ -4352,6 +4974,7 @@ "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", "integrity": "sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==", "dev": true, + "license": "MIT", "dependencies": { "prelude-ls": "^1.2.1", "type-check": "~0.4.0" @@ -4361,10 +4984,11 @@ } }, "node_modules/lightningcss": { - "version": "1.30.1", - "resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.30.1.tgz", - "integrity": "sha512-xi6IyHML+c9+Q3W0S4fCQJOym42pyurFiJUHEcEyHS0CeKzia4yZDEsLlqOFykxOdHpNy0NmvVO31vcSqAxJCg==", + "version": "1.30.2", + "resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.30.2.tgz", + "integrity": 
"sha512-utfs7Pr5uJyyvDETitgsaqSyjCb2qNRAtuqUeWIAKztsOYdcACf2KtARYXg2pSvhkt+9NfoaNY7fxjl6nuMjIQ==", "dev": true, + "license": "MPL-2.0", "dependencies": { "detect-libc": "^2.0.3" }, @@ -4376,26 +5000,49 @@ "url": "https://opencollective.com/parcel" }, "optionalDependencies": { - "lightningcss-darwin-arm64": "1.30.1", - "lightningcss-darwin-x64": "1.30.1", - "lightningcss-freebsd-x64": "1.30.1", - "lightningcss-linux-arm-gnueabihf": "1.30.1", - "lightningcss-linux-arm64-gnu": "1.30.1", - "lightningcss-linux-arm64-musl": "1.30.1", - "lightningcss-linux-x64-gnu": "1.30.1", - "lightningcss-linux-x64-musl": "1.30.1", - "lightningcss-win32-arm64-msvc": "1.30.1", - "lightningcss-win32-x64-msvc": "1.30.1" + "lightningcss-android-arm64": "1.30.2", + "lightningcss-darwin-arm64": "1.30.2", + "lightningcss-darwin-x64": "1.30.2", + "lightningcss-freebsd-x64": "1.30.2", + "lightningcss-linux-arm-gnueabihf": "1.30.2", + "lightningcss-linux-arm64-gnu": "1.30.2", + "lightningcss-linux-arm64-musl": "1.30.2", + "lightningcss-linux-x64-gnu": "1.30.2", + "lightningcss-linux-x64-musl": "1.30.2", + "lightningcss-win32-arm64-msvc": "1.30.2", + "lightningcss-win32-x64-msvc": "1.30.2" } }, - "node_modules/lightningcss-darwin-arm64": { - "version": "1.30.1", - "resolved": "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.30.1.tgz", - "integrity": "sha512-c8JK7hyE65X1MHMN+Viq9n11RRC7hgin3HhYKhrMyaXflk5GVplZ60IxyoVtzILeKr+xAJwg6zK6sjTBJ0FKYQ==", + "node_modules/lightningcss-android-arm64": { + "version": "1.30.2", + "resolved": "https://registry.npmjs.org/lightningcss-android-arm64/-/lightningcss-android-arm64-1.30.2.tgz", + "integrity": "sha512-BH9sEdOCahSgmkVhBLeU7Hc9DWeZ1Eb6wNS6Da8igvUwAe0sqROHddIlvU06q3WyXVEOYDZ6ykBZQnjTbmo4+A==", "cpu": [ "arm64" ], "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": 
"https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-darwin-arm64": { + "version": "1.30.2", + "resolved": "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.30.2.tgz", + "integrity": "sha512-ylTcDJBN3Hp21TdhRT5zBOIi73P6/W0qwvlFEk22fkdXchtNTOU4Qc37SkzV+EKYxLouZ6M4LG9NfZ1qkhhBWA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MPL-2.0", "optional": true, "os": [ "darwin" @@ -4409,13 +5056,14 @@ } }, "node_modules/lightningcss-darwin-x64": { - "version": "1.30.1", - "resolved": "https://registry.npmjs.org/lightningcss-darwin-x64/-/lightningcss-darwin-x64-1.30.1.tgz", - "integrity": "sha512-k1EvjakfumAQoTfcXUcHQZhSpLlkAuEkdMBsI/ivWw9hL+7FtilQc0Cy3hrx0AAQrVtQAbMI7YjCgYgvn37PzA==", + "version": "1.30.2", + "resolved": "https://registry.npmjs.org/lightningcss-darwin-x64/-/lightningcss-darwin-x64-1.30.2.tgz", + "integrity": "sha512-oBZgKchomuDYxr7ilwLcyms6BCyLn0z8J0+ZZmfpjwg9fRVZIR5/GMXd7r9RH94iDhld3UmSjBM6nXWM2TfZTQ==", "cpu": [ "x64" ], "dev": true, + "license": "MPL-2.0", "optional": true, "os": [ "darwin" @@ -4429,13 +5077,14 @@ } }, "node_modules/lightningcss-freebsd-x64": { - "version": "1.30.1", - "resolved": "https://registry.npmjs.org/lightningcss-freebsd-x64/-/lightningcss-freebsd-x64-1.30.1.tgz", - "integrity": "sha512-kmW6UGCGg2PcyUE59K5r0kWfKPAVy4SltVeut+umLCFoJ53RdCUWxcRDzO1eTaxf/7Q2H7LTquFHPL5R+Gjyig==", + "version": "1.30.2", + "resolved": "https://registry.npmjs.org/lightningcss-freebsd-x64/-/lightningcss-freebsd-x64-1.30.2.tgz", + "integrity": "sha512-c2bH6xTrf4BDpK8MoGG4Bd6zAMZDAXS569UxCAGcA7IKbHNMlhGQ89eRmvpIUGfKWNVdbhSbkQaWhEoMGmGslA==", "cpu": [ "x64" ], "dev": true, + "license": "MPL-2.0", "optional": true, "os": [ "freebsd" @@ -4449,13 +5098,14 @@ } }, "node_modules/lightningcss-linux-arm-gnueabihf": { - "version": "1.30.1", - "resolved": "https://registry.npmjs.org/lightningcss-linux-arm-gnueabihf/-/lightningcss-linux-arm-gnueabihf-1.30.1.tgz", - "integrity": 
"sha512-MjxUShl1v8pit+6D/zSPq9S9dQ2NPFSQwGvxBCYaBYLPlCWuPh9/t1MRS8iUaR8i+a6w7aps+B4N0S1TYP/R+Q==", + "version": "1.30.2", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm-gnueabihf/-/lightningcss-linux-arm-gnueabihf-1.30.2.tgz", + "integrity": "sha512-eVdpxh4wYcm0PofJIZVuYuLiqBIakQ9uFZmipf6LF/HRj5Bgm0eb3qL/mr1smyXIS1twwOxNWndd8z0E374hiA==", "cpu": [ "arm" ], "dev": true, + "license": "MPL-2.0", "optional": true, "os": [ "linux" @@ -4469,13 +5119,14 @@ } }, "node_modules/lightningcss-linux-arm64-gnu": { - "version": "1.30.1", - "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-gnu/-/lightningcss-linux-arm64-gnu-1.30.1.tgz", - "integrity": "sha512-gB72maP8rmrKsnKYy8XUuXi/4OctJiuQjcuqWNlJQ6jZiWqtPvqFziskH3hnajfvKB27ynbVCucKSm2rkQp4Bw==", + "version": "1.30.2", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-gnu/-/lightningcss-linux-arm64-gnu-1.30.2.tgz", + "integrity": "sha512-UK65WJAbwIJbiBFXpxrbTNArtfuznvxAJw4Q2ZGlU8kPeDIWEX1dg3rn2veBVUylA2Ezg89ktszWbaQnxD/e3A==", "cpu": [ "arm64" ], "dev": true, + "license": "MPL-2.0", "optional": true, "os": [ "linux" @@ -4489,13 +5140,14 @@ } }, "node_modules/lightningcss-linux-arm64-musl": { - "version": "1.30.1", - "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-musl/-/lightningcss-linux-arm64-musl-1.30.1.tgz", - "integrity": "sha512-jmUQVx4331m6LIX+0wUhBbmMX7TCfjF5FoOH6SD1CttzuYlGNVpA7QnrmLxrsub43ClTINfGSYyHe2HWeLl5CQ==", + "version": "1.30.2", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-musl/-/lightningcss-linux-arm64-musl-1.30.2.tgz", + "integrity": "sha512-5Vh9dGeblpTxWHpOx8iauV02popZDsCYMPIgiuw97OJ5uaDsL86cnqSFs5LZkG3ghHoX5isLgWzMs+eD1YzrnA==", "cpu": [ "arm64" ], "dev": true, + "license": "MPL-2.0", "optional": true, "os": [ "linux" @@ -4509,13 +5161,14 @@ } }, "node_modules/lightningcss-linux-x64-gnu": { - "version": "1.30.1", - "resolved": 
"https://registry.npmjs.org/lightningcss-linux-x64-gnu/-/lightningcss-linux-x64-gnu-1.30.1.tgz", - "integrity": "sha512-piWx3z4wN8J8z3+O5kO74+yr6ze/dKmPnI7vLqfSqI8bccaTGY5xiSGVIJBDd5K5BHlvVLpUB3S2YCfelyJ1bw==", + "version": "1.30.2", + "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-gnu/-/lightningcss-linux-x64-gnu-1.30.2.tgz", + "integrity": "sha512-Cfd46gdmj1vQ+lR6VRTTadNHu6ALuw2pKR9lYq4FnhvgBc4zWY1EtZcAc6EffShbb1MFrIPfLDXD6Xprbnni4w==", "cpu": [ "x64" ], "dev": true, + "license": "MPL-2.0", "optional": true, "os": [ "linux" @@ -4529,13 +5182,14 @@ } }, "node_modules/lightningcss-linux-x64-musl": { - "version": "1.30.1", - "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-musl/-/lightningcss-linux-x64-musl-1.30.1.tgz", - "integrity": "sha512-rRomAK7eIkL+tHY0YPxbc5Dra2gXlI63HL+v1Pdi1a3sC+tJTcFrHX+E86sulgAXeI7rSzDYhPSeHHjqFhqfeQ==", + "version": "1.30.2", + "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-musl/-/lightningcss-linux-x64-musl-1.30.2.tgz", + "integrity": "sha512-XJaLUUFXb6/QG2lGIW6aIk6jKdtjtcffUT0NKvIqhSBY3hh9Ch+1LCeH80dR9q9LBjG3ewbDjnumefsLsP6aiA==", "cpu": [ "x64" ], "dev": true, + "license": "MPL-2.0", "optional": true, "os": [ "linux" @@ -4549,13 +5203,14 @@ } }, "node_modules/lightningcss-win32-arm64-msvc": { - "version": "1.30.1", - "resolved": "https://registry.npmjs.org/lightningcss-win32-arm64-msvc/-/lightningcss-win32-arm64-msvc-1.30.1.tgz", - "integrity": "sha512-mSL4rqPi4iXq5YVqzSsJgMVFENoa4nGTT/GjO2c0Yl9OuQfPsIfncvLrEW6RbbB24WtZ3xP/2CCmI3tNkNV4oA==", + "version": "1.30.2", + "resolved": "https://registry.npmjs.org/lightningcss-win32-arm64-msvc/-/lightningcss-win32-arm64-msvc-1.30.2.tgz", + "integrity": "sha512-FZn+vaj7zLv//D/192WFFVA0RgHawIcHqLX9xuWiQt7P0PtdFEVaxgF9rjM/IRYHQXNnk61/H/gb2Ei+kUQ4xQ==", "cpu": [ "arm64" ], "dev": true, + "license": "MPL-2.0", "optional": true, "os": [ "win32" @@ -4569,13 +5224,14 @@ } }, "node_modules/lightningcss-win32-x64-msvc": { - "version": "1.30.1", - "resolved": 
"https://registry.npmjs.org/lightningcss-win32-x64-msvc/-/lightningcss-win32-x64-msvc-1.30.1.tgz", - "integrity": "sha512-PVqXh48wh4T53F/1CCu8PIPCxLzWyCnn/9T5W1Jpmdy5h9Cwd+0YQS6/LwhHXSafuc61/xg9Lv5OrCby6a++jg==", + "version": "1.30.2", + "resolved": "https://registry.npmjs.org/lightningcss-win32-x64-msvc/-/lightningcss-win32-x64-msvc-1.30.2.tgz", + "integrity": "sha512-5g1yc73p+iAkid5phb4oVFMB45417DkRevRbt/El/gKXJk4jid+vPFF/AXbxn05Aky8PapwzZrdJShv5C0avjw==", "cpu": [ "x64" ], "dev": true, + "license": "MPL-2.0", "optional": true, "os": [ "win32" @@ -4593,6 +5249,7 @@ "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz", "integrity": "sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw==", "dev": true, + "license": "MIT", "dependencies": { "p-locate": "^5.0.0" }, @@ -4606,18 +5263,21 @@ "node_modules/lodash-es": { "version": "4.17.21", "resolved": "https://registry.npmjs.org/lodash-es/-/lodash-es-4.17.21.tgz", - "integrity": "sha512-mKnC+QJ9pWVzv+C4/U3rRsHapFfHvQFoFB92e52xeyGMcX6/OlIl78je1u8vePzYZSkkogMPJ2yjxxsb89cxyw==" + "integrity": "sha512-mKnC+QJ9pWVzv+C4/U3rRsHapFfHvQFoFB92e52xeyGMcX6/OlIl78je1u8vePzYZSkkogMPJ2yjxxsb89cxyw==", + "license": "MIT" }, "node_modules/lodash.merge": { "version": "4.6.2", "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz", "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/loose-envify": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", + "license": "MIT", "dependencies": { "js-tokens": "^3.0.0 || ^4.0.0" }, @@ -4625,13 +5285,24 @@ "loose-envify": "cli.js" } }, - "node_modules/magic-string": { - "version": "0.30.17", - "resolved": 
"https://registry.npmjs.org/magic-string/-/magic-string-0.30.17.tgz", - "integrity": "sha512-sNPKHvyjVf7gyjwS4xGTaW/mCnF8wnjtifKBEhxfZ7E/S8tQ0rssrwGNn6q8JH/ohItJfSQp9mBtQYuTlH5QnA==", + "node_modules/lru-cache": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", + "integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==", "dev": true, + "license": "ISC", "dependencies": { - "@jridgewell/sourcemap-codec": "^1.5.0" + "yallist": "^3.0.2" + } + }, + "node_modules/magic-string": { + "version": "0.30.21", + "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.21.tgz", + "integrity": "sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.5" } }, "node_modules/math-intrinsics": { @@ -4639,6 +5310,7 @@ "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", "dev": true, + "license": "MIT", "engines": { "node": ">= 0.4" } @@ -4648,6 +5320,7 @@ "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", "dev": true, + "license": "MIT", "engines": { "node": ">= 8" } @@ -4657,6 +5330,7 @@ "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz", "integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==", "dev": true, + "license": "MIT", "dependencies": { "braces": "^3.0.3", "picomatch": "^2.3.1" @@ -4670,6 +5344,7 @@ "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", "integrity": 
"sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", "dev": true, + "license": "ISC", "dependencies": { "brace-expansion": "^1.1.7" }, @@ -4682,51 +5357,17 @@ "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", "dev": true, + "license": "MIT", "funding": { "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/minipass": { - "version": "7.1.2", - "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", - "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==", - "dev": true, - "engines": { - "node": ">=16 || 14 >=14.17" - } - }, - "node_modules/minizlib": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.0.2.tgz", - "integrity": "sha512-oG62iEk+CYt5Xj2YqI5Xi9xWUeZhDI8jjQmC5oThVH5JGCTgIjr7ciJDzC7MBzYd//WvR1OTmP5Q38Q8ShQtVA==", - "dev": true, - "dependencies": { - "minipass": "^7.1.2" - }, - "engines": { - "node": ">= 18" - } - }, - "node_modules/mkdirp": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-3.0.1.tgz", - "integrity": "sha512-+NsyUUAZDmo6YVHzL/stxSu3t9YS1iljliy3BSDrXJ/dkn1KYdmtZODGGjLcc9XLgVVpH4KshHB8XmZgMhaBXg==", - "dev": true, - "bin": { - "mkdirp": "dist/cjs/src/bin.js" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, "node_modules/ms": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/nanoid": { "version": "3.3.11", @@ -4738,6 +5379,7 @@ "url": "https://github.com/sponsors/ai" } ], + "license": "MIT", "bin": { "nanoid": "bin/nanoid.cjs" }, @@ -4746,10 +5388,11 
@@ } }, "node_modules/napi-postinstall": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/napi-postinstall/-/napi-postinstall-0.3.2.tgz", - "integrity": "sha512-tWVJxJHmBWLy69PvO96TZMZDrzmw5KeiZBz3RHmiM2XZ9grBJ2WgMAFVVg25nqp3ZjTFUs2Ftw1JhscL3Teliw==", + "version": "0.3.4", + "resolved": "https://registry.npmjs.org/napi-postinstall/-/napi-postinstall-0.3.4.tgz", + "integrity": "sha512-PHI5f1O0EP5xJ9gQmFGMS6IZcrVvTjpXjz7Na41gTE7eE2hK11lg04CECCYEEjdc17EV4DO+fkGEtt7TpTaTiQ==", "dev": true, + "license": "MIT", "bin": { "napi-postinstall": "lib/cli.js" }, @@ -4764,17 +5407,18 @@ "version": "1.4.0", "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/next": { - "version": "15.3.3", - "resolved": "https://registry.npmjs.org/next/-/next-15.3.3.tgz", - "integrity": "sha512-JqNj29hHNmCLtNvd090SyRbXJiivQ+58XjCcrC50Crb5g5u2zi7Y2YivbsEfzk6AtVI80akdOQbaMZwWB1Hthw==", + "version": "16.0.4", + "resolved": "https://registry.npmjs.org/next/-/next-16.0.4.tgz", + "integrity": "sha512-vICcxKusY8qW7QFOzTvnRL1ejz2ClTqDKtm1AcUjm2mPv/lVAdgpGNsftsPRIDJOXOjRQO68i1dM8Lp8GZnqoA==", + "license": "MIT", + "peer": true, "dependencies": { - "@next/env": "15.3.3", - "@swc/counter": "0.1.3", + "@next/env": "16.0.4", "@swc/helpers": "0.5.15", - "busboy": "1.6.0", "caniuse-lite": "^1.0.30001579", "postcss": "8.4.31", "styled-jsx": "5.1.6" @@ -4783,22 +5427,22 @@ "next": "dist/bin/next" }, "engines": { - "node": "^18.18.0 || ^19.8.0 || >= 20.0.0" + "node": ">=20.9.0" }, "optionalDependencies": { - "@next/swc-darwin-arm64": "15.3.3", - "@next/swc-darwin-x64": "15.3.3", - "@next/swc-linux-arm64-gnu": "15.3.3", - "@next/swc-linux-arm64-musl": "15.3.3", - "@next/swc-linux-x64-gnu": "15.3.3", - "@next/swc-linux-x64-musl": "15.3.3", - "@next/swc-win32-arm64-msvc": "15.3.3", - 
"@next/swc-win32-x64-msvc": "15.3.3", - "sharp": "^0.34.1" + "@next/swc-darwin-arm64": "16.0.4", + "@next/swc-darwin-x64": "16.0.4", + "@next/swc-linux-arm64-gnu": "16.0.4", + "@next/swc-linux-arm64-musl": "16.0.4", + "@next/swc-linux-x64-gnu": "16.0.4", + "@next/swc-linux-x64-musl": "16.0.4", + "@next/swc-win32-arm64-msvc": "16.0.4", + "@next/swc-win32-x64-msvc": "16.0.4", + "sharp": "^0.34.4" }, "peerDependencies": { "@opentelemetry/api": "^1.1.0", - "@playwright/test": "^1.41.2", + "@playwright/test": "^1.51.1", "babel-plugin-react-compiler": "*", "react": "^18.2.0 || 19.0.0-rc-de68d2f4-20241204 || ^19.0.0", "react-dom": "^18.2.0 || 19.0.0-rc-de68d2f4-20241204 || ^19.0.0", @@ -4837,6 +5481,7 @@ "url": "https://github.com/sponsors/ai" } ], + "license": "MIT", "dependencies": { "nanoid": "^3.3.6", "picocolors": "^1.0.0", @@ -4846,10 +5491,18 @@ "node": "^10 || ^12 || >=14" } }, + "node_modules/node-releases": { + "version": "2.0.27", + "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.27.tgz", + "integrity": "sha512-nmh3lCkYZ3grZvqcCH+fjmQ7X+H0OeZgP40OierEaAptX4XofMh5kwNbWh7lBduUzCcV/8kZ+NDLCwm2iorIlA==", + "dev": true, + "license": "MIT" + }, "node_modules/oauth4webapi": { - "version": "3.6.0", - "resolved": "https://registry.npmjs.org/oauth4webapi/-/oauth4webapi-3.6.0.tgz", - "integrity": "sha512-OwXPTXjKPOldTpAa19oksrX9TYHA0rt+VcUFTkJ7QKwgmevPpNm9Cn5vFZUtIo96FiU6AfPuUUGzoXqgOzibWg==", + "version": "3.8.3", + "resolved": "https://registry.npmjs.org/oauth4webapi/-/oauth4webapi-3.8.3.tgz", + "integrity": "sha512-pQ5BsX3QRTgnt5HxgHwgunIRaDXBdkT23tf8dfzmtTIL2LTpdmxgbpbBm0VgFWAIDlezQvQCTgnVIUmHupXHxw==", + "license": "MIT", "funding": { "url": "https://github.com/sponsors/panva" } @@ -4858,6 +5511,7 @@ "version": "4.1.1", "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", + "license": "MIT", "engines": 
{ "node": ">=0.10.0" } @@ -4867,6 +5521,7 @@ "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", "integrity": "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==", "dev": true, + "license": "MIT", "engines": { "node": ">= 0.4" }, @@ -4879,6 +5534,7 @@ "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz", "integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==", "dev": true, + "license": "MIT", "engines": { "node": ">= 0.4" } @@ -4888,6 +5544,7 @@ "resolved": "https://registry.npmjs.org/object.assign/-/object.assign-4.1.7.tgz", "integrity": "sha512-nK28WOo+QIjBkDduTINE4JkF/UJJKyf2EJxvJKfblDpyg0Q+pkOHNTL0Qwy6NP6FhE/EnzV73BxxqcJaXY9anw==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.3", @@ -4908,6 +5565,7 @@ "resolved": "https://registry.npmjs.org/object.entries/-/object.entries-1.1.9.tgz", "integrity": "sha512-8u/hfXFRBD1O0hPUjioLhoWFHRmt6tKA4/vZPyckBr18l1KE9uHrFaFaUi8MDRTpi4uak2goyPTSNJLXX2k2Hw==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.4", @@ -4923,6 +5581,7 @@ "resolved": "https://registry.npmjs.org/object.fromentries/-/object.fromentries-2.0.8.tgz", "integrity": "sha512-k6E21FzySsSK5a21KRADBd/NGneRegFO5pLHfdQLpRDETUNJueLXs3WCzyQ3tFRDYgbq3KHGXfTbi2bs8WQ6rQ==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.7", "define-properties": "^1.2.1", @@ -4941,6 +5600,7 @@ "resolved": "https://registry.npmjs.org/object.groupby/-/object.groupby-1.0.3.tgz", "integrity": "sha512-+Lhy3TQTuzXI5hevh8sBGqbmurHbbIjAi0Z4S63nthVLmLxfbj4T54a4CfZrXIrt9iP4mVAPYMo/v99taj3wjQ==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.7", "define-properties": "^1.2.1", @@ -4955,6 +5615,7 @@ "resolved": "https://registry.npmjs.org/object.values/-/object.values-1.2.1.tgz", "integrity": 
"sha512-gXah6aZrcUxjWg2zR2MwouP2eHlCBzdV4pygudehaKXSGW4v2AsRQUK+lwwXhii6KFZcunEnmSUoYp5CXibxtA==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.3", @@ -4968,11 +5629,25 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/openid-client": { + "version": "6.8.1", + "resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.1.tgz", + "integrity": "sha512-VoYT6enBo6Vj2j3Q5Ec0AezS+9YGzQo1f5Xc42lreMGlfP4ljiXPKVDvCADh+XHCV/bqPu/wWSiCVXbJKvrODw==", + "license": "MIT", + "dependencies": { + "jose": "^6.1.0", + "oauth4webapi": "^3.8.2" + }, + "funding": { + "url": "https://github.com/sponsors/panva" + } + }, "node_modules/optionator": { "version": "0.9.4", "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", "integrity": "sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==", "dev": true, + "license": "MIT", "dependencies": { "deep-is": "^0.1.3", "fast-levenshtein": "^2.0.6", @@ -4990,6 +5665,7 @@ "resolved": "https://registry.npmjs.org/own-keys/-/own-keys-1.0.1.tgz", "integrity": "sha512-qFOyK5PjiWZd+QQIh+1jhdb9LpxTF0qs7Pm8o5QHYZ0M3vKqSqzsZaEB6oWlxZ+q2sJBMI/Ktgd2N5ZwQoRHfg==", "dev": true, + "license": "MIT", "dependencies": { "get-intrinsic": "^1.2.6", "object-keys": "^1.1.1", @@ -5007,6 +5683,7 @@ "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==", "dev": true, + "license": "MIT", "dependencies": { "yocto-queue": "^0.1.0" }, @@ -5022,6 +5699,7 @@ "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-5.0.0.tgz", "integrity": "sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==", "dev": true, + "license": "MIT", "dependencies": { "p-limit": "^3.0.2" }, @@ -5037,6 +5715,7 @@ "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", 
"integrity": "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==", "dev": true, + "license": "MIT", "dependencies": { "callsites": "^3.0.0" }, @@ -5049,6 +5728,7 @@ "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", "integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==", "dev": true, + "license": "MIT", "engines": { "node": ">=8" } @@ -5058,6 +5738,7 @@ "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", "dev": true, + "license": "MIT", "engines": { "node": ">=8" } @@ -5066,18 +5747,21 @@ "version": "1.0.7", "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz", "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/picocolors": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", - "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==" + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", + "license": "ISC" }, "node_modules/picomatch": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", "dev": true, + "license": "MIT", "engines": { "node": ">=8.6" }, @@ -5090,6 +5774,7 @@ "resolved": "https://registry.npmjs.org/possible-typed-array-names/-/possible-typed-array-names-1.1.0.tgz", "integrity": "sha512-/+5VFTchJDoVj3bhoqi6UeymcD00DAwb1nJwamzPvHEszJ4FpF6SNNbUbOS8yI56qHzdV8eK0qEfOSiodkTdxg==", "dev": true, + "license": "MIT", "engines": { "node": ">= 0.4" } @@ 
-5113,6 +5798,7 @@ "url": "https://github.com/sponsors/ai" } ], + "license": "MIT", "dependencies": { "nanoid": "^3.3.11", "picocolors": "^1.1.1", @@ -5123,9 +5809,10 @@ } }, "node_modules/preact": { - "version": "10.26.9", - "resolved": "https://registry.npmjs.org/preact/-/preact-10.26.9.tgz", - "integrity": "sha512-SSjF9vcnF27mJK1XyFMNJzFd5u3pQiATFqoaDy03XuN00u4ziveVVEGt5RKJrDR8MHE/wJo9Nnad56RLzS2RMA==", + "version": "10.27.2", + "resolved": "https://registry.npmjs.org/preact/-/preact-10.27.2.tgz", + "integrity": "sha512-5SYSgFKSyhCbk6SrXyMpqjb5+MQBgfvEKE/OC+PujcY34sOpqtr+0AZQtPYx5IA6VxynQ7rUPCtKzyovpj9Bpg==", + "license": "MIT", "funding": { "type": "opencollective", "url": "https://opencollective.com/preact" @@ -5136,6 +5823,7 @@ "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", "integrity": "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==", "dev": true, + "license": "MIT", "engines": { "node": ">= 0.8.0" } @@ -5144,6 +5832,7 @@ "version": "15.8.1", "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz", "integrity": "sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==", + "license": "MIT", "dependencies": { "loose-envify": "^1.4.0", "object-assign": "^4.1.1", @@ -5155,6 +5844,7 @@ "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", "dev": true, + "license": "MIT", "engines": { "node": ">=6" } @@ -5177,33 +5867,39 @@ "type": "consulting", "url": "https://feross.org/support" } - ] + ], + "license": "MIT" }, "node_modules/react": { - "version": "19.1.0", - "resolved": "https://registry.npmjs.org/react/-/react-19.1.0.tgz", - "integrity": "sha512-FS+XFBNvn3GTAWq26joslQgWNoFu08F4kl0J4CgdNKADkdSGXQyTCnKteIAJy96Br6YbpEU1LSzV5dYtjMkMDg==", + "version": "19.2.0", + "resolved": 
"https://registry.npmjs.org/react/-/react-19.2.0.tgz", + "integrity": "sha512-tmbWg6W31tQLeB5cdIBOicJDJRR2KzXsV7uSK9iNfLWQ5bIZfxuPEHp7M8wiHyHnn0DD1i7w3Zmin0FtkrwoCQ==", + "license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } }, "node_modules/react-dom": { - "version": "19.1.0", - "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.1.0.tgz", - "integrity": "sha512-Xs1hdnE+DyKgeHJeJznQmYMIBG3TKIHJJT95Q58nHLSrElKlGQqDTR2HQ9fx5CN/Gk6Vh/kupBTDLU11/nDk/g==", + "version": "19.2.0", + "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.0.tgz", + "integrity": "sha512-UlbRu4cAiGaIewkPyiRGJk0imDN2T3JjieT6spoL2UeSf5od4n5LB/mQ4ejmxhCFT1tYe8IvaFulzynWovsEFQ==", + "license": "MIT", + "peer": true, "dependencies": { - "scheduler": "^0.26.0" + "scheduler": "^0.27.0" }, "peerDependencies": { - "react": "^19.1.0" + "react": "^19.2.0" } }, "node_modules/react-force-graph-2d": { - "version": "1.28.0", - "resolved": "https://registry.npmjs.org/react-force-graph-2d/-/react-force-graph-2d-1.28.0.tgz", - "integrity": "sha512-NYA8GLxJnoZyLWjob8xea38B1cZqSGdcA8lDpvTc1hcJrpzFyBEHkeJ4xtFoJp66tsM4PAlj5af4HWnU0OQ3Sg==", + "version": "1.29.0", + "resolved": "https://registry.npmjs.org/react-force-graph-2d/-/react-force-graph-2d-1.29.0.tgz", + "integrity": "sha512-Xv5IIk+hsZmB3F2ibja/t6j/b0/1T9dtFOQacTUoLpgzRHrO6wPu1GtQ2LfRqI/imgtaapnXUgQaE8g8enPo5w==", + "license": "MIT", "dependencies": { - "force-graph": "^1.50", + "force-graph": "^1.51", "prop-types": "15", "react-kapsule": "^2.5" }, @@ -5217,12 +5913,14 @@ "node_modules/react-is": { "version": "16.13.1", "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz", - "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==" + "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==", + "license": "MIT" }, "node_modules/react-kapsule": { "version": "2.5.7", "resolved": 
"https://registry.npmjs.org/react-kapsule/-/react-kapsule-2.5.7.tgz", "integrity": "sha512-kifAF4ZPD77qZKc4CKLmozq6GY1sBzPEJTIJb0wWFK6HsePJatK3jXplZn2eeAt3x67CDozgi7/rO8fNQ/AL7A==", + "license": "MIT", "dependencies": { "jerrypick": "^1.1.1" }, @@ -5238,6 +5936,7 @@ "resolved": "https://registry.npmjs.org/reflect.getprototypeof/-/reflect.getprototypeof-1.0.10.tgz", "integrity": "sha512-00o4I+DVrefhv+nX0ulyi3biSHCPDe+yLv5o/p6d/UVlirijB8E16FtfwSAi4g3tcqrQ4lRAqQSoFEZJehYEcw==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.8", "define-properties": "^1.2.1", @@ -5260,6 +5959,7 @@ "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.5.4.tgz", "integrity": "sha512-dYqgNSZbDwkaJ2ceRd9ojCGjBq+mOm9LmtXnAnEGyHhN/5R7iDW2TRw3h+o/jCFxus3P2LfWIIiwowAjANm7IA==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.8", "define-properties": "^1.2.1", @@ -5276,12 +5976,13 @@ } }, "node_modules/resolve": { - "version": "1.22.10", - "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.10.tgz", - "integrity": "sha512-NPRy+/ncIMeDlTAsuqwKIiferiawhefFJtkNSW0qZJEqMEb+qBt/77B/jGeeek+F0uOeN05CDa6HXbbIgtVX4w==", + "version": "1.22.11", + "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.11.tgz", + "integrity": "sha512-RfqAvLnMl313r7c9oclB1HhUEAezcpLjz95wFH4LVuhk9JF/r22qmVP9AMmOU4vMX7Q8pN8jwNg/CSpdFnMjTQ==", "dev": true, + "license": "MIT", "dependencies": { - "is-core-module": "^2.16.0", + "is-core-module": "^2.16.1", "path-parse": "^1.0.7", "supports-preserve-symlinks-flag": "^1.0.0" }, @@ -5300,6 +6001,7 @@ "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", "dev": true, + "license": "MIT", "engines": { "node": ">=4" } @@ -5309,6 +6011,7 @@ "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", "integrity": 
"sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==", "dev": true, + "license": "MIT", "funding": { "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" } @@ -5318,6 +6021,7 @@ "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.1.0.tgz", "integrity": "sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw==", "dev": true, + "license": "MIT", "engines": { "iojs": ">=1.0.0", "node": ">=0.10.0" @@ -5342,6 +6046,7 @@ "url": "https://feross.org/support" } ], + "license": "MIT", "dependencies": { "queue-microtask": "^1.2.2" } @@ -5351,6 +6056,7 @@ "resolved": "https://registry.npmjs.org/safe-array-concat/-/safe-array-concat-1.1.3.tgz", "integrity": "sha512-AURm5f0jYEOydBj7VQlVvDrjeFgthDdEF5H1dP+6mNpoXOMo1quQqJ4wvJDyRZ9+pO3kGWoOdmV08cSv2aJV6Q==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.2", @@ -5370,6 +6076,7 @@ "resolved": "https://registry.npmjs.org/safe-push-apply/-/safe-push-apply-1.0.0.tgz", "integrity": "sha512-iKE9w/Z7xCzUMIZqdBsp6pEQvwuEebH4vdpjcDWnyzaI6yl6O9FHvVpmGelvEHNsoY6wGblkxR6Zty/h00WiSA==", "dev": true, + "license": "MIT", "dependencies": { "es-errors": "^1.3.0", "isarray": "^2.0.5" @@ -5386,6 +6093,7 @@ "resolved": "https://registry.npmjs.org/safe-regex-test/-/safe-regex-test-1.1.0.tgz", "integrity": "sha512-x/+Cz4YrimQxQccJf5mKEbIa1NzeCRNI5Ecl/ekmlYaampdNLPalVyIcCZNNH3MvmqBugV5TMYZXv0ljslUlaw==", "dev": true, + "license": "MIT", "dependencies": { "call-bound": "^1.0.2", "es-errors": "^1.3.0", @@ -5399,20 +6107,19 @@ } }, "node_modules/scheduler": { - "version": "0.26.0", - "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.26.0.tgz", - "integrity": "sha512-NlHwttCI/l5gCPR3D1nNXtWABUmBwvZpEQiD4IXSbIDq8BzLIK/7Ir5gTFSGZDUu37K5cMNp0hFtzO38sC7gWA==" + "version": "0.27.0", + "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.27.0.tgz", + "integrity": 
"sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==", + "license": "MIT" }, "node_modules/semver": { - "version": "7.7.2", - "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.2.tgz", - "integrity": "sha512-RF0Fw+rO5AMf9MAyaRXI4AV0Ulj5lMHqVxxdSgiVbixSCXoEmmX/jk0CuJw4+3SqroYO9VoUh+HcuJivvtJemA==", - "devOptional": true, + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", + "dev": true, + "license": "ISC", "bin": { "semver": "bin/semver.js" - }, - "engines": { - "node": ">=10" } }, "node_modules/set-function-length": { @@ -5420,6 +6127,7 @@ "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz", "integrity": "sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg==", "dev": true, + "license": "MIT", "dependencies": { "define-data-property": "^1.1.4", "es-errors": "^1.3.0", @@ -5437,6 +6145,7 @@ "resolved": "https://registry.npmjs.org/set-function-name/-/set-function-name-2.0.2.tgz", "integrity": "sha512-7PGFlmtwsEADb0WYyvCMa1t+yke6daIG4Wirafur5kcf+MhUnPms1UeR0CKQdTZD81yESwMHbtn+TR+dMviakQ==", "dev": true, + "license": "MIT", "dependencies": { "define-data-property": "^1.1.4", "es-errors": "^1.3.0", @@ -5452,6 +6161,7 @@ "resolved": "https://registry.npmjs.org/set-proto/-/set-proto-1.0.0.tgz", "integrity": "sha512-RJRdvCo6IAnPdsvP/7m6bsQqNnn1FCBX5ZNtFL98MmFF/4xAIJTIg1YbHW5DC2W5SKZanrC6i4HsJqlajw/dZw==", "dev": true, + "license": "MIT", "dependencies": { "dunder-proto": "^1.0.1", "es-errors": "^1.3.0", @@ -5462,15 +6172,16 @@ } }, "node_modules/sharp": { - "version": "0.34.3", - "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.3.tgz", - "integrity": "sha512-eX2IQ6nFohW4DbvHIOLRB3MHFpYqaqvXd3Tp5e/T/dSH83fxaNJQRvDMhASmkNTsNTVF2/OOopzRCt7xokgPfg==", + "version": "0.34.5", + 
"resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.5.tgz", + "integrity": "sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg==", "hasInstallScript": true, + "license": "Apache-2.0", "optional": true, "dependencies": { - "color": "^4.2.3", - "detect-libc": "^2.0.4", - "semver": "^7.7.2" + "@img/colour": "^1.0.0", + "detect-libc": "^2.1.2", + "semver": "^7.7.3" }, "engines": { "node": "^18.17.0 || ^20.3.0 || >=21.0.0" @@ -5479,28 +6190,43 @@ "url": "https://opencollective.com/libvips" }, "optionalDependencies": { - "@img/sharp-darwin-arm64": "0.34.3", - "@img/sharp-darwin-x64": "0.34.3", - "@img/sharp-libvips-darwin-arm64": "1.2.0", - "@img/sharp-libvips-darwin-x64": "1.2.0", - "@img/sharp-libvips-linux-arm": "1.2.0", - "@img/sharp-libvips-linux-arm64": "1.2.0", - "@img/sharp-libvips-linux-ppc64": "1.2.0", - "@img/sharp-libvips-linux-s390x": "1.2.0", - "@img/sharp-libvips-linux-x64": "1.2.0", - "@img/sharp-libvips-linuxmusl-arm64": "1.2.0", - "@img/sharp-libvips-linuxmusl-x64": "1.2.0", - "@img/sharp-linux-arm": "0.34.3", - "@img/sharp-linux-arm64": "0.34.3", - "@img/sharp-linux-ppc64": "0.34.3", - "@img/sharp-linux-s390x": "0.34.3", - "@img/sharp-linux-x64": "0.34.3", - "@img/sharp-linuxmusl-arm64": "0.34.3", - "@img/sharp-linuxmusl-x64": "0.34.3", - "@img/sharp-wasm32": "0.34.3", - "@img/sharp-win32-arm64": "0.34.3", - "@img/sharp-win32-ia32": "0.34.3", - "@img/sharp-win32-x64": "0.34.3" + "@img/sharp-darwin-arm64": "0.34.5", + "@img/sharp-darwin-x64": "0.34.5", + "@img/sharp-libvips-darwin-arm64": "1.2.4", + "@img/sharp-libvips-darwin-x64": "1.2.4", + "@img/sharp-libvips-linux-arm": "1.2.4", + "@img/sharp-libvips-linux-arm64": "1.2.4", + "@img/sharp-libvips-linux-ppc64": "1.2.4", + "@img/sharp-libvips-linux-riscv64": "1.2.4", + "@img/sharp-libvips-linux-s390x": "1.2.4", + "@img/sharp-libvips-linux-x64": "1.2.4", + "@img/sharp-libvips-linuxmusl-arm64": "1.2.4", + "@img/sharp-libvips-linuxmusl-x64": "1.2.4", + 
"@img/sharp-linux-arm": "0.34.5", + "@img/sharp-linux-arm64": "0.34.5", + "@img/sharp-linux-ppc64": "0.34.5", + "@img/sharp-linux-riscv64": "0.34.5", + "@img/sharp-linux-s390x": "0.34.5", + "@img/sharp-linux-x64": "0.34.5", + "@img/sharp-linuxmusl-arm64": "0.34.5", + "@img/sharp-linuxmusl-x64": "0.34.5", + "@img/sharp-wasm32": "0.34.5", + "@img/sharp-win32-arm64": "0.34.5", + "@img/sharp-win32-ia32": "0.34.5", + "@img/sharp-win32-x64": "0.34.5" + } + }, + "node_modules/sharp/node_modules/semver": { + "version": "7.7.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz", + "integrity": "sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==", + "license": "ISC", + "optional": true, + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" } }, "node_modules/shebang-command": { @@ -5508,6 +6234,7 @@ "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", "dev": true, + "license": "MIT", "dependencies": { "shebang-regex": "^3.0.0" }, @@ -5520,6 +6247,7 @@ "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", "dev": true, + "license": "MIT", "engines": { "node": ">=8" } @@ -5529,6 +6257,7 @@ "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==", "dev": true, + "license": "MIT", "dependencies": { "es-errors": "^1.3.0", "object-inspect": "^1.13.3", @@ -5548,6 +6277,7 @@ "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz", "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==", "dev": true, + 
"license": "MIT", "dependencies": { "es-errors": "^1.3.0", "object-inspect": "^1.13.3" @@ -5564,6 +6294,7 @@ "resolved": "https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz", "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==", "dev": true, + "license": "MIT", "dependencies": { "call-bound": "^1.0.2", "es-errors": "^1.3.0", @@ -5582,6 +6313,7 @@ "resolved": "https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz", "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==", "dev": true, + "license": "MIT", "dependencies": { "call-bound": "^1.0.2", "es-errors": "^1.3.0", @@ -5596,19 +6328,11 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/simple-swizzle": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz", - "integrity": "sha512-JA//kQgZtbuY83m+xT+tXJkmJncGMTFT+C+g2h2R9uxkYIrE2yy9sgmcLhCnw57/WSD+Eh3J97FPEDFnbXnDUg==", - "optional": true, - "dependencies": { - "is-arrayish": "^0.3.1" - } - }, "node_modules/source-map-js": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", + "license": "BSD-3-Clause", "engines": { "node": ">=0.10.0" } @@ -5617,13 +6341,15 @@ "version": "0.0.5", "resolved": "https://registry.npmjs.org/stable-hash/-/stable-hash-0.0.5.tgz", "integrity": "sha512-+L3ccpzibovGXFK+Ap/f8LOS0ahMrHTf3xu7mMLSpEGU0EO9ucaysSylKo9eRDFNhWve/y275iPmIZ4z39a9iA==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/stop-iteration-iterator": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/stop-iteration-iterator/-/stop-iteration-iterator-1.1.0.tgz", "integrity": 
"sha512-eLoXW/DHyl62zxY4SCaIgnRhuMr6ri4juEYARS8E6sCEqzKpOiE521Ucofdx+KnDZl5xmvGYaaKCk5FEOxJCoQ==", "dev": true, + "license": "MIT", "dependencies": { "es-errors": "^1.3.0", "internal-slot": "^1.1.0" @@ -5632,19 +6358,12 @@ "node": ">= 0.4" } }, - "node_modules/streamsearch": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-1.1.0.tgz", - "integrity": "sha512-Mcc5wHehp9aXz1ax6bZUyY5afg9u2rv5cqQI3mRrYkGC8rW2hM02jWuwjtL++LS5qinSyhj2QfLyNsuc+VsExg==", - "engines": { - "node": ">=10.0.0" - } - }, "node_modules/string.prototype.includes": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/string.prototype.includes/-/string.prototype.includes-2.0.1.tgz", "integrity": "sha512-o7+c9bW6zpAdJHTtujeePODAhkuicdAryFsfVKwA+wGw89wJ4GTY484WTucM9hLtDEOpOvI+aHnzqnC5lHp4Rg==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.7", "define-properties": "^1.2.1", @@ -5659,6 +6378,7 @@ "resolved": "https://registry.npmjs.org/string.prototype.matchall/-/string.prototype.matchall-4.0.12.tgz", "integrity": "sha512-6CC9uyBL+/48dYizRf7H7VAYCMCNTBeM78x/VTUe9bFEaxBepPJDa1Ow99LqI/1yF7kuy7Q3cQsYMrcjGUcskA==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.3", @@ -5686,6 +6406,7 @@ "resolved": "https://registry.npmjs.org/string.prototype.repeat/-/string.prototype.repeat-1.0.0.tgz", "integrity": "sha512-0u/TldDbKD8bFCQ/4f5+mNRrXwZ8hg2w7ZR8wa16e8z9XpePWl3eGEcUD0OXpEH/VJH/2G3gjUtR3ZOiBe2S/w==", "dev": true, + "license": "MIT", "dependencies": { "define-properties": "^1.1.3", "es-abstract": "^1.17.5" @@ -5696,6 +6417,7 @@ "resolved": "https://registry.npmjs.org/string.prototype.trim/-/string.prototype.trim-1.2.10.tgz", "integrity": "sha512-Rs66F0P/1kedk5lyYyH9uBzuiI/kNRmwJAR9quK6VOtIpZ2G+hMZd+HQbbv25MgCA6gEffoMZYxlTod4WcdrKA==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.2", @@ -5717,6 +6439,7 @@ "resolved": 
"https://registry.npmjs.org/string.prototype.trimend/-/string.prototype.trimend-1.0.9.tgz", "integrity": "sha512-G7Ok5C6E/j4SGfyLCloXTrngQIQU3PWtXGst3yM7Bea9FRURf1S42ZHlZZtsNque2FN2PoUhfZXYLNWwEr4dLQ==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.2", @@ -5735,6 +6458,7 @@ "resolved": "https://registry.npmjs.org/string.prototype.trimstart/-/string.prototype.trimstart-1.0.8.tgz", "integrity": "sha512-UXSH262CSZY1tfu3G3Secr6uGLCFVPMhIqHjlgCUtCCcgihYc/xKs9djMTMUOb2j1mVSeU8EU6NWc/iQKU6Gfg==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.7", "define-properties": "^1.2.1", @@ -5752,6 +6476,7 @@ "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz", "integrity": "sha512-vavAMRXOgBVNF6nyEEmL3DBK19iRpDcoIwW+swQ+CbGiu7lju6t+JklA1MHweoWtadgt4ISVUsXLyDq34ddcwA==", "dev": true, + "license": "MIT", "engines": { "node": ">=4" } @@ -5761,6 +6486,7 @@ "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==", "dev": true, + "license": "MIT", "engines": { "node": ">=8" }, @@ -5772,6 +6498,7 @@ "version": "5.1.6", "resolved": "https://registry.npmjs.org/styled-jsx/-/styled-jsx-5.1.6.tgz", "integrity": "sha512-qSVyDTeMotdvQYoHWLNGwRFJHC+i+ZvdBRYosOFgC+Wg1vx4frN2/RG/NA7SYqqvKNLf39P2LSRA2pu6n0XYZA==", + "license": "MIT", "dependencies": { "client-only": "0.0.1" }, @@ -5795,6 +6522,7 @@ "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", "dev": true, + "license": "MIT", "dependencies": { "has-flag": "^4.0.0" }, @@ -5807,6 +6535,7 @@ "resolved": "https://registry.npmjs.org/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz", "integrity": 
"sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==", "dev": true, + "license": "MIT", "engines": { "node": ">= 0.4" }, @@ -5815,9 +6544,10 @@ } }, "node_modules/swr": { - "version": "2.3.4", - "resolved": "https://registry.npmjs.org/swr/-/swr-2.3.4.tgz", - "integrity": "sha512-bYd2lrhc+VarcpkgWclcUi92wYCpOgMws9Sd1hG1ntAu0NEy+14CbotuFjshBU2kt9rYj9TSmDcybpxpeTU1fg==", + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/swr/-/swr-2.3.6.tgz", + "integrity": "sha512-wfHRmHWk/isGNMwlLGlZX5Gzz/uTgo0o2IRuTMcf4CPuPFJZlq0rDaKUx+ozB5nBOReNV1kiOyzMfj+MBMikLw==", + "license": "MIT", "dependencies": { "dequal": "^2.0.3", "use-sync-external-store": "^1.4.0" @@ -5827,50 +6557,41 @@ } }, "node_modules/tailwindcss": { - "version": "4.1.11", - "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.11.tgz", - "integrity": "sha512-2E9TBm6MDD/xKYe+dvJZAmg3yxIEDNRc0jwlNyDg/4Fil2QcSLjFKGVff0lAf1jjeaArlG/M75Ey/EYr/OJtBA==", - "dev": true + "version": "4.1.17", + "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.17.tgz", + "integrity": "sha512-j9Ee2YjuQqYT9bbRTfTZht9W/ytp5H+jJpZKiYdP/bpnXARAuELt9ofP0lPnmHjbga7SNQIxdTAXCmtKVYjN+Q==", + "dev": true, + "license": "MIT" }, "node_modules/tapable": { - "version": "2.2.2", - "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.2.2.tgz", - "integrity": "sha512-Re10+NauLTMCudc7T5WLFLAwDhQ0JWdrMK+9B2M8zR5hRExKmsRDCBA7/aV/pNJFltmBFO5BAMlQFi/vq3nKOg==", + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.3.0.tgz", + "integrity": "sha512-g9ljZiwki/LfxmQADO3dEY1CbpmXT5Hm2fJ+QaGKwSXUylMybePR7/67YW7jOrrvjEgL1Fmz5kzyAjWVWLlucg==", "dev": true, + "license": "MIT", "engines": { "node": ">=6" - } - }, - "node_modules/tar": { - "version": "7.4.3", - "resolved": "https://registry.npmjs.org/tar/-/tar-7.4.3.tgz", - "integrity": "sha512-5S7Va8hKfV7W5U6g3aYxXmlPoZVAwUMy9AOKyF2fVuZa2UD3qZjg578OrLRt8PcNN1PleVaL/5/yYATNL0ICUw==", - "dev": 
true, - "dependencies": { - "@isaacs/fs-minipass": "^4.0.0", - "chownr": "^3.0.0", - "minipass": "^7.1.2", - "minizlib": "^3.0.1", - "mkdirp": "^3.0.1", - "yallist": "^5.0.0" }, - "engines": { - "node": ">=18" + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" } }, "node_modules/tinycolor2": { "version": "1.6.0", "resolved": "https://registry.npmjs.org/tinycolor2/-/tinycolor2-1.6.0.tgz", - "integrity": "sha512-XPaBkWQJdsf3pLKJV9p4qN/S+fm2Oj8AIPo1BTUhg5oxkvm9+SVEGFdhyOz7tTdUTfvxMiAs4sp6/eZO2Ew+pw==" + "integrity": "sha512-XPaBkWQJdsf3pLKJV9p4qN/S+fm2Oj8AIPo1BTUhg5oxkvm9+SVEGFdhyOz7tTdUTfvxMiAs4sp6/eZO2Ew+pw==", + "license": "MIT" }, "node_modules/tinyglobby": { - "version": "0.2.14", - "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.14.tgz", - "integrity": "sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ==", + "version": "0.2.15", + "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz", + "integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==", "dev": true, + "license": "MIT", "dependencies": { - "fdir": "^6.4.4", - "picomatch": "^4.0.2" + "fdir": "^6.5.0", + "picomatch": "^4.0.3" }, "engines": { "node": ">=12.0.0" @@ -5880,10 +6601,14 @@ } }, "node_modules/tinyglobby/node_modules/fdir": { - "version": "6.4.6", - "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.4.6.tgz", - "integrity": "sha512-hiFoqpyZcfNm1yc4u8oWCf9A2c4D3QjCrks3zmoVKVxpQRzmPNar1hUJcBG2RQHvEVGDN+Jm81ZheVLAQMK6+w==", + "version": "6.5.0", + "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", + "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", "dev": true, + "license": "MIT", + "engines": { + "node": ">=12.0.0" + }, "peerDependencies": { "picomatch": "^3 || ^4" }, @@ -5898,6 +6623,8 @@ "resolved": 
"https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, + "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -5910,6 +6637,7 @@ "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", "dev": true, + "license": "MIT", "dependencies": { "is-number": "^7.0.0" }, @@ -5922,6 +6650,7 @@ "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.1.0.tgz", "integrity": "sha512-CUgTZL1irw8u29bzrOD/nH85jqyc74D6SshFgujOIA7osm2Rz7dYH77agkx7H4FBNxDq7Cjf+IjaX/8zwFW+ZQ==", "dev": true, + "license": "MIT", "engines": { "node": ">=18.12" }, @@ -5934,6 +6663,7 @@ "resolved": "https://registry.npmjs.org/tsconfig-paths/-/tsconfig-paths-3.15.0.tgz", "integrity": "sha512-2Ac2RgzDe/cn48GvOe3M+o82pEFewD3UPbyoUHHdKasHwJKjds4fLXWf/Ux5kATBKN20oaFGu+jbElp1pos0mg==", "dev": true, + "license": "MIT", "dependencies": { "@types/json5": "^0.0.29", "json5": "^1.0.2", @@ -5941,16 +6671,31 @@ "strip-bom": "^3.0.0" } }, + "node_modules/tsconfig-paths/node_modules/json5": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.2.tgz", + "integrity": "sha512-g1MWMLBiz8FKi1e4w0UyVL3w+iJceWAFBAaBnnGKOpNa5f8TLktkbre1+s6oICydWAm+HRUGTmI+//xv2hvXYA==", + "dev": true, + "license": "MIT", + "dependencies": { + "minimist": "^1.2.0" + }, + "bin": { + "json5": "lib/cli.js" + } + }, "node_modules/tslib": { "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==" + "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", + "license": "0BSD" }, "node_modules/type-check": { "version": "0.4.0", 
"resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", "integrity": "sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==", "dev": true, + "license": "MIT", "dependencies": { "prelude-ls": "^1.2.1" }, @@ -5963,6 +6708,7 @@ "resolved": "https://registry.npmjs.org/typed-array-buffer/-/typed-array-buffer-1.0.3.tgz", "integrity": "sha512-nAYYwfY3qnzX30IkA6AQZjVbtK6duGontcQm1WSG1MD94YLqK0515GNApXkoxKOWMusVssAHWLh9SeaoefYFGw==", "dev": true, + "license": "MIT", "dependencies": { "call-bound": "^1.0.3", "es-errors": "^1.3.0", @@ -5977,6 +6723,7 @@ "resolved": "https://registry.npmjs.org/typed-array-byte-length/-/typed-array-byte-length-1.0.3.tgz", "integrity": "sha512-BaXgOuIxz8n8pIq3e7Atg/7s+DpiYrxn4vdot3w9KbnBhcRQq6o3xemQdIfynqSeXeDrF32x+WvfzmOjPiY9lg==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.8", "for-each": "^0.3.3", @@ -5996,6 +6743,7 @@ "resolved": "https://registry.npmjs.org/typed-array-byte-offset/-/typed-array-byte-offset-1.0.4.tgz", "integrity": "sha512-bTlAFB/FBYMcuX81gbL4OcpH5PmlFHqlCCpAl8AlEzMz5k53oNDvN8p1PNOWLEmI2x4orp3raOFB51tv9X+MFQ==", "dev": true, + "license": "MIT", "dependencies": { "available-typed-arrays": "^1.0.7", "call-bind": "^1.0.8", @@ -6017,6 +6765,7 @@ "resolved": "https://registry.npmjs.org/typed-array-length/-/typed-array-length-1.0.7.tgz", "integrity": "sha512-3KS2b+kL7fsuk/eJZ7EQdnEmQoaho/r6KUef7hxvltNA5DR8NAUM+8wJMbJyZ4G9/7i3v5zPBIMN5aybAh2/Jg==", "dev": true, + "license": "MIT", "dependencies": { "call-bind": "^1.0.7", "for-each": "^0.3.3", @@ -6033,10 +6782,12 @@ } }, "node_modules/typescript": { - "version": "5.8.3", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.3.tgz", - "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", + "version": "5.9.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", + "integrity": 
"sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "dev": true, + "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -6045,11 +6796,36 @@ "node": ">=14.17" } }, + "node_modules/typescript-eslint": { + "version": "8.48.0", + "resolved": "https://registry.npmjs.org/typescript-eslint/-/typescript-eslint-8.48.0.tgz", + "integrity": "sha512-fcKOvQD9GUn3Xw63EgiDqhvWJ5jsyZUaekl3KVpGsDJnN46WJTe3jWxtQP9lMZm1LJNkFLlTaWAxK2vUQR+cqw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@typescript-eslint/eslint-plugin": "8.48.0", + "@typescript-eslint/parser": "8.48.0", + "@typescript-eslint/typescript-estree": "8.48.0", + "@typescript-eslint/utils": "8.48.0" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "eslint": "^8.57.0 || ^9.0.0", + "typescript": ">=4.8.4 <6.0.0" + } + }, "node_modules/unbox-primitive": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/unbox-primitive/-/unbox-primitive-1.1.0.tgz", "integrity": "sha512-nWJ91DjeOkej/TA8pXQ3myruKpKEYgqvpw9lz4OPHj/NWFNluYrjbz9j01CJ8yKQd2g4jFoOkINCTW2I5LEEyw==", "dev": true, + "license": "MIT", "dependencies": { "call-bound": "^1.0.3", "has-bigints": "^1.0.2", @@ -6067,7 +6843,8 @@ "version": "6.21.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/unrs-resolver": { "version": "1.11.1", @@ -6075,6 +6852,7 @@ "integrity": "sha512-bSjt9pjaEBnNiGgc9rUiHGKv5l4/TGzDmYw3RhnkJGtLhbnnA/5qJj7x3dNDCRx/PJxu774LlH8lCOlB4hEfKg==", "dev": true, "hasInstallScript": true, + "license": "MIT", "dependencies": { "napi-postinstall": "^0.3.0" }, @@ -6103,19 +6881,52 @@ 
"@unrs/resolver-binding-win32-x64-msvc": "1.11.1" } }, + "node_modules/update-browserslist-db": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.1.4.tgz", + "integrity": "sha512-q0SPT4xyU84saUX+tomz1WLkxUbuaJnR1xWt17M7fJtEJigJeWUNGUqrauFXsHnqev9y9JTRGwk13tFBuKby4A==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "escalade": "^3.2.0", + "picocolors": "^1.1.1" + }, + "bin": { + "update-browserslist-db": "cli.js" + }, + "peerDependencies": { + "browserslist": ">= 4.21.0" + } + }, "node_modules/uri-js": { "version": "4.4.1", "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", "dev": true, + "license": "BSD-2-Clause", "dependencies": { "punycode": "^2.1.0" } }, "node_modules/use-sync-external-store": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/use-sync-external-store/-/use-sync-external-store-1.5.0.tgz", - "integrity": "sha512-Rb46I4cGGVBmjamjphe8L/UnvJD+uPPtTkNvX5mZgqdbavhI4EbgIWJiIHXJ8bc/i9EQGPRh4DwEURJ552Do0A==", + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/use-sync-external-store/-/use-sync-external-store-1.6.0.tgz", + "integrity": "sha512-Pp6GSwGP/NrPIrxVFAIkOQeyw8lFenOHijQWkUTrDvrF4ALqylP2C/KCkeS9dpUM3KvYRQhna5vt7IL95+ZQ9w==", + "license": "MIT", "peerDependencies": { "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" } @@ -6128,6 +6939,7 @@ "https://github.com/sponsors/broofa", "https://github.com/sponsors/ctavan" ], + "license": "MIT", "bin": { "uuid": "dist/bin/uuid" } @@ -6137,6 +6949,7 @@ "resolved": 
"https://registry.npmjs.org/which/-/which-2.0.2.tgz", "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", "dev": true, + "license": "ISC", "dependencies": { "isexe": "^2.0.0" }, @@ -6152,6 +6965,7 @@ "resolved": "https://registry.npmjs.org/which-boxed-primitive/-/which-boxed-primitive-1.1.1.tgz", "integrity": "sha512-TbX3mj8n0odCBFVlY8AxkqcHASw3L60jIuF8jFP78az3C2YhmGvqbHBpAjTRH2/xqYunrJ9g1jSyjCjpoWzIAA==", "dev": true, + "license": "MIT", "dependencies": { "is-bigint": "^1.1.0", "is-boolean-object": "^1.2.1", @@ -6171,6 +6985,7 @@ "resolved": "https://registry.npmjs.org/which-builtin-type/-/which-builtin-type-1.2.1.tgz", "integrity": "sha512-6iBczoX+kDQ7a3+YJBnh3T+KZRxM/iYNPXicqk66/Qfm1b93iu+yOImkg0zHbj5LNOcNv1TEADiZ0xa34B4q6Q==", "dev": true, + "license": "MIT", "dependencies": { "call-bound": "^1.0.2", "function.prototype.name": "^1.1.6", @@ -6198,6 +7013,7 @@ "resolved": "https://registry.npmjs.org/which-collection/-/which-collection-1.0.2.tgz", "integrity": "sha512-K4jVyjnBdgvc86Y6BkaLZEN933SwYOuBFkdmBu9ZfkcAbdVbpITnDmjvZ/aQjRXQrv5EPkTnD1s39GiiqbngCw==", "dev": true, + "license": "MIT", "dependencies": { "is-map": "^2.0.3", "is-set": "^2.0.3", @@ -6216,6 +7032,7 @@ "resolved": "https://registry.npmjs.org/which-typed-array/-/which-typed-array-1.1.19.tgz", "integrity": "sha512-rEvr90Bck4WZt9HHFC4DJMsjvu7x+r6bImz0/BrbWb7A2djJ8hnZMrWnHo9F8ssv0OMErasDhftrfROTyqSDrw==", "dev": true, + "license": "MIT", "dependencies": { "available-typed-arrays": "^1.0.7", "call-bind": "^1.0.8", @@ -6237,30 +7054,54 @@ "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.5.tgz", "integrity": "sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==", "dev": true, + "license": "MIT", "engines": { "node": ">=0.10.0" } }, "node_modules/yallist": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz", - "integrity": 
"sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw==", + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz", + "integrity": "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==", "dev": true, - "engines": { - "node": ">=18" - } + "license": "ISC" }, "node_modules/yocto-queue": { "version": "0.1.0", "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", "integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==", "dev": true, + "license": "MIT", "engines": { "node": ">=10" }, "funding": { "url": "https://github.com/sponsors/sindresorhus" } + }, + "node_modules/zod": { + "version": "4.1.13", + "resolved": "https://registry.npmjs.org/zod/-/zod-4.1.13.tgz", + "integrity": "sha512-AvvthqfqrAhNH9dnfmrfKzX5upOdjUVJYFqNSlkmGf64gRaTzlPwz99IHYnVs28qYAybvAlBV+H7pn0saFY4Ig==", + "dev": true, + "license": "MIT", + "peer": true, + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, + "node_modules/zod-validation-error": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/zod-validation-error/-/zod-validation-error-4.0.2.tgz", + "integrity": "sha512-Q6/nZLe6jxuU80qb/4uJ4t5v2VEZ44lzQjPDhYJNztRQ4wyWc6VF3D3Kb/fAuPetZQnhS3hnajCf9CsWesghLQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18.0.0" + }, + "peerDependencies": { + "zod": "^3.25.0 || ^4.0.0" + } } } } diff --git a/cognee-frontend/package.json b/cognee-frontend/package.json index 68aef29eb..4195945fd 100644 --- a/cognee-frontend/package.json +++ b/cognee-frontend/package.json @@ -9,13 +9,13 @@ "lint": "next lint" }, "dependencies": { - "@auth0/nextjs-auth0": "^4.6.0", + "@auth0/nextjs-auth0": "^4.13.1", "classnames": "^2.5.1", "culori": "^4.0.1", "d3-force-3d": "^3.0.6", - "next": "15.3.3", - "react": "^19.0.0", - "react-dom": "^19.0.0", + "next": "16.0.4", + "react": 
"^19.2.0", + "react-dom": "^19.2.0", "react-force-graph-2d": "^1.27.1", "uuid": "^9.0.1" }, @@ -24,11 +24,11 @@ "@tailwindcss/postcss": "^4.1.7", "@types/culori": "^4.0.0", "@types/node": "^20", - "@types/react": "^18", - "@types/react-dom": "^18", + "@types/react": "^19", + "@types/react-dom": "^19", "@types/uuid": "^9.0.8", "eslint": "^9", - "eslint-config-next": "^15.3.3", + "eslint-config-next": "^16.0.4", "eslint-config-prettier": "^10.1.5", "tailwindcss": "^4.1.7", "typescript": "^5" diff --git a/cognee-frontend/src/app/(graph)/CrewAITrigger.tsx b/cognee-frontend/src/app/(graph)/CrewAITrigger.tsx deleted file mode 100644 index 7a164c614..000000000 --- a/cognee-frontend/src/app/(graph)/CrewAITrigger.tsx +++ /dev/null @@ -1,119 +0,0 @@ -import { useState } from "react"; -import { fetch } from "@/utils"; -import { v4 as uuid4 } from "uuid"; -import { LoadingIndicator } from "@/ui/App"; -import { CTAButton, Input } from "@/ui/elements"; - -interface CrewAIFormPayload extends HTMLFormElement { - username1: HTMLInputElement; - username2: HTMLInputElement; -} - -interface CrewAITriggerProps { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - onData: (data: any) => void; - // eslint-disable-next-line @typescript-eslint/no-explicit-any - onActivity: (activities: any) => void; -} - -export default function CrewAITrigger({ onData, onActivity }: CrewAITriggerProps) { - const [isCrewAIRunning, setIsCrewAIRunning] = useState(false); - - const handleRunCrewAI = (event: React.FormEvent) => { - event.preventDefault(); - const formElements = event.currentTarget; - - const crewAIConfig = { - username1: formElements.username1.value, - username2: formElements.username2.value, - }; - - const backendApiUrl = process.env.NEXT_PUBLIC_BACKEND_API_URL; - const wsUrl = backendApiUrl.replace(/^http(s)?/, "ws"); - - const websocket = new WebSocket(`${wsUrl}/v1/crewai/subscribe`); - - onActivity([{ id: uuid4(), timestamp: Date.now(), activity: "Dispatching hiring crew 
agents" }]); - - websocket.onmessage = (event) => { - const data = JSON.parse(event.data); - - if (data.status === "PipelineRunActivity") { - onActivity([data.payload]); - return; - } - - onData({ - nodes: data.payload.nodes, - links: data.payload.edges, - }); - - const nodes_type_map: { [key: string]: number } = {}; - - for (let i = 0; i < data.payload.nodes.length; i++) { - const node = data.payload.nodes[i]; - if (!nodes_type_map[node.type]) { - nodes_type_map[node.type] = 0; - } - nodes_type_map[node.type] += 1; - } - - const activityMessage = Object.entries(nodes_type_map).reduce((message, [type, count]) => { - return `${message}\n | ${type}: ${count}`; - }, "Graph updated:"); - - onActivity([{ - id: uuid4(), - timestamp: Date.now(), - activity: activityMessage, - }]); - - if (data.status === "PipelineRunCompleted") { - websocket.close(); - } - }; - - onData(null); - setIsCrewAIRunning(true); - - return fetch("/v1/crewai/run", { - method: "POST", - body: JSON.stringify(crewAIConfig), - headers: { - "Content-Type": "application/json", - }, - }) - .then(response => response.json()) - .then(() => { - onActivity([{ id: uuid4(), timestamp: Date.now(), activity: "Hiring crew agents made a decision" }]); - }) - .catch(() => { - onActivity([{ id: uuid4(), timestamp: Date.now(), activity: "Hiring crew agents had problems while executing" }]); - }) - .finally(() => { - websocket.close(); - setIsCrewAIRunning(false); - }); - }; - - return ( -
-

Cognee Dev Mexican Standoff

- Agents compare GitHub profiles, and make a decision who is a better developer -
-
- - -
-
- - -
-
- - Start Mexican Standoff - {isCrewAIRunning && } - -
- ); -} diff --git a/cognee-frontend/src/app/(graph)/GraphControls.tsx b/cognee-frontend/src/app/(graph)/GraphControls.tsx index 24948bff7..e83fb1107 100644 --- a/cognee-frontend/src/app/(graph)/GraphControls.tsx +++ b/cognee-frontend/src/app/(graph)/GraphControls.tsx @@ -6,7 +6,6 @@ import { NodeObject, LinkObject } from "react-force-graph-2d"; import { ChangeEvent, useEffect, useImperativeHandle, useRef, useState } from "react"; import { DeleteIcon } from "@/ui/Icons"; -// import { FeedbackForm } from "@/ui/Partials"; import { CTAButton, Input, NeutralButton, Select } from "@/ui/elements"; interface GraphControlsProps { @@ -111,7 +110,7 @@ export default function GraphControls({ data, isAddNodeFormOpen, onGraphShapeCha }; const [isAuthShapeChangeEnabled, setIsAuthShapeChangeEnabled] = useState(true); - const shapeChangeTimeout = useRef(); + const shapeChangeTimeout = useRef(null); useEffect(() => { onGraphShapeChange(DEFAULT_GRAPH_SHAPE); @@ -230,12 +229,6 @@ export default function GraphControls({ data, isAddNodeFormOpen, onGraphShapeCha )} {/* )} */} - - {/* {selectedTab === "feedback" && ( -
- {}} /> -
- )} */} ); diff --git a/cognee-frontend/src/app/(graph)/GraphView.tsx b/cognee-frontend/src/app/(graph)/GraphView.tsx index 2435e055c..0c0a0d3b4 100644 --- a/cognee-frontend/src/app/(graph)/GraphView.tsx +++ b/cognee-frontend/src/app/(graph)/GraphView.tsx @@ -1,6 +1,6 @@ "use client"; -import { useCallback, useRef, useState, MutableRefObject } from "react"; +import { useCallback, useRef, useState, RefObject } from "react"; import Link from "next/link"; import { TextLogo } from "@/ui/App"; @@ -47,11 +47,11 @@ export default function GraphView() { updateData(newData); }, []); - const graphRef = useRef(); + const graphRef = useRef(null); - const graphControls = useRef(); + const graphControls = useRef(null); - const activityLog = useRef(); + const activityLog = useRef(null); return (
@@ -74,21 +74,18 @@ export default function GraphView() {
} + ref={graphRef as RefObject} data={data} - graphControls={graphControls as MutableRefObject} + graphControls={graphControls as RefObject} />
- {/*
- activityLog.current?.updateActivityLog(activities)} /> -
*/}

Activity Log

- } /> + } />
@@ -96,7 +93,7 @@ export default function GraphView() {
} + ref={graphControls as RefObject} isAddNodeFormOpen={isAddNodeFormOpen} onFitIntoView={() => graphRef.current!.zoomToFit(1000, 50)} onGraphShapeChange={(shape) => graphRef.current!.setGraphShape(shape)} diff --git a/cognee-frontend/src/app/(graph)/GraphVisualization.tsx b/cognee-frontend/src/app/(graph)/GraphVisualization.tsx index 6846e3aee..db4494d73 100644 --- a/cognee-frontend/src/app/(graph)/GraphVisualization.tsx +++ b/cognee-frontend/src/app/(graph)/GraphVisualization.tsx @@ -1,7 +1,7 @@ "use client"; import classNames from "classnames"; -import { MutableRefObject, useEffect, useImperativeHandle, useRef, useState, useCallback } from "react"; +import { RefObject, useEffect, useImperativeHandle, useRef, useState, useCallback } from "react"; import { forceCollide, forceManyBody } from "d3-force-3d"; import dynamic from "next/dynamic"; import { GraphControlsAPI } from "./GraphControls"; @@ -16,9 +16,9 @@ const ForceGraph = dynamic(() => import("react-force-graph-2d"), { import type { ForceGraphMethods, GraphData, LinkObject, NodeObject } from "react-force-graph-2d"; interface GraphVisuzaliationProps { - ref: MutableRefObject; + ref: RefObject; data?: GraphData; - graphControls: MutableRefObject; + graphControls: RefObject; className?: string; } @@ -205,7 +205,7 @@ export default function GraphVisualization({ ref, data, graphControls, className // eslint-disable-next-line @typescript-eslint/no-unused-vars function handleDagError(loopNodeIds: (string | number)[]) {} - const graphRef = useRef(); + const graphRef = useRef(null); useEffect(() => { if (data && graphRef.current) { @@ -224,6 +224,7 @@ export default function GraphVisualization({ ref, data, graphControls, className ) => { if (!graphRef.current) { console.warn("GraphVisualization: graphRef not ready yet"); + // eslint-disable-next-line @typescript-eslint/no-explicit-any return undefined as any; } @@ -239,7 +240,7 @@ export default function GraphVisualization({ ref, data, graphControls, className return 
(
} width={dimensions.width} height={dimensions.height} dagMode={graphShape as unknown as undefined} diff --git a/cognee-frontend/src/app/dashboard/page.tsx b/cognee-frontend/src/app/dashboard/page.tsx index 7ff3abaf4..d5d86bfc5 100644 --- a/cognee-frontend/src/app/dashboard/page.tsx +++ b/cognee-frontend/src/app/dashboard/page.tsx @@ -1,8 +1,8 @@ -"use server"; +"use client"; import Dashboard from "./Dashboard"; -export default async function Page() { +export default function Page() { const accessToken = ""; return ( diff --git a/cognee-frontend/src/app/page.tsx b/cognee-frontend/src/app/page.tsx index 80db75b20..44ead8769 100644 --- a/cognee-frontend/src/app/page.tsx +++ b/cognee-frontend/src/app/page.tsx @@ -1,3 +1,3 @@ export { default } from "./dashboard/page"; -// export const dynamic = "force-dynamic"; +export const dynamic = "force-dynamic"; diff --git a/cognee-frontend/src/modules/ingestion/useDatasets.ts b/cognee-frontend/src/modules/ingestion/useDatasets.ts index 6a125b591..e25d9f932 100644 --- a/cognee-frontend/src/modules/ingestion/useDatasets.ts +++ b/cognee-frontend/src/modules/ingestion/useDatasets.ts @@ -13,7 +13,6 @@ export interface Dataset { function useDatasets(useCloud = false) { const [datasets, setDatasets] = useState([]); - // eslint-disable-next-line @typescript-eslint/no-explicit-any // const statusTimeout = useRef(null); // const fetchDatasetStatuses = useCallback((datasets: Dataset[]) => { diff --git a/cognee-frontend/src/middleware.ts b/cognee-frontend/src/proxy.ts similarity index 93% rename from cognee-frontend/src/middleware.ts rename to cognee-frontend/src/proxy.ts index f53d88367..1b1a85f75 100644 --- a/cognee-frontend/src/middleware.ts +++ b/cognee-frontend/src/proxy.ts @@ -2,7 +2,7 @@ import { NextResponse, type NextRequest } from "next/server"; // import { auth0 } from "./modules/auth/auth0"; // eslint-disable-next-line @typescript-eslint/no-unused-vars -export async function middleware(request: NextRequest) { +export async 
function proxy(request: NextRequest) { // if (process.env.USE_AUTH0_AUTHORIZATION?.toLowerCase() === "true") { // if (request.nextUrl.pathname === "/auth/token") { // return NextResponse.next(); diff --git a/cognee-frontend/src/ui/Partials/FeedbackForm.tsx b/cognee-frontend/src/ui/Partials/FeedbackForm.tsx deleted file mode 100644 index db5b6f325..000000000 --- a/cognee-frontend/src/ui/Partials/FeedbackForm.tsx +++ /dev/null @@ -1,69 +0,0 @@ -"use client"; - -import { useState } from "react"; -import { LoadingIndicator } from "@/ui/App"; -import { fetch, useBoolean } from "@/utils"; -import { CTAButton, TextArea } from "@/ui/elements"; - -interface SignInFormPayload extends HTMLFormElement { - feedback: HTMLTextAreaElement; -} - -interface FeedbackFormProps { - onSuccess: () => void; -} - -export default function FeedbackForm({ onSuccess }: FeedbackFormProps) { - const { - value: isSubmittingFeedback, - setTrue: disableFeedbackSubmit, - setFalse: enableFeedbackSubmit, - } = useBoolean(false); - - const [feedbackError, setFeedbackError] = useState(null); - - const signIn = (event: React.FormEvent) => { - event.preventDefault(); - const formElements = event.currentTarget; - - setFeedbackError(null); - disableFeedbackSubmit(); - - fetch("/v1/crewai/feedback", { - method: "POST", - body: JSON.stringify({ - feedback: formElements.feedback.value, - }), - headers: { - "Content-Type": "application/json", - }, - }) - .then(response => response.json()) - .then(() => { - onSuccess(); - formElements.feedback.value = ""; - }) - .catch(error => setFeedbackError(error.detail)) - .finally(() => enableFeedbackSubmit()); - }; - - return ( -
-
-
- -