diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index 775eb2912..70a4b56e6 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -358,6 +358,34 @@ jobs: EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} run: uv run python ./cognee/tests/tasks/entity_extraction/entity_extraction_test.py + test-feedback-enrichment: + name: Test Feedback Enrichment + runs-on: ubuntu-22.04 + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Cognee Setup + uses: ./.github/actions/cognee_setup + with: + python-version: '3.11.x' + + - name: Dependencies already installed + run: echo "Dependencies already installed in setup" + + - name: Run Feedback Enrichment Test + env: + ENV: 'dev' + LLM_MODEL: ${{ secrets.LLM_MODEL }} + LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} + LLM_API_KEY: ${{ secrets.LLM_API_KEY }} + LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} + EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} + EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} + EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} + EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} + run: uv run python ./cognee/tests/test_feedback_enrichment.py + run_conversation_sessions_test: name: Conversation sessions test runs-on: ubuntu-latest diff --git a/cognee-mcp/README.md b/cognee-mcp/README.md index d14bc9fa1..9ac8b4973 100644 --- a/cognee-mcp/README.md +++ b/cognee-mcp/README.md @@ -110,6 +110,47 @@ If you'd rather run cognee-mcp in a container, you have two options: # For stdio transport (default) docker run -e TRANSPORT_MODE=stdio --env-file ./.env --rm -it cognee/cognee-mcp:main ``` + + **Installing optional dependencies at runtime:** + + You can install optional dependencies when running the container by setting the `EXTRAS` environment variable: + ```bash + # Install a single optional dependency group at runtime + docker run \ + -e TRANSPORT_MODE=http \ + -e EXTRAS=aws \ + --env-file ./.env \ + -p 
8000:8000 \ + --rm -it cognee/cognee-mcp:main + + # Install multiple optional dependency groups at runtime (comma-separated) + docker run \ + -e TRANSPORT_MODE=sse \ + -e EXTRAS=aws,postgres,neo4j \ + --env-file ./.env \ + -p 8000:8000 \ + --rm -it cognee/cognee-mcp:main + ``` + + **Available optional dependency groups:** + - `aws` - S3 storage support + - `postgres` / `postgres-binary` - PostgreSQL database support + - `neo4j` - Neo4j graph database support + - `neptune` - AWS Neptune support + - `chromadb` - ChromaDB vector store support + - `scraping` - Web scraping capabilities + - `distributed` - Modal distributed execution + - `langchain` - LangChain integration + - `llama-index` - LlamaIndex integration + - `anthropic` - Anthropic models + - `groq` - Groq models + - `mistral` - Mistral models + - `ollama` / `huggingface` - Local model support + - `docs` - Document processing + - `codegraph` - Code analysis + - `monitoring` - Sentry & Langfuse monitoring + - `redis` - Redis support + - And more (see [pyproject.toml](https://github.com/topoteretes/cognee/blob/main/pyproject.toml) for full list) 2. 
**Pull from Docker Hub** (no build required): ```bash # With HTTP transport (recommended for web deployments) @@ -119,6 +160,17 @@ If you'd rather run cognee-mcp in a container, you have two options: # With stdio transport (default) docker run -e TRANSPORT_MODE=stdio --env-file ./.env --rm -it cognee/cognee-mcp:main ``` + + **With runtime installation of optional dependencies:** + ```bash + # Install optional dependencies from Docker Hub image + docker run \ + -e TRANSPORT_MODE=http \ + -e EXTRAS=aws,postgres \ + --env-file ./.env \ + -p 8000:8000 \ + --rm -it cognee/cognee-mcp:main + ``` ### **Important: Docker vs Direct Usage** **Docker uses environment variables**, not command line arguments: diff --git a/cognee-mcp/entrypoint.sh b/cognee-mcp/entrypoint.sh index 2f122bbfd..cf7d19f0a 100644 --- a/cognee-mcp/entrypoint.sh +++ b/cognee-mcp/entrypoint.sh @@ -4,6 +4,42 @@ set -e # Exit on error echo "Debug mode: $DEBUG" echo "Environment: $ENVIRONMENT" +# Install optional dependencies if EXTRAS is set +if [ -n "$EXTRAS" ]; then + echo "Installing optional dependencies: $EXTRAS" + + # Get the cognee version that's currently installed + COGNEE_VERSION=$(uv pip show cognee | grep "Version:" | awk '{print $2}') + echo "Current cognee version: $COGNEE_VERSION" + + # Build the extras list for cognee + IFS=',' read -ra EXTRA_ARRAY <<< "$EXTRAS" + # Collect the requested extras into a de-duplicated, comma-separated list + ALL_EXTRAS="" + for extra in "${EXTRA_ARRAY[@]}"; do + # Trim whitespace + extra=$(echo "$extra" | xargs) + # Add to extras list if not already present + if [[ ! 
"$ALL_EXTRAS" =~ (^|,)"$extra"(,|$) ]]; then + if [ -z "$ALL_EXTRAS" ]; then + ALL_EXTRAS="$extra" + else + ALL_EXTRAS="$ALL_EXTRAS,$extra" + fi + fi + done + + echo "Installing cognee with extras: $ALL_EXTRAS" + echo "Running: uv pip install 'cognee[$ALL_EXTRAS]==$COGNEE_VERSION'" + uv pip install "cognee[$ALL_EXTRAS]==$COGNEE_VERSION" + + # Verify installation + echo "" + echo "✓ Optional dependencies installation completed" +else + echo "No optional dependencies specified" +fi + # Set default transport mode if not specified TRANSPORT_MODE=${TRANSPORT_MODE:-"stdio"} echo "Transport mode: $TRANSPORT_MODE" diff --git a/cognee/api/v1/add/routers/get_add_router.py b/cognee/api/v1/add/routers/get_add_router.py index 4d0063cc9..b2e7068b0 100644 --- a/cognee/api/v1/add/routers/get_add_router.py +++ b/cognee/api/v1/add/routers/get_add_router.py @@ -10,6 +10,7 @@ from cognee.modules.users.methods import get_authenticated_user from cognee.shared.utils import send_telemetry from cognee.modules.pipelines.models import PipelineRunErrored from cognee.shared.logging_utils import get_logger +from cognee import __version__ as cognee_version logger = get_logger() @@ -63,7 +64,11 @@ def get_add_router() -> APIRouter: send_telemetry( "Add API Endpoint Invoked", user.id, - additional_properties={"endpoint": "POST /v1/add", "node_set": node_set}, + additional_properties={ + "endpoint": "POST /v1/add", + "node_set": node_set, + "cognee_version": cognee_version, + }, ) from cognee.api.v1.add import add as cognee_add diff --git a/cognee/api/v1/cognify/routers/get_cognify_router.py b/cognee/api/v1/cognify/routers/get_cognify_router.py index 9e4bdbbfd..231bbcd11 100644 --- a/cognee/api/v1/cognify/routers/get_cognify_router.py +++ b/cognee/api/v1/cognify/routers/get_cognify_router.py @@ -29,7 +29,7 @@ from cognee.modules.pipelines.queues.pipeline_run_info_queues import ( ) from cognee.shared.logging_utils import get_logger from cognee.shared.utils import send_telemetry - +from cognee import 
__version__ as cognee_version logger = get_logger("api.cognify") @@ -98,6 +98,7 @@ def get_cognify_router() -> APIRouter: user.id, additional_properties={ "endpoint": "POST /v1/cognify", + "cognee_version": cognee_version, }, ) diff --git a/cognee/api/v1/datasets/routers/get_datasets_router.py b/cognee/api/v1/datasets/routers/get_datasets_router.py index be8b5af8d..eff87b3af 100644 --- a/cognee/api/v1/datasets/routers/get_datasets_router.py +++ b/cognee/api/v1/datasets/routers/get_datasets_router.py @@ -24,6 +24,7 @@ from cognee.modules.users.permissions.methods import ( from cognee.modules.graph.methods import get_formatted_graph_data from cognee.modules.pipelines.models import PipelineRunStatus from cognee.shared.utils import send_telemetry +from cognee import __version__ as cognee_version logger = get_logger() @@ -100,6 +101,7 @@ def get_datasets_router() -> APIRouter: user.id, additional_properties={ "endpoint": "GET /v1/datasets", + "cognee_version": cognee_version, }, ) @@ -147,6 +149,7 @@ def get_datasets_router() -> APIRouter: user.id, additional_properties={ "endpoint": "POST /v1/datasets", + "cognee_version": cognee_version, }, ) @@ -201,6 +204,7 @@ def get_datasets_router() -> APIRouter: additional_properties={ "endpoint": f"DELETE /v1/datasets/{str(dataset_id)}", "dataset_id": str(dataset_id), + "cognee_version": cognee_version, }, ) @@ -246,6 +250,7 @@ def get_datasets_router() -> APIRouter: "endpoint": f"DELETE /v1/datasets/{str(dataset_id)}/data/{str(data_id)}", "dataset_id": str(dataset_id), "data_id": str(data_id), + "cognee_version": cognee_version, }, ) @@ -327,6 +332,7 @@ def get_datasets_router() -> APIRouter: additional_properties={ "endpoint": f"GET /v1/datasets/{str(dataset_id)}/data", "dataset_id": str(dataset_id), + "cognee_version": cognee_version, }, ) @@ -387,6 +393,7 @@ def get_datasets_router() -> APIRouter: additional_properties={ "endpoint": "GET /v1/datasets/status", "datasets": [str(dataset_id) for dataset_id in datasets], + 
"cognee_version": cognee_version, }, ) @@ -433,6 +440,7 @@ def get_datasets_router() -> APIRouter: "endpoint": f"GET /v1/datasets/{str(dataset_id)}/data/{str(data_id)}/raw", "dataset_id": str(dataset_id), "data_id": str(data_id), + "cognee_version": cognee_version, }, ) diff --git a/cognee/api/v1/delete/routers/get_delete_router.py b/cognee/api/v1/delete/routers/get_delete_router.py index 9e6aa5799..3ff97681d 100644 --- a/cognee/api/v1/delete/routers/get_delete_router.py +++ b/cognee/api/v1/delete/routers/get_delete_router.py @@ -6,6 +6,7 @@ from cognee.shared.logging_utils import get_logger from cognee.modules.users.models import User from cognee.modules.users.methods import get_authenticated_user from cognee.shared.utils import send_telemetry +from cognee import __version__ as cognee_version logger = get_logger() @@ -39,6 +40,7 @@ def get_delete_router() -> APIRouter: "endpoint": "DELETE /v1/delete", "dataset_id": str(dataset_id), "data_id": str(data_id), + "cognee_version": cognee_version, }, ) diff --git a/cognee/api/v1/memify/routers/get_memify_router.py b/cognee/api/v1/memify/routers/get_memify_router.py index 1976d7414..cc07a3a0c 100644 --- a/cognee/api/v1/memify/routers/get_memify_router.py +++ b/cognee/api/v1/memify/routers/get_memify_router.py @@ -12,6 +12,7 @@ from cognee.modules.users.methods import get_authenticated_user from cognee.shared.utils import send_telemetry from cognee.modules.pipelines.models import PipelineRunErrored from cognee.shared.logging_utils import get_logger +from cognee import __version__ as cognee_version logger = get_logger() @@ -73,7 +74,7 @@ def get_memify_router() -> APIRouter: send_telemetry( "Memify API Endpoint Invoked", user.id, - additional_properties={"endpoint": "POST /v1/memify"}, + additional_properties={"endpoint": "POST /v1/memify", "cognee_version": cognee_version}, ) if not payload.dataset_id and not payload.dataset_name: diff --git a/cognee/api/v1/permissions/routers/get_permissions_router.py 
b/cognee/api/v1/permissions/routers/get_permissions_router.py index 637293268..565e95732 100644 --- a/cognee/api/v1/permissions/routers/get_permissions_router.py +++ b/cognee/api/v1/permissions/routers/get_permissions_router.py @@ -7,6 +7,7 @@ from fastapi.responses import JSONResponse from cognee.modules.users.models import User from cognee.modules.users.methods import get_authenticated_user from cognee.shared.utils import send_telemetry +from cognee import __version__ as cognee_version def get_permissions_router() -> APIRouter: @@ -48,6 +49,7 @@ def get_permissions_router() -> APIRouter: "endpoint": f"POST /v1/permissions/datasets/{str(principal_id)}", "dataset_ids": str(dataset_ids), "principal_id": str(principal_id), + "cognee_version": cognee_version, }, ) @@ -89,6 +91,7 @@ def get_permissions_router() -> APIRouter: additional_properties={ "endpoint": "POST /v1/permissions/roles", "role_name": role_name, + "cognee_version": cognee_version, }, ) @@ -133,6 +136,7 @@ def get_permissions_router() -> APIRouter: "endpoint": f"POST /v1/permissions/users/{str(user_id)}/roles", "user_id": str(user_id), "role_id": str(role_id), + "cognee_version": cognee_version, }, ) @@ -175,6 +179,7 @@ def get_permissions_router() -> APIRouter: "endpoint": f"POST /v1/permissions/users/{str(user_id)}/tenants", "user_id": str(user_id), "tenant_id": str(tenant_id), + "cognee_version": cognee_version, }, ) @@ -209,6 +214,7 @@ def get_permissions_router() -> APIRouter: additional_properties={ "endpoint": "POST /v1/permissions/tenants", "tenant_name": tenant_name, + "cognee_version": cognee_version, }, ) diff --git a/cognee/api/v1/search/routers/get_search_router.py b/cognee/api/v1/search/routers/get_search_router.py index 36d1c567e..171c03e49 100644 --- a/cognee/api/v1/search/routers/get_search_router.py +++ b/cognee/api/v1/search/routers/get_search_router.py @@ -13,6 +13,7 @@ from cognee.modules.users.models import User from cognee.modules.search.operations import get_history from 
cognee.modules.users.methods import get_authenticated_user from cognee.shared.utils import send_telemetry +from cognee import __version__ as cognee_version # Note: Datasets sent by name will only map to datasets owned by the request sender @@ -61,9 +62,7 @@ def get_search_router() -> APIRouter: send_telemetry( "Search API Endpoint Invoked", user.id, - additional_properties={ - "endpoint": "GET /v1/search", - }, + additional_properties={"endpoint": "GET /v1/search", "cognee_version": cognee_version}, ) try: @@ -118,6 +117,7 @@ def get_search_router() -> APIRouter: "top_k": payload.top_k, "only_context": payload.only_context, "use_combined_context": payload.use_combined_context, + "cognee_version": cognee_version, }, ) diff --git a/cognee/api/v1/sync/routers/get_sync_router.py b/cognee/api/v1/sync/routers/get_sync_router.py index d74ae4e7d..a7d466c10 100644 --- a/cognee/api/v1/sync/routers/get_sync_router.py +++ b/cognee/api/v1/sync/routers/get_sync_router.py @@ -12,6 +12,7 @@ from cognee.modules.sync.methods import get_running_sync_operations_for_user, ge from cognee.shared.utils import send_telemetry from cognee.shared.logging_utils import get_logger from cognee.api.v1.sync import SyncResponse +from cognee import __version__ as cognee_version from cognee.context_global_variables import set_database_global_context_variables logger = get_logger() @@ -99,6 +100,7 @@ def get_sync_router() -> APIRouter: user.id, additional_properties={ "endpoint": "POST /v1/sync", + "cognee_version": cognee_version, "dataset_ids": [str(id) for id in request.dataset_ids] if request.dataset_ids else "*", @@ -205,6 +207,7 @@ def get_sync_router() -> APIRouter: user.id, additional_properties={ "endpoint": "GET /v1/sync/status", + "cognee_version": cognee_version, }, ) diff --git a/cognee/api/v1/ui/ui.py b/cognee/api/v1/ui/ui.py index 51088c3e1..344acf87b 100644 --- a/cognee/api/v1/ui/ui.py +++ b/cognee/api/v1/ui/ui.py @@ -503,7 +503,7 @@ def start_ui( if start_mcp: logger.info("Starting 
Cognee MCP server with Docker...") try: - image = "cognee/cognee-mcp:feature-standalone-mcp" # TODO: change to "cognee/cognee-mcp:main" right before merging into main + image = "cognee/cognee-mcp:main" subprocess.run(["docker", "pull", image], check=True) import uuid @@ -538,9 +538,7 @@ def start_ui( env_file = os.path.join(cwd, ".env") docker_cmd.extend(["--env-file", env_file]) - docker_cmd.append( - image - ) # TODO: change to "cognee/cognee-mcp:main" right before merging into main + docker_cmd.append(image) mcp_process = subprocess.Popen( docker_cmd, diff --git a/cognee/api/v1/update/routers/get_update_router.py b/cognee/api/v1/update/routers/get_update_router.py index 4101e1e31..95e43b94f 100644 --- a/cognee/api/v1/update/routers/get_update_router.py +++ b/cognee/api/v1/update/routers/get_update_router.py @@ -9,6 +9,7 @@ from cognee.shared.logging_utils import get_logger from cognee.modules.users.models import User from cognee.modules.users.methods import get_authenticated_user from cognee.shared.utils import send_telemetry +from cognee import __version__ as cognee_version from cognee.modules.pipelines.models.PipelineRunInfo import ( PipelineRunErrored, ) @@ -64,6 +65,7 @@ def get_update_router() -> APIRouter: "dataset_id": str(dataset_id), "data_id": str(data_id), "node_set": str(node_set), + "cognee_version": cognee_version, }, ) diff --git a/cognee/api/v1/users/routers/get_visualize_router.py b/cognee/api/v1/users/routers/get_visualize_router.py index 95e79d3d5..5dc3868a6 100644 --- a/cognee/api/v1/users/routers/get_visualize_router.py +++ b/cognee/api/v1/users/routers/get_visualize_router.py @@ -8,6 +8,7 @@ from cognee.modules.users.models import User from cognee.context_global_variables import set_database_global_context_variables from cognee.shared.utils import send_telemetry +from cognee import __version__ as cognee_version logger = get_logger() @@ -46,6 +47,7 @@ def get_visualize_router() -> APIRouter: additional_properties={ "endpoint": "GET 
/v1/visualize", "dataset_id": str(dataset_id), + "cognee_version": cognee_version, }, ) diff --git a/cognee/base_config.py b/cognee/base_config.py index a2ad06249..a4c88e0da 100644 --- a/cognee/base_config.py +++ b/cognee/base_config.py @@ -1,4 +1,5 @@ import os +from pathlib import Path from typing import Optional from functools import lru_cache from cognee.root_dir import get_absolute_path, ensure_absolute_path @@ -11,6 +12,9 @@ class BaseConfig(BaseSettings): data_root_directory: str = get_absolute_path(".data_storage") system_root_directory: str = get_absolute_path(".cognee_system") cache_root_directory: str = get_absolute_path(".cognee_cache") + logs_root_directory: str = os.getenv( + "COGNEE_LOGS_DIR", str(os.path.join(os.path.dirname(os.path.dirname(__file__)), "logs")) + ) monitoring_tool: object = Observer.NONE @pydantic.model_validator(mode="after") @@ -30,6 +34,8 @@ class BaseConfig(BaseSettings): # Require absolute paths for root directories self.data_root_directory = ensure_absolute_path(self.data_root_directory) self.system_root_directory = ensure_absolute_path(self.system_root_directory) + self.logs_root_directory = ensure_absolute_path(self.logs_root_directory) + # Set monitoring tool based on available keys if self.langfuse_public_key and self.langfuse_secret_key: self.monitoring_tool = Observer.LANGFUSE @@ -49,6 +55,7 @@ class BaseConfig(BaseSettings): "system_root_directory": self.system_root_directory, "monitoring_tool": self.monitoring_tool, "cache_root_directory": self.cache_root_directory, + "logs_root_directory": self.logs_root_directory, } diff --git a/cognee/infrastructure/databases/graph/kuzu/adapter.py b/cognee/infrastructure/databases/graph/kuzu/adapter.py index 2d3866888..8dd160665 100644 --- a/cognee/infrastructure/databases/graph/kuzu/adapter.py +++ b/cognee/infrastructure/databases/graph/kuzu/adapter.py @@ -1366,9 +1366,15 @@ class KuzuAdapter(GraphDBInterface): params[param_name] = values where_clause = " AND ".join(where_clauses) 
- nodes_query = ( - f"MATCH (n:Node) WHERE {where_clause} RETURN n.id, {{properties: n.properties}}" - ) + nodes_query = f""" + MATCH (n:Node) + WHERE {where_clause} + RETURN n.id, {{ + name: n.name, + type: n.type, + properties: n.properties + }} + """ edges_query = f""" MATCH (n1:Node)-[r:EDGE]->(n2:Node) WHERE {where_clause.replace("n.", "n1.")} AND {where_clause.replace("n.", "n2.")} diff --git a/cognee/infrastructure/databases/vector/create_vector_engine.py b/cognee/infrastructure/databases/vector/create_vector_engine.py index 639bbb9f6..d1cf855d7 100644 --- a/cognee/infrastructure/databases/vector/create_vector_engine.py +++ b/cognee/infrastructure/databases/vector/create_vector_engine.py @@ -47,7 +47,7 @@ def create_vector_engine( embedding_engine=embedding_engine, ) - if vector_db_provider == "pgvector": + if vector_db_provider.lower() == "pgvector": from cognee.infrastructure.databases.relational import get_relational_config # Get configuration for postgres database @@ -78,7 +78,7 @@ def create_vector_engine( embedding_engine, ) - elif vector_db_provider == "chromadb": + elif vector_db_provider.lower() == "chromadb": try: import chromadb except ImportError: @@ -94,7 +94,7 @@ def create_vector_engine( embedding_engine=embedding_engine, ) - elif vector_db_provider == "neptune_analytics": + elif vector_db_provider.lower() == "neptune_analytics": try: from langchain_aws import NeptuneAnalyticsGraph except ImportError: @@ -122,7 +122,7 @@ def create_vector_engine( embedding_engine=embedding_engine, ) - else: + elif vector_db_provider.lower() == "lancedb": from .lancedb.LanceDBAdapter import LanceDBAdapter return LanceDBAdapter( @@ -130,3 +130,9 @@ def create_vector_engine( api_key=vector_db_key, embedding_engine=embedding_engine, ) + + else: + raise EnvironmentError( + f"Unsupported graph database provider: {vector_db_provider}. 
" + f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['LanceDB', 'PGVector', 'neptune_analytics', 'ChromaDB'])}" + ) diff --git a/cognee/infrastructure/files/utils/guess_file_type.py b/cognee/infrastructure/files/utils/guess_file_type.py index dcdd68cad..4e3ff6824 100644 --- a/cognee/infrastructure/files/utils/guess_file_type.py +++ b/cognee/infrastructure/files/utils/guess_file_type.py @@ -22,89 +22,6 @@ class FileTypeException(Exception): self.message = message -class TxtFileType(filetype.Type): - """ - Represents a text file type with specific MIME and extension properties. - - Public methods: - - match: Determines whether a given buffer matches the text file type. - """ - - MIME = "text/plain" - EXTENSION = "txt" - - def __init__(self): - super(TxtFileType, self).__init__(mime=TxtFileType.MIME, extension=TxtFileType.EXTENSION) - - def match(self, buf): - """ - Determine if the given buffer contains text content. - - Parameters: - ----------- - - - buf: The buffer to check for text content. - - Returns: - -------- - - Returns True if the buffer is identified as text content, otherwise False. - """ - return is_text_content(buf) - - -txt_file_type = TxtFileType() - -filetype.add_type(txt_file_type) - - -class CustomPdfMatcher(filetype.Type): - """ - Match PDF file types based on MIME type and extension. - - Public methods: - - match - - Instance variables: - - MIME: The MIME type of the PDF. - - EXTENSION: The file extension of the PDF. - """ - - MIME = "application/pdf" - EXTENSION = "pdf" - - def __init__(self): - super(CustomPdfMatcher, self).__init__( - mime=CustomPdfMatcher.MIME, extension=CustomPdfMatcher.EXTENSION - ) - - def match(self, buf): - """ - Determine if the provided buffer is a PDF file. - - This method checks for the presence of the PDF signature in the buffer. - - Raises: - - TypeError: If the buffer is not of bytes type. - - Parameters: - ----------- - - - buf: The buffer containing the data to be checked. 
- - Returns: - -------- - - Returns True if the buffer contains a PDF signature, otherwise returns False. - """ - return b"PDF-" in buf - - -custom_pdf_matcher = CustomPdfMatcher() - -filetype.add_type(custom_pdf_matcher) - - def guess_file_type(file: BinaryIO) -> filetype.Type: """ Guess the file type from the given binary file stream. diff --git a/cognee/infrastructure/llm/prompts/extract_query_time.txt b/cognee/infrastructure/llm/prompts/extract_query_time.txt index 763d0e1c4..ce78c3471 100644 --- a/cognee/infrastructure/llm/prompts/extract_query_time.txt +++ b/cognee/infrastructure/llm/prompts/extract_query_time.txt @@ -1,15 +1,13 @@ -For the purposes of identifying timestamps in a query, you are tasked with extracting relevant timestamps from the query. -## Timestamp requirements -- If the query contains interval extrack both starts_at and ends_at properties -- If the query contains an instantaneous timestamp, starts_at and ends_at should be the same -- If the query its open-ended (before 2009 or after 2009), the corresponding non defined end of the time should be none - -For example: "before 2009" -- starts_at: None, ends_at: 2009 or "after 2009" -- starts_at: 2009, ends_at: None -- Put always the data that comes first in time as starts_at and the timestamps that comes second in time as ends_at -- If starts_at or ends_at cannot be extracted both of them has to be None -## Output Format -Your reply should be a JSON: list of dictionaries with the following structure: -```python -class QueryInterval(BaseModel): - starts_at: Optional[Timestamp] = None - ends_at: Optional[Timestamp] = None -``` \ No newline at end of file +You are tasked with identifying relevant time periods where the answer to a given query should be searched. +Current date is: `{{ time_now }}`. Determine relevant period(s) and return structured intervals. + +Extraction rules: + +1. Query without specific timestamp: use the time period with starts_at set to None and ends_at set to now. +2. 
Explicit time intervals: If the query specifies a range (e.g., from 2010 to 2020, between January and March 2023), extract both start and end dates. Always assign the earlier date to starts_at and the later date to ends_at. +3. Single timestamp: If the query refers to one specific moment (e.g., in 2015, on March 5, 2022), set starts_at and ends_at to that same timestamp. +4. Open-ended time references: For phrases such as "before X" or "after X", represent the unspecified side as None. For example: before 2009 → starts_at: None, ends_at: 2009; after 2009 → starts_at: 2009, ends_at: None. +5. Current-time references ("now", "current", "today"): If the query explicitly refers to the present, set both starts_at and ends_at to now (the ingestion timestamp). +6. "Who is" and "Who was" questions: These imply a general identity or biographical inquiry without a specific temporal scope. Set both starts_at and ends_at to None. +7. Ordering rule: Always ensure the earlier date is assigned to starts_at and the later date to ends_at. +8. No temporal information: If no valid or inferable time reference is found, set both starts_at and ends_at to None. \ No newline at end of file diff --git a/cognee/infrastructure/llm/prompts/feedback_reaction_prompt.txt b/cognee/infrastructure/llm/prompts/feedback_reaction_prompt.txt new file mode 100644 index 000000000..c77ed8fca --- /dev/null +++ b/cognee/infrastructure/llm/prompts/feedback_reaction_prompt.txt @@ -0,0 +1,14 @@ +A question was previously answered, but the answer received negative feedback. +Please reconsider and improve the response. + +Question: {question} +Context originally used: {context} +Previous answer: {wrong_answer} +Feedback on that answer: {negative_feedback} + +Task: Provide a better response. The new answer should be short and direct. +Then explain briefly why this answer is better. 
+ +Format your reply as: +Answer: +Explanation: diff --git a/cognee/infrastructure/llm/prompts/feedback_report_prompt.txt b/cognee/infrastructure/llm/prompts/feedback_report_prompt.txt new file mode 100644 index 000000000..2d4194f4d --- /dev/null +++ b/cognee/infrastructure/llm/prompts/feedback_report_prompt.txt @@ -0,0 +1,13 @@ +Write a concise, stand-alone paragraph that explains the correct answer to the question below. +The paragraph should read naturally on its own, providing all necessary context and reasoning +so the answer is clear and well-supported. + +Question: {question} +Correct answer: {improved_answer} +Supporting context: {new_context} + +Your paragraph should: +- First sentence clearly states the correct answer as a full sentence +- Remainder flows from first sentence and provides explanation based on context +- Use simple, direct language that is easy to follow +- Use shorter sentences, no long-winded explanations diff --git a/cognee/infrastructure/llm/prompts/feedback_user_context_prompt.txt b/cognee/infrastructure/llm/prompts/feedback_user_context_prompt.txt new file mode 100644 index 000000000..3d9a25f96 --- /dev/null +++ b/cognee/infrastructure/llm/prompts/feedback_user_context_prompt.txt @@ -0,0 +1,5 @@ +Question: {question} +Context: {context} + +Provide a one paragraph human readable summary of this interaction context, +listing all the relevant facts and information in a simple and direct way. 
diff --git a/cognee/infrastructure/loaders/LoaderEngine.py b/cognee/infrastructure/loaders/LoaderEngine.py index 725f37b14..f9511e7c5 100644 --- a/cognee/infrastructure/loaders/LoaderEngine.py +++ b/cognee/infrastructure/loaders/LoaderEngine.py @@ -1,6 +1,7 @@ import filetype from typing import Dict, List, Optional, Any from .LoaderInterface import LoaderInterface +from cognee.infrastructure.files.utils.guess_file_type import guess_file_type from cognee.shared.logging_utils import get_logger logger = get_logger(__name__) @@ -80,7 +81,7 @@ class LoaderEngine: """ from pathlib import Path - file_info = filetype.guess(file_path) + file_info = guess_file_type(file_path) path_extension = Path(file_path).suffix.lstrip(".") diff --git a/cognee/modules/ontology/get_default_ontology_resolver.py b/cognee/modules/ontology/get_default_ontology_resolver.py index f9aebe59a..7d87c10a6 100644 --- a/cognee/modules/ontology/get_default_ontology_resolver.py +++ b/cognee/modules/ontology/get_default_ontology_resolver.py @@ -21,7 +21,8 @@ def get_ontology_resolver_from_env( Supported value: "rdflib". matching_strategy (str): The matching strategy to apply. Supported value: "fuzzy". - ontology_file_path (str): Path to the ontology file required for the resolver. + ontology_file_path (str): Path to the ontology file(s) required for the resolver. + Can be a single path or comma-separated paths for multiple files. Returns: BaseOntologyResolver: An instance of the requested ontology resolver. @@ -31,8 +32,13 @@ def get_ontology_resolver_from_env( or if required parameters are missing. 
""" if ontology_resolver == "rdflib" and matching_strategy == "fuzzy" and ontology_file_path: + if "," in ontology_file_path: + file_paths = [path.strip() for path in ontology_file_path.split(",")] + else: + file_paths = ontology_file_path + return RDFLibOntologyResolver( - matching_strategy=FuzzyMatchingStrategy(), ontology_file=ontology_file_path + matching_strategy=FuzzyMatchingStrategy(), ontology_file=file_paths ) else: raise EnvironmentError( diff --git a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py index 2a7a03751..45e32936a 100644 --- a/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +++ b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py @@ -2,7 +2,7 @@ import os import difflib from cognee.shared.logging_utils import get_logger from collections import deque -from typing import List, Tuple, Dict, Optional, Any +from typing import List, Tuple, Dict, Optional, Any, Union from rdflib import Graph, URIRef, RDF, RDFS, OWL from cognee.modules.ontology.exceptions import ( @@ -26,22 +26,50 @@ class RDFLibOntologyResolver(BaseOntologyResolver): def __init__( self, - ontology_file: Optional[str] = None, + ontology_file: Optional[Union[str, List[str]]] = None, matching_strategy: Optional[MatchingStrategy] = None, ) -> None: super().__init__(matching_strategy) self.ontology_file = ontology_file try: - if ontology_file and os.path.exists(ontology_file): + files_to_load = [] + if ontology_file is not None: + if isinstance(ontology_file, str): + files_to_load = [ontology_file] + elif isinstance(ontology_file, list): + files_to_load = ontology_file + else: + raise ValueError( + f"ontology_file must be a string, list of strings, or None. 
Got: {type(ontology_file)}" + ) + + if files_to_load: self.graph = Graph() - self.graph.parse(ontology_file) - logger.info("Ontology loaded successfully from file: %s", ontology_file) + loaded_files = [] + for file_path in files_to_load: + if os.path.exists(file_path): + self.graph.parse(file_path) + loaded_files.append(file_path) + logger.info("Ontology loaded successfully from file: %s", file_path) + else: + logger.warning( + "Ontology file '%s' not found. Skipping this file.", + file_path, + ) + + if not loaded_files: + logger.info( + "No valid ontology files found. No owl ontology will be attached to the graph." + ) + self.graph = None + else: + logger.info("Total ontology files loaded: %d", len(loaded_files)) else: logger.info( - "Ontology file '%s' not found. No owl ontology will be attached to the graph.", - ontology_file, + "No ontology file provided. No owl ontology will be attached to the graph." ) self.graph = None + self.build_lookup() except Exception as e: logger.error("Failed to load ontology", exc_info=e) diff --git a/cognee/modules/pipelines/operations/run_tasks_base.py b/cognee/modules/pipelines/operations/run_tasks_base.py index e5f577848..79d37a451 100644 --- a/cognee/modules/pipelines/operations/run_tasks_base.py +++ b/cognee/modules/pipelines/operations/run_tasks_base.py @@ -2,6 +2,7 @@ import inspect from cognee.shared.logging_utils import get_logger from cognee.modules.users.models import User from cognee.shared.utils import send_telemetry +from cognee import __version__ as cognee_version from ..tasks.task import Task @@ -25,6 +26,8 @@ async def handle_task( user_id=user.id, additional_properties={ "task_name": running_task.executable.__name__, + "cognee_version": cognee_version, + "tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant", }, ) @@ -46,6 +49,8 @@ async def handle_task( user_id=user.id, additional_properties={ "task_name": running_task.executable.__name__, + "cognee_version": cognee_version, + "tenant_id": 
str(user.tenant_id) if user.tenant_id else "Single User Tenant", }, ) except Exception as error: @@ -58,6 +63,8 @@ async def handle_task( user_id=user.id, additional_properties={ "task_name": running_task.executable.__name__, + "cognee_version": cognee_version, + "tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant", }, ) raise error diff --git a/cognee/modules/pipelines/operations/run_tasks_with_telemetry.py b/cognee/modules/pipelines/operations/run_tasks_with_telemetry.py index a2af18be6..ae968c7a5 100644 --- a/cognee/modules/pipelines/operations/run_tasks_with_telemetry.py +++ b/cognee/modules/pipelines/operations/run_tasks_with_telemetry.py @@ -4,6 +4,7 @@ from cognee.modules.settings import get_current_settings from cognee.modules.users.models import User from cognee.shared.logging_utils import get_logger from cognee.shared.utils import send_telemetry +from cognee import __version__ as cognee_version from .run_tasks_base import run_tasks_base from ..tasks.task import Task @@ -26,6 +27,8 @@ async def run_tasks_with_telemetry( user.id, additional_properties={ "pipeline_name": str(pipeline_name), + "cognee_version": cognee_version, + "tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant", } | config, ) @@ -39,7 +42,10 @@ async def run_tasks_with_telemetry( user.id, additional_properties={ "pipeline_name": str(pipeline_name), - }, + "cognee_version": cognee_version, + "tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant", + } + | config, ) except Exception as error: logger.error( @@ -53,6 +59,8 @@ async def run_tasks_with_telemetry( user.id, additional_properties={ "pipeline_name": str(pipeline_name), + "cognee_version": cognee_version, + "tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant", } | config, ) diff --git a/cognee/modules/retrieval/graph_completion_cot_retriever.py b/cognee/modules/retrieval/graph_completion_cot_retriever.py index 3f6ca81be..299db6855 100644 --- 
a/cognee/modules/retrieval/graph_completion_cot_retriever.py +++ b/cognee/modules/retrieval/graph_completion_cot_retriever.py @@ -1,10 +1,15 @@ import asyncio +import json from typing import Optional, List, Type, Any +from pydantic import BaseModel from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge from cognee.shared.logging_utils import get_logger from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever -from cognee.modules.retrieval.utils.completion import generate_completion, summarize_text +from cognee.modules.retrieval.utils.completion import ( + generate_structured_completion, + summarize_text, +) from cognee.modules.retrieval.utils.session_cache import ( save_conversation_history, get_conversation_history, @@ -17,6 +22,20 @@ from cognee.infrastructure.databases.cache.config import CacheConfig logger = get_logger() +def _as_answer_text(completion: Any) -> str: + """Convert completion to human-readable text for validation and follow-up prompts.""" + if isinstance(completion, str): + return completion + if isinstance(completion, BaseModel): + # Add notice that this is a structured response + json_str = completion.model_dump_json(indent=2) + return f"[Structured Response]\n{json_str}" + try: + return json.dumps(completion, indent=2) + except TypeError: + return str(completion) + + class GraphCompletionCotRetriever(GraphCompletionRetriever): """ Handles graph completion by generating responses based on a series of interactions with @@ -25,6 +44,7 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever): questions based on reasoning. 
The public methods are: - get_completion + - get_structured_completion Instance variables include: - validation_system_prompt_path @@ -61,6 +81,155 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever): self.followup_system_prompt_path = followup_system_prompt_path self.followup_user_prompt_path = followup_user_prompt_path + async def _run_cot_completion( + self, + query: str, + context: Optional[List[Edge]] = None, + conversation_history: str = "", + max_iter: int = 4, + response_model: Type = str, + ) -> tuple[Any, str, List[Edge]]: + """ + Run chain-of-thought completion with optional structured output. + + Parameters: + ----------- + - query: User query + - context: Optional pre-fetched context edges + - conversation_history: Optional conversation history string + - max_iter: Maximum CoT iterations + - response_model: Type for structured output (str for plain text) + + Returns: + -------- + - completion_result: The generated completion (string or structured model) + - context_text: The resolved context text + - triplets: The list of triplets used + """ + followup_question = "" + triplets = [] + completion = "" + + for round_idx in range(max_iter + 1): + if round_idx == 0: + if context is None: + triplets = await self.get_context(query) + context_text = await self.resolve_edges_to_text(triplets) + else: + context_text = await self.resolve_edges_to_text(context) + else: + triplets += await self.get_context(followup_question) + context_text = await self.resolve_edges_to_text(list(set(triplets))) + + completion = await generate_structured_completion( + query=query, + context=context_text, + user_prompt_path=self.user_prompt_path, + system_prompt_path=self.system_prompt_path, + system_prompt=self.system_prompt, + conversation_history=conversation_history if conversation_history else None, + response_model=response_model, + ) + + logger.info(f"Chain-of-thought: round {round_idx} - answer: {completion}") + + if round_idx < max_iter: + answer_text = 
_as_answer_text(completion) + valid_args = {"query": query, "answer": answer_text, "context": context_text} + valid_user_prompt = render_prompt( + filename=self.validation_user_prompt_path, context=valid_args + ) + valid_system_prompt = read_query_prompt( + prompt_file_name=self.validation_system_prompt_path + ) + + reasoning = await LLMGateway.acreate_structured_output( + text_input=valid_user_prompt, + system_prompt=valid_system_prompt, + response_model=str, + ) + followup_args = {"query": query, "answer": answer_text, "reasoning": reasoning} + followup_prompt = render_prompt( + filename=self.followup_user_prompt_path, context=followup_args + ) + followup_system = read_query_prompt( + prompt_file_name=self.followup_system_prompt_path + ) + + followup_question = await LLMGateway.acreate_structured_output( + text_input=followup_prompt, system_prompt=followup_system, response_model=str + ) + logger.info( + f"Chain-of-thought: round {round_idx} - follow-up question: {followup_question}" + ) + + return completion, context_text, triplets + + async def get_structured_completion( + self, + query: str, + context: Optional[List[Edge]] = None, + session_id: Optional[str] = None, + max_iter: int = 4, + response_model: Type = str, + ) -> Any: + """ + Generate structured completion responses based on a user query and contextual information. + + This method applies the same chain-of-thought logic as get_completion but returns + structured output using the provided response model. + + Parameters: + ----------- + - query (str): The user's query to be processed and answered. + - context (Optional[List[Edge]]): Optional context that may assist in answering the query. + If not provided, it will be fetched based on the query. (default None) + - session_id (Optional[str]): Optional session identifier for caching. If None, + defaults to 'default_session'. (default None) + - max_iter: The maximum number of iterations to refine the answer and generate + follow-up questions. 
(default 4) + - response_model (Type): The Pydantic model type for structured output. (default str) + + Returns: + -------- + - Any: The generated structured completion based on the response model. + """ + # Check if session saving is enabled + cache_config = CacheConfig() + user = session_user.get() + user_id = getattr(user, "id", None) + session_save = user_id and cache_config.caching + + # Load conversation history if enabled + conversation_history = "" + if session_save: + conversation_history = await get_conversation_history(session_id=session_id) + + completion, context_text, triplets = await self._run_cot_completion( + query=query, + context=context, + conversation_history=conversation_history, + max_iter=max_iter, + response_model=response_model, + ) + + if self.save_interaction and context and triplets and completion: + await self.save_qa( + question=query, answer=str(completion), context=context_text, triplets=triplets + ) + + # Save to session cache if enabled + if session_save: + context_summary = await summarize_text(context_text) + await save_conversation_history( + query=query, + context_summary=context_summary, + answer=str(completion), + session_id=session_id, + ) + + return completion + async def get_completion( self, query: str, @@ -92,82 +261,12 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever): - List[str]: A list containing the generated answer to the user's query. 
""" - followup_question = "" - triplets = [] - completion = "" - - # Retrieve conversation history if session saving is enabled - cache_config = CacheConfig() - user = session_user.get() - user_id = getattr(user, "id", None) - session_save = user_id and cache_config.caching - - conversation_history = "" - if session_save: - conversation_history = await get_conversation_history(session_id=session_id) - - for round_idx in range(max_iter + 1): - if round_idx == 0: - if context is None: - triplets = await self.get_context(query) - context_text = await self.resolve_edges_to_text(triplets) - else: - context_text = await self.resolve_edges_to_text(context) - else: - triplets += await self.get_context(followup_question) - context_text = await self.resolve_edges_to_text(list(set(triplets))) - - completion = await generate_completion( - query=query, - context=context_text, - user_prompt_path=self.user_prompt_path, - system_prompt_path=self.system_prompt_path, - system_prompt=self.system_prompt, - conversation_history=conversation_history if session_save else None, - ) - logger.info(f"Chain-of-thought: round {round_idx} - answer: {completion}") - if round_idx < max_iter: - valid_args = {"query": query, "answer": completion, "context": context_text} - valid_user_prompt = render_prompt( - filename=self.validation_user_prompt_path, context=valid_args - ) - valid_system_prompt = read_query_prompt( - prompt_file_name=self.validation_system_prompt_path - ) - - reasoning = await LLMGateway.acreate_structured_output( - text_input=valid_user_prompt, - system_prompt=valid_system_prompt, - response_model=str, - ) - followup_args = {"query": query, "answer": completion, "reasoning": reasoning} - followup_prompt = render_prompt( - filename=self.followup_user_prompt_path, context=followup_args - ) - followup_system = read_query_prompt( - prompt_file_name=self.followup_system_prompt_path - ) - - followup_question = await LLMGateway.acreate_structured_output( - text_input=followup_prompt, 
system_prompt=followup_system, response_model=str - ) - logger.info( - f"Chain-of-thought: round {round_idx} - follow-up question: {followup_question}" - ) - - if self.save_interaction and context and triplets and completion: - await self.save_qa( - question=query, answer=completion, context=context_text, triplets=triplets - ) - - # Save to session cache - if session_save: - context_summary = await summarize_text(context_text) - await save_conversation_history( - query=query, - context_summary=context_summary, - answer=completion, - session_id=session_id, - ) + completion = await self.get_structured_completion( + query=query, + context=context, + session_id=session_id, + max_iter=max_iter, + response_model=str, + ) return [completion] diff --git a/cognee/modules/retrieval/temporal_retriever.py b/cognee/modules/retrieval/temporal_retriever.py index 8ef5eed69..ec68d37bb 100644 --- a/cognee/modules/retrieval/temporal_retriever.py +++ b/cognee/modules/retrieval/temporal_retriever.py @@ -1,7 +1,7 @@ import os import asyncio from typing import Any, Optional, List, Type - +from datetime import datetime from operator import itemgetter from cognee.infrastructure.databases.vector import get_vector_engine @@ -79,7 +79,11 @@ class TemporalRetriever(GraphCompletionRetriever): else: base_directory = None - system_prompt = render_prompt(prompt_path, {}, base_directory=base_directory) + time_now = datetime.now().strftime("%d-%m-%Y") + + system_prompt = render_prompt( + prompt_path, {"time_now": time_now}, base_directory=base_directory + ) interval = await LLMGateway.acreate_structured_output(query, system_prompt, QueryInterval) @@ -108,8 +112,6 @@ class TemporalRetriever(GraphCompletionRetriever): graph_engine = await get_graph_engine() - triplets = [] - if time_from and time_to: ids = await graph_engine.collect_time_ids(time_from=time_from, time_to=time_to) elif time_from: diff --git a/cognee/modules/retrieval/utils/completion.py b/cognee/modules/retrieval/utils/completion.py 
index 6b6b6190e..db7a10252 100644 --- a/cognee/modules/retrieval/utils/completion.py +++ b/cognee/modules/retrieval/utils/completion.py @@ -1,17 +1,18 @@ -from typing import Optional +from typing import Optional, Type, Any from cognee.infrastructure.llm.LLMGateway import LLMGateway from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt -async def generate_completion( +async def generate_structured_completion( query: str, context: str, user_prompt_path: str, system_prompt_path: str, system_prompt: Optional[str] = None, conversation_history: Optional[str] = None, -) -> str: - """Generates a completion using LLM with given context and prompts.""" + response_model: Type = str, +) -> Any: + """Generates a structured completion using LLM with given context and prompts.""" args = {"question": query, "context": context} user_prompt = render_prompt(user_prompt_path, args) system_prompt = system_prompt if system_prompt else read_query_prompt(system_prompt_path) @@ -23,6 +24,26 @@ async def generate_completion( return await LLMGateway.acreate_structured_output( text_input=user_prompt, system_prompt=system_prompt, + response_model=response_model, + ) + + +async def generate_completion( + query: str, + context: str, + user_prompt_path: str, + system_prompt_path: str, + system_prompt: Optional[str] = None, + conversation_history: Optional[str] = None, +) -> str: + """Generates a completion using LLM with given context and prompts.""" + return await generate_structured_completion( + query=query, + context=context, + user_prompt_path=user_prompt_path, + system_prompt_path=system_prompt_path, + system_prompt=system_prompt, + conversation_history=conversation_history, response_model=str, ) diff --git a/cognee/modules/search/methods/search.py b/cognee/modules/search/methods/search.py index 29f50119c..aab004924 100644 --- a/cognee/modules/search/methods/search.py +++ b/cognee/modules/search/methods/search.py @@ -24,7 +24,7 @@ from cognee.modules.data.models 
import Dataset from cognee.modules.data.methods.get_authorized_existing_datasets import ( get_authorized_existing_datasets, ) - +from cognee import __version__ as cognee_version from .get_search_type_tools import get_search_type_tools from .no_access_control_search import no_access_control_search from ..utils.prepare_search_result import prepare_search_result @@ -64,7 +64,14 @@ async def search( Searching by dataset is only available in ENABLE_BACKEND_ACCESS_CONTROL mode """ query = await log_query(query_text, query_type.value, user.id) - send_telemetry("cognee.search EXECUTION STARTED", user.id) + send_telemetry( + "cognee.search EXECUTION STARTED", + user.id, + additional_properties={ + "cognee_version": cognee_version, + "tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant", + }, + ) # Use search function filtered by permissions if access control is enabled if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true": @@ -101,7 +108,14 @@ async def search( ) ] - send_telemetry("cognee.search EXECUTION COMPLETED", user.id) + send_telemetry( + "cognee.search EXECUTION COMPLETED", + user.id, + additional_properties={ + "cognee_version": cognee_version, + "tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant", + }, + ) await log_result( query.id, diff --git a/cognee/modules/visualization/cognee_network_visualization.py b/cognee/modules/visualization/cognee_network_visualization.py index c735e70f1..3bf5ea8e8 100644 --- a/cognee/modules/visualization/cognee_network_visualization.py +++ b/cognee/modules/visualization/cognee_network_visualization.py @@ -16,17 +16,17 @@ async def cognee_network_visualization(graph_data, destination_file_path: str = nodes_list = [] color_map = { - "Entity": "#f47710", - "EntityType": "#6510f4", - "DocumentChunk": "#801212", - "TextSummary": "#1077f4", - "TableRow": "#f47710", - "TableType": "#6510f4", - "ColumnValue": "#13613a", - "SchemaTable": "#f47710", - "DatabaseSchema": 
"#6510f4", - "SchemaRelationship": "#13613a", - "default": "#D3D3D3", + "Entity": "#5C10F4", + "EntityType": "#A550FF", + "DocumentChunk": "#0DFF00", + "TextSummary": "#5C10F4", + "TableRow": "#A550FF", + "TableType": "#5C10F4", + "ColumnValue": "#757470", + "SchemaTable": "#A550FF", + "DatabaseSchema": "#5C10F4", + "SchemaRelationship": "#323332", + "default": "#D8D8D8", } for node_id, node_info in nodes_data: @@ -98,16 +98,19 @@ async def cognee_network_visualization(graph_data, destination_file_path: str = +
+
Hover a node or edge to inspect details
@@ -305,8 +678,12 @@ async def cognee_network_visualization(graph_data, destination_file_path: str = """ - html_content = html_template.replace("{nodes}", json.dumps(nodes_list)) - html_content = html_content.replace("{links}", json.dumps(links_list)) + # Safely embed JSON inside