Merge branch 'main' of github.com:chinu0609/cognee
This commit is contained in:
commit
ce4a5c8311
39 changed files with 5507 additions and 4349 deletions
46
alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py
Normal file
46
alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
"""add_last_accessed_to_data
|
||||||
|
|
||||||
|
Revision ID: e1ec1dcb50b6
|
||||||
|
Revises: 211ab850ef3d
|
||||||
|
Create Date: 2025-11-04 21:45:52.642322
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision: str = 'e1ec1dcb50b6'
|
||||||
|
down_revision: Union[str, None] = '211ab850ef3d'
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
def _get_column(inspector, table, name, schema=None):
|
||||||
|
for col in inspector.get_columns(table, schema=schema):
|
||||||
|
if col["name"] == name:
|
||||||
|
return col
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Add the nullable ``last_accessed`` timestamp column to ``data``.

    The migration is idempotent: the column is created only when it does
    not already exist, and existing rows are then backfilled from
    ``created_at`` so they do not look "never accessed" after upgrading.
    """
    connection = op.get_bind()
    inspector = sa.inspect(connection)

    if _get_column(inspector, "data", "last_accessed") is None:
        op.add_column(
            "data",
            sa.Column("last_accessed", sa.DateTime(timezone=True), nullable=True),
        )
        # Optionally initialize with created_at values for existing records.
        op.execute("UPDATE data SET last_accessed = created_at")
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Drop the ``last_accessed`` column from ``data`` if it is present.

    Mirrors :func:`upgrade`: the drop only runs when the column actually
    exists, so the migration can be reversed safely on any schema state.
    """
    connection = op.get_bind()
    inspector = sa.inspect(connection)

    if _get_column(inspector, "data", "last_accessed") is not None:
        op.drop_column("data", "last_accessed")
|
||||||
|
|
@ -110,6 +110,47 @@ If you'd rather run cognee-mcp in a container, you have two options:
|
||||||
# For stdio transport (default)
|
# For stdio transport (default)
|
||||||
docker run -e TRANSPORT_MODE=stdio --env-file ./.env --rm -it cognee/cognee-mcp:main
|
docker run -e TRANSPORT_MODE=stdio --env-file ./.env --rm -it cognee/cognee-mcp:main
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Installing optional dependencies at runtime:**
|
||||||
|
|
||||||
|
You can install optional dependencies when running the container by setting the `EXTRAS` environment variable:
|
||||||
|
```bash
|
||||||
|
# Install a single optional dependency group at runtime
|
||||||
|
docker run \
|
||||||
|
-e TRANSPORT_MODE=http \
|
||||||
|
-e EXTRAS=aws \
|
||||||
|
--env-file ./.env \
|
||||||
|
-p 8000:8000 \
|
||||||
|
--rm -it cognee/cognee-mcp:main
|
||||||
|
|
||||||
|
# Install multiple optional dependency groups at runtime (comma-separated)
|
||||||
|
docker run \
|
||||||
|
-e TRANSPORT_MODE=sse \
|
||||||
|
-e EXTRAS=aws,postgres,neo4j \
|
||||||
|
--env-file ./.env \
|
||||||
|
-p 8000:8000 \
|
||||||
|
--rm -it cognee/cognee-mcp:main
|
||||||
|
```
|
||||||
|
|
||||||
|
**Available optional dependency groups:**
|
||||||
|
- `aws` - S3 storage support
|
||||||
|
- `postgres` / `postgres-binary` - PostgreSQL database support
|
||||||
|
- `neo4j` - Neo4j graph database support
|
||||||
|
- `neptune` - AWS Neptune support
|
||||||
|
- `chromadb` - ChromaDB vector store support
|
||||||
|
- `scraping` - Web scraping capabilities
|
||||||
|
- `distributed` - Modal distributed execution
|
||||||
|
- `langchain` - LangChain integration
|
||||||
|
- `llama-index` - LlamaIndex integration
|
||||||
|
- `anthropic` - Anthropic models
|
||||||
|
- `groq` - Groq models
|
||||||
|
- `mistral` - Mistral models
|
||||||
|
- `ollama` / `huggingface` - Local model support
|
||||||
|
- `docs` - Document processing
|
||||||
|
- `codegraph` - Code analysis
|
||||||
|
- `monitoring` - Sentry & Langfuse monitoring
|
||||||
|
- `redis` - Redis support
|
||||||
|
- And more (see [pyproject.toml](https://github.com/topoteretes/cognee/blob/main/pyproject.toml) for full list)
|
||||||
2. **Pull from Docker Hub** (no build required):
|
2. **Pull from Docker Hub** (no build required):
|
||||||
```bash
|
```bash
|
||||||
# With HTTP transport (recommended for web deployments)
|
# With HTTP transport (recommended for web deployments)
|
||||||
|
|
@ -119,6 +160,17 @@ If you'd rather run cognee-mcp in a container, you have two options:
|
||||||
# With stdio transport (default)
|
# With stdio transport (default)
|
||||||
docker run -e TRANSPORT_MODE=stdio --env-file ./.env --rm -it cognee/cognee-mcp:main
|
docker run -e TRANSPORT_MODE=stdio --env-file ./.env --rm -it cognee/cognee-mcp:main
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**With runtime installation of optional dependencies:**
|
||||||
|
```bash
|
||||||
|
# Install optional dependencies from Docker Hub image
|
||||||
|
docker run \
|
||||||
|
-e TRANSPORT_MODE=http \
|
||||||
|
-e EXTRAS=aws,postgres \
|
||||||
|
--env-file ./.env \
|
||||||
|
-p 8000:8000 \
|
||||||
|
--rm -it cognee/cognee-mcp:main
|
||||||
|
```
|
||||||
|
|
||||||
### **Important: Docker vs Direct Usage**
|
### **Important: Docker vs Direct Usage**
|
||||||
**Docker uses environment variables**, not command line arguments:
|
**Docker uses environment variables**, not command line arguments:
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,42 @@ set -e # Exit on error
|
||||||
echo "Debug mode: $DEBUG"
|
echo "Debug mode: $DEBUG"
|
||||||
echo "Environment: $ENVIRONMENT"
|
echo "Environment: $ENVIRONMENT"
|
||||||
|
|
||||||
|
# Install optional dependencies if EXTRAS is set
|
||||||
|
if [ -n "$EXTRAS" ]; then
|
||||||
|
echo "Installing optional dependencies: $EXTRAS"
|
||||||
|
|
||||||
|
# Get the cognee version that's currently installed
|
||||||
|
COGNEE_VERSION=$(uv pip show cognee | grep "Version:" | awk '{print $2}')
|
||||||
|
echo "Current cognee version: $COGNEE_VERSION"
|
||||||
|
|
||||||
|
# Build the extras list for cognee
|
||||||
|
IFS=',' read -ra EXTRA_ARRAY <<< "$EXTRAS"
|
||||||
|
# Combine base extras from pyproject.toml with requested extras
|
||||||
|
ALL_EXTRAS=""
|
||||||
|
for extra in "${EXTRA_ARRAY[@]}"; do
|
||||||
|
# Trim whitespace
|
||||||
|
extra=$(echo "$extra" | xargs)
|
||||||
|
# Add to extras list if not already present
|
||||||
|
if [[ ! "$ALL_EXTRAS" =~ (^|,)"$extra"(,|$) ]]; then
|
||||||
|
if [ -z "$ALL_EXTRAS" ]; then
|
||||||
|
ALL_EXTRAS="$extra"
|
||||||
|
else
|
||||||
|
ALL_EXTRAS="$ALL_EXTRAS,$extra"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "Installing cognee with extras: $ALL_EXTRAS"
|
||||||
|
echo "Running: uv pip install 'cognee[$ALL_EXTRAS]==$COGNEE_VERSION'"
|
||||||
|
uv pip install "cognee[$ALL_EXTRAS]==$COGNEE_VERSION"
|
||||||
|
|
||||||
|
# Verify installation
|
||||||
|
echo ""
|
||||||
|
echo "✓ Optional dependencies installation completed"
|
||||||
|
else
|
||||||
|
echo "No optional dependencies specified"
|
||||||
|
fi
|
||||||
|
|
||||||
# Set default transport mode if not specified
|
# Set default transport mode if not specified
|
||||||
TRANSPORT_MODE=${TRANSPORT_MODE:-"stdio"}
|
TRANSPORT_MODE=${TRANSPORT_MODE:-"stdio"}
|
||||||
echo "Transport mode: $TRANSPORT_MODE"
|
echo "Transport mode: $TRANSPORT_MODE"
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
import os
|
import os
|
||||||
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from cognee.root_dir import get_absolute_path, ensure_absolute_path
|
from cognee.root_dir import get_absolute_path, ensure_absolute_path
|
||||||
|
|
@ -11,6 +12,9 @@ class BaseConfig(BaseSettings):
|
||||||
data_root_directory: str = get_absolute_path(".data_storage")
|
data_root_directory: str = get_absolute_path(".data_storage")
|
||||||
system_root_directory: str = get_absolute_path(".cognee_system")
|
system_root_directory: str = get_absolute_path(".cognee_system")
|
||||||
cache_root_directory: str = get_absolute_path(".cognee_cache")
|
cache_root_directory: str = get_absolute_path(".cognee_cache")
|
||||||
|
logs_root_directory: str = os.getenv(
|
||||||
|
"COGNEE_LOGS_DIR", str(os.path.join(os.path.dirname(os.path.dirname(__file__)), "logs"))
|
||||||
|
)
|
||||||
monitoring_tool: object = Observer.NONE
|
monitoring_tool: object = Observer.NONE
|
||||||
|
|
||||||
@pydantic.model_validator(mode="after")
|
@pydantic.model_validator(mode="after")
|
||||||
|
|
@ -30,6 +34,8 @@ class BaseConfig(BaseSettings):
|
||||||
# Require absolute paths for root directories
|
# Require absolute paths for root directories
|
||||||
self.data_root_directory = ensure_absolute_path(self.data_root_directory)
|
self.data_root_directory = ensure_absolute_path(self.data_root_directory)
|
||||||
self.system_root_directory = ensure_absolute_path(self.system_root_directory)
|
self.system_root_directory = ensure_absolute_path(self.system_root_directory)
|
||||||
|
self.logs_root_directory = ensure_absolute_path(self.logs_root_directory)
|
||||||
|
|
||||||
# Set monitoring tool based on available keys
|
# Set monitoring tool based on available keys
|
||||||
if self.langfuse_public_key and self.langfuse_secret_key:
|
if self.langfuse_public_key and self.langfuse_secret_key:
|
||||||
self.monitoring_tool = Observer.LANGFUSE
|
self.monitoring_tool = Observer.LANGFUSE
|
||||||
|
|
@ -49,6 +55,7 @@ class BaseConfig(BaseSettings):
|
||||||
"system_root_directory": self.system_root_directory,
|
"system_root_directory": self.system_root_directory,
|
||||||
"monitoring_tool": self.monitoring_tool,
|
"monitoring_tool": self.monitoring_tool,
|
||||||
"cache_root_directory": self.cache_root_directory,
|
"cache_root_directory": self.cache_root_directory,
|
||||||
|
"logs_root_directory": self.logs_root_directory,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -47,7 +47,7 @@ def create_vector_engine(
|
||||||
embedding_engine=embedding_engine,
|
embedding_engine=embedding_engine,
|
||||||
)
|
)
|
||||||
|
|
||||||
if vector_db_provider == "pgvector":
|
if vector_db_provider.lower() == "pgvector":
|
||||||
from cognee.infrastructure.databases.relational import get_relational_config
|
from cognee.infrastructure.databases.relational import get_relational_config
|
||||||
|
|
||||||
# Get configuration for postgres database
|
# Get configuration for postgres database
|
||||||
|
|
@ -78,7 +78,7 @@ def create_vector_engine(
|
||||||
embedding_engine,
|
embedding_engine,
|
||||||
)
|
)
|
||||||
|
|
||||||
elif vector_db_provider == "chromadb":
|
elif vector_db_provider.lower() == "chromadb":
|
||||||
try:
|
try:
|
||||||
import chromadb
|
import chromadb
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
|
@ -94,7 +94,7 @@ def create_vector_engine(
|
||||||
embedding_engine=embedding_engine,
|
embedding_engine=embedding_engine,
|
||||||
)
|
)
|
||||||
|
|
||||||
elif vector_db_provider == "neptune_analytics":
|
elif vector_db_provider.lower() == "neptune_analytics":
|
||||||
try:
|
try:
|
||||||
from langchain_aws import NeptuneAnalyticsGraph
|
from langchain_aws import NeptuneAnalyticsGraph
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
|
@ -122,7 +122,7 @@ def create_vector_engine(
|
||||||
embedding_engine=embedding_engine,
|
embedding_engine=embedding_engine,
|
||||||
)
|
)
|
||||||
|
|
||||||
else:
|
elif vector_db_provider.lower() == "lancedb":
|
||||||
from .lancedb.LanceDBAdapter import LanceDBAdapter
|
from .lancedb.LanceDBAdapter import LanceDBAdapter
|
||||||
|
|
||||||
return LanceDBAdapter(
|
return LanceDBAdapter(
|
||||||
|
|
@ -130,3 +130,9 @@ def create_vector_engine(
|
||||||
api_key=vector_db_key,
|
api_key=vector_db_key,
|
||||||
embedding_engine=embedding_engine,
|
embedding_engine=embedding_engine,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise EnvironmentError(
|
||||||
|
f"Unsupported graph database provider: {vector_db_provider}. "
|
||||||
|
f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['LanceDB', 'PGVector', 'neptune_analytics', 'ChromaDB'])}"
|
||||||
|
)
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,9 @@ class DataPoint(BaseModel):
|
||||||
updated_at: int = Field(
|
updated_at: int = Field(
|
||||||
default_factory=lambda: int(datetime.now(timezone.utc).timestamp() * 1000)
|
default_factory=lambda: int(datetime.now(timezone.utc).timestamp() * 1000)
|
||||||
)
|
)
|
||||||
|
last_accessed_at: int = Field(
|
||||||
|
default_factory=lambda: int(datetime.now(timezone.utc).timestamp() * 1000)
|
||||||
|
)
|
||||||
ontology_valid: bool = False
|
ontology_valid: bool = False
|
||||||
version: int = 1 # Default version
|
version: int = 1 # Default version
|
||||||
topological_rank: Optional[int] = 0
|
topological_rank: Optional[int] = 0
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
import io
|
import io
|
||||||
import os.path
|
import os.path
|
||||||
from typing import BinaryIO, TypedDict
|
from typing import BinaryIO, TypedDict, Optional
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from cognee.shared.logging_utils import get_logger
|
from cognee.shared.logging_utils import get_logger
|
||||||
|
|
@ -27,7 +27,7 @@ class FileMetadata(TypedDict):
|
||||||
file_size: int
|
file_size: int
|
||||||
|
|
||||||
|
|
||||||
async def get_file_metadata(file: BinaryIO) -> FileMetadata:
|
async def get_file_metadata(file: BinaryIO, name: Optional[str] = None) -> FileMetadata:
|
||||||
"""
|
"""
|
||||||
Retrieve metadata from a file object.
|
Retrieve metadata from a file object.
|
||||||
|
|
||||||
|
|
@ -53,7 +53,7 @@ async def get_file_metadata(file: BinaryIO) -> FileMetadata:
|
||||||
except io.UnsupportedOperation as error:
|
except io.UnsupportedOperation as error:
|
||||||
logger.error(f"Error retrieving content hash for file: {file.name} \n{str(error)}\n\n")
|
logger.error(f"Error retrieving content hash for file: {file.name} \n{str(error)}\n\n")
|
||||||
|
|
||||||
file_type = guess_file_type(file)
|
file_type = guess_file_type(file, name)
|
||||||
|
|
||||||
file_path = getattr(file, "name", None) or getattr(file, "full_name", None)
|
file_path = getattr(file, "name", None) or getattr(file, "full_name", None)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,9 @@
|
||||||
from typing import BinaryIO
|
import io
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import BinaryIO, Optional, Any
|
||||||
import filetype
|
import filetype
|
||||||
from .is_text_content import is_text_content
|
from tempfile import SpooledTemporaryFile
|
||||||
|
from filetype.types.base import Type
|
||||||
|
|
||||||
|
|
||||||
class FileTypeException(Exception):
|
class FileTypeException(Exception):
|
||||||
|
|
@ -22,90 +25,7 @@ class FileTypeException(Exception):
|
||||||
self.message = message
|
self.message = message
|
||||||
|
|
||||||
|
|
||||||
class TxtFileType(filetype.Type):
|
def guess_file_type(file: BinaryIO, name: Optional[str] = None) -> filetype.Type:
|
||||||
"""
|
|
||||||
Represents a text file type with specific MIME and extension properties.
|
|
||||||
|
|
||||||
Public methods:
|
|
||||||
- match: Determines whether a given buffer matches the text file type.
|
|
||||||
"""
|
|
||||||
|
|
||||||
MIME = "text/plain"
|
|
||||||
EXTENSION = "txt"
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
super(TxtFileType, self).__init__(mime=TxtFileType.MIME, extension=TxtFileType.EXTENSION)
|
|
||||||
|
|
||||||
def match(self, buf):
|
|
||||||
"""
|
|
||||||
Determine if the given buffer contains text content.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
-----------
|
|
||||||
|
|
||||||
- buf: The buffer to check for text content.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
--------
|
|
||||||
|
|
||||||
Returns True if the buffer is identified as text content, otherwise False.
|
|
||||||
"""
|
|
||||||
return is_text_content(buf)
|
|
||||||
|
|
||||||
|
|
||||||
txt_file_type = TxtFileType()
|
|
||||||
|
|
||||||
filetype.add_type(txt_file_type)
|
|
||||||
|
|
||||||
|
|
||||||
class CustomPdfMatcher(filetype.Type):
|
|
||||||
"""
|
|
||||||
Match PDF file types based on MIME type and extension.
|
|
||||||
|
|
||||||
Public methods:
|
|
||||||
- match
|
|
||||||
|
|
||||||
Instance variables:
|
|
||||||
- MIME: The MIME type of the PDF.
|
|
||||||
- EXTENSION: The file extension of the PDF.
|
|
||||||
"""
|
|
||||||
|
|
||||||
MIME = "application/pdf"
|
|
||||||
EXTENSION = "pdf"
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
super(CustomPdfMatcher, self).__init__(
|
|
||||||
mime=CustomPdfMatcher.MIME, extension=CustomPdfMatcher.EXTENSION
|
|
||||||
)
|
|
||||||
|
|
||||||
def match(self, buf):
|
|
||||||
"""
|
|
||||||
Determine if the provided buffer is a PDF file.
|
|
||||||
|
|
||||||
This method checks for the presence of the PDF signature in the buffer.
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
- TypeError: If the buffer is not of bytes type.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
-----------
|
|
||||||
|
|
||||||
- buf: The buffer containing the data to be checked.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
--------
|
|
||||||
|
|
||||||
Returns True if the buffer contains a PDF signature, otherwise returns False.
|
|
||||||
"""
|
|
||||||
return b"PDF-" in buf
|
|
||||||
|
|
||||||
|
|
||||||
custom_pdf_matcher = CustomPdfMatcher()
|
|
||||||
|
|
||||||
filetype.add_type(custom_pdf_matcher)
|
|
||||||
|
|
||||||
|
|
||||||
def guess_file_type(file: BinaryIO) -> filetype.Type:
|
|
||||||
"""
|
"""
|
||||||
Guess the file type from the given binary file stream.
|
Guess the file type from the given binary file stream.
|
||||||
|
|
||||||
|
|
@ -122,12 +42,23 @@ def guess_file_type(file: BinaryIO) -> filetype.Type:
|
||||||
|
|
||||||
- filetype.Type: The guessed file type, represented as filetype.Type.
|
- filetype.Type: The guessed file type, represented as filetype.Type.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# Note: If file has .txt or .text extension, consider it a plain text file as filetype.guess may not detect it properly
|
||||||
|
# as it contains no magic number encoding
|
||||||
|
ext = None
|
||||||
|
if isinstance(file, str):
|
||||||
|
ext = Path(file).suffix
|
||||||
|
elif name is not None:
|
||||||
|
ext = Path(name).suffix
|
||||||
|
|
||||||
|
if ext in [".txt", ".text"]:
|
||||||
|
file_type = Type("text/plain", "txt")
|
||||||
|
return file_type
|
||||||
|
|
||||||
file_type = filetype.guess(file)
|
file_type = filetype.guess(file)
|
||||||
|
|
||||||
# If file type could not be determined consider it a plain text file as they don't have magic number encoding
|
# If file type could not be determined consider it a plain text file as they don't have magic number encoding
|
||||||
if file_type is None:
|
if file_type is None:
|
||||||
from filetype.types.base import Type
|
|
||||||
|
|
||||||
file_type = Type("text/plain", "txt")
|
file_type = Type("text/plain", "txt")
|
||||||
|
|
||||||
if file_type is None:
|
if file_type is None:
|
||||||
|
|
|
||||||
|
|
@ -1,15 +1,13 @@
|
||||||
For the purposes of identifying timestamps in a query, you are tasked with extracting relevant timestamps from the query.
|
You are tasked with identifying relevant time periods where the answer to a given query should be searched.
|
||||||
## Timestamp requirements
|
Current date is: `{{ time_now }}`. Determine relevant period(s) and return structured intervals.
|
||||||
- If the query contains an interval, extract both starts_at and ends_at properties
|
|
||||||
- If the query contains an instantaneous timestamp, starts_at and ends_at should be the same
|
Extraction rules:
|
||||||
- If the query is open-ended (before 2009 or after 2009), the corresponding undefined end of the interval should be None
|
|
||||||
-For example: "before 2009" -- starts_at: None, ends_at: 2009 or "after 2009" -- starts_at: 2009, ends_at: None
|
1. Query without specific timestamp: use the time period with starts_at set to None and ends_at set to now.
|
||||||
- Always put the date that comes first in time as starts_at and the timestamp that comes second in time as ends_at
|
2. Explicit time intervals: If the query specifies a range (e.g., from 2010 to 2020, between January and March 2023), extract both start and end dates. Always assign the earlier date to starts_at and the later date to ends_at.
|
||||||
- If starts_at or ends_at cannot be extracted both of them has to be None
|
3. Single timestamp: If the query refers to one specific moment (e.g., in 2015, on March 5, 2022), set starts_at and ends_at to that same timestamp.
|
||||||
## Output Format
|
4. Open-ended time references: For phrases such as "before X" or "after X", represent the unspecified side as None. For example: before 2009 → starts_at: None, ends_at: 2009; after 2009 → starts_at: 2009, ends_at: None.
|
||||||
Your reply should be a JSON: list of dictionaries with the following structure:
|
5. Current-time references ("now", "current", "today"): If the query explicitly refers to the present, set both starts_at and ends_at to now (the ingestion timestamp).
|
||||||
```python
|
6. "Who is" and "Who was" questions: These imply a general identity or biographical inquiry without a specific temporal scope. Set both starts_at and ends_at to None.
|
||||||
class QueryInterval(BaseModel):
|
7. Ordering rule: Always ensure the earlier date is assigned to starts_at and the later date to ends_at.
|
||||||
starts_at: Optional[Timestamp] = None
|
8. No temporal information: If no valid or inferable time reference is found, set both starts_at and ends_at to None.
|
||||||
ends_at: Optional[Timestamp] = None
|
|
||||||
```
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
import filetype
|
import filetype
|
||||||
from typing import Dict, List, Optional, Any
|
from typing import Dict, List, Optional, Any
|
||||||
from .LoaderInterface import LoaderInterface
|
from .LoaderInterface import LoaderInterface
|
||||||
|
from cognee.infrastructure.files.utils.guess_file_type import guess_file_type
|
||||||
from cognee.shared.logging_utils import get_logger
|
from cognee.shared.logging_utils import get_logger
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
@ -80,7 +81,7 @@ class LoaderEngine:
|
||||||
"""
|
"""
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
file_info = filetype.guess(file_path)
|
file_info = guess_file_type(file_path)
|
||||||
|
|
||||||
path_extension = Path(file_path).suffix.lstrip(".")
|
path_extension = Path(file_path).suffix.lstrip(".")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -42,6 +42,7 @@ class AudioLoader(LoaderInterface):
|
||||||
"audio/wav",
|
"audio/wav",
|
||||||
"audio/amr",
|
"audio/amr",
|
||||||
"audio/aiff",
|
"audio/aiff",
|
||||||
|
"audio/x-wav",
|
||||||
]
|
]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
from typing import List, Union
|
from typing import List, Union
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
from datetime import datetime, timezone
|
||||||
from cognee.infrastructure.engine import DataPoint
|
from cognee.infrastructure.engine import DataPoint
|
||||||
from cognee.modules.data.processing.document_types import Document
|
from cognee.modules.data.processing.document_types import Document
|
||||||
from cognee.modules.engine.models import Entity
|
from cognee.modules.engine.models import Entity
|
||||||
|
|
@ -22,6 +24,7 @@ class DocumentChunk(DataPoint):
|
||||||
- cut_type: The type of cut that defined this chunk.
|
- cut_type: The type of cut that defined this chunk.
|
||||||
- is_part_of: The document to which this chunk belongs.
|
- is_part_of: The document to which this chunk belongs.
|
||||||
- contains: A list of entities or events contained within the chunk (default is None).
|
- contains: A list of entities or events contained within the chunk (default is None).
|
||||||
|
- last_accessed_at: The timestamp of the last time the chunk was accessed.
|
||||||
- metadata: A dictionary to hold meta information related to the chunk, including index
|
- metadata: A dictionary to hold meta information related to the chunk, including index
|
||||||
fields.
|
fields.
|
||||||
"""
|
"""
|
||||||
|
|
@ -32,5 +35,4 @@ class DocumentChunk(DataPoint):
|
||||||
cut_type: str
|
cut_type: str
|
||||||
is_part_of: Document
|
is_part_of: Document
|
||||||
contains: List[Union[Entity, Event]] = None
|
contains: List[Union[Entity, Event]] = None
|
||||||
|
|
||||||
metadata: dict = {"index_fields": ["text"]}
|
metadata: dict = {"index_fields": ["text"]}
|
||||||
|
|
|
||||||
|
|
@ -36,6 +36,7 @@ class Data(Base):
|
||||||
data_size = Column(Integer, nullable=True) # File size in bytes
|
data_size = Column(Integer, nullable=True) # File size in bytes
|
||||||
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||||
updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))
|
updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))
|
||||||
|
last_accessed = Column(DateTime(timezone=True), nullable=True)
|
||||||
|
|
||||||
datasets = relationship(
|
datasets = relationship(
|
||||||
"Dataset",
|
"Dataset",
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,11 @@
|
||||||
from cognee.infrastructure.engine import DataPoint
|
from cognee.infrastructure.engine import DataPoint
|
||||||
from cognee.modules.engine.models.EntityType import EntityType
|
from cognee.modules.engine.models.EntityType import EntityType
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
class Entity(DataPoint):
|
class Entity(DataPoint):
|
||||||
name: str
|
name: str
|
||||||
is_a: Optional[EntityType] = None
|
is_a: Optional[EntityType] = None
|
||||||
description: str
|
description: str
|
||||||
|
|
||||||
metadata: dict = {"index_fields": ["name"]}
|
metadata: dict = {"index_fields": ["name"]}
|
||||||
|
|
|
||||||
|
|
@ -5,3 +5,4 @@ from .retrieve_existing_edges import retrieve_existing_edges
|
||||||
from .convert_node_to_data_point import convert_node_to_data_point
|
from .convert_node_to_data_point import convert_node_to_data_point
|
||||||
from .deduplicate_nodes_and_edges import deduplicate_nodes_and_edges
|
from .deduplicate_nodes_and_edges import deduplicate_nodes_and_edges
|
||||||
from .resolve_edges_to_text import resolve_edges_to_text
|
from .resolve_edges_to_text import resolve_edges_to_text
|
||||||
|
from .get_entity_nodes_from_triplets import get_entity_nodes_from_triplets
|
||||||
|
|
|
||||||
13
cognee/modules/graph/utils/get_entity_nodes_from_triplets.py
Normal file
13
cognee/modules/graph/utils/get_entity_nodes_from_triplets.py
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
|
||||||
|
def get_entity_nodes_from_triplets(triplets):
    """Collect unique entity node ids from a sequence of triplets.

    Each triplet may expose ``node1`` and ``node2`` attributes; every
    distinct, truthy node is emitted exactly once, in first-seen order,
    as a dict of the form ``{"id": "<node id as string>"}``.

    Args:
        triplets: Iterable of triplet objects (attributes are optional).

    Returns:
        List of ``{"id": str}`` dicts, deduplicated by node id.
    """
    seen_ids = set()
    entity_nodes = []
    for triplet in triplets:
        for attr in ("node1", "node2"):
            node = getattr(triplet, attr, None)
            if node and node.id not in seen_ids:
                seen_ids.add(node.id)
                entity_nodes.append({"id": str(node.id)})
    return entity_nodes
|
||||||
|
|
@ -30,7 +30,7 @@ class BinaryData(IngestionData):
|
||||||
|
|
||||||
async def ensure_metadata(self):
|
async def ensure_metadata(self):
|
||||||
if self.metadata is None:
|
if self.metadata is None:
|
||||||
self.metadata = await get_file_metadata(self.data)
|
self.metadata = await get_file_metadata(self.data, name=self.name)
|
||||||
|
|
||||||
if self.metadata["name"] is None:
|
if self.metadata["name"] is None:
|
||||||
self.metadata["name"] = self.name
|
self.metadata["name"] = self.name
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,8 @@ def get_ontology_resolver_from_env(
|
||||||
Supported value: "rdflib".
|
Supported value: "rdflib".
|
||||||
matching_strategy (str): The matching strategy to apply.
|
matching_strategy (str): The matching strategy to apply.
|
||||||
Supported value: "fuzzy".
|
Supported value: "fuzzy".
|
||||||
ontology_file_path (str): Path to the ontology file required for the resolver.
|
ontology_file_path (str): Path to the ontology file(s) required for the resolver.
|
||||||
|
Can be a single path or comma-separated paths for multiple files.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
BaseOntologyResolver: An instance of the requested ontology resolver.
|
BaseOntologyResolver: An instance of the requested ontology resolver.
|
||||||
|
|
@ -31,8 +32,13 @@ def get_ontology_resolver_from_env(
|
||||||
or if required parameters are missing.
|
or if required parameters are missing.
|
||||||
"""
|
"""
|
||||||
if ontology_resolver == "rdflib" and matching_strategy == "fuzzy" and ontology_file_path:
|
if ontology_resolver == "rdflib" and matching_strategy == "fuzzy" and ontology_file_path:
|
||||||
|
if "," in ontology_file_path:
|
||||||
|
file_paths = [path.strip() for path in ontology_file_path.split(",")]
|
||||||
|
else:
|
||||||
|
file_paths = ontology_file_path
|
||||||
|
|
||||||
return RDFLibOntologyResolver(
|
return RDFLibOntologyResolver(
|
||||||
matching_strategy=FuzzyMatchingStrategy(), ontology_file=ontology_file_path
|
matching_strategy=FuzzyMatchingStrategy(), ontology_file=file_paths
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
raise EnvironmentError(
|
raise EnvironmentError(
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ import os
|
||||||
import difflib
|
import difflib
|
||||||
from cognee.shared.logging_utils import get_logger
|
from cognee.shared.logging_utils import get_logger
|
||||||
from collections import deque
|
from collections import deque
|
||||||
from typing import List, Tuple, Dict, Optional, Any
|
from typing import List, Tuple, Dict, Optional, Any, Union
|
||||||
from rdflib import Graph, URIRef, RDF, RDFS, OWL
|
from rdflib import Graph, URIRef, RDF, RDFS, OWL
|
||||||
|
|
||||||
from cognee.modules.ontology.exceptions import (
|
from cognee.modules.ontology.exceptions import (
|
||||||
|
|
@ -26,22 +26,50 @@ class RDFLibOntologyResolver(BaseOntologyResolver):
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
ontology_file: Optional[str] = None,
|
ontology_file: Optional[Union[str, List[str]]] = None,
|
||||||
matching_strategy: Optional[MatchingStrategy] = None,
|
matching_strategy: Optional[MatchingStrategy] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
super().__init__(matching_strategy)
|
super().__init__(matching_strategy)
|
||||||
self.ontology_file = ontology_file
|
self.ontology_file = ontology_file
|
||||||
try:
|
try:
|
||||||
if ontology_file and os.path.exists(ontology_file):
|
files_to_load = []
|
||||||
|
if ontology_file is not None:
|
||||||
|
if isinstance(ontology_file, str):
|
||||||
|
files_to_load = [ontology_file]
|
||||||
|
elif isinstance(ontology_file, list):
|
||||||
|
files_to_load = ontology_file
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
f"ontology_file must be a string, list of strings, or None. Got: {type(ontology_file)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
if files_to_load:
|
||||||
self.graph = Graph()
|
self.graph = Graph()
|
||||||
self.graph.parse(ontology_file)
|
loaded_files = []
|
||||||
logger.info("Ontology loaded successfully from file: %s", ontology_file)
|
for file_path in files_to_load:
|
||||||
|
if os.path.exists(file_path):
|
||||||
|
self.graph.parse(file_path)
|
||||||
|
loaded_files.append(file_path)
|
||||||
|
logger.info("Ontology loaded successfully from file: %s", file_path)
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
"Ontology file '%s' not found. Skipping this file.",
|
||||||
|
file_path,
|
||||||
|
)
|
||||||
|
|
||||||
|
if not loaded_files:
|
||||||
|
logger.info(
|
||||||
|
"No valid ontology files found. No owl ontology will be attached to the graph."
|
||||||
|
)
|
||||||
|
self.graph = None
|
||||||
|
else:
|
||||||
|
logger.info("Total ontology files loaded: %d", len(loaded_files))
|
||||||
else:
|
else:
|
||||||
logger.info(
|
logger.info(
|
||||||
"Ontology file '%s' not found. No owl ontology will be attached to the graph.",
|
"No ontology file provided. No owl ontology will be attached to the graph."
|
||||||
ontology_file,
|
|
||||||
)
|
)
|
||||||
self.graph = None
|
self.graph = None
|
||||||
|
|
||||||
self.build_lookup()
|
self.build_lookup()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Failed to load ontology", exc_info=e)
|
logger.error("Failed to load ontology", exc_info=e)
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ async def handle_task(
|
||||||
additional_properties={
|
additional_properties={
|
||||||
"task_name": running_task.executable.__name__,
|
"task_name": running_task.executable.__name__,
|
||||||
"cognee_version": cognee_version,
|
"cognee_version": cognee_version,
|
||||||
|
"tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -49,6 +50,7 @@ async def handle_task(
|
||||||
additional_properties={
|
additional_properties={
|
||||||
"task_name": running_task.executable.__name__,
|
"task_name": running_task.executable.__name__,
|
||||||
"cognee_version": cognee_version,
|
"cognee_version": cognee_version,
|
||||||
|
"tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
except Exception as error:
|
except Exception as error:
|
||||||
|
|
@ -62,6 +64,7 @@ async def handle_task(
|
||||||
additional_properties={
|
additional_properties={
|
||||||
"task_name": running_task.executable.__name__,
|
"task_name": running_task.executable.__name__,
|
||||||
"cognee_version": cognee_version,
|
"cognee_version": cognee_version,
|
||||||
|
"tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
raise error
|
raise error
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ async def run_tasks_with_telemetry(
|
||||||
additional_properties={
|
additional_properties={
|
||||||
"pipeline_name": str(pipeline_name),
|
"pipeline_name": str(pipeline_name),
|
||||||
"cognee_version": cognee_version,
|
"cognee_version": cognee_version,
|
||||||
|
"tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant",
|
||||||
}
|
}
|
||||||
| config,
|
| config,
|
||||||
)
|
)
|
||||||
|
|
@ -42,6 +43,7 @@ async def run_tasks_with_telemetry(
|
||||||
additional_properties={
|
additional_properties={
|
||||||
"pipeline_name": str(pipeline_name),
|
"pipeline_name": str(pipeline_name),
|
||||||
"cognee_version": cognee_version,
|
"cognee_version": cognee_version,
|
||||||
|
"tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant",
|
||||||
}
|
}
|
||||||
| config,
|
| config,
|
||||||
)
|
)
|
||||||
|
|
@ -58,6 +60,7 @@ async def run_tasks_with_telemetry(
|
||||||
additional_properties={
|
additional_properties={
|
||||||
"pipeline_name": str(pipeline_name),
|
"pipeline_name": str(pipeline_name),
|
||||||
"cognee_version": cognee_version,
|
"cognee_version": cognee_version,
|
||||||
|
"tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant",
|
||||||
}
|
}
|
||||||
| config,
|
| config,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,11 @@
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
|
from cognee.modules.retrieval.utils.access_tracking import update_node_access_timestamps
|
||||||
from cognee.shared.logging_utils import get_logger
|
from cognee.shared.logging_utils import get_logger
|
||||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||||
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
||||||
from cognee.modules.retrieval.exceptions.exceptions import NoDataError
|
from cognee.modules.retrieval.exceptions.exceptions import NoDataError
|
||||||
from cognee.infrastructure.databases.vector.exceptions.exceptions import CollectionNotFoundError
|
from cognee.infrastructure.databases.vector.exceptions.exceptions import CollectionNotFoundError
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
logger = get_logger("ChunksRetriever")
|
logger = get_logger("ChunksRetriever")
|
||||||
|
|
||||||
|
|
@ -27,21 +28,16 @@ class ChunksRetriever(BaseRetriever):
|
||||||
):
|
):
|
||||||
self.top_k = top_k
|
self.top_k = top_k
|
||||||
|
|
||||||
async def get_context(self, query: str) -> Any:
|
async def get_context(self, query: str) -> Any:
|
||||||
"""
|
"""
|
||||||
Retrieves document chunks context based on the query.
|
Retrieves document chunks context based on the query.
|
||||||
|
|
||||||
Searches for document chunks relevant to the specified query using a vector engine.
|
Searches for document chunks relevant to the specified query using a vector engine.
|
||||||
Raises a NoDataError if no data is found in the system.
|
Raises a NoDataError if no data is found in the system.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
-----------
|
-----------
|
||||||
|
|
||||||
- query (str): The query string to search for relevant document chunks.
|
- query (str): The query string to search for relevant document chunks.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
--------
|
--------
|
||||||
|
|
||||||
- Any: A list of document chunk payloads retrieved from the search.
|
- Any: A list of document chunk payloads retrieved from the search.
|
||||||
"""
|
"""
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|
@ -53,13 +49,14 @@ class ChunksRetriever(BaseRetriever):
|
||||||
try:
|
try:
|
||||||
found_chunks = await vector_engine.search("DocumentChunk_text", query, limit=self.top_k)
|
found_chunks = await vector_engine.search("DocumentChunk_text", query, limit=self.top_k)
|
||||||
logger.info(f"Found {len(found_chunks)} chunks from vector search")
|
logger.info(f"Found {len(found_chunks)} chunks from vector search")
|
||||||
|
await update_node_access_timestamps(found_chunks)
|
||||||
|
|
||||||
except CollectionNotFoundError as error:
|
except CollectionNotFoundError as error:
|
||||||
logger.error("DocumentChunk_text collection not found in vector database")
|
logger.error("DocumentChunk_text collection not found in vector database")
|
||||||
raise NoDataError("No data found in the system, please add data first.") from error
|
raise NoDataError("No data found in the system, please add data first.") from error
|
||||||
|
|
||||||
chunk_payloads = [result.payload for result in found_chunks]
|
chunk_payloads = [result.payload for result in found_chunks]
|
||||||
logger.info(f"Returning {len(chunk_payloads)} chunk payloads")
|
logger.info(f"Returning {len(chunk_payloads)} chunk payloads")
|
||||||
return chunk_payloads
|
|
||||||
|
|
||||||
async def get_completion(
|
async def get_completion(
|
||||||
self, query: str, context: Optional[Any] = None, session_id: Optional[str] = None
|
self, query: str, context: Optional[Any] = None, session_id: Optional[str] = None
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ from cognee.modules.retrieval.utils.session_cache import (
|
||||||
save_conversation_history,
|
save_conversation_history,
|
||||||
get_conversation_history,
|
get_conversation_history,
|
||||||
)
|
)
|
||||||
|
from cognee.modules.retrieval.utils.access_tracking import update_node_access_timestamps
|
||||||
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
||||||
from cognee.modules.retrieval.exceptions.exceptions import NoDataError
|
from cognee.modules.retrieval.exceptions.exceptions import NoDataError
|
||||||
from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError
|
from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError
|
||||||
|
|
@ -65,7 +66,7 @@ class CompletionRetriever(BaseRetriever):
|
||||||
|
|
||||||
if len(found_chunks) == 0:
|
if len(found_chunks) == 0:
|
||||||
return ""
|
return ""
|
||||||
|
await update_node_access_timestamps(found_chunks)
|
||||||
# Combine all chunks text returned from vector search (number of chunks is determined by top_k
|
# Combine all chunks text returned from vector search (number of chunks is determined by top_k
|
||||||
chunks_payload = [found_chunk.payload["text"] for found_chunk in found_chunks]
|
chunks_payload = [found_chunk.payload["text"] for found_chunk in found_chunks]
|
||||||
combined_context = "\n".join(chunks_payload)
|
combined_context = "\n".join(chunks_payload)
|
||||||
|
|
|
||||||
|
|
@ -16,11 +16,13 @@ from cognee.modules.retrieval.utils.session_cache import (
|
||||||
)
|
)
|
||||||
from cognee.shared.logging_utils import get_logger
|
from cognee.shared.logging_utils import get_logger
|
||||||
from cognee.modules.retrieval.utils.extract_uuid_from_node import extract_uuid_from_node
|
from cognee.modules.retrieval.utils.extract_uuid_from_node import extract_uuid_from_node
|
||||||
|
from cognee.modules.retrieval.utils.access_tracking import update_node_access_timestamps
|
||||||
from cognee.modules.retrieval.utils.models import CogneeUserInteraction
|
from cognee.modules.retrieval.utils.models import CogneeUserInteraction
|
||||||
from cognee.modules.engine.models.node_set import NodeSet
|
from cognee.modules.engine.models.node_set import NodeSet
|
||||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||||
from cognee.context_global_variables import session_user
|
from cognee.context_global_variables import session_user
|
||||||
from cognee.infrastructure.databases.cache.config import CacheConfig
|
from cognee.infrastructure.databases.cache.config import CacheConfig
|
||||||
|
from cognee.modules.graph.utils import get_entity_nodes_from_triplets
|
||||||
|
|
||||||
logger = get_logger("GraphCompletionRetriever")
|
logger = get_logger("GraphCompletionRetriever")
|
||||||
|
|
||||||
|
|
@ -139,6 +141,9 @@ class GraphCompletionRetriever(BaseGraphRetriever):
|
||||||
|
|
||||||
# context = await self.resolve_edges_to_text(triplets)
|
# context = await self.resolve_edges_to_text(triplets)
|
||||||
|
|
||||||
|
entity_nodes = get_entity_nodes_from_triplets(triplets)
|
||||||
|
|
||||||
|
await update_node_access_timestamps(entity_nodes)
|
||||||
return triplets
|
return triplets
|
||||||
|
|
||||||
async def get_completion(
|
async def get_completion(
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ from cognee.shared.logging_utils import get_logger
|
||||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||||
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
||||||
from cognee.modules.retrieval.exceptions.exceptions import NoDataError
|
from cognee.modules.retrieval.exceptions.exceptions import NoDataError
|
||||||
|
from cognee.modules.retrieval.utils.access_tracking import update_node_access_timestamps
|
||||||
from cognee.infrastructure.databases.vector.exceptions.exceptions import CollectionNotFoundError
|
from cognee.infrastructure.databases.vector.exceptions.exceptions import CollectionNotFoundError
|
||||||
|
|
||||||
logger = get_logger("SummariesRetriever")
|
logger = get_logger("SummariesRetriever")
|
||||||
|
|
@ -54,6 +55,9 @@ class SummariesRetriever(BaseRetriever):
|
||||||
"TextSummary_text", query, limit=self.top_k
|
"TextSummary_text", query, limit=self.top_k
|
||||||
)
|
)
|
||||||
logger.info(f"Found {len(summaries_results)} summaries from vector search")
|
logger.info(f"Found {len(summaries_results)} summaries from vector search")
|
||||||
|
|
||||||
|
await update_node_access_timestamps(summaries_results)
|
||||||
|
|
||||||
except CollectionNotFoundError as error:
|
except CollectionNotFoundError as error:
|
||||||
logger.error("TextSummary_text collection not found in vector database")
|
logger.error("TextSummary_text collection not found in vector database")
|
||||||
raise NoDataError("No data found in the system, please add data first.") from error
|
raise NoDataError("No data found in the system, please add data first.") from error
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
import os
|
import os
|
||||||
import asyncio
|
import asyncio
|
||||||
from typing import Any, Optional, List, Type
|
from typing import Any, Optional, List, Type
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||||
|
|
@ -79,7 +79,11 @@ class TemporalRetriever(GraphCompletionRetriever):
|
||||||
else:
|
else:
|
||||||
base_directory = None
|
base_directory = None
|
||||||
|
|
||||||
system_prompt = render_prompt(prompt_path, {}, base_directory=base_directory)
|
time_now = datetime.now().strftime("%d-%m-%Y")
|
||||||
|
|
||||||
|
system_prompt = render_prompt(
|
||||||
|
prompt_path, {"time_now": time_now}, base_directory=base_directory
|
||||||
|
)
|
||||||
|
|
||||||
interval = await LLMGateway.acreate_structured_output(query, system_prompt, QueryInterval)
|
interval = await LLMGateway.acreate_structured_output(query, system_prompt, QueryInterval)
|
||||||
|
|
||||||
|
|
@ -108,8 +112,6 @@ class TemporalRetriever(GraphCompletionRetriever):
|
||||||
|
|
||||||
graph_engine = await get_graph_engine()
|
graph_engine = await get_graph_engine()
|
||||||
|
|
||||||
triplets = []
|
|
||||||
|
|
||||||
if time_from and time_to:
|
if time_from and time_to:
|
||||||
ids = await graph_engine.collect_time_ids(time_from=time_from, time_to=time_to)
|
ids = await graph_engine.collect_time_ids(time_from=time_from, time_to=time_to)
|
||||||
elif time_from:
|
elif time_from:
|
||||||
|
|
|
||||||
95
cognee/modules/retrieval/utils/access_tracking.py
Normal file
95
cognee/modules/retrieval/utils/access_tracking.py
Normal file
|
|
@ -0,0 +1,95 @@
|
||||||
|
"""Utilities for tracking data access in retrievers."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import List, Any
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||||
|
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||||
|
from cognee.modules.data.models import Data
|
||||||
|
from cognee.shared.logging_utils import get_logger
|
||||||
|
from sqlalchemy import update
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def update_node_access_timestamps(items: List[Any]):
|
||||||
|
"""
|
||||||
|
Update last_accessed_at for nodes in graph database and corresponding Data records in SQL.
|
||||||
|
|
||||||
|
This function:
|
||||||
|
1. Updates last_accessed_at in the graph database nodes (in properties JSON)
|
||||||
|
2. Traverses to find origin TextDocument nodes (without hardcoded relationship names)
|
||||||
|
3. Updates last_accessed in the SQL Data table for those documents
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
items : List[Any]
|
||||||
|
List of items with payload containing 'id' field (from vector search results)
|
||||||
|
"""
|
||||||
|
if not items:
|
||||||
|
return
|
||||||
|
|
||||||
|
graph_engine = await get_graph_engine()
|
||||||
|
timestamp_ms = int(datetime.now(timezone.utc).timestamp() * 1000)
|
||||||
|
timestamp_dt = datetime.now(timezone.utc)
|
||||||
|
|
||||||
|
# Extract node IDs
|
||||||
|
node_ids = []
|
||||||
|
for item in items:
|
||||||
|
item_id = item.payload.get("id") if hasattr(item, 'payload') else item.get("id")
|
||||||
|
if item_id:
|
||||||
|
node_ids.append(str(item_id))
|
||||||
|
|
||||||
|
if not node_ids:
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Step 1: Batch update graph nodes
|
||||||
|
for node_id in node_ids:
|
||||||
|
result = await graph_engine.query(
|
||||||
|
"MATCH (n:Node {id: $id}) RETURN n.properties",
|
||||||
|
{"id": node_id}
|
||||||
|
)
|
||||||
|
|
||||||
|
if result and result[0]:
|
||||||
|
props = json.loads(result[0][0]) if result[0][0] else {}
|
||||||
|
props["last_accessed_at"] = timestamp_ms
|
||||||
|
|
||||||
|
await graph_engine.query(
|
||||||
|
"MATCH (n:Node {id: $id}) SET n.properties = $props",
|
||||||
|
{"id": node_id, "props": json.dumps(props)}
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.debug(f"Updated access timestamps for {len(node_ids)} graph nodes")
|
||||||
|
|
||||||
|
# Step 2: Find origin TextDocument nodes (without hardcoded relationship names)
|
||||||
|
origin_query = """
|
||||||
|
UNWIND $node_ids AS node_id
|
||||||
|
MATCH (chunk:Node {id: node_id})-[e:EDGE]-(doc:Node)
|
||||||
|
WHERE chunk.type = 'DocumentChunk' AND doc.type IN ['TextDocument', 'Document']
|
||||||
|
RETURN DISTINCT doc.id
|
||||||
|
"""
|
||||||
|
|
||||||
|
result = await graph_engine.query(origin_query, {"node_ids": node_ids})
|
||||||
|
|
||||||
|
# Extract and deduplicate document IDs
|
||||||
|
doc_ids = list(set([row[0] for row in result if row and row[0]])) if result else []
|
||||||
|
|
||||||
|
# Step 3: Update SQL Data table
|
||||||
|
if doc_ids:
|
||||||
|
db_engine = get_relational_engine()
|
||||||
|
async with db_engine.get_async_session() as session:
|
||||||
|
stmt = update(Data).where(
|
||||||
|
Data.id.in_([UUID(doc_id) for doc_id in doc_ids])
|
||||||
|
).values(last_accessed=timestamp_dt)
|
||||||
|
|
||||||
|
await session.execute(stmt)
|
||||||
|
await session.commit()
|
||||||
|
|
||||||
|
logger.debug(f"Updated last_accessed for {len(doc_ids)} Data records in SQL")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to update timestamps: {e}")
|
||||||
|
raise
|
||||||
|
|
@ -67,7 +67,10 @@ async def search(
|
||||||
send_telemetry(
|
send_telemetry(
|
||||||
"cognee.search EXECUTION STARTED",
|
"cognee.search EXECUTION STARTED",
|
||||||
user.id,
|
user.id,
|
||||||
additional_properties={"cognee_version": cognee_version},
|
additional_properties={
|
||||||
|
"cognee_version": cognee_version,
|
||||||
|
"tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant",
|
||||||
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
# Use search function filtered by permissions if access control is enabled
|
# Use search function filtered by permissions if access control is enabled
|
||||||
|
|
@ -108,7 +111,10 @@ async def search(
|
||||||
send_telemetry(
|
send_telemetry(
|
||||||
"cognee.search EXECUTION COMPLETED",
|
"cognee.search EXECUTION COMPLETED",
|
||||||
user.id,
|
user.id,
|
||||||
additional_properties={"cognee_version": cognee_version},
|
additional_properties={
|
||||||
|
"cognee_version": cognee_version,
|
||||||
|
"tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant",
|
||||||
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
await log_result(
|
await log_result(
|
||||||
|
|
|
||||||
|
|
@ -16,17 +16,17 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
||||||
|
|
||||||
nodes_list = []
|
nodes_list = []
|
||||||
color_map = {
|
color_map = {
|
||||||
"Entity": "#f47710",
|
"Entity": "#5C10F4",
|
||||||
"EntityType": "#6510f4",
|
"EntityType": "#A550FF",
|
||||||
"DocumentChunk": "#801212",
|
"DocumentChunk": "#0DFF00",
|
||||||
"TextSummary": "#1077f4",
|
"TextSummary": "#5C10F4",
|
||||||
"TableRow": "#f47710",
|
"TableRow": "#A550FF",
|
||||||
"TableType": "#6510f4",
|
"TableType": "#5C10F4",
|
||||||
"ColumnValue": "#13613a",
|
"ColumnValue": "#757470",
|
||||||
"SchemaTable": "#f47710",
|
"SchemaTable": "#A550FF",
|
||||||
"DatabaseSchema": "#6510f4",
|
"DatabaseSchema": "#5C10F4",
|
||||||
"SchemaRelationship": "#13613a",
|
"SchemaRelationship": "#323332",
|
||||||
"default": "#D3D3D3",
|
"default": "#D8D8D8",
|
||||||
}
|
}
|
||||||
|
|
||||||
for node_id, node_info in nodes_data:
|
for node_id, node_info in nodes_data:
|
||||||
|
|
@ -98,16 +98,19 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
||||||
<head>
|
<head>
|
||||||
<meta charset="utf-8">
|
<meta charset="utf-8">
|
||||||
<script src="https://d3js.org/d3.v5.min.js"></script>
|
<script src="https://d3js.org/d3.v5.min.js"></script>
|
||||||
|
<script src="https://d3js.org/d3-contour.v1.min.js"></script>
|
||||||
<style>
|
<style>
|
||||||
body, html { margin: 0; padding: 0; width: 100%; height: 100%; overflow: hidden; background: linear-gradient(90deg, #101010, #1a1a2e); color: white; font-family: 'Inter', sans-serif; }
|
body, html { margin: 0; padding: 0; width: 100%; height: 100%; overflow: hidden; background: linear-gradient(90deg, #101010, #1a1a2e); color: white; font-family: 'Inter', sans-serif; }
|
||||||
|
|
||||||
svg { width: 100vw; height: 100vh; display: block; }
|
svg { width: 100vw; height: 100vh; display: block; }
|
||||||
.links line { stroke: rgba(255, 255, 255, 0.4); stroke-width: 2px; }
|
.links line { stroke: rgba(160, 160, 160, 0.25); stroke-width: 1.5px; stroke-linecap: round; }
|
||||||
.links line.weighted { stroke: rgba(255, 215, 0, 0.7); }
|
.links line.weighted { stroke: rgba(255, 215, 0, 0.4); }
|
||||||
.links line.multi-weighted { stroke: rgba(0, 255, 127, 0.8); }
|
.links line.multi-weighted { stroke: rgba(0, 255, 127, 0.45); }
|
||||||
.nodes circle { stroke: white; stroke-width: 0.5px; filter: drop-shadow(0 0 5px rgba(255,255,255,0.3)); }
|
.nodes circle { stroke: white; stroke-width: 0.5px; }
|
||||||
.node-label { font-size: 5px; font-weight: bold; fill: white; text-anchor: middle; dominant-baseline: middle; font-family: 'Inter', sans-serif; pointer-events: none; }
|
.node-label { font-size: 5px; font-weight: bold; fill: #F4F4F4; text-anchor: middle; dominant-baseline: middle; font-family: 'Inter', sans-serif; pointer-events: none; }
|
||||||
.edge-label { font-size: 3px; fill: rgba(255, 255, 255, 0.7); text-anchor: middle; dominant-baseline: middle; font-family: 'Inter', sans-serif; pointer-events: none; }
|
.edge-label { font-size: 3px; fill: #F4F4F4; text-anchor: middle; dominant-baseline: middle; font-family: 'Inter', sans-serif; pointer-events: none; paint-order: stroke; stroke: rgba(50,51,50,0.75); stroke-width: 1px; }
|
||||||
|
|
||||||
|
.density path { mix-blend-mode: screen; }
|
||||||
|
|
||||||
.tooltip {
|
.tooltip {
|
||||||
position: absolute;
|
position: absolute;
|
||||||
|
|
@ -125,11 +128,32 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
||||||
max-width: 300px;
|
max-width: 300px;
|
||||||
word-wrap: break-word;
|
word-wrap: break-word;
|
||||||
}
|
}
|
||||||
|
#info-panel {
|
||||||
|
position: fixed;
|
||||||
|
left: 12px;
|
||||||
|
top: 12px;
|
||||||
|
width: 340px;
|
||||||
|
max-height: calc(100vh - 24px);
|
||||||
|
overflow: auto;
|
||||||
|
background: rgba(50, 51, 50, 0.7);
|
||||||
|
backdrop-filter: blur(6px);
|
||||||
|
border: 1px solid rgba(216, 216, 216, 0.35);
|
||||||
|
border-radius: 8px;
|
||||||
|
color: #F4F4F4;
|
||||||
|
padding: 12px 14px;
|
||||||
|
z-index: 1100;
|
||||||
|
}
|
||||||
|
#info-panel h3 { margin: 0 0 8px 0; font-size: 14px; color: #F4F4F4; }
|
||||||
|
#info-panel .kv { font-size: 12px; line-height: 1.4; }
|
||||||
|
#info-panel .kv .k { color: #D8D8D8; }
|
||||||
|
#info-panel .kv .v { color: #F4F4F4; }
|
||||||
|
#info-panel .placeholder { opacity: 0.7; font-size: 12px; }
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<svg></svg>
|
<svg></svg>
|
||||||
<div class="tooltip" id="tooltip"></div>
|
<div class="tooltip" id="tooltip"></div>
|
||||||
|
<div id="info-panel"><div class="placeholder">Hover a node or edge to inspect details</div></div>
|
||||||
<script>
|
<script>
|
||||||
var nodes = {nodes};
|
var nodes = {nodes};
|
||||||
var links = {links};
|
var links = {links};
|
||||||
|
|
@ -140,19 +164,141 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
||||||
|
|
||||||
var container = svg.append("g");
|
var container = svg.append("g");
|
||||||
var tooltip = d3.select("#tooltip");
|
var tooltip = d3.select("#tooltip");
|
||||||
|
var infoPanel = d3.select('#info-panel');
|
||||||
|
|
||||||
|
function renderInfo(title, entries){
|
||||||
|
function esc(s){ return String(s).replace(/&/g,'&').replace(/</g,'<').replace(/>/g,'>'); }
|
||||||
|
var html = '<h3>' + esc(title) + '</h3>';
|
||||||
|
html += '<div class="kv">';
|
||||||
|
entries.forEach(function(e){
|
||||||
|
html += '<div><span class="k">' + esc(e.k) + ':</span> <span class="v">' + esc(e.v) + '</span></div>';
|
||||||
|
});
|
||||||
|
html += '</div>';
|
||||||
|
infoPanel.html(html);
|
||||||
|
}
|
||||||
|
function pickDescription(obj){
|
||||||
|
if (!obj) return null;
|
||||||
|
var keys = ['description','summary','text','content'];
|
||||||
|
for (var i=0; i<keys.length; i++){
|
||||||
|
var v = obj[keys[i]];
|
||||||
|
if (typeof v === 'string' && v.trim()) return v.trim();
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
function truncate(s, n){ if (!s) return s; return s.length > n ? (s.slice(0, n) + '…') : s; }
|
||||||
|
function renderNodeInfo(n){
|
||||||
|
var entries = [];
|
||||||
|
if (n.name) entries.push({k:'Name', v: n.name});
|
||||||
|
if (n.type) entries.push({k:'Type', v: n.type});
|
||||||
|
if (n.id) entries.push({k:'ID', v: n.id});
|
||||||
|
var desc = pickDescription(n) || pickDescription(n.properties);
|
||||||
|
if (desc) entries.push({k:'Description', v: truncate(desc.replace(/\s+/g,' ').trim(), 280)});
|
||||||
|
if (n.properties) {
|
||||||
|
Object.keys(n.properties).slice(0, 12).forEach(function(key){
|
||||||
|
var v = n.properties[key];
|
||||||
|
if (v !== undefined && v !== null && typeof v !== 'object') entries.push({k: key, v: String(v)});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
renderInfo(n.name || 'Node', entries);
|
||||||
|
}
|
||||||
|
function renderEdgeInfo(e){
|
||||||
|
var entries = [];
|
||||||
|
if (e.relation) entries.push({k:'Relation', v: e.relation});
|
||||||
|
if (e.weight !== undefined && e.weight !== null) entries.push({k:'Weight', v: e.weight});
|
||||||
|
if (e.all_weights && Object.keys(e.all_weights).length){
|
||||||
|
Object.keys(e.all_weights).slice(0, 8).forEach(function(k){ entries.push({k: 'w.'+k, v: e.all_weights[k]}); });
|
||||||
|
}
|
||||||
|
if (e.relationship_type) entries.push({k:'Type', v: e.relationship_type});
|
||||||
|
var edesc = pickDescription(e.edge_info);
|
||||||
|
if (edesc) entries.push({k:'Description', v: truncate(edesc.replace(/\s+/g,' ').trim(), 280)});
|
||||||
|
renderInfo('Edge', entries);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Basic runtime diagnostics
|
||||||
|
console.log('[Cognee Visualization] nodes:', nodes ? nodes.length : 0, 'links:', links ? links.length : 0);
|
||||||
|
window.addEventListener('error', function(e){
|
||||||
|
try {
|
||||||
|
tooltip.html('<strong>Error:</strong> ' + e.message)
|
||||||
|
.style('left', '12px')
|
||||||
|
.style('top', '12px')
|
||||||
|
.style('opacity', 1);
|
||||||
|
} catch(_) {}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Normalize node IDs and link endpoints for robustness
|
||||||
|
function resolveId(d){ return (d && (d.id || d.node_id || d.uuid || d.external_id || d.name)) || undefined; }
|
||||||
|
if (Array.isArray(nodes)) {
|
||||||
|
nodes.forEach(function(n){ var id = resolveId(n); if (id !== undefined) n.id = id; });
|
||||||
|
}
|
||||||
|
if (Array.isArray(links)) {
|
||||||
|
links.forEach(function(l){
|
||||||
|
if (typeof l.source === 'object') l.source = resolveId(l.source);
|
||||||
|
if (typeof l.target === 'object') l.target = resolveId(l.target);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!nodes || nodes.length === 0) {
|
||||||
|
container.append('text')
|
||||||
|
.attr('x', width / 2)
|
||||||
|
.attr('y', height / 2)
|
||||||
|
.attr('fill', '#fff')
|
||||||
|
.attr('font-size', 14)
|
||||||
|
.attr('text-anchor', 'middle')
|
||||||
|
.text('No graph data available');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Visual defs - reusable glow
|
||||||
|
var defs = svg.append("defs");
|
||||||
|
var glow = defs.append("filter").attr("id", "glow")
|
||||||
|
.attr("x", "-30%")
|
||||||
|
.attr("y", "-30%")
|
||||||
|
.attr("width", "160%")
|
||||||
|
.attr("height", "160%");
|
||||||
|
glow.append("feGaussianBlur").attr("stdDeviation", 8).attr("result", "coloredBlur");
|
||||||
|
var feMerge = glow.append("feMerge");
|
||||||
|
feMerge.append("feMergeNode").attr("in", "coloredBlur");
|
||||||
|
feMerge.append("feMergeNode").attr("in", "SourceGraphic");
|
||||||
|
|
||||||
|
// Stronger glow for hovered adjacency
|
||||||
|
var glowStrong = defs.append("filter").attr("id", "glow-strong")
|
||||||
|
.attr("x", "-40%")
|
||||||
|
.attr("y", "-40%")
|
||||||
|
.attr("width", "180%")
|
||||||
|
.attr("height", "180%");
|
||||||
|
glowStrong.append("feGaussianBlur").attr("stdDeviation", 14).attr("result", "coloredBlur");
|
||||||
|
var feMerge2 = glowStrong.append("feMerge");
|
||||||
|
feMerge2.append("feMergeNode").attr("in", "coloredBlur");
|
||||||
|
feMerge2.append("feMergeNode").attr("in", "SourceGraphic");
|
||||||
|
|
||||||
|
var currentTransform = d3.zoomIdentity;
|
||||||
|
var densityZoomTimer = null;
|
||||||
|
var isInteracting = false;
|
||||||
|
var labelBaseSize = 10;
|
||||||
|
function getGroupKey(d){ return d && (d.type || d.category || d.group || d.color) || 'default'; }
|
||||||
|
|
||||||
var simulation = d3.forceSimulation(nodes)
|
var simulation = d3.forceSimulation(nodes)
|
||||||
.force("link", d3.forceLink(links).id(d => d.id).strength(0.1))
|
.force("link", d3.forceLink(links).id(function(d){ return d.id; }).distance(100).strength(0.2))
|
||||||
.force("charge", d3.forceManyBody().strength(-275))
|
.force("charge", d3.forceManyBody().strength(-180))
|
||||||
|
.force("collide", d3.forceCollide().radius(16).iterations(2))
|
||||||
.force("center", d3.forceCenter(width / 2, height / 2))
|
.force("center", d3.forceCenter(width / 2, height / 2))
|
||||||
.force("x", d3.forceX().strength(0.1).x(width / 2))
|
.force("x", d3.forceX().strength(0.06).x(width / 2))
|
||||||
.force("y", d3.forceY().strength(0.1).y(height / 2));
|
.force("y", d3.forceY().strength(0.06).y(height / 2))
|
||||||
|
.alphaDecay(0.06)
|
||||||
|
.velocityDecay(0.6);
|
||||||
|
|
||||||
|
// Density layer (sibling of container to avoid double transforms)
|
||||||
|
var densityLayer = svg.append("g")
|
||||||
|
.attr("class", "density")
|
||||||
|
.style("pointer-events", "none");
|
||||||
|
if (densityLayer.lower) densityLayer.lower();
|
||||||
|
|
||||||
var link = container.append("g")
|
var link = container.append("g")
|
||||||
.attr("class", "links")
|
.attr("class", "links")
|
||||||
.selectAll("line")
|
.selectAll("line")
|
||||||
.data(links)
|
.data(links)
|
||||||
.enter().append("line")
|
.enter().append("line")
|
||||||
|
.style("opacity", 0)
|
||||||
|
.style("pointer-events", "none")
|
||||||
.attr("stroke-width", d => {
|
.attr("stroke-width", d => {
|
||||||
if (d.weight) return Math.max(2, d.weight * 5);
|
if (d.weight) return Math.max(2, d.weight * 5);
|
||||||
if (d.all_weights && Object.keys(d.all_weights).length > 0) {
|
if (d.all_weights && Object.keys(d.all_weights).length > 0) {
|
||||||
|
|
@ -168,6 +314,7 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
||||||
})
|
})
|
||||||
.on("mouseover", function(d) {
|
.on("mouseover", function(d) {
|
||||||
// Create tooltip content for edge
|
// Create tooltip content for edge
|
||||||
|
renderEdgeInfo(d);
|
||||||
var content = "<strong>Edge Information</strong><br/>";
|
var content = "<strong>Edge Information</strong><br/>";
|
||||||
content += "Relationship: " + d.relation + "<br/>";
|
content += "Relationship: " + d.relation + "<br/>";
|
||||||
|
|
||||||
|
|
@ -212,6 +359,7 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
||||||
.data(links)
|
.data(links)
|
||||||
.enter().append("text")
|
.enter().append("text")
|
||||||
.attr("class", "edge-label")
|
.attr("class", "edge-label")
|
||||||
|
.style("opacity", 0)
|
||||||
.text(d => {
|
.text(d => {
|
||||||
var label = d.relation;
|
var label = d.relation;
|
||||||
if (d.all_weights && Object.keys(d.all_weights).length > 1) {
|
if (d.all_weights && Object.keys(d.all_weights).length > 1) {
|
||||||
|
|
@ -232,21 +380,225 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
||||||
.data(nodes)
|
.data(nodes)
|
||||||
.enter().append("g");
|
.enter().append("g");
|
||||||
|
|
||||||
|
// Color fallback by type when d.color is missing
|
||||||
|
var colorByType = {
|
||||||
|
"Entity": "#5C10F4",
|
||||||
|
"EntityType": "#A550FF",
|
||||||
|
"DocumentChunk": "#0DFF00",
|
||||||
|
"TextSummary": "#5C10F4",
|
||||||
|
"TableRow": "#A550FF",
|
||||||
|
"TableType": "#5C10F4",
|
||||||
|
"ColumnValue": "#757470",
|
||||||
|
"SchemaTable": "#A550FF",
|
||||||
|
"DatabaseSchema": "#5C10F4",
|
||||||
|
"SchemaRelationship": "#323332"
|
||||||
|
};
|
||||||
|
|
||||||
var node = nodeGroup.append("circle")
|
var node = nodeGroup.append("circle")
|
||||||
.attr("r", 13)
|
.attr("r", 13)
|
||||||
.attr("fill", d => d.color)
|
.attr("fill", function(d){ return d.color || colorByType[d.type] || "#D3D3D3"; })
|
||||||
|
.style("filter", "url(#glow)")
|
||||||
|
.attr("shape-rendering", "geometricPrecision")
|
||||||
.call(d3.drag()
|
.call(d3.drag()
|
||||||
.on("start", dragstarted)
|
.on("start", dragstarted)
|
||||||
.on("drag", dragged)
|
.on("drag", function(d){ dragged(d); updateDensity(); showAdjacency(d); })
|
||||||
.on("end", dragended));
|
.on("end", dragended));
|
||||||
|
|
||||||
nodeGroup.append("text")
|
// Show links only for hovered node adjacency
|
||||||
|
function isAdjacent(linkDatum, nodeId) {
|
||||||
|
var sid = linkDatum && linkDatum.source && (linkDatum.source.id || linkDatum.source);
|
||||||
|
var tid = linkDatum && linkDatum.target && (linkDatum.target.id || linkDatum.target);
|
||||||
|
return sid === nodeId || tid === nodeId;
|
||||||
|
}
|
||||||
|
|
||||||
|
function showAdjacency(d) {
|
||||||
|
var nodeId = d && (d.id || d.node_id || d.uuid || d.external_id || d.name);
|
||||||
|
if (!nodeId) return;
|
||||||
|
// Build neighbor set
|
||||||
|
var neighborIds = {};
|
||||||
|
neighborIds[nodeId] = true;
|
||||||
|
for (var i = 0; i < links.length; i++) {
|
||||||
|
var l = links[i];
|
||||||
|
var sid = l && l.source && (l.source.id || l.source);
|
||||||
|
var tid = l && l.target && (l.target.id || l.target);
|
||||||
|
if (sid === nodeId) neighborIds[tid] = true;
|
||||||
|
if (tid === nodeId) neighborIds[sid] = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
link
|
||||||
|
.style("opacity", function(l){ return isAdjacent(l, nodeId) ? 0.95 : 0; })
|
||||||
|
.style("stroke", function(l){ return isAdjacent(l, nodeId) ? "rgba(255,255,255,0.95)" : null; })
|
||||||
|
.style("stroke-width", function(l){ return isAdjacent(l, nodeId) ? 2.5 : 1.5; });
|
||||||
|
edgeLabels.style("opacity", function(l){ return isAdjacent(l, nodeId) ? 1 : 0; });
|
||||||
|
densityLayer.style("opacity", 0.35);
|
||||||
|
|
||||||
|
// Highlight neighbor nodes and dim others
|
||||||
|
node
|
||||||
|
.style("opacity", function(n){ return neighborIds[n.id] ? 1 : 0.25; })
|
||||||
|
.style("filter", function(n){ return neighborIds[n.id] ? "url(#glow-strong)" : "url(#glow)"; })
|
||||||
|
.attr("r", function(n){ return neighborIds[n.id] ? 15 : 13; });
|
||||||
|
// Raise highlighted nodes
|
||||||
|
node.filter(function(n){ return neighborIds[n.id]; }).raise();
|
||||||
|
// Neighbor labels brighter
|
||||||
|
nodeGroup.select("text")
|
||||||
|
.style("opacity", function(n){ return neighborIds[n.id] ? 1 : 0.2; })
|
||||||
|
.style("font-size", function(n){
|
||||||
|
var size = neighborIds[n.id] ? Math.min(22, labelBaseSize * 1.25) : labelBaseSize;
|
||||||
|
return size + "px";
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function clearAdjacency() {
|
||||||
|
link.style("opacity", 0)
|
||||||
|
.style("stroke", null)
|
||||||
|
.style("stroke-width", 1.5);
|
||||||
|
edgeLabels.style("opacity", 0);
|
||||||
|
densityLayer.style("opacity", 1);
|
||||||
|
node
|
||||||
|
.style("opacity", 1)
|
||||||
|
.style("filter", "url(#glow)")
|
||||||
|
.attr("r", 13);
|
||||||
|
nodeGroup.select("text")
|
||||||
|
.style("opacity", 1)
|
||||||
|
.style("font-size", labelBaseSize + "px");
|
||||||
|
}
|
||||||
|
|
||||||
|
node.on("mouseover", function(d){ showAdjacency(d); })
|
||||||
|
.on("mouseout", function(){ clearAdjacency(); });
|
||||||
|
node.on("mouseover", function(d){ renderNodeInfo(d); tooltip.style('opacity', 0); });
|
||||||
|
// Also bind on the group so labels trigger adjacency too
|
||||||
|
nodeGroup.on("mouseover", function(d){ showAdjacency(d); })
|
||||||
|
.on("mouseout", function(){ clearAdjacency(); });
|
||||||
|
|
||||||
|
// Density always on; no hover gating
|
||||||
|
|
||||||
|
// Add labels sparsely to reduce clutter (every ~50th node), and truncate long text
|
||||||
|
nodeGroup
|
||||||
|
.filter(function(d, i){ return i % 14 === 0; })
|
||||||
|
.append("text")
|
||||||
.attr("class", "node-label")
|
.attr("class", "node-label")
|
||||||
.attr("dy", 4)
|
.attr("dy", 4)
|
||||||
.attr("text-anchor", "middle")
|
.attr("text-anchor", "middle")
|
||||||
.text(d => d.name);
|
.text(function(d){
|
||||||
|
var s = d && d.name ? String(d.name) : '';
|
||||||
|
return s.length > 40 ? (s.slice(0, 40) + "…") : s;
|
||||||
|
})
|
||||||
|
.style("font-size", labelBaseSize + "px");
|
||||||
|
|
||||||
node.append("title").text(d => JSON.stringify(d));
|
function applyLabelSize() {
|
||||||
|
var k = (currentTransform && currentTransform.k) || 1;
|
||||||
|
// Keep labels readable across zoom levels and hide when too small
|
||||||
|
labelBaseSize = Math.max(7, Math.min(18, 10 / Math.sqrt(k)));
|
||||||
|
nodeGroup.select("text")
|
||||||
|
.style("font-size", labelBaseSize + "px")
|
||||||
|
.style("display", (k < 0.35 ? "none" : null));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Density cloud computation (throttled)
|
||||||
|
var densityTick = 0;
|
||||||
|
var geoPath = d3.geoPath().projection(null);
|
||||||
|
var MAX_POINTS_PER_GROUP = 400;
|
||||||
|
function updateDensity() {
|
||||||
|
try {
|
||||||
|
if (isInteracting) return; // skip during interaction for smoother UX
|
||||||
|
if (typeof d3 === 'undefined' || typeof d3.contourDensity !== 'function') {
|
||||||
|
return; // d3-contour not available; skip gracefully
|
||||||
|
}
|
||||||
|
if (!nodes || nodes.length === 0) return;
|
||||||
|
var usable = nodes.filter(function(d){ return d && typeof d.x === 'number' && isFinite(d.x) && typeof d.y === 'number' && isFinite(d.y); });
|
||||||
|
if (usable.length < 3) return; // not enough positioned points yet
|
||||||
|
|
||||||
|
var t = currentTransform || d3.zoomIdentity;
|
||||||
|
if (t.k && t.k < 0.08) {
|
||||||
|
// Skip density at extreme zoom-out to avoid numerical instability/perf issues
|
||||||
|
densityLayer.selectAll('*').remove();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
function hexToRgb(hex){
|
||||||
|
if (!hex) return {r: 0, g: 200, b: 255};
|
||||||
|
var c = hex.replace('#','');
|
||||||
|
if (c.length === 3) c = c.split('').map(function(x){ return x+x; }).join('');
|
||||||
|
var num = parseInt(c, 16);
|
||||||
|
return { r: (num >> 16) & 255, g: (num >> 8) & 255, b: num & 255 };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build groups across all nodes
|
||||||
|
var groups = {};
|
||||||
|
for (var i = 0; i < usable.length; i++) {
|
||||||
|
var k = getGroupKey(usable[i]);
|
||||||
|
if (!groups[k]) groups[k] = [];
|
||||||
|
groups[k].push(usable[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
densityLayer.selectAll('*').remove();
|
||||||
|
|
||||||
|
Object.keys(groups).forEach(function(key){
|
||||||
|
var arr = groups[key];
|
||||||
|
if (!arr || arr.length < 3) return;
|
||||||
|
|
||||||
|
// Transform positions into screen space and sample to cap cost
|
||||||
|
var arrT = [];
|
||||||
|
var step = Math.max(1, Math.floor(arr.length / MAX_POINTS_PER_GROUP));
|
||||||
|
for (var j = 0; j < arr.length; j += step) {
|
||||||
|
var nx = t.applyX(arr[j].x);
|
||||||
|
var ny = t.applyY(arr[j].y);
|
||||||
|
if (isFinite(nx) && isFinite(ny)) {
|
||||||
|
arrT.push({ x: nx, y: ny, type: arr[j].type, color: arr[j].color });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (arrT.length < 3) return;
|
||||||
|
|
||||||
|
// Compute adaptive bandwidth based on group spread
|
||||||
|
var cx = 0, cy = 0;
|
||||||
|
for (var k = 0; k < arrT.length; k++){ cx += arrT[k].x; cy += arrT[k].y; }
|
||||||
|
cx /= arrT.length; cy /= arrT.length;
|
||||||
|
var sumR = 0;
|
||||||
|
for (var k2 = 0; k2 < arrT.length; k2++){
|
||||||
|
var dx = arrT[k2].x - cx, dy = arrT[k2].y - cy;
|
||||||
|
sumR += Math.sqrt(dx*dx + dy*dy);
|
||||||
|
}
|
||||||
|
var avgR = sumR / arrT.length;
|
||||||
|
var dynamicBandwidth = Math.max(12, Math.min(80, avgR));
|
||||||
|
var densityBandwidth = dynamicBandwidth / (t.k || 1);
|
||||||
|
|
||||||
|
var contours = d3.contourDensity()
|
||||||
|
.x(function(d){ return d.x; })
|
||||||
|
.y(function(d){ return d.y; })
|
||||||
|
.size([width, height])
|
||||||
|
.bandwidth(densityBandwidth)
|
||||||
|
.thresholds(8)
|
||||||
|
(arrT);
|
||||||
|
|
||||||
|
if (!contours || contours.length === 0) return;
|
||||||
|
var maxVal = d3.max(contours, function(d){ return d.value; }) || 1;
|
||||||
|
|
||||||
|
// Use the first node color in the group or fallback neon palette
|
||||||
|
var baseColor = (arr.find(function(d){ return d && d.color; }) || {}).color || '#00c8ff';
|
||||||
|
var rgb = hexToRgb(baseColor);
|
||||||
|
|
||||||
|
var g = densityLayer.append('g').attr('data-group', key);
|
||||||
|
g.selectAll('path')
|
||||||
|
.data(contours)
|
||||||
|
.enter()
|
||||||
|
.append('path')
|
||||||
|
.attr('d', geoPath)
|
||||||
|
.attr('fill', 'rgb(' + rgb.r + ',' + rgb.g + ',' + rgb.b + ')')
|
||||||
|
.attr('stroke', 'none')
|
||||||
|
.style('opacity', function(d){
|
||||||
|
var v = maxVal ? (d.value / maxVal) : 0;
|
||||||
|
var alpha = Math.pow(Math.max(0, Math.min(1, v)), 1.6); // accentuate clusters
|
||||||
|
return 0.65 * alpha; // up to 0.65 opacity at peak density
|
||||||
|
})
|
||||||
|
.style('filter', 'blur(2px)');
|
||||||
|
});
|
||||||
|
} catch (e) {
|
||||||
|
// Reduce impact of any runtime errors during zoom
|
||||||
|
console.warn('Density update failed:', e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
simulation.on("tick", function() {
|
simulation.on("tick", function() {
|
||||||
link.attr("x1", d => d.source.x)
|
link.attr("x1", d => d.source.x)
|
||||||
|
|
@ -266,16 +618,29 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
||||||
.attr("y", d => d.y)
|
.attr("y", d => d.y)
|
||||||
.attr("dy", 4)
|
.attr("dy", 4)
|
||||||
.attr("text-anchor", "middle");
|
.attr("text-anchor", "middle");
|
||||||
|
|
||||||
|
densityTick += 1;
|
||||||
|
if (densityTick % 24 === 0) updateDensity();
|
||||||
});
|
});
|
||||||
|
|
||||||
svg.call(d3.zoom().on("zoom", function() {
|
var zoomBehavior = d3.zoom()
|
||||||
container.attr("transform", d3.event.transform);
|
.on("start", function(){ isInteracting = true; densityLayer.style("opacity", 0.2); })
|
||||||
}));
|
.on("zoom", function(){
|
||||||
|
currentTransform = d3.event.transform;
|
||||||
|
container.attr("transform", currentTransform);
|
||||||
|
})
|
||||||
|
.on("end", function(){
|
||||||
|
if (densityZoomTimer) clearTimeout(densityZoomTimer);
|
||||||
|
densityZoomTimer = setTimeout(function(){ isInteracting = false; densityLayer.style("opacity", 1); updateDensity(); }, 140);
|
||||||
|
});
|
||||||
|
svg.call(zoomBehavior);
|
||||||
|
|
||||||
function dragstarted(d) {
|
function dragstarted(d) {
|
||||||
if (!d3.event.active) simulation.alphaTarget(0.3).restart();
|
if (!d3.event.active) simulation.alphaTarget(0.3).restart();
|
||||||
d.fx = d.x;
|
d.fx = d.x;
|
||||||
d.fy = d.y;
|
d.fy = d.y;
|
||||||
|
isInteracting = true;
|
||||||
|
densityLayer.style("opacity", 0.2);
|
||||||
}
|
}
|
||||||
|
|
||||||
function dragged(d) {
|
function dragged(d) {
|
||||||
|
|
@ -287,6 +652,8 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
||||||
if (!d3.event.active) simulation.alphaTarget(0);
|
if (!d3.event.active) simulation.alphaTarget(0);
|
||||||
d.fx = null;
|
d.fx = null;
|
||||||
d.fy = null;
|
d.fy = null;
|
||||||
|
if (densityZoomTimer) clearTimeout(densityZoomTimer);
|
||||||
|
densityZoomTimer = setTimeout(function(){ isInteracting = false; densityLayer.style("opacity", 1); updateDensity(); }, 140);
|
||||||
}
|
}
|
||||||
|
|
||||||
window.addEventListener("resize", function() {
|
window.addEventListener("resize", function() {
|
||||||
|
|
@ -295,7 +662,13 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
||||||
svg.attr("width", width).attr("height", height);
|
svg.attr("width", width).attr("height", height);
|
||||||
simulation.force("center", d3.forceCenter(width / 2, height / 2));
|
simulation.force("center", d3.forceCenter(width / 2, height / 2));
|
||||||
simulation.alpha(1).restart();
|
simulation.alpha(1).restart();
|
||||||
|
updateDensity();
|
||||||
|
applyLabelSize();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Initial density draw
|
||||||
|
updateDensity();
|
||||||
|
applyLabelSize();
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<svg style="position: fixed; bottom: 10px; right: 10px; width: 150px; height: auto; z-index: 9999;" viewBox="0 0 158 44" fill="none" xmlns="http://www.w3.org/2000/svg">
|
<svg style="position: fixed; bottom: 10px; right: 10px; width: 150px; height: auto; z-index: 9999;" viewBox="0 0 158 44" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||||
|
|
@ -305,8 +678,12 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
||||||
</html>
|
</html>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
html_content = html_template.replace("{nodes}", json.dumps(nodes_list))
|
# Safely embed JSON inside <script> by escaping </ to avoid prematurely closing the tag
|
||||||
html_content = html_content.replace("{links}", json.dumps(links_list))
|
def _safe_json_embed(obj):
|
||||||
|
return json.dumps(obj).replace("</", "<\\/")
|
||||||
|
|
||||||
|
html_content = html_template.replace("{nodes}", _safe_json_embed(nodes_list))
|
||||||
|
html_content = html_content.replace("{links}", _safe_json_embed(links_list))
|
||||||
|
|
||||||
if not destination_file_path:
|
if not destination_file_path:
|
||||||
home_dir = os.path.expanduser("~")
|
home_dir = os.path.expanduser("~")
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import logging
|
import logging
|
||||||
|
import tempfile
|
||||||
import structlog
|
import structlog
|
||||||
import traceback
|
import traceback
|
||||||
import platform
|
import platform
|
||||||
|
|
@ -76,9 +77,38 @@ log_levels = {
|
||||||
# Track if structlog logging has been configured
|
# Track if structlog logging has been configured
|
||||||
_is_structlog_configured = False
|
_is_structlog_configured = False
|
||||||
|
|
||||||
# Path to logs directory
|
|
||||||
LOGS_DIR = Path(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "logs"))
|
def resolve_logs_dir():
|
||||||
LOGS_DIR.mkdir(exist_ok=True) # Create logs dir if it doesn't exist
|
"""Resolve a writable logs directory.
|
||||||
|
|
||||||
|
Priority:
|
||||||
|
1) BaseConfig.logs_root_directory (respects COGNEE_LOGS_DIR)
|
||||||
|
2) /tmp/cognee_logs (default, best-effort create)
|
||||||
|
|
||||||
|
Returns a Path or None if none are writable/creatable.
|
||||||
|
"""
|
||||||
|
from cognee.base_config import get_base_config
|
||||||
|
|
||||||
|
base_config = get_base_config()
|
||||||
|
logs_root_directory = Path(base_config.logs_root_directory)
|
||||||
|
|
||||||
|
try:
|
||||||
|
logs_root_directory.mkdir(parents=True, exist_ok=True)
|
||||||
|
if os.access(logs_root_directory, os.W_OK):
|
||||||
|
return logs_root_directory
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
tmp_log_path = Path(os.path.join("/tmp", "cognee_logs"))
|
||||||
|
tmp_log_path.mkdir(parents=True, exist_ok=True)
|
||||||
|
if os.access(tmp_log_path, os.W_OK):
|
||||||
|
return tmp_log_path
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
# Maximum number of log files to keep
|
# Maximum number of log files to keep
|
||||||
MAX_LOG_FILES = 10
|
MAX_LOG_FILES = 10
|
||||||
|
|
@ -430,28 +460,38 @@ def setup_logging(log_level=None, name=None):
|
||||||
stream_handler.setFormatter(console_formatter)
|
stream_handler.setFormatter(console_formatter)
|
||||||
stream_handler.setLevel(log_level)
|
stream_handler.setLevel(log_level)
|
||||||
|
|
||||||
|
root_logger = logging.getLogger()
|
||||||
|
if root_logger.hasHandlers():
|
||||||
|
root_logger.handlers.clear()
|
||||||
|
root_logger.addHandler(stream_handler)
|
||||||
|
|
||||||
|
# Note: root logger needs to be set at NOTSET to allow all messages through and specific stream and file handlers
|
||||||
|
# can define their own levels.
|
||||||
|
root_logger.setLevel(logging.NOTSET)
|
||||||
|
|
||||||
|
# Resolve logs directory with env and safe fallbacks
|
||||||
|
logs_dir = resolve_logs_dir()
|
||||||
|
|
||||||
# Check if we already have a log file path from the environment
|
# Check if we already have a log file path from the environment
|
||||||
# NOTE: environment variable must be used here as it allows us to
|
# NOTE: environment variable must be used here as it allows us to
|
||||||
# log to a single file with a name based on a timestamp in a multiprocess setting.
|
# log to a single file with a name based on a timestamp in a multiprocess setting.
|
||||||
# Without it, we would have a separate log file for every process.
|
# Without it, we would have a separate log file for every process.
|
||||||
log_file_path = os.environ.get("LOG_FILE_NAME")
|
log_file_path = os.environ.get("LOG_FILE_NAME")
|
||||||
if not log_file_path:
|
if not log_file_path and logs_dir is not None:
|
||||||
# Create a new log file name with the cognee start time
|
# Create a new log file name with the cognee start time
|
||||||
start_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
start_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
||||||
log_file_path = os.path.join(LOGS_DIR, f"{start_time}.log")
|
log_file_path = str((logs_dir / f"{start_time}.log").resolve())
|
||||||
os.environ["LOG_FILE_NAME"] = log_file_path
|
os.environ["LOG_FILE_NAME"] = log_file_path
|
||||||
|
|
||||||
# Create a file handler that uses our custom PlainFileHandler
|
try:
|
||||||
file_handler = PlainFileHandler(log_file_path, encoding="utf-8")
|
# Create a file handler that uses our custom PlainFileHandler
|
||||||
file_handler.setLevel(DEBUG)
|
file_handler = PlainFileHandler(log_file_path, encoding="utf-8")
|
||||||
|
file_handler.setLevel(DEBUG)
|
||||||
# Configure root logger
|
root_logger.addHandler(file_handler)
|
||||||
root_logger = logging.getLogger()
|
except Exception as e:
|
||||||
if root_logger.hasHandlers():
|
# Note: Exceptions happen in case of read only file systems or log file path poiting to location where it does
|
||||||
root_logger.handlers.clear()
|
# not have write permission. Logging to file is not mandatory so we just log a warning to console.
|
||||||
root_logger.addHandler(stream_handler)
|
root_logger.warning(f"Warning: Could not create log file handler at {log_file_path}: {e}")
|
||||||
root_logger.addHandler(file_handler)
|
|
||||||
root_logger.setLevel(log_level)
|
|
||||||
|
|
||||||
if log_level > logging.DEBUG:
|
if log_level > logging.DEBUG:
|
||||||
import warnings
|
import warnings
|
||||||
|
|
@ -466,7 +506,8 @@ def setup_logging(log_level=None, name=None):
|
||||||
)
|
)
|
||||||
|
|
||||||
# Clean up old log files, keeping only the most recent ones
|
# Clean up old log files, keeping only the most recent ones
|
||||||
cleanup_old_logs(LOGS_DIR, MAX_LOG_FILES)
|
if logs_dir is not None:
|
||||||
|
cleanup_old_logs(logs_dir, MAX_LOG_FILES)
|
||||||
|
|
||||||
# Mark logging as configured
|
# Mark logging as configured
|
||||||
_is_structlog_configured = True
|
_is_structlog_configured = True
|
||||||
|
|
@ -490,6 +531,10 @@ def setup_logging(log_level=None, name=None):
|
||||||
|
|
||||||
# Get a configured logger and log system information
|
# Get a configured logger and log system information
|
||||||
logger = structlog.get_logger(name if name else __name__)
|
logger = structlog.get_logger(name if name else __name__)
|
||||||
|
|
||||||
|
if logs_dir is not None:
|
||||||
|
logger.info(f"Log file created at: {log_file_path}", log_file=log_file_path)
|
||||||
|
|
||||||
# Detailed initialization for regular usage
|
# Detailed initialization for regular usage
|
||||||
logger.info(
|
logger.info(
|
||||||
"Logging initialized",
|
"Logging initialized",
|
||||||
|
|
|
||||||
|
|
@ -11,8 +11,10 @@ import pathlib
|
||||||
from uuid import uuid4, uuid5, NAMESPACE_OID
|
from uuid import uuid4, uuid5, NAMESPACE_OID
|
||||||
|
|
||||||
from cognee.base_config import get_base_config
|
from cognee.base_config import get_base_config
|
||||||
|
from cognee.shared.logging_utils import get_logger
|
||||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||||
|
|
||||||
|
logger = get_logger()
|
||||||
|
|
||||||
# Analytics Proxy Url, currently hosted by Vercel
|
# Analytics Proxy Url, currently hosted by Vercel
|
||||||
proxy_url = "https://test.prometh.ai"
|
proxy_url = "https://test.prometh.ai"
|
||||||
|
|
@ -38,16 +40,21 @@ def get_anonymous_id():
|
||||||
|
|
||||||
home_dir = str(pathlib.Path(pathlib.Path(__file__).parent.parent.parent.resolve()))
|
home_dir = str(pathlib.Path(pathlib.Path(__file__).parent.parent.parent.resolve()))
|
||||||
|
|
||||||
if not os.path.isdir(home_dir):
|
try:
|
||||||
os.makedirs(home_dir, exist_ok=True)
|
if not os.path.isdir(home_dir):
|
||||||
anonymous_id_file = os.path.join(home_dir, ".anon_id")
|
os.makedirs(home_dir, exist_ok=True)
|
||||||
if not os.path.isfile(anonymous_id_file):
|
anonymous_id_file = os.path.join(home_dir, ".anon_id")
|
||||||
anonymous_id = str(uuid4())
|
if not os.path.isfile(anonymous_id_file):
|
||||||
with open(anonymous_id_file, "w", encoding="utf-8") as f:
|
anonymous_id = str(uuid4())
|
||||||
f.write(anonymous_id)
|
with open(anonymous_id_file, "w", encoding="utf-8") as f:
|
||||||
else:
|
f.write(anonymous_id)
|
||||||
with open(anonymous_id_file, "r", encoding="utf-8") as f:
|
else:
|
||||||
anonymous_id = f.read()
|
with open(anonymous_id_file, "r", encoding="utf-8") as f:
|
||||||
|
anonymous_id = f.read()
|
||||||
|
except Exception as e:
|
||||||
|
# In case of read-only filesystem or other issues
|
||||||
|
logger.warning("Could not create or read anonymous id file: %s", e)
|
||||||
|
return "unknown-anonymous-id"
|
||||||
return anonymous_id
|
return anonymous_id
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
336
cognee/tasks/cleanup/cleanup_unused_data.py
Normal file
336
cognee/tasks/cleanup/cleanup_unused_data.py
Normal file
|
|
@ -0,0 +1,336 @@
|
||||||
|
"""
|
||||||
|
Task for automatically deleting unused data from the memify pipeline.
|
||||||
|
|
||||||
|
This task identifies and removes data (chunks, entities, summaries) that hasn't
|
||||||
|
been accessed by retrievers for a specified period, helping maintain system
|
||||||
|
efficiency and storage optimization.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from datetime import datetime, timezone, timedelta
|
||||||
|
from typing import Optional, Dict, Any
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||||
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||||
|
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||||
|
from cognee.modules.data.models import Data, DatasetData
|
||||||
|
from cognee.shared.logging_utils import get_logger
|
||||||
|
from sqlalchemy import select, or_
|
||||||
|
import cognee
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def cleanup_unused_data(
|
||||||
|
days_threshold: Optional[int],
|
||||||
|
dry_run: bool = True,
|
||||||
|
user_id: Optional[UUID] = None,
|
||||||
|
text_doc: bool = False
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Identify and remove unused data from the memify pipeline.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
days_threshold : int
|
||||||
|
days since last access to consider data unused
|
||||||
|
dry_run : bool
|
||||||
|
If True, only report what would be deleted without actually deleting (default: True)
|
||||||
|
user_id : UUID, optional
|
||||||
|
Limit cleanup to specific user's data (default: None)
|
||||||
|
text_doc : bool
|
||||||
|
If True, use SQL-based filtering to find unused TextDocuments and call cognee.delete()
|
||||||
|
for proper whole-document deletion (default: False)
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Dict[str, Any]
|
||||||
|
Cleanup results with status, counts, and timestamp
|
||||||
|
"""
|
||||||
|
logger.info(
|
||||||
|
"Starting cleanup task",
|
||||||
|
days_threshold=days_threshold,
|
||||||
|
dry_run=dry_run,
|
||||||
|
user_id=str(user_id) if user_id else None,
|
||||||
|
text_doc=text_doc
|
||||||
|
)
|
||||||
|
|
||||||
|
# Calculate cutoff timestamp
|
||||||
|
cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_threshold)
|
||||||
|
|
||||||
|
if text_doc:
|
||||||
|
# SQL-based approach: Find unused TextDocuments and use cognee.delete()
|
||||||
|
return await _cleanup_via_sql(cutoff_date, dry_run, user_id)
|
||||||
|
else:
|
||||||
|
# Graph-based approach: Find unused nodes directly from graph
|
||||||
|
cutoff_timestamp_ms = int(cutoff_date.timestamp() * 1000)
|
||||||
|
logger.debug(f"Cutoff timestamp: {cutoff_date.isoformat()} ({cutoff_timestamp_ms}ms)")
|
||||||
|
|
||||||
|
# Find unused nodes
|
||||||
|
unused_nodes = await _find_unused_nodes(cutoff_timestamp_ms, user_id)
|
||||||
|
|
||||||
|
total_unused = sum(len(nodes) for nodes in unused_nodes.values())
|
||||||
|
logger.info(f"Found {total_unused} unused nodes", unused_nodes={k: len(v) for k, v in unused_nodes.items()})
|
||||||
|
|
||||||
|
if dry_run:
|
||||||
|
return {
|
||||||
|
"status": "dry_run",
|
||||||
|
"unused_count": total_unused,
|
||||||
|
"deleted_count": {
|
||||||
|
"data_items": 0,
|
||||||
|
"chunks": 0,
|
||||||
|
"entities": 0,
|
||||||
|
"summaries": 0,
|
||||||
|
"associations": 0
|
||||||
|
},
|
||||||
|
"cleanup_date": datetime.now(timezone.utc).isoformat(),
|
||||||
|
"preview": {
|
||||||
|
"chunks": len(unused_nodes["DocumentChunk"]),
|
||||||
|
"entities": len(unused_nodes["Entity"]),
|
||||||
|
"summaries": len(unused_nodes["TextSummary"])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Delete unused nodes
|
||||||
|
deleted_counts = await _delete_unused_nodes(unused_nodes)
|
||||||
|
|
||||||
|
logger.info("Cleanup completed", deleted_counts=deleted_counts)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"status": "completed",
|
||||||
|
"unused_count": total_unused,
|
||||||
|
"deleted_count": {
|
||||||
|
"data_items": 0,
|
||||||
|
"chunks": deleted_counts["DocumentChunk"],
|
||||||
|
"entities": deleted_counts["Entity"],
|
||||||
|
"summaries": deleted_counts["TextSummary"],
|
||||||
|
"associations": deleted_counts["associations"]
|
||||||
|
},
|
||||||
|
"cleanup_date": datetime.now(timezone.utc).isoformat()
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def _cleanup_via_sql(
|
||||||
|
cutoff_date: datetime,
|
||||||
|
dry_run: bool,
|
||||||
|
user_id: Optional[UUID] = None
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
SQL-based cleanup: Query Data table for unused documents and use cognee.delete().
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
cutoff_date : datetime
|
||||||
|
Cutoff date for last_accessed filtering
|
||||||
|
dry_run : bool
|
||||||
|
If True, only report what would be deleted
|
||||||
|
user_id : UUID, optional
|
||||||
|
Filter by user ID if provided
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Dict[str, Any]
|
||||||
|
Cleanup results
|
||||||
|
"""
|
||||||
|
db_engine = get_relational_engine()
|
||||||
|
|
||||||
|
async with db_engine.get_async_session() as session:
|
||||||
|
# Query for Data records with old last_accessed timestamps
|
||||||
|
query = select(Data, DatasetData).join(
|
||||||
|
DatasetData, Data.id == DatasetData.data_id
|
||||||
|
).where(
|
||||||
|
or_(
|
||||||
|
Data.last_accessed < cutoff_date,
|
||||||
|
Data.last_accessed.is_(None)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
if user_id:
|
||||||
|
from cognee.modules.data.models import Dataset
|
||||||
|
query = query.join(Dataset, DatasetData.dataset_id == Dataset.id).where(
|
||||||
|
Dataset.owner_id == user_id
|
||||||
|
)
|
||||||
|
|
||||||
|
result = await session.execute(query)
|
||||||
|
unused_data = result.all()
|
||||||
|
|
||||||
|
logger.info(f"Found {len(unused_data)} unused documents in SQL")
|
||||||
|
|
||||||
|
if dry_run:
|
||||||
|
return {
|
||||||
|
"status": "dry_run",
|
||||||
|
"unused_count": len(unused_data),
|
||||||
|
"deleted_count": {
|
||||||
|
"data_items": 0,
|
||||||
|
"documents": 0
|
||||||
|
},
|
||||||
|
"cleanup_date": datetime.now(timezone.utc).isoformat(),
|
||||||
|
"preview": {
|
||||||
|
"documents": len(unused_data)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Delete each document using cognee.delete()
|
||||||
|
deleted_count = 0
|
||||||
|
from cognee.modules.users.methods import get_default_user
|
||||||
|
user = await get_default_user() if user_id is None else None
|
||||||
|
|
||||||
|
for data, dataset_data in unused_data:
|
||||||
|
try:
|
||||||
|
await cognee.delete(
|
||||||
|
data_id=data.id,
|
||||||
|
dataset_id=dataset_data.dataset_id,
|
||||||
|
mode="hard", # Use hard mode to also remove orphaned entities
|
||||||
|
user=user
|
||||||
|
)
|
||||||
|
deleted_count += 1
|
||||||
|
logger.info(f"Deleted document {data.id} from dataset {dataset_data.dataset_id}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to delete document {data.id}: {e}")
|
||||||
|
|
||||||
|
logger.info("Cleanup completed", deleted_count=deleted_count)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"status": "completed",
|
||||||
|
"unused_count": len(unused_data),
|
||||||
|
"deleted_count": {
|
||||||
|
"data_items": deleted_count,
|
||||||
|
"documents": deleted_count
|
||||||
|
},
|
||||||
|
"cleanup_date": datetime.now(timezone.utc).isoformat()
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def _find_unused_nodes(
|
||||||
|
cutoff_timestamp_ms: int,
|
||||||
|
user_id: Optional[UUID] = None
|
||||||
|
) -> Dict[str, list]:
|
||||||
|
"""
|
||||||
|
Query Kuzu for nodes with old last_accessed_at timestamps.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
cutoff_timestamp_ms : int
|
||||||
|
Cutoff timestamp in milliseconds since epoch
|
||||||
|
user_id : UUID, optional
|
||||||
|
Filter by user ID if provided
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Dict[str, list]
|
||||||
|
Dictionary mapping node types to lists of unused node IDs
|
||||||
|
"""
|
||||||
|
graph_engine = await get_graph_engine()
|
||||||
|
|
||||||
|
# Query all nodes with their properties
|
||||||
|
query = "MATCH (n:Node) RETURN n.id, n.type, n.properties"
|
||||||
|
results = await graph_engine.query(query)
|
||||||
|
|
||||||
|
unused_nodes = {
|
||||||
|
"DocumentChunk": [],
|
||||||
|
"Entity": [],
|
||||||
|
"TextSummary": []
|
||||||
|
}
|
||||||
|
|
||||||
|
for node_id, node_type, props_json in results:
|
||||||
|
# Only process tracked node types
|
||||||
|
if node_type not in unused_nodes:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Parse properties JSON
|
||||||
|
if props_json:
|
||||||
|
try:
|
||||||
|
props = json.loads(props_json)
|
||||||
|
last_accessed = props.get("last_accessed_at")
|
||||||
|
|
||||||
|
# Check if node is unused (never accessed or accessed before cutoff)
|
||||||
|
if last_accessed is None or last_accessed < cutoff_timestamp_ms:
|
||||||
|
unused_nodes[node_type].append(node_id)
|
||||||
|
logger.debug(
|
||||||
|
f"Found unused {node_type}",
|
||||||
|
node_id=node_id,
|
||||||
|
last_accessed=last_accessed
|
||||||
|
)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.warning(f"Failed to parse properties for node {node_id}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
return unused_nodes
|
||||||
|
|
||||||
|
|
||||||
|
async def _delete_unused_nodes(unused_nodes: Dict[str, list]) -> Dict[str, int]:
    """
    Delete unused nodes from graph and vector databases.

    Edges attached to the nodes are removed implicitly by the graph engine
    (DETACH DELETE semantics); they are counted beforehand so the result can
    report how many associations were dropped.

    Parameters
    ----------
    unused_nodes : Dict[str, list]
        Dictionary mapping node types to lists of node IDs to delete.
        Missing type keys are treated as empty lists.

    Returns
    -------
    Dict[str, int]
        Count of deleted items by type, plus an "associations" entry for
        the number of edges removed alongside the nodes.
    """
    graph_engine = await get_graph_engine()
    vector_engine = get_vector_engine()

    deleted_counts = {
        "DocumentChunk": 0,
        "Entity": 0,
        "TextSummary": 0,
        "associations": 0,
    }

    # Count edges connected to the doomed nodes before they disappear.
    # Only the ID lists are needed here, so iterate the values directly.
    for node_ids in unused_nodes.values():
        for node_id in node_ids:
            result = await graph_engine.query(
                "MATCH (n:Node {id: $id})-[r:EDGE]-() RETURN count(r)",
                {"id": node_id}
            )
            if result and len(result) > 0:
                deleted_counts["associations"] += result[0][0]

    # Delete from graph database (uses DETACH DELETE, so edges are automatically removed)
    for node_type, node_ids in unused_nodes.items():
        if not node_ids:
            continue

        logger.info(f"Deleting {len(node_ids)} {node_type} nodes from graph database")

        # Delete nodes in batches
        await graph_engine.delete_nodes(node_ids)
        deleted_counts[node_type] = len(node_ids)

    # Vector collections holding embeddings for each tracked node type.
    vector_collections = {
        "DocumentChunk": "DocumentChunk_text",
        "Entity": "Entity_name",
        "TextSummary": "TextSummary_text",
    }

    for node_type, collection_name in vector_collections.items():
        # .get() so a partial input dict cannot raise KeyError here.
        node_ids = unused_nodes.get(node_type, [])
        if not node_ids:
            continue

        logger.info(f"Deleting {len(node_ids)} {node_type} embeddings from vector database")

        try:
            # Delete from vector collection; skip silently if it doesn't exist.
            if await vector_engine.has_collection(collection_name):
                for node_id in node_ids:
                    try:
                        await vector_engine.delete(collection_name, {"id": str(node_id)})
                    except Exception as e:
                        logger.warning(f"Failed to delete {node_id} from {collection_name}: {e}")
        except Exception as e:
            logger.error(f"Error deleting from vector collection {collection_name}: {e}")

    return deleted_counts
|
||||||
|
|
@ -239,7 +239,7 @@ async def complete_database_ingestion(schema, migrate_column_data):
|
||||||
id=uuid5(NAMESPACE_OID, name=column_node_id),
|
id=uuid5(NAMESPACE_OID, name=column_node_id),
|
||||||
name=column_node_id,
|
name=column_node_id,
|
||||||
properties=f"{key} {value} {table_name}",
|
properties=f"{key} {value} {table_name}",
|
||||||
description=f"column from relational database table={table_name}. Column name={key} and value={value}. The value of the column is related to the following row with this id: {row_node.id}. This column has the following ID: {column_node_id}",
|
description=f"column from relational database table={table_name}. Column name={key} and value={value}. This column has the following ID: {column_node_id}",
|
||||||
)
|
)
|
||||||
node_mapping[column_node_id] = column_node
|
node_mapping[column_node_id] = column_node
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
from typing import Union
|
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
from typing import Union
|
||||||
|
from datetime import datetime, timezone
|
||||||
from cognee.infrastructure.engine import DataPoint
|
from cognee.infrastructure.engine import DataPoint
|
||||||
from cognee.modules.chunking.models import DocumentChunk
|
from cognee.modules.chunking.models import DocumentChunk
|
||||||
from cognee.shared.CodeGraphEntities import CodeFile, CodePart
|
from cognee.shared.CodeGraphEntities import CodeFile, CodePart
|
||||||
|
|
@ -17,7 +19,6 @@ class TextSummary(DataPoint):
|
||||||
|
|
||||||
text: str
|
text: str
|
||||||
made_from: DocumentChunk
|
made_from: DocumentChunk
|
||||||
|
|
||||||
metadata: dict = {"index_fields": ["text"]}
|
metadata: dict = {"index_fields": ["text"]}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -489,3 +489,154 @@ def test_get_ontology_resolver_from_env_resolver_functionality():
|
||||||
assert nodes == []
|
assert nodes == []
|
||||||
assert relationships == []
|
assert relationships == []
|
||||||
assert start_node is None
|
assert start_node is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_multifile_ontology_loading_success():
    """Test successful loading of multiple ontology files."""
    import os
    import tempfile

    cars = Namespace("http://example.org/cars#")
    tech = Namespace("http://example.org/tech#")

    # Build two independent ontologies: vehicles and tech companies.
    cars_graph = Graph()
    for triple in (
        (cars.Vehicle, RDF.type, OWL.Class),
        (cars.Car, RDF.type, OWL.Class),
        (cars.Car, RDFS.subClassOf, cars.Vehicle),
        (cars.Audi, RDF.type, cars.Car),
        (cars.BMW, RDF.type, cars.Car),
    ):
        cars_graph.add(triple)

    tech_graph = Graph()
    for triple in (
        (tech.Company, RDF.type, OWL.Class),
        (tech.TechCompany, RDF.type, OWL.Class),
        (tech.TechCompany, RDFS.subClassOf, tech.Company),
        (tech.Apple, RDF.type, tech.TechCompany),
        (tech.Google, RDF.type, tech.TechCompany),
    ):
        tech_graph.add(triple)

    # Serialize each graph to its own temp OWL file.
    file_paths = []
    for graph in (cars_graph, tech_graph):
        with tempfile.NamedTemporaryFile(mode="w", suffix=".owl", delete=False) as handle:
            graph.serialize(handle.name, format="xml")
            file_paths.append(handle.name)

    try:
        resolver = RDFLibOntologyResolver(ontology_file=list(file_paths))

        assert resolver.graph is not None

        # Classes and individuals from both files must be merged in.
        for class_name in ("car", "vehicle", "company", "techcompany"):
            assert class_name in resolver.lookup["classes"]
        for individual in ("audi", "bmw", "apple", "google"):
            assert individual in resolver.lookup["individuals"]

        assert resolver.find_closest_match("Audi", "individuals") == "audi"
        assert resolver.find_closest_match("Google", "individuals") == "google"

    finally:
        for path in file_paths:
            os.unlink(path)
|
||||||
|
|
||||||
|
|
||||||
|
def test_multifile_ontology_with_missing_files():
    """Test loading multiple ontology files where some don't exist."""
    import os
    import tempfile

    ns = Namespace("http://example.org/test#")
    graph = Graph()
    graph.add((ns.Car, RDF.type, OWL.Class))
    graph.add((ns.Audi, RDF.type, ns.Car))

    with tempfile.NamedTemporaryFile(mode="w", suffix=".owl", delete=False) as handle:
        graph.serialize(handle.name, format="xml")
        valid_path = handle.name

    try:
        # Missing files should be skipped; the valid one still loads.
        resolver = RDFLibOntologyResolver(
            ontology_file=["nonexistent_file_1.owl", valid_path, "nonexistent_file_2.owl"]
        )

        assert resolver.graph is not None
        assert "car" in resolver.lookup["classes"]
        assert "audi" in resolver.lookup["individuals"]

        assert resolver.find_closest_match("Audi", "individuals") == "audi"

    finally:
        os.unlink(valid_path)
|
||||||
|
|
||||||
|
|
||||||
|
def test_multifile_ontology_all_files_missing():
    """Test loading multiple ontology files where all files are missing."""
    missing_files = [f"nonexistent_file_{i}.owl" for i in (1, 2, 3)]
    resolver = RDFLibOntologyResolver(ontology_file=missing_files)

    # Nothing could be loaded: no graph and empty lookups.
    assert resolver.graph is None
    assert resolver.lookup["classes"] == {}
    assert resolver.lookup["individuals"] == {}
|
||||||
|
|
||||||
|
|
||||||
|
def test_multifile_ontology_with_overlapping_entities():
    """Test loading multiple ontology files with overlapping/related entities."""
    import os
    import tempfile

    ns = Namespace("http://example.org/automotive#")

    # File 1 declares the base hierarchy; file 2 extends it in the same namespace.
    base_graph = Graph()
    for triple in (
        (ns.Vehicle, RDF.type, OWL.Class),
        (ns.Car, RDF.type, OWL.Class),
        (ns.Car, RDFS.subClassOf, ns.Vehicle),
    ):
        base_graph.add(triple)

    extension_graph = Graph()
    for triple in (
        (ns.LuxuryCar, RDF.type, OWL.Class),
        (ns.LuxuryCar, RDFS.subClassOf, ns.Car),
        (ns.Mercedes, RDF.type, ns.LuxuryCar),
        (ns.BMW, RDF.type, ns.LuxuryCar),
    ):
        extension_graph.add(triple)

    file_paths = []
    for graph in (base_graph, extension_graph):
        with tempfile.NamedTemporaryFile(mode="w", suffix=".owl", delete=False) as handle:
            graph.serialize(handle.name, format="xml")
            file_paths.append(handle.name)

    try:
        resolver = RDFLibOntologyResolver(ontology_file=list(file_paths))

        for class_name in ("vehicle", "car", "luxurycar"):
            assert class_name in resolver.lookup["classes"]

        assert "mercedes" in resolver.lookup["individuals"]
        assert "bmw" in resolver.lookup["individuals"]

        # The subgraph must traverse the hierarchy across both files.
        nodes, relationships, start_node = resolver.get_subgraph("Mercedes", "individuals")

        uri_labels = {resolver._uri_to_key(node.uri) for node in nodes}
        for label in ("mercedes", "luxurycar", "car", "vehicle"):
            assert label in uri_labels

    finally:
        for path in file_paths:
            os.unlink(path)
|
||||||
|
|
|
||||||
|
|
@ -77,6 +77,7 @@ async def main():
|
||||||
"What happened between 2000 and 2006?",
|
"What happened between 2000 and 2006?",
|
||||||
"What happened between 1903 and 1995, I am interested in the Selected Works of Arnulf Øverland Ole Peter Arnulf Øverland?",
|
"What happened between 1903 and 1995, I am interested in the Selected Works of Arnulf Øverland Ole Peter Arnulf Øverland?",
|
||||||
"Who is Attaphol Buspakom Attaphol Buspakom?",
|
"Who is Attaphol Buspakom Attaphol Buspakom?",
|
||||||
|
"Who was Arnulf Øverland?",
|
||||||
]
|
]
|
||||||
|
|
||||||
for query_text in queries:
|
for query_text in queries:
|
||||||
|
|
|
||||||
6
poetry.lock
generated
6
poetry.lock
generated
|
|
@ -1,4 +1,4 @@
|
||||||
# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand.
|
# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand.
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "accelerate"
|
name = "accelerate"
|
||||||
|
|
@ -2543,7 +2543,6 @@ files = [
|
||||||
{file = "fastuuid-0.12.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9b31dd488d0778c36f8279b306dc92a42f16904cba54acca71e107d65b60b0c"},
|
{file = "fastuuid-0.12.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9b31dd488d0778c36f8279b306dc92a42f16904cba54acca71e107d65b60b0c"},
|
||||||
{file = "fastuuid-0.12.0-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:b19361ee649365eefc717ec08005972d3d1eb9ee39908022d98e3bfa9da59e37"},
|
{file = "fastuuid-0.12.0-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:b19361ee649365eefc717ec08005972d3d1eb9ee39908022d98e3bfa9da59e37"},
|
||||||
{file = "fastuuid-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:8fc66b11423e6f3e1937385f655bedd67aebe56a3dcec0cb835351cfe7d358c9"},
|
{file = "fastuuid-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:8fc66b11423e6f3e1937385f655bedd67aebe56a3dcec0cb835351cfe7d358c9"},
|
||||||
{file = "fastuuid-0.12.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:2925f67b88d47cb16aa3eb1ab20fdcf21b94d74490e0818c91ea41434b987493"},
|
|
||||||
{file = "fastuuid-0.12.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7b15c54d300279ab20a9cc0579ada9c9f80d1bc92997fc61fb7bf3103d7cb26b"},
|
{file = "fastuuid-0.12.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7b15c54d300279ab20a9cc0579ada9c9f80d1bc92997fc61fb7bf3103d7cb26b"},
|
||||||
{file = "fastuuid-0.12.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:458f1bc3ebbd76fdb89ad83e6b81ccd3b2a99fa6707cd3650b27606745cfb170"},
|
{file = "fastuuid-0.12.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:458f1bc3ebbd76fdb89ad83e6b81ccd3b2a99fa6707cd3650b27606745cfb170"},
|
||||||
{file = "fastuuid-0.12.0-cp38-cp38-manylinux_2_34_x86_64.whl", hash = "sha256:a8f0f83fbba6dc44271a11b22e15838641b8c45612cdf541b4822a5930f6893c"},
|
{file = "fastuuid-0.12.0-cp38-cp38-manylinux_2_34_x86_64.whl", hash = "sha256:a8f0f83fbba6dc44271a11b22e15838641b8c45612cdf541b4822a5930f6893c"},
|
||||||
|
|
@ -4170,8 +4169,6 @@ groups = ["main"]
|
||||||
markers = "extra == \"dlt\""
|
markers = "extra == \"dlt\""
|
||||||
files = [
|
files = [
|
||||||
{file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"},
|
{file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"},
|
||||||
{file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"},
|
|
||||||
{file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"},
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
|
|
@ -8593,7 +8590,6 @@ files = [
|
||||||
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"},
|
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"},
|
||||||
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"},
|
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"},
|
||||||
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"},
|
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"},
|
||||||
{file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"},
|
|
||||||
{file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"},
|
{file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"},
|
||||||
{file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"},
|
{file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"},
|
||||||
{file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"},
|
{file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"},
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
[project]
|
[project]
|
||||||
name = "cognee"
|
name = "cognee"
|
||||||
|
|
||||||
version = "0.3.7"
|
version = "0.3.9"
|
||||||
description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
|
description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
|
||||||
authors = [
|
authors = [
|
||||||
{ name = "Vasilije Markovic" },
|
{ name = "Vasilije Markovic" },
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue