Merge pull request #4 from chinu0609/delete-last-acessed

Delete last accessed
This commit is contained in:
Chinmay Bhosale 2025-10-31 00:25:33 +05:30 committed by GitHub
commit 4b43afcdab
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 84 additions and 13 deletions

View file

@ -43,6 +43,9 @@ class DataPoint(BaseModel):
updated_at: int = Field( updated_at: int = Field(
default_factory=lambda: int(datetime.now(timezone.utc).timestamp() * 1000) default_factory=lambda: int(datetime.now(timezone.utc).timestamp() * 1000)
) )
last_accessed_at: int = Field(
default_factory=lambda: int(datetime.now(timezone.utc).timestamp() * 1000)
)
ontology_valid: bool = False ontology_valid: bool = False
version: int = 1 # Default version version: int = 1 # Default version
topological_rank: Optional[int] = 0 topological_rank: Optional[int] = 0

View file

@ -1,5 +1,7 @@
from typing import List, Union from typing import List, Union
from pydantic import BaseModel, Field
from datetime import datetime, timezone
from cognee.infrastructure.engine import DataPoint from cognee.infrastructure.engine import DataPoint
from cognee.modules.data.processing.document_types import Document from cognee.modules.data.processing.document_types import Document
from cognee.modules.engine.models import Entity from cognee.modules.engine.models import Entity
@ -22,6 +24,7 @@ class DocumentChunk(DataPoint):
- cut_type: The type of cut that defined this chunk. - cut_type: The type of cut that defined this chunk.
- is_part_of: The document to which this chunk belongs. - is_part_of: The document to which this chunk belongs.
- contains: A list of entities or events contained within the chunk (default is None). - contains: A list of entities or events contained within the chunk (default is None).
- last_accessed_at: The timestamp of the last time the chunk was accessed.
- metadata: A dictionary to hold meta information related to the chunk, including index - metadata: A dictionary to hold meta information related to the chunk, including index
fields. fields.
""" """
@ -32,5 +35,4 @@ class DocumentChunk(DataPoint):
cut_type: str cut_type: str
is_part_of: Document is_part_of: Document
contains: List[Union[Entity, Event]] = None contains: List[Union[Entity, Event]] = None
metadata: dict = {"index_fields": ["text"]} metadata: dict = {"index_fields": ["text"]}

View file

@ -1,11 +1,11 @@
from cognee.infrastructure.engine import DataPoint from cognee.infrastructure.engine import DataPoint
from cognee.modules.engine.models.EntityType import EntityType from cognee.modules.engine.models.EntityType import EntityType
from typing import Optional from typing import Optional
from datetime import datetime, timezone
from pydantic import BaseModel, Field
class Entity(DataPoint): class Entity(DataPoint):
name: str name: str
is_a: Optional[EntityType] = None is_a: Optional[EntityType] = None
description: str description: str
metadata: dict = {"index_fields": ["name"]} metadata: dict = {"index_fields": ["name"]}

View file

@ -1,10 +1,11 @@
from typing import Any, Optional from typing import Any, Optional
from cognee.modules.retrieval.utils.access_tracking import update_node_access_timestamps
from cognee.shared.logging_utils import get_logger from cognee.shared.logging_utils import get_logger
from cognee.infrastructure.databases.vector import get_vector_engine from cognee.infrastructure.databases.vector import get_vector_engine
from cognee.modules.retrieval.base_retriever import BaseRetriever from cognee.modules.retrieval.base_retriever import BaseRetriever
from cognee.modules.retrieval.exceptions.exceptions import NoDataError from cognee.modules.retrieval.exceptions.exceptions import NoDataError
from cognee.infrastructure.databases.vector.exceptions.exceptions import CollectionNotFoundError from cognee.infrastructure.databases.vector.exceptions.exceptions import CollectionNotFoundError
from datetime import datetime, timezone
logger = get_logger("ChunksRetriever") logger = get_logger("ChunksRetriever")
@ -27,21 +28,16 @@ class ChunksRetriever(BaseRetriever):
): ):
self.top_k = top_k self.top_k = top_k
async def get_context(self, query: str) -> Any: async def get_context(self, query: str) -> Any:
""" """
Retrieves document chunks context based on the query. Retrieves document chunks context based on the query.
Searches for document chunks relevant to the specified query using a vector engine. Searches for document chunks relevant to the specified query using a vector engine.
Raises a NoDataError if no data is found in the system. Raises a NoDataError if no data is found in the system.
Parameters: Parameters:
----------- -----------
- query (str): The query string to search for relevant document chunks. - query (str): The query string to search for relevant document chunks.
Returns: Returns:
-------- --------
- Any: A list of document chunk payloads retrieved from the search. - Any: A list of document chunk payloads retrieved from the search.
""" """
logger.info( logger.info(
@ -53,13 +49,14 @@ class ChunksRetriever(BaseRetriever):
try: try:
found_chunks = await vector_engine.search("DocumentChunk_text", query, limit=self.top_k) found_chunks = await vector_engine.search("DocumentChunk_text", query, limit=self.top_k)
logger.info(f"Found {len(found_chunks)} chunks from vector search") logger.info(f"Found {len(found_chunks)} chunks from vector search")
await update_node_access_timestamps(found_chunks)
except CollectionNotFoundError as error: except CollectionNotFoundError as error:
logger.error("DocumentChunk_text collection not found in vector database") logger.error("DocumentChunk_text collection not found in vector database")
raise NoDataError("No data found in the system, please add data first.") from error raise NoDataError("No data found in the system, please add data first.") from error
chunk_payloads = [result.payload for result in found_chunks] chunk_payloads = [result.payload for result in found_chunks]
logger.info(f"Returning {len(chunk_payloads)} chunk payloads") logger.info(f"Returning {len(chunk_payloads)} chunk payloads")
return chunk_payloads
async def get_completion( async def get_completion(
self, query: str, context: Optional[Any] = None, session_id: Optional[str] = None self, query: str, context: Optional[Any] = None, session_id: Optional[str] = None

View file

@ -4,6 +4,7 @@ from cognee.shared.logging_utils import get_logger
from cognee.infrastructure.databases.vector import get_vector_engine from cognee.infrastructure.databases.vector import get_vector_engine
from cognee.modules.retrieval.base_retriever import BaseRetriever from cognee.modules.retrieval.base_retriever import BaseRetriever
from cognee.modules.retrieval.exceptions.exceptions import NoDataError from cognee.modules.retrieval.exceptions.exceptions import NoDataError
from cognee.modules.retrieval.utils.access_tracking import update_node_access_timestamps
from cognee.infrastructure.databases.vector.exceptions.exceptions import CollectionNotFoundError from cognee.infrastructure.databases.vector.exceptions.exceptions import CollectionNotFoundError
logger = get_logger("SummariesRetriever") logger = get_logger("SummariesRetriever")
@ -54,6 +55,9 @@ class SummariesRetriever(BaseRetriever):
"TextSummary_text", query, limit=self.top_k "TextSummary_text", query, limit=self.top_k
) )
logger.info(f"Found {len(summaries_results)} summaries from vector search") logger.info(f"Found {len(summaries_results)} summaries from vector search")
await update_node_access_timestamps(summaries_results)
except CollectionNotFoundError as error: except CollectionNotFoundError as error:
logger.error("TextSummary_text collection not found in vector database") logger.error("TextSummary_text collection not found in vector database")
raise NoDataError("No data found in the system, please add data first.") from error raise NoDataError("No data found in the system, please add data first.") from error

View file

@ -0,0 +1,64 @@
"""Utilities for tracking data access in retrievers."""
import json
from datetime import datetime, timezone
from typing import List, Any
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.shared.logging_utils import get_logger
logger = get_logger(__name__)
async def update_node_access_timestamps(items: List[Any]):
    """
    Update ``last_accessed_at`` for graph nodes matching the given items.

    Best-effort bookkeeping: a failure while updating one node is logged and
    skipped, so access tracking can never break the retrieval call that
    triggered it.

    Parameters
    ----------
    items : List[Any]
        Vector-search results (objects exposing a ``payload`` dict) or plain
        dicts; each must carry an ``id`` field identifying the graph node.
    """
    if not items:
        return

    graph_engine = await get_graph_engine()
    # Millisecond UTC timestamp, matching the DataPoint.last_accessed_at format.
    timestamp_ms = int(datetime.now(timezone.utc).timestamp() * 1000)

    updated = 0
    for item in items:
        # Accept both vector-search results (item.payload) and raw dicts.
        payload = item.payload if hasattr(item, "payload") else item
        item_id = payload.get("id")
        if not item_id:
            continue

        try:
            # Fetch node type and serialized properties in a single query.
            result = await graph_engine.query(
                "MATCH (n:Node {id: $id}) RETURN n.type as node_type, n.properties as props",
                {"id": str(item_id)},
            )

            if result and len(result) > 0 and result[0]:
                node_type = result[0][0]  # First column: node_type
                props_json = result[0][1]  # Second column: properties

                # Properties are stored as a JSON string; merge in the new timestamp.
                props = json.loads(props_json) if props_json else {}
                props["last_accessed_at"] = timestamp_ms

                # Write the updated properties back to the graph database.
                await graph_engine.query(
                    "MATCH (n:Node {id: $id}) SET n.properties = $props",
                    {"id": str(item_id), "props": json.dumps(props)},
                )
                updated += 1
                logger.debug(f"Updated access timestamp for {node_type} node {item_id}")
        except Exception as error:
            # Access tracking must never break retrieval; log and move on.
            logger.error(f"Failed to update timestamp for node {item_id}: {error}")
            continue

    logger.debug(f"Updated access timestamps for {updated} of {len(items)} nodes")

View file

@ -1,5 +1,7 @@
from typing import Union
from pydantic import BaseModel, Field
from typing import Union
from datetime import datetime, timezone
from cognee.infrastructure.engine import DataPoint from cognee.infrastructure.engine import DataPoint
from cognee.modules.chunking.models import DocumentChunk from cognee.modules.chunking.models import DocumentChunk
from cognee.shared.CodeGraphEntities import CodeFile, CodePart from cognee.shared.CodeGraphEntities import CodeFile, CodePart
@ -17,7 +19,6 @@ class TextSummary(DataPoint):
text: str text: str
made_from: DocumentChunk made_from: DocumentChunk
metadata: dict = {"index_fields": ["text"]} metadata: dict = {"index_fields": ["text"]}