fix: flag to enable and disable last_accessed
This commit is contained in:
parent
53d3b50f93
commit
b52c1a1e25
3 changed files with 90 additions and 45 deletions
|
|
@ -5,6 +5,7 @@ Revises: 211ab850ef3d
|
||||||
Create Date: 2025-11-04 21:45:52.642322
|
Create Date: 2025-11-04 21:45:52.642322
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
import os
|
||||||
from typing import Sequence, Union
|
from typing import Sequence, Union
|
||||||
|
|
||||||
from alembic import op
|
from alembic import op
|
||||||
|
|
@ -17,6 +18,7 @@ down_revision: Union[str, None] = '211ab850ef3d'
|
||||||
branch_labels: Union[str, Sequence[str], None] = None
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
depends_on: Union[str, Sequence[str], None] = None
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
def _get_column(inspector, table, name, schema=None):
|
def _get_column(inspector, table, name, schema=None):
|
||||||
for col in inspector.get_columns(table, schema=schema):
|
for col in inspector.get_columns(table, schema=schema):
|
||||||
if col["name"] == name:
|
if col["name"] == name:
|
||||||
|
|
@ -30,11 +32,15 @@ def upgrade() -> None:
|
||||||
|
|
||||||
last_accessed_column = _get_column(insp, "data", "last_accessed")
|
last_accessed_column = _get_column(insp, "data", "last_accessed")
|
||||||
if not last_accessed_column:
|
if not last_accessed_column:
|
||||||
|
# Always create the column for schema consistency
|
||||||
op.add_column('data',
|
op.add_column('data',
|
||||||
sa.Column('last_accessed', sa.DateTime(timezone=True), nullable=True)
|
sa.Column('last_accessed', sa.DateTime(timezone=True), nullable=True)
|
||||||
)
|
)
|
||||||
# Optionally initialize with created_at values for existing records
|
|
||||||
op.execute("UPDATE data SET last_accessed = CURRENT_TIMESTAMP")
|
# Only initialize existing records if feature is enabled
|
||||||
|
enable_last_accessed = os.getenv("ENABLE_LAST_ACCESSED", "false").lower() == "true"
|
||||||
|
if enable_last_accessed:
|
||||||
|
op.execute("UPDATE data SET last_accessed = CURRENT_TIMESTAMP")
|
||||||
|
|
||||||
|
|
||||||
def downgrade() -> None:
|
def downgrade() -> None:
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ import json
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import List, Any
|
from typing import List, Any
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
import os
|
||||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||||
from cognee.modules.data.models import Data
|
from cognee.modules.data.models import Data
|
||||||
|
|
@ -28,6 +28,9 @@ async def update_node_access_timestamps(items: List[Any]):
|
||||||
items : List[Any]
|
items : List[Any]
|
||||||
List of items with payload containing 'id' field (from vector search results)
|
List of items with payload containing 'id' field (from vector search results)
|
||||||
"""
|
"""
|
||||||
|
if os.getenv("ENABLE_LAST_ACCESSED", "false").lower() != "true":
|
||||||
|
return
|
||||||
|
|
||||||
if not items:
|
if not items:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ import json
|
||||||
from datetime import datetime, timezone, timedelta
|
from datetime import datetime, timezone, timedelta
|
||||||
from typing import Optional, Dict, Any
|
from typing import Optional, Dict, Any
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
import os
|
||||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||||
|
|
@ -48,6 +48,42 @@ async def cleanup_unused_data(
|
||||||
Dict[str, Any]
|
Dict[str, Any]
|
||||||
Cleanup results with status, counts, and timestamp
|
Cleanup results with status, counts, and timestamp
|
||||||
"""
|
"""
|
||||||
|
# Check 1: Environment variable must be enabled
|
||||||
|
if os.getenv("ENABLE_LAST_ACCESSED", "false").lower() != "true":
|
||||||
|
logger.warning(
|
||||||
|
"Cleanup skipped: ENABLE_LAST_ACCESSED is not enabled."
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"status": "skipped",
|
||||||
|
"reason": "ENABLE_LAST_ACCESSED not enabled",
|
||||||
|
"unused_count": 0,
|
||||||
|
"deleted_count": {},
|
||||||
|
"cleanup_date": datetime.now(timezone.utc).isoformat()
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check 2: Verify tracking has actually been running
|
||||||
|
db_engine = get_relational_engine()
|
||||||
|
async with db_engine.get_async_session() as session:
|
||||||
|
# Count records with non-NULL last_accessed
|
||||||
|
tracked_count = await session.execute(
|
||||||
|
select(sa.func.count(Data.id)).where(Data.last_accessed.isnot(None))
|
||||||
|
)
|
||||||
|
tracked_records = tracked_count.scalar()
|
||||||
|
|
||||||
|
if tracked_records == 0:
|
||||||
|
logger.warning(
|
||||||
|
"Cleanup skipped: No records have been tracked yet. "
|
||||||
|
"ENABLE_LAST_ACCESSED may have been recently enabled. "
|
||||||
|
"Wait for retrievers to update timestamps before running cleanup."
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"status": "skipped",
|
||||||
|
"reason": "No tracked records found - tracking may be newly enabled",
|
||||||
|
"unused_count": 0,
|
||||||
|
"deleted_count": {},
|
||||||
|
"cleanup_date": datetime.now(timezone.utc).isoformat()
|
||||||
|
}
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"Starting cleanup task",
|
"Starting cleanup task",
|
||||||
days_threshold=days_threshold,
|
days_threshold=days_threshold,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue