feat: Add logging to file [COG-1715] (#672)

<!-- .github/pull_request_template.md -->

## Description
Add logging to logs file

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
Igor Ilic 2025-03-28 16:13:56 +01:00 committed by GitHub
parent de5b7f2044
commit 2611d89094
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 187 additions and 8 deletions

7
.gitignore vendored
View file

@ -73,6 +73,11 @@ local_settings.py
db.sqlite3
db.sqlite3-journal
# Cognee logs directory - keep directory, ignore contents
logs/*
!logs/.gitkeep
!logs/README.md
# Flask stuff:
instance/
.webassets-cache
@ -188,4 +193,4 @@ node_modules/
SWE-bench_testsample/
# ChromaDB Data
.chromadb_data/
.chromadb_data/

View file

@ -1,6 +1,11 @@
import logging
import os
import sys
import threading
import traceback
from datetime import datetime, timezone
from pathlib import Path

import structlog
# Export common log levels
DEBUG = logging.DEBUG
@ -12,6 +17,96 @@ CRITICAL = logging.CRITICAL
# Track if logging has been configured
_is_configured = False
# Create a lock for thread-safe initialization
_setup_lock = threading.Lock()
# Path to logs directory
LOGS_DIR = Path(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "logs"))
LOGS_DIR.mkdir(exist_ok=True) # Create logs dir if it doesn't exist
# Maximum number of log files to keep
MAX_LOG_FILES = 10
class PlainFileHandler(logging.FileHandler):
    """A custom file handler that writes simpler plain text log entries."""

    def emit(self, record):
        """Write *record* to the log file as one plain-text line.

        Structlog records arrive with a dict in ``record.msg``; those are
        rendered as ``timestamp [LEVEL   ] message key=value [logger]``.
        Any other record falls back to the standard ``Formatter`` output.
        Exception tracebacks, when present, are appended after the line.
        """
        try:
            # Re-open the stream if it has been closed (FileHandler contract).
            if self.stream is None:
                self.stream = self._open()

            if isinstance(record.msg, dict) and "event" in record.msg:
                # Structlog event dict: the human-readable message is "event".
                message = record.msg.get("event", "")

                # Remaining keys are structured context, minus bookkeeping keys.
                context = {
                    k: v
                    for k, v in record.msg.items()
                    if k not in ("event", "logger", "level", "timestamp")
                }

                # Render context as " k=v k=v", excluding exception payloads.
                context_str = ""
                if context:
                    context_str = " " + " ".join(
                        f"{k}={v}" for k, v in context.items() if k != "exc_info"
                    )

                # Prefer the logger name bound by structlog, if present.
                logger_name = record.msg.get("logger", record.name)

                # Use an aware UTC timestamp: the trailing "Z" in the format
                # string means UTC, so naive local time here would produce
                # misleading timestamps.
                timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")

                log_entry = f"{timestamp} [{record.levelname.ljust(8)}] {message}{context_str} [{logger_name}]\n"
                self.stream.write(log_entry)
                self.flush()

                # Append a traceback from whichever source carries one:
                # the record itself, or the structlog "exc_info" entry.
                record_has_exc = record.exc_info and record.exc_info != (None, None, None)
                msg_has_exc = "exc_info" in record.msg and record.msg["exc_info"]
                if record_has_exc:
                    # Use the exception info captured on the record.
                    tb_str = "".join(traceback.format_exception(*record.exc_info))
                    self.stream.write(tb_str + "\n")
                    self.flush()
                elif msg_has_exc and isinstance(record.msg["exc_info"], tuple):
                    # Use the (type, value, traceback) tuple from the message.
                    tb_str = "".join(traceback.format_exception(*record.msg["exc_info"]))
                    self.stream.write(tb_str + "\n")
                    self.flush()
                elif msg_has_exc and hasattr(record.msg["exc_info"], "__traceback__"):
                    # An exception instance was passed directly.
                    exc = record.msg["exc_info"]
                    tb_str = "".join(traceback.format_exception(type(exc), exc, exc.__traceback__))
                    self.stream.write(tb_str + "\n")
                    self.flush()
            else:
                # Non-structlog record: defer to the configured formatter.
                msg = self.format(record)
                self.stream.write(msg + self.terminator)
                self.flush()
                # Append the traceback if the record carries exception info.
                if record.exc_info and record.exc_info != (None, None, None):
                    tb_str = "".join(traceback.format_exception(*record.exc_info))
                    self.stream.write(tb_str + "\n")
                    self.flush()
        except Exception:
            # Delegate to logging's standard error handling.  The previous
            # version also wrote an error note to self.stream here, but the
            # stream is the most likely cause of the failure, so that write
            # could raise a second exception inside the error handler.
            self.handleError(record)
def get_logger(name=None, level=INFO):
    """Get a configured structlog logger.

    Configures logging on first use (thread-safely), then returns a
    structlog logger bound to *name*.

    Args:
        name: Logger name; defaults to this module's name when omitted.
        level: Log level used if this call triggers initial setup.

    Returns:
        A configured structlog logger instance.
    """
    global _is_configured
    # Fast path: once configured, skip lock acquisition entirely.
    if not _is_configured:
        # Double-checked locking: setup_logging must only ever run under the
        # lock, and must be re-checked inside it, because another thread may
        # have completed configuration between the fast check and acquiring
        # the lock.
        with _setup_lock:
            if not _is_configured:
                setup_logging(level)
                _is_configured = True
    return structlog.get_logger(name if name else __name__)
def cleanup_old_logs(logs_dir, max_files):
    """Remove old log files, keeping only the most recent ones.

    Args:
        logs_dir: Directory (a ``pathlib.Path``) containing log files.
        max_files: Maximum number of log files to keep.

    Returns:
        True if the cleanup scan completed, False if it failed.
    """
    # Bind the logger before the try block so the except clause can always
    # reference it; the previous version bound it inside the try, which
    # risked a NameError in the handler if acquisition itself failed.
    logger = structlog.get_logger()
    try:
        # Only consider *.log files (skips README.md, .gitkeep, etc.).
        log_files = [f for f in logs_dir.glob("*.log") if f.is_file()]

        # Sort newest first by modification time.
        log_files.sort(key=lambda x: x.stat().st_mtime, reverse=True)

        # Delete everything beyond the retention window; a single failed
        # unlink is logged but does not abort the rest of the cleanup.
        if len(log_files) > max_files:
            for old_file in log_files[max_files:]:
                try:
                    old_file.unlink()
                    logger.info(f"Deleted old log file: {old_file}")
                except Exception as e:
                    logger.error(f"Failed to delete old log file {old_file}: {e}")

        return True
    except Exception as e:
        logger.error(f"Error cleaning up log files: {e}")
        return False
def setup_logging(log_level=INFO, name=None):
"""Sets up the logging configuration with structlog integration.
@ -94,8 +227,8 @@ def setup_logging(log_level=INFO, name=None):
# Install exception handlers
sys.excepthook = handle_exception
# Create formatter for standard library logging
formatter = structlog.stdlib.ProcessorFormatter(
# Create console formatter for standard library logging
console_formatter = structlog.stdlib.ProcessorFormatter(
processor=structlog.dev.ConsoleRenderer(
colors=True,
force_colors=True,
@ -111,7 +244,7 @@ def setup_logging(log_level=INFO, name=None):
),
)
# Setup handler with newlines
# Setup handler with newlines for console output
class NewlineStreamHandler(logging.StreamHandler):
def emit(self, record):
try:
@ -122,17 +255,27 @@ def setup_logging(log_level=INFO, name=None):
except Exception:
self.handleError(record)
# Use our custom handler
# Use our custom handler for console output
stream_handler = NewlineStreamHandler(sys.stdout)
stream_handler.setFormatter(formatter)
stream_handler.setFormatter(console_formatter)
stream_handler.setLevel(log_level)
# Create a file handler that uses our custom PlainFileHandler
current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
log_file_path = os.path.join(LOGS_DIR, f"{current_time}.log")
file_handler = PlainFileHandler(log_file_path, encoding="utf-8")
file_handler.setLevel(DEBUG)
# Configure root logger
root_logger = logging.getLogger()
if root_logger.hasHandlers():
root_logger.handlers.clear()
root_logger.addHandler(stream_handler)
root_logger.addHandler(file_handler)
root_logger.setLevel(log_level)
# Clean up old log files, keeping only the most recent ones
cleanup_old_logs(LOGS_DIR, MAX_LOG_FILES)
# Return a configured logger
return structlog.get_logger(name if name else __name__)

0
logs/.gitkeep Normal file
View file

31
logs/README.md Normal file
View file

@ -0,0 +1,31 @@
# Logs Directory
This directory contains the application logs for Cognee.
## Log Files
- Log files are named by their creation timestamp in the format `YYYY-MM-DD_HH-MM-SS.log`
- Logs are stored in plain text format with a consistent structure
- Each log entry includes:
- Timestamp (ISO format)
- Log level (padded to consistent width)
- Message
- Additional context (if any)
- Logger name (in square brackets)
- Exception tracebacks are included for error logs
## Sample Log Entry
```
2025-03-27T13:05:27.481446Z [INFO ] Structured log message user_id=user123 action=login status=success [TestLogger]
```
## Retention Policy
The system automatically keeps only the 10 most recent log files. Older log files are automatically deleted when new log files are created. This prevents excessive disk usage in long-running deployments.
## Usage
Logs are automatically generated by the application's logging mechanism. No manual actions are required to use this feature.
The logs directory structure is preserved in version control, but the log files themselves are gitignored.