class PlainFileHandler(logging.FileHandler):
    """File handler that writes structlog events as plain text log entries.

    Records whose ``msg`` is a dict containing an ``"event"`` key are rendered as::

        <UTC timestamp> [<LEVEL, padded to 8>] <event> <key=value ...> [<logger>]

    followed by a traceback when exception information is attached. Any other
    record is rendered with the handler's regular formatter.
    """

    # Event-dict keys that are rendered explicitly (or deliberately left out)
    # and therefore must not be repeated in the trailing key=value context.
    _RESERVED_KEYS = frozenset({"event", "logger", "level", "timestamp", "exc_info"})

    def emit(self, record):
        """Write *record* to the log file.

        Never raises: failures are reported through ``handleError`` and, when
        the stream is still usable, noted in the log file itself.
        """
        try:
            # The stream may not be open yet (delayed open) or may have been closed.
            if self.stream is None:
                self.stream = self._open()

            if isinstance(record.msg, dict) and "event" in record.msg:
                output = self._format_event(record)
            else:
                # The formatter already appends the traceback when
                # record.exc_info is set, so it must not be written again here.
                output = self.format(record) + self.terminator

            self.stream.write(output)
            self.flush()
        except Exception as e:
            self.handleError(record)
            # Best effort only: the stream may be the very thing that failed,
            # and a logging handler must not raise into application code.
            try:
                if self.stream is not None:
                    self.stream.write(f"Error in log handler: {e}\n")
                    self.flush()
            except Exception:
                pass

    def _format_event(self, record):
        """Render a structlog event-dict record as a plain text entry."""
        # Local import: the module only imports ``datetime`` from ``datetime``.
        from datetime import timezone

        event = record.msg
        message = event.get("event", "")

        # Everything that is not a reserved key is appended as " key=value".
        context_str = "".join(
            f" {k}={v}" for k, v in event.items() if k not in self._RESERVED_KEYS
        )

        # Prefer the logger name carried by structlog, fall back to the record's.
        logger_name = event.get("logger", record.name)

        # The entry ends in "Z", so the time must really be UTC; use the
        # record's creation time rather than the moment of formatting.
        timestamp = datetime.fromtimestamp(record.created, tz=timezone.utc).strftime(
            "%Y-%m-%dT%H:%M:%S.%fZ"
        )

        entry = (
            f"{timestamp} [{record.levelname.ljust(8)}] {message}{context_str} [{logger_name}]\n"
        )

        exc_info = self._resolve_exc_info(record)
        if exc_info is not None:
            entry += "".join(traceback.format_exception(*exc_info)) + "\n"
        return entry

    @staticmethod
    def _resolve_exc_info(record):
        """Return an ``(type, value, traceback)`` tuple for *record*, or ``None``.

        ``record.exc_info`` takes precedence; otherwise the ``"exc_info"`` entry
        of the event dict is used, which may be a tuple, an exception instance,
        or ``True`` (meaning "the exception currently being handled").
        """
        empty = (None, None, None)

        if record.exc_info and record.exc_info != empty:
            return record.exc_info

        candidate = record.msg.get("exc_info")
        if not candidate:
            return None
        if candidate is True:
            current = sys.exc_info()
            return current if current[0] is not None else None
        if isinstance(candidate, tuple):
            return candidate if candidate != empty else None
        if hasattr(candidate, "__traceback__"):
            return (type(candidate), candidate, candidate.__traceback__)
        return None
def cleanup_old_logs(logs_dir, max_files):
    """Remove old log files, keeping only the most recently modified ones.

    Args:
        logs_dir: Directory containing the log files (``Path`` or ``str``).
        max_files: Maximum number of ``*.log`` files to keep; negative values
            are treated as 0.

    Returns:
        True when the cleanup pass completed (individual deletion failures are
        logged but do not fail the pass), False when the pass itself failed.
    """
    # Bind the logger before the try block so the except clause below can
    # always reference it.
    logger = structlog.get_logger()

    try:
        keep = max(int(max_files), 0)

        # Collect (mtime, path) for every regular *.log file; README and other
        # files are not matched by the glob.
        candidates = []
        for path in Path(logs_dir).glob("*.log"):
            try:
                if path.is_file():
                    candidates.append((path.stat().st_mtime, path))
            except OSError:
                # The file disappeared (e.g. removed by another process doing
                # the same cleanup) between listing and stat; nothing to do.
                continue

        # Newest first, so everything past the first `keep` entries is expired.
        candidates.sort(key=lambda item: item[0], reverse=True)

        for _, old_file in candidates[keep:]:
            try:
                old_file.unlink()
                logger.info(f"Deleted old log file: {old_file}")
            except FileNotFoundError:
                # Already gone, which is the desired end state.
                continue
            except Exception as e:
                logger.error(f"Failed to delete old log file {old_file}: {e}")

        return True
    except Exception as e:
        logger.error(f"Error cleaning up log files: {e}")
        return False
@@ -94,8 +227,8 @@ def setup_logging(log_level=INFO, name=None): # Install exception handlers sys.excepthook = handle_exception - # Create formatter for standard library logging - formatter = structlog.stdlib.ProcessorFormatter( + # Create console formatter for standard library logging + console_formatter = structlog.stdlib.ProcessorFormatter( processor=structlog.dev.ConsoleRenderer( colors=True, force_colors=True, @@ -111,7 +244,7 @@ def setup_logging(log_level=INFO, name=None): ), ) - # Setup handler with newlines + # Setup handler with newlines for console output class NewlineStreamHandler(logging.StreamHandler): def emit(self, record): try: @@ -122,17 +255,27 @@ def setup_logging(log_level=INFO, name=None): except Exception: self.handleError(record) - # Use our custom handler + # Use our custom handler for console output stream_handler = NewlineStreamHandler(sys.stdout) - stream_handler.setFormatter(formatter) + stream_handler.setFormatter(console_formatter) stream_handler.setLevel(log_level) + # Create a file handler that uses our custom PlainFileHandler + current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + log_file_path = os.path.join(LOGS_DIR, f"{current_time}.log") + file_handler = PlainFileHandler(log_file_path, encoding="utf-8") + file_handler.setLevel(DEBUG) + # Configure root logger root_logger = logging.getLogger() if root_logger.hasHandlers(): root_logger.handlers.clear() root_logger.addHandler(stream_handler) + root_logger.addHandler(file_handler) root_logger.setLevel(log_level) + # Clean up old log files, keeping only the most recent ones + cleanup_old_logs(LOGS_DIR, MAX_LOG_FILES) + # Return a configured logger return structlog.get_logger(name if name else __name__) diff --git a/logs/.gitkeep b/logs/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/logs/README.md b/logs/README.md new file mode 100644 index 000000000..96ef613b5 --- /dev/null +++ b/logs/README.md @@ -0,0 +1,31 @@ +# Logs Directory + +This directory 
contains the application logs for Cognee. + +## Log Files + +- Log files are named after their creation date and time, in the format `YYYY-MM-DD_HH-MM-SS.log` +- Logs are stored in plain text format with a consistent structure +- Each log entry includes: + - Timestamp (ISO format) + - Log level (padded to a consistent width of 8 characters) + - Message + - Additional context (if any) + - Logger name (in square brackets) +- Exception tracebacks are included for error logs + +## Sample Log Entry + +``` +2025-03-27T13:05:27.481446Z [INFO    ] Structured log message user_id=user123 action=login status=success [TestLogger] +``` + +## Retention Policy + +The system automatically keeps only the 10 most recent log files. Older log files are automatically deleted when new log files are created. This prevents excessive disk usage in long-running deployments. + +## Usage + +Logs are automatically generated by the application's logging mechanism. No manual actions are required to use this feature. + +The logs directory structure is preserved in version control, but the log files themselves are gitignored.