fix: Logger suppression and database logs (#1041)


## Description
This PR reduces log noise and improves startup diagnostics: it lowers Alembic logger verbosity, suppresses LiteLLM output, logs the configured relational, vector, and graph databases during `setup_logging`, adds a `TRANSPORT_MODE` (stdio/sse) switch to the MCP entrypoint, and fixes `file://` URL handling so it works on Windows as well as Unix.

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.

---------

Co-authored-by: Igor Ilic <30923996+dexters1@users.noreply.github.com>
Co-authored-by: Igor Ilic <igorilic03@gmail.com>
Vasilije authored on 2025-07-03 20:08:27 +02:00 (committed by GitHub)
parent b8ea699abe
commit ada3f7b086
9 changed files with 117 additions and 15 deletions

**Changed: Alembic logging configuration (INI)**

```diff
@@ -102,7 +102,7 @@ handlers =
 qualname = sqlalchemy.engine
 
 [logger_alembic]
-level = INFO
+level = WARN
 handlers =
 qualname = alembic
```
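Raising `logger_alembic` from INFO to WARN silences Alembic's per-migration status lines while keeping warnings and errors visible. For reference, the same suppression can be done at runtime with the standard library alone; this snippet is illustrative and not code from this PR:

```python
import logging

# Raising the "alembic" logger to WARN hides its INFO chatter
# (e.g. "Running upgrade ..."), while warnings and errors still surface.
logging.getLogger("alembic").setLevel(logging.WARN)

# The sibling SQLAlchemy engine logger can be tuned the same way.
logging.getLogger("sqlalchemy.engine").setLevel(logging.WARN)
```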

**Changed: MCP server entrypoint script (shell), transport mode selection**

```diff
@@ -4,6 +4,10 @@ set -e # Exit on error
 echo "Debug mode: $DEBUG"
 echo "Environment: $ENVIRONMENT"
 
+# Set default transport mode if not specified
+TRANSPORT_MODE=${TRANSPORT_MODE:-"stdio"}
+echo "Transport mode: $TRANSPORT_MODE"
+
 # Run Alembic migrations with proper error handling.
 # Note on UserAlreadyExists error handling:
 # During database migrations, we attempt to create a default user. If this user
@@ -28,19 +32,31 @@ fi
 echo "Database migrations done."
 
-echo "Starting Cognee MCP Server..."
+echo "Starting Cognee MCP Server with transport mode: $TRANSPORT_MODE"
 
 # Add startup delay to ensure DB is ready
 sleep 2
 
-# Modified Gunicorn startup with error handling
+# Modified startup with transport mode selection and error handling
 if [ "$ENVIRONMENT" = "dev" ] || [ "$ENVIRONMENT" = "local" ]; then
     if [ "$DEBUG" = "true" ]; then
         echo "Waiting for the debugger to attach..."
-        exec python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m cognee
+        if [ "$TRANSPORT_MODE" = "sse" ]; then
+            exec python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m cognee --transport sse
+        else
+            exec python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m cognee --transport stdio
+        fi
     else
-        exec cognee
+        if [ "$TRANSPORT_MODE" = "sse" ]; then
+            exec cognee --transport sse
+        else
+            exec cognee --transport stdio
+        fi
     fi
 else
-    exec cognee
+    if [ "$TRANSPORT_MODE" = "sse" ]; then
+        exec cognee --transport sse
+    else
+        exec cognee --transport stdio
+    fi
 fi
```
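The diff confirms only that the `cognee` command accepts a `--transport` flag with `stdio` and `sse` values. A minimal sketch of what that CLI surface might look like; the argparse wiring and startup message here are assumptions, not code from the repository:

```python
import argparse

def main():
    # Hypothetical parser: the diff confirms only the --transport flag
    # and its two accepted values; everything else is illustrative.
    parser = argparse.ArgumentParser(prog="cognee")
    parser.add_argument(
        "--transport",
        choices=["stdio", "sse"],
        default="stdio",
        help="MCP transport: stdio for pipe-based clients, sse for HTTP streaming",
    )
    args = parser.parse_args()
    print(f"Starting Cognee MCP Server with transport mode: {args.transport}")

if __name__ == "__main__":
    main()
```

Defaulting in both the script (`${TRANSPORT_MODE:-"stdio"}`) and the CLI keeps existing stdio deployments working unchanged.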

**Changed: MCP server module, import section**

```diff
@@ -18,6 +18,7 @@ from cognee.modules.search.types import SearchType
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.modules.storage.utils import JSONEncoder
 
 try:
     from codingagents.coding_rule_associations import (
         add_rule_associations,
```

**Changed: LiteLLM adapter, add logging import**

@ -1,4 +1,5 @@
import litellm import litellm
import logging
from pydantic import BaseModel from pydantic import BaseModel
from typing import Type, Optional from typing import Type, Optional
from litellm import acompletion, JSONSchemaValidationError from litellm import acompletion, JSONSchemaValidationError

**Changed: Generic API LLM adapter, add logging imports**

```diff
@@ -1,5 +1,6 @@
 """Adapter for Generic API LLM provider API"""
 
+import logging
 from typing import Type
 from pydantic import BaseModel
@@ -7,6 +8,7 @@ import instructor
 from cognee.infrastructure.llm.llm_interface import LLMInterface
 from cognee.infrastructure.llm.config import get_llm_config
 from cognee.infrastructure.llm.rate_limiter import rate_limit_async, sleep_and_retry_async
+from cognee.shared.logging_utils import get_logger
 import litellm
```

**Changed: LLM utilities, defer vector-engine imports**

```diff
@@ -1,8 +1,7 @@
+from cognee.shared.logging_utils import get_logger
 import litellm
-from cognee.infrastructure.databases.vector import get_vector_engine
 from cognee.infrastructure.llm.get_llm_client import get_llm_client
-from cognee.shared.logging_utils import get_logger
 
 logger = get_logger()
@@ -22,6 +21,9 @@ def get_max_chunk_tokens():
     the smaller value of the embedding engine's max tokens and half of the LLM's
     maximum tokens.
     """
+    # NOTE: Import must be done in function to avoid circular import issue
+    from cognee.infrastructure.databases.vector import get_vector_engine
+
     # Calculate max chunk size based on the following formula
     embedding_engine = get_vector_engine().embedding_engine
     llm_client = get_llm_client()
@@ -93,6 +95,9 @@ async def test_embedding_connection():
     the exception if the connection to the embedding handler cannot be established.
     """
     try:
+        # NOTE: Vector engine import must be done in function to avoid circular import issue
+        from cognee.infrastructure.databases.vector import get_vector_engine
+
         await get_vector_engine().embedding_engine.embed_text("test")
     except Exception as e:
         logger.error(e)
```
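Both hunks apply the same remedy: the module-level import of `get_vector_engine` participated in an import cycle, so the import is deferred into the function bodies, where it runs only after all modules have finished loading. A generic two-file sketch of the pattern, using hypothetical modules rather than the cognee ones:

```python
# module_a.py
import module_b  # fine: module_b does not import module_a at load time

def value():
    return "a"

# module_b.py
def helper():
    # Importing at call time instead of module load time breaks the cycle:
    # by the time helper() runs, module_a is fully initialized. A top-level
    # "import module_a" here could fail with a partially initialized module.
    import module_a
    return module_a.value()
```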

**Changed: open_data_file, parse file:// URLs cross-platform**

```diff
@@ -1,4 +1,6 @@
 from typing import IO, Optional
+from urllib.parse import urlparse
+import os
 
 from cognee.api.v1.add.config import get_s3_config
@@ -24,8 +26,16 @@ def open_data_file(
         else:
             return fs.open(file_path, mode=mode, encoding=encoding, **kwargs)
     elif file_path.startswith("file://"):
-        # Handle local file URLs by stripping the file:// prefix
-        file_path = file_path.replace("file://", "", 1)
-        return open(file_path, mode=mode, encoding=encoding, **kwargs)
+        # Handle local file URLs by properly parsing the URI
+        parsed_url = urlparse(file_path)
+        # On Windows, urlparse handles drive letters correctly
+        # Convert the path component to a proper file path
+        if os.name == "nt":  # Windows
+            # Remove leading slash from Windows paths like /C:/Users/...
+            local_path = parsed_url.path.lstrip("/")
+        else:  # Unix-like systems
+            local_path = parsed_url.path
+        return open(local_path, mode=mode, encoding=encoding, **kwargs)
     else:
         return open(file_path, mode=mode, encoding=encoding, **kwargs)
```
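The old code stripped the `file://` prefix literally, which leaves a bogus leading slash in front of Windows drive letters. A quick standard-library demonstration of the difference, with illustrative paths:

```python
from urllib.parse import urlparse

url = "file:///C:/Users/test/data.txt"

# Naive prefix stripping leaves a leading slash on Windows:
print(url.replace("file://", "", 1))          # /C:/Users/test/data.txt

# Parsing the URI and trimming the slash yields a usable Windows path:
parsed = urlparse(url)
print(parsed.path)                            # /C:/Users/test/data.txt
print(parsed.path.lstrip("/"))                # C:/Users/test/data.txt

# On POSIX the parsed path is already correct as-is:
print(urlparse("file:///tmp/data.txt").path)  # /tmp/data.txt
```

Note that `parsed.path` is not percent-decoded; paths containing spaces or non-ASCII characters would additionally need `urllib.parse.unquote`.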

**Changed: logging_utils, suppress external-library logs and report database configuration**

```diff
@@ -11,6 +11,23 @@ import importlib.metadata
 from cognee import __version__ as cognee_version
 from typing import Protocol
 
+
+# Configure external library logging
+def configure_external_library_logging():
+    """Configure logging for external libraries to reduce verbosity"""
+    # Configure LiteLLM logging to reduce verbosity
+    try:
+        import litellm
+
+        litellm.set_verbose = False
+
+        # Suppress LiteLLM ERROR logging using standard logging
+        logging.getLogger("litellm").setLevel(logging.CRITICAL)
+    except ImportError:
+        # LiteLLM not available, skip configuration
+        pass
+
+
 # Export common log levels
 DEBUG = logging.DEBUG
 INFO = logging.INFO
@@ -148,6 +165,44 @@ def get_logger(name=None, level=None) -> LoggerInterface:
     return logger
 
 
+def log_database_configuration(logger):
+    """Log the current database configuration for all database types"""
+    # NOTE: Has to be imported at runtime to avoid circular import
+    from cognee.infrastructure.databases.relational.config import get_relational_config
+    from cognee.infrastructure.databases.vector.config import get_vectordb_config
+    from cognee.infrastructure.databases.graph.config import get_graph_config
+
+    try:
+        # Log relational database configuration
+        relational_config = get_relational_config()
+        logger.info(f"Relational database: {relational_config.db_provider}")
+        if relational_config.db_provider == "postgres":
+            logger.info(f"Postgres host: {relational_config.db_host}:{relational_config.db_port}")
+            logger.info(f"Postgres database: {relational_config.db_name}")
+        elif relational_config.db_provider == "sqlite":
+            logger.info(f"SQLite path: {relational_config.db_path}")
+            logger.info(f"SQLite database: {relational_config.db_name}")
+
+        # Log vector database configuration
+        vector_config = get_vectordb_config()
+        logger.info(f"Vector database: {vector_config.vector_db_provider}")
+        if vector_config.vector_db_provider == "lancedb":
+            logger.info(f"Vector database path: {vector_config.vector_db_url}")
+        else:
+            logger.info(f"Vector database URL: {vector_config.vector_db_url}")
+
+        # Log graph database configuration
+        graph_config = get_graph_config()
+        logger.info(f"Graph database: {graph_config.graph_database_provider}")
+        if graph_config.graph_database_provider == "kuzu":
+            logger.info(f"Graph database path: {graph_config.graph_file_path}")
+        else:
+            logger.info(f"Graph database URL: {graph_config.graph_database_url}")
+    except Exception as e:
+        logger.warning(f"Could not retrieve database configuration: {str(e)}")
+
+
 def cleanup_old_logs(logs_dir, max_files):
     """
     Removes old log files, keeping only the most recent ones.
@@ -193,6 +248,9 @@ def setup_logging(log_level=None, name=None):
     log_level = log_level if log_level else log_levels[os.getenv("LOG_LEVEL", "INFO")]
 
+    # Configure external library logging early to suppress verbose output
+    configure_external_library_logging()
+
     def exception_handler(logger, method_name, event_dict):
         """Custom processor to handle uncaught exceptions."""
         # Check if there's an exc_info that needs to be processed
@@ -339,6 +397,9 @@ def setup_logging(log_level=None, name=None):
     logger.info("Want to learn more? Visit the Cognee documentation: https://docs.cognee.ai")
 
+    # Log database configuration
+    log_database_configuration(logger)
+
     # Return the configured logger
     return logger
```
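Both new helpers are wired into `setup_logging`, so callers get the suppression and the configuration report without extra steps. A minimal usage sketch based only on the signature and exports visible in this diff; the logged provider values below are illustrative, not guaranteed defaults:

```python
from cognee.shared.logging_utils import setup_logging, INFO

# setup_logging() now quiets LiteLLM before configuring handlers, then
# reports the relational, vector, and graph database configuration, e.g.:
#   Relational database: sqlite
#   Vector database: lancedb
#   Graph database: kuzu
logger = setup_logging(log_level=INFO)
logger.info("Application started")
```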

**Changed: open_data_file tests, build file URLs with Path.as_uri()**

```diff
@@ -1,6 +1,7 @@
 import os
 import tempfile
 import pytest
+from pathlib import Path
 
 from cognee.modules.data.processing.document_types.open_data_file import open_data_file
@@ -29,7 +30,8 @@ class TestOpenDataFile:
            temp_file_path = f.name
 
        try:
-           file_url = f"file://{temp_file_path}"
+           # Use pathlib.Path.as_uri() for proper cross-platform file URL creation
+           file_url = Path(temp_file_path).as_uri()
            with open_data_file(file_url, mode="r") as f:
                content = f.read()
            assert content == test_content
@@ -44,7 +46,8 @@ class TestOpenDataFile:
            temp_file_path = f.name
 
        try:
-           file_url = f"file://{temp_file_path}"
+           # Use pathlib.Path.as_uri() for proper cross-platform file URL creation
+           file_url = Path(temp_file_path).as_uri()
            with open_data_file(file_url, mode="rb") as f:
                content = f.read()
            assert content == test_content.encode()
@@ -61,7 +64,8 @@ class TestOpenDataFile:
            temp_file_path = f.name
 
        try:
-           file_url = f"file://{temp_file_path}"
+           # Use pathlib.Path.as_uri() for proper cross-platform file URL creation
+           file_url = Path(temp_file_path).as_uri()
            with open_data_file(file_url, mode="r", encoding="utf-8") as f:
                content = f.read()
            assert content == test_content
@@ -84,7 +88,9 @@ class TestOpenDataFile:
        try:
            # Even if someone accidentally adds multiple file:// prefixes
-           file_url = f"file://file://{temp_file_path}"
+           # Use proper file URL creation first
+           proper_file_url = Path(temp_file_path).as_uri()
+           file_url = f"file://{proper_file_url}"
            with open_data_file(file_url, mode="r") as f:
                content = f.read()
            # This should work because we only replace the first occurrence
```
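`Path.as_uri()` pairs with the `urlparse`-based reader above: it requires an absolute path and emits a well-formed `file://` URL on every platform, which the old f-string interpolation did not guarantee. A small self-contained sketch of the round trip; the temporary file name is whatever `tempfile` picks:

```python
import tempfile
from pathlib import Path
from urllib.parse import urlparse

with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as f:
    temp_path = f.name

# as_uri() percent-encodes special characters and handles Windows drive
# letters (file:///C:/...), unlike naive f"file://{temp_path}" interpolation.
file_url = Path(temp_path).as_uri()
print(file_url)                 # e.g. file:///tmp/tmpab12cd.txt

# Round-trip through urlparse mirrors the logic in open_data_file:
print(urlparse(file_url).path)

Path(temp_path).unlink()        # clean up the temporary file
```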