fix: Logger suppression and database logs (#1041)


## Description
This PR reduces log noise and improves startup diagnostics: it lowers Alembic logger verbosity, suppresses LiteLLM output, logs the configured relational, vector, and graph databases during `setup_logging`, adds a `TRANSPORT_MODE` (stdio/sse) switch to the MCP entrypoint, and fixes `file://` URL handling so it works on Windows as well as Unix.

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.

---------

Co-authored-by: Igor Ilic <30923996+dexters1@users.noreply.github.com>
Co-authored-by: Igor Ilic <igorilic03@gmail.com>
Vasilije authored on 2025-07-03 20:08:27 +02:00 (committed by GitHub)
parent b8ea699abe
commit ada3f7b086
9 changed files with 117 additions and 15 deletions

**Changed: Alembic logging configuration (INI)**

```diff
@@ -102,7 +102,7 @@ handlers =
 qualname = sqlalchemy.engine
 
 [logger_alembic]
-level = INFO
+level = WARN
 handlers =
 qualname = alembic
```
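Raising `logger_alembic` from INFO to WARN silences Alembic's per-migration status lines while keeping warnings and errors visible. For reference, the same suppression can be done at runtime with the standard library alone; this snippet is illustrative and not code from this PR:

```python
import logging

# Raising the "alembic" logger to WARN hides its INFO chatter
# (e.g. "Running upgrade ..."), while warnings and errors still surface.
logging.getLogger("alembic").setLevel(logging.WARN)

# The sibling SQLAlchemy engine logger can be tuned the same way.
logging.getLogger("sqlalchemy.engine").setLevel(logging.WARN)
```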

**Changed: MCP server entrypoint script (shell), transport mode selection**

```diff
@@ -4,6 +4,10 @@ set -e # Exit on error
 echo "Debug mode: $DEBUG"
 echo "Environment: $ENVIRONMENT"
 
+# Set default transport mode if not specified
+TRANSPORT_MODE=${TRANSPORT_MODE:-"stdio"}
+echo "Transport mode: $TRANSPORT_MODE"
+
 # Run Alembic migrations with proper error handling.
 # Note on UserAlreadyExists error handling:
 # During database migrations, we attempt to create a default user. If this user
@@ -28,19 +32,31 @@ fi
 echo "Database migrations done."
 
-echo "Starting Cognee MCP Server..."
+echo "Starting Cognee MCP Server with transport mode: $TRANSPORT_MODE"
 
 # Add startup delay to ensure DB is ready
 sleep 2
 
-# Modified Gunicorn startup with error handling
+# Modified startup with transport mode selection and error handling
 if [ "$ENVIRONMENT" = "dev" ] || [ "$ENVIRONMENT" = "local" ]; then
     if [ "$DEBUG" = "true" ]; then
         echo "Waiting for the debugger to attach..."
-        exec python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m cognee
+        if [ "$TRANSPORT_MODE" = "sse" ]; then
+            exec python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m cognee --transport sse
+        else
+            exec python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m cognee --transport stdio
+        fi
     else
-        exec cognee
+        if [ "$TRANSPORT_MODE" = "sse" ]; then
+            exec cognee --transport sse
+        else
+            exec cognee --transport stdio
+        fi
     fi
 else
-    exec cognee
+    if [ "$TRANSPORT_MODE" = "sse" ]; then
+        exec cognee --transport sse
+    else
+        exec cognee --transport stdio
+    fi
 fi
```
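The diff confirms only that the `cognee` command accepts a `--transport` flag with `stdio` and `sse` values. A minimal sketch of what that CLI surface might look like; the argparse wiring and startup message here are assumptions, not code from the repository:

```python
import argparse

def main():
    # Hypothetical parser: the diff confirms only the --transport flag
    # and its two accepted values; everything else is illustrative.
    parser = argparse.ArgumentParser(prog="cognee")
    parser.add_argument(
        "--transport",
        choices=["stdio", "sse"],
        default="stdio",
        help="MCP transport: stdio for pipe-based clients, sse for HTTP streaming",
    )
    args = parser.parse_args()
    print(f"Starting Cognee MCP Server with transport mode: {args.transport}")

if __name__ == "__main__":
    main()
```

Defaulting in both the script (`${TRANSPORT_MODE:-"stdio"}`) and the CLI keeps existing stdio deployments working unchanged.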

**Changed: MCP server module, import section**

```diff
@@ -18,6 +18,7 @@ from cognee.modules.search.types import SearchType
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.modules.storage.utils import JSONEncoder
 
 try:
     from codingagents.coding_rule_associations import (
         add_rule_associations,
```

**Changed: LiteLLM adapter, add logging import**

@ -1,4 +1,5 @@
import litellm import litellm
import logging
from pydantic import BaseModel from pydantic import BaseModel
from typing import Type, Optional from typing import Type, Optional
from litellm import acompletion, JSONSchemaValidationError from litellm import acompletion, JSONSchemaValidationError

**Changed: Generic API LLM adapter, add logging imports**

```diff
@@ -1,5 +1,6 @@
 """Adapter for Generic API LLM provider API"""
 
+import logging
 from typing import Type
 from pydantic import BaseModel
@@ -7,6 +8,7 @@ import instructor
 from cognee.infrastructure.llm.llm_interface import LLMInterface
 from cognee.infrastructure.llm.config import get_llm_config
 from cognee.infrastructure.llm.rate_limiter import rate_limit_async, sleep_and_retry_async
+from cognee.shared.logging_utils import get_logger
 import litellm
```

**Changed: LLM utilities, defer vector-engine imports**

```diff
@@ -1,8 +1,7 @@
+from cognee.shared.logging_utils import get_logger
 import litellm
-from cognee.infrastructure.databases.vector import get_vector_engine
 from cognee.infrastructure.llm.get_llm_client import get_llm_client
-from cognee.shared.logging_utils import get_logger
 
 logger = get_logger()
@@ -22,6 +21,9 @@ def get_max_chunk_tokens():
     the smaller value of the embedding engine's max tokens and half of the LLM's
     maximum tokens.
     """
+    # NOTE: Import must be done in function to avoid circular import issue
+    from cognee.infrastructure.databases.vector import get_vector_engine
+
     # Calculate max chunk size based on the following formula
     embedding_engine = get_vector_engine().embedding_engine
     llm_client = get_llm_client()
@@ -93,6 +95,9 @@ async def test_embedding_connection():
     the exception if the connection to the embedding handler cannot be established.
     """
     try:
+        # NOTE: Vector engine import must be done in function to avoid circular import issue
+        from cognee.infrastructure.databases.vector import get_vector_engine
+
         await get_vector_engine().embedding_engine.embed_text("test")
     except Exception as e:
         logger.error(e)
```
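Both hunks apply the same remedy: the module-level import of `get_vector_engine` participated in an import cycle, so the import is deferred into the function bodies, where it runs only after all modules have finished loading. A generic two-file sketch of the pattern, using hypothetical modules rather than the cognee ones:

```python
# module_a.py
import module_b  # fine: module_b does not import module_a at load time

def value():
    return "a"

# module_b.py
def helper():
    # Importing at call time instead of module load time breaks the cycle:
    # by the time helper() runs, module_a is fully initialized. A top-level
    # "import module_a" here could fail with a partially initialized module.
    import module_a
    return module_a.value()
```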

**Changed: open_data_file, parse file:// URLs cross-platform**

```diff
@@ -1,4 +1,6 @@
 from typing import IO, Optional
+from urllib.parse import urlparse
+import os
 
 from cognee.api.v1.add.config import get_s3_config
@@ -24,8 +26,16 @@ def open_data_file(
         else:
             return fs.open(file_path, mode=mode, encoding=encoding, **kwargs)
     elif file_path.startswith("file://"):
-        # Handle local file URLs by stripping the file:// prefix
-        file_path = file_path.replace("file://", "", 1)
-        return open(file_path, mode=mode, encoding=encoding, **kwargs)
+        # Handle local file URLs by properly parsing the URI
+        parsed_url = urlparse(file_path)
+        # On Windows, urlparse handles drive letters correctly
+        # Convert the path component to a proper file path
+        if os.name == "nt":  # Windows
+            # Remove leading slash from Windows paths like /C:/Users/...
+            local_path = parsed_url.path.lstrip("/")
+        else:  # Unix-like systems
+            local_path = parsed_url.path
+        return open(local_path, mode=mode, encoding=encoding, **kwargs)
     else:
         return open(file_path, mode=mode, encoding=encoding, **kwargs)
```
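The old code stripped the `file://` prefix literally, which leaves a bogus leading slash in front of Windows drive letters. A quick standard-library demonstration of the difference, with illustrative paths:

```python
from urllib.parse import urlparse

url = "file:///C:/Users/test/data.txt"

# Naive prefix stripping leaves a leading slash on Windows:
print(url.replace("file://", "", 1))          # /C:/Users/test/data.txt

# Parsing the URI and trimming the slash yields a usable Windows path:
parsed = urlparse(url)
print(parsed.path)                            # /C:/Users/test/data.txt
print(parsed.path.lstrip("/"))                # C:/Users/test/data.txt

# On POSIX the parsed path is already correct as-is:
print(urlparse("file:///tmp/data.txt").path)  # /tmp/data.txt
```

Note that `parsed.path` is not percent-decoded; paths containing spaces or non-ASCII characters would additionally need `urllib.parse.unquote`.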

**Changed: logging_utils, suppress external-library logs and report database configuration**

```diff
@@ -11,6 +11,23 @@ import importlib.metadata
 from cognee import __version__ as cognee_version
 from typing import Protocol
 
+
+# Configure external library logging
+def configure_external_library_logging():
+    """Configure logging for external libraries to reduce verbosity"""
+    # Configure LiteLLM logging to reduce verbosity
+    try:
+        import litellm
+
+        litellm.set_verbose = False
+
+        # Suppress LiteLLM ERROR logging using standard logging
+        logging.getLogger("litellm").setLevel(logging.CRITICAL)
+    except ImportError:
+        # LiteLLM not available, skip configuration
+        pass
+
+
 # Export common log levels
 DEBUG = logging.DEBUG
 INFO = logging.INFO
@@ -148,6 +165,44 @@ def get_logger(name=None, level=None) -> LoggerInterface:
     return logger
 
 
+def log_database_configuration(logger):
+    """Log the current database configuration for all database types"""
+    # NOTE: Has to be imported at runtime to avoid circular import
+    from cognee.infrastructure.databases.relational.config import get_relational_config
+    from cognee.infrastructure.databases.vector.config import get_vectordb_config
+    from cognee.infrastructure.databases.graph.config import get_graph_config
+
+    try:
+        # Log relational database configuration
+        relational_config = get_relational_config()
+        logger.info(f"Relational database: {relational_config.db_provider}")
+        if relational_config.db_provider == "postgres":
+            logger.info(f"Postgres host: {relational_config.db_host}:{relational_config.db_port}")
+            logger.info(f"Postgres database: {relational_config.db_name}")
+        elif relational_config.db_provider == "sqlite":
+            logger.info(f"SQLite path: {relational_config.db_path}")
+            logger.info(f"SQLite database: {relational_config.db_name}")
+
+        # Log vector database configuration
+        vector_config = get_vectordb_config()
+        logger.info(f"Vector database: {vector_config.vector_db_provider}")
+        if vector_config.vector_db_provider == "lancedb":
+            logger.info(f"Vector database path: {vector_config.vector_db_url}")
+        else:
+            logger.info(f"Vector database URL: {vector_config.vector_db_url}")
+
+        # Log graph database configuration
+        graph_config = get_graph_config()
+        logger.info(f"Graph database: {graph_config.graph_database_provider}")
+        if graph_config.graph_database_provider == "kuzu":
+            logger.info(f"Graph database path: {graph_config.graph_file_path}")
+        else:
+            logger.info(f"Graph database URL: {graph_config.graph_database_url}")
+    except Exception as e:
+        logger.warning(f"Could not retrieve database configuration: {str(e)}")
+
+
 def cleanup_old_logs(logs_dir, max_files):
     """
     Removes old log files, keeping only the most recent ones.
@@ -193,6 +248,9 @@ def setup_logging(log_level=None, name=None):
     log_level = log_level if log_level else log_levels[os.getenv("LOG_LEVEL", "INFO")]
 
+    # Configure external library logging early to suppress verbose output
+    configure_external_library_logging()
+
     def exception_handler(logger, method_name, event_dict):
         """Custom processor to handle uncaught exceptions."""
         # Check if there's an exc_info that needs to be processed
@@ -339,6 +397,9 @@ def setup_logging(log_level=None, name=None):
     logger.info("Want to learn more? Visit the Cognee documentation: https://docs.cognee.ai")
 
+    # Log database configuration
+    log_database_configuration(logger)
+
     # Return the configured logger
     return logger
```
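Both new helpers are wired into `setup_logging`, so callers get the suppression and the configuration report without extra steps. A minimal usage sketch based only on the signature and exports visible in this diff; the logged provider values below are illustrative, not guaranteed defaults:

```python
from cognee.shared.logging_utils import setup_logging, INFO

# setup_logging() now quiets LiteLLM before configuring handlers, then
# reports the relational, vector, and graph database configuration, e.g.:
#   Relational database: sqlite
#   Vector database: lancedb
#   Graph database: kuzu
logger = setup_logging(log_level=INFO)
logger.info("Application started")
```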

**Changed: open_data_file tests, build file URLs with Path.as_uri()**

```diff
@@ -1,6 +1,7 @@
 import os
 import tempfile
 import pytest
+from pathlib import Path
 
 from cognee.modules.data.processing.document_types.open_data_file import open_data_file
@@ -29,7 +30,8 @@ class TestOpenDataFile:
            temp_file_path = f.name
 
        try:
-           file_url = f"file://{temp_file_path}"
+           # Use pathlib.Path.as_uri() for proper cross-platform file URL creation
+           file_url = Path(temp_file_path).as_uri()
            with open_data_file(file_url, mode="r") as f:
                content = f.read()
            assert content == test_content
@@ -44,7 +46,8 @@ class TestOpenDataFile:
            temp_file_path = f.name
 
        try:
-           file_url = f"file://{temp_file_path}"
+           # Use pathlib.Path.as_uri() for proper cross-platform file URL creation
+           file_url = Path(temp_file_path).as_uri()
            with open_data_file(file_url, mode="rb") as f:
                content = f.read()
            assert content == test_content.encode()
@@ -61,7 +64,8 @@ class TestOpenDataFile:
            temp_file_path = f.name
 
        try:
-           file_url = f"file://{temp_file_path}"
+           # Use pathlib.Path.as_uri() for proper cross-platform file URL creation
+           file_url = Path(temp_file_path).as_uri()
            with open_data_file(file_url, mode="r", encoding="utf-8") as f:
                content = f.read()
            assert content == test_content
@@ -84,7 +88,9 @@ class TestOpenDataFile:
        try:
            # Even if someone accidentally adds multiple file:// prefixes
-           file_url = f"file://file://{temp_file_path}"
+           # Use proper file URL creation first
+           proper_file_url = Path(temp_file_path).as_uri()
+           file_url = f"file://{proper_file_url}"
            with open_data_file(file_url, mode="r") as f:
                content = f.read()
            # This should work because we only replace the first occurrence
```
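`Path.as_uri()` pairs with the `urlparse`-based reader above: it requires an absolute path and emits a well-formed `file://` URL on every platform, which the old f-string interpolation did not guarantee. A small self-contained sketch of the round trip; the temporary file name is whatever `tempfile` picks:

```python
import tempfile
from pathlib import Path
from urllib.parse import urlparse

with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as f:
    temp_path = f.name

# as_uri() percent-encodes special characters and handles Windows drive
# letters (file:///C:/...), unlike naive f"file://{temp_path}" interpolation.
file_url = Path(temp_path).as_uri()
print(file_url)                 # e.g. file:///tmp/tmpab12cd.txt

# Round-trip through urlparse mirrors the logic in open_data_file:
print(urlparse(file_url).path)

Path(temp_path).unlink()        # clean up the temporary file
```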