fix: Logger suppression and database logs (#1041)


## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.

---------

Co-authored-by: Igor Ilic <30923996+dexters1@users.noreply.github.com>
Co-authored-by: Igor Ilic <igorilic03@gmail.com>
Vasilije committed ada3f7b086 via GitHub on 2025-07-03 20:08:27 +02:00 (parent b8ea699abe)
9 changed files with 117 additions and 15 deletions

---

@@ -102,7 +102,7 @@ handlers =
 qualname = sqlalchemy.engine

 [logger_alembic]
-level = INFO
+level = WARN
 handlers =
 qualname = alembic
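
Raising Alembic's logger from INFO to WARN keeps routine migration output out of the logs. For reference, the same suppression expressed with the standard library (a sketch, not part of this PR; `WARNING` is the stdlib name for the ini's `WARN`):

```python
import logging

# Equivalent programmatic form of the alembic.ini change above:
# anything below WARNING from Alembic's logger is now dropped.
logging.getLogger("alembic").setLevel(logging.WARNING)
```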

---

@@ -4,6 +4,10 @@ set -e  # Exit on error
 echo "Debug mode: $DEBUG"
 echo "Environment: $ENVIRONMENT"

+# Set default transport mode if not specified
+TRANSPORT_MODE=${TRANSPORT_MODE:-"stdio"}
+echo "Transport mode: $TRANSPORT_MODE"
+
 # Run Alembic migrations with proper error handling.
 # Note on UserAlreadyExists error handling:
 # During database migrations, we attempt to create a default user. If this user
@@ -28,19 +32,31 @@ fi
 echo "Database migrations done."

-echo "Starting Cognee MCP Server..."
+echo "Starting Cognee MCP Server with transport mode: $TRANSPORT_MODE"

 # Add startup delay to ensure DB is ready
 sleep 2

-# Modified Gunicorn startup with error handling
+# Modified startup with transport mode selection and error handling
 if [ "$ENVIRONMENT" = "dev" ] || [ "$ENVIRONMENT" = "local" ]; then
     if [ "$DEBUG" = "true" ]; then
         echo "Waiting for the debugger to attach..."
-        exec python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m cognee
+        if [ "$TRANSPORT_MODE" = "sse" ]; then
+            exec python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m cognee --transport sse
         else
-            exec cognee
+            exec python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m cognee --transport stdio
         fi
     else
-        exec cognee
+        if [ "$TRANSPORT_MODE" = "sse" ]; then
+            exec cognee --transport sse
+        else
+            exec cognee --transport stdio
+        fi
+    fi
+else
+    if [ "$TRANSPORT_MODE" = "sse" ]; then
+        exec cognee --transport sse
+    else
+        exec cognee --transport stdio
+    fi
 fi
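
`${TRANSPORT_MODE:-"stdio"}` falls back to `stdio` whenever the variable is unset or empty, and the nested `if` blocks then pick the matching `--transport` flag. A compact Python rendering of the same selection logic (illustrative only; the entrypoint itself stays in shell):

```python
import os

# Mirror of the entrypoint's dispatch: default to stdio, allow sse override.
# (Unlike ${VAR:-...}, .get() does not treat an empty string as unset.)
transport_mode = os.environ.get("TRANSPORT_MODE", "stdio")
flag = "sse" if transport_mode == "sse" else "stdio"
print(f"Starting Cognee MCP Server with transport mode: {flag}")
```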

---

@@ -18,6 +18,7 @@ from cognee.modules.search.types import SearchType
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.modules.storage.utils import JSONEncoder

+try:
     from codingagents.coding_rule_associations import (
         add_rule_associations,
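
Wrapping the `codingagents` import in a `try` block makes that dependency optional. The `except` branch sits outside the copied hunk; here is a sketch of the full pattern, with a hypothetical no-op fallback standing in for whatever the PR actually does:

```python
try:
    from codingagents.coding_rule_associations import add_rule_associations
except ImportError:
    # Hypothetical fallback; the real except-branch is not shown in this diff.
    def add_rule_associations(*args, **kwargs):
        return None
```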

---

@@ -1,4 +1,5 @@
 import litellm
+import logging
 from pydantic import BaseModel
 from typing import Type, Optional
 from litellm import acompletion, JSONSchemaValidationError
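
The new `import logging` lets this adapter silence LiteLLM's own logger. The two suppression calls this PR uses (see `configure_external_library_logging` further down) look like:

```python
import logging
import litellm

# Same two suppression calls this PR adds to logging_utils:
litellm.set_verbose = False  # disable LiteLLM's verbose mode
logging.getLogger("litellm").setLevel(logging.CRITICAL)  # drop everything below CRITICAL
```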

---

@@ -1,5 +1,6 @@
 """Adapter for Generic API LLM provider API"""

+import logging
 from typing import Type
 from pydantic import BaseModel
@@ -7,6 +8,7 @@ import instructor
 from cognee.infrastructure.llm.llm_interface import LLMInterface
 from cognee.infrastructure.llm.config import get_llm_config
 from cognee.infrastructure.llm.rate_limiter import rate_limit_async, sleep_and_retry_async
+from cognee.shared.logging_utils import get_logger

 import litellm

---

@@ -1,8 +1,7 @@
+from cognee.shared.logging_utils import get_logger
 import litellm

-from cognee.infrastructure.databases.vector import get_vector_engine
 from cognee.infrastructure.llm.get_llm_client import get_llm_client
-from cognee.shared.logging_utils import get_logger

 logger = get_logger()
@@ -22,6 +21,9 @@ def get_max_chunk_tokens():
     the smaller value of the embedding engine's max tokens and half of the LLM's
     maximum tokens.
     """
+    # NOTE: Import must be done in function to avoid circular import issue
+    from cognee.infrastructure.databases.vector import get_vector_engine
+
     # Calculate max chunk size based on the following formula
     embedding_engine = get_vector_engine().embedding_engine
     llm_client = get_llm_client()
@@ -93,6 +95,9 @@ async def test_embedding_connection():
     the exception if the connection to the embedding handler cannot be established.
     """
     try:
+        # NOTE: Vector engine import must be done in function to avoid circular import issue
+        from cognee.infrastructure.databases.vector import get_vector_engine
+
         await get_vector_engine().embedding_engine.embed_text("test")
     except Exception as e:
         logger.error(e)
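
Both hunks apply the same fix: the module-level `get_vector_engine` import participated in an import cycle, so it moves inside the functions that need it and is resolved only at call time. A minimal sketch of the pattern with two hypothetical modules:

```python
# engine.py (hypothetical): imports utils at module load time
#     import utils
#     def get_vector_engine(): ...

# utils.py (hypothetical): a top-level "import engine" would complete the
# cycle and fail; deferring it into the function body avoids that.
def get_max_chunk_tokens():
    import engine  # resolved at call time, after engine.py finished loading

    return engine.get_vector_engine()
```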

---

@@ -1,4 +1,6 @@
 from typing import IO, Optional
+from urllib.parse import urlparse
+import os

 from cognee.api.v1.add.config import get_s3_config
@@ -24,8 +26,16 @@ def open_data_file(
         else:
             return fs.open(file_path, mode=mode, encoding=encoding, **kwargs)
     elif file_path.startswith("file://"):
-        # Handle local file URLs by stripping the file:// prefix
-        file_path = file_path.replace("file://", "", 1)
-        return open(file_path, mode=mode, encoding=encoding, **kwargs)
+        # Handle local file URLs by properly parsing the URI
+        parsed_url = urlparse(file_path)
+        # On Windows, urlparse handles drive letters correctly
+        # Convert the path component to a proper file path
+        if os.name == "nt":  # Windows
+            # Remove leading slash from Windows paths like /C:/Users/...
+            local_path = parsed_url.path.lstrip("/")
+        else:  # Unix-like systems
+            local_path = parsed_url.path
+        return open(local_path, mode=mode, encoding=encoding, **kwargs)
     else:
         return open(file_path, mode=mode, encoding=encoding, **kwargs)
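
The replaced `file_path.replace("file://", "", 1)` approach breaks on Windows URLs such as `file:///C:/Users/...`, where stripping the prefix leaves `/C:/Users/...`. `urlparse` separates scheme and path cleanly, and the `lstrip("/")` handles the drive-letter case. The stdlib behavior this relies on:

```python
from urllib.parse import urlparse

# POSIX file URL: the path component is already usable as-is.
assert urlparse("file:///tmp/data.txt").path == "/tmp/data.txt"

# Windows file URL: urlparse keeps a leading slash before the drive
# letter, which the new code removes with lstrip("/").
assert urlparse("file:///C:/Users/x/data.txt").path == "/C:/Users/x/data.txt"
assert urlparse("file:///C:/Users/x/data.txt").path.lstrip("/") == "C:/Users/x/data.txt"
```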

---

@@ -11,6 +11,23 @@ import importlib.metadata
 from cognee import __version__ as cognee_version
 from typing import Protocol

+
+# Configure external library logging
+def configure_external_library_logging():
+    """Configure logging for external libraries to reduce verbosity"""
+    # Configure LiteLLM logging to reduce verbosity
+    try:
+        import litellm
+
+        litellm.set_verbose = False
+
+        # Suppress LiteLLM ERROR logging using standard logging
+        logging.getLogger("litellm").setLevel(logging.CRITICAL)
+    except ImportError:
+        # LiteLLM not available, skip configuration
+        pass
+
 # Export common log levels
 DEBUG = logging.DEBUG
 INFO = logging.INFO
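
Setting the `litellm` logger to CRITICAL means only CRITICAL records survive; ERROR and below are filtered at the logger itself. A quick illustration of the effect (assuming `configure_external_library_logging()` has run):

```python
import logging

logging.getLogger("litellm").error("suppressed: below CRITICAL")  # filtered out
logging.getLogger("litellm").critical("still emitted")  # passes the level check
```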
@@ -148,6 +165,44 @@ def get_logger(name=None, level=None) -> LoggerInterface:
     return logger

+
+def log_database_configuration(logger):
+    """Log the current database configuration for all database types"""
+    # NOTE: Has to be imported at runtime to avoid circular import
+    from cognee.infrastructure.databases.relational.config import get_relational_config
+    from cognee.infrastructure.databases.vector.config import get_vectordb_config
+    from cognee.infrastructure.databases.graph.config import get_graph_config
+
+    try:
+        # Log relational database configuration
+        relational_config = get_relational_config()
+        logger.info(f"Relational database: {relational_config.db_provider}")
+
+        if relational_config.db_provider == "postgres":
+            logger.info(f"Postgres host: {relational_config.db_host}:{relational_config.db_port}")
+            logger.info(f"Postgres database: {relational_config.db_name}")
+        elif relational_config.db_provider == "sqlite":
+            logger.info(f"SQLite path: {relational_config.db_path}")
+            logger.info(f"SQLite database: {relational_config.db_name}")
+
+        # Log vector database configuration
+        vector_config = get_vectordb_config()
+        logger.info(f"Vector database: {vector_config.vector_db_provider}")
+
+        if vector_config.vector_db_provider == "lancedb":
+            logger.info(f"Vector database path: {vector_config.vector_db_url}")
+        else:
+            logger.info(f"Vector database URL: {vector_config.vector_db_url}")
+
+        # Log graph database configuration
+        graph_config = get_graph_config()
+        logger.info(f"Graph database: {graph_config.graph_database_provider}")
+
+        if graph_config.graph_database_provider == "kuzu":
+            logger.info(f"Graph database path: {graph_config.graph_file_path}")
+        else:
+            logger.info(f"Graph database URL: {graph_config.graph_database_url}")
+    except Exception as e:
+        logger.warning(f"Could not retrieve database configuration: {str(e)}")
+

 def cleanup_old_logs(logs_dir, max_files):
     """
     Removes old log files, keeping only the most recent ones.
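
A usage sketch for the new helper (it lives in `cognee.shared.logging_utils` per this diff; any logger from `get_logger()` works, and the output values depend on the active configuration):

```python
from cognee.shared.logging_utils import get_logger, log_database_configuration

logger = get_logger()
# Emits INFO lines such as "Relational database: ..." for the relational,
# vector, and graph backends, or a single WARNING if config lookup fails.
log_database_configuration(logger)
```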
@@ -193,6 +248,9 @@ def setup_logging(log_level=None, name=None):
     log_level = log_level if log_level else log_levels[os.getenv("LOG_LEVEL", "INFO")]

+    # Configure external library logging early to suppress verbose output
+    configure_external_library_logging()
+
     def exception_handler(logger, method_name, event_dict):
         """Custom processor to handle uncaught exceptions."""
         # Check if there's an exc_info that needs to be processed
@@ -339,6 +397,9 @@ def setup_logging(log_level=None, name=None):
     logger.info("Want to learn more? Visit the Cognee documentation: https://docs.cognee.ai")

+    # Log database configuration
+    log_database_configuration(logger)
+
     # Return the configured logger
     return logger
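
`setup_logging` now performs both new steps: it suppresses external library logging up front and logs the database configuration just before returning. Typical startup, as a sketch using the exports shown above:

```python
from cognee.shared.logging_utils import setup_logging, INFO

logger = setup_logging(INFO)  # also runs configure_external_library_logging()
logger.info("application started")
```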

---

@@ -1,6 +1,7 @@
 import os
 import tempfile
 import pytest

+from pathlib import Path
 from cognee.modules.data.processing.document_types.open_data_file import open_data_file
@@ -29,7 +30,8 @@ class TestOpenDataFile:
             temp_file_path = f.name

         try:
-            file_url = f"file://{temp_file_path}"
+            # Use pathlib.Path.as_uri() for proper cross-platform file URL creation
+            file_url = Path(temp_file_path).as_uri()
             with open_data_file(file_url, mode="r") as f:
                 content = f.read()
             assert content == test_content
@@ -44,7 +46,8 @@ class TestOpenDataFile:
             temp_file_path = f.name

         try:
-            file_url = f"file://{temp_file_path}"
+            # Use pathlib.Path.as_uri() for proper cross-platform file URL creation
+            file_url = Path(temp_file_path).as_uri()
             with open_data_file(file_url, mode="rb") as f:
                 content = f.read()
             assert content == test_content.encode()
@@ -61,7 +64,8 @@ class TestOpenDataFile:
             temp_file_path = f.name

         try:
-            file_url = f"file://{temp_file_path}"
+            # Use pathlib.Path.as_uri() for proper cross-platform file URL creation
+            file_url = Path(temp_file_path).as_uri()
             with open_data_file(file_url, mode="r", encoding="utf-8") as f:
                 content = f.read()
             assert content == test_content
@@ -84,7 +88,9 @@ class TestOpenDataFile:
         try:
             # Even if someone accidentally adds multiple file:// prefixes
-            file_url = f"file://file://{temp_file_path}"
+            # Use proper file URL creation first
+            proper_file_url = Path(temp_file_path).as_uri()
+            file_url = f"file://{proper_file_url}"
             with open_data_file(file_url, mode="r") as f:
                 content = f.read()
             # This should work because we only replace the first occurrence
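
`Path.as_uri()` produces absolute, percent-encoded, platform-correct file URLs, which is what makes these tests portable. The stdlib behavior the tests now lean on:

```python
from pathlib import PurePosixPath, PureWindowsPath

# as_uri() requires an absolute path and percent-encodes special characters.
assert PurePosixPath("/tmp/data file.txt").as_uri() == "file:///tmp/data%20file.txt"
assert PureWindowsPath("C:/Users/x/data.txt").as_uri() == "file:///C:/Users/x/data.txt"
```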