fix: Logger suppression and database logs (#1041)
## DCO Affirmation

I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.

---------

Co-authored-by: Igor Ilic <30923996+dexters1@users.noreply.github.com>
Co-authored-by: Igor Ilic <igorilic03@gmail.com>
parent b8ea699abe
commit ada3f7b086

9 changed files with 117 additions and 15 deletions
@@ -102,7 +102,7 @@ handlers =
 qualname = sqlalchemy.engine
 
 [logger_alembic]
-level = INFO
+level = WARN
 handlers =
 qualname = alembic
 
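The change above raises Alembic's logger threshold from INFO to WARN in the ini-style logging config, so routine migration chatter no longer floods the logs. A minimal sketch of the equivalent programmatic configuration (illustrative only; the project configures this via the ini file shown above):

import logging

# Equivalent effect to the ini change: only WARN and above from Alembic
logging.getLogger("alembic").setLevel(logging.WARNING)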
@@ -4,6 +4,10 @@ set -e # Exit on error
 echo "Debug mode: $DEBUG"
 echo "Environment: $ENVIRONMENT"
 
+# Set default transport mode if not specified
+TRANSPORT_MODE=${TRANSPORT_MODE:-"stdio"}
+echo "Transport mode: $TRANSPORT_MODE"
+
 # Run Alembic migrations with proper error handling.
 # Note on UserAlreadyExists error handling:
 # During database migrations, we attempt to create a default user. If this user
@@ -28,19 +32,31 @@ fi
 
 echo "Database migrations done."
 
-echo "Starting Cognee MCP Server..."
+echo "Starting Cognee MCP Server with transport mode: $TRANSPORT_MODE"
 
 # Add startup delay to ensure DB is ready
 sleep 2
 
-# Modified Gunicorn startup with error handling
+# Modified startup with transport mode selection and error handling
 if [ "$ENVIRONMENT" = "dev" ] || [ "$ENVIRONMENT" = "local" ]; then
     if [ "$DEBUG" = "true" ]; then
         echo "Waiting for the debugger to attach..."
-        exec python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m cognee
+        if [ "$TRANSPORT_MODE" = "sse" ]; then
+            exec python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m cognee --transport sse
+        else
+            exec python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m cognee --transport stdio
+        fi
    else
-        exec cognee
+        if [ "$TRANSPORT_MODE" = "sse" ]; then
+            exec cognee --transport sse
+        else
+            exec cognee --transport stdio
+        fi
    fi
 else
-    exec cognee
+    if [ "$TRANSPORT_MODE" = "sse" ]; then
+        exec cognee --transport sse
+    else
+        exec cognee --transport stdio
+    fi
 fi
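Every exec branch now passes an explicit --transport flag, with stdio as the default via ${TRANSPORT_MODE:-"stdio"}. A hypothetical sketch of the CLI side this script targets; the flag name and choices come from the script, while the handler functions are assumptions, not the actual cognee entrypoint:

import argparse

def run_sse_server():
    print("serving MCP over SSE")    # hypothetical stand-in

def run_stdio_server():
    print("serving MCP over stdio")  # hypothetical stand-in

def main():
    # Mirrors the entrypoint script: `cognee --transport {stdio|sse}`, default stdio
    parser = argparse.ArgumentParser(prog="cognee")
    parser.add_argument("--transport", choices=["stdio", "sse"], default="stdio")
    args = parser.parse_args()
    if args.transport == "sse":
        run_sse_server()
    else:
        run_stdio_server()

if __name__ == "__main__":
    main()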
@@ -18,6 +18,7 @@ from cognee.modules.search.types import SearchType
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.modules.storage.utils import JSONEncoder
+
 
 try:
     from codingagents.coding_rule_associations import (
         add_rule_associations,
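The try: around the codingagents import (its except branch falls outside this hunk) is the usual optional-dependency pattern. A minimal sketch of the full shape, with the fallback assumed rather than taken from the diff:

try:
    from codingagents.coding_rule_associations import add_rule_associations
except ImportError:
    # Assumed fallback: the feature is simply unavailable without the package
    add_rule_associations = None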
@@ -1,4 +1,5 @@
 import litellm
+import logging
 from pydantic import BaseModel
 from typing import Type, Optional
 from litellm import acompletion, JSONSchemaValidationError
@@ -1,5 +1,6 @@
 """Adapter for Generic API LLM provider API"""
 
+import logging
 from typing import Type
 
 from pydantic import BaseModel
@@ -7,6 +8,7 @@ import instructor
 from cognee.infrastructure.llm.llm_interface import LLMInterface
 from cognee.infrastructure.llm.config import get_llm_config
 from cognee.infrastructure.llm.rate_limiter import rate_limit_async, sleep_and_retry_async
+from cognee.shared.logging_utils import get_logger
 import litellm
 
 
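Both adapters now pull in logging facilities alongside acompletion and JSONSchemaValidationError. A plausible use, not shown in these hunks, is logging structured-output validation failures instead of letting LiteLLM print its own verbose errors; the wrapper and model name below are illustrative assumptions:

from litellm import acompletion, JSONSchemaValidationError
from cognee.shared.logging_utils import get_logger

logger = get_logger()

async def acreate_structured_output(prompt: str, response_model):
    # Illustrative wrapper: log schema-validation failures quietly
    try:
        return await acompletion(
            model="gpt-4o-mini",  # illustrative model name
            messages=[{"role": "user", "content": prompt}],
            response_format=response_model,
        )
    except JSONSchemaValidationError as error:
        logger.error(f"Structured output failed schema validation: {error}")
        raise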
@@ -1,8 +1,7 @@
-from cognee.shared.logging_utils import get_logger
 import litellm
 
-from cognee.infrastructure.databases.vector import get_vector_engine
 from cognee.infrastructure.llm.get_llm_client import get_llm_client
+from cognee.shared.logging_utils import get_logger
 
 logger = get_logger()
 
@@ -22,6 +21,9 @@ def get_max_chunk_tokens():
     the smaller value of the embedding engine's max tokens and half of the LLM's
     maximum tokens.
     """
+    # NOTE: Import must be done in function to avoid circular import issue
+    from cognee.infrastructure.databases.vector import get_vector_engine
+
     # Calculate max chunk size based on the following formula
     embedding_engine = get_vector_engine().embedding_engine
     llm_client = get_llm_client()
@@ -93,6 +95,9 @@ async def test_embedding_connection():
     the exception if the connection to the embedding handler cannot be established.
     """
     try:
+        # NOTE: Vector engine import must be done in function to avoid circular import issue
+        from cognee.infrastructure.databases.vector import get_vector_engine
+
         await get_vector_engine().embedding_engine.embed_text("test")
     except Exception as e:
         logger.error(e)
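Both hunks apply the same fix: the get_vector_engine import moves from module scope into the function body, so it only runs at call time, after both modules have finished initializing. The shape of the pattern, reduced to a sketch:

# A module-level import of a package that (transitively) imports this module
# raises ImportError at startup; deferring the import to call time breaks the
# cycle because both modules are fully loaded by the time the function runs.
def get_engine():
    from cognee.infrastructure.databases.vector import get_vector_engine  # deferred import
    return get_vector_engine()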
@@ -1,4 +1,6 @@
 from typing import IO, Optional
+from urllib.parse import urlparse
+import os
 from cognee.api.v1.add.config import get_s3_config
 
 
@@ -24,8 +26,16 @@ def open_data_file(
         else:
             return fs.open(file_path, mode=mode, encoding=encoding, **kwargs)
     elif file_path.startswith("file://"):
-        # Handle local file URLs by stripping the file:// prefix
-        file_path = file_path.replace("file://", "", 1)
-        return open(file_path, mode=mode, encoding=encoding, **kwargs)
+        # Handle local file URLs by properly parsing the URI
+        parsed_url = urlparse(file_path)
+        # On Windows, urlparse handles drive letters correctly
+        # Convert the path component to a proper file path
+        if os.name == "nt":  # Windows
+            # Remove leading slash from Windows paths like /C:/Users/...
+            local_path = parsed_url.path.lstrip("/")
+        else:  # Unix-like systems
+            local_path = parsed_url.path
+
+        return open(local_path, mode=mode, encoding=encoding, **kwargs)
     else:
         return open(file_path, mode=mode, encoding=encoding, **kwargs)
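The motivation for the urlparse change is easiest to see on Windows, where naive prefix stripping leaves a leading slash in front of the drive letter. A quick demonstration (the path is illustrative):

from urllib.parse import urlparse

url = "file:///C:/Users/me/data.txt"   # Windows-style file URL
print(url.replace("file://", "", 1))   # "/C:/Users/me/data.txt" (old behavior, broken)
print(urlparse(url).path.lstrip("/"))  # "C:/Users/me/data.txt"  (new behavior)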
@@ -11,6 +11,23 @@ import importlib.metadata
 from cognee import __version__ as cognee_version
 from typing import Protocol
 
+
+# Configure external library logging
+def configure_external_library_logging():
+    """Configure logging for external libraries to reduce verbosity"""
+    # Configure LiteLLM logging to reduce verbosity
+    try:
+        import litellm
+
+        litellm.set_verbose = False
+
+        # Suppress LiteLLM ERROR logging using standard logging
+        logging.getLogger("litellm").setLevel(logging.CRITICAL)
+    except ImportError:
+        # LiteLLM not available, skip configuration
+        pass
+
+
 # Export common log levels
 DEBUG = logging.DEBUG
 INFO = logging.INFO
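Two suppressions happen here: litellm.set_verbose = False turns off LiteLLM's own debug printing, and raising the "litellm" logger to CRITICAL drops its ERROR records as well. A quick way to confirm the effect, assuming litellm is installed and using the import path from this diff:

import logging
from cognee.shared.logging_utils import configure_external_library_logging

configure_external_library_logging()
# Records below CRITICAL from the "litellm" logger are now discarded
assert logging.getLogger("litellm").getEffectiveLevel() == logging.CRITICAL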
@@ -148,6 +165,44 @@ def get_logger(name=None, level=None) -> LoggerInterface:
     return logger
 
 
+def log_database_configuration(logger):
+    """Log the current database configuration for all database types"""
+    # NOTE: Has to be imported at runtime to avoid circular import
+    from cognee.infrastructure.databases.relational.config import get_relational_config
+    from cognee.infrastructure.databases.vector.config import get_vectordb_config
+    from cognee.infrastructure.databases.graph.config import get_graph_config
+
+    try:
+        # Log relational database configuration
+        relational_config = get_relational_config()
+        logger.info(f"Relational database: {relational_config.db_provider}")
+        if relational_config.db_provider == "postgres":
+            logger.info(f"Postgres host: {relational_config.db_host}:{relational_config.db_port}")
+            logger.info(f"Postgres database: {relational_config.db_name}")
+        elif relational_config.db_provider == "sqlite":
+            logger.info(f"SQLite path: {relational_config.db_path}")
+            logger.info(f"SQLite database: {relational_config.db_name}")
+
+        # Log vector database configuration
+        vector_config = get_vectordb_config()
+        logger.info(f"Vector database: {vector_config.vector_db_provider}")
+        if vector_config.vector_db_provider == "lancedb":
+            logger.info(f"Vector database path: {vector_config.vector_db_url}")
+        else:
+            logger.info(f"Vector database URL: {vector_config.vector_db_url}")
+
+        # Log graph database configuration
+        graph_config = get_graph_config()
+        logger.info(f"Graph database: {graph_config.graph_database_provider}")
+        if graph_config.graph_database_provider == "kuzu":
+            logger.info(f"Graph database path: {graph_config.graph_file_path}")
+        else:
+            logger.info(f"Graph database URL: {graph_config.graph_database_url}")
+
+    except Exception as e:
+        logger.warning(f"Could not retrieve database configuration: {str(e)}")
+
+
 def cleanup_old_logs(logs_dir, max_files):
     """
     Removes old log files, keeping only the most recent ones.
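A minimal usage sketch of the new helper (the actual call site added to setup_logging appears in a later hunk):

from cognee.shared.logging_utils import get_logger, log_database_configuration

logger = get_logger()
# Emits one INFO line per backend (relational, vector, graph), plus
# provider-specific host or path details, and downgrades any lookup
# failure to a single warning instead of raising
log_database_configuration(logger)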
@@ -193,6 +248,9 @@ def setup_logging(log_level=None, name=None):
 
     log_level = log_level if log_level else log_levels[os.getenv("LOG_LEVEL", "INFO")]
 
+    # Configure external library logging early to suppress verbose output
+    configure_external_library_logging()
+
     def exception_handler(logger, method_name, event_dict):
         """Custom processor to handle uncaught exceptions."""
         # Check if there's an exc_info that needs to be processed
@@ -339,6 +397,9 @@ def setup_logging(log_level=None, name=None):
 
     logger.info("Want to learn more? Visit the Cognee documentation: https://docs.cognee.ai")
 
+    # Log database configuration
+    log_database_configuration(logger)
+
     # Return the configured logger
     return logger
 
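With both changes in place, a single setup_logging call now silences LiteLLM before any handlers are installed and reports the database configuration at the end. Typical usage, assuming the existing public API:

from cognee.shared.logging_utils import setup_logging, INFO

logger = setup_logging(log_level=INFO)
# startup banner ... then the new database-configuration lines follow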
@@ -1,6 +1,7 @@
 import os
 import tempfile
 import pytest
+from pathlib import Path
 from cognee.modules.data.processing.document_types.open_data_file import open_data_file
 
 
@@ -29,7 +30,8 @@ class TestOpenDataFile:
             temp_file_path = f.name
 
         try:
-            file_url = f"file://{temp_file_path}"
+            # Use pathlib.Path.as_uri() for proper cross-platform file URL creation
+            file_url = Path(temp_file_path).as_uri()
             with open_data_file(file_url, mode="r") as f:
                 content = f.read()
                 assert content == test_content
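Path.as_uri() is what makes these tests platform-correct; the pure-path variants show the difference without touching the filesystem (paths are illustrative):

from pathlib import PurePosixPath, PureWindowsPath

print(PurePosixPath("/tmp/data.txt").as_uri())     # file:///tmp/data.txt
print(PureWindowsPath("C:/Users/a.txt").as_uri())  # file:///C:/Users/a.txt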
@@ -44,7 +46,8 @@ class TestOpenDataFile:
             temp_file_path = f.name
 
         try:
-            file_url = f"file://{temp_file_path}"
+            # Use pathlib.Path.as_uri() for proper cross-platform file URL creation
+            file_url = Path(temp_file_path).as_uri()
             with open_data_file(file_url, mode="rb") as f:
                 content = f.read()
                 assert content == test_content.encode()
@@ -61,7 +64,8 @@ class TestOpenDataFile:
             temp_file_path = f.name
 
         try:
-            file_url = f"file://{temp_file_path}"
+            # Use pathlib.Path.as_uri() for proper cross-platform file URL creation
+            file_url = Path(temp_file_path).as_uri()
             with open_data_file(file_url, mode="r", encoding="utf-8") as f:
                 content = f.read()
                 assert content == test_content
@@ -84,7 +88,9 @@ class TestOpenDataFile:
 
         try:
             # Even if someone accidentally adds multiple file:// prefixes
-            file_url = f"file://file://{temp_file_path}"
+            # Use proper file URL creation first
+            proper_file_url = Path(temp_file_path).as_uri()
+            file_url = f"file://{proper_file_url}"
             with open_data_file(file_url, mode="r") as f:
                 content = f.read()
             # This should work because we only replace the first occurrence