diff --git a/alembic.ini b/alembic.ini index e7cb55ee6..15cd939b3 100644 --- a/alembic.ini +++ b/alembic.ini @@ -102,7 +102,7 @@ handlers = qualname = sqlalchemy.engine [logger_alembic] -level = INFO +level = WARN handlers = qualname = alembic diff --git a/cognee-mcp/entrypoint.sh b/cognee-mcp/entrypoint.sh index 91a88c572..7a7cf70b8 100644 --- a/cognee-mcp/entrypoint.sh +++ b/cognee-mcp/entrypoint.sh @@ -4,6 +4,10 @@ set -e # Exit on error echo "Debug mode: $DEBUG" echo "Environment: $ENVIRONMENT" +# Set default transport mode if not specified +TRANSPORT_MODE=${TRANSPORT_MODE:-"stdio"} +echo "Transport mode: $TRANSPORT_MODE" + # Run Alembic migrations with proper error handling. # Note on UserAlreadyExists error handling: # During database migrations, we attempt to create a default user. If this user @@ -28,19 +32,31 @@ fi echo "Database migrations done." -echo "Starting Cognee MCP Server..." +echo "Starting Cognee MCP Server with transport mode: $TRANSPORT_MODE" # Add startup delay to ensure DB is ready sleep 2 -# Modified Gunicorn startup with error handling +# Modified startup with transport mode selection and error handling if [ "$ENVIRONMENT" = "dev" ] || [ "$ENVIRONMENT" = "local" ]; then if [ "$DEBUG" = "true" ]; then echo "Waiting for the debugger to attach..." 
- exec python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m cognee + if [ "$TRANSPORT_MODE" = "sse" ]; then + exec python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m cognee --transport sse + else + exec python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m cognee --transport stdio + fi else - exec cognee + if [ "$TRANSPORT_MODE" = "sse" ]; then + exec cognee --transport sse + else + exec cognee --transport stdio + fi fi else - exec cognee + if [ "$TRANSPORT_MODE" = "sse" ]; then + exec cognee --transport sse + else + exec cognee --transport stdio + fi fi diff --git a/cognee-mcp/src/server.py b/cognee-mcp/src/server.py index def3512b1..5a0a36b5a 100755 --- a/cognee-mcp/src/server.py +++ b/cognee-mcp/src/server.py @@ -18,6 +18,7 @@ from cognee.modules.search.types import SearchType from cognee.shared.data_models import KnowledgeGraph from cognee.modules.storage.utils import JSONEncoder + try: from codingagents.coding_rule_associations import ( add_rule_associations, diff --git a/cognee/infrastructure/llm/gemini/adapter.py b/cognee/infrastructure/llm/gemini/adapter.py index d141d1c84..db11a5ab4 100644 --- a/cognee/infrastructure/llm/gemini/adapter.py +++ b/cognee/infrastructure/llm/gemini/adapter.py @@ -1,4 +1,5 @@ import litellm +import logging from pydantic import BaseModel from typing import Type, Optional from litellm import acompletion, JSONSchemaValidationError diff --git a/cognee/infrastructure/llm/generic_llm_api/adapter.py b/cognee/infrastructure/llm/generic_llm_api/adapter.py index 9c00054f6..e74a4eb03 100644 --- a/cognee/infrastructure/llm/generic_llm_api/adapter.py +++ b/cognee/infrastructure/llm/generic_llm_api/adapter.py @@ -1,5 +1,6 @@ """Adapter for Generic API LLM provider API""" +import logging from typing import Type from pydantic import BaseModel @@ -7,6 +8,7 @@ import instructor from cognee.infrastructure.llm.llm_interface import LLMInterface from cognee.infrastructure.llm.config import get_llm_config from 
cognee.infrastructure.llm.rate_limiter import rate_limit_async, sleep_and_retry_async +from cognee.shared.logging_utils import get_logger import litellm diff --git a/cognee/infrastructure/llm/utils.py b/cognee/infrastructure/llm/utils.py index d25e41326..fdc8c521c 100644 --- a/cognee/infrastructure/llm/utils.py +++ b/cognee/infrastructure/llm/utils.py @@ -1,8 +1,7 @@ -from cognee.shared.logging_utils import get_logger import litellm -from cognee.infrastructure.databases.vector import get_vector_engine from cognee.infrastructure.llm.get_llm_client import get_llm_client +from cognee.shared.logging_utils import get_logger logger = get_logger() @@ -22,6 +21,9 @@ def get_max_chunk_tokens(): the smaller value of the embedding engine's max tokens and half of the LLM's maximum tokens. """ + # NOTE: Import must be done in function to avoid circular import issue + from cognee.infrastructure.databases.vector import get_vector_engine + # Calculate max chunk size based on the following formula embedding_engine = get_vector_engine().embedding_engine llm_client = get_llm_client() @@ -93,6 +95,9 @@ async def test_embedding_connection(): the exception if the connection to the embedding handler cannot be established. 
""" try: + # NOTE: Vector engine import must be done in function to avoid circular import issue + from cognee.infrastructure.databases.vector import get_vector_engine + await get_vector_engine().embedding_engine.embed_text("test") except Exception as e: logger.error(e) diff --git a/cognee/modules/data/processing/document_types/open_data_file.py b/cognee/modules/data/processing/document_types/open_data_file.py index 34a8b098a..4190f4420 100644 --- a/cognee/modules/data/processing/document_types/open_data_file.py +++ b/cognee/modules/data/processing/document_types/open_data_file.py @@ -1,4 +1,6 @@ from typing import IO, Optional +from urllib.parse import urlparse +import os from cognee.api.v1.add.config import get_s3_config @@ -24,8 +26,16 @@ def open_data_file( else: return fs.open(file_path, mode=mode, encoding=encoding, **kwargs) elif file_path.startswith("file://"): - # Handle local file URLs by stripping the file:// prefix - file_path = file_path.replace("file://", "", 1) - return open(file_path, mode=mode, encoding=encoding, **kwargs) + # Handle local file URLs by properly parsing the URI + parsed_url = urlparse(file_path) + # On Windows, urlparse handles drive letters correctly + # Convert the path component to a proper file path + if os.name == "nt": # Windows + # Remove leading slash from Windows paths like /C:/Users/... 
+ local_path = parsed_url.path.lstrip("/") + else: # Unix-like systems + local_path = parsed_url.path + + return open(local_path, mode=mode, encoding=encoding, **kwargs) else: return open(file_path, mode=mode, encoding=encoding, **kwargs) diff --git a/cognee/shared/logging_utils.py b/cognee/shared/logging_utils.py index 16084eac6..989bcba64 100644 --- a/cognee/shared/logging_utils.py +++ b/cognee/shared/logging_utils.py @@ -11,6 +11,23 @@ import importlib.metadata from cognee import __version__ as cognee_version from typing import Protocol + +# Configure external library logging +def configure_external_library_logging(): + """Configure logging for external libraries to reduce verbosity""" + # Configure LiteLLM logging to reduce verbosity + try: + import litellm + + litellm.set_verbose = False + + # Suppress LiteLLM ERROR logging using standard logging + logging.getLogger("litellm").setLevel(logging.CRITICAL) + except ImportError: + # LiteLLM not available, skip configuration + pass + + # Export common log levels DEBUG = logging.DEBUG INFO = logging.INFO @@ -148,6 +165,44 @@ def get_logger(name=None, level=None) -> LoggerInterface: return logger +def log_database_configuration(logger): + """Log the current database configuration for all database types""" + # NOTE: Has to be imported at runtime to avoid circular import + from cognee.infrastructure.databases.relational.config import get_relational_config + from cognee.infrastructure.databases.vector.config import get_vectordb_config + from cognee.infrastructure.databases.graph.config import get_graph_config + + try: + # Log relational database configuration + relational_config = get_relational_config() + logger.info(f"Relational database: {relational_config.db_provider}") + if relational_config.db_provider == "postgres": + logger.info(f"Postgres host: {relational_config.db_host}:{relational_config.db_port}") + logger.info(f"Postgres database: {relational_config.db_name}") + elif relational_config.db_provider ==
"sqlite": + logger.info(f"SQLite path: {relational_config.db_path}") + logger.info(f"SQLite database: {relational_config.db_name}") + + # Log vector database configuration + vector_config = get_vectordb_config() + logger.info(f"Vector database: {vector_config.vector_db_provider}") + if vector_config.vector_db_provider == "lancedb": + logger.info(f"Vector database path: {vector_config.vector_db_url}") + else: + logger.info(f"Vector database URL: {vector_config.vector_db_url}") + + # Log graph database configuration + graph_config = get_graph_config() + logger.info(f"Graph database: {graph_config.graph_database_provider}") + if graph_config.graph_database_provider == "kuzu": + logger.info(f"Graph database path: {graph_config.graph_file_path}") + else: + logger.info(f"Graph database URL: {graph_config.graph_database_url}") + + except Exception as e: + logger.warning(f"Could not retrieve database configuration: {str(e)}") + + def cleanup_old_logs(logs_dir, max_files): """ Removes old log files, keeping only the most recent ones. @@ -193,6 +248,9 @@ def setup_logging(log_level=None, name=None): log_level = log_level if log_level else log_levels[os.getenv("LOG_LEVEL", "INFO")] + # Configure external library logging early to suppress verbose output + configure_external_library_logging() + def exception_handler(logger, method_name, event_dict): """Custom processor to handle uncaught exceptions.""" # Check if there's an exc_info that needs to be processed @@ -339,6 +397,9 @@ def setup_logging(log_level=None, name=None): logger.info("Want to learn more? 
Visit the Cognee documentation: https://docs.cognee.ai") + # Log database configuration + log_database_configuration(logger) + # Return the configured logger return logger diff --git a/cognee/tests/unit/modules/data/test_open_data_file.py b/cognee/tests/unit/modules/data/test_open_data_file.py index eea402aa3..8ad3ec813 100644 --- a/cognee/tests/unit/modules/data/test_open_data_file.py +++ b/cognee/tests/unit/modules/data/test_open_data_file.py @@ -1,6 +1,7 @@ import os import tempfile import pytest +from pathlib import Path from cognee.modules.data.processing.document_types.open_data_file import open_data_file @@ -29,7 +30,8 @@ class TestOpenDataFile: temp_file_path = f.name try: - file_url = f"file://{temp_file_path}" + # Use pathlib.Path.as_uri() for proper cross-platform file URL creation + file_url = Path(temp_file_path).as_uri() with open_data_file(file_url, mode="r") as f: content = f.read() assert content == test_content @@ -44,7 +46,8 @@ class TestOpenDataFile: temp_file_path = f.name try: - file_url = f"file://{temp_file_path}" + # Use pathlib.Path.as_uri() for proper cross-platform file URL creation + file_url = Path(temp_file_path).as_uri() with open_data_file(file_url, mode="rb") as f: content = f.read() assert content == test_content.encode() @@ -61,7 +64,8 @@ class TestOpenDataFile: temp_file_path = f.name try: - file_url = f"file://{temp_file_path}" + # Use pathlib.Path.as_uri() for proper cross-platform file URL creation + file_url = Path(temp_file_path).as_uri() with open_data_file(file_url, mode="r", encoding="utf-8") as f: content = f.read() assert content == test_content @@ -84,7 +88,9 @@ class TestOpenDataFile: try: # Even if someone accidentally adds multiple file:// prefixes - file_url = f"file://file://{temp_file_path}" + # Use proper file URL creation first + proper_file_url = Path(temp_file_path).as_uri() + file_url = f"file://{proper_file_url}" with open_data_file(file_url, mode="r") as f: content = f.read() # This should work because we 
only replace the first occurrence