# Changelog:
# - Rename OpenAITranslationProvider to LLMTranslationProvider
# - Rename openai_provider.py to llm_provider.py
# - Change provider type from 'openai' to 'llm' in TranslationProviderType
# - Update all test files to use 'llm' provider and has_llm_api_key()
# - Add AliasChoices for explicit env var mapping in TranslationConfig
# - Update translate_content.py to fall back to config.target_language
# - Update cognify.py docstrings to reference 'llm' provider
# - Update .env.template and test README documentation
# The LLM provider now uses whatever LLM is configured in cognee
# (OpenAI, Azure, Ollama, Anthropic, etc.) instead of being tied to OpenAI.
###############################################################################
|
|
# NOTE: With default settings Cognee only needs an OpenAI LLM_API_KEY to be set.
|
|
# The rest of the settings don't have to be set.
|
|
# Default relational database: SQLite
|
|
# Default vector database : LanceDB
|
|
# Default graph database : Kuzu
|
|
#
|
|
# These default databases are all file-based, so no extra setup is needed
|
|
# for local use. The data by default will be stored in your .venv
|
|
###############################################################################
|
|
|
|
################################################################################
|
|
# 🧠 LLM Settings
|
|
################################################################################
|
|
# Currently we support BAML and Instructor (using litellm) for structured outputs
|
|
STRUCTURED_OUTPUT_FRAMEWORK="instructor"
|
|
|
|
LLM_API_KEY="your_api_key"
|
|
LLM_MODEL="openai/gpt-5-mini"
|
|
LLM_PROVIDER="openai"
|
|
LLM_ENDPOINT=""
|
|
LLM_API_VERSION=""
|
|
LLM_MAX_TOKENS="16384"
|
|
# Instructor's modes determine how structured data is requested from and extracted from LLM responses
|
|
# You can change this type (i.e. mode) via this env variable
|
|
# Each LLM has its own default value, e.g. gpt-5 models have "json_schema_mode"
|
|
LLM_INSTRUCTOR_MODE=""
|
|
|
|
EMBEDDING_PROVIDER="openai"
|
|
EMBEDDING_MODEL="openai/text-embedding-3-large"
|
|
EMBEDDING_ENDPOINT=""
|
|
EMBEDDING_API_VERSION=""
|
|
EMBEDDING_DIMENSIONS=3072
|
|
EMBEDDING_MAX_TOKENS=8191
|
|
EMBEDDING_BATCH_SIZE=36
|
|
# If embedding key is not provided same key set for LLM_API_KEY will be used
|
|
#EMBEDDING_API_KEY="your_api_key"
|
|
|
|
|
|
# If using BAML structured output these env variables will be used
|
|
BAML_LLM_PROVIDER=openai
|
|
BAML_LLM_MODEL="gpt-5-mini"
|
|
BAML_LLM_ENDPOINT=""
|
|
BAML_LLM_API_KEY="your_api_key"
|
|
BAML_LLM_API_VERSION=""
|
|
|
|
|
|
################################################################################
|
|
# 📂 ROOT DIRECTORY FOR DATABASES
|
|
################################################################################
|
|
# Set up the Cognee system directory. Cognee will store system files and databases here.
|
|
# Useful for setting root directory inside docker and also to avoid storing the databases in .venv
|
|
# DATA_ROOT_DIRECTORY='/Users/<user>/Desktop/cognee/.cognee_data/'
|
|
# SYSTEM_ROOT_DIRECTORY='/Users/<user>/Desktop/cognee/.cognee_system/'
|
|
|
|
################################################################################
|
|
# ☁️ Storage Backend Settings
|
|
################################################################################
|
|
# Configure storage backend (local filesystem or S3)
|
|
# STORAGE_BACKEND="local" # Default: uses local filesystem
|
|
#
|
|
# -- To switch to S3 storage, uncomment and fill these: ---------------------
|
|
# STORAGE_BACKEND="s3"
|
|
# STORAGE_BUCKET_NAME="your-bucket-name"
|
|
# AWS_REGION="us-east-1"
|
|
# AWS_ACCESS_KEY_ID="your-access-key"
|
|
# AWS_SECRET_ACCESS_KEY="your-secret-key"
|
|
#
|
|
# -- S3 Root Directories (optional) -----------------------------------------
|
|
# DATA_ROOT_DIRECTORY="s3://your-bucket/cognee/data"
|
|
# SYSTEM_ROOT_DIRECTORY="s3://your-bucket/cognee/system"
|
|
#
|
|
# -- Cache Directory (auto-configured for S3) -------------------------------
|
|
# When STORAGE_BACKEND=s3, cache automatically uses S3: s3://BUCKET/cognee/cache
|
|
# To override the automatic S3 cache location, uncomment:
|
|
# CACHE_ROOT_DIRECTORY="s3://your-bucket/cognee/cache"
|
|
|
|
################################################################################
|
|
# 🗄️ Relational database settings
|
|
################################################################################
|
|
|
|
DB_PROVIDER="sqlite"
|
|
DB_NAME=cognee_db
|
|
|
|
# -- To switch to Postgres / PGVector, uncomment and fill these: -------------
|
|
#DB_PROVIDER=postgres
|
|
#DB_NAME=cognee_db
|
|
# To use Postgres with the Cognee backend in Docker compose use the following instead: DB_HOST=host.docker.internal
|
|
#DB_HOST=127.0.0.1
|
|
#DB_PORT=5432
|
|
#DB_USERNAME=cognee
|
|
#DB_PASSWORD=cognee
|
|
|
|
# -- Advanced: Custom database connection arguments (optional) ---------------
|
|
# Pass additional connection parameters as JSON. Useful for SSL, timeouts, etc.
|
|
# Examples:
|
|
# For PostgreSQL with SSL:
|
|
# DATABASE_CONNECT_ARGS='{"sslmode": "require", "connect_timeout": 10}'
|
|
# For SQLite with custom timeout:
|
|
# DATABASE_CONNECT_ARGS='{"timeout": 60}'
|
|
#DATABASE_CONNECT_ARGS='{}'
|
|
|
|
################################################################################
|
|
# 🕸️ Graph Database settings
|
|
################################################################################
|
|
|
|
# Default (local file-based)
|
|
GRAPH_DATABASE_PROVIDER="kuzu"
|
|
# Handler for multi-user access control mode; it determines how the mapping/creation of separate DBs is handled per Cognee dataset
|
|
GRAPH_DATASET_DATABASE_HANDLER="kuzu"
|
|
|
|
# -- To switch to Remote Kuzu uncomment and fill these: -------------------------------------------------------------
|
|
#GRAPH_DATABASE_PROVIDER="kuzu"
|
|
#GRAPH_DATABASE_PROVIDER="kuzu-remote"
|
|
#GRAPH_DATABASE_URL="http://localhost:8000"
|
|
#GRAPH_DATABASE_USERNAME=XXX
|
|
#GRAPH_DATABASE_PASSWORD=YYY
|
|
|
|
# -- To switch to Neo4j uncomment and fill these: -------------------------------------------------------------------
|
|
#GRAPH_DATABASE_PROVIDER="neo4j"
|
|
#GRAPH_DATABASE_URL=bolt://localhost:7687
|
|
#GRAPH_DATABASE_NAME="neo4j"
|
|
#GRAPH_DATABASE_USERNAME=neo4j
|
|
#GRAPH_DATABASE_PASSWORD=localneo4j
|
|
|
|
################################################################################
|
|
# 📐 Vector Database settings
|
|
################################################################################
|
|
|
|
# Supported providers: pgvector | qdrant | weaviate | milvus | lancedb | chromadb
|
|
VECTOR_DB_PROVIDER="lancedb"
|
|
# Not needed if a cloud vector database is not used
|
|
VECTOR_DB_URL=
|
|
VECTOR_DB_KEY=
|
|
# Handler for multi-user access control mode; it determines how the mapping/creation of separate DBs is handled per Cognee dataset
|
|
VECTOR_DATASET_DATABASE_HANDLER="lancedb"
|
|
|
|
################################################################################
|
|
# 🧩 Ontology resolver settings
|
|
################################################################################
|
|
|
|
# -- Ontology resolver params --------------------------------------
|
|
# ONTOLOGY_RESOLVER=rdflib # Default: uses rdflib and owl file to read ontology structures
|
|
# MATCHING_STRATEGY=fuzzy # Default: uses fuzzy matching with 80% similarity threshold
|
|
# ONTOLOGY_FILE_PATH=YOUR_FULL_FILE_PATH # Default: empty
|
|
# To add ontology resolvers, either set them as it is set in ontology_example or add full_path and settings as envs.
|
|
|
|
################################################################################
|
|
# 🌐 Translation Settings
|
|
################################################################################
|
|
|
|
# Translation provider: llm (uses configured LLM), google, or azure
|
|
# "llm" uses whichever LLM is configured above (OpenAI, Azure, Ollama, Anthropic, etc.)
|
|
# "google" and "azure" use dedicated translation APIs
|
|
TRANSLATION_PROVIDER="llm"
|
|
|
|
# Default target language for translations (ISO 639-1 code, e.g., en, es, fr, de)
|
|
TARGET_LANGUAGE="en"
|
|
|
|
# Minimum confidence threshold for language detection (0.0 to 1.0)
|
|
CONFIDENCE_THRESHOLD=0.8
|
|
|
|
# -- Google Translate settings (required if using google provider) -----------
|
|
# GOOGLE_TRANSLATE_API_KEY="your-google-api-key"
|
|
# GOOGLE_PROJECT_ID="your-google-project-id"
|
|
|
|
# -- Azure Translator settings (required if using azure provider) ------------
|
|
# AZURE_TRANSLATOR_KEY="your-azure-translator-key"
|
|
# AZURE_TRANSLATOR_REGION="westeurope"
|
|
# AZURE_TRANSLATOR_ENDPOINT="https://api.cognitive.microsofttranslator.com"
|
|
|
|
# -- Performance settings ----------------------------------------------------
|
|
# TRANSLATION_BATCH_SIZE=10
|
|
# TRANSLATION_MAX_RETRIES=3
|
|
# TRANSLATION_TIMEOUT_SECONDS=30
|
|
|
|
################################################################################
|
|
# 🔄 MIGRATION (RELATIONAL → GRAPH) SETTINGS
|
|
################################################################################
|
|
|
|
MIGRATION_DB_PATH="/path/to/migration/directory"
|
|
MIGRATION_DB_NAME="migration_database.sqlite"
|
|
MIGRATION_DB_PROVIDER="sqlite"
|
|
|
|
# -- Postgres-specific migration params --------------------------------------
|
|
# MIGRATION_DB_USERNAME=cognee
|
|
# MIGRATION_DB_PASSWORD=cognee
|
|
# MIGRATION_DB_HOST="127.0.0.1"
|
|
# MIGRATION_DB_PORT=5432
|
|
|
|
################################################################################
|
|
# 🔒 Security Settings
|
|
################################################################################
|
|
|
|
# When set to False, adding local system files to Cognee is not allowed. Should be set to False when Cognee is used as a backend.
|
|
ACCEPT_LOCAL_FILE_PATH=True
|
|
|
|
# When set to false don't allow HTTP requests to be sent from Cognee.
|
|
# This protects against Server Side Request Forgery when proper infrastructure is not in place.
|
|
ALLOW_HTTP_REQUESTS=True
|
|
|
|
# When set to false don't allow cypher search to be used in Cognee.
|
|
ALLOW_CYPHER_QUERY=True
|
|
|
|
# When set to False errors during data processing will be returned as info but not raised to allow handling of faulty documents
|
|
RAISE_INCREMENTAL_LOADING_ERRORS=True
|
|
|
|
# When set to True, the Cognee backend will require authentication for requests to the API.
|
|
# If you're disabling this, make sure to also disable ENABLE_BACKEND_ACCESS_CONTROL.
|
|
REQUIRE_AUTHENTICATION=False
|
|
|
|
# Set this variable to True to enforce usage of backend access control for Cognee
|
|
# Note: This is only currently supported by the following databases:
|
|
# Relational: SQLite, Postgres
|
|
# Vector: LanceDB
|
|
# Graph: KuzuDB
|
|
#
|
|
# It enforces creation of databases per Cognee user + dataset. Does not work with some graph and database providers.
|
|
# Disable mode when using not supported graph/vector databases.
|
|
ENABLE_BACKEND_ACCESS_CONTROL=True
|
|
|
|
################################################################################
|
|
# ☁️ Cloud Sync Settings
|
|
################################################################################
|
|
|
|
# Cognee Cloud API settings for syncing data to/from cloud infrastructure
|
|
COGNEE_CLOUD_API_URL="http://localhost:8001"
|
|
COGNEE_CLOUD_AUTH_TOKEN="your-api-key"
|
|
|
|
################################################################################
|
|
# UI Settings
|
|
################################################################################
|
|
|
|
# URL where the frontend is served, defaults to http://localhost:3000
|
|
UI_APP_URL=http://localhost:3000
|
|
|
|
################################################################################
|
|
# 🛠️ DEV Settings
|
|
################################################################################
|
|
|
|
ENV="local"
|
|
|
|
TOKENIZERS_PARALLELISM="false"
|
|
|
|
# LITELLM Logging Level. Set to quiet down logging
|
|
LITELLM_LOG="ERROR"
|
|
|
|
# Set this environment variable to disable sending telemetry data
|
|
# TELEMETRY_DISABLED=1
|
|
|
|
# Default User Configuration
|
|
# DEFAULT_USER_EMAIL=""
|
|
# DEFAULT_USER_PASSWORD=""
|
|
|
|
################################################################################
|
|
# 📂 AWS Settings
|
|
################################################################################
|
|
|
|
#AWS_REGION=""
|
|
#AWS_ENDPOINT_URL=""
|
|
#AWS_ACCESS_KEY_ID=""
|
|
#AWS_SECRET_ACCESS_KEY=""
|
|
#AWS_SESSION_TOKEN=""
|
|
|
|
# ------------------------------- END OF POSSIBLE SETTINGS -------------------------------
|
|
|
|
|
|
###############################################################################
|
|
# 🧪 EXAMPLE OVERRIDES (commented out)
|
|
###############################################################################
|
|
# The blocks below show how to configure alternative providers.
|
|
# Uncomment + fill values to switch.
|
|
|
|
########## Azure OpenAI #######################################################
|
|
#LLM_MODEL="azure/gpt-5-mini"
|
|
#LLM_ENDPOINT="https://DNS.azure.com/openai/deployments/gpt-5-mini"
|
|
#LLM_API_KEY="<<TALK TO YOUR AZURE GUY>>"
|
|
#LLM_API_VERSION="2024-12-01-preview"
|
|
|
|
## llm api version might not be relevant
|
|
#LLM_MAX_TOKENS="16384"
|
|
|
|
#EMBEDDING_MODEL="azure/text-embedding-3-large"
|
|
#EMBEDDING_ENDPOINT="https://DNS.openai.azure.com/openai/deployments/text-embedding-3-large"
|
|
#EMBEDDING_API_KEY="<<TALK TO YOUR AZURE GUY>>"
|
|
#EMBEDDING_API_VERSION="2024-12-01-preview"
|
|
#EMBEDDING_DIMENSIONS=3072
|
|
#EMBEDDING_MAX_TOKENS=8191
|
|
|
|
########## Local LLM via Ollama ###############################################
|
|
|
|
|
|
#LLM_API_KEY="ollama"
|
|
#LLM_MODEL="llama3.1:8b"
|
|
#LLM_PROVIDER="ollama"
|
|
#LLM_ENDPOINT="http://localhost:11434/v1"
|
|
#EMBEDDING_PROVIDER="ollama"
|
|
#EMBEDDING_MODEL="nomic-embed-text:latest"
|
|
#EMBEDDING_ENDPOINT="http://localhost:11434/api/embed"
|
|
#EMBEDDING_DIMENSIONS=768
|
|
#HUGGINGFACE_TOKENIZER="nomic-ai/nomic-embed-text-v1.5"
|
|
|
|
########## OpenRouter (also free) #########################################################
|
|
|
|
#LLM_API_KEY="<<go-get-one-yourself>>"
|
|
#LLM_PROVIDER="custom"
|
|
#LLM_MODEL="openrouter/google/gemini-2.0-flash-lite-preview-02-05:free"
|
|
#LLM_ENDPOINT="https://openrouter.ai/api/v1"
|
|
|
|
########## DeepInfra ##########################################################
|
|
|
|
#LLM_API_KEY="<<>>"
|
|
#LLM_PROVIDER="custom"
|
|
#LLM_MODEL="deepinfra/meta-llama/Meta-Llama-3-8B-Instruct"
|
|
#LLM_ENDPOINT="https://api.deepinfra.com/v1/openai"
|
|
|
|
#EMBEDDING_PROVIDER="openai"
|
|
#EMBEDDING_API_KEY="<<>>"
|
|
#EMBEDDING_MODEL="deepinfra/BAAI/bge-base-en-v1.5"
|
|
#EMBEDDING_ENDPOINT=""
|
|
#EMBEDDING_API_VERSION=""
|
|
#EMBEDDING_DIMENSIONS=3072
|
|
#EMBEDDING_MAX_TOKENS=8191
|
|
|
|
|
|
########## Release Test ###############################################
|
|
|
|
#LLM_API_KEY="..."
|
|
|
|
#OPENAI_API_KEY="..."
|
|
|
|
#MIGRATION_DB_PATH="~/Downloads/"
|
|
#MIGRATION_DB_NAME="Chinook_Sqlite.sqlite"
|
|
#MIGRATION_DB_PROVIDER="sqlite"
|
|
|
|
#GRAPH_DATABASE_URL="bolt://54.246.89.112:7687"
|
|
#GRAPH_DATABASE_USERNAME="neo4j"
|
|
#GRAPH_DATABASE_PASSWORD="pleaseletmein"
|