# Changelog:
# - Rename OpenAITranslationProvider to LLMTranslationProvider
# - Rename openai_provider.py to llm_provider.py
# - Change provider type from 'openai' to 'llm' in TranslationProviderType
# - Update all test files to use 'llm' provider and has_llm_api_key()
# - Add AliasChoices for explicit env var mapping in TranslationConfig
# - Update translate_content.py to fall back to config.target_language
# - Update cognify.py docstrings to reference 'llm' provider
# - Update .env.template and test README documentation
# The LLM provider now uses whatever LLM is configured in cognee
# (OpenAI, Azure, Ollama, Anthropic, etc.) instead of being tied to OpenAI.
###############################################################################
|
|
# NOTE: With default settings Cognee only needs an OpenAI LLM_API_KEY to be set.
|
|
# The rest of the settings don't have to be set.
|
|
# Default relational database: SQLite
|
|
# Default vector database : LanceDB
|
|
# Default graph database : Kuzu
|
|
#
|
|
# These default databases are all file-based, so no extra setup is needed
|
|
# for local use. The data by default will be stored in your .venv
|
|
###############################################################################
|
|
|
|
################################################################################
|
|
# 🧠 LLM Settings
|
|
################################################################################
|
|
# Currently we support BAML and Instructor (using litellm) for structured outputs
|
|
STRUCTURED_OUTPUT_FRAMEWORK="instructor"
|
|
|
|
LLM_API_KEY="your_api_key"
|
|
LLM_MODEL="openai/gpt-5-mini"
|
|
LLM_PROVIDER="openai"
|
|
LLM_ENDPOINT=""
|
|
LLM_API_VERSION=""
|
|
LLM_MAX_TOKENS="16384"
|
|
# Instructor's modes determine how structured data is requested from and extracted from LLM responses
|
|
# You can change this type (i.e. mode) via this env variable
|
|
# Each LLM has its own default value, e.g. gpt-5 models have "json_schema_mode"
|
|
LLM_INSTRUCTOR_MODE=""
|
|
|
|
EMBEDDING_PROVIDER="openai"
|
|
EMBEDDING_MODEL="openai/text-embedding-3-large"
|
|
EMBEDDING_ENDPOINT=""
|
|
EMBEDDING_API_VERSION=""
|
|
EMBEDDING_DIMENSIONS=3072
|
|
EMBEDDING_MAX_TOKENS=8191
|
|
EMBEDDING_BATCH_SIZE=36
|
|
# If embedding key is not provided same key set for LLM_API_KEY will be used
|
|
#EMBEDDING_API_KEY="your_api_key"
|
|
|
|
|
|
# If using BAML structured output these env variables will be used
|
|
BAML_LLM_PROVIDER=openai
|
|
BAML_LLM_MODEL="gpt-5-mini"
|
|
BAML_LLM_ENDPOINT=""
|
|
BAML_LLM_API_KEY="your_api_key"
|
|
BAML_LLM_API_VERSION=""
|
|
|
|
|
|
################################################################################
|
|
# 📂 ROOT DIRECTORY FOR DATABASES
|
|
################################################################################
|
|
# Set up the Cognee system directory. Cognee will store system files and databases here.
|
|
# Useful for setting root directory inside docker and also to avoid storing the databases in .venv
|
|
# DATA_ROOT_DIRECTORY='/Users/<user>/Desktop/cognee/.cognee_data/'
|
|
# SYSTEM_ROOT_DIRECTORY='/Users/<user>/Desktop/cognee/.cognee_system/'
|
|
|
|
################################################################################
|
|
# ☁️ Storage Backend Settings
|
|
################################################################################
|
|
# Configure storage backend (local filesystem or S3)
|
|
# STORAGE_BACKEND="local" # Default: uses local filesystem
|
|
#
|
|
# -- To switch to S3 storage, uncomment and fill these: ---------------------
|
|
# STORAGE_BACKEND="s3"
|
|
# STORAGE_BUCKET_NAME="your-bucket-name"
|
|
# AWS_REGION="us-east-1"
|
|
# AWS_ACCESS_KEY_ID="your-access-key"
|
|
# AWS_SECRET_ACCESS_KEY="your-secret-key"
|
|
#
|
|
# -- S3 Root Directories (optional) -----------------------------------------
|
|
# DATA_ROOT_DIRECTORY="s3://your-bucket/cognee/data"
|
|
# SYSTEM_ROOT_DIRECTORY="s3://your-bucket/cognee/system"
|
|
#
|
|
# -- Cache Directory (auto-configured for S3) -------------------------------
|
|
# When STORAGE_BACKEND=s3, cache automatically uses S3: s3://BUCKET/cognee/cache
|
|
# To override the automatic S3 cache location, uncomment:
|
|
# CACHE_ROOT_DIRECTORY="s3://your-bucket/cognee/cache"
|
|
|
|
################################################################################
|
|
# 🗄️ Relational database settings
|
|
################################################################################
|
|
|
|
DB_PROVIDER="sqlite"
|
|
DB_NAME=cognee_db
|
|
|
|
# -- To switch to Postgres / PGVector, uncomment and fill these: -------------
|
|
#DB_PROVIDER=postgres
|
|
#DB_NAME=cognee_db
|
|
# To use Postgres with the Cognee backend in Docker compose use the following instead: DB_HOST=host.docker.internal
|
|
#DB_HOST=127.0.0.1
|
|
#DB_PORT=5432
|
|
#DB_USERNAME=cognee
|
|
#DB_PASSWORD=cognee
|
|
|
|
# -- Advanced: Custom database connection arguments (optional) ---------------
|
|
# Pass additional connection parameters as JSON. Useful for SSL, timeouts, etc.
|
|
# Examples:
|
|
# For PostgreSQL with SSL:
|
|
# DATABASE_CONNECT_ARGS='{"sslmode": "require", "connect_timeout": 10}'
|
|
# For SQLite with custom timeout:
|
|
# DATABASE_CONNECT_ARGS='{"timeout": 60}'
|
|
#DATABASE_CONNECT_ARGS='{}'
|
|
|
|
################################################################################
|
|
# 🕸️ Graph Database settings
|
|
################################################################################
|
|
|
|
# Default (local file-based)
|
|
GRAPH_DATABASE_PROVIDER="kuzu"
|
|
# Handler for multi-user access control mode; it determines how the mapping/creation of separate DBs is handled per Cognee dataset
|
|
GRAPH_DATASET_DATABASE_HANDLER="kuzu"
|
|
|
|
# -- To switch to Remote Kuzu uncomment and fill these: -------------------------------------------------------------
|
|
#GRAPH_DATABASE_PROVIDER="kuzu"
|
|
#GRAPH_DATABASE_PROVIDER="kuzu-remote"
|
|
#GRAPH_DATABASE_URL="http://localhost:8000"
|
|
#GRAPH_DATABASE_USERNAME=XXX
|
|
#GRAPH_DATABASE_PASSWORD=YYY
|
|
|
|
# -- To switch to Neo4j uncomment and fill these: -------------------------------------------------------------------
|
|
#GRAPH_DATABASE_PROVIDER="neo4j"
|
|
#GRAPH_DATABASE_URL=bolt://localhost:7687
|
|
#GRAPH_DATABASE_NAME="neo4j"
|
|
#GRAPH_DATABASE_USERNAME=neo4j
|
|
#GRAPH_DATABASE_PASSWORD=localneo4j
|
|
|
|
################################################################################
|
|
# 📐 Vector Database settings
|
|
################################################################################
|
|
|
|
# Supported providers: pgvector | qdrant | weaviate | milvus | lancedb | chromadb
|
|
VECTOR_DB_PROVIDER="lancedb"
|
|
# Not needed if a cloud vector database is not used
|
|
VECTOR_DB_URL=
|
|
VECTOR_DB_KEY=
|
|
# Handler for multi-user access control mode; it determines how the mapping/creation of separate DBs is handled per Cognee dataset
|
|
VECTOR_DATASET_DATABASE_HANDLER="lancedb"
|
|
|
|
################################################################################
|
|
# 🧩 Ontology resolver settings
|
|
################################################################################
|
|
|
|
# -- Ontology resolver params --------------------------------------
|
|
# ONTOLOGY_RESOLVER=rdflib # Default: uses rdflib and owl file to read ontology structures
|
|
# MATCHING_STRATEGY=fuzzy # Default: uses fuzzy matching with 80% similarity threshold
|
|
# ONTOLOGY_FILE_PATH=YOUR_FULL_FILE_PATH # Default: empty
|
|
# To add ontology resolvers, either set them as it is set in ontology_example or add full_path and settings as envs.
|
|
|
|
################################################################################
|
|
# 🌐 Translation Settings
|
|
################################################################################
|
|
|
|
# Translation provider: llm (uses configured LLM), google, or azure
|
|
# "llm" uses whichever LLM is configured above (OpenAI, Azure, Ollama, Anthropic, etc.)
|
|
# "google" and "azure" use dedicated translation APIs
|
|
TRANSLATION_PROVIDER="llm"
|
|
|
|
# Default target language for translations (ISO 639-1 code, e.g., en, es, fr, de)
|
|
TARGET_LANGUAGE="en"
|
|
|
|
# Minimum confidence threshold for language detection (0.0 to 1.0)
|
|
CONFIDENCE_THRESHOLD=0.8
|
|
|
|
# -- Google Translate settings (required if using google provider) -----------
|
|
# GOOGLE_TRANSLATE_API_KEY="your-google-api-key"
|
|
# GOOGLE_PROJECT_ID="your-google-project-id"
|
|
|
|
# -- Azure Translator settings (required if using azure provider) ------------
|
|
# AZURE_TRANSLATOR_KEY="your-azure-translator-key"
|
|
# AZURE_TRANSLATOR_REGION="westeurope"
|
|
# AZURE_TRANSLATOR_ENDPOINT="https://api.cognitive.microsofttranslator.com"
|
|
|
|
# -- Performance settings ----------------------------------------------------
|
|
# TRANSLATION_BATCH_SIZE=10
|
|
# TRANSLATION_MAX_RETRIES=3
|
|
# TRANSLATION_TIMEOUT_SECONDS=30
|
|
|
|
################################################################################
|
|
# 🔄 MIGRATION (RELATIONAL → GRAPH) SETTINGS
|
|
################################################################################
|
|
|
|
MIGRATION_DB_PATH="/path/to/migration/directory"
|
|
MIGRATION_DB_NAME="migration_database.sqlite"
|
|
MIGRATION_DB_PROVIDER="sqlite"
|
|
|
|
# -- Postgres-specific migration params --------------------------------------
|
|
# MIGRATION_DB_USERNAME=cognee
|
|
# MIGRATION_DB_PASSWORD=cognee
|
|
# MIGRATION_DB_HOST="127.0.0.1"
|
|
# MIGRATION_DB_PORT=5432
|
|
|
|
################################################################################
|
|
# 🔒 Security Settings
|
|
################################################################################
|
|
|
|
# When set to False, adding local system files to Cognee is not allowed. Should be set to False when Cognee is used as a backend.
|
|
ACCEPT_LOCAL_FILE_PATH=True
|
|
|
|
# When set to false don't allow HTTP requests to be sent from Cognee.
|
|
# This protects against Server Side Request Forgery when proper infrastructure is not in place.
|
|
ALLOW_HTTP_REQUESTS=True
|
|
|
|
# When set to false don't allow cypher search to be used in Cognee.
|
|
ALLOW_CYPHER_QUERY=True
|
|
|
|
# When set to False errors during data processing will be returned as info but not raised to allow handling of faulty documents
|
|
RAISE_INCREMENTAL_LOADING_ERRORS=True
|
|
|
|
# When set to True, the Cognee backend will require authentication for requests to the API.
|
|
# If you're disabling this, make sure to also disable ENABLE_BACKEND_ACCESS_CONTROL.
|
|
REQUIRE_AUTHENTICATION=False
|
|
|
|
# Set this variable to True to enforce usage of backend access control for Cognee
|
|
# Note: This is only currently supported by the following databases:
|
|
# Relational: SQLite, Postgres
|
|
# Vector: LanceDB
|
|
# Graph: KuzuDB
|
|
#
|
|
# It enforces creation of databases per Cognee user + dataset. Does not work with some graph and database providers.
|
|
# Disable mode when using not supported graph/vector databases.
|
|
ENABLE_BACKEND_ACCESS_CONTROL=True
|
|
|
|
################################################################################
|
|
# ☁️ Cloud Sync Settings
|
|
################################################################################
|
|
|
|
# Cognee Cloud API settings for syncing data to/from cloud infrastructure
|
|
COGNEE_CLOUD_API_URL="http://localhost:8001"
|
|
COGNEE_CLOUD_AUTH_TOKEN="your-api-key"
|
|
|
|
################################################################################
|
|
# UI Settings
|
|
################################################################################
|
|
|
|
# URL where the frontend is served, defaults to http://localhost:3000
|
|
UI_APP_URL=http://localhost:3000
|
|
|
|
################################################################################
|
|
# 🛠️ DEV Settings
|
|
################################################################################
|
|
|
|
ENV="local"
|
|
|
|
TOKENIZERS_PARALLELISM="false"
|
|
|
|
# LITELLM Logging Level. Set to quiet down logging
|
|
LITELLM_LOG="ERROR"
|
|
|
|
# Set this environment variable to disable sending telemetry data
|
|
# TELEMETRY_DISABLED=1
|
|
|
|
# Default User Configuration
|
|
# DEFAULT_USER_EMAIL=""
|
|
# DEFAULT_USER_PASSWORD=""
|
|
|
|
################################################################################
|
|
# 📂 AWS Settings
|
|
################################################################################
|
|
|
|
#AWS_REGION=""
|
|
#AWS_ENDPOINT_URL=""
|
|
#AWS_ACCESS_KEY_ID=""
|
|
#AWS_SECRET_ACCESS_KEY=""
|
|
#AWS_SESSION_TOKEN=""
|
|
|
|
# ------------------------------- END OF POSSIBLE SETTINGS -------------------------------
|
|
|
|
|
|
###############################################################################
|
|
# 🧪 EXAMPLE OVERRIDES (commented out)
|
|
###############################################################################
|
|
# The blocks below show how to configure alternative providers.
|
|
# Uncomment + fill values to switch.
|
|
|
|
########## Azure OpenAI #######################################################
|
|
#LLM_MODEL="azure/gpt-5-mini"
|
|
#LLM_ENDPOINT="https://DNS.azure.com/openai/deployments/gpt-5-mini"
|
|
#LLM_API_KEY="<<TALK TO YOUR AZURE GUY>>"
|
|
#LLM_API_VERSION="2024-12-01-preview"
|
|
|
|
## llm api version might not be relevant
|
|
#LLM_MAX_TOKENS="16384"
|
|
|
|
#EMBEDDING_MODEL="azure/text-embedding-3-large"
|
|
#EMBEDDING_ENDPOINT="https://DNS.openai.azure.com/openai/deployments/text-embedding-3-large"
|
|
#EMBEDDING_API_KEY="<<TALK TO YOUR AZURE GUY>>"
|
|
#EMBEDDING_API_VERSION="2024-12-01-preview"
|
|
#EMBEDDING_DIMENSIONS=3072
|
|
#EMBEDDING_MAX_TOKENS=8191
|
|
|
|
########## Local LLM via Ollama ###############################################
|
|
|
|
|
|
#LLM_API_KEY="ollama"
|
|
#LLM_MODEL="llama3.1:8b"
|
|
#LLM_PROVIDER="ollama"
|
|
#LLM_ENDPOINT="http://localhost:11434/v1"
|
|
#EMBEDDING_PROVIDER="ollama"
|
|
#EMBEDDING_MODEL="nomic-embed-text:latest"
|
|
#EMBEDDING_ENDPOINT="http://localhost:11434/api/embed"
|
|
#EMBEDDING_DIMENSIONS=768
|
|
#HUGGINGFACE_TOKENIZER="nomic-ai/nomic-embed-text-v1.5"
|
|
|
|
########## OpenRouter (also free) #########################################################
|
|
|
|
#LLM_API_KEY="<<go-get-one-yourself>>"
|
|
#LLM_PROVIDER="custom"
|
|
#LLM_MODEL="openrouter/google/gemini-2.0-flash-lite-preview-02-05:free"
|
|
#LLM_ENDPOINT="https://openrouter.ai/api/v1"
|
|
|
|
########## DeepInfra ##########################################################
|
|
|
|
#LLM_API_KEY="<<>>"
|
|
#LLM_PROVIDER="custom"
|
|
#LLM_MODEL="deepinfra/meta-llama/Meta-Llama-3-8B-Instruct"
|
|
#LLM_ENDPOINT="https://api.deepinfra.com/v1/openai"
|
|
|
|
#EMBEDDING_PROVIDER="openai"
|
|
#EMBEDDING_API_KEY="<<>>"
|
|
#EMBEDDING_MODEL="deepinfra/BAAI/bge-base-en-v1.5"
|
|
#EMBEDDING_ENDPOINT=""
|
|
#EMBEDDING_API_VERSION=""
|
|
#EMBEDDING_DIMENSIONS=3072
|
|
#EMBEDDING_MAX_TOKENS=8191
|
|
|
|
|
|
########## Release Test ###############################################
|
|
|
|
#LLM_API_KEY="..."
|
|
|
|
#OPENAI_API_KEY="..."
|
|
|
|
#MIGRATION_DB_PATH="~/Downloads/"
|
|
#MIGRATION_DB_NAME="Chinook_Sqlite.sqlite"
|
|
#MIGRATION_DB_PROVIDER="sqlite"
|
|
|
|
#GRAPH_DATABASE_URL="bolt://54.246.89.112:7687"
|
|
#GRAPH_DATABASE_USERNAME="neo4j"
|
|
#GRAPH_DATABASE_PASSWORD="pleaseletmein"
|