From 1319944dcd6e4a325289b9474d976dab49df25fb Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 23 Jan 2025 18:05:45 +0100 Subject: [PATCH] docs: Update .env.template to include llm and embedding options --- .env.template | 17 ++++++++++++++++- .../vector/embeddings/LiteLLMEmbeddingEngine.py | 2 -- .../databases/vector/embeddings/config.py | 2 +- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/.env.template b/.env.template index 75a57de4d..6ce9fedf9 100644 --- a/.env.template +++ b/.env.template @@ -1,12 +1,27 @@ ENV="local" TOKENIZERS_PARALLELISM="false" -LLM_API_KEY= + +# LLM settings +LLM_API_KEY="" +LLM_MODEL="openai/gpt-4o-mini" +LLM_PROVIDER="openai" +LLM_ENDPOINT="" +LLM_API_VERSION="" GRAPHISTRY_USERNAME= GRAPHISTRY_PASSWORD= SENTRY_REPORTING_URL= +# Embedding settings +EMBEDDING_PROVIDER="openai" +EMBEDDING_API_KEY="" +EMBEDDING_MODEL="openai/text-embedding-3-large" +EMBEDDING_ENDPOINT="" +EMBEDDING_API_VERSION="" +EMBEDDING_DIMENSIONS=3072 +EMBEDDING_MAX_TOKENS=8191 + # "neo4j" or "networkx" GRAPH_DATABASE_PROVIDER="networkx" # Not needed if using networkx diff --git a/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py b/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py index 50dde8e89..cb84337c2 100644 --- a/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +++ b/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py @@ -9,8 +9,6 @@ from cognee.infrastructure.databases.exceptions.EmbeddingException import Embedd from cognee.infrastructure.llm.tokenizer.Gemini import GeminiTokenizer from cognee.infrastructure.llm.tokenizer.HuggingFace import HuggingFaceTokenizer from cognee.infrastructure.llm.tokenizer.TikToken import TikTokenTokenizer -from transformers import AutoTokenizer -import tiktoken # Assuming this is how you import TikToken litellm.set_verbose = False logger = logging.getLogger("LiteLLMEmbeddingEngine") diff --git a/cognee/infrastructure/databases/vector/embeddings/config.py b/cognee/infrastructure/databases/vector/embeddings/config.py index cb72a46f4..315caf7ef 100644 --- a/cognee/infrastructure/databases/vector/embeddings/config.py +++ b/cognee/infrastructure/databases/vector/embeddings/config.py @@ -10,7 +10,7 @@ class EmbeddingConfig(BaseSettings): embedding_endpoint: Optional[str] = None embedding_api_key: Optional[str] = None embedding_api_version: Optional[str] = None - embedding_max_tokens: Optional[int] = float("inf") + embedding_max_tokens: Optional[int] = 8191 model_config = SettingsConfigDict(env_file=".env", extra="allow")