cognee/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py
Igor Ilic 8879f3fbbe
feat: Add gemini support [COG-1023] (#485)
<!-- .github/pull_request_template.md -->

## Description
PR to test the Gemini support PR from holchan

1. Add Gemini LLM and Gemini Embedding support (a configuration sketch follows this list)
2. Fix a CodeGraph issue with chunks being bigger than the maximum token value
3. Add Tokenizer adapters to CodeGraph
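
On the embedding side, the engine can target Gemini directly through its constructor. A minimal configuration sketch, assuming the LiteLLM-style model name `gemini/text-embedding-004` and 768-dimensional vectors (illustrative values, not defaults introduced by this PR):

```python
from cognee.infrastructure.databases.vector.embeddings.LiteLLMEmbeddingEngine import (
    LiteLLMEmbeddingEngine,
)

# Model name, dimensions, and key placeholder are assumptions for illustration.
engine = LiteLLMEmbeddingEngine(
    model="gemini/text-embedding-004",
    provider="gemini",
    dimensions=768,
    api_key="<your-gemini-api-key>",
)
```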

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
    - Added support for the Gemini LLM provider.
    - Expanded LLM configuration options.
    - Introduced a new GitHub Actions workflow for multimetric QA evaluation.
    - Added new environment variables for LLM and embedding configurations across various workflows.

- **Bug Fixes**
    - Improved error handling in various components.
    - Updated tokenization and embedding processes.
    - Removed warning related to missing `dict` method in data items.

- **Refactor**
    - Simplified token extraction and decoding methods.
    - Updated tokenizer interfaces.
    - Removed deprecated dependencies.
    - Enhanced retry logic and error handling in embedding processes.

- **Documentation**
    - Updated configuration comments and settings.

- **Chores**
    - Updated GitHub Actions workflows to accommodate new secrets and environment variables.
    - Modified evaluation parameters.
    - Adjusted dependency management for optional libraries.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: holchan <61059652+holchan@users.noreply.github.com>
Co-authored-by: Boris <boris@topoteretes.com>
2025-01-31 18:03:23 +01:00


import asyncio
import logging
import math
import os
from typing import List, Optional

import litellm

from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
from cognee.infrastructure.databases.exceptions.EmbeddingException import EmbeddingException
from cognee.infrastructure.llm.tokenizer.Gemini import GeminiTokenizer
from cognee.infrastructure.llm.tokenizer.HuggingFace import HuggingFaceTokenizer
from cognee.infrastructure.llm.tokenizer.TikToken import TikTokenTokenizer

litellm.set_verbose = False
logger = logging.getLogger("LiteLLMEmbeddingEngine")

class LiteLLMEmbeddingEngine(EmbeddingEngine):
    api_key: str
    endpoint: str
    api_version: str
    provider: str
    model: str
    dimensions: int
    mock: bool

    MAX_RETRIES = 5

    def __init__(
        self,
        model: Optional[str] = "openai/text-embedding-3-large",
        provider: str = "openai",
        dimensions: Optional[int] = 3072,
        api_key: str = None,
        endpoint: str = None,
        api_version: str = None,
        max_tokens: int = 512,
    ):
        self.api_key = api_key
        self.endpoint = endpoint
        self.api_version = api_version
        self.provider = provider
        self.model = model
        self.dimensions = dimensions
        self.max_tokens = max_tokens
        self.tokenizer = self.get_tokenizer()
        self.retry_count = 0

        # MOCK_EMBEDDING arrives from the environment as a string; normalize it
        # so that "true", "1", and "yes" enable mocked embeddings.
        enable_mocking = os.getenv("MOCK_EMBEDDING", "false")
        if isinstance(enable_mocking, bool):
            enable_mocking = str(enable_mocking).lower()
        self.mock = enable_mocking in ("true", "1", "yes")

    async def embed_text(self, text: List[str]) -> List[List[float]]:
        async def exponential_backoff(attempt):
            # Waits 10s, 20s, 40s, then caps at 60s for every later attempt.
            wait_time = min(10 * (2**attempt), 60)
            await asyncio.sleep(wait_time)

        try:
            if self.mock:
                response = {"data": [{"embedding": [0.0] * self.dimensions} for _ in text]}
                self.retry_count = 0
                return [data["embedding"] for data in response["data"]]
            else:
                response = await litellm.aembedding(
                    model=self.model,
                    input=text,
                    api_key=self.api_key,
                    api_base=self.endpoint,
                    api_version=self.api_version,
                )
                self.retry_count = 0  # Reset retry count on successful call
                return [data["embedding"] for data in response.data]

        except litellm.exceptions.ContextWindowExceededError as error:
            # A batch that exceeds the context window is split in half and
            # each half is embedded recursively.
            if isinstance(text, list) and len(text) > 1:
                mid = math.ceil(len(text) / 2)
                parts = [text[:mid], text[mid:]]
                parts_futures = [self.embed_text(part) for part in parts]
                embeddings = await asyncio.gather(*parts_futures)

                all_embeddings = []
                for embeddings_part in embeddings:
                    all_embeddings.extend(embeddings_part)

                return all_embeddings

            # A single text that still exceeds the context window cannot be
            # split further here; retrying would recurse forever, so re-raise.
            logger.error("Context window exceeded for embedding text: %s", str(error))
            raise error

        except litellm.exceptions.RateLimitError:
            if self.retry_count >= self.MAX_RETRIES:
                raise Exception("Rate limit exceeded and no more retries left.")

            await exponential_backoff(self.retry_count)
            self.retry_count += 1

            return await self.embed_text(text)

        except (
            litellm.exceptions.BadRequestError,
            litellm.exceptions.NotFoundError,
        ) as e:
            logger.error(f"Embedding error with model {self.model}: {str(e)}")
            raise EmbeddingException(f"Failed to index data points using model {self.model}")

        except Exception as error:
            logger.error("Error embedding text: %s", str(error))
            raise error

    def get_vector_size(self) -> int:
        return self.dimensions

    def get_tokenizer(self):
        logger.debug(f"Loading tokenizer for model {self.model}...")

        # If the model string also carries provider information
        # (e.g. "openai/text-embedding-3-large"), keep only the model part.
        model = self.model.split("/")[-1]

        if "openai" in self.provider.lower():
            tokenizer = TikTokenTokenizer(model=model, max_tokens=self.max_tokens)
        elif "gemini" in self.provider.lower():
            tokenizer = GeminiTokenizer(model=model, max_tokens=self.max_tokens)
        else:
            tokenizer = HuggingFaceTokenizer(model=self.model, max_tokens=self.max_tokens)

        logger.debug(f"Tokenizer loaded for model: {self.model}")
        return tokenizer
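

if __name__ == "__main__":
    # Usage sketch: drive the engine with mocked embeddings so the example
    # runs without real API credentials (MOCK_EMBEDDING is read in __init__).
    # The _demo helper is illustrative, not part of the engine's API.
    os.environ.setdefault("MOCK_EMBEDDING", "true")

    async def _demo():
        engine = LiteLLMEmbeddingEngine()  # defaults to openai/text-embedding-3-large
        vectors = await engine.embed_text(["hello", "world"])
        print(f"Embedded {len(vectors)} texts into vectors of size {engine.get_vector_size()}")

    asyncio.run(_demo())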