feat: Add gemini support [COG-1023] (#485)
## Description
This PR integrates the Gemini support contributed by holchan:
1. Add Gemini LLM and Gemini embedding support
2. Fix a CodeGraph issue where chunks could exceed the maximum token limit
3. Add tokenizer adapters to CodeGraph
## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit
- **New Features**
  - Added support for the Gemini LLM provider.
  - Expanded LLM configuration options.
  - Introduced a new GitHub Actions workflow for multimetric QA evaluation.
  - Added new environment variables for LLM and embedding configurations across various workflows.
- **Bug Fixes**
  - Improved error handling in various components.
  - Updated tokenization and embedding processes.
  - Removed warning related to missing `dict` method in data items.
- **Refactor**
  - Simplified token extraction and decoding methods.
  - Updated tokenizer interfaces.
  - Removed deprecated dependencies.
  - Enhanced retry logic and error handling in embedding processes.
- **Documentation**
  - Updated configuration comments and settings.
- **Chores**
  - Updated GitHub Actions workflows to accommodate new secrets and environment variables.
  - Modified evaluation parameters.
  - Adjusted dependency management for optional libraries.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
---------
Co-authored-by: holchan <61059652+holchan@users.noreply.github.com>
Co-authored-by: Boris <boris@topoteretes.com>
Parent: f843c256e4
Commit: 8879f3fbbe

42 changed files with 494 additions and 101 deletions
Environment template update:

```diff
@@ -1,7 +1,7 @@
 ENV="local"
 TOKENIZERS_PARALLELISM="false"
 
-# LLM settings
+# LLM Configuration
 LLM_API_KEY=""
 LLM_MODEL="openai/gpt-4o-mini"
 LLM_PROVIDER="openai"
@@ -14,7 +14,7 @@ GRAPHISTRY_PASSWORD=
 
 SENTRY_REPORTING_URL=
 
-# Embedding settings
+# Embedding Configuration
 EMBEDDING_PROVIDER="openai"
 EMBEDDING_API_KEY=""
 EMBEDDING_MODEL="openai/text-embedding-3-large"
```
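For a Gemini-based setup, the template above might plausibly be filled in as follows; the provider and model identifiers and the key are illustrative assumptions, not defaults shipped by this PR.

```python
# Hypothetical Gemini-flavored settings mirroring the template above.
# The model id follows LiteLLM's "provider/model" convention; the key is a placeholder.
import os

os.environ["LLM_PROVIDER"] = "gemini"
os.environ["LLM_MODEL"] = "gemini/gemini-2.0-flash-exp"
os.environ["LLM_API_KEY"] = "<your-google-api-key>"
```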
`.github/workflows/reusable_notebook.yml` (25 lines changed):

```diff
@@ -12,8 +12,24 @@ on:
       required: true
     GRAPHISTRY_PASSWORD:
       required: true
+    #LLM_MODEL:
+    # required: true
+    #LLM_ENDPOINT:
+    # required: true
+    LLM_API_KEY:
+      required: true
+    OPENAI_API_KEY:
+      required: true
+    #LLM_API_VERSION:
+    # required: true
+    EMBEDDING_MODEL:
+      required: true
+    EMBEDDING_ENDPOINT:
+      required: true
+    EMBEDDING_API_KEY:
+      required: true
+    EMBEDDING_API_VERSION:
+      required: true
 
 env:
   RUNTIME__LOG_LEVEL: ERROR
@@ -50,8 +66,15 @@ jobs:
       - name: Execute Jupyter Notebook
         env:
           ENV: 'dev'
+          #LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          #LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} # Use OpenAI Until a multimedia model is deployed and DeepEval support for other models is added
+          #LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
           GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
           GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
         run: |
```
`.github/workflows/reusable_python_example.yml` (26 lines changed):

```diff
@@ -16,7 +16,23 @@ on:
       required: true
     GRAPHISTRY_PASSWORD:
       required: true
+    LLM_MODEL:
+      required: true
+    LLM_ENDPOINT:
+      required: true
+    LLM_API_KEY:
+      required: true
+    OPENAI_API_KEY:
+      required: false
+    LLM_API_VERSION:
+      required: true
+    EMBEDDING_MODEL:
+      required: true
+    EMBEDDING_ENDPOINT:
+      required: true
+    EMBEDDING_API_KEY:
+      required: true
+    EMBEDDING_API_VERSION:
+      required: true
 
 env:
@@ -54,7 +70,15 @@ jobs:
         env:
           ENV: 'dev'
           PYTHONFAULTHANDLER: 1
-          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
           GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
           GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
         run: poetry run python ${{ inputs.example-location }} ${{ inputs.arguments }}
```
Workflow running `./examples/python/code_graph_example.py`:

```diff
@@ -17,6 +17,13 @@ jobs:
       example-location: ./examples/python/code_graph_example.py
       arguments: "--repo_path ./evals"
     secrets:
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      LLM_MODEL: ${{ secrets.LLM_MODEL }}
+      LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+      LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+      LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+      EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+      EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+      EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+      EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
       GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
       GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
```
Workflow running `notebooks/cognee_llama_index.ipynb`:

```diff
@@ -15,6 +15,14 @@ jobs:
     with:
       notebook-location: notebooks/cognee_llama_index.ipynb
     secrets:
+      #LLM_MODEL: ${{ secrets.LLM_MODEL }}
+      #LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+      LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      #LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+      EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+      EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+      EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+      EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
       GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
       GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
```
Workflow running `notebooks/cognee_multimedia_demo.ipynb`:

```diff
@@ -15,6 +15,14 @@ jobs:
     with:
       notebook-location: notebooks/cognee_multimedia_demo.ipynb
     secrets:
+      #LLM_MODEL: ${{ secrets.LLM_MODEL }}
+      #LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+      LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      #LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+      EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+      EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+      EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+      EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
       GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
       GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
```
`.github/workflows/test_deduplication.yml` (9 lines changed):

```diff
@@ -57,5 +57,12 @@ jobs:
       - name: Run deduplication test
         env:
           ENV: 'dev'
-          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
         run: poetry run python ./cognee/tests/test_deduplication.py
```
Workflow running `./examples/python/dynamic_steps_example.py` through the reusable example:

```diff
@@ -16,6 +16,13 @@ jobs:
     with:
       example-location: ./examples/python/dynamic_steps_example.py
     secrets:
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      LLM_MODEL: ${{ secrets.LLM_MODEL }}
+      LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+      LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+      LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+      EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+      EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+      EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+      EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
       GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
       GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
```
Workflow running `./examples/python/dynamic_steps_example.py` directly:

```diff
@@ -38,5 +38,12 @@ jobs:
         env:
           ENV: 'dev'
           PYTHONFAULTHANDLER: 1
-          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
         run: poetry run python ./examples/python/dynamic_steps_example.py
```
Workflow running `notebooks/llama_index_cognee_integration.ipynb`:

```diff
@@ -15,6 +15,14 @@ jobs:
     with:
       notebook-location: notebooks/llama_index_cognee_integration.ipynb
     secrets:
+      #LLM_MODEL: ${{ secrets.LLM_MODEL }}
+      #LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+      LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      #LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+      EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+      EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+      EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+      EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
       GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
       GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
```
`.github/workflows/test_milvus.yml` (9 lines changed):

```diff
@@ -47,7 +47,14 @@ jobs:
       - name: Run default basic pipeline
         env:
           ENV: 'dev'
-          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
         run: poetry run python ./cognee/tests/test_milvus.py
 
       - name: Clean up disk space
```
Workflow running `./examples/python/multimedia_example.py`:

```diff
@@ -16,6 +16,13 @@ jobs:
     with:
       example-location: ./examples/python/multimedia_example.py
     secrets:
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      #LLM_MODEL: ${{ secrets.LLM_MODEL }}
+      #LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+      LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} # Use OpenAI until we deploy models to handle multimedia
+      #LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+      EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+      EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+      EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+      EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
       GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
       GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
```
`.github/workflows/test_multimetric_qa_eval_run.yaml` (new file, 30 lines):

```yaml
name: test | multimetric qa eval run

on:
  workflow_dispatch:
  pull_request:
    types: [labeled, synchronize]

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  run_multimetric_qa_eval_test:
    uses: ./.github/workflows/reusable_python_example.yml
    with:
      example-location: ./evals/multimetric_qa_eval_run.py
      arguments: "--params_file evals/qa_eval_parameters.json --out_dir dirname"
    secrets:
      LLM_MODEL: ${{ secrets.LLM_MODEL }}
      LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
      LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
      LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} # Until we add support for azure for DeepEval
      EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
      EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
      EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
      EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
      GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
      GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
```
`.github/workflows/test_neo4j.yml` (9 lines changed):

```diff
@@ -43,7 +43,14 @@ jobs:
       - name: Run default Neo4j
         env:
           ENV: 'dev'
-          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
           GRAPH_DATABASE_URL: ${{ secrets.NEO4J_API_URL }}
           GRAPH_DATABASE_PASSWORD: ${{ secrets.NEO4J_API_KEY }}
           GRAPH_DATABASE_USERNAME: "neo4j"
```
`.github/workflows/test_notebook.yml` (8 lines changed):

```diff
@@ -16,6 +16,14 @@ jobs:
     with:
       notebook-location: notebooks/cognee_demo.ipynb
     secrets:
+      #LLM_MODEL: ${{ secrets.LLM_MODEL }}
+      #LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+      LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      #LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+      EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+      EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+      EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+      EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
       GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
       GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
```
`.github/workflows/test_pgvector.yml` (9 lines changed):

```diff
@@ -58,5 +58,12 @@ jobs:
       - name: Run default PGVector
         env:
           ENV: 'dev'
-          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
         run: poetry run python ./cognee/tests/test_pgvector.py
```
`.github/workflows/test_python_3_10.yml` (9 lines changed):

```diff
@@ -56,7 +56,14 @@ jobs:
       - name: Run default basic pipeline
         env:
           ENV: 'dev'
-          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
         run: poetry run python ./cognee/tests/test_library.py
 
       - name: Clean up disk space
```
`.github/workflows/test_python_3_11.yml` (9 lines changed):

```diff
@@ -58,7 +58,14 @@ jobs:
       - name: Run default basic pipeline
         env:
           ENV: 'dev'
-          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
         run: poetry run python ./cognee/tests/test_library.py
 
       - name: Clean up disk space
```
`.github/workflows/test_python_3_12.yml` (9 lines changed):

```diff
@@ -56,7 +56,14 @@ jobs:
       - name: Run default basic pipeline
         env:
           ENV: 'dev'
-          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
         run: poetry run python ./cognee/tests/test_library.py
 
       - name: Clean up disk space
```
`.github/workflows/test_qdrant.yml` (9 lines changed):

```diff
@@ -44,7 +44,14 @@ jobs:
       - name: Run default Qdrant
         env:
           ENV: 'dev'
-          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
           VECTOR_DB_URL: ${{ secrets.QDRANT_API_URL }}
           VECTOR_DB_KEY: ${{ secrets.QDRANT_API_KEY }}
         run: poetry run python ./cognee/tests/test_qdrant.py
```
`.github/workflows/test_simple_example.yml` (9 lines changed):

```diff
@@ -16,6 +16,13 @@ jobs:
     with:
       example-location: ./examples/python/simple_example.py
     secrets:
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      LLM_MODEL: ${{ secrets.LLM_MODEL }}
+      LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+      LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+      LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+      EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+      EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+      EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+      EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
       GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
       GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
```
`.github/workflows/test_weaviate.yml` (9 lines changed):

```diff
@@ -44,7 +44,14 @@ jobs:
       - name: Run default Weaviate
         env:
           ENV: 'dev'
-          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
           VECTOR_DB_URL: ${{ secrets.WEAVIATE_API_URL }}
           VECTOR_DB_KEY: ${{ secrets.WEAVIATE_API_KEY }}
         run: poetry run python ./cognee/tests/test_weaviate.py
```
Settings DTO update:

```diff
@@ -21,7 +21,7 @@ class SettingsDTO(OutDTO):
 
 
 class LLMConfigInputDTO(InDTO):
-    provider: Union[Literal["openai"], Literal["ollama"], Literal["anthropic"]]
+    provider: Union[Literal["openai"], Literal["ollama"], Literal["anthropic"], Literal["gemini"]]
     model: str
     api_key: str
```
`EmbeddingException` update:

```diff
@@ -1,4 +1,14 @@
-class EmbeddingException(Exception):
+from cognee.exceptions import CogneeApiError
+from fastapi import status
+
+
+class EmbeddingException(CogneeApiError):
     """Custom exception for handling embedding-related errors."""
 
-    pass
+    def __init__(
+        self,
+        message: str = "Embedding Exception.",
+        name: str = "EmbeddingException",
+        status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+    ):
+        super().__init__(message, name, status_code)
```
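A minimal, self-contained sketch of what the richer exception enables; `ApiErrorStandIn` mimics the assumed `(message, name, status_code)` constructor of `CogneeApiError`, which is inferred from the `super().__init__` call above.

```python
from fastapi import status

# ApiErrorStandIn mimics CogneeApiError's assumed constructor so this
# example runs on its own, without the cognee package.
class ApiErrorStandIn(Exception):
    def __init__(self, message: str, name: str, status_code: int):
        super().__init__(message)
        self.name = name
        self.status_code = status_code


class EmbeddingExceptionSketch(ApiErrorStandIn):
    def __init__(self, message: str = "Embedding Exception."):
        super().__init__(message, "EmbeddingException", status.HTTP_422_UNPROCESSABLE_ENTITY)


try:
    raise EmbeddingExceptionSketch("Failed to index data points.")
except EmbeddingExceptionSketch as error:
    print(error.name, error.status_code)  # EmbeddingException 422
```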
`LiteLLMEmbeddingEngine` update:

```diff
@@ -23,10 +23,12 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
     dimensions: int
     mock: bool
 
+    MAX_RETRIES = 5
+
     def __init__(
         self,
-        model: Optional[str] = "openai/text-embedding-3-large",
+        provider: str = "openai",
+        model: Optional[str] = "text-embedding-3-large",
         dimensions: Optional[int] = 3072,
         api_key: str = None,
         endpoint: str = None,
@@ -41,15 +43,13 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
         self.dimensions = dimensions
         self.max_tokens = max_tokens
+        self.tokenizer = self.get_tokenizer()
+        self.retry_count = 0
 
         enable_mocking = os.getenv("MOCK_EMBEDDING", "false")
         if isinstance(enable_mocking, bool):
             enable_mocking = str(enable_mocking).lower()
         self.mock = enable_mocking in ("true", "1", "yes")
 
-    MAX_RETRIES = 5
-    retry_count = 0
-
     async def embed_text(self, text: List[str]) -> List[List[float]]:
         async def exponential_backoff(attempt):
             wait_time = min(10 * (2**attempt), 60)  # Max 60 seconds
@@ -64,14 +64,14 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
             return [data["embedding"] for data in response["data"]]
         else:
             response = await litellm.aembedding(
-                self.model,
+                model=self.model,
                 input=text,
                 api_key=self.api_key,
                 api_base=self.endpoint,
                 api_version=self.api_version,
             )
 
-            self.retry_count = 0
+            self.retry_count = 0  # Reset retry count on successful call
 
             return [data["embedding"] for data in response.data]
@@ -99,13 +99,16 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
                 raise Exception("Rate limit exceeded and no more retries left.")
 
             await exponential_backoff(self.retry_count)
 
             self.retry_count += 1
 
             return await self.embed_text(text)
 
-        except litellm.exceptions.BadRequestError:
-            raise EmbeddingException("Failed to index data points.")
+        except (
+            litellm.exceptions.BadRequestError,
+            litellm.exceptions.NotFoundError,
+        ) as e:
+            logger.error(f"Embedding error with model {self.model}: {str(e)}")
+            raise EmbeddingException(f"Failed to index data points using model {self.model}")
 
         except Exception as error:
             logger.error("Error embedding text: %s", str(error))
```
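The retry path relies on capped exponential backoff; below is a self-contained sketch of that pattern, with `RuntimeError` standing in for LiteLLM's rate-limit exception.

```python
import asyncio

# Sketch of the retry pattern above: wait 10 * 2**attempt seconds,
# capped at 60, between attempts (mirrors exponential_backoff).
async def exponential_backoff(attempt: int) -> None:
    wait_time = min(10 * (2 ** attempt), 60)  # max 60 seconds
    await asyncio.sleep(wait_time)


async def call_with_retries(make_call, max_retries: int = 5):
    for attempt in range(max_retries):
        try:
            return await make_call()
        except RuntimeError:  # stand-in for litellm's rate-limit error
            await exponential_backoff(attempt)
    raise Exception("Rate limit exceeded and no more retries left.")
```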
`cognee/infrastructure/llm/gemini/__init__.py` (new file, empty)

`cognee/infrastructure/llm/gemini/adapter.py` (new file, 155 lines):

```python
from typing import Type, Optional
from pydantic import BaseModel
import logging
import litellm
import asyncio
from litellm import acompletion, JSONSchemaValidationError
from cognee.shared.data_models import MonitoringTool
from cognee.exceptions import InvalidValueError
from cognee.infrastructure.llm.llm_interface import LLMInterface
from cognee.infrastructure.llm.prompts import read_query_prompt
from cognee.base_config import get_base_config

logger = logging.getLogger(__name__)

monitoring = get_base_config().monitoring_tool
if monitoring == MonitoringTool.LANGFUSE:
    from langfuse.decorators import observe


class GeminiAdapter(LLMInterface):
    MAX_RETRIES = 5

    def __init__(
        self,
        api_key: str,
        model: str,
        max_tokens: int,
        endpoint: Optional[str] = None,
        api_version: Optional[str] = None,
        streaming: bool = False,
    ) -> None:
        self.api_key = api_key
        self.model = model
        self.endpoint = endpoint
        self.api_version = api_version
        self.streaming = streaming
        self.max_tokens = max_tokens

    @observe(as_type="generation")
    async def acreate_structured_output(
        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
    ) -> BaseModel:
        try:
            response_schema = {
                "type": "object",
                "properties": {
                    "summary": {"type": "string"},
                    "description": {"type": "string"},
                    "nodes": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "name": {"type": "string"},
                                "type": {"type": "string"},
                                "description": {"type": "string"},
                                "id": {"type": "string"},
                                "label": {"type": "string"},
                            },
                            "required": ["name", "type", "description", "id", "label"],
                        },
                    },
                    "edges": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "source_node_id": {"type": "string"},
                                "target_node_id": {"type": "string"},
                                "relationship_name": {"type": "string"},
                            },
                            "required": ["source_node_id", "target_node_id", "relationship_name"],
                        },
                    },
                },
                "required": ["summary", "description", "nodes", "edges"],
            }

            simplified_prompt = f"""
            {system_prompt}

            IMPORTANT: Your response must be a valid JSON object with these required fields:
            1. summary: A brief summary
            2. description: A detailed description
            3. nodes: Array of nodes with name, type, description, id, and label
            4. edges: Array of edges with source_node_id, target_node_id, and relationship_name

            Example structure:
            {{
                "summary": "Brief summary",
                "description": "Detailed description",
                "nodes": [
                    {{
                        "name": "Example Node",
                        "type": "Concept",
                        "description": "Node description",
                        "id": "example-id",
                        "label": "Concept"
                    }}
                ],
                "edges": [
                    {{
                        "source_node_id": "source-id",
                        "target_node_id": "target-id",
                        "relationship_name": "relates_to"
                    }}
                ]
            }}"""

            messages = [
                {"role": "system", "content": simplified_prompt},
                {"role": "user", "content": text_input},
            ]

            try:
                response = await acompletion(
                    model=f"{self.model}",
                    messages=messages,
                    api_key=self.api_key,
                    max_tokens=self.max_tokens,
                    temperature=0.1,
                    response_format={"type": "json_object", "schema": response_schema},
                    timeout=10,
                    num_retries=self.MAX_RETRIES,
                )

                if response.choices and response.choices[0].message.content:
                    content = response.choices[0].message.content
                    return response_model.model_validate_json(content)

            except litellm.exceptions.BadRequestError as e:
                logger.error(f"Bad request error: {str(e)}")
                raise ValueError(f"Invalid request: {str(e)}")

            raise ValueError("Failed to get valid response after retries")

        except JSONSchemaValidationError as e:
            logger.error(f"Schema validation failed: {str(e)}")
            logger.debug(f"Raw response: {e.raw_response}")
            raise ValueError(f"Response failed schema validation: {str(e)}")

    def show_prompt(self, text_input: str, system_prompt: str) -> str:
        """Format and display the prompt for a user query."""
        if not text_input:
            text_input = "No user input provided."
        if not system_prompt:
            raise InvalidValueError(message="No system prompt path provided.")
        system_prompt = read_query_prompt(system_prompt)

        formatted_prompt = (
            f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""
            if system_prompt
            else None
        )
        return formatted_prompt
```
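A hedged usage sketch for the new adapter follows; `KnowledgeGraph` and its node/edge models are stand-ins shaped after the response schema above, and the model id and API key are placeholders rather than values fixed by this PR.

```python
# Hypothetical driver for GeminiAdapter; assumes the adapter module imports
# cleanly in your monitoring configuration, and uses placeholder credentials.
import asyncio
from typing import List
from pydantic import BaseModel

from cognee.infrastructure.llm.gemini.adapter import GeminiAdapter


class Node(BaseModel):
    name: str
    type: str
    description: str
    id: str
    label: str


class Edge(BaseModel):
    source_node_id: str
    target_node_id: str
    relationship_name: str


class KnowledgeGraph(BaseModel):
    summary: str
    description: str
    nodes: List[Node]
    edges: List[Edge]


async def main() -> None:
    adapter = GeminiAdapter(
        api_key="<google-api-key>",  # placeholder
        model="gemini/gemini-2.0-flash-exp",  # assumed LiteLLM-style model id
        max_tokens=8192,
    )
    graph = await adapter.acreate_structured_output(
        text_input="Ada Lovelace collaborated with Charles Babbage.",
        system_prompt="Extract a small knowledge graph from the text.",
        response_model=KnowledgeGraph,
    )
    print(graph.summary)


if __name__ == "__main__":
    asyncio.run(main())
```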
LLM client factory update:

```diff
@@ -12,6 +12,7 @@ class LLMProvider(Enum):
     OLLAMA = "ollama"
     ANTHROPIC = "anthropic"
     CUSTOM = "custom"
+    GEMINI = "gemini"
 
 
 def get_llm_client():
@@ -78,5 +79,20 @@ def get_llm_client():
             max_tokens=max_tokens,
         )
 
+    elif provider == LLMProvider.GEMINI:
+        if llm_config.llm_api_key is None:
+            raise InvalidValueError(message="LLM API key is not set.")
+
+        from .gemini.adapter import GeminiAdapter
+
+        return GeminiAdapter(
+            api_key=llm_config.llm_api_key,
+            model=llm_config.llm_model,
+            max_tokens=max_tokens,
+            endpoint=llm_config.llm_endpoint,
+            api_version=llm_config.llm_api_version,
+            streaming=llm_config.llm_streaming,
+        )
+
     else:
         raise InvalidValueError(message=f"Unsupported LLM provider: {provider}")
```
`OpenAIAdapter` update:

```diff
@@ -23,6 +23,8 @@ class OpenAIAdapter(LLMInterface):
     api_key: str
     api_version: str
 
+    MAX_RETRIES = 5
+
     """Adapter for OpenAI's GPT-3, GPT=4 API"""
 
     def __init__(
@@ -68,7 +70,7 @@ class OpenAIAdapter(LLMInterface):
             api_base=self.endpoint,
             api_version=self.api_version,
             response_model=response_model,
-            max_retries=5,
+            max_retries=self.MAX_RETRIES,
         )
 
     @observe
@@ -94,7 +96,7 @@ class OpenAIAdapter(LLMInterface):
             api_base=self.endpoint,
             api_version=self.api_version,
             response_model=response_model,
-            max_retries=5,
+            max_retries=self.MAX_RETRIES,
         )
 
     def create_transcript(self, input):
@@ -112,7 +114,7 @@ class OpenAIAdapter(LLMInterface):
             api_key=self.api_key,
             api_base=self.endpoint,
             api_version=self.api_version,
-            max_retries=5,
+            max_retries=self.MAX_RETRIES,
         )
 
         return transcription
@@ -144,7 +146,7 @@ class OpenAIAdapter(LLMInterface):
             api_base=self.endpoint,
             api_version=self.api_version,
             max_tokens=300,
-            max_retries=5,
+            max_retries=self.MAX_RETRIES,
         )
 
     def show_prompt(self, text_input: str, system_prompt: str) -> str:
```
`GeminiTokenizer` update:

```diff
@@ -1,4 +1,4 @@
-from typing import List, Any
+from typing import List, Any, Union
 
 from ..tokenizer_interface import TokenizerInterface
 
@@ -26,6 +26,10 @@ class GeminiTokenizer(TokenizerInterface):
     def extract_tokens(self, text: str) -> List[Any]:
         raise NotImplementedError
 
+    def decode_single_token(self, encoding: int):
+        # Gemini tokenizer doesn't have the option to decode tokens
+        raise NotImplementedError
+
     def count_tokens(self, text: str) -> int:
         """
         Returns the number of tokens in the given text.
@@ -39,6 +43,3 @@ class GeminiTokenizer(TokenizerInterface):
         import google.generativeai as genai
 
         return len(genai.embed_content(model=f"models/{self.model}", content=text))
-
-    def trim_text_to_max_tokens(self, text: str) -> str:
-        raise NotImplementedError
```
`HuggingFaceTokenizer` update:

```diff
@@ -1,7 +1,5 @@
 from typing import List, Any
 
-from transformers import AutoTokenizer
-
 from ..tokenizer_interface import TokenizerInterface
 
 
@@ -14,6 +12,9 @@ class HuggingFaceTokenizer(TokenizerInterface):
         self.model = model
         self.max_tokens = max_tokens
 
+        # Import here to make it an optional dependency
+        from transformers import AutoTokenizer
+
         self.tokenizer = AutoTokenizer.from_pretrained(model)
 
     def extract_tokens(self, text: str) -> List[Any]:
@@ -32,5 +33,6 @@ class HuggingFaceTokenizer(TokenizerInterface):
         """
         return len(self.tokenizer.tokenize(text))
 
-    def trim_text_to_max_tokens(self, text: str) -> str:
+    def decode_single_token(self, encoding: int):
+        # Gemini tokenizer doesn't have the option to decode tokens
         raise NotImplementedError
```
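The moved import illustrates a common optional-dependency pattern; a minimal sketch follows (actually running it requires `transformers` to be installed, which is exactly the point of deferring the import).

```python
# Sketch of the lazy-import pattern adopted above: importing transformers
# inside __init__ keeps it optional, so merely importing this module never
# pulls in the heavy package.
class LazyHuggingFaceTokenizer:
    def __init__(self, model: str):
        from transformers import AutoTokenizer  # deferred, optional import

        self.tokenizer = AutoTokenizer.from_pretrained(model)

    def count_tokens(self, text: str) -> int:
        return len(self.tokenizer.tokenize(text))
```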
`TikTokenTokenizer` update:

```diff
@@ -21,14 +21,17 @@ class TikTokenTokenizer(TokenizerInterface):
         self.tokenizer = tiktoken.encoding_for_model(self.model)
 
     def extract_tokens(self, text: str) -> List[Any]:
-        tokens = []
         # Using TikToken's method to tokenize text
         token_ids = self.tokenizer.encode(text)
-        # Go through tokens and decode them to text value
-        for token_id in token_ids:
-            token = self.tokenizer.decode([token_id])
-            tokens.append(token)
-        return tokens
+        return token_ids
+
+    def decode_token_list(self, tokens: List[Any]) -> List[Any]:
+        if not isinstance(tokens, list):
+            tokens = [tokens]
+        return [self.tokenizer.decode(i) for i in tokens]
+
+    def decode_single_token(self, token: int):
+        return self.tokenizer.decode_single_token_bytes(token).decode("utf-8", errors="replace")
 
     def count_tokens(self, text: str) -> int:
         """
```
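To see the refactored behavior end to end, here is a small stand-alone snippet using `tiktoken` directly; the embedding model name is just an example.

```python
import tiktoken

# extract_tokens now returns raw token ids, and decode_single_token maps a
# single id back to readable text; this mirrors both paths with plain tiktoken.
tokenizer = tiktoken.encoding_for_model("text-embedding-3-large")
token_ids = tokenizer.encode("def add(a, b): return a + b")
first = tokenizer.decode_single_token_bytes(token_ids[0]).decode("utf-8", errors="replace")
print(token_ids[:5], repr(first))
```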
`TokenizerInterface` update:

```diff
@@ -14,5 +14,5 @@ class TokenizerInterface(Protocol):
         raise NotImplementedError
 
     @abstractmethod
-    def trim_text_to_max_tokens(self, text: str) -> str:
+    def decode_single_token(self, token: int) -> str:
         raise NotImplementedError
```
`CogneeGraph` update:

```diff
@@ -113,8 +113,10 @@ class CogneeGraph(CogneeAbstractGraph):
 
         except (ValueError, TypeError) as e:
             print(f"Error projecting graph: {e}")
+            raise e
         except Exception as ex:
             print(f"Unexpected error: {ex}")
+            raise ex
 
     async def map_vector_distances_to_graph_nodes(self, node_distances) -> None:
         for category, scored_results in node_distances.items():
```
LLM settings update:

```diff
@@ -13,6 +13,7 @@ class ModelName(Enum):
     openai = "openai"
     ollama = "ollama"
     anthropic = "anthropic"
+    gemini = "gemini"
 
 
 class LLMConfig(BaseModel):
@@ -72,6 +73,10 @@ def get_settings() -> SettingsDict:
                 "value": "anthropic",
                 "label": "Anthropic",
             },
+            {
+                "value": "gemini",
+                "label": "Gemini",
+            },
         ]
 
     return SettingsDict.model_validate(
@@ -136,6 +141,12 @@ def get_settings() -> SettingsDict:
                     "label": "Claude 3 Haiku",
                 },
             ],
+            "gemini": [
+                {
+                    "value": "gemini-2.0-flash-exp",
+                    "label": "Gemini 2.0 Flash",
+                },
+            ],
         },
     },
     vector_db={
```
`chunk_by_paragraph` update:

```diff
@@ -1,8 +1,6 @@
-from typing import Any, Dict, Iterator, Optional, Union
+from typing import Any, Dict, Iterator
 from uuid import NAMESPACE_OID, uuid5
 
-import tiktoken
-
 from cognee.infrastructure.databases.vector import get_vector_engine
 
 from .chunk_by_sentence import chunk_by_sentence
@@ -19,7 +17,7 @@ def chunk_by_paragraph(
     When chunks are joined with empty string "", they reproduce the original text exactly.
 
     Notes:
-    - Tokenization is handled using the `tiktoken` library, ensuring compatibility with the vector engine's embedding model.
+    - Tokenization is handled using our tokenization adapters, ensuring compatibility with the vector engine's embedding model.
     - If `batch_paragraphs` is False, each paragraph will be yielded as a separate chunk.
     - Handles cases where paragraphs exceed the specified token or word limits by splitting them as needed.
     - Remaining text at the end of the input will be yielded as a final chunk.
@@ -31,17 +29,12 @@ def chunk_by_paragraph(
     last_cut_type = None
     current_token_count = 0
 
-    vector_engine = get_vector_engine()
-    embedding_model = vector_engine.embedding_engine.model
-    embedding_model = embedding_model.split("/")[-1]
-
     for paragraph_id, sentence, word_count, end_type in chunk_by_sentence(
         data, maximum_length=paragraph_length
     ):
         # Check if this sentence would exceed length limit
-        tokenizer = tiktoken.encoding_for_model(embedding_model)
-        token_count = len(tokenizer.encode(sentence))
+        embedding_engine = get_vector_engine().embedding_engine
+        token_count = embedding_engine.tokenizer.count_tokens(sentence)
 
         if current_word_count > 0 and (
             current_word_count + word_count > paragraph_length
```
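After this change the chunker only needs an object exposing `count_tokens(text) -> int`; a toy sketch of that contract follows (the whitespace tokenizer is a deliberate stand-in, not the real adapter).

```python
# Any object with count_tokens(text) -> int satisfies the contract the
# chunker now relies on; here a trivial whitespace counter stands in.
class WhitespaceTokenizer:
    def count_tokens(self, text: str) -> int:
        return len(text.split())


token_count = WhitespaceTokenizer().count_tokens("Paragraphs are chunked by token budget.")
print(token_count)  # 6
```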
`ingest_data` update:

```diff
@@ -30,9 +30,6 @@ async def ingest_data(data: Any, dataset_name: str, user: User):
         if hasattr(data_item, "dict") and inspect.ismethod(getattr(data_item, "dict")):
             return {"metadata": data_item.dict(), "origin": str(type(data_item))}
         else:
-            warnings.warn(
-                f"Data of type {type(data_item)}... does not have dict method. Returning empty metadata."
-            )
             return {}
 
     @dlt.resource(standalone=True, primary_key="id", merge_key="id")
```
Source-code chunking update:

```diff
@@ -3,33 +3,32 @@ from typing import AsyncGenerator, Generator
 from uuid import NAMESPACE_OID, uuid5
 
 import parso
-import tiktoken
 
 from cognee.infrastructure.databases.vector import get_vector_engine
 from cognee.infrastructure.engine import DataPoint
 from cognee.shared.CodeGraphEntities import CodeFile, CodePart, SourceCodeChunk
+from cognee.infrastructure.llm import get_max_chunk_tokens
 
 logger = logging.getLogger(__name__)
 
 
-def _count_tokens(tokenizer: tiktoken.Encoding, source_code: str) -> int:
-    return len(tokenizer.encode(source_code))
-
-
 def _get_naive_subchunk_token_counts(
-    tokenizer: tiktoken.Encoding, source_code: str, max_subchunk_tokens: int = 8000
+    source_code: str, max_subchunk_tokens
 ) -> list[tuple[str, int]]:
     """Splits source code into subchunks of up to max_subchunk_tokens and counts tokens."""
 
-    token_ids = tokenizer.encode(source_code)
+    tokenizer = get_vector_engine().embedding_engine.tokenizer
+    token_ids = tokenizer.extract_tokens(source_code)
     subchunk_token_counts = []
 
     for start_idx in range(0, len(token_ids), max_subchunk_tokens):
         subchunk_token_ids = token_ids[start_idx : start_idx + max_subchunk_tokens]
         token_count = len(subchunk_token_ids)
+        # Note: This can't work with Gemini embeddings as they keep their method of encoding text
+        # to tokens hidden and don't offer a decoder
+        # TODO: Add support for different tokenizers for this function
         subchunk = "".join(
-            tokenizer.decode_single_token_bytes(token_id).decode("utf-8", errors="replace")
-            for token_id in subchunk_token_ids
+            tokenizer.decode_single_token(token_id) for token_id in subchunk_token_ids
         )
         subchunk_token_counts.append((subchunk, token_count))
 
@@ -37,15 +36,14 @@ def _get_naive_subchunk_token_counts(
 
 
 def _get_subchunk_token_counts(
-    tokenizer: tiktoken.Encoding,
     source_code: str,
-    max_subchunk_tokens: int = 8000,
+    max_subchunk_tokens,
     depth: int = 0,
     max_depth: int = 100,
 ) -> list[tuple[str, int]]:
     """Splits source code into subchunk and counts tokens for each subchunk."""
     if depth > max_depth:
-        return _get_naive_subchunk_token_counts(tokenizer, source_code, max_subchunk_tokens)
+        return _get_naive_subchunk_token_counts(source_code, max_subchunk_tokens)
 
     try:
         module = parso.parse(source_code)
@@ -64,7 +62,8 @@ def _get_subchunk_token_counts(
     subchunk_token_counts = []
     for child in module.children:
         subchunk = child.get_code()
-        token_count = _count_tokens(tokenizer, subchunk)
+        tokenizer = get_vector_engine().embedding_engine.tokenizer
+        token_count = tokenizer.count_tokens(subchunk)
 
         if token_count == 0:
             continue
@@ -75,13 +74,13 @@ def _get_subchunk_token_counts(
 
         if child.type == "string":
             subchunk_token_counts.extend(
-                _get_naive_subchunk_token_counts(tokenizer, subchunk, max_subchunk_tokens)
+                _get_naive_subchunk_token_counts(subchunk, max_subchunk_tokens)
             )
             continue
 
         subchunk_token_counts.extend(
             _get_subchunk_token_counts(
-                tokenizer, subchunk, max_subchunk_tokens, depth=depth + 1, max_depth=max_depth
+                subchunk, max_subchunk_tokens, depth=depth + 1, max_depth=max_depth
             )
         )
 
@@ -96,22 +95,19 @@ def _get_chunk_source_code(
     cumulative_counts = []
     current_source_code = ""
 
-    # Get embedding engine used in vector database
-    embedding_engine = get_vector_engine().embedding_engine
-
     for i, (child_code, token_count) in enumerate(code_token_counts):
         current_count += token_count
         cumulative_counts.append(current_count)
-        if current_count > embedding_engine.max_tokens:
+        if current_count > get_max_chunk_tokens():
             break
         current_source_code += f"\n{child_code}"
 
-    if current_count <= embedding_engine.max_tokens:
+    if current_count <= get_max_chunk_tokens():
        return [], current_source_code.strip()
 
     cutoff = 1
     for i, cum_count in enumerate(cumulative_counts):
-        if cum_count > (1 - overlap) * embedding_engine.max_tokens:
+        if cum_count > (1 - overlap) * get_max_chunk_tokens():
             break
         cutoff = i
 
@@ -121,19 +117,16 @@ def _get_chunk_source_code(
 def get_source_code_chunks_from_code_part(
     code_file_part: CodePart,
     overlap: float = 0.25,
-    granularity: float = 0.1,
+    granularity: float = 0.09,
 ) -> Generator[SourceCodeChunk, None, None]:
     """Yields source code chunks from a CodePart object, with configurable token limits and overlap."""
     if not code_file_part.source_code:
         logger.error(f"No source code in CodeFile {code_file_part.id}")
         return
 
-    embedding_engine = get_vector_engine().embedding_engine
-    tokenizer = embedding_engine.tokenizer
-
-    max_subchunk_tokens = max(1, int(granularity * embedding_engine.max_tokens))
+    max_subchunk_tokens = max(1, int(granularity * get_max_chunk_tokens()))
     subchunk_token_counts = _get_subchunk_token_counts(
-        tokenizer, code_file_part.source_code, max_subchunk_tokens
+        code_file_part.source_code, max_subchunk_tokens
     )
 
     previous_chunk = None
@@ -157,7 +150,6 @@ async def get_source_code_chunks(
     data_points: list[DataPoint],
 ) -> AsyncGenerator[list[DataPoint], None]:
     """Processes code graph datapoints, create SourceCodeChink datapoints."""
-    # TODO: Add support for other embedding models, with max_token mapping
     for data_point in data_points:
         try:
             yield data_point
@@ -173,5 +165,7 @@ async def get_source_code_chunks(
                 yield source_code_chunk
         except Exception as e:
             logger.error(f"Error processing code part: {e}")
+            raise e
         except Exception as e:
             logger.error(f"Error processing data point: {e}")
+            raise e
```
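The budget-and-overlap logic above is the heart of the fix for oversized chunks; here is a self-contained sketch of the same cumulative-count cutoff, with illustrative numbers.

```python
# Accumulate child token counts until the budget is exceeded, then pick an
# overlap cutoff at (1 - overlap) * budget so consecutive chunks share
# trailing context (mirrors _get_chunk_source_code above).
def chunk_cutoff(token_counts: list[int], budget: int, overlap: float = 0.25) -> int:
    cumulative, total = [], 0
    for count in token_counts:
        total += count
        cumulative.append(total)

    cutoff = 1
    for i, cum_count in enumerate(cumulative):
        if cum_count > (1 - overlap) * budget:
            break
        cutoff = i
    return cutoff


print(chunk_cutoff([100, 200, 300, 400], budget=600))  # 1
```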
QA evaluation update:

```diff
@@ -114,7 +114,7 @@ async def eval_on_QA_dataset(
     if not out_path.exists():
         out_path.mkdir(parents=True, exist_ok=True)
 
-    random.seed(42)
+    random.seed(43)
     instances = dataset if not num_samples else random.sample(dataset, num_samples)
 
     contexts_filename = out_path / Path(
```
QA eval parameters update:

```diff
@@ -4,7 +4,6 @@
   ],
   "rag_option": [
-    "cognee_incremental",
     "cognee",
     "no_rag",
     "simple_rag",
     "brute_force"
@@ -14,10 +13,6 @@
   ],
   "metric_names": [
     "Correctness",
-    "Comprehensiveness",
-    "Directness",
-    "Diversity",
-    "Empowerment",
-    "promptfoo.directness"
+    "Comprehensiveness"
   ]
 }
```
`poetry.lock` (7 lines changed, generated):

```diff
@@ -7241,7 +7241,7 @@ crt = ["botocore[crt] (>=1.36.0,<2.0a.0)"]
 name = "safetensors"
 version = "0.5.2"
 description = ""
-optional = false
+optional = true
 python-versions = ">=3.7"
 files = [
     {file = "safetensors-0.5.2-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:45b6092997ceb8aa3801693781a71a99909ab9cc776fbc3fa9322d29b1d3bef2"},
@@ -8079,7 +8079,7 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,
 name = "transformers"
 version = "4.48.1"
 description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
-optional = false
+optional = true
 python-versions = ">=3.9.0"
 files = [
     {file = "transformers-4.48.1-py3-none-any.whl", hash = "sha256:24be0564b0a36d9e433d9a65de248f1545b6f6edce1737669605eb6a8141bbbb"},
@@ -9040,6 +9040,7 @@ falkordb = ["falkordb"]
 filesystem = ["botocore"]
+gemini = ["google-generativeai"]
 groq = ["groq"]
 huggingface = ["transformers"]
 langchain = ["langchain_text_splitters", "langsmith"]
 llama-index = ["llama-index-core"]
 milvus = ["pymilvus"]
@@ -9053,4 +9054,4 @@ weaviate = ["weaviate-client"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10.0,<3.13"
-content-hash = "480675c274cd85a76a95bf03af865b1a0b462f25bbc21d7427b0a0b8e21c13db"
+content-hash = "e0752df2545fd5048c0969acc7282fce8e034ec0abfabfe07785e7d34c44fc8b"
```
`pyproject.toml` update:

```diff
@@ -70,7 +70,7 @@ pgvector = {version = "^0.3.5", optional = true}
 psycopg2 = {version = "^2.9.10", optional = true}
 llama-index-core = {version = "^0.12.11", optional = true}
 deepeval = {version = "^2.0.1", optional = true}
-transformers = "^4.46.3"
+transformers = {version = "^4.46.3", optional = true}
 pymilvus = {version = "^2.5.0", optional = true}
 unstructured = { extras = ["csv", "doc", "docx", "epub", "md", "odt", "org", "ppt", "pptx", "rst", "rtf", "tsv", "xlsx"], version = "^0.16.13", optional = true }
 pre-commit = "^4.0.1"
@@ -92,6 +92,7 @@ notebook = ["notebook", "ipykernel", "overrides", "ipywidgets", "jupyterlab", "j
 langchain = ["langsmith", "langchain_text_splitters"]
 llama-index = ["llama-index-core"]
+gemini = ["google-generativeai"]
 huggingface = ["transformers"]
 deepeval = ["deepeval"]
 posthog = ["posthog"]
 falkordb = ["falkordb"]
```
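With these extras declared, Gemini (and the now-optional HuggingFace tooling) can presumably be installed through Poetry's standard extras flag, e.g. `poetry install -E gemini -E huggingface`; the exact invocation is not spelled out in this PR.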