diff --git a/.env.template b/.env.template
index b91f89540..b1e7057e2 100644
--- a/.env.template
+++ b/.env.template
@@ -30,6 +30,9 @@ EMBEDDING_DIMENSIONS=3072
 EMBEDDING_MAX_TOKENS=8191
 # If embedding key is not provided same key set for LLM_API_KEY will be used
 #EMBEDDING_API_KEY="your_api_key"
+# Note: OpenAI supports up to 2048 elements and Gemini supports a maximum of 100 elements per embedding batch.
+# Cognee sets the optimal batch size for OpenAI and Gemini, but a custom size can be defined for other models if necessary.
+#EMBEDDING_BATCH_SIZE=2048

 # If using BAML structured output these env variables will be used
 BAML_LLM_PROVIDER=openai
@@ -52,18 +55,18 @@ BAML_LLM_API_VERSION=""
 ################################################################################
 # Configure storage backend (local filesystem or S3)
 # STORAGE_BACKEND="local"  # Default: uses local filesystem
-# 
+#
 # -- To switch to S3 storage, uncomment and fill these: ---------------------
 # STORAGE_BACKEND="s3"
 # STORAGE_BUCKET_NAME="your-bucket-name"
 # AWS_REGION="us-east-1"
 # AWS_ACCESS_KEY_ID="your-access-key"
 # AWS_SECRET_ACCESS_KEY="your-secret-key"
-# 
+#
 # -- S3 Root Directories (optional) -----------------------------------------
 # DATA_ROOT_DIRECTORY="s3://your-bucket/cognee/data"
 # SYSTEM_ROOT_DIRECTORY="s3://your-bucket/cognee/system"
-# 
+#
 # -- Cache Directory (auto-configured for S3) -------------------------------
 # When STORAGE_BACKEND=s3, cache automatically uses S3: s3://BUCKET/cognee/cache
 # To override the automatic S3 cache location, uncomment:
diff --git a/.github/actions/cognee_setup/action.yml b/.github/actions/cognee_setup/action.yml
index e46a42edb..1326f2d81 100644
--- a/.github/actions/cognee_setup/action.yml
+++ b/.github/actions/cognee_setup/action.yml
@@ -41,4 +41,4 @@ runs:
             EXTRA_ARGS="$EXTRA_ARGS --extra $extra"
           done
         fi
-        uv sync --extra api --extra docs --extra evals --extra gemini --extra codegraph --extra ollama --extra dev --extra neo4j $EXTRA_ARGS
+        uv sync --extra api --extra docs --extra evals --extra codegraph --extra ollama --extra dev --extra neo4j $EXTRA_ARGS
diff --git a/.github/workflows/test_llms.yml b/.github/workflows/test_llms.yml
index 5a0f947c9..6b0221309 100644
--- a/.github/workflows/test_llms.yml
+++ b/.github/workflows/test_llms.yml
@@ -27,7 +27,7 @@ jobs:
       env:
         LLM_PROVIDER: "gemini"
         LLM_API_KEY: ${{ secrets.GEMINI_API_KEY }}
-        LLM_MODEL: "gemini/gemini-1.5-flash"
+        LLM_MODEL: "gemini/gemini-2.0-flash"
        EMBEDDING_PROVIDER: "gemini"
         EMBEDDING_API_KEY: ${{ secrets.GEMINI_API_KEY }}
         EMBEDDING_MODEL: "gemini/text-embedding-004"
@@ -83,4 +83,4 @@ jobs:
         EMBEDDING_MODEL: "openai/text-embedding-3-large"
         EMBEDDING_DIMENSIONS: "3072"
         EMBEDDING_MAX_TOKENS: "8191"
-      run: uv run python ./examples/python/simple_example.py
\ No newline at end of file
+      run: uv run python ./examples/python/simple_example.py
diff --git a/Dockerfile b/Dockerfile
index be29f359a..b1b77513d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -31,7 +31,7 @@ COPY README.md pyproject.toml uv.lock entrypoint.sh ./
 # Install the project's dependencies using the lockfile and settings
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv sync --extra debug --extra api --extra postgres --extra neo4j --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-install-project --no-dev --no-editable
+    uv sync --extra debug --extra api --extra postgres --extra neo4j --extra llama-index --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-install-project --no-dev --no-editable

 # Copy Alembic configuration
 COPY alembic.ini /app/alembic.ini
@@ -42,7 +42,7 @@ COPY alembic/ /app/alembic
 COPY ./cognee /app/cognee
 COPY ./distributed /app/distributed
 RUN --mount=type=cache,target=/root/.cache/uv \
-uv sync --extra debug --extra api --extra postgres --extra neo4j --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-dev --no-editable
+uv sync --extra debug --extra api --extra postgres --extra neo4j --extra llama-index --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-dev --no-editable

 FROM python:3.12-slim-bookworm
diff --git a/cognee-mcp/pyproject.toml b/cognee-mcp/pyproject.toml
index bf9ecd51f..b4e421cc5 100644
--- a/cognee-mcp/pyproject.toml
+++ b/cognee-mcp/pyproject.toml
@@ -8,6 +8,7 @@ requires-python = ">=3.10"
 dependencies = [
     # For local cognee repo usage remove comment bellow and add absolute path to cognee. Then run `uv sync --reinstall` in the mcp folder on local cognee changes.
     #"cognee[postgres,codegraph,gemini,huggingface,docs,neo4j] @ file:/Users/igorilic/Desktop/cognee",
+    # TODO: Remove gemini from optional dependencies for new Cognee version after 0.3.4
     "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]==0.3.4",
     "fastmcp>=2.10.0,<3.0.0",
     "mcp>=1.12.0,<2.0.0",
diff --git a/cognee/api/v1/add/add.py b/cognee/api/v1/add/add.py
index eeb867984..1880735c2 100644
--- a/cognee/api/v1/add/add.py
+++ b/cognee/api/v1/add/add.py
@@ -148,7 +148,9 @@ async def add(
     await setup()

-    user, authorized_dataset = await resolve_authorized_user_dataset(dataset_id, dataset_name, user)
+    user, authorized_dataset = await resolve_authorized_user_dataset(
+        dataset_name=dataset_name, dataset_id=dataset_id, user=user
+    )

     await reset_dataset_pipeline_run_status(
         authorized_dataset.id, user, pipeline_names=["add_pipeline", "cognify_pipeline"]
diff --git a/cognee/api/v1/ui/ui.py b/cognee/api/v1/ui/ui.py
index f4df0edde..108806ba2 100644
--- a/cognee/api/v1/ui/ui.py
+++ b/cognee/api/v1/ui/ui.py
@@ -627,7 +627,6 @@ def start_ui(
             env=env,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
-            text=True,
             shell=True,
         )
     else:
@@ -637,7 +636,6 @@
             env=env,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
-            text=True,
             preexec_fn=os.setsid if hasattr(os, "setsid") else None,
         )
diff --git a/cognee/infrastructure/databases/vector/embeddings/EmbeddingEngine.py b/cognee/infrastructure/databases/vector/embeddings/EmbeddingEngine.py
index afa180c67..c410d99dc 100644
--- a/cognee/infrastructure/databases/vector/embeddings/EmbeddingEngine.py
+++ b/cognee/infrastructure/databases/vector/embeddings/EmbeddingEngine.py
@@ -34,3 +34,12 @@ class EmbeddingEngine(Protocol):
         - int: An integer representing the number of dimensions in the embedding vector.
         """
         raise NotImplementedError()
+
+    def get_batch_size(self) -> int:
+        """
+        Return the desired batch size for embedding calls.
+
+        Returns:
+
+        - int: The number of elements to send in a single embedding request.
+        """
+        raise NotImplementedError()
diff --git a/cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py b/cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py
index acb041e76..8f922d934 100644
--- a/cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py
+++ b/cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py
@@ -42,11 +42,13 @@ class FastembedEmbeddingEngine(EmbeddingEngine):
         model: Optional[str] = "openai/text-embedding-3-large",
         dimensions: Optional[int] = 3072,
         max_completion_tokens: int = 512,
+        batch_size: int = 100,
     ):
         self.model = model
         self.dimensions = dimensions
         self.max_completion_tokens = max_completion_tokens
         self.tokenizer = self.get_tokenizer()
+        self.batch_size = batch_size
         # self.retry_count = 0

         self.embedding_model = TextEmbedding(model_name=model)
@@ -101,6 +103,15 @@ class FastembedEmbeddingEngine(EmbeddingEngine):
         """
         return self.dimensions

+    def get_batch_size(self) -> int:
+        """
+        Return the desired batch size for embedding calls.
+
+        Returns:
+
+        - int: The batch size used when sending embedding requests.
+        """
+        return self.batch_size
+
     def get_tokenizer(self):
         """
         Instantiate and return the tokenizer used for preparing text for embedding.
diff --git a/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py b/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py
index 0cb8286fc..d68941d25 100644
--- a/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py
+++ b/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py
@@ -58,6 +58,7 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
         endpoint: str = None,
         api_version: str = None,
         max_completion_tokens: int = 512,
+        batch_size: int = 100,
     ):
         self.api_key = api_key
         self.endpoint = endpoint
@@ -68,6 +69,7 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
         self.max_completion_tokens = max_completion_tokens
         self.tokenizer = self.get_tokenizer()
         self.retry_count = 0
+        self.batch_size = batch_size

         enable_mocking = os.getenv("MOCK_EMBEDDING", "false")
         if isinstance(enable_mocking, bool):
@@ -165,6 +167,15 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
         """
         return self.dimensions

+    def get_batch_size(self) -> int:
+        """
+        Return the desired batch size for embedding calls.
+
+        Returns:
+
+        - int: The batch size used when sending embedding requests.
+        """
+        return self.batch_size
+
     def get_tokenizer(self):
         """
         Load and return the appropriate tokenizer for the specified model based on the provider.
@@ -183,9 +194,15 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
                 model=model, max_completion_tokens=self.max_completion_tokens
             )
         elif "gemini" in self.provider.lower():
-            tokenizer = GeminiTokenizer(
-                model=model, max_completion_tokens=self.max_completion_tokens
+            # Since Gemini tokenization needs to send an API request to get the token count, we use
+            # TikToken to count tokens locally (word by word) instead
+            tokenizer = TikTokenTokenizer(
+                model=None, max_completion_tokens=self.max_completion_tokens
             )
+            # Note: Gemini Tokenizer expects an LLM model as input and not the embedding model
+            # tokenizer = GeminiTokenizer(
+            #     llm_model=llm_model, max_completion_tokens=self.max_completion_tokens
+            # )
         elif "mistral" in self.provider.lower():
             tokenizer = MistralTokenizer(
                 model=model, max_completion_tokens=self.max_completion_tokens
diff --git a/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py b/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py
index e6e590597..e79ba3f6a 100644
--- a/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py
+++ b/cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py
@@ -54,12 +54,14 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
         max_completion_tokens: int = 512,
         endpoint: Optional[str] = "http://localhost:11434/api/embeddings",
         huggingface_tokenizer: str = "Salesforce/SFR-Embedding-Mistral",
+        batch_size: int = 100,
     ):
         self.model = model
         self.dimensions = dimensions
         self.max_completion_tokens = max_completion_tokens
         self.endpoint = endpoint
         self.huggingface_tokenizer_name = huggingface_tokenizer
+        self.batch_size = batch_size
         self.tokenizer = self.get_tokenizer()

         enable_mocking = os.getenv("MOCK_EMBEDDING", "false")
@@ -122,6 +124,15 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
         """
         return self.dimensions

+    def get_batch_size(self) -> int:
+        """
+        Return the desired batch size for embedding calls.
+
+        Returns:
+
+        - int: The batch size used when sending embedding requests.
+        """
+        return self.batch_size
+
     def get_tokenizer(self):
         """
         Load and return a HuggingFace tokenizer for the embedding engine.
diff --git a/cognee/infrastructure/databases/vector/embeddings/config.py b/cognee/infrastructure/databases/vector/embeddings/config.py
index 04a1f18f2..24f724151 100644
--- a/cognee/infrastructure/databases/vector/embeddings/config.py
+++ b/cognee/infrastructure/databases/vector/embeddings/config.py
@@ -19,9 +19,17 @@ class EmbeddingConfig(BaseSettings):
     embedding_api_key: Optional[str] = None
     embedding_api_version: Optional[str] = None
     embedding_max_completion_tokens: Optional[int] = 8191
+    embedding_batch_size: Optional[int] = None
     huggingface_tokenizer: Optional[str] = None
     model_config = SettingsConfigDict(env_file=".env", extra="allow")

+    def model_post_init(self, __context) -> None:
+        # If embedding batch size is not defined, use 2048 as the default for OpenAI and 100 for all other embedding models
+        if not self.embedding_batch_size and self.embedding_provider.lower() == "openai":
+            self.embedding_batch_size = 2048
+        elif not self.embedding_batch_size:
+            self.embedding_batch_size = 100
+
     def to_dict(self) -> dict:
         """
         Serialize all embedding configuration settings to a dictionary.
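For reference, the batch-size default introduced in model_post_init above resolves as in this minimal standalone sketch (resolve_batch_size is an illustrative helper name, not part of the change):

def resolve_batch_size(embedding_provider: str, embedding_batch_size=None) -> int:
    # Mirrors EmbeddingConfig.model_post_init: an explicit EMBEDDING_BATCH_SIZE always wins;
    # OpenAI defaults to its 2048-element request cap, every other provider to 100.
    if embedding_batch_size:
        return embedding_batch_size
    if embedding_provider.lower() == "openai":
        return 2048
    return 100


assert resolve_batch_size("openai") == 2048
assert resolve_batch_size("gemini") == 100
assert resolve_batch_size("openai", 512) == 512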
diff --git a/cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py b/cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py
index e7fcf4e94..de56c8971 100644
--- a/cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py
+++ b/cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py
@@ -31,6 +31,7 @@ def get_embedding_engine() -> EmbeddingEngine:
         config.embedding_endpoint,
         config.embedding_api_key,
         config.embedding_api_version,
+        config.embedding_batch_size,
         config.huggingface_tokenizer,
         llm_config.llm_api_key,
         llm_config.llm_provider,
@@ -46,6 +47,7 @@ def create_embedding_engine(
     embedding_endpoint,
     embedding_api_key,
     embedding_api_version,
+    embedding_batch_size,
     huggingface_tokenizer,
     llm_api_key,
     llm_provider,
@@ -84,6 +86,7 @@ def create_embedding_engine(
             model=embedding_model,
             dimensions=embedding_dimensions,
             max_completion_tokens=embedding_max_completion_tokens,
+            batch_size=embedding_batch_size,
         )

     if embedding_provider == "ollama":
@@ -95,6 +98,7 @@ def create_embedding_engine(
             max_completion_tokens=embedding_max_completion_tokens,
             endpoint=embedding_endpoint,
             huggingface_tokenizer=huggingface_tokenizer,
+            batch_size=embedding_batch_size,
         )

     from .LiteLLMEmbeddingEngine import LiteLLMEmbeddingEngine
@@ -108,4 +112,5 @@ def create_embedding_engine(
         model=embedding_model,
         dimensions=embedding_dimensions,
         max_completion_tokens=embedding_max_completion_tokens,
+        batch_size=embedding_batch_size,
     )
diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py
index 2c7b73eef..510d29ce8 100644
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py
@@ -1,115 +1,155 @@
-import litellm
-from pydantic import BaseModel
-from typing import Type, Optional
-from litellm import acompletion, JSONSchemaValidationError
+"""Adapter for the Gemini LLM provider API"""

-from cognee.shared.logging_utils import get_logger
-from cognee.modules.observability.get_observe import get_observe
-from cognee.infrastructure.llm.exceptions import MissingSystemPromptPathError
+import litellm
+import instructor
+from typing import Type
+from pydantic import BaseModel
+from openai import ContentFilterFinishReasonError
+from litellm.exceptions import ContentPolicyViolationError
+from instructor.core import InstructorRetryException
+
+from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError
 from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
     LLMInterface,
 )
-from cognee.infrastructure.llm.LLMGateway import LLMGateway
 from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
     rate_limit_async,
     sleep_and_retry_async,
 )

-logger = get_logger()
-observe = get_observe()
-

 class GeminiAdapter(LLMInterface):
     """
-    Handles interactions with a language model API.
+    Adapter for the Gemini API LLM provider.

-    Public methods include:
-    - acreate_structured_output
-    - show_prompt
+    This class initializes the API adapter with necessary credentials and configurations for
+    interacting with Gemini LLM models. It provides methods for creating structured outputs
+    based on user input and system prompts.
+
+    Public methods:
+    - acreate_structured_output(text_input: str, system_prompt: str, response_model:
+    Type[BaseModel]) -> BaseModel
     """

-    MAX_RETRIES = 5
+    name: str
+    model: str
+    api_key: str

     def __init__(
         self,
+        endpoint,
         api_key: str,
         model: str,
+        api_version: str,
         max_completion_tokens: int,
-        endpoint: Optional[str] = None,
-        api_version: Optional[str] = None,
-        streaming: bool = False,
-    ) -> None:
-        self.api_key = api_key
+        fallback_model: str = None,
+        fallback_api_key: str = None,
+        fallback_endpoint: str = None,
+    ):
         self.model = model
+        self.api_key = api_key
         self.endpoint = endpoint
         self.api_version = api_version
-        self.streaming = streaming
         self.max_completion_tokens = max_completion_tokens

-    @observe(as_type="generation")
+        self.fallback_model = fallback_model
+        self.fallback_api_key = fallback_api_key
+        self.fallback_endpoint = fallback_endpoint
+
+        self.aclient = instructor.from_litellm(litellm.acompletion, mode=instructor.Mode.JSON)
+
     @sleep_and_retry_async()
     @rate_limit_async
     async def acreate_structured_output(
         self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
     ) -> BaseModel:
         """
-        Generate structured output from the language model based on the provided input and
-        system prompt.
+        Generate a response from a user query.

-        This method handles retries and raises a ValueError if the request fails or the response
-        does not conform to the expected schema, logging errors accordingly.
+        This asynchronous method sends a user query and a system prompt to a language model and
+        retrieves the generated response. It handles API communication and retries up to a
+        specified limit in case of request failures.

         Parameters:
         -----------
-        - text_input (str): The user input text to generate a response for.
-        - system_prompt (str): The system's prompt or context to influence the language
-        model's generation.
-        - response_model (Type[BaseModel]): A model type indicating the expected format of
-        the response.
+        - text_input (str): The input text from the user to generate a response for.
+        - system_prompt (str): A prompt that provides context or instructions for the
+        response generation.
+        - response_model (Type[BaseModel]): A Pydantic model that defines the structure of
+        the expected response.

         Returns:
         --------
-        - BaseModel: Returns the generated response as an instance of the specified response
-        model.
+        - BaseModel: An instance of the specified response model containing the structured
+        output from the language model.
         """
+
         try:
-            if response_model is str:
-                response_schema = {"type": "string"}
-            else:
-                response_schema = response_model
+            return await self.aclient.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": f"""{text_input}""",
+                    },
+                    {
+                        "role": "system",
+                        "content": system_prompt,
+                    },
+                ],
+                api_key=self.api_key,
+                max_retries=5,
+                api_base=self.endpoint,
+                api_version=self.api_version,
+                response_model=response_model,
+            )
+        except (
+            ContentFilterFinishReasonError,
+            ContentPolicyViolationError,
+            InstructorRetryException,
+        ) as error:
+            if (
+                isinstance(error, InstructorRetryException)
+                and "content management policy" not in str(error).lower()
+            ):
+                raise error

-            messages = [
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": text_input},
-            ]
-
-        try:
-            response = await acompletion(
-                model=f"{self.model}",
-                messages=messages,
-                api_key=self.api_key,
-                max_completion_tokens=self.max_completion_tokens,
-                temperature=0.1,
-                response_format=response_schema,
-                timeout=100,
-                num_retries=self.MAX_RETRIES,
+            if not (self.fallback_model and self.fallback_api_key and self.fallback_endpoint):
+                raise ContentPolicyFilterError(
+                    f"The provided input contains content that is not aligned with our content policy: {text_input}"
                 )

-            if response.choices and response.choices[0].message.content:
-                content = response.choices[0].message.content
-                if response_model is str:
-                    return content
-                return response_model.model_validate_json(content)
-
-        except litellm.exceptions.BadRequestError as e:
-            logger.error(f"Bad request error: {str(e)}")
-            raise ValueError(f"Invalid request: {str(e)}")
-
-        raise ValueError("Failed to get valid response after retries")
-
-        except JSONSchemaValidationError as e:
-            logger.error(f"Schema validation failed: {str(e)}")
-            logger.debug(f"Raw response: {e.raw_response}")
-            raise ValueError(f"Response failed schema validation: {str(e)}")
+            try:
+                return await self.aclient.chat.completions.create(
+                    model=self.fallback_model,
+                    messages=[
+                        {
+                            "role": "user",
+                            "content": f"""{text_input}""",
+                        },
+                        {
+                            "role": "system",
+                            "content": system_prompt,
+                        },
+                    ],
+                    max_retries=5,
+                    api_key=self.fallback_api_key,
+                    api_base=self.fallback_endpoint,
+                    response_model=response_model,
+                )
+            except (
+                ContentFilterFinishReasonError,
+                ContentPolicyViolationError,
+                InstructorRetryException,
+            ) as error:
+                if (
+                    isinstance(error, InstructorRetryException)
+                    and "content management policy" not in str(error).lower()
+                ):
+                    raise error
+                else:
+                    raise ContentPolicyFilterError(
+                        f"The provided input contains content that is not aligned with our content policy: {text_input}"
+                    )
diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py
index 86adac25a..8c15a5804 100644
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py
@@ -6,7 +6,7 @@ from typing import Type
 from pydantic import BaseModel
 from openai import ContentFilterFinishReasonError
 from litellm.exceptions import ContentPolicyViolationError
-from instructor.exceptions import InstructorRetryException
+from instructor.core import InstructorRetryException

 from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError
 from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
@@ -56,9 +56,7 @@ class GenericAPIAdapter(LLMInterface):
         self.fallback_api_key = fallback_api_key
         self.fallback_endpoint = fallback_endpoint

-        self.aclient = instructor.from_litellm(
-            litellm.acompletion, mode=instructor.Mode.JSON, api_key=api_key
-        )
+        self.aclient = instructor.from_litellm(litellm.acompletion, mode=instructor.Mode.JSON)

     @sleep_and_retry_async()
     @rate_limit_async
@@ -102,6 +100,7 @@ class GenericAPIAdapter(LLMInterface):
                 },
             ],
             max_retries=5,
+            api_key=self.api_key,
             api_base=self.endpoint,
             response_model=response_model,
         )
diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py
index 049b27e04..0ae621428 100644
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py
@@ -143,7 +143,6 @@ def get_llm_client(raise_api_key_error: bool = True):
             max_completion_tokens=max_completion_tokens,
             endpoint=llm_config.llm_endpoint,
             api_version=llm_config.llm_api_version,
-            streaming=llm_config.llm_streaming,
         )

     else:
diff --git a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py
index 7273de976..989b240ac 100644
--- a/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py
+++ b/cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py
@@ -5,15 +5,13 @@ from typing import Type
 from pydantic import BaseModel
 from openai import ContentFilterFinishReasonError
 from litellm.exceptions import ContentPolicyViolationError
-from instructor.exceptions import InstructorRetryException
+from instructor.core import InstructorRetryException

-from cognee.infrastructure.llm.LLMGateway import LLMGateway
 from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
     LLMInterface,
 )
 from cognee.infrastructure.llm.exceptions import (
     ContentPolicyFilterError,
-    MissingSystemPromptPathError,
 )
 from cognee.infrastructure.files.utils.open_data_file import open_data_file
 from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
diff --git a/cognee/infrastructure/llm/tokenizer/Gemini/adapter.py b/cognee/infrastructure/llm/tokenizer/Gemini/adapter.py
index a57cff3f7..d595fba2d 100644
--- a/cognee/infrastructure/llm/tokenizer/Gemini/adapter.py
+++ b/cognee/infrastructure/llm/tokenizer/Gemini/adapter.py
@@ -3,6 +3,7 @@ from typing import List, Any
 from ..tokenizer_interface import TokenizerInterface


+# NOTE: DEPRECATED: counting tokens requires an API request to Google, which is too slow to use with Cognee
 class GeminiTokenizer(TokenizerInterface):
     """
     Implements a tokenizer interface for the Gemini model, managing token extraction and
@@ -16,10 +17,10 @@
     def __init__(
         self,
-        model: str,
+        llm_model: str,
         max_completion_tokens: int = 3072,
     ):
-        self.model = model
+        self.llm_model = llm_model
         self.max_completion_tokens = max_completion_tokens

         # Get LLM API key from config
@@ -28,12 +29,11 @@
             get_llm_config,
         )

-        config = get_embedding_config()
         llm_config = get_llm_config()

-        import google.generativeai as genai
+        from google import genai

-        genai.configure(api_key=config.embedding_api_key or llm_config.llm_api_key)
+        self.client = genai.Client(api_key=llm_config.llm_api_key)

     def extract_tokens(self, text: str) -> List[Any]:
         """
@@ -77,6 +77,7 @@
         - int: The number of tokens in the given text.
         """
-        import google.generativeai as genai

-        return len(genai.embed_content(model=f"models/{self.model}", content=text))
+        tokens_response = self.client.models.count_tokens(model=self.llm_model, contents=text)
+
+        return tokens_response.total_tokens
diff --git a/cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py b/cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py
index 7e3d1c124..981c02112 100644
--- a/cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py
+++ b/cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py
@@ -1,47 +1,31 @@
 from uuid import UUID
+from typing import Optional

-from cognee.api.v1.exceptions import DatasetNotFoundError
 from cognee.modules.users.models import User
-from cognee.modules.users.methods import get_default_user
-from cognee.modules.data.methods import (
-    create_authorized_dataset,
-    get_authorized_dataset,
-    get_authorized_dataset_by_name,
+from cognee.modules.pipelines.layers.resolve_authorized_user_datasets import (
+    resolve_authorized_user_datasets,
 )


-async def resolve_authorized_user_dataset(dataset_id: UUID, dataset_name: str, user: User):
+async def resolve_authorized_user_dataset(
+    dataset_name: str, dataset_id: Optional[UUID] = None, user: Optional[User] = None
+):
     """
     Function handles creation and dataset authorization if dataset already exist for Cognee.
     Verifies that provided user has necessary permission for provided Dataset.
     If Dataset does not exist creates the Dataset and gives permission for the user creating
     the dataset.

     Args:
-        dataset_id: Id of the dataset.
         dataset_name: Name of the dataset.
+        dataset_id: Id of the dataset.
         user: Cognee User request is being processed for, if None default user will be used.

     Returns:
         Tuple[User, Dataset]: A tuple containing the user and the authorized dataset.
     """
-    if not user:
-        user = await get_default_user()

-    if dataset_id:
-        authorized_dataset = await get_authorized_dataset(user, dataset_id, "write")
-    elif dataset_name:
-        authorized_dataset = await get_authorized_dataset_by_name(dataset_name, user, "write")
+    user, authorized_datasets = await resolve_authorized_user_datasets(
+        datasets=dataset_id if dataset_id else dataset_name, user=user
+    )

-        if not authorized_dataset:
-            authorized_dataset = await create_authorized_dataset(
-                dataset_name=dataset_name, user=user
-            )
-    else:
-        raise ValueError("Either dataset_id or dataset_name must be provided.")
-
-    if not authorized_dataset:
-        raise DatasetNotFoundError(
-            message=f"Dataset ({str(dataset_id) or dataset_name}) not found."
-        )
-
-    return user, authorized_dataset
+    return user, authorized_datasets[0]
diff --git a/cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py b/cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py
index f91064995..cda39d7bb 100644
--- a/cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py
+++ b/cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py
@@ -1,5 +1,5 @@
 from uuid import UUID
-from typing import Union, Tuple, List
+from typing import Union, Tuple, List, Optional

 from cognee.modules.users.methods import get_default_user
 from cognee.modules.users.models import User
@@ -13,7 +13,7 @@ from cognee.modules.data.methods import (


 async def resolve_authorized_user_datasets(
-    datasets: Union[str, UUID, list[str], list[UUID]], user: User = None
+    datasets: Union[str, UUID, list[str], list[UUID]], user: Optional[User] = None
 ) -> Tuple[User, List[Dataset]]:
     """
     Function handles creation and dataset authorization if datasets already exist for Cognee.
diff --git a/cognee/modules/search/methods/search.py b/cognee/modules/search/methods/search.py
index 8bcef815f..958af6444 100644
--- a/cognee/modules/search/methods/search.py
+++ b/cognee/modules/search/methods/search.py
@@ -19,7 +19,9 @@ from cognee.modules.search.types import (
 from cognee.modules.search.operations import log_query, log_result
 from cognee.modules.users.models import User
 from cognee.modules.data.models import Dataset
-from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
+from cognee.modules.data.methods.get_authorized_existing_datasets import (
+    get_authorized_existing_datasets,
+)

 from .get_search_type_tools import get_search_type_tools
 from .no_access_control_search import no_access_control_search
@@ -202,7 +204,9 @@ async def authorized_search(
     Not to be used outside of active access control mode.
     """
     # Find datasets user has read access for (if datasets are provided only return them. Provided user has read access)
-    search_datasets = await get_specific_user_permission_datasets(user.id, "read", dataset_ids)
+    search_datasets = await get_authorized_existing_datasets(
+        datasets=dataset_ids, permission_type="read", user=user
+    )

     if use_combined_context:
         search_responses = await search_in_datasets_context(
diff --git a/cognee/tasks/storage/index_data_points.py b/cognee/tasks/storage/index_data_points.py
index 9c363c04c..362412657 100644
--- a/cognee/tasks/storage/index_data_points.py
+++ b/cognee/tasks/storage/index_data_points.py
@@ -39,7 +39,7 @@ async def index_data_points(data_points: list[DataPoint]):
         field_name = index_name_and_field[first_occurence + 1 :]
         try:
             # In case the amount of indexable points is too large we need to send them in batches
-            batch_size = 100
+            batch_size = vector_engine.embedding_engine.get_batch_size()
             for i in range(0, len(indexable_points), batch_size):
                 batch = indexable_points[i : i + batch_size]
                 await vector_engine.index_data_points(index_name, field_name, batch)
diff --git a/cognee/tasks/storage/index_graph_edges.py b/cognee/tasks/storage/index_graph_edges.py
index 2233ab99f..f4e32f8c8 100644
--- a/cognee/tasks/storage/index_graph_edges.py
+++ b/cognee/tasks/storage/index_graph_edges.py
@@ -9,7 +9,7 @@ from cognee.modules.graph.models.EdgeType import EdgeType
 logger = get_logger(level=ERROR)


-async def index_graph_edges(batch_size: int = 1024):
+async def index_graph_edges():
     """
     Indexes graph edges by creating and managing vector indexes for relationship types.
@@ -72,6 +72,8 @@ async def index_graph_edges(batch_size: int = 1024): for index_name, indexable_points in index_points.items(): index_name, field_name = index_name.split(".") + # Get maximum batch size for embedding model + batch_size = vector_engine.embedding_engine.get_batch_size() # We save the data in batches of {batch_size} to not put a lot of pressure on the database for start in range(0, len(indexable_points), batch_size): batch = indexable_points[start : start + batch_size] diff --git a/cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py b/cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py index 9cd96f5b9..48bbc53e3 100644 --- a/cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +++ b/cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py @@ -1,5 +1,5 @@ import pytest -from unittest.mock import AsyncMock, patch +from unittest.mock import AsyncMock, patch, MagicMock from cognee.tasks.storage.index_graph_edges import index_graph_edges @@ -16,6 +16,7 @@ async def test_index_graph_edges_success(): ], ) mock_vector_engine = AsyncMock() + mock_vector_engine.embedding_engine.get_batch_size = MagicMock(return_value=100) # Patch the globals of the function so that when it does: # vector_engine = get_vector_engine() diff --git a/poetry.lock b/poetry.lock index f873ca5a0..85dc8c879 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. [[package]] name = "accelerate" @@ -878,7 +878,7 @@ description = "Extensible memoizing collections and decorators" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" +markers = "extra == \"deepeval\" or extra == \"chromadb\" or extra == \"docs\"" files = [ {file = "cachetools-6.2.0-py3-none-any.whl", hash = "sha256:1c76a8960c0041fcc21097e357f882197c79da0dbff766e7317890a65d7d8ba6"}, {file = "cachetools-6.2.0.tar.gz", hash = "sha256:38b328c0889450f05f5e120f56ab68c8abaf424e1275522b138ffc93253f7e32"}, @@ -2749,28 +2749,6 @@ files = [ {file = "giturlparse-0.12.0.tar.gz", hash = "sha256:c0fff7c21acc435491b1779566e038757a205c1ffdcb47e4f81ea52ad8c3859a"}, ] -[[package]] -name = "google-ai-generativelanguage" -version = "0.6.15" -description = "Google Ai Generativelanguage API client library" -optional = true -python-versions = ">=3.7" -groups = ["main"] -markers = "extra == \"gemini\"" -files = [ - {file = "google_ai_generativelanguage-0.6.15-py3-none-any.whl", hash = "sha256:5a03ef86377aa184ffef3662ca28f19eeee158733e45d7947982eb953c6ebb6c"}, - {file = "google_ai_generativelanguage-0.6.15.tar.gz", hash = "sha256:8f6d9dc4c12b065fe2d0289026171acea5183ebf2d0b11cefe12f3821e159ec3"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -proto-plus = [ - {version = ">=1.22.3,<2.0.0dev"}, - {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""}, -] -protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev" - [[package]] name = "google-api-core" version = "2.25.1" @@ -2778,7 +2756,7 @@ description = "Google API client core library" optional = true python-versions = 
">=3.7" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"docs\"" +markers = "extra == \"docs\"" files = [ {file = "google_api_core-2.25.1-py3-none-any.whl", hash = "sha256:8a2a56c1fef82987a524371f99f3bd0143702fecc670c72e600c1cda6bf8dbb7"}, {file = "google_api_core-2.25.1.tar.gz", hash = "sha256:d2aaa0b13c78c61cb3f4282c464c046e45fbd75755683c9c525e6e8f7ed0a5e8"}, @@ -2808,26 +2786,6 @@ grpc = ["grpcio (>=1.33.2,<2.0.0)", "grpcio (>=1.49.1,<2.0.0) ; python_version > grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0.0)"] grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.0)"] -[[package]] -name = "google-api-python-client" -version = "2.183.0" -description = "Google API Client Library for Python" -optional = true -python-versions = ">=3.7" -groups = ["main"] -markers = "extra == \"gemini\"" -files = [ - {file = "google_api_python_client-2.183.0-py3-none-any.whl", hash = "sha256:2005b6e86c27be1db1a43f43e047a0f8e004159f3cceddecb08cf1624bddba31"}, - {file = "google_api_python_client-2.183.0.tar.gz", hash = "sha256:abae37e04fecf719388e5c02f707ed9cdf952f10b217c79a3e76c636762e3ea9"}, -] - -[package.dependencies] -google-api-core = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0" -google-auth = ">=1.32.0,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0" -google-auth-httplib2 = ">=0.2.0,<1.0.0" -httplib2 = ">=0.19.0,<1.0.0" -uritemplate = ">=3.0.1,<5" - [[package]] name = "google-auth" version = "2.41.0" @@ -2835,7 +2793,7 @@ description = "Google Authentication Library" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" +markers = "extra == \"deepeval\" or extra == \"chromadb\" or extra == \"docs\"" files = [ {file = "google_auth-2.41.0-py2.py3-none-any.whl", hash = "sha256:d8bed9b53ab63b7b0374656b8e1bef051f95bb14ecc0cf21ba49de7911d62e09"}, {file = "google_auth-2.41.0.tar.gz", hash = "sha256:c9d7b534ea4a5d9813c552846797fafb080312263cd4994d6622dd50992ae101"}, @@ -2856,23 +2814,6 @@ requests = ["requests (>=2.20.0,<3.0.0)"] testing = ["aiohttp (<3.10.0)", "aiohttp (>=3.6.2,<4.0.0)", "aioresponses", "cryptography (<39.0.0) ; python_version < \"3.8\"", "cryptography (<39.0.0) ; python_version < \"3.8\"", "cryptography (>=38.0.3)", "cryptography (>=38.0.3)", "flask", "freezegun", "grpcio", "mock", "oauth2client", "packaging", "pyjwt (>=2.0)", "pyopenssl (<24.3.0)", "pyopenssl (>=20.0.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-localserver", "pyu2f (>=0.1.5)", "requests (>=2.20.0,<3.0.0)", "responses", "urllib3"] urllib3 = ["packaging", "urllib3"] -[[package]] -name = "google-auth-httplib2" -version = "0.2.0" -description = "Google Authentication Library: httplib2 transport" -optional = true -python-versions = "*" -groups = ["main"] -markers = "extra == \"gemini\"" -files = [ - {file = "google-auth-httplib2-0.2.0.tar.gz", hash = "sha256:38aa7badf48f974f1eb9861794e9c0cb2a0511a4ec0679b1f886d108f5640e05"}, - {file = "google_auth_httplib2-0.2.0-py2.py3-none-any.whl", hash = "sha256:b65a0a2123300dd71281a7bf6e64d65a0759287df52729bdd1ae2e47dc311a3d"}, -] - -[package.dependencies] -google-auth = "*" -httplib2 = ">=0.19.0" - [[package]] name = "google-cloud-vision" version = "3.10.2" @@ -2922,31 +2863,6 @@ websockets = ">=13.0.0,<15.1.0" aiohttp = ["aiohttp (<4.0.0)"] local-tokenizer = ["protobuf", "sentencepiece (>=0.2.0)"] -[[package]] -name = "google-generativeai" -version = "0.8.5" -description = "Google Generative AI High level API client library and tools." 
-optional = true -python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"gemini\"" -files = [ - {file = "google_generativeai-0.8.5-py3-none-any.whl", hash = "sha256:22b420817fb263f8ed520b33285f45976d5b21e904da32b80d4fd20c055123a2"}, -] - -[package.dependencies] -google-ai-generativelanguage = "0.6.15" -google-api-core = "*" -google-api-python-client = "*" -google-auth = ">=2.15.0" -protobuf = "*" -pydantic = "*" -tqdm = "*" -typing-extensions = "*" - -[package.extras] -dev = ["Pillow", "absl-py", "black", "ipython", "nose2", "pandas", "pytype", "pyyaml"] - [[package]] name = "googleapis-common-protos" version = "1.70.0" @@ -2954,7 +2870,7 @@ description = "Common protobufs used in Google APIs" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" +markers = "extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" files = [ {file = "googleapis_common_protos-1.70.0-py3-none-any.whl", hash = "sha256:b8bfcca8c25a2bb253e0e0b0adaf8c00773e5e6af6fd92397576680b807e0fd8"}, {file = "googleapis_common_protos-1.70.0.tar.gz", hash = "sha256:0e1b44e0ea153e6594f9f394fef15193a68aaaea2d843f83e2742717ca753257"}, @@ -3104,7 +3020,7 @@ description = "HTTP/2-based RPC framework" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" +markers = "extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" files = [ {file = "grpcio-1.75.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:1712b5890b22547dd29f3215c5788d8fc759ce6dd0b85a6ba6e2731f2d04c088"}, {file = "grpcio-1.75.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:8d04e101bba4b55cea9954e4aa71c24153ba6182481b487ff376da28d4ba46cf"}, @@ -3182,7 +3098,7 @@ description = "Status proto mapping for gRPC" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"docs\"" +markers = "extra == \"docs\"" files = [ {file = "grpcio_status-1.71.2-py3-none-any.whl", hash = "sha256:803c98cb6a8b7dc6dbb785b1111aed739f241ab5e9da0bba96888aa74704cfd3"}, {file = "grpcio_status-1.71.2.tar.gz", hash = "sha256:c7a97e176df71cdc2c179cd1847d7fc86cca5832ad12e9798d7fed6b7a1aab50"}, @@ -3374,22 +3290,6 @@ http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] trio = ["trio (>=0.22.0,<1.0)"] -[[package]] -name = "httplib2" -version = "0.31.0" -description = "A comprehensive HTTP client library." 
-optional = true -python-versions = ">=3.6" -groups = ["main"] -markers = "extra == \"gemini\"" -files = [ - {file = "httplib2-0.31.0-py3-none-any.whl", hash = "sha256:b9cd78abea9b4e43a7714c6e0f8b6b8561a6fc1e95d5dbd367f5bf0ef35f5d24"}, - {file = "httplib2-0.31.0.tar.gz", hash = "sha256:ac7ab497c50975147d4f7b1ade44becc7df2f8954d42b38b3d69c515f531135c"}, -] - -[package.dependencies] -pyparsing = ">=3.0.4,<4" - [[package]] name = "httptools" version = "0.6.4" @@ -4071,8 +3971,6 @@ groups = ["main"] markers = "extra == \"dlt\"" files = [ {file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"}, - {file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"}, - {file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"}, ] [package.dependencies] @@ -8184,7 +8082,7 @@ description = "Beautiful, Pythonic protocol buffers" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"docs\"" +markers = "extra == \"docs\"" files = [ {file = "proto_plus-1.26.1-py3-none-any.whl", hash = "sha256:13285478c2dcf2abb829db158e1047e2f1e8d63a077d94263c2b88b043c75a66"}, {file = "proto_plus-1.26.1.tar.gz", hash = "sha256:21a515a4c4c0088a773899e23c7bbade3d18f9c66c73edd4c7ee3816bc96a012"}, @@ -8256,7 +8154,6 @@ files = [ {file = "psycopg2-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:0435034157049f6846e95103bd8f5a668788dd913a7c30162ca9503fdf542cb4"}, {file = "psycopg2-2.9.10-cp312-cp312-win32.whl", hash = "sha256:65a63d7ab0e067e2cdb3cf266de39663203d38d6a8ed97f5ca0cb315c73fe067"}, {file = "psycopg2-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:4a579d6243da40a7b3182e0430493dbd55950c493d8c68f4eec0b302f6bbf20e"}, - {file = "psycopg2-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:91fd603a2155da8d0cfcdbf8ab24a2d54bca72795b90d2a3ed2b6da8d979dee2"}, {file = "psycopg2-2.9.10-cp39-cp39-win32.whl", hash = "sha256:9d5b3b94b79a844a986d029eee38998232451119ad653aea42bb9220a8c5066b"}, {file = "psycopg2-2.9.10-cp39-cp39-win_amd64.whl", hash = "sha256:88138c8dedcbfa96408023ea2b0c369eda40fe5d75002c0964c78f46f11fa442"}, {file = "psycopg2-2.9.10.tar.gz", hash = "sha256:12ec0b40b0273f95296233e8750441339298e6a572f7039da5b260e3c8b60e11"}, @@ -8318,7 +8215,6 @@ files = [ {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"}, {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"}, {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"}, - {file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"}, @@ -8528,7 +8424,7 @@ description = 
"Pure-Python implementation of ASN.1 types and DER/BER/CER codecs optional = true python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" +markers = "extra == \"deepeval\" or extra == \"chromadb\" or extra == \"docs\"" files = [ {file = "pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"}, {file = "pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"}, @@ -8541,7 +8437,7 @@ description = "A collection of ASN.1-based protocols modules" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" +markers = "extra == \"deepeval\" or extra == \"chromadb\" or extra == \"docs\"" files = [ {file = "pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a"}, {file = "pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6"}, @@ -9430,13 +9326,6 @@ optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "PyYAML-6.0.3-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:c2514fceb77bc5e7a2f7adfaa1feb2fb311607c9cb518dbc378688ec73d8292f"}, - {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c57bb8c96f6d1808c030b1687b9b5fb476abaa47f0db9c0101f5e9f394e97f4"}, - {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:efd7b85f94a6f21e4932043973a7ba2613b059c4a000551892ac9f1d11f5baf3"}, - {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22ba7cfcad58ef3ecddc7ed1db3409af68d023b7f940da23c6c2a1890976eda6"}, - {file = "PyYAML-6.0.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:6344df0d5755a2c9a276d4473ae6b90647e216ab4757f8426893b5dd2ac3f369"}, - {file = "PyYAML-6.0.3-cp38-cp38-win32.whl", hash = "sha256:3ff07ec89bae51176c0549bc4c63aa6202991da2d9a6129d7aef7f1407d3f295"}, - {file = "PyYAML-6.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:5cf4e27da7e3fbed4d6c3d8e797387aaad68102272f8f9752883bc32d61cb87b"}, {file = "pyyaml-6.0.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b"}, {file = "pyyaml-6.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956"}, {file = "pyyaml-6.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8"}, @@ -10249,7 +10138,7 @@ description = "Pure-Python RSA implementation" optional = true python-versions = "<4,>=3.6" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"docs\" or extra == \"deepeval\" or extra == \"chromadb\"" +markers = "extra == \"deepeval\" or extra == \"chromadb\" or extra == \"docs\"" files = [ {file = "rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762"}, {file = "rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75"}, @@ -12003,19 +11892,6 @@ files = [ [package.extras] dev = ["flake8", "flake8-annotations", "flake8-bandit", "flake8-bugbear", "flake8-commas", 
"flake8-comprehensions", "flake8-continuation", "flake8-datetimez", "flake8-docstrings", "flake8-import-order", "flake8-literal", "flake8-modern-annotations", "flake8-noqa", "flake8-pyproject", "flake8-requirements", "flake8-typechecking-import", "flake8-use-fstring", "mypy", "pep8-naming", "types-PyYAML"] -[[package]] -name = "uritemplate" -version = "4.2.0" -description = "Implementation of RFC 6570 URI Templates" -optional = true -python-versions = ">=3.9" -groups = ["main"] -markers = "extra == \"gemini\"" -files = [ - {file = "uritemplate-4.2.0-py3-none-any.whl", hash = "sha256:962201ba1c4edcab02e60f9a0d3821e82dfc5d2d6662a21abd533879bdb8a686"}, - {file = "uritemplate-4.2.0.tar.gz", hash = "sha256:480c2ed180878955863323eea31b0ede668795de182617fef9c6ca09e6ec9d0e"}, -] - [[package]] name = "urllib3" version = "2.3.0" @@ -12882,7 +12758,6 @@ dlt = ["dlt"] docs = ["unstructured"] evals = ["gdown", "matplotlib", "pandas", "plotly", "scikit-learn"] falkordb = ["falkordb"] -gemini = ["google-generativeai"] graphiti = ["graphiti-core"] groq = ["groq"] huggingface = ["transformers"] @@ -12901,4 +12776,4 @@ posthog = ["posthog"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<=3.13" -content-hash = "8c5ebda99705d8bcaf9d14e244a556d2627106d58d0fdce25318b4c1a647197a" +content-hash = "c76267fe685339b5b5665342c81850a3e891cadaf760178bf3b04058f35b1014" diff --git a/pyproject.toml b/pyproject.toml index e2c523a2d..572e9287d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,7 +82,6 @@ langchain = [ "langchain_text_splitters>=0.3.2,<1.0.0", ] llama-index = ["llama-index-core>=0.12.11,<0.13"] -gemini = ["google-generativeai>=0.8.4,<0.9"] huggingface = ["transformers>=4.46.3,<5"] ollama = ["transformers>=4.46.3,<5"] mistral = ["mistral-common>=1.5.2,<2"] diff --git a/uv.lock b/uv.lock index 4e46e1d56..a36416ce5 100644 --- a/uv.lock +++ b/uv.lock @@ -962,9 +962,6 @@ evals = [ falkordb = [ { name = "falkordb" }, ] -gemini = [ - { name = "google-generativeai" }, -] graphiti = [ { name = "graphiti-core" }, ] @@ -1038,7 +1035,6 @@ requires-dist = [ { name = "filetype", specifier = ">=1.2.0,<2.0.0" }, { name = "gdown", marker = "extra == 'evals'", specifier = ">=5.2.0,<6" }, { name = "gitpython", marker = "extra == 'dev'", specifier = ">=3.1.43,<4" }, - { name = "google-generativeai", marker = "extra == 'gemini'", specifier = ">=0.8.4,<0.9" }, { name = "graphiti-core", marker = "extra == 'graphiti'", specifier = ">=0.7.0,<0.8" }, { name = "groq", marker = "extra == 'groq'", specifier = ">=0.8.0,<1.0.0" }, { name = "gunicorn", specifier = ">=20.1.0,<24" }, @@ -1108,7 +1104,7 @@ requires-dist = [ { name = "uvicorn", specifier = ">=0.34.0,<1.0.0" }, { name = "websockets", specifier = ">=15.0.1,<16.0.0" }, ] -provides-extras = ["api", "distributed", "neo4j", "neptune", "postgres", "postgres-binary", "notebook", "langchain", "llama-index", "gemini", "huggingface", "ollama", "mistral", "anthropic", "deepeval", "posthog", "falkordb", "groq", "chromadb", "docs", "codegraph", "evals", "graphiti", "aws", "dlt", "baml", "dev", "debug", "monitoring"] +provides-extras = ["api", "distributed", "neo4j", "neptune", "postgres", "postgres-binary", "notebook", "langchain", "llama-index", "huggingface", "ollama", "mistral", "anthropic", "deepeval", "posthog", "falkordb", "groq", "chromadb", "docs", "codegraph", "evals", "graphiti", "aws", "dlt", "baml", "dev", "debug", "monitoring"] [[package]] name = "colorama" @@ -2176,21 +2172,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/dd/94/c6ff3388b8e3225a014e55aed957188639aa0966443e0408d38f0c9614a7/giturlparse-0.12.0-py2.py3-none-any.whl", hash = "sha256:412b74f2855f1da2fefa89fd8dde62df48476077a72fc19b62039554d27360eb", size = 15752, upload-time = "2023-09-24T07:22:35.465Z" }, ] -[[package]] -name = "google-ai-generativelanguage" -version = "0.6.15" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "google-api-core", extra = ["grpc"] }, - { name = "google-auth" }, - { name = "proto-plus" }, - { name = "protobuf" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/11/d1/48fe5d7a43d278e9f6b5ada810b0a3530bbeac7ed7fcbcd366f932f05316/google_ai_generativelanguage-0.6.15.tar.gz", hash = "sha256:8f6d9dc4c12b065fe2d0289026171acea5183ebf2d0b11cefe12f3821e159ec3", size = 1375443, upload-time = "2025-01-13T21:50:47.459Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7c/a3/67b8a6ff5001a1d8864922f2d6488dc2a14367ceb651bc3f09a947f2f306/google_ai_generativelanguage-0.6.15-py3-none-any.whl", hash = "sha256:5a03ef86377aa184ffef3662ca28f19eeee158733e45d7947982eb953c6ebb6c", size = 1327356, upload-time = "2025-01-13T21:50:44.174Z" }, -] - [[package]] name = "google-api-core" version = "2.25.1" @@ -2213,22 +2194,6 @@ grpc = [ { name = "grpcio-status" }, ] -[[package]] -name = "google-api-python-client" -version = "2.183.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "google-api-core" }, - { name = "google-auth" }, - { name = "google-auth-httplib2" }, - { name = "httplib2" }, - { name = "uritemplate" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/fa/1f/49a2c83fc6dcd8b127cc9efbecf7d5fc36109c2028ba22ed6cb4d072fca4/google_api_python_client-2.183.0.tar.gz", hash = "sha256:abae37e04fecf719388e5c02f707ed9cdf952f10b217c79a3e76c636762e3ea9", size = 13645623, upload-time = "2025-09-23T22:27:00.854Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ab/06/1974f937172854bc7622eff5c2390f33542ceb843f305922922c8f5f7f17/google_api_python_client-2.183.0-py3-none-any.whl", hash = "sha256:2005b6e86c27be1db1a43f43e047a0f8e004159f3cceddecb08cf1624bddba31", size = 14214837, upload-time = "2025-09-23T22:26:57.758Z" }, -] - [[package]] name = "google-auth" version = "2.41.0" @@ -2243,19 +2208,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/ff/a1c426fc9bea7268230bf92340da7d112fae18cf946cafe13ab17d14e6ee/google_auth-2.41.0-py2.py3-none-any.whl", hash = "sha256:d8bed9b53ab63b7b0374656b8e1bef051f95bb14ecc0cf21ba49de7911d62e09", size = 221168, upload-time = "2025-09-29T21:36:33.925Z" }, ] -[[package]] -name = "google-auth-httplib2" -version = "0.2.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "google-auth" }, - { name = "httplib2" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/56/be/217a598a818567b28e859ff087f347475c807a5649296fb5a817c58dacef/google-auth-httplib2-0.2.0.tar.gz", hash = "sha256:38aa7badf48f974f1eb9861794e9c0cb2a0511a4ec0679b1f886d108f5640e05", size = 10842, upload-time = "2023-12-12T17:40:30.722Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/be/8a/fe34d2f3f9470a27b01c9e76226965863f153d5fbe276f83608562e49c04/google_auth_httplib2-0.2.0-py2.py3-none-any.whl", hash = "sha256:b65a0a2123300dd71281a7bf6e64d65a0759287df52729bdd1ae2e47dc311a3d", size = 9253, upload-time = "2023-12-12T17:40:13.055Z" }, -] - [[package]] name = "google-cloud-vision" version = "3.10.2" @@ -2290,24 +2242,6 @@ wheels = [ { 
url = "https://files.pythonhosted.org/packages/cb/c3/12c1f386184d2fcd694b73adeabc3714a5ed65c01cc183b4e3727a26b9d1/google_genai-1.39.1-py3-none-any.whl", hash = "sha256:6ca36c7e40db6fcba7049dfdd102c86da326804f34403bd7d90fa613a45e5a78", size = 244681, upload-time = "2025-09-26T20:56:17.527Z" }, ] -[[package]] -name = "google-generativeai" -version = "0.8.5" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "google-ai-generativelanguage" }, - { name = "google-api-core" }, - { name = "google-api-python-client" }, - { name = "google-auth" }, - { name = "protobuf" }, - { name = "pydantic" }, - { name = "tqdm" }, - { name = "typing-extensions" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/6e/40/c42ff9ded9f09ec9392879a8e6538a00b2dc185e834a3392917626255419/google_generativeai-0.8.5-py3-none-any.whl", hash = "sha256:22b420817fb263f8ed520b33285f45976d5b21e904da32b80d4fd20c055123a2", size = 155427, upload-time = "2025-04-17T00:40:00.67Z" }, -] - [[package]] name = "googleapis-common-protos" version = "1.70.0" @@ -2591,18 +2525,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, ] -[[package]] -name = "httplib2" -version = "0.31.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyparsing" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/52/77/6653db69c1f7ecfe5e3f9726fdadc981794656fcd7d98c4209fecfea9993/httplib2-0.31.0.tar.gz", hash = "sha256:ac7ab497c50975147d4f7b1ade44becc7df2f8954d42b38b3d69c515f531135c", size = 250759, upload-time = "2025-09-11T12:16:03.403Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8c/a2/0d269db0f6163be503775dc8b6a6fa15820cc9fdc866f6ba608d86b721f2/httplib2-0.31.0-py3-none-any.whl", hash = "sha256:b9cd78abea9b4e43a7714c6e0f8b6b8561a6fc1e95d5dbd367f5bf0ef35f5d24", size = 91148, upload-time = "2025-09-11T12:16:01.803Z" }, -] - [[package]] name = "httptools" version = "0.6.4" @@ -4869,7 +4791,7 @@ name = "nvidia-cudnn-cu12" version = "9.10.2.21" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12", marker = "sys_platform != 'emscripten'" }, + { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, @@ -4880,7 +4802,7 @@ name = "nvidia-cufft-cu12" version = "11.3.3.83" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten'" }, + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" }, ] wheels = [ { url = 
"https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, @@ -4907,9 +4829,9 @@ name = "nvidia-cusolver-cu12" version = "11.7.3.90" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12", marker = "sys_platform != 'emscripten'" }, - { name = "nvidia-cusparse-cu12", marker = "sys_platform != 'emscripten'" }, - { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten'" }, + { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" }, + { name = "nvidia-cusparse-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" }, + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, @@ -4920,7 +4842,7 @@ name = "nvidia-cusparse-cu12" version = "12.5.8.93" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten'" }, + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, @@ -8280,7 +8202,7 @@ name = "triton" version = "3.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "setuptools", marker = "sys_platform != 'emscripten'" }, + { name = "setuptools", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/62/ee/0ee5f64a87eeda19bbad9bc54ae5ca5b98186ed00055281fd40fb4beb10e/triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ff2785de9bc02f500e085420273bb5cc9c9bb767584a4aa28d6e360cec70128", size = 155430069, upload-time = "2025-07-30T19:58:21.715Z" }, @@ -8565,15 +8487,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e7/00/3fca040d7cf8a32776d3d81a00c8ee7457e00f80c649f1e4a863c8321ae9/uri_template-1.3.0-py3-none-any.whl", hash = "sha256:a44a133ea12d44a0c0f06d7d42a52d71282e77e2f937d8abd5655b8d56fc1363", size = 11140, upload-time = "2023-06-21T01:49:03.467Z" }, ] -[[package]] -name = "uritemplate" -version = "4.2.0" 
-source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/98/60/f174043244c5306c9988380d2cb10009f91563fc4b31293d27e17201af56/uritemplate-4.2.0.tar.gz", hash = "sha256:480c2ed180878955863323eea31b0ede668795de182617fef9c6ca09e6ec9d0e", size = 33267, upload-time = "2025-06-02T15:12:06.318Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a9/99/3ae339466c9183ea5b8ae87b34c0b897eda475d2aec2307cae60e5cd4f29/uritemplate-4.2.0-py3-none-any.whl", hash = "sha256:962201ba1c4edcab02e60f9a0d3821e82dfc5d2d6662a21abd533879bdb8a686", size = 11488, upload-time = "2025-06-02T15:12:03.405Z" }, -] - [[package]] name = "urllib3" version = "2.3.0"
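Taken together, the engine-level get_batch_size() hook feeds the indexing tasks as in this minimal self-contained sketch (FakeEmbeddingEngine and index_in_batches are illustrative stand-ins for the real engines and for index_data_points/index_graph_edges):

import asyncio
from typing import Any, List


class FakeEmbeddingEngine:
    # Stand-in for the LiteLLM/Fastembed/Ollama engines, which now all expose get_batch_size().
    def __init__(self, batch_size: int = 100):
        self.batch_size = batch_size

    def get_batch_size(self) -> int:
        return self.batch_size


async def index_in_batches(engine: FakeEmbeddingEngine, points: List[Any]) -> List[List[Any]]:
    # Mirrors index_data_points/index_graph_edges: chunk by the engine-reported batch size
    # instead of the previous hard-coded 100 (or the removed batch_size=1024 parameter).
    batch_size = engine.get_batch_size()
    return [points[i : i + batch_size] for i in range(0, len(points), batch_size)]


batches = asyncio.run(index_in_batches(FakeEmbeddingEngine(batch_size=2048), list(range(5000))))
assert [len(batch) for batch in batches] == [2048, 2048, 904]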