feat: add instructor mode env variable and config parameter (#1789)

<!-- .github/pull_request_template.md -->

## Description
<!--
Please provide a clear, human-generated description of the changes in
this PR.
DO NOT use AI-generated descriptions. We want to understand your thought
process and reasoning.
-->
Added a config parameter to control which instructor mode we use. Each
adapter keeps its own default mode, but a user can override it by
setting the `LLM_INSTRUCTOR_MODE` env variable.
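
To make the behavior concrete, here is a minimal standalone sketch of the resolution order (a hypothetical helper, not code from this PR; in the PR itself this fallback lives in each adapter's `__init__`, as the diffs below show):

```python
import os

# Hypothetical standalone version of the per-adapter fallback:
# an explicit LLM_INSTRUCTOR_MODE wins, otherwise the adapter's default applies.
def resolve_instructor_mode(adapter_default: str) -> str:
    override = os.environ.get("LLM_INSTRUCTOR_MODE", "").lower()
    return override if override else adapter_default

assert resolve_instructor_mode("anthropic_tools") == "anthropic_tools"  # unset -> default
os.environ["LLM_INSTRUCTOR_MODE"] = "JSON_MODE"
assert resolve_instructor_mode("anthropic_tools") == "json_mode"  # override is lower-cased
```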

## Type of Change
<!-- Please check the relevant option -->
- [ ] Bug fix (non-breaking change that fixes an issue)
- [x] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to change)
- [ ] Documentation update
- [ ] Code refactoring
- [ ] Performance improvement
- [ ] Other (please specify):

## Screenshots/Videos (if applicable)
<!-- Add screenshots or videos to help explain your changes -->

## Pre-submission Checklist
<!-- Please check all boxes that apply before submitting your PR -->
- [ ] **I have tested my changes thoroughly before submitting this PR**
- [ ] **This PR contains minimal changes necessary to address the
issue/feature**
- [ ] My code follows the project's coding standards and style
guidelines
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have added necessary documentation (if applicable)
- [ ] All new and existing tests pass
- [ ] I have searched existing PRs to ensure this change hasn't been
submitted already
- [ ] I have linked any relevant issues in the description
- [ ] My commits have clear and descriptive messages

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
Commit bcf1d4890f by Vasilije, 2025-11-22 14:18:40 -08:00 (committed by GitHub).
9 changed files with 63 additions and 11 deletions.


@@ -21,6 +21,10 @@ LLM_PROVIDER="openai"
 LLM_ENDPOINT=""
 LLM_API_VERSION=""
 LLM_MAX_TOKENS="16384"
+# Instructor's modes determine how structured data is requested from and extracted from LLM responses
+# You can change this type (i.e. mode) via this env variable
+# Each LLM has its own default value, e.g. gpt-5 models have "json_schema_mode"
+LLM_INSTRUCTOR_MODE=""
 EMBEDDING_PROVIDER="openai"
 EMBEDDING_MODEL="openai/text-embedding-3-large"
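
The accepted values are the string values of instructor's `Mode` enum, so every default named in these comments maps to a real mode. A quick check (assumes the `instructor` package is installed):

```python
import instructor

# The adapter defaults introduced in this PR are all valid Mode values.
assert instructor.Mode("json_mode") is instructor.Mode.JSON
assert instructor.Mode("json_schema_mode") is instructor.Mode.JSON_SCHEMA
assert instructor.Mode("anthropic_tools") is instructor.Mode.ANTHROPIC_TOOLS
assert instructor.Mode("mistral_tools") is instructor.Mode.MISTRAL_TOOLS
```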


@@ -38,6 +38,7 @@ class LLMConfig(BaseSettings):
     """
     structured_output_framework: str = "instructor"
+    llm_instructor_mode: str = ""
     llm_provider: str = "openai"
     llm_model: str = "openai/gpt-5-mini"
     llm_endpoint: str = ""
@@ -181,6 +182,7 @@ class LLMConfig(BaseSettings):
         instance.
         """
         return {
+            "llm_instructor_mode": self.llm_instructor_mode.lower(),
             "provider": self.llm_provider,
             "model": self.llm_model,
             "endpoint": self.llm_endpoint,


@@ -28,13 +28,16 @@ class AnthropicAdapter(LLMInterface):
     name = "Anthropic"
     model: str
+    default_instructor_mode = "anthropic_tools"

-    def __init__(self, max_completion_tokens: int, model: str = None):
+    def __init__(self, max_completion_tokens: int, model: str = None, instructor_mode: str = None):
         import anthropic

+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
         self.aclient = instructor.patch(
             create=anthropic.AsyncAnthropic(api_key=get_llm_config().llm_api_key).messages.create,
-            mode=instructor.Mode.ANTHROPIC_TOOLS,
+            mode=instructor.Mode(self.instructor_mode),
         )
         self.model = model


@@ -41,6 +41,7 @@ class GeminiAdapter(LLMInterface):
     name: str
     model: str
     api_key: str
+    default_instructor_mode = "json_mode"

     def __init__(
         self,
@@ -49,6 +50,7 @@ class GeminiAdapter(LLMInterface):
         model: str,
         api_version: str,
         max_completion_tokens: int,
+        instructor_mode: str = None,
         fallback_model: str = None,
         fallback_api_key: str = None,
         fallback_endpoint: str = None,
@@ -63,7 +65,11 @@ class GeminiAdapter(LLMInterface):
         self.fallback_api_key = fallback_api_key
         self.fallback_endpoint = fallback_endpoint

-        self.aclient = instructor.from_litellm(litellm.acompletion, mode=instructor.Mode.JSON)
+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
+        self.aclient = instructor.from_litellm(
+            litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
+        )

     @retry(
         stop=stop_after_delay(128),


@@ -41,6 +41,7 @@ class GenericAPIAdapter(LLMInterface):
     name: str
     model: str
     api_key: str
+    default_instructor_mode = "json_mode"

     def __init__(
         self,
@@ -49,6 +50,7 @@ class GenericAPIAdapter(LLMInterface):
         model: str,
         name: str,
         max_completion_tokens: int,
+        instructor_mode: str = None,
         fallback_model: str = None,
         fallback_api_key: str = None,
         fallback_endpoint: str = None,
@@ -63,7 +65,11 @@ class GenericAPIAdapter(LLMInterface):
         self.fallback_api_key = fallback_api_key
         self.fallback_endpoint = fallback_endpoint

-        self.aclient = instructor.from_litellm(litellm.acompletion, mode=instructor.Mode.JSON)
+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
+        self.aclient = instructor.from_litellm(
+            litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
+        )

     @retry(
         stop=stop_after_delay(128),


@@ -81,6 +81,7 @@ def get_llm_client(raise_api_key_error: bool = True):
             model=llm_config.llm_model,
             transcription_model=llm_config.transcription_model,
             max_completion_tokens=max_completion_tokens,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
             streaming=llm_config.llm_streaming,
             fallback_api_key=llm_config.fallback_api_key,
             fallback_endpoint=llm_config.fallback_endpoint,
@@ -101,6 +102,7 @@ def get_llm_client(raise_api_key_error: bool = True):
             llm_config.llm_model,
             "Ollama",
             max_completion_tokens=max_completion_tokens,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
         )

     elif provider == LLMProvider.ANTHROPIC:
@@ -109,7 +111,9 @@ def get_llm_client(raise_api_key_error: bool = True):
         )

         return AnthropicAdapter(
-            max_completion_tokens=max_completion_tokens, model=llm_config.llm_model
+            max_completion_tokens=max_completion_tokens,
+            model=llm_config.llm_model,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
         )

     elif provider == LLMProvider.CUSTOM:
@@ -126,6 +130,7 @@ def get_llm_client(raise_api_key_error: bool = True):
             llm_config.llm_model,
             "Custom",
             max_completion_tokens=max_completion_tokens,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
             fallback_api_key=llm_config.fallback_api_key,
             fallback_endpoint=llm_config.fallback_endpoint,
             fallback_model=llm_config.fallback_model,
@@ -145,6 +150,7 @@ def get_llm_client(raise_api_key_error: bool = True):
             max_completion_tokens=max_completion_tokens,
             endpoint=llm_config.llm_endpoint,
             api_version=llm_config.llm_api_version,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
         )

     elif provider == LLMProvider.MISTRAL:
@@ -160,6 +166,7 @@ def get_llm_client(raise_api_key_error: bool = True):
             model=llm_config.llm_model,
             max_completion_tokens=max_completion_tokens,
             endpoint=llm_config.llm_endpoint,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
         )

     else:
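
End to end, usage looks roughly like the sketch below (the import path is assumed rather than taken from this diff; the variable must be set before the LLM config is first loaded, since configs are typically cached):

```python
import os

# Override the adapter default before the client is built; get_llm_client
# lower-cases the value, so "JSON_MODE" and "json_mode" behave the same.
os.environ["LLM_INSTRUCTOR_MODE"] = "json_mode"

from cognee.infrastructure.llm.get_llm_client import get_llm_client  # assumed path

client = get_llm_client()
print(client.instructor_mode)  # "json_mode" rather than the adapter's default
```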


@@ -37,16 +37,26 @@ class MistralAdapter(LLMInterface):
     model: str
     api_key: str
     max_completion_tokens: int
+    default_instructor_mode = "mistral_tools"

-    def __init__(self, api_key: str, model: str, max_completion_tokens: int, endpoint: str = None):
+    def __init__(
+        self,
+        api_key: str,
+        model: str,
+        max_completion_tokens: int,
+        endpoint: str = None,
+        instructor_mode: str = None,
+    ):
         from mistralai import Mistral

         self.model = model
         self.max_completion_tokens = max_completion_tokens
+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode

         self.aclient = instructor.from_litellm(
             litellm.acompletion,
-            mode=instructor.Mode.MISTRAL_TOOLS,
+            mode=instructor.Mode(self.instructor_mode),
             api_key=get_llm_config().llm_api_key,
         )


@@ -42,8 +42,16 @@ class OllamaAPIAdapter(LLMInterface):
     - aclient
     """

+    default_instructor_mode = "json_mode"

     def __init__(
-        self, endpoint: str, api_key: str, model: str, name: str, max_completion_tokens: int
+        self,
+        endpoint: str,
+        api_key: str,
+        model: str,
+        name: str,
+        max_completion_tokens: int,
+        instructor_mode: str = None,
     ):
         self.name = name
         self.model = model
@@ -51,8 +59,11 @@ class OllamaAPIAdapter(LLMInterface):
         self.endpoint = endpoint
         self.max_completion_tokens = max_completion_tokens
+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode

         self.aclient = instructor.from_openai(
-            OpenAI(base_url=self.endpoint, api_key=self.api_key), mode=instructor.Mode.JSON
+            OpenAI(base_url=self.endpoint, api_key=self.api_key),
+            mode=instructor.Mode(self.instructor_mode),
         )

     @retry(


@@ -56,6 +56,7 @@ class OpenAIAdapter(LLMInterface):
     model: str
     api_key: str
     api_version: str
+    default_instructor_mode = "json_schema_mode"

     MAX_RETRIES = 5
@@ -69,19 +70,21 @@ class OpenAIAdapter(LLMInterface):
         model: str,
         transcription_model: str,
         max_completion_tokens: int,
+        instructor_mode: str = None,
         streaming: bool = False,
         fallback_model: str = None,
         fallback_api_key: str = None,
         fallback_endpoint: str = None,
     ):
+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
         # TODO: With gpt5 series models OpenAI expects JSON_SCHEMA as a mode for structured outputs.
         # Make sure all new gpt models will work with this mode as well.
         if "gpt-5" in model:
             self.aclient = instructor.from_litellm(
-                litellm.acompletion, mode=instructor.Mode.JSON_SCHEMA
+                litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
             )
             self.client = instructor.from_litellm(
-                litellm.completion, mode=instructor.Mode.JSON_SCHEMA
+                litellm.completion, mode=instructor.Mode(self.instructor_mode)
             )
         else:
             self.aclient = instructor.from_litellm(litellm.acompletion)
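
For the gpt-5 branch above, a small sketch of what the new resolution yields (mode strings taken from this diff; `instructor.Mode` maps them to enum members):

```python
import instructor

# With no override, the gpt-5 branch resolves to the same mode as before the PR.
default = "json_schema_mode"  # OpenAIAdapter.default_instructor_mode in this PR
assert instructor.Mode(default) is instructor.Mode.JSON_SCHEMA

# An explicit override (e.g. LLM_INSTRUCTOR_MODE="tool_call") takes precedence.
override = "tool_call"
assert instructor.Mode(override or default) is instructor.Mode.TOOLS
```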