Merge pull request #1910 from danielaskdd/openai-option

feat: Add OpenAI LLM Options Support
This commit is contained in:
Daniel.y 2025-08-05 03:55:23 +08:00 committed by GitHub
commit 51deee3d82
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 141 additions and 88 deletions

View file

@ -108,13 +108,14 @@ MAX_PARALLEL_INSERT=2
### Num of chunks sent to Embedding in a single request
# EMBEDDING_BATCH_NUM=10
#######################
###########################################################
### LLM Configuration
#######################
### Some models like o1-mini require temperature to be set to 1; some LLMs can fall into output loops at low temperature
### LLM_BINDING type: openai, ollama, lollms, azure_openai
###########################################################
### LLM temperature setting for all llm binding (openai, azure_openai, ollama)
# TEMPERATURE=1.0
### Some models like o1-mini require temperature to be set to 1; some LLMs can fall into output loops at low temperature
### LLM Binding type: openai, ollama, lollms, azure_openai
LLM_BINDING=openai
LLM_MODEL=gpt-4o
LLM_BINDING_HOST=https://api.openai.com/v1
@ -162,11 +163,18 @@ EMBEDDING_BINDING_HOST=http://localhost:11434
# AZURE_EMBEDDING_API_KEY=your_api_key
### Jina AI Embedding
EMBEDDING_BINDING=jina
EMBEDDING_BINDING_HOST=https://api.jina.ai/v1/embeddings
EMBEDDING_MODEL=jina-embeddings-v4
EMBEDDING_DIM=2048
EMBEDDING_BINDING_API_KEY=your_api_key
# EMBEDDING_BINDING=jina
# EMBEDDING_BINDING_HOST=https://api.jina.ai/v1/embeddings
# EMBEDDING_MODEL=jina-embeddings-v4
# EMBEDDING_DIM=2048
# EMBEDDING_BINDING_API_KEY=your_api_key
### use the following commands to see all supported options for Ollama
### lightrag-server --llm-binding ollama --help
### lightrag-server --embedding-binding ollama --help
### use the following command to see all supported options for openai and azure_openai
### lightrag-server --llm-binding openai --help
####################################################################
### WORKSPACE setting workspace name for all storage types

View file

@ -354,12 +354,20 @@ API 服务器可以通过三种方式配置(优先级从高到低):
LightRAG 支持绑定到各种 LLM/嵌入后端:
* ollama
* lollms
* openai 和 openai 兼容
* azure_openai
* lollms
使用环境变量 `LLM_BINDING` 或 CLI 参数 `--llm-binding` 选择 LLM 后端类型。使用环境变量 `EMBEDDING_BINDING` 或 CLI 参数 `--embedding-binding` 选择嵌入后端类型。
LLM和Embedding配置例子请查看项目根目录的 env.example 文件。OpenAI和Ollama兼容LLM接口支持的完整配置选项可以通过以下命令查看：
```
lightrag-server --llm-binding openai --help
lightrag-server --llm-binding ollama --help
lightrag-server --embedding-binding ollama --help
```
### 实体提取配置
* ENABLE_LLM_CACHE_FOR_EXTRACT为实体提取启用 LLM 缓存默认true

View file

@ -357,12 +357,19 @@ Most of the configurations come with default settings; check out the details in
LightRAG supports binding to various LLM/Embedding backends:
* ollama
* lollms
* openai & openai compatible
* azure_openai
* lollms
Use environment variables `LLM_BINDING` or CLI argument `--llm-binding` to select the LLM backend type. Use environment variables `EMBEDDING_BINDING` or CLI argument `--embedding-binding` to select the Embedding backend type.
For LLM and embedding configuration examples, please refer to the `env.example` file in the project's root directory. To view the complete list of configurable options for OpenAI and Ollama-compatible LLM interfaces, use the following commands:
```
lightrag-server --llm-binding openai --help
lightrag-server --llm-binding ollama --help
lightrag-server --embedding-binding ollama --help
```
### Entity Extraction Configuration
* ENABLE_LLM_CACHE_FOR_EXTRACT: Enable LLM cache for entity extraction (default: true)

View file

@ -7,7 +7,11 @@ import argparse
import logging
from dotenv import load_dotenv
from lightrag.utils import get_env_value
from lightrag.llm.binding_options import OllamaEmbeddingOptions, OllamaLLMOptions
from lightrag.llm.binding_options import (
OllamaEmbeddingOptions,
OllamaLLMOptions,
OpenAILLMOptions,
)
from lightrag.base import OllamaServerInfos
import sys
@ -239,6 +243,20 @@ def parse_args() -> argparse.Namespace:
elif os.environ.get("EMBEDDING_BINDING") == "ollama":
OllamaEmbeddingOptions.add_args(parser)
# Add OpenAI LLM options when llm-binding is openai or azure_openai
if "--llm-binding" in sys.argv:
try:
idx = sys.argv.index("--llm-binding")
if idx + 1 < len(sys.argv) and sys.argv[idx + 1] in [
"openai",
"azure_openai",
]:
OpenAILLMOptions.add_args(parser)
except IndexError:
pass
elif os.environ.get("LLM_BINDING") in ["openai", "azure_openai"]:
OpenAILLMOptions.add_args(parser)
args = parser.parse_args()
# convert relative path to absolute path
@ -311,6 +329,17 @@ def parse_args() -> argparse.Namespace:
# Use the explicitly set OLLAMA_LLM_TEMPERATURE
args.ollama_llm_temperature = float(ollama_llm_temp)
# Handle OpenAI LLM temperature fallback when llm-binding is openai or azure_openai
if args.llm_binding in ["openai", "azure_openai"]:
# Check if OPENAI_LLM_TEMPERATURE is set, if not fallback to global TEMPERATURE
openai_llm_temp = get_env_value("OPENAI_LLM_TEMPERATURE", None)
if openai_llm_temp is None:
# Fallback to global TEMPERATURE value
args.openai_llm_temperature = args.temperature
else:
# Use the explicitly set OPENAI_LLM_TEMPERATURE
args.openai_llm_temperature = float(openai_llm_temp)
# Select Document loading tool (DOCLING, DEFAULT)
args.document_loading_engine = get_env_value("DOCUMENT_LOADING_ENGINE", "DEFAULT")

View file

@ -238,6 +238,7 @@ def create_app(args):
from lightrag.llm.binding_options import OllamaLLMOptions
if args.llm_binding == "openai" or args.embedding_binding == "openai":
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.llm.binding_options import OpenAILLMOptions
if args.llm_binding == "azure_openai" or args.embedding_binding == "azure_openai":
from lightrag.llm.azure_openai import (
azure_openai_complete_if_cache,
@ -262,7 +263,14 @@ def create_app(args):
kwargs["response_format"] = GPTKeywordExtractionFormat
if history_messages is None:
history_messages = []
kwargs["temperature"] = args.temperature
# Use OpenAI LLM options if available, otherwise fallback to global temperature
if args.llm_binding == "openai":
openai_options = OpenAILLMOptions.options_dict(args)
kwargs.update(openai_options)
else:
kwargs["temperature"] = args.temperature
return await openai_complete_if_cache(
args.llm_model,
prompt,
@ -285,7 +293,14 @@ def create_app(args):
kwargs["response_format"] = GPTKeywordExtractionFormat
if history_messages is None:
history_messages = []
kwargs["temperature"] = args.temperature
# Use OpenAI LLM options if available, otherwise fallback to global temperature
if args.llm_binding == "azure_openai":
openai_options = OpenAILLMOptions.options_dict(args)
kwargs.update(openai_options)
else:
kwargs["temperature"] = args.temperature
return await azure_openai_complete_if_cache(
args.llm_model,
prompt,

View file

@ -287,25 +287,6 @@ class BindingOptions:
return asdict(self)
# =============================================================================
# Binding Options for Different LLM Providers
# =============================================================================
#
# This section contains dataclass definitions for various LLM provider options.
# Each binding option class inherits from BindingOptions and defines:
# - _binding_name: Unique identifier for the binding
# - Configuration parameters with default values
# - _help: Dictionary mapping parameter names to help descriptions
#
# To add a new binding:
# 1. Create a new dataclass inheriting from BindingOptions
# 2. Set the _binding_name class variable
# 3. Define configuration parameters as class attributes
# 4. Add corresponding help strings in the _help dictionary
#
# =============================================================================
# =============================================================================
# Binding Options for Ollama
# =============================================================================
@ -407,23 +388,6 @@ class _OllamaOptionsMixin:
}
# =============================================================================
# Ollama Binding Options - Specialized Configurations
# =============================================================================
#
# This section defines specialized binding option classes for different Ollama
# use cases. Both classes inherit from OllamaOptionsMixin to share the complete
# set of Ollama configuration parameters, while providing distinct binding names
# for command-line argument generation and environment variable handling.
#
# OllamaEmbeddingOptions: Specialized for embedding tasks
# OllamaLLMOptions: Specialized for language model/chat tasks
#
# Each class maintains its own binding name prefix, allowing users to configure
# embedding and LLM options independently when both are used in the same application.
# =============================================================================
@dataclass
class OllamaEmbeddingOptions(_OllamaOptionsMixin, BindingOptions):
"""Options for Ollama embeddings with specialized configuration for embedding tasks."""
@ -441,41 +405,46 @@ class OllamaLLMOptions(_OllamaOptionsMixin, BindingOptions):
# =============================================================================
# Additional LLM Provider Bindings
# Binding Options for OpenAI
# =============================================================================
#
# This section is where you can add binding options for other LLM providers.
# Each new binding should follow the same pattern as the Ollama bindings above:
#
# 1. Create a dataclass that inherits from BindingOptions
# 2. Set a unique _binding_name class variable (e.g., "openai", "anthropic")
# 3. Define configuration parameters as class attributes with default values
# 4. Add a _help class variable with descriptions for each parameter
#
# Example template for a new provider:
#
# @dataclass
# class NewProviderOptions(BindingOptions):
# """Options for NewProvider LLM binding."""
#
# _binding_name: ClassVar[str] = "newprovider"
#
# # Configuration parameters
# api_key: str = ""
# max_tokens: int = 1000
# model: str = "default-model"
#
# # Help descriptions
# _help: ClassVar[dict[str, str]] = {
# "api_key": "API key for authentication",
# "max_tokens": "Maximum tokens to generate",
# "model": "Model name to use",
# }
# OpenAI binding options provide configuration for OpenAI's API and Azure OpenAI.
# These options control model behavior, sampling parameters, and generation settings.
# The parameters are based on OpenAI's API specification and provide fine-grained
# control over model inference and generation.
#
# =============================================================================
@dataclass
class OpenAILLMOptions(BindingOptions):
    """Options for OpenAI LLM with configuration for OpenAI and Azure OpenAI API calls.

    Inherits the BindingOptions machinery, which generates CLI arguments
    (prefixed with the binding name, e.g. ``--openai-llm-temperature``) and
    environment-variable handling from the fields declared below. Field
    annotations and defaults are therefore part of the runtime contract, not
    just documentation.
    """

    # mandatory name of binding; used as the prefix for generated CLI args
    # and environment variables (e.g. OPENAI_LLM_TEMPERATURE)
    _binding_name: ClassVar[str] = "openai_llm"

    # Sampling and generation parameters (names mirror the OpenAI
    # chat-completions API request fields)
    frequency_penalty: float = 0.0  # Penalty for token frequency (-2.0 to 2.0)
    # NOTE(review): annotated as int but defaults to None — effectively
    # Optional[int]; confirm BindingOptions' argparse type inference copes
    # with a None default before tightening the annotation.
    max_completion_tokens: int = None  # Maximum number of tokens to generate
    presence_penalty: float = 0.0  # Penalty for token presence (-2.0 to 2.0)
    reasoning_effort: str = "medium"  # Reasoning effort level (low, medium, high)
    safety_identifier: str = ""  # Safety identifier for content filtering
    service_tier: str = ""  # Service tier for API usage
    # default_factory avoids the shared-mutable-default pitfall for the list
    stop: List[str] = field(default_factory=list)  # Stop sequences
    temperature: float = DEFAULT_TEMPERATURE  # Controls randomness (0.0 to 2.0)
    top_p: float = 1.0  # Nucleus sampling parameter (0.0 to 1.0)

    # Help descriptions surfaced by the generated --help output, keyed by
    # field name (must stay in sync with the fields above)
    _help: ClassVar[dict[str, str]] = {
        "frequency_penalty": "Penalty for token frequency (-2.0 to 2.0, positive values discourage repetition)",
        "max_completion_tokens": "Maximum number of tokens to generate (optional, leave empty for model default)",
        "presence_penalty": "Penalty for token presence (-2.0 to 2.0, positive values encourage new topics)",
        "reasoning_effort": "Reasoning effort level for o1 models (low, medium, high)",
        "safety_identifier": "Safety identifier for content filtering (optional)",
        "service_tier": "Service tier for API usage (optional)",
        "stop": 'Stop sequences (JSON array of strings, e.g., \'["</s>", "\\n\\n"]\')',
        "temperature": "Controls randomness (0.0-2.0, higher = more creative)",
        "top_p": "Nucleus sampling parameter (0.0-1.0, lower = more focused)",
    }
# TODO: Add binding options for additional LLM providers here
# Common providers to consider: OpenAI, Anthropic, Cohere, Hugging Face, etc.
# =============================================================================
# Main Section - For Testing and Sample Generation
@ -505,10 +474,11 @@ if __name__ == "__main__":
# dotenv.load_dotenv(stream=env_strstream)
if len(sys.argv) > 1 and sys.argv[1] == "test":
# Add arguments for OllamaEmbeddingOptions and OllamaLLMOptions
parser = ArgumentParser(description="Test Ollama binding")
# Add arguments for OllamaEmbeddingOptions, OllamaLLMOptions, and OpenAILLMOptions
parser = ArgumentParser(description="Test binding options")
OllamaEmbeddingOptions.add_args(parser)
OllamaLLMOptions.add_args(parser)
OpenAILLMOptions.add_args(parser)
# Parse arguments test
args = parser.parse_args(
@ -517,20 +487,36 @@ if __name__ == "__main__":
"1024",
"--ollama-llm-num_ctx",
"2048",
# "--ollama-llm-stop",
# '["</s>", "\\n\\n"]',
"--openai-llm-temperature",
"0.7",
"--openai-llm-max_completion_tokens",
"1000",
"--openai-llm-stop",
'["</s>", "\\n\\n"]',
]
)
print("Final args for LLM and Embedding:")
print(f"{args}\n")
print("LLM options:")
print("Ollama LLM options:")
print(OllamaLLMOptions.options_dict(args))
# print(OllamaLLMOptions(num_ctx=30000).asdict())
print("\nEmbedding options:")
print("\nOllama Embedding options:")
print(OllamaEmbeddingOptions.options_dict(args))
# print(OllamaEmbeddingOptions(**embedding_options).asdict())
print("\nOpenAI LLM options:")
print(OpenAILLMOptions.options_dict(args))
# Test creating OpenAI options instance
openai_options = OpenAILLMOptions(
temperature=0.8,
max_completion_tokens=1500,
frequency_penalty=0.1,
presence_penalty=0.2,
stop=["<|end|>", "\n\n"],
)
print("\nOpenAI LLM options instance:")
print(openai_options.asdict())
else:
print(BindingOptions.generate_dot_env_sample())