From adf7ec8e353469082697fd70a933f57975b9e135 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Tue, 5 Aug 2025 03:47:26 +0800
Subject: [PATCH] feat: Add OpenAI LLM Options support with BindingOptions framework

- Add OpenAILLMOptions dataclass with full OpenAI API parameter support
- Integrate OpenAI options in config.py for automatic binding detection
- Update server functions to inject OpenAI options for openai/azure_openai bindings
---
 env.example                     |  26 ++++---
 lightrag/api/README-zh.md       |  10 ++-
 lightrag/api/README.md          |   9 ++-
 lightrag/api/config.py          |  31 +++++++-
 lightrag/api/lightrag_server.py |  19 ++++-
 lightrag/llm/binding_options.py | 134 ++++++++++++++------------------
 6 files changed, 141 insertions(+), 88 deletions(-)

diff --git a/env.example b/env.example
index 6d5b40b0..3967e00e 100644
--- a/env.example
+++ b/env.example
@@ -108,13 +108,14 @@ MAX_PARALLEL_INSERT=2
 ### Num of chunks send to Embedding in single request
 # EMBEDDING_BATCH_NUM=10

-#######################
+###########################################################
 ### LLM Configuration
-#######################
-### Some models like o1-mini require temperature to be set to 1, some LLM can fall into output loops with low temperature
+### LLM_BINDING type: openai, ollama, lollms, azure_openai
+###########################################################
+### LLM temperature setting for all LLM bindings (openai, azure_openai, ollama)
 # TEMPERATURE=1.0
+### Some models like o1-mini require temperature to be set to 1; some LLMs can fall into output loops with low temperature

-### LLM Binding type: openai, ollama, lollms, azure_openai
 LLM_BINDING=openai
 LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
@@ -162,11 +163,18 @@ EMBEDDING_BINDING_HOST=http://localhost:11434
 # AZURE_EMBEDDING_API_KEY=your_api_key

 ### Jina AI Embedding
-EMBEDDING_BINDING=jina
-EMBEDDING_BINDING_HOST=https://api.jina.ai/v1/embeddings
-EMBEDDING_MODEL=jina-embeddings-v4
-EMBEDDING_DIM=2048
-EMBEDDING_BINDING_API_KEY=your_api_key
+# EMBEDDING_BINDING=jina
+# EMBEDDING_BINDING_HOST=https://api.jina.ai/v1/embeddings
+# EMBEDDING_MODEL=jina-embeddings-v4
+# EMBEDDING_DIM=2048
+# EMBEDDING_BINDING_API_KEY=your_api_key
+
+### Use the following commands to see all supported options for Ollama
+### lightrag-server --llm-binding ollama --help
+### lightrag-server --embedding-binding ollama --help
+
+### Use the following command to see all supported options for openai and azure_openai
+### lightrag-server --llm-binding openai --help

 ####################################################################
 ### WORKSPACE setting workspace name for all storage types
diff --git a/lightrag/api/README-zh.md b/lightrag/api/README-zh.md
index 428e65d5..b80419e1 100644
--- a/lightrag/api/README-zh.md
+++ b/lightrag/api/README-zh.md
@@ -354,12 +354,20 @@ API 服务器可以通过三种方式配置(优先级从高到低):
 LightRAG 支持绑定到各种 LLM/嵌入后端:

 * ollama
-* lollms
 * openai 和 openai 兼容
 * azure_openai
+* lollms

 使用环境变量 `LLM_BINDING` 或 CLI 参数 `--llm-binding` 选择 LLM 后端类型。使用环境变量 `EMBEDDING_BINDING` 或 CLI 参数 `--embedding-binding` 选择嵌入后端类型。

+LLM和Embedding配置例子请查看项目根目录的 env.example 文件。OpenAI和Ollama兼容LLM接口支持的完整配置选项可以通过以下命令查看:
+
+```
+lightrag-server --llm-binding openai --help
+lightrag-server --llm-binding ollama --help
+lightrag-server --embedding-binding ollama --help
+```
+
 ### 实体提取配置

 * ENABLE_LLM_CACHE_FOR_EXTRACT:为实体提取启用 LLM 缓存(默认:true)
diff --git a/lightrag/api/README.md b/lightrag/api/README.md
index 71076059..6e0a59de 100644
--- a/lightrag/api/README.md
+++ b/lightrag/api/README.md
@@ -357,12 +357,19 @@ Most of the configurations come with default settings; check out the details in
 LightRAG supports binding to various LLM/Embedding backends:

 * ollama
-* lollms
 * openai & openai compatible
 * azure_openai
+* lollms

 Use environment variables `LLM_BINDING` or CLI argument `--llm-binding` to select the LLM backend type. Use environment variables `EMBEDDING_BINDING` or CLI argument `--embedding-binding` to select the Embedding backend type.

+For LLM and embedding configuration examples, please refer to the `env.example` file in the project's root directory. To view the complete list of configurable options for OpenAI and Ollama-compatible LLM interfaces, use the following commands:
+```
+lightrag-server --llm-binding openai --help
+lightrag-server --llm-binding ollama --help
+lightrag-server --embedding-binding ollama --help
+```
+
 ### Entity Extraction Configuration

 * ENABLE_LLM_CACHE_FOR_EXTRACT: Enable LLM cache for entity extraction (default: true)
diff --git a/lightrag/api/config.py b/lightrag/api/config.py
index 8e9266a3..bf74460d 100644
--- a/lightrag/api/config.py
+++ b/lightrag/api/config.py
@@ -7,7 +7,11 @@ import argparse
 import logging
 from dotenv import load_dotenv
 from lightrag.utils import get_env_value
-from lightrag.llm.binding_options import OllamaEmbeddingOptions, OllamaLLMOptions
+from lightrag.llm.binding_options import (
+    OllamaEmbeddingOptions,
+    OllamaLLMOptions,
+    OpenAILLMOptions,
+)
 from lightrag.base import OllamaServerInfos
 import sys

@@ -239,6 +243,20 @@ def parse_args() -> argparse.Namespace:
     elif os.environ.get("EMBEDDING_BINDING") == "ollama":
         OllamaEmbeddingOptions.add_args(parser)

+    # Add OpenAI LLM options when llm-binding is openai or azure_openai
+    if "--llm-binding" in sys.argv:
+        try:
+            idx = sys.argv.index("--llm-binding")
+            if idx + 1 < len(sys.argv) and sys.argv[idx + 1] in [
+                "openai",
+                "azure_openai",
+            ]:
+                OpenAILLMOptions.add_args(parser)
+        except IndexError:
+            pass
+    elif os.environ.get("LLM_BINDING") in ["openai", "azure_openai"]:
+        OpenAILLMOptions.add_args(parser)
+
     args = parser.parse_args()

     # convert relative path to absolute path
@@ -311,6 +329,17 @@ def parse_args() -> argparse.Namespace:
         # Use the explicitly set OLLAMA_LLM_TEMPERATURE
         args.ollama_llm_temperature = float(ollama_llm_temp)

+    # Handle OpenAI LLM temperature fallback when llm-binding is openai or azure_openai
+    if args.llm_binding in ["openai", "azure_openai"]:
+        # Check if OPENAI_LLM_TEMPERATURE is set; if not, fall back to the global TEMPERATURE
+        openai_llm_temp = get_env_value("OPENAI_LLM_TEMPERATURE", None)
+        if openai_llm_temp is None:
+            # Fall back to the global TEMPERATURE value
+            args.openai_llm_temperature = args.temperature
+        else:
+            # Use the explicitly set OPENAI_LLM_TEMPERATURE
+            args.openai_llm_temperature = float(openai_llm_temp)
+
     # Select Document loading tool (DOCLING, DEFAULT)
     args.document_loading_engine = get_env_value("DOCUMENT_LOADING_ENGINE", "DEFAULT")
diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index a4d0c345..699f59ac 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -238,6 +238,7 @@ def create_app(args):
         from lightrag.llm.binding_options import OllamaLLMOptions
     if args.llm_binding == "openai" or args.embedding_binding == "openai":
         from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+        from lightrag.llm.binding_options import OpenAILLMOptions
     if args.llm_binding == "azure_openai" or args.embedding_binding == "azure_openai":
         from lightrag.llm.azure_openai import (
             azure_openai_complete_if_cache,
@@ -262,7 +263,14 @@ def create_app(args):
             kwargs["response_format"] = GPTKeywordExtractionFormat
         if history_messages is None:
             history_messages = []
-        kwargs["temperature"] = args.temperature
+
+        # Use OpenAI LLM options if available, otherwise fall back to the global temperature
+        if args.llm_binding == "openai":
+            openai_options = OpenAILLMOptions.options_dict(args)
+            kwargs.update(openai_options)
+        else:
+            kwargs["temperature"] = args.temperature
+
         return await openai_complete_if_cache(
             args.llm_model,
             prompt,
@@ -285,7 +293,14 @@ def create_app(args):
             kwargs["response_format"] = GPTKeywordExtractionFormat
         if history_messages is None:
             history_messages = []
-        kwargs["temperature"] = args.temperature
+
+        # Use OpenAI LLM options if available, otherwise fall back to the global temperature
+        if args.llm_binding == "azure_openai":
+            openai_options = OpenAILLMOptions.options_dict(args)
+            kwargs.update(openai_options)
+        else:
+            kwargs["temperature"] = args.temperature
+
         return await azure_openai_complete_if_cache(
             args.llm_model,
             prompt,
diff --git a/lightrag/llm/binding_options.py b/lightrag/llm/binding_options.py
index 27272e62..827620ee 100644
--- a/lightrag/llm/binding_options.py
+++ b/lightrag/llm/binding_options.py
@@ -287,25 +287,6 @@ class BindingOptions:
         return asdict(self)


-# =============================================================================
-# Binding Options for Different LLM Providers
-# =============================================================================
-#
-# This section contains dataclass definitions for various LLM provider options.
-# Each binding option class inherits from BindingOptions and defines:
-# - _binding_name: Unique identifier for the binding
-# - Configuration parameters with default values
-# - _help: Dictionary mapping parameter names to help descriptions
-#
-# To add a new binding:
-# 1. Create a new dataclass inheriting from BindingOptions
-# 2. Set the _binding_name class variable
-# 3. Define configuration parameters as class attributes
-# 4. Add corresponding help strings in the _help dictionary
-#
-# =============================================================================
-
-
 # =============================================================================
 # Binding Options for Ollama
 # =============================================================================
@@ -407,23 +388,6 @@ class _OllamaOptionsMixin:
     }


-# =============================================================================
-# Ollama Binding Options - Specialized Configurations
-# =============================================================================
-#
-# This section defines specialized binding option classes for different Ollama
-# use cases. Both classes inherit from OllamaOptionsMixin to share the complete
-# set of Ollama configuration parameters, while providing distinct binding names
-# for command-line argument generation and environment variable handling.
-#
-# OllamaEmbeddingOptions: Specialized for embedding tasks
-# OllamaLLMOptions: Specialized for language model/chat tasks
-#
-# Each class maintains its own binding name prefix, allowing users to configure
-# embedding and LLM options independently when both are used in the same application.
-# =============================================================================
-
-
 @dataclass
 class OllamaEmbeddingOptions(_OllamaOptionsMixin, BindingOptions):
     """Options for Ollama embeddings with specialized configuration for embedding tasks."""
@@ -441,41 +405,46 @@ class OllamaLLMOptions(_OllamaOptionsMixin, BindingOptions):


 # =============================================================================
-# Additional LLM Provider Bindings
+# Binding Options for OpenAI
 # =============================================================================
 #
-# This section is where you can add binding options for other LLM providers.
-# Each new binding should follow the same pattern as the Ollama bindings above:
-#
-# 1. Create a dataclass that inherits from BindingOptions
-# 2. Set a unique _binding_name class variable (e.g., "openai", "anthropic")
-# 3. Define configuration parameters as class attributes with default values
-# 4. Add a _help class variable with descriptions for each parameter
-#
-# Example template for a new provider:
-#
-# @dataclass
-# class NewProviderOptions(BindingOptions):
-#     """Options for NewProvider LLM binding."""
-#
-#     _binding_name: ClassVar[str] = "newprovider"
-#
-#     # Configuration parameters
-#     api_key: str = ""
-#     max_tokens: int = 1000
-#     model: str = "default-model"
-#
-#     # Help descriptions
-#     _help: ClassVar[dict[str, str]] = {
-#         "api_key": "API key for authentication",
-#         "max_tokens": "Maximum tokens to generate",
-#         "model": "Model name to use",
-#     }
+#
+# OpenAI binding options provide configuration for OpenAI's API and Azure OpenAI.
+# These options control model behavior, sampling parameters, and generation settings.
+# The parameters are based on OpenAI's API specification and provide fine-grained
+# control over model inference and generation.
 #
 # =============================================================================


+@dataclass
+class OpenAILLMOptions(BindingOptions):
+    """Options for OpenAI LLM with configuration for OpenAI and Azure OpenAI API calls."""
+
+    # mandatory name of binding
+    _binding_name: ClassVar[str] = "openai_llm"
+
+    # Sampling and generation parameters
+    frequency_penalty: float = 0.0  # Penalty for token frequency (-2.0 to 2.0)
+    max_completion_tokens: int = None  # Maximum number of tokens to generate
+    presence_penalty: float = 0.0  # Penalty for token presence (-2.0 to 2.0)
+    reasoning_effort: str = "medium"  # Reasoning effort level (low, medium, high)
+    safety_identifier: str = ""  # Stable end-user identifier for safety monitoring
+    service_tier: str = ""  # Service tier for API usage
+    stop: List[str] = field(default_factory=list)  # Stop sequences
+    temperature: float = DEFAULT_TEMPERATURE  # Controls randomness (0.0 to 2.0)
+    top_p: float = 1.0  # Nucleus sampling parameter (0.0 to 1.0)
+
+    # Help descriptions
+    _help: ClassVar[dict[str, str]] = {
+        "frequency_penalty": "Penalty for token frequency (-2.0 to 2.0, positive values discourage repetition)",
+        "max_completion_tokens": "Maximum number of tokens to generate (optional, leave empty for model default)",
+        "presence_penalty": "Penalty for token presence (-2.0 to 2.0, positive values encourage new topics)",
+        "reasoning_effort": "Reasoning effort level for o1 models (low, medium, high)",
+        "safety_identifier": "Stable end-user identifier used for safety monitoring (optional)",
+        "service_tier": "Service tier for API usage (optional)",
+        "stop": 'Stop sequences (JSON array of strings, e.g., \'["<|end|>", "\\n\\n"]\')',
+        "temperature": "Controls randomness (0.0-2.0, higher = more creative)",
+        "top_p": "Nucleus sampling parameter (0.0-1.0, lower = more focused)",
+    }


-# TODO: Add binding options for additional LLM providers here
-# Common providers to consider: OpenAI, Anthropic, Cohere, Hugging Face, etc.
 # =============================================================================
 # Main Section - For Testing and Sample Generation
 # =============================================================================
@@ -505,10 +474,11 @@ if __name__ == "__main__":
     # dotenv.load_dotenv(stream=env_strstream)

     if len(sys.argv) > 1 and sys.argv[1] == "test":
-        # Add arguments for OllamaEmbeddingOptions and OllamaLLMOptions
-        parser = ArgumentParser(description="Test Ollama binding")
+        # Add arguments for OllamaEmbeddingOptions, OllamaLLMOptions, and OpenAILLMOptions
+        parser = ArgumentParser(description="Test binding options")
         OllamaEmbeddingOptions.add_args(parser)
         OllamaLLMOptions.add_args(parser)
+        OpenAILLMOptions.add_args(parser)

         # Parse arguments test
         args = parser.parse_args(
@@ -517,20 +487,36 @@ if __name__ == "__main__":
                 "1024",
                 "--ollama-llm-num_ctx",
                 "2048",
-                # "--ollama-llm-stop",
-                # '["<|end|>", "\\n\\n"]',
+                "--openai-llm-temperature",
+                "0.7",
+                "--openai-llm-max_completion_tokens",
+                "1000",
+                "--openai-llm-stop",
+                '["<|end|>", "\\n\\n"]',
             ]
         )

         print("Final args for LLM and Embedding:")
         print(f"{args}\n")

-        print("LLM options:")
+        print("Ollama LLM options:")
         print(OllamaLLMOptions.options_dict(args))
-        # print(OllamaLLMOptions(num_ctx=30000).asdict())

-        print("\nEmbedding options:")
+        print("\nOllama Embedding options:")
         print(OllamaEmbeddingOptions.options_dict(args))
-        # print(OllamaEmbeddingOptions(**embedding_options).asdict())
+
+        print("\nOpenAI LLM options:")
+        print(OpenAILLMOptions.options_dict(args))
+
+        # Test creating OpenAI options instance
+        openai_options = OpenAILLMOptions(
+            temperature=0.8,
+            max_completion_tokens=1500,
+            frequency_penalty=0.1,
+            presence_penalty=0.2,
+            stop=["<|end|>", "\n\n"],
+        )
+        print("\nOpenAI LLM options instance:")
+        print(openai_options.asdict())
     else:
         print(BindingOptions.generate_dot_env_sample())
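
The sketch below is not part of the patch; it shows how the new `OpenAILLMOptions` flags flow into call kwargs, mirroring the test harness in `binding_options.py` above. It assumes a LightRAG installation with this patch applied; the `--openai-llm-` flag prefix follows from the `_binding_name = "openai_llm"` declaration, and the `--openai-llm-top_p` flag is inferred from the `top_p` field by the same pattern as `--ollama-llm-num_ctx`.

```python
# Hedged sketch (not part of the patch): exercising OpenAILLMOptions the same
# way the test harness above does. Assumes LightRAG with this patch installed.
from argparse import ArgumentParser

from lightrag.llm.binding_options import OpenAILLMOptions

parser = ArgumentParser(description="OpenAILLMOptions demo")
OpenAILLMOptions.add_args(parser)  # registers the --openai-llm-* flags

args = parser.parse_args(
    [
        "--openai-llm-temperature",
        "0.7",
        "--openai-llm-top_p",
        "0.9",
    ]
)

# options_dict() returns plain kwargs such as {"temperature": 0.7, ...};
# lightrag_server.py merges them into openai_complete_if_cache() /
# azure_openai_complete_if_cache() via kwargs.update().
print(OpenAILLMOptions.options_dict(args))
```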
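The temperature fallback added to `config.py` can be restated standalone as below; this is an illustration of the patched logic, not code from the patch:

```python
# Illustration only: restatement of the OPENAI_LLM_TEMPERATURE fallback added
# to config.py. An explicitly set OPENAI_LLM_TEMPERATURE wins; otherwise the
# global TEMPERATURE value is used for the openai/azure_openai bindings.
import os


def resolve_openai_temperature(global_temperature: float) -> float:
    value = os.environ.get("OPENAI_LLM_TEMPERATURE")
    if value is None:
        return global_temperature  # fall back to the global TEMPERATURE
    return float(value)


os.environ["OPENAI_LLM_TEMPERATURE"] = "0.2"
print(resolve_openai_temperature(1.0))  # prints 0.2
```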