Merge pull request #1910 from danielaskdd/openai-option

feat: Add OpenAI LLM Options Support
This commit is contained in:
Daniel.y 2025-08-05 03:55:23 +08:00 committed by GitHub
commit 51deee3d82
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 141 additions and 88 deletions

View file

@ -108,13 +108,14 @@ MAX_PARALLEL_INSERT=2
### Num of chunks sent to Embedding in a single request
# EMBEDDING_BATCH_NUM=10
#######################
###########################################################
### LLM Configuration
#######################
### Some models like o1-mini require temperature to be set to 1; some LLMs can fall into output loops at low temperature
### LLM_BINDING type: openai, ollama, lollms, azure_openai
###########################################################
### LLM temperature setting for all llm binding (openai, azure_openai, ollama)
# TEMPERATURE=1.0
### Some models like o1-mini require temperature to be set to 1; some LLMs can fall into output loops at low temperature
### LLM Binding type: openai, ollama, lollms, azure_openai
LLM_BINDING=openai
LLM_MODEL=gpt-4o
LLM_BINDING_HOST=https://api.openai.com/v1
@ -162,11 +163,18 @@ EMBEDDING_BINDING_HOST=http://localhost:11434
# AZURE_EMBEDDING_API_KEY=your_api_key
### Jina AI Embedding
EMBEDDING_BINDING=jina
EMBEDDING_BINDING_HOST=https://api.jina.ai/v1/embeddings
EMBEDDING_MODEL=jina-embeddings-v4
EMBEDDING_DIM=2048
EMBEDDING_BINDING_API_KEY=your_api_key
# EMBEDDING_BINDING=jina
# EMBEDDING_BINDING_HOST=https://api.jina.ai/v1/embeddings
# EMBEDDING_MODEL=jina-embeddings-v4
# EMBEDDING_DIM=2048
# EMBEDDING_BINDING_API_KEY=your_api_key
### use the following commands to see all supported options for Ollama
### lightrag-server --llm-binding ollama --help
### lightrag-server --embedding-binding ollama --help
### use the following command to see all supported options for openai and azure_openai
### lightrag-server --llm-binding openai --help
####################################################################
### WORKSPACE setting workspace name for all storage types

View file

@ -354,12 +354,20 @@ API 服务器可以通过三种方式配置(优先级从高到低):
LightRAG 支持绑定到各种 LLM/嵌入后端:
* ollama
* lollms
* openai 和 openai 兼容
* azure_openai
* lollms
使用环境变量 `LLM_BINDING` 或 CLI 参数 `--llm-binding` 选择 LLM 后端类型。使用环境变量 `EMBEDDING_BINDING` 或 CLI 参数 `--embedding-binding` 选择嵌入后端类型。
LLM和Embedding配置例子请查看项目根目录的 env.example 文件。OpenAI和Ollama兼容LLM接口支持的完整配置选项可以通过以下命令查看：
```
lightrag-server --llm-binding openai --help
lightrag-server --llm-binding ollama --help
lightrag-server --embedding-binding ollama --help
```
### 实体提取配置
* ENABLE_LLM_CACHE_FOR_EXTRACT为实体提取启用 LLM 缓存默认true

View file

@ -357,12 +357,19 @@ Most of the configurations come with default settings; check out the details in
LightRAG supports binding to various LLM/Embedding backends:
* ollama
* lollms
* openai & openai compatible
* azure_openai
* lollms
Use environment variables `LLM_BINDING` or CLI argument `--llm-binding` to select the LLM backend type. Use environment variables `EMBEDDING_BINDING` or CLI argument `--embedding-binding` to select the Embedding backend type.
For LLM and embedding configuration examples, please refer to the `env.example` file in the project's root directory. To view the complete list of configurable options for OpenAI and Ollama-compatible LLM interfaces, use the following commands:
```
lightrag-server --llm-binding openai --help
lightrag-server --llm-binding ollama --help
lightrag-server --embedding-binding ollama --help
```
### Entity Extraction Configuration
* ENABLE_LLM_CACHE_FOR_EXTRACT: Enable LLM cache for entity extraction (default: true)

View file

@ -7,7 +7,11 @@ import argparse
import logging
from dotenv import load_dotenv
from lightrag.utils import get_env_value
from lightrag.llm.binding_options import OllamaEmbeddingOptions, OllamaLLMOptions
from lightrag.llm.binding_options import (
OllamaEmbeddingOptions,
OllamaLLMOptions,
OpenAILLMOptions,
)
from lightrag.base import OllamaServerInfos
import sys
@ -239,6 +243,20 @@ def parse_args() -> argparse.Namespace:
elif os.environ.get("EMBEDDING_BINDING") == "ollama":
OllamaEmbeddingOptions.add_args(parser)
# Add OpenAI LLM options when llm-binding is openai or azure_openai
if "--llm-binding" in sys.argv:
try:
idx = sys.argv.index("--llm-binding")
if idx + 1 < len(sys.argv) and sys.argv[idx + 1] in [
"openai",
"azure_openai",
]:
OpenAILLMOptions.add_args(parser)
except IndexError:
pass
elif os.environ.get("LLM_BINDING") in ["openai", "azure_openai"]:
OpenAILLMOptions.add_args(parser)
args = parser.parse_args()
# convert relative path to absolute path
@ -311,6 +329,17 @@ def parse_args() -> argparse.Namespace:
# Use the explicitly set OLLAMA_LLM_TEMPERATURE
args.ollama_llm_temperature = float(ollama_llm_temp)
# Handle OpenAI LLM temperature fallback when llm-binding is openai or azure_openai
if args.llm_binding in ["openai", "azure_openai"]:
# Check if OPENAI_LLM_TEMPERATURE is set, if not fallback to global TEMPERATURE
openai_llm_temp = get_env_value("OPENAI_LLM_TEMPERATURE", None)
if openai_llm_temp is None:
# Fallback to global TEMPERATURE value
args.openai_llm_temperature = args.temperature
else:
# Use the explicitly set OPENAI_LLM_TEMPERATURE
args.openai_llm_temperature = float(openai_llm_temp)
# Select Document loading tool (DOCLING, DEFAULT)
args.document_loading_engine = get_env_value("DOCUMENT_LOADING_ENGINE", "DEFAULT")

View file

@ -238,6 +238,7 @@ def create_app(args):
from lightrag.llm.binding_options import OllamaLLMOptions
if args.llm_binding == "openai" or args.embedding_binding == "openai":
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.llm.binding_options import OpenAILLMOptions
if args.llm_binding == "azure_openai" or args.embedding_binding == "azure_openai":
from lightrag.llm.azure_openai import (
azure_openai_complete_if_cache,
@ -262,7 +263,14 @@ def create_app(args):
kwargs["response_format"] = GPTKeywordExtractionFormat
if history_messages is None:
history_messages = []
kwargs["temperature"] = args.temperature
# Use OpenAI LLM options if available, otherwise fallback to global temperature
if args.llm_binding == "openai":
openai_options = OpenAILLMOptions.options_dict(args)
kwargs.update(openai_options)
else:
kwargs["temperature"] = args.temperature
return await openai_complete_if_cache(
args.llm_model,
prompt,
@ -285,7 +293,14 @@ def create_app(args):
kwargs["response_format"] = GPTKeywordExtractionFormat
if history_messages is None:
history_messages = []
kwargs["temperature"] = args.temperature
# Use OpenAI LLM options if available, otherwise fallback to global temperature
if args.llm_binding == "azure_openai":
openai_options = OpenAILLMOptions.options_dict(args)
kwargs.update(openai_options)
else:
kwargs["temperature"] = args.temperature
return await azure_openai_complete_if_cache(
args.llm_model,
prompt,

View file

@ -287,25 +287,6 @@ class BindingOptions:
return asdict(self)
# =============================================================================
# Binding Options for Different LLM Providers
# =============================================================================
#
# This section contains dataclass definitions for various LLM provider options.
# Each binding option class inherits from BindingOptions and defines:
# - _binding_name: Unique identifier for the binding
# - Configuration parameters with default values
# - _help: Dictionary mapping parameter names to help descriptions
#
# To add a new binding:
# 1. Create a new dataclass inheriting from BindingOptions
# 2. Set the _binding_name class variable
# 3. Define configuration parameters as class attributes
# 4. Add corresponding help strings in the _help dictionary
#
# =============================================================================
# =============================================================================
# Binding Options for Ollama
# =============================================================================
@ -407,23 +388,6 @@ class _OllamaOptionsMixin:
}
# =============================================================================
# Ollama Binding Options - Specialized Configurations
# =============================================================================
#
# This section defines specialized binding option classes for different Ollama
# use cases. Both classes inherit from OllamaOptionsMixin to share the complete
# set of Ollama configuration parameters, while providing distinct binding names
# for command-line argument generation and environment variable handling.
#
# OllamaEmbeddingOptions: Specialized for embedding tasks
# OllamaLLMOptions: Specialized for language model/chat tasks
#
# Each class maintains its own binding name prefix, allowing users to configure
# embedding and LLM options independently when both are used in the same application.
# =============================================================================
@dataclass
class OllamaEmbeddingOptions(_OllamaOptionsMixin, BindingOptions):
"""Options for Ollama embeddings with specialized configuration for embedding tasks."""
@ -441,41 +405,46 @@ class OllamaLLMOptions(_OllamaOptionsMixin, BindingOptions):
# =============================================================================
# Additional LLM Provider Bindings
# Binding Options for OpenAI
# =============================================================================
#
# This section is where you can add binding options for other LLM providers.
# Each new binding should follow the same pattern as the Ollama bindings above:
#
# 1. Create a dataclass that inherits from BindingOptions
# 2. Set a unique _binding_name class variable (e.g., "openai", "anthropic")
# 3. Define configuration parameters as class attributes with default values
# 4. Add a _help class variable with descriptions for each parameter
#
# Example template for a new provider:
#
# @dataclass
# class NewProviderOptions(BindingOptions):
# """Options for NewProvider LLM binding."""
#
# _binding_name: ClassVar[str] = "newprovider"
#
# # Configuration parameters
# api_key: str = ""
# max_tokens: int = 1000
# model: str = "default-model"
#
# # Help descriptions
# _help: ClassVar[dict[str, str]] = {
# "api_key": "API key for authentication",
# "max_tokens": "Maximum tokens to generate",
# "model": "Model name to use",
# }
# OpenAI binding options provide configuration for OpenAI's API and Azure OpenAI.
# These options control model behavior, sampling parameters, and generation settings.
# The parameters are based on OpenAI's API specification and provide fine-grained
# control over model inference and generation.
#
# =============================================================================
@dataclass
class OpenAILLMOptions(BindingOptions):
    """Options for OpenAI LLM with configuration for OpenAI and Azure OpenAI API calls.

    Inherits the BindingOptions machinery, which generates CLI arguments
    (prefixed with the binding name, e.g. ``--openai-llm-temperature``) and
    environment-variable handling from the fields declared below. Field
    annotations and defaults are therefore part of the runtime contract, not
    just documentation.
    """

    # mandatory name of binding; used as the prefix for generated CLI args
    # and environment variables (e.g. OPENAI_LLM_TEMPERATURE)
    _binding_name: ClassVar[str] = "openai_llm"

    # Sampling and generation parameters (names mirror the OpenAI
    # chat-completions API request fields)
    frequency_penalty: float = 0.0  # Penalty for token frequency (-2.0 to 2.0)
    # NOTE(review): annotated as int but defaults to None — effectively
    # Optional[int]; confirm BindingOptions' argparse type inference copes
    # with a None default before tightening the annotation.
    max_completion_tokens: int = None  # Maximum number of tokens to generate
    presence_penalty: float = 0.0  # Penalty for token presence (-2.0 to 2.0)
    reasoning_effort: str = "medium"  # Reasoning effort level (low, medium, high)
    safety_identifier: str = ""  # Safety identifier for content filtering
    service_tier: str = ""  # Service tier for API usage
    # default_factory avoids the shared-mutable-default pitfall for the list
    stop: List[str] = field(default_factory=list)  # Stop sequences
    temperature: float = DEFAULT_TEMPERATURE  # Controls randomness (0.0 to 2.0)
    top_p: float = 1.0  # Nucleus sampling parameter (0.0 to 1.0)

    # Help descriptions surfaced by the generated --help output, keyed by
    # field name (must stay in sync with the fields above)
    _help: ClassVar[dict[str, str]] = {
        "frequency_penalty": "Penalty for token frequency (-2.0 to 2.0, positive values discourage repetition)",
        "max_completion_tokens": "Maximum number of tokens to generate (optional, leave empty for model default)",
        "presence_penalty": "Penalty for token presence (-2.0 to 2.0, positive values encourage new topics)",
        "reasoning_effort": "Reasoning effort level for o1 models (low, medium, high)",
        "safety_identifier": "Safety identifier for content filtering (optional)",
        "service_tier": "Service tier for API usage (optional)",
        "stop": 'Stop sequences (JSON array of strings, e.g., \'["</s>", "\\n\\n"]\')',
        "temperature": "Controls randomness (0.0-2.0, higher = more creative)",
        "top_p": "Nucleus sampling parameter (0.0-1.0, lower = more focused)",
    }
# TODO: Add binding options for additional LLM providers here
# Common providers to consider: OpenAI, Anthropic, Cohere, Hugging Face, etc.
# =============================================================================
# Main Section - For Testing and Sample Generation
@ -505,10 +474,11 @@ if __name__ == "__main__":
# dotenv.load_dotenv(stream=env_strstream)
if len(sys.argv) > 1 and sys.argv[1] == "test":
# Add arguments for OllamaEmbeddingOptions and OllamaLLMOptions
parser = ArgumentParser(description="Test Ollama binding")
# Add arguments for OllamaEmbeddingOptions, OllamaLLMOptions, and OpenAILLMOptions
parser = ArgumentParser(description="Test binding options")
OllamaEmbeddingOptions.add_args(parser)
OllamaLLMOptions.add_args(parser)
OpenAILLMOptions.add_args(parser)
# Parse arguments test
args = parser.parse_args(
@ -517,20 +487,36 @@ if __name__ == "__main__":
"1024",
"--ollama-llm-num_ctx",
"2048",
# "--ollama-llm-stop",
# '["</s>", "\\n\\n"]',
"--openai-llm-temperature",
"0.7",
"--openai-llm-max_completion_tokens",
"1000",
"--openai-llm-stop",
'["</s>", "\\n\\n"]',
]
)
print("Final args for LLM and Embedding:")
print(f"{args}\n")
print("LLM options:")
print("Ollama LLM options:")
print(OllamaLLMOptions.options_dict(args))
# print(OllamaLLMOptions(num_ctx=30000).asdict())
print("\nEmbedding options:")
print("\nOllama Embedding options:")
print(OllamaEmbeddingOptions.options_dict(args))
# print(OllamaEmbeddingOptions(**embedding_options).asdict())
print("\nOpenAI LLM options:")
print(OpenAILLMOptions.options_dict(args))
# Test creating OpenAI options instance
openai_options = OpenAILLMOptions(
temperature=0.8,
max_completion_tokens=1500,
frequency_penalty=0.1,
presence_penalty=0.2,
stop=["<|end|>", "\n\n"],
)
print("\nOpenAI LLM options instance:")
print(openai_options.asdict())
else:
print(BindingOptions.generate_dot_env_sample())