Merge pull request #1910 from danielaskdd/openai-option

feat: Add OpenAI LLM Options Support

commit 51deee3d82

6 changed files with 141 additions and 88 deletions

env.example
@@ -108,13 +108,14 @@ MAX_PARALLEL_INSERT=2
 ### Num of chunks sent to Embedding in a single request
 # EMBEDDING_BATCH_NUM=10

-#######################
+###########################################################
 ### LLM Configuration
-#######################
-### Some models like o1-mini require temperature to be set to 1; some LLMs can fall into output loops with low temperature
-### LLM_BINDING type: openai, ollama, lollms, azure_openai
+###########################################################
+### LLM temperature setting for all LLM bindings (openai, azure_openai, ollama)
+# TEMPERATURE=1.0
+### Some models like o1-mini require temperature to be set to 1; some LLMs can fall into output loops with low temperature
+
+### LLM Binding type: openai, ollama, lollms, azure_openai
 LLM_BINDING=openai
 LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
@@ -162,11 +163,18 @@ EMBEDDING_BINDING_HOST=http://localhost:11434
 # AZURE_EMBEDDING_API_KEY=your_api_key

 ### Jina AI Embedding
-EMBEDDING_BINDING=jina
-EMBEDDING_BINDING_HOST=https://api.jina.ai/v1/embeddings
-EMBEDDING_MODEL=jina-embeddings-v4
-EMBEDDING_DIM=2048
-EMBEDDING_BINDING_API_KEY=your_api_key
+# EMBEDDING_BINDING=jina
+# EMBEDDING_BINDING_HOST=https://api.jina.ai/v1/embeddings
+# EMBEDDING_MODEL=jina-embeddings-v4
+# EMBEDDING_DIM=2048
+# EMBEDDING_BINDING_API_KEY=your_api_key
+
+### use the following command to see all supported options for Ollama
+### lightrag-server --llm-binding ollama --help
+### lightrag-server --embedding-binding ollama --help
+
+### use the following command to see all supported options for openai and azure_openai
+### lightrag-server --llm-binding openai --help

 ####################################################################
 ### WORKSPACE setting workspace name for all storage types
@@ -354,12 +354,20 @@ The API server can be configured in three ways (highest to lowest priority):
 LightRAG supports binding to various LLM/Embedding backends:

 * ollama
-* lollms
 * openai & openai compatible
 * azure_openai
+* lollms

 Use the environment variable `LLM_BINDING` or the CLI argument `--llm-binding` to select the LLM backend type. Use the environment variable `EMBEDDING_BINDING` or the CLI argument `--embedding-binding` to select the Embedding backend type.

+For LLM and Embedding configuration examples, see the env.example file in the project root directory. The complete set of configuration options supported by the OpenAI- and Ollama-compatible LLM interfaces can be viewed with the following commands:
+
+```
+lightrag-server --llm-binding openai --help
+lightrag-server --llm-binding ollama --help
+lightrag-server --embedding-binding ollama --help
+```
+
 ### Entity Extraction Configuration
 * ENABLE_LLM_CACHE_FOR_EXTRACT: Enable LLM cache for entity extraction (default: true)
@@ -357,12 +357,19 @@ Most of the configurations come with default settings; check out the details in
 LightRAG supports binding to various LLM/Embedding backends:

 * ollama
-* lollms
 * openai & openai compatible
 * azure_openai
+* lollms

 Use the environment variable `LLM_BINDING` or the CLI argument `--llm-binding` to select the LLM backend type. Use the environment variable `EMBEDDING_BINDING` or the CLI argument `--embedding-binding` to select the Embedding backend type.

+For LLM and embedding configuration examples, please refer to the `env.example` file in the project's root directory. To view the complete list of configurable options for OpenAI and Ollama-compatible LLM interfaces, use the following commands:
+```
+lightrag-server --llm-binding openai --help
+lightrag-server --llm-binding ollama --help
+lightrag-server --embedding-binding ollama --help
+```
+
 ### Entity Extraction Configuration
 * ENABLE_LLM_CACHE_FOR_EXTRACT: Enable LLM cache for entity extraction (default: true)
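To make the new per-binding options concrete, here is a minimal sketch of how they can be registered and read back. The `add_args`/`options_dict` calls and the `--openai-llm-temperature` flag spelling are taken from this PR's own code further down; treat everything else as illustrative:

```
from argparse import ArgumentParser

from lightrag.llm.binding_options import OpenAILLMOptions

# Each OpenAILLMOptions field is exposed as an --openai-llm-<field> CLI flag
parser = ArgumentParser()
OpenAILLMOptions.add_args(parser)

args = parser.parse_args(["--openai-llm-temperature", "0.7"])

# options_dict(args) collects only this binding's values, ready to be
# merged into the kwargs of the OpenAI completion call
print(OpenAILLMOptions.options_dict(args))
```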
@@ -7,7 +7,11 @@ import argparse
 import logging
 from dotenv import load_dotenv
 from lightrag.utils import get_env_value
-from lightrag.llm.binding_options import OllamaEmbeddingOptions, OllamaLLMOptions
+from lightrag.llm.binding_options import (
+    OllamaEmbeddingOptions,
+    OllamaLLMOptions,
+    OpenAILLMOptions,
+)
 from lightrag.base import OllamaServerInfos
 import sys
@@ -239,6 +243,20 @@ def parse_args() -> argparse.Namespace:
     elif os.environ.get("EMBEDDING_BINDING") == "ollama":
         OllamaEmbeddingOptions.add_args(parser)

+    # Add OpenAI LLM options when llm-binding is openai or azure_openai
+    if "--llm-binding" in sys.argv:
+        try:
+            idx = sys.argv.index("--llm-binding")
+            if idx + 1 < len(sys.argv) and sys.argv[idx + 1] in [
+                "openai",
+                "azure_openai",
+            ]:
+                OpenAILLMOptions.add_args(parser)
+        except IndexError:
+            pass
+    elif os.environ.get("LLM_BINDING") in ["openai", "azure_openai"]:
+        OpenAILLMOptions.add_args(parser)
+
     args = parser.parse_args()

     # convert relative path to absolute path
@@ -311,6 +329,17 @@ def parse_args() -> argparse.Namespace:
             # Use the explicitly set OLLAMA_LLM_TEMPERATURE
             args.ollama_llm_temperature = float(ollama_llm_temp)

+    # Handle OpenAI LLM temperature fallback when llm-binding is openai or azure_openai
+    if args.llm_binding in ["openai", "azure_openai"]:
+        # Check if OPENAI_LLM_TEMPERATURE is set; if not, fall back to the global TEMPERATURE
+        openai_llm_temp = get_env_value("OPENAI_LLM_TEMPERATURE", None)
+        if openai_llm_temp is None:
+            # Fall back to the global TEMPERATURE value
+            args.openai_llm_temperature = args.temperature
+        else:
+            # Use the explicitly set OPENAI_LLM_TEMPERATURE
+            args.openai_llm_temperature = float(openai_llm_temp)
+
     # Select Document loading tool (DOCLING, DEFAULT)
     args.document_loading_engine = get_env_value("DOCUMENT_LOADING_ENGINE", "DEFAULT")
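In effect, the binding-specific `OPENAI_LLM_TEMPERATURE` variable takes precedence over the global `TEMPERATURE`. A standalone sketch of that precedence (illustrative only; the PR resolves it through `get_env_value` and the parsed args as shown above):

```
import os

def resolve_openai_temperature(global_temperature: float) -> float:
    # Binding-specific OPENAI_LLM_TEMPERATURE overrides the global TEMPERATURE
    raw = os.environ.get("OPENAI_LLM_TEMPERATURE")
    return float(raw) if raw is not None else global_temperature
```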
@@ -238,6 +238,7 @@ def create_app(args):
         from lightrag.llm.binding_options import OllamaLLMOptions
     if args.llm_binding == "openai" or args.embedding_binding == "openai":
         from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+        from lightrag.llm.binding_options import OpenAILLMOptions
     if args.llm_binding == "azure_openai" or args.embedding_binding == "azure_openai":
         from lightrag.llm.azure_openai import (
             azure_openai_complete_if_cache,
@@ -262,7 +263,14 @@ def create_app(args):
             kwargs["response_format"] = GPTKeywordExtractionFormat
         if history_messages is None:
             history_messages = []
-        kwargs["temperature"] = args.temperature
+
+        # Use OpenAI LLM options if available; otherwise fall back to the global temperature
+        if args.llm_binding == "openai":
+            openai_options = OpenAILLMOptions.options_dict(args)
+            kwargs.update(openai_options)
+        else:
+            kwargs["temperature"] = args.temperature
+
         return await openai_complete_if_cache(
             args.llm_model,
             prompt,
@@ -285,7 +293,14 @@ def create_app(args):
             kwargs["response_format"] = GPTKeywordExtractionFormat
         if history_messages is None:
             history_messages = []
-        kwargs["temperature"] = args.temperature
+
+        # Use OpenAI LLM options if available; otherwise fall back to the global temperature
+        if args.llm_binding == "azure_openai":
+            openai_options = OpenAILLMOptions.options_dict(args)
+            kwargs.update(openai_options)
+        else:
+            kwargs["temperature"] = args.temperature
+
         return await azure_openai_complete_if_cache(
             args.llm_model,
             prompt,
@@ -287,25 +287,6 @@ class BindingOptions:
         return asdict(self)


-# =============================================================================
-# Binding Options for Different LLM Providers
-# =============================================================================
-#
-# This section contains dataclass definitions for various LLM provider options.
-# Each binding option class inherits from BindingOptions and defines:
-# - _binding_name: Unique identifier for the binding
-# - Configuration parameters with default values
-# - _help: Dictionary mapping parameter names to help descriptions
-#
-# To add a new binding:
-# 1. Create a new dataclass inheriting from BindingOptions
-# 2. Set the _binding_name class variable
-# 3. Define configuration parameters as class attributes
-# 4. Add corresponding help strings in the _help dictionary
-#
-# =============================================================================
-
-
 # =============================================================================
 # Binding Options for Ollama
 # =============================================================================
@@ -407,23 +388,6 @@ class _OllamaOptionsMixin:
     }


-# =============================================================================
-# Ollama Binding Options - Specialized Configurations
-# =============================================================================
-#
-# This section defines specialized binding option classes for different Ollama
-# use cases. Both classes inherit from OllamaOptionsMixin to share the complete
-# set of Ollama configuration parameters, while providing distinct binding names
-# for command-line argument generation and environment variable handling.
-#
-# OllamaEmbeddingOptions: Specialized for embedding tasks
-# OllamaLLMOptions: Specialized for language model/chat tasks
-#
-# Each class maintains its own binding name prefix, allowing users to configure
-# embedding and LLM options independently when both are used in the same application.
-# =============================================================================
-
-
 @dataclass
 class OllamaEmbeddingOptions(_OllamaOptionsMixin, BindingOptions):
     """Options for Ollama embeddings with specialized configuration for embedding tasks."""
@@ -441,41 +405,46 @@ class OllamaLLMOptions(_OllamaOptionsMixin, BindingOptions):


 # =============================================================================
-# Additional LLM Provider Bindings
+# Binding Options for OpenAI
 # =============================================================================
 #
-# This section is where you can add binding options for other LLM providers.
-# Each new binding should follow the same pattern as the Ollama bindings above:
-#
-# 1. Create a dataclass that inherits from BindingOptions
-# 2. Set a unique _binding_name class variable (e.g., "openai", "anthropic")
-# 3. Define configuration parameters as class attributes with default values
-# 4. Add a _help class variable with descriptions for each parameter
-#
-# Example template for a new provider:
-#
-# @dataclass
-# class NewProviderOptions(BindingOptions):
-#     """Options for NewProvider LLM binding."""
-#
-#     _binding_name: ClassVar[str] = "newprovider"
-#
-#     # Configuration parameters
-#     api_key: str = ""
-#     max_tokens: int = 1000
-#     model: str = "default-model"
-#
-#     # Help descriptions
-#     _help: ClassVar[dict[str, str]] = {
-#         "api_key": "API key for authentication",
-#         "max_tokens": "Maximum tokens to generate",
-#         "model": "Model name to use",
-#     }
+# OpenAI binding options provide configuration for OpenAI's API and Azure OpenAI.
+# These options control model behavior, sampling parameters, and generation settings.
+# The parameters are based on OpenAI's API specification and provide fine-grained
+# control over model inference and generation.
 #
 # =============================================================================
+@dataclass
+class OpenAILLMOptions(BindingOptions):
+    """Options for OpenAI LLM with configuration for OpenAI and Azure OpenAI API calls."""
+
+    # mandatory name of binding
+    _binding_name: ClassVar[str] = "openai_llm"
+
+    # Sampling and generation parameters
+    frequency_penalty: float = 0.0  # Penalty for token frequency (-2.0 to 2.0)
+    max_completion_tokens: int = None  # Maximum number of tokens to generate
+    presence_penalty: float = 0.0  # Penalty for token presence (-2.0 to 2.0)
+    reasoning_effort: str = "medium"  # Reasoning effort level (low, medium, high)
+    safety_identifier: str = ""  # Safety identifier for content filtering
+    service_tier: str = ""  # Service tier for API usage
+    stop: List[str] = field(default_factory=list)  # Stop sequences
+    temperature: float = DEFAULT_TEMPERATURE  # Controls randomness (0.0 to 2.0)
+    top_p: float = 1.0  # Nucleus sampling parameter (0.0 to 1.0)
+
+    # Help descriptions
+    _help: ClassVar[dict[str, str]] = {
+        "frequency_penalty": "Penalty for token frequency (-2.0 to 2.0, positive values discourage repetition)",
+        "max_completion_tokens": "Maximum number of tokens to generate (optional, leave empty for model default)",
+        "presence_penalty": "Penalty for token presence (-2.0 to 2.0, positive values encourage new topics)",
+        "reasoning_effort": "Reasoning effort level for o1 models (low, medium, high)",
+        "safety_identifier": "Safety identifier for content filtering (optional)",
+        "service_tier": "Service tier for API usage (optional)",
+        "stop": 'Stop sequences (JSON array of strings, e.g., \'["</s>", "\\n\\n"]\')',
+        "temperature": "Controls randomness (0.0-2.0, higher = more creative)",
+        "top_p": "Nucleus sampling parameter (0.0-1.0, lower = more focused)",
+    }

-# TODO: Add binding options for additional LLM providers here
-# Common providers to consider: OpenAI, Anthropic, Cohere, Hugging Face, etc.

 # =============================================================================
 # Main Section - For Testing and Sample Generation
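Because `OpenAILLMOptions` inherits from `BindingOptions`, its fields should also appear in the generated `.env` sample alongside the Ollama bindings (a quick check, assuming the `generate_dot_env_sample()` entry point shown in the `__main__` section below):

```
from lightrag.llm.binding_options import BindingOptions

# Prints sample .env entries for every registered binding,
# now including the openai_llm options added in this PR
print(BindingOptions.generate_dot_env_sample())
```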
@@ -505,10 +474,11 @@ if __name__ == "__main__":
     # dotenv.load_dotenv(stream=env_strstream)

     if len(sys.argv) > 1 and sys.argv[1] == "test":
-        # Add arguments for OllamaEmbeddingOptions and OllamaLLMOptions
-        parser = ArgumentParser(description="Test Ollama binding")
+        # Add arguments for OllamaEmbeddingOptions, OllamaLLMOptions, and OpenAILLMOptions
+        parser = ArgumentParser(description="Test binding options")
         OllamaEmbeddingOptions.add_args(parser)
         OllamaLLMOptions.add_args(parser)
+        OpenAILLMOptions.add_args(parser)

         # Parse arguments test
         args = parser.parse_args(
@@ -517,20 +487,36 @@ if __name__ == "__main__":
                 "1024",
                 "--ollama-llm-num_ctx",
                 "2048",
-                # "--ollama-llm-stop",
-                # '["</s>", "\\n\\n"]',
+                "--openai-llm-temperature",
+                "0.7",
+                "--openai-llm-max_completion_tokens",
+                "1000",
+                "--openai-llm-stop",
+                '["</s>", "\\n\\n"]',
             ]
         )
         print("Final args for LLM and Embedding:")
         print(f"{args}\n")

-        print("LLM options:")
+        print("Ollama LLM options:")
         print(OllamaLLMOptions.options_dict(args))
         # print(OllamaLLMOptions(num_ctx=30000).asdict())

-        print("\nEmbedding options:")
+        print("\nOllama Embedding options:")
         print(OllamaEmbeddingOptions.options_dict(args))
         # print(OllamaEmbeddingOptions(**embedding_options).asdict())

+        print("\nOpenAI LLM options:")
+        print(OpenAILLMOptions.options_dict(args))
+
+        # Test creating OpenAI options instance
+        openai_options = OpenAILLMOptions(
+            temperature=0.8,
+            max_completion_tokens=1500,
+            frequency_penalty=0.1,
+            presence_penalty=0.2,
+            stop=["<|end|>", "\n\n"],
+        )
+        print("\nOpenAI LLM options instance:")
+        print(openai_options.asdict())
+
     else:
         print(BindingOptions.generate_dot_env_sample())