feat: Add extra_body parameter support for OpenRouter/vLLM compatibility
- Enhanced add_args function to handle dict types with JSON parsing - Added reasoning and extra_body parameters for OpenRouter/vLLM compatibility - Updated env.example with OpenRouter/vLLM parameter examples
This commit is contained in:
parent
5d34007f2c
commit
4b2ef71c25
2 changed files with 139 additions and 4 deletions
|
|
@ -153,7 +153,13 @@ LLM_BINDING_API_KEY=your_api_key
|
||||||
# OPENAI_LLM_PRESENCE_PENALTY=1.5
|
# OPENAI_LLM_PRESENCE_PENALTY=1.5
|
||||||
### If the presence penalty still cannot stop the model from generating repetitive or unconstrained output
|
### If the presence penalty still cannot stop the model from generating repetitive or unconstrained output
|
||||||
# OPENAI_LLM_MAX_COMPLETION_TOKENS=16384
|
# OPENAI_LLM_MAX_COMPLETION_TOKENS=16384
|
||||||
### use the following command to see all supported options for openai and azure_openai
|
|
||||||
|
### OpenRouter Specific Parameters
|
||||||
|
# OPENAI_LLM_EXTRA_BODY='{"reasoning": {"enabled": false}}'
|
||||||
|
### Qwen3 Specific Parameters deployed by vLLM
|
||||||
|
# OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'
|
||||||
|
|
||||||
|
### use the following command to see all supported options for OpenAI, azure_openai or OpenRouter
|
||||||
### lightrag-server --llm-binding openai --help
|
### lightrag-server --llm-binding openai --help
|
||||||
|
|
||||||
### Ollama Server Specific Parameters
|
### Ollama Server Specific Parameters
|
||||||
|
|
|
||||||
|
|
@ -99,7 +99,7 @@ class BindingOptions:
|
||||||
group = parser.add_argument_group(f"{cls._binding_name} binding options")
|
group = parser.add_argument_group(f"{cls._binding_name} binding options")
|
||||||
for arg_item in cls.args_env_name_type_value():
|
for arg_item in cls.args_env_name_type_value():
|
||||||
# Handle JSON parsing for list types
|
# Handle JSON parsing for list types
|
||||||
if arg_item["type"] == List[str]:
|
if arg_item["type"] is List[str]:
|
||||||
|
|
||||||
def json_list_parser(value):
|
def json_list_parser(value):
|
||||||
try:
|
try:
|
||||||
|
|
@ -126,6 +126,34 @@ class BindingOptions:
|
||||||
default=env_value,
|
default=env_value,
|
||||||
help=arg_item["help"],
|
help=arg_item["help"],
|
||||||
)
|
)
|
||||||
|
# Handle JSON parsing for dict types
|
||||||
|
elif arg_item["type"] is dict:
|
||||||
|
|
||||||
|
def json_dict_parser(value):
|
||||||
|
try:
|
||||||
|
parsed = json.loads(value)
|
||||||
|
if not isinstance(parsed, dict):
|
||||||
|
raise argparse.ArgumentTypeError(
|
||||||
|
f"Expected JSON object, got {type(parsed).__name__}"
|
||||||
|
)
|
||||||
|
return parsed
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
raise argparse.ArgumentTypeError(f"Invalid JSON: {e}")
|
||||||
|
|
||||||
|
# Get environment variable with JSON parsing
|
||||||
|
env_value = get_env_value(f"{arg_item['env_name']}", argparse.SUPPRESS)
|
||||||
|
if env_value is not argparse.SUPPRESS:
|
||||||
|
try:
|
||||||
|
env_value = json_dict_parser(env_value)
|
||||||
|
except argparse.ArgumentTypeError:
|
||||||
|
env_value = argparse.SUPPRESS
|
||||||
|
|
||||||
|
group.add_argument(
|
||||||
|
f"--{arg_item['argname']}",
|
||||||
|
type=json_dict_parser,
|
||||||
|
default=env_value,
|
||||||
|
help=arg_item["help"],
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
group.add_argument(
|
group.add_argument(
|
||||||
f"--{arg_item['argname']}",
|
f"--{arg_item['argname']}",
|
||||||
|
|
@ -234,8 +262,8 @@ class BindingOptions:
|
||||||
if arg_item["help"]:
|
if arg_item["help"]:
|
||||||
sample_stream.write(f"# {arg_item['help']}\n")
|
sample_stream.write(f"# {arg_item['help']}\n")
|
||||||
|
|
||||||
# Handle JSON formatting for list types
|
# Handle JSON formatting for list and dict types
|
||||||
if arg_item["type"] == List[str]:
|
if arg_item["type"] is List[str] or arg_item["type"] is dict:
|
||||||
default_value = json.dumps(arg_item["default"])
|
default_value = json.dumps(arg_item["default"])
|
||||||
else:
|
else:
|
||||||
default_value = arg_item["default"]
|
default_value = arg_item["default"]
|
||||||
|
|
@ -431,6 +459,8 @@ class OpenAILLMOptions(BindingOptions):
|
||||||
stop: List[str] = field(default_factory=list) # Stop sequences
|
stop: List[str] = field(default_factory=list) # Stop sequences
|
||||||
temperature: float = DEFAULT_TEMPERATURE # Controls randomness (0.0 to 2.0)
|
temperature: float = DEFAULT_TEMPERATURE # Controls randomness (0.0 to 2.0)
|
||||||
top_p: float = 1.0 # Nucleus sampling parameter (0.0 to 1.0)
|
top_p: float = 1.0 # Nucleus sampling parameter (0.0 to 1.0)
|
||||||
|
max_tokens: int = None # Maximum number of tokens to generate(deprecated, use max_completion_tokens instead)
|
||||||
|
extra_body: dict = None # Extra body parameters for OpenRouter or vLLM
|
||||||
|
|
||||||
# Help descriptions
|
# Help descriptions
|
||||||
_help: ClassVar[dict[str, str]] = {
|
_help: ClassVar[dict[str, str]] = {
|
||||||
|
|
@ -443,6 +473,8 @@ class OpenAILLMOptions(BindingOptions):
|
||||||
"stop": 'Stop sequences (JSON array of strings, e.g., \'["</s>", "\\n\\n"]\')',
|
"stop": 'Stop sequences (JSON array of strings, e.g., \'["</s>", "\\n\\n"]\')',
|
||||||
"temperature": "Controls randomness (0.0-2.0, higher = more creative)",
|
"temperature": "Controls randomness (0.0-2.0, higher = more creative)",
|
||||||
"top_p": "Nucleus sampling parameter (0.0-1.0, lower = more focused)",
|
"top_p": "Nucleus sampling parameter (0.0-1.0, lower = more focused)",
|
||||||
|
"max_tokens": "Maximum number of tokens to generate (deprecated, use max_completion_tokens instead)",
|
||||||
|
"extra_body": 'Extra body parameters for OpenRouter of vLLM (JSON dict, e.g., \'"reasoning": {"reasoning": {"enabled": false}}\')',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -493,6 +525,8 @@ if __name__ == "__main__":
|
||||||
"1000",
|
"1000",
|
||||||
"--openai-llm-stop",
|
"--openai-llm-stop",
|
||||||
'["</s>", "\\n\\n"]',
|
'["</s>", "\\n\\n"]',
|
||||||
|
"--openai-llm-reasoning",
|
||||||
|
'{"effort": "high", "max_tokens": 2000, "exclude": false, "enabled": true}',
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
print("Final args for LLM and Embedding:")
|
print("Final args for LLM and Embedding:")
|
||||||
|
|
@ -518,5 +552,100 @@ if __name__ == "__main__":
|
||||||
print("\nOpenAI LLM options instance:")
|
print("\nOpenAI LLM options instance:")
|
||||||
print(openai_options.asdict())
|
print(openai_options.asdict())
|
||||||
|
|
||||||
|
# Test creating OpenAI options instance with reasoning parameter
|
||||||
|
openai_options_with_reasoning = OpenAILLMOptions(
|
||||||
|
temperature=0.9,
|
||||||
|
max_completion_tokens=2000,
|
||||||
|
reasoning={
|
||||||
|
"effort": "medium",
|
||||||
|
"max_tokens": 1500,
|
||||||
|
"exclude": True,
|
||||||
|
"enabled": True,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
print("\nOpenAI LLM options instance with reasoning:")
|
||||||
|
print(openai_options_with_reasoning.asdict())
|
||||||
|
|
||||||
|
# Test dict parsing functionality
|
||||||
|
print("\n" + "=" * 50)
|
||||||
|
print("TESTING DICT PARSING FUNCTIONALITY")
|
||||||
|
print("=" * 50)
|
||||||
|
|
||||||
|
# Test valid JSON dict parsing
|
||||||
|
test_parser = ArgumentParser(description="Test dict parsing")
|
||||||
|
OpenAILLMOptions.add_args(test_parser)
|
||||||
|
|
||||||
|
try:
|
||||||
|
test_args = test_parser.parse_args(
|
||||||
|
["--openai-llm-reasoning", '{"effort": "low", "max_tokens": 1000}']
|
||||||
|
)
|
||||||
|
print("✓ Valid JSON dict parsing successful:")
|
||||||
|
print(
|
||||||
|
f" Parsed reasoning: {OpenAILLMOptions.options_dict(test_args)['reasoning']}"
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"✗ Valid JSON dict parsing failed: {e}")
|
||||||
|
|
||||||
|
# Test invalid JSON dict parsing
|
||||||
|
try:
|
||||||
|
test_args = test_parser.parse_args(
|
||||||
|
[
|
||||||
|
"--openai-llm-reasoning",
|
||||||
|
'{"effort": "low", "max_tokens": 1000', # Missing closing brace
|
||||||
|
]
|
||||||
|
)
|
||||||
|
print("✗ Invalid JSON should have failed but didn't")
|
||||||
|
except SystemExit:
|
||||||
|
print("✓ Invalid JSON dict parsing correctly rejected")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"✓ Invalid JSON dict parsing correctly rejected: {e}")
|
||||||
|
|
||||||
|
# Test non-dict JSON parsing
|
||||||
|
try:
|
||||||
|
test_args = test_parser.parse_args(
|
||||||
|
[
|
||||||
|
"--openai-llm-reasoning",
|
||||||
|
'["not", "a", "dict"]', # Array instead of dict
|
||||||
|
]
|
||||||
|
)
|
||||||
|
print("✗ Non-dict JSON should have failed but didn't")
|
||||||
|
except SystemExit:
|
||||||
|
print("✓ Non-dict JSON parsing correctly rejected")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"✓ Non-dict JSON parsing correctly rejected: {e}")
|
||||||
|
|
||||||
|
print("\n" + "=" * 50)
|
||||||
|
print("TESTING ENVIRONMENT VARIABLE SUPPORT")
|
||||||
|
print("=" * 50)
|
||||||
|
|
||||||
|
# Test environment variable support for dict
|
||||||
|
import os
|
||||||
|
|
||||||
|
os.environ["OPENAI_LLM_REASONING"] = (
|
||||||
|
'{"effort": "high", "max_tokens": 3000, "exclude": false}'
|
||||||
|
)
|
||||||
|
|
||||||
|
env_parser = ArgumentParser(description="Test env var dict parsing")
|
||||||
|
OpenAILLMOptions.add_args(env_parser)
|
||||||
|
|
||||||
|
try:
|
||||||
|
env_args = env_parser.parse_args(
|
||||||
|
[]
|
||||||
|
) # No command line args, should use env var
|
||||||
|
reasoning_from_env = OpenAILLMOptions.options_dict(env_args).get(
|
||||||
|
"reasoning"
|
||||||
|
)
|
||||||
|
if reasoning_from_env:
|
||||||
|
print("✓ Environment variable dict parsing successful:")
|
||||||
|
print(f" Parsed reasoning from env: {reasoning_from_env}")
|
||||||
|
else:
|
||||||
|
print("✗ Environment variable dict parsing failed: No reasoning found")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"✗ Environment variable dict parsing failed: {e}")
|
||||||
|
finally:
|
||||||
|
# Clean up environment variable
|
||||||
|
if "OPENAI_LLM_REASONING" in os.environ:
|
||||||
|
del os.environ["OPENAI_LLM_REASONING"]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
print(BindingOptions.generate_dot_env_sample())
|
print(BindingOptions.generate_dot_env_sample())
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue