fix: handle reasoning_effort parameter gracefully across models

- Set litellm.drop_params=True to auto-drop unsupported parameters
- Changed reasoning_effort from extra_body to direct parameter
- Added reasoning_effort to both async and sync methods
- Removed redundant retry logic for unsupported parameters
- Ensures compatibility with models that don't support reasoning_effort

This fixes errors when using models that don't support the reasoning_effort
parameter while maintaining the optimization for models that do support it.
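
For context, a minimal sketch of the litellm behavior this commit relies on — the model name and prompt below are illustrative placeholders, not taken from this repository:

```python
# Minimal sketch of litellm's drop_params behavior (assumed environment:
# a litellm version that accepts reasoning_effort as a top-level argument).
import litellm

# With drop_params enabled, litellm strips parameters the target model does
# not accept instead of raising an error.
litellm.drop_params = True

# reasoning_effort is forwarded to models that support it and silently
# dropped for models that do not, so a single call path serves both.
response = litellm.completion(
    model="gpt-4o-mini",  # placeholder model choice
    messages=[{"role": "user", "content": "Say hello."}],
    reasoning_effort="minimal",
)
print(response.choices[0].message.content)
```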
Author: oryx1729
Date:   2025-09-20 16:35:51 -07:00
Commit: 766b300fbc
Parent: f14751bca7

@@ -29,6 +29,8 @@ observe = get_observe()
 logger = get_logger()
+# Tell litellm to drop unsupported params, e.g., reasoning_effort when the model does not support it.
+litellm.drop_params = True
 
 
 class OpenAIAdapter(LLMInterface):
     """
@@ -132,39 +134,13 @@ class OpenAIAdapter(LLMInterface):
                 api_version=self.api_version,
                 response_model=response_model,
                 max_retries=self.MAX_RETRIES,
-                extra_body={"reasoning_effort": "minimal"},
+                reasoning_effort="minimal",
             )
         except (
             ContentFilterFinishReasonError,
             ContentPolicyViolationError,
             InstructorRetryException,
         ) as error:
-            if (
-                isinstance(error, InstructorRetryException)
-                and "content management policy" not in str(error).lower()
-            ):
-                logger.debug(
-                    "LLM Model does not support reasoning_effort parameter, trying call without the parameter."
-                )
-                return await self.aclient.chat.completions.create(
-                    model=self.model,
-                    messages=[
-                        {
-                            "role": "user",
-                            "content": f"""{text_input}""",
-                        },
-                        {
-                            "role": "system",
-                            "content": system_prompt,
-                        },
-                    ],
-                    api_key=self.api_key,
-                    api_base=self.endpoint,
-                    api_version=self.api_version,
-                    response_model=response_model,
-                    max_retries=self.MAX_RETRIES,
-                )
             if not (self.fallback_model and self.fallback_api_key):
                 raise ContentPolicyFilterError(
                     f"The provided input contains content that is not aligned with our content policy: {text_input}"
@@ -246,6 +222,7 @@ class OpenAIAdapter(LLMInterface):
                 api_base=self.endpoint,
                 api_version=self.api_version,
                 response_model=response_model,
+                reasoning_effort="minimal",
                 max_retries=self.MAX_RETRIES,
             )
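
For reference, a self-contained sketch of the simplified call shape after this change. The client wiring is an assumption (instructor patching litellm's async completion, as the adapter in the diff appears to do); the model, prompts, and the `Answer` schema are placeholders:

```python
# Sketch of the post-commit call path: reasoning_effort is passed directly
# and litellm drops it where unsupported, so no except-and-retry fallback
# is needed. All names here are illustrative, not from the repository.
import asyncio

import instructor
import litellm
from pydantic import BaseModel

litellm.drop_params = True  # unsupported params are dropped, not retried


class Answer(BaseModel):
    text: str


client = instructor.from_litellm(litellm.acompletion)


async def ask(text_input: str, system_prompt: str) -> Answer:
    # One call path for all models; reasoning_effort goes in directly.
    return await client.chat.completions.create(
        model="gpt-4o-mini",  # placeholder model
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": text_input},
        ],
        response_model=Answer,
        max_retries=3,
        reasoning_effort="minimal",
    )


if __name__ == "__main__":
    print(asyncio.run(ask("What is 2 + 2?", "Answer in one word.")).text)
```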