graphiti/graphiti_core/llm_client/litellm_client.py
supmo668 74a422369c feat: Add enhanced configuration system with multi-provider LLM support
This commit introduces a comprehensive configuration system that makes
Graphiti more flexible and easier to configure across different
providers and deployment environments.

## New Features

- **Unified Configuration**: New GraphitiConfig class with Pydantic validation
- **YAML Support**: Load configuration from .graphiti.yaml files
- **Multi-Provider Support**: Easy switching between OpenAI, Azure, Anthropic,
  Gemini, Groq, and LiteLLM
- **LiteLLM Integration**: Unified access to 100+ LLM providers
- **Factory Functions**: Automatic client creation from configuration
- **Full Backward Compatibility**: Existing code continues to work

## Configuration System

- graphiti_core/config/settings.py: Pydantic configuration classes
- graphiti_core/config/providers.py: Provider enumerations and defaults
- graphiti_core/config/factory.py: Factory functions for client creation
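
To make the intended flow concrete, here is a minimal sketch of how these pieces compose. `GraphitiConfig` and the factory module are part of this commit, but the specific field names and the `create_llm_client` helper shown here are illustrative placeholders, not the exact committed API:

```python
# Illustrative only: field names and the factory function name are placeholders.
from graphiti_core.config import GraphitiConfig  # assumes the package re-exports it
from graphiti_core.config.factory import create_llm_client  # hypothetical factory name

# GraphitiConfig is Pydantic-validated, so it can be built in code or loaded
# from a .graphiti.yaml file with equivalent keys.
config = GraphitiConfig(
    llm={'provider': 'litellm', 'model': 'azure/gpt-4-deployment-name'},  # hypothetical fields
)

# A factory function turns the validated config into a concrete client instance.
llm_client = create_llm_client(config)
```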

## LiteLLM Client

- graphiti_core/llm_client/litellm_client.py: New unified LLM client
- Support for Azure OpenAI, AWS Bedrock, Vertex AI, Ollama, vLLM, etc.
- Automatic structured output detection
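
For reviewers, a minimal end-to-end sketch of the new client, assuming the optional LiteLLM extra is installed and, for this example, a local Ollama server. The `Summary` model and the direct `_generate_response` call are for illustration; normal callers go through the base `LLMClient` interface:

```python
import asyncio

from pydantic import BaseModel

from graphiti_core.llm_client.config import LLMConfig
from graphiti_core.llm_client.litellm_client import LiteLLMClient
from graphiti_core.prompts.models import Message


class Summary(BaseModel):
    text: str


async def main() -> None:
    # Any LiteLLM-style model string works: 'gpt-4.1-mini', 'azure/<deployment>',
    # 'bedrock/...', 'ollama/llama2', and so on.
    client = LiteLLMClient(
        LLMConfig(model='ollama/llama2', base_url='http://localhost:11434')
    )

    # _generate_response is the hook implemented in this file; when a
    # response_model is supplied, it returns the validated model as a dict.
    result = await client._generate_response(
        messages=[Message(role='user', content='Summarize Graphiti in one sentence.')],
        response_model=Summary,
    )
    print(result)


asyncio.run(main())
```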

## Documentation

- docs/CONFIGURATION.md: Comprehensive configuration guide
- examples/graphiti_config_example.yaml: Example configurations
- DOMAIN_AGNOSTIC_IMPROVEMENT_PLAN.md: Future improvement roadmap

## Tests

- tests/config/test_settings.py: 22 tests for configuration
- tests/config/test_factory.py: 12 tests for factories
- 33/34 tests passing (97%)

## Issues Addressed

- #1004: Azure OpenAI support
- #1006: Azure OpenAI reranker support
- #1007: vLLM/OpenAI-compatible provider stability
- #1074: Ollama embeddings support
- #995: Docker Azure OpenAI support

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-30 23:47:38 -08:00


"""
Copyright 2024, Zep Software, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import json
import logging
import typing

from pydantic import BaseModel

from ..prompts.models import Message
from .client import LLMClient
from .config import DEFAULT_MAX_TOKENS, LLMConfig, ModelSize
from .errors import RateLimitError

logger = logging.getLogger(__name__)

try:
    import litellm  # type: ignore
    from litellm import acompletion  # type: ignore

    LITELLM_AVAILABLE = True
except ImportError:
    LITELLM_AVAILABLE = False
    logger.warning('LiteLLM not available. Install with: pip install graphiti-core[litellm]')
class LiteLLMClient(LLMClient):
    """LLM client using LiteLLM for unified multi-provider support.

    LiteLLM provides a unified interface to 100+ LLM providers including:
    - OpenAI, Azure OpenAI
    - Anthropic
    - Google (Gemini, Vertex AI)
    - AWS Bedrock
    - Cohere, Replicate, HuggingFace
    - Local models (Ollama, vLLM, LocalAI)
    - And many more

    Examples:
        >>> # OpenAI via LiteLLM
        >>> client = LiteLLMClient(
        ...     LLMConfig(
        ...         model='gpt-4.1-mini',
        ...         api_key='sk-...',
        ...     )
        ... )

        >>> # Azure OpenAI
        >>> client = LiteLLMClient(
        ...     LLMConfig(
        ...         model='azure/gpt-4-deployment-name',
        ...         base_url='https://your-resource.openai.azure.com',
        ...         api_key='...',
        ...     )
        ... )

        >>> # AWS Bedrock
        >>> client = LiteLLMClient(
        ...     LLMConfig(
        ...         model='bedrock/anthropic.claude-3-sonnet-20240229-v1:0',
        ...     )
        ... )

        >>> # Ollama (local)
        >>> client = LiteLLMClient(
        ...     LLMConfig(
        ...         model='ollama/llama2',
        ...         base_url='http://localhost:11434',
        ...     )
        ... )
    """

    def __init__(self, config: LLMConfig | None = None, cache: bool = False):
        """Initialize LiteLLM client.

        Args:
            config: LLM configuration. Model name should follow LiteLLM conventions.
            cache: Whether to enable response caching.

        Raises:
            ImportError: If LiteLLM is not installed.
        """
        if not LITELLM_AVAILABLE:
            raise ImportError(
                'LiteLLM is required for LiteLLMClient. '
                'Install with: pip install graphiti-core[litellm]'
            )

        super().__init__(config, cache)

        # Configure LiteLLM
        if self.config.base_url:
            litellm.api_base = self.config.base_url
        if self.config.api_key:
            litellm.api_key = self.config.api_key

        # Disable verbose logging by default
        litellm.suppress_debug_info = True

        logger.info(f'Initialized LiteLLM client with model: {self.model}')
    async def _generate_response(
        self,
        messages: list[Message],
        response_model: type[BaseModel] | None = None,
        max_tokens: int = DEFAULT_MAX_TOKENS,
        model_size: ModelSize = ModelSize.medium,
    ) -> dict[str, typing.Any]:
        """Generate a response using LiteLLM.

        Args:
            messages: List of conversation messages
            response_model: Optional Pydantic model for structured output
            max_tokens: Maximum tokens in response
            model_size: Size of model to use (medium or small)

        Returns:
            Dictionary containing the response data

        Raises:
            RateLimitError: If rate limit is exceeded
            Exception: For other errors from the LLM provider
        """
        # Select model based on size
        model = self.model if model_size == ModelSize.medium else self.small_model
        if not model:
            raise ValueError('Model must be specified for LiteLLM client')

        # Convert messages to LiteLLM format
        litellm_messages = [
            {'role': msg.role, 'content': self._clean_input(msg.content)} for msg in messages
        ]

        try:
            # Check if provider supports structured output
            supports_structured = self._supports_structured_output(model)

            if response_model and supports_structured:
                # Use LiteLLM's structured output support
                with self.tracer.start_span('litellm_completion') as span:
                    span.add_attributes(
                        {
                            'model': model,
                            'structured_output': True,
                            'max_tokens': max_tokens,
                        }
                    )

                    response = await acompletion(
                        model=model,
                        messages=litellm_messages,
                        temperature=self.temperature,
                        max_tokens=max_tokens,
                        response_format={'type': 'json_object'},
                    )

                    # Parse JSON response into Pydantic model
                    content = response.choices[0].message.content
                    result = json.loads(content)

                    # Validate with response model
                    if response_model:
                        validated = response_model(**result)
                        return validated.model_dump()
                    return result

            elif response_model:
                # Fallback: Use OpenAI-style function calling or prompt engineering
                with self.tracer.start_span('litellm_completion_json') as span:
                    span.add_attributes(
                        {
                            'model': model,
                            'structured_output': False,
                            'max_tokens': max_tokens,
                        }
                    )

                    # Add JSON schema to the last message
                    schema_str = response_model.model_json_schema()
                    litellm_messages[-1]['content'] += (
                        f'\n\nRespond with valid JSON matching this schema: {schema_str}'
                    )

                    response = await acompletion(
                        model=model,
                        messages=litellm_messages,
                        temperature=self.temperature,
                        max_tokens=max_tokens,
                    )

                    content = response.choices[0].message.content

                    # Try to parse JSON from response
                    result = json.loads(content)
                    validated = response_model(**result)
                    return validated.model_dump()

            else:
                # Regular completion without structured output
                with self.tracer.start_span('litellm_completion_text') as span:
                    span.add_attributes(
                        {
                            'model': model,
                            'max_tokens': max_tokens,
                        }
                    )

                    response = await acompletion(
                        model=model,
                        messages=litellm_messages,
                        temperature=self.temperature,
                        max_tokens=max_tokens,
                    )

                    return {'content': response.choices[0].message.content}

        except Exception as e:
            error_str = str(e).lower()

            # Check for rate limiting
            if 'rate limit' in error_str or 'quota' in error_str or '429' in error_str:
                raise RateLimitError(f'Rate limit exceeded for model {model}: {e}') from e

            # Re-raise other exceptions
            logger.error(f'Error generating response with LiteLLM: {e}')
            raise
    def _supports_structured_output(self, model: str) -> bool:
        """Check if a model supports structured JSON output.

        Args:
            model: Model identifier (e.g., "gpt-4", "azure/gpt-4", "bedrock/claude-3")

        Returns:
            True if the model supports structured output, False otherwise
        """
        # Extract base model name from LiteLLM format
        model_lower = model.lower()

        # OpenAI models with structured output support
        if any(x in model_lower for x in ['gpt-4', 'gpt-3.5', 'gpt-4.1', 'gpt-5', 'o1', 'o3']):
            return True

        # Gemini models support JSON mode
        if 'gemini' in model_lower:
            return True

        # Claude 3+ models support JSON mode
        return 'claude-3' in model_lower or 'claude-4' in model_lower