This commit introduces a comprehensive configuration system that makes Graphiti more flexible and easier to configure across different providers and deployment environments.

## New Features

- **Unified Configuration**: New GraphitiConfig class with Pydantic validation
- **YAML Support**: Load configuration from .graphiti.yaml files
- **Multi-Provider Support**: Easy switching between OpenAI, Azure, Anthropic, Gemini, Groq, and LiteLLM
- **LiteLLM Integration**: Unified access to 100+ LLM providers
- **Factory Functions**: Automatic client creation from configuration
- **Full Backward Compatibility**: Existing code continues to work

## Configuration System

- graphiti_core/config/settings.py: Pydantic configuration classes
- graphiti_core/config/providers.py: Provider enumerations and defaults
- graphiti_core/config/factory.py: Factory functions for client creation

## LiteLLM Client

- graphiti_core/llm_client/litellm_client.py: New unified LLM client
- Support for Azure OpenAI, AWS Bedrock, Vertex AI, Ollama, vLLM, and more
- Automatic structured output detection

## Documentation

- docs/CONFIGURATION.md: Comprehensive configuration guide
- examples/graphiti_config_example.yaml: Example configurations
- DOMAIN_AGNOSTIC_IMPROVEMENT_PLAN.md: Future improvement roadmap

## Tests

- tests/config/test_settings.py: 22 tests for configuration
- tests/config/test_factory.py: 12 tests for factories
- 33/34 tests passing (97%)

## Issues Addressed

- #1004: Azure OpenAI support
- #1006: Azure OpenAI reranker support
- #1007: vLLM/OpenAI-compatible provider stability
- #1074: Ollama embeddings support
- #995: Docker Azure OpenAI support

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
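As a rough illustration of the configuration-driven setup described above, the sketch below loads settings and hands them to a factory. The names `GraphitiConfig.from_yaml()`, `create_llm_client()`, and the `config.llm` field are assumptions for illustration only; see docs/CONFIGURATION.md and graphiti_core/config/factory.py for the actual API.

```python
# Illustrative sketch -- loader and factory names below are assumptions,
# not the confirmed public API.
from graphiti_core.config import GraphitiConfig              # Pydantic settings model
from graphiti_core.config.factory import create_llm_client   # hypothetical factory name

# Load settings from a .graphiti.yaml file (hypothetical loader name).
config = GraphitiConfig.from_yaml('.graphiti.yaml')

# The factory maps the configured provider (OpenAI, Azure, LiteLLM, ...) to the
# matching client class, so application code stays provider-agnostic.
llm_client = create_llm_client(config.llm)
```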
"""
|
|
Copyright 2024, Zep Software, Inc.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
"""
|
|
|
|
import logging
|
|
import typing
|
|
|
|
from pydantic import BaseModel
|
|
|
|
from ..prompts.models import Message
|
|
from .client import LLMClient
|
|
from .config import DEFAULT_MAX_TOKENS, LLMConfig, ModelSize
|
|
from .errors import RateLimitError
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
try:
|
|
import litellm # type: ignore
|
|
from litellm import acompletion # type: ignore
|
|
|
|
LITELLM_AVAILABLE = True
|
|
except ImportError:
|
|
LITELLM_AVAILABLE = False
|
|
logger.warning('LiteLLM not available. Install with: pip install graphiti-core[litellm]')
|
|
|
|
|
|
class LiteLLMClient(LLMClient):
    """LLM client using LiteLLM for unified multi-provider support.

    LiteLLM provides a unified interface to 100+ LLM providers, including:
    - OpenAI, Azure OpenAI
    - Anthropic
    - Google (Gemini, Vertex AI)
    - AWS Bedrock
    - Cohere, Replicate, HuggingFace
    - Local models (Ollama, vLLM, LocalAI)
    - And many more

    Examples:
        >>> # OpenAI via LiteLLM
        >>> client = LiteLLMClient(
        ...     LLMConfig(
        ...         model='gpt-4.1-mini',
        ...         api_key='sk-...',
        ...     )
        ... )

        >>> # Azure OpenAI
        >>> client = LiteLLMClient(
        ...     LLMConfig(
        ...         model='azure/gpt-4-deployment-name',
        ...         base_url='https://your-resource.openai.azure.com',
        ...         api_key='...',
        ...     )
        ... )

        >>> # AWS Bedrock
        >>> client = LiteLLMClient(
        ...     LLMConfig(
        ...         model='bedrock/anthropic.claude-3-sonnet-20240229-v1:0',
        ...     )
        ... )

        >>> # Ollama (local)
        >>> client = LiteLLMClient(
        ...     LLMConfig(
        ...         model='ollama/llama2',
        ...         base_url='http://localhost:11434',
        ...     )
        ... )
    """

    def __init__(self, config: LLMConfig | None = None, cache: bool = False):
        """Initialize the LiteLLM client.

        Args:
            config: LLM configuration. The model name should follow LiteLLM conventions.
            cache: Whether to enable response caching.

        Raises:
            ImportError: If LiteLLM is not installed.
        """
        if not LITELLM_AVAILABLE:
            raise ImportError(
                'LiteLLM is required for LiteLLMClient. '
                'Install with: pip install graphiti-core[litellm]'
            )

        super().__init__(config, cache)

        # Configure LiteLLM from the provided LLMConfig
        if self.config.base_url:
            litellm.api_base = self.config.base_url

        if self.config.api_key:
            litellm.api_key = self.config.api_key

        # Disable verbose logging by default
        litellm.suppress_debug_info = True

        logger.info(f'Initialized LiteLLM client with model: {self.model}')

    async def _generate_response(
        self,
        messages: list[Message],
        response_model: type[BaseModel] | None = None,
        max_tokens: int = DEFAULT_MAX_TOKENS,
        model_size: ModelSize = ModelSize.medium,
    ) -> dict[str, typing.Any]:
        """Generate a response using LiteLLM.

        Args:
            messages: List of conversation messages
            response_model: Optional Pydantic model for structured output
            max_tokens: Maximum tokens in the response
            model_size: Size of model to use (medium or small)

        Returns:
            Dictionary containing the response data

        Raises:
            RateLimitError: If the rate limit is exceeded
            Exception: For other errors from the LLM provider
        """
        # Select the model based on the requested size
        model = self.model if model_size == ModelSize.medium else self.small_model

        if not model:
            raise ValueError('Model must be specified for LiteLLM client')

        # Convert messages to LiteLLM format
        litellm_messages = [
            {'role': msg.role, 'content': self._clean_input(msg.content)} for msg in messages
        ]

        try:
            # Check if the provider supports structured output
            supports_structured = self._supports_structured_output(model)

            if response_model and supports_structured:
                # Use LiteLLM's native JSON mode for structured output
                with self.tracer.start_span('litellm_completion') as span:
                    span.add_attributes(
                        {
                            'model': model,
                            'structured_output': True,
                            'max_tokens': max_tokens,
                        }
                    )

                    response = await acompletion(
                        model=model,
                        messages=litellm_messages,
                        temperature=self.temperature,
                        max_tokens=max_tokens,
                        response_format={'type': 'json_object'},
                    )

                # Parse the JSON response and validate it against the response model
                content = response.choices[0].message.content
                result = json.loads(content)
                validated = response_model(**result)
                return validated.model_dump()

            elif response_model:
                # Fallback: request JSON via prompt engineering when the provider
                # has no native structured output support
                with self.tracer.start_span('litellm_completion_json') as span:
                    span.add_attributes(
                        {
                            'model': model,
                            'structured_output': False,
                            'max_tokens': max_tokens,
                        }
                    )

                    # Append the JSON schema to the last message
                    schema_str = response_model.model_json_schema()
                    litellm_messages[-1]['content'] += (
                        f'\n\nRespond with valid JSON matching this schema: {schema_str}'
                    )

                    response = await acompletion(
                        model=model,
                        messages=litellm_messages,
                        temperature=self.temperature,
                        max_tokens=max_tokens,
                    )

                # Try to parse JSON from the response and validate it
                content = response.choices[0].message.content
                result = json.loads(content)
                validated = response_model(**result)
                return validated.model_dump()

            else:
                # Regular completion without structured output
                with self.tracer.start_span('litellm_completion_text') as span:
                    span.add_attributes(
                        {
                            'model': model,
                            'max_tokens': max_tokens,
                        }
                    )

                    response = await acompletion(
                        model=model,
                        messages=litellm_messages,
                        temperature=self.temperature,
                        max_tokens=max_tokens,
                    )

                return {'content': response.choices[0].message.content}

        except Exception as e:
            error_str = str(e).lower()

            # Surface provider rate limiting as RateLimitError
            if 'rate limit' in error_str or 'quota' in error_str or '429' in error_str:
                raise RateLimitError(f'Rate limit exceeded for model {model}: {e}') from e

            # Log and re-raise other exceptions
            logger.error(f'Error generating response with LiteLLM: {e}')
            raise

    def _supports_structured_output(self, model: str) -> bool:
        """Check whether a model supports structured JSON output.

        Args:
            model: Model identifier (e.g., "gpt-4", "azure/gpt-4", "bedrock/claude-3")

        Returns:
            True if the model supports structured output, False otherwise
        """
        # Normalize the LiteLLM-style model identifier
        model_lower = model.lower()

        # OpenAI models with structured output support
        if any(x in model_lower for x in ['gpt-4', 'gpt-3.5', 'gpt-4.1', 'gpt-5', 'o1', 'o3']):
            return True

        # Gemini models support JSON mode
        if 'gemini' in model_lower:
            return True

        # Claude 3+ models support JSON mode
        return 'claude-3' in model_lower or 'claude-4' in model_lower
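

# ---------------------------------------------------------------------------
# Usage sketch (comments only, not executed): assuming the base LLMClient
# exposes an async generate_response() wrapper around _generate_response(),
# a structured extraction call might look like the following. The Summary
# model, prompt text, and generate_response() signature are illustrative
# assumptions, not confirmed API.
#
#   import asyncio
#   from pydantic import BaseModel
#
#   from graphiti_core.llm_client.config import LLMConfig
#   from graphiti_core.llm_client.litellm_client import LiteLLMClient
#   from graphiti_core.prompts.models import Message
#
#   class Summary(BaseModel):
#       text: str
#
#   async def main() -> None:
#       client = LiteLLMClient(
#           LLMConfig(model='ollama/llama2', base_url='http://localhost:11434')
#       )
#       result = await client.generate_response(
#           messages=[Message(role='user', content='Summarize: Graphiti builds temporal knowledge graphs.')],
#           response_model=Summary,  # models without JSON mode fall back to prompt-engineered JSON
#       )
#       print(result)
#
#   asyncio.run(main())
# ---------------------------------------------------------------------------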