Remove legacy storage implementations and deprecated examples: - Delete FAISS, JSON, Memgraph, Milvus, MongoDB, Nano Vector DB, Neo4j, NetworkX, Qdrant, Redis storage backends - Remove Kubernetes deployment manifests and installation scripts - Delete unofficial examples for deprecated backends and offline deployment docs Streamline core infrastructure: - Consolidate storage layer to PostgreSQL-only implementation - Add full-text search caching with FTS cache module - Implement metrics collection and monitoring pipeline - Add explain and metrics API routes Modernize frontend and tooling: - Switch web UI to Bun with bun.lock, remove npm and pnpm lockfiles - Update Dockerfile for PostgreSQL-only deployment - Add Makefile for common development tasks - Update environment and configuration examples Enhance evaluation and testing capabilities: - Add prompt optimization with DSPy and auto-tuning - Implement ground truth regeneration and variant testing - Add prompt debugging and response comparison utilities - Expand test coverage with new integration scenarios Simplify dependencies and configuration: - Remove offline-specific requirement files - Update pyproject.toml with streamlined dependencies - Add Python version pinning with .python-version - Create project guidelines in CLAUDE.md and AGENTS.md
325 lines
10 KiB
Python
325 lines
10 KiB
Python
import logging
|
|
import os
|
|
from collections.abc import AsyncIterator
|
|
from typing import Any, cast
|
|
|
|
import numpy as np
|
|
import pipmaster as pm # Pipmaster for dynamic library install
|
|
|
|
# Install Anthropic SDK if not present
|
|
if not pm.is_installed('anthropic'):
|
|
pm.install('anthropic')
|
|
|
|
# Add Voyage AI import
|
|
if not pm.is_installed('voyageai'):
|
|
pm.install('voyageai')
|
|
|
|
from anthropic import (
|
|
APIConnectionError,
|
|
APITimeoutError,
|
|
AsyncAnthropic,
|
|
RateLimitError,
|
|
)
|
|
from tenacity import (
|
|
retry,
|
|
retry_if_exception_type,
|
|
stop_after_attempt,
|
|
wait_exponential,
|
|
)
|
|
from voyageai.client import Client as VoyageClient
|
|
|
|
from lightrag.api import __api_version__
|
|
from lightrag.utils import (
|
|
VERBOSE_DEBUG,
|
|
logger,
|
|
safe_unicode_decode,
|
|
verbose_debug,
|
|
)
|
|
|
|
|
|
# Exception type listed among the retryable errors for completions
class InvalidResponseError(Exception):
    """Signal an unusable response so that the retry policy runs the call again."""
|
|
|
|
|
|
# Exception types on which the embedding call is retried.
_RETRYABLE_EMBED_EXCEPTIONS = cast(
    tuple[type[BaseException], ...],
    (RateLimitError, APIConnectionError, APITimeoutError),
)
# Completions are retried on the same types plus InvalidResponseError.
_RETRYABLE_EXCEPTIONS = cast(
    tuple[type[BaseException], ...],
    (*_RETRYABLE_EMBED_EXCEPTIONS, InvalidResponseError),
)
|
|
|
|
|
|
# Core Anthropic completion function with retry
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    retry=retry_if_exception_type(_RETRYABLE_EXCEPTIONS),
)
async def anthropic_complete_if_cache(
    model: str,
    prompt: str,
    system_prompt: str | None = None,
    history_messages: list[dict[str, Any]] | None = None,
    enable_cot: bool = False,
    base_url: str | None = None,
    api_key: str | None = None,
    **kwargs: Any,
) -> str | AsyncIterator[str]:
    """Send a streaming completion request to the Anthropic Messages API.

    Args:
        model: Model identifier forwarded to ``messages.create``.
        prompt: Text of the final user message.
        system_prompt: Optional system prompt. It is sent through the
            top-level ``system`` request parameter, not as a message.
        history_messages: Earlier conversation messages placed before the
            user prompt. ``None`` is treated as an empty history.
        enable_cot: Accepted for signature compatibility; only a debug
            message is logged when it is true.
        base_url: Optional base URL handed to the client constructor.
        api_key: API key; falls back to the ``ANTHROPIC_API_KEY``
            environment variable when empty.
        **kwargs: ``hashing_kv`` and ``keyword_extraction`` are discarded,
            ``timeout`` is given to the client constructor, and everything
            else is forwarded to ``messages.create``.

    Returns:
        An async iterator that yields the text fragments of the response
        as they arrive.

    Raises:
        Exception: Any error raised by ``messages.create`` is logged and
            re-raised; the ``retry`` decorator re-runs the call for the
            exception types in ``_RETRYABLE_EXCEPTIONS``.
    """
    if history_messages is None:
        history_messages = []
    if enable_cot:
        logger.debug('enable_cot=True is not supported for the Anthropic API and will be ignored.')
    if not api_key:
        api_key = os.environ.get('ANTHROPIC_API_KEY')

    default_headers = {
        'User-Agent': f'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_8) LightRAG/{__api_version__}',
        'Content-Type': 'application/json',
    }

    # Set logger level to INFO when VERBOSE_DEBUG is off
    if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
        logging.getLogger('anthropic').setLevel(logging.INFO)

    # These keys are caller-side bookkeeping and must not reach the API call.
    kwargs.pop('hashing_kv', None)
    kwargs.pop('keyword_extraction', None)
    timeout = kwargs.pop('timeout', None)

    client_options: dict[str, Any] = {
        'default_headers': default_headers,
        'api_key': api_key,
        'timeout': timeout,
    }
    if base_url is not None:
        client_options['base_url'] = base_url
    anthropic_cls = cast(Any, AsyncAnthropic)
    anthropic_async_client = anthropic_cls(**client_options)

    # Only conversation turns go into `messages`; the system prompt travels
    # in the dedicated `system` request parameter.
    messages: list[dict[str, Any]] = [*history_messages, {'role': 'user', 'content': prompt}]
    request_kwargs: dict[str, Any] = dict(kwargs)
    if system_prompt:
        request_kwargs['system'] = system_prompt

    logger.debug('===== Sending Query to Anthropic LLM =====')
    logger.debug(f'Model: {model} Base URL: {base_url}')
    logger.debug(f'Additional kwargs: {kwargs}')
    verbose_debug(f'Query: {prompt}')
    verbose_debug(f'System prompt: {system_prompt}')

    try:
        response = await anthropic_async_client.messages.create(
            model=model, messages=messages, stream=True, **request_kwargs
        )
    except Exception as e:
        # APITimeoutError is tested first because it is a subclass of
        # APIConnectionError.
        if isinstance(e, APITimeoutError):
            logger.error(f'Anthropic API Timeout Error: {e}')
        elif isinstance(e, APIConnectionError):
            logger.error(f'Anthropic API Connection Error: {e}')
        elif isinstance(e, RateLimitError):
            logger.error(f'Anthropic API Rate Limit Error: {e}')
        else:
            logger.error(f'Anthropic API Call Failed,\nModel: {model},\nParams: {kwargs}, Got: {e}')
        # No stream will be consumed, so release the client here.
        await anthropic_async_client.close()
        raise

    async def stream_response() -> AsyncIterator[str]:
        try:
            async for event in response:
                # Not every event has a delta, and not every delta has text;
                # look both up defensively instead of assuming `.delta.text`.
                content = getattr(getattr(event, 'delta', None), 'text', None)
                if not content:
                    continue
                if r'\u' in content:
                    content = safe_unicode_decode(content.encode('utf-8'))
                yield content
        except Exception as e:
            logger.error(f'Error in stream response: {e!s}')
            raise
        finally:
            # The client was created for this call only; close it once the
            # stream is exhausted, fails, or is abandoned.
            await anthropic_async_client.close()

    return stream_response()
|
|
|
|
|
|
# Generic Anthropic completion: the model name comes from the caller's global config
async def anthropic_complete(
    prompt: str,
    system_prompt: str | None = None,
    history_messages: list[dict[str, Any]] | None = None,
    enable_cot: bool = False,
    **kwargs: Any,
) -> str | AsyncIterator[str]:
    """Complete ``prompt`` with the model named in ``hashing_kv.global_config``.

    Args:
        prompt: Text of the user message.
        system_prompt: Optional system prompt.
        history_messages: Earlier conversation messages; ``None`` means none.
        enable_cot: Passed through unchanged.
        **kwargs: Must contain ``hashing_kv``, whose
            ``global_config['llm_model_name']`` selects the model. All
            keyword arguments are passed on to
            ``anthropic_complete_if_cache``.

    Returns:
        Whatever ``anthropic_complete_if_cache`` returns.
    """
    prior_turns = [] if history_messages is None else history_messages
    configured_model = kwargs['hashing_kv'].global_config['llm_model_name']
    completion = await anthropic_complete_if_cache(
        configured_model,
        prompt,
        system_prompt=system_prompt,
        history_messages=prior_turns,
        enable_cot=enable_cot,
        **kwargs,
    )
    return completion
|
|
|
|
|
|
# Completion pinned to the Claude 3 Opus model
async def claude_3_opus_complete(
    prompt: str,
    system_prompt: str | None = None,
    history_messages: list[dict[str, Any]] | None = None,
    enable_cot: bool = False,
    **kwargs: Any,
) -> str | AsyncIterator[str]:
    """Complete ``prompt`` using the fixed model ``claude-3-opus-20240229``.

    All arguments are passed on to ``anthropic_complete_if_cache``; a
    ``None`` history is replaced by an empty list first.
    """
    prior_turns = [] if history_messages is None else history_messages
    completion = await anthropic_complete_if_cache(
        'claude-3-opus-20240229',
        prompt,
        system_prompt=system_prompt,
        history_messages=prior_turns,
        enable_cot=enable_cot,
        **kwargs,
    )
    return completion
|
|
|
|
|
|
# Completion pinned to the Claude 3 Sonnet model
async def claude_3_sonnet_complete(
    prompt: str,
    system_prompt: str | None = None,
    history_messages: list[dict[str, Any]] | None = None,
    enable_cot: bool = False,
    **kwargs: Any,
) -> str | AsyncIterator[str]:
    """Complete ``prompt`` using the fixed model ``claude-3-sonnet-20240229``.

    All arguments are passed on to ``anthropic_complete_if_cache``; a
    ``None`` history is replaced by an empty list first.
    """
    prior_turns = [] if history_messages is None else history_messages
    completion = await anthropic_complete_if_cache(
        'claude-3-sonnet-20240229',
        prompt,
        system_prompt=system_prompt,
        history_messages=prior_turns,
        enable_cot=enable_cot,
        **kwargs,
    )
    return completion
|
|
|
|
|
|
# Completion pinned to the Claude 3 Haiku model
async def claude_3_haiku_complete(
    prompt: str,
    system_prompt: str | None = None,
    history_messages: list[dict[str, Any]] | None = None,
    enable_cot: bool = False,
    **kwargs: Any,
) -> str | AsyncIterator[str]:
    """Complete ``prompt`` using the fixed model ``claude-3-haiku-20240307``.

    All arguments are passed on to ``anthropic_complete_if_cache``; a
    ``None`` history is replaced by an empty list first.
    """
    prior_turns = [] if history_messages is None else history_messages
    completion = await anthropic_complete_if_cache(
        'claude-3-haiku-20240307',
        prompt,
        system_prompt=system_prompt,
        history_messages=prior_turns,
        enable_cot=enable_cot,
        **kwargs,
    )
    return completion
|
|
|
|
|
|
# Embedding function backed by Voyage AI (Anthropic does not provide embeddings)
# NOTE(review): the retry predicate below lists Anthropic SDK exception
# classes; whether the Voyage client can raise any of them is not visible
# here - confirm, otherwise this retry never triggers.
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=60),
    retry=retry_if_exception_type(_RETRYABLE_EMBED_EXCEPTIONS),
)
async def anthropic_embed(
    texts: list[str],
    model: str = 'voyage-3',  # Default to voyage-3 as a good general-purpose model
    base_url: str | None = None,
    api_key: str | None = None,
) -> np.ndarray:
    """
    Generate embeddings using Voyage AI since Anthropic doesn't provide native embedding support.

    The Voyage client call is synchronous, so it is run in a worker thread
    to keep the event loop free while the request is in flight.

    Args:
        texts: List of text strings to embed
        model: Voyage AI model name (e.g., "voyage-3", "voyage-3-large", "voyage-code-3")
        base_url: Optional custom base URL (not used for Voyage AI)
        api_key: API key for Voyage AI (defaults to VOYAGE_API_KEY environment variable)

    Returns:
        float32 numpy array built from the embeddings returned for ``texts``

    Raises:
        ValueError: If no API key is passed and VOYAGE_API_KEY is not set.
        Exception: Any error from the Voyage client is logged and re-raised.
    """
    # Function-scope import keeps this block self-contained.
    import asyncio

    if not api_key:
        api_key = os.environ.get('VOYAGE_API_KEY')
        if not api_key:
            logger.error('VOYAGE_API_KEY environment variable not set')
            raise ValueError('VOYAGE_API_KEY environment variable is required for embeddings')

    try:
        # Initialize Voyage AI client
        voyage_client = VoyageClient(api_key=api_key)

        # Get embeddings; the blocking call is offloaded to a thread so that
        # other coroutines keep running while it waits.
        result = await asyncio.to_thread(
            voyage_client.embed,
            texts,
            model=model,
            input_type='document',  # Assuming document context; could be made configurable
        )

        # Convert list of embeddings to numpy array
        embeddings = np.array(result.embeddings, dtype=np.float32)

        logger.debug(f'Generated embeddings for {len(texts)} texts using {model}')
        verbose_debug(f'Embedding shape: {embeddings.shape}')

        return embeddings

    except Exception as e:
        logger.error(f'Voyage AI embedding failed: {e!s}')
        raise
|
|
|
|
|
|
# Optional: helper listing the Voyage AI embedding models known to this module
def get_available_embedding_models() -> dict[str, dict]:
    """
    Return a catalogue of Voyage AI embedding models keyed by model name.

    Each value is a dict with the keys 'context_length', 'dimension' and
    'description'. A fresh dictionary is built on every call.
    """
    # (model name, context length, embedding dimension, description)
    catalogue = (
        ('voyage-3-large', 32000, 1024, 'Best general-purpose and multilingual'),
        ('voyage-3', 32000, 1024, 'General-purpose and multilingual'),
        ('voyage-3-lite', 32000, 512, 'Optimized for latency and cost'),
        ('voyage-code-3', 32000, 1024, 'Optimized for code'),
        ('voyage-finance-2', 32000, 1024, 'Optimized for finance'),
        ('voyage-law-2', 16000, 1024, 'Optimized for legal'),
        ('voyage-multimodal-3', 32000, 1024, 'Multimodal text and images'),
    )
    return {
        name: {
            'context_length': context_length,
            'dimension': dimension,
            'description': description,
        }
        for name, context_length, dimension, description in catalogue
    }
|