From a4b3da862b61597dd7e33d6551032a7cb540a54a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20MANSUY?=
Date: Thu, 4 Dec 2025 19:14:31 +0800
Subject: [PATCH] cherry-pick 02fdceb9

---
 lightrag/llm/azure_openai.py |  28 ++++++--
 lightrag/llm/openai.py       | 131 +++++++++++++++++++++--------------
 pyproject.toml               |  75 +++++++++++++++-----
 requirements-offline-llm.txt |   2 +-
 requirements-offline.txt     |   2 +-
 uv.lock                      |   8 ++-
 6 files changed, 164 insertions(+), 82 deletions(-)

diff --git a/lightrag/llm/azure_openai.py b/lightrag/llm/azure_openai.py
index 98437ca8..cb8d68df 100644
--- a/lightrag/llm/azure_openai.py
+++ b/lightrag/llm/azure_openai.py
@@ -26,6 +26,7 @@ from lightrag.utils import (
     safe_unicode_decode,
     logger,
 )
+from lightrag.types import GPTKeywordExtractionFormat
 
 import numpy as np
 
@@ -46,6 +47,7 @@ async def azure_openai_complete_if_cache(
     base_url: str | None = None,
     api_key: str | None = None,
     api_version: str | None = None,
+    keyword_extraction: bool = False,
     **kwargs,
 ):
     if enable_cot:
@@ -66,9 +68,12 @@ async def azure_openai_complete_if_cache(
     )
 
     kwargs.pop("hashing_kv", None)
-    kwargs.pop("keyword_extraction", None)
     timeout = kwargs.pop("timeout", None)
 
+    # Handle keyword extraction mode
+    if keyword_extraction:
+        kwargs["response_format"] = GPTKeywordExtractionFormat
+
     openai_async_client = AsyncAzureOpenAI(
         azure_endpoint=base_url,
         azure_deployment=deployment,
@@ -85,7 +90,7 @@ async def azure_openai_complete_if_cache(
     messages.append({"role": "user", "content": prompt})
 
     if "response_format" in kwargs:
-        response = await openai_async_client.beta.chat.completions.parse(
+        response = await openai_async_client.chat.completions.parse(
             model=model, messages=messages, **kwargs
         )
     else:
@@ -108,21 +113,32 @@ async def azure_openai_complete_if_cache(
 
         return inner()
     else:
-        content = response.choices[0].message.content
-        if r"\u" in content:
-            content = safe_unicode_decode(content.encode("utf-8"))
+        message = response.choices[0].message
+
+        # Handle parsed responses (structured output via response_format)
+        # When using chat.completions.parse(), the response is in message.parsed
+        if hasattr(message, "parsed") and message.parsed is not None:
+            # Serialize the parsed structured response to JSON
+            content = message.parsed.model_dump_json()
+            logger.debug("Using parsed structured response from API")
+        else:
+            # Handle regular content responses
+            content = message.content
+            if content and r"\u" in content:
+                content = safe_unicode_decode(content.encode("utf-8"))
+
         return content
 
 
 async def azure_openai_complete(
     prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
 ) -> str:
-    kwargs.pop("keyword_extraction", None)
     result = await azure_openai_complete_if_cache(
         os.getenv("LLM_MODEL", "gpt-4o-mini"),
         prompt,
         system_prompt=system_prompt,
         history_messages=history_messages,
+        keyword_extraction=keyword_extraction,
         **kwargs,
     )
     return result
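
Usage sketch (illustrative, not from the patch): the keyword-extraction flag is now an explicit parameter rather than a value smuggled through **kwargs, and setting it requests structured output via GPTKeywordExtractionFormat. A minimal call, assuming Azure OpenAI credentials are configured in the environment and using placeholder prompt and model names:

    import asyncio
    from lightrag.llm.azure_openai import azure_openai_complete_if_cache

    async def main():
        # keyword_extraction=True maps to kwargs["response_format"] =
        # GPTKeywordExtractionFormat inside the call, so the API returns a
        # parsed Pydantic object that is serialized with model_dump_json().
        result = await azure_openai_complete_if_cache(
            "gpt-4o-mini",  # deployment/model name (placeholder)
            "Extract keywords from: LightRAG combines graphs and vectors.",
            keyword_extraction=True,
        )
        print(result)  # JSON string produced by model_dump_json()

    asyncio.run(main())
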
diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py
index 363b176e..6da79c2c 100644
--- a/lightrag/llm/openai.py
+++ b/lightrag/llm/openai.py
@@ -47,7 +47,7 @@ try:
 
     # Only enable Langfuse if both keys are configured
     if langfuse_public_key and langfuse_secret_key:
-        from langfuse.openai import AsyncOpenAI
+        from langfuse.openai import AsyncOpenAI  # type: ignore[import-untyped]
 
         LANGFUSE_ENABLED = True
         logger.info("Langfuse observability enabled for OpenAI client")
@@ -140,6 +140,7 @@ async def openai_complete_if_cache(
     token_tracker: Any | None = None,
     stream: bool | None = None,
     timeout: int | None = None,
+    keyword_extraction: bool = False,
     **kwargs: Any,
 ) -> str:
     """Complete a prompt using OpenAI's API with caching support and Chain of Thought (COT) integration.
@@ -171,12 +172,13 @@ async def openai_complete_if_cache(
         enable_cot: Whether to enable Chain of Thought (COT) processing. Default is False.
         stream: Whether to stream the response. Default is False.
         timeout: Request timeout in seconds. Default is None.
+        keyword_extraction: Whether to enable keyword extraction mode. When True, triggers
+            special response formatting for keyword extraction. Default is False.
         **kwargs: Additional keyword arguments to pass to the OpenAI API.
             Special kwargs:
             - openai_client_configs: Dict of configuration options for the AsyncOpenAI client.
               These will be passed to the client constructor but will be overridden by
               explicit parameters (api_key, base_url).
-            - keyword_extraction: Will be removed from kwargs before passing to OpenAI.
 
     Returns:
         The completed text (with integrated COT content if available) or an async iterator
@@ -197,11 +199,14 @@ async def openai_complete_if_cache(
 
     # Remove special kwargs that shouldn't be passed to OpenAI
     kwargs.pop("hashing_kv", None)
-    kwargs.pop("keyword_extraction", None)
 
     # Extract client configuration options
     client_configs = kwargs.pop("openai_client_configs", {})
 
+    # Handle keyword extraction mode
+    if keyword_extraction:
+        kwargs["response_format"] = GPTKeywordExtractionFormat
+
     # Create the OpenAI client
     openai_async_client = create_openai_async_client(
         api_key=api_key,
@@ -236,7 +241,7 @@ async def openai_complete_if_cache(
     try:
         # Don't use async with context manager, use client directly
         if "response_format" in kwargs:
-            response = await openai_async_client.beta.chat.completions.parse(
+            response = await openai_async_client.chat.completions.parse(
                 model=model, messages=messages, **kwargs
             )
         else:
@@ -448,46 +453,57 @@ async def openai_complete_if_cache(
             raise InvalidResponseError("Invalid response from OpenAI API")
 
         message = response.choices[0].message
-        content = getattr(message, "content", None)
-        reasoning_content = getattr(message, "reasoning_content", "")
 
-        # Handle COT logic for non-streaming responses (only if enabled)
-        final_content = ""
+        # Handle parsed responses (structured output via response_format)
+        # When using chat.completions.parse(), the response is in message.parsed
+        if hasattr(message, "parsed") and message.parsed is not None:
+            # Serialize the parsed structured response to JSON
+            final_content = message.parsed.model_dump_json()
+            logger.debug("Using parsed structured response from API")
+        else:
+            # Handle regular content responses
+            content = getattr(message, "content", None)
+            reasoning_content = getattr(message, "reasoning_content", "")
 
-        if enable_cot:
-            # Check if we should include reasoning content
-            should_include_reasoning = False
-            if reasoning_content and reasoning_content.strip():
-                if not content or content.strip() == "":
-                    # Case 1: Only reasoning content, should include COT
-                    should_include_reasoning = True
-                    final_content = (
-                        content or ""
-                    )  # Use empty string if content is None
+            # Handle COT logic for non-streaming responses (only if enabled)
+            final_content = ""
+
+            if enable_cot:
+                # Check if we should include reasoning content
+                should_include_reasoning = False
+                if reasoning_content and reasoning_content.strip():
+                    if not content or content.strip() == "":
+                        # Case 1: Only reasoning content, should include COT
+                        should_include_reasoning = True
+                        final_content = (
+                            content or ""
+                        )  # Use empty string if content is None
+                    else:
+                        # Case 3: Both content and reasoning_content present, ignore reasoning
+                        should_include_reasoning = False
+                        final_content = content
                 else:
-                    # Case 3: Both content and reasoning_content present, ignore reasoning
-                    should_include_reasoning = False
-                    final_content = content
+                    # No reasoning content, use regular content
+                    final_content = content or ""
+
+                # Apply COT wrapping if needed
+                if should_include_reasoning:
+                    if r"\u" in reasoning_content:
+                        reasoning_content = safe_unicode_decode(
+                            reasoning_content.encode("utf-8")
+                        )
+                    final_content = (
+                        f"<think>{reasoning_content}</think>{final_content}"
+                    )
             else:
-                # No reasoning content, use regular content
+                # COT disabled, only use regular content
                 final_content = content or ""
 
-            # Apply COT wrapping if needed
-            if should_include_reasoning:
-                if r"\u" in reasoning_content:
-                    reasoning_content = safe_unicode_decode(
-                        reasoning_content.encode("utf-8")
-                    )
-                final_content = f"<think>{reasoning_content}</think>{final_content}"
-        else:
-            # COT disabled, only use regular content
-            final_content = content or ""
-
-        # Validate final content
-        if not final_content or final_content.strip() == "":
-            logger.error("Received empty content from OpenAI API")
-            await openai_async_client.close()  # Ensure client is closed
-            raise InvalidResponseError("Received empty content from OpenAI API")
+            # Validate final content
+            if not final_content or final_content.strip() == "":
+                logger.error("Received empty content from OpenAI API")
+                await openai_async_client.close()  # Ensure client is closed
+                raise InvalidResponseError("Received empty content from OpenAI API")
 
         # Apply Unicode decoding to final content if needed
         if r"\u" in final_content:
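
A note on the parse() migration above (illustrative, not from the patch): openai-python 2.x promotes structured output from client.beta.chat.completions.parse() to client.chat.completions.parse(), with the validated Pydantic instance available on message.parsed. A self-contained sketch of the same pattern, using an illustrative schema and model name rather than the project's GPTKeywordExtractionFormat:

    from pydantic import BaseModel
    from openai import AsyncOpenAI

    class Keywords(BaseModel):
        high_level_keywords: list[str]
        low_level_keywords: list[str]

    async def extract(client: AsyncOpenAI, text: str) -> str:
        # GA endpoint in openai>=2.x; the beta.* path is deprecated.
        response = await client.chat.completions.parse(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": text}],
            response_format=Keywords,
        )
        message = response.choices[0].message
        # Same pattern as the patch: prefer message.parsed, fall back to content.
        if message.parsed is not None:
            return message.parsed.model_dump_json()
        return message.content or ""
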
@@ -521,15 +537,13 @@ async def openai_complete(
 ) -> Union[str, AsyncIterator[str]]:
     if history_messages is None:
         history_messages = []
-    keyword_extraction = kwargs.pop("keyword_extraction", None)
-    if keyword_extraction:
-        kwargs["response_format"] = "json"
     model_name = kwargs["hashing_kv"].global_config["llm_model_name"]
     return await openai_complete_if_cache(
         model_name,
         prompt,
         system_prompt=system_prompt,
         history_messages=history_messages,
+        keyword_extraction=keyword_extraction,
         **kwargs,
     )
 
@@ -544,15 +558,13 @@ async def gpt_4o_complete(
 ) -> str:
     if history_messages is None:
         history_messages = []
-    keyword_extraction = kwargs.pop("keyword_extraction", None)
-    if keyword_extraction:
-        kwargs["response_format"] = GPTKeywordExtractionFormat
     return await openai_complete_if_cache(
         "gpt-4o",
         prompt,
         system_prompt=system_prompt,
         history_messages=history_messages,
         enable_cot=enable_cot,
+        keyword_extraction=keyword_extraction,
         **kwargs,
     )
 
@@ -567,15 +579,13 @@ async def gpt_4o_mini_complete(
 ) -> str:
     if history_messages is None:
         history_messages = []
-    keyword_extraction = kwargs.pop("keyword_extraction", None)
-    if keyword_extraction:
-        kwargs["response_format"] = GPTKeywordExtractionFormat
     return await openai_complete_if_cache(
         "gpt-4o-mini",
         prompt,
         system_prompt=system_prompt,
         history_messages=history_messages,
         enable_cot=enable_cot,
+        keyword_extraction=keyword_extraction,
         **kwargs,
     )
 
@@ -590,20 +600,20 @@ async def nvidia_openai_complete(
 ) -> str:
     if history_messages is None:
         history_messages = []
-    kwargs.pop("keyword_extraction", None)
     result = await openai_complete_if_cache(
         "nvidia/llama-3.1-nemotron-70b-instruct",  # context length 128k
         prompt,
         system_prompt=system_prompt,
         history_messages=history_messages,
         enable_cot=enable_cot,
+        keyword_extraction=keyword_extraction,
         base_url="https://integrate.api.nvidia.com/v1",
         **kwargs,
     )
     return result
 
 
-@wrap_embedding_func_with_attrs(embedding_dim=1536)
+@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
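
Usage sketch for the wrappers above (illustrative, not from the patch): each wrapper now forwards keyword_extraction explicitly instead of popping it from **kwargs, and openai_complete no longer downgrades the flag to the looser response_format = "json". Assuming OPENAI_API_KEY is set in the environment:

    import asyncio
    from lightrag.llm.openai import gpt_4o_mini_complete

    async def main():
        # The flag flows through to openai_complete_if_cache, which sets
        # response_format=GPTKeywordExtractionFormat before the API call.
        keywords_json = await gpt_4o_mini_complete(
            "Extract keywords from: graph-based retrieval-augmented generation.",
            keyword_extraction=True,
        )
        print(keywords_json)

    asyncio.run(main())
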
@@ -618,6 +628,7 @@ async def openai_embed(
     model: str = "text-embedding-3-small",
     base_url: str | None = None,
     api_key: str | None = None,
+    embedding_dim: int | None = None,
     client_configs: dict[str, Any] | None = None,
     token_tracker: Any | None = None,
 ) -> np.ndarray:
@@ -628,6 +639,12 @@ async def openai_embed(
         model: The OpenAI embedding model to use.
         base_url: Optional base URL for the OpenAI API.
         api_key: Optional OpenAI API key. If None, uses the OPENAI_API_KEY environment variable.
+        embedding_dim: Optional embedding dimension for dynamic dimension reduction.
+            **IMPORTANT**: This parameter is automatically injected by the EmbeddingFunc wrapper.
+            Do NOT manually pass this parameter when calling the function directly.
+            The dimension is controlled by the @wrap_embedding_func_with_attrs decorator.
+            Manually passing a different value will trigger a warning and be ignored.
+            When provided (by EmbeddingFunc), it will be passed to the OpenAI API for dimension reduction.
         client_configs: Additional configuration options for the AsyncOpenAI client.
             These will override any default configurations but will be overridden by
             explicit parameters (api_key, base_url).
@@ -647,9 +664,19 @@ async def openai_embed(
     )
 
     async with openai_async_client:
-        response = await openai_async_client.embeddings.create(
-            model=model, input=texts, encoding_format="base64"
-        )
+        # Prepare API call parameters
+        api_params = {
+            "model": model,
+            "input": texts,
+            "encoding_format": "base64",
+        }
+
+        # Add dimensions parameter only if embedding_dim is provided
+        if embedding_dim is not None:
+            api_params["dimensions"] = embedding_dim
+
+        # Make API call
+        response = await openai_async_client.embeddings.create(**api_params)
 
         if token_tracker and hasattr(response, "usage"):
             token_counts = {
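
Sketch of the new dimension plumbing (illustrative, not from the patch): per the docstring above, the EmbeddingFunc wrapper injects embedding_dim (here 1536, from the decorator), and the function forwards it as the OpenAI `dimensions` parameter, which the text-embedding-3 models use for Matryoshka-style dimension reduction. Calling the wrapped function directly, under that assumption:

    import asyncio
    import numpy as np
    from lightrag.llm.openai import openai_embed

    async def main():
        # embedding_dim is supplied by the EmbeddingFunc wrapper, not by the
        # caller; the decorator's embedding_dim=1536 is expected to end up
        # as the API's "dimensions" argument.
        vectors: np.ndarray = await openai_embed(
            ["graph retrieval", "vector search"],
        )
        print(vectors.shape)  # expected (2, 1536)

    asyncio.run(main())
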
diff --git a/pyproject.toml b/pyproject.toml
index ba1ba2cf..1b966dae 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,8 +29,8 @@ dependencies = [
     "json_repair",
     "nano-vectordb",
     "networkx",
-    "numpy",
-    "pandas>=2.0.0,<2.3.0",
+    "numpy>=1.24.0,<2.0.0",
+    "pandas>=2.0.0,<2.4.0",
     "pipmaster",
     "pydantic",
     "pypinyin",
@@ -42,6 +42,13 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
+# Test framework dependencies (for CI/CD and testing)
+pytest = [
+    "pytest>=8.4.2",
+    "pytest-asyncio>=1.2.0",
+    "pre-commit",
+]
+
 api = [
     # Core dependencies
     "aiohttp",
@@ -50,9 +57,9 @@ api = [
     "json_repair",
     "nano-vectordb",
     "networkx",
-    "numpy",
-    "openai>=1.0.0,<3.0.0",
-    "pandas>=2.0.0,<2.3.0",
+    "numpy>=1.24.0,<2.0.0",
+    "openai>=2.0.0,<3.0.0",
+    "pandas>=2.0.0,<2.4.0",
     "pipmaster",
     "pydantic",
     "pypinyin",
@@ -79,30 +86,36 @@ api = [
     "python-multipart",
     "pytz",
     "uvicorn",
+    "gunicorn",
+    # Document processing dependencies (required for API document upload functionality)
+    "openpyxl>=3.0.0,<4.0.0",  # XLSX processing
+    "pycryptodome>=3.0.0,<4.0.0",  # PDF encryption support
+    "pypdf>=6.1.0",  # PDF processing
+    "python-docx>=0.8.11,<2.0.0",  # DOCX processing
+    "python-pptx>=0.6.21,<2.0.0",  # PPTX processing
+]
+
+# Advanced document processing engine (optional)
+docling = [
+    # On macOS, PyTorch and frameworks that use Objective-C are not fork-safe
+    # and are not compatible with gunicorn's multi-worker mode
+    "docling>=2.0.0,<3.0.0; sys_platform != 'darwin'",
 ]
 
 # Offline deployment dependencies (layered design for flexibility)
-offline-docs = [
-    # Document processing dependencies
-    "pypdf2>=3.0.0",
-    "python-docx>=0.8.11,<2.0.0",
-    "python-pptx>=0.6.21,<2.0.0",
-    "openpyxl>=3.0.0,<4.0.0",
-]
-
 offline-storage = [
     # Storage backend dependencies
-    "redis>=5.0.0,<7.0.0",
+    "redis>=5.0.0,<8.0.0",
     "neo4j>=5.0.0,<7.0.0",
     "pymilvus>=2.6.2,<3.0.0",
     "pymongo>=4.0.0,<5.0.0",
     "asyncpg>=0.29.0,<1.0.0",
-    "qdrant-client>=1.7.0,<2.0.0",
+    "qdrant-client>=1.11.0,<2.0.0",
 ]
 
 offline-llm = [
     # LLM provider dependencies
-    "openai>=1.0.0,<3.0.0",
+    "openai>=2.0.0,<3.0.0",
     "anthropic>=0.18.0,<1.0.0",
     "ollama>=0.1.0,<1.0.0",
     "zhipuai>=2.0.0,<3.0.0",
@@ -114,8 +127,24 @@ offline-llm = [
 ]
 
 offline = [
-    # Complete offline package (includes all offline dependencies)
-    "lightrag-hku[offline-docs,offline-storage,offline-llm]",
+    # Complete offline package (includes api for document processing, plus storage and LLM)
+    "lightrag-hku[api,offline-storage,offline-llm]",
+]
+
+evaluation = [
+    # Test framework dependencies (for evaluation)
+    "pytest>=8.4.2",
+    "pytest-asyncio>=1.2.0",
+    "pre-commit",
+    # RAG evaluation dependencies (RAGAS framework)
+    "ragas>=0.3.7",
+    "datasets>=4.3.0",
+    "httpx>=0.28.1",
+]
+
+observability = [
+    # LLM observability and tracing dependencies
+    "langfuse>=3.8.1",
 ]
 
 [project.scripts]
@@ -140,7 +169,15 @@ include-package-data = true
 version = {attr = "lightrag.__version__"}
 
 [tool.setuptools.package-data]
-lightrag = ["api/webui/**/*"]
+lightrag = ["api/webui/**/*", "api/static/**/*"]
+
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
+asyncio_default_fixture_loop_scope = "function"
+testpaths = ["tests"]
+python_files = ["test_*.py"]
+python_classes = ["Test*"]
+python_functions = ["test_*"]
 
 [tool.ruff]
 target-version = "py310"

diff --git a/requirements-offline-llm.txt b/requirements-offline-llm.txt
index 1539552a..bcfb1451 100644
--- a/requirements-offline-llm.txt
+++ b/requirements-offline-llm.txt
@@ -14,6 +14,6 @@ google-api-core>=2.0.0,<3.0.0
 google-genai>=1.0.0,<2.0.0
 llama-index>=0.9.0,<1.0.0
 ollama>=0.1.0,<1.0.0
-openai>=1.0.0,<3.0.0
+openai>=2.0.0,<3.0.0
 voyageai>=0.2.0,<1.0.0
 zhipuai>=2.0.0,<3.0.0

diff --git a/requirements-offline.txt b/requirements-offline.txt
index c959816b..4450ce40 100644
--- a/requirements-offline.txt
+++ b/requirements-offline.txt
@@ -18,7 +18,7 @@ asyncpg>=0.29.0,<1.0.0
 llama-index>=0.9.0,<1.0.0
 neo4j>=5.0.0,<7.0.0
 ollama>=0.1.0,<1.0.0
-openai>=1.0.0,<3.0.0
+openai>=2.0.0,<3.0.0
 openpyxl>=3.0.0,<4.0.0
 pymilvus>=2.6.2,<3.0.0
 pymongo>=4.0.0,<5.0.0

diff --git a/uv.lock b/uv.lock
index 8f95d2cf..fb483760 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2737,7 +2737,6 @@ requires-dist = [
     { name = "json-repair", marker = "extra == 'api'" },
     { name = "langfuse", marker = "extra == 'observability'", specifier = ">=3.8.1" },
     { name = "lightrag-hku", extras = ["api", "offline-llm", "offline-storage"], marker = "extra == 'offline'" },
-    { name = "lightrag-hku", extras = ["pytest"], marker = "extra == 'evaluation'" },
     { name = "llama-index", marker = "extra == 'offline-llm'", specifier = ">=0.9.0,<1.0.0" },
     { name = "nano-vectordb" },
     { name = "nano-vectordb", marker = "extra == 'api'" },
@@ -2747,14 +2746,15 @@ requires-dist = [
     { name = "numpy", specifier = ">=1.24.0,<2.0.0" },
     { name = "numpy", marker = "extra == 'api'", specifier = ">=1.24.0,<2.0.0" },
     { name = "ollama", marker = "extra == 'offline-llm'", specifier = ">=0.1.0,<1.0.0" },
-    { name = "openai", marker = "extra == 'api'", specifier = ">=1.0.0,<3.0.0" },
-    { name = "openai", marker = "extra == 'offline-llm'", specifier = ">=1.0.0,<3.0.0" },
+    { name = "openai", marker = "extra == 'api'", specifier = ">=2.0.0,<3.0.0" },
+    { name = "openai", marker = "extra == 'offline-llm'", specifier = ">=2.0.0,<3.0.0" },
     { name = "openpyxl", marker = "extra == 'api'", specifier = ">=3.0.0,<4.0.0" },
     { name = "pandas", specifier = ">=2.0.0,<2.4.0" },
     { name = "pandas", marker = "extra == 'api'", specifier = ">=2.0.0,<2.4.0" },
     { name = "passlib", extras = ["bcrypt"], marker = "extra == 'api'" },
     { name = "pipmaster" },
     { name = "pipmaster", marker = "extra == 'api'" },
+    { name = "pre-commit", marker = "extra == 'evaluation'" },
     { name = "pre-commit", marker = "extra == 'pytest'" },
     { name = "psutil", marker = "extra == 'api'" },
     { name = "pycryptodome", marker = "extra == 'api'", specifier = ">=3.0.0,<4.0.0" },
@@ -2766,7 +2766,9 @@ requires-dist = [
     { name = "pypdf", marker = "extra == 'api'", specifier = ">=6.1.0" },
     { name = "pypinyin" },
     { name = "pypinyin", marker = "extra == 'api'" },
+    { name = "pytest", marker = "extra == 'evaluation'", specifier = ">=8.4.2" },
     { name = "pytest", marker = "extra == 'pytest'", specifier = ">=8.4.2" },
+    { name = "pytest-asyncio", marker = "extra == 'evaluation'", specifier = ">=1.2.0" },
     { name = "pytest-asyncio", marker = "extra == 'pytest'", specifier = ">=1.2.0" },
     { name = "python-docx", marker = "extra == 'api'", specifier = ">=0.8.11,<2.0.0" },
     { name = "python-dotenv" },