chore: sync with upstream HKUDS/LightRAG
- Add KaTeX extensions (mhchem for chemistry, copy-tex for copying)
- Add CASCADE to AGE extension for PostgreSQL
- Remove future dependency, replace passlib with bcrypt
- Fix Jina embedding configuration and provider defaults
- Update gunicorn help text and bump API version to 0258
- Documentation and README updates
This commit is contained in:
parent 1bdd906753
commit 8d099fc3ac
16 changed files with 196 additions and 146 deletions

README-zh.md (55 changes)

@@ -407,6 +407,11 @@ LightRAG 需要利用LLM和Embeding模型来完成文档索引和知识库查询
 * LightRAG还支持类OpenAI的聊天/嵌入API:

 ```python
+import os
+import numpy as np
+from lightrag.utils import wrap_embedding_func_with_attrs
+from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+
 async def llm_model_func(
     prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
 ) -> str:
@@ -420,8 +425,9 @@ async def llm_model_func(
         **kwargs
     )

+@wrap_embedding_func_with_attrs(embedding_dim=4096, max_token_size=8192)
 async def embedding_func(texts: list[str]) -> np.ndarray:
-    return await openai_embed(
+    return await openai_embed.func(
         texts,
         model="solar-embedding-1-large-query",
         api_key=os.getenv("UPSTAGE_API_KEY"),
@@ -432,16 +438,17 @@ async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
         llm_model_func=llm_model_func,
-        embedding_func=EmbeddingFunc(
-            embedding_dim=4096,
-            func=embedding_func
-        )
+        embedding_func=embedding_func  # 直接传入装饰后的函数
     )

     await rag.initialize_storages()
     return rag
 ```

+> **关于嵌入函数封装的重要说明:**
+>
+> `EmbeddingFunc` 不能嵌套封装。已经被 `@wrap_embedding_func_with_attrs` 装饰过的嵌入函数(如 `openai_embed`、`ollama_embed` 等)不能再次使用 `EmbeddingFunc()` 封装。这就是为什么在创建自定义嵌入函数时,我们调用 `xxx_embed.func`(底层未封装的函数)而不是直接调用 `xxx_embed`。
+
 </details>

 <details>
@@ -478,19 +485,20 @@ rag = LightRAG(
 然后您只需要按如下方式设置LightRAG:

 ```python
+import numpy as np
+from lightrag.utils import wrap_embedding_func_with_attrs
+from lightrag.llm.ollama import ollama_model_complete, ollama_embed
+
+@wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=8192)
+async def embedding_func(texts: list[str]) -> np.ndarray:
+    return await ollama_embed.func(texts, embed_model="nomic-embed-text")
+
 # 使用Ollama模型初始化LightRAG
 rag = LightRAG(
     working_dir=WORKING_DIR,
     llm_model_func=ollama_model_complete,  # 使用Ollama模型进行文本生成
     llm_model_name='your_model_name',  # 您的模型名称
-    # 使用Ollama嵌入函数
-    embedding_func=EmbeddingFunc(
-        embedding_dim=768,
-        func=lambda texts: ollama_embed(
-            texts,
-            embed_model="nomic-embed-text"
-        )
-    ),
+    embedding_func=embedding_func,  # 直接传入装饰后的函数
 )
 ```

@@ -529,22 +537,27 @@ ollama create -f Modelfile qwen2m
 您可以使用`llm_model_kwargs`参数配置ollama:

 ```python
+import numpy as np
+from lightrag.utils import wrap_embedding_func_with_attrs
+from lightrag.llm.ollama import ollama_model_complete, ollama_embed
+
+@wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=8192)
+async def embedding_func(texts: list[str]) -> np.ndarray:
+    return await ollama_embed.func(texts, embed_model="nomic-embed-text")
+
 rag = LightRAG(
     working_dir=WORKING_DIR,
     llm_model_func=ollama_model_complete,  # 使用Ollama模型进行文本生成
     llm_model_name='your_model_name',  # 您的模型名称
     llm_model_kwargs={"options": {"num_ctx": 32768}},
-    # 使用Ollama嵌入函数
-    embedding_func=EmbeddingFunc(
-        embedding_dim=768,
-        func=lambda texts: ollama_embed(
-            texts,
-            embed_model="nomic-embed-text"
-        )
-    ),
+    embedding_func=embedding_func,  # 直接传入装饰后的函数
 )
 ```

+> **关于嵌入函数封装的重要说明:**
+>
+> `EmbeddingFunc` 不能嵌套封装。已经被 `@wrap_embedding_func_with_attrs` 装饰过的嵌入函数(如 `openai_embed`、`ollama_embed` 等)不能再次使用 `EmbeddingFunc()` 封装。这就是为什么在创建自定义嵌入函数时,我们调用 `xxx_embed.func`(底层未封装的函数)而不是直接调用 `xxx_embed`。
+
 * **低RAM GPU**

 为了在低RAM GPU上运行此实验,您应该选择小型模型并调整上下文窗口(增加上下文会增加内存消耗)。例如,在6Gb RAM的改装挖矿GPU上运行这个ollama示例需要将上下文大小设置为26k,同时使用`gemma2:2b`。它能够在`book.txt`中找到197个实体和19个关系。
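
The note introduced in both Ollama hunks is the core of this change. Here is a minimal before/after sketch of the pattern (not taken verbatim from the repository; the model name and dimensions are copied from the hunks above, and `EmbeddingFunc` is assumed importable from `lightrag.utils` as in the removed code):

```python
import numpy as np
from lightrag.utils import EmbeddingFunc, wrap_embedding_func_with_attrs
from lightrag.llm.ollama import ollama_embed

# Before (removed): ollama_embed is already decorated with
# @wrap_embedding_func_with_attrs, so wrapping it again nests EmbeddingFunc.
# embedding_func = EmbeddingFunc(
#     embedding_dim=768,
#     func=lambda texts: ollama_embed(texts, embed_model="nomic-embed-text"),
# )

# After: decorate a thin wrapper around the raw callable exposed as .func,
# then pass it straight to LightRAG(embedding_func=embedding_func).
@wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=8192)
async def embedding_func(texts: list[str]) -> np.ndarray:
    return await ollama_embed.func(texts, embed_model="nomic-embed-text")
```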

README.md (99 changes)

@@ -51,24 +51,24 @@

 ---
 ## 🎉 News
-- [2025.11.05]🎯[New Feature]: Integrated **RAGAS for Evaluation** and **Langfuse for Tracing**. Updated the API to return retrieved contexts alongside query results to support context precision metrics.
+- [2025.11]🎯[New Feature]: Integrated **RAGAS for Evaluation** and **Langfuse for Tracing**. Updated the API to return retrieved contexts alongside query results to support context precision metrics.
-- [2025.10.22]🎯[Scalability Enhancement]: Eliminated processing bottlenecks to support **Large-Scale Datasets Efficiently**.
+- [2025.10]🎯[Scalability Enhancement]: Eliminated processing bottlenecks to support **Large-Scale Datasets Efficiently**.
-- [2025.09.15]🎯Significantly enhances KG extraction accuracy for **small LLMs** like Qwen3-30B-A3B.
+- [2025.09]🎯[New Feature] Enhances knowledge graph extraction accuracy for **Open-Sourced LLMs** such as Qwen3-30B-A3B.
-- [2025.08.29]🎯**Reranker** is supported now , significantly boosting performance for mixed queries(Set as default query mode now).
+- [2025.08]🎯[New Feature] **Reranker** is now supported, significantly boosting performance for mixed queries (set as default query mode).
-- [2025.08.04]🎯**Document deletion** with KG regeneration to ensure query performance.
+- [2025.08]🎯[New Feature] Added **Document Deletion** with automatic KG regeneration to ensure optimal query performance.
-- [2025.06.16]🎯Our team has released [RAG-Anything](https://github.com/HKUDS/RAG-Anything) an All-in-One Multimodal RAG System for seamless text, image, table, and equation processing.
+- [2025.06]🎯[New Release] Our team has released [RAG-Anything](https://github.com/HKUDS/RAG-Anything) — an **All-in-One Multimodal RAG** system for seamless processing of text, images, tables, and equations.
-- [2025.06.05]🎯LightRAG now supports comprehensive multimodal data handling through [RAG-Anything](https://github.com/HKUDS/RAG-Anything) integration, enabling seamless document parsing and RAG capabilities across diverse formats including PDFs, images, Office documents, tables, and formulas. Please refer to the new [multimodal section](https://github.com/HKUDS/LightRAG/?tab=readme-ov-file#multimodal-document-processing-rag-anything-integration) for details.
+- [2025.06]🎯[New Feature] LightRAG now supports comprehensive multimodal data handling through [RAG-Anything](https://github.com/HKUDS/RAG-Anything) integration, enabling seamless document parsing and RAG capabilities across diverse formats including PDFs, images, Office documents, tables, and formulas. Please refer to the new [multimodal section](https://github.com/HKUDS/LightRAG/?tab=readme-ov-file#multimodal-document-processing-rag-anything-integration) for details.
-- [2025.03.18]🎯LightRAG now supports citation functionality, enabling proper source attribution.
+- [2025.03]🎯[New Feature] LightRAG now supports citation functionality, enabling proper source attribution and enhanced document traceability.
-- [2025.02.12]🎯You can now use MongoDB as all in-one Storage.
+- [2025.02]🎯[New Feature] You can now use MongoDB as an all-in-one storage solution for unified data management.
-- [2025.02.05]🎯Our team has released [VideoRAG](https://github.com/HKUDS/VideoRAG) understanding extremely long-context videos.
+- [2025.02]🎯[New Release] Our team has released [VideoRAG](https://github.com/HKUDS/VideoRAG)-a RAG system for understanding extremely long-context videos
-- [2025.01.13]🎯Our team has released [MiniRAG](https://github.com/HKUDS/MiniRAG) making RAG simpler with small models.
+- [2025.01]🎯[New Release] Our team has released [MiniRAG](https://github.com/HKUDS/MiniRAG) making RAG simpler with small models.
-- [2025.01.06]🎯You can now use PostgreSQL as all in-one Storage.
+- [2025.01]🎯You can now use PostgreSQL as an all-in-one storage solution for data management.
-- [2024.11.19]🎯A comprehensive guide to LightRAG is now available on [LearnOpenCV](https://learnopencv.com/lightrag). Many thanks to the blog author.
+- [2024.11]🎯[New Resource] A comprehensive guide to LightRAG is now available on [LearnOpenCV](https://learnopencv.com/lightrag). — explore in-depth tutorials and best practices. Many thanks to the blog author for this excellent contribution!
-- [2024.11.09]🎯Introducing the LightRAG Webui, which allows you to insert, query, visualize LightRAG knowledge.
+- [2024.11]🎯[New Feature] Introducing the LightRAG WebUI — an interface that allows you to insert, query, and visualize LightRAG knowledge through an intuitive web-based dashboard.
-- [2024.11.04]🎯You can now [use Neo4J for Storage](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-neo4j-for-storage).
+- [2024.11]🎯[New Feature] You can now [use Neo4J for Storage](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-neo4j-for-storage)-enabling graph database support.
-- [2024.10.18]🎯We've added a link to a [LightRAG Introduction Video](https://youtu.be/oageL-1I0GE). Thanks to the author!
+- [2024.10]🎯[New Feature] We've added a link to a [LightRAG Introduction Video](https://youtu.be/oageL-1I0GE). — a walkthrough of LightRAG's capabilities. Thanks to the author for this excellent contribution!
-- [2024.10.17]🎯We have created a [Discord channel](https://discord.gg/yF2MmDJyGJ)! Welcome to join for sharing and discussions! 🎉🎉
+- [2024.10]🎯[New Channel] We have created a [Discord channel](https://discord.gg/yF2MmDJyGJ)!💬 Welcome to join our community for sharing, discussions, and collaboration! 🎉🎉
-- [2024.10.16]🎯LightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!
+- [2024.10]🎯[New Feature] LightRAG now supports [Ollama models](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#quick-start)!

 <details>
 <summary style="font-size: 1.4em; font-weight: bold; cursor: pointer; display: list-item;">
@@ -214,7 +214,7 @@ For a streaming response implementation example, please see `examples/lightrag_o

 **Note 2**: Only `lightrag_openai_demo.py` and `lightrag_openai_compatible_demo.py` are officially supported sample codes. Other sample files are community contributions that haven't undergone full testing and optimization.

-## Programing with LightRAG Core
+## Programming with LightRAG Core

 > ⚠️ **If you would like to integrate LightRAG into your project, we recommend utilizing the REST API provided by the LightRAG Server**. LightRAG Core is typically intended for embedded applications or for researchers who wish to conduct studies and evaluations.

@@ -313,7 +313,7 @@ A full list of LightRAG init parameters:
 | **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for vector database, like setting the threshold for nodes and relations retrieval | cosine_better_than_threshold: 0.2(default value changed by env var COSINE_THRESHOLD) |
 | **enable_llm_cache** | `bool` | If `TRUE`, stores LLM results in cache; repeated prompts return cached responses | `TRUE` |
 | **enable_llm_cache_for_entity_extract** | `bool` | If `TRUE`, stores LLM results in cache for entity extraction; Good for beginners to debug your application | `TRUE` |
-| **addon_params** | `dict` | Additional parameters, e.g., `{"language": "Simplified Chinese", "entity_types": ["organization", "person", "location", "event"]}`: sets example limit, entiy/relation extraction output language | language: English` |
+| **addon_params** | `dict` | Additional parameters, e.g., `{"language": "Simplified Chinese", "entity_types": ["organization", "person", "location", "event"]}`: sets example limit, entity/relation extraction output language | language: English` |
 | **embedding_cache_config** | `dict` | Configuration for question-answer caching. Contains three parameters: `enabled`: Boolean value to enable/disable cache lookup functionality. When enabled, the system will check cached responses before generating new answers. `similarity_threshold`: Float value (0-1), similarity threshold. When a new question's similarity with a cached question exceeds this threshold, the cached answer will be returned directly without calling the LLM. `use_llm_check`: Boolean value to enable/disable LLM similarity verification. When enabled, LLM will be used as a secondary check to verify the similarity between questions before returning cached answers. | Default: `{"enabled": False, "similarity_threshold": 0.95, "use_llm_check": False}` |

 </details>
@@ -364,7 +364,7 @@ class QueryParam:
     max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "30000"))
     """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""

-    # History mesages is only send to LLM for context, not used for retrieval
+    # History messages are only sent to LLM for context, not used for retrieval
     conversation_history: list[dict[str, str]] = field(default_factory=list)
     """Stores past conversation history to maintain context.
     Format: [{"role": "user/assistant", "content": "message"}].
@@ -403,6 +403,11 @@ LightRAG requires the utilization of LLM and Embedding models to accomplish docu
 * LightRAG also supports Open AI-like chat/embeddings APIs:

 ```python
+import os
+import numpy as np
+from lightrag.utils import wrap_embedding_func_with_attrs
+from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+
 async def llm_model_func(
     prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
 ) -> str:
@@ -416,8 +421,9 @@ async def llm_model_func(
         **kwargs
     )

+@wrap_embedding_func_with_attrs(embedding_dim=4096, max_token_size=8192)
 async def embedding_func(texts: list[str]) -> np.ndarray:
-    return await openai_embed(
+    return await openai_embed.func(
         texts,
         model="solar-embedding-1-large-query",
         api_key=os.getenv("UPSTAGE_API_KEY"),
@@ -428,16 +434,17 @@ async def initialize_rag():
     rag = LightRAG(
         working_dir=WORKING_DIR,
         llm_model_func=llm_model_func,
-        embedding_func=EmbeddingFunc(
-            embedding_dim=4096,
-            func=embedding_func
-        )
+        embedding_func=embedding_func  # Pass the decorated function directly
     )

     await rag.initialize_storages()
     return rag
 ```

+> **Important Note on Embedding Function Wrapping:**
+>
+> `EmbeddingFunc` cannot be nested. Functions that have been decorated with `@wrap_embedding_func_with_attrs` (such as `openai_embed`, `ollama_embed`, etc.) cannot be wrapped again using `EmbeddingFunc()`. This is why we call `xxx_embed.func` (the underlying unwrapped function) instead of `xxx_embed` directly when creating custom embedding functions.
+
 </details>

 <details>
@@ -476,19 +483,20 @@ If you want to use Ollama models, you need to pull model you plan to use and emb
 Then you only need to set LightRAG as follows:

 ```python
+import numpy as np
+from lightrag.utils import wrap_embedding_func_with_attrs
+from lightrag.llm.ollama import ollama_model_complete, ollama_embed
+
+@wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=8192)
+async def embedding_func(texts: list[str]) -> np.ndarray:
+    return await ollama_embed.func(texts, embed_model="nomic-embed-text")
+
 # Initialize LightRAG with Ollama model
 rag = LightRAG(
     working_dir=WORKING_DIR,
     llm_model_func=ollama_model_complete,  # Use Ollama model for text generation
     llm_model_name='your_model_name',  # Your model name
-    # Use Ollama embedding function
-    embedding_func=EmbeddingFunc(
-        embedding_dim=768,
-        func=lambda texts: ollama_embed(
-            texts,
-            embed_model="nomic-embed-text"
-        )
-    ),
+    embedding_func=embedding_func,  # Pass the decorated function directly
 )
 ```

@@ -527,22 +535,27 @@ ollama create -f Modelfile qwen2m
 Tiy can use `llm_model_kwargs` param to configure ollama:

 ```python
+import numpy as np
+from lightrag.utils import wrap_embedding_func_with_attrs
+from lightrag.llm.ollama import ollama_model_complete, ollama_embed
+
+@wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=8192)
+async def embedding_func(texts: list[str]) -> np.ndarray:
+    return await ollama_embed.func(texts, embed_model="nomic-embed-text")
+
 rag = LightRAG(
     working_dir=WORKING_DIR,
     llm_model_func=ollama_model_complete,  # Use Ollama model for text generation
     llm_model_name='your_model_name',  # Your model name
     llm_model_kwargs={"options": {"num_ctx": 32768}},
-    # Use Ollama embedding function
-    embedding_func=EmbeddingFunc(
-        embedding_dim=768,
-        func=lambda texts: ollama_embed(
-            texts,
-            embed_model="nomic-embed-text"
-        )
-    ),
+    embedding_func=embedding_func,  # Pass the decorated function directly
 )
 ```

+> **Important Note on Embedding Function Wrapping:**
+>
+> `EmbeddingFunc` cannot be nested. Functions that have been decorated with `@wrap_embedding_func_with_attrs` (such as `openai_embed`, `ollama_embed`, etc.) cannot be wrapped again using `EmbeddingFunc()`. This is why we call `xxx_embed.func` (the underlying unwrapped function) instead of `xxx_embed` directly when creating custom embedding functions.
+
 * **Low RAM GPUs**

 In order to run this experiment on low RAM GPU you should select small model and tune context window (increasing context increase memory consumption). For example, running this ollama example on repurposed mining GPU with 6Gb of RAM required to set context size to 26k while using `gemma2:2b`. It was able to find 197 entities and 19 relations on `book.txt`.
@@ -1555,7 +1568,7 @@ Langfuse provides a drop-in replacement for the OpenAI client that automatically
 pip install lightrag-hku
 pip install lightrag-hku[observability]

-# Or install from souce code with debug mode enabled
+# Or install from source code with debug mode enabled
 pip install -e .
 pip install -e ".[observability]"
 ```
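
To make the wrapping rule above concrete, here is a small hedged sketch (assuming `EmbeddingFunc` is importable from `lightrag.utils`, as in the removed README code; the conditional mirrors the server-side pattern later in this commit):

```python
from lightrag.utils import EmbeddingFunc
from lightrag.llm.openai import openai_embed

# The decorator replaces the coroutine with an EmbeddingFunc that carries
# metadata such as embedding_dim; the raw coroutine stays reachable as .func.
actual = openai_embed.func if isinstance(openai_embed, EmbeddingFunc) else openai_embed

# Wrapping openai_embed itself in EmbeddingFunc(...) again is exactly the
# nesting the README note forbids; always re-wrap `actual` (xxx_embed.func).
```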
@@ -1 +1 @@
-__api_version__ = "0256"
+__api_version__ = "0258"
@@ -365,8 +365,12 @@ def parse_args() -> argparse.Namespace:

     # Inject model configuration
     args.llm_model = get_env_value("LLM_MODEL", "mistral-nemo:latest")
-    args.embedding_model = get_env_value("EMBEDDING_MODEL", "bge-m3:latest")
-    args.embedding_dim = get_env_value("EMBEDDING_DIM", 1024, int)
+    # EMBEDDING_MODEL defaults to None - each binding will use its own default model
+    # e.g., OpenAI uses "text-embedding-3-small", Jina uses "jina-embeddings-v4"
+    args.embedding_model = get_env_value("EMBEDDING_MODEL", None, special_none=True)
+    # EMBEDDING_DIM defaults to None - each binding will use its own default dimension
+    # Value is inherited from provider defaults via wrap_embedding_func_with_attrs decorator
+    args.embedding_dim = get_env_value("EMBEDDING_DIM", None, int, special_none=True)
     args.embedding_send_dim = get_env_value("EMBEDDING_SEND_DIM", False, bool)

     # Inject chunk configuration
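
The `special_none=True` flag is what lets the new defaults fall through to `None`. A hypothetical minimal re-implementation to illustrate the semantics (the real `get_env_value` lives in LightRAG's config utilities and its signature may differ):

```python
import os

def get_env_value(name, default, value_type=str, special_none=False):
    """Hypothetical sketch of the behavior the hunk above relies on."""
    raw = os.getenv(name)
    if raw is None:
        return default  # EMBEDDING_MODEL / EMBEDDING_DIM now default to None
    if special_none and raw.strip().lower() == "none":
        return None  # an explicit "none" string also yields None
    return value_type(raw)
```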
@@ -672,6 +672,17 @@ def create_app(args):
     2. Extracts max_token_size and embedding_dim from provider if it's an EmbeddingFunc
     3. Creates an optimized wrapper that calls the underlying function directly (avoiding double-wrapping)
     4. Returns a properly configured EmbeddingFunc instance
+
+    Configuration Rules:
+    - When EMBEDDING_MODEL is not set: Uses provider's default model and dimension
+      (e.g., jina-embeddings-v4 with 2048 dims, text-embedding-3-small with 1536 dims)
+    - When EMBEDDING_MODEL is set to a custom model: User MUST also set EMBEDDING_DIM
+      to match the custom model's dimension (e.g., for jina-embeddings-v3, set EMBEDDING_DIM=1024)
+
+    Note: The embedding_dim parameter is automatically injected by EmbeddingFunc wrapper
+    when send_dimensions=True (enabled for Jina and Gemini bindings). This wrapper calls
+    the underlying provider function directly (.func) to avoid double-wrapping, so we must
+    explicitly pass embedding_dim to the provider's underlying function.
     """

     # Step 1: Import provider function and extract default attributes
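
Read as environment settings, the rules above amount to the following (a hedged illustration; the model/dimension pairs are the ones the docstring itself names):

```python
import os

# Case 1: nothing set -> provider defaults apply
# (jina-embeddings-v4 / 2048 dims, text-embedding-3-small / 1536 dims)

# Case 2: custom model -> EMBEDDING_DIM must match the model's real dimension
os.environ["EMBEDDING_MODEL"] = "jina-embeddings-v3"
os.environ["EMBEDDING_DIM"] = "1024"  # jina-embeddings-v3 is 1024-dim
```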
@@ -731,6 +742,7 @@ def create_app(args):
             )

             # Step 3: Create optimized embedding function (calls underlying function directly)
+            # Note: When model is None, each binding will use its own default model
             async def optimized_embedding_function(texts, embedding_dim=None):
                 try:
                     if binding == "lollms":
@@ -742,9 +754,9 @@ def create_app(args):
                         if isinstance(lollms_embed, EmbeddingFunc)
                         else lollms_embed
                     )
-                    return await actual_func(
-                        texts, embed_model=model, host=host, api_key=api_key
-                    )
+                    # lollms embed_model is not used (server uses configured vectorizer)
+                    # Only pass base_url and api_key
+                    return await actual_func(texts, base_url=host, api_key=api_key)
                 elif binding == "ollama":
                     from lightrag.llm.ollama import ollama_embed

@@ -763,13 +775,16 @@ def create_app(args):

                     ollama_options = OllamaEmbeddingOptions.options_dict(args)

-                    return await actual_func(
-                        texts,
-                        embed_model=model,
-                        host=host,
-                        api_key=api_key,
-                        options=ollama_options,
-                    )
+                    # Pass embed_model only if provided, let function use its default (bge-m3:latest)
+                    kwargs = {
+                        "texts": texts,
+                        "host": host,
+                        "api_key": api_key,
+                        "options": ollama_options,
+                    }
+                    if model:
+                        kwargs["embed_model"] = model
+                    return await actual_func(**kwargs)
                 elif binding == "azure_openai":
                     from lightrag.llm.azure_openai import azure_openai_embed

@@ -778,7 +793,11 @@ def create_app(args):
                         if isinstance(azure_openai_embed, EmbeddingFunc)
                         else azure_openai_embed
                     )
-                    return await actual_func(texts, model=model, api_key=api_key)
+                    # Pass model only if provided, let function use its default otherwise
+                    kwargs = {"texts": texts, "api_key": api_key}
+                    if model:
+                        kwargs["model"] = model
+                    return await actual_func(**kwargs)
                 elif binding == "aws_bedrock":
                     from lightrag.llm.bedrock import bedrock_embed

@@ -787,7 +806,11 @@ def create_app(args):
                         if isinstance(bedrock_embed, EmbeddingFunc)
                         else bedrock_embed
                     )
-                    return await actual_func(texts, model=model)
+                    # Pass model only if provided, let function use its default otherwise
+                    kwargs = {"texts": texts}
+                    if model:
+                        kwargs["model"] = model
+                    return await actual_func(**kwargs)
                 elif binding == "jina":
                     from lightrag.llm.jina import jina_embed

@@ -796,12 +819,16 @@ def create_app(args):
                         if isinstance(jina_embed, EmbeddingFunc)
                         else jina_embed
                     )
-                    return await actual_func(
-                        texts,
-                        embedding_dim=embedding_dim,
-                        base_url=host,
-                        api_key=api_key,
-                    )
+                    # Pass model only if provided, let function use its default (jina-embeddings-v4)
+                    kwargs = {
+                        "texts": texts,
+                        "embedding_dim": embedding_dim,
+                        "base_url": host,
+                        "api_key": api_key,
+                    }
+                    if model:
+                        kwargs["model"] = model
+                    return await actual_func(**kwargs)
                 elif binding == "gemini":
                     from lightrag.llm.gemini import gemini_embed

@@ -819,14 +846,19 @@ def create_app(args):

                     gemini_options = GeminiEmbeddingOptions.options_dict(args)

-                    return await actual_func(
-                        texts,
-                        model=model,
-                        base_url=host,
-                        api_key=api_key,
-                        embedding_dim=embedding_dim,
-                        task_type=gemini_options.get("task_type", "RETRIEVAL_DOCUMENT"),
-                    )
+                    # Pass model only if provided, let function use its default (gemini-embedding-001)
+                    kwargs = {
+                        "texts": texts,
+                        "base_url": host,
+                        "api_key": api_key,
+                        "embedding_dim": embedding_dim,
+                        "task_type": gemini_options.get(
+                            "task_type", "RETRIEVAL_DOCUMENT"
+                        ),
+                    }
+                    if model:
+                        kwargs["model"] = model
+                    return await actual_func(**kwargs)
                 else:  # openai and compatible
                     from lightrag.llm.openai import openai_embed

@@ -835,13 +867,16 @@ def create_app(args):
                         if isinstance(openai_embed, EmbeddingFunc)
                         else openai_embed
                    )
-                    return await actual_func(
-                        texts,
-                        model=model,
-                        base_url=host,
-                        api_key=api_key,
-                        embedding_dim=embedding_dim,
-                    )
+                    # Pass model only if provided, let function use its default (text-embedding-3-small)
+                    kwargs = {
+                        "texts": texts,
+                        "base_url": host,
+                        "api_key": api_key,
+                        "embedding_dim": embedding_dim,
+                    }
+                    if model:
+                        kwargs["model"] = model
+                    return await actual_func(**kwargs)
             except ImportError as e:
                 raise Exception(f"Failed to import {binding} embedding: {e}")
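
Every binding branch above follows the same shape. Distilled into one hedged sketch (only `.func`, `EmbeddingFunc`, and the kwargs filtering come from the diff; the rest is local scaffolding for illustration):

```python
import os
from lightrag.utils import EmbeddingFunc
from lightrag.llm.openai import openai_embed

model = os.getenv("EMBEDDING_MODEL")  # may now be None

# Unwrap once so the provider's EmbeddingFunc is never nested.
actual_func = (
    openai_embed.func if isinstance(openai_embed, EmbeddingFunc) else openai_embed
)

async def optimized_embedding_function(texts, embedding_dim=None):
    kwargs = {"texts": texts, "embedding_dim": embedding_dim}
    if model:  # only override when the user actually set EMBEDDING_MODEL
        kwargs["model"] = model
    return await actual_func(**kwargs)
```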
@@ -100,7 +100,7 @@ def main():
     print("\nHow to fix:")
     print("  Option 1 - Set environment variable before starting (recommended):")
     print("    export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES")
-    print("    lightrag-server")
+    print("    lightrag-gunicorn --workers 2")
     print("\n  Option 2 - Add to your shell profile (~/.zshrc or ~/.bash_profile):")
     print("    echo 'export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES' >> ~/.zshrc")
     print("    source ~/.zshrc")
@@ -384,7 +384,7 @@ class PostgreSQLDB:
 async def configure_age_extension(connection: asyncpg.Connection) -> None:
     """Create AGE extension if it doesn't exist for graph operations."""
     try:
-        await connection.execute("CREATE EXTENSION IF NOT EXISTS age")  # type: ignore
+        await connection.execute("CREATE EXTENSION IF NOT EXISTS AGE CASCADE")  # type: ignore
         logger.info("PostgreSQL, AGE extension enabled")
     except Exception as e:
         logger.warning(f"Could not create AGE extension: {e}")
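
`CASCADE` makes PostgreSQL install any extensions AGE itself depends on instead of failing. A hedged standalone check using asyncpg directly (the connection DSN is a placeholder):

```python
import asyncio
import asyncpg

async def main():
    conn = await asyncpg.connect("postgresql://postgres:postgres@localhost:5432/lightrag")
    # CASCADE pulls in AGE's own dependencies automatically
    await conn.execute("CREATE EXTENSION IF NOT EXISTS AGE CASCADE")
    row = await conn.fetchrow(
        "SELECT extversion FROM pg_extension WHERE extname = 'age'"
    )
    print("age version:", row["extversion"] if row else "not installed")
    await conn.close()

asyncio.run(main())
```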
@@ -69,6 +69,7 @@ async def fetch_data(url, headers, data):
 )
 async def jina_embed(
     texts: list[str],
+    model: str = "jina-embeddings-v4",
     embedding_dim: int = 2048,
     late_chunking: bool = False,
     base_url: str = None,
@@ -78,6 +79,8 @@ async def jina_embed(

     Args:
         texts: List of texts to embed.
+        model: The Jina embedding model to use (default: jina-embeddings-v4).
+            Supported models: jina-embeddings-v3, jina-embeddings-v4, etc.
         embedding_dim: The embedding dimensions (default: 2048 for jina-embeddings-v4).
             **IMPORTANT**: This parameter is automatically injected by the EmbeddingFunc wrapper.
             Do NOT manually pass this parameter when calling the function directly.
@@ -107,7 +110,7 @@ async def jina_embed(
         "Authorization": f"Bearer {os.environ['JINA_API_KEY']}",
     }
     data = {
-        "model": "jina-embeddings-v4",
+        "model": model,
         "task": "text-matching",
         "dimensions": embedding_dim,
         "embedding_type": "base64",
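
A hedged usage sketch of the new `model` parameter (requires `JINA_API_KEY` in the environment; it calls the raw `.func` and passes `embedding_dim` explicitly, as the server wrapper in this commit does):

```python
import asyncio
from lightrag.llm.jina import jina_embed

async def main():
    # jina-embeddings-v3 is 1024-dim, so embedding_dim must match when the
    # raw function is called directly instead of through EmbeddingFunc.
    vectors = await jina_embed.func(
        ["graph retrieval", "augmented generation"],
        model="jina-embeddings-v3",
        embedding_dim=1024,
    )
    print(vectors.shape)  # expected: (2, 1024)

asyncio.run(main())
```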
@@ -173,7 +173,9 @@ async def ollama_model_complete(


 @wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=8192)
-async def ollama_embed(texts: list[str], embed_model, **kwargs) -> np.ndarray:
+async def ollama_embed(
+    texts: list[str], embed_model: str = "bge-m3:latest", **kwargs
+) -> np.ndarray:
     api_key = kwargs.pop("api_key", None)
     if not api_key:
         api_key = os.getenv("OLLAMA_API_KEY")
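
With the new default, a bare call now works against a local Ollama instance that has pulled `bge-m3:latest`. A hedged sketch:

```python
import asyncio
from lightrag.llm.ollama import ollama_embed

async def main():
    # embed_model falls back to "bge-m3:latest" after this change
    vectors = await ollama_embed.func(["hello world"])
    print(vectors.shape)  # (1, 1024) for bge-m3

asyncio.run(main())
```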
@@ -887,7 +887,7 @@ async def azure_openai_complete(
     return result


-@wrap_embedding_func_with_attrs(embedding_dim=1536)
+@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
 async def azure_openai_embed(
     texts: list[str],
     model: str | None = None,
@@ -144,7 +144,8 @@ export const ChatMessage = ({
       ? displayContent
       : message.content || ''

-  // Load KaTeX dynamically
+  // Load KaTeX rehype plugin dynamically
+  // Note: KaTeX extensions (mhchem, copy-tex) are imported statically in main.tsx
   useEffect(() => {
     const loadKaTeX = async () => {
       try {
@@ -4,6 +4,9 @@ import './index.css'
 import AppRouter from './AppRouter'
 import './i18n.ts'
 import 'katex/dist/katex.min.css'
+// Import KaTeX extensions at app startup to ensure they are registered before any rendering
+import 'katex/contrib/mhchem' // Chemistry formulas: \ce{} and \pu{}
+import 'katex/contrib/copy-tex' // Allow copying rendered formulas as LaTeX source

 createRoot(document.getElementById('root')!).render(
   <StrictMode>
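
For context, mhchem provides the `\ce{}` and `\pu{}` commands named in the comments above. A generic example of markup that chat messages can now render (not taken from the repository):

```latex
% chemical equation rendered by KaTeX + mhchem
\ce{2H2 + O2 -> 2H2O}
% physical unit with a fraction separator
\pu{286 kJ//mol}
```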
lightrag_webui/src/types/katex.d.ts (1 change, vendored)

@@ -1 +1,2 @@
 declare module 'katex/contrib/mhchem'
+declare module 'katex/contrib/copy-tex'
@@ -10,7 +10,10 @@ export default defineConfig({
   resolve: {
     alias: {
       '@': path.resolve(__dirname, './src')
-    }
+    },
+    // Force all modules to use the same katex instance
+    // This ensures mhchem extension registered in main.tsx is available to rehype-katex
+    dedupe: ['katex']
   },
   // base: import.meta.env.VITE_BASE_URL || '/webui/',
   base: webuiPrefix,
@@ -23,7 +23,6 @@ classifiers = [
 dependencies = [
     "aiohttp",
     "configparser",
-    "future",
     "google-api-core>=2.0.0,<3.0.0",
     "google-genai>=1.0.0,<2.0.0",
     "json_repair",
@@ -54,7 +53,6 @@ api = [
     # Core dependencies
     "aiohttp",
     "configparser",
-    "future",
     "json_repair",
     "nano-vectordb",
     "networkx",
@@ -78,9 +76,9 @@ api = [
     "distro",
     "fastapi",
     "httpcore",
-    "httpx",
+    "httpx>=0.28.1",
     "jiter",
-    "passlib[bcrypt]",
+    "bcrypt>=4.0.0",
     "psutil",
     "PyJWT>=2.8.0,<3.0.0",
     "python-jose[cryptography]",
@@ -132,16 +130,18 @@ offline = [
     "lightrag-hku[api,offline-storage,offline-llm]",
 ]

-evaluation = [
-    # Test framework dependencies
+test = [
+    "lightrag-hku[api]",
     "pytest>=8.4.2",
     "pytest-asyncio>=1.2.0",
     "pre-commit",
     "ruff",
-    # RAG evaluation dependencies (RAGAS framework)
+]
+
+evaluation = [
+    "lightrag-hku[api]",
     "ragas>=0.3.7",
     "datasets>=4.3.0",
-    "httpx>=0.28.1",
 ]

 observability = [
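
Dropping `passlib[bcrypt]` for plain `bcrypt>=4.0.0` means password hashing is done against the bcrypt API directly. A hedged sketch of the equivalent calls (LightRAG's actual auth code is not part of this diff):

```python
import bcrypt

password = b"s3cret"
hashed = bcrypt.hashpw(password, bcrypt.gensalt())  # salted bcrypt hash
assert bcrypt.checkpw(password, hashed)             # constant-time verify
```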
uv.lock (34 changes, generated)

@@ -1334,15 +1334,6 @@ http = [
     { name = "aiohttp" },
 ]

-[[package]]
-name = "future"
-version = "1.0.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/a7/b2/4140c69c6a66432916b26158687e821ba631a4c9273c474343badf84d3ba/future-1.0.0.tar.gz", hash = "sha256:bd2968309307861edae1458a4f8a4f3598c03be43b97521076aebf5d94c07b05", size = 1228490, upload-time = "2024-02-21T11:52:38.461Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/da/71/ae30dadffc90b9006d77af76b393cb9dfbfc9629f339fc1574a1c52e6806/future-1.0.0-py3-none-any.whl", hash = "sha256:929292d34f5872e70396626ef385ec22355a1fae8ad29e1a734c3e43f9fbc216", size = 491326, upload-time = "2024-02-21T11:52:35.956Z" },
-]
-
 [[package]]
 name = "gitdb"
 version = "4.0.12"
@@ -2542,7 +2533,6 @@ source = { editable = "." }
 dependencies = [
     { name = "aiohttp" },
     { name = "configparser" },
-    { name = "future" },
     { name = "google-api-core" },
     { name = "google-genai" },
     { name = "json-repair" },
@@ -2567,10 +2557,10 @@ api = [
     { name = "aiohttp" },
     { name = "ascii-colors" },
     { name = "asyncpg" },
+    { name = "bcrypt" },
     { name = "configparser" },
     { name = "distro" },
     { name = "fastapi" },
-    { name = "future" },
     { name = "google-api-core" },
     { name = "google-genai" },
     { name = "gunicorn" },
@@ -2585,7 +2575,6 @@ api = [
     { name = "openai" },
     { name = "openpyxl" },
     { name = "pandas" },
-    { name = "passlib", extra = ["bcrypt"] },
     { name = "pipmaster" },
     { name = "psutil" },
     { name = "pycryptodome" },
@@ -2627,10 +2616,10 @@ offline = [
     { name = "anthropic" },
     { name = "ascii-colors" },
     { name = "asyncpg" },
+    { name = "bcrypt" },
     { name = "configparser" },
     { name = "distro" },
     { name = "fastapi" },
-    { name = "future" },
     { name = "google-api-core" },
     { name = "google-genai" },
     { name = "gunicorn" },
@@ -2648,7 +2637,6 @@ offline = [
     { name = "openai" },
     { name = "openpyxl" },
     { name = "pandas" },
-    { name = "passlib", extra = ["bcrypt"] },
     { name = "pipmaster" },
     { name = "psutil" },
     { name = "pycryptodome" },
@@ -2714,14 +2702,13 @@ requires-dist = [
     { name = "ascii-colors", marker = "extra == 'api'" },
     { name = "asyncpg", marker = "extra == 'api'" },
     { name = "asyncpg", marker = "extra == 'offline-storage'", specifier = ">=0.29.0,<1.0.0" },
+    { name = "bcrypt", marker = "extra == 'api'", specifier = ">=4.0.0" },
     { name = "configparser" },
     { name = "configparser", marker = "extra == 'api'" },
     { name = "datasets", marker = "extra == 'evaluation'", specifier = ">=4.3.0" },
     { name = "distro", marker = "extra == 'api'" },
     { name = "docling", marker = "sys_platform != 'darwin' and extra == 'docling'", specifier = ">=2.0.0,<3.0.0" },
     { name = "fastapi", marker = "extra == 'api'" },
-    { name = "future" },
-    { name = "future", marker = "extra == 'api'" },
     { name = "google-api-core", specifier = ">=2.0.0,<3.0.0" },
     { name = "google-api-core", marker = "extra == 'api'", specifier = ">=2.0.0,<3.0.0" },
     { name = "google-api-core", marker = "extra == 'offline-llm'", specifier = ">=2.0.0,<3.0.0" },
@@ -2751,7 +2738,6 @@ requires-dist = [
     { name = "openpyxl", marker = "extra == 'api'", specifier = ">=3.0.0,<4.0.0" },
     { name = "pandas", specifier = ">=2.0.0,<2.4.0" },
     { name = "pandas", marker = "extra == 'api'", specifier = ">=2.0.0,<2.4.0" },
-    { name = "passlib", extras = ["bcrypt"], marker = "extra == 'api'" },
     { name = "pipmaster" },
     { name = "pipmaster", marker = "extra == 'api'" },
     { name = "pre-commit", marker = "extra == 'evaluation'" },
@@ -4110,20 +4096,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ab/5f/b38085618b950b79d2d9164a711c52b10aefc0ae6833b96f626b7021b2ed/pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a", size = 13098436, upload-time = "2024-09-20T13:09:48.112Z" },
 ]

-[[package]]
-name = "passlib"
-version = "1.7.4"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/b6/06/9da9ee59a67fae7761aab3ccc84fa4f3f33f125b370f1ccdb915bf967c11/passlib-1.7.4.tar.gz", hash = "sha256:defd50f72b65c5402ab2c573830a6978e5f202ad0d984793c8dde2c4152ebe04", size = 689844, upload-time = "2020-10-08T19:00:52.121Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/3b/a4/ab6b7589382ca3df236e03faa71deac88cae040af60c071a78d254a62172/passlib-1.7.4-py2.py3-none-any.whl", hash = "sha256:aa6bca462b8d8bda89c70b382f0c298a20b5560af6cbfa2dce410c0a2fb669f1", size = 525554, upload-time = "2020-10-08T19:00:49.856Z" },
-]
-
-[package.optional-dependencies]
-bcrypt = [
-    { name = "bcrypt" },
-]
-
 [[package]]
 name = "pillow"
 version = "11.3.0"