Merge pull request #1778 from danielaskdd/add-ollama-num-ctx
feat: Refine summary logic and add dedicated Ollama num_ctx config
commit 375bfd57a4
9 changed files with 32 additions and 19 deletions

@@ -250,7 +250,7 @@ if __name__ == "__main__":
 | **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
 | **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
 | **llm_model_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
-| **llm_model_max_token_size** | `int` | Maximum token size for LLM generation (affects entity relation summaries) | `32768` (default overridden by env var MAX_TOKENS) |
+| **llm_model_max_token_size** | `int` | Maximum number of tokens sent to the LLM when generating entity relation summaries | `32000` (default overridden by env var MAX_TOKENS) |
 | **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4` (default overridden by env var MAX_ASYNC) |
 | **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | |
 | **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for the vector database, such as thresholds for node and relation retrieval | cosine_better_than_threshold: 0.2 (default overridden by env var COSINE_THRESHOLD) |

@@ -257,7 +257,7 @@ A full list of LightRAG init parameters:
 | **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
 | **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
 | **llm_model_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
-| **llm_model_max_token_size** | `int` | Maximum token size for LLM generation (affects entity relation summaries) | `32768` (default overridden by env var MAX_TOKENS) |
+| **llm_model_max_token_size** | `int` | Maximum number of tokens sent to the LLM to generate entity relation summaries | `32000` (default overridden by env var MAX_TOKENS) |
 | **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4` (default overridden by env var MAX_ASYNC) |
 | **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | |
 | **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for the vector database, such as thresholds for node and relation retrieval | cosine_better_than_threshold: 0.2 (default overridden by env var COSINE_THRESHOLD) |

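For orientation, a hedged sketch of initializing LightRAG with the parameters documented above; the import path, the completion function, and all values are illustrative assumptions, not taken from this PR:

```python
from lightrag import LightRAG
from lightrag.llm.openai import gpt_4o_mini_complete  # assumed import path

# Values mirror the defaults in the table above; embedding setup omitted.
rag = LightRAG(
    working_dir="./rag_storage",          # illustrative path
    llm_model_func=gpt_4o_mini_complete,
    llm_model_name="gpt-4o-mini",
    llm_model_max_token_size=32000,       # new default, env var MAX_TOKENS
    llm_model_max_async=4,                # env var MAX_ASYNC
)
```
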
@@ -46,7 +46,6 @@ OLLAMA_EMULATING_MODEL_TAG=latest
 ### Chunk size for document splitting, 500~1500 is recommended
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100
-# MAX_TOKEN_SUMMARY=500
 
 ### RAG Query Configuration
 # HISTORY_TURNS=3

@@ -91,8 +90,7 @@ TEMPERATURE=0
 ### Max concurrency requests of LLM
 MAX_ASYNC=4
 ### MAX_TOKENS: max tokens sent to LLM for entity relation summaries (less than the model's context size)
-### MAX_TOKENS: set as num_ctx option for Ollama by API Server
-MAX_TOKENS=32768
+MAX_TOKENS=32000
 ### LLM Binding type: openai, ollama, lollms, azure_openai
 LLM_BINDING=openai
 LLM_MODEL=gpt-4o

@@ -101,6 +99,8 @@ LLM_BINDING_API_KEY=your_api_key
 ### Optional for Azure
 # AZURE_OPENAI_API_VERSION=2024-08-01-preview
 # AZURE_OPENAI_DEPLOYMENT=gpt-4o
+### set as num_ctx option for Ollama LLM
+# OLLAMA_NUM_CTX=32768
 
 ### Embedding Configuration
 ### Embedding Binding type: openai, ollama, lollms, azure_openai

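A commented-out `# OLLAMA_NUM_CTX=32768` means the variable is unset, so the server falls back to a built-in default; a minimal sketch of that lookup (the 32768 default matches the `parse_args` hunk further down):

```python
import os

# Unset (commented out) in .env -> fall back to the built-in default.
ollama_num_ctx = int(os.getenv("OLLAMA_NUM_CTX", 32768))
```
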
@@ -54,8 +54,8 @@ LLM_BINDING=openai
 LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key
-### Max number of tokens sent to the LLM (less than the model's context size)
-MAX_TOKENS=32768
+### Max number of tokens sent to the LLM for entity relation summaries (less than the model's context size)
+MAX_TOKENS=32000
 
 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434

@@ -71,8 +71,10 @@ LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_HOST=http://localhost:11434
 # LLM_BINDING_API_KEY=your_api_key
-### Max number of tokens sent to the LLM (based on your Ollama server capacity)
-MAX_TOKENS=8192
+### Max number of tokens sent to the LLM for entity relation summaries (less than the model's context size)
+MAX_TOKENS=7500
+### Context size of the Ollama server in tokens (based on your Ollama server capacity)
+OLLAMA_NUM_CTX=8192
 
 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434

@@ -71,8 +71,10 @@ LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_HOST=http://localhost:11434
 # LLM_BINDING_API_KEY=your_api_key
-### Max tokens sent to LLM (based on your Ollama Server capacity)
-MAX_TOKENS=8192
+### Max tokens sent to LLM for entity relation description summarization (less than the LLM context length)
+MAX_TOKENS=7500
+### Ollama Server context length
+OLLAMA_NUM_CTX=8192
 
 EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434

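As the comments note, the summary budget has to fit inside the model's context window along with the prompt and the generated output; a small sanity-check sketch under that assumption:

```python
import os

max_tokens = int(os.getenv("MAX_TOKENS", 7500))          # summary input budget
ollama_num_ctx = int(os.getenv("OLLAMA_NUM_CTX", 8192))  # Ollama context window

# Leave headroom in the context window for prompt scaffolding and the
# summary the LLM generates, not just the fragments being summarized.
if max_tokens >= ollama_num_ctx:
    raise ValueError("MAX_TOKENS should be smaller than OLLAMA_NUM_CTX")
```
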
@@ -108,7 +108,7 @@ def parse_args() -> argparse.Namespace:
     parser.add_argument(
         "--max-tokens",
         type=int,
-        default=get_env_value("MAX_TOKENS", 32768, int),
+        default=get_env_value("MAX_TOKENS", 32000, int),
         help="Maximum token size (default: from env or 32768)",
     )

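`get_env_value` is not shown in this diff; presumably it reads an environment variable and coerces it to the requested type. A minimal sketch of such a helper (the project's real implementation may differ):

```python
import os
from typing import Callable, TypeVar

T = TypeVar("T")

def get_env_value(name: str, default: T, value_type: Callable[[str], T] = str) -> T:
    """Return the env var coerced to value_type, or default if unset or invalid."""
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return value_type(raw)
    except (TypeError, ValueError):
        return default
```
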
@@ -270,6 +270,9 @@ def parse_args() -> argparse.Namespace:
         args.llm_binding = "openai"
         args.embedding_binding = "ollama"
 
+    # Ollama num_ctx
+    args.ollama_num_ctx = get_env_value("OLLAMA_NUM_CTX", 32768, int)
+
     args.llm_binding_host = get_env_value(
         "LLM_BINDING_HOST", get_default_host(args.llm_binding)
     )

@@ -336,7 +336,7 @@ def create_app(args):
         llm_model_kwargs={
             "host": args.llm_binding_host,
             "timeout": args.timeout,
-            "options": {"num_ctx": args.max_tokens},
+            "options": {"num_ctx": args.ollama_num_ctx},
             "api_key": args.llm_binding_api_key,
         }
         if args.llm_binding == "lollms" or args.llm_binding == "ollama"

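For the `ollama` and `lollms` bindings, the context window now comes from `OLLAMA_NUM_CTX` rather than `MAX_TOKENS`, so tuning the summary budget no longer silently shrinks the model's context. An illustrative reconstruction of the selection above (the args values and the empty else-branch are assumptions, since the diff does not show them):

```python
args_llm_binding = "ollama"  # hypothetical configuration

llm_model_kwargs = (
    {
        "host": "http://localhost:11434",
        "timeout": 300,
        "options": {"num_ctx": 8192},  # from OLLAMA_NUM_CTX, not MAX_TOKENS
        "api_key": None,
    }
    if args_llm_binding in ("lollms", "ollama")
    else {}
)
```
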
@@ -231,7 +231,7 @@ class LightRAG:
     llm_model_name: str = field(default="gpt-4o-mini")
     """Name of the LLM model used for generating responses."""
 
-    llm_model_max_token_size: int = field(default=int(os.getenv("MAX_TOKENS", 32768)))
+    llm_model_max_token_size: int = field(default=int(os.getenv("MAX_TOKENS", 32000)))
     """Maximum number of tokens allowed per LLM response."""
 
     llm_model_max_async: int = field(default=int(os.getenv("MAX_ASYNC", 4)))

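Note that `field(default=int(os.getenv(...)))` resolves the environment variable once, when the class body is executed at import time; a standalone illustration of the same pattern:

```python
import os
from dataclasses import dataclass, field

@dataclass
class Example:
    # Evaluated when the class is defined: later changes to MAX_TOKENS
    # in os.environ do not affect the already-computed default.
    max_token_size: int = field(default=int(os.getenv("MAX_TOKENS", 32000)))

print(Example().max_token_size)  # 32000 unless MAX_TOKENS was set before import
```
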
@@ -118,7 +118,7 @@ async def _handle_entity_relation_summary(
 
     tokenizer: Tokenizer = global_config["tokenizer"]
     llm_max_tokens = global_config["llm_model_max_token_size"]
-    summary_max_tokens = global_config["summary_to_max_tokens"]
+    # summary_max_tokens = global_config["summary_to_max_tokens"]
 
     language = global_config["addon_params"].get(
         "language", PROMPTS["DEFAULT_LANGUAGE"]

@@ -145,7 +145,7 @@ async def _handle_entity_relation_summary(
         use_prompt,
         use_llm_func,
         llm_response_cache=llm_response_cache,
-        max_tokens=summary_max_tokens,
+        # max_tokens=summary_max_tokens,
         cache_type="extract",
     )
     return summary

@@ -687,7 +687,10 @@ async def _rebuild_single_entity(
 
     # Helper function to generate final description with optional LLM summary
     async def _generate_final_description(combined_description: str) -> str:
-        if len(combined_description) > global_config["summary_to_max_tokens"]:
+        force_llm_summary_on_merge = global_config["force_llm_summary_on_merge"]
+        num_fragment = combined_description.count(GRAPH_FIELD_SEP) + 1
+
+        if num_fragment >= force_llm_summary_on_merge:
             return await _handle_entity_relation_summary(
                 entity_name,
                 combined_description,

@@ -842,8 +845,11 @@ async def _rebuild_single_relationship(
     # )
     weight = sum(weights) if weights else current_relationship.get("weight", 1.0)
 
-    # Use summary if description is too long
-    if len(combined_description) > global_config["summary_to_max_tokens"]:
+    # Use summary if description has too many fragments
+    force_llm_summary_on_merge = global_config["force_llm_summary_on_merge"]
+    num_fragment = combined_description.count(GRAPH_FIELD_SEP) + 1
+
+    if num_fragment >= force_llm_summary_on_merge:
         final_description = await _handle_entity_relation_summary(
             f"{src}-{tgt}",
             combined_description,

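Both rebuild paths now trigger an LLM summary based on fragment count rather than character length. A sketch of the check, assuming `GRAPH_FIELD_SEP` is the `<SEP>` delimiter LightRAG places between merged description fragments:

```python
GRAPH_FIELD_SEP = "<SEP>"  # assumed fragment delimiter

def needs_llm_summary(combined_description: str, force_llm_summary_on_merge: int) -> bool:
    """Summarize only once enough fragments accumulate, regardless of text length."""
    num_fragment = combined_description.count(GRAPH_FIELD_SEP) + 1
    return num_fragment >= force_llm_summary_on_merge

merged = GRAPH_FIELD_SEP.join(f"fragment {i}" for i in range(6))
print(needs_llm_summary(merged, force_llm_summary_on_merge=6))  # True: 6 fragments
```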