cherry-pick c14f25b7
This commit is contained in:
parent
3b986f046f
commit
44a05f7a25
3 changed files with 168 additions and 33 deletions
67
env.example
67
env.example
|
|
@ -50,6 +50,8 @@ OLLAMA_EMULATING_MODEL_TAG=latest
|
||||||
# JWT_ALGORITHM=HS256
|
# JWT_ALGORITHM=HS256
|
||||||
|
|
||||||
### API-Key to access LightRAG Server API
|
### API-Key to access LightRAG Server API
|
||||||
|
### Use this key in HTTP requests with the 'X-API-Key' header
|
||||||
|
### Example: curl -H "X-API-Key: your-secure-api-key-here" http://localhost:9621/query
|
||||||
# LIGHTRAG_API_KEY=your-secure-api-key-here
|
# LIGHTRAG_API_KEY=your-secure-api-key-here
|
||||||
# WHITELIST_PATHS=/health,/api/*
|
# WHITELIST_PATHS=/health,/api/*
|
||||||
|
|
||||||
|
|
@ -119,6 +121,9 @@ ENABLE_LLM_CACHE_FOR_EXTRACT=true
|
||||||
### Document processing output language: English, Chinese, French, German ...
|
### Document processing output language: English, Chinese, French, German ...
|
||||||
SUMMARY_LANGUAGE=English
|
SUMMARY_LANGUAGE=English
|
||||||
|
|
||||||
|
### PDF decryption password for protected PDF files
|
||||||
|
# PDF_DECRYPT_PASSWORD=your_pdf_password_here
|
||||||
|
|
||||||
### Entity types that the LLM will attempt to recognize
|
### Entity types that the LLM will attempt to recognize
|
||||||
# ENTITY_TYPES='["Person", "Creature", "Organization", "Location", "Event", "Concept", "Method", "Content", "Data", "Artifact", "NaturalObject"]'
|
# ENTITY_TYPES='["Person", "Creature", "Organization", "Location", "Event", "Concept", "Method", "Content", "Data", "Artifact", "NaturalObject"]'
|
||||||
|
|
||||||
|
|
@ -142,8 +147,9 @@ SUMMARY_LANGUAGE=English
|
||||||
### FIFO: First in first out
|
### FIFO: First in first out
|
||||||
### KEEP: Keep oldest (less merge action and faster)
|
### KEEP: Keep oldest (less merge action and faster)
|
||||||
# SOURCE_IDS_LIMIT_METHOD=FIFO
|
# SOURCE_IDS_LIMIT_METHOD=FIFO
|
||||||
### Maximum number of file paths stored in entity/relation file_path field
|
|
||||||
# MAX_FILE_PATHS=30
|
# Maximum number of file paths stored in entity/relation file_path field (for display only; does not affect query performance)
|
||||||
|
# MAX_FILE_PATHS=100
|
||||||
|
|
||||||
### maximum number of related chunks per source entity or relation
|
### maximum number of related chunks per source entity or relation
|
||||||
### The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
|
### The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
|
||||||
|
|
@ -162,10 +168,11 @@ MAX_PARALLEL_INSERT=2
|
||||||
### Number of chunks sent to the embedding service in a single request
|
### Number of chunks sent to the embedding service in a single request
|
||||||
# EMBEDDING_BATCH_NUM=10
|
# EMBEDDING_BATCH_NUM=10
|
||||||
|
|
||||||
###########################################################
|
###########################################################################
|
||||||
### LLM Configuration
|
### LLM Configuration
|
||||||
### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock
|
### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock, gemini
|
||||||
###########################################################
|
### LLM_BINDING_HOST: host only for Ollama, endpoint for other LLM service
|
||||||
|
###########################################################################
|
||||||
### LLM request timeout setting for all llm (0 means no timeout for Ollama)
|
### LLM request timeout setting for all llm (0 means no timeout for Ollama)
|
||||||
# LLM_TIMEOUT=180
|
# LLM_TIMEOUT=180
|
||||||
|
|
||||||
|
|
@ -184,6 +191,15 @@ LLM_BINDING_API_KEY=your_api_key
|
||||||
# LLM_BINDING_API_KEY=your_api_key
|
# LLM_BINDING_API_KEY=your_api_key
|
||||||
# LLM_BINDING=openai
|
# LLM_BINDING=openai
|
||||||
|
|
||||||
|
### Gemini example
|
||||||
|
# LLM_BINDING=gemini
|
||||||
|
# LLM_MODEL=gemini-flash-latest
|
||||||
|
# LLM_BINDING_API_KEY=your_gemini_api_key
|
||||||
|
# LLM_BINDING_HOST=https://generativelanguage.googleapis.com
|
||||||
|
GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
|
||||||
|
# GEMINI_LLM_MAX_OUTPUT_TOKENS=9000
|
||||||
|
# GEMINI_LLM_TEMPERATURE=0.7
|
||||||
|
|
||||||
### OpenAI Compatible API Specific Parameters
|
### OpenAI Compatible API Specific Parameters
|
||||||
### Increased temperature values may mitigate infinite inference loops in certain LLM, such as Qwen3-30B.
|
### Increased temperature values may mitigate infinite inference loops in certain LLM, such as Qwen3-30B.
|
||||||
# OPENAI_LLM_TEMPERATURE=0.9
|
# OPENAI_LLM_TEMPERATURE=0.9
|
||||||
|
|
@ -207,6 +223,7 @@ OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
|
||||||
# OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'
|
# OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'
|
||||||
|
|
||||||
### use the following command to see all support options for Ollama LLM
|
### use the following command to see all support options for Ollama LLM
|
||||||
|
### If LightRAG is deployed in Docker, use host.docker.internal instead of localhost in LLM_BINDING_HOST
|
||||||
### lightrag-server --llm-binding ollama --help
|
### lightrag-server --llm-binding ollama --help
|
||||||
### Ollama Server Specific Parameters
|
### Ollama Server Specific Parameters
|
||||||
### OLLAMA_LLM_NUM_CTX must be provided, and should be at least MAX_TOTAL_TOKENS + 2000
|
### OLLAMA_LLM_NUM_CTX must be provided, and should be at least MAX_TOTAL_TOKENS + 2000
|
||||||
|
|
@ -219,16 +236,25 @@ OLLAMA_LLM_NUM_CTX=32768
|
||||||
### Bedrock Specific Parameters
|
### Bedrock Specific Parameters
|
||||||
# BEDROCK_LLM_TEMPERATURE=1.0
|
# BEDROCK_LLM_TEMPERATURE=1.0
|
||||||
|
|
||||||
####################################################################################
|
#######################################################################################
|
||||||
### Embedding Configuration (Should not be changed after the first file processed)
|
### Embedding Configuration (Should not be changed after the first file processed)
|
||||||
### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, aws_bedrock
|
### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, aws_bedrock
|
||||||
####################################################################################
|
### EMBEDDING_BINDING_HOST: host only for Ollama, endpoint for other Embedding service
|
||||||
|
#######################################################################################
|
||||||
# EMBEDDING_TIMEOUT=30
|
# EMBEDDING_TIMEOUT=30
|
||||||
|
|
||||||
|
### Control whether to send embedding_dim parameter to embedding API
|
||||||
|
### IMPORTANT: Jina ALWAYS sends dimension parameter (API requirement) - this setting is ignored for Jina
|
||||||
|
### For OpenAI: Set to 'true' to enable dynamic dimension adjustment
|
||||||
|
### For OpenAI: Set to 'false' (default) to disable sending dimension parameter
|
||||||
|
### Note: Automatically ignored for backends that don't support dimension parameter (e.g., Ollama)
|
||||||
|
# EMBEDDING_SEND_DIM=false
|
||||||
|
|
||||||
EMBEDDING_BINDING=ollama
|
EMBEDDING_BINDING=ollama
|
||||||
EMBEDDING_MODEL=bge-m3:latest
|
EMBEDDING_MODEL=bge-m3:latest
|
||||||
EMBEDDING_DIM=1024
|
EMBEDDING_DIM=1024
|
||||||
EMBEDDING_BINDING_API_KEY=your_api_key
|
EMBEDDING_BINDING_API_KEY=your_api_key
|
||||||
# If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
|
# If LightRAG is deployed in Docker, use host.docker.internal instead of localhost
|
||||||
EMBEDDING_BINDING_HOST=http://localhost:11434
|
EMBEDDING_BINDING_HOST=http://localhost:11434
|
||||||
|
|
||||||
### OpenAI compatible (VoyageAI embedding openai compatible)
|
### OpenAI compatible (VoyageAI embedding openai compatible)
|
||||||
|
|
@ -390,21 +416,30 @@ MEMGRAPH_DATABASE=memgraph
|
||||||
### Evaluation Configuration
|
### Evaluation Configuration
|
||||||
############################
|
############################
|
||||||
### RAGAS evaluation models (used for RAG quality assessment)
|
### RAGAS evaluation models (used for RAG quality assessment)
|
||||||
|
### ⚠️ IMPORTANT: Both LLM and Embedding endpoints MUST be OpenAI-compatible
|
||||||
### Default uses OpenAI models for evaluation
|
### Default uses OpenAI models for evaluation
|
||||||
# EVAL_LLM_MODEL=gpt-4.1
|
|
||||||
# EVAL_EMBEDDING_MODEL=text-embedding-3-large
|
### LLM Configuration for Evaluation
|
||||||
### API key for evaluation (fallback to OPENAI_API_KEY if not set)
|
# EVAL_LLM_MODEL=gpt-4o-mini
|
||||||
|
### API key for LLM evaluation (fallback to OPENAI_API_KEY if not set)
|
||||||
# EVAL_LLM_BINDING_API_KEY=your_api_key
|
# EVAL_LLM_BINDING_API_KEY=your_api_key
|
||||||
### Custom endpoint for evaluation models (optional, for OpenAI-compatible services)
|
### Custom OpenAI-compatible endpoint for LLM evaluation (optional)
|
||||||
# EVAL_LLM_BINDING_HOST=https://api.openai.com/v1
|
# EVAL_LLM_BINDING_HOST=https://api.openai.com/v1
|
||||||
|
|
||||||
### Evaluation concurrency and rate limiting
|
### Embedding Configuration for Evaluation
|
||||||
### Number of concurrent test case evaluations (default: 1 for serial evaluation)
|
# EVAL_EMBEDDING_MODEL=text-embedding-3-large
|
||||||
|
### API key for embeddings (fallback: EVAL_LLM_BINDING_API_KEY -> OPENAI_API_KEY)
|
||||||
|
# EVAL_EMBEDDING_BINDING_API_KEY=your_embedding_api_key
|
||||||
|
### Custom OpenAI-compatible endpoint for embeddings (fallback: EVAL_LLM_BINDING_HOST)
|
||||||
|
# EVAL_EMBEDDING_BINDING_HOST=https://api.openai.com/v1
|
||||||
|
|
||||||
|
### Performance Tuning
|
||||||
|
### Number of concurrent test case evaluations
|
||||||
### Lower values reduce API rate limit issues but increase evaluation time
|
### Lower values reduce API rate limit issues but increase evaluation time
|
||||||
# EVAL_MAX_CONCURRENT=3
|
# EVAL_MAX_CONCURRENT=2
|
||||||
### TOP_K query parameter of LightRAG (default: 10)
|
### TOP_K query parameter of LightRAG (default: 10)
|
||||||
### Number of entities or relations retrieved from KG
|
### Number of entities or relations retrieved from KG
|
||||||
# EVAL_QUERY_TOP_K=10
|
# EVAL_QUERY_TOP_K=10
|
||||||
### LLM request retry and timeout settings for evaluation
|
### LLM request retry and timeout settings for evaluation
|
||||||
# EVAL_LLM_MAX_RETRIES=5
|
# EVAL_LLM_MAX_RETRIES=5
|
||||||
# EVAL_LLM_TIMEOUT=120
|
# EVAL_LLM_TIMEOUT=180
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,6 @@ import logging.config
|
||||||
import sys
|
import sys
|
||||||
import uvicorn
|
import uvicorn
|
||||||
import pipmaster as pm
|
import pipmaster as pm
|
||||||
import inspect
|
|
||||||
from fastapi.staticfiles import StaticFiles
|
from fastapi.staticfiles import StaticFiles
|
||||||
from fastapi.responses import RedirectResponse
|
from fastapi.responses import RedirectResponse
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
@ -512,7 +511,9 @@ def create_app(args):
|
||||||
|
|
||||||
return optimized_azure_openai_model_complete
|
return optimized_azure_openai_model_complete
|
||||||
|
|
||||||
def create_optimized_gemini_llm_func(config_cache: LLMConfigCache, args):
|
def create_optimized_gemini_llm_func(
|
||||||
|
config_cache: LLMConfigCache, args, llm_timeout: int
|
||||||
|
):
|
||||||
"""Create optimized Gemini LLM function with cached configuration"""
|
"""Create optimized Gemini LLM function with cached configuration"""
|
||||||
|
|
||||||
async def optimized_gemini_model_complete(
|
async def optimized_gemini_model_complete(
|
||||||
|
|
@ -527,6 +528,8 @@ def create_app(args):
|
||||||
if history_messages is None:
|
if history_messages is None:
|
||||||
history_messages = []
|
history_messages = []
|
||||||
|
|
||||||
|
# Use pre-processed configuration to avoid repeated parsing
|
||||||
|
kwargs["timeout"] = llm_timeout
|
||||||
if (
|
if (
|
||||||
config_cache.gemini_llm_options is not None
|
config_cache.gemini_llm_options is not None
|
||||||
and "generation_config" not in kwargs
|
and "generation_config" not in kwargs
|
||||||
|
|
@ -568,7 +571,7 @@ def create_app(args):
|
||||||
config_cache, args, llm_timeout
|
config_cache, args, llm_timeout
|
||||||
)
|
)
|
||||||
elif binding == "gemini":
|
elif binding == "gemini":
|
||||||
return create_optimized_gemini_llm_func(config_cache, args)
|
return create_optimized_gemini_llm_func(config_cache, args, llm_timeout)
|
||||||
else: # openai and compatible
|
else: # openai and compatible
|
||||||
# Use optimized function with pre-processed configuration
|
# Use optimized function with pre-processed configuration
|
||||||
return create_optimized_openai_llm_func(config_cache, args, llm_timeout)
|
return create_optimized_openai_llm_func(config_cache, args, llm_timeout)
|
||||||
|
|
@ -595,7 +598,7 @@ def create_app(args):
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def create_optimized_embedding_function(
|
def create_optimized_embedding_function(
|
||||||
config_cache: LLMConfigCache, binding, model, host, api_key, dimensions, args
|
config_cache: LLMConfigCache, binding, model, host, api_key, args
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Create optimized embedding function with pre-processed configuration for applicable bindings.
|
Create optimized embedding function with pre-processed configuration for applicable bindings.
|
||||||
|
|
@ -640,9 +643,7 @@ def create_app(args):
|
||||||
elif binding == "jina":
|
elif binding == "jina":
|
||||||
from lightrag.llm.jina import jina_embed
|
from lightrag.llm.jina import jina_embed
|
||||||
|
|
||||||
return await jina_embed(
|
return await jina_embed(texts, base_url=host, api_key=api_key)
|
||||||
texts, dimensions=dimensions, base_url=host, api_key=api_key
|
|
||||||
)
|
|
||||||
else: # openai and compatible
|
else: # openai and compatible
|
||||||
from lightrag.llm.openai import openai_embed
|
from lightrag.llm.openai import openai_embed
|
||||||
|
|
||||||
|
|
@ -687,17 +688,49 @@ def create_app(args):
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create embedding function with optimized configuration
|
# Create embedding function with optimized configuration
|
||||||
|
import inspect
|
||||||
|
|
||||||
|
# Create the optimized embedding function
|
||||||
|
optimized_embedding_func = create_optimized_embedding_function(
|
||||||
|
config_cache=config_cache,
|
||||||
|
binding=args.embedding_binding,
|
||||||
|
model=args.embedding_model,
|
||||||
|
host=args.embedding_binding_host,
|
||||||
|
api_key=args.embedding_binding_api_key,
|
||||||
|
args=args, # Pass args object for fallback option generation
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check environment variable for sending dimensions
|
||||||
|
embedding_send_dim = os.getenv("EMBEDDING_SEND_DIM", "false").lower() == "true"
|
||||||
|
|
||||||
|
# Check if the function signature has embedding_dim parameter
|
||||||
|
# Note: Since optimized_embedding_func is an async function, inspect its signature
|
||||||
|
sig = inspect.signature(optimized_embedding_func)
|
||||||
|
has_embedding_dim_param = "embedding_dim" in sig.parameters
|
||||||
|
|
||||||
|
# Determine send_dimensions value based on binding type
|
||||||
|
# Jina REQUIRES dimension parameter (forced to True)
|
||||||
|
# OpenAI and others: controlled by EMBEDDING_SEND_DIM environment variable
|
||||||
|
if args.embedding_binding == "jina":
|
||||||
|
# Jina API requires dimension parameter - always send it
|
||||||
|
send_dimensions = has_embedding_dim_param
|
||||||
|
dimension_control = "forced (Jina API requirement)"
|
||||||
|
else:
|
||||||
|
# For OpenAI and other bindings, respect EMBEDDING_SEND_DIM setting
|
||||||
|
send_dimensions = embedding_send_dim and has_embedding_dim_param
|
||||||
|
dimension_control = f"env_var={embedding_send_dim}"
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"Embedding configuration: send_dimensions={send_dimensions} "
|
||||||
|
f"({dimension_control}, has_param={has_embedding_dim_param}, "
|
||||||
|
f"binding={args.embedding_binding})"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create EmbeddingFunc with send_dimensions attribute
|
||||||
embedding_func = EmbeddingFunc(
|
embedding_func = EmbeddingFunc(
|
||||||
embedding_dim=args.embedding_dim,
|
embedding_dim=args.embedding_dim,
|
||||||
func=create_optimized_embedding_function(
|
func=optimized_embedding_func,
|
||||||
config_cache=config_cache,
|
send_dimensions=send_dimensions,
|
||||||
binding=args.embedding_binding,
|
|
||||||
model=args.embedding_model,
|
|
||||||
host=args.embedding_binding_host,
|
|
||||||
api_key=args.embedding_binding_api_key,
|
|
||||||
dimensions=args.embedding_dim,
|
|
||||||
args=args, # Pass args object for fallback option generation
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Configure rerank function based on the args.rerank_binding parameter
|
# Configure rerank function based on the args.rerank_binding parameter
|
||||||
|
|
|
||||||
|
|
@ -353,8 +353,29 @@ class EmbeddingFunc:
|
||||||
embedding_dim: int
|
embedding_dim: int
|
||||||
func: callable
|
func: callable
|
||||||
max_token_size: int | None = None # deprecated keep it for compatible only
|
max_token_size: int | None = None # deprecated keep it for compatible only
|
||||||
|
send_dimensions: bool = (
|
||||||
|
False # Control whether to send embedding_dim to the function
|
||||||
|
)
|
||||||
|
|
||||||
async def __call__(self, *args, **kwargs) -> np.ndarray:
|
async def __call__(self, *args, **kwargs) -> np.ndarray:
|
||||||
|
# Only inject embedding_dim when send_dimensions is True
|
||||||
|
if self.send_dimensions:
|
||||||
|
# Check if user provided embedding_dim parameter
|
||||||
|
if "embedding_dim" in kwargs:
|
||||||
|
user_provided_dim = kwargs["embedding_dim"]
|
||||||
|
# If user's value differs from class attribute, output warning
|
||||||
|
if (
|
||||||
|
user_provided_dim is not None
|
||||||
|
and user_provided_dim != self.embedding_dim
|
||||||
|
):
|
||||||
|
logger.warning(
|
||||||
|
f"Ignoring user-provided embedding_dim={user_provided_dim}, "
|
||||||
|
f"using declared embedding_dim={self.embedding_dim} from decorator"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Inject embedding_dim from decorator
|
||||||
|
kwargs["embedding_dim"] = self.embedding_dim
|
||||||
|
|
||||||
return await self.func(*args, **kwargs)
|
return await self.func(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1795,7 +1816,7 @@ def normalize_extracted_info(name: str, remove_inner_quotes=False) -> str:
|
||||||
- Filter out short numeric-only text (length < 3 and only digits/dots)
|
- Filter out short numeric-only text (length < 3 and only digits/dots)
|
||||||
- remove_inner_quotes = True
|
- remove_inner_quotes = True
|
||||||
remove Chinese quotes
|
remove Chinese quotes
|
||||||
remove English queotes in and around chinese
|
remove English quotes in and around chinese
|
||||||
Convert non-breaking spaces to regular spaces
|
Convert non-breaking spaces to regular spaces
|
||||||
Convert narrow non-breaking spaces after non-digits to regular spaces
|
Convert narrow non-breaking spaces after non-digits to regular spaces
|
||||||
|
|
||||||
|
|
@ -2551,6 +2572,52 @@ def apply_source_ids_limit(
|
||||||
return truncated
|
return truncated
|
||||||
|
|
||||||
|
|
||||||
|
def compute_incremental_chunk_ids(
    existing_full_chunk_ids: list[str],
    old_chunk_ids: list[str],
    new_chunk_ids: list[str],
) -> list[str]:
    """
    Compute incrementally updated chunk IDs based on changes.

    The delta between old_chunk_ids and new_chunk_ids is applied to
    existing_full_chunk_ids:

    - IDs present in old_chunk_ids but absent from new_chunk_ids are removed.
    - IDs present in new_chunk_ids but absent from old_chunk_ids are appended
      at the end, in the order they appear in new_chunk_ids, unless they are
      already in the list.
    - All other existing IDs keep their original relative order.

    Only appended IDs are deduplicated; duplicates already present in
    existing_full_chunk_ids are left untouched. None of the input lists is
    modified.

    Args:
        existing_full_chunk_ids: Complete list of existing chunk IDs from storage
        old_chunk_ids: Previous chunk IDs from source_id (chunks being replaced)
        new_chunk_ids: New chunk IDs from updated source_id (chunks being added)

    Returns:
        A new list of chunk IDs with the delta applied

    Example:
        >>> existing = ['chunk-1', 'chunk-2', 'chunk-3']
        >>> old = ['chunk-1', 'chunk-2']
        >>> new = ['chunk-2', 'chunk-4']
        >>> compute_incremental_chunk_ids(existing, old, new)
        ['chunk-2', 'chunk-3', 'chunk-4']
    """
    # Calculate changes (build each set once and reuse it)
    old_ids = set(old_chunk_ids)
    new_ids = set(new_chunk_ids)
    chunks_to_remove = old_ids - new_ids
    chunks_to_add = new_ids - old_ids

    # Step 1: Remove chunks that are no longer needed, keeping the order
    updated_chunk_ids = [
        cid for cid in existing_full_chunk_ids if cid not in chunks_to_remove
    ]

    # Step 2: Append new chunks (preserving order from new_chunk_ids).
    # A set mirrors the list contents so each membership test is O(1)
    # instead of a linear scan of the growing result list.
    present = set(updated_chunk_ids)
    for cid in new_chunk_ids:
        if cid in chunks_to_add and cid not in present:
            updated_chunk_ids.append(cid)
            present.add(cid)

    return updated_chunk_ids
|
||||||
|
|
||||||
|
|
||||||
def subtract_source_ids(
|
def subtract_source_ids(
|
||||||
source_ids: Iterable[str],
|
source_ids: Iterable[str],
|
||||||
ids_to_remove: Collection[str],
|
ids_to_remove: Collection[str],
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue