Merge branch 'context-builder'

2025-07-23 16:14:44 +08:00 · 2025-07-23 16:14:44 +08:00 · 00d7bc80bf
commit 00d7bc80bf
parent 681d43bb32 75fd6c73ea
1 changed files with 73 additions and 64 deletions
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -1329,6 +1329,16 @@ async def merge_nodes_and_edges(
    for edge_key, edges in all_edges.items():
        tasks.append(asyncio.create_task(_locked_process_edges(edge_key, edges)))
    # Check if there are any tasks to process
    if not tasks:
        log_message = f"No entities or relationships to process for {file_path}"
        logger.info(log_message)
        if pipeline_status_lock is not None:
            async with pipeline_status_lock:
                pipeline_status["latest_message"] = log_message
                pipeline_status["history_messages"].append(log_message)
        return
    # Execute all tasks in parallel with semaphore control and early failure detection
    done, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_EXCEPTION)
@@ -2020,7 +2030,6 @@ async def _build_query_context(
    # Unified token control system - Apply precise token limits to entities and relations
    tokenizer = text_chunks_db.global_config.get("tokenizer")
    if tokenizer:
    # Get new token limits from query_param (with fallback to global_config)
    max_entity_tokens = getattr(
        query_param,
@@ -2145,9 +2154,9 @@ async def _build_query_context(
            if chunks:
                all_chunks.extend(chunks)
-    # Apply token processing to chunks if tokenizer is available
+    # Apply token processing to chunks
    text_units_context = []
-    if tokenizer and all_chunks:
+    if all_chunks:
        # Calculate dynamic token limit for text chunks
        entities_str = json.dumps(entities_context, ensure_ascii=False)
        relations_str = json.dumps(relations_context, ensure_ascii=False)
@@ -2600,7 +2609,7 @@ async def _get_edge_data(
            combined = {
                "src_id": k["src_id"],
                "tgt_id": k["tgt_id"],
-                "rank": edge_degrees_dict.get(pair, k.get("rank", 0)),
+                "rank": edge_degrees_dict.get(pair, 0),
                "created_at": k.get("created_at", None),
                **edge_props,
            }