From 7e3914052d4117be4a12676115460479c96570d6 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sat, 19 Jul 2025 21:01:03 +0800 Subject: [PATCH] Optimize text chunk retrieval with batch fetching - Replace individual chunk fetches with batch get - Simplify deduplication logic - Improve error handling for missing data --- lightrag/operate.py | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index 183798b1..a0418174 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -2722,31 +2722,31 @@ async def _find_related_text_unit_from_relationships( all_text_units_lookup = {} # Deduplicate and preserve order | {c_id:order} - async def build_text_units_unique(text_units): - text_units_flat = {} - for index, unit_list in enumerate(text_units): - for c_id in unit_list: - if c_id not in text_units_flat or index < text_units_flat[c_id]: - # Keep the smallest order - text_units_flat[c_id] = index - return text_units_flat + text_units_unique_flat = {} + for index, unit_list in enumerate(text_units): + for c_id in unit_list: + if ( + c_id not in text_units_unique_flat + or index < text_units_unique_flat[c_id] + ): + # Keep the smallest order + text_units_unique_flat[c_id] = index - text_units_unique_flat = build_text_units_unique(text_units) + if not text_units_unique_flat: + logger.warning("No valid text chunks found") + return [] - async def fetch_chunk_data(c_id, index): - if c_id not in all_text_units_lookup: - chunk_data = await text_chunks_db.get_by_id(c_id) - # Only store valid data - if chunk_data is not None and "content" in chunk_data: - all_text_units_lookup[c_id] = { - "data": chunk_data, - "order": index, - } + # Batch get all text chunk data + chunk_ids = list(text_units_unique_flat.keys()) + chunk_data_list = await text_chunks_db.get_by_ids(chunk_ids) - tasks = [ - fetch_chunk_data(c_id, order) for c_id, order in text_units_unique_flat.items() - ] - await asyncio.gather(*tasks) + # Build lookup table, handling possible missing data + for chunk_id, chunk_data in zip(chunk_ids, chunk_data_list): + if chunk_data is not None and "content" in chunk_data: + all_text_units_lookup[chunk_id] = { + "data": chunk_data, + "order": text_units_unique_flat[chunk_id], + } if not all_text_units_lookup: logger.warning("No valid text chunks found")