Optimize text chunk retrieval with batch fetching

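- Replace per-chunk get_by_id fetches with a single batched get_by_ids call
- Simplify deduplication by inlining the helper into a plain loop
- Improve handling of missing data: return early with a warning when no chunk IDs are collected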
This commit is contained in:
yangdx 2025-07-19 21:01:03 +08:00
parent 7acca59dfb
commit 7e3914052d

View file

@@ -2722,31 +2722,31 @@ async def _find_related_text_unit_from_relationships(
all_text_units_lookup = {} all_text_units_lookup = {}
# Deduplicate and preserve order | {c_id:order} # Deduplicate and preserve order | {c_id:order}
async def build_text_units_unique(text_units): text_units_unique_flat = {}
text_units_flat = {} for index, unit_list in enumerate(text_units):
for index, unit_list in enumerate(text_units): for c_id in unit_list:
for c_id in unit_list: if (
if c_id not in text_units_flat or index < text_units_flat[c_id]: c_id not in text_units_unique_flat
# Keep the smallest order or index < text_units_unique_flat[c_id]
text_units_flat[c_id] = index ):
return text_units_flat # Keep the smallest order
text_units_unique_flat[c_id] = index
text_units_unique_flat = build_text_units_unique(text_units) if not text_units_unique_flat:
logger.warning("No valid text chunks found")
return []
async def fetch_chunk_data(c_id, index): # Batch get all text chunk data
if c_id not in all_text_units_lookup: chunk_ids = list(text_units_unique_flat.keys())
chunk_data = await text_chunks_db.get_by_id(c_id) chunk_data_list = await text_chunks_db.get_by_ids(chunk_ids)
# Only store valid data
if chunk_data is not None and "content" in chunk_data:
all_text_units_lookup[c_id] = {
"data": chunk_data,
"order": index,
}
tasks = [ # Build lookup table, handling possible missing data
fetch_chunk_data(c_id, order) for c_id, order in text_units_unique_flat.items() for chunk_id, chunk_data in zip(chunk_ids, chunk_data_list):
] if chunk_data is not None and "content" in chunk_data:
await asyncio.gather(*tasks) all_text_units_lookup[chunk_id] = {
"data": chunk_data,
"order": text_units_unique_flat[chunk_id],
}
if not all_text_units_lookup: if not all_text_units_lookup:
logger.warning("No valid text chunks found") logger.warning("No valid text chunks found")