Optimize text chunk retrieval with batch fetching
- Replace individual chunk fetches with a single batch get
- Simplify deduplication logic
- Improve error handling for missing data
This commit is contained in:
parent
7acca59dfb
commit
7e3914052d
1 changed file with 22 additions and 22 deletions
|
|
@@ -2722,31 +2722,31 @@ async def _find_related_text_unit_from_relationships(
|
||||||
all_text_units_lookup = {}
|
all_text_units_lookup = {}
|
||||||
|
|
||||||
# Deduplicate and preserve order | {c_id:order}
|
# Deduplicate and preserve order | {c_id:order}
|
||||||
async def build_text_units_unique(text_units):
|
text_units_unique_flat = {}
|
||||||
text_units_flat = {}
|
for index, unit_list in enumerate(text_units):
|
||||||
for index, unit_list in enumerate(text_units):
|
for c_id in unit_list:
|
||||||
for c_id in unit_list:
|
if (
|
||||||
if c_id not in text_units_flat or index < text_units_flat[c_id]:
|
c_id not in text_units_unique_flat
|
||||||
# Keep the smallest order
|
or index < text_units_unique_flat[c_id]
|
||||||
text_units_flat[c_id] = index
|
):
|
||||||
return text_units_flat
|
# Keep the smallest order
|
||||||
|
text_units_unique_flat[c_id] = index
|
||||||
|
|
||||||
text_units_unique_flat = build_text_units_unique(text_units)
|
if not text_units_unique_flat:
|
||||||
|
logger.warning("No valid text chunks found")
|
||||||
|
return []
|
||||||
|
|
||||||
async def fetch_chunk_data(c_id, index):
|
# Batch get all text chunk data
|
||||||
if c_id not in all_text_units_lookup:
|
chunk_ids = list(text_units_unique_flat.keys())
|
||||||
chunk_data = await text_chunks_db.get_by_id(c_id)
|
chunk_data_list = await text_chunks_db.get_by_ids(chunk_ids)
|
||||||
# Only store valid data
|
|
||||||
if chunk_data is not None and "content" in chunk_data:
|
|
||||||
all_text_units_lookup[c_id] = {
|
|
||||||
"data": chunk_data,
|
|
||||||
"order": index,
|
|
||||||
}
|
|
||||||
|
|
||||||
tasks = [
|
# Build lookup table, handling possible missing data
|
||||||
fetch_chunk_data(c_id, order) for c_id, order in text_units_unique_flat.items()
|
for chunk_id, chunk_data in zip(chunk_ids, chunk_data_list):
|
||||||
]
|
if chunk_data is not None and "content" in chunk_data:
|
||||||
await asyncio.gather(*tasks)
|
all_text_units_lookup[chunk_id] = {
|
||||||
|
"data": chunk_data,
|
||||||
|
"order": text_units_unique_flat[chunk_id],
|
||||||
|
}
|
||||||
|
|
||||||
if not all_text_units_lookup:
|
if not all_text_units_lookup:
|
||||||
logger.warning("No valid text chunks found")
|
logger.warning("No valid text chunks found")
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue