fix: Improve deduplication ID validation and logging
- Add comprehensive logging to verify IDs sent to LLM (sent vs received)
- Enhance prompt with explicit ID bounds (0 through N-1)
- Add validation warnings for missing and extra IDs from LLM responses
- Improve error message clarity for invalid dedupe IDs
- Log actual IDs sent to LLM to confirm no index leakage

This helps diagnose cases where the LLM returns IDs outside the valid range (e.g., ID 19 when only 0-18 were sent).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
a24ada94bb
commit
22bfdc532a
2 changed files with 50 additions and 3 deletions
|
|
@@ -166,7 +166,9 @@ def nodes(context: dict[str, Any]) -> list[Message]:
|
|||
- They have similar names or purposes but refer to separate instances or concepts.
|
||||
|
||||
Task:
|
||||
Respond with a JSON object that contains an "entity_resolutions" array with one entry for each entity in ENTITIES, ordered by the entity id.
|
||||
ENTITIES contains {len(context['extracted_nodes'])} entities with IDs 0 through {len(context['extracted_nodes']) - 1}.
|
||||
Respond with a JSON object that contains an "entity_resolutions" array with EXACTLY {len(context['extracted_nodes'])} entries - one for each entity in ENTITIES.
|
||||
Your response MUST use only the IDs 0 through {len(context['extracted_nodes']) - 1}. Do not skip any IDs or use IDs outside this range.
|
||||
|
||||
For every entity, return an object with the following keys:
|
||||
{{
|
||||
|
|
|
|||
|
|
@@ -269,6 +269,27 @@ async def _resolve_with_llm(
|
|||
for i, node in enumerate(llm_extracted_nodes)
|
||||
]
|
||||
|
||||
sent_ids = [ctx['id'] for ctx in extracted_nodes_context]
|
||||
logger.debug(
|
||||
'Sending %d entities to LLM for deduplication with IDs 0-%d (actual IDs sent: %s)',
|
||||
len(llm_extracted_nodes),
|
||||
len(llm_extracted_nodes) - 1,
|
||||
sent_ids if len(sent_ids) <= 20 else f'{sent_ids[:10]}...{sent_ids[-10:]}',
|
||||
)
|
||||
if llm_extracted_nodes:
|
||||
sample_size = min(3, len(extracted_nodes_context))
|
||||
logger.debug(
|
||||
'First %d entities: %s',
|
||||
sample_size,
|
||||
[(ctx['id'], ctx['name']) for ctx in extracted_nodes_context[:sample_size]],
|
||||
)
|
||||
if len(extracted_nodes_context) > 3:
|
||||
logger.debug(
|
||||
'Last %d entities: %s',
|
||||
sample_size,
|
||||
[(ctx['id'], ctx['name']) for ctx in extracted_nodes_context[-sample_size:]],
|
||||
)
|
||||
|
||||
existing_nodes_context = [
|
||||
{
|
||||
**{
|
||||
|
|
@@ -301,15 +322,39 @@ async def _resolve_with_llm(
|
|||
valid_relative_range = range(len(state.unresolved_indices))
|
||||
processed_relative_ids: set[int] = set()
|
||||
|
||||
received_ids = {r.id for r in node_resolutions}
|
||||
expected_ids = set(valid_relative_range)
|
||||
missing_ids = expected_ids - received_ids
|
||||
extra_ids = received_ids - expected_ids
|
||||
|
||||
logger.debug(
|
||||
'Received %d resolutions for %d entities (expected %d)',
|
||||
len(node_resolutions),
|
||||
len(state.unresolved_indices),
|
||||
len(state.unresolved_indices),
|
||||
)
|
||||
|
||||
if missing_ids:
|
||||
logger.warning('LLM did not return resolutions for IDs: %s', sorted(missing_ids))
|
||||
|
||||
if extra_ids:
|
||||
logger.warning(
|
||||
'LLM returned invalid IDs outside valid range 0-%d: %s (all returned IDs: %s)',
|
||||
len(state.unresolved_indices) - 1,
|
||||
sorted(extra_ids),
|
||||
sorted(received_ids),
|
||||
)
|
||||
|
||||
for resolution in node_resolutions:
|
||||
relative_id: int = resolution.id
|
||||
duplicate_idx: int = resolution.duplicate_idx
|
||||
|
||||
if relative_id not in valid_relative_range:
|
||||
logger.warning(
|
||||
'Skipping invalid LLM dedupe id %s (unresolved indices: %s)',
|
||||
'Skipping invalid LLM dedupe id %d (valid range: 0-%d, received %d resolutions)',
|
||||
relative_id,
|
||||
state.unresolved_indices,
|
||||
len(state.unresolved_indices) - 1,
|
||||
len(node_resolutions),
|
||||
)
|
||||
continue
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue