Merge branch 'optimize-extraction' into return-data-only

This commit is contained in:
yangdx 2025-09-13 13:33:07 +08:00
commit 0496ddcb92
3 changed files with 6 additions and 9 deletions

View file

@ -125,7 +125,7 @@ ENABLE_LLM_CACHE_FOR_EXTRACT=true
SUMMARY_LANGUAGE=English SUMMARY_LANGUAGE=English
### Entity types that the LLM will attempt to recognize ### Entity types that the LLM will attempt to recognize
# ENTITY_TYPES='["Organization", "Person", "Location", "Event", "Technology", "Equipment", "Product", "Document", "Category"]' # ENTITY_TYPES='["Person", "Organization", "Location", "Event", "Artifact", "CreativeWork"]'
### Chunk size for document splitting, 500~1500 is recommended ### Chunk size for document splitting, 500~1500 is recommended
# CHUNK_SIZE=1200 # CHUNK_SIZE=1200

View file

@ -24,15 +24,12 @@ DEFAULT_SUMMARY_LENGTH_RECOMMENDED = 600
DEFAULT_SUMMARY_CONTEXT_SIZE = 12000 DEFAULT_SUMMARY_CONTEXT_SIZE = 12000
# Default entities to extract if ENTITY_TYPES is not specified in .env # Default entities to extract if ENTITY_TYPES is not specified in .env
DEFAULT_ENTITY_TYPES = [ DEFAULT_ENTITY_TYPES = [
"Organization",
"Person", "Person",
"Organization",
"Location", "Location",
"Event", "Event",
"Technology", "Artifact",
"Equipment", "CreativeWork",
"Product",
"Document",
"Category",
] ]
# Separator for graph fields # Separator for graph fields

View file

@ -323,7 +323,7 @@ async def _handle_single_entity_extraction(
if len(record_attributes) != 4 or "entity" not in record_attributes[0]: if len(record_attributes) != 4 or "entity" not in record_attributes[0]:
if len(record_attributes) > 1 and "entity" in record_attributes[0]: if len(record_attributes) > 1 and "entity" in record_attributes[0]:
logger.warning( logger.warning(
f"{chunk_key}: extraction failed! only got {len(record_attributes)} fields on entity `{record_attributes[1]}`" f"{chunk_key}: extraction failed! Found {len(record_attributes)}/4 fields on ENTITY `{record_attributes[1]}`"
) )
return None return None
@ -394,7 +394,7 @@ async def _handle_single_relationship_extraction(
if len(record_attributes) != 5 or "relationship" not in record_attributes[0]: if len(record_attributes) != 5 or "relationship" not in record_attributes[0]:
if len(record_attributes) > 1 and "relationship" in record_attributes[0]: if len(record_attributes) > 1 and "relationship" in record_attributes[0]:
logger.warning( logger.warning(
f"{chunk_key}: extraction failed! only got {len(record_attributes)} fields on relation `{record_attributes[1]}`" f"{chunk_key}: extraction failed! Found {len(record_attributes)}/5 fields on RELATION `{record_attributes[1]}`"
) )
return None return None