Merge branch 'optimize-extraction' into return-data-only

This commit is contained in:
yangdx 2025-09-13 13:33:07 +08:00
commit 0496ddcb92
3 changed files with 6 additions and 9 deletions

View file

@@ -125,7 +125,7 @@ ENABLE_LLM_CACHE_FOR_EXTRACT=true
SUMMARY_LANGUAGE=English
### Entity types that the LLM will attempt to recognize
# ENTITY_TYPES='["Organization", "Person", "Location", "Event", "Technology", "Equipment", "Product", "Document", "Category"]'
# ENTITY_TYPES='["Person", "Organization", "Location", "Event", "Artifact", "CreativeWork"]'
### Chunk size for document splitting, 500~1500 is recommended
# CHUNK_SIZE=1200

View file

@@ -24,15 +24,12 @@ DEFAULT_SUMMARY_LENGTH_RECOMMENDED = 600
DEFAULT_SUMMARY_CONTEXT_SIZE = 12000
# Default entities to extract if ENTITY_TYPES is not specified in .env
DEFAULT_ENTITY_TYPES = [
"Organization",
"Person",
"Organization",
"Location",
"Event",
"Technology",
"Equipment",
"Product",
"Document",
"Category",
"Artifact",
"CreativeWork",
]
# Separator for graph fields

View file

@@ -323,7 +323,7 @@ async def _handle_single_entity_extraction(
if len(record_attributes) != 4 or "entity" not in record_attributes[0]:
if len(record_attributes) > 1 and "entity" in record_attributes[0]:
logger.warning(
f"{chunk_key}: extraction failed! only got {len(record_attributes)} feilds on entity `{record_attributes[1]}`"
f"{chunk_key}: extraction failed! Found {len(record_attributes)}/4 feilds on ENTITY `{record_attributes[1]}`"
)
return None
@@ -394,7 +394,7 @@ async def _handle_single_relationship_extraction(
if len(record_attributes) != 5 or "relationship" not in record_attributes[0]:
if len(record_attributes) > 1 and "relationship" in record_attributes[0]:
logger.warning(
f"{chunk_key}: extraction failed! only got {len(record_attributes)} fields on realtion `{record_attributes[1]}`"
f"{chunk_key}: extraction failed! Found {len(record_attributes)}/5 fields on REALTION `{record_attributes[1]}`"
)
return None