diff --git a/lightrag/operate.py b/lightrag/operate.py index f142aedf..9fa35c6c 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -847,7 +847,7 @@ async def _process_extraction_result( chunk_key: str, timestamp: int, file_path: str = "unknown_source", - tuple_delimiter: str = "<|SEP|>", + tuple_delimiter: str = "<|S|>", completion_delimiter: str = "<|COMPLETE|>", ) -> tuple[dict, dict]: """Process a single extraction result (either initial or gleaning) diff --git a/lightrag/prompt.py b/lightrag/prompt.py index 2f20bcd8..69c759bb 100644 --- a/lightrag/prompt.py +++ b/lightrag/prompt.py @@ -15,8 +15,8 @@ You are a Knowledge Graph Specialist responsible for extracting entities and rel ---Instructions--- 1. Entity Extraction: Identify clearly defined and meaningful entities in the input text, and extract the following information: - - entity_name: Name of the entity, ensure entity names are consistent throughout the extraction. - - entity_type: Categorize the entity using the following entity types: {entity_types}; if none of the provided types are suitable, classify it as `Other`. + - entity_name: Name of the entity; ensure entity names are consistent throughout the extraction. + - entity_type: Categorize the entity using the following entity types: {entity_types}; if none of the provided entity types are suitable, classify it as `Other`. - entity_description: Provide a concise yet comprehensive description of the entity's attributes and activities based on the information present in the input text. 2. Relationship Extraction: Identify direct, clearly stated and meaningful relationships between extracted entities, and extract the following information: - source_entity: name of the source entity. 
diff --git a/lightrag/utils.py b/lightrag/utils.py index 0890d755..38c630f2 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -2571,19 +2571,19 @@ def fix_tuple_delimiter_corruption( Args: record: The text record to fix - delimiter_core: The core delimiter (e.g., "SEP" from "<|SEP|>") - tuple_delimiter: The complete tuple delimiter (e.g., "<|SEP|>") + delimiter_core: The core delimiter (e.g., "S" from "<|S|>") + tuple_delimiter: The complete tuple delimiter (e.g., "<|S|>") Returns: The corrected record with proper tuple_delimiter format Examples: - >>> fix_tuple_delimiter_corruption("entity<X|SEP|>name", "SEP", "<|SEP|>") - "entity<|SEP|>name" - >>> fix_tuple_delimiter_corruption("entity<SEP>name", "SEP", "<|SEP|>") - "entity<|SEP|>name" - >>> fix_tuple_delimiter_corruption("entity|SEP|>name", "SEP", "<|SEP|>") - "entity<|SEP|>name" + >>> fix_tuple_delimiter_corruption("entity<X|S|>name", "S", "<|S|>") + "entity<|S|>name" + >>> fix_tuple_delimiter_corruption("entity<S>name", "S", "<|S|>") + "entity<|S|>name" + >>> fix_tuple_delimiter_corruption("entity|S|>name", "S", "<|S|>") + "entity<|S|>name" """ if not record or not delimiter_core or not tuple_delimiter: return record @@ -2591,49 +2591,49 @@ def fix_tuple_delimiter_corruption( # Escape the delimiter core for regex use escaped_delimiter_core = re.escape(delimiter_core) - # Fix: <|SEP||SEP|> -> <|SEP|>, <|SEP|||SEP|> -> <|SEP|> + # Fix: <|S||S|> -> <|S|>, <|S|||S|> -> <|S|> record = re.sub( rf"<\|{escaped_delimiter_core}\|+{escaped_delimiter_core}\|>", tuple_delimiter, record, ) - # Fix: <X|SEP|> -> <|SEP|>, <|SEP|Y> -> <|SEP|>, <X|SEP|Y> -> <|SEP|> (one extra characters outside pipes) + # Fix: <X|S|> -> <|S|>, <|S|Y> -> <|S|>, <X|S|Y> -> <|S|> (one extra character outside pipes) record = re.sub( rf"<.?\|{escaped_delimiter_core}\|.?>", tuple_delimiter, record, ) - # Fix: <SEP>, <SEP|>, <|SEP> -> <|SEP|> (missing one or both pipes) + # Fix: <S>, <S|>, <|S> -> <|S|> (missing one or both pipes) record = re.sub( rf"<\|?{escaped_delimiter_core}\|?>", tuple_delimiter, record,
) - # Fix: <XSEP|> -> <|SEP|>, <|SEPX> -> <|SEP|> (one pipe is replaced by other character) + # Fix: <XS|> -> <|S|>, <|SX> -> <|S|> (one pipe is replaced by other character) record = re.sub( rf"<[^|]{escaped_delimiter_core}\|>|<\|{escaped_delimiter_core}[^|]>", tuple_delimiter, record, ) - # Fix: <|SEP| -> <|SEP|> (missing closing >) + # Fix: <|S| -> <|S|> (missing closing >) record = re.sub( rf"<\|{escaped_delimiter_core}\|(?!>)", tuple_delimiter, record, ) - # Fix: |SEP|> -> <|SEP|> (missing opening <) + # Fix: |S|> -> <|S|> (missing opening <) record = re.sub( rf"(?<!<)\|{escaped_delimiter_core}\|>", tuple_delimiter, record, ) - # Fix: <|SEP|>| -> <|SEP|> ( this is a fix of: <|SEP|| -> <|SEP|> ) + # Fix: <|S|>| -> <|S|> ( this is a fix for: <|S|| -> <|S|> ) record = re.sub( rf"<\|{escaped_delimiter_core}\|>\|", tuple_delimiter,