Refactor extraction delimiters from ## to newlines and change tuple delimiter to <|SEP|>
• Add robust delimiter fixing logic
• Update prompts for single-line format
parent 7fe47fac84
commit 7f83a58497
2 changed files with 138 additions and 129 deletions

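For orientation, here is a hedged before/after sketch of the output format this commit switches between (records separated by "##" with "<|>" fields, versus one record per line with "<|SEP|>" fields). The sample record text is illustrative and not taken from the diff below.

```python
# Hedged before/after sketch of the extraction record format (sample content is made up).

# Old format: records separated by "##", fields separated by "<|>"
old_output = (
    "(entity<|>Tokyo<|>location<|>Tokyo hosts the championship.)##"
    "(relationship<|>Noah Carter<|>Tokyo<|>competition<|>Noah Carter competes in Tokyo.)##"
    "<|COMPLETE|>"
)

# New format: one record per line, fields separated by "<|SEP|>"
new_output = (
    "entity<|SEP|>Tokyo<|SEP|>location<|SEP|>Tokyo hosts the championship.\n"
    "relationship<|SEP|>Noah Carter<|SEP|>Tokyo<|SEP|>competition<|SEP|>Noah Carter competes in Tokyo.\n"
    "<|COMPLETE|>"
)

# In the new format, splitting on newlines yields one record per line.
new_records = [line for line in new_output.splitlines() if line and line != "<|COMPLETE|>"]
assert len(new_records) == 2
```
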
@@ -317,7 +317,7 @@ async def _handle_single_entity_extraction(
if len(record_attributes) < 4 or "entity" not in record_attributes[0]:
if len(record_attributes) > 1 and "entity" in record_attributes[0]:
logger.warning(
f"{chunk_key}: Entity `{record_attributes[1]}` extraction failed -- expecting 4 fields but got {len(record_attributes)}"
f"{chunk_key} extraction failed: only got {len(record_attributes)} fields on entity `{record_attributes[1]}`"
)
return None

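As context for the 4-field check above, here is a minimal sketch of how a well-formed entity record splits on the tuple delimiter. The record text is made up, and the plain `str.split` stands in for the repository's splitting helper; both are assumptions for illustration.

```python
# Minimal sketch (not the library helper): a well-formed entity record yields exactly
# four fields -- marker, name, type, description. Sample content is made up.
TUPLE_DELIMITER = "<|SEP|>"

record = "entity<|SEP|>Tokyo<|SEP|>location<|SEP|>Tokyo is the host city of the championship."
record_attributes = record.split(TUPLE_DELIMITER)

assert "entity" in record_attributes[0]
assert len(record_attributes) == 4  # relationship records are expected to have 5 fields
```
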
@@ -386,7 +386,7 @@ async def _handle_single_relationship_extraction(
if len(record_attributes) < 5 or "relationship" not in record_attributes[0]:
if len(record_attributes) > 1 and "relationship" in record_attributes[0]:
logger.warning(
f"{chunk_key}: Relation `{record_attributes[1]}` extraction failed -- expecting 5 fields but got {len(record_attributes)}"
f"{chunk_key} extraction failed: only got {len(record_attributes)} fields on relation `{record_attributes[1]}`"
)
return None

@@ -843,8 +843,7 @@ async def _process_extraction_result(
result: str,
chunk_key: str,
file_path: str = "unknown_source",
tuple_delimiter: str = "<|>",
record_delimiter: str = "##",
tuple_delimiter: str = "<|SEP|>",
completion_delimiter: str = "<|COMPLETE|>",
) -> tuple[dict, dict]:
"""Process a single extraction result (either initial or gleaning)

@@ -861,10 +860,6 @@ async def _process_extraction_result(
maybe_nodes = defaultdict(list)
maybe_edges = defaultdict(list)

# Standardize Chinese brackets around record_delimiter to English brackets
bracket_pattern = f"[)）](\\s*{re.escape(record_delimiter)}\\s*)[(（]"
result = re.sub(bracket_pattern, ")\\1(", result)

if completion_delimiter not in result:
logger.warning(
f"{chunk_key}: Complete delimiter can not be found in extraction result"

@@ -872,71 +867,88 @@ async def _process_extraction_result(

records = split_string_by_multi_markers(
result,
[record_delimiter, completion_delimiter],
["\n", completion_delimiter],
)

for record in records:
# Remove outer brackets (support English and Chinese brackets with enhanced tolerance)
record = record.strip()

# Define allowed leading and trailing characters
leading_trailing_chars = r'[`<>"\']*'

# Handle leading characters before left bracket
if record.startswith("(") or record.startswith("（"):
record = record[1:]
else:
# Check for leading characters + left bracket pattern
leading_bracket_pattern = r"^" + leading_trailing_chars + r"([(（])"
match = re.search(leading_bracket_pattern, record)
if match:
# Extract content from the left bracket position
bracket_pos = match.start(1)
record = record[bracket_pos + 1 :]
else:
logger.warning(
f"{chunk_key}: Record starting bracket can not be found in extraction result"
)

# Handle trailing characters after right bracket
if record.endswith(")") or record.endswith("）"):
record = record[:-1]
else:
# Check for right bracket + trailing characters pattern
trailing_bracket_pattern = r"([)）])" + leading_trailing_chars + r"$"
match = re.search(trailing_bracket_pattern, record)
if match:
# Extract content up to the right bracket position
bracket_pos = match.start(1)
record = record[:bracket_pos]
else:
logger.warning(
f"{chunk_key}: Record ending bracket can not be found in extraction result"
)

record = record.strip()
if record is None:
continue

if tuple_delimiter == "<|>":
# fix entity<| with entity<|>
record = re.sub(r"^entity<\|(?!>)", r"entity<|>", record)
# fix relationship<| with relationship<|>
record = re.sub(r"^relationship<\|(?!>)", r"relationship<|>", record)
# fix <||> with <|>
record = record.replace("<||>", "<|>")
# fix < | > with <|>
record = record.replace("< | >", "<|>")
# fix <<|>> with <|>
record = record.replace("<<|>>", "<|>")
# fix <|>> with <|>
record = record.replace("<|>>", "<|>")
# fix <<|> with <|>
record = record.replace("<<|>", "<|>")
# fix <.|> with <|>
record = record.replace("<.|>", "<|>")
# fix <|.> with <|>
record = record.replace("<|.>", "<|>")
# Fix various forms of tuple_delimiter corruption from the LLM output.
# It handles missing or replaced characters around the core delimiter.
# 1. `<` or `>` may be missing.
# 2. `|` may be missing or replaced by another character.
# 3. There might be extra characters inserted.
# 4. Missing opening `<` or closing `>`
# Example transformations:
# <SEP> -> <|SEP|>
# <SEP|> -> <|SEP|> (where left | is missing)
# <|SEP> -> <|SEP|> (where right | is missing)
# <XSEP|> -> <|SEP|> (where left | is replaced by another character)
# <|SEPX> -> <|SEP|> (where right | is replaced by another character)
# <|SEP|X> -> <|SEP|> (where X is not '>')
# <XX|SEP|YY> -> <|SEP|> (handles extra characters)
# |SEP|> -> <|SEP|> (where opening < is missing)
# <|SEP| -> <|SEP|> (where closing > is missing)

escaped_delimiter_core = re.escape(tuple_delimiter[2:-2])  # Extract "SEP" from "<|SEP|>"

# Fix: <SEP> -> <|SEP|> (missing pipes)
record = re.sub(
rf"<{escaped_delimiter_core}>",
tuple_delimiter,
record,
)

# Fix: <SEP|> -> <|SEP|> (missing left pipe only)
record = re.sub(
rf"<{escaped_delimiter_core}\|>",
tuple_delimiter,
record,
)

# Fix: <|SEP> -> <|SEP|> (missing right pipe only)
record = re.sub(
rf"<\|{escaped_delimiter_core}>",
tuple_delimiter,
record,
)

# Fix: <XSEP|> -> <|SEP|> (character X replacing first pipe)
record = re.sub(
rf"<[^|]+{escaped_delimiter_core}\|>",
tuple_delimiter,
record,
)

# Fix: <|SEPX> -> <|SEP|> (character X replacing second pipe)
record = re.sub(
rf"<\|{escaped_delimiter_core}[^|]+>",
tuple_delimiter,
record,
)

# Fix: <XX|SEP|YY> -> <|SEP|> (extra characters around, but preserve correct delimiters)
record = re.sub(
rf"<[^<>]+\|{escaped_delimiter_core}\|[^<>]+>",
tuple_delimiter,
record,
)

# Fix: |SEP|> -> <|SEP|> (missing opening <)
record = re.sub(
rf"(?<!<)\|{escaped_delimiter_core}\|>",
tuple_delimiter,
record,
)

# Fix: <|SEP| -> <|SEP|> (missing closing >)
record = re.sub(
rf"<\|{escaped_delimiter_core}\|(?!>)",
tuple_delimiter,
record,
)

record_attributes = split_string_by_multi_markers(record, [tuple_delimiter])

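The delimiter-repair logic above can be condensed into a standalone sketch. This is not the repository's function, just a self-contained illustration of the same regex substitutions, assuming the new default tuple delimiter "<|SEP|>"; the sample record is made up.

```python
import re

# Standalone sketch of the delimiter-repair idea from the hunk above, assuming the
# new default tuple delimiter "<|SEP|>"; the sample record is made up for illustration.
TUPLE_DELIMITER = "<|SEP|>"
CORE = re.escape(TUPLE_DELIMITER[2:-2])  # "SEP"


def normalize_delimiters(record: str) -> str:
    """Rewrite common corruptions of <|SEP|> back to the canonical form."""
    patterns = [
        rf"<{CORE}>",                   # <SEP>       (both pipes missing)
        rf"<{CORE}\|>",                 # <SEP|>      (left pipe missing)
        rf"<\|{CORE}>",                 # <|SEP>      (right pipe missing)
        rf"<[^|]+{CORE}\|>",            # <XSEP|>     (left pipe replaced)
        rf"<\|{CORE}[^|]+>",            # <|SEPX>     (right pipe replaced)
        rf"<[^<>]+\|{CORE}\|[^<>]+>",   # <XX|SEP|YY> (extra characters)
        rf"(?<!<)\|{CORE}\|>",          # |SEP|>      (opening < missing)
        rf"<\|{CORE}\|(?!>)",           # <|SEP|      (closing > missing)
    ]
    for pattern in patterns:
        record = re.sub(pattern, TUPLE_DELIMITER, record)
    return record


print(normalize_delimiters("entity<SEP>Tokyo<|SEP>location<|SEP|>Host city."))
# -> entity<|SEP|>Tokyo<|SEP|>location<|SEP|>Host city.
```

Already-correct "<|SEP|>" occurrences are left untouched because every pattern requires some corruption (a missing or replaced pipe or bracket) to match.
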
@@ -988,7 +1000,6 @@ async def _parse_extraction_result(
chunk_id,
file_path,
tuple_delimiter=PROMPTS["DEFAULT_TUPLE_DELIMITER"],
record_delimiter=PROMPTS["DEFAULT_RECORD_DELIMITER"],
completion_delimiter=PROMPTS["DEFAULT_COMPLETION_DELIMITER"],
)

@@ -1976,7 +1987,6 @@ async def extract_entities(

example_context_base = dict(
tuple_delimiter=PROMPTS["DEFAULT_TUPLE_DELIMITER"],
record_delimiter=PROMPTS["DEFAULT_RECORD_DELIMITER"],
completion_delimiter=PROMPTS["DEFAULT_COMPLETION_DELIMITER"],
entity_types=", ".join(entity_types),
language=language,

@@ -1986,7 +1996,6 @@ async def extract_entities(

context_base = dict(
tuple_delimiter=PROMPTS["DEFAULT_TUPLE_DELIMITER"],
record_delimiter=PROMPTS["DEFAULT_RECORD_DELIMITER"],
completion_delimiter=PROMPTS["DEFAULT_COMPLETION_DELIMITER"],
entity_types=",".join(entity_types),
examples=examples,

@@ -2046,7 +2055,6 @@ async def extract_entities(
chunk_key,
file_path,
tuple_delimiter=context_base["tuple_delimiter"],
record_delimiter=context_base["record_delimiter"],
completion_delimiter=context_base["completion_delimiter"],
)

@@ -2069,7 +2077,6 @@ async def extract_entities(
chunk_key,
file_path,
tuple_delimiter=context_base["tuple_delimiter"],
record_delimiter=context_base["record_delimiter"],
completion_delimiter=context_base["completion_delimiter"],
)

@@ -4,8 +4,8 @@ from typing import Any

PROMPTS: dict[str, Any] = {}

PROMPTS["DEFAULT_TUPLE_DELIMITER"] = "<|>"
PROMPTS["DEFAULT_RECORD_DELIMITER"] = "##"
# Delimiter must be bracketed in "<|...|>"
PROMPTS["DEFAULT_TUPLE_DELIMITER"] = "<|SEP|>"
PROMPTS["DEFAULT_COMPLETION_DELIMITER"] = "<|COMPLETE|>"

PROMPTS["DEFAULT_USER_PROMPT"] = "n/a"

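These constants are interpolated into the prompt templates below via placeholders such as `{tuple_delimiter}` and `{completion_delimiter}`. A hedged sketch of that mechanism, with a stand-in template string rather than the actual PROMPTS entry:

```python
# Hedged sketch: how the delimiter constants above plug into the prompt templates.
# The template text here is a stand-in, not the actual PROMPTS entry.
PROMPTS = {
    "DEFAULT_TUPLE_DELIMITER": "<|SEP|>",
    "DEFAULT_COMPLETION_DELIMITER": "<|COMPLETE|>",
}

template = (
    "Output each entity on a single line, using `{tuple_delimiter}` as the field "
    "delimiter, and output `{completion_delimiter}` when extraction is finished."
)

rendered = template.format(
    tuple_delimiter=PROMPTS["DEFAULT_TUPLE_DELIMITER"],
    completion_delimiter=PROMPTS["DEFAULT_COMPLETION_DELIMITER"],
)
print(rendered)
```
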
@@ -14,22 +14,22 @@ PROMPTS["entity_extraction_system_prompt"] = """---Role---
You are a Knowledge Graph Specialist responsible for extracting entities and relationships from the input text.

---Instructions---
1. **Entity Extraction:** Identify clearly defined and meaningful entities in the input text, and extract the following information:
1. Entity Extraction: Identify clearly defined and meaningful entities in the input text, and extract the following information:
  - entity_name: Name of the entity, ensure entity names are consistent throughout the extraction.
  - entity_type: Categorize the entity using the following entity types: {entity_types}; if none of the provided types are suitable, classify it as `Other`.
  - entity_description: Provide a comprehensive description of the entity's attributes and activities based on the information present in the input text.
2. **Entity Output Format:** (entity{tuple_delimiter}entity_name{tuple_delimiter}entity_type{tuple_delimiter}entity_description)
3. **Relationship Extraction:** Identify direct, clearly-stated and meaningful relationships between extracted entities within the input text, and extract the following information:
  - entity_description: Provide a concise yet comprehensive description of the entity's attributes and activities based on the information present in the input text.
2. Relationship Extraction: Identify direct, clearly stated and meaningful relationships between extracted entities within the input text, and extract the following information:
  - source_entity: name of the source entity.
  - target_entity: name of the target entity.
  - relationship_keywords: one or more high-level key words that summarize the overarching nature of the relationship, focusing on concepts or themes rather than specific details.
  - relationship_keywords: one or more high-level keywords that summarize the overarching nature of the relationship, focusing on concepts or themes rather than specific details.
  - relationship_description: Explain the nature of the relationship between the source and target entities, providing a clear rationale for their connection.
4. **Relationship Output Format:** (relationship{tuple_delimiter}source_entity{tuple_delimiter}target_entity{tuple_delimiter}relationship_keywords{tuple_delimiter}relationship_description)
5. **Relationship Order:** Prioritize relationships based on their significance to the intended meaning of input text, and output more crucial relationships first.
6. **Avoid Pronouns:** For entity names and all descriptions, explicitly name the subject or object instead of using pronouns; avoid pronouns such as `this document`, `our company`, `I`, `you`, and `he/she`.
7. **Undirectional Relationship:** Treat relationships as undirected; swapping the source and target entities does not constitute a new relationship. Avoid outputting duplicate relationships.
8. **Language:** Output entity names, keywords and descriptions in {language}. Proper nouns, such as personal names, should not be translated. Please keep them in their original language.
9. **Delimiter:** Use {record_delimiter} as the entity or relationship list delimiter; output {completion_delimiter} when all the entities and relationships are extracted.
3. Keep Full Context: Ensure the entity name and description are written in third person, explicitly name the subject or object instead of using pronouns; avoid pronouns such as `this article`, `this paper`, `our company`, `I`, `you`, and `he/she`.
4. Output Each Entity On A Single Line; use `{tuple_delimiter}` as field delimiter, adhering to the following format: entity{tuple_delimiter}entity_name{tuple_delimiter}entity_type{tuple_delimiter}entity_description
5. Output Each Relationship On A Single Line; use `{tuple_delimiter}` as field delimiter, adhering to the following format: relationship{tuple_delimiter}source_entity{tuple_delimiter}target_entity{tuple_delimiter}relationship_keywords{tuple_delimiter}relationship_description
6. Output Order: Output the entity list first, followed by the relationship list. Within the relationship list, prioritize relationships based on their significance to the intended meaning of the input text, outputting more crucial relationships first.
7. Undirected Relationship: Treat relationships as undirected; swapping the source and target entities does not constitute a new relationship. Avoid outputting duplicate relationships.
8. Language: Output entity names, keywords, and descriptions in {language}. Proper nouns (e.g., personal names, place names, organization names) should not be translated. Please keep them in their original language.
9. Output `{completion_delimiter}` when all the entities and relationships have been extracted.

---Examples---
{examples}

@@ -44,25 +44,27 @@ Text:
"""

PROMPTS["entity_extraction_user_prompt"] = """---Task---
Extract entities and relationships from the input text to be Processed.
Extract entities and relationships from the input text to be processed.

---Instructions---
1. Output entities and relationships, prioritized by their relevance to the input text's core meaning.
2. Output `{completion_delimiter}` when all the entities and relationships are extracted.
3. Ensure the output language is {language}. Proper nouns, such as personal names, should not be translated. Please keep them in their original language.
1. Output each entity and relationship on a single line.
2. Prioritize entities and relationships most relevant to the input text's core meaning.
3. Output `{completion_delimiter}` only after all relevant entities and relationships have been extracted.
4. Ensure the output language is {language}. Proper nouns (e.g., personal names, place names, organization names) must be kept in their original language and not translated.

<Output>
"""

PROMPTS["entity_continue_extraction_user_prompt"] = """---Task---
Identify any missed entities or relationships from the input text to be Processed of last extraction task.
Identify any missed entities or relationships from the input text to be processed based on the last extraction task.

---Instructions---
1. Output the entities and realtionships in the same format as previous extraction task.
2. Do not include entities and relations that have been correctly extracted in last extraction task.
3. If the entity or relation output is truncated or has missing fields in last extraction task, please re-output it in the correct format.
4. Output `{completion_delimiter}` when all the entities and relationships are extracted.
5. Ensure the output language is {language}.
1. Output entities and relationships in the same format as the previous extraction task.
2. Do not include entities and relationships that were correctly extracted in the last extraction task.
3. If an entity or relationship output was truncated or had missing fields in the last extraction task, please re-output it in the correct format.
4. Output each entity and relationship on a single line, prioritizing entities and relationships most relevant to the input text's core meaning.
5. Output `{completion_delimiter}` only after all relevant entities and relationships have been extracted.
6. Ensure the output language is {language}. Proper nouns (e.g., personal names, place names, organization names) must be kept in their original language and not translated.

<Output>
"""

@@ -80,16 +82,16 @@ It was a small transformation, barely perceptible, but one that Alex noted with
```

<Output>
(entity{tuple_delimiter}Alex{tuple_delimiter}person{tuple_delimiter}Alex is a character who experiences frustration and is observant of the dynamics among other characters.){record_delimiter}
(entity{tuple_delimiter}Taylor{tuple_delimiter}person{tuple_delimiter}Taylor is portrayed with authoritarian certainty and shows a moment of reverence towards a device, indicating a change in perspective.){record_delimiter}
(entity{tuple_delimiter}Jordan{tuple_delimiter}person{tuple_delimiter}Jordan shares a commitment to discovery and has a significant interaction with Taylor regarding a device.){record_delimiter}
(entity{tuple_delimiter}Cruz{tuple_delimiter}person{tuple_delimiter}Cruz is associated with a vision of control and order, influencing the dynamics among other characters.){record_delimiter}
(entity{tuple_delimiter}The Device{tuple_delimiter}equipment{tuple_delimiter}The Device is central to the story, with potential game-changing implications, and is revered by Taylor.){record_delimiter}
(relationship{tuple_delimiter}Alex{tuple_delimiter}Taylor{tuple_delimiter}power dynamics, observation{tuple_delimiter}Alex observes Taylor's authoritarian behavior and notes changes in Taylor's attitude toward the device.){record_delimiter}
(relationship{tuple_delimiter}Alex{tuple_delimiter}Jordan{tuple_delimiter}shared goals, rebellion{tuple_delimiter}Alex and Jordan share a commitment to discovery, which contrasts with Cruz's vision.){record_delimiter}
(relationship{tuple_delimiter}Taylor{tuple_delimiter}Jordan{tuple_delimiter}conflict resolution, mutual respect{tuple_delimiter}Taylor and Jordan interact directly regarding the device, leading to a moment of mutual respect and an uneasy truce.){record_delimiter}
(relationship{tuple_delimiter}Jordan{tuple_delimiter}Cruz{tuple_delimiter}ideological conflict, rebellion{tuple_delimiter}Jordan's commitment to discovery is in rebellion against Cruz's vision of control and order.){record_delimiter}
(relationship{tuple_delimiter}Taylor{tuple_delimiter}The Device{tuple_delimiter}reverence, technological significance{tuple_delimiter}Taylor shows reverence towards the device, indicating its importance and potential impact.){record_delimiter}
entity{tuple_delimiter}Alex{tuple_delimiter}person{tuple_delimiter}Alex is a character who experiences frustration and is observant of the dynamics among other characters.
entity{tuple_delimiter}Taylor{tuple_delimiter}person{tuple_delimiter}Taylor is portrayed with authoritarian certainty and shows a moment of reverence towards a device, indicating a change in perspective.
entity{tuple_delimiter}Jordan{tuple_delimiter}person{tuple_delimiter}Jordan shares a commitment to discovery and has a significant interaction with Taylor regarding a device.
entity{tuple_delimiter}Cruz{tuple_delimiter}person{tuple_delimiter}Cruz is associated with a vision of control and order, influencing the dynamics among other characters.
entity{tuple_delimiter}The Device{tuple_delimiter}equipment{tuple_delimiter}The Device is central to the story, with potential game-changing implications, and is revered by Taylor.
relationship{tuple_delimiter}Alex{tuple_delimiter}Taylor{tuple_delimiter}power dynamics, observation{tuple_delimiter}Alex observes Taylor's authoritarian behavior and notes changes in Taylor's attitude toward the device.
relationship{tuple_delimiter}Alex{tuple_delimiter}Jordan{tuple_delimiter}shared goals, rebellion{tuple_delimiter}Alex and Jordan share a commitment to discovery, which contrasts with Cruz's vision.
relationship{tuple_delimiter}Taylor{tuple_delimiter}Jordan{tuple_delimiter}conflict resolution, mutual respect{tuple_delimiter}Taylor and Jordan interact directly regarding the device, leading to a moment of mutual respect and an uneasy truce.
relationship{tuple_delimiter}Jordan{tuple_delimiter}Cruz{tuple_delimiter}ideological conflict, rebellion{tuple_delimiter}Jordan's commitment to discovery is in rebellion against Cruz's vision of control and order.
relationship{tuple_delimiter}Taylor{tuple_delimiter}The Device{tuple_delimiter}reverence, technological significance{tuple_delimiter}Taylor shows reverence towards the device, indicating its importance and potential impact.
{completion_delimiter}

""",

@@ -105,18 +107,18 @@ Financial experts are closely watching the Federal Reserve's next move, as specu
```

<Output>
(entity{tuple_delimiter}Global Tech Index{tuple_delimiter}category{tuple_delimiter}The Global Tech Index tracks the performance of major technology stocks and experienced a 3.4% decline today.){record_delimiter}
(entity{tuple_delimiter}Nexon Technologies{tuple_delimiter}organization{tuple_delimiter}Nexon Technologies is a tech company that saw its stock decline by 7.8% after disappointing earnings.){record_delimiter}
(entity{tuple_delimiter}Omega Energy{tuple_delimiter}organization{tuple_delimiter}Omega Energy is an energy company that gained 2.1% in stock value due to rising oil prices.){record_delimiter}
(entity{tuple_delimiter}Gold Futures{tuple_delimiter}product{tuple_delimiter}Gold futures rose by 1.5%, indicating increased investor interest in safe-haven assets.){record_delimiter}
(entity{tuple_delimiter}Crude Oil{tuple_delimiter}product{tuple_delimiter}Crude oil prices rose to $87.60 per barrel due to supply constraints and strong demand.){record_delimiter}
(entity{tuple_delimiter}Market Selloff{tuple_delimiter}category{tuple_delimiter}Market selloff refers to the significant decline in stock values due to investor concerns over interest rates and regulations.){record_delimiter}
(entity{tuple_delimiter}Federal Reserve Policy Announcement{tuple_delimiter}category{tuple_delimiter}The Federal Reserve's upcoming policy announcement is expected to impact investor confidence and market stability.){record_delimiter}
(entity{tuple_delimiter}3.4% Decline{tuple_delimiter}category{tuple_delimiter}The Global Tech Index experienced a 3.4% decline in midday trading.){record_delimiter}
(relationship{tuple_delimiter}Global Tech Index{tuple_delimiter}Market Selloff{tuple_delimiter}market performance, investor sentiment{tuple_delimiter}The decline in the Global Tech Index is part of the broader market selloff driven by investor concerns.){record_delimiter}
(relationship{tuple_delimiter}Nexon Technologies{tuple_delimiter}Global Tech Index{tuple_delimiter}company impact, index movement{tuple_delimiter}Nexon Technologies' stock decline contributed to the overall drop in the Global Tech Index.){record_delimiter}
(relationship{tuple_delimiter}Gold Futures{tuple_delimiter}Market Selloff{tuple_delimiter}market reaction, safe-haven investment{tuple_delimiter}Gold prices rose as investors sought safe-haven assets during the market selloff.){record_delimiter}
(relationship{tuple_delimiter}Federal Reserve Policy Announcement{tuple_delimiter}Market Selloff{tuple_delimiter}interest rate impact, financial regulation{tuple_delimiter}Speculation over Federal Reserve policy changes contributed to market volatility and investor selloff.){record_delimiter}
entity{tuple_delimiter}Global Tech Index{tuple_delimiter}category{tuple_delimiter}The Global Tech Index tracks the performance of major technology stocks and experienced a 3.4% decline today.
entity{tuple_delimiter}Nexon Technologies{tuple_delimiter}organization{tuple_delimiter}Nexon Technologies is a tech company that saw its stock decline by 7.8% after disappointing earnings.
entity{tuple_delimiter}Omega Energy{tuple_delimiter}organization{tuple_delimiter}Omega Energy is an energy company that gained 2.1% in stock value due to rising oil prices.
entity{tuple_delimiter}Gold Futures{tuple_delimiter}product{tuple_delimiter}Gold futures rose by 1.5%, indicating increased investor interest in safe-haven assets.
entity{tuple_delimiter}Crude Oil{tuple_delimiter}product{tuple_delimiter}Crude oil prices rose to $87.60 per barrel due to supply constraints and strong demand.
entity{tuple_delimiter}Market Selloff{tuple_delimiter}category{tuple_delimiter}Market selloff refers to the significant decline in stock values due to investor concerns over interest rates and regulations.
entity{tuple_delimiter}Federal Reserve Policy Announcement{tuple_delimiter}category{tuple_delimiter}The Federal Reserve's upcoming policy announcement is expected to impact investor confidence and market stability.
entity{tuple_delimiter}3.4% Decline{tuple_delimiter}category{tuple_delimiter}The Global Tech Index experienced a 3.4% decline in midday trading.
relationship{tuple_delimiter}Global Tech Index{tuple_delimiter}Market Selloff{tuple_delimiter}market performance, investor sentiment{tuple_delimiter}The decline in the Global Tech Index is part of the broader market selloff driven by investor concerns.
relationship{tuple_delimiter}Nexon Technologies{tuple_delimiter}Global Tech Index{tuple_delimiter}company impact, index movement{tuple_delimiter}Nexon Technologies' stock decline contributed to the overall drop in the Global Tech Index.
relationship{tuple_delimiter}Gold Futures{tuple_delimiter}Market Selloff{tuple_delimiter}market reaction, safe-haven investment{tuple_delimiter}Gold prices rose as investors sought safe-haven assets during the market selloff.
relationship{tuple_delimiter}Federal Reserve Policy Announcement{tuple_delimiter}Market Selloff{tuple_delimiter}interest rate impact, financial regulation{tuple_delimiter}Speculation over Federal Reserve policy changes contributed to market volatility and investor selloff.
{completion_delimiter}

""",

@@ -126,16 +128,16 @@ At the World Athletics Championship in Tokyo, Noah Carter broke the 100m sprint
```

<Output>
(entity{tuple_delimiter}World Athletics Championship{tuple_delimiter}event{tuple_delimiter}The World Athletics Championship is a global sports competition featuring top athletes in track and field.){record_delimiter}
(entity{tuple_delimiter}Tokyo{tuple_delimiter}location{tuple_delimiter}Tokyo is the host city of the World Athletics Championship.){record_delimiter}
(entity{tuple_delimiter}Noah Carter{tuple_delimiter}person{tuple_delimiter}Noah Carter is a sprinter who set a new record in the 100m sprint at the World Athletics Championship.){record_delimiter}
(entity{tuple_delimiter}100m Sprint Record{tuple_delimiter}category{tuple_delimiter}The 100m sprint record is a benchmark in athletics, recently broken by Noah Carter.){record_delimiter}
(entity{tuple_delimiter}Carbon-Fiber Spikes{tuple_delimiter}equipment{tuple_delimiter}Carbon-fiber spikes are advanced sprinting shoes that provide enhanced speed and traction.){record_delimiter}
(entity{tuple_delimiter}World Athletics Federation{tuple_delimiter}organization{tuple_delimiter}The World Athletics Federation is the governing body overseeing the World Athletics Championship and record validations.){record_delimiter}
(relationship{tuple_delimiter}World Athletics Championship{tuple_delimiter}Tokyo{tuple_delimiter}event location, international competition{tuple_delimiter}The World Athletics Championship is being hosted in Tokyo.){record_delimiter}
(relationship{tuple_delimiter}Noah Carter{tuple_delimiter}100m Sprint Record{tuple_delimiter}athlete achievement, record-breaking{tuple_delimiter}Noah Carter set a new 100m sprint record at the championship.){record_delimiter}
(relationship{tuple_delimiter}Noah Carter{tuple_delimiter}Carbon-Fiber Spikes{tuple_delimiter}athletic equipment, performance boost{tuple_delimiter}Noah Carter used carbon-fiber spikes to enhance performance during the race.){record_delimiter}
(relationship{tuple_delimiter}Noah Carter{tuple_delimiter}World Athletics Championship{tuple_delimiter}athlete participation, competition{tuple_delimiter}Noah Carter is competing at the World Athletics Championship.){record_delimiter}
entity{tuple_delimiter}World Athletics Championship{tuple_delimiter}event{tuple_delimiter}The World Athletics Championship is a global sports competition featuring top athletes in track and field.
entity{tuple_delimiter}Tokyo{tuple_delimiter}location{tuple_delimiter}Tokyo is the host city of the World Athletics Championship.
entity{tuple_delimiter}Noah Carter{tuple_delimiter}person{tuple_delimiter}Noah Carter is a sprinter who set a new record in the 100m sprint at the World Athletics Championship.
entity{tuple_delimiter}100m Sprint Record{tuple_delimiter}category{tuple_delimiter}The 100m sprint record is a benchmark in athletics, recently broken by Noah Carter.
entity{tuple_delimiter}Carbon-Fiber Spikes{tuple_delimiter}equipment{tuple_delimiter}Carbon-fiber spikes are advanced sprinting shoes that provide enhanced speed and traction.
entity{tuple_delimiter}World Athletics Federation{tuple_delimiter}organization{tuple_delimiter}The World Athletics Federation is the governing body overseeing the World Athletics Championship and record validations.
relationship{tuple_delimiter}World Athletics Championship{tuple_delimiter}Tokyo{tuple_delimiter}event location, international competition{tuple_delimiter}The World Athletics Championship is being hosted in Tokyo.
relationship{tuple_delimiter}Noah Carter{tuple_delimiter}100m Sprint Record{tuple_delimiter}athlete achievement, record-breaking{tuple_delimiter}Noah Carter set a new 100m sprint record at the championship.
relationship{tuple_delimiter}Noah Carter{tuple_delimiter}Carbon-Fiber Spikes{tuple_delimiter}athletic equipment, performance boost{tuple_delimiter}Noah Carter used carbon-fiber spikes to enhance performance during the race.
relationship{tuple_delimiter}Noah Carter{tuple_delimiter}World Athletics Championship{tuple_delimiter}athlete participation, competition{tuple_delimiter}Noah Carter is competing at the World Athletics Championship.
{completion_delimiter}

""",

@@ -149,10 +151,10 @@ Your task is to synthesize a list of descriptions of a given entity or relation

---Instructions---
1. **Comprehensiveness:** The summary must integrate key information from all provided descriptions. Do not omit important facts.
2. **Context:** The summary must explicitly mention the name of the entity or relation for full context.
2. **Context:** Ensure the summary is written in the third person, and explicitly mention the name of the entity or relation for full clarity and context.
3. **Conflict:** In case of conflicting or inconsistent descriptions, determine if they originate from multiple, distinct entities or relationships that share the same name. If so, summarize each entity or relationship separately and then consolidate all summaries.
4. **Style:** The output must be written from an objective, third-person perspective.
5. **Length:** Maintain depth and completeness while ensuring the summary's length not exceed {summary_length} tokens.
5. **Length:** Maintain depth and completeness while ensuring the summary's length does not exceed {summary_length} tokens.
6. **Language:** The entire output must be written in {language}.

---Data---

@@ -215,7 +217,7 @@ Given a user query, your task is to extract two distinct types of keywords:

---Instructions & Constraints---
1. **Output Format**: Your output MUST be a valid JSON object and nothing else. Do not include any explanatory text, markdown code fences (like ```json), or any other text before or after the JSON. It will be parsed directly by a JSON parser.
2. **Source of Truth**: All keywords must be explicitly derived from the user query, with both high-level and low-level keyword categories required to contain content.
2. **Source of Truth**: All keywords must be explicitly derived from the user query, and both high-level and low-level keyword categories are required to contain content.
3. **Concise & Meaningful**: Keywords should be concise words or meaningful phrases. Prioritize multi-word phrases when they represent a single concept. For example, from "latest financial report of Apple Inc.", you should extract "latest financial report" and "Apple Inc." rather than "latest", "financial", "report", and "Apple".
4. **Handle Edge Cases**: For queries that are too simple, vague, or nonsensical (e.g., "hello", "ok", "asdfghjkl"), you must return a JSON object with empty lists for both keyword types.

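For illustration, a hedged sketch of the output shape the keyword-extraction prompt asks for. The key names ("high_level_keywords", "low_level_keywords") and the sample query are assumptions; only the "valid JSON object, nothing else" rule and the empty-list edge case come from the prompt text above.

```python
import json

# Hedged illustration of the required output shape for the keyword-extraction prompt.
# Key names and sample values are assumed for illustration, not taken from the diff.
response_text = """
{
  "high_level_keywords": ["latest financial report"],
  "low_level_keywords": ["Apple Inc."]
}
"""

keywords = json.loads(response_text)  # parsed directly; no markdown fences expected
assert isinstance(keywords["high_level_keywords"], list)

# Edge case: a vague query such as "hello" should yield empty lists for both types.
empty = json.loads('{"high_level_keywords": [], "low_level_keywords": []}')
assert empty["low_level_keywords"] == []
```
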