add extract nodes from text prompt (#106)

2024-09-11 12:06:08 -04:00 · 2024-09-11 12:06:08 -04:00 · 4122d350a5
commit 4122d350a5
parent b214baa85f
2 changed files with 69 additions and 2 deletions
--- a/graphiti_core/prompts/extract_nodes.py
+++ b/graphiti_core/prompts/extract_nodes.py
@ -24,12 +24,14 @@ class Prompt(Protocol):
    v1: PromptVersion
    v2: PromptVersion
    extract_json: PromptVersion
    extract_text: PromptVersion
 class Versions(TypedDict):
    v1: PromptFunction
    v2: PromptFunction
    extract_json: PromptFunction
    extract_text: PromptFunction
 def v1(context: dict[str, Any]) -> list[Message]:
@ -144,4 +146,44 @@ Respond with a JSON object in the following format:
    ]
-versions: Versions = {'v1': v1, 'v2': v2, 'extract_json': extract_json}
+def extract_text(context: dict[str, Any]) -> list[Message]:
    sys_prompt = """You are an AI assistant that extracts entity nodes from conversational text. Your primary task is to identify and extract the speaker and other significant entities mentioned in the conversation."""
    user_prompt = f"""
 Given the following conversation, extract entity nodes from the CURRENT MESSAGE that are explicitly or implicitly mentioned:
 Conversation:
 {json.dumps([ep['content'] for ep in context['previous_episodes']], indent=2)}
 <CURRENT MESSAGE>
 {context["episode_content"]}
 Guidelines:
 2. Extract significant entities, concepts, or actors mentioned in the conversation.
 3. Provide concise but informative summaries for each extracted node.
 4. Avoid creating nodes for relationships or actions.
 5. Avoid creating nodes for temporal information like dates, times or years (these will be added to edges later).
 6. Be as explicit as possible in your node names, using full names and avoiding abbreviations.
 Respond with a JSON object in the following format:
 {{
    "extracted_nodes": [
        {{
            "name": "Unique identifier for the node (use the speaker's name for speaker nodes)",
            "labels": ["Entity", "OptionalAdditionalLabel"],
            "summary": "Brief summary of the node's role or significance"
        }}
    ]
 }}
 """
    return [
        Message(role='system', content=sys_prompt),
        Message(role='user', content=user_prompt),
    ]
 versions: Versions = {
    'v1': v1,
    'v2': v2,
    'extract_json': extract_json,
    'extract_text': extract_text,
 }
--- a/graphiti_core/utils/maintenance/node_operations.py
+++ b/graphiti_core/utils/maintenance/node_operations.py
@ -48,6 +48,29 @@ async def extract_message_nodes(
    return extracted_node_data
 async def extract_text_nodes(
    llm_client: LLMClient, episode: EpisodicNode, previous_episodes: list[EpisodicNode]
 ) -> list[dict[str, Any]]:
    # Prepare context for LLM
    context = {
        'episode_content': episode.content,
        'episode_timestamp': episode.valid_at.isoformat(),
        'previous_episodes': [
            {
                'content': ep.content,
                'timestamp': ep.valid_at.isoformat(),
            }
            for ep in previous_episodes
        ],
    }
    llm_response = await llm_client.generate_response(
        prompt_library.extract_nodes.extract_text(context)
    )
    extracted_node_data = llm_response.get('extracted_nodes', [])
    return extracted_node_data
 async def extract_json_nodes(
    llm_client: LLMClient,
    episode: EpisodicNode,
@ -73,8 +96,10 @@ async def extract_nodes(
 ) -> list[EntityNode]:
    start = time()
    extracted_node_data: list[dict[str, Any]] = []
-    if episode.source in [EpisodeType.message, EpisodeType.text]:
+    if episode.source == EpisodeType.message:
        extracted_node_data = await extract_message_nodes(llm_client, episode, previous_episodes)
    elif episode.source == EpisodeType.text:
        extracted_node_data = await extract_text_nodes(llm_client, episode, previous_episodes)
    elif episode.source == EpisodeType.json:
        extracted_node_data = await extract_json_nodes(llm_client, episode)