import logging
from datetime import datetime
from time import time
from typing import Any

from core.llm_client import LLMClient
from core.nodes import EntityNode, EpisodicNode
from core.prompts import prompt_library

logger = logging.getLogger(__name__)


async def extract_new_nodes(
    llm_client: LLMClient,
    episode: EpisodicNode,
    relevant_schema: dict[str, Any],
    previous_episodes: list[EpisodicNode],
) -> list[EntityNode]:
    """Extract entity nodes from an episode that are not already in the schema.

    Args:
        llm_client: Client used to call the LLM.
        episode: Episode whose content is mined for new entities.
        relevant_schema: Schema dict whose 'nodes' key maps node name ->
            {'label': ..., 'uuid': ...} for nodes already known.
        previous_episodes: Earlier episodes passed to the LLM as context.

    Returns:
        Newly created EntityNode objects whose names do not collide with any
        existing node name in ``relevant_schema``.
    """
    # Prepare context for LLM
    existing_nodes = [
        {'name': node_name, 'label': node_info['label'], 'uuid': node_info['uuid']}
        for node_name, node_info in relevant_schema['nodes'].items()
    ]

    context = {
        'episode_content': episode.content,
        'episode_timestamp': (episode.valid_at.isoformat() if episode.valid_at else None),
        'existing_nodes': existing_nodes,
        'previous_episodes': [
            {
                'content': ep.content,
                'timestamp': ep.valid_at.isoformat() if ep.valid_at else None,
            }
            for ep in previous_episodes
        ],
    }

    llm_response = await llm_client.generate_response(prompt_library.extract_nodes.v1(context))
    new_nodes_data = llm_response.get('new_nodes', [])
    logger.info(f'Extracted new nodes: {new_nodes_data}')

    # Precompute a name set: O(1) membership test instead of rescanning
    # existing_nodes for every extracted candidate.
    existing_names = {existing_node['name'] for existing_node in existing_nodes}

    # Convert the extracted data into EntityNode objects
    new_nodes = []
    for node_data in new_nodes_data:
        # Skip nodes that already exist in the relevant schema.
        if node_data['name'] in existing_names:
            logger.info(f"Node {node_data['name']} already exists, skipping creation.")
            continue
        new_node = EntityNode(
            name=node_data['name'],
            labels=node_data['labels'],
            summary=node_data['summary'],
            created_at=datetime.now(),
        )
        new_nodes.append(new_node)
        logger.info(f'Created new node: {new_node.name} (UUID: {new_node.uuid})')

    return new_nodes


async def extract_nodes(
    llm_client: LLMClient,
    episode: EpisodicNode,
    previous_episodes: list[EpisodicNode],
) -> list[EntityNode]:
    """Extract entity nodes from an episode with the LLM (no existing-schema filter).

    Args:
        llm_client: Client used to call the LLM.
        episode: Episode whose content is mined for entities.
        previous_episodes: Earlier episodes passed to the LLM as context.

    Returns:
        One EntityNode per entity the LLM reports, all freshly created.
    """
    start = time()

    # Prepare context for LLM
    context = {
        'episode_content': episode.content,
        'episode_timestamp': (episode.valid_at.isoformat() if episode.valid_at else None),
        'previous_episodes': [
            {
                'content': ep.content,
                'timestamp': ep.valid_at.isoformat() if ep.valid_at else None,
            }
            for ep in previous_episodes
        ],
    }

    llm_response = await llm_client.generate_response(prompt_library.extract_nodes.v3(context))
    new_nodes_data = llm_response.get('new_nodes', [])

    end = time()
    logger.info(f'Extracted new nodes: {new_nodes_data} in {(end - start) * 1000} ms')

    # Convert the extracted data into EntityNode objects
    new_nodes = []
    for node_data in new_nodes_data:
        new_node = EntityNode(
            name=node_data['name'],
            labels=node_data['labels'],
            summary=node_data['summary'],
            created_at=datetime.now(),
        )
        new_nodes.append(new_node)
        logger.info(f'Created new node: {new_node.name} (UUID: {new_node.uuid})')

    return new_nodes


async def dedupe_extracted_nodes(
    llm_client: LLMClient,
    extracted_nodes: list[EntityNode],
    existing_nodes: list[EntityNode],
) -> tuple[list[EntityNode], dict[str, str]]:
    """Resolve freshly extracted nodes against existing nodes via the LLM.

    Args:
        llm_client: Client used to call the LLM.
        extracted_nodes: Candidate nodes just extracted from an episode.
        existing_nodes: Nodes already present in the graph to dedupe against.

    Returns:
        A tuple ``(nodes, uuid_map)`` where ``uuid_map`` maps each duplicate
        extracted node's uuid to the uuid of the existing node it duplicates,
        and ``nodes`` is ``extracted_nodes`` with duplicates replaced by their
        existing counterparts.
    """
    start = time()

    # Lookup tables. NOTE(fix): the original built a single name->node map from
    # existing_nodes only, then indexed it with extracted-node names and with
    # uuids, which raised KeyError as soon as a duplicate was found. We keep
    # separate maps for each lookup we actually perform.
    existing_by_name = {node.name: node for node in existing_nodes}
    existing_by_uuid = {node.uuid: node for node in existing_nodes}
    extracted_by_name = {node.name: node for node in extracted_nodes}

    # Prepare context for LLM
    existing_nodes_context = [
        {'name': node.name, 'summary': node.summary} for node in existing_nodes
    ]
    extracted_nodes_context = [
        {'name': node.name, 'summary': node.summary} for node in extracted_nodes
    ]
    context = {
        'existing_nodes': existing_nodes_context,
        'extracted_nodes': extracted_nodes_context,
    }

    llm_response = await llm_client.generate_response(prompt_library.dedupe_nodes.v2(context))
    duplicate_data = llm_response.get('duplicates', [])

    end = time()
    logger.info(f'Deduplicated nodes: {duplicate_data} in {(end - start) * 1000} ms')

    # Map extracted-node uuid -> existing-node uuid for each duplicate pair.
    # 'name' is the extracted node's name, 'duplicate_of' the existing node's.
    uuid_map: dict[str, str] = {}
    for duplicate in duplicate_data:
        extracted_node = extracted_by_name.get(duplicate['name'])
        existing_node = existing_by_name.get(duplicate['duplicate_of'])
        if extracted_node is None or existing_node is None:
            # The LLM referenced a name we never supplied — skip rather than crash.
            logger.warning(f'Skipping duplicate pair with unknown name: {duplicate}')
            continue
        uuid_map[extracted_node.uuid] = existing_node.uuid

    # Replace duplicated extracted nodes with their existing counterparts.
    nodes = []
    for node in extracted_nodes:
        if node.uuid in uuid_map:
            nodes.append(existing_by_uuid[uuid_map[node.uuid]])
        else:
            nodes.append(node)

    return nodes, uuid_map


async def dedupe_node_list(
    llm_client: LLMClient,
    nodes: list[EntityNode],
) -> tuple[list[EntityNode], dict[str, str]]:
    """Collapse duplicates within a single node list via the LLM.

    Args:
        llm_client: Client used to call the LLM.
        nodes: Nodes to deduplicate among themselves.

    Returns:
        A tuple ``(unique_nodes, uuid_map)`` where the LLM groups duplicate
        names together, ``unique_nodes`` keeps the first node of each group,
        and ``uuid_map`` maps each discarded duplicate's uuid to the uuid of
        the node kept in its place.
    """
    start = time()

    # build node map (name -> node) for resolving the LLM's name groups
    node_map = {node.name: node for node in nodes}

    # Prepare context for LLM
    nodes_context = [{'name': node.name, 'summary': node.summary} for node in nodes]
    context = {
        'nodes': nodes_context,
    }

    llm_response = await llm_client.generate_response(
        prompt_library.dedupe_nodes.node_list(context)
    )
    nodes_data = llm_response.get('nodes', [])

    end = time()
    logger.info(f'Deduplicated nodes: {nodes_data} in {(end - start) * 1000} ms')

    # Get full node data: first name in each group is the canonical node,
    # the rest are duplicates mapped onto it.
    unique_nodes = []
    uuid_map: dict[str, str] = {}
    for node_data in nodes_data:
        names = node_data.get('names', [])
        if not names or names[0] not in node_map:
            # Malformed or hallucinated group from the LLM — skip rather than crash.
            logger.warning(f'Skipping malformed node group: {node_data}')
            continue
        canonical_node = node_map[names[0]]
        unique_nodes.append(canonical_node)
        for name in names[1:]:
            duplicate_node = node_map.get(name)
            if duplicate_node is None:
                logger.warning(f'Skipping unknown duplicate name: {name}')
                continue
            uuid_map[duplicate_node.uuid] = canonical_node.uuid

    return unique_nodes, uuid_map