diff --git a/check_source_data.py b/check_source_data.py index b6b89f19..fc686c49 100644 --- a/check_source_data.py +++ b/check_source_data.py @@ -1,62 +1,72 @@ #!/usr/bin/env python3 """Check what's in the source database.""" -from neo4j import GraphDatabase import os -NEO4J_URI = "bolt://192.168.1.25:7687" -NEO4J_USER = "neo4j" +from neo4j import GraphDatabase + +NEO4J_URI = 'bolt://192.168.1.25:7687' +NEO4J_USER = 'neo4j' NEO4J_PASSWORD = '!"MiTa1205' -SOURCE_DATABASE = "neo4j" -SOURCE_GROUP_ID = "lvarming73" +SOURCE_DATABASE = 'neo4j' +SOURCE_GROUP_ID = 'lvarming73' driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD)) -print("=" * 70) -print("Checking Source Database") -print("=" * 70) +print('=' * 70) +print('Checking Source Database') +print('=' * 70) with driver.session(database=SOURCE_DATABASE) as session: # Check total nodes - result = session.run(""" + result = session.run( + """ MATCH (n {group_id: $group_id}) RETURN count(n) as total - """, group_id=SOURCE_GROUP_ID) + """, + group_id=SOURCE_GROUP_ID, + ) total = result.single()['total'] print(f"\n✓ Total nodes with group_id '{SOURCE_GROUP_ID}': {total}") # Check date range - result = session.run(""" + result = session.run( + """ MATCH (n:Episodic {group_id: $group_id}) WHERE n.created_at IS NOT NULL RETURN min(n.created_at) as earliest, max(n.created_at) as latest, count(n) as total - """, group_id=SOURCE_GROUP_ID) + """, + group_id=SOURCE_GROUP_ID, + ) dates = result.single() if dates and dates['total'] > 0: - print(f"\n✓ Episodic date range:") - print(f" Earliest: {dates['earliest']}") - print(f" Latest: {dates['latest']}") - print(f" Total episodes: {dates['total']}") + print(f'\n✓ Episodic date range:') + print(f' Earliest: {dates["earliest"]}') + print(f' Latest: {dates["latest"]}') + print(f' Total episodes: {dates["total"]}') else: - print("\n⚠️ No episodic nodes with dates found") + print('\n⚠️ No episodic nodes with dates found') # Sample episodic nodes by date - result = session.run(""" + result = session.run( + """ MATCH (n:Episodic {group_id: $group_id}) RETURN n.name as name, n.created_at as created_at ORDER BY n.created_at LIMIT 10 - """, group_id=SOURCE_GROUP_ID) + """, + group_id=SOURCE_GROUP_ID, + ) - print(f"\n✓ Oldest episodic nodes:") + print(f'\n✓ Oldest episodic nodes:') for record in result: - print(f" - {record['name']}: {record['created_at']}") + print(f' - {record["name"]}: {record["created_at"]}') # Check for other group_ids in neo4j database result = session.run(""" @@ -68,7 +78,7 @@ with driver.session(database=SOURCE_DATABASE) as session: print(f"\n✓ All group_ids in '{SOURCE_DATABASE}' database:") for record in result: - print(f" {record['group_id']}: {record['count']} nodes") + print(f' {record["group_id"]}: {record["count"]} nodes') driver.close() -print("\n" + "=" * 70) +print('\n' + '=' * 70) diff --git a/mcp_server/src/graphiti_mcp_server.py b/mcp_server/src/graphiti_mcp_server.py index 5e6f7410..de6adf35 100644 --- a/mcp_server/src/graphiti_mcp_server.py +++ b/mcp_server/src/graphiti_mcp_server.py @@ -115,33 +115,148 @@ config: GraphitiConfig # MCP server instructions GRAPHITI_MCP_INSTRUCTIONS = """ -Graphiti is a memory service for AI agents built on a knowledge graph. Graphiti performs well -with dynamic data such as user interactions, changing enterprise data, and external information. +Graphiti is a memory service for AI agents built on a knowledge graph. It transforms information +into a richly connected knowledge network of entities and relationships. -Graphiti transforms information into a richly connected knowledge network, allowing you to -capture relationships between concepts, entities, and information. The system organizes data as episodes -(content snippets), nodes (entities), and facts (relationships between entities), creating a dynamic, -queryable memory store that evolves with new information. Graphiti supports multiple data formats, including -structured JSON data, enabling seamless integration with existing data pipelines and systems. +The system organizes data as: +- **Episodes**: Content snippets (conversations, notes, documents) +- **Nodes**: Entities (people, projects, concepts, decisions, anything) +- **Facts**: Relationships between entities with temporal metadata -Facts contain temporal metadata, allowing you to track the time of creation and whether a fact is invalid -(superseded by new information). +## Core Workflow: SEARCH FIRST, THEN ADD -Key capabilities: -1. Add episodes (text, messages, or JSON) to the knowledge graph with the add_memory tool -2. Search for nodes (entities) in the graph using natural language queries with search_nodes -3. Find relevant facts (relationships between entities) with search_facts -4. Retrieve specific entity edges or episodes by UUID -5. Manage the knowledge graph with tools like delete_episode, delete_entity_edge, and clear_graph +**Always explore existing knowledge before adding new information.** -The server connects to a database for persistent storage and uses language models for certain operations. -Each piece of information is organized by group_id, allowing you to maintain separate knowledge domains. +### WHEN ADDING INFORMATION: -When adding information, provide descriptive names and detailed content to improve search quality. -When searching, use specific queries and consider filtering by group_id for more relevant results. +``` +User provides information + ↓ +1. search_nodes(extract key entities/concepts) + ↓ +2. IF entities found: + → get_entity_connections(entity_uuid) - See what's already linked + → Optional: get_entity_timeline(entity_uuid) - Understand history + ↓ +3. add_memory(episode_body with explicit references to found entities) + ↓ +Result: New episode automatically connects to existing knowledge +``` -For optimal performance, ensure the database is properly configured and accessible, and valid -API keys are provided for any language model operations. +### WHEN RETRIEVING INFORMATION: + +``` +User asks question + ↓ +1. search_nodes(extract keywords from question) + ↓ +2. For relevant entities: + → get_entity_connections(uuid) - Explore neighborhood + → get_entity_timeline(uuid) - See evolution/history + ↓ +3. Optional: search_memory_facts(semantic query) - Find specific relationships + ↓ +Result: Comprehensive answer from complete graph context +``` + +## Tool Selection Guide + +**Finding entities:** +- `search_nodes` - Semantic search for entities by keywords/description + +**Exploring connections:** +- `get_entity_connections` - **ALL** relationships for an entity (complete, direct graph traversal) +- `search_memory_facts` - Semantic search for relationships (query-driven, may miss some) + +**Understanding history:** +- `get_entity_timeline` - **ALL** episodes mentioning an entity (chronological, complete) + +**Adding information:** +- `add_memory` - Store new episodes (AFTER searching existing knowledge) + +**Retrieval vs Graph Traversal:** +- Use `get_entity_connections` when you need COMPLETE data (pattern detection, exploration) +- Use `search_memory_facts` when you have a specific semantic query + +## Examples + +### Example 1: Adding Technical Decision + +```python +# ❌ BAD: Creates disconnected node +add_memory( + name="Database choice", + episode_body="Chose PostgreSQL for new service" +) + +# ✅ GOOD: Connects to existing knowledge +nodes = search_nodes(query="database architecture microservices") +# Found: MySQL (main db), Redis (cache), microservices pattern + +connections = get_entity_connections(entity_uuid=nodes[0]['uuid']) +# Sees: MySQL connects to user-service, payment-service + +add_memory( + name="Database choice", + episode_body="Chose PostgreSQL for new notification-service. Different from + existing MySQL used by user-service and payment-service because + we need better JSON support for notification templates." +) +# Result: Rich connections created automatically +``` + +### Example 2: Exploring Project Context + +```python +# User asks: "What database considerations do we have?" + +nodes = search_nodes(query="database") +# Returns: PostgreSQL, MySQL, Redis entities + +for node in nodes: + connections = get_entity_connections(entity_uuid=node['uuid']) + # Shows ALL services, constraints, decisions connected to each database + + timeline = get_entity_timeline(entity_uuid=node['uuid']) + # Shows when each database was discussed, decisions made over time + +# Synthesize comprehensive answer from COMPLETE data +``` + +### Example 3: Pattern Recognition + +```python +# User: "I'm feeling stressed today" + +nodes = search_nodes(query="stress") +connections = get_entity_connections(entity_uuid=nodes[0]['uuid']) +# Discovers: stress ↔ work, sleep, project-deadline, coffee-intake + +timeline = get_entity_timeline(entity_uuid=nodes[0]['uuid']) +# Shows: First mentioned 3 months ago, frequency increasing + +# Can now make informed observations based on complete data + +add_memory( + name="Stress discussion", + episode_body="Discussed stress today. User recognizes connection to + project deadlines and sleep quality from our past conversations." +) +``` + +## Key Principles + +1. **Always search before adding** - Even for trivial data +2. **Reference found entities by name** - Creates automatic connections +3. **Use complete data for patterns** - Graph traversal, not semantic guessing +4. **Track evolution over time** - Timeline shows how understanding changed + +## Technical Notes + +- All tools respect `group_id` filtering for namespace isolation +- Temporal metadata tracks both creation time and domain time +- Facts can be invalidated (superseded) without deletion +- Processing is asynchronous - episodes queued for background processing """ # MCP server instance @@ -384,14 +499,21 @@ async def add_memory( ) -> SuccessResponse | ErrorResponse: """Add information to memory. **This is the PRIMARY method for storing information.** - **PRIORITY: Use this tool FIRST when storing any information.** + **⚠️ IMPORTANT: SEARCH FIRST, THEN ADD** + + Before using this tool, always: + 1. Search for related entities: search_nodes(query="relevant keywords") + 2. Explore their connections: get_entity_connections(entity_uuid=found_uuid) + 3. Then add with context: Reference found entities by name in episode_body + + This creates rich automatic connections instead of isolated nodes. Processes content asynchronously, automatically extracting entities, relationships, and deduplicating similar information. Returns immediately while processing continues in background. WHEN TO USE THIS TOOL: - - Storing information → add_memory (this tool) **USE THIS FIRST** + - Storing information → add_memory (this tool) **USE AFTER SEARCHING** - Searching information → use search_nodes or search_memory_facts - Deleting information → use delete_episode or delete_entity_edge @@ -1426,6 +1548,215 @@ async def clear_graph( return ErrorResponse(error=f'Error clearing graph: {error_msg}') +@mcp.tool( + annotations={ + 'title': 'Get Entity Connections', + 'readOnlyHint': True, + 'destructiveHint': False, + 'idempotentHint': True, + 'openWorldHint': True, + }, +) +async def get_entity_connections( + entity_uuid: str, + group_ids: list[str] | None = None, + max_connections: int = 50, +) -> FactSearchResponse | ErrorResponse: + """Get ALL relationships connected to a specific entity. **Complete graph traversal.** + + **Use this to explore what's already known about an entity before adding new information.** + + Unlike search_memory_facts which requires a semantic query, this performs direct graph + traversal to return EVERY relationship where the entity is involved. Guarantees completeness. + + WHEN TO USE THIS TOOL: + - Exploring entity neighborhood → get_entity_connections **USE THIS** + - Before adding related info → get_entity_connections **USE THIS** + - Pattern detection (need complete data) → get_entity_connections **USE THIS** + - Understanding full context without formulating queries + + WHEN NOT to use: + - Specific semantic query → use search_memory_facts instead + - Finding entities → use search_nodes instead + + Use Cases: + - "Show everything connected to PostgreSQL decision" + - "What's linked to the authentication service?" + - "All relationships for Django project" + - Before adding: "Let me see what's already known about X" + + Args: + entity_uuid: UUID of entity to explore (from search_nodes or previous results) + group_ids: Optional list of namespaces to filter connections + max_connections: Maximum relationships to return (default: 50) + + Returns: + FactSearchResponse with all connected relationships and temporal metadata + + Examples: + # After finding entity + nodes = search_nodes(query="Django project") + django_uuid = nodes[0]['uuid'] + + # Get all connections + connections = get_entity_connections(entity_uuid=django_uuid) + + # Limited results for specific namespace + connections = get_entity_connections( + entity_uuid=django_uuid, + group_ids=["work"], + max_connections=20 + ) + """ + global graphiti_service + + if graphiti_service is None: + return ErrorResponse(error='Graphiti service not initialized') + + try: + client = await graphiti_service.get_client() + + # Use existing EntityEdge.get_by_node_uuid() method + edges = await EntityEdge.get_by_node_uuid(client.driver, entity_uuid) + + # Filter by group_ids if provided + if group_ids: + edges = [e for e in edges if e.group_id in group_ids] + + # Limit results + edges = edges[:max_connections] + + if not edges: + return FactSearchResponse( + message=f'No connections found for entity {entity_uuid}', facts=[] + ) + + # Format using existing formatter + facts = [format_fact_result(edge) for edge in edges] + + return FactSearchResponse( + message=f'Found {len(facts)} connection(s) for entity', facts=facts + ) + + except Exception as e: + error_msg = str(e) + logger.error(f'Error getting entity connections: {error_msg}') + return ErrorResponse(error=f'Error getting entity connections: {error_msg}') + + +@mcp.tool( + annotations={ + 'title': 'Get Entity Timeline', + 'readOnlyHint': True, + 'destructiveHint': False, + 'idempotentHint': True, + 'openWorldHint': True, + }, +) +async def get_entity_timeline( + entity_uuid: str, + group_ids: list[str] | None = None, + max_episodes: int = 20, +) -> EpisodeSearchResponse | ErrorResponse: + """Get chronological episode history for an entity. **Shows evolution over time.** + + **Use this to understand how an entity was discussed across conversations.** + + Returns ALL episodes where this entity was mentioned, in chronological order. + Shows the full conversational history and temporal evolution of concepts, decisions, + or any tracked entity. + + WHEN TO USE THIS TOOL: + - Understanding entity history → get_entity_timeline **USE THIS** + - "When did we discuss X?" → get_entity_timeline **USE THIS** + - Tracking evolution over time → get_entity_timeline **USE THIS** + - Seeing context from original sources + + WHEN NOT to use: + - Semantic search across all episodes → use search_episodes instead + - Finding entities → use search_nodes instead + + Use Cases: + - "When did we first discuss microservices architecture?" + - "Show all mentions of the deployment pipeline" + - "Timeline of stress mentions" + - "How did our understanding of GraphQL evolve?" + + Args: + entity_uuid: UUID of entity (from search_nodes or previous results) + group_ids: Optional list of namespaces to filter episodes + max_episodes: Maximum episodes to return (default: 20, chronological) + + Returns: + EpisodeSearchResponse with episodes ordered chronologically + + Examples: + # After finding entity + nodes = search_nodes(query="microservices") + arch_uuid = nodes[0]['uuid'] + + # Get conversation history + timeline = get_entity_timeline(entity_uuid=arch_uuid) + + # Recent episodes only for specific namespace + timeline = get_entity_timeline( + entity_uuid=arch_uuid, + group_ids=["architecture"], + max_episodes=10 + ) + """ + global graphiti_service + + if graphiti_service is None: + return ErrorResponse(error='Graphiti service not initialized') + + try: + client = await graphiti_service.get_client() + + # Use existing EpisodicNode.get_by_entity_node_uuid() method + episodes = await EpisodicNode.get_by_entity_node_uuid(client.driver, entity_uuid) + + # Filter by group_ids if provided + if group_ids: + episodes = [e for e in episodes if e.group_id in group_ids] + + # Sort by valid_at (chronological order) + episodes.sort(key=lambda e: e.valid_at) + + # Limit results + episodes = episodes[:max_episodes] + + if not episodes: + return EpisodeSearchResponse( + message=f'No episodes found mentioning entity {entity_uuid}', episodes=[] + ) + + # Format episodes + episode_results = [] + for ep in episodes: + episode_results.append( + { + 'uuid': ep.uuid, + 'name': ep.name, + 'content': ep.content, + 'valid_at': ep.valid_at.isoformat() if ep.valid_at else None, + 'created_at': ep.created_at.isoformat() if ep.created_at else None, + 'source': ep.source.value if ep.source else None, + 'group_id': ep.group_id, + } + ) + + return EpisodeSearchResponse( + message=f'Found {len(episode_results)} episode(s) mentioning entity', + episodes=episode_results, + ) + + except Exception as e: + error_msg = str(e) + logger.error(f'Error getting entity timeline: {error_msg}') + return ErrorResponse(error=f'Error getting entity timeline: {error_msg}') + + @mcp.tool( annotations={ 'title': 'Get Server Status', diff --git a/mcp_server/src/models/response_types.py b/mcp_server/src/models/response_types.py index 7c889418..16d96bca 100644 --- a/mcp_server/src/models/response_types.py +++ b/mcp_server/src/models/response_types.py @@ -1,6 +1,7 @@ """Response type definitions for Graphiti MCP Server.""" from typing import Any + from typing_extensions import TypedDict diff --git a/mcp_server/tests/test_database_param.py b/mcp_server/tests/test_database_param.py index 971a5d5f..591efb59 100644 --- a/mcp_server/tests/test_database_param.py +++ b/mcp_server/tests/test_database_param.py @@ -8,14 +8,15 @@ from pathlib import Path # Setup path for imports sys.path.insert(0, str(Path(__file__).parent.parent)) + def test_neo4j_database_parameter(): """Test that Neo4j database parameter is included in configuration.""" from src.config.schema import GraphitiConfig from src.services.factories import DatabaseDriverFactory - print('\n' + '='*70) + print('\n' + '=' * 70) print('Testing Neo4j Database Parameter Configuration') - print('='*70 + '\n') + print('=' * 70 + '\n') # Test 1: Default database value print('Test 1: Default database value') @@ -25,7 +26,9 @@ def test_neo4j_database_parameter(): assert 'database' in db_config, 'Database parameter missing from config!' print(f' ✓ Database parameter present in config') print(f' ✓ Default database value: {db_config["database"]}') - assert db_config['database'] == 'neo4j', f'Expected default "neo4j", got {db_config["database"]}' + assert db_config['database'] == 'neo4j', ( + f'Expected default "neo4j", got {db_config["database"]}' + ) print(f' ✓ Default value matches expected: neo4j\n') # Test 2: Environment variable override @@ -37,7 +40,9 @@ def test_neo4j_database_parameter(): assert 'database' in db_config2, 'Database parameter missing from config!' print(f' ✓ Database parameter present in config') print(f' ✓ Overridden database value: {db_config2["database"]}') - assert db_config2['database'] == 'graphiti', f'Expected "graphiti", got {db_config2["database"]}' + assert db_config2['database'] == 'graphiti', ( + f'Expected "graphiti", got {db_config2["database"]}' + ) print(f' ✓ Environment override works correctly\n') # Clean up @@ -50,9 +55,9 @@ def test_neo4j_database_parameter(): assert param in db_config, f'Required parameter "{param}" missing!' print(f' ✓ {param}: present') - print('\n' + '='*70) + print('\n' + '=' * 70) print('✅ All database parameter tests passed!') - print('='*70) + print('=' * 70) print('\nSummary:') print(' - database parameter is included in Neo4j config') print(' - Default value is "neo4j"') @@ -70,5 +75,6 @@ if __name__ == '__main__': except Exception as e: print(f'\n❌ Unexpected error: {e}\n') import traceback + traceback.print_exc() sys.exit(1) diff --git a/migrate_group_id.py b/migrate_group_id.py index 6641bbad..1e32cb01 100644 --- a/migrate_group_id.py +++ b/migrate_group_id.py @@ -10,20 +10,20 @@ This script migrates data from: Target: graphiti database, group_id='6910959f2128b5c4faa22283' """ -from neo4j import GraphDatabase import os +from neo4j import GraphDatabase # Configuration -NEO4J_URI = "bolt://192.168.1.25:7687" -NEO4J_USER = "neo4j" -NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", '!"MiTa1205') +NEO4J_URI = 'bolt://192.168.1.25:7687' +NEO4J_USER = 'neo4j' +NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD', '!"MiTa1205') -SOURCE_DATABASE = "neo4j" -SOURCE_GROUP_ID = "lvarming73" +SOURCE_DATABASE = 'neo4j' +SOURCE_GROUP_ID = 'lvarming73' -TARGET_DATABASE = "graphiti" -TARGET_GROUP_ID = "6910959f2128b5c4faa22283" +TARGET_DATABASE = 'graphiti' +TARGET_GROUP_ID = '6910959f2128b5c4faa22283' def migrate_data(): @@ -33,41 +33,49 @@ def migrate_data(): try: # Step 1: Export data from source database - print(f"\n📤 Exporting data from {SOURCE_DATABASE} database (group_id: {SOURCE_GROUP_ID})...") + print( + f'\n📤 Exporting data from {SOURCE_DATABASE} database (group_id: {SOURCE_GROUP_ID})...' + ) with driver.session(database=SOURCE_DATABASE) as session: # Get all nodes with the source group_id - nodes_result = session.run(""" + nodes_result = session.run( + """ MATCH (n {group_id: $group_id}) RETURN id(n) as old_id, labels(n) as labels, properties(n) as props ORDER BY old_id - """, group_id=SOURCE_GROUP_ID) + """, + group_id=SOURCE_GROUP_ID, + ) nodes = list(nodes_result) - print(f" Found {len(nodes)} nodes to migrate") + print(f' Found {len(nodes)} nodes to migrate') if len(nodes) == 0: - print(" ⚠️ No nodes found. Nothing to migrate.") + print(' ⚠️ No nodes found. Nothing to migrate.') return # Get all relationships between nodes with the source group_id - rels_result = session.run(""" + rels_result = session.run( + """ MATCH (n {group_id: $group_id})-[r]->(m {group_id: $group_id}) RETURN id(startNode(r)) as from_id, id(endNode(r)) as to_id, type(r) as rel_type, properties(r) as props - """, group_id=SOURCE_GROUP_ID) + """, + group_id=SOURCE_GROUP_ID, + ) relationships = list(rels_result) - print(f" Found {len(relationships)} relationships to migrate") + print(f' Found {len(relationships)} relationships to migrate') # Step 2: Create ID mapping (old Neo4j internal ID -> new node UUID) - print(f"\n📥 Importing data to {TARGET_DATABASE} database (group_id: {TARGET_GROUP_ID})...") + print(f'\n📥 Importing data to {TARGET_DATABASE} database (group_id: {TARGET_GROUP_ID})...') id_mapping = {} @@ -88,16 +96,19 @@ def migrate_data(): labels_str = ':'.join(labels) # Create node - result = session.run(f""" + result = session.run( + f""" CREATE (n:{labels_str}) SET n = $props RETURN id(n) as new_id, n.uuid as uuid - """, props=props) + """, + props=props, + ) record = result.single() id_mapping[old_id] = record['new_id'] - print(f" ✅ Created {len(nodes)} nodes") + print(f' ✅ Created {len(nodes)} nodes') # Create relationships rel_count = 0 @@ -116,74 +127,87 @@ def migrate_data(): to_new_id = id_mapping.get(to_old_id) if from_new_id is None or to_new_id is None: - print(f" ⚠️ Skipping relationship: node mapping not found") + print(f' ⚠️ Skipping relationship: node mapping not found') continue # Create relationship - session.run(f""" + session.run( + f""" MATCH (a), (b) WHERE id(a) = $from_id AND id(b) = $to_id CREATE (a)-[r:{rel_type}]->(b) SET r = $props - """, from_id=from_new_id, to_id=to_new_id, props=props) + """, + from_id=from_new_id, + to_id=to_new_id, + props=props, + ) rel_count += 1 - print(f" ✅ Created {rel_count} relationships") + print(f' ✅ Created {rel_count} relationships') # Step 3: Verify migration - print(f"\n✅ Migration complete!") - print(f"\n📊 Verification:") + print(f'\n✅ Migration complete!') + print(f'\n📊 Verification:') with driver.session(database=TARGET_DATABASE) as session: # Count nodes in target - result = session.run(""" + result = session.run( + """ MATCH (n {group_id: $group_id}) RETURN count(n) as node_count - """, group_id=TARGET_GROUP_ID) + """, + group_id=TARGET_GROUP_ID, + ) target_count = result.single()['node_count'] - print(f" Target database now has {target_count} nodes with group_id={TARGET_GROUP_ID}") + print( + f' Target database now has {target_count} nodes with group_id={TARGET_GROUP_ID}' + ) # Show node types - result = session.run(""" + result = session.run( + """ MATCH (n {group_id: $group_id}) RETURN labels(n) as labels, count(*) as count ORDER BY count DESC - """, group_id=TARGET_GROUP_ID) + """, + group_id=TARGET_GROUP_ID, + ) - print(f"\n Node types:") + print(f'\n Node types:') for record in result: labels = ':'.join(record['labels']) count = record['count'] - print(f" {labels}: {count}") + print(f' {labels}: {count}') - print(f"\n🎉 Done! Your data has been migrated successfully.") - print(f"\nNext steps:") - print(f"1. Verify the data in Neo4j Browser:") - print(f" :use graphiti") + print(f'\n🎉 Done! Your data has been migrated successfully.') + print(f'\nNext steps:') + print(f'1. Verify the data in Neo4j Browser:') + print(f' :use graphiti') print(f" MATCH (n {{group_id: '{TARGET_GROUP_ID}'}}) RETURN n LIMIT 25") - print(f"2. Test in LibreChat to ensure everything works") - print(f"3. Once verified, you can delete the old data:") - print(f" :use neo4j") + print(f'2. Test in LibreChat to ensure everything works') + print(f'3. Once verified, you can delete the old data:') + print(f' :use neo4j') print(f" MATCH (n {{group_id: '{SOURCE_GROUP_ID}'}}) DETACH DELETE n") finally: driver.close() -if __name__ == "__main__": - print("=" * 70) - print("Graphiti Data Migration Script") - print("=" * 70) +if __name__ == '__main__': + print('=' * 70) + print('Graphiti Data Migration Script') + print('=' * 70) print(f"\nSource: {SOURCE_DATABASE} database, group_id='{SOURCE_GROUP_ID}'") print(f"Target: {TARGET_DATABASE} database, group_id='{TARGET_GROUP_ID}'") - print(f"\nNeo4j URI: {NEO4J_URI}") - print("=" * 70) + print(f'\nNeo4j URI: {NEO4J_URI}') + print('=' * 70) response = input("\n⚠️ Ready to migrate? This will copy all data. Type 'yes' to continue: ") if response.lower() == 'yes': migrate_data() else: - print("\n❌ Migration cancelled.") + print('\n❌ Migration cancelled.') diff --git a/uv.lock b/uv.lock index a5cbe727..27958aa8 100644 --- a/uv.lock +++ b/uv.lock @@ -782,8 +782,8 @@ wheels = [ ] [[package]] -name = "graphiti-core" -version = "0.23.0" +name = "graphiti-core-varming" +version = "0.23.2" source = { editable = "." } dependencies = [ { name = "diskcache" }, diff --git a/verify_migration.py b/verify_migration.py index 00ad0ba1..dab51636 100644 --- a/verify_migration.py +++ b/verify_migration.py @@ -1,101 +1,118 @@ #!/usr/bin/env python3 """Verify migration data in Neo4j.""" -from neo4j import GraphDatabase -import os import json +import os -NEO4J_URI = "bolt://192.168.1.25:7687" -NEO4J_USER = "neo4j" +from neo4j import GraphDatabase + +NEO4J_URI = 'bolt://192.168.1.25:7687' +NEO4J_USER = 'neo4j' NEO4J_PASSWORD = '!"MiTa1205' -TARGET_DATABASE = "graphiti" -TARGET_GROUP_ID = "6910959f2128b5c4faa22283" +TARGET_DATABASE = 'graphiti' +TARGET_GROUP_ID = '6910959f2128b5c4faa22283' driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD)) -print("=" * 70) -print("Verifying Migration Data") -print("=" * 70) +print('=' * 70) +print('Verifying Migration Data') +print('=' * 70) with driver.session(database=TARGET_DATABASE) as session: # Check total nodes - result = session.run(""" + result = session.run( + """ MATCH (n {group_id: $group_id}) RETURN count(n) as total - """, group_id=TARGET_GROUP_ID) + """, + group_id=TARGET_GROUP_ID, + ) total = result.single()['total'] print(f"\n✓ Total nodes with group_id '{TARGET_GROUP_ID}': {total}") # Check node labels and properties - result = session.run(""" + result = session.run( + """ MATCH (n {group_id: $group_id}) RETURN DISTINCT labels(n) as labels, count(*) as count ORDER BY count DESC - """, group_id=TARGET_GROUP_ID) + """, + group_id=TARGET_GROUP_ID, + ) - print(f"\n✓ Node types:") + print(f'\n✓ Node types:') for record in result: labels = ':'.join(record['labels']) count = record['count'] - print(f" {labels}: {count}") + print(f' {labels}: {count}') # Sample some episodic nodes - result = session.run(""" + result = session.run( + """ MATCH (n:Episodic {group_id: $group_id}) RETURN n.uuid as uuid, n.name as name, n.content as content, n.created_at as created_at LIMIT 5 - """, group_id=TARGET_GROUP_ID) + """, + group_id=TARGET_GROUP_ID, + ) - print(f"\n✓ Sample Episodic nodes:") + print(f'\n✓ Sample Episodic nodes:') episodes = list(result) if episodes: for record in episodes: - print(f" - {record['name']}") - print(f" UUID: {record['uuid']}") - print(f" Created: {record['created_at']}") - print(f" Content: {record['content'][:100] if record['content'] else 'None'}...") + print(f' - {record["name"]}') + print(f' UUID: {record["uuid"]}') + print(f' Created: {record["created_at"]}') + print(f' Content: {record["content"][:100] if record["content"] else "None"}...') else: - print(" ⚠️ No episodic nodes found!") + print(' ⚠️ No episodic nodes found!') # Sample some entity nodes - result = session.run(""" + result = session.run( + """ MATCH (n:Entity {group_id: $group_id}) RETURN n.uuid as uuid, n.name as name, labels(n) as labels, n.summary as summary LIMIT 10 - """, group_id=TARGET_GROUP_ID) + """, + group_id=TARGET_GROUP_ID, + ) - print(f"\n✓ Sample Entity nodes:") + print(f'\n✓ Sample Entity nodes:') entities = list(result) if entities: for record in entities: labels = ':'.join(record['labels']) - print(f" - {record['name']} ({labels})") - print(f" UUID: {record['uuid']}") + print(f' - {record["name"]} ({labels})') + print(f' UUID: {record["uuid"]}') if record['summary']: - print(f" Summary: {record['summary'][:80]}...") + print(f' Summary: {record["summary"][:80]}...') else: - print(" ⚠️ No entity nodes found!") + print(' ⚠️ No entity nodes found!') # Check relationships - result = session.run(""" + result = session.run( + """ MATCH (n {group_id: $group_id})-[r]->(m {group_id: $group_id}) RETURN type(r) as rel_type, count(*) as count ORDER BY count DESC LIMIT 10 - """, group_id=TARGET_GROUP_ID) + """, + group_id=TARGET_GROUP_ID, + ) - print(f"\n✓ Relationship types:") + print(f'\n✓ Relationship types:') rels = list(result) if rels: for record in rels: - print(f" {record['rel_type']}: {record['count']}") + print(f' {record["rel_type"]}: {record["count"]}') else: - print(" ⚠️ No relationships found!") + print(' ⚠️ No relationships found!') # Check if nodes have required properties - result = session.run(""" + result = session.run( + """ MATCH (n:Episodic {group_id: $group_id}) RETURN count(n) as total, @@ -104,19 +121,22 @@ with driver.session(database=TARGET_DATABASE) as session: count(n.content) as has_content, count(n.created_at) as has_created_at, count(n.valid_at) as has_valid_at - """, group_id=TARGET_GROUP_ID) + """, + group_id=TARGET_GROUP_ID, + ) props = result.single() - print(f"\n✓ Episodic node properties:") - print(f" Total: {props['total']}") - print(f" Has uuid: {props['has_uuid']}") - print(f" Has name: {props['has_name']}") - print(f" Has content: {props['has_content']}") - print(f" Has created_at: {props['has_created_at']}") - print(f" Has valid_at: {props['has_valid_at']}") + print(f'\n✓ Episodic node properties:') + print(f' Total: {props["total"]}') + print(f' Has uuid: {props["has_uuid"]}') + print(f' Has name: {props["has_name"]}') + print(f' Has content: {props["has_content"]}') + print(f' Has created_at: {props["has_created_at"]}') + print(f' Has valid_at: {props["has_valid_at"]}') # Check Entity properties - result = session.run(""" + result = session.run( + """ MATCH (n:Entity {group_id: $group_id}) RETURN count(n) as total, @@ -124,15 +144,17 @@ with driver.session(database=TARGET_DATABASE) as session: count(n.name) as has_name, count(n.summary) as has_summary, count(n.created_at) as has_created_at - """, group_id=TARGET_GROUP_ID) + """, + group_id=TARGET_GROUP_ID, + ) props = result.single() - print(f"\n✓ Entity node properties:") - print(f" Total: {props['total']}") - print(f" Has uuid: {props['has_uuid']}") - print(f" Has name: {props['has_name']}") - print(f" Has summary: {props['has_summary']}") - print(f" Has created_at: {props['has_created_at']}") + print(f'\n✓ Entity node properties:') + print(f' Total: {props["total"]}') + print(f' Has uuid: {props["has_uuid"]}') + print(f' Has name: {props["has_name"]}') + print(f' Has summary: {props["has_summary"]}') + print(f' Has created_at: {props["has_created_at"]}') driver.close() -print("\n" + "=" * 70) +print('\n' + '=' * 70)