diff --git a/README.md b/README.md index 1256b2e9..daa22842 100644 --- a/README.md +++ b/README.md @@ -119,67 +119,16 @@ poetry add graphiti-core > Support for Anthropic and Groq LLM inferences is available, too. Other LLM providers may be supported via OpenAI > compatible APIs. -```python -from graphiti_core import Graphiti -from graphiti_core.nodes import EpisodeType -from datetime import datetime, timezone +For a complete working example, see the [Quickstart Example](./examples/quickstart/README.md) in the examples directory. The quickstart demonstrates: -# Initialize Graphiti as Your Memory Layer -graphiti = Graphiti("bolt://localhost:7687", "neo4j", "password") +1. Connecting to a Neo4j database +2. Initializing Graphiti indices and constraints +3. Adding episodes to the graph (both text and structured JSON) +4. Searching for relationships (edges) using hybrid search +5. Reranking search results using graph distance +6. Searching for nodes using predefined search recipes -# Initialize the graph database with Graphiti's indices. This only needs to be done once. -graphiti.build_indices_and_constraints() - -# Add episodes -episodes = [ - "Kamala Harris is the Attorney General of California. 
She was previously " - "the district attorney for San Francisco.", - "As AG, Harris was in office from January 3, 2011 – January 3, 2017", -] -for i, episode in enumerate(episodes): - await graphiti.add_episode( - name=f"Freakonomics Radio {i}", - episode_body=episode, - source=EpisodeType.text, - source_description="podcast", - reference_time=datetime.now(timezone.utc) - ) - -# Search the graph for semantic memory retrieval -# Execute a hybrid search combining semantic similarity and BM25 retrieval -# Results are combined and reranked using Reciprocal Rank Fusion -results = await graphiti.search('Who was the California Attorney General?') -[ - EntityEdge( -│ uuid = '3133258f738e487383f07b04e15d4ac0', -│ source_node_uuid = '2a85789b318d4e418050506879906e62', -│ target_node_uuid = 'baf7781f445945989d6e4f927f881556', -│ created_at = datetime.datetime(2024, 8, 26, 13, 13, 24, 861097), -│ name = 'HELD_POSITION', -# the fact reflects the updated state that Harris is -# no longer the AG of California -│ fact = 'Kamala Harris was the Attorney General of California', -│ fact_embedding = [ -│ │ -0.009955154731869698, -│ ... -│ │ 0.00784289836883545 -│], -│ episodes = ['b43e98ad0a904088a76c67985caecc22'], -│ expired_at = datetime.datetime(2024, 8, 26, 20, 18, 1, 53812), -# These dates represent the date this edge was true. -│ valid_at = datetime.datetime(2011, 1, 3, 0, 0, tzinfo= < UTC >), -│ invalid_at = datetime.datetime(2017, 1, 3, 0, 0, tzinfo= < UTC >) -) -] - -# Rerank search results based on graph distance -# Provide a node UUID to prioritize results closer to that node in the graph. -# Results are weighted by their proximity, with distant edges receiving lower scores. 
-await graphiti.search('Who was the California Attorney General?', center_node_uuid) - -# Close the connection when chat state management is complete -graphiti.close() -``` +The example is fully documented with clear explanations of each functionality and includes a comprehensive README with setup instructions and next steps. ## Graph Service diff --git a/examples/quickstart/README.md b/examples/quickstart/README.md new file mode 100644 index 00000000..d192f949 --- /dev/null +++ b/examples/quickstart/README.md @@ -0,0 +1,84 @@ +# Graphiti Quickstart Example + +This example demonstrates the basic functionality of Graphiti, including: + +1. Connecting to a Neo4j database +2. Initializing Graphiti indices and constraints +3. Adding episodes to the graph +4. Searching the graph with semantic and keyword matching +5. Exploring graph-based search with reranking using the top search result's source node UUID +6. Performing node search using predefined search recipes + +## Prerequisites + +- Neo4j Desktop installed and running +- A local DBMS created and started in Neo4j Desktop +- Python 3.9+ +- OpenAI API key (set as `OPENAI_API_KEY` environment variable) + +## Setup Instructions + +1. Install the required dependencies: + +```bash +pip install graphiti-core +``` + +2. Set up environment variables: + +```bash +# Required for LLM and embedding +export OPENAI_API_KEY=your_openai_api_key + +# Optional Neo4j connection parameters (defaults shown) +export NEO4J_URI=bolt://localhost:7687 +export NEO4J_USER=neo4j +export NEO4J_PASSWORD=password +``` + +3. 
Run the example: + +```bash +python quickstart.py +``` + +## What This Example Demonstrates + +- **Graph Initialization**: Setting up the Graphiti indices and constraints in Neo4j +- **Adding Episodes**: Adding text content that will be analyzed and converted into knowledge graph nodes and edges +- **Edge Search Functionality**: Performing hybrid searches that combine semantic similarity and BM25 retrieval to find relationships (edges) +- **Graph-Aware Search**: Using the source node UUID from the top search result to rerank additional search results based on graph distance +- **Node Search Using Recipes**: Using predefined search configurations like NODE_HYBRID_SEARCH_RRF to directly search for nodes rather than edges +- **Result Processing**: Understanding the structure of search results including facts, nodes, and temporal metadata + +## Next Steps + +After running this example, you can: + +1. Modify the episode content to add your own information +2. Try different search queries to explore the knowledge extraction +3. Experiment with different center nodes for graph-distance-based reranking +4. Try other predefined search recipes from `graphiti_core.search.search_config_recipes` +5. 
Explore the more advanced examples in the other directories + +## Understanding the Output + +### Edge Search Results + +The edge search results include EntityEdge objects with: + +- UUID: Unique identifier for the edge +- Fact: The extracted fact from the episode +- Valid at/invalid at: Time period during which the fact was true (if available) +- Source/target node UUIDs: Connections between entities in the knowledge graph + +### Node Search Results + +The node search results include EntityNode objects with: + +- UUID: Unique identifier for the node +- Name: The name of the entity +- Content Summary: A summary of the node's content +- Node Labels: The types of the node (e.g., Person, Organization) +- Created At: When the node was created +- Attributes: Additional properties associated with the node diff --git a/examples/quickstart/quickstart.py b/examples/quickstart/quickstart.py new file mode 100644 index 00000000..71c72c9c --- /dev/null +++ b/examples/quickstart/quickstart.py @@ -0,0 +1,242 @@ +""" +Copyright 2025, Zep Software, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
import asyncio
import json
import logging
import os
from datetime import datetime, timezone
from logging import INFO

# Third-party dependencies. The requirements.txt shipped next to this script
# lists: graphiti-core, python-dotenv>=1.0.0
from dotenv import load_dotenv

from graphiti_core import Graphiti
from graphiti_core.nodes import EpisodeType
from graphiti_core.search.search_config_recipes import NODE_HYBRID_SEARCH_RRF

#################################################
# CONFIGURATION
#################################################
# Set up logging and environment variables for
# connecting to Neo4j database
#################################################

# Configure logging
logging.basicConfig(
    level=INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)
logger = logging.getLogger(__name__)

# Load variables from a local .env file (if any) into the environment
load_dotenv()

# Neo4j connection parameters
# Make sure Neo4j Desktop is running with a local DBMS started
neo4j_uri = os.environ.get('NEO4J_URI', 'bolt://localhost:7687')
neo4j_user = os.environ.get('NEO4J_USER', 'neo4j')
neo4j_password = os.environ.get('NEO4J_PASSWORD', 'password')

# Every lookup above has a default, so this only fires when one of the
# variables is explicitly set to an empty string.
if not neo4j_uri or not neo4j_user or not neo4j_password:
    raise ValueError('NEO4J_URI, NEO4J_USER, and NEO4J_PASSWORD must be set')


def _episode_body(content):
    """Return episode content as a string, JSON-encoding non-string content."""
    return content if isinstance(content, str) else json.dumps(content)


def _print_edge_results(results):
    """Print the UUID, fact and any validity bounds of each edge search result."""
    for result in results:
        print(f'UUID: {result.uuid}')
        print(f'Fact: {result.fact}')
        # Validity bounds are printed only when the attribute exists and is truthy.
        valid_at = getattr(result, 'valid_at', None)
        if valid_at:
            print(f'Valid from: {valid_at}')
        invalid_at = getattr(result, 'invalid_at', None)
        if invalid_at:
            print(f'Valid until: {invalid_at}')
        print('---')


def _print_node_results(nodes):
    """Print identifying fields, a truncated summary and attributes of each node."""
    for node in nodes:
        print(f'Node UUID: {node.uuid}')
        print(f'Node Name: {node.name}')
        # Keep the output compact: show at most the first 100 characters.
        node_summary = node.summary[:100] + '...' if len(node.summary) > 100 else node.summary
        print(f'Content Summary: {node_summary}')
        print(f"Node Labels: {', '.join(node.labels)}")
        print(f'Created At: {node.created_at}')
        attributes = getattr(node, 'attributes', None)
        if attributes:
            print('Attributes:')
            for key, value in attributes.items():
                print(f' {key}: {value}')
        print('---')


async def main():
    """Run the quickstart walkthrough against the configured Neo4j instance.

    Steps, in order: build Graphiti's indices and constraints, add four sample
    episodes (two text, two JSON), run an edge search, rerun it reranked around
    the top result's source node, run a node search with the
    NODE_HYBRID_SEARCH_RRF recipe, and finally close the connection (also on
    failure, via ``finally``). All results are written to stdout.
    """
    #################################################
    # INITIALIZATION
    #################################################
    # Connect to Neo4j and set up Graphiti indices
    # This is required before using other Graphiti
    # functionality
    #################################################

    # Initialize Graphiti with Neo4j connection
    graphiti = Graphiti(neo4j_uri, neo4j_user, neo4j_password)

    # The same question is used for the basic and the reranked edge search.
    edge_query = 'Who was the California Attorney General?'

    try:
        # Initialize the graph database with graphiti's indices. This only needs to be done once.
        await graphiti.build_indices_and_constraints()

        #################################################
        # ADDING EPISODES
        #################################################
        # Episodes are the primary units of information
        # in Graphiti. They can be text or structured JSON
        # and are automatically processed to extract entities
        # and relationships.
        #################################################

        # Example: Add Episodes
        # Episodes list containing both text and JSON episodes
        episodes = [
            {
                'content': 'Kamala Harris is the Attorney General of California. She was previously '
                'the district attorney for San Francisco.',
                'type': EpisodeType.text,
                'description': 'podcast transcript',
            },
            {
                'content': 'As AG, Harris was in office from January 3, 2011 – January 3, 2017',
                'type': EpisodeType.text,
                'description': 'podcast transcript',
            },
            {
                'content': {
                    'name': 'Gavin Newsom',
                    'position': 'Governor',
                    'state': 'California',
                    'previous_role': 'Lieutenant Governor',
                    'previous_location': 'San Francisco',
                },
                'type': EpisodeType.json,
                'description': 'podcast metadata',
            },
            {
                'content': {
                    'name': 'Gavin Newsom',
                    'position': 'Governor',
                    'term_start': 'January 7, 2019',
                    'term_end': 'Present',
                },
                'type': EpisodeType.json,
                'description': 'podcast metadata',
            },
        ]

        # Add episodes to the graph; JSON episodes are serialized to a string first
        for i, episode in enumerate(episodes):
            await graphiti.add_episode(
                name=f'Freakonomics Radio {i}',
                episode_body=_episode_body(episode['content']),
                source=episode['type'],
                source_description=episode['description'],
                reference_time=datetime.now(timezone.utc),
            )
            print(f'Added episode: Freakonomics Radio {i} ({episode["type"].value})')

        #################################################
        # BASIC SEARCH
        #################################################
        # The simplest way to retrieve relationships (edges)
        # from Graphiti is using the search method, which
        # performs a hybrid search combining semantic
        # similarity and BM25 text retrieval.
        #################################################

        # Perform a hybrid search combining semantic similarity and BM25 retrieval
        print(f"\nSearching for: '{edge_query}'")
        results = await graphiti.search(edge_query)

        # Print search results
        print('\nSearch Results:')
        _print_edge_results(results)

        #################################################
        # CENTER NODE SEARCH
        #################################################
        # For more contextually relevant results, you can
        # use a center node to rerank search results based
        # on their graph distance to a specific node
        #################################################

        # Use the source node of the top search result (an edge) as the
        # center node for reranking
        if results:
            center_node_uuid = results[0].source_node_uuid

            print('\nReranking search results based on graph distance:')
            print(f'Using center node UUID: {center_node_uuid}')

            reranked_results = await graphiti.search(
                edge_query, center_node_uuid=center_node_uuid
            )

            # Print reranked search results
            print('\nReranked Search Results:')
            _print_edge_results(reranked_results)
        else:
            print('No results found in the initial search to use as center node.')

        #################################################
        # NODE SEARCH USING SEARCH RECIPES
        #################################################
        # Graphiti provides predefined search recipes
        # optimized for different search scenarios.
        # Here we use NODE_HYBRID_SEARCH_RRF for retrieving
        # nodes directly instead of edges.
        #################################################

        # Example: Perform a node search using _search method with standard recipes
        print(
            '\nPerforming node search using _search method with standard recipe NODE_HYBRID_SEARCH_RRF:'
        )

        # Work on a deep copy so the shared recipe object is not mutated
        node_search_config = NODE_HYBRID_SEARCH_RRF.model_copy(deep=True)
        node_search_config.limit = 5  # Limit to 5 results

        # Execute the node search
        node_search_results = await graphiti._search(
            query='California Governor',
            config=node_search_config,
        )

        # Print node search results
        print('\nNode Search Results:')
        _print_node_results(node_search_results.nodes)

    finally:
        #################################################
        # CLEANUP
        #################################################
        # Always close the connection to Neo4j when
        # finished to properly release resources
        #################################################

        # Close the connection
        await graphiti.close()
        print('\nConnection closed')


if __name__ == '__main__':
    asyncio.run(main())