From 41c3da244061b379892bd1b296cf96a3f2965cb1 Mon Sep 17 00:00:00 2001 From: bechbd Date: Tue, 26 Aug 2025 07:51:20 -0800 Subject: [PATCH] Fixed issue where creating indices was not called for Neptune and added missing quickstart example (#850) * Rebased Neptune changes based on significant rework done * Updated the README documentation * Fixed linting and formatting * Update README.md Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com> * Update graphiti_core/driver/neptune_driver.py Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com> * Update README.md Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com> * Addressed feedback from code review * Updated the README documentation for clarity * Updated the README and neptune_driver based on PR feedback * Update node_db_queries.py * bug: Fixed issue with missing call to create indicies for Neptune and added quickstart example * chore: added pyright to ignore the attribute not in GrapHDriver * Fixed quickstart with feedback from automated PR --------- Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com> Co-authored-by: Preston Rasmussen <109292228+prasmussen15@users.noreply.github.com> --- examples/quickstart/quickstart_neptune.py | 253 ++++++++++++++++++ .../maintenance/graph_data_operations.py | 3 +- 2 files changed, 255 insertions(+), 1 deletion(-) create mode 100644 examples/quickstart/quickstart_neptune.py diff --git a/examples/quickstart/quickstart_neptune.py b/examples/quickstart/quickstart_neptune.py new file mode 100644 index 00000000..76a494e3 --- /dev/null +++ b/examples/quickstart/quickstart_neptune.py @@ -0,0 +1,253 @@ +""" +Copyright 2025, Zep Software, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import asyncio +import json +import logging +import os +from datetime import datetime, timezone +from logging import INFO + +from dotenv import load_dotenv + +from graphiti_core import Graphiti +from graphiti_core.driver.neptune_driver import NeptuneDriver +from graphiti_core.nodes import EpisodeType +from graphiti_core.search.search_config_recipes import NODE_HYBRID_SEARCH_RRF + +################################################# +# CONFIGURATION +################################################# +# Set up logging and environment variables for +# connecting to Neptune database +################################################# + +# Configure logging +logging.basicConfig( + level=INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', +) +logger = logging.getLogger(__name__) + +load_dotenv() + +# Neptune and OpenSearch connection parameters +neptune_uri = os.environ.get('NEPTUNE_HOST') +neptune_port = int(os.environ.get('NEPTUNE_PORT', 8182)) +aoss_host = os.environ.get('AOSS_HOST') + +if not neptune_uri: + raise ValueError('NEPTUNE_HOST must be set') + + +if not aoss_host: + raise ValueError('AOSS_HOST must be set') + + +async def main(): + ################################################# + # INITIALIZATION + ################################################# + # Connect to Neptune and set up Graphiti indices + # This is required before using other Graphiti + # functionality + ################################################# + + # Initialize Graphiti with Neptune connection + driver = NeptuneDriver(host=neptune_uri, aoss_host=aoss_host, port=neptune_port) + + graphiti = Graphiti(graph_driver=driver) + + try: + # Initialize the graph database with graphiti's indices. This only needs to be done once. + await driver.delete_aoss_indices() + await driver._delete_all_data() + await graphiti.build_indices_and_constraints() + + ################################################# + # ADDING EPISODES + ################################################# + # Episodes are the primary units of information + # in Graphiti. They can be text or structured JSON + # and are automatically processed to extract entities + # and relationships. + ################################################# + + # Example: Add Episodes + # Episodes list containing both text and JSON episodes + episodes = [ + { + 'content': 'Kamala Harris is the Attorney General of California. She was previously ' + 'the district attorney for San Francisco.', + 'type': EpisodeType.text, + 'description': 'podcast transcript', + }, + { + 'content': 'As AG, Harris was in office from January 3, 2011 – January 3, 2017', + 'type': EpisodeType.text, + 'description': 'podcast transcript', + }, + { + 'content': { + 'name': 'Gavin Newsom', + 'position': 'Governor', + 'state': 'California', + 'previous_role': 'Lieutenant Governor', + 'previous_location': 'San Francisco', + }, + 'type': EpisodeType.json, + 'description': 'podcast metadata', + }, + { + 'content': { + 'name': 'Gavin Newsom', + 'position': 'Governor', + 'term_start': 'January 7, 2019', + 'term_end': 'Present', + }, + 'type': EpisodeType.json, + 'description': 'podcast metadata', + }, + ] + + # Add episodes to the graph + for i, episode in enumerate(episodes): + await graphiti.add_episode( + name=f'Freakonomics Radio {i}', + episode_body=episode['content'] + if isinstance(episode['content'], str) + else json.dumps(episode['content']), + source=episode['type'], + source_description=episode['description'], + reference_time=datetime.now(timezone.utc), + ) + print(f'Added episode: Freakonomics Radio {i} ({episode["type"].value})') + + await graphiti.build_communities() + + ################################################# + # BASIC SEARCH + ################################################# + # The simplest way to retrieve relationships (edges) + # from Graphiti is using the search method, which + # performs a hybrid search combining semantic + # similarity and BM25 text retrieval. + ################################################# + + # Perform a hybrid search combining semantic similarity and BM25 retrieval + print("\nSearching for: 'Who was the California Attorney General?'") + results = await graphiti.search('Who was the California Attorney General?') + + # Print search results + print('\nSearch Results:') + for result in results: + print(f'UUID: {result.uuid}') + print(f'Fact: {result.fact}') + if hasattr(result, 'valid_at') and result.valid_at: + print(f'Valid from: {result.valid_at}') + if hasattr(result, 'invalid_at') and result.invalid_at: + print(f'Valid until: {result.invalid_at}') + print('---') + + ################################################# + # CENTER NODE SEARCH + ################################################# + # For more contextually relevant results, you can + # use a center node to rerank search results based + # on their graph distance to a specific node + ################################################# + + # Use the top search result's UUID as the center node for reranking + if results and len(results) > 0: + # Get the source node UUID from the top result + center_node_uuid = results[0].source_node_uuid + + print('\nReranking search results based on graph distance:') + print(f'Using center node UUID: {center_node_uuid}') + + reranked_results = await graphiti.search( + 'Who was the California Attorney General?', center_node_uuid=center_node_uuid + ) + + # Print reranked search results + print('\nReranked Search Results:') + for result in reranked_results: + print(f'UUID: {result.uuid}') + print(f'Fact: {result.fact}') + if hasattr(result, 'valid_at') and result.valid_at: + print(f'Valid from: {result.valid_at}') + if hasattr(result, 'invalid_at') and result.invalid_at: + print(f'Valid until: {result.invalid_at}') + print('---') + else: + print('No results found in the initial search to use as center node.') + + ################################################# + # NODE SEARCH USING SEARCH RECIPES + ################################################# + # Graphiti provides predefined search recipes + # optimized for different search scenarios. + # Here we use NODE_HYBRID_SEARCH_RRF for retrieving + # nodes directly instead of edges. + ################################################# + + # Example: Perform a node search using _search method with standard recipes + print( + '\nPerforming node search using _search method with standard recipe NODE_HYBRID_SEARCH_RRF:' + ) + + # Use a predefined search configuration recipe and modify its limit + node_search_config = NODE_HYBRID_SEARCH_RRF.model_copy(deep=True) + node_search_config.limit = 5 # Limit to 5 results + + # Execute the node search + node_search_results = await graphiti._search( + query='California Governor', + config=node_search_config, + ) + + # Print node search results + print('\nNode Search Results:') + for node in node_search_results.nodes: + print(f'Node UUID: {node.uuid}') + print(f'Node Name: {node.name}') + node_summary = node.summary[:100] + '...' if len(node.summary) > 100 else node.summary + print(f'Content Summary: {node_summary}') + print(f'Node Labels: {", ".join(node.labels)}') + print(f'Created At: {node.created_at}') + if hasattr(node, 'attributes') and node.attributes: + print('Attributes:') + for key, value in node.attributes.items(): + print(f' {key}: {value}') + print('---') + + finally: + ################################################# + # CLEANUP + ################################################# + # Always close the connection to Neptune when + # finished to properly release resources + ################################################# + + # Close the connection + await graphiti.close() + print('\nConnection closed') + + +if __name__ == '__main__': + asyncio.run(main()) + \ No newline at end of file diff --git a/graphiti_core/utils/maintenance/graph_data_operations.py b/graphiti_core/utils/maintenance/graph_data_operations.py index 03950dd5..8de54d8e 100644 --- a/graphiti_core/utils/maintenance/graph_data_operations.py +++ b/graphiti_core/utils/maintenance/graph_data_operations.py @@ -35,7 +35,8 @@ logger = logging.getLogger(__name__) async def build_indices_and_constraints(driver: GraphDriver, delete_existing: bool = False): if driver.provider == GraphProvider.NEPTUNE: - return # Neptune does not need indexes built + await driver.create_aoss_indices() # pyright: ignore[reportAttributeAccessIssue] + return if delete_existing: records, _, _ = await driver.execute_query( """