diff --git a/examples/azure-openai/.env.example b/examples/azure-openai/.env.example new file mode 100644 index 00000000..d8ed88e7 --- /dev/null +++ b/examples/azure-openai/.env.example @@ -0,0 +1,11 @@ +# Neo4j connection settings +NEO4J_URI=bolt://localhost:7687 +NEO4J_USER=neo4j +NEO4J_PASSWORD=password + +# Azure OpenAI settings +AZURE_OPENAI_ENDPOINT=https://your-resource-name.openai.azure.com +AZURE_OPENAI_API_KEY=your-api-key-here +AZURE_OPENAI_API_VERSION=2024-10-21 +AZURE_OPENAI_DEPLOYMENT=gpt-4 +AZURE_OPENAI_EMBEDDING_DEPLOYMENT=text-embedding-3-small diff --git a/examples/azure-openai/README.md b/examples/azure-openai/README.md new file mode 100644 index 00000000..b356ae5c --- /dev/null +++ b/examples/azure-openai/README.md @@ -0,0 +1,154 @@ +# Azure OpenAI with Neo4j Example + +This example demonstrates how to use Graphiti with Azure OpenAI and Neo4j to build a knowledge graph. + +## Prerequisites + +- Python 3.10+ +- Neo4j database (running locally or remotely) +- Azure OpenAI subscription with deployed models + +## Setup + +### 1. Install Dependencies + +From the project root: + +```bash +uv sync --extra dev +``` + +### 2. Configure Environment Variables + +Copy the `.env.example` file to `.env` and fill in your credentials: + +```bash +cd examples/azure-openai +cp .env.example .env +``` + +Edit `.env` with your actual values: + +```env +# Neo4j connection settings +NEO4J_URI=bolt://localhost:7687 +NEO4J_USER=neo4j +NEO4J_PASSWORD=your-password + +# Azure OpenAI settings +AZURE_OPENAI_ENDPOINT=https://your-resource-name.openai.azure.com +AZURE_OPENAI_API_KEY=your-api-key-here +AZURE_OPENAI_API_VERSION=2024-10-21 +AZURE_OPENAI_DEPLOYMENT=gpt-4 +AZURE_OPENAI_EMBEDDING_DEPLOYMENT=text-embedding-3-small +``` + +### 3. Azure OpenAI Model Deployments + +This example requires two Azure OpenAI model deployments: + +1. 
**Chat Completion Model**: Used for entity extraction and relationship analysis + - Recommended: GPT-4, GPT-4 Turbo, or GPT-4.1 + - Set the deployment name in `AZURE_OPENAI_DEPLOYMENT` + +2. **Embedding Model**: Used for semantic search + - Recommended: text-embedding-3-small or text-embedding-3-large + - Set the deployment name in `AZURE_OPENAI_EMBEDDING_DEPLOYMENT` + +### 4. Neo4j Setup + +Make sure Neo4j is running and accessible at the URI specified in your `.env` file. + +For local development: +- Download and install [Neo4j Desktop](https://neo4j.com/download/) +- Create a new database +- Start the database +- Use the credentials in your `.env` file + +## Running the Example + +```bash +cd examples/azure-openai +python azure_openai_neo4j.py +``` + +## What This Example Does + +1. **Initialization**: Sets up connections to Neo4j and Azure OpenAI +2. **Adding Episodes**: Ingests text and JSON data about California politics +3. **Basic Search**: Performs hybrid search combining semantic similarity and BM25 retrieval +4. **Center Node Search**: Reranks results based on graph distance to a specific node +5. **Cleanup**: Properly closes database connections + +## Key Concepts + +### Azure OpenAI Integration + +The example shows how to configure Graphiti to use Azure OpenAI: + +```python +# Initialize Azure OpenAI client +azure_client = AsyncAzureOpenAI( + azure_endpoint=azure_endpoint, + api_key=azure_api_key, + api_version=azure_api_version, +) + +# Create LLM and Embedder clients +llm_client = AzureOpenAILLMClient(azure_client=azure_client) +embedder_client = AzureOpenAIEmbedderClient( + azure_client=azure_client, + model=azure_embedding_deployment +) + +# Initialize Graphiti with custom clients +graphiti = Graphiti( + neo4j_uri, + neo4j_user, + neo4j_password, + llm_client=llm_client, + embedder=embedder_client, +) +``` + +### Episodes + +Episodes are the primary units of information in Graphiti. 
They can be: +- **Text**: Raw text content (e.g., transcripts, documents) +- **JSON**: Structured data with key-value pairs + +### Hybrid Search + +Graphiti combines multiple search strategies: +- **Semantic Search**: Uses embeddings to find semantically similar content +- **BM25**: Keyword-based text retrieval +- **Graph Traversal**: Leverages relationships between entities + +## Troubleshooting + +### Azure OpenAI API Errors + +- Verify your endpoint URL is correct (should end in `.openai.azure.com`) +- Check that your API key is valid +- Ensure your deployment names match actual deployments in Azure +- Verify API version is supported by your deployment + +### Neo4j Connection Issues + +- Ensure Neo4j is running +- Check firewall settings +- Verify credentials are correct +- Check URI format (should be `bolt://` or `neo4j://`) + +## Next Steps + +- Explore other search recipes in `graphiti_core/search/search_config_recipes.py` +- Try different episode types and content +- Experiment with custom entity definitions +- Add more episodes to build a larger knowledge graph + +## Related Examples + +- `examples/quickstart/` - Basic Graphiti usage with OpenAI +- `examples/podcast/` - Processing longer content +- `examples/ecommerce/` - Domain-specific knowledge graphs diff --git a/examples/azure-openai/azure_openai_neo4j.py b/examples/azure-openai/azure_openai_neo4j.py new file mode 100644 index 00000000..775a6dbc --- /dev/null +++ b/examples/azure-openai/azure_openai_neo4j.py @@ -0,0 +1,223 @@ +""" +Copyright 2025, Zep Software, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
import asyncio
import json
import logging
import os
from datetime import datetime, timezone
from logging import INFO

from dotenv import load_dotenv
from openai import AsyncAzureOpenAI

from graphiti_core import Graphiti
from graphiti_core.embedder.azure_openai import AzureOpenAIEmbedderClient
from graphiti_core.llm_client.azure_openai_client import AzureOpenAILLMClient
from graphiti_core.llm_client.config import LLMConfig
from graphiti_core.nodes import EpisodeType

#################################################
# CONFIGURATION
#################################################
# Set up logging and environment variables for
# connecting to Neo4j database and Azure OpenAI
#################################################

# Configure logging
logging.basicConfig(
    level=INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)
logger = logging.getLogger(__name__)

load_dotenv()

# Neo4j connection parameters
# Make sure Neo4j Desktop is running with a local DBMS started
neo4j_uri = os.environ.get('NEO4J_URI', 'bolt://localhost:7687')
neo4j_user = os.environ.get('NEO4J_USER', 'neo4j')
neo4j_password = os.environ.get('NEO4J_PASSWORD', 'password')

# Because of the defaults above, this only fires when a variable is
# explicitly set to an empty string.
if not neo4j_uri or not neo4j_user or not neo4j_password:
    raise ValueError('NEO4J_URI, NEO4J_USER, and NEO4J_PASSWORD must be set')

# Azure OpenAI connection parameters
azure_endpoint = os.environ.get('AZURE_OPENAI_ENDPOINT')
azure_api_key = os.environ.get('AZURE_OPENAI_API_KEY')
azure_api_version = os.environ.get('AZURE_OPENAI_API_VERSION', '2024-10-21')
azure_deployment = os.environ.get('AZURE_OPENAI_DEPLOYMENT', 'gpt-4')
azure_embedding_deployment = os.environ.get(
    'AZURE_OPENAI_EMBEDDING_DEPLOYMENT', 'text-embedding-3-small'
)

# The endpoint and key have no defaults, so they must be provided.
if not azure_endpoint or not azure_api_key:
    raise ValueError('AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY must be set')


def _print_results(results) -> None:
    """Print the UUID, fact and (when set) validity window of each search result."""
    for result in results:
        print(f'UUID: {result.uuid}')
        print(f'Fact: {result.fact}')
        # The validity bounds are optional; only print the ones that are set.
        valid_at = getattr(result, 'valid_at', None)
        if valid_at:
            print(f'Valid from: {valid_at}')
        invalid_at = getattr(result, 'invalid_at', None)
        if invalid_at:
            print(f'Valid until: {invalid_at}')
        print('---')


async def main() -> None:
    """Run the Azure OpenAI + Neo4j example end to end.

    Builds the Azure OpenAI LLM and embedder clients, ingests three sample
    episodes, runs a basic search followed by a center-node reranked search,
    and always closes the Graphiti connection on the way out.
    """
    #################################################
    # INITIALIZATION
    #################################################
    # Connect to Neo4j and Azure OpenAI, then set up
    # Graphiti indices. This is required before using
    # other Graphiti functionality
    #################################################

    # Initialize Azure OpenAI client
    azure_client = AsyncAzureOpenAI(
        azure_endpoint=azure_endpoint,
        api_key=azure_api_key,
        api_version=azure_api_version,
    )

    # Create LLM and Embedder clients.
    # The chat deployment name is passed as the model for both the main and
    # the small model; without this, AZURE_OPENAI_DEPLOYMENT would be read
    # but never used and the client would fall back to its default model name.
    llm_client = AzureOpenAILLMClient(
        azure_client=azure_client,
        config=LLMConfig(model=azure_deployment, small_model=azure_deployment),
    )
    embedder_client = AzureOpenAIEmbedderClient(
        azure_client=azure_client, model=azure_embedding_deployment
    )

    # Initialize Graphiti with Neo4j connection and Azure OpenAI clients
    graphiti = Graphiti(
        neo4j_uri,
        neo4j_user,
        neo4j_password,
        llm_client=llm_client,
        embedder=embedder_client,
    )

    try:
        #################################################
        # ADDING EPISODES
        #################################################
        # Episodes are the primary units of information
        # in Graphiti. They can be text or structured JSON
        # and are automatically processed to extract entities
        # and relationships.
        #################################################

        # Example: Add Episodes
        # Episodes list containing both text and JSON episodes
        episodes = [
            {
                'content': 'Kamala Harris is the Attorney General of California. She was previously '
                'the district attorney for San Francisco.',
                'type': EpisodeType.text,
                'description': 'podcast transcript',
            },
            {
                'content': 'As AG, Harris was in office from January 3, 2011 – January 3, 2017',
                'type': EpisodeType.text,
                'description': 'podcast transcript',
            },
            {
                'content': {
                    'name': 'Gavin Newsom',
                    'position': 'Governor',
                    'state': 'California',
                    'previous_role': 'Lieutenant Governor',
                    'previous_location': 'San Francisco',
                },
                'type': EpisodeType.json,
                'description': 'podcast metadata',
            },
        ]

        # Add episodes to the graph
        for i, episode in enumerate(episodes):
            content = episode['content']
            # Text episodes are passed through; JSON episodes are serialized
            # because episode_body is given a string in both cases.
            episode_body = content if isinstance(content, str) else json.dumps(content)
            await graphiti.add_episode(
                name=f'California Politics {i}',
                episode_body=episode_body,
                source=episode['type'],
                source_description=episode['description'],
                reference_time=datetime.now(timezone.utc),
            )
            print(f'Added episode: California Politics {i} ({episode["type"].value})')

        #################################################
        # BASIC SEARCH
        #################################################
        # The simplest way to retrieve relationships (edges)
        # from Graphiti is using the search method, which
        # performs a hybrid search combining semantic
        # similarity and BM25 text retrieval.
        #################################################

        query = 'Who was the California Attorney General?'

        # Perform a hybrid search combining semantic similarity and BM25 retrieval
        print(f"\nSearching for: '{query}'")
        results = await graphiti.search(query)

        # Print search results
        print('\nSearch Results:')
        _print_results(results)

        #################################################
        # CENTER NODE SEARCH
        #################################################
        # For more contextually relevant results, you can
        # use a center node to rerank search results based
        # on their graph distance to a specific node
        #################################################

        # Use the source node of the top search result as the center node
        # for reranking
        if results:
            center_node_uuid = results[0].source_node_uuid

            print('\nReranking search results based on graph distance:')
            print(f'Using center node UUID: {center_node_uuid}')

            reranked_results = await graphiti.search(query, center_node_uuid=center_node_uuid)

            # Print reranked search results
            print('\nReranked Search Results:')
            _print_results(reranked_results)
        else:
            print('No results found in the initial search to use as center node.')

    finally:
        #################################################
        # CLEANUP
        #################################################
        # Always close the connection to Neo4j when
        # finished to properly release resources
        #################################################

        # Close the connection
        await graphiti.close()
        print('\nConnection closed')


if __name__ == '__main__':
    asyncio.run(main())