diff --git a/.github/workflows/cla.yml b/.github/workflows/cla.yml index 98eba069..44052d66 100644 --- a/.github/workflows/cla.yml +++ b/.github/workflows/cla.yml @@ -26,7 +26,7 @@ jobs: path-to-document: "https://github.com/getzep/graphiti/blob/main/Zep-CLA.md" # e.g. a CLA or a DCO document # branch should not be protected branch: "main" - allowlist: paul-paliychuk,prasmussen15,danielchalef,dependabot[bot],ellipsis-dev[bot] + allowlist: paul-paliychuk,prasmussen15,danielchalef,dependabot[bot],ellipsis-dev # the followings are the optional inputs - If the optional inputs are not given, then default values will be taken #remote-organization-name: enter the remote organization name where the signatures should be stored (Default is storing the signatures in the same repository) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..278de4dc --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,38 @@ +name: Release to PyPI + +on: + push: + tags: ["v*.*.*"] + +env: + POETRY_VERSION: "1.8.3" + +jobs: + release: + runs-on: ubuntu-latest + permissions: + id-token: write + contents: write + environment: + name: release + url: https://pypi.org/p/zep-cloud + steps: + - uses: actions/checkout@v4 + - name: Install poetry + run: pipx install poetry==$POETRY_VERSION + - name: Set up Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "poetry" + - name: Compare pyproject version with tag + run: | + TAG_VERSION=${GITHUB_REF#refs/tags/} + if [ "$TAG_VERSION" != "v$(poetry version --short)" ]; then + echo "Tag version $TAG_VERSION does not match the project version $(poetry version --short)" + exit 1 + fi + - name: Build project for distribution + run: poetry build + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/README.md b/README.md index 1468647d..d6c697e4 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,32 @@ -# graphiti: Temporal Knowledge Graphs for Agentic Applications +
+# graphiti + +## Temporal Knowledge Graphs for Agentic Applications + +
+ +[![Discord](https://dcbadge.vercel.app/api/server/W8Kw6bsgXQ?style=flat)](https://discord.com/invite/W8Kw6bsgXQ) [![Lint](https://github.com/getzep/graphiti/actions/workflows/lint.yml/badge.svg)](https://github.com/getzep/graphiti/actions/workflows/lint.yml) [![Unit Tests](https://github.com/getzep/graphiti/actions/workflows/unit_tests.yml/badge.svg)](https://github.com/getzep/graphiti/actions/workflows/unit_tests.yml) [![MyPy Check](https://github.com/getzep/graphiti/actions/workflows/typecheck.yml/badge.svg)](https://github.com/getzep/graphiti/actions/workflows/typecheck.yml) [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/getzep/graphiti) -graphiti builds dynamic, temporally-aware knowledge graphs that represent complex, evolving relationships between entities over time. graphiti ingests both unstructured and structured data and the resulting graph may be queried using a fusion of time, full-text, semantic, and graph algorithm approaches. +
-TODO: Add video here. +
+graphiti builds dynamic, temporally aware knowledge graphs that represent complex, evolving relationships between entities over time. graphiti ingests both unstructured and structured data and the resulting graph may be queried using a fusion of time, full-text, semantic, and graph algorithm approaches. + +
+ + +

+ graphiti demo slides +

+ +
+ With graphiti, you can build LLM applications such as: - Assistants that learn from user interactions, fusing personal knowledge with dynamic data from business systems like CRMs and billing platforms. @@ -27,12 +45,15 @@ graphiti is designed for dynamic data and agentic use: - **Fast**: Search results in < 100ms, with latency primarily determined by the 3rd-party embedding API call. - **Schema Consistency**: Maintains a coherent graph structure by reusing existing schema, preventing unnecessary proliferation of node and edge types. + ## graphiti and Zep Memory -graphiti powers the core of Zep's next-generation memory layer for LLM-powered Assistants and Agents. +graphiti powers the core of [Zep's memory layer](https://www.getzep.com) for LLM-powered Assistants and Agents. We're excited to open-source graphiti, believing its potential reaches far beyond memory applications. + + ## Installation Requirements: @@ -48,16 +69,22 @@ Optional: > [!TIP] > The simplest way to install Neo4j is via [Neo4j Desktop](https://neo4j.com/download/). It provides a user-friendly interface to manage Neo4j instances and databases. -`pip install graphiti-core` +```bash +pip install graphiti-core +``` or -`poetry add graphiti-core` +```bash +poetry add graphiti-core +``` + + ## Quick Start > [!IMPORTANT] -> Zep uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment. Support for Anthropic and Groq LLM inferences is available, too. +> graphiti uses OpenAI for LLM inference and embedding. Ensure that an `OPENAI_API_KEY` is set in your environment. Support for Anthropic and Groq LLM inferences is available, too. ```python from graphiti_core import Graphiti @@ -67,6 +94,9 @@ from datetime import datetime # Initialize Graphiti graphiti = Graphiti("bolt://localhost:7687", "neo4j", "password") +# Initialize the graph database with graphiti's indices. This only needs to be done once. +graphiti.build_indices_and_constraints() + # Add episodes episodes = [ "Kamala Harris is the Attorney General of California. She was previously " @@ -118,223 +148,27 @@ await client.search('Who was the California Attorney General?', center_node_uuid graphiti.close() ``` + + ## Documentation -### Adding Episodes +Visit the Zep knowledge base for graphiti [Guides and API documentation](https://help.getzep.com/graphiti/graphiti). -Episodes represent a single data ingestion event. An `episode` is itself a node, and any nodes identified while ingesting the -episode are related to the episode via `MENTIONS` edges. - -Episodes enable querying for information at a point in time and understanding the provenance of nodes and their edge relationships. - -Supported episode types: - -- `text`: Unstructured text data -- `message`: Conversational messages of the format `speaker: message...` -- `json`: Structured data, processed distinctly from the other types - -The graph below was generated using the code in the [Quick Start](#quick-start). Each "podcast" is an individual episode. - -![Simple Graph Visualization](images/simple_graph.svg) - -#### Adding a `text` or `message` Episode - -Using the `EpisodeType.text` type: - -```python -await graphiti.add_episode( - name="tech_innovation_article", - episode_body=( - "MIT researchers have unveiled 'ClimateNet', an AI system capable of predicting " - "climate patterns with unprecedented accuracy. Early tests show it can forecast " - "major weather events up to three weeks in advance, potentially revolutionizing " - "disaster preparedness and agricultural planning." - ), - source=EpisodeType.text, - # A description of the source (e.g., "podcast", "news article") - source_description="Technology magazine article", - # The timestamp for when this episode occurred or was created - reference_time=datetime(2023, 11, 15, 9, 30), - # Additional metadata about the episode (optional) - metadata={ - "author": "Zara Patel", - "publication": "Tech Horizons Monthly", - "word_count": 39 - } -) -``` - -Using the `EpisodeType.message` type supports passing in multi-turn conversations in the `episode_body`. - -The text should be structured in `{role/name}: {message}` pairs. - -```python -await graphiti.add_episode( - name="Customer_Support_Interaction_1", - episode_body=( - "Customer: Hi, I'm having trouble with my Allbirds shoes. " - "The sole is coming off after only 2 months of use.\n" - "Support: I'm sorry to hear that. Can you please provide your order number?" - ), - source=EpisodeType.message, - source_description="Customer support chat", - reference_time=datetime(2024, 3, 15, 14, 45), - metadata={ - "channel": "Live Chat", - "agent_id": "SP001", - "customer_id": "C12345" - } -) -``` - -#### Adding an Epsiode using structured data in JSON format - -JSON documents can be arbitrarily nested. However, it's advisable to keep documents compact, as they must fit within your LLM's context window. - -> [!TIP] -> For large data imports, consider using the `add_episode_bulk` API to efficiently add multiple episodes at once. - -```python -product_data = { - "id": "PROD001", - "name": "Men's SuperLight Wool Runners", - "color": "Dark Grey", - "sole_color": "Medium Grey", - "material": "Wool", - "technology": "SuperLight Foam", - "price": 125.00, - "in_stock": True, - "last_updated": "2024-03-15T10:30:00Z" -} - -# Add the episode to the graph -await graphiti.add_episode( - name="Product Update - PROD001", - episode_body=product_data, # Pass the Python dictionary directly - source=EpisodeType.json, - source_description="Allbirds product catalog update", - reference_time=datetime.now(), - metadata={ - "update_type": "product_info", - "catalog_version": "v2.3" - } -) -``` - -#### Loading Episodes in Bulk - -Graphiti offers `add_episode_bulk` for efficient batch ingestion of episodes, significantly outperforming `add_episode` for large datasets. This method is highly recommended for bulk loading. - -> [!WARNING] -> Use `add_episode_bulk` only for populating empty graphs or when edge invalidation is not required. The bulk ingestion pipeline does not perform edge invalidation operations. - -```python -product_data = [ - { - "id": "PROD001", - "name": "Men's SuperLight Wool Runners", - "color": "Dark Grey", - "sole_color": "Medium Grey", - "material": "Wool", - "technology": "SuperLight Foam", - "price": 125.00, - "in_stock": true, - "last_updated": "2024-03-15T10:30:00Z" - }, - ... - { - "id": "PROD0100", - "name": "Kids Wool Runner-up Mizzles", - "color": "Natural Grey", - "sole_color": "Orange", - "material": "Wool", - "technology": "Water-repellent", - "price": 80.00, - "in_stock": true, - "last_updated": "2024-03-17T14:45:00Z" - } -] - -# Prepare the episodes for bulk loading -bulk_episodes = [ - RawEpisode( - name=f"Product Update - {product['id']}", - content=json.dumps(product), - source=EpisodeType.json, - source_description="Allbirds product catalog update", - reference_time=datetime.now() - ) - for product in product_data -] - -await graphiti.add_episode_bulk(bulk_episodes) -``` - -### Searching graphiti's graph - -The examples below demonstrate two search approaches in the graphiti library: - -1. **Hybrid Search:** - - ```python - await graphiti.search(query) - ``` - - Combines semantic similarity and BM25 retrieval, reranked using Reciprocal Rank Fusion. - - Example: Does a broad retrieval of facts related to Allbirds Wool Runners and Jane's purchase. - -2. **Node Distance Reranking:** - - ```python - await client.search(query, focal_node_uuid) - ``` - - Extends Hybrid Search above by prioritizing results based on proximity to a specified node in the graph. - - Example: Focuses on Jane-specific information, highlighting her wool allergy. - -Node Distance Reranking is particularly useful for entity-specific queries, providing more contextually relevant results. It weights facts by their closeness to the focal node, emphasizing information directly related to the entity of interest. - -This dual approach allows for both broad exploration and targeted, entity-specific information retrieval from the knowledge graph. - -```python -query = "Can Jane wear Allbirds Wool Runners?" -jane_node_uuid = "123e4567-e89b-12d3-a456-426614174000" - -def print_facts(edges): - print("\n".join([edge.fact for edge in edges])) - -# Hybrid Search -results = await graphiti.search(query) -print_facts(results) - -> The Allbirds Wool Runners are sold by Allbirds. -> Men's SuperLight Wool Runners - Dark Grey (Medium Grey Sole) has a runner silhouette. -> Jane purchased SuperLight Wool Runners. - -# Hybrid Search with Node Distance Reranking -await client.search(query, jane_node_uuid) -print_facts(results) - -> Jane purchased SuperLight Wool Runners. -> Jane is allergic to wool. -> The Allbirds Wool Runners are sold by Allbirds. -``` ## Status and Roadmap -wip, but endavour to not break API. +graphiti is under active development. We aim to maintain API stability while working on: -graphiti is under active development. Areas we're actively working on: +- [X] Implementing node and edge CRUD operations +- [ ] Improving performance and scalability +- [ ] Achieving good performance with different LLM and embedding models +- [ ] Creating a dedicated embedder interface +- [ ] Supporting custom graph schemas: + - Allow developers to provide their own defined node and edge classes when ingesting episodes + - Enable more flexible knowledge representation tailored to specific use cases +- [ ] Enhancing retrieval capabilities with more robust and configurable options +- [ ] Expanding test coverage to ensure reliability and catch edge cases -- performance and scalability -- search improvements -- node CRUD - -TODO: expand on the above. Include ROADMAP - -Latency scales sublinearly with graph size, with a cap ## Contributing diff --git a/graphiti_core/edges.py b/graphiti_core/edges.py index b269fad9..645a3b32 100644 --- a/graphiti_core/edges.py +++ b/graphiti_core/edges.py @@ -23,6 +23,7 @@ from uuid import uuid4 from neo4j import AsyncDriver from pydantic import BaseModel, Field +from graphiti_core.helpers import parse_db_date from graphiti_core.llm_client.config import EMBEDDING_DIM from graphiti_core.nodes import Node @@ -38,6 +39,9 @@ class Edge(BaseModel, ABC): @abstractmethod async def save(self, driver: AsyncDriver): ... + @abstractmethod + async def delete(self, driver: AsyncDriver): ... + def __hash__(self): return hash(self.uuid) @@ -46,6 +50,9 @@ class Edge(BaseModel, ABC): return self.uuid == other.uuid return False + @classmethod + async def get_by_uuid(cls, driver: AsyncDriver, uuid: str): ... + class EpisodicEdge(Edge): async def save(self, driver: AsyncDriver): @@ -66,9 +73,48 @@ class EpisodicEdge(Edge): return result + async def delete(self, driver: AsyncDriver): + result = await driver.execute_query( + """ + MATCH (n:Episodic)-[e:MENTIONS {uuid: $uuid}]->(m:Entity) + DELETE e + """, + uuid=self.uuid, + ) -# TODO: Neo4j doesn't support variables for edge types and labels. -# Right now we have all edge nodes as type RELATES_TO + logger.info(f'Deleted Edge: {self.uuid}') + + return result + + @classmethod + async def get_by_uuid(cls, driver: AsyncDriver, uuid: str): + records, _, _ = await driver.execute_query( + """ + MATCH (n:Episodic)-[e:MENTIONS {uuid: $uuid}]->(m:Entity) + RETURN + e.uuid As uuid, + n.uuid AS source_node_uuid, + m.uuid AS target_node_uuid, + e.created_at AS created_at + """, + uuid=uuid, + ) + + edges: list[EpisodicEdge] = [] + + for record in records: + edges.append( + EpisodicEdge( + uuid=record['uuid'], + source_node_uuid=record['source_node_uuid'], + target_node_uuid=record['target_node_uuid'], + created_at=record['created_at'].to_native(), + ) + ) + + logger.info(f'Found Edge: {uuid}') + + return edges[0] class EntityEdge(Edge): @@ -97,7 +143,7 @@ class EntityEdge(Edge): self.fact_embedding = embedding[:EMBEDDING_DIM] end = time() - logger.info(f'embedded {text} in {end-start} ms') + logger.info(f'embedded {text} in {end - start} ms') return embedding @@ -127,3 +173,60 @@ class EntityEdge(Edge): logger.info(f'Saved edge to neo4j: {self.uuid}') return result + + async def delete(self, driver: AsyncDriver): + result = await driver.execute_query( + """ + MATCH (n:Entity)-[e:RELATES_TO {uuid: $uuid}]->(m:Entity) + DELETE e + """, + uuid=self.uuid, + ) + + logger.info(f'Deleted Edge: {self.uuid}') + + return result + + @classmethod + async def get_by_uuid(cls, driver: AsyncDriver, uuid: str): + records, _, _ = await driver.execute_query( + """ + MATCH (n:Entity)-[e:RELATES_TO {uuid: $uuid}]->(m:Entity) + RETURN + e.uuid AS uuid, + n.uuid AS source_node_uuid, + m.uuid AS target_node_uuid, + e.created_at AS created_at, + e.name AS name, + e.fact AS fact, + e.fact_embedding AS fact_embedding, + e.episodes AS episodes, + e.expired_at AS expired_at, + e.valid_at AS valid_at, + e.invalid_at AS invalid_at + """, + uuid=uuid, + ) + + edges: list[EntityEdge] = [] + + for record in records: + edges.append( + EntityEdge( + uuid=record['uuid'], + source_node_uuid=record['source_node_uuid'], + target_node_uuid=record['target_node_uuid'], + fact=record['fact'], + name=record['name'], + episodes=record['episodes'], + fact_embedding=record['fact_embedding'], + created_at=record['created_at'].to_native(), + expired_at=parse_db_date(record['expired_at']), + valid_at=parse_db_date(record['valid_at']), + invalid_at=parse_db_date(record['invalid_at']), + ) + ) + + logger.info(f'Found Edge: {uuid}') + + return edges[0] diff --git a/graphiti_core/helpers.py b/graphiti_core/helpers.py new file mode 100644 index 00000000..6233d274 --- /dev/null +++ b/graphiti_core/helpers.py @@ -0,0 +1,7 @@ +from datetime import datetime + +from neo4j import time as neo4j_time + + +def parse_db_date(neo_date: neo4j_time.DateTime | None) -> datetime | None: + return neo_date.to_native() if neo_date else None diff --git a/graphiti_core/llm_client/__init__.py b/graphiti_core/llm_client/__init__.py index 9c1255b1..24f9a46f 100644 --- a/graphiti_core/llm_client/__init__.py +++ b/graphiti_core/llm_client/__init__.py @@ -1,7 +1,5 @@ -from .anthropic_client import AnthropicClient from .client import LLMClient from .config import LLMConfig -from .groq_client import GroqClient from .openai_client import OpenAIClient -__all__ = ['LLMClient', 'OpenAIClient', 'LLMConfig', 'AnthropicClient', 'GroqClient'] +__all__ = ['LLMClient', 'OpenAIClient', 'LLMConfig'] diff --git a/graphiti_core/nodes.py b/graphiti_core/nodes.py index fe4e0341..c2f7c833 100644 --- a/graphiti_core/nodes.py +++ b/graphiti_core/nodes.py @@ -75,6 +75,9 @@ class Node(BaseModel, ABC): @abstractmethod async def save(self, driver: AsyncDriver): ... + @abstractmethod + async def delete(self, driver: AsyncDriver): ... + def __hash__(self): return hash(self.uuid) @@ -83,6 +86,9 @@ class Node(BaseModel, ABC): return self.uuid == other.uuid return False + @classmethod + async def get_by_uuid(cls, driver: AsyncDriver, uuid: str): ... + class EpisodicNode(Node): source: EpisodeType = Field(description='source type') @@ -111,13 +117,58 @@ class EpisodicNode(Node): created_at=self.created_at, valid_at=self.valid_at, source=self.source.value, - _database='neo4j', ) logger.info(f'Saved Node to neo4j: {self.uuid}') return result + async def delete(self, driver: AsyncDriver): + result = await driver.execute_query( + """ + MATCH (n:Episodic {uuid: $uuid}) + DETACH DELETE n + """, + uuid=self.uuid, + ) + + logger.info(f'Deleted Node: {self.uuid}') + + return result + + @classmethod + async def get_by_uuid(cls, driver: AsyncDriver, uuid: str): + records, _, _ = await driver.execute_query( + """ + MATCH (e:Episodic {uuid: $uuid}) + RETURN e.content as content, + e.created_at as created_at, + e.valid_at as valid_at, + e.uuid as uuid, + e.name as name, + e.source_description as source_description, + e.source as source + """, + uuid=uuid, + ) + + episodes = [ + EpisodicNode( + content=record['content'], + created_at=record['created_at'].to_native().timestamp(), + valid_at=(record['valid_at'].to_native()), + uuid=record['uuid'], + source=EpisodeType.from_str(record['source']), + name=record['name'], + source_description=record['source_description'], + ) + for record in records + ] + + logger.info(f'Found Node: {uuid}') + + return episodes[0] + class EntityNode(Node): name_embedding: list[float] | None = Field(default=None, description='embedding of the name') @@ -153,3 +204,47 @@ class EntityNode(Node): logger.info(f'Saved Node to neo4j: {self.uuid}') return result + + async def delete(self, driver: AsyncDriver): + result = await driver.execute_query( + """ + MATCH (n:Entity {uuid: $uuid}) + DETACH DELETE n + """, + uuid=self.uuid, + ) + + logger.info(f'Deleted Node: {self.uuid}') + + return result + + @classmethod + async def get_by_uuid(cls, driver: AsyncDriver, uuid: str): + records, _, _ = await driver.execute_query( + """ + MATCH (n:Entity {uuid: $uuid}) + RETURN + n.uuid As uuid, + n.name AS name, + n.created_at AS created_at, + n.summary AS summary + """, + uuid=uuid, + ) + + nodes: list[EntityNode] = [] + + for record in records: + nodes.append( + EntityNode( + uuid=record['uuid'], + name=record['name'], + labels=['Entity'], + created_at=record['created_at'].to_native(), + summary=record['summary'], + ) + ) + + logger.info(f'Found Node: {uuid}') + + return nodes[0] diff --git a/graphiti_core/search/search.py b/graphiti_core/search/search.py index 03111225..03ed04a4 100644 --- a/graphiti_core/search/search.py +++ b/graphiti_core/search/search.py @@ -20,7 +20,7 @@ from enum import Enum from time import time from neo4j import AsyncDriver -from pydantic import BaseModel +from pydantic import BaseModel, Field from graphiti_core.edges import EntityEdge from graphiti_core.llm_client.config import EMBEDDING_DIM @@ -49,8 +49,8 @@ class Reranker(Enum): class SearchConfig(BaseModel): - num_edges: int = 10 - num_nodes: int = 10 + num_edges: int = Field(default=10) + num_nodes: int = Field(default=10) num_episodes: int = EPISODE_WINDOW_LEN search_methods: list[SearchMethod] reranker: Reranker | None @@ -63,12 +63,12 @@ class SearchResults(BaseModel): async def hybrid_search( - driver: AsyncDriver, - embedder, - query: str, - timestamp: datetime, - config: SearchConfig, - center_node_uuid: str | None = None, + driver: AsyncDriver, + embedder, + query: str, + timestamp: datetime, + config: SearchConfig, + center_node_uuid: str | None = None, ) -> SearchResults: start = time() @@ -79,11 +79,11 @@ async def hybrid_search( search_results = [] if config.num_episodes > 0: - episodes.extend(await retrieve_episodes(driver, timestamp)) + episodes.extend(await retrieve_episodes(driver, timestamp, config.num_episodes)) nodes.extend(await get_mentioned_nodes(driver, episodes)) if SearchMethod.bm25 in config.search_methods: - text_search = await edge_fulltext_search(query, driver) + text_search = await edge_fulltext_search(query, driver, 2 * config.num_edges) search_results.append(text_search) if SearchMethod.cosine_similarity in config.search_methods: @@ -94,7 +94,9 @@ async def hybrid_search( .embedding[:EMBEDDING_DIM] ) - similarity_search = await edge_similarity_search(search_vector, driver) + similarity_search = await edge_similarity_search( + search_vector, driver, 2 * config.num_edges + ) search_results.append(similarity_search) if len(search_results) > 1 and config.reranker is None: diff --git a/graphiti_core/search/search_utils.py b/graphiti_core/search/search_utils.py index e4789fef..6eeb5cea 100644 --- a/graphiti_core/search/search_utils.py +++ b/graphiti_core/search/search_utils.py @@ -3,13 +3,12 @@ import logging import re import typing from collections import defaultdict -from datetime import datetime from time import time from neo4j import AsyncDriver -from neo4j import time as neo4j_time from graphiti_core.edges import EntityEdge +from graphiti_core.helpers import parse_db_date from graphiti_core.nodes import EntityNode, EpisodicNode logger = logging.getLogger(__name__) @@ -17,10 +16,6 @@ logger = logging.getLogger(__name__) RELEVANT_SCHEMA_LIMIT = 3 -def parse_db_date(neo_date: neo4j_time.DateTime | None) -> datetime | None: - return neo_date.to_native() if neo_date else None - - async def get_mentioned_nodes(driver: AsyncDriver, episodes: list[EpisodicNode]): episode_uuids = [episode.uuid for episode in episodes] records, _, _ = await driver.execute_query( @@ -106,7 +101,7 @@ async def edge_similarity_search( # vector similarity search over embedded facts records, _, _ = await driver.execute_query( """ - CALL db.index.vector.queryRelationships("fact_embedding", 5, $search_vector) + CALL db.index.vector.queryRelationships("fact_embedding", $limit, $search_vector) YIELD relationship AS r, score MATCH (n)-[r:RELATES_TO]->(m) RETURN @@ -121,7 +116,7 @@ async def edge_similarity_search( r.expired_at AS expired_at, r.valid_at AS valid_at, r.invalid_at AS invalid_at - ORDER BY score DESC LIMIT $limit + ORDER BY score DESC """, search_vector=search_vector, limit=limit, @@ -316,8 +311,11 @@ async def hybrid_node_search( relevant_node_uuids = set() results = await asyncio.gather( - *[entity_fulltext_search(q, driver, limit or RELEVANT_SCHEMA_LIMIT) for q in queries], - *[entity_similarity_search(e, driver, limit or RELEVANT_SCHEMA_LIMIT) for e in embeddings], + *[entity_fulltext_search(q, driver, 2 * (limit or RELEVANT_SCHEMA_LIMIT)) for q in queries], + *[ + entity_similarity_search(e, driver, 2 * (limit or RELEVANT_SCHEMA_LIMIT)) + for e in embeddings + ], ) for result in results: diff --git a/images/graphiti-intro-slides-stock-2.gif b/images/graphiti-intro-slides-stock-2.gif new file mode 100644 index 00000000..28a2a694 Binary files /dev/null and b/images/graphiti-intro-slides-stock-2.gif differ diff --git a/poetry.lock b/poetry.lock index 22653da0..902d58a0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "annotated-types" @@ -1804,7 +1804,6 @@ description = "Nvidia JIT LTO Library" optional = false python-versions = ">=3" files = [ - {file = "nvidia_nvjitlink_cu12-12.6.20-py3-none-manylinux2014_aarch64.whl", hash = "sha256:84fb38465a5bc7c70cbc320cfd0963eb302ee25a5e939e9f512bbba55b6072fb"}, {file = "nvidia_nvjitlink_cu12-12.6.20-py3-none-manylinux2014_x86_64.whl", hash = "sha256:562ab97ea2c23164823b2a89cb328d01d45cb99634b8c65fe7cd60d14562bd79"}, {file = "nvidia_nvjitlink_cu12-12.6.20-py3-none-win_amd64.whl", hash = "sha256:ed3c43a17f37b0c922a919203d2d36cbef24d41cc3e6b625182f8b58203644f6"}, ] @@ -3758,4 +3757,4 @@ test = ["websockets"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "001663dfc8078ad473675c994b15191db1f53a844e23f40ffa4a704379a61132" +content-hash = "425298614e73ee8b1333343bdf27e876849d56b98671d1ad7b30ca6c6584d203" diff --git a/pyproject.toml b/pyproject.toml index 956e48d4..dd741e02 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,11 @@ [tool.poetry] name = "graphiti-core" -version = "0.0.1" +version = "0.1.0" description = "A temporal graph building library" authors = [ "Paul Paliychuk ", "Preston Rasmussen ", + "Daniel Chalef ", ] readme = "README.md" license = "Apache-2.0" @@ -20,9 +21,7 @@ fastapi = "^0.112.0" neo4j = "^5.23.0" sentence-transformers = "^3.0.1" diskcache = "^5.6.3" -arrow = "^1.3.0" openai = "^1.38.0" -anthropic = "^0.34.1" tenacity = "^9.0.0" [tool.poetry.dev-dependencies] @@ -36,6 +35,7 @@ ruff = "^0.6.2" pydantic = "^2.8.2" mypy = "^1.11.1" groq = "^0.9.0" +anthropic = "^0.34.1" ipykernel = "^6.29.5" jupyterlab = "^4.2.4" diskcache-stubs = "^5.6.3.6.20240818" diff --git a/tests/test_graphiti_int.py b/tests/test_graphiti_int.py index f1b48842..68c54a06 100644 --- a/tests/test_graphiti_int.py +++ b/tests/test_graphiti_int.py @@ -22,8 +22,6 @@ from datetime import datetime import pytest from dotenv import load_dotenv -from neo4j import AsyncGraphDatabase -from openai import OpenAI from graphiti_core.edges import EntityEdge, EpisodicEdge from graphiti_core.graphiti import Graphiti @@ -74,7 +72,7 @@ def format_context(facts): @pytest.mark.asyncio async def test_graphiti_init(): logger = setup_logging() - graphiti = Graphiti(NEO4J_URI, NEO4j_USER, NEO4j_PASSWORD, None) + graphiti = Graphiti(NEO4J_URI, NEO4j_USER, NEO4j_PASSWORD) edges = await graphiti.search('Freakenomics guest') @@ -92,11 +90,9 @@ async def test_graphiti_init(): @pytest.mark.asyncio async def test_graph_integration(): - driver = AsyncGraphDatabase.driver( - NEO4J_URI, - auth=(NEO4j_USER, NEO4j_PASSWORD), - ) - embedder = OpenAI().embeddings + client = Graphiti(NEO4J_URI, NEO4j_USER, NEO4j_PASSWORD) + embedder = client.llm_client.get_embedder() + driver = client.driver now = datetime.now() episode = EpisodicNode( @@ -139,10 +135,21 @@ async def test_graph_integration(): invalid_at=now, ) - entity_edge.generate_embedding(embedder) + await entity_edge.generate_embedding(embedder) nodes = [episode, alice_node, bob_node] edges = [episodic_edge_1, episodic_edge_2, entity_edge] + # test save await asyncio.gather(*[node.save(driver) for node in nodes]) await asyncio.gather(*[edge.save(driver) for edge in edges]) + + # test get + assert await EpisodicNode.get_by_uuid(driver, episode.uuid) is not None + assert await EntityNode.get_by_uuid(driver, alice_node.uuid) is not None + assert await EpisodicEdge.get_by_uuid(driver, episodic_edge_1.uuid) is not None + assert await EntityEdge.get_by_uuid(driver, entity_edge.uuid) is not None + + # test delete + await asyncio.gather(*[node.delete(driver) for node in nodes]) + await asyncio.gather(*[edge.delete(driver) for edge in edges]) diff --git a/tests/utils/search/search_utils_test.py b/tests/utils/search/search_utils_test.py index 01d78aaa..e4760976 100644 --- a/tests/utils/search/search_utils_test.py +++ b/tests/utils/search/search_utils_test.py @@ -113,8 +113,8 @@ async def test_hybrid_node_search_with_limit(): assert mock_fulltext_search.call_count == 1 assert mock_similarity_search.call_count == 1 # Verify that the limit was passed to the search functions - mock_fulltext_search.assert_called_with('Test', mock_driver, 1) - mock_similarity_search.assert_called_with([0.1, 0.2, 0.3], mock_driver, 1) + mock_fulltext_search.assert_called_with('Test', mock_driver, 2) + mock_similarity_search.assert_called_with([0.1, 0.2, 0.3], mock_driver, 2) @pytest.mark.asyncio @@ -148,5 +148,5 @@ async def test_hybrid_node_search_with_limit_and_duplicates(): assert set(node.name for node in results) == {'Alice', 'Bob', 'Charlie'} assert mock_fulltext_search.call_count == 1 assert mock_similarity_search.call_count == 1 - mock_fulltext_search.assert_called_with('Test', mock_driver, 2) - mock_similarity_search.assert_called_with([0.1, 0.2, 0.3], mock_driver, 2) + mock_fulltext_search.assert_called_with('Test', mock_driver, 4) + mock_similarity_search.assert_called_with([0.1, 0.2, 0.3], mock_driver, 4)