graphiti/graphiti_core/driver/neo4j_driver.py
Lars Varming a74bdf8321 Fix: Add Neo4j connection pool configuration parameters
Added configurable connection pool parameters to Neo4jDriver to fix
"network abort" and "Response write failure" errors caused by stale
connections.

Changes:
- Added max_connection_pool_size parameter (default: 200)
- Added connection_timeout parameter (default: 60s)
- Added max_connection_lifetime parameter (default: 7200s = 2hr)
- Added liveness_check_timeout parameter (default: 60s) - CRITICAL FIX
- Added connection_acquisition_timeout parameter (default: 120s)

The liveness_check_timeout=60s enables connection validation after 60s
of idle time, preventing reuse of stale connections that were closed by
the Neo4j server.

Version: graphiti-core-varming 0.23.2

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-14 14:31:47 +01:00

145 lines
5.4 KiB
Python

"""
Copyright 2024, Zep Software, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import logging
from collections.abc import Coroutine
from typing import Any
from neo4j import AsyncGraphDatabase, EagerResult
from typing_extensions import LiteralString
from graphiti_core.driver.driver import GraphDriver, GraphDriverSession, GraphProvider
from graphiti_core.graph_queries import get_fulltext_indices, get_range_indices
from graphiti_core.helpers import semaphore_gather
# Module-level logger, named after this module so log records can be filtered by origin.
logger = logging.getLogger(__name__)
class Neo4jDriver(GraphDriver):
    """Graph driver backed by the asynchronous Neo4j Python driver.

    Wraps an ``AsyncGraphDatabase`` driver instance and routes every query to
    the database name given at construction time unless a caller overrides it.
    """

    provider = GraphProvider.NEO4J
    default_group_id: str = ''

    def __init__(
        self,
        uri: str,
        user: str | None,
        password: str | None,
        database: str = 'neo4j',
        max_connection_pool_size: int = 200,
        connection_timeout: float = 60.0,
        max_connection_lifetime: float = 7200.0,
        liveness_check_timeout: float = 60.0,
        connection_acquisition_timeout: float = 120.0,
    ):
        """Initialize Neo4j driver with connection pool configuration.

        Args:
            uri: Neo4j connection URI (bolt://, neo4j://, etc.)
            user: Username for authentication; ``None`` is sent as ''.
            password: Password for authentication; ``None`` is sent as ''.
            database: Database name (default: 'neo4j')
            max_connection_pool_size: Max connections per host (default: 200)
            connection_timeout: Timeout for TCP connection (default: 60s)
            max_connection_lifetime: Max time connection is kept (default: 7200s = 2hr)
            liveness_check_timeout: Idle time before connection check (default: 60s)
            connection_acquisition_timeout: Timeout to acquire connection (default: 120s)
        """
        import asyncio

        super().__init__()
        self.client = AsyncGraphDatabase.driver(
            uri=uri,
            auth=(user or '', password or ''),
            max_connection_pool_size=max_connection_pool_size,
            connection_timeout=connection_timeout,
            max_connection_lifetime=max_connection_lifetime,
            liveness_check_timeout=liveness_check_timeout,
            connection_acquisition_timeout=connection_acquisition_timeout,
        )
        self._database = database
        self.aoss_client = None

        # Schedule index/constraint creation when constructed inside a running
        # event loop. The task is stored on the instance because the event loop
        # only keeps weak references to tasks; an unreferenced task may be
        # garbage-collected before it finishes.
        self._index_build_task = None
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # No event loop running; the caller is expected to invoke
            # build_indices_and_constraints() explicitly later.
            pass
        else:
            self._index_build_task = loop.create_task(self.build_indices_and_constraints())

    async def execute_query(self, cypher_query_: LiteralString, **kwargs: Any) -> EagerResult:
        """Run a Cypher query through the underlying driver.

        Args:
            cypher_query_: The Cypher statement to execute.
            **kwargs: Forwarded to the driver's ``execute_query``. Query
                parameters may be supplied as ``params`` (legacy) or
                ``parameters_`` (standard); a non-empty ``params`` wins when
                both are given. ``database_`` defaults to the configured
                database.

        Returns:
            The ``EagerResult`` produced by the driver.

        Raises:
            Exception: Any driver error is logged and re-raised unchanged.
        """
        # Always remove BOTH keys from kwargs. Popping 'parameters_' only when
        # 'params' is falsy would leave it in kwargs and make the call below
        # fail with a duplicate 'parameters_' keyword argument.
        legacy_params = kwargs.pop('params', None)
        standard_params = kwargs.pop('parameters_', None)
        params = legacy_params or standard_params
        if params is None:
            params = {}

        # database_ must be a keyword argument to the driver's execute_query,
        # NOT an entry in the query-parameters dict; otherwise queries go to
        # the default database instead of the configured one.
        kwargs.setdefault('database_', self._database)

        try:
            result = await self.client.execute_query(cypher_query_, parameters_=params, **kwargs)
        except Exception as e:
            logger.error(f'Error executing Neo4j query: {e}\n{cypher_query_}\n{params}')
            raise
        return result

    def session(self, database: str | None = None) -> GraphDriverSession:
        """Open a driver session on ``database`` or, if omitted, the configured database."""
        _database = database or self._database
        return self.client.session(database=_database)  # type: ignore

    async def close(self) -> None:
        """Close the underlying driver."""
        return await self.client.close()

    def delete_all_indexes(self) -> Coroutine:
        """Return an awaitable that drops all indexes in the configured database.

        NOTE(review): the statement relies on the ``db.indexes()`` procedure;
        confirm it is valid for the Neo4j server version being targeted.
        """
        # Pass database_ explicitly so the statement targets the configured
        # database, consistent with execute_query(), rather than the server
        # default.
        return self.client.execute_query(
            'CALL db.indexes() YIELD name DROP INDEX name',
            database_=self._database,
        )

    async def build_indices_and_constraints(self, delete_existing: bool = False):
        """Create the provider's range and fulltext indices.

        Args:
            delete_existing: When true, drop existing indexes first via
                ``delete_all_indexes``.
        """
        if delete_existing:
            await self.delete_all_indexes()

        range_indices: list[LiteralString] = get_range_indices(self.provider)
        fulltext_indices: list[LiteralString] = get_fulltext_indices(self.provider)
        index_queries: list[LiteralString] = range_indices + fulltext_indices

        await semaphore_gather(*[self.execute_query(query) for query in index_queries])

    async def health_check(self) -> None:
        """Check Neo4j connectivity by running the driver's verify_connectivity method.

        Raises:
            Exception: Re-raises whatever ``verify_connectivity`` raised, after
                logging it.
        """
        try:
            await self.client.verify_connectivity()
            return None
        except Exception as e:
            # Use the module logger (as execute_query does) instead of print so
            # failures reach the application's logging configuration.
            logger.error(f'Neo4j health check failed: {e}')
            raise