add merge
commit 0402619ed7
38 changed files with 2942 additions and 3713 deletions
2
.github/actions/cognee_setup/action.yml
vendored
|
|
@ -41,4 +41,4 @@ runs:
|
|||
EXTRA_ARGS="$EXTRA_ARGS --extra $extra"
|
||||
done
|
||||
fi
|
||||
uv sync --extra api --extra docs --extra evals --extra codegraph --extra ollama --extra dev --extra neo4j $EXTRA_ARGS
|
||||
uv sync --extra api --extra docs --extra evals --extra codegraph --extra ollama --extra dev --extra neo4j --extra redis $EXTRA_ARGS
|
||||
|
|
|
|||
67
.github/workflows/e2e_tests.yml
vendored
|
|
@ -1,4 +1,6 @@
|
|||
name: Reusable Integration Tests
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
|
|
@ -264,3 +266,68 @@ jobs:
|
|||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||
run: uv run python ./cognee/tests/test_edge_ingestion.py
|
||||
|
||||
|
||||
|
||||
run_concurrent_subprocess_access_test:
|
||||
name: Concurrent Subprocess access test
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
services:
|
||||
postgres:
|
||||
image: pgvector/pgvector:pg17
|
||||
env:
|
||||
POSTGRES_USER: cognee
|
||||
POSTGRES_PASSWORD: cognee
|
||||
POSTGRES_DB: cognee_db
|
||||
options: >-
|
||||
--health-cmd pg_isready
|
||||
--health-interval 10s
|
||||
--health-timeout 5s
|
||||
--health-retries 5
|
||||
ports:
|
||||
- 5432:5432
|
||||
|
||||
redis:
|
||||
image: redis:7
|
||||
ports:
|
||||
- 6379:6379
|
||||
options: >-
|
||||
--health-cmd "redis-cli ping"
|
||||
--health-interval 5s
|
||||
--health-timeout 3s
|
||||
--health-retries 5
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Cognee Setup
|
||||
uses: ./.github/actions/cognee_setup
|
||||
with:
|
||||
python-version: '3.11.x'
|
||||
extra-dependencies: "postgres redis"
|
||||
|
||||
- name: Run Concurrent subprocess access test (Kuzu/Lancedb/Postgres)
|
||||
env:
|
||||
ENV: dev
|
||||
LLM_MODEL: ${{ secrets.LLM_MODEL }}
|
||||
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
|
||||
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
|
||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||
GRAPH_DATABASE_PROVIDER: 'kuzu'
|
||||
CACHING: true
|
||||
SHARED_KUZU_LOCK: true
|
||||
DB_PROVIDER: 'postgres'
|
||||
DB_NAME: 'cognee_db'
|
||||
DB_HOST: '127.0.0.1'
|
||||
DB_PORT: 5432
|
||||
DB_USERNAME: cognee
|
||||
DB_PASSWORD: cognee
|
||||
run: uv run python ./cognee/tests/test_concurrent_subprocess_access.py
|
||||
75
.github/workflows/examples_tests.yml
vendored
|
|
@ -110,6 +110,81 @@ jobs:
|
|||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||
run: uv run python ./examples/python/dynamic_steps_example.py
|
||||
|
||||
test-temporal-example:
|
||||
name: Run Temporal Tests
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Check out repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Cognee Setup
|
||||
uses: ./.github/actions/cognee_setup
|
||||
with:
|
||||
python-version: '3.11.x'
|
||||
|
||||
- name: Run Temporal Example
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
LLM_MODEL: ${{ secrets.LLM_MODEL }}
|
||||
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
|
||||
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
|
||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||
run: uv run python ./examples/python/temporal_example.py
|
||||
|
||||
test-ontology-example:
|
||||
name: Run Ontology Tests
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Check out repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Cognee Setup
|
||||
uses: ./.github/actions/cognee_setup
|
||||
with:
|
||||
python-version: '3.11.x'
|
||||
|
||||
- name: Run Ontology Demo Example
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
LLM_MODEL: ${{ secrets.LLM_MODEL }}
|
||||
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
|
||||
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
|
||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||
run: uv run python ./examples/python/ontology_demo_example.py
|
||||
|
||||
test-agentic-reasoning:
|
||||
name: Run Agentic Reasoning Tests
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Check out repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Cognee Setup
|
||||
uses: ./.github/actions/cognee_setup
|
||||
with:
|
||||
python-version: '3.11.x'
|
||||
|
||||
- name: Run Agentic Reasoning Example
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
LLM_MODEL: ${{ secrets.LLM_MODEL }}
|
||||
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
|
||||
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
|
||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||
run: uv run python ./examples/python/agentic_reasoning_procurement_example.py
|
||||
|
||||
test-memify:
|
||||
name: Run Memify Example
|
||||
runs-on: ubuntu-22.04
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ on:
|
|||
python-versions:
|
||||
required: false
|
||||
type: string
|
||||
default: '["3.10.x", "3.11.x", "3.12.x"]'
|
||||
default: '["3.10.x", "3.12.x", "3.13.x"]'
|
||||
secrets:
|
||||
LLM_PROVIDER:
|
||||
required: true
|
||||
|
|
@ -193,6 +193,13 @@ jobs:
|
|||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Path setup
|
||||
if: ${{ matrix.os }} == 'windows-latest'
|
||||
shell: bash
|
||||
run: |
|
||||
PATH=$(printf '%s' "$PATH" | tr ':' $'\n' | grep -vi '/git/usr/bin' | paste -sd: -)
|
||||
export PATH
|
||||
|
||||
- name: Run Soft Deletion Tests
|
||||
env:
|
||||
ENV: 'dev'
|
||||
|
|
|
|||
2
.github/workflows/test_suites.yml
vendored
|
|
@ -85,7 +85,7 @@ jobs:
|
|||
needs: [basic-tests, e2e-tests]
|
||||
uses: ./.github/workflows/test_different_operating_systems.yml
|
||||
with:
|
||||
python-versions: '["3.10.x", "3.11.x", "3.12.x"]'
|
||||
python-versions: '["3.10.x", "3.11.x", "3.12.x", "3.13.x"]'
|
||||
secrets: inherit
|
||||
|
||||
# Matrix-based vector database tests
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ async def add(
|
|||
extraction_rules: Optional[Dict[str, Any]] = None,
|
||||
tavily_config: Optional[BaseModel] = None,
|
||||
soup_crawler_config: Optional[BaseModel] = None,
|
||||
data_per_batch: Optional[int] = 20,
|
||||
):
|
||||
"""
|
||||
Add data to Cognee for knowledge graph processing.
|
||||
|
|
@ -235,6 +236,7 @@ async def add(
|
|||
vector_db_config=vector_db_config,
|
||||
graph_db_config=graph_db_config,
|
||||
incremental_loading=incremental_loading,
|
||||
data_per_batch=data_per_batch,
|
||||
):
|
||||
pipeline_run_info = run_info
|
||||
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@ async def cognify(
|
|||
incremental_loading: bool = True,
|
||||
custom_prompt: Optional[str] = None,
|
||||
temporal_cognify: bool = False,
|
||||
data_per_batch: int = 20,
|
||||
):
|
||||
"""
|
||||
Transform ingested data into a structured knowledge graph.
|
||||
|
|
@ -238,6 +239,7 @@ async def cognify(
|
|||
graph_db_config=graph_db_config,
|
||||
incremental_loading=incremental_loading,
|
||||
pipeline_name="cognify_pipeline",
|
||||
data_per_batch=data_per_batch,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
from uuid import UUID
|
||||
from typing import Union, Optional, List, Type
|
||||
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
from cognee.modules.engine.models.node_set import NodeSet
|
||||
from cognee.modules.users.models import User
|
||||
from cognee.modules.search.types import SearchResult, SearchType, CombinedSearchResult
|
||||
|
|
@ -8,6 +9,9 @@ from cognee.modules.users.methods import get_default_user
|
|||
from cognee.modules.search.methods import search as search_function
|
||||
from cognee.modules.data.methods import get_authorized_existing_datasets
|
||||
from cognee.modules.data.exceptions import DatasetNotFoundError
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
async def search(
|
||||
|
|
@ -175,6 +179,13 @@ async def search(
|
|||
if not datasets:
|
||||
raise DatasetNotFoundError(message="No datasets found.")
|
||||
|
||||
graph_engine = await get_graph_engine()
|
||||
is_empty = await graph_engine.is_empty()
|
||||
|
||||
if is_empty:
|
||||
logger.warning("Search attempt on an empty knowledge graph")
|
||||
return []
|
||||
|
||||
filtered_search_results = await search_function(
|
||||
query_text=query_text,
|
||||
query_type=query_type,
|
||||
|
|
|
|||
2
cognee/infrastructure/databases/cache/__init__.py
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
from .get_cache_engine import get_cache_engine
|
||||
from .config import get_cache_config
|
||||
42
cognee/infrastructure/databases/cache/cache_db_interface.py
vendored
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
from abc import ABC, abstractmethod
|
||||
from contextlib import contextmanager
|
||||
|
||||
|
||||
class CacheDBInterface(ABC):
|
||||
"""
|
||||
Abstract base class for distributed cache coordination systems (e.g., Redis, Memcached).
|
||||
Provides a common interface for lock acquisition, release, and context-managed locking.
|
||||
"""
|
||||
|
||||
def __init__(self, host: str, port: int, lock_key: str):
|
||||
self.host = host
|
||||
self.port = port
|
||||
self.lock_key = lock_key
|
||||
self.lock = None
|
||||
|
||||
@abstractmethod
|
||||
def acquire_lock(self):
|
||||
"""
|
||||
Acquire a lock on the given key.
|
||||
Must be implemented by subclasses.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def release_lock(self):
|
||||
"""
|
||||
Release the lock if it is held.
|
||||
Must be implemented by subclasses.
|
||||
"""
|
||||
pass
|
||||
|
||||
@contextmanager
|
||||
def hold_lock(self):
|
||||
"""
|
||||
Context manager for safely acquiring and releasing the lock.
|
||||
"""
|
||||
self.acquire_lock()
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
self.release_lock()
|
||||
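Any additional coordination backend would subclass the interface above. A minimal sketch of a hypothetical process-local backend (illustrative only, useful for local testing rather than real cross-process coordination; the class name and defaults are assumptions, only the import path comes from this diff):

import threading

from cognee.infrastructure.databases.cache.cache_db_interface import CacheDBInterface


class LocalLockAdapter(CacheDBInterface):
    # Hypothetical backend: a process-local threading.Lock standing in for Redis.
    def __init__(self, host: str = "localhost", port: int = 0, lock_key: str = "local-lock"):
        super().__init__(host, port, lock_key)
        self._lock = threading.Lock()

    def acquire_lock(self):
        # Block until the lock is free, mirroring the blocking Redis behaviour.
        self._lock.acquire()
        self.lock = self._lock
        return self.lock

    def release_lock(self):
        # Release only if this adapter currently holds the lock.
        if self.lock is not None:
            self._lock.release()
            self.lock = None


# The inherited hold_lock() context manager then wraps acquire_lock/release_lock:
with LocalLockAdapter(lock_key="demo-lock").hold_lock():
    pass  # critical section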
39
cognee/infrastructure/databases/cache/config.py
vendored
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
from functools import lru_cache
|
||||
|
||||
|
||||
class CacheConfig(BaseSettings):
|
||||
"""
|
||||
Configuration for distributed cache systems (e.g., Redis), used for locking or coordination.
|
||||
|
||||
Attributes:
|
||||
- caching: Distributed caching on/off.
- shared_kuzu_lock: Shared Kuzu lock logic on/off.
|
||||
- cache_host: Hostname of the cache service.
|
||||
- cache_port: Port number for the cache service.
|
||||
- agentic_lock_expire: Automatic lock expiration time (in seconds).
|
||||
- agentic_lock_timeout: Maximum time (in seconds) to wait for the lock release.
|
||||
"""
|
||||
|
||||
caching: bool = False
|
||||
shared_kuzu_lock: bool = False
|
||||
cache_host: str = "localhost"
|
||||
cache_port: int = 6379
|
||||
agentic_lock_expire: int = 240
|
||||
agentic_lock_timeout: int = 300
|
||||
|
||||
model_config = SettingsConfigDict(env_file=".env", extra="allow")
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"caching": self.caching,
|
||||
"shared_kuzu_lock": self.shared_kuzu_lock,
|
||||
"cache_host": self.cache_host,
|
||||
"cache_port": self.cache_port,
|
||||
"agentic_lock_expire": self.agentic_lock_expire,
|
||||
"agentic_lock_timeout": self.agentic_lock_timeout,
|
||||
}
|
||||
|
||||
|
||||
@lru_cache
|
||||
def get_cache_config():
|
||||
return CacheConfig()
|
||||
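Because CacheConfig is a pydantic BaseSettings, its fields are populated from the environment (or a .env file). A minimal sketch, assuming the environment variable names simply mirror the field names, as the CI job above does with CACHING and SHARED_KUZU_LOCK:

import os

# Values must be set before the first get_cache_config() call, since it is lru_cached.
os.environ["CACHING"] = "true"
os.environ["SHARED_KUZU_LOCK"] = "true"
os.environ["CACHE_HOST"] = "localhost"
os.environ["CACHE_PORT"] = "6379"

from cognee.infrastructure.databases.cache.config import get_cache_config

config = get_cache_config()
print(config.to_dict())
# {'caching': True, 'shared_kuzu_lock': True, 'cache_host': 'localhost', 'cache_port': 6379, ...}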
59
cognee/infrastructure/databases/cache/get_cache_engine.py
vendored
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
"""Factory to get the appropriate cache coordination engine (e.g., Redis)."""
|
||||
|
||||
from functools import lru_cache
|
||||
from cognee.infrastructure.databases.cache.config import get_cache_config
|
||||
|
||||
from cognee.infrastructure.databases.cache.cache_db_interface import CacheDBInterface
|
||||
|
||||
config = get_cache_config()
|
||||
|
||||
|
||||
@lru_cache
|
||||
def create_cache_engine(
|
||||
cache_host: str,
|
||||
cache_port: int,
|
||||
lock_key: str,
|
||||
agentic_lock_expire: int = 240,
|
||||
agentic_lock_timeout: int = 300,
|
||||
):
|
||||
"""
|
||||
Factory function to instantiate a cache coordination backend (currently Redis).
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
- cache_host: Hostname or IP of the cache server.
|
||||
- cache_port: Port number to connect to.
|
||||
- lock_key: Identifier used for the locking resource.
|
||||
- agentic_lock_expire: Automatic expiration (in seconds) applied to the lock once it is acquired.
|
||||
- agentic_lock_timeout: Max time to wait for the lock before failing.
|
||||
|
||||
Returns:
|
||||
--------
|
||||
- CacheDBInterface: An instance of the appropriate cache adapter, or None when caching is disabled. TODO: only Redis is supported for now; split this logic out once more backends are added.
|
||||
"""
|
||||
if config.caching:
|
||||
from cognee.infrastructure.databases.cache.redis.RedisAdapter import RedisAdapter
|
||||
|
||||
return RedisAdapter(
|
||||
host=cache_host,
|
||||
port=cache_port,
|
||||
lock_name=lock_key,
|
||||
timeout=agentic_lock_expire,
|
||||
blocking_timeout=agentic_lock_timeout,
|
||||
)
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def get_cache_engine(lock_key: str) -> CacheDBInterface:
|
||||
"""
|
||||
Returns a cache adapter instance using current context configuration.
|
||||
"""
|
||||
|
||||
return create_cache_engine(
|
||||
cache_host=config.cache_host,
|
||||
cache_port=config.cache_port,
|
||||
lock_key=lock_key,
|
||||
agentic_lock_expire=config.agentic_lock_expire,
|
||||
agentic_lock_timeout=config.agentic_lock_timeout,
|
||||
)
|
||||
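A usage sketch for the factory above: when caching is disabled it returns None, so callers are expected to guard before locking (the lock_key value here is illustrative):

from cognee.infrastructure.databases.cache import get_cache_engine

cache_lock = get_cache_engine(lock_key="kuzu-lock-example")

if cache_lock is not None:
    # Only one process holding this key runs the critical section at a time.
    with cache_lock.hold_lock():
        ...
else:
    # Caching disabled: proceed without distributed coordination.
    ...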
49
cognee/infrastructure/databases/cache/redis/RedisAdapter.py
vendored
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
import redis
|
||||
from contextlib import contextmanager
|
||||
from cognee.infrastructure.databases.cache.cache_db_interface import CacheDBInterface
|
||||
|
||||
|
||||
class RedisAdapter(CacheDBInterface):
|
||||
def __init__(self, host, port, lock_name, timeout=240, blocking_timeout=300):
|
||||
super().__init__(host, port, lock_name)
|
||||
self.redis = redis.Redis(host=host, port=port)
|
||||
self.timeout = timeout
|
||||
self.blocking_timeout = blocking_timeout
|
||||
|
||||
def acquire_lock(self):
|
||||
"""
|
||||
Acquire the Redis lock manually. Raises if acquisition fails.
|
||||
"""
|
||||
self.lock = self.redis.lock(
|
||||
name=self.lock_key,
|
||||
timeout=self.timeout,
|
||||
blocking_timeout=self.blocking_timeout,
|
||||
)
|
||||
|
||||
acquired = self.lock.acquire()
|
||||
if not acquired:
|
||||
raise RuntimeError(f"Could not acquire Redis lock: {self.lock_key}")
|
||||
|
||||
return self.lock
|
||||
|
||||
def release_lock(self):
|
||||
"""
|
||||
Release the Redis lock manually, if held.
|
||||
"""
|
||||
if self.lock:
|
||||
try:
|
||||
self.lock.release()
|
||||
self.lock = None
|
||||
except redis.exceptions.LockError:
|
||||
pass
|
||||
|
||||
@contextmanager
|
||||
def hold_lock(self):
|
||||
"""
|
||||
Context manager for acquiring and releasing the Redis lock automatically.
|
||||
"""
|
||||
self.acquire_lock()
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
self.release_lock()
|
||||
|
|
@ -162,5 +162,5 @@ def create_graph_engine(
|
|||
|
||||
raise EnvironmentError(
|
||||
f"Unsupported graph database provider: {graph_database_provider}. "
|
||||
f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['neo4j', 'kuzu', 'kuzu-remote', 'memgraph', 'neptune', 'neptune_analytics'])}"
|
||||
f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['neo4j', 'kuzu', 'kuzu-remote', 'neptune', 'neptune_analytics'])}"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -159,6 +159,11 @@ class GraphDBInterface(ABC):
|
|||
- get_connections
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
async def is_empty(self) -> bool:
|
||||
logger.warning("is_empty() is not implemented")
|
||||
return True
|
||||
|
||||
@abstractmethod
|
||||
async def query(self, query: str, params: dict) -> List[Any]:
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import os
|
|||
import json
|
||||
import asyncio
|
||||
import tempfile
|
||||
from uuid import UUID
|
||||
from uuid import UUID, uuid5, NAMESPACE_OID
|
||||
from kuzu import Connection
|
||||
from kuzu.database import Database
|
||||
from datetime import datetime, timezone
|
||||
|
|
@ -23,9 +23,14 @@ from cognee.infrastructure.engine import DataPoint
|
|||
from cognee.modules.storage.utils import JSONEncoder
|
||||
from cognee.modules.engine.utils.generate_timestamp_datapoint import date_to_int
|
||||
from cognee.tasks.temporal_graph.models import Timestamp
|
||||
from cognee.infrastructure.databases.cache.config import get_cache_config
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
cache_config = get_cache_config()
|
||||
if cache_config.shared_kuzu_lock:
|
||||
from cognee.infrastructure.databases.cache.get_cache_engine import get_cache_engine
|
||||
|
||||
|
||||
class KuzuAdapter(GraphDBInterface):
|
||||
"""
|
||||
|
|
@ -39,12 +44,20 @@ class KuzuAdapter(GraphDBInterface):
|
|||
|
||||
def __init__(self, db_path: str):
|
||||
"""Initialize Kuzu database connection and schema."""
|
||||
self.open_connections = 0
|
||||
self._is_closed = False
|
||||
self.db_path = db_path # Path for the database directory
|
||||
self.db: Optional[Database] = None
|
||||
self.connection: Optional[Connection] = None
|
||||
if cache_config.shared_kuzu_lock:
|
||||
self.redis_lock = get_cache_engine(
|
||||
lock_key="kuzu-lock-" + str(uuid5(NAMESPACE_OID, db_path))
|
||||
)
|
||||
else:
|
||||
self.executor = ThreadPoolExecutor()
|
||||
self._initialize_connection()
|
||||
self.KUZU_ASYNC_LOCK = asyncio.Lock()
|
||||
self._connection_change_lock = asyncio.Lock()
|
||||
|
||||
def _initialize_connection(self) -> None:
|
||||
"""Initialize the Kuzu database connection and schema."""
|
||||
|
|
@ -185,6 +198,15 @@ class KuzuAdapter(GraphDBInterface):
|
|||
except FileNotFoundError:
|
||||
logger.warning(f"Kuzu S3 storage file not found: {self.db_path}")
|
||||
|
||||
async def is_empty(self) -> bool:
|
||||
query = """
|
||||
MATCH (n)
|
||||
RETURN true
|
||||
LIMIT 1;
|
||||
"""
|
||||
query_result = await self.query(query)
|
||||
return len(query_result) == 0
|
||||
|
||||
async def query(self, query: str, params: Optional[dict] = None) -> List[Tuple]:
|
||||
"""
|
||||
Execute a Kuzu query asynchronously with automatic reconnection.
|
||||
|
|
@ -209,9 +231,13 @@ class KuzuAdapter(GraphDBInterface):
|
|||
params = params or {}
|
||||
|
||||
def blocking_query():
|
||||
lock_acquired = False
|
||||
try:
|
||||
if cache_config.shared_kuzu_lock:
|
||||
self.redis_lock.acquire_lock()
|
||||
lock_acquired = True
|
||||
if not self.connection:
|
||||
logger.debug("Reconnecting to Kuzu database...")
|
||||
logger.info("Reconnecting to Kuzu database...")
|
||||
self._initialize_connection()
|
||||
|
||||
result = self.connection.execute(query, params)
|
||||
|
|
@ -225,12 +251,47 @@ class KuzuAdapter(GraphDBInterface):
|
|||
val = val.as_py()
|
||||
processed_rows.append(val)
|
||||
rows.append(tuple(processed_rows))
|
||||
|
||||
return rows
|
||||
except Exception as e:
|
||||
logger.error(f"Query execution failed: {str(e)}")
|
||||
raise
|
||||
finally:
|
||||
if cache_config.shared_kuzu_lock and lock_acquired:
|
||||
try:
|
||||
self.close()
|
||||
finally:
|
||||
self.redis_lock.release_lock()
|
||||
|
||||
return await loop.run_in_executor(self.executor, blocking_query)
|
||||
if cache_config.shared_kuzu_lock:
|
||||
async with self._connection_change_lock:
|
||||
self.open_connections += 1
|
||||
logger.info(f"Open connections after open: {self.open_connections}")
|
||||
try:
|
||||
result = blocking_query()
|
||||
finally:
|
||||
self.open_connections -= 1
|
||||
logger.info(f"Open connections after close: {self.open_connections}")
|
||||
return result
|
||||
else:
|
||||
result = await loop.run_in_executor(self.executor, blocking_query)
|
||||
return result
|
||||
|
||||
def close(self):
|
||||
if self.connection:
|
||||
del self.connection
|
||||
self.connection = None
|
||||
if self.db:
|
||||
del self.db
|
||||
self.db = None
|
||||
self._is_closed = True
|
||||
logger.info("Kuzu database closed successfully")
|
||||
|
||||
def reopen(self):
|
||||
if self._is_closed:
|
||||
self._is_closed = False
|
||||
self._initialize_connection()
|
||||
logger.info("Kuzu database re-opened successfully")
|
||||
|
||||
@asynccontextmanager
|
||||
async def get_session(self):
|
||||
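Distilled to its essentials, the shared-lock path above is an acquire → connect → query → close → release sequence around every Kuzu call. A simplified standalone sketch of that pattern (names are illustrative, not the adapter's API; parameters are passed positionally to Connection.execute, as the adapter itself does):

from typing import List, Optional, Tuple

import kuzu


def locked_query(db_path: str, cache_lock, query: str, params: Optional[dict] = None) -> List[Tuple]:
    # Run a single Kuzu query while holding the shared Redis lock; the database
    # is opened and closed inside the lock so no process keeps it open afterwards.
    cache_lock.acquire_lock()
    db = None
    connection = None
    try:
        db = kuzu.Database(db_path)
        connection = kuzu.Connection(db)
        result = connection.execute(query, params or {})
        rows = []
        while result.has_next():
            rows.append(tuple(result.get_next()))
        return rows
    finally:
        # Drop the Kuzu handles before releasing the lock, mirroring the adapter's
        # close() call in its finally block.
        del connection
        del db
        cache_lock.release_lock()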
|
|
@ -1557,44 +1618,6 @@ class KuzuAdapter(GraphDBInterface):
|
|||
logger.error(f"Failed to delete graph data: {e}")
|
||||
raise
|
||||
|
||||
async def clear_database(self) -> None:
|
||||
"""
|
||||
Clear all data from the database by deleting the database files and reinitializing.
|
||||
|
||||
This method removes all files associated with the database and reinitializes the Kuzu
|
||||
database structure, ensuring a completely empty state. It handles exceptions that might
|
||||
occur during file deletions or initializations carefully.
|
||||
"""
|
||||
try:
|
||||
if self.connection:
|
||||
self.connection = None
|
||||
if self.db:
|
||||
self.db.close()
|
||||
self.db = None
|
||||
|
||||
db_dir = os.path.dirname(self.db_path)
|
||||
db_name = os.path.basename(self.db_path)
|
||||
file_storage = get_file_storage(db_dir)
|
||||
|
||||
if await file_storage.file_exists(db_name):
|
||||
await file_storage.remove_all()
|
||||
logger.info(f"Deleted Kuzu database files at {self.db_path}")
|
||||
|
||||
# Reinitialize the database
|
||||
self._initialize_connection()
|
||||
# Verify the database is empty
|
||||
result = self.connection.execute("MATCH (n:Node) RETURN COUNT(n)")
|
||||
count = result.get_next()[0] if result.has_next() else 0
|
||||
if count > 0:
|
||||
logger.warning(
|
||||
f"Database still contains {count} nodes after clearing, forcing deletion"
|
||||
)
|
||||
self.connection.execute("MATCH (n:Node) DETACH DELETE n")
|
||||
logger.info("Database cleared successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"Error during database clearing: {e}")
|
||||
raise
|
||||
|
||||
async def get_document_subgraph(self, data_id: str):
|
||||
"""
|
||||
Get all nodes that should be deleted when removing a document.
|
||||
|
|
|
|||
File diff suppressed because it is too large
|
|
@ -87,6 +87,15 @@ class Neo4jAdapter(GraphDBInterface):
|
|||
async with self.driver.session(database=self.graph_database_name) as session:
|
||||
yield session
|
||||
|
||||
async def is_empty(self) -> bool:
|
||||
query = """
|
||||
RETURN EXISTS {
|
||||
MATCH (n)
|
||||
} AS node_exists;
|
||||
"""
|
||||
query_result = await self.query(query)
|
||||
return not query_result[0]["node_exists"]
|
||||
|
||||
@deadlock_retry()
|
||||
async def query(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -105,7 +105,6 @@ class LoaderEngine:
|
|||
async def load_file(
|
||||
self,
|
||||
file_path: str,
|
||||
file_stream: Optional[Any],
|
||||
preferred_loaders: Optional[List[str]] = None,
|
||||
**kwargs,
|
||||
):
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ async def run_pipeline(
|
|||
vector_db_config: dict = None,
|
||||
graph_db_config: dict = None,
|
||||
incremental_loading: bool = False,
|
||||
data_per_batch: int = 20,
|
||||
):
|
||||
validate_pipeline_tasks(tasks)
|
||||
await setup_and_check_environment(vector_db_config, graph_db_config)
|
||||
|
|
@ -50,6 +51,7 @@ async def run_pipeline(
|
|||
pipeline_name=pipeline_name,
|
||||
context={"dataset": dataset},
|
||||
incremental_loading=incremental_loading,
|
||||
data_per_batch=data_per_batch,
|
||||
):
|
||||
yield run_info
|
||||
|
||||
|
|
@ -62,6 +64,7 @@ async def run_pipeline_per_dataset(
|
|||
pipeline_name: str = "custom_pipeline",
|
||||
context: dict = None,
|
||||
incremental_loading=False,
|
||||
data_per_batch: int = 20,
|
||||
):
|
||||
# Will only be used if ENABLE_BACKEND_ACCESS_CONTROL is set to True
|
||||
await set_database_global_context_variables(dataset.id, dataset.owner_id)
|
||||
|
|
@ -77,7 +80,7 @@ async def run_pipeline_per_dataset(
|
|||
return
|
||||
|
||||
pipeline_run = run_tasks(
|
||||
tasks, dataset.id, data, user, pipeline_name, context, incremental_loading
|
||||
tasks, dataset.id, data, user, pipeline_name, context, incremental_loading, data_per_batch
|
||||
)
|
||||
|
||||
async for pipeline_run_info in pipeline_run:
|
||||
|
|
|
|||
|
|
@ -24,7 +24,6 @@ from cognee.modules.pipelines.operations import (
|
|||
log_pipeline_run_complete,
|
||||
log_pipeline_run_error,
|
||||
)
|
||||
from .run_tasks_with_telemetry import run_tasks_with_telemetry
|
||||
from .run_tasks_data_item import run_tasks_data_item
|
||||
from ..tasks.task import Task
|
||||
|
||||
|
|
@ -60,6 +59,7 @@ async def run_tasks(
|
|||
pipeline_name: str = "unknown_pipeline",
|
||||
context: dict = None,
|
||||
incremental_loading: bool = False,
|
||||
data_per_batch: int = 20,
|
||||
):
|
||||
if not user:
|
||||
user = await get_default_user()
|
||||
|
|
@ -89,7 +89,11 @@ async def run_tasks(
|
|||
if incremental_loading:
|
||||
data = await resolve_data_directories(data)
|
||||
|
||||
# Create async tasks per data item that will run the pipeline for the data item
|
||||
# Create and gather batches of async tasks of data items that will run the pipeline for the data item
|
||||
results = []
|
||||
for start in range(0, len(data), data_per_batch):
|
||||
data_batch = data[start : start + data_per_batch]
|
||||
|
||||
data_item_tasks = [
|
||||
asyncio.create_task(
|
||||
run_tasks_data_item(
|
||||
|
|
@ -104,9 +108,10 @@ async def run_tasks(
|
|||
incremental_loading,
|
||||
)
|
||||
)
|
||||
for data_item in data
|
||||
for data_item in data_batch
|
||||
]
|
||||
results = await asyncio.gather(*data_item_tasks)
|
||||
|
||||
results.extend(await asyncio.gather(*data_item_tasks))
|
||||
|
||||
# Remove skipped data items from results
|
||||
results = [result for result in results if result]
|
||||
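The change above runs data items in fixed-size batches instead of gathering everything at once; stripped of the pipeline plumbing, the batching pattern reduces to the following generic sketch (function and variable names are illustrative):

import asyncio
from typing import Any, Awaitable, Callable, List


async def run_in_batches(
    items: List[Any],
    handler: Callable[[Any], Awaitable[Any]],
    batch_size: int = 20,
) -> List[Any]:
    # Run handler over items, at most batch_size concurrently, preserving order.
    results: List[Any] = []
    for start in range(0, len(items), batch_size):
        batch = items[start : start + batch_size]
        # Each batch is awaited fully before the next one starts, which caps
        # concurrent work (e.g. open database connections) at batch_size.
        results.extend(await asyncio.gather(*(handler(item) for item in batch)))
    # Drop falsy results, mirroring how skipped data items are filtered out above.
    return [result for result in results if result]


async def _demo() -> None:
    async def square(n: int) -> int:
        await asyncio.sleep(0)
        return n * n

    print(await run_in_batches(list(range(7)), square, batch_size=3))


if __name__ == "__main__":
    asyncio.run(_demo())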
|
|
|
|||
|
|
@ -115,9 +115,8 @@ async def run_tasks_data_item_incremental(
|
|||
data_point = (
|
||||
await session.execute(select(Data).filter(Data.id == data_id))
|
||||
).scalar_one_or_none()
|
||||
data_point.pipeline_status[pipeline_name] = {
|
||||
str(dataset.id): DataItemStatus.DATA_ITEM_PROCESSING_COMPLETED
|
||||
}
|
||||
status_for_pipeline = data_point.pipeline_status.setdefault(pipeline_name, {})
|
||||
status_for_pipeline[str(dataset.id)] = DataItemStatus.DATA_ITEM_PROCESSING_COMPLETED
|
||||
await session.merge(data_point)
|
||||
await session.commit()
|
||||
|
||||
|
|
|
|||
25
cognee/tests/subprocesses/reader.py
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
import asyncio
|
||||
import time
|
||||
from cognee.infrastructure.databases.graph.kuzu.adapter import KuzuAdapter
|
||||
|
||||
# This will create the test.db if it doesn't exist
|
||||
|
||||
|
||||
async def main():
|
||||
adapter = KuzuAdapter("test.db")
|
||||
result = await adapter.query("MATCH (n:Node) RETURN COUNT(n)")
|
||||
print(f"Reader: Found {result[0][0]} nodes")
|
||||
result = await adapter.query("MATCH (n:Node) RETURN COUNT(n)")
|
||||
print(f"Reader: Found {result[0][0]} nodes")
|
||||
result = await adapter.query("MATCH (n:Node) RETURN COUNT(n)")
|
||||
print(f"Reader: Found {result[0][0]} nodes")
|
||||
result = await adapter.query("MATCH (n:Node) RETURN COUNT(n)")
|
||||
print(f"Reader: Found {result[0][0]} nodes")
|
||||
result = await adapter.query("MATCH (n:Node) RETURN COUNT(n)")
|
||||
print(f"Reader: Found {result} nodes")
|
||||
result = await adapter.query("MATCH (n:Node) RETURN COUNT(n)")
|
||||
print(f"Reader: Found {result[0][0]} nodes")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
31
cognee/tests/subprocesses/simple_cognify_1.py
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
import asyncio
|
||||
import cognee
|
||||
from cognee.shared.logging_utils import setup_logging, INFO
|
||||
from cognee.api.v1.search import SearchType
|
||||
|
||||
|
||||
async def main():
|
||||
await cognee.cognify(datasets=["first_cognify_dataset"])
|
||||
|
||||
query_text = (
|
||||
"Tell me what is in the context. Additionally write out 'FIRST_COGNIFY' before your answer"
|
||||
)
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.GRAPH_COMPLETION,
|
||||
query_text=query_text,
|
||||
datasets=["first_cognify_dataset"],
|
||||
)
|
||||
|
||||
print("Search results:")
|
||||
for result_text in search_results:
|
||||
print(result_text)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logger = setup_logging(log_level=INFO)
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
try:
|
||||
loop.run_until_complete(main())
|
||||
finally:
|
||||
loop.run_until_complete(loop.shutdown_asyncgens())
|
||||
31
cognee/tests/subprocesses/simple_cognify_2.py
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
import asyncio
|
||||
import cognee
|
||||
from cognee.shared.logging_utils import setup_logging, INFO
|
||||
from cognee.api.v1.search import SearchType
|
||||
|
||||
|
||||
async def main():
|
||||
await cognee.cognify(datasets=["second_cognify_dataset"])
|
||||
|
||||
query_text = (
|
||||
"Tell me what is in the context. Additionally write out 'SECOND_COGNIFY' before your answer"
|
||||
)
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.GRAPH_COMPLETION,
|
||||
query_text=query_text,
|
||||
datasets=["second_cognify_dataset"],
|
||||
)
|
||||
|
||||
print("Search results:")
|
||||
for result_text in search_results:
|
||||
print(result_text)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logger = setup_logging(log_level=INFO)
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
try:
|
||||
loop.run_until_complete(main())
|
||||
finally:
|
||||
loop.run_until_complete(loop.shutdown_asyncgens())
|
||||
32
cognee/tests/subprocesses/writer.py
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
import asyncio
|
||||
import time
|
||||
import uuid
|
||||
from cognee.modules.data.processing.document_types import PdfDocument
|
||||
from cognee.infrastructure.databases.graph.kuzu.adapter import KuzuAdapter
|
||||
|
||||
|
||||
def create_node(name):
|
||||
document = PdfDocument(
|
||||
id=uuid.uuid4(),
|
||||
name=name,
|
||||
raw_data_location=name,
|
||||
external_metadata="test_external_metadata",
|
||||
mime_type="test_mime",
|
||||
)
|
||||
return document
|
||||
|
||||
|
||||
async def main():
|
||||
adapter = KuzuAdapter("test.db")
|
||||
nodes = [create_node(f"Node{i}") for i in range(5)]
|
||||
|
||||
print("Writer: Starting...")
|
||||
await adapter.add_nodes(nodes)
|
||||
|
||||
print("writer finished...")
|
||||
|
||||
time.sleep(10)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
84
cognee/tests/test_concurrent_subprocess_access.py
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
import os
|
||||
import asyncio
|
||||
import cognee
|
||||
import pathlib
|
||||
import subprocess
|
||||
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
"""
|
||||
Test: Redis-based Kùzu Locking Across Subprocesses
|
||||
|
||||
This test ensures the Redis shared lock correctly serializes access to the Kùzu
|
||||
database when multiple subprocesses (writer/reader and cognify tasks) run in parallel.
|
||||
If this test fails, it indicates the locking mechanism is not properly handling
|
||||
concurrent subprocess access.
|
||||
"""
|
||||
|
||||
|
||||
async def concurrent_subprocess_access():
|
||||
data_directory_path = str(
|
||||
pathlib.Path(
|
||||
os.path.join(pathlib.Path(__file__).parent, ".data_storage/concurrent_tasks")
|
||||
).resolve()
|
||||
)
|
||||
cognee_directory_path = str(
|
||||
pathlib.Path(
|
||||
os.path.join(pathlib.Path(__file__).parent, ".cognee_system/concurrent_tasks")
|
||||
).resolve()
|
||||
)
|
||||
|
||||
subprocess_directory_path = str(
|
||||
pathlib.Path(os.path.join(pathlib.Path(__file__).parent, "subprocesses/")).resolve()
|
||||
)
|
||||
|
||||
writer_path = subprocess_directory_path + "/writer.py"
|
||||
reader_path = subprocess_directory_path + "/reader.py"
|
||||
|
||||
cognee.config.data_root_directory(data_directory_path)
|
||||
cognee.config.system_root_directory(cognee_directory_path)
|
||||
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
writer_process = subprocess.Popen([os.sys.executable, str(writer_path)])
|
||||
|
||||
reader_process = subprocess.Popen([os.sys.executable, str(reader_path)])
|
||||
|
||||
# Wait for both processes to complete
|
||||
writer_process.wait()
|
||||
reader_process.wait()
|
||||
|
||||
logger.info("Basic write read subprocess example finished")
|
||||
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
text = """
|
||||
This is the text of the first cognify subprocess
|
||||
"""
|
||||
await cognee.add(text, dataset_name="first_cognify_dataset")
|
||||
|
||||
text = """
|
||||
This is the text of the second cognify subprocess
|
||||
"""
|
||||
await cognee.add(text, dataset_name="second_cognify_dataset")
|
||||
|
||||
first_cognify_path = subprocess_directory_path + "/simple_cognify_1.py"
|
||||
second_cognify_path = subprocess_directory_path + "/simple_cognify_2.py"
|
||||
|
||||
first_cognify_process = subprocess.Popen([os.sys.executable, str(first_cognify_path)])
|
||||
|
||||
second_cognify_process = subprocess.Popen([os.sys.executable, str(second_cognify_path)])
|
||||
|
||||
# Wait for both processes to complete
|
||||
first_cognify_process.wait()
|
||||
second_cognify_process.wait()
|
||||
|
||||
logger.info("Database concurrent subprocess example finished")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(concurrent_subprocess_access())
|
||||
|
|
@ -47,10 +47,26 @@ async def main():
|
|||
pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
|
||||
)
|
||||
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
|
||||
graph_engine = await get_graph_engine()
|
||||
|
||||
is_empty = await graph_engine.is_empty()
|
||||
|
||||
assert is_empty, "Kuzu graph database is not empty"
|
||||
|
||||
await cognee.add([explanation_file_path_quantum], dataset_name)
|
||||
|
||||
is_empty = await graph_engine.is_empty()
|
||||
|
||||
assert is_empty, "Kuzu graph database should be empty before cognify"
|
||||
|
||||
await cognee.cognify([dataset_name])
|
||||
|
||||
is_empty = await graph_engine.is_empty()
|
||||
|
||||
assert not is_empty, "Kuzu graph database should not be empty"
|
||||
|
||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||
|
||||
vector_engine = get_vector_engine()
|
||||
|
|
@ -114,11 +130,10 @@ async def main():
|
|||
assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
|
||||
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
|
||||
graph_engine = await get_graph_engine()
|
||||
nodes, edges = await graph_engine.get_graph_data()
|
||||
assert len(nodes) == 0 and len(edges) == 0, "Kuzu graph database is not empty"
|
||||
is_empty = await graph_engine.is_empty()
|
||||
|
||||
assert is_empty, "Kuzu graph database is not empty"
|
||||
|
||||
finally:
|
||||
# Ensure cleanup even if tests fail
|
||||
|
|
|
|||
|
|
@ -1,105 +0,0 @@
|
|||
import os
|
||||
|
||||
import pathlib
|
||||
import cognee
|
||||
from cognee.infrastructure.files.storage import get_storage_config
|
||||
from cognee.modules.search.operations import get_history
|
||||
from cognee.modules.users.methods import get_default_user
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.modules.search.types import SearchType
|
||||
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
async def main():
|
||||
cognee.config.set_graph_database_provider("memgraph")
|
||||
data_directory_path = str(
|
||||
pathlib.Path(
|
||||
os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_memgraph")
|
||||
).resolve()
|
||||
)
|
||||
cognee.config.data_root_directory(data_directory_path)
|
||||
cognee_directory_path = str(
|
||||
pathlib.Path(
|
||||
os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_memgraph")
|
||||
).resolve()
|
||||
)
|
||||
cognee.config.system_root_directory(cognee_directory_path)
|
||||
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
dataset_name = "cs_explanations"
|
||||
|
||||
explanation_file_path_nlp = os.path.join(
|
||||
pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
|
||||
)
|
||||
await cognee.add([explanation_file_path_nlp], dataset_name)
|
||||
|
||||
explanation_file_path_quantum = os.path.join(
|
||||
pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
|
||||
)
|
||||
|
||||
await cognee.add([explanation_file_path_quantum], dataset_name)
|
||||
|
||||
await cognee.cognify([dataset_name])
|
||||
|
||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||
|
||||
vector_engine = get_vector_engine()
|
||||
random_node = (await vector_engine.search("Entity_name", "Quantum computer"))[0]
|
||||
random_node_name = random_node.payload["text"]
|
||||
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name
|
||||
)
|
||||
assert len(search_results) != 0, "The search results list is empty."
|
||||
print("\n\nExtracted sentences are:\n")
|
||||
for result in search_results:
|
||||
print(f"{result}\n")
|
||||
|
||||
search_results = await cognee.search(query_type=SearchType.CHUNKS, query_text=random_node_name)
|
||||
assert len(search_results) != 0, "The search results list is empty."
|
||||
print("\n\nExtracted chunks are:\n")
|
||||
for result in search_results:
|
||||
print(f"{result}\n")
|
||||
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.SUMMARIES, query_text=random_node_name
|
||||
)
|
||||
assert len(search_results) != 0, "Query related summaries don't exist."
|
||||
print("\nExtracted results are:\n")
|
||||
for result in search_results:
|
||||
print(f"{result}\n")
|
||||
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.NATURAL_LANGUAGE,
|
||||
query_text=f"Find nodes connected to node with name {random_node_name}",
|
||||
)
|
||||
assert len(search_results) != 0, "Query related natural language don't exist."
|
||||
print("\nExtracted results are:\n")
|
||||
for result in search_results:
|
||||
print(f"{result}\n")
|
||||
|
||||
user = await get_default_user()
|
||||
history = await get_history(user.id)
|
||||
|
||||
assert len(history) == 8, "Search history is not correct."
|
||||
|
||||
await cognee.prune.prune_data()
|
||||
data_root_directory = get_storage_config()["data_root_directory"]
|
||||
assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
|
||||
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
|
||||
graph_engine = await get_graph_engine()
|
||||
nodes, edges = await graph_engine.get_graph_data()
|
||||
assert len(nodes) == 0 and len(edges) == 0, "Memgraph graph database is not empty"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import asyncio
|
||||
|
||||
asyncio.run(main())
|
||||
|
|
@ -35,6 +35,14 @@ async def main():
|
|||
explanation_file_path_nlp = os.path.join(
|
||||
pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
|
||||
)
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
|
||||
graph_engine = await get_graph_engine()
|
||||
|
||||
is_empty = await graph_engine.is_empty()
|
||||
|
||||
assert is_empty, "Graph has to be empty"
|
||||
|
||||
await cognee.add([explanation_file_path_nlp], dataset_name)
|
||||
|
||||
explanation_file_path_quantum = os.path.join(
|
||||
|
|
@ -42,9 +50,16 @@ async def main():
|
|||
)
|
||||
|
||||
await cognee.add([explanation_file_path_quantum], dataset_name)
|
||||
is_empty = await graph_engine.is_empty()
|
||||
|
||||
assert is_empty, "Graph has to be empty before cognify"
|
||||
|
||||
await cognee.cognify([dataset_name])
|
||||
|
||||
is_empty = await graph_engine.is_empty()
|
||||
|
||||
assert not is_empty, "Graph shouldn't be empty"
|
||||
|
||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||
|
||||
vector_engine = get_vector_engine()
|
||||
|
|
@ -117,11 +132,8 @@ async def main():
|
|||
assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
|
||||
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
|
||||
graph_engine = await get_graph_engine()
|
||||
nodes, edges = await graph_engine.get_graph_data()
|
||||
assert len(nodes) == 0 and len(edges) == 0, "Neo4j graph database is not empty"
|
||||
is_empty = await graph_engine.is_empty()
|
||||
assert is_empty, "Neo4j graph database is not empty"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
21
cognee/tests/unit/api/test_search.py
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
import pytest
|
||||
import cognee
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_empty_search_raises_SearchOnEmptyGraphError_on_empty_graph():
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
await cognee.add("Sample input")
|
||||
result = await cognee.search("Sample query")
|
||||
assert result == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_empty_search_doesnt_raise_SearchOnEmptyGraphError():
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
await cognee.add("Sample input")
|
||||
await cognee.cognify()
|
||||
result = await cognee.search("Sample query")
|
||||
assert result != []
|
||||
87
cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py
vendored
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
"""Tests for cache configuration."""
|
||||
|
||||
import pytest
|
||||
from cognee.infrastructure.databases.cache.config import CacheConfig, get_cache_config
|
||||
|
||||
|
||||
def test_cache_config_defaults():
|
||||
"""Test that CacheConfig has the correct default values."""
|
||||
config = CacheConfig()
|
||||
|
||||
assert config.caching is False
|
||||
assert config.shared_kuzu_lock is False
|
||||
assert config.cache_host == "localhost"
|
||||
assert config.cache_port == 6379
|
||||
assert config.agentic_lock_expire == 240
|
||||
assert config.agentic_lock_timeout == 300
|
||||
|
||||
|
||||
def test_cache_config_custom_values():
|
||||
"""Test that CacheConfig accepts custom values."""
|
||||
config = CacheConfig(
|
||||
caching=True,
|
||||
shared_kuzu_lock=True,
|
||||
cache_host="redis.example.com",
|
||||
cache_port=6380,
|
||||
agentic_lock_expire=120,
|
||||
agentic_lock_timeout=180,
|
||||
)
|
||||
|
||||
assert config.caching is True
|
||||
assert config.shared_kuzu_lock is True
|
||||
assert config.cache_host == "redis.example.com"
|
||||
assert config.cache_port == 6380
|
||||
assert config.agentic_lock_expire == 120
|
||||
assert config.agentic_lock_timeout == 180
|
||||
|
||||
|
||||
def test_cache_config_to_dict():
|
||||
"""Test the to_dict method returns all configuration values."""
|
||||
config = CacheConfig(
|
||||
caching=True,
|
||||
shared_kuzu_lock=True,
|
||||
cache_host="test-host",
|
||||
cache_port=7000,
|
||||
agentic_lock_expire=100,
|
||||
agentic_lock_timeout=200,
|
||||
)
|
||||
|
||||
config_dict = config.to_dict()
|
||||
|
||||
assert config_dict == {
|
||||
"caching": True,
|
||||
"shared_kuzu_lock": True,
|
||||
"cache_host": "test-host",
|
||||
"cache_port": 7000,
|
||||
"agentic_lock_expire": 100,
|
||||
"agentic_lock_timeout": 200,
|
||||
}
|
||||
|
||||
|
||||
def test_get_cache_config_singleton():
|
||||
"""Test that get_cache_config returns the same instance."""
|
||||
config1 = get_cache_config()
|
||||
config2 = get_cache_config()
|
||||
|
||||
assert config1 is config2
|
||||
|
||||
|
||||
def test_cache_config_extra_fields_allowed():
|
||||
"""Test that CacheConfig allows extra fields due to extra='allow'."""
|
||||
config = CacheConfig(extra_field="extra_value", another_field=123)
|
||||
|
||||
assert hasattr(config, "extra_field")
|
||||
assert config.extra_field == "extra_value"
|
||||
assert hasattr(config, "another_field")
|
||||
assert config.another_field == 123
|
||||
|
||||
|
||||
def test_cache_config_boolean_type_validation():
|
||||
"""Test that boolean fields accept various truthy/falsy values."""
|
||||
config1 = CacheConfig(caching="true", shared_kuzu_lock="yes")
|
||||
assert config1.caching is True
|
||||
assert config1.shared_kuzu_lock is True
|
||||
|
||||
config2 = CacheConfig(caching="false", shared_kuzu_lock="no")
|
||||
assert config2.caching is False
|
||||
assert config2.shared_kuzu_lock is False
|
||||
|
|
@ -129,6 +129,30 @@ services:
|
|||
networks:
|
||||
- cognee-network
|
||||
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
container_name: redis
|
||||
profiles:
|
||||
- redis
|
||||
ports:
|
||||
- "6379:6379"
|
||||
networks:
|
||||
- cognee-network
|
||||
volumes:
|
||||
- redis_data:/data
|
||||
command: [ "redis-server", "--appendonly", "yes" ]
|
||||
|
||||
|
||||
redisinsight:
|
||||
image: redislabs/redisinsight:latest
|
||||
container_name: redisinsight
|
||||
restart: always
|
||||
ports:
|
||||
- "5540:5540"
|
||||
networks:
|
||||
- cognee-network
|
||||
|
||||
|
||||
networks:
|
||||
cognee-network:
|
||||
name: cognee-network
|
||||
|
|
@ -136,3 +160,4 @@ networks:
|
|||
volumes:
|
||||
chromadb_data:
|
||||
postgres_data:
|
||||
redis_data:
|
||||
|
|
|
|||
82
notebooks/neptune-analytics-example.ipynb
vendored
|
|
@ -83,16 +83,16 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import pathlib\n",
|
||||
"from cognee import config, add, cognify, search, SearchType, prune, visualize_graph\n",
|
||||
"from dotenv import load_dotenv"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
|
|
@ -106,7 +106,9 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# load environment variables from file .env\n",
|
||||
"load_dotenv()\n",
|
||||
|
|
@ -145,9 +147,7 @@
|
|||
" \"vector_db_url\": f\"neptune-graph://{graph_identifier}\", # Neptune Analytics endpoint with the format neptune-graph://<GRAPH_ID>\n",
|
||||
" }\n",
|
||||
")"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
|
|
@ -159,19 +159,19 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Prune data and system metadata before running, only if we want \"fresh\" state.\n",
|
||||
"await prune.prune_data()\n",
|
||||
"await prune.prune_system(metadata=True)"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
]
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup data and cognify\n",
|
||||
"\n",
|
||||
|
|
@ -180,7 +180,9 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Add sample text to the dataset\n",
|
||||
"sample_text_1 = \"\"\"Neptune Analytics is a memory-optimized graph database engine for analytics. With Neptune\n",
|
||||
|
|
@ -205,9 +207,7 @@
|
|||
"\n",
|
||||
"# Cognify the text data.\n",
|
||||
"await cognify([dataset_name])"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
|
|
@ -215,14 +215,16 @@
|
|||
"source": [
|
||||
"## Graph Memory visualization\n",
|
||||
"\n",
|
||||
"Initialize Memgraph as a Graph Memory store and save to .artefacts/graph_visualization.html\n",
|
||||
"Initialize Neptune as a Graph Memory store and save to .artefacts/graph_visualization.html\n",
|
||||
"\n",
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get a graphistry url (Register for a free account at https://www.graphistry.com)\n",
|
||||
"# url = await render_graph()\n",
|
||||
|
|
@ -235,9 +237,7 @@
|
|||
" ).resolve()\n",
|
||||
")\n",
|
||||
"await visualize_graph(graph_file_path)"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
|
|
@ -250,19 +250,19 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Completion query that uses graph data to form context.\n",
|
||||
"graph_completion = await search(query_text=\"What is Neptune Analytics?\", query_type=SearchType.GRAPH_COMPLETION)\n",
|
||||
"print(\"\\nGraph completion result is:\")\n",
|
||||
"print(graph_completion)"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
]
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## SEARCH: RAG Completion\n",
|
||||
"\n",
|
||||
|
|
@ -271,19 +271,19 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Completion query that uses document chunks to form context.\n",
|
||||
"rag_completion = await search(query_text=\"What is Neptune Analytics?\", query_type=SearchType.RAG_COMPLETION)\n",
|
||||
"print(\"\\nRAG Completion result is:\")\n",
|
||||
"print(rag_completion)"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
]
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## SEARCH: Graph Insights\n",
|
||||
"\n",
|
||||
|
|
@ -291,8 +291,10 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Search graph insights\n",
|
||||
"insights_results = await search(query_text=\"Neptune Analytics\", query_type=SearchType.GRAPH_COMPLETION)\n",
|
||||
|
|
@ -302,13 +304,11 @@
|
|||
" tgt_node = result[2].get(\"name\", result[2][\"type\"])\n",
|
||||
" relationship = result[1].get(\"relationship_name\", \"__relationship__\")\n",
|
||||
" print(f\"- {src_node} -[{relationship}]-> {tgt_node}\")"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
]
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## SEARCH: Entity Summaries\n",
|
||||
"\n",
|
||||
|
|
@ -316,8 +316,10 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Query all summaries related to query.\n",
|
||||
"summaries = await search(query_text=\"Neptune Analytics\", query_type=SearchType.SUMMARIES)\n",
|
||||
|
|
@ -326,13 +328,11 @@
|
|||
" type = summary[\"type\"]\n",
|
||||
" text = summary[\"text\"]\n",
|
||||
" print(f\"- {type}: {text}\")"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
]
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## SEARCH: Chunks\n",
|
||||
"\n",
|
||||
|
|
@ -340,8 +340,10 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chunks = await search(query_text=\"Neptune Analytics\", query_type=SearchType.CHUNKS)\n",
|
||||
"print(\"\\nChunk results are:\")\n",
|
||||
|
|
@ -349,9 +351,7 @@
|
|||
" type = chunk[\"type\"]\n",
|
||||
" text = chunk[\"text\"]\n",
|
||||
" print(f\"- {type}: {text}\")"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
|
|
|||
4374
poetry.lock
generated
File diff suppressed because it is too large
|
|
@ -7,7 +7,7 @@ authors = [
|
|||
{ name = "Vasilije Markovic" },
|
||||
{ name = "Boris Arzentar" },
|
||||
]
|
||||
requires-python = ">=3.10,<=3.13"
|
||||
requires-python = ">=3.10,<3.14"
|
||||
readme = "README.md"
|
||||
license = "Apache-2.0"
|
||||
classifiers = [
|
||||
|
|
@ -67,10 +67,10 @@ distributed = [
|
|||
]
|
||||
|
||||
scraping = [
|
||||
"tavily-python>=0.7.0",
|
||||
"tavily-python>=0.7.12",
|
||||
"beautifulsoup4>=4.13.1",
|
||||
"playwright>=1.9.0",
|
||||
"lxml>=4.9.3,<5.0.0",
|
||||
"lxml>=4.9.3",
|
||||
"protego>=0.1",
|
||||
"APScheduler>=3.10.0,<=3.11.0"
|
||||
]
|
||||
|
|
@ -104,7 +104,7 @@ chromadb = [
|
|||
"chromadb>=0.6,<0.7",
|
||||
"pypika==0.48.9",
|
||||
]
|
||||
docs = ["unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx, pdf]>=0.18.1,<19"]
|
||||
docs = ["lxml<6.0.0", "unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx, pdf]>=0.18.1,<19"]
|
||||
codegraph = [
|
||||
"fastembed<=0.6.0 ; python_version < '3.13'",
|
||||
"transformers>=4.46.3,<5",
|
||||
|
|
@ -143,6 +143,7 @@ dev = [
|
|||
"mkdocstrings[python]>=0.26.2,<0.27",
|
||||
]
|
||||
debug = ["debugpy>=1.8.9,<2.0.0"]
|
||||
redis = ["redis>=5.0.3,<6.0.0"]
|
||||
|
||||
monitoring = ["sentry-sdk[fastapi]>=2.9.0,<3", "langfuse>=2.32.0,<3"]
|
||||
|
||||
|
|
|
|||
19
uv.lock
generated
|
|
@ -1084,6 +1084,9 @@ postgres-binary = [
|
|||
posthog = [
|
||||
{ name = "posthog" },
|
||||
]
|
||||
redis = [
|
||||
{ name = "redis" },
|
||||
]
|
||||
scraping = [
|
||||
{ name = "apscheduler" },
|
||||
{ name = "beautifulsoup4" },
|
||||
|
|
@ -1174,6 +1177,7 @@ requires-dist = [
|
|||
{ name = "python-magic-bin", marker = "sys_platform == 'win32'", specifier = "<0.5" },
|
||||
{ name = "python-multipart", specifier = ">=0.0.20,<1.0.0" },
|
||||
{ name = "rdflib", specifier = ">=7.1.4,<7.2.0" },
|
||||
{ name = "redis", marker = "extra == 'redis'", specifier = ">=5.0.3,<6.0.0" },
|
||||
{ name = "ruff", marker = "extra == 'dev'", specifier = ">=0.9.2,<=0.13.1" },
|
||||
{ name = "s3fs", extras = ["boto3"], marker = "extra == 'aws'", specifier = "==2025.3.2" },
|
||||
{ name = "scikit-learn", marker = "extra == 'evals'", specifier = ">=1.6.1,<2" },
|
||||
|
|
@ -1195,7 +1199,7 @@ requires-dist = [
|
|||
{ name = "uvicorn", specifier = ">=0.34.0,<1.0.0" },
|
||||
{ name = "websockets", specifier = ">=15.0.1,<16.0.0" },
|
||||
]
|
||||
provides-extras = ["api", "distributed", "scraping", "neo4j", "neptune", "postgres", "postgres-binary", "notebook", "langchain", "llama-index", "huggingface", "ollama", "mistral", "anthropic", "deepeval", "posthog", "groq", "chromadb", "docs", "codegraph", "evals", "graphiti", "aws", "dlt", "baml", "dev", "debug", "monitoring", "docling"]
|
||||
provides-extras = ["api", "distributed", "scraping", "neo4j", "neptune", "postgres", "postgres-binary", "notebook", "langchain", "llama-index", "huggingface", "ollama", "mistral", "anthropic", "deepeval", "posthog", "groq", "chromadb", "docs", "codegraph", "evals", "graphiti", "aws", "dlt", "baml", "dev", "debug", "redis", "monitoring", "docling"]
|
||||
|
||||
[[package]]
|
||||
name = "colorama"
|
||||
|
|
@ -7379,6 +7383,19 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/f4/31/e9b6f04288dcd3fa60cb3179260d6dad81b92aef3063d679ac7d80a827ea/rdflib-7.1.4-py3-none-any.whl", hash = "sha256:72f4adb1990fa5241abd22ddaf36d7cafa5d91d9ff2ba13f3086d339b213d997", size = 565051, upload-time = "2025-03-29T02:22:44.987Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redis"
|
||||
version = "5.3.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "async-timeout", marker = "python_full_version < '3.11.3'" },
|
||||
{ name = "pyjwt" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/6a/cf/128b1b6d7086200c9f387bd4be9b2572a30b90745ef078bd8b235042dc9f/redis-5.3.1.tar.gz", hash = "sha256:ca49577a531ea64039b5a36db3d6cd1a0c7a60c34124d46924a45b956e8cf14c", size = 4626200, upload-time = "2025-07-25T08:06:27.778Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/7f/26/5c5fa0e83c3621db835cfc1f1d789b37e7fa99ed54423b5f519beb931aa7/redis-5.3.1-py3-none-any.whl", hash = "sha256:dc1909bd24669cc31b5f67a039700b16ec30571096c5f1f0d9d2324bff31af97", size = 272833, upload-time = "2025-07-25T08:06:26.317Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "referencing"
|
||||
version = "0.37.0"
|
||||
|
|
|
|||
31
working_dir_error_replication/run_subprocess_test.py
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
"""
|
||||
Run writer and reader in separate subprocesses to test Kuzu locks.
|
||||
"""
|
||||
|
||||
import subprocess
|
||||
import time
|
||||
import os
|
||||
|
||||
|
||||
def main():
|
||||
print("=== Kuzu Subprocess Lock Test ===")
|
||||
print("Starting writer and reader in separate subprocesses...")
|
||||
print("Writer will hold the database lock, reader should block or fail\n")
|
||||
|
||||
start_time = time.time()
|
||||
|
||||
# Start writer subprocess
|
||||
writer_process = subprocess.Popen([os.sys.executable, "writer.py"])
|
||||
|
||||
reader_process = subprocess.Popen([os.sys.executable, "reader.py"])
|
||||
|
||||
# Wait for both processes to complete
|
||||
writer_process.wait()
|
||||
reader_process.wait()
|
||||
|
||||
total_time = time.time() - start_time
|
||||
print(f"\nTotal execution time: {total_time:.2f}s")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||