Fix/add async lock to all vector databases (#1244)
## Description

1. Cleans up the VectorDB adapters that have been migrated to the `cognee-community` repo.
2. Adds async lock protection to the `create_collection` method in the remaining VectorDB adapter, ChromaDB.

See #1222

## DCO Affirmation

I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.

Co-authored-by: Igor Ilic <30923996+dexters1@users.noreply.github.com>
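For context, the ChromaDB change in this diff serializes the check-then-create sequence in `create_collection` behind a per-adapter `asyncio.Lock`, so concurrent pipelines cannot both observe a missing collection and then race to create it. Below is a minimal, self-contained sketch of that pattern; `FakeClient` is a hypothetical stand-in for ChromaDB's `AsyncHttpClient`, and only the lock attribute and the `async with` wrapper mirror the actual change.

```python
import asyncio


class FakeClient:
    """Hypothetical stand-in for chromadb.AsyncHttpClient, used only to illustrate the race."""

    def __init__(self):
        self.collections = set()

    async def has_collection(self, name: str) -> bool:
        await asyncio.sleep(0)  # yield to the event loop, as a real network call would
        return name in self.collections

    async def create_collection(self, name: str) -> None:
        await asyncio.sleep(0)
        if name in self.collections:
            raise ValueError(f"collection {name} already exists")
        self.collections.add(name)


class LockedAdapter:
    """Sketch of the pattern applied in ChromaDBAdapter: one asyncio.Lock per adapter instance."""

    def __init__(self, client: FakeClient):
        self.client = client
        self.VECTOR_DB_LOCK = asyncio.Lock()

    async def create_collection(self, collection_name: str) -> None:
        # Serialize the check-then-create sequence so concurrent callers cannot
        # interleave between has_collection() and create_collection().
        async with self.VECTOR_DB_LOCK:
            if not await self.client.has_collection(collection_name):
                await self.client.create_collection(collection_name)


async def main() -> None:
    adapter = LockedAdapter(FakeClient())
    # Without the lock, several of these calls could pass the existence check
    # before any of them finished creating, and the later creates would fail.
    await asyncio.gather(*(adapter.create_collection("chunks") for _ in range(5)))
    print(adapter.client.collections)  # {'chunks'}


asyncio.run(main())
```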
This commit is contained in:
parent 0ed6f255c2
commit b297289060
20 changed files with 32 additions and 1436 deletions
.github/workflows/db_examples_tests.yml (vendored): 43 lines changed
@@ -29,10 +29,6 @@ on:
        required: true
      EMBEDDING_API_VERSION:
        required: true
-     QDRANT_API_URL:
-       required: false
-     QDRANT_API_KEY:
-       required: false
 
      POSTGRES_PASSWORD:
        required: false
@@ -113,45 +109,6 @@ jobs:
        run: |
          poetry run python examples/database_examples/kuzu_example.py
 
-
-
-  run-db-example-qdrant:
-    name: "Qdrant DB Example Test"
-    runs-on: ubuntu-22.04
-    if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'qdrant') }}
-    defaults:
-      run:
-        shell: bash
-
-    steps:
-      - name: Check out
-        uses: actions/checkout@master
-
-      - name: Cognee Setup
-        uses: ./.github/actions/cognee_setup
-        with:
-          python-version: ${{ inputs.python-version }}
-
-      - name: Install Qdrant extra
-        run: |
-          poetry install -E qdrant
-
-      - name: Run Qdrant Example
-        env:
-          ENV: dev
-          LLM_MODEL: ${{ secrets.LLM_MODEL }}
-          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
-          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
-          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
-          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
-          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
-          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
-          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
-          VECTOR_DB_URL: ${{ secrets.QDRANT_API_URL }}
-          VECTOR_DB_KEY: ${{ secrets.QDRANT_API_KEY }}
-        run: |
-          poetry run python examples/database_examples/qdrant_example.py
-
  run-db-example-pgvector:
    name: "PostgreSQL PGVector DB Example Test"
    runs-on: ubuntu-22.04
.github/workflows/vector_db_tests.yml (vendored): 59 lines changed
@@ -59,65 +59,6 @@ jobs:
      #   run: poetry run python ./cognee/tests/test_chromadb.py
 
-
-  run_qdrant_integration_test:
-    name: Qdrant Tests
-    runs-on: ubuntu-latest
-    if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'qdrant') }}
-    defaults:
-      run:
-        shell: bash
-
-    services:
-      qdrant:
-        image: qdrant/qdrant:v1.14.1
-        env:
-          QDRANT__LOG_LEVEL: ERROR
-          QDRANT__SERVICE__API_KEY: qdrant_api_key
-          QDRANT__SERVICE__ENABLE_TLS: 0
-        ports:
-          - 6333:6333
-
-    steps:
-      - name: Check out
-        uses: actions/checkout@master
-
-      - name: Cognee Setup
-        uses: ./.github/actions/cognee_setup
-        with:
-          python-version: ${{ inputs.python-version }}
-
-      - name: Install specific db dependency
-        run: |
-          poetry install -E qdrant
-
-      - name: Wait for Qdrant to be healthy
-        run: |
-          for i in {1..10}; do
-            if curl -f http://127.0.0.1:6333/healthz; then
-              echo "Qdrant is healthy!"
-              exit 0
-            fi
-            echo "Waiting for Qdrant to be healthy..."
-            sleep 3
-          done
-          echo "Qdrant failed to become healthy in time"
-          exit 1
-
-      - name: Run default Qdrant
-        env:
-          ENV: 'dev'
-          LLM_MODEL: ${{ secrets.LLM_MODEL }}
-          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
-          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
-          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
-          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
-          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
-          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
-          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
-          VECTOR_DB_URL: 127.0.0.1
-          VECTOR_DB_KEY: qdrant_api_key
-        run: poetry run python ./cognee/tests/test_qdrant.py
-
  run-postgres-tests:
    name: PostgreSQL Tests
    runs-on: ubuntu-22.04
@@ -31,7 +31,7 @@ COPY README.md pyproject.toml uv.lock entrypoint.sh ./
 
 # Install the project's dependencies using the lockfile and settings
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv sync --extra debug --extra api --extra postgres --extra qdrant --extra neo4j --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-install-project --no-dev --no-editable
+    uv sync --extra debug --extra api --extra postgres --extra neo4j --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-install-project --no-dev --no-editable
 
 # Copy Alembic configuration
 COPY alembic.ini /app/alembic.ini

@@ -42,7 +42,7 @@ COPY alembic/ /app/alembic
 COPY ./cognee /app/cognee
 COPY ./distributed /app/distributed
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv sync --extra debug --extra api --extra postgres --extra qdrant --extra neo4j --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-dev --no-editable
+    uv sync --extra debug --extra api --extra postgres --extra neo4j --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-dev --no-editable
 
 FROM python:3.12-slim-bookworm
@@ -130,7 +130,7 @@ async def add(
         - LLM_MODEL: Model name (default: "gpt-4o-mini")
         - DEFAULT_USER_EMAIL: Custom default user email
         - DEFAULT_USER_PASSWORD: Custom default user password
-        - VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "qdrant", "weaviate"
+        - VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "pgvector"
         - GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j", "networkx"
 
     Raises:
@@ -30,8 +30,6 @@ class VectorDBConfigInputDTO(InDTO):
     provider: Union[
         Literal["lancedb"],
         Literal["chromadb"],
-        Literal["qdrant"],
-        Literal["weaviate"],
         Literal["pgvector"],
     ]
     url: str
@@ -1,4 +1,5 @@
 import json
+import asyncio
 from uuid import UUID
 from typing import List, Optional
 from chromadb import AsyncHttpClient, Settings

@@ -161,6 +162,7 @@ class ChromaDBAdapter(VectorDBInterface):
         self.embedding_engine = embedding_engine
         self.url = url
         self.api_key = api_key
+        self.VECTOR_DB_LOCK = asyncio.Lock()
 
     async def get_connection(self) -> AsyncHttpClient:
         """

@@ -224,10 +226,13 @@ class ChromaDBAdapter(VectorDBInterface):
         - collection_name (str): The name of the collection to create.
         - payload_schema: The schema for the payload; can be None. (default None)
         """
-        client = await self.get_connection()
+        async with self.VECTOR_DB_LOCK:
+            client = await self.get_connection()
 
-        if not await self.has_collection(collection_name):
-            await client.create_collection(name=collection_name, metadata={"hnsw:space": "cosine"})
+            if not await self.has_collection(collection_name):
+                await client.create_collection(
+                    name=collection_name, metadata={"hnsw:space": "cosine"}
+                )
 
     async def get_collection(self, collection_name: str) -> AsyncHttpClient:
         """
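A note on scope (not part of the diff itself): `asyncio.Lock` only serializes coroutines running on a single event loop, so this guards concurrent tasks inside one cognee process sharing an adapter instance. If several processes or hosts create collections against the same ChromaDB server, a server-side or distributed guard would presumably still be needed.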
@@ -19,7 +19,7 @@ def create_vector_engine(
     for each provider, raising an EnvironmentError if any are missing, or ImportError if the
     ChromaDB package is not installed.
 
-    Supported providers include: Qdrant, pgvector, FalkorDB, ChromaDB, and
+    Supported providers include: pgvector, FalkorDB, ChromaDB, and
     LanceDB.
 
     Parameters:

@@ -30,7 +30,7 @@ def create_vector_engine(
       providers.
     - vector_db_key (str): The API key or access token for the vector database instance.
     - vector_db_provider (str): The name of the vector database provider to use (e.g.,
-      'qdrant', 'pgvector').
+      'pgvector').
 
     Returns:
     --------

@@ -48,19 +48,7 @@ def create_vector_engine(
             embedding_engine=embedding_engine,
         )
 
-    if vector_db_provider == "qdrant":
-        if not (vector_db_url and vector_db_key):
-            raise EnvironmentError("Missing requred Qdrant credentials!")
-
-        from .qdrant.QDrantAdapter import QDrantAdapter
-
-        return QDrantAdapter(
-            url=vector_db_url,
-            api_key=vector_db_key,
-            embedding_engine=embedding_engine,
-        )
-
-    elif vector_db_provider == "pgvector":
+    if vector_db_provider == "pgvector":
         from cognee.infrastructure.databases.relational import get_relational_config
 
         # Get configuration for postgres database
@@ -1,513 +0,0 @@
(File deleted: the Qdrant adapter, 513 lines removed. The module imported qdrant_client's AsyncQdrantClient and models, defined an IndexSchema DataPoint with a "text" index field, helper factories create_hnsw_config, create_optimizers_config, and create_quantization_config, and a QDrantAdapter class implementing VectorDBInterface with get_qdrant_client, embed_data, has_collection, create_collection, create_data_points, create_vector_index, index_data_points, retrieve, search, batch_search, delete_data_points, and prune. Per the PR description, this adapter has been migrated to the cognee-community repo.)
@@ -1,2 +0,0 @@
-from .QDrantAdapter import QDrantAdapter
-from ..models.CollectionConfig import CollectionConfig
@@ -1,527 +0,0 @@
(File deleted: the Weaviate adapter, 527 lines removed. The module wrapped Weaviate calls in tenacity retries via an is_retryable_request helper (retrying on 409/503/500 responses and request exceptions), defined an IndexSchema DataPoint with a "text" index field, and a WeaviateAdapter class implementing VectorDBInterface with get_client, embed_data, has_collection, create_collection, get_collection, create_data_points (decorated with @override_distributed(queued_add_data_points)), create_vector_index, index_data_points, retrieve, search, batch_search, delete_data_points, and prune, built on weaviate.use_async_with_weaviate_cloud. Per the PR description, this adapter has been migrated to the cognee-community repo.)
@@ -1 +0,0 @@
-from .WeaviateAdapter import WeaviateAdapter
@@ -43,10 +43,6 @@ def get_settings() -> SettingsDict:
     llm_config = get_llm_config()
 
     vector_dbs = [
-        {
-            "value": "qdrant",
-            "label": "Qdrant",
-        },
         {
             "value": "lancedb",
             "label": "LanceDB",
@@ -6,7 +6,7 @@ from cognee.infrastructure.databases.vector import get_vectordb_config
 class VectorDBConfig(BaseModel):
     url: str
     api_key: str
-    provider: Union[Literal["lancedb"], Literal["qdrant"], Literal["pgvector"]]
+    provider: Union[Literal["lancedb"], Literal["pgvector"]]
 
 
 async def save_vector_db_config(vector_db_config: VectorDBConfig):
@@ -1,99 +0,0 @@
(File deleted: the Qdrant end-to-end test, 99 lines removed. The script set the vector provider to "qdrant", pointed the data and system root directories at .data_storage/test_qdrant and .cognee_system/test_qdrant, pruned data and system state, added test_data/Natural_language_processing.txt and a quantum-computing text to a "cs_explanations" dataset, ran cognify, queried the vector engine for "Entity_name" with "Quantum computer", ran INSIGHTS, CHUNKS, and SUMMARIES searches with non-empty assertions, asserted a search history length of 6, then pruned data and system again and asserted that local data files were deleted and that the Qdrant database contained no collections.)
@@ -3,7 +3,7 @@ FROM python:3.11-slim
 # Define Poetry extras to install
 ARG POETRY_EXTRAS="\
     # Storage & Databases \
-    postgres qdrant neo4j falkordb kuzu \
+    postgres neo4j falkordb kuzu \
     # Notebooks & Interactive Environments \
     notebook \
     # LLM & AI Frameworks \
@@ -71,7 +71,6 @@ distributed = [
     "modal>=1.0.5,<2.0.0",
 ]
 
-qdrant = ["qdrant-client>=1.14.2,<2"]
 neo4j = ["neo4j>=5.28.0,<6"]
 postgres = [
     "psycopg2>=2.9.10,<3",
@@ -1,93 +0,0 @@
import os
import pathlib
import asyncio
import cognee
from cognee.modules.search.types import SearchType


async def main():
    """
    Example script demonstrating how to use Cognee with Qdrant

    This example:
    1. Configures Cognee to use Qdrant as vector database
    2. Sets up data directories
    3. Adds sample data to Cognee
    4. Processes (cognifies) the data
    5. Performs different types of searches
    """
    # Set up Qdrant credentials in .env file and get the values from environment variables
    qdrant_url = os.getenv("VECTOR_DB_URL")
    qdrant_key = os.getenv("VECTOR_DB_KEY")

    # Configure Qdrant as the vector database provider
    cognee.config.set_vector_db_config(
        {
            "vector_db_url": qdrant_url,  # Enter Qdrant URL
            "vector_db_key": qdrant_key,  # API key needed
            "vector_db_provider": "qdrant",  # Specify Qdrant as provider
        }
    )

    # Set up data directories for storing documents and system files
    # You should adjust these paths to your needs
    current_dir = pathlib.Path(__file__).parent
    data_directory_path = str(current_dir / "data_storage")
    cognee.config.data_root_directory(data_directory_path)

    cognee_directory_path = str(current_dir / "cognee_system")
    cognee.config.system_root_directory(cognee_directory_path)

    # Clean any existing data (optional)
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    # Create a dataset
    dataset_name = "qdrant_example"

    # Add sample text to the dataset
    sample_text = """Qdrant is a vector similarity search engine and vector database.
    It provides a production-ready service with a convenient API for storing, searching, and managing vectors.
    Qdrant supports filtering during vector search, which is essential for real-world applications.
    The database implements various performance optimizations, including HNSW index for approximate nearest neighbor search.
    Qdrant can be deployed via Docker, as a managed cloud service, or directly on bare metal.
    It also supports payload and metadata storage alongside the vectors, allowing for rich data retrieval."""

    # Add the sample text to the dataset
    await cognee.add([sample_text], dataset_name)

    # Process the added document to extract knowledge
    await cognee.cognify([dataset_name])

    # Now let's perform some searches
    # 1. Search for insights related to "Qdrant"
    insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="Qdrant")
    print("\nInsights about Qdrant:")
    for result in insights_results:
        print(f"- {result}")

    # 2. Search for text chunks related to "vector search"
    chunks_results = await cognee.search(
        query_type=SearchType.CHUNKS, query_text="vector search", datasets=[dataset_name]
    )
    print("\nChunks about vector search:")
    for result in chunks_results:
        print(f"- {result}")

    # 3. Get graph completion related to databases
    graph_completion_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION, query_text="database"
    )
    print("\nGraph completion for databases:")
    for result in graph_completion_results:
        print(f"- {result}")

    # Clean up (optional)
    # await cognee.prune.prune_data()
    # await cognee.prune.prune_system(metadata=True)


if __name__ == "__main__":
    asyncio.run(main())
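The deleted file above was the only end-to-end Qdrant walkthrough in core, so for reference here is what the same flow looks like against a vector database that still ships with cognee. This is a minimal sketch rather than a maintained example: the "chromadb" provider string, the reuse of the VECTOR_DB_URL and VECTOR_DB_KEY environment variables, and the sample text and query are assumptions; only the cognee calls themselves (set_vector_db_config, prune, add, cognify, search) are taken from the removed example.

import os
import asyncio

import cognee
from cognee.modules.search.types import SearchType


async def main():
    # Assumption: "chromadb" is accepted as a vector_db_provider value; check the
    # providers supported by your cognee version before relying on this sketch.
    cognee.config.set_vector_db_config(
        {
            "vector_db_url": os.getenv("VECTOR_DB_URL"),
            "vector_db_key": os.getenv("VECTOR_DB_KEY"),
            "vector_db_provider": "chromadb",
        }
    )

    # Start from a clean slate, then run the usual add -> cognify -> search loop.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    dataset_name = "vector_db_example"
    await cognee.add(["Cognee builds a queryable knowledge graph from documents."], dataset_name)
    await cognee.cognify([dataset_name])

    results = await cognee.search(query_type=SearchType.CHUNKS, query_text="knowledge graph")
    for result in results:
        print(f"- {result}")


if __name__ == "__main__":
    asyncio.run(main())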
poetry.lock (generated, 54 changed lines)
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand.
 
 [[package]]
 name = "aiobotocore"
@@ -2873,7 +2873,7 @@ description = "HTTP/2-based RPC framework"
 optional = true
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "extra == \"gemini\" or extra == \"qdrant\" or extra == \"deepeval\""
+markers = "extra == \"gemini\" or extra == \"deepeval\""
 files = [
     {file = "grpcio-1.74.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:85bd5cdf4ed7b2d6438871adf6afff9af7096486fcf51818a81b77ef4dd30907"},
     {file = "grpcio-1.74.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:68c8ebcca945efff9d86d8d6d7bfb0841cf0071024417e2d7f45c5e46b5b08eb"},
@@ -3011,7 +3011,7 @@ description = "Pure-Python HTTP/2 protocol implementation"
 optional = true
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "extra == \"distributed\" or extra == \"qdrant\""
+markers = "extra == \"distributed\""
 files = [
     {file = "h2-4.2.0-py3-none-any.whl", hash = "sha256:479a53ad425bb29af087f3458a61d30780bc818e4ebcf01f0b536ba916462ed0"},
     {file = "h2-4.2.0.tar.gz", hash = "sha256:c8a52129695e88b1a0578d8d2cc6842bbd79128ac685463b887ee278126ad01f"},
@@ -3082,7 +3082,7 @@ description = "Pure-Python HPACK header encoding"
 optional = true
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "extra == \"distributed\" or extra == \"qdrant\""
+markers = "extra == \"distributed\""
 files = [
     {file = "hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496"},
     {file = "hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca"},
@@ -3233,7 +3233,6 @@ files = [
 [package.dependencies]
 anyio = "*"
 certifi = "*"
-h2 = {version = ">=3,<5", optional = true, markers = "extra == \"http2\""}
 httpcore = "==1.*"
 idna = "*"
 
@@ -3333,7 +3332,7 @@ description = "Pure-Python HTTP/2 framing"
 optional = true
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "extra == \"distributed\" or extra == \"qdrant\""
+markers = "extra == \"distributed\""
 files = [
     {file = "hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5"},
     {file = "hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08"},
@@ -3911,6 +3910,8 @@ python-versions = "*"
 groups = ["main"]
 files = [
     {file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"},
+    {file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"},
+    {file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"},
 ]
 
 [package.dependencies]
@@ -5123,10 +5124,12 @@ files = [
     {file = "lxml-6.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:219e0431ea8006e15005767f0351e3f7f9143e793e58519dc97fe9e07fae5563"},
     {file = "lxml-6.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bd5913b4972681ffc9718bc2d4c53cde39ef81415e1671ff93e9aa30b46595e7"},
     {file = "lxml-6.0.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:390240baeb9f415a82eefc2e13285016f9c8b5ad71ec80574ae8fa9605093cd7"},
+    {file = "lxml-6.0.0-cp312-cp312-manylinux_2_27_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d6e200909a119626744dd81bae409fc44134389e03fbf1d68ed2a55a2fb10991"},
     {file = "lxml-6.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ca50bd612438258a91b5b3788c6621c1f05c8c478e7951899f492be42defc0da"},
     {file = "lxml-6.0.0-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:c24b8efd9c0f62bad0439283c2c795ef916c5a6b75f03c17799775c7ae3c0c9e"},
     {file = "lxml-6.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:afd27d8629ae94c5d863e32ab0e1d5590371d296b87dae0a751fb22bf3685741"},
     {file = "lxml-6.0.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:54c4855eabd9fc29707d30141be99e5cd1102e7d2258d2892314cf4c110726c3"},
+    {file = "lxml-6.0.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c907516d49f77f6cd8ead1322198bdfd902003c3c330c77a1c5f3cc32a0e4d16"},
     {file = "lxml-6.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:36531f81c8214e293097cd2b7873f178997dae33d3667caaae8bdfb9666b76c0"},
     {file = "lxml-6.0.0-cp312-cp312-win32.whl", hash = "sha256:690b20e3388a7ec98e899fd54c924e50ba6693874aa65ef9cb53de7f7de9d64a"},
     {file = "lxml-6.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:310b719b695b3dd442cdfbbe64936b2f2e231bb91d998e99e6f0daf991a3eba3"},
@@ -5137,10 +5140,12 @@ files = [
     {file = "lxml-6.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d18a25b19ca7307045581b18b3ec9ead2b1db5ccd8719c291f0cd0a5cec6cb81"},
     {file = "lxml-6.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d4f0c66df4386b75d2ab1e20a489f30dc7fd9a06a896d64980541506086be1f1"},
     {file = "lxml-6.0.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9f4b481b6cc3a897adb4279216695150bbe7a44c03daba3c894f49d2037e0a24"},
+    {file = "lxml-6.0.0-cp313-cp313-manylinux_2_27_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8a78d6c9168f5bcb20971bf3329c2b83078611fbe1f807baadc64afc70523b3a"},
     {file = "lxml-6.0.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ae06fbab4f1bb7db4f7c8ca9897dc8db4447d1a2b9bee78474ad403437bcc29"},
     {file = "lxml-6.0.0-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:1fa377b827ca2023244a06554c6e7dc6828a10aaf74ca41965c5d8a4925aebb4"},
     {file = "lxml-6.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1676b56d48048a62ef77a250428d1f31f610763636e0784ba67a9740823988ca"},
     {file = "lxml-6.0.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:0e32698462aacc5c1cf6bdfebc9c781821b7e74c79f13e5ffc8bfe27c42b1abf"},
+    {file = "lxml-6.0.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4d6036c3a296707357efb375cfc24bb64cd955b9ec731abf11ebb1e40063949f"},
     {file = "lxml-6.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7488a43033c958637b1a08cddc9188eb06d3ad36582cebc7d4815980b47e27ef"},
     {file = "lxml-6.0.0-cp313-cp313-win32.whl", hash = "sha256:5fcd7d3b1d8ecb91445bd71b9c88bdbeae528fefee4f379895becfc72298d181"},
     {file = "lxml-6.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:2f34687222b78fff795feeb799a7d44eca2477c3d9d3a46ce17d51a4f383e32e"},
@@ -7441,7 +7446,7 @@ description = "Wraps the portalocker recipe for easy usage"
 optional = true
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "extra == \"qdrant\" or extra == \"deepeval\""
+markers = "extra == \"deepeval\""
 files = [
     {file = "portalocker-3.2.0-py3-none-any.whl", hash = "sha256:3cdc5f565312224bc570c49337bd21428bba0ef363bbcf58b9ef4a9f11779968"},
     {file = "portalocker-3.2.0.tar.gz", hash = "sha256:1f3002956a54a8c3730586c5c77bf18fae4149e07eaf1c29fc3faf4d5a3f89ac"},
@@ -7721,6 +7726,7 @@ files = [
     {file = "psycopg2-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:0435034157049f6846e95103bd8f5a668788dd913a7c30162ca9503fdf542cb4"},
     {file = "psycopg2-2.9.10-cp312-cp312-win32.whl", hash = "sha256:65a63d7ab0e067e2cdb3cf266de39663203d38d6a8ed97f5ca0cb315c73fe067"},
     {file = "psycopg2-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:4a579d6243da40a7b3182e0430493dbd55950c493d8c68f4eec0b302f6bbf20e"},
+    {file = "psycopg2-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:91fd603a2155da8d0cfcdbf8ab24a2d54bca72795b90d2a3ed2b6da8d979dee2"},
     {file = "psycopg2-2.9.10-cp39-cp39-win32.whl", hash = "sha256:9d5b3b94b79a844a986d029eee38998232451119ad653aea42bb9220a8c5066b"},
     {file = "psycopg2-2.9.10-cp39-cp39-win_amd64.whl", hash = "sha256:88138c8dedcbfa96408023ea2b0c369eda40fe5d75002c0964c78f46f11fa442"},
     {file = "psycopg2-2.9.10.tar.gz", hash = "sha256:12ec0b40b0273f95296233e8750441339298e6a572f7039da5b260e3c8b60e11"},
@@ -7782,6 +7788,7 @@ files = [
     {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"},
     {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"},
     {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"},
+    {file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"},
     {file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"},
     {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"},
     {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"},
@@ -9075,36 +9082,6 @@ files = [
     {file = "qasync-0.27.1.tar.gz", hash = "sha256:8dc768fd1ee5de1044c7c305eccf2d39d24d87803ea71189d4024fb475f4985f"},
 ]
 
-[[package]]
-name = "qdrant-client"
-version = "1.15.0"
-description = "Client library for the Qdrant vector search engine"
-optional = true
-python-versions = ">=3.9"
-groups = ["main"]
-markers = "extra == \"qdrant\""
-files = [
-    {file = "qdrant_client-1.15.0-py3-none-any.whl", hash = "sha256:f18bb311543de7e256ffa831be0d8a9d0729aaf549db7bcf95a5d356b48143f2"},
-    {file = "qdrant_client-1.15.0.tar.gz", hash = "sha256:475433b0acec51b66a132e91b631abe922accc64744bbb3180a04fe1fe889843"},
-]
-
-[package.dependencies]
-grpcio = ">=1.41.0"
-httpx = {version = ">=0.20.0", extras = ["http2"]}
-numpy = [
-    {version = ">=1.21", markers = "python_version >= \"3.10\" and python_version < \"3.12\""},
-    {version = ">=1.26", markers = "python_version == \"3.12\""},
-    {version = ">=2.1.0", markers = "python_version >= \"3.13\""},
-]
-portalocker = ">=2.7.0,<4.0"
-protobuf = ">=3.20.0"
-pydantic = ">=1.10.8,<2.0.dev0 || >2.2.0"
-urllib3 = ">=1.26.14,<3"
-
-[package.extras]
-fastembed = ["fastembed (>=0.7,<0.8)"]
-fastembed-gpu = ["fastembed-gpu (>=0.7,<0.8)"]
-
 [[package]]
 name = "rapidfuzz"
 version = "3.13.0"
@@ -12255,9 +12232,8 @@ ollama = ["transformers"]
 postgres = ["asyncpg", "pgvector", "psycopg2"]
 postgres-binary = ["asyncpg", "pgvector", "psycopg2-binary"]
 posthog = ["posthog"]
-qdrant = ["qdrant-client"]
 
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.10,<=3.13"
-content-hash = "d4112e2e71e4a50ecb60baa6a4ce1432c31ba802a308e250d7d1499e9004dba7"
+content-hash = "7682898d3c726a0b1e3d08560ceb26180e1841e89a4a417377ee876b55878a9b"
@@ -73,7 +73,6 @@ distributed = [
     "modal>=1.0.5,<2.0.0",
 ]
 
-qdrant = ["qdrant-client>=1.14.2,<2"]
 neo4j = ["neo4j>=5.28.0,<6"]
 neptune = ["langchain_aws>=0.2.22"]
 postgres = [
uv.lock (generated, 30 changed lines)
@@ -1031,9 +1031,6 @@ postgres-binary = [
 posthog = [
     { name = "posthog" },
 ]
-qdrant = [
-    { name = "qdrant-client" },
-]
 
 [package.metadata]
 requires-dist = [
@@ -1110,7 +1107,6 @@ requires-dist = [
     { name = "python-dotenv", specifier = ">=1.0.1,<2.0.0" },
     { name = "python-multipart", specifier = ">=0.0.20,<1.0.0" },
     { name = "qasync", marker = "extra == 'gui'", specifier = ">=0.27.1,<0.28" },
-    { name = "qdrant-client", marker = "extra == 'qdrant'", specifier = ">=1.14.2,<2" },
     { name = "rdflib", specifier = ">=7.1.4,<7.2.0" },
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.9.2,<1.0.0" },
     { name = "s3fs", extras = ["boto3"], specifier = "==2025.3.2" },
@@ -1131,7 +1127,7 @@ requires-dist = [
     { name = "uvicorn", marker = "extra == 'api'", specifier = ">=0.34.0,<1.0.0" },
     { name = "websockets", marker = "extra == 'api'", specifier = ">=15.0.1,<16.0.0" },
 ]
-provides-extras = ["api", "distributed", "qdrant", "neo4j", "neptune", "postgres", "postgres-binary", "notebook", "langchain", "llama-index", "gemini", "huggingface", "ollama", "mistral", "anthropic", "deepeval", "posthog", "falkordb", "groq", "chromadb", "docs", "codegraph", "evals", "gui", "graphiti", "aws", "dev", "debug"]
+provides-extras = ["api", "distributed", "neo4j", "neptune", "postgres", "postgres-binary", "notebook", "langchain", "llama-index", "gemini", "huggingface", "ollama", "mistral", "anthropic", "deepeval", "posthog", "falkordb", "groq", "chromadb", "docs", "codegraph", "evals", "gui", "graphiti", "aws", "dev", "debug"]
 
 [[package]]
 name = "colorama"
@@ -2590,11 +2586,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
 ]
 
-[package.optional-dependencies]
-http2 = [
-    { name = "h2" },
-]
-
 [[package]]
 name = "httpx-sse"
 version = "0.4.1"
@@ -6698,25 +6689,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/51/06/bc628aa2981bcfd452a08ee435b812fd3eee4ada8acb8a76c4a09d1a5a77/qasync-0.27.1-py3-none-any.whl", hash = "sha256:5d57335723bc7d9b328dadd8cb2ed7978640e4bf2da184889ce50ee3ad2602c7", size = 14866, upload-time = "2023-11-19T14:19:54.345Z" },
 ]
 
-[[package]]
-name = "qdrant-client"
-version = "1.15.1"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "grpcio" },
-    { name = "httpx", extra = ["http2"] },
-    { name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" },
-    { name = "numpy", version = "2.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" },
-    { name = "portalocker" },
-    { name = "protobuf" },
-    { name = "pydantic" },
-    { name = "urllib3" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/79/8b/76c7d325e11d97cb8eb5e261c3759e9ed6664735afbf32fdded5b580690c/qdrant_client-1.15.1.tar.gz", hash = "sha256:631f1f3caebfad0fd0c1fba98f41be81d9962b7bf3ca653bed3b727c0e0cbe0e", size = 295297, upload-time = "2025-07-31T19:35:19.627Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/ef/33/d8df6a2b214ffbe4138db9a1efe3248f67dc3c671f82308bea1582ecbbb7/qdrant_client-1.15.1-py3-none-any.whl", hash = "sha256:2b975099b378382f6ca1cfb43f0d59e541be6e16a5892f282a4b8de7eff5cb63", size = 337331, upload-time = "2025-07-31T19:35:17.539Z" },
-]
-
 [[package]]
 name = "rapidfuzz"
 version = "3.13.0"