Fix/add async lock to all vector databases (#1244)

## Description
1. Cleans up VectorDB adapters that have been migrated to the
`cognee-community` repo (Qdrant and Weaviate)
2. Adds async lock protection to the `create_collection` method of the
remaining VectorDB adapter, ChromaDB (see the sketch below)

See #1222
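
Background on item 2: without a lock, two concurrent callers can both see
`has_collection` return `False` and then both try to create the same
collection, so one call fails or duplicate work is done. The fix serializes
the check-then-create sequence behind a per-adapter `asyncio.Lock`. Below is a
minimal, self-contained sketch of the pattern; `FakeChromaClient` and
`LockedAdapter` are illustrative stand-ins, not the actual adapter code.

```python
import asyncio


class FakeChromaClient:
    """Illustrative stand-in for the async ChromaDB client."""

    def __init__(self):
        self.collections = set()
        self.create_calls = 0

    async def has_collection(self, name: str) -> bool:
        await asyncio.sleep(0)  # yield control, widening the old race window
        return name in self.collections

    async def create_collection(self, name: str, metadata: dict):
        self.create_calls += 1
        if name in self.collections:
            raise RuntimeError(f"collection {name!r} already exists")
        self.collections.add(name)


class LockedAdapter:
    def __init__(self, client):
        self.client = client
        # Same idea as the adapter's VECTOR_DB_LOCK: one lock per instance.
        self.VECTOR_DB_LOCK = asyncio.Lock()

    async def create_collection(self, name: str):
        # The existence check and the creation run under one lock, so two
        # concurrent callers can no longer both observe "missing" and then
        # both attempt to create the collection.
        async with self.VECTOR_DB_LOCK:
            if not await self.client.has_collection(name):
                await self.client.create_collection(name, {"hnsw:space": "cosine"})


async def main():
    adapter = LockedAdapter(FakeChromaClient())
    await asyncio.gather(*(adapter.create_collection("docs") for _ in range(10)))
    assert adapter.client.create_calls == 1  # created exactly once


asyncio.run(main())
```

The lock is held only for the short check-then-create critical section, so
normal reads and searches on the adapter are unaffected.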

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.

---------

Co-authored-by: Igor Ilic <30923996+dexters1@users.noreply.github.com>
Daulet Amirkhanov 2025-08-14 14:57:34 +01:00 committed by GitHub
parent 0ed6f255c2
commit b297289060
20 changed files with 32 additions and 1436 deletions

View file

@@ -29,10 +29,6 @@ on:
required: true
EMBEDDING_API_VERSION:
required: true
QDRANT_API_URL:
required: false
QDRANT_API_KEY:
required: false
POSTGRES_PASSWORD:
required: false
@@ -113,45 +109,6 @@ jobs:
run: |
poetry run python examples/database_examples/kuzu_example.py
run-db-example-qdrant:
name: "Qdrant DB Example Test"
runs-on: ubuntu-22.04
if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'qdrant') }}
defaults:
run:
shell: bash
steps:
- name: Check out
uses: actions/checkout@master
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: ${{ inputs.python-version }}
- name: Install Qdrant extra
run: |
poetry install -E qdrant
- name: Run Qdrant Example
env:
ENV: dev
LLM_MODEL: ${{ secrets.LLM_MODEL }}
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
VECTOR_DB_URL: ${{ secrets.QDRANT_API_URL }}
VECTOR_DB_KEY: ${{ secrets.QDRANT_API_KEY }}
run: |
poetry run python examples/database_examples/qdrant_example.py
run-db-example-pgvector:
name: "PostgreSQL PGVector DB Example Test"
runs-on: ubuntu-22.04

View file

@@ -59,65 +59,6 @@ jobs:
# run: poetry run python ./cognee/tests/test_chromadb.py
run_qdrant_integration_test:
name: Qdrant Tests
runs-on: ubuntu-latest
if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'qdrant') }}
defaults:
run:
shell: bash
services:
qdrant:
image: qdrant/qdrant:v1.14.1
env:
QDRANT__LOG_LEVEL: ERROR
QDRANT__SERVICE__API_KEY: qdrant_api_key
QDRANT__SERVICE__ENABLE_TLS: 0
ports:
- 6333:6333
steps:
- name: Check out
uses: actions/checkout@master
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: ${{ inputs.python-version }}
- name: Install specific db dependency
run: |
poetry install -E qdrant
- name: Wait for Qdrant to be healthy
run: |
for i in {1..10}; do
if curl -f http://127.0.0.1:6333/healthz; then
echo "Qdrant is healthy!"
exit 0
fi
echo "Waiting for Qdrant to be healthy..."
sleep 3
done
echo "Qdrant failed to become healthy in time"
exit 1
- name: Run default Qdrant
env:
ENV: 'dev'
LLM_MODEL: ${{ secrets.LLM_MODEL }}
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
VECTOR_DB_URL: 127.0.0.1
VECTOR_DB_KEY: qdrant_api_key
run: poetry run python ./cognee/tests/test_qdrant.py
run-postgres-tests:
name: PostgreSQL Tests
runs-on: ubuntu-22.04

View file

@@ -31,7 +31,7 @@ COPY README.md pyproject.toml uv.lock entrypoint.sh ./
# Install the project's dependencies using the lockfile and settings
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --extra debug --extra api --extra postgres --extra qdrant --extra neo4j --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-install-project --no-dev --no-editable
uv sync --extra debug --extra api --extra postgres --extra neo4j --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-install-project --no-dev --no-editable
# Copy Alembic configuration
COPY alembic.ini /app/alembic.ini
@@ -42,7 +42,7 @@ COPY alembic/ /app/alembic
COPY ./cognee /app/cognee
COPY ./distributed /app/distributed
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --extra debug --extra api --extra postgres --extra qdrant --extra neo4j --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-dev --no-editable
uv sync --extra debug --extra api --extra postgres --extra neo4j --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-dev --no-editable
FROM python:3.12-slim-bookworm

View file

@@ -130,7 +130,7 @@ async def add(
- LLM_MODEL: Model name (default: "gpt-4o-mini")
- DEFAULT_USER_EMAIL: Custom default user email
- DEFAULT_USER_PASSWORD: Custom default user password
- VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "qdrant", "weaviate"
- VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "pgvector"
- GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j", "networkx"
Raises:
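
For reference, the configuration API these docs describe is the same one used
by the example scripts removed in this PR. A hypothetical setup for one of the
remaining providers (ChromaDB) might look like the following; the URL and key
values are placeholders, not documented defaults:

```python
import cognee

# Choose one of the remaining providers listed above; the URL/key values
# here are illustrative placeholders for a locally running ChromaDB.
cognee.config.set_vector_db_config(
    {
        "vector_db_url": "http://localhost:8000",
        "vector_db_key": "",
        "vector_db_provider": "chromadb",
    }
)
```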

View file

@@ -30,8 +30,6 @@ class VectorDBConfigInputDTO(InDTO):
provider: Union[
Literal["lancedb"],
Literal["chromadb"],
Literal["qdrant"],
Literal["weaviate"],
Literal["pgvector"],
]
url: str

View file

@@ -1,4 +1,5 @@
import json
import asyncio
from uuid import UUID
from typing import List, Optional
from chromadb import AsyncHttpClient, Settings
@@ -161,6 +162,7 @@ class ChromaDBAdapter(VectorDBInterface):
self.embedding_engine = embedding_engine
self.url = url
self.api_key = api_key
self.VECTOR_DB_LOCK = asyncio.Lock()
async def get_connection(self) -> AsyncHttpClient:
"""
@@ -224,10 +226,13 @@ class ChromaDBAdapter(VectorDBInterface):
- collection_name (str): The name of the collection to create.
- payload_schema: The schema for the payload; can be None. (default None)
"""
client = await self.get_connection()
async with self.VECTOR_DB_LOCK:
client = await self.get_connection()
if not await self.has_collection(collection_name):
await client.create_collection(name=collection_name, metadata={"hnsw:space": "cosine"})
if not await self.has_collection(collection_name):
await client.create_collection(
name=collection_name, metadata={"hnsw:space": "cosine"}
)
async def get_collection(self, collection_name: str) -> AsyncHttpClient:
"""

View file

@@ -19,7 +19,7 @@ def create_vector_engine(
for each provider, raising an EnvironmentError if any are missing, or ImportError if the
ChromaDB package is not installed.
Supported providers include: Qdrant, pgvector, FalkorDB, ChromaDB, and
Supported providers include: pgvector, FalkorDB, ChromaDB, and
LanceDB.
Parameters:
@@ -30,7 +30,7 @@ def create_vector_engine(
providers.
- vector_db_key (str): The API key or access token for the vector database instance.
- vector_db_provider (str): The name of the vector database provider to use (e.g.,
'qdrant', 'pgvector').
'pgvector').
Returns:
--------
@@ -48,19 +48,7 @@ def create_vector_engine(
embedding_engine=embedding_engine,
)
if vector_db_provider == "qdrant":
if not (vector_db_url and vector_db_key):
raise EnvironmentError("Missing required Qdrant credentials!")
from .qdrant.QDrantAdapter import QDrantAdapter
return QDrantAdapter(
url=vector_db_url,
api_key=vector_db_key,
embedding_engine=embedding_engine,
)
elif vector_db_provider == "pgvector":
if vector_db_provider == "pgvector":
from cognee.infrastructure.databases.relational import get_relational_config
# Get configuration for postgres database

View file

@@ -1,513 +0,0 @@
import os
from typing import Dict, List, Optional
from qdrant_client import AsyncQdrantClient, models
from cognee.shared.logging_utils import get_logger
from cognee.infrastructure.engine.utils import parse_id
from cognee.exceptions import InvalidValueError
from cognee.infrastructure.engine import DataPoint
from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError
from cognee.infrastructure.databases.vector.models.ScoredResult import ScoredResult
from ..embeddings.EmbeddingEngine import EmbeddingEngine
from ..vector_db_interface import VectorDBInterface
logger = get_logger("QDrantAdapter")
class IndexSchema(DataPoint):
"""
Represents a schema for indexing where each data point contains a text field.
This class inherits from DataPoint and defines a text attribute as well as metadata
containing index fields used for indexing operations.
"""
text: str
metadata: dict = {"index_fields": ["text"]}
# class CollectionConfig(BaseModel, extra = "forbid"):
# vector_config: Dict[str, models.VectorParams] = Field(..., description="Vectors configuration" )
# hnsw_config: Optional[models.HnswConfig] = Field(default = None, description="HNSW vector index configuration")
# optimizers_config: Optional[models.OptimizersConfig] = Field(default = None, description="Optimizers configuration")
# quantization_config: Optional[models.QuantizationConfig] = Field(default = None, description="Quantization configuration")
def create_hnsw_config(hnsw_config: Dict):
"""
Create HNSW configuration.
This function returns an HNSW configuration object if the provided configuration is not
None, otherwise it returns None.
Parameters:
-----------
- hnsw_config (Dict): A dictionary containing HNSW configuration parameters.
Returns:
--------
An instance of models.HnswConfig if hnsw_config is not None, otherwise None.
"""
if hnsw_config is not None:
return models.HnswConfig()
return None
def create_optimizers_config(optimizers_config: Dict):
"""
Create and return an OptimizersConfig instance if the input configuration is provided.
This function checks if the given optimizers configuration is not None. If valid, it
initializes and returns a new instance of the OptimizersConfig class from the models
module. If the configuration is None, it returns None instead.
Parameters:
-----------
- optimizers_config (Dict): A dictionary containing optimizer configuration
settings.
Returns:
--------
Returns an instance of OptimizersConfig if optimizers_config is provided; otherwise,
returns None.
"""
if optimizers_config is not None:
return models.OptimizersConfig()
return None
def create_quantization_config(quantization_config: Dict):
"""
Create a quantization configuration based on the provided settings.
This function generates an instance of `QuantizationConfig` if the provided
`quantization_config` is not None. If it is None, the function returns None.
Parameters:
-----------
- quantization_config (Dict): A dictionary containing the quantization configuration
settings.
Returns:
--------
An instance of `QuantizationConfig` if `quantization_config` is provided; otherwise,
returns None.
"""
if quantization_config is not None:
return models.QuantizationConfig()
return None
class QDrantAdapter(VectorDBInterface):
"""
Adapt to the Qdrant vector database interface.
Public methods:
- get_qdrant_client
- embed_data
- has_collection
- create_collection
- create_data_points
- create_vector_index
- index_data_points
- retrieve
- search
- batch_search
- delete_data_points
- prune
"""
name = "Qdrant"
url: str = None
api_key: str = None
qdrant_path: str = None
def __init__(self, url, api_key, embedding_engine: EmbeddingEngine, qdrant_path=None):
self.embedding_engine = embedding_engine
if qdrant_path is not None:
self.qdrant_path = qdrant_path
else:
self.url = url
self.api_key = api_key
def get_qdrant_client(self) -> AsyncQdrantClient:
"""
Retrieve an instance of AsyncQdrantClient configured with the appropriate
settings based on the instance's attributes.
Returns an instance of AsyncQdrantClient configured to connect to the database.
Returns:
--------
- AsyncQdrantClient: An instance of AsyncQdrantClient configured for database
operations.
"""
is_prod = os.getenv("ENV").lower() == "prod"
if self.qdrant_path is not None:
return AsyncQdrantClient(path=self.qdrant_path, port=6333, https=is_prod)
elif self.url is not None:
return AsyncQdrantClient(url=self.url, api_key=self.api_key, port=6333, https=is_prod)
return AsyncQdrantClient(location=":memory:")
async def embed_data(self, data: List[str]) -> List[float]:
"""
Embed a list of text data into vector representations asynchronously.
Parameters:
-----------
- data (List[str]): A list of strings containing the text data to be embedded.
Returns:
--------
- List[float]: A list of floating-point vectors representing the embedded text data.
"""
return await self.embedding_engine.embed_text(data)
async def has_collection(self, collection_name: str) -> bool:
"""
Check if a specified collection exists in the Qdrant database asynchronously.
Parameters:
-----------
- collection_name (str): The name of the collection to check for existence.
Returns:
--------
- bool: True if the specified collection exists, False otherwise.
"""
client = self.get_qdrant_client()
result = await client.collection_exists(collection_name)
await client.close()
return result
async def create_collection(
self,
collection_name: str,
payload_schema=None,
):
"""
Create a new collection in the Qdrant database if it does not already exist.
If the collection already exists, this operation has no effect.
Parameters:
-----------
- collection_name (str): The name of the collection to create.
- payload_schema: Optional schema for the payload. Defaults to None. (default None)
"""
client = self.get_qdrant_client()
if not await client.collection_exists(collection_name):
await client.create_collection(
collection_name=collection_name,
vectors_config={
"text": models.VectorParams(
size=self.embedding_engine.get_vector_size(), distance="Cosine"
)
},
)
await client.close()
async def create_data_points(self, collection_name: str, data_points: List[DataPoint]):
"""
Create and upload data points to a specified collection in the database.
Raises CollectionNotFoundError if the collection does not exist.
Parameters:
-----------
- collection_name (str): The name of the collection to which data points will be
uploaded.
- data_points (List[DataPoint]): A list of DataPoint objects to be uploaded.
Returns:
--------
None if the operation is successful; raises exceptions on error.
"""
from qdrant_client.http.exceptions import UnexpectedResponse
client = self.get_qdrant_client()
data_vectors = await self.embed_data(
[DataPoint.get_embeddable_data(data_point) for data_point in data_points]
)
def convert_to_qdrant_point(data_point: DataPoint):
"""
Convert a DataPoint object into the format expected by Qdrant for upload.
Parameters:
-----------
- data_point (DataPoint): The DataPoint object to convert.
Returns:
--------
None; performs an operation without returning a value.
"""
return models.PointStruct(
id=str(data_point.id),
payload=data_point.model_dump(),
vector={"text": data_vectors[data_points.index(data_point)]},
)
points = [convert_to_qdrant_point(point) for point in data_points]
try:
client.upload_points(collection_name=collection_name, points=points)
except UnexpectedResponse as error:
if "Collection not found" in str(error):
raise CollectionNotFoundError(
message=f"Collection {collection_name} not found!"
) from error
else:
raise error
except Exception as error:
logger.error("Error uploading data points to Qdrant: %s", str(error))
raise error
finally:
await client.close()
async def create_vector_index(self, index_name: str, index_property_name: str):
"""
Create a vector index for a specified property name.
This is essentially a wrapper around create_collection, which allows for more
flexibility
in index naming.
Parameters:
-----------
- index_name (str): The base name for the index to be created.
- index_property_name (str): The property name that will be part of the index name.
"""
await self.create_collection(f"{index_name}_{index_property_name}")
async def index_data_points(
self, index_name: str, index_property_name: str, data_points: list[DataPoint]
):
"""
Index data points into a specific collection based on provided metadata.
Transforms DataPoint objects into an appropriate format and uploads them.
Parameters:
-----------
- index_name (str): The base name for the index used for naming the collection.
- index_property_name (str): The property name used for naming the collection.
- data_points (list[DataPoint]): A list of DataPoint objects to index.
"""
await self.create_data_points(
f"{index_name}_{index_property_name}",
[
IndexSchema(
id=data_point.id,
text=getattr(data_point, data_point.metadata["index_fields"][0]),
)
for data_point in data_points
],
)
async def retrieve(self, collection_name: str, data_point_ids: list[str]):
"""
Retrieve data points from a specified collection based on their IDs.
Returns the data corresponding to the provided IDs from the collection.
Parameters:
-----------
- collection_name (str): The name of the collection to retrieve from.
- data_point_ids (list[str]): A list of IDs of the data points to retrieve.
Returns:
--------
The retrieved data points, including payloads for each ID.
"""
client = self.get_qdrant_client()
results = await client.retrieve(collection_name, data_point_ids, with_payload=True)
await client.close()
return results
async def search(
self,
collection_name: str,
query_text: Optional[str] = None,
query_vector: Optional[List[float]] = None,
limit: int = 15,
with_vector: bool = False,
) -> List[ScoredResult]:
"""
Search for data points in a collection based on either a textual query or a vector
query.
Raises InvalidValueError if both query_text and query_vector are None.
Returns a list of scored results that match the search criteria.
Parameters:
-----------
- collection_name (str): The name of the collection to search within.
- query_text (Optional[str]): The text to be used in the search query; optional if
query_vector is provided. (default None)
- query_vector (Optional[List[float]]): The vector to be used in the search query;
optional if query_text is provided. (default None)
- limit (int): The maximum number of results to return; defaults to 15. (default 15)
- with_vector (bool): Indicates whether to return vector data along with results;
defaults to False. (default False)
Returns:
--------
- List[ScoredResult]: A list of ScoredResult objects representing the results of the
search.
"""
from qdrant_client.http.exceptions import UnexpectedResponse
if query_text is None and query_vector is None:
raise InvalidValueError(message="One of query_text or query_vector must be provided!")
if not await self.has_collection(collection_name):
return []
if query_vector is None:
query_vector = (await self.embed_data([query_text]))[0]
try:
client = self.get_qdrant_client()
if limit == 0:
collection_size = await client.count(collection_name=collection_name)
results = await client.search(
collection_name=collection_name,
query_vector=models.NamedVector(
name="text",
vector=query_vector
if query_vector is not None
else (await self.embed_data([query_text]))[0],
),
limit=limit if limit > 0 else collection_size.count,
with_vectors=with_vector,
)
await client.close()
return [
ScoredResult(
id=parse_id(result.id),
payload={
**result.payload,
"id": parse_id(result.id),
},
score=1 - result.score,
)
for result in results
]
finally:
await client.close()
async def batch_search(
self,
collection_name: str,
query_texts: List[str],
limit: int = None,
with_vectors: bool = False,
):
"""
Perform a batch search in a specified collection using multiple query texts.
Returns the results of the search for each query, filtering for results with a score
higher than 0.9.
Parameters:
-----------
- collection_name (str): The name of the collection to search in.
- query_texts (List[str]): A list of query texts to search for in the collection.
- limit (int): The maximum number of results to return for each search request; can
be None. (default None)
- with_vectors (bool): Indicates whether to include vector data in the results;
defaults to False. (default False)
Returns:
--------
A list containing the filtered search results for each query text.
"""
vectors = await self.embed_data(query_texts)
# Generate dynamic search requests based on the provided embeddings
requests = [
models.SearchRequest(
vector=models.NamedVector(name="text", vector=vector),
limit=limit,
with_vector=with_vectors,
)
for vector in vectors
]
client = self.get_qdrant_client()
# Perform batch search with the dynamically generated requests
results = await client.search_batch(collection_name=collection_name, requests=requests)
await client.close()
return [filter(lambda result: result.score > 0.9, result_group) for result_group in results]
async def delete_data_points(self, collection_name: str, data_point_ids: list[str]):
"""
Delete specific data points from a specified collection based on their IDs.
Parameters:
-----------
- collection_name (str): The name of the collection from which to delete the data
points.
- data_point_ids (list[str]): The list of IDs of data points to be deleted.
Returns:
--------
The result of the delete operation from the database.
"""
client = self.get_qdrant_client()
results = await client.delete(collection_name, data_point_ids)
return results
async def prune(self):
"""
Remove all collections from the Qdrant database asynchronously.
"""
client = self.get_qdrant_client()
response = await client.get_collections()
for collection in response.collections:
await client.delete_collection(collection.name)
await client.close()

View file

@@ -1,2 +0,0 @@
from .QDrantAdapter import QDrantAdapter
from ..models.CollectionConfig import CollectionConfig

View file

@@ -1,527 +0,0 @@
from typing import List, Optional
from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential
from cognee.shared.logging_utils import get_logger
from cognee.exceptions import InvalidValueError
from cognee.infrastructure.engine import DataPoint
from cognee.infrastructure.engine.utils import parse_id
from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError
from distributed.utils import override_distributed
from distributed.tasks.queued_add_data_points import queued_add_data_points
from ..embeddings.EmbeddingEngine import EmbeddingEngine
from ..models.ScoredResult import ScoredResult
from ..vector_db_interface import VectorDBInterface
logger = get_logger("WeaviateAdapter")
def is_retryable_request(error):
from weaviate.exceptions import UnexpectedStatusCodeException
from requests.exceptions import RequestException
if isinstance(error, UnexpectedStatusCodeException):
# Retry on conflict, service unavailable, internal error
return error.status_code in {409, 503, 500}
if isinstance(error, RequestException):
return True # Includes timeout, connection error, etc.
return False
class IndexSchema(DataPoint):
"""
Define a schema for indexing data points with textual content.
The IndexSchema class inherits from DataPoint and includes the following public
attributes:
- text: A string representing the main content of the data point.
- metadata: A dictionary containing indexing information, specifically the fields to be
indexed (in this case, the 'text' field).
"""
text: str
metadata: dict = {"index_fields": ["text"]}
class WeaviateAdapter(VectorDBInterface):
"""
Adapt the Weaviate vector database to an interface for managing collections and data
points.
Public methods:
- get_client
- embed_data
- has_collection
- create_collection
- get_collection
- create_data_points
- create_vector_index
- index_data_points
- retrieve
- search
- batch_search
- delete_data_points
- prune
"""
name = "Weaviate"
url: str
api_key: str
embedding_engine: EmbeddingEngine = None
def __init__(self, url: str, api_key: str, embedding_engine: EmbeddingEngine):
import weaviate
import weaviate.classes as wvc
self.url = url
self.api_key = api_key
self.embedding_engine = embedding_engine
self.client = weaviate.use_async_with_weaviate_cloud(
cluster_url=url,
auth_credentials=weaviate.auth.AuthApiKey(api_key),
additional_config=wvc.init.AdditionalConfig(timeout=wvc.init.Timeout(init=30)),
)
async def get_client(self):
"""
Establish a connection to the Weaviate client.
Return the Weaviate client instance after connecting asynchronously.
Returns:
--------
The Weaviate client instance.
"""
await self.client.connect()
return self.client
async def embed_data(self, data: List[str]) -> List[float]:
"""
Embed the given text data into vector representations.
Given a list of strings, return their vector embeddings using the configured embedding
engine.
Parameters:
-----------
- data (List[str]): A list of strings to be embedded.
Returns:
--------
- List[float]: A list of float vectors corresponding to the embedded text data.
"""
return await self.embedding_engine.embed_text(data)
async def has_collection(self, collection_name: str) -> bool:
"""
Check if a collection exists in the Weaviate database.
Return a boolean indicating the presence of the specified collection.
Parameters:
-----------
- collection_name (str): The name of the collection to check.
Returns:
--------
- bool: True if the collection exists, otherwise False.
"""
client = await self.get_client()
return await client.collections.exists(collection_name)
@retry(
retry=retry_if_exception(is_retryable_request),
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=2, min=1, max=6),
)
async def create_collection(
self,
collection_name: str,
payload_schema=None,
):
"""
Create a new collection in the Weaviate database if it does not already exist.
The collection will be initialized with a default schema.
Parameters:
-----------
- collection_name (str): The name of the new collection to be created.
- payload_schema: Optional schema definition for the collection payload. (default
None)
Returns:
--------
The created collection's configuration, if a new collection was made, otherwise
information about the existing collection.
"""
import weaviate.classes.config as wvcc
if not await self.has_collection(collection_name):
client = await self.get_client()
return await client.collections.create(
name=collection_name,
properties=[
wvcc.Property(
name="text", data_type=wvcc.DataType.TEXT, skip_vectorization=True
)
],
)
else:
return await self.get_collection(collection_name)
async def get_collection(self, collection_name: str):
"""
Retrieve a collection from the Weaviate database by its name.
Raise a CollectionNotFoundError if the specified collection does not exist.
Parameters:
-----------
- collection_name (str): The name of the collection to be retrieved.
Returns:
--------
The requested collection object from the database.
"""
if not await self.has_collection(collection_name):
raise CollectionNotFoundError(f"Collection '{collection_name}' not found.")
client = await self.get_client()
return client.collections.get(collection_name)
@retry(
retry=retry_if_exception(is_retryable_request),
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=2, min=1, max=6),
)
@override_distributed(queued_add_data_points)
async def create_data_points(self, collection_name: str, data_points: List[DataPoint]):
"""
Create or update data points in the specified collection in the Weaviate database.
Process the list of data points, embedding them and either inserting them or updating if
they already exist.
Parameters:
-----------
- collection_name (str): The name of the collection to add data points to.
- data_points (List[DataPoint]): A list of DataPoint objects to be created or
updated in the collection.
Returns:
--------
Information about the inserted or updated data points in the collection.
"""
from weaviate.classes.data import DataObject
data_vectors = await self.embed_data(
[DataPoint.get_embeddable_data(data_point) for data_point in data_points]
)
def convert_to_weaviate_data_points(data_point: DataPoint):
"""
Transform a DataPoint object into a Weaviate DataObject format for insertion.
Return a DataObject ready for use in Weaviate with the properties and vector included.
Parameters:
-----------
- data_point (DataPoint): The DataPoint to convert into the Weaviate DataObject
format.
Returns:
--------
The corresponding Weaviate DataObject representing the data point.
"""
vector = data_vectors[data_points.index(data_point)]
properties = data_point.model_dump()
if "id" in properties:
properties["uuid"] = str(data_point.id)
del properties["id"]
return DataObject(uuid=data_point.id, properties=properties, vector=vector)
data_points = [convert_to_weaviate_data_points(data_point) for data_point in data_points]
collection = await self.get_collection(collection_name)
try:
if len(data_points) > 1:
return await collection.data.insert_many(data_points)
# with collection.batch.dynamic() as batch:
# for data_point in data_points:
# batch.add_object(
# uuid=data_point.uuid,
# vector=data_point.vector,
# properties=data_point.properties,
# references=data_point.references,
# )
else:
data_point: DataObject = data_points[0]
if await collection.data.exists(data_point.uuid):
return await collection.data.update(
uuid=data_point.uuid,
vector=data_point.vector,
properties=data_point.properties,
references=data_point.references,
)
else:
return await collection.data.insert(
uuid=data_point.uuid,
vector=data_point.vector,
properties=data_point.properties,
references=data_point.references,
)
except Exception as error:
logger.error("Error creating data points: %s", str(error))
raise error
async def create_vector_index(self, index_name: str, index_property_name: str):
"""
Create a vector index based on an index name and property name by creating a
corresponding collection.
Parameters:
-----------
- index_name (str): The name for the vector index.
- index_property_name (str): The property name associated with the vector index.
Returns:
--------
The created collection representing the vector index.
"""
return await self.create_collection(f"{index_name}_{index_property_name}")
async def index_data_points(
self, index_name: str, index_property_name: str, data_points: list[DataPoint]
):
"""
Index a list of data points by creating an associated vector index collection.
Data points are transformed into embeddable data before being processed for indexing.
Parameters:
-----------
- index_name (str): The index name under which to store the data points.
- index_property_name (str): The associated property name for the index.
- data_points (list[DataPoint]): A list of DataPoint objects to be indexed.
Returns:
--------
Information about the operation of indexing the data points.
"""
return await self.create_data_points(
f"{index_name}_{index_property_name}",
[
IndexSchema(
id=data_point.id,
text=DataPoint.get_embeddable_data(data_point),
)
for data_point in data_points
],
)
async def retrieve(self, collection_name: str, data_point_ids: list[str]):
"""
Fetch data points from a specified collection based on their IDs.
Return data points wrapped in an object containing their properties after
transformation.
Parameters:
-----------
- collection_name (str): The name of the collection to retrieve data points from.
- data_point_ids (list[str]): A list of IDs for the data points to retrieve.
Returns:
--------
A list of objects representing the retrieved data points.
"""
from weaviate.classes.query import Filter
collection = await self.get_collection(collection_name)
data_points = await collection.query.fetch_objects(
filters=Filter.by_id().contains_any(data_point_ids)
)
for data_point in data_points.objects:
data_point.payload = data_point.properties
data_point.id = data_point.uuid
del data_point.properties
return data_points.objects
async def search(
self,
collection_name: str,
query_text: Optional[str] = None,
query_vector: Optional[List[float]] = None,
limit: int = 15,
with_vector: bool = False,
):
"""
Perform a search on a collection using either a text query or a vector query.
Return scored results based on the search criteria provided. Raise InvalidValueError if
no query is provided.
Parameters:
-----------
- collection_name (str): The name of the collection to search within.
- query_text (Optional[str]): Optional plain text query for searching. (default
None)
- query_vector (Optional[List[float]]): Optional vector representation for
searching. (default None)
- limit (int): The maximum number of results to return. (default 15)
- with_vector (bool): Include vector information in the results. (default False)
Returns:
--------
A list of scored results matching the search criteria.
"""
import weaviate.classes as wvc
import weaviate.exceptions
if query_text is None and query_vector is None:
raise InvalidValueError(message="One of query_text or query_vector must be provided!")
if query_vector is None:
query_vector = (await self.embed_data([query_text]))[0]
collection = await self.get_collection(collection_name)
try:
search_result = await collection.query.hybrid(
query=None,
vector=query_vector,
limit=limit if limit > 0 else None,
include_vector=with_vector,
return_metadata=wvc.query.MetadataQuery(score=True),
)
return [
ScoredResult(
id=parse_id(str(result.uuid)),
payload=result.properties,
score=1 - float(result.metadata.score),
)
for result in search_result.objects
]
except weaviate.exceptions.WeaviateInvalidInputError:
# Ignore if the collection doesn't exist
return []
async def batch_search(
self, collection_name: str, query_texts: List[str], limit: int, with_vectors: bool = False
):
"""
Execute a batch search for multiple query texts in the specified collection.
Return a list of results for each query performed in parallel.
Parameters:
-----------
- collection_name (str): The name of the collection to search within.
- query_texts (List[str]): A list of text queries to be processed in a batch.
- limit (int): The maximum number of results to return for each query.
- with_vectors (bool): Indicate whether to include vector information in the
results. (default False)
Returns:
--------
A list containing results for each search query executed.
"""
def query_search(query_vector):
"""
Wrap the search operation based on a query vector for fetching results.
This function coordinates the search call, ensuring the collection name and search
parameters are applied.
Parameters:
-----------
- query_vector: The vector representation of the query for searching.
Returns:
--------
The results of the search operation on the specified collection.
"""
return self.search(
collection_name, query_vector=query_vector, limit=limit, with_vector=with_vectors
)
return [
await query_search(query_vector) for query_vector in await self.embed_data(query_texts)
]
async def delete_data_points(self, collection_name: str, data_point_ids: list[str]):
"""
Remove specified data points from a collection based on their IDs.
Return information about the deletion result, ideally confirming the operation's
success.
Parameters:
-----------
- collection_name (str): The name of the collection from which to delete data
points.
- data_point_ids (list[str]): A list of IDs for the data points to be deleted.
Returns:
--------
Confirmation of deletion operation result.
"""
from weaviate.classes.query import Filter
collection = await self.get_collection(collection_name)
result = await collection.data.delete_many(
filters=Filter.by_id().contains_any(data_point_ids)
)
return result
async def prune(self):
"""
Delete all collections from the Weaviate database.
This operation will remove all data and cannot be undone.
"""
client = await self.get_client()
await client.collections.delete_all()

View file

@@ -1 +0,0 @@
from .WeaviateAdapter import WeaviateAdapter

View file

@@ -43,10 +43,6 @@ def get_settings() -> SettingsDict:
llm_config = get_llm_config()
vector_dbs = [
{
"value": "qdrant",
"label": "Qdrant",
},
{
"value": "lancedb",
"label": "LanceDB",

View file

@@ -6,7 +6,7 @@ from cognee.infrastructure.databases.vector import get_vectordb_config
class VectorDBConfig(BaseModel):
url: str
api_key: str
provider: Union[Literal["lancedb"], Literal["qdrant"], Literal["pgvector"]]
provider: Union[Literal["lancedb"], Literal["pgvector"]]
async def save_vector_db_config(vector_db_config: VectorDBConfig):

View file

@@ -1,99 +0,0 @@
import os
import pathlib
import cognee
from cognee.infrastructure.files.storage import get_storage_config
from cognee.modules.search.operations import get_history
from cognee.modules.users.methods import get_default_user
from cognee.shared.logging_utils import get_logger
from cognee.modules.search.types import SearchType
logger = get_logger()
async def main():
cognee.config.set_vector_db_provider("qdrant")
data_directory_path = str(
pathlib.Path(
os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_qdrant")
).resolve()
)
cognee.config.data_root_directory(data_directory_path)
cognee_directory_path = str(
pathlib.Path(
os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_qdrant")
).resolve()
)
cognee.config.system_root_directory(cognee_directory_path)
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)
dataset_name = "cs_explanations"
explanation_file_path = os.path.join(
pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
)
await cognee.add([explanation_file_path], dataset_name)
text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena.
At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states.
Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible.
The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly.
Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate.
In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited.
"""
await cognee.add([text], dataset_name)
await cognee.cognify([dataset_name])
from cognee.infrastructure.databases.vector import get_vector_engine
vector_engine = get_vector_engine()
search_results = await vector_engine.search("Entity_name", "Quantum computer")
assert len(search_results) != 0, "The search results list is empty."
random_node = search_results[0]
random_node_name = random_node.payload["text"]
search_results = await cognee.search(
query_type=SearchType.INSIGHTS, query_text=random_node_name
)
assert len(search_results) != 0, "The search results list is empty."
print("\n\nExtracted sentences are:\n")
for result in search_results:
print(f"{result}\n")
search_results = await cognee.search(query_type=SearchType.CHUNKS, query_text=random_node_name)
assert len(search_results) != 0, "The search results list is empty."
print("\n\nExtracted chunks are:\n")
for result in search_results:
print(f"{result}\n")
search_results = await cognee.search(
query_type=SearchType.SUMMARIES, query_text=random_node_name
)
assert len(search_results) != 0, "Query related summaries don't exist."
print("\nExtracted summaries are:\n")
for result in search_results:
print(f"{result}\n")
user = await get_default_user()
history = await get_history(user.id)
assert len(history) == 6, "Search history is not correct."
await cognee.prune.prune_data()
data_root_directory = get_storage_config()["data_root_directory"]
assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
await cognee.prune.prune_system(metadata=True)
qdrant_client = get_vector_engine().get_qdrant_client()
collections_response = await qdrant_client.get_collections()
assert len(collections_response.collections) == 0, "QDrant vector database is not empty"
if __name__ == "__main__":
import asyncio
asyncio.run(main())

View file

@@ -3,7 +3,7 @@ FROM python:3.11-slim
# Define Poetry extras to install
ARG POETRY_EXTRAS="\
# Storage & Databases \
postgres qdrant neo4j falkordb kuzu \
postgres neo4j falkordb kuzu \
# Notebooks & Interactive Environments \
notebook \
# LLM & AI Frameworks \

View file

@@ -71,7 +71,6 @@ distributed = [
"modal>=1.0.5,<2.0.0",
]
qdrant = ["qdrant-client>=1.14.2,<2"]
neo4j = ["neo4j>=5.28.0,<6"]
postgres = [
"psycopg2>=2.9.10,<3",

View file

@@ -1,93 +0,0 @@
import os
import pathlib
import asyncio
import cognee
from cognee.modules.search.types import SearchType
async def main():
"""
Example script demonstrating how to use Cognee with Qdrant
This example:
1. Configures Cognee to use Qdrant as vector database
2. Sets up data directories
3. Adds sample data to Cognee
4. Processes (cognifies) the data
5. Performs different types of searches
"""
# Set up Qdrant credentials in .env file and get the values from environment variables
qdrant_url = os.getenv("VECTOR_DB_URL")
qdrant_key = os.getenv("VECTOR_DB_KEY")
# Configure Qdrant as the vector database provider
cognee.config.set_vector_db_config(
{
"vector_db_url": qdrant_url, # Enter Qdrant URL
"vector_db_key": qdrant_key, # API key needed
"vector_db_provider": "qdrant", # Specify Qdrant as provider
}
)
# Set up data directories for storing documents and system files
# You should adjust these paths to your needs
current_dir = pathlib.Path(__file__).parent
data_directory_path = str(current_dir / "data_storage")
cognee.config.data_root_directory(data_directory_path)
cognee_directory_path = str(current_dir / "cognee_system")
cognee.config.system_root_directory(cognee_directory_path)
# Clean any existing data (optional)
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)
# Create a dataset
dataset_name = "qdrant_example"
# Add sample text to the dataset
sample_text = """Qdrant is a vector similarity search engine and vector database.
It provides a production-ready service with a convenient API for storing, searching, and managing vectors.
Qdrant supports filtering during vector search, which is essential for real-world applications.
The database implements various performance optimizations, including HNSW index for approximate nearest neighbor search.
Qdrant can be deployed via Docker, as a managed cloud service, or directly on bare metal.
It also supports payload and metadata storage alongside the vectors, allowing for rich data retrieval."""
# Add the sample text to the dataset
await cognee.add([sample_text], dataset_name)
# Process the added document to extract knowledge
await cognee.cognify([dataset_name])
# Now let's perform some searches
# 1. Search for insights related to "Qdrant"
insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="Qdrant")
print("\nInsights about Qdrant:")
for result in insights_results:
print(f"- {result}")
# 2. Search for text chunks related to "vector search"
chunks_results = await cognee.search(
query_type=SearchType.CHUNKS, query_text="vector search", datasets=[dataset_name]
)
print("\nChunks about vector search:")
for result in chunks_results:
print(f"- {result}")
# 3. Get graph completion related to databases
graph_completion_results = await cognee.search(
query_type=SearchType.GRAPH_COMPLETION, query_text="database"
)
print("\nGraph completion for databases:")
for result in graph_completion_results:
print(f"- {result}")
# Clean up (optional)
# await cognee.prune.prune_data()
# await cognee.prune.prune_system(metadata=True)
if __name__ == "__main__":
asyncio.run(main())

poetry.lock (generated)
View file

@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand.
# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand.
[[package]]
name = "aiobotocore"
@@ -2873,7 +2873,7 @@ description = "HTTP/2-based RPC framework"
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"gemini\" or extra == \"qdrant\" or extra == \"deepeval\""
markers = "extra == \"gemini\" or extra == \"deepeval\""
files = [
{file = "grpcio-1.74.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:85bd5cdf4ed7b2d6438871adf6afff9af7096486fcf51818a81b77ef4dd30907"},
{file = "grpcio-1.74.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:68c8ebcca945efff9d86d8d6d7bfb0841cf0071024417e2d7f45c5e46b5b08eb"},
@@ -3011,7 +3011,7 @@ description = "Pure-Python HTTP/2 protocol implementation"
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"distributed\" or extra == \"qdrant\""
markers = "extra == \"distributed\""
files = [
{file = "h2-4.2.0-py3-none-any.whl", hash = "sha256:479a53ad425bb29af087f3458a61d30780bc818e4ebcf01f0b536ba916462ed0"},
{file = "h2-4.2.0.tar.gz", hash = "sha256:c8a52129695e88b1a0578d8d2cc6842bbd79128ac685463b887ee278126ad01f"},
@@ -3082,7 +3082,7 @@ description = "Pure-Python HPACK header encoding"
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"distributed\" or extra == \"qdrant\""
markers = "extra == \"distributed\""
files = [
{file = "hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496"},
{file = "hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca"},
@@ -3233,7 +3233,6 @@ files = [
[package.dependencies]
anyio = "*"
certifi = "*"
h2 = {version = ">=3,<5", optional = true, markers = "extra == \"http2\""}
httpcore = "==1.*"
idna = "*"
@@ -3333,7 +3332,7 @@ description = "Pure-Python HTTP/2 framing"
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"distributed\" or extra == \"qdrant\""
markers = "extra == \"distributed\""
files = [
{file = "hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5"},
{file = "hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08"},
@@ -3911,6 +3910,8 @@ python-versions = "*"
groups = ["main"]
files = [
{file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"},
{file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"},
{file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"},
]
[package.dependencies]
@@ -5123,10 +5124,12 @@ files = [
{file = "lxml-6.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:219e0431ea8006e15005767f0351e3f7f9143e793e58519dc97fe9e07fae5563"},
{file = "lxml-6.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bd5913b4972681ffc9718bc2d4c53cde39ef81415e1671ff93e9aa30b46595e7"},
{file = "lxml-6.0.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:390240baeb9f415a82eefc2e13285016f9c8b5ad71ec80574ae8fa9605093cd7"},
{file = "lxml-6.0.0-cp312-cp312-manylinux_2_27_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d6e200909a119626744dd81bae409fc44134389e03fbf1d68ed2a55a2fb10991"},
{file = "lxml-6.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ca50bd612438258a91b5b3788c6621c1f05c8c478e7951899f492be42defc0da"},
{file = "lxml-6.0.0-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:c24b8efd9c0f62bad0439283c2c795ef916c5a6b75f03c17799775c7ae3c0c9e"},
{file = "lxml-6.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:afd27d8629ae94c5d863e32ab0e1d5590371d296b87dae0a751fb22bf3685741"},
{file = "lxml-6.0.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:54c4855eabd9fc29707d30141be99e5cd1102e7d2258d2892314cf4c110726c3"},
{file = "lxml-6.0.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c907516d49f77f6cd8ead1322198bdfd902003c3c330c77a1c5f3cc32a0e4d16"},
{file = "lxml-6.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:36531f81c8214e293097cd2b7873f178997dae33d3667caaae8bdfb9666b76c0"},
{file = "lxml-6.0.0-cp312-cp312-win32.whl", hash = "sha256:690b20e3388a7ec98e899fd54c924e50ba6693874aa65ef9cb53de7f7de9d64a"},
{file = "lxml-6.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:310b719b695b3dd442cdfbbe64936b2f2e231bb91d998e99e6f0daf991a3eba3"},
@@ -5137,10 +5140,12 @@ files = [
{file = "lxml-6.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d18a25b19ca7307045581b18b3ec9ead2b1db5ccd8719c291f0cd0a5cec6cb81"},
{file = "lxml-6.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d4f0c66df4386b75d2ab1e20a489f30dc7fd9a06a896d64980541506086be1f1"},
{file = "lxml-6.0.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9f4b481b6cc3a897adb4279216695150bbe7a44c03daba3c894f49d2037e0a24"},
{file = "lxml-6.0.0-cp313-cp313-manylinux_2_27_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8a78d6c9168f5bcb20971bf3329c2b83078611fbe1f807baadc64afc70523b3a"},
{file = "lxml-6.0.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ae06fbab4f1bb7db4f7c8ca9897dc8db4447d1a2b9bee78474ad403437bcc29"},
{file = "lxml-6.0.0-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:1fa377b827ca2023244a06554c6e7dc6828a10aaf74ca41965c5d8a4925aebb4"},
{file = "lxml-6.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1676b56d48048a62ef77a250428d1f31f610763636e0784ba67a9740823988ca"},
{file = "lxml-6.0.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:0e32698462aacc5c1cf6bdfebc9c781821b7e74c79f13e5ffc8bfe27c42b1abf"},
{file = "lxml-6.0.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4d6036c3a296707357efb375cfc24bb64cd955b9ec731abf11ebb1e40063949f"},
{file = "lxml-6.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7488a43033c958637b1a08cddc9188eb06d3ad36582cebc7d4815980b47e27ef"},
{file = "lxml-6.0.0-cp313-cp313-win32.whl", hash = "sha256:5fcd7d3b1d8ecb91445bd71b9c88bdbeae528fefee4f379895becfc72298d181"},
{file = "lxml-6.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:2f34687222b78fff795feeb799a7d44eca2477c3d9d3a46ce17d51a4f383e32e"},
@@ -7441,7 +7446,7 @@ description = "Wraps the portalocker recipe for easy usage"
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"qdrant\" or extra == \"deepeval\""
markers = "extra == \"deepeval\""
files = [
{file = "portalocker-3.2.0-py3-none-any.whl", hash = "sha256:3cdc5f565312224bc570c49337bd21428bba0ef363bbcf58b9ef4a9f11779968"},
{file = "portalocker-3.2.0.tar.gz", hash = "sha256:1f3002956a54a8c3730586c5c77bf18fae4149e07eaf1c29fc3faf4d5a3f89ac"},
@@ -7721,6 +7726,7 @@ files = [
{file = "psycopg2-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:0435034157049f6846e95103bd8f5a668788dd913a7c30162ca9503fdf542cb4"},
{file = "psycopg2-2.9.10-cp312-cp312-win32.whl", hash = "sha256:65a63d7ab0e067e2cdb3cf266de39663203d38d6a8ed97f5ca0cb315c73fe067"},
{file = "psycopg2-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:4a579d6243da40a7b3182e0430493dbd55950c493d8c68f4eec0b302f6bbf20e"},
{file = "psycopg2-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:91fd603a2155da8d0cfcdbf8ab24a2d54bca72795b90d2a3ed2b6da8d979dee2"},
{file = "psycopg2-2.9.10-cp39-cp39-win32.whl", hash = "sha256:9d5b3b94b79a844a986d029eee38998232451119ad653aea42bb9220a8c5066b"},
{file = "psycopg2-2.9.10-cp39-cp39-win_amd64.whl", hash = "sha256:88138c8dedcbfa96408023ea2b0c369eda40fe5d75002c0964c78f46f11fa442"},
{file = "psycopg2-2.9.10.tar.gz", hash = "sha256:12ec0b40b0273f95296233e8750441339298e6a572f7039da5b260e3c8b60e11"},
@@ -7782,6 +7788,7 @@ files = [
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"},
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"},
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"},
{file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"},
{file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"},
{file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"},
{file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"},
@@ -9075,36 +9082,6 @@ files = [
{file = "qasync-0.27.1.tar.gz", hash = "sha256:8dc768fd1ee5de1044c7c305eccf2d39d24d87803ea71189d4024fb475f4985f"},
]
[[package]]
name = "qdrant-client"
version = "1.15.0"
description = "Client library for the Qdrant vector search engine"
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"qdrant\""
files = [
{file = "qdrant_client-1.15.0-py3-none-any.whl", hash = "sha256:f18bb311543de7e256ffa831be0d8a9d0729aaf549db7bcf95a5d356b48143f2"},
{file = "qdrant_client-1.15.0.tar.gz", hash = "sha256:475433b0acec51b66a132e91b631abe922accc64744bbb3180a04fe1fe889843"},
]
[package.dependencies]
grpcio = ">=1.41.0"
httpx = {version = ">=0.20.0", extras = ["http2"]}
numpy = [
{version = ">=1.21", markers = "python_version >= \"3.10\" and python_version < \"3.12\""},
{version = ">=1.26", markers = "python_version == \"3.12\""},
{version = ">=2.1.0", markers = "python_version >= \"3.13\""},
]
portalocker = ">=2.7.0,<4.0"
protobuf = ">=3.20.0"
pydantic = ">=1.10.8,<2.0.dev0 || >2.2.0"
urllib3 = ">=1.26.14,<3"
[package.extras]
fastembed = ["fastembed (>=0.7,<0.8)"]
fastembed-gpu = ["fastembed-gpu (>=0.7,<0.8)"]
[[package]]
name = "rapidfuzz"
version = "3.13.0"
@@ -12255,9 +12232,8 @@ ollama = ["transformers"]
postgres = ["asyncpg", "pgvector", "psycopg2"]
postgres-binary = ["asyncpg", "pgvector", "psycopg2-binary"]
posthog = ["posthog"]
qdrant = ["qdrant-client"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<=3.13"
content-hash = "d4112e2e71e4a50ecb60baa6a4ce1432c31ba802a308e250d7d1499e9004dba7"
content-hash = "7682898d3c726a0b1e3d08560ceb26180e1841e89a4a417377ee876b55878a9b"

View file

@@ -73,7 +73,6 @@ distributed = [
"modal>=1.0.5,<2.0.0",
]
qdrant = ["qdrant-client>=1.14.2,<2"]
neo4j = ["neo4j>=5.28.0,<6"]
neptune = ["langchain_aws>=0.2.22"]
postgres = [

uv.lock (generated)
View file

@@ -1031,9 +1031,6 @@ postgres-binary = [
posthog = [
{ name = "posthog" },
]
qdrant = [
{ name = "qdrant-client" },
]
[package.metadata]
requires-dist = [
@@ -1110,7 +1107,6 @@ requires-dist = [
{ name = "python-dotenv", specifier = ">=1.0.1,<2.0.0" },
{ name = "python-multipart", specifier = ">=0.0.20,<1.0.0" },
{ name = "qasync", marker = "extra == 'gui'", specifier = ">=0.27.1,<0.28" },
{ name = "qdrant-client", marker = "extra == 'qdrant'", specifier = ">=1.14.2,<2" },
{ name = "rdflib", specifier = ">=7.1.4,<7.2.0" },
{ name = "ruff", marker = "extra == 'dev'", specifier = ">=0.9.2,<1.0.0" },
{ name = "s3fs", extras = ["boto3"], specifier = "==2025.3.2" },
@@ -1131,7 +1127,7 @@ requires-dist = [
{ name = "uvicorn", marker = "extra == 'api'", specifier = ">=0.34.0,<1.0.0" },
{ name = "websockets", marker = "extra == 'api'", specifier = ">=15.0.1,<16.0.0" },
]
provides-extras = ["api", "distributed", "qdrant", "neo4j", "neptune", "postgres", "postgres-binary", "notebook", "langchain", "llama-index", "gemini", "huggingface", "ollama", "mistral", "anthropic", "deepeval", "posthog", "falkordb", "groq", "chromadb", "docs", "codegraph", "evals", "gui", "graphiti", "aws", "dev", "debug"]
provides-extras = ["api", "distributed", "neo4j", "neptune", "postgres", "postgres-binary", "notebook", "langchain", "llama-index", "gemini", "huggingface", "ollama", "mistral", "anthropic", "deepeval", "posthog", "falkordb", "groq", "chromadb", "docs", "codegraph", "evals", "gui", "graphiti", "aws", "dev", "debug"]
[[package]]
name = "colorama"
@@ -2590,11 +2586,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
]
[package.optional-dependencies]
http2 = [
{ name = "h2" },
]
[[package]]
name = "httpx-sse"
version = "0.4.1"
@@ -6698,25 +6689,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/51/06/bc628aa2981bcfd452a08ee435b812fd3eee4ada8acb8a76c4a09d1a5a77/qasync-0.27.1-py3-none-any.whl", hash = "sha256:5d57335723bc7d9b328dadd8cb2ed7978640e4bf2da184889ce50ee3ad2602c7", size = 14866, upload-time = "2023-11-19T14:19:54.345Z" },
]
[[package]]
name = "qdrant-client"
version = "1.15.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "grpcio" },
{ name = "httpx", extra = ["http2"] },
{ name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" },
{ name = "numpy", version = "2.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" },
{ name = "portalocker" },
{ name = "protobuf" },
{ name = "pydantic" },
{ name = "urllib3" },
]
sdist = { url = "https://files.pythonhosted.org/packages/79/8b/76c7d325e11d97cb8eb5e261c3759e9ed6664735afbf32fdded5b580690c/qdrant_client-1.15.1.tar.gz", hash = "sha256:631f1f3caebfad0fd0c1fba98f41be81d9962b7bf3ca653bed3b727c0e0cbe0e", size = 295297, upload-time = "2025-07-31T19:35:19.627Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ef/33/d8df6a2b214ffbe4138db9a1efe3248f67dc3c671f82308bea1582ecbbb7/qdrant_client-1.15.1-py3-none-any.whl", hash = "sha256:2b975099b378382f6ca1cfb43f0d59e541be6e16a5892f282a4b8de7eff5cb63", size = 337331, upload-time = "2025-07-31T19:35:17.539Z" },
]
[[package]]
name = "rapidfuzz"
version = "3.13.0"