Merge pull request #243 from jinhonglin-ryan/feature/milvus-vectordb
Feature: Integrate Milvus as a Vector Database Provider
This commit is contained in:
commit
747a6b9d47
9 changed files with 575 additions and 17 deletions
|
|
@ -14,7 +14,7 @@ GRAPH_DATABASE_URL=
|
|||
GRAPH_DATABASE_USERNAME=
|
||||
GRAPH_DATABASE_PASSWORD=
|
||||
|
||||
# "qdrant", "pgvector", "weaviate" or "lancedb"
|
||||
# "qdrant", "pgvector", "weaviate", "milvus" or "lancedb"
|
||||
VECTOR_DB_PROVIDER="lancedb"
|
||||
# Not needed if using "lancedb" or "pgvector"
|
||||
VECTOR_DB_URL=
|
||||
|
|
|
|||
64
.github/workflows/test_milvus.yml
vendored
Normal file
64
.github/workflows/test_milvus.yml
vendored
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
name: test | milvus
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
types: [labeled, synchronize]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
RUNTIME__LOG_LEVEL: ERROR
|
||||
ENV: 'dev'
|
||||
|
||||
jobs:
|
||||
get_docs_changes:
|
||||
name: docs changes
|
||||
uses: ./.github/workflows/get_docs_changes.yml
|
||||
|
||||
run_milvus:
|
||||
name: test
|
||||
needs: get_docs_changes
|
||||
if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
|
||||
steps:
|
||||
- name: Check out
|
||||
uses: actions/checkout@master
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11.x'
|
||||
|
||||
- name: Install Poetry
|
||||
# https://github.com/snok/install-poetry#running-on-windows
|
||||
uses: snok/install-poetry@v1.3.2
|
||||
with:
|
||||
virtualenvs-create: true
|
||||
virtualenvs-in-project: true
|
||||
installer-parallel: true
|
||||
|
||||
- name: Install dependencies
|
||||
run: poetry install -E milvus --no-interaction
|
||||
|
||||
- name: Run default basic pipeline
|
||||
env:
|
||||
ENV: 'dev'
|
||||
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: poetry run python ./cognee/tests/test_milvus.py
|
||||
|
||||
- name: Clean up disk space
|
||||
run: |
|
||||
sudo rm -rf ~/.cache
|
||||
sudo rm -rf /tmp/*
|
||||
df -h
|
||||
12
README.md
12
README.md
|
|
@ -53,6 +53,12 @@ pip install 'cognee[qdrant]'
|
|||
pip install 'cognee[neo4j]'
|
||||
```
|
||||
|
||||
### With pip with Milvus support
|
||||
|
||||
```bash
|
||||
pip install 'cognee[milvus]'
|
||||
```
|
||||
|
||||
### With poetry
|
||||
|
||||
```bash
|
||||
|
|
@ -83,6 +89,12 @@ poetry add cognee -E qdrant
|
|||
poetry add cognee -E neo4j
|
||||
```
|
||||
|
||||
### With poetry with Milvus support
|
||||
|
||||
```bash
|
||||
poetry add cognee -E milvus
|
||||
```
|
||||
|
||||
|
||||
## 💻 Basic Usage
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,13 @@
|
|||
from typing import Dict
|
||||
|
||||
|
||||
class VectorConfig(Dict):
|
||||
vector_db_url: str
|
||||
vector_db_port: str
|
||||
vector_db_key: str
|
||||
vector_db_provider: str
|
||||
|
||||
|
||||
def create_vector_engine(config: VectorConfig, embedding_engine):
|
||||
if config["vector_db_provider"] == "weaviate":
|
||||
from .weaviate_db import WeaviateAdapter
|
||||
|
|
@ -16,24 +18,37 @@ def create_vector_engine(config: VectorConfig, embedding_engine):
|
|||
return WeaviateAdapter(
|
||||
config["vector_db_url"],
|
||||
config["vector_db_key"],
|
||||
embedding_engine = embedding_engine
|
||||
embedding_engine=embedding_engine
|
||||
)
|
||||
|
||||
elif config["vector_db_provider"] == "qdrant":
|
||||
if not (config["vector_db_url"] and config["vector_db_key"]):
|
||||
raise EnvironmentError("Missing requred Qdrant credentials!")
|
||||
|
||||
|
||||
from .qdrant.QDrantAdapter import QDrantAdapter
|
||||
|
||||
return QDrantAdapter(
|
||||
url = config["vector_db_url"],
|
||||
api_key = config["vector_db_key"],
|
||||
embedding_engine = embedding_engine
|
||||
url=config["vector_db_url"],
|
||||
api_key=config["vector_db_key"],
|
||||
embedding_engine=embedding_engine
|
||||
)
|
||||
|
||||
elif config['vector_db_provider'] == 'milvus':
|
||||
from .milvus.MilvusAdapter import MilvusAdapter
|
||||
|
||||
if not config["vector_db_url"]:
|
||||
raise EnvironmentError("Missing required Milvus credentials!")
|
||||
|
||||
return MilvusAdapter(
|
||||
url=config["vector_db_url"],
|
||||
api_key=config['vector_db_key'],
|
||||
embedding_engine=embedding_engine
|
||||
)
|
||||
|
||||
|
||||
elif config["vector_db_provider"] == "pgvector":
|
||||
from cognee.infrastructure.databases.relational import get_relational_config
|
||||
|
||||
|
||||
# Get configuration for postgres database
|
||||
relational_config = get_relational_config()
|
||||
db_username = relational_config.db_username
|
||||
|
|
@ -52,8 +67,8 @@ def create_vector_engine(config: VectorConfig, embedding_engine):
|
|||
from .pgvector.PGVectorAdapter import PGVectorAdapter
|
||||
|
||||
return PGVectorAdapter(
|
||||
connection_string,
|
||||
config["vector_db_key"],
|
||||
connection_string,
|
||||
config["vector_db_key"],
|
||||
embedding_engine,
|
||||
)
|
||||
|
||||
|
|
@ -64,16 +79,16 @@ def create_vector_engine(config: VectorConfig, embedding_engine):
|
|||
from ..hybrid.falkordb.FalkorDBAdapter import FalkorDBAdapter
|
||||
|
||||
return FalkorDBAdapter(
|
||||
database_url = config["vector_db_url"],
|
||||
database_port = config["vector_db_port"],
|
||||
embedding_engine = embedding_engine,
|
||||
database_url=config["vector_db_url"],
|
||||
database_port=config["vector_db_port"],
|
||||
embedding_engine=embedding_engine,
|
||||
)
|
||||
|
||||
else:
|
||||
from .lancedb.LanceDBAdapter import LanceDBAdapter
|
||||
|
||||
return LanceDBAdapter(
|
||||
url = config["vector_db_url"],
|
||||
api_key = config["vector_db_key"],
|
||||
embedding_engine = embedding_engine,
|
||||
url=config["vector_db_url"],
|
||||
api_key=config["vector_db_key"],
|
||||
embedding_engine=embedding_engine,
|
||||
)
|
||||
|
|
|
|||
252
cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py
Normal file
252
cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py
Normal file
|
|
@ -0,0 +1,252 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import List, Optional
|
||||
from uuid import UUID
|
||||
from cognee.infrastructure.engine import DataPoint
|
||||
from ..vector_db_interface import VectorDBInterface
|
||||
from ..models.ScoredResult import ScoredResult
|
||||
from ..embeddings.EmbeddingEngine import EmbeddingEngine
|
||||
|
||||
logger = logging.getLogger("MilvusAdapter")
|
||||
|
||||
|
||||
class IndexSchema(DataPoint):
|
||||
text: str
|
||||
|
||||
_metadata: dict = {
|
||||
"index_fields": ["text"]
|
||||
}
|
||||
|
||||
|
||||
class MilvusAdapter(VectorDBInterface):
|
||||
name = "Milvus"
|
||||
url: str
|
||||
api_key: Optional[str]
|
||||
embedding_engine: EmbeddingEngine = None
|
||||
|
||||
def __init__(self, url: str, api_key: Optional[str], embedding_engine: EmbeddingEngine):
|
||||
self.url = url
|
||||
self.api_key = api_key
|
||||
|
||||
self.embedding_engine = embedding_engine
|
||||
|
||||
def get_milvus_client(self) -> "MilvusClient":
|
||||
from pymilvus import MilvusClient
|
||||
if self.api_key:
|
||||
client = MilvusClient(uri=self.url, token=self.api_key)
|
||||
else:
|
||||
client = MilvusClient(uri=self.url)
|
||||
return client
|
||||
|
||||
async def embed_data(self, data: List[str]) -> list[list[float]]:
|
||||
return await self.embedding_engine.embed_text(data)
|
||||
|
||||
async def has_collection(self, collection_name: str) -> bool:
|
||||
future = asyncio.Future()
|
||||
client = self.get_milvus_client()
|
||||
future.set_result(client.has_collection(collection_name=collection_name))
|
||||
|
||||
return await future
|
||||
|
||||
async def create_collection(
|
||||
self,
|
||||
collection_name: str,
|
||||
payload_schema=None,
|
||||
):
|
||||
from pymilvus import DataType, MilvusException
|
||||
client = self.get_milvus_client()
|
||||
if client.has_collection(collection_name=collection_name):
|
||||
logger.info(f"Collection '{collection_name}' already exists.")
|
||||
return True
|
||||
|
||||
try:
|
||||
dimension = self.embedding_engine.get_vector_size()
|
||||
assert dimension > 0, "Embedding dimension must be greater than 0."
|
||||
|
||||
schema = client.create_schema(
|
||||
auto_id=False,
|
||||
enable_dynamic_field=False,
|
||||
)
|
||||
|
||||
schema.add_field(
|
||||
field_name="id",
|
||||
datatype=DataType.VARCHAR,
|
||||
is_primary=True,
|
||||
max_length=36
|
||||
)
|
||||
|
||||
schema.add_field(
|
||||
field_name="vector",
|
||||
datatype=DataType.FLOAT_VECTOR,
|
||||
dim=dimension
|
||||
)
|
||||
|
||||
schema.add_field(
|
||||
field_name="text",
|
||||
datatype=DataType.VARCHAR,
|
||||
max_length=60535
|
||||
)
|
||||
|
||||
index_params = client.prepare_index_params()
|
||||
index_params.add_index(
|
||||
field_name="vector",
|
||||
metric_type="COSINE"
|
||||
)
|
||||
|
||||
client.create_collection(
|
||||
collection_name=collection_name,
|
||||
schema=schema,
|
||||
index_params=index_params
|
||||
)
|
||||
|
||||
client.load_collection(collection_name)
|
||||
|
||||
logger.info(f"Collection '{collection_name}' created successfully.")
|
||||
return True
|
||||
except MilvusException as e:
|
||||
logger.error(f"Error creating collection '{collection_name}': {str(e)}")
|
||||
raise e
|
||||
|
||||
async def create_data_points(
|
||||
self,
|
||||
collection_name: str,
|
||||
data_points: List[DataPoint]
|
||||
):
|
||||
from pymilvus import MilvusException
|
||||
client = self.get_milvus_client()
|
||||
data_vectors = await self.embed_data(
|
||||
[data_point.get_embeddable_data() for data_point in data_points]
|
||||
)
|
||||
|
||||
insert_data = [
|
||||
{
|
||||
"id": str(data_point.id),
|
||||
"vector": data_vectors[index],
|
||||
"text": data_point.text,
|
||||
}
|
||||
for index, data_point in enumerate(data_points)
|
||||
]
|
||||
|
||||
try:
|
||||
result = client.insert(
|
||||
collection_name=collection_name,
|
||||
data=insert_data
|
||||
)
|
||||
logger.info(
|
||||
f"Inserted {result.get('insert_count', 0)} data points into collection '{collection_name}'."
|
||||
)
|
||||
return result
|
||||
except MilvusException as e:
|
||||
logger.error(f"Error inserting data points into collection '{collection_name}': {str(e)}")
|
||||
raise e
|
||||
|
||||
async def create_vector_index(self, index_name: str, index_property_name: str):
|
||||
await self.create_collection(f"{index_name}_{index_property_name}")
|
||||
|
||||
async def index_data_points(self, index_name: str, index_property_name: str, data_points: List[DataPoint]):
|
||||
formatted_data_points = [
|
||||
IndexSchema(
|
||||
id=data_point.id,
|
||||
text=getattr(data_point, data_point._metadata["index_fields"][0]),
|
||||
)
|
||||
for data_point in data_points
|
||||
]
|
||||
collection_name = f"{index_name}_{index_property_name}"
|
||||
await self.create_data_points(collection_name, formatted_data_points)
|
||||
|
||||
async def retrieve(self, collection_name: str, data_point_ids: list[str]):
|
||||
from pymilvus import MilvusException
|
||||
client = self.get_milvus_client()
|
||||
try:
|
||||
filter_expression = f"""id in [{", ".join(f'"{id}"' for id in data_point_ids)}]"""
|
||||
|
||||
results = client.query(
|
||||
collection_name=collection_name,
|
||||
expr=filter_expression,
|
||||
output_fields=["*"],
|
||||
)
|
||||
return results
|
||||
except MilvusException as e:
|
||||
logger.error(f"Error retrieving data points from collection '{collection_name}': {str(e)}")
|
||||
raise e
|
||||
|
||||
async def search(
|
||||
self,
|
||||
collection_name: str,
|
||||
query_text: Optional[str] = None,
|
||||
query_vector: Optional[List[float]] = None,
|
||||
limit: int = 5,
|
||||
with_vector: bool = False,
|
||||
):
|
||||
from pymilvus import MilvusException
|
||||
client = self.get_milvus_client()
|
||||
if query_text is None and query_vector is None:
|
||||
raise ValueError("One of query_text or query_vector must be provided!")
|
||||
|
||||
try:
|
||||
query_vector = query_vector or (await self.embed_data([query_text]))[0]
|
||||
|
||||
output_fields = ["id", "text"]
|
||||
if with_vector:
|
||||
output_fields.append("vector")
|
||||
|
||||
results = client.search(
|
||||
collection_name=collection_name,
|
||||
data=[query_vector],
|
||||
anns_field="vector",
|
||||
limit=limit,
|
||||
output_fields=output_fields,
|
||||
search_params={
|
||||
"metric_type": "COSINE",
|
||||
},
|
||||
)
|
||||
|
||||
return [
|
||||
ScoredResult(
|
||||
id=UUID(result["id"]),
|
||||
score=result["distance"],
|
||||
payload=result.get("entity", {}),
|
||||
)
|
||||
for result in results[0]
|
||||
]
|
||||
except MilvusException as e:
|
||||
logger.error(f"Error during search in collection '{collection_name}': {str(e)}")
|
||||
raise e
|
||||
|
||||
async def batch_search(self, collection_name: str, query_texts: List[str], limit: int, with_vectors: bool = False):
|
||||
query_vectors = await self.embed_data(query_texts)
|
||||
|
||||
return await asyncio.gather(
|
||||
*[self.search(collection_name=collection_name,
|
||||
query_vector=query_vector,
|
||||
limit=limit,
|
||||
with_vector=with_vectors,
|
||||
) for query_vector in query_vectors]
|
||||
)
|
||||
|
||||
async def delete_data_points(self, collection_name: str, data_point_ids: list[str]):
|
||||
from pymilvus import MilvusException
|
||||
client = self.get_milvus_client()
|
||||
try:
|
||||
filter_expression = f"""id in [{", ".join(f'"{id}"' for id in data_point_ids)}]"""
|
||||
|
||||
delete_result = client.delete(
|
||||
collection_name=collection_name,
|
||||
filter=filter_expression
|
||||
)
|
||||
|
||||
logger.info(f"Deleted data points with IDs {data_point_ids} from collection '{collection_name}'.")
|
||||
return delete_result
|
||||
except MilvusException as e:
|
||||
logger.error(f"Error deleting data points from collection '{collection_name}': {str(e)}")
|
||||
raise e
|
||||
|
||||
async def prune(self):
|
||||
client = self.get_milvus_client()
|
||||
if client:
|
||||
collections = client.list_collections()
|
||||
for collection_name in collections:
|
||||
client.drop_collection(collection_name=collection_name)
|
||||
client.close()
|
||||
|
|
@ -0,0 +1 @@
|
|||
from .MilvusAdapter import MilvusAdapter
|
||||
84
cognee/tests/test_milvus.py
Normal file
84
cognee/tests/test_milvus.py
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
import os
|
||||
import logging
|
||||
import pathlib
|
||||
import cognee
|
||||
from cognee.api.v1.search import SearchType
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
||||
|
||||
async def main():
|
||||
cognee.config.set_vector_db_provider("milvus")
|
||||
data_directory_path = str(
|
||||
pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_milvus")).resolve())
|
||||
cognee.config.data_root_directory(data_directory_path)
|
||||
cognee_directory_path = str(
|
||||
pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_milvus")).resolve())
|
||||
cognee.config.system_root_directory(cognee_directory_path)
|
||||
|
||||
cognee.config.set_vector_db_config(
|
||||
{
|
||||
"vector_db_url": os.path.join(cognee_directory_path, "databases/milvus.db"),
|
||||
"vector_db_key": "",
|
||||
"vector_db_provider": "milvus"
|
||||
}
|
||||
)
|
||||
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
dataset_name = "cs_explanations"
|
||||
|
||||
explanation_file_path = os.path.join(pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt")
|
||||
await cognee.add([explanation_file_path], dataset_name)
|
||||
|
||||
text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena.
|
||||
At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states.
|
||||
Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible.
|
||||
The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly.
|
||||
Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate.
|
||||
In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited.
|
||||
"""
|
||||
|
||||
await cognee.add([text], dataset_name)
|
||||
|
||||
await cognee.cognify([dataset_name])
|
||||
|
||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||
vector_engine = get_vector_engine()
|
||||
random_node = (await vector_engine.search("entity_name", "Quantum computer"))[0]
|
||||
random_node_name = random_node.payload["text"]
|
||||
|
||||
search_results = await cognee.search(SearchType.INSIGHTS, query_text=random_node_name)
|
||||
assert len(search_results) != 0, "The search results list is empty."
|
||||
print("\n\nExtracted INSIGHTS are:\n")
|
||||
for result in search_results:
|
||||
print(f"{result}\n")
|
||||
|
||||
search_results = await cognee.search(SearchType.CHUNKS, query_text=random_node_name)
|
||||
assert len(search_results) != 0, "The search results list is empty."
|
||||
print("\n\nExtracted CHUNKS are:\n")
|
||||
for result in search_results:
|
||||
print(f"{result}\n")
|
||||
|
||||
search_results = await cognee.search(SearchType.SUMMARIES, query_text=random_node_name)
|
||||
assert len(search_results) != 0, "The search results list is empty."
|
||||
print("\nExtracted SUMMARIES are:\n")
|
||||
for result in search_results:
|
||||
print(f"{result}\n")
|
||||
|
||||
history = await cognee.get_search_history()
|
||||
assert len(history) == 6, "Search history is not correct."
|
||||
|
||||
await cognee.prune.prune_data()
|
||||
assert not os.path.isdir(data_directory_path), "Local data files are not deleted"
|
||||
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
milvus_client = get_vector_engine().get_milvus_client()
|
||||
collections = milvus_client.list_collections()
|
||||
assert len(collections) == 0, "Milvus vector database is not empty"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import asyncio
|
||||
asyncio.run(main())
|
||||
131
poetry.lock
generated
131
poetry.lock
generated
|
|
@ -3602,6 +3602,22 @@ files = [
|
|||
{file = "mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "milvus-lite"
|
||||
version = "2.4.10"
|
||||
description = "A lightweight version of Milvus wrapped with Python."
|
||||
optional = true
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "milvus_lite-2.4.10-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:fc4246d3ed7d1910847afce0c9ba18212e93a6e9b8406048436940578dfad5cb"},
|
||||
{file = "milvus_lite-2.4.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:74a8e07c5e3b057df17fbb46913388e84df1dc403a200f4e423799a58184c800"},
|
||||
{file = "milvus_lite-2.4.10-py3-none-manylinux2014_aarch64.whl", hash = "sha256:240c7386b747bad696ecb5bd1f58d491e86b9d4b92dccee3315ed7256256eddc"},
|
||||
{file = "milvus_lite-2.4.10-py3-none-manylinux2014_x86_64.whl", hash = "sha256:211d2e334a043f9282bdd9755f76b9b2d93b23bffa7af240919ffce6a8dfe325"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
tqdm = "*"
|
||||
|
||||
[[package]]
|
||||
name = "mistune"
|
||||
version = "3.0.2"
|
||||
|
|
@ -5360,6 +5376,31 @@ pyyaml = "*"
|
|||
[package.extras]
|
||||
extra = ["pygments (>=2.12)"]
|
||||
|
||||
[[package]]
|
||||
name = "pymilvus"
|
||||
version = "2.5.0"
|
||||
description = "Python Sdk for Milvus"
|
||||
optional = true
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "pymilvus-2.5.0-py3-none-any.whl", hash = "sha256:a0e8653d8fe78019abfda79b3404ef7423f312501e8cbd7dc728051ce8732652"},
|
||||
{file = "pymilvus-2.5.0.tar.gz", hash = "sha256:4da14a3bd957a4921166f9355fd1f1ac5c5e4e80b46f12f64d9c9a6dcb8cb395"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
grpcio = ">=1.49.1,<=1.67.1"
|
||||
milvus-lite = {version = ">=2.4.0", markers = "sys_platform != \"win32\""}
|
||||
pandas = ">=1.2.4"
|
||||
protobuf = ">=3.20.0"
|
||||
python-dotenv = ">=1.0.1,<2.0.0"
|
||||
setuptools = ">69"
|
||||
ujson = ">=2.0.0"
|
||||
|
||||
[package.extras]
|
||||
bulk-writer = ["azure-storage-blob", "minio (>=7.0.0)", "pyarrow (>=12.0.0)", "requests"]
|
||||
dev = ["black", "grpcio (==1.62.2)", "grpcio-testing (==1.62.2)", "grpcio-tools (==1.62.2)", "pytest (>=5.3.4)", "pytest-cov (>=2.8.1)", "pytest-timeout (>=1.3.4)", "ruff (>0.4.0)"]
|
||||
model = ["milvus-model (>=0.1.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "pyparsing"
|
||||
version = "3.2.0"
|
||||
|
|
@ -7075,6 +7116,93 @@ files = [
|
|||
{file = "tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ujson"
|
||||
version = "5.10.0"
|
||||
description = "Ultra fast JSON encoder and decoder for Python"
|
||||
optional = true
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "ujson-5.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2601aa9ecdbee1118a1c2065323bda35e2c5a2cf0797ef4522d485f9d3ef65bd"},
|
||||
{file = "ujson-5.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:348898dd702fc1c4f1051bc3aacbf894caa0927fe2c53e68679c073375f732cf"},
|
||||
{file = "ujson-5.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22cffecf73391e8abd65ef5f4e4dd523162a3399d5e84faa6aebbf9583df86d6"},
|
||||
{file = "ujson-5.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26b0e2d2366543c1bb4fbd457446f00b0187a2bddf93148ac2da07a53fe51569"},
|
||||
{file = "ujson-5.10.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:caf270c6dba1be7a41125cd1e4fc7ba384bf564650beef0df2dd21a00b7f5770"},
|
||||
{file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a245d59f2ffe750446292b0094244df163c3dc96b3ce152a2c837a44e7cda9d1"},
|
||||
{file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:94a87f6e151c5f483d7d54ceef83b45d3a9cca7a9cb453dbdbb3f5a6f64033f5"},
|
||||
{file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:29b443c4c0a113bcbb792c88bea67b675c7ca3ca80c3474784e08bba01c18d51"},
|
||||
{file = "ujson-5.10.0-cp310-cp310-win32.whl", hash = "sha256:c18610b9ccd2874950faf474692deee4223a994251bc0a083c114671b64e6518"},
|
||||
{file = "ujson-5.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:924f7318c31874d6bb44d9ee1900167ca32aa9b69389b98ecbde34c1698a250f"},
|
||||
{file = "ujson-5.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a5b366812c90e69d0f379a53648be10a5db38f9d4ad212b60af00bd4048d0f00"},
|
||||
{file = "ujson-5.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:502bf475781e8167f0f9d0e41cd32879d120a524b22358e7f205294224c71126"},
|
||||
{file = "ujson-5.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b91b5d0d9d283e085e821651184a647699430705b15bf274c7896f23fe9c9d8"},
|
||||
{file = "ujson-5.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:129e39af3a6d85b9c26d5577169c21d53821d8cf68e079060602e861c6e5da1b"},
|
||||
{file = "ujson-5.10.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f77b74475c462cb8b88680471193064d3e715c7c6074b1c8c412cb526466efe9"},
|
||||
{file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7ec0ca8c415e81aa4123501fee7f761abf4b7f386aad348501a26940beb1860f"},
|
||||
{file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ab13a2a9e0b2865a6c6db9271f4b46af1c7476bfd51af1f64585e919b7c07fd4"},
|
||||
{file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:57aaf98b92d72fc70886b5a0e1a1ca52c2320377360341715dd3933a18e827b1"},
|
||||
{file = "ujson-5.10.0-cp311-cp311-win32.whl", hash = "sha256:2987713a490ceb27edff77fb184ed09acdc565db700ee852823c3dc3cffe455f"},
|
||||
{file = "ujson-5.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:f00ea7e00447918ee0eff2422c4add4c5752b1b60e88fcb3c067d4a21049a720"},
|
||||
{file = "ujson-5.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:98ba15d8cbc481ce55695beee9f063189dce91a4b08bc1d03e7f0152cd4bbdd5"},
|
||||
{file = "ujson-5.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a9d2edbf1556e4f56e50fab7d8ff993dbad7f54bac68eacdd27a8f55f433578e"},
|
||||
{file = "ujson-5.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6627029ae4f52d0e1a2451768c2c37c0c814ffc04f796eb36244cf16b8e57043"},
|
||||
{file = "ujson-5.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8ccb77b3e40b151e20519c6ae6d89bfe3f4c14e8e210d910287f778368bb3d1"},
|
||||
{file = "ujson-5.10.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3caf9cd64abfeb11a3b661329085c5e167abbe15256b3b68cb5d914ba7396f3"},
|
||||
{file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6e32abdce572e3a8c3d02c886c704a38a1b015a1fb858004e03d20ca7cecbb21"},
|
||||
{file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a65b6af4d903103ee7b6f4f5b85f1bfd0c90ba4eeac6421aae436c9988aa64a2"},
|
||||
{file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:604a046d966457b6cdcacc5aa2ec5314f0e8c42bae52842c1e6fa02ea4bda42e"},
|
||||
{file = "ujson-5.10.0-cp312-cp312-win32.whl", hash = "sha256:6dea1c8b4fc921bf78a8ff00bbd2bfe166345f5536c510671bccececb187c80e"},
|
||||
{file = "ujson-5.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:38665e7d8290188b1e0d57d584eb8110951a9591363316dd41cf8686ab1d0abc"},
|
||||
{file = "ujson-5.10.0-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:618efd84dc1acbd6bff8eaa736bb6c074bfa8b8a98f55b61c38d4ca2c1f7f287"},
|
||||
{file = "ujson-5.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:38d5d36b4aedfe81dfe251f76c0467399d575d1395a1755de391e58985ab1c2e"},
|
||||
{file = "ujson-5.10.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67079b1f9fb29ed9a2914acf4ef6c02844b3153913eb735d4bf287ee1db6e557"},
|
||||
{file = "ujson-5.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7d0e0ceeb8fe2468c70ec0c37b439dd554e2aa539a8a56365fd761edb418988"},
|
||||
{file = "ujson-5.10.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:59e02cd37bc7c44d587a0ba45347cc815fb7a5fe48de16bf05caa5f7d0d2e816"},
|
||||
{file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2a890b706b64e0065f02577bf6d8ca3b66c11a5e81fb75d757233a38c07a1f20"},
|
||||
{file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:621e34b4632c740ecb491efc7f1fcb4f74b48ddb55e65221995e74e2d00bbff0"},
|
||||
{file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b9500e61fce0cfc86168b248104e954fead61f9be213087153d272e817ec7b4f"},
|
||||
{file = "ujson-5.10.0-cp313-cp313-win32.whl", hash = "sha256:4c4fc16f11ac1612f05b6f5781b384716719547e142cfd67b65d035bd85af165"},
|
||||
{file = "ujson-5.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:4573fd1695932d4f619928fd09d5d03d917274381649ade4328091ceca175539"},
|
||||
{file = "ujson-5.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a984a3131da7f07563057db1c3020b1350a3e27a8ec46ccbfbf21e5928a43050"},
|
||||
{file = "ujson-5.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:73814cd1b9db6fc3270e9d8fe3b19f9f89e78ee9d71e8bd6c9a626aeaeaf16bd"},
|
||||
{file = "ujson-5.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61e1591ed9376e5eddda202ec229eddc56c612b61ac6ad07f96b91460bb6c2fb"},
|
||||
{file = "ujson-5.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2c75269f8205b2690db4572a4a36fe47cd1338e4368bc73a7a0e48789e2e35a"},
|
||||
{file = "ujson-5.10.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7223f41e5bf1f919cd8d073e35b229295aa8e0f7b5de07ed1c8fddac63a6bc5d"},
|
||||
{file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d4dc2fd6b3067c0782e7002ac3b38cf48608ee6366ff176bbd02cf969c9c20fe"},
|
||||
{file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:232cc85f8ee3c454c115455195a205074a56ff42608fd6b942aa4c378ac14dd7"},
|
||||
{file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:cc6139531f13148055d691e442e4bc6601f6dba1e6d521b1585d4788ab0bfad4"},
|
||||
{file = "ujson-5.10.0-cp38-cp38-win32.whl", hash = "sha256:e7ce306a42b6b93ca47ac4a3b96683ca554f6d35dd8adc5acfcd55096c8dfcb8"},
|
||||
{file = "ujson-5.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:e82d4bb2138ab05e18f089a83b6564fee28048771eb63cdecf4b9b549de8a2cc"},
|
||||
{file = "ujson-5.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:dfef2814c6b3291c3c5f10065f745a1307d86019dbd7ea50e83504950136ed5b"},
|
||||
{file = "ujson-5.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4734ee0745d5928d0ba3a213647f1c4a74a2a28edc6d27b2d6d5bd9fa4319e27"},
|
||||
{file = "ujson-5.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d47ebb01bd865fdea43da56254a3930a413f0c5590372a1241514abae8aa7c76"},
|
||||
{file = "ujson-5.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dee5e97c2496874acbf1d3e37b521dd1f307349ed955e62d1d2f05382bc36dd5"},
|
||||
{file = "ujson-5.10.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7490655a2272a2d0b072ef16b0b58ee462f4973a8f6bbe64917ce5e0a256f9c0"},
|
||||
{file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ba17799fcddaddf5c1f75a4ba3fd6441f6a4f1e9173f8a786b42450851bd74f1"},
|
||||
{file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:2aff2985cef314f21d0fecc56027505804bc78802c0121343874741650a4d3d1"},
|
||||
{file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:ad88ac75c432674d05b61184178635d44901eb749786c8eb08c102330e6e8996"},
|
||||
{file = "ujson-5.10.0-cp39-cp39-win32.whl", hash = "sha256:2544912a71da4ff8c4f7ab5606f947d7299971bdd25a45e008e467ca638d13c9"},
|
||||
{file = "ujson-5.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:3ff201d62b1b177a46f113bb43ad300b424b7847f9c5d38b1b4ad8f75d4a282a"},
|
||||
{file = "ujson-5.10.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5b6fee72fa77dc172a28f21693f64d93166534c263adb3f96c413ccc85ef6e64"},
|
||||
{file = "ujson-5.10.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:61d0af13a9af01d9f26d2331ce49bb5ac1fb9c814964018ac8df605b5422dcb3"},
|
||||
{file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecb24f0bdd899d368b715c9e6664166cf694d1e57be73f17759573a6986dd95a"},
|
||||
{file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fbd8fd427f57a03cff3ad6574b5e299131585d9727c8c366da4624a9069ed746"},
|
||||
{file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:beeaf1c48e32f07d8820c705ff8e645f8afa690cca1544adba4ebfa067efdc88"},
|
||||
{file = "ujson-5.10.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:baed37ea46d756aca2955e99525cc02d9181de67f25515c468856c38d52b5f3b"},
|
||||
{file = "ujson-5.10.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7663960f08cd5a2bb152f5ee3992e1af7690a64c0e26d31ba7b3ff5b2ee66337"},
|
||||
{file = "ujson-5.10.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:d8640fb4072d36b08e95a3a380ba65779d356b2fee8696afeb7794cf0902d0a1"},
|
||||
{file = "ujson-5.10.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78778a3aa7aafb11e7ddca4e29f46bc5139131037ad628cc10936764282d6753"},
|
||||
{file = "ujson-5.10.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b0111b27f2d5c820e7f2dbad7d48e3338c824e7ac4d2a12da3dc6061cc39c8e6"},
|
||||
{file = "ujson-5.10.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:c66962ca7565605b355a9ed478292da628b8f18c0f2793021ca4425abf8b01e5"},
|
||||
{file = "ujson-5.10.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ba43cc34cce49cf2d4bc76401a754a81202d8aa926d0e2b79f0ee258cb15d3a4"},
|
||||
{file = "ujson-5.10.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:ac56eb983edce27e7f51d05bc8dd820586c6e6be1c5216a6809b0c668bb312b8"},
|
||||
{file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f44bd4b23a0e723bf8b10628288c2c7c335161d6840013d4d5de20e48551773b"},
|
||||
{file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c10f4654e5326ec14a46bcdeb2b685d4ada6911050aa8baaf3501e57024b804"},
|
||||
{file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0de4971a89a762398006e844ae394bd46991f7c385d7a6a3b93ba229e6dac17e"},
|
||||
{file = "ujson-5.10.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:e1402f0564a97d2a52310ae10a64d25bcef94f8dd643fcf5d310219d915484f7"},
|
||||
{file = "ujson-5.10.0.tar.gz", hash = "sha256:b3cd8f3c5d8c7738257f1018880444f7b7d9b66232c64649f562d7ba86ad4bc1"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "uri-template"
|
||||
version = "1.3.0"
|
||||
|
|
@ -7635,6 +7763,7 @@ groq = ["groq"]
|
|||
langchain = ["langchain_text_splitters", "langsmith"]
|
||||
langfuse = ["langfuse"]
|
||||
llama-index = ["llama-index-core"]
|
||||
milvus = ["pymilvus"]
|
||||
neo4j = ["neo4j"]
|
||||
notebook = []
|
||||
postgres = ["asyncpg", "pgvector", "psycopg2"]
|
||||
|
|
@ -7645,4 +7774,4 @@ weaviate = ["weaviate-client"]
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.9.0,<3.12"
|
||||
content-hash = "6b57d44b0924bcf64397b3807c2a6ba369166e1d2102b5312c8f8ae2d5323376"
|
||||
content-hash = "d6b10b74a910202f224ff34fa06ad3d2767796a6492a96724de0d608ac0356c5"
|
||||
|
|
|
|||
|
|
@ -70,6 +70,7 @@ asyncpg = {version = "0.30.0", optional = true}
|
|||
pgvector = {version = "^0.3.5", optional = true}
|
||||
psycopg2 = {version = "^2.9.10", optional = true}
|
||||
llama-index-core = {version = "^0.11.22", optional = true}
|
||||
pymilvus = {version = "^2.5.0", optional = true}
|
||||
|
||||
[tool.poetry.extras]
|
||||
filesystem = ["s3fs", "botocore"]
|
||||
|
|
@ -84,7 +85,7 @@ posthog = ["posthog"]
|
|||
falkordb = ["falkordb"]
|
||||
groq = ["groq"]
|
||||
langfuse = ["langfuse"]
|
||||
|
||||
milvus = ["pymilvus"]
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pytest = "^7.4.0"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue