Merge remote-tracking branch 'origin/fix/setting-alternative-vector-db' into fix/setting-alternative-vector-db

This commit is contained in:
Vasilije 2024-06-12 22:42:55 +02:00
commit 20d8bc337f
10 changed files with 100 additions and 10 deletions

View file

@ -1,4 +1,4 @@
name: common | common
name: test | environments
on:
pull_request:

View file

@ -1,4 +1,4 @@
name: common
name: test | qdrant
on:
pull_request:
@ -18,7 +18,7 @@ jobs:
name: docs changes
uses: ./.github/workflows/get_docs_changes.yml
run_common:
run_qdrant_integration_test:
name: test
needs: get_docs_changes
if: needs.get_docs_changes.outputs.changes_outside_docs == 'true'

62
.github/workflows/test_weaviate.yml vendored Normal file
View file

@ -0,0 +1,62 @@
# Integration test for the Weaviate vector-database provider.
# Runs cognee/tests/test_weaviate.py against the Weaviate instance whose URL
# and API key are stored in the repository secrets.
name: test | weaviate

on:
  pull_request:
    branches:
      - main
  workflow_dispatch:

# One active run per workflow + PR number (or ref); a newer run cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

env:
  RUNTIME__LOG_LEVEL: ERROR

jobs:
  get_docs_changes:
    name: docs changes
    uses: ./.github/workflows/get_docs_changes.yml

  run_weaviate_integration_test:
    name: test
    needs: get_docs_changes
    # Only run when get_docs_changes reports changes_outside_docs == 'true'.
    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true'
    runs-on: macos-latest

    defaults:
      run:
        shell: bash

    steps:
      - name: Check out
        uses: actions/checkout@v2

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML keeps the version a string instead of parsing it as a float.
          python-version: "3.11"

      - name: Install Poetry
        uses: snok/install-poetry@v1.3.2
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true

      - name: Install dependencies
        run: poetry install --no-interaction

      - name: Create .cognee_system directory and print path
        # -p keeps the step from failing when the directory already exists;
        # the quotes keep a workspace path containing spaces intact.
        run: |
          mkdir -p .cognee_system
          echo "$(pwd)/.cognee_system"

      - name: Run default Weaviate
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          VECTOR_DB_URL: ${{ secrets.WEAVIATE_API_URL }}
          VECTOR_DB_KEY: ${{ secrets.WEAVIATE_API_KEY }}
          ENV: 'dev'
        run: poetry run python ./cognee/tests/test_weaviate.py

View file

@ -10,7 +10,7 @@ from cognee.shared.utils import send_telemetry
from cognee.base_config import get_base_config
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_name: str = None):
async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_name: str = "main_dataset"):
if isinstance(data, str):
if "data://" in data:
# data is a data directory path

View file

@ -34,3 +34,5 @@ def create_vector_engine(config: VectorConfig, embedding_engine):
api_key = config["vector_db_key"],
embedding_engine = embedding_engine,
)
raise EnvironmentError(f"Vector provider not configured correctly: {config['vector_db_provider']}")

View file

@ -15,12 +15,12 @@ async def extract_categories(content: str, response_model: Type[BaseModel]):
def process_categories(llm_output) -> List[dict]:
    """Convert a classification result into a one-element list of category dicts.

    Args:
        llm_output: Mapping with a ``"label"`` entry that holds ``"subclass"``
            (a sequence of Enum members, possibly empty) and ``"type"``
            (a string).

    Returns:
        A list with a single dict containing ``"data_type"`` (the lower-cased
        type) and ``"category_name"`` (the value of the first subclass, or
        ``"Other types of text data"`` when no subclass was returned).
    """
    label = llm_output["label"]
    subclasses = label["subclass"]

    # Use only the first subclass; an empty (or missing) list must not raise,
    # so fall back to None and substitute the generic category below.
    data_category = subclasses[0] if subclasses else None
    data_type = label["type"].lower()

    return [{
        "data_type": data_type,
        # The category name is the value of the Enum member
        # (e.g., "News stories and blog posts").
        "category_name": data_category.value if data_category is not None else "Other types of text data",
    }]

View file

@ -21,8 +21,6 @@ async def search_similarity(query: str, graph):
vector_engine = get_vector_engine()
results = await vector_engine.search(layer_id, query_text = query, limit = 10)
print("results", results)
print("len_rs", len(results))
if len(results) > 0:
graph_nodes.extend([

View file

@ -0,0 +1,27 @@
import asyncio
async def test_weaviate_integration():
    """Run add -> cognify -> similarity search with Weaviate as the vector engine.

    The search result is printed rather than asserted on, so this run only
    fails if one of the steps raises.
    """
    # Imported lazily so cognee is only loaded when the test actually runs.
    from cognee import config, prune, add, cognify, search

    config.set_vector_engine_provider("weaviate")
    # Explicit connection settings are left disabled here.
    # config.set_vector_db_url("TEST_URL")
    # config.set_vector_db_key("TEST_KEY")

    # Reset system state before ingesting the sample document.
    prune.prune_system()

    sample_text = """
Incapillo is a Pleistocene-age caldera (a depression formed by the collapse of a volcano) in the La Rioja Province of Argentina. It is the southernmost volcanic centre in the Andean Central Volcanic Zone (CVZ) that erupted during the Pleistocene. Incapillo is one of several ignimbrite[a] or caldera systems that, along with 44 active stratovolcanoes, are part of the CVZ.
Subduction of the Nazca Plate beneath the South American Plate is responsible for most of the volcanism in the CVZ. After activity in the volcanic arc of the western Maricunga Belt ceased six million years ago, volcanism commenced in the Incapillo region, forming the high volcanic edifices Monte Pissis, Cerro Bonete Chico and Sierra de Veladero. Later, a number of lava domes were emplaced between these volcanoes.
Incapillo is the source of the Incapillo ignimbrite, a medium-sized deposit comparable to the Katmai ignimbrite. The Incapillo ignimbrite was erupted 0.52 ± 0.03 and 0.51 ± 0.04 million years ago and has a volume of about 20.4 cubic kilometres (4.9 cu mi). A caldera with dimensions of 5 by 6 kilometres (3.1 mi × 3.7 mi) formed during the eruption. Later volcanism generated more lava domes within the caldera and a debris flow in the Sierra de Veladero. The lake within the caldera may overlie an area of ongoing hydrothermal activity.
"""

    await add(sample_text)
    await cognify()

    search_params = { "query": "volcanic eruption" }
    search_results = await search("SIMILARITY", search_params)
    print(search_results)
# Script entry point: drive the async integration test to completion when
# this file is executed directly.
if __name__ == "__main__":
    asyncio.run(test_weaviate_integration())

3
poetry.lock generated
View file

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
[[package]]
name = "aiofiles"
@ -5753,6 +5753,7 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},

View file

@ -4,4 +4,4 @@ def test_import_cognee():
import cognee
assert True # Pass the test if no error occurs
except ImportError as e:
assert False, f"Failed to import cognee: {e}"
assert False, f"Failed to import cognee: {e}"