From 7c663648990b9313afb81ce824afbe00ac0b8a90 Mon Sep 17 00:00:00 2001 From: Boris Arzentar Date: Wed, 12 Jun 2024 22:32:13 +0200 Subject: [PATCH 1/3] test: add weaviate integration test --- cognee/api/v1/add/add.py | 2 +- .../databases/vector/create_vector_engine.py | 2 ++ .../data/extraction/extract_categories.py | 4 +-- .../search/vector/search_similarity.py | 2 -- poetry.lock | 3 ++- tests/import_test.py | 2 +- tests/weaviate_test.py | 27 +++++++++++++++++++ 7 files changed, 35 insertions(+), 7 deletions(-) create mode 100644 tests/weaviate_test.py diff --git a/cognee/api/v1/add/add.py b/cognee/api/v1/add/add.py index 9b1ecf5ba..995465bfc 100644 --- a/cognee/api/v1/add/add.py +++ b/cognee/api/v1/add/add.py @@ -10,7 +10,7 @@ from cognee.shared.utils import send_telemetry from cognee.base_config import get_base_config from cognee.infrastructure.databases.relational.config import get_relationaldb_config -async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_name: str = None): +async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_name: str = "main_dataset"): if isinstance(data, str): if "data://" in data: # data is a data directory path diff --git a/cognee/infrastructure/databases/vector/create_vector_engine.py b/cognee/infrastructure/databases/vector/create_vector_engine.py index 62323c14a..19859ae48 100644 --- a/cognee/infrastructure/databases/vector/create_vector_engine.py +++ b/cognee/infrastructure/databases/vector/create_vector_engine.py @@ -34,3 +34,5 @@ def create_vector_engine(config: VectorConfig, embedding_engine): api_key = config["vector_db_key"], embedding_engine = embedding_engine, ) + + raise EnvironmentError(f"Vector provider not configured correctly: {config['vector_db_provider']}") diff --git a/cognee/modules/data/extraction/extract_categories.py b/cognee/modules/data/extraction/extract_categories.py index 46346327f..7d4408cab 100644 --- a/cognee/modules/data/extraction/extract_categories.py +++ b/cognee/modules/data/extraction/extract_categories.py @@ -15,12 +15,12 @@ async def extract_categories(content: str, response_model: Type[BaseModel]): def process_categories(llm_output) -> List[dict]: # Extract the first subclass from the list (assuming there could be more) - data_category = llm_output["label"]["subclass"][0] + data_category = llm_output["label"]["subclass"][0] if len(llm_output["label"]["subclass"]) > 0 else None data_type = llm_output["label"]["type"].lower() return [{ "data_type": data_type, # The data_category is the value of the Enum member (e.g., "News stories and blog posts") - "category_name": data_category.value + "category_name": data_category.value if data_category else "Other types of text data", }] diff --git a/cognee/modules/search/vector/search_similarity.py b/cognee/modules/search/vector/search_similarity.py index de0d86e71..13b6039bf 100644 --- a/cognee/modules/search/vector/search_similarity.py +++ b/cognee/modules/search/vector/search_similarity.py @@ -21,8 +21,6 @@ async def search_similarity(query: str, graph): vector_engine = get_vector_engine() results = await vector_engine.search(layer_id, query_text = query, limit = 10) - print("results", results) - print("len_rs", len(results)) if len(results) > 0: graph_nodes.extend([ diff --git a/poetry.lock b/poetry.lock index 2ee9be397..92bf8830f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "aiofiles" @@ -5753,6 +5753,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, diff --git a/tests/import_test.py b/tests/import_test.py index bcd6f7fad..72034f4f0 100644 --- a/tests/import_test.py +++ b/tests/import_test.py @@ -4,4 +4,4 @@ def test_import_cognee(): import cognee assert True # Pass the test if no error occurs except ImportError as e: - assert False, f"Failed to import cognee: {e}" \ No newline at end of file + assert False, f"Failed to import cognee: {e}" diff --git a/tests/weaviate_test.py b/tests/weaviate_test.py new file mode 100644 index 000000000..478cfb3ea --- /dev/null +++ b/tests/weaviate_test.py @@ -0,0 +1,27 @@ +import asyncio + +async def test_weaviate_integration(): + from cognee import config, prune, add, cognify, search + + config.set_vector_engine_provider("weaviate") + # config.set_vector_db_url("TEST_URL") + # config.set_vector_db_key("TEST_KEY") + + prune.prune_system() + + text = """ + Incapillo is a Pleistocene-age caldera (a depression formed by the collapse of a volcano) in the La Rioja Province of Argentina. It is the southernmost volcanic centre in the Andean Central Volcanic Zone (CVZ) that erupted during the Pleistocene. Incapillo is one of several ignimbrite[a] or caldera systems that, along with 44 active stratovolcanoes, are part of the CVZ. + Subduction of the Nazca Plate beneath the South American Plate is responsible for most of the volcanism in the CVZ. After activity in the volcanic arc of the western Maricunga Belt ceased six million years ago, volcanism commenced in the Incapillo region, forming the high volcanic edifices Monte Pissis, Cerro Bonete Chico and Sierra de Veladero. Later, a number of lava domes were emplaced between these volcanoes. + Incapillo is the source of the Incapillo ignimbrite, a medium-sized deposit comparable to the Katmai ignimbrite. The Incapillo ignimbrite was erupted 0.52 ± 0.03 and 0.51 ± 0.04 million years ago and has a volume of about 20.4 cubic kilometres (4.9 cu mi). A caldera with dimensions of 5 by 6 kilometres (3.1 mi × 3.7 mi) formed during the eruption. Later volcanism generated more lava domes within the caldera and a debris flow in the Sierra de Veladero. The lake within the caldera may overlie an area of ongoing hydrothermal activity. + """ + + await add(text) + + await cognify() + + result = await search("SIMILARITY", { "query": "volcanic eruption" }) + + print(result) + +if __name__ == "__main__": + asyncio.run(test_weaviate_integration()) From 0603fa831ce58d59b297855d0d0c6b504dd8dcb4 Mon Sep 17 00:00:00 2001 From: Boris Arzentar Date: Wed, 12 Jun 2024 22:36:57 +0200 Subject: [PATCH 2/3] test: add github action running weaviate integration test --- .github/workflows/test_qdrant.yml | 2 +- .github/workflows/test_weaviate.yml | 62 +++++++++++++++++++ .../tests/test_weaviate.py | 0 3 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/test_weaviate.yml rename tests/weaviate_test.py => cognee/tests/test_weaviate.py (100%) diff --git a/.github/workflows/test_qdrant.yml b/.github/workflows/test_qdrant.yml index 779b2a120..09857f5b9 100644 --- a/.github/workflows/test_qdrant.yml +++ b/.github/workflows/test_qdrant.yml @@ -18,7 +18,7 @@ jobs: name: docs changes uses: ./.github/workflows/get_docs_changes.yml - run_common: + run_qdrant_integration_test: name: test needs: get_docs_changes if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' diff --git a/.github/workflows/test_weaviate.yml b/.github/workflows/test_weaviate.yml new file mode 100644 index 000000000..215c75bcc --- /dev/null +++ b/.github/workflows/test_weaviate.yml @@ -0,0 +1,62 @@ +name: common + +on: + pull_request: + branches: + - main + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + RUNTIME__LOG_LEVEL: ERROR + +jobs: + get_docs_changes: + name: docs changes + uses: ./.github/workflows/get_docs_changes.yml + + run_weaviate_integration_test: + name: test + needs: get_docs_changes + if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' + runs-on: macos-latest + + defaults: + run: + shell: bash + + steps: + - name: Check out + uses: actions/checkout@v2 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: 3.11 + + - name: Install Poetry + uses: snok/install-poetry@v1.3.2 + with: + virtualenvs-create: true + virtualenvs-in-project: true + installer-parallel: true + + - name: Install dependencies + run: poetry install --no-interaction + + - name: Create .cognee_system directory and print path + run: | + mkdir .cognee_system + echo $(pwd)/.cognee_system + + - name: Run default Weaviate + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} + VECTOR_DB_URL: ${{ secrets.WEAVIATE_API_URL }} + VECTOR_DB_KEY: ${{ secrets.WEAVIATE_API_KEY }} + ENV: 'dev' + run: poetry run python ./cognee/tests/test_weaviate.py diff --git a/tests/weaviate_test.py b/cognee/tests/test_weaviate.py similarity index 100% rename from tests/weaviate_test.py rename to cognee/tests/test_weaviate.py From 3577be3d0862030183c05d2105ba54f59668b4c8 Mon Sep 17 00:00:00 2001 From: Boris Arzentar Date: Wed, 12 Jun 2024 22:40:39 +0200 Subject: [PATCH 3/3] fix: change github actions names --- .github/workflows/test_common.yml | 2 +- .github/workflows/test_qdrant.yml | 2 +- .github/workflows/test_weaviate.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test_common.yml b/.github/workflows/test_common.yml index 228398d65..2eeca4d16 100644 --- a/.github/workflows/test_common.yml +++ b/.github/workflows/test_common.yml @@ -1,4 +1,4 @@ -name: common | common +name: test | environments on: pull_request: diff --git a/.github/workflows/test_qdrant.yml b/.github/workflows/test_qdrant.yml index 09857f5b9..d9c5444d2 100644 --- a/.github/workflows/test_qdrant.yml +++ b/.github/workflows/test_qdrant.yml @@ -1,4 +1,4 @@ -name: common +name: test | qdrant on: pull_request: diff --git a/.github/workflows/test_weaviate.yml b/.github/workflows/test_weaviate.yml index 215c75bcc..132ee3e8f 100644 --- a/.github/workflows/test_weaviate.yml +++ b/.github/workflows/test_weaviate.yml @@ -1,4 +1,4 @@ -name: common +name: test | weaviate on: pull_request: