diff --git a/.github/workflows/test_deduplication.yml b/.github/workflows/test_deduplication.yml
index 46312060e..924aab130 100644
--- a/.github/workflows/test_deduplication.yml
+++ b/.github/workflows/test_deduplication.yml
@@ -20,7 +20,7 @@ jobs:
     name: docs changes
     uses: ./.github/workflows/get_docs_changes.yml
 
-  start_postgres:
+  run_deduplication_test:
     name: test
     needs: get_docs_changes
     if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }}
@@ -43,12 +43,28 @@ jobs:
         ports:
           - 5432:5432
 
-  run_simple_example_test:
-    needs: start_postgres
-    uses: ./.github/workflows/reusable_python_example.yml
-    with:
-      example-location: ./cognee/tests/test_deduplication.py
-    secrets:
-      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-      GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
-      GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
+    steps:
+      # Pinned to a release tag: a branch ref such as master can change under the workflow.
+      - name: Check out
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11.x'
+
+      - name: Install Poetry
+        uses: snok/install-poetry@v1.3.2
+        with:
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+          installer-parallel: true
+
+      - name: Install dependencies
+        run: poetry install -E postgres --no-interaction
+
+      - name: Run deduplication test
+        env:
+          ENV: 'dev'
+          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        run: poetry run python ./cognee/tests/test_deduplication.py
diff --git a/cognee/tests/test_deduplication.py b/cognee/tests/test_deduplication.py
index b7bdccb4d..79936cba5 100644
--- a/cognee/tests/test_deduplication.py
+++ b/cognee/tests/test_deduplication.py
@@ -36,6 +36,12 @@ async def test_deduplication():
     assert result[0]["name"] == dataset_name, "Result name does not match expected value."
     assert result[1]["name"] == dataset_name2, "Result name does not match expected value."
 
+    # dataset_data should hold one row per dataset, both referencing the same data item.
+    result = await relational_engine.get_all_data_from_table("dataset_data")
+    assert len(result) == 2, "Unexpected number of dataset data relationships found."
+    assert result[0]["data_id"] == result[1]["data_id"], "Data item is not reused between datasets."
+    assert result[0]["dataset_id"] != result[1]["dataset_id"], "Dataset items are not different."
+
     await cognee.prune.prune_data()
     await cognee.prune.prune_system(metadata=True)
 