fix: Resolve issue with Windows path (#1295)
<!-- .github/pull_request_template.md --> ## Description Resolve issue with Windows path ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
commit
24c155b22e
14 changed files with 4147 additions and 3979 deletions
25
.github/workflows/e2e_tests.yml
vendored
25
.github/workflows/e2e_tests.yml
vendored
|
|
@ -153,31 +153,6 @@ jobs:
|
|||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||
run: uv run python ./cognee/tests/test_deduplication.py
|
||||
|
||||
run-deletion-test:
|
||||
name: Deletion Test
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Check out
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Cognee Setup
|
||||
uses: ./.github/actions/cognee_setup
|
||||
with:
|
||||
python-version: '3.11.x'
|
||||
|
||||
- name: Run Deletion Tests
|
||||
env:
|
||||
ENV: 'dev'
|
||||
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} # Test needs OpenAI endpoint to handle multimedia
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
|
||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||
run: uv run python ./cognee/tests/test_deletion.py
|
||||
|
||||
run-s3-bucket-test:
|
||||
name: S3 Bucket Test
|
||||
runs-on: ubuntu-22.04
|
||||
|
|
|
|||
116
.github/workflows/python_version_tests.yml
vendored
116
.github/workflows/python_version_tests.yml
vendored
|
|
@ -1,116 +0,0 @@
|
|||
name: Reusable Python Version Tests
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
python-versions:
|
||||
required: false
|
||||
type: string
|
||||
default: '["3.10.x", "3.11.x", "3.12.x"]'
|
||||
secrets:
|
||||
LLM_PROVIDER:
|
||||
required: true
|
||||
LLM_MODEL:
|
||||
required: true
|
||||
LLM_ENDPOINT:
|
||||
required: true
|
||||
LLM_API_KEY:
|
||||
required: true
|
||||
LLM_API_VERSION:
|
||||
required: true
|
||||
EMBEDDING_PROVIDER:
|
||||
required: true
|
||||
EMBEDDING_MODEL:
|
||||
required: true
|
||||
EMBEDDING_ENDPOINT:
|
||||
required: true
|
||||
EMBEDDING_API_KEY:
|
||||
required: true
|
||||
EMBEDDING_API_VERSION:
|
||||
required: true
|
||||
|
||||
env:
|
||||
RUNTIME__LOG_LEVEL: ERROR
|
||||
ENV: 'dev'
|
||||
|
||||
jobs:
|
||||
run-python-version-tests:
|
||||
name: Python ${{ matrix.python-version }} on ${{ matrix.os }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ${{ fromJSON(inputs.python-versions) }}
|
||||
os: [ubuntu-22.04, macos-13, macos-15]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Check out
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Cognee Setup
|
||||
uses: ./.github/actions/cognee_setup
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Run unit tests
|
||||
shell: bash
|
||||
run: uv run pytest cognee/tests/unit/
|
||||
env:
|
||||
PYTHONUTF8: 1
|
||||
LLM_PROVIDER: openai
|
||||
LLM_MODEL: ${{ secrets.LLM_MODEL }}
|
||||
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
|
||||
|
||||
EMBEDDING_PROVIDER: openai
|
||||
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
|
||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||
|
||||
- name: Run integration tests
|
||||
if: ${{ !contains(matrix.os, 'windows') }}
|
||||
shell: bash
|
||||
run: uv run pytest cognee/tests/integration/
|
||||
env:
|
||||
PYTHONUTF8: 1
|
||||
LLM_PROVIDER: openai
|
||||
LLM_MODEL: ${{ secrets.LLM_MODEL }}
|
||||
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
|
||||
|
||||
EMBEDDING_PROVIDER: openai
|
||||
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
|
||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||
|
||||
- name: Run default basic pipeline
|
||||
shell: bash
|
||||
env:
|
||||
PYTHONUTF8: 1
|
||||
LLM_PROVIDER: openai
|
||||
LLM_MODEL: ${{ secrets.LLM_MODEL }}
|
||||
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
|
||||
|
||||
EMBEDDING_PROVIDER: openai
|
||||
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
|
||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||
run: uv run python ./cognee/tests/test_library.py
|
||||
|
||||
- name: Build with uv
|
||||
shell: bash
|
||||
run: uv build
|
||||
|
||||
- name: Install Package
|
||||
if: ${{ !contains(matrix.os, 'windows') }}
|
||||
run: |
|
||||
cd dist
|
||||
pip install *.whl
|
||||
235
.github/workflows/test_different_operating_systems.yml
vendored
Normal file
235
.github/workflows/test_different_operating_systems.yml
vendored
Normal file
|
|
@ -0,0 +1,235 @@
|
|||
name: Tests to run on different Operating Systems
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
python-versions:
|
||||
required: false
|
||||
type: string
|
||||
default: '["3.10.x", "3.11.x", "3.12.x"]'
|
||||
secrets:
|
||||
LLM_PROVIDER:
|
||||
required: true
|
||||
LLM_MODEL:
|
||||
required: true
|
||||
LLM_ENDPOINT:
|
||||
required: true
|
||||
LLM_API_KEY:
|
||||
required: true
|
||||
LLM_API_VERSION:
|
||||
required: true
|
||||
EMBEDDING_PROVIDER:
|
||||
required: true
|
||||
EMBEDDING_MODEL:
|
||||
required: true
|
||||
EMBEDDING_ENDPOINT:
|
||||
required: true
|
||||
EMBEDDING_API_KEY:
|
||||
required: true
|
||||
EMBEDDING_API_VERSION:
|
||||
required: true
|
||||
|
||||
env:
|
||||
RUNTIME__LOG_LEVEL: ERROR
|
||||
ENV: 'dev'
|
||||
|
||||
jobs:
|
||||
run-unit-tests:
|
||||
name: Unit tests ${{ matrix.python-version }} on ${{ matrix.os }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ${{ fromJSON(inputs.python-versions) }}
|
||||
os: [ubuntu-22.04, macos-13, macos-15, windows-latest]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Check out
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Cognee Setup
|
||||
uses: ./.github/actions/cognee_setup
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Run unit tests
|
||||
shell: bash
|
||||
run: uv run pytest cognee/tests/unit/
|
||||
env:
|
||||
PYTHONUTF8: 1
|
||||
LLM_PROVIDER: openai
|
||||
LLM_MODEL: ${{ secrets.LLM_MODEL }}
|
||||
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
|
||||
|
||||
EMBEDDING_PROVIDER: openai
|
||||
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
|
||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||
|
||||
run-integration-tests:
|
||||
name: Integration tests ${{ matrix.python-version }} on ${{ matrix.os }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ${{ fromJSON(inputs.python-versions) }}
|
||||
os: [ ubuntu-22.04, macos-13, macos-15, windows-latest ]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Check out
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Cognee Setup
|
||||
uses: ./.github/actions/cognee_setup
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Run integration tests
|
||||
shell: bash
|
||||
run: uv run pytest cognee/tests/integration/
|
||||
env:
|
||||
PYTHONUTF8: 1
|
||||
LLM_PROVIDER: openai
|
||||
LLM_MODEL: ${{ secrets.LLM_MODEL }}
|
||||
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
|
||||
|
||||
EMBEDDING_PROVIDER: openai
|
||||
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
|
||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||
|
||||
run-library-test:
|
||||
name: Library test ${{ matrix.python-version }} on ${{ matrix.os }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ${{ fromJSON(inputs.python-versions) }}
|
||||
os: [ ubuntu-22.04, macos-13, macos-15, windows-latest ]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Check out
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Cognee Setup
|
||||
uses: ./.github/actions/cognee_setup
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Run default basic pipeline
|
||||
shell: bash
|
||||
env:
|
||||
PYTHONUTF8: 1
|
||||
LLM_PROVIDER: openai
|
||||
LLM_MODEL: ${{ secrets.LLM_MODEL }}
|
||||
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
|
||||
|
||||
EMBEDDING_PROVIDER: openai
|
||||
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
|
||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||
run: uv run python ./cognee/tests/test_library.py
|
||||
|
||||
run-build-test:
|
||||
name: Build test ${{ matrix.python-version }} on ${{ matrix.os }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ${{ fromJSON(inputs.python-versions) }}
|
||||
os: [ ubuntu-22.04, macos-13, macos-15, windows-latest ]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Check out
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Cognee Setup
|
||||
uses: ./.github/actions/cognee_setup
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Build with uv
|
||||
shell: bash
|
||||
run: uv build
|
||||
|
||||
- name: Install Package
|
||||
if: ${{ !contains(matrix.os, 'windows-latest') }}
|
||||
run: |
|
||||
cd dist
|
||||
pip install *.whl
|
||||
|
||||
run-soft-deletion-test:
|
||||
name: Soft Delete test ${{ matrix.python-version }} on ${{ matrix.os }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ${{ fromJSON(inputs.python-versions) }}
|
||||
os: [ ubuntu-22.04, macos-13, macos-15, windows-latest ]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Check out
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Cognee Setup
|
||||
uses: ./.github/actions/cognee_setup
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Run Soft Deletion Tests
|
||||
env:
|
||||
ENV: 'dev'
|
||||
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} # Test needs OpenAI endpoint to handle multimedia
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
|
||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||
run: uv run python ./cognee/tests/test_delete_soft.py
|
||||
|
||||
run-hard-deletion-test:
|
||||
name: Hard Delete test ${{ matrix.python-version }} on ${{ matrix.os }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ${{ fromJSON(inputs.python-versions) }}
|
||||
os: [ ubuntu-22.04, macos-13, macos-15, windows-latest ]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Check out
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Cognee Setup
|
||||
uses: ./.github/actions/cognee_setup
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Run Hard Deletion Test
|
||||
env:
|
||||
ENV: 'dev'
|
||||
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} # Test needs OpenAI endpoint to handle multimedia
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
|
||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||
run: uv run python ./cognee/tests/test_delete_hard.py
|
||||
12
.github/workflows/test_suites.yml
vendored
12
.github/workflows/test_suites.yml
vendored
|
|
@ -68,10 +68,10 @@ jobs:
|
|||
uses: ./.github/workflows/notebooks_tests.yml
|
||||
secrets: inherit
|
||||
|
||||
python-version-tests:
|
||||
name: Python Version Tests
|
||||
different-operating-systems-tests:
|
||||
name: Operating System and Python Tests
|
||||
needs: [basic-tests, e2e-tests, cli-tests]
|
||||
uses: ./.github/workflows/python_version_tests.yml
|
||||
uses: ./.github/workflows/test_different_operating_systems.yml
|
||||
with:
|
||||
python-versions: '["3.10.x", "3.11.x", "3.12.x"]'
|
||||
secrets: inherit
|
||||
|
|
@ -124,7 +124,7 @@ jobs:
|
|||
cli-tests,
|
||||
graph-db-tests,
|
||||
notebook-tests,
|
||||
python-version-tests,
|
||||
different-operating-systems-tests,
|
||||
vector-db-tests,
|
||||
example-tests,
|
||||
gemini-tests,
|
||||
|
|
@ -144,7 +144,7 @@ jobs:
|
|||
cli-tests,
|
||||
graph-db-tests,
|
||||
notebook-tests,
|
||||
python-version-tests,
|
||||
different-operating-systems-tests,
|
||||
vector-db-tests,
|
||||
example-tests,
|
||||
db-examples-tests,
|
||||
|
|
@ -165,7 +165,7 @@ jobs:
|
|||
"${{ needs.cli-tests.result }}" == "success" &&
|
||||
"${{ needs.graph-db-tests.result }}" == "success" &&
|
||||
"${{ needs.notebook-tests.result }}" == "success" &&
|
||||
"${{ needs.python-version-tests.result }}" == "success" &&
|
||||
"${{ needs.different-operating-systems-tests.result }}" == "success" &&
|
||||
"${{ needs.vector-db-tests.result }}" == "success" &&
|
||||
"${{ needs.example-tests.result }}" == "success" &&
|
||||
"${{ needs.db-examples-tests.result }}" == "success" &&
|
||||
|
|
|
|||
|
|
@ -5,19 +5,24 @@ from urllib.parse import urlparse
|
|||
def get_data_file_path(file_path: str):
|
||||
# Check if this is a file URI BEFORE normalizing (which corrupts URIs)
|
||||
if file_path.startswith("file://"):
|
||||
# Remove first occurrence of file:// prefix
|
||||
pure_file_path = file_path.replace("file://", "", 1)
|
||||
# Normalize the file URI for Windows - replace backslashes with forward slashes
|
||||
normalized_file_uri = os.path.normpath(file_path)
|
||||
normalized_file_uri = os.path.normpath(pure_file_path)
|
||||
|
||||
parsed_url = urlparse(normalized_file_uri)
|
||||
|
||||
# Convert URI path to file system path
|
||||
# Convert path to proper file system path
|
||||
if os.name == "nt": # Windows
|
||||
# Handle Windows drive letters correctly
|
||||
fs_path = parsed_url.path
|
||||
if fs_path.startswith("/") and len(fs_path) > 1 and fs_path[2] == ":":
|
||||
fs_path = fs_path[1:] # Remove leading slash for Windows drive paths
|
||||
else: # Unix-like systems
|
||||
fs_path = parsed_url.path
|
||||
fs_path = normalized_file_uri
|
||||
if (
|
||||
(fs_path.startswith("/") or fs_path.startswith("\\"))
|
||||
and len(fs_path) > 1
|
||||
and fs_path[2] == ":"
|
||||
):
|
||||
fs_path = fs_path[1:]
|
||||
else:
|
||||
# Unix - like systems
|
||||
fs_path = normalized_file_uri
|
||||
|
||||
# Now split the actual filesystem path
|
||||
actual_fs_path = os.path.normpath(fs_path)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import io
|
||||
import os.path
|
||||
from typing import BinaryIO, TypedDict
|
||||
from pathlib import Path
|
||||
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.infrastructure.files.utils.get_file_content_hash import get_file_content_hash
|
||||
|
|
@ -55,7 +56,7 @@ async def get_file_metadata(file: BinaryIO) -> FileMetadata:
|
|||
file_type = guess_file_type(file)
|
||||
|
||||
file_path = getattr(file, "name", None) or getattr(file, "full_name", None)
|
||||
file_name = str(file_path).split("/")[-1].split(".")[0] if file_path else None
|
||||
file_name = Path(file_path).stem if file_path else None
|
||||
|
||||
# Get file size
|
||||
pos = file.tell() # remember current pointer
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import os
|
|||
import sys
|
||||
import uuid
|
||||
import pytest
|
||||
import pathlib
|
||||
from unittest.mock import patch
|
||||
|
||||
from cognee.modules.chunking.TextChunker import TextChunker
|
||||
|
|
@ -24,8 +25,7 @@ GROUND_TRUTH = [
|
|||
@pytest.mark.asyncio
|
||||
async def test_PdfDocument(mock_engine):
|
||||
test_file_path = os.path.join(
|
||||
os.sep,
|
||||
*(os.path.dirname(__file__).split(os.sep)[:-2]),
|
||||
pathlib.Path(__file__).parent.parent.parent,
|
||||
"test_data",
|
||||
"artificial-intelligence.pdf",
|
||||
)
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import os
|
|||
import sys
|
||||
import uuid
|
||||
import pytest
|
||||
import pathlib
|
||||
from unittest.mock import patch
|
||||
|
||||
from cognee.modules.chunking.TextChunker import TextChunker
|
||||
|
|
@ -34,10 +35,7 @@ GROUND_TRUTH = {
|
|||
@pytest.mark.asyncio
|
||||
async def test_TextDocument(mock_engine, input_file, chunk_size):
|
||||
test_file_path = os.path.join(
|
||||
os.sep,
|
||||
*(os.path.dirname(__file__).split(os.sep)[:-2]),
|
||||
"test_data",
|
||||
input_file,
|
||||
pathlib.Path(__file__).parent.parent.parent, "test_data", input_file
|
||||
)
|
||||
document = TextDocument(
|
||||
id=uuid.uuid4(),
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import os
|
|||
import sys
|
||||
import uuid
|
||||
import pytest
|
||||
import pathlib
|
||||
from unittest.mock import patch
|
||||
|
||||
from cognee.modules.chunking.TextChunker import TextChunker
|
||||
|
|
@ -18,29 +19,25 @@ chunk_by_sentence_module = sys.modules.get("cognee.tasks.chunks.chunk_by_sentenc
|
|||
async def test_UnstructuredDocument(mock_engine):
|
||||
# Define file paths of test data
|
||||
pptx_file_path = os.path.join(
|
||||
os.sep,
|
||||
*(os.path.dirname(__file__).split(os.sep)[:-2]),
|
||||
pathlib.Path(__file__).parent.parent.parent,
|
||||
"test_data",
|
||||
"example.pptx",
|
||||
)
|
||||
|
||||
docx_file_path = os.path.join(
|
||||
os.sep,
|
||||
*(os.path.dirname(__file__).split(os.sep)[:-2]),
|
||||
pathlib.Path(__file__).parent.parent.parent,
|
||||
"test_data",
|
||||
"example.docx",
|
||||
)
|
||||
|
||||
csv_file_path = os.path.join(
|
||||
os.sep,
|
||||
*(os.path.dirname(__file__).split(os.sep)[:-2]),
|
||||
pathlib.Path(__file__).parent.parent.parent,
|
||||
"test_data",
|
||||
"example.csv",
|
||||
)
|
||||
|
||||
xlsx_file_path = os.path.join(
|
||||
os.sep,
|
||||
*(os.path.dirname(__file__).split(os.sep)[:-2]),
|
||||
pathlib.Path(__file__).parent.parent.parent,
|
||||
"test_data",
|
||||
"example.xlsx",
|
||||
)
|
||||
|
|
|
|||
|
|
@ -45,8 +45,6 @@ async def main():
|
|||
Each of these car manufacturer contributes to Germany's reputation as a leader in the global automotive industry, showcasing a blend of innovation, performance, and design excellence.
|
||||
"""
|
||||
|
||||
################### HARD DELETE
|
||||
|
||||
# Add documents and get dataset information
|
||||
add_result = await cognee.add(
|
||||
[
|
||||
|
|
@ -80,41 +78,6 @@ async def main():
|
|||
|
||||
assert len(nodes) == 0 and len(edges) == 0, "Document is not deleted with hard delete."
|
||||
|
||||
################### SOFT DELETE
|
||||
|
||||
# Add documents and get dataset information
|
||||
add_result = await cognee.add(
|
||||
[
|
||||
pdf_document,
|
||||
txt_document,
|
||||
text_document_as_literal,
|
||||
unstructured_document,
|
||||
audio_document,
|
||||
image_document,
|
||||
]
|
||||
)
|
||||
dataset_id = add_result.dataset_id
|
||||
|
||||
await cognee.cognify()
|
||||
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
|
||||
graph_engine = await get_graph_engine()
|
||||
nodes, edges = await graph_engine.get_graph_data()
|
||||
assert len(nodes) > 10 and len(edges) > 10, "Graph database is not loaded."
|
||||
|
||||
# Get the data IDs from the dataset
|
||||
dataset_data = await get_dataset_data(dataset_id)
|
||||
assert len(dataset_data) > 0, "Dataset should contain data"
|
||||
|
||||
# Delete each document using its ID
|
||||
for data_item in dataset_data:
|
||||
await cognee.delete(data_item.id, dataset_id, mode="soft")
|
||||
|
||||
nodes, edges = await graph_engine.get_graph_data()
|
||||
|
||||
assert len(nodes) == 0 and len(edges) == 0, "Document is not deleted with soft delete."
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import asyncio
|
||||
85
cognee/tests/test_delete_soft.py
Normal file
85
cognee/tests/test_delete_soft.py
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
import os
|
||||
import shutil
|
||||
import cognee
|
||||
import pathlib
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.modules.data.methods import get_dataset_data
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
async def main():
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
pdf_document = os.path.join(
|
||||
pathlib.Path(__file__).parent, "test_data/artificial-intelligence.pdf"
|
||||
)
|
||||
|
||||
txt_document = os.path.join(
|
||||
pathlib.Path(__file__).parent, "test_data/Natural_language_processing_copy.txt"
|
||||
)
|
||||
|
||||
audio_document = os.path.join(pathlib.Path(__file__).parent, "test_data/text_to_speech.mp3")
|
||||
|
||||
image_document = os.path.join(pathlib.Path(__file__).parent, "test_data/example.png")
|
||||
|
||||
unstructured_document = os.path.join(pathlib.Path(__file__).parent, "test_data/example.pptx")
|
||||
|
||||
text_document_as_literal = """
|
||||
1. Audi
|
||||
Audi is known for its modern designs and advanced technology. Founded in the early 1900s, the brand has earned a reputation for precision engineering and innovation. With features like the Quattro all-wheel-drive system, Audi offers a range of vehicles from stylish sedans to high-performance sports cars.
|
||||
|
||||
2. BMW
|
||||
BMW, short for Bayerische Motoren Werke, is celebrated for its focus on performance and driving pleasure. The company's vehicles are designed to provide a dynamic and engaging driving experience, and their slogan, "The Ultimate Driving Machine," reflects that commitment. BMW produces a variety of cars that combine luxury with sporty performance.
|
||||
|
||||
3. Mercedes-Benz
|
||||
Mercedes-Benz is synonymous with luxury and quality. With a history dating back to the early 20th century, the brand is known for its elegant designs, innovative safety features, and high-quality engineering. Mercedes-Benz manufactures not only luxury sedans but also SUVs, sports cars, and commercial vehicles, catering to a wide range of needs.
|
||||
|
||||
4. Porsche
|
||||
Porsche is a name that stands for high-performance sports cars. Founded in 1931, the brand has become famous for models like the iconic Porsche 911. Porsche cars are celebrated for their speed, precision, and distinctive design, appealing to car enthusiasts who value both performance and style.
|
||||
|
||||
5. Volkswagen
|
||||
Volkswagen, which means "people's car" in German, was established with the idea of making affordable and reliable vehicles accessible to everyone. Over the years, Volkswagen has produced several iconic models, such as the Beetle and the Golf. Today, it remains one of the largest car manufacturers in the world, offering a wide range of vehicles that balance practicality with quality.
|
||||
|
||||
Each of these car manufacturer contributes to Germany's reputation as a leader in the global automotive industry, showcasing a blend of innovation, performance, and design excellence.
|
||||
"""
|
||||
|
||||
# Add documents and get dataset information
|
||||
add_result = await cognee.add(
|
||||
[
|
||||
pdf_document,
|
||||
txt_document,
|
||||
text_document_as_literal,
|
||||
unstructured_document,
|
||||
audio_document,
|
||||
image_document,
|
||||
]
|
||||
)
|
||||
dataset_id = add_result.dataset_id
|
||||
|
||||
await cognee.cognify()
|
||||
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
|
||||
graph_engine = await get_graph_engine()
|
||||
nodes, edges = await graph_engine.get_graph_data()
|
||||
assert len(nodes) > 10 and len(edges) > 10, "Graph database is not loaded."
|
||||
|
||||
# Get the data IDs from the dataset
|
||||
dataset_data = await get_dataset_data(dataset_id)
|
||||
assert len(dataset_data) > 0, "Dataset should contain data"
|
||||
|
||||
# Delete each document using its ID
|
||||
for data_item in dataset_data:
|
||||
await cognee.delete(data_item.id, dataset_id, mode="soft")
|
||||
|
||||
nodes, edges = await graph_engine.get_graph_data()
|
||||
|
||||
assert len(nodes) == 0 and len(edges) == 0, "Document is not deleted with soft delete."
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import asyncio
|
||||
|
||||
asyncio.run(main())
|
||||
21
poetry.lock
generated
21
poetry.lock
generated
|
|
@ -1,4 +1,4 @@
|
|||
# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand.
|
||||
# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "aiobotocore"
|
||||
|
|
@ -3842,8 +3842,6 @@ python-versions = "*"
|
|||
groups = ["main"]
|
||||
files = [
|
||||
{file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"},
|
||||
{file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"},
|
||||
{file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
|
|
@ -7359,7 +7357,6 @@ files = [
|
|||
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"},
|
||||
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"},
|
||||
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"},
|
||||
{file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"},
|
||||
{file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"},
|
||||
{file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"},
|
||||
{file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"},
|
||||
|
|
@ -8358,6 +8355,20 @@ files = [
|
|||
{file = "python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-magic-bin"
|
||||
version = "0.4.14"
|
||||
description = "File type identification using libmagic binary package"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
markers = "platform_system == \"Windows\""
|
||||
files = [
|
||||
{file = "python_magic_bin-0.4.14-py2.py3-none-macosx_10_6_intel.whl", hash = "sha256:7b1743b3dbf16601d6eedf4e7c2c9a637901b0faaf24ad4df4d4527e7d8f66a4"},
|
||||
{file = "python_magic_bin-0.4.14-py2.py3-none-win32.whl", hash = "sha256:34a788c03adde7608028203e2dbb208f1f62225ad91518787ae26d603ae68892"},
|
||||
{file = "python_magic_bin-0.4.14-py2.py3-none-win_amd64.whl", hash = "sha256:90be6206ad31071a36065a2fc169c5afb5e0355cbe6030e87641c6c62edc2b69"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-multipart"
|
||||
version = "0.0.20"
|
||||
|
|
@ -11772,4 +11783,4 @@ posthog = ["posthog"]
|
|||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = ">=3.10,<=3.13"
|
||||
content-hash = "4e25aebb8132dfc3dd44bd1b864f650c0ffe76d5bbe147cf9146b4c66ad780ff"
|
||||
content-hash = "9cc2836e79a1e8c5ac03b8f89bbdde0c8738c2f259ae220cba55253ab2ff63cd"
|
||||
|
|
|
|||
|
|
@ -60,7 +60,8 @@ dependencies = [
|
|||
"pympler>=1.1,<2.0.0",
|
||||
"onnxruntime>=1.0.0,<2.0.0",
|
||||
"pylance>=0.22.0,<1.0.0",
|
||||
"kuzu (==0.11.0)"
|
||||
"kuzu (==0.11.0)",
|
||||
"python-magic-bin<0.5 ; platform_system == 'Windows'", # Only needed for Windows
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
|
@ -186,4 +187,4 @@ exclude = [
|
|||
]
|
||||
|
||||
[tool.ruff.lint]
|
||||
ignore = ["F401"]
|
||||
ignore = ["F401"]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue