diff --git a/.github/workflows/python_version_tests.yml b/.github/workflows/python_version_tests.yml index d7a7da050..5b556fd8b 100644 --- a/.github/workflows/python_version_tests.yml +++ b/.github/workflows/python_version_tests.yml @@ -71,7 +71,6 @@ jobs: EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - name: Run integration tests - if: ${{ !contains(matrix.os, 'windows') }} shell: bash run: uv run pytest cognee/tests/integration/ env: diff --git a/cognee/infrastructure/files/utils/get_data_file_path.py b/cognee/infrastructure/files/utils/get_data_file_path.py index 095112274..f716948b9 100644 --- a/cognee/infrastructure/files/utils/get_data_file_path.py +++ b/cognee/infrastructure/files/utils/get_data_file_path.py @@ -5,22 +5,13 @@ from urllib.parse import urlparse def get_data_file_path(file_path: str): # Check if this is a file URI BEFORE normalizing (which corrupts URIs) if file_path.startswith("file://"): + # Remove first occurrence of file:// prefix + pure_file_path = file_path.replace("file://", "", 1) # Normalize the file URI for Windows - replace backslashes with forward slashes - normalized_file_uri = os.path.normpath(file_path) - - parsed_url = urlparse(normalized_file_uri) - - # Convert URI path to file system path - if os.name == "nt": # Windows - # Handle Windows drive letters correctly - fs_path = parsed_url.path - if fs_path.startswith("/") and len(fs_path) > 1 and fs_path[2] == ":": - fs_path = fs_path[1:] # Remove leading slash for Windows drive paths - else: # Unix-like systems - fs_path = parsed_url.path + normalized_file_uri = os.path.normpath(pure_file_path) # Now split the actual filesystem path - actual_fs_path = os.path.normpath(fs_path) + actual_fs_path = os.path.normpath(normalized_file_uri) return actual_fs_path elif file_path.startswith("s3://"): diff --git a/cognee/infrastructure/files/utils/get_file_metadata.py b/cognee/infrastructure/files/utils/get_file_metadata.py index 79bd0c5a7..1eb7a1f79 100644 --- a/cognee/infrastructure/files/utils/get_file_metadata.py +++ b/cognee/infrastructure/files/utils/get_file_metadata.py @@ -1,6 +1,7 @@ import io import os.path from typing import BinaryIO, TypedDict +from pathlib import Path from cognee.shared.logging_utils import get_logger from cognee.infrastructure.files.utils.get_file_content_hash import get_file_content_hash @@ -55,7 +56,7 @@ async def get_file_metadata(file: BinaryIO) -> FileMetadata: file_type = guess_file_type(file) file_path = getattr(file, "name", None) or getattr(file, "full_name", None) - file_name = str(file_path).split("/")[-1].split(".")[0] if file_path else None + file_name = Path(file_path).stem if file_path else None # Get file size pos = file.tell() # remember current pointer