fix: Resolve issue with Windows path
This commit is contained in:
parent
d6e6e874eb
commit
8c69653912
3 changed files with 6 additions and 15 deletions
1
.github/workflows/python_version_tests.yml
vendored
1
.github/workflows/python_version_tests.yml
vendored
|
|
@@ -71,7 +71,6 @@ jobs:
|
|||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||
|
||||
- name: Run integration tests
|
||||
if: ${{ !contains(matrix.os, 'windows') }}
|
||||
shell: bash
|
||||
run: uv run pytest cognee/tests/integration/
|
||||
env:
|
||||
|
|
|
|||
|
|
@@ -5,22 +5,13 @@ from urllib.parse import urlparse
|
|||
def get_data_file_path(file_path: str):
|
||||
# Check if this is a file URI BEFORE normalizing (which corrupts URIs)
|
||||
if file_path.startswith("file://"):
|
||||
# Remove first occurrence of file:// prefix
|
||||
pure_file_path = file_path.replace("file://", "", 1)
|
||||
# Normalize the file URI for Windows - replace backslashes with forward slashes
|
||||
normalized_file_uri = os.path.normpath(file_path)
|
||||
|
||||
parsed_url = urlparse(normalized_file_uri)
|
||||
|
||||
# Convert URI path to file system path
|
||||
if os.name == "nt": # Windows
|
||||
# Handle Windows drive letters correctly
|
||||
fs_path = parsed_url.path
|
||||
if fs_path.startswith("/") and len(fs_path) > 1 and fs_path[2] == ":":
|
||||
fs_path = fs_path[1:] # Remove leading slash for Windows drive paths
|
||||
else: # Unix-like systems
|
||||
fs_path = parsed_url.path
|
||||
normalized_file_uri = os.path.normpath(pure_file_path)
|
||||
|
||||
# Now split the actual filesystem path
|
||||
actual_fs_path = os.path.normpath(fs_path)
|
||||
actual_fs_path = os.path.normpath(normalized_file_uri)
|
||||
return actual_fs_path
|
||||
|
||||
elif file_path.startswith("s3://"):
|
||||
|
|
|
|||
|
|
@@ -1,6 +1,7 @@
|
|||
import io
|
||||
import os.path
|
||||
from typing import BinaryIO, TypedDict
|
||||
from pathlib import Path
|
||||
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.infrastructure.files.utils.get_file_content_hash import get_file_content_hash
|
||||
|
|
@@ -55,7 +56,7 @@ async def get_file_metadata(file: BinaryIO) -> FileMetadata:
|
|||
file_type = guess_file_type(file)
|
||||
|
||||
file_path = getattr(file, "name", None) or getattr(file, "full_name", None)
|
||||
file_name = str(file_path).split("/")[-1].split(".")[0] if file_path else None
|
||||
file_name = Path(file_path).stem if file_path else None
|
||||
|
||||
# Get file size
|
||||
pos = file.tell() # remember current pointer
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue