Merge branch 'main' into main-merge
This commit is contained in:
commit
0d75b6dc76
11 changed files with 1788 additions and 7918 deletions
1
.github/actions/cognee_setup/action.yml
vendored
1
.github/actions/cognee_setup/action.yml
vendored
|
|
@ -21,7 +21,6 @@ runs:
|
|||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install poetry
|
||||
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
run: poetry install --no-interaction -E api -E docs -E evals -E gemini -E codegraph -E ollama -E dev
|
||||
|
|
|
|||
2
.github/workflows/python_version_tests.yml
vendored
2
.github/workflows/python_version_tests.yml
vendored
|
|
@ -55,7 +55,7 @@ jobs:
|
|||
- name: Cognee Setup
|
||||
uses: ./.github/actions/cognee_setup
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Run unit tests
|
||||
shell: bash
|
||||
|
|
|
|||
2
.github/workflows/test_suites.yml
vendored
2
.github/workflows/test_suites.yml
vendored
|
|
@ -67,6 +67,8 @@ jobs:
|
|||
name: Python Version Tests
|
||||
needs: [basic-tests, e2e-tests]
|
||||
uses: ./.github/workflows/python_version_tests.yml
|
||||
with:
|
||||
python-versions: '["3.10.x", "3.11.x", "3.12.x"]'
|
||||
secrets: inherit
|
||||
|
||||
# Matrix-based vector database tests
|
||||
|
|
|
|||
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -36,6 +36,7 @@ share/python-wheels/
|
|||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
.python-version
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
|
|
|
|||
|
|
@ -1 +0,0 @@
|
|||
3.11
|
||||
6268
cognee-frontend/package-lock.json
generated
6268
cognee-frontend/package-lock.json
generated
File diff suppressed because it is too large
Load diff
|
|
@ -23,5 +23,9 @@ def open_data_file(
|
|||
return f
|
||||
else:
|
||||
return fs.open(file_path, mode=mode, encoding=encoding, **kwargs)
|
||||
elif file_path.startswith("file://"):
|
||||
# Handle local file URLs by stripping the file:// prefix
|
||||
file_path = file_path.replace("file://", "", 1)
|
||||
return open(file_path, mode=mode, encoding=encoding, **kwargs)
|
||||
else:
|
||||
return open(file_path, mode=mode, encoding=encoding, **kwargs)
|
||||
|
|
|
|||
98
cognee/tests/unit/modules/data/test_open_data_file.py
Normal file
98
cognee/tests/unit/modules/data/test_open_data_file.py
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
import os
|
||||
import tempfile
|
||||
import pytest
|
||||
from cognee.modules.data.processing.document_types.open_data_file import open_data_file
|
||||
|
||||
|
||||
class TestOpenDataFile:
|
||||
"""Test cases for open_data_file function with file:// URL handling."""
|
||||
|
||||
def test_regular_file_path(self):
|
||||
"""Test that regular file paths work as before."""
|
||||
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f:
|
||||
test_content = "Test content for regular file path"
|
||||
f.write(test_content)
|
||||
temp_file_path = f.name
|
||||
|
||||
try:
|
||||
with open_data_file(temp_file_path, mode='r') as f:
|
||||
content = f.read()
|
||||
assert content == test_content
|
||||
finally:
|
||||
os.unlink(temp_file_path)
|
||||
|
||||
def test_file_url_text_mode(self):
|
||||
"""Test that file:// URLs work correctly in text mode."""
|
||||
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f:
|
||||
test_content = "Test content for file:// URL handling"
|
||||
f.write(test_content)
|
||||
temp_file_path = f.name
|
||||
|
||||
try:
|
||||
file_url = f"file://{temp_file_path}"
|
||||
with open_data_file(file_url, mode='r') as f:
|
||||
content = f.read()
|
||||
assert content == test_content
|
||||
finally:
|
||||
os.unlink(temp_file_path)
|
||||
|
||||
def test_file_url_binary_mode(self):
|
||||
"""Test that file:// URLs work correctly in binary mode."""
|
||||
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f:
|
||||
test_content = "Test content for binary mode"
|
||||
f.write(test_content)
|
||||
temp_file_path = f.name
|
||||
|
||||
try:
|
||||
file_url = f"file://{temp_file_path}"
|
||||
with open_data_file(file_url, mode='rb') as f:
|
||||
content = f.read()
|
||||
assert content == test_content.encode()
|
||||
finally:
|
||||
os.unlink(temp_file_path)
|
||||
|
||||
def test_file_url_with_encoding(self):
|
||||
"""Test that file:// URLs work with specific encoding."""
|
||||
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt', encoding='utf-8') as f:
|
||||
test_content = "Test content with UTF-8: café ☕"
|
||||
f.write(test_content)
|
||||
temp_file_path = f.name
|
||||
|
||||
try:
|
||||
file_url = f"file://{temp_file_path}"
|
||||
with open_data_file(file_url, mode='r', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
assert content == test_content
|
||||
finally:
|
||||
os.unlink(temp_file_path)
|
||||
|
||||
def test_file_url_nonexistent_file(self):
|
||||
"""Test that file:// URLs raise appropriate error for nonexistent files."""
|
||||
file_url = "file:///nonexistent/path/to/file.txt"
|
||||
with pytest.raises(FileNotFoundError):
|
||||
with open_data_file(file_url, mode='r') as f:
|
||||
f.read()
|
||||
|
||||
def test_multiple_file_prefixes(self):
|
||||
"""Test that multiple file:// prefixes are handled correctly."""
|
||||
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f:
|
||||
test_content = "Test content"
|
||||
f.write(test_content)
|
||||
temp_file_path = f.name
|
||||
|
||||
try:
|
||||
# Even if someone accidentally adds multiple file:// prefixes
|
||||
file_url = f"file://file://{temp_file_path}"
|
||||
with open_data_file(file_url, mode='r') as f:
|
||||
content = f.read()
|
||||
# Only reached if the path left after stripping the first file:// prefix (which still starts with file://) can actually be opened
|
||||
assert content == test_content
|
||||
except FileNotFoundError:
|
||||
# This is expected behavior - only the first file:// should be stripped
|
||||
pass
|
||||
finally:
|
||||
os.unlink(temp_file_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v"])
|
||||
2392
poetry.lock
generated
2392
poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -1,6 +1,6 @@
|
|||
[project]
|
||||
name = "cognee"
|
||||
version = "0.2.0.dev0"
|
||||
version = "0.2.0"
|
||||
description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
|
||||
authors = [
|
||||
{ name = "Vasilije Markovic" },
|
||||
|
|
@ -27,16 +27,19 @@ dependencies = [
|
|||
"nltk==3.9.1",
|
||||
"numpy>=1.26.4, <=2.1",
|
||||
"pandas>=2.2.2",
|
||||
# Note: New s3fs and boto3 versions don't work well together
|
||||
# Always use compatible fixed versions of these two dependencies
|
||||
"s3fs[boto3]==2025.3.2",
|
||||
"sqlalchemy==2.0.39",
|
||||
"aiosqlite>=0.20.0,<0.21",
|
||||
"tiktoken<=0.9.0",
|
||||
"litellm>=1.57.4",
|
||||
"litellm>=1.57.4, <1.71.0",
|
||||
"instructor>=1.7.2",
|
||||
"langfuse>=2.32.0,<3",
|
||||
"filetype>=1.2.0",
|
||||
"aiohttp>=3.11.14",
|
||||
"aiofiles>=23.2.1",
|
||||
"rdflib>=7.1.4,<7.2.0",
|
||||
"owlready2>=0.47,<0.48",
|
||||
"graphistry>=0.33.5,<0.34",
|
||||
"pypdf>=4.1.0,<6.0.0",
|
||||
"jinja2>=3.1.3,<4",
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue