Merge branch 'main' into main-merge
This commit is contained in:
commit
0d75b6dc76
11 changed files with 1788 additions and 7918 deletions
1
.github/actions/cognee_setup/action.yml
vendored
1
.github/actions/cognee_setup/action.yml
vendored
|
|
@ -21,7 +21,6 @@ runs:
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install poetry
|
pip install poetry
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
shell: bash
|
shell: bash
|
||||||
run: poetry install --no-interaction -E api -E docs -E evals -E gemini -E codegraph -E ollama -E dev
|
run: poetry install --no-interaction -E api -E docs -E evals -E gemini -E codegraph -E ollama -E dev
|
||||||
|
|
|
||||||
2
.github/workflows/python_version_tests.yml
vendored
2
.github/workflows/python_version_tests.yml
vendored
|
|
@ -55,7 +55,7 @@ jobs:
|
||||||
- name: Cognee Setup
|
- name: Cognee Setup
|
||||||
uses: ./.github/actions/cognee_setup
|
uses: ./.github/actions/cognee_setup
|
||||||
with:
|
with:
|
||||||
python-version: ${{ inputs.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
|
|
||||||
- name: Run unit tests
|
- name: Run unit tests
|
||||||
shell: bash
|
shell: bash
|
||||||
|
|
|
||||||
2
.github/workflows/test_suites.yml
vendored
2
.github/workflows/test_suites.yml
vendored
|
|
@ -67,6 +67,8 @@ jobs:
|
||||||
name: Python Version Tests
|
name: Python Version Tests
|
||||||
needs: [basic-tests, e2e-tests]
|
needs: [basic-tests, e2e-tests]
|
||||||
uses: ./.github/workflows/python_version_tests.yml
|
uses: ./.github/workflows/python_version_tests.yml
|
||||||
|
with:
|
||||||
|
python-versions: '["3.10.x", "3.11.x", "3.12.x"]'
|
||||||
secrets: inherit
|
secrets: inherit
|
||||||
|
|
||||||
# Matrix-based vector database tests
|
# Matrix-based vector database tests
|
||||||
|
|
|
||||||
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -36,6 +36,7 @@ share/python-wheels/
|
||||||
*.egg-info/
|
*.egg-info/
|
||||||
.installed.cfg
|
.installed.cfg
|
||||||
*.egg
|
*.egg
|
||||||
|
.python-version
|
||||||
MANIFEST
|
MANIFEST
|
||||||
|
|
||||||
# PyInstaller
|
# PyInstaller
|
||||||
|
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
3.11
|
|
||||||
6268
cognee-frontend/package-lock.json
generated
6268
cognee-frontend/package-lock.json
generated
File diff suppressed because it is too large
Load diff
|
|
@ -23,5 +23,9 @@ def open_data_file(
|
||||||
return f
|
return f
|
||||||
else:
|
else:
|
||||||
return fs.open(file_path, mode=mode, encoding=encoding, **kwargs)
|
return fs.open(file_path, mode=mode, encoding=encoding, **kwargs)
|
||||||
|
elif file_path.startswith("file://"):
|
||||||
|
# Handle local file URLs by stripping the file:// prefix
|
||||||
|
file_path = file_path.replace("file://", "", 1)
|
||||||
|
return open(file_path, mode=mode, encoding=encoding, **kwargs)
|
||||||
else:
|
else:
|
||||||
return open(file_path, mode=mode, encoding=encoding, **kwargs)
|
return open(file_path, mode=mode, encoding=encoding, **kwargs)
|
||||||
|
|
|
||||||
98
cognee/tests/unit/modules/data/test_open_data_file.py
Normal file
98
cognee/tests/unit/modules/data/test_open_data_file.py
Normal file
|
|
@ -0,0 +1,98 @@
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
import pytest
|
||||||
|
from cognee.modules.data.processing.document_types.open_data_file import open_data_file
|
||||||
|
|
||||||
|
|
||||||
|
class TestOpenDataFile:
|
||||||
|
"""Test cases for open_data_file function with file:// URL handling."""
|
||||||
|
|
||||||
|
def test_regular_file_path(self):
|
||||||
|
"""Test that regular file paths work as before."""
|
||||||
|
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f:
|
||||||
|
test_content = "Test content for regular file path"
|
||||||
|
f.write(test_content)
|
||||||
|
temp_file_path = f.name
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open_data_file(temp_file_path, mode='r') as f:
|
||||||
|
content = f.read()
|
||||||
|
assert content == test_content
|
||||||
|
finally:
|
||||||
|
os.unlink(temp_file_path)
|
||||||
|
|
||||||
|
def test_file_url_text_mode(self):
|
||||||
|
"""Test that file:// URLs work correctly in text mode."""
|
||||||
|
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f:
|
||||||
|
test_content = "Test content for file:// URL handling"
|
||||||
|
f.write(test_content)
|
||||||
|
temp_file_path = f.name
|
||||||
|
|
||||||
|
try:
|
||||||
|
file_url = f"file://{temp_file_path}"
|
||||||
|
with open_data_file(file_url, mode='r') as f:
|
||||||
|
content = f.read()
|
||||||
|
assert content == test_content
|
||||||
|
finally:
|
||||||
|
os.unlink(temp_file_path)
|
||||||
|
|
||||||
|
def test_file_url_binary_mode(self):
|
||||||
|
"""Test that file:// URLs work correctly in binary mode."""
|
||||||
|
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f:
|
||||||
|
test_content = "Test content for binary mode"
|
||||||
|
f.write(test_content)
|
||||||
|
temp_file_path = f.name
|
||||||
|
|
||||||
|
try:
|
||||||
|
file_url = f"file://{temp_file_path}"
|
||||||
|
with open_data_file(file_url, mode='rb') as f:
|
||||||
|
content = f.read()
|
||||||
|
assert content == test_content.encode()
|
||||||
|
finally:
|
||||||
|
os.unlink(temp_file_path)
|
||||||
|
|
||||||
|
def test_file_url_with_encoding(self):
|
||||||
|
"""Test that file:// URLs work with specific encoding."""
|
||||||
|
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt', encoding='utf-8') as f:
|
||||||
|
test_content = "Test content with UTF-8: café ☕"
|
||||||
|
f.write(test_content)
|
||||||
|
temp_file_path = f.name
|
||||||
|
|
||||||
|
try:
|
||||||
|
file_url = f"file://{temp_file_path}"
|
||||||
|
with open_data_file(file_url, mode='r', encoding='utf-8') as f:
|
||||||
|
content = f.read()
|
||||||
|
assert content == test_content
|
||||||
|
finally:
|
||||||
|
os.unlink(temp_file_path)
|
||||||
|
|
||||||
|
def test_file_url_nonexistent_file(self):
|
||||||
|
"""Test that file:// URLs raise appropriate error for nonexistent files."""
|
||||||
|
file_url = "file:///nonexistent/path/to/file.txt"
|
||||||
|
with pytest.raises(FileNotFoundError):
|
||||||
|
with open_data_file(file_url, mode='r') as f:
|
||||||
|
f.read()
|
||||||
|
|
||||||
|
def test_multiple_file_prefixes(self):
|
||||||
|
"""Test that multiple file:// prefixes are handled correctly."""
|
||||||
|
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f:
|
||||||
|
test_content = "Test content"
|
||||||
|
f.write(test_content)
|
||||||
|
temp_file_path = f.name
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Even if someone accidentally adds multiple file:// prefixes
|
||||||
|
file_url = f"file://file://{temp_file_path}"
|
||||||
|
with open_data_file(file_url, mode='r') as f:
|
||||||
|
content = f.read()
|
||||||
|
# This should work because we only replace the first occurrence
|
||||||
|
assert content == test_content
|
||||||
|
except FileNotFoundError:
|
||||||
|
# This is expected behavior - only the first file:// should be stripped
|
||||||
|
pass
|
||||||
|
finally:
|
||||||
|
os.unlink(temp_file_path)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
pytest.main([__file__, "-v"])
|
||||||
2392
poetry.lock
generated
2392
poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -1,6 +1,6 @@
|
||||||
[project]
|
[project]
|
||||||
name = "cognee"
|
name = "cognee"
|
||||||
version = "0.2.0.dev0"
|
version = "0.2.0"
|
||||||
description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
|
description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
|
||||||
authors = [
|
authors = [
|
||||||
{ name = "Vasilije Markovic" },
|
{ name = "Vasilije Markovic" },
|
||||||
|
|
@ -27,16 +27,19 @@ dependencies = [
|
||||||
"nltk==3.9.1",
|
"nltk==3.9.1",
|
||||||
"numpy>=1.26.4, <=2.1",
|
"numpy>=1.26.4, <=2.1",
|
||||||
"pandas>=2.2.2",
|
"pandas>=2.2.2",
|
||||||
|
# Note: New s3fs and boto3 versions don't work well together
|
||||||
|
# Always use compatible fixed versions of these two dependencies
|
||||||
|
"s3fs[boto3]==2025.3.2",
|
||||||
"sqlalchemy==2.0.39",
|
"sqlalchemy==2.0.39",
|
||||||
"aiosqlite>=0.20.0,<0.21",
|
"aiosqlite>=0.20.0,<0.21",
|
||||||
"tiktoken<=0.9.0",
|
"tiktoken<=0.9.0",
|
||||||
"litellm>=1.57.4",
|
"litellm>=1.57.4, <1.71.0",
|
||||||
"instructor>=1.7.2",
|
"instructor>=1.7.2",
|
||||||
"langfuse>=2.32.0,<3",
|
"langfuse>=2.32.0,<3",
|
||||||
"filetype>=1.2.0",
|
"filetype>=1.2.0",
|
||||||
"aiohttp>=3.11.14",
|
"aiohttp>=3.11.14",
|
||||||
"aiofiles>=23.2.1",
|
"aiofiles>=23.2.1",
|
||||||
"rdflib>=7.1.4,<7.2.0",
|
"owlready2>=0.47,<0.48",
|
||||||
"graphistry>=0.33.5,<0.34",
|
"graphistry>=0.33.5,<0.34",
|
||||||
"pypdf>=4.1.0,<6.0.0",
|
"pypdf>=4.1.0,<6.0.0",
|
||||||
"jinja2>=3.1.3,<4",
|
"jinja2>=3.1.3,<4",
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue