Merge branch 'main' into main-merge

This commit is contained in:
Igor Ilic 2025-06-30 12:24:24 +02:00
commit 0d75b6dc76
11 changed files with 1788 additions and 7918 deletions

View file

@ -21,7 +21,6 @@ runs:
run: |
python -m pip install --upgrade pip
pip install poetry
- name: Install dependencies
shell: bash
run: poetry install --no-interaction -E api -E docs -E evals -E gemini -E codegraph -E ollama -E dev

View file

@ -55,7 +55,7 @@ jobs:
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: ${{ inputs.python-version }}
python-version: ${{ matrix.python-version }}
- name: Run unit tests
shell: bash

View file

@ -67,6 +67,8 @@ jobs:
name: Python Version Tests
needs: [basic-tests, e2e-tests]
uses: ./.github/workflows/python_version_tests.yml
with:
python-versions: '["3.10.x", "3.11.x", "3.12.x"]'
secrets: inherit
# Matrix-based vector database tests

1
.gitignore vendored
View file

@ -36,6 +36,7 @@ share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
.python-version
MANIFEST
# PyInstaller

View file

@ -1 +0,0 @@
3.11

File diff suppressed because it is too large Load diff

View file

@ -23,5 +23,9 @@ def open_data_file(
return f
else:
return fs.open(file_path, mode=mode, encoding=encoding, **kwargs)
elif file_path.startswith("file://"):
# Handle local file URLs by stripping the file:// prefix
file_path = file_path.replace("file://", "", 1)
return open(file_path, mode=mode, encoding=encoding, **kwargs)
else:
return open(file_path, mode=mode, encoding=encoding, **kwargs)

View file

@ -0,0 +1,98 @@
import os
import tempfile
import pytest
from cognee.modules.data.processing.document_types.open_data_file import open_data_file
class TestOpenDataFile:
    """Test cases for open_data_file function with file:// URL handling.

    Every test that needs a real file writes a throwaway temporary file,
    opens it through ``open_data_file`` (either by plain path or by a
    ``file://`` URL) and removes the file again in a ``finally`` block.
    """

    @staticmethod
    def _make_temp_file(content, encoding=None):
        """Write *content* to a new temporary ``.txt`` file and return its path.

        The file is created with ``delete=False`` so that it can be reopened
        by path after this helper returns; the caller must delete it.
        """
        with tempfile.NamedTemporaryFile(
            mode='w', delete=False, suffix='.txt', encoding=encoding
        ) as f:
            f.write(content)
            return f.name

    def test_regular_file_path(self):
        """Test that regular file paths work as before."""
        test_content = "Test content for regular file path"
        temp_file_path = self._make_temp_file(test_content)
        try:
            with open_data_file(temp_file_path, mode='r') as f:
                content = f.read()
            assert content == test_content
        finally:
            os.unlink(temp_file_path)

    def test_file_url_text_mode(self):
        """Test that file:// URLs work correctly in text mode."""
        test_content = "Test content for file:// URL handling"
        temp_file_path = self._make_temp_file(test_content)
        try:
            file_url = f"file://{temp_file_path}"
            with open_data_file(file_url, mode='r') as f:
                content = f.read()
            assert content == test_content
        finally:
            os.unlink(temp_file_path)

    def test_file_url_binary_mode(self):
        """Test that file:// URLs work correctly in binary mode."""
        test_content = "Test content for binary mode"
        temp_file_path = self._make_temp_file(test_content)
        try:
            file_url = f"file://{temp_file_path}"
            with open_data_file(file_url, mode='rb') as f:
                content = f.read()
            # Binary mode yields bytes, so compare against the encoded text.
            assert content == test_content.encode()
        finally:
            os.unlink(temp_file_path)

    def test_file_url_with_encoding(self):
        """Test that file:// URLs work with specific encoding."""
        test_content = "Test content with UTF-8: café ☕"
        temp_file_path = self._make_temp_file(test_content, encoding='utf-8')
        try:
            file_url = f"file://{temp_file_path}"
            with open_data_file(file_url, mode='r', encoding='utf-8') as f:
                content = f.read()
            assert content == test_content
        finally:
            os.unlink(temp_file_path)

    def test_file_url_nonexistent_file(self):
        """Test that file:// URLs raise appropriate error for nonexistent files."""
        file_url = "file:///nonexistent/path/to/file.txt"
        with pytest.raises(FileNotFoundError):
            with open_data_file(file_url, mode='r') as f:
                f.read()

    def test_multiple_file_prefixes(self):
        """Test that only the first file:// prefix is stripped.

        A doubled prefix leaves a literal ``file://<path>`` string behind
        after stripping, which does not name an existing file, so opening it
        must fail rather than silently resolve to the real temporary file.
        """
        temp_file_path = self._make_temp_file("Test content")
        try:
            # Simulate someone accidentally adding the prefix twice.
            file_url = f"file://file://{temp_file_path}"
            # Assert the failure explicitly instead of swallowing it, so the
            # test cannot pass vacuously if the stripping behavior changes.
            with pytest.raises(FileNotFoundError):
                with open_data_file(file_url, mode='r') as f:
                    f.read()
        finally:
            os.unlink(temp_file_path)
if __name__ == "__main__":
    # Allow running this file directly. Propagate pytest's exit code so a
    # failing run is reported to the shell instead of always exiting with 0.
    raise SystemExit(pytest.main([__file__, "-v"]))

2392
poetry.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,6 @@
[project]
name = "cognee"
version = "0.2.0.dev0"
version = "0.2.0"
description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
authors = [
{ name = "Vasilije Markovic" },
@ -27,16 +27,19 @@ dependencies = [
"nltk==3.9.1",
"numpy>=1.26.4, <=2.1",
"pandas>=2.2.2",
# Note: New s3fs and boto3 versions don't work well together
# Always use compatible fixed versions of these two dependencies
"s3fs[boto3]==2025.3.2",
"sqlalchemy==2.0.39",
"aiosqlite>=0.20.0,<0.21",
"tiktoken<=0.9.0",
"litellm>=1.57.4",
"litellm>=1.57.4, <1.71.0",
"instructor>=1.7.2",
"langfuse>=2.32.0,<3",
"filetype>=1.2.0",
"aiohttp>=3.11.14",
"aiofiles>=23.2.1",
"rdflib>=7.1.4,<7.2.0",
"owlready2>=0.47,<0.48",
"graphistry>=0.33.5,<0.34",
"pypdf>=4.1.0,<6.0.0",
"jinja2>=3.1.3,<4",

928
uv.lock generated

File diff suppressed because it is too large Load diff