Main merge (#1030)

<!-- .github/pull_request_template.md -->

## Description
<!-- Provide a clear description of the changes in this PR -->

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
Igor Ilic 2025-06-30 13:00:51 +02:00 committed by GitHub
commit 9c26c5a969
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 2411 additions and 3240 deletions

View file

@ -21,7 +21,6 @@ runs:
run: |
python -m pip install --upgrade pip
pip install poetry
- name: Install dependencies
shell: bash
run: poetry install --no-interaction -E api -E docs -E evals -E gemini -E codegraph -E ollama -E dev

View file

@ -55,7 +55,7 @@ jobs:
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: ${{ inputs.python-version }}
python-version: ${{ matrix.python-version }}
- name: Run unit tests
shell: bash

View file

@ -67,6 +67,8 @@ jobs:
name: Python Version Tests
needs: [basic-tests, e2e-tests]
uses: ./.github/workflows/python_version_tests.yml
with:
python-versions: '["3.10.x", "3.11.x", "3.12.x"]'
secrets: inherit
# Matrix-based vector database tests

1
.gitignore vendored
View file

@ -36,6 +36,7 @@ share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
.python-version
MANIFEST
# PyInstaller

View file

@ -1 +0,0 @@
3.11

File diff suppressed because it is too large Load diff

View file

@ -23,5 +23,9 @@ def open_data_file(
return f
else:
return fs.open(file_path, mode=mode, encoding=encoding, **kwargs)
elif file_path.startswith("file://"):
# Handle local file URLs by stripping the file:// prefix
file_path = file_path.replace("file://", "", 1)
return open(file_path, mode=mode, encoding=encoding, **kwargs)
else:
return open(file_path, mode=mode, encoding=encoding, **kwargs)

View file

@ -0,0 +1,100 @@
import os
import tempfile
import pytest
from cognee.modules.data.processing.document_types.open_data_file import open_data_file
class TestOpenDataFile:
"""Test cases for open_data_file function with file:// URL handling."""
def test_regular_file_path(self):
"""Test that regular file paths work as before."""
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as f:
test_content = "Test content for regular file path"
f.write(test_content)
temp_file_path = f.name
try:
with open_data_file(temp_file_path, mode="r") as f:
content = f.read()
assert content == test_content
finally:
os.unlink(temp_file_path)
def test_file_url_text_mode(self):
"""Test that file:// URLs work correctly in text mode."""
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as f:
test_content = "Test content for file:// URL handling"
f.write(test_content)
temp_file_path = f.name
try:
file_url = f"file://{temp_file_path}"
with open_data_file(file_url, mode="r") as f:
content = f.read()
assert content == test_content
finally:
os.unlink(temp_file_path)
def test_file_url_binary_mode(self):
"""Test that file:// URLs work correctly in binary mode."""
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as f:
test_content = "Test content for binary mode"
f.write(test_content)
temp_file_path = f.name
try:
file_url = f"file://{temp_file_path}"
with open_data_file(file_url, mode="rb") as f:
content = f.read()
assert content == test_content.encode()
finally:
os.unlink(temp_file_path)
def test_file_url_with_encoding(self):
"""Test that file:// URLs work with specific encoding."""
with tempfile.NamedTemporaryFile(
mode="w", delete=False, suffix=".txt", encoding="utf-8"
) as f:
test_content = "Test content with UTF-8: café ☕"
f.write(test_content)
temp_file_path = f.name
try:
file_url = f"file://{temp_file_path}"
with open_data_file(file_url, mode="r", encoding="utf-8") as f:
content = f.read()
assert content == test_content
finally:
os.unlink(temp_file_path)
def test_file_url_nonexistent_file(self):
"""Test that file:// URLs raise appropriate error for nonexistent files."""
file_url = "file:///nonexistent/path/to/file.txt"
with pytest.raises(FileNotFoundError):
with open_data_file(file_url, mode="r") as f:
f.read()
def test_multiple_file_prefixes(self):
"""Test that multiple file:// prefixes are handled correctly."""
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as f:
test_content = "Test content"
f.write(test_content)
temp_file_path = f.name
try:
# Even if someone accidentally adds multiple file:// prefixes
file_url = f"file://file://{temp_file_path}"
with open_data_file(file_url, mode="r") as f:
content = f.read()
# This should work because we only replace the first occurrence
assert content == test_content
except FileNotFoundError:
# This is expected behavior - only the first file:// should be stripped
pass
finally:
os.unlink(temp_file_path)
if __name__ == "__main__":
    # Propagate pytest's exit status so that running this file directly
    # reports test failures to the shell instead of always exiting with 0.
    raise SystemExit(pytest.main([__file__, "-v"]))

2342
poetry.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,6 @@
[project]
name = "cognee"
version = "0.2.0.dev0"
version = "0.2.0"
description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
authors = [
{ name = "Vasilije Markovic" },
@ -27,10 +27,13 @@ dependencies = [
"nltk==3.9.1",
"numpy>=1.26.4, <=2.1",
"pandas>=2.2.2",
# Note: New s3fs and boto3 versions don't work well together
# Always use compatible fixed versions of these two dependencies
"s3fs[boto3]==2025.3.2",
"sqlalchemy==2.0.39",
"aiosqlite>=0.20.0,<0.21",
"tiktoken<=0.9.0",
"litellm>=1.57.4",
"litellm>=1.57.4, <1.71.0",
"instructor>=1.7.2",
"langfuse>=2.32.0,<3",
"filetype>=1.2.0",

896
uv.lock generated

File diff suppressed because it is too large Load diff