<!-- .github/pull_request_template.md -->

## Description
<!-- Please provide a clear, human-generated description of the changes in this PR. DO NOT use AI-generated descriptions. We want to understand your thought process and reasoning. -->

- Added an `edge_text` field to edges that auto-fills from `relationship_type` if not provided.
- Contains edges now store descriptions for better embedding
- Updated and refactored indexing so that edge_text gets embedded and exposed
- Updated retrieval to use the new embeddings
- Added a test to verify edge_text exists in the graph with the correct format.

## Type of Change
<!-- Please check the relevant option -->

- [ ] Bug fix (non-breaking change that fixes an issue)
- [x] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)
- [ ] Documentation update
- [x] Code refactoring
- [x] Performance improvement
- [ ] Other (please specify):

## Screenshots/Videos (if applicable)
<!-- Add screenshots or videos to help explain your changes -->

## Pre-submission Checklist
<!-- Please check all boxes that apply before submitting your PR -->

- [x] **I have tested my changes thoroughly before submitting this PR**
- [x] **This PR contains minimal changes necessary to address the issue/feature**
- [x] My code follows the project's coding standards and style guidelines
- [x] I have added tests that prove my fix is effective or that my feature works
- [ ] I have added necessary documentation (if applicable)
- [ ] All new and existing tests pass
- [x] I have searched existing PRs to ensure this change hasn't been submitted already
- [ ] I have linked any relevant issues in the description
- [ ] My commits have clear and descriptive messages

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
27 lines
916 B
Python
27 lines
916 B
Python
import pytest
|
|
from unittest.mock import AsyncMock, patch, MagicMock
|
|
from cognee.tasks.storage.index_data_points import index_data_points
|
|
from cognee.infrastructure.engine import DataPoint
|
|
|
|
|
|
class TestDataPoint(DataPoint):
    """Minimal DataPoint used as input for the indexing test.

    It carries a single ``name`` field, which ``metadata`` lists under
    ``index_fields``.
    """

    # The class name matches pytest's default ``Test*`` collection pattern.
    # Opt out explicitly so pytest does not try to collect this helper model
    # as a test class (which would emit a PytestCollectionWarning).
    __test__ = False

    name: str
    metadata: dict = {"index_fields": ["name"]}
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_index_data_points_calls_vector_engine():
    """Check that index_data_points awaits the vector engine's indexing calls.

    The vector engine is replaced by an ``AsyncMock``; after running the task
    on one data point, both ``create_vector_index`` and ``index_data_points``
    must have been awaited at least once.
    """
    engine_stub = AsyncMock()
    # Attributes of an AsyncMock are async by default; a plain MagicMock makes
    # get_batch_size() return 100 directly instead of a coroutine.
    engine_stub.embedding_engine.get_batch_size = MagicMock(return_value=100)

    def provide_engine_stub():
        return engine_stub

    sample_points = [TestDataPoint(name="test1")]
    global_overrides = {"get_vector_engine": provide_engine_stub}

    # Swap the engine factory in the task's module globals for this call only;
    # patch.dict restores the original entry on exit.
    with patch.dict(index_data_points.__globals__, global_overrides):
        await index_data_points(sample_points)

    assert engine_stub.create_vector_index.await_count > 0
    assert engine_stub.index_data_points.await_count > 0
|