From 6223ecf05ba662f08b7d560196f43df248114adb Mon Sep 17 00:00:00 2001
From: lxobr <122801072+lxobr@users.noreply.github.com>
Date: Thu, 30 Oct 2025 13:56:06 +0100
Subject: [PATCH 1/6] feat: optimize repeated entity extraction (#1682)

<!-- .github/pull_request_template.md -->

## Description
<!--
Please provide a clear, human-generated description of the changes in
this PR.
DO NOT use AI-generated descriptions. We want to understand your thought
process and reasoning.
-->

- Added an `edge_text` field to edges that auto-fills from
`relationship_type` if not provided.
- Containts edges now store descriptions for better embedding
- Updated and refactored indexing so that edge_text gets embedded and
exposed
- Updated retrieval to use the new embeddings
- Added a test to verify edge_text exists in the graph with the correct
format.

## Type of Change
<!-- Please check the relevant option -->
- [ ] Bug fix (non-breaking change that fixes an issue)
- [x] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to change)
- [ ] Documentation update
- [x] Code refactoring
- [x] Performance improvement
- [ ] Other (please specify):

## Screenshots/Videos (if applicable)
<!-- Add screenshots or videos to help explain your changes -->

## Pre-submission Checklist
<!-- Please check all boxes that apply before submitting your PR -->
- [x] **I have tested my changes thoroughly before submitting this PR**
- [x] **This PR contains minimal changes necessary to address the
issue/feature**
- [x] My code follows the project's coding standards and style
guidelines
- [x] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have added necessary documentation (if applicable)
- [ ] All new and existing tests pass
- [x] I have searched existing PRs to ensure this change hasn't been
submitted already
- [ ] I have linked any relevant issues in the description
- [ ] My commits have clear and descriptive messages

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
---
 cognee/infrastructure/engine/models/Edge.py   |  14 ++-
 .../modules/chunking/models/DocumentChunk.py  |   3 +-
 .../modules/graph/cognee_graph/CogneeGraph.py |   6 +-
 .../utils/expand_with_nodes_and_edges.py      |  21 +++-
 .../graph/utils/resolve_edges_to_text.py      | 119 +++++++++---------
 .../utils/brute_force_triplet_search.py       |   2 +-
 cognee/tasks/storage/index_data_points.py     |  55 ++++----
 cognee/tasks/storage/index_graph_edges.py     |  94 ++++++--------
 cognee/tests/test_edge_ingestion.py           |  27 ++++
 .../databases/test_index_data_points.py       |  27 ++++
 .../databases/test_index_graph_edges.py       |  30 +++--
 11 files changed, 236 insertions(+), 162 deletions(-)
 create mode 100644 cognee/tests/unit/infrastructure/databases/test_index_data_points.py

diff --git a/cognee/infrastructure/engine/models/Edge.py b/cognee/infrastructure/engine/models/Edge.py
index 5ad9c84dd..59f01a9ab 100644
--- a/cognee/infrastructure/engine/models/Edge.py
+++ b/cognee/infrastructure/engine/models/Edge.py
@@ -1,4 +1,4 @@
-from pydantic import BaseModel
+from pydantic import BaseModel, field_validator
 from typing import Optional, Any, Dict
 
 
@@ -18,9 +18,21 @@ class Edge(BaseModel):
 
         # Mixed usage
         has_items: (Edge(weight=0.5, weights={"confidence": 0.9}), list[Item])
+
+        # With edge_text for rich embedding representation
+        contains: (Edge(relationship_type="contains", edge_text="relationship_name: contains; entity_description: Alice"), Entity)
     """
 
     weight: Optional[float] = None
     weights: Optional[Dict[str, float]] = None
     relationship_type: Optional[str] = None
     properties: Optional[Dict[str, Any]] = None
+    edge_text: Optional[str] = None
+
+    @field_validator("edge_text", mode="before")
+    @classmethod
+    def ensure_edge_text(cls, v, info):
+        """Auto-populate edge_text from relationship_type if not explicitly provided."""
+        if v is None and info.data.get("relationship_type"):
+            return info.data["relationship_type"]
+        return v
diff --git a/cognee/modules/chunking/models/DocumentChunk.py b/cognee/modules/chunking/models/DocumentChunk.py
index 9f8c57486..e024bf00b 100644
--- a/cognee/modules/chunking/models/DocumentChunk.py
+++ b/cognee/modules/chunking/models/DocumentChunk.py
@@ -1,6 +1,7 @@
 from typing import List, Union
 
 from cognee.infrastructure.engine import DataPoint
+from cognee.infrastructure.engine.models.Edge import Edge
 from cognee.modules.data.processing.document_types import Document
 from cognee.modules.engine.models import Entity
 from cognee.tasks.temporal_graph.models import Event
@@ -31,6 +32,6 @@ class DocumentChunk(DataPoint):
     chunk_index: int
     cut_type: str
     is_part_of: Document
-    contains: List[Union[Entity, Event]] = None
+    contains: List[Union[Entity, Event, tuple[Edge, Entity]]] = None
 
     metadata: dict = {"index_fields": ["text"]}
diff --git a/cognee/modules/graph/cognee_graph/CogneeGraph.py b/cognee/modules/graph/cognee_graph/CogneeGraph.py
index 9703928f0..cb7562422 100644
--- a/cognee/modules/graph/cognee_graph/CogneeGraph.py
+++ b/cognee/modules/graph/cognee_graph/CogneeGraph.py
@@ -171,8 +171,10 @@ class CogneeGraph(CogneeAbstractGraph):
             embedding_map = {result.payload["text"]: result.score for result in edge_distances}
 
             for edge in self.edges:
-                relationship_type = edge.attributes.get("relationship_type")
-                distance = embedding_map.get(relationship_type, None)
+                edge_key = edge.attributes.get("edge_text") or edge.attributes.get(
+                    "relationship_type"
+                )
+                distance = embedding_map.get(edge_key, None)
                 if distance is not None:
                     edge.attributes["vector_distance"] = distance
 
diff --git a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py
index 3b01f5af4..c68eb494d 100644
--- a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py
+++ b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py
@@ -1,5 +1,6 @@
 from typing import Optional
 
+from cognee.infrastructure.engine.models.Edge import Edge
 from cognee.modules.chunking.models import DocumentChunk
 from cognee.modules.engine.models import Entity, EntityType
 from cognee.modules.engine.utils import (
@@ -243,10 +244,26 @@ def _process_graph_nodes(
             ontology_relationships,
         )
 
-        # Add entity to data chunk
         if data_chunk.contains is None:
             data_chunk.contains = []
-        data_chunk.contains.append(entity_node)
+
+        edge_text = "; ".join(
+            [
+                "relationship_name: contains",
+                f"entity_name: {entity_node.name}",
+                f"entity_description: {entity_node.description}",
+            ]
+        )
+
+        data_chunk.contains.append(
+            (
+                Edge(
+                    relationship_type="contains",
+                    edge_text=edge_text,
+                ),
+                entity_node,
+            )
+        )
 
 
 def _process_graph_edges(
diff --git a/cognee/modules/graph/utils/resolve_edges_to_text.py b/cognee/modules/graph/utils/resolve_edges_to_text.py
index eb5bedd2c..5deb13ba8 100644
--- a/cognee/modules/graph/utils/resolve_edges_to_text.py
+++ b/cognee/modules/graph/utils/resolve_edges_to_text.py
@@ -1,71 +1,70 @@
+import string
 from typing import List
+from collections import Counter
+
 from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
+from cognee.modules.retrieval.utils.stop_words import DEFAULT_STOP_WORDS
+
+
+def _get_top_n_frequent_words(
+    text: str, stop_words: set = None, top_n: int = 3, separator: str = ", "
+) -> str:
+    """Concatenates the top N frequent words in text."""
+    if stop_words is None:
+        stop_words = DEFAULT_STOP_WORDS
+
+    words = [word.lower().strip(string.punctuation) for word in text.split()]
+    words = [word for word in words if word and word not in stop_words]
+
+    top_words = [word for word, freq in Counter(words).most_common(top_n)]
+    return separator.join(top_words)
+
+
+def _create_title_from_text(text: str, first_n_words: int = 7, top_n_words: int = 3) -> str:
+    """Creates a title by combining first words with most frequent words from the text."""
+    first_words = text.split()[:first_n_words]
+    top_words = _get_top_n_frequent_words(text, top_n=top_n_words)
+    return f"{' '.join(first_words)}... [{top_words}]"
+
+
+def _extract_nodes_from_edges(retrieved_edges: List[Edge]) -> dict:
+    """Creates a dictionary of nodes with their names and content."""
+    nodes = {}
+
+    for edge in retrieved_edges:
+        for node in (edge.node1, edge.node2):
+            if node.id in nodes:
+                continue
+
+            text = node.attributes.get("text")
+            if text:
+                name = _create_title_from_text(text)
+                content = text
+            else:
+                name = node.attributes.get("name", "Unnamed Node")
+                content = node.attributes.get("description", name)
+
+            nodes[node.id] = {"node": node, "name": name, "content": content}
+
+    return nodes
 
 
 async def resolve_edges_to_text(retrieved_edges: List[Edge]) -> str:
-    """
-    Converts retrieved graph edges into a human-readable string format.
+    """Converts retrieved graph edges into a human-readable string format."""
+    nodes = _extract_nodes_from_edges(retrieved_edges)
 
-    Parameters:
-    -----------
-
-        - retrieved_edges (list): A list of edges retrieved from the graph.
-
-    Returns:
-    --------
-
-        - str: A formatted string representation of the nodes and their connections.
-    """
-
-    def _get_nodes(retrieved_edges: List[Edge]) -> dict:
-        def _get_title(text: str, first_n_words: int = 7, top_n_words: int = 3) -> str:
-            def _top_n_words(text, stop_words=None, top_n=3, separator=", "):
-                """Concatenates the top N frequent words in text."""
-                if stop_words is None:
-                    from cognee.modules.retrieval.utils.stop_words import DEFAULT_STOP_WORDS
-
-                    stop_words = DEFAULT_STOP_WORDS
-
-                import string
-
-                words = [word.lower().strip(string.punctuation) for word in text.split()]
-
-                if stop_words:
-                    words = [word for word in words if word and word not in stop_words]
-
-                from collections import Counter
-
-                top_words = [word for word, freq in Counter(words).most_common(top_n)]
-
-                return separator.join(top_words)
-
-            """Creates a title, by combining first words with most frequent words from the text."""
-            first_words = text.split()[:first_n_words]
-            top_words = _top_n_words(text, top_n=first_n_words)
-            return f"{' '.join(first_words)}... [{top_words}]"
-
-        """Creates a dictionary of nodes with their names and content."""
-        nodes = {}
-        for edge in retrieved_edges:
-            for node in (edge.node1, edge.node2):
-                if node.id not in nodes:
-                    text = node.attributes.get("text")
-                    if text:
-                        name = _get_title(text)
-                        content = text
-                    else:
-                        name = node.attributes.get("name", "Unnamed Node")
-                        content = node.attributes.get("description", name)
-                    nodes[node.id] = {"node": node, "name": name, "content": content}
-        return nodes
-
-    nodes = _get_nodes(retrieved_edges)
     node_section = "\n".join(
         f"Node: {info['name']}\n__node_content_start__\n{info['content']}\n__node_content_end__\n"
         for info in nodes.values()
     )
-    connection_section = "\n".join(
-        f"{nodes[edge.node1.id]['name']} --[{edge.attributes['relationship_type']}]--> {nodes[edge.node2.id]['name']}"
-        for edge in retrieved_edges
-    )
+
+    connections = []
+    for edge in retrieved_edges:
+        source_name = nodes[edge.node1.id]["name"]
+        target_name = nodes[edge.node2.id]["name"]
+        edge_label = edge.attributes.get("edge_text") or edge.attributes.get("relationship_type")
+        connections.append(f"{source_name} --[{edge_label}]--> {target_name}")
+
+    connection_section = "\n".join(connections)
+
     return f"Nodes:\n{node_section}\n\nConnections:\n{connection_section}"
diff --git a/cognee/modules/retrieval/utils/brute_force_triplet_search.py b/cognee/modules/retrieval/utils/brute_force_triplet_search.py
index 1ef7545c2..f8bdbb97d 100644
--- a/cognee/modules/retrieval/utils/brute_force_triplet_search.py
+++ b/cognee/modules/retrieval/utils/brute_force_triplet_search.py
@@ -71,7 +71,7 @@ async def get_memory_fragment(
         await memory_fragment.project_graph_from_db(
             graph_engine,
             node_properties_to_project=properties_to_project,
-            edge_properties_to_project=["relationship_name"],
+            edge_properties_to_project=["relationship_name", "edge_text"],
             node_type=node_type,
             node_name=node_name,
         )
diff --git a/cognee/tasks/storage/index_data_points.py b/cognee/tasks/storage/index_data_points.py
index 902789c80..b0ec3a5b4 100644
--- a/cognee/tasks/storage/index_data_points.py
+++ b/cognee/tasks/storage/index_data_points.py
@@ -8,47 +8,58 @@ logger = get_logger("index_data_points")
 
 
 async def index_data_points(data_points: list[DataPoint]):
-    created_indexes = {}
-    index_points = {}
+    """Index data points in the vector engine by creating embeddings for specified fields.
+
+    Process:
+    1. Groups data points into a nested dict: {type_name: {field_name: [points]}}
+    2. Creates vector indexes for each (type, field) combination on first encounter
+    3. Batches points per (type, field) and creates async indexing tasks
+    4. Executes all indexing tasks in parallel for efficient embedding generation
+
+    Args:
+        data_points: List of DataPoint objects to index. Each DataPoint's metadata must
+                     contain an 'index_fields' list specifying which fields to embed.
+
+    Returns:
+        The original data_points list.
+    """
+    data_points_by_type = {}
 
     vector_engine = get_vector_engine()
 
     for data_point in data_points:
         data_point_type = type(data_point)
+        type_name = data_point_type.__name__
 
         for field_name in data_point.metadata["index_fields"]:
             if getattr(data_point, field_name, None) is None:
                 continue
 
-            index_name = f"{data_point_type.__name__}_{field_name}"
+            if type_name not in data_points_by_type:
+                data_points_by_type[type_name] = {}
 
-            if index_name not in created_indexes:
-                await vector_engine.create_vector_index(data_point_type.__name__, field_name)
-                created_indexes[index_name] = True
-
-            if index_name not in index_points:
-                index_points[index_name] = []
+            if field_name not in data_points_by_type[type_name]:
+                await vector_engine.create_vector_index(type_name, field_name)
+                data_points_by_type[type_name][field_name] = []
 
             indexed_data_point = data_point.model_copy()
             indexed_data_point.metadata["index_fields"] = [field_name]
-            index_points[index_name].append(indexed_data_point)
+            data_points_by_type[type_name][field_name].append(indexed_data_point)
 
-    tasks: list[asyncio.Task] = []
     batch_size = vector_engine.embedding_engine.get_batch_size()
 
-    for index_name_and_field, points in index_points.items():
-        first = index_name_and_field.index("_")
-        index_name = index_name_and_field[:first]
-        field_name = index_name_and_field[first + 1 :]
+    batches = (
+        (type_name, field_name, points[i : i + batch_size])
+        for type_name, fields in data_points_by_type.items()
+        for field_name, points in fields.items()
+        for i in range(0, len(points), batch_size)
+    )
 
-        # Create embedding requests per batch to run in parallel later
-        for i in range(0, len(points), batch_size):
-            batch = points[i : i + batch_size]
-            tasks.append(
-                asyncio.create_task(vector_engine.index_data_points(index_name, field_name, batch))
-            )
+    tasks = [
+        asyncio.create_task(vector_engine.index_data_points(type_name, field_name, batch_points))
+        for type_name, field_name, batch_points in batches
+    ]
 
-    # Run all embedding requests in parallel
     await asyncio.gather(*tasks)
 
     return data_points
diff --git a/cognee/tasks/storage/index_graph_edges.py b/cognee/tasks/storage/index_graph_edges.py
index 4fa8cfc75..03b5a25a5 100644
--- a/cognee/tasks/storage/index_graph_edges.py
+++ b/cognee/tasks/storage/index_graph_edges.py
@@ -1,17 +1,44 @@
-import asyncio
+from collections import Counter
+from typing import Optional, Dict, Any, List, Tuple, Union
 
 from cognee.modules.engine.utils.generate_edge_id import generate_edge_id
 from cognee.shared.logging_utils import get_logger
-from collections import Counter
-from typing import Optional, Dict, Any, List, Tuple, Union
-from cognee.infrastructure.databases.vector import get_vector_engine
 from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.modules.graph.models.EdgeType import EdgeType
 from cognee.infrastructure.databases.graph.graph_db_interface import EdgeData
+from cognee.tasks.storage.index_data_points import index_data_points
 
 logger = get_logger()
 
 
+def _get_edge_text(item: dict) -> str:
+    """Extract edge text for embedding - prefers edge_text field with fallback."""
+    if "edge_text" in item:
+        return item["edge_text"]
+
+    if "relationship_name" in item:
+        return item["relationship_name"]
+
+    return ""
+
+
+def create_edge_type_datapoints(edges_data) -> list[EdgeType]:
+    """Transform raw edge data into EdgeType datapoints."""
+    edge_texts = [
+        _get_edge_text(item)
+        for edge in edges_data
+        for item in edge
+        if isinstance(item, dict) and "relationship_name" in item
+    ]
+
+    edge_types = Counter(edge_texts)
+
+    return [
+        EdgeType(id=generate_edge_id(edge_id=text), relationship_name=text, number_of_edges=count)
+        for text, count in edge_types.items()
+    ]
+
+
 async def index_graph_edges(
     edges_data: Union[List[EdgeData], List[Tuple[str, str, str, Optional[Dict[str, Any]]]]] = None,
 ):
@@ -23,24 +50,17 @@ async def index_graph_edges(
     the `relationship_name` field.
 
     Steps:
-    1. Initialize the vector engine and graph engine.
-    2. Retrieve graph edge data and count relationship types (`relationship_name`).
-    3. Create vector indexes for `relationship_name` if they don't exist.
-    4. Transform the counted relationships into `EdgeType` objects.
-    5. Index the transformed data points in the vector engine.
+    1. Initialize the graph engine if needed and retrieve edge data.
+    2. Transform edge data into EdgeType datapoints.
+    3. Index the EdgeType datapoints using the standard indexing function.
 
     Raises:
-        RuntimeError: If initialization of the vector engine or graph engine fails.
+        RuntimeError: If initialization of the graph engine fails.
 
     Returns:
         None
     """
     try:
-        created_indexes = {}
-        index_points = {}
-
-        vector_engine = get_vector_engine()
-
         if edges_data is None:
             graph_engine = await get_graph_engine()
             _, edges_data = await graph_engine.get_graph_data()
@@ -51,47 +71,7 @@ async def index_graph_edges(
         logger.error("Failed to initialize engines: %s", e)
         raise RuntimeError("Initialization error") from e
 
-    edge_types = Counter(
-        item.get("relationship_name")
-        for edge in edges_data
-        for item in edge
-        if isinstance(item, dict) and "relationship_name" in item
-    )
-
-    for text, count in edge_types.items():
-        edge = EdgeType(
-            id=generate_edge_id(edge_id=text), relationship_name=text, number_of_edges=count
-        )
-        data_point_type = type(edge)
-
-        for field_name in edge.metadata["index_fields"]:
-            index_name = f"{data_point_type.__name__}.{field_name}"
-
-            if index_name not in created_indexes:
-                await vector_engine.create_vector_index(data_point_type.__name__, field_name)
-                created_indexes[index_name] = True
-
-            if index_name not in index_points:
-                index_points[index_name] = []
-
-            indexed_data_point = edge.model_copy()
-            indexed_data_point.metadata["index_fields"] = [field_name]
-            index_points[index_name].append(indexed_data_point)
-
-    # Get maximum batch size for embedding model
-    batch_size = vector_engine.embedding_engine.get_batch_size()
-    tasks: list[asyncio.Task] = []
-
-    for index_name, indexable_points in index_points.items():
-        index_name, field_name = index_name.split(".")
-
-        # Create embedding tasks to run in parallel later
-        for start in range(0, len(indexable_points), batch_size):
-            batch = indexable_points[start : start + batch_size]
-
-            tasks.append(vector_engine.index_data_points(index_name, field_name, batch))
-
-    # Start all embedding tasks and wait for completion
-    await asyncio.gather(*tasks)
+    edge_type_datapoints = create_edge_type_datapoints(edges_data)
+    await index_data_points(edge_type_datapoints)
 
     return None
diff --git a/cognee/tests/test_edge_ingestion.py b/cognee/tests/test_edge_ingestion.py
index 5b23f7819..0d1407fab 100755
--- a/cognee/tests/test_edge_ingestion.py
+++ b/cognee/tests/test_edge_ingestion.py
@@ -52,6 +52,33 @@ async def test_edge_ingestion():
 
     edge_type_counts = Counter(edge_type[2] for edge_type in graph[1])
 
+    "Tests edge_text presence and format"
+    contains_edges = [edge for edge in graph[1] if edge[2] == "contains"]
+    assert len(contains_edges) > 0, "Expected at least one contains edge for edge_text verification"
+
+    edge_properties = contains_edges[0][3]
+    assert "edge_text" in edge_properties, "Expected edge_text in edge properties"
+
+    edge_text = edge_properties["edge_text"]
+    assert "relationship_name: contains" in edge_text, (
+        f"Expected 'relationship_name: contains' in edge_text, got: {edge_text}"
+    )
+    assert "entity_name:" in edge_text, f"Expected 'entity_name:' in edge_text, got: {edge_text}"
+    assert "entity_description:" in edge_text, (
+        f"Expected 'entity_description:' in edge_text, got: {edge_text}"
+    )
+
+    all_edge_texts = [
+        edge[3].get("edge_text", "") for edge in contains_edges if "edge_text" in edge[3]
+    ]
+    expected_entities = ["dave", "ana", "bob", "dexter", "apples", "cognee"]
+    found_entity = any(
+        any(entity in text.lower() for entity in expected_entities) for text in all_edge_texts
+    )
+    assert found_entity, (
+        f"Expected to find at least one entity name in edge_text: {all_edge_texts[:3]}"
+    )
+
     "Tests the presence of basic nested edges"
     for basic_nested_edge in basic_nested_edges:
         assert edge_type_counts.get(basic_nested_edge, 0) >= 1, (
diff --git a/cognee/tests/unit/infrastructure/databases/test_index_data_points.py b/cognee/tests/unit/infrastructure/databases/test_index_data_points.py
new file mode 100644
index 000000000..21a5695de
--- /dev/null
+++ b/cognee/tests/unit/infrastructure/databases/test_index_data_points.py
@@ -0,0 +1,27 @@
+import pytest
+from unittest.mock import AsyncMock, patch, MagicMock
+from cognee.tasks.storage.index_data_points import index_data_points
+from cognee.infrastructure.engine import DataPoint
+
+
+class TestDataPoint(DataPoint):
+    name: str
+    metadata: dict = {"index_fields": ["name"]}
+
+
+@pytest.mark.asyncio
+async def test_index_data_points_calls_vector_engine():
+    """Test that index_data_points creates vector index and indexes data."""
+    data_points = [TestDataPoint(name="test1")]
+
+    mock_vector_engine = AsyncMock()
+    mock_vector_engine.embedding_engine.get_batch_size = MagicMock(return_value=100)
+
+    with patch.dict(
+        index_data_points.__globals__,
+        {"get_vector_engine": lambda: mock_vector_engine},
+    ):
+        await index_data_points(data_points)
+
+    assert mock_vector_engine.create_vector_index.await_count >= 1
+    assert mock_vector_engine.index_data_points.await_count >= 1
diff --git a/cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py b/cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py
index 48bbc53e3..cee0896c2 100644
--- a/cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py
+++ b/cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py
@@ -5,8 +5,7 @@ from cognee.tasks.storage.index_graph_edges import index_graph_edges
 
 @pytest.mark.asyncio
 async def test_index_graph_edges_success():
-    """Test that index_graph_edges uses the index datapoints and creates vector index."""
-    # Create the mocks for the graph and vector engines.
+    """Test that index_graph_edges retrieves edges and delegates to index_data_points."""
     mock_graph_engine = AsyncMock()
     mock_graph_engine.get_graph_data.return_value = (
         None,
@@ -15,26 +14,23 @@ async def test_index_graph_edges_success():
             [{"relationship_name": "rel2"}],
         ],
     )
-    mock_vector_engine = AsyncMock()
-    mock_vector_engine.embedding_engine.get_batch_size = MagicMock(return_value=100)
+    mock_index_data_points = AsyncMock()
 
-    # Patch the globals of the function so that when it does:
-    #   vector_engine = get_vector_engine()
-    #   graph_engine = await get_graph_engine()
-    # it uses the mocked versions.
     with patch.dict(
         index_graph_edges.__globals__,
         {
             "get_graph_engine": AsyncMock(return_value=mock_graph_engine),
-            "get_vector_engine": lambda: mock_vector_engine,
+            "index_data_points": mock_index_data_points,
         },
     ):
         await index_graph_edges()
 
-    # Assertions on the mock calls.
     mock_graph_engine.get_graph_data.assert_awaited_once()
-    assert mock_vector_engine.create_vector_index.await_count == 1
-    assert mock_vector_engine.index_data_points.await_count == 1
+    mock_index_data_points.assert_awaited_once()
+
+    call_args = mock_index_data_points.call_args[0][0]
+    assert len(call_args) == 2
+    assert all(hasattr(item, "relationship_name") for item in call_args)
 
 
 @pytest.mark.asyncio
@@ -42,20 +38,22 @@ async def test_index_graph_edges_no_relationships():
     """Test that index_graph_edges handles empty relationships correctly."""
     mock_graph_engine = AsyncMock()
     mock_graph_engine.get_graph_data.return_value = (None, [])
-    mock_vector_engine = AsyncMock()
+    mock_index_data_points = AsyncMock()
 
     with patch.dict(
         index_graph_edges.__globals__,
         {
             "get_graph_engine": AsyncMock(return_value=mock_graph_engine),
-            "get_vector_engine": lambda: mock_vector_engine,
+            "index_data_points": mock_index_data_points,
         },
     ):
         await index_graph_edges()
 
     mock_graph_engine.get_graph_data.assert_awaited_once()
-    mock_vector_engine.create_vector_index.assert_not_awaited()
-    mock_vector_engine.index_data_points.assert_not_awaited()
+    mock_index_data_points.assert_awaited_once()
+
+    call_args = mock_index_data_points.call_args[0][0]
+    assert len(call_args) == 0
 
 
 @pytest.mark.asyncio

From e2457ef277f70d7e91defdd9b33ae24c0ccbfa20 Mon Sep 17 00:00:00 2001
From: Igor Ilic <igorilic03@gmail.com>
Date: Thu, 30 Oct 2025 14:27:01 +0100
Subject: [PATCH 2/6] fix: Resolve issue with text classification

---
 .../files/utils/get_file_metadata.py          | 10 ++++----
 .../files/utils/guess_file_type.py            | 24 +++++++++++++++----
 .../ingestion/data_types/BinaryData.py        |  2 +-
 3 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/cognee/infrastructure/files/utils/get_file_metadata.py b/cognee/infrastructure/files/utils/get_file_metadata.py
index 23b10a6df..debc7bbaf 100644
--- a/cognee/infrastructure/files/utils/get_file_metadata.py
+++ b/cognee/infrastructure/files/utils/get_file_metadata.py
@@ -1,6 +1,6 @@
 import io
 import os.path
-from typing import BinaryIO, TypedDict
+from typing import BinaryIO, TypedDict, Optional
 from pathlib import Path
 
 from cognee.shared.logging_utils import get_logger
@@ -27,7 +27,7 @@ class FileMetadata(TypedDict):
     file_size: int
 
 
-async def get_file_metadata(file: BinaryIO) -> FileMetadata:
+async def get_file_metadata(file: BinaryIO, name: Optional[str] = None) -> FileMetadata:
     """
     Retrieve metadata from a file object.
 
@@ -53,15 +53,15 @@ async def get_file_metadata(file: BinaryIO) -> FileMetadata:
     except io.UnsupportedOperation as error:
         logger.error(f"Error retrieving content hash for file: {file.name} \n{str(error)}\n\n")
 
-    file_type = guess_file_type(file)
+    file_type = guess_file_type(file, name=name)
 
     file_path = getattr(file, "name", None) or getattr(file, "full_name", None)
 
     if isinstance(file_path, str):
         file_name = Path(file_path).stem if file_path else None
     else:
-        # In case file_path does not exist or is a integer return None
-        file_name = None
+        # In case file_path does not exist try file_name
+        file_name = name
 
     # Get file size
     pos = file.tell()  # remember current pointer
diff --git a/cognee/infrastructure/files/utils/guess_file_type.py b/cognee/infrastructure/files/utils/guess_file_type.py
index 4e3ff6824..78b20c93d 100644
--- a/cognee/infrastructure/files/utils/guess_file_type.py
+++ b/cognee/infrastructure/files/utils/guess_file_type.py
@@ -1,6 +1,9 @@
-from typing import BinaryIO
+import io
+from pathlib import Path
+from typing import BinaryIO, Optional, Any
 import filetype
-from .is_text_content import is_text_content
+from tempfile import SpooledTemporaryFile
+from filetype.types.base import Type
 
 
 class FileTypeException(Exception):
@@ -22,7 +25,7 @@ class FileTypeException(Exception):
         self.message = message
 
 
-def guess_file_type(file: BinaryIO) -> filetype.Type:
+def guess_file_type(file: BinaryIO, name: Optional[str] = None) -> filetype.Type:
     """
     Guess the file type from the given binary file stream.
 
@@ -39,12 +42,23 @@ def guess_file_type(file: BinaryIO) -> filetype.Type:
 
         - filetype.Type: The guessed file type, represented as filetype.Type.
     """
+
+    # Note: If file has .txt or .text extension, consider it a plain text file as filetype.guess may not detect it properly
+    # as it contains no magic number encoding
+    ext = None
+    if isinstance(file, str):
+        ext = Path(file).suffix
+    elif name is not None:
+        ext = Path(name).suffix
+
+    if ext in [".txt", ".text"]:
+        file_type = Type("text/plain", "txt")
+        return file_type
+
     file_type = filetype.guess(file)
 
     # If file type could not be determined consider it a plain text file as they don't have magic number encoding
     if file_type is None:
-        from filetype.types.base import Type
-
         file_type = Type("text/plain", "txt")
 
     if file_type is None:
diff --git a/cognee/modules/ingestion/data_types/BinaryData.py b/cognee/modules/ingestion/data_types/BinaryData.py
index f96e0d65c..9448dddcf 100644
--- a/cognee/modules/ingestion/data_types/BinaryData.py
+++ b/cognee/modules/ingestion/data_types/BinaryData.py
@@ -30,7 +30,7 @@ class BinaryData(IngestionData):
 
     async def ensure_metadata(self):
         if self.metadata is None:
-            self.metadata = await get_file_metadata(self.data)
+            self.metadata = await get_file_metadata(self.data, name=self.name)
 
             if self.metadata["name"] is None:
                 self.metadata["name"] = self.name

From 081cab27b1594b363b3a1ba2e63bc942cdc4cb75 Mon Sep 17 00:00:00 2001
From: Igor Ilic <igorilic03@gmail.com>
Date: Thu, 30 Oct 2025 14:46:02 +0100
Subject: [PATCH 3/6] refactor: add x-wav support

---
 cognee/infrastructure/loaders/core/audio_loader.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cognee/infrastructure/loaders/core/audio_loader.py b/cognee/infrastructure/loaders/core/audio_loader.py
index 17294bd94..f04d9a0e0 100644
--- a/cognee/infrastructure/loaders/core/audio_loader.py
+++ b/cognee/infrastructure/loaders/core/audio_loader.py
@@ -42,6 +42,7 @@ class AudioLoader(LoaderInterface):
             "audio/wav",
             "audio/amr",
             "audio/aiff",
+            "audio/x-wav",
         ]
 
     @property

From 5007a2a298d3e0e1a09d96424b4d846c9293098f Mon Sep 17 00:00:00 2001
From: Igor Ilic <igorilic03@gmail.com>
Date: Thu, 30 Oct 2025 14:49:58 +0100
Subject: [PATCH 4/6] refactor: dont use file name from metadata

---
 cognee/infrastructure/files/utils/get_file_metadata.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cognee/infrastructure/files/utils/get_file_metadata.py b/cognee/infrastructure/files/utils/get_file_metadata.py
index debc7bbaf..3b6c5a364 100644
--- a/cognee/infrastructure/files/utils/get_file_metadata.py
+++ b/cognee/infrastructure/files/utils/get_file_metadata.py
@@ -53,15 +53,15 @@ async def get_file_metadata(file: BinaryIO, name: Optional[str] = None) -> FileM
     except io.UnsupportedOperation as error:
         logger.error(f"Error retrieving content hash for file: {file.name} \n{str(error)}\n\n")
 
-    file_type = guess_file_type(file, name=name)
+    file_type = guess_file_type(file, name)
 
     file_path = getattr(file, "name", None) or getattr(file, "full_name", None)
 
     if isinstance(file_path, str):
         file_name = Path(file_path).stem if file_path else None
     else:
-        # In case file_path does not exist try file_name
-        file_name = name
+        # In case file_path does not exist or is a integer return None
+        file_name = None
 
     # Get file size
     pos = file.tell()  # remember current pointer

From 9951d16fd72f36de2ff11b6cd1d20b9934f1c81e Mon Sep 17 00:00:00 2001
From: Igor Ilic <igorilic03@gmail.com>
Date: Thu, 30 Oct 2025 15:46:19 +0100
Subject: [PATCH 5/6] chore: Update version and lock files

---
 pyproject.toml |  2 +-
 uv.lock        | 10 +++++++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 15a666517..5f0aef1d8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "cognee"
 
-version = "0.3.8"
+version = "0.3.9"
 description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
 authors = [
     { name = "Vasilije Markovic" },
diff --git a/uv.lock b/uv.lock
index 65ee4b4ca..e2fc1df83 100644
--- a/uv.lock
+++ b/uv.lock
@@ -929,7 +929,7 @@ wheels = [
 
 [[package]]
 name = "cognee"
-version = "0.3.8"
+version = "0.3.9"
 source = { editable = "." }
 dependencies = [
     { name = "aiofiles" },
@@ -6226,8 +6226,10 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/8c/df/16848771155e7c419c60afeb24950b8aaa3ab09c0a091ec3ccca26a574d0/psycopg2_binary-2.9.11-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c47676e5b485393f069b4d7a811267d3168ce46f988fa602658b8bb901e9e64d", size = 4410873, upload-time = "2025-10-10T11:10:38.951Z" },
     { url = "https://files.pythonhosted.org/packages/43/79/5ef5f32621abd5a541b89b04231fe959a9b327c874a1d41156041c75494b/psycopg2_binary-2.9.11-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:a28d8c01a7b27a1e3265b11250ba7557e5f72b5ee9e5f3a2fa8d2949c29bf5d2", size = 4468016, upload-time = "2025-10-10T11:10:43.319Z" },
     { url = "https://files.pythonhosted.org/packages/f0/9b/d7542d0f7ad78f57385971f426704776d7b310f5219ed58da5d605b1892e/psycopg2_binary-2.9.11-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5f3f2732cf504a1aa9e9609d02f79bea1067d99edf844ab92c247bbca143303b", size = 4164996, upload-time = "2025-10-10T11:10:46.705Z" },
+    { url = "https://files.pythonhosted.org/packages/14/ed/e409388b537fa7414330687936917c522f6a77a13474e4238219fcfd9a84/psycopg2_binary-2.9.11-cp310-cp310-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:865f9945ed1b3950d968ec4690ce68c55019d79e4497366d36e090327ce7db14", size = 3981881, upload-time = "2025-10-30T02:54:57.182Z" },
     { url = "https://files.pythonhosted.org/packages/bf/30/50e330e63bb05efc6fa7c1447df3e08954894025ca3dcb396ecc6739bc26/psycopg2_binary-2.9.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:91537a8df2bde69b1c1db01d6d944c831ca793952e4f57892600e96cee95f2cd", size = 3650857, upload-time = "2025-10-10T11:10:50.112Z" },
     { url = "https://files.pythonhosted.org/packages/f0/e0/4026e4c12bb49dd028756c5b0bc4c572319f2d8f1c9008e0dad8cc9addd7/psycopg2_binary-2.9.11-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:4dca1f356a67ecb68c81a7bc7809f1569ad9e152ce7fd02c2f2036862ca9f66b", size = 3296063, upload-time = "2025-10-10T11:10:54.089Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/34/eb172be293c886fef5299fe5c3fcf180a05478be89856067881007934a7c/psycopg2_binary-2.9.11-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:0da4de5c1ac69d94ed4364b6cbe7190c1a70d325f112ba783d83f8440285f152", size = 3043464, upload-time = "2025-10-30T02:55:02.483Z" },
     { url = "https://files.pythonhosted.org/packages/18/1c/532c5d2cb11986372f14b798a95f2eaafe5779334f6a80589a68b5fcf769/psycopg2_binary-2.9.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37d8412565a7267f7d79e29ab66876e55cb5e8e7b3bbf94f8206f6795f8f7e7e", size = 3345378, upload-time = "2025-10-10T11:11:01.039Z" },
     { url = "https://files.pythonhosted.org/packages/70/e7/de420e1cf16f838e1fa17b1120e83afff374c7c0130d088dba6286fcf8ea/psycopg2_binary-2.9.11-cp310-cp310-win_amd64.whl", hash = "sha256:c665f01ec8ab273a61c62beeb8cce3014c214429ced8a308ca1fc410ecac3a39", size = 2713904, upload-time = "2025-10-10T11:11:04.81Z" },
     { url = "https://files.pythonhosted.org/packages/c7/ae/8d8266f6dd183ab4d48b95b9674034e1b482a3f8619b33a0d86438694577/psycopg2_binary-2.9.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0e8480afd62362d0a6a27dd09e4ca2def6fa50ed3a4e7c09165266106b2ffa10", size = 3756452, upload-time = "2025-10-10T11:11:11.583Z" },
@@ -6235,8 +6237,10 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/48/89/3fdb5902bdab8868bbedc1c6e6023a4e08112ceac5db97fc2012060e0c9a/psycopg2_binary-2.9.11-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2e164359396576a3cc701ba8af4751ae68a07235d7a380c631184a611220d9a4", size = 4410955, upload-time = "2025-10-10T11:11:21.21Z" },
     { url = "https://files.pythonhosted.org/packages/ce/24/e18339c407a13c72b336e0d9013fbbbde77b6fd13e853979019a1269519c/psycopg2_binary-2.9.11-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:d57c9c387660b8893093459738b6abddbb30a7eab058b77b0d0d1c7d521ddfd7", size = 4468007, upload-time = "2025-10-10T11:11:24.831Z" },
     { url = "https://files.pythonhosted.org/packages/91/7e/b8441e831a0f16c159b5381698f9f7f7ed54b77d57bc9c5f99144cc78232/psycopg2_binary-2.9.11-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2c226ef95eb2250974bf6fa7a842082b31f68385c4f3268370e3f3870e7859ee", size = 4165012, upload-time = "2025-10-10T11:11:29.51Z" },
+    { url = "https://files.pythonhosted.org/packages/0d/61/4aa89eeb6d751f05178a13da95516c036e27468c5d4d2509bb1e15341c81/psycopg2_binary-2.9.11-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a311f1edc9967723d3511ea7d2708e2c3592e3405677bf53d5c7246753591fbb", size = 3981881, upload-time = "2025-10-30T02:55:07.332Z" },
     { url = "https://files.pythonhosted.org/packages/76/a1/2f5841cae4c635a9459fe7aca8ed771336e9383b6429e05c01267b0774cf/psycopg2_binary-2.9.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ebb415404821b6d1c47353ebe9c8645967a5235e6d88f914147e7fd411419e6f", size = 3650985, upload-time = "2025-10-10T11:11:34.975Z" },
     { url = "https://files.pythonhosted.org/packages/84/74/4defcac9d002bca5709951b975173c8c2fa968e1a95dc713f61b3a8d3b6a/psycopg2_binary-2.9.11-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f07c9c4a5093258a03b28fab9b4f151aa376989e7f35f855088234e656ee6a94", size = 3296039, upload-time = "2025-10-10T11:11:40.432Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/c2/782a3c64403d8ce35b5c50e1b684412cf94f171dc18111be8c976abd2de1/psycopg2_binary-2.9.11-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:00ce1830d971f43b667abe4a56e42c1e2d594b32da4802e44a73bacacb25535f", size = 3043477, upload-time = "2025-10-30T02:55:11.182Z" },
     { url = "https://files.pythonhosted.org/packages/c8/31/36a1d8e702aa35c38fc117c2b8be3f182613faa25d794b8aeaab948d4c03/psycopg2_binary-2.9.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:cffe9d7697ae7456649617e8bb8d7a45afb71cd13f7ab22af3e5c61f04840908", size = 3345842, upload-time = "2025-10-10T11:11:45.366Z" },
     { url = "https://files.pythonhosted.org/packages/6e/b4/a5375cda5b54cb95ee9b836930fea30ae5a8f14aa97da7821722323d979b/psycopg2_binary-2.9.11-cp311-cp311-win_amd64.whl", hash = "sha256:304fd7b7f97eef30e91b8f7e720b3db75fee010b520e434ea35ed1ff22501d03", size = 2713894, upload-time = "2025-10-10T11:11:48.775Z" },
     { url = "https://files.pythonhosted.org/packages/d8/91/f870a02f51be4a65987b45a7de4c2e1897dd0d01051e2b559a38fa634e3e/psycopg2_binary-2.9.11-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:be9b840ac0525a283a96b556616f5b4820e0526addb8dcf6525a0fa162730be4", size = 3756603, upload-time = "2025-10-10T11:11:52.213Z" },
@@ -6244,8 +6248,10 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/2d/75/364847b879eb630b3ac8293798e380e441a957c53657995053c5ec39a316/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ab8905b5dcb05bf3fb22e0cf90e10f469563486ffb6a96569e51f897c750a76a", size = 4411159, upload-time = "2025-10-10T11:12:00.49Z" },
     { url = "https://files.pythonhosted.org/packages/6f/a0/567f7ea38b6e1c62aafd58375665a547c00c608a471620c0edc364733e13/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:bf940cd7e7fec19181fdbc29d76911741153d51cab52e5c21165f3262125685e", size = 4468234, upload-time = "2025-10-10T11:12:04.892Z" },
     { url = "https://files.pythonhosted.org/packages/30/da/4e42788fb811bbbfd7b7f045570c062f49e350e1d1f3df056c3fb5763353/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fa0f693d3c68ae925966f0b14b8edda71696608039f4ed61b1fe9ffa468d16db", size = 4166236, upload-time = "2025-10-10T11:12:11.674Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/94/c1777c355bc560992af848d98216148be5f1be001af06e06fc49cbded578/psycopg2_binary-2.9.11-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a1cf393f1cdaf6a9b57c0a719a1068ba1069f022a59b8b1fe44b006745b59757", size = 3983083, upload-time = "2025-10-30T02:55:15.73Z" },
     { url = "https://files.pythonhosted.org/packages/bd/42/c9a21edf0e3daa7825ed04a4a8588686c6c14904344344a039556d78aa58/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ef7a6beb4beaa62f88592ccc65df20328029d721db309cb3250b0aae0fa146c3", size = 3652281, upload-time = "2025-10-10T11:12:17.713Z" },
     { url = "https://files.pythonhosted.org/packages/12/22/dedfbcfa97917982301496b6b5e5e6c5531d1f35dd2b488b08d1ebc52482/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:31b32c457a6025e74d233957cc9736742ac5a6cb196c6b68499f6bb51390bd6a", size = 3298010, upload-time = "2025-10-10T11:12:22.671Z" },
+    { url = "https://files.pythonhosted.org/packages/66/ea/d3390e6696276078bd01b2ece417deac954dfdd552d2edc3d03204416c0c/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:edcb3aeb11cb4bf13a2af3c53a15b3d612edeb6409047ea0b5d6a21a9d744b34", size = 3044641, upload-time = "2025-10-30T02:55:19.929Z" },
     { url = "https://files.pythonhosted.org/packages/12/9a/0402ded6cbd321da0c0ba7d34dc12b29b14f5764c2fc10750daa38e825fc/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:62b6d93d7c0b61a1dd6197d208ab613eb7dcfdcca0a49c42ceb082257991de9d", size = 3347940, upload-time = "2025-10-10T11:12:26.529Z" },
     { url = "https://files.pythonhosted.org/packages/b1/d2/99b55e85832ccde77b211738ff3925a5d73ad183c0b37bcbbe5a8ff04978/psycopg2_binary-2.9.11-cp312-cp312-win_amd64.whl", hash = "sha256:b33fabeb1fde21180479b2d4667e994de7bbf0eec22832ba5d9b5e4cf65b6c6d", size = 2714147, upload-time = "2025-10-10T11:12:29.535Z" },
     { url = "https://files.pythonhosted.org/packages/ff/a8/a2709681b3ac11b0b1786def10006b8995125ba268c9a54bea6f5ae8bd3e/psycopg2_binary-2.9.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b8fb3db325435d34235b044b199e56cdf9ff41223a4b9752e8576465170bb38c", size = 3756572, upload-time = "2025-10-10T11:12:32.873Z" },
@@ -6253,8 +6259,10 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/11/32/b2ffe8f3853c181e88f0a157c5fb4e383102238d73c52ac6d93a5c8bffe6/psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8c55b385daa2f92cb64b12ec4536c66954ac53654c7f15a203578da4e78105c0", size = 4411242, upload-time = "2025-10-10T11:12:42.388Z" },
     { url = "https://files.pythonhosted.org/packages/10/04/6ca7477e6160ae258dc96f67c371157776564679aefd247b66f4661501a2/psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:c0377174bf1dd416993d16edc15357f6eb17ac998244cca19bc67cdc0e2e5766", size = 4468258, upload-time = "2025-10-10T11:12:48.654Z" },
     { url = "https://files.pythonhosted.org/packages/3c/7e/6a1a38f86412df101435809f225d57c1a021307dd0689f7a5e7fe83588b1/psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5c6ff3335ce08c75afaed19e08699e8aacf95d4a260b495a4a8545244fe2ceb3", size = 4166295, upload-time = "2025-10-10T11:12:52.525Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/7d/c07374c501b45f3579a9eb761cbf2604ddef3d96ad48679112c2c5aa9c25/psycopg2_binary-2.9.11-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:84011ba3109e06ac412f95399b704d3d6950e386b7994475b231cf61eec2fc1f", size = 3983133, upload-time = "2025-10-30T02:55:24.329Z" },
     { url = "https://files.pythonhosted.org/packages/82/56/993b7104cb8345ad7d4516538ccf8f0d0ac640b1ebd8c754a7b024e76878/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ba34475ceb08cccbdd98f6b46916917ae6eeb92b5ae111df10b544c3a4621dc4", size = 3652383, upload-time = "2025-10-10T11:12:56.387Z" },
     { url = "https://files.pythonhosted.org/packages/2d/ac/eaeb6029362fd8d454a27374d84c6866c82c33bfc24587b4face5a8e43ef/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b31e90fdd0f968c2de3b26ab014314fe814225b6c324f770952f7d38abf17e3c", size = 3298168, upload-time = "2025-10-10T11:13:00.403Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/39/50c3facc66bded9ada5cbc0de867499a703dc6bca6be03070b4e3b65da6c/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:d526864e0f67f74937a8fce859bd56c979f5e2ec57ca7c627f5f1071ef7fee60", size = 3044712, upload-time = "2025-10-30T02:55:27.975Z" },
     { url = "https://files.pythonhosted.org/packages/9c/8e/b7de019a1f562f72ada81081a12823d3c1590bedc48d7d2559410a2763fe/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04195548662fa544626c8ea0f06561eb6203f1984ba5b4562764fbeb4c3d14b1", size = 3347549, upload-time = "2025-10-10T11:13:03.971Z" },
     { url = "https://files.pythonhosted.org/packages/80/2d/1bb683f64737bbb1f86c82b7359db1eb2be4e2c0c13b947f80efefa7d3e5/psycopg2_binary-2.9.11-cp313-cp313-win_amd64.whl", hash = "sha256:efff12b432179443f54e230fdf60de1f6cc726b6c832db8701227d089310e8aa", size = 2714215, upload-time = "2025-10-10T11:13:07.14Z" },
 ]

From 995e7aa4834bd3967342fded1b7ad6af10b4da0e Mon Sep 17 00:00:00 2001
From: Hande <159312713+hande-k@users.noreply.github.com>
Date: Thu, 30 Oct 2025 17:38:28 +0100
Subject: [PATCH 6/6] fix: update unsupported vector db log (#1708)

<!-- .github/pull_request_template.md -->

## Description
<!--
Please provide a clear, human-generated description of the changes in
this PR.
DO NOT use AI-generated descriptions. We want to understand your thought
process and reasoning.
-->

## Type of Change
<!-- Please check the relevant option -->
- [ ] Bug fix (non-breaking change that fixes an issue)
- [ ] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to change)
- [ ] Documentation update
- [ ] Code refactoring
- [ ] Performance improvement
- [ ] Other (please specify):

## Screenshots/Videos (if applicable)
<!-- Add screenshots or videos to help explain your changes -->

## Pre-submission Checklist
<!-- Please check all boxes that apply before submitting your PR -->
- [ ] **I have tested my changes thoroughly before submitting this PR**
- [ ] **This PR contains minimal changes necessary to address the
issue/feature**
- [ ] My code follows the project's coding standards and style
guidelines
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have added necessary documentation (if applicable)
- [ ] All new and existing tests pass
- [ ] I have searched existing PRs to ensure this change hasn't been
submitted already
- [ ] I have linked any relevant issues in the description
- [ ] My commits have clear and descriptive messages

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
---
 cognee/infrastructure/databases/vector/create_vector_engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cognee/infrastructure/databases/vector/create_vector_engine.py b/cognee/infrastructure/databases/vector/create_vector_engine.py
index d1cf855d7..c54d94f6c 100644
--- a/cognee/infrastructure/databases/vector/create_vector_engine.py
+++ b/cognee/infrastructure/databases/vector/create_vector_engine.py
@@ -133,6 +133,6 @@ def create_vector_engine(
 
     else:
         raise EnvironmentError(
-            f"Unsupported graph database provider: {vector_db_provider}. "
+            f"Unsupported vector database provider: {vector_db_provider}. "
             f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['LanceDB', 'PGVector', 'neptune_analytics', 'ChromaDB'])}"
         )