fix: Fixes missing entity-to-entity edges (#1118)

<!-- .github/pull_request_template.md -->

## Description
Fixes missing entity to entity edges

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
hajdul88 2025-07-22 11:48:56 +02:00 committed by GitHub
parent 4474efade3
commit dad7da2e7b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 120 additions and 3 deletions

View file

@ -273,3 +273,32 @@ jobs:
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
run: poetry run python ./cognee/tests/test_permissions.py
test-graph-edges:
  # Runs the edge ingestion test script end to end; the script is executed
  # directly with python (not through pytest), see the final step.
  name: Test graph edge ingestion
  runs-on: ubuntu-22.04
  steps:
    - name: Check out repository
      uses: actions/checkout@v4

    - name: Cognee Setup
      uses: ./.github/actions/cognee_setup
      with:
        python-version: '3.11.x'

    # Plain install with no extras: this job does not pull in any
    # graph-database-specific dependency group.
    - name: Install dependencies
      run: |
        poetry install

    - name: Run graph edges test
      env:
        ENV: 'dev'
        LLM_MODEL: ${{ secrets.LLM_MODEL }}
        LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
        LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
        LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
        EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
        EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
        EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
        EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
      run: poetry run python ./cognee/tests/test_edge_ingestion.py

View file

@ -351,7 +351,7 @@ def expand_with_nodes_and_edges(
_process_graph_edges(graph, name_mapping, existing_edges_map, relationships)
# Return combined results
graph_nodes = list(added_ontology_nodes_map.values())
graph_nodes = data_chunks + list(added_ontology_nodes_map.values())
graph_edges = relationships + ontology_relationships
return graph_nodes, graph_edges

View file

@ -11,6 +11,7 @@ from cognee.modules.graph.utils import (
retrieve_existing_edges,
)
from cognee.shared.data_models import KnowledgeGraph
from cognee.tasks.storage.add_data_points import add_data_points
async def integrate_chunk_graphs(
@ -38,8 +39,7 @@ async def integrate_chunk_graphs(
)
if len(graph_nodes) > 0:
await graph_engine.add_nodes(graph_nodes)
await add_data_points(graph_nodes)
if len(graph_edges) > 0:
await graph_engine.add_edges(graph_edges)

View file

@ -0,0 +1,88 @@
import os
import asyncio
import cognee
import pathlib
from cognee.infrastructure.databases.graph import get_graph_engine
from collections import Counter
from cognee.modules.users.methods import get_default_user
from cognee.shared.logging_utils import get_logger
logger = get_logger()
async def test_edge_ingestion():
    """
    Tests whether we ingest additional entity to entity edges.

    The test points cognee at dedicated data/system directories, prunes any
    previous state, adds three short sentences to the ``edge_ingestion_test``
    dataset and runs ``cognify`` on it. It then counts the edges returned by
    the graph engine per edge type and asserts that:

    * every basic structural edge type is present at least once,
    * more than four distinct edge types exist overall,
    * ``made_from`` and ``is_part_of`` counts match,
    * ``contains`` and ``is_a`` counts match,
    * at least two of the expected entity to entity edge types were produced.

    Raises:
        AssertionError: if any of the expectations above does not hold.
    """
    tests_dir = pathlib.Path(__file__).parent
    data_directory_path = str((tests_dir / ".data_storage/test_edge_ingestion").resolve())
    cognee_directory_path = str((tests_dir / ".cognee_system/test_edge_ingestion").resolve())

    cognee.config.data_root_directory(data_directory_path)
    cognee.config.system_root_directory(cognee_directory_path)

    # Start from a clean slate so counts are not polluted by earlier runs.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    basic_nested_edges = ["is_a", "is_part_of", "contains", "made_from"]
    entity_to_entity_edges = ["likes", "prefers", "watches"]

    text1 = "Dave watches Dexter Resurrection"
    text2 = "Ana likes apples"
    text3 = "Bob prefers Cognee over other solutions"

    await cognee.add([text1, text2, text3], dataset_name="edge_ingestion_test")

    user = await get_default_user()
    await cognee.cognify(["edge_ingestion_test"], user=user)

    graph_engine = await get_graph_engine()
    graph = await graph_engine.get_graph_data()

    # graph[1] holds the edges; element 2 of each edge is used as its type.
    # NOTE(review): assumes index 2 is the relationship name - confirm against
    # the graph engine's get_graph_data() contract.
    edge_type_counts = Counter(edge[2] for edge in graph[1])

    # Tests the presence of basic nested edges
    for basic_nested_edge in basic_nested_edges:
        edge_count = edge_type_counts.get(basic_nested_edge, 0)
        assert edge_count >= 1, (
            f"Expected at least one {basic_nested_edge} edge, but found {edge_count}"
        )

    # Tests the presence of additional entity to entity edges
    assert len(edge_type_counts) > 4, (
        f"Expected at least 5 distinct edge types (4 structural plus entity to entity edges), "
        f"but found only {len(edge_type_counts)}"
    )

    # Tests the consistency of basic nested edges
    made_from_count = edge_type_counts.get("made_from", 0)
    is_part_of_count = edge_type_counts.get("is_part_of", 0)
    assert made_from_count == is_part_of_count, (
        f"Number of made_from and is_part_of edges are not matching, "
        f"found {made_from_count} made from and {is_part_of_count} is_part_of."
    )

    # Tests whether we generate is_a for all entity that is contained by a chunk
    contains_count = edge_type_counts.get("contains", 0)
    is_a_count = edge_type_counts.get("is_a", 0)
    assert contains_count == is_a_count, (
        f"Number of contains and is_a edges are not matching, "
        f"found {is_a_count} is_a and {contains_count} contains."
    )

    # Tests the presence of expected entity to entity edges. The LLM output is
    # not deterministic, so only two of the three candidate types are required.
    found_edges = sum(
        1 for entity_to_entity_edge in entity_to_entity_edges
        if entity_to_entity_edge in edge_type_counts
    )
    assert found_edges >= 2, (
        f"Expected at least 2 entity to entity edges, but found only {found_edges}"
    )
if __name__ == "__main__":
    # Script entry point: drive the async test to completion on a fresh event loop.
    edge_ingestion_run = test_edge_ingestion()
    asyncio.run(edge_ingestion_run)