fix:Fixes missing entity to entity edges (#1118)
<!-- .github/pull_request_template.md --> ## Description Fixes missing entity to entity edges ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
parent
647879e57c
commit
d571b9f8bf
4 changed files with 119 additions and 4 deletions
29
.github/workflows/e2e_tests.yml
vendored
29
.github/workflows/e2e_tests.yml
vendored
|
|
@ -273,3 +273,32 @@ jobs:
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: poetry run python ./cognee/tests/test_permissions.py
|
run: poetry run python ./cognee/tests/test_permissions.py
|
||||||
|
|
||||||
|
test-graph-edges:
|
||||||
|
name: Test graph edge ingestion
|
||||||
|
runs-on: ubuntu-22.04
|
||||||
|
steps:
|
||||||
|
- name: Check out repository
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Cognee Setup
|
||||||
|
uses: ./.github/actions/cognee_setup
|
||||||
|
with:
|
||||||
|
python-version: '3.11.x'
|
||||||
|
|
||||||
|
- name: Install specific graph db dependency
|
||||||
|
run: |
|
||||||
|
poetry install
|
||||||
|
|
||||||
|
- name: Run graph edges test
|
||||||
|
env:
|
||||||
|
ENV: 'dev'
|
||||||
|
LLM_MODEL: ${{ secrets.LLM_MODEL }}
|
||||||
|
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
|
||||||
|
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||||
|
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
|
||||||
|
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
|
||||||
|
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
|
run: poetry run python ./cognee/tests/test_edge_ingestion.py
|
||||||
|
|
|
||||||
|
|
@ -351,7 +351,7 @@ def expand_with_nodes_and_edges(
|
||||||
_process_graph_edges(graph, name_mapping, existing_edges_map, relationships)
|
_process_graph_edges(graph, name_mapping, existing_edges_map, relationships)
|
||||||
|
|
||||||
# Return combined results
|
# Return combined results
|
||||||
graph_nodes = list(added_ontology_nodes_map.values())
|
graph_nodes = data_chunks + list(added_ontology_nodes_map.values())
|
||||||
graph_edges = relationships + ontology_relationships
|
graph_edges = relationships + ontology_relationships
|
||||||
|
|
||||||
return graph_nodes, graph_edges
|
return graph_nodes, graph_edges
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
from typing import Type, List
|
from typing import Type, List
|
||||||
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||||
|
|
@ -41,7 +40,6 @@ async def integrate_chunk_graphs(
|
||||||
for chunk_index, chunk_graph in enumerate(chunk_graphs):
|
for chunk_index, chunk_graph in enumerate(chunk_graphs):
|
||||||
data_chunks[chunk_index].contains = chunk_graph
|
data_chunks[chunk_index].contains = chunk_graph
|
||||||
|
|
||||||
await add_data_points(chunk_graphs)
|
|
||||||
return data_chunks
|
return data_chunks
|
||||||
|
|
||||||
existing_edges_map = await retrieve_existing_edges(
|
existing_edges_map = await retrieve_existing_edges(
|
||||||
|
|
@ -54,7 +52,7 @@ async def integrate_chunk_graphs(
|
||||||
)
|
)
|
||||||
|
|
||||||
if len(graph_nodes) > 0:
|
if len(graph_nodes) > 0:
|
||||||
await add_data_points(graph_nodes)
|
await graph_engine.add_nodes(graph_nodes)
|
||||||
|
|
||||||
if len(graph_edges) > 0:
|
if len(graph_edges) > 0:
|
||||||
await graph_engine.add_edges(graph_edges)
|
await graph_engine.add_edges(graph_edges)
|
||||||
|
|
|
||||||
88
cognee/tests/test_edge_ingestion.py
Executable file
88
cognee/tests/test_edge_ingestion.py
Executable file
|
|
@ -0,0 +1,88 @@
|
||||||
|
import os
|
||||||
|
import asyncio
|
||||||
|
import cognee
|
||||||
|
import pathlib
|
||||||
|
|
||||||
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||||
|
from collections import Counter
|
||||||
|
from cognee.modules.users.methods import get_default_user
|
||||||
|
from cognee.shared.logging_utils import get_logger
|
||||||
|
|
||||||
|
logger = get_logger()
|
||||||
|
|
||||||
|
|
||||||
|
async def test_edge_ingestion():
    """Check that cognify stores entity-to-entity edges next to the structural ones.

    The test points cognee at test-local data/system directories, prunes any
    previous state, ingests three short sentences into the
    ``edge_ingestion_test`` dataset, runs ``cognee.cognify`` and then counts
    the relationship names found in the graph returned by the graph engine.

    Raises:
        AssertionError: if a structural edge type is missing, if no edge type
            beyond the structural ones exists, if the structural edge counts
            are inconsistent, or if fewer than two of the expected
            entity-to-entity edge types were produced.
    """
    tests_directory = pathlib.Path(__file__).parent

    data_directory_path = str(
        (tests_directory / ".data_storage/test_edge_ingestion").resolve()
    )
    cognee_directory_path = str(
        (tests_directory / ".cognee_system/test_edge_ingestion").resolve()
    )

    cognee.config.data_root_directory(data_directory_path)
    cognee.config.system_root_directory(cognee_directory_path)

    # Start from a clean slate so counts are not polluted by earlier runs.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    basic_nested_edges = ["is_a", "is_part_of", "contains", "made_from"]

    entity_to_entity_edges = ["likes", "prefers", "watches"]

    text1 = "Dave watches Dexter Resurrection"
    text2 = "Ana likes apples"
    text3 = "Bob prefers Cognee over other solutions"

    await cognee.add([text1, text2, text3], dataset_name="edge_ingestion_test")

    user = await get_default_user()

    await cognee.cognify(["edge_ingestion_test"], user=user)

    graph_engine = await get_graph_engine()
    graph = await graph_engine.get_graph_data()

    # NOTE(review): assumes graph[1] is the edge list and that index 2 of each
    # edge holds the relationship name -- confirm against get_graph_data().
    edge_type_counts = Counter(edge[2] for edge in graph[1])

    # Counter returns 0 for missing keys, so plain indexing is safe below.

    # Presence of the basic nested (structural) edges.
    for basic_nested_edge in basic_nested_edges:
        assert edge_type_counts[basic_nested_edge] >= 1, (
            f"Expected at least one {basic_nested_edge} edge, but found {edge_type_counts[basic_nested_edge]}"
        )

    # Presence of additional edge types beyond the structural ones.
    structural_type_count = len(basic_nested_edges)
    assert len(edge_type_counts) > structural_type_count, (
        f"Expected at least {structural_type_count + 1} edge types "
        f"({structural_type_count} structural plus entity to entity edges), "
        f"but found only {len(edge_type_counts)}"
    )

    # Consistency of the basic nested edges.
    assert edge_type_counts["made_from"] == edge_type_counts["is_part_of"], (
        f"Number of made_from and is_part_of edges are not matching, found {edge_type_counts['made_from']} made from and {edge_type_counts['is_part_of']} is_part_of."
    )

    # Every entity contained by a chunk should also get an is_a edge.
    # The message reports the actual `contains` count (it previously printed
    # the `is_part_of` count under the "contains" label).
    assert edge_type_counts["contains"] == edge_type_counts["is_a"], (
        f"Number of contains and is_a edges are not matching, found {edge_type_counts['is_a']} is_a and {edge_type_counts['contains']} contains."
    )

    # Presence of the expected entity-to-entity edges. Only two of the three
    # are required; the original test used the same threshold.
    found_edges = sum(
        1 for edge_name in entity_to_entity_edges if edge_name in edge_type_counts
    )
    assert found_edges >= 2, (
        f"Expected at least 2 entity to entity edges, but found only {found_edges}"
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: run the async test to completion when this file is
# executed directly rather than imported.
if __name__ == "__main__":
    entry_coroutine = test_edge_ingestion()
    asyncio.run(entry_coroutine)
|
||||||
Loading…
Add table
Reference in a new issue