Merge branch 'dev' into incremental-loading

commit 081fae8273

8 changed files with 325 additions and 274 deletions

.github/actions/cognee_setup/action.yml (vendored): 5 changes

@@ -21,6 +21,11 @@ runs:
      run: |
        python -m pip install --upgrade pip
        pip install poetry

    - name: Rebuild Poetry lock file
      shell: bash
      run: poetry lock

    - name: Install dependencies
      shell: bash
      run: poetry install --no-interaction -E api -E docs -E evals -E gemini -E codegraph -E ollama -E dev -E neo4j

.github/workflows/test_memgraph.yml (vendored): 59 changes

@@ -1,59 +0,0 @@
name: test | memgraph

on:
  workflow_dispatch:
  pull_request:
    types: [labeled, synchronize]

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

env:
  RUNTIME__LOG_LEVEL: ERROR

jobs:
  run_memgraph_integration_test:
    name: test
    runs-on: ubuntu-22.04

    services:
      memgraph:
        image: memgraph/memgraph-mage:latest
        ports:
          - 7687:7687

    steps:
      - name: Check out
        uses: actions/checkout@v3

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10.x'

      - name: Install Poetry
        uses: snok/install-poetry@v1.4.1
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true

      - name: Install dependencies
        run: poetry install -E neo4j

      - name: Run default Memgraph
        env:
          ENV: 'dev'
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
          GRAPH_DATABASE_URL: "bolt://localhost:7687"
          GRAPH_DATABASE_PASSWORD: "memgraph"
          GRAPH_DATABASE_USERNAME: "memgraph"
        run: poetry run python ./cognee/tests/test_memgraph.py

@@ -1,3 +1,4 @@
import os
from uuid import UUID

from fastapi import Form, UploadFile, Depends

@@ -31,8 +32,11 @@ def get_add_router() -> APIRouter:
            raise ValueError("Either datasetId or datasetName must be provided.")

        try:
            # TODO: Add check if HTTP Requests are enabled before allowing requests and git clone
            if isinstance(data, str) and data.startswith("http"):
            if (
                isinstance(data, str)
                and data.startswith("http")
                and (os.getenv("ALLOW_HTTP_REQUESTS", "true").lower() == "true")
            ):
                if "github" in data:
                    # Perform git clone if the URL is from GitHub
                    repo_name = data.split("/")[-1].replace(".git", "")

@@ -10,6 +10,7 @@ from fastapi.responses import JSONResponse, FileResponse

from cognee.api.DTO import InDTO, OutDTO
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.data.methods import get_authorized_existing_datasets
from cognee.modules.data.methods import create_dataset, get_datasets_by_name
from cognee.shared.logging_utils import get_logger
from cognee.api.v1.delete.exceptions import DataNotFoundError, DatasetNotFoundError

@@ -177,7 +178,8 @@ def get_datasets_router() -> APIRouter:
    async def get_dataset_data(dataset_id: UUID, user: User = Depends(get_authenticated_user)):
        from cognee.modules.data.methods import get_dataset_data, get_dataset

        dataset = await get_dataset(user.id, dataset_id)
        # Verify user has permission to read dataset
        dataset = await get_authorized_existing_datasets([dataset_id], "read", user)

        if dataset is None:
            return JSONResponse(

@@ -185,7 +187,7 @@ def get_datasets_router() -> APIRouter:
                content=ErrorResponseDTO(f"Dataset ({str(dataset_id)}) not found."),
            )

        dataset_data = await get_dataset_data(dataset_id=dataset.id)
        dataset_data = await get_dataset_data(dataset_id=dataset[0].id)

        if dataset_data is None:
            return []

@@ -200,6 +202,9 @@ def get_datasets_router() -> APIRouter:
        from cognee.api.v1.datasets.datasets import datasets as cognee_datasets

        try:
            # Verify user has permission to read dataset
            await get_authorized_existing_datasets(datasets, "read", user)

            datasets_statuses = await cognee_datasets.get_status(datasets)

            return datasets_statuses

@@ -211,16 +216,17 @@ def get_datasets_router() -> APIRouter:
        dataset_id: UUID, data_id: UUID, user: User = Depends(get_authenticated_user)
    ):
        from cognee.modules.data.methods import get_data
        from cognee.modules.data.methods import get_dataset, get_dataset_data
        from cognee.modules.data.methods import get_dataset_data

        dataset = await get_dataset(user.id, dataset_id)
        # Verify user has permission to read dataset
        dataset = await get_authorized_existing_datasets([dataset_id], "read", user)

        if dataset is None:
            return JSONResponse(
                status_code=404, content={"detail": f"Dataset ({dataset_id}) not found."}
            )

        dataset_data = await get_dataset_data(dataset.id)
        dataset_data = await get_dataset_data(dataset[0].id)

        if dataset_data is None:
            raise DataNotFoundError(message=f"No data found in dataset ({dataset_id}).")

@@ -1,3 +1,4 @@
import os
from fastapi import Form, UploadFile, Depends
from fastapi.responses import JSONResponse
from fastapi import APIRouter

@@ -37,8 +38,9 @@ def get_delete_router() -> APIRouter:
        # Handle each file in the list
        results = []
        for file in data:
            # TODO: Add check if HTTP Requests are enabled before allowing requests and git clone
            if file.filename.startswith("http"):
            if file.filename.startswith("http") and (
                os.getenv("ALLOW_HTTP_REQUESTS", "true").lower() == "true"
            ):
                if "github" in file.filename:
                    # For GitHub repos, we need to get the content hash of each file
                    repo_name = file.filename.split("/")[-1].replace(".git", "")

@@ -1,6 +1,12 @@
from fastapi import APIRouter
from fastapi import APIRouter, Depends
from fastapi.responses import HTMLResponse, JSONResponse
from uuid import UUID
from cognee.shared.logging_utils import get_logger
from cognee.modules.users.methods import get_authenticated_user
from cognee.modules.data.methods import get_authorized_existing_datasets
from cognee.modules.users.models import User

from cognee.context_global_variables import set_database_global_context_variables

logger = get_logger()

@@ -9,11 +15,17 @@ def get_visualize_router() -> APIRouter:
    router = APIRouter()

    @router.get("", response_model=None)
    async def visualize():
    async def visualize(dataset_id: UUID, user: User = Depends(get_authenticated_user)):
        """This endpoint is responsible for adding data to the graph."""
        from cognee.api.v1.visualize import visualize_graph

        try:
            # Verify user has permission to read dataset
            dataset = await get_authorized_existing_datasets([dataset_id], "read", user)

            # Will only be used if ENABLE_BACKEND_ACCESS_CONTROL is set to True
            await set_database_global_context_variables(dataset[0].id, dataset[0].owner_id)

            html_visualization = await visualize_graph()
            return HTMLResponse(html_visualization)

@@ -11,12 +11,278 @@ from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver


def _create_node_key(node_id: str, category: str) -> str:
    """Create a standardized node key"""
    return f"{node_id}_{category}"


def _create_edge_key(source_id: str, target_id: str, relationship_name: str) -> str:
    """Create a standardized edge key"""
    return f"{source_id}_{target_id}_{relationship_name}"


def _process_ontology_nodes(
    ontology_nodes: list,
    data_chunk: DocumentChunk,
    added_nodes_map: dict,
    added_ontology_nodes_map: dict,
) -> None:
    """Process and store ontology nodes"""
    for ontology_node in ontology_nodes:
        ont_node_id = generate_node_id(ontology_node.name)
        ont_node_name = generate_node_name(ontology_node.name)

        if ontology_node.category == "classes":
            ont_node_key = _create_node_key(ont_node_id, "type")
            if ont_node_key not in added_nodes_map and ont_node_key not in added_ontology_nodes_map:
                added_ontology_nodes_map[ont_node_key] = EntityType(
                    id=ont_node_id,
                    name=ont_node_name,
                    description=ont_node_name,
                    ontology_valid=True,
                )

        elif ontology_node.category == "individuals":
            ont_node_key = _create_node_key(ont_node_id, "entity")
            if ont_node_key not in added_nodes_map and ont_node_key not in added_ontology_nodes_map:
                added_ontology_nodes_map[ont_node_key] = Entity(
                    id=ont_node_id,
                    name=ont_node_name,
                    description=ont_node_name,
                    ontology_valid=True,
                    belongs_to_set=data_chunk.belongs_to_set,
                )


def _process_ontology_edges(
    ontology_edges: list, existing_edges_map: dict, ontology_relationships: list
) -> None:
    """Process ontology edges and add them if new"""
    for source, relation, target in ontology_edges:
        source_node_id = generate_node_id(source)
        target_node_id = generate_node_id(target)
        relationship_name = generate_edge_name(relation)
        edge_key = _create_edge_key(source_node_id, target_node_id, relationship_name)

        if edge_key not in existing_edges_map:
            ontology_relationships.append(
                (
                    source_node_id,
                    target_node_id,
                    relationship_name,
                    {
                        "relationship_name": relationship_name,
                        "source_node_id": source_node_id,
                        "target_node_id": target_node_id,
                        "ontology_valid": True,
                    },
                )
            )
            existing_edges_map[edge_key] = True


def _create_type_node(
    node_type: str,
    ontology_resolver: OntologyResolver,
    added_nodes_map: dict,
    added_ontology_nodes_map: dict,
    name_mapping: dict,
    key_mapping: dict,
    data_chunk: DocumentChunk,
    existing_edges_map: dict,
    ontology_relationships: list,
) -> EntityType:
    """Create or retrieve a type node with ontology validation"""
    node_id = generate_node_id(node_type)
    node_name = generate_node_name(node_type)
    type_node_key = _create_node_key(node_id, "type")

    if type_node_key in added_nodes_map or type_node_key in key_mapping:
        return added_nodes_map.get(type_node_key) or added_nodes_map.get(
            key_mapping.get(type_node_key)
        )

    # Get ontology validation
    ontology_nodes, ontology_edges, closest_class = ontology_resolver.get_subgraph(
        node_name=node_name, node_type="classes"
    )

    ontology_validated = bool(closest_class)

    if ontology_validated:
        old_key = type_node_key
        node_id = generate_node_id(closest_class.name)
        type_node_key = _create_node_key(node_id, "type")
        new_node_name = generate_node_name(closest_class.name)

        name_mapping[node_name] = closest_class.name
        key_mapping[old_key] = type_node_key
        node_name = new_node_name

    type_node = EntityType(
        id=node_id,
        name=node_name,
        type=node_name,
        description=node_name,
        ontology_valid=ontology_validated,
    )

    added_nodes_map[type_node_key] = type_node

    # Process ontology nodes and edges
    _process_ontology_nodes(ontology_nodes, data_chunk, added_nodes_map, added_ontology_nodes_map)
    _process_ontology_edges(ontology_edges, existing_edges_map, ontology_relationships)

    return type_node


def _create_entity_node(
    node_id: str,
    node_name: str,
    node_description: str,
    type_node: EntityType,
    ontology_resolver: OntologyResolver,
    added_nodes_map: dict,
    added_ontology_nodes_map: dict,
    name_mapping: dict,
    key_mapping: dict,
    data_chunk: DocumentChunk,
    existing_edges_map: dict,
    ontology_relationships: list,
) -> Entity:
    """Create or retrieve an entity node with ontology validation"""
    generated_node_id = generate_node_id(node_id)
    generated_node_name = generate_node_name(node_name)
    entity_node_key = _create_node_key(generated_node_id, "entity")

    if entity_node_key in added_nodes_map or entity_node_key in key_mapping:
        return added_nodes_map.get(entity_node_key) or added_nodes_map.get(
            key_mapping.get(entity_node_key)
        )

    # Get ontology validation
    ontology_nodes, ontology_edges, start_ent_ont = ontology_resolver.get_subgraph(
        node_name=generated_node_name, node_type="individuals"
    )

    ontology_validated = bool(start_ent_ont)

    if ontology_validated:
        old_key = entity_node_key
        generated_node_id = generate_node_id(start_ent_ont.name)
        entity_node_key = _create_node_key(generated_node_id, "entity")
        new_node_name = generate_node_name(start_ent_ont.name)

        name_mapping[generated_node_name] = start_ent_ont.name
        key_mapping[old_key] = entity_node_key
        generated_node_name = new_node_name

    entity_node = Entity(
        id=generated_node_id,
        name=generated_node_name,
        is_a=type_node,
        description=node_description,
        ontology_valid=ontology_validated,
        belongs_to_set=data_chunk.belongs_to_set,
    )

    added_nodes_map[entity_node_key] = entity_node

    # Process ontology nodes and edges
    _process_ontology_nodes(ontology_nodes, data_chunk, added_nodes_map, added_ontology_nodes_map)
    _process_ontology_edges(ontology_edges, existing_edges_map, ontology_relationships)

    return entity_node


def _process_graph_nodes(
    data_chunk: DocumentChunk,
    graph: KnowledgeGraph,
    ontology_resolver: OntologyResolver,
    added_nodes_map: dict,
    added_ontology_nodes_map: dict,
    name_mapping: dict,
    key_mapping: dict,
    existing_edges_map: dict,
    ontology_relationships: list,
) -> None:
    """Process nodes in a knowledge graph"""
    for node in graph.nodes:
        # Create type node
        type_node = _create_type_node(
            node.type,
            ontology_resolver,
            added_nodes_map,
            added_ontology_nodes_map,
            name_mapping,
            key_mapping,
            data_chunk,
            existing_edges_map,
            ontology_relationships,
        )

        # Create entity node
        entity_node = _create_entity_node(
            node.id,
            node.name,
            node.description,
            type_node,
            ontology_resolver,
            added_nodes_map,
            added_ontology_nodes_map,
            name_mapping,
            key_mapping,
            data_chunk,
            existing_edges_map,
            ontology_relationships,
        )

        # Add entity to data chunk
        if data_chunk.contains is None:
            data_chunk.contains = []
        data_chunk.contains.append(entity_node)


def _process_graph_edges(
    graph: KnowledgeGraph, name_mapping: dict, existing_edges_map: dict, relationships: list
) -> None:
    """Process edges in a knowledge graph"""
    for edge in graph.edges:
        # Apply name mapping if exists
        source_id = name_mapping.get(edge.source_node_id, edge.source_node_id)
        target_id = name_mapping.get(edge.target_node_id, edge.target_node_id)

        source_node_id = generate_node_id(source_id)
        target_node_id = generate_node_id(target_id)
        relationship_name = generate_edge_name(edge.relationship_name)
        edge_key = _create_edge_key(source_node_id, target_node_id, relationship_name)

        if edge_key not in existing_edges_map:
            relationships.append(
                (
                    source_node_id,
                    target_node_id,
                    relationship_name,
                    {
                        "relationship_name": relationship_name,
                        "source_node_id": source_node_id,
                        "target_node_id": target_node_id,
                        "ontology_valid": False,
                    },
                )
            )
            existing_edges_map[edge_key] = True


def expand_with_nodes_and_edges(
    data_chunks: list[DocumentChunk],
    chunk_graphs: list[KnowledgeGraph],
    ontology_resolver: OntologyResolver = None,
    existing_edges_map: Optional[dict[str, bool]] = None,
):
    """
    Expand data chunks with nodes and edges, validating against ontology.
    """
    if existing_edges_map is None:
        existing_edges_map = {}

@@ -27,218 +293,31 @@ def expand_with_nodes_and_edges(
    added_ontology_nodes_map = {}
    relationships = []
    ontology_relationships = []

    name_mapping = {}
    key_mapping = {}

    # Process each chunk and its corresponding graph
    for data_chunk, graph in zip(data_chunks, chunk_graphs):
        if not graph:
            continue

        for node in graph.nodes:
            node_id = generate_node_id(node.id)
            node_name = generate_node_name(node.name)
            type_node_id = generate_node_id(node.type)
            type_node_name = generate_node_name(node.type)
        # Process nodes first
        _process_graph_nodes(
            data_chunk,
            graph,
            ontology_resolver,
            added_nodes_map,
            added_ontology_nodes_map,
            name_mapping,
            key_mapping,
            existing_edges_map,
            ontology_relationships,
        )

            ontology_validated_source_type = False
            ontology_validated_source_ent = False

            type_node_key = f"{type_node_id}_type"

            if type_node_key not in added_nodes_map and type_node_key not in key_mapping:
                (
                    ontology_entity_type_nodes,
                    ontology_entity_type_edges,
                    ontology_closest_class_node,
                ) = ontology_resolver.get_subgraph(node_name=type_node_name, node_type="classes")

                if ontology_closest_class_node:
                    name_mapping[type_node_name] = ontology_closest_class_node.name
                    ontology_validated_source_type = True
                    old_key = type_node_key
                    type_node_id = generate_node_id(ontology_closest_class_node.name)
                    type_node_key = f"{type_node_id}_type"
                    type_node_name = generate_node_name(ontology_closest_class_node.name)
                    key_mapping[old_key] = type_node_key

                type_node = EntityType(
                    id=type_node_id,
                    name=type_node_name,
                    type=type_node_name,
                    description=type_node_name,
                    ontology_valid=ontology_validated_source_type,
                )
                added_nodes_map[type_node_key] = type_node

                for ontology_node_to_store in ontology_entity_type_nodes:
                    ont_node_id = generate_node_id(ontology_node_to_store.name)
                    ont_node_name = generate_node_name(ontology_node_to_store.name)

                    if ontology_node_to_store.category == "classes":
                        ont_node_key = f"{ont_node_id}_type"
                        if (ont_node_key not in added_nodes_map) and (
                            ont_node_key not in added_ontology_nodes_map
                        ):
                            added_ontology_nodes_map[ont_node_key] = EntityType(
                                id=ont_node_id,
                                name=ont_node_name,
                                description=ont_node_name,
                                ontology_valid=True,
                            )

                    elif ontology_node_to_store.category == "individuals":
                        ont_node_key = f"{ont_node_id}_entity"
                        if (ont_node_key not in added_nodes_map) and (
                            ont_node_key not in added_ontology_nodes_map
                        ):
                            added_ontology_nodes_map[ont_node_key] = Entity(
                                id=ont_node_id,
                                name=ont_node_name,
                                description=ont_node_name,
                                ontology_valid=True,
                                belongs_to_set=data_chunk.belongs_to_set,
                            )

                for source, relation, target in ontology_entity_type_edges:
                    source_node_id = generate_node_id(source)
                    target_node_id = generate_node_id(target)
                    relationship_name = generate_edge_name(relation)
                    edge_key = f"{source_node_id}_{target_node_id}_{relationship_name}"

                    if edge_key not in existing_edges_map:
                        ontology_relationships.append(
                            (
                                source_node_id,
                                target_node_id,
                                relationship_name,
                                dict(
                                    relationship_name=relationship_name,
                                    source_node_id=source_node_id,
                                    target_node_id=target_node_id,
                                ),
                            )
                        )
                        existing_edges_map[edge_key] = True
            else:
                type_node = added_nodes_map.get(type_node_key) or added_nodes_map.get(
                    key_mapping.get(type_node_key)
                )

            entity_node_key = f"{node_id}_entity"

            if entity_node_key not in added_nodes_map and entity_node_key not in key_mapping:
                ontology_entity_nodes, ontology_entity_edges, start_ent_ont = (
                    ontology_resolver.get_subgraph(node_name=node_name, node_type="individuals")
                )

                if start_ent_ont:
                    name_mapping[node_name] = start_ent_ont.name
                    ontology_validated_source_ent = True
                    old_key = entity_node_key
                    node_id = generate_node_id(start_ent_ont.name)
                    entity_node_key = f"{node_id}_entity"
                    node_name = generate_node_name(start_ent_ont.name)
                    key_mapping[old_key] = entity_node_key

                entity_node = Entity(
                    id=node_id,
                    name=node_name,
                    is_a=type_node,
                    description=node.description,
                    ontology_valid=ontology_validated_source_ent,
                    belongs_to_set=data_chunk.belongs_to_set,
                )

                added_nodes_map[entity_node_key] = entity_node

                for ontology_node_to_store in ontology_entity_nodes:
                    ont_node_id = generate_node_id(ontology_node_to_store.name)
                    ont_node_name = generate_node_name(ontology_node_to_store.name)

                    if ontology_node_to_store.category == "classes":
                        ont_node_key = f"{ont_node_id}_type"
                        if (ont_node_key not in added_nodes_map) and (
                            ont_node_key not in added_ontology_nodes_map
                        ):
                            added_ontology_nodes_map[ont_node_key] = EntityType(
                                id=ont_node_id,
                                name=ont_node_name,
                                description=ont_node_name,
                                ontology_valid=True,
                            )

                    elif ontology_node_to_store.category == "individuals":
                        ont_node_key = f"{ont_node_id}_entity"
                        if (ont_node_key not in added_nodes_map) and (
                            ont_node_key not in added_ontology_nodes_map
                        ):
                            added_ontology_nodes_map[ont_node_key] = Entity(
                                id=ont_node_id,
                                name=ont_node_name,
                                description=ont_node_name,
                                ontology_valid=True,
                                belongs_to_set=data_chunk.belongs_to_set,
                            )

                for source, relation, target in ontology_entity_edges:
                    source_node_id = generate_node_id(source)
                    target_node_id = generate_node_id(target)
                    relationship_name = generate_edge_name(relation)
                    edge_key = f"{source_node_id}_{target_node_id}_{relationship_name}"

                    if edge_key not in existing_edges_map:
                        ontology_relationships.append(
                            (
                                source_node_id,
                                target_node_id,
                                relationship_name,
                                dict(
                                    relationship_name=relationship_name,
                                    source_node_id=source_node_id,
                                    target_node_id=target_node_id,
                                    ontology_valid=True,
                                ),
                            )
                        )
                        existing_edges_map[edge_key] = True

            else:
                entity_node = added_nodes_map.get(entity_node_key) or added_nodes_map.get(
                    key_mapping.get(entity_node_key)
                )

            if data_chunk.contains is None:
                data_chunk.contains = []

            data_chunk.contains.append(entity_node)

        for edge in graph.edges:
            source_node_id = generate_node_id(
                name_mapping.get(edge.source_node_id, edge.source_node_id)
            )
            target_node_id = generate_node_id(
                name_mapping.get(edge.target_node_id, edge.target_node_id)
            )
            relationship_name = generate_edge_name(edge.relationship_name)
            edge_key = f"{source_node_id}_{target_node_id}_{relationship_name}"

            if edge_key not in existing_edges_map:
                relationships.append(
                    (
                        source_node_id,
                        target_node_id,
                        relationship_name,
                        dict(
                            relationship_name=relationship_name,
                            source_node_id=source_node_id,
                            target_node_id=target_node_id,
                            ontology_valid=False,
                        ),
                    )
                )
                existing_edges_map[edge_key] = True
        # Then process edges
        _process_graph_edges(graph, name_mapping, existing_edges_map, relationships)

    # Return combined results
    graph_nodes = list(added_ontology_nodes_map.values())
    graph_edges = relationships + ontology_relationships

@@ -1,3 +1,4 @@
import os
from typing import Union, BinaryIO, Any

from cognee.modules.ingestion.exceptions import IngestionError

@@ -20,7 +21,8 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any], datase
        file_path = data_item
    # data is a file path
    elif data_item.startswith("file://") or data_item.startswith("/"):
        # TODO: Add check if ACCEPT_LOCAL_FILE_PATH is enabled, if it's not raise an error
        if os.getenv("ACCEPT_LOCAL_FILE_PATH", "true").lower() == "false":
            raise IngestionError(message="Local files are not accepted.")
        file_path = data_item.replace("file://", "")
    # data is text
    else: