Compare commits
2 commits
main
...
node_edges
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c486472194 | ||
|
|
602aecba43 |
7 changed files with 1048 additions and 311 deletions
|
|
@ -5,11 +5,12 @@ import json
|
||||||
from textwrap import dedent
|
from textwrap import dedent
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
from webbrowser import Error
|
from webbrowser import Error
|
||||||
|
from typing import List, Dict, Any, Optional, Tuple, Type, Union
|
||||||
|
|
||||||
from falkordb import FalkorDB
|
from falkordb import FalkorDB
|
||||||
|
|
||||||
from cognee.exceptions import InvalidValueError
|
from cognee.exceptions import InvalidValueError
|
||||||
from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface
|
from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface, record_graph_changes, NodeData, EdgeData, Node
|
||||||
from cognee.infrastructure.databases.vector.embeddings import EmbeddingEngine
|
from cognee.infrastructure.databases.vector.embeddings import EmbeddingEngine
|
||||||
from cognee.infrastructure.databases.vector.vector_db_interface import VectorDBInterface
|
from cognee.infrastructure.databases.vector.vector_db_interface import VectorDBInterface
|
||||||
from cognee.infrastructure.engine import DataPoint
|
from cognee.infrastructure.engine import DataPoint
|
||||||
|
|
@ -61,6 +62,12 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
|
||||||
- delete_nodes
|
- delete_nodes
|
||||||
- delete_graph
|
- delete_graph
|
||||||
- prune
|
- prune
|
||||||
|
- get_node
|
||||||
|
- get_nodes
|
||||||
|
- get_neighbors
|
||||||
|
- get_graph_metrics
|
||||||
|
- get_document_subgraph
|
||||||
|
- get_degree_one_nodes
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|
@ -158,6 +165,7 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
|
||||||
return value
|
return value
|
||||||
if (
|
if (
|
||||||
type(value) is list
|
type(value) is list
|
||||||
|
and len(value) > 0
|
||||||
and type(value[0]) is float
|
and type(value[0]) is float
|
||||||
and len(value) == self.embedding_engine.get_vector_size()
|
and len(value) == self.embedding_engine.get_vector_size()
|
||||||
):
|
):
|
||||||
|
|
@ -165,8 +173,12 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
|
||||||
# if type(value) is datetime:
|
# if type(value) is datetime:
|
||||||
# return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%f%z")
|
# return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||||
if type(value) is dict:
|
if type(value) is dict:
|
||||||
return f"'{json.dumps(value)}'"
|
return f"'{json.dumps(value).replace(chr(39), chr(34))}'"
|
||||||
return f"'{value}'"
|
if type(value) is str:
|
||||||
|
# Escape single quotes and handle special characters
|
||||||
|
escaped_value = str(value).replace("'", "\\'").replace('"', '\\"').replace('\n', '\\n').replace('\r', '\\r').replace('\t', '\\t')
|
||||||
|
return f"'{escaped_value}'"
|
||||||
|
return f"'{str(value)}'"
|
||||||
|
|
||||||
return ",".join([f"{key}:{parse_value(value)}" for key, value in properties.items()])
|
return ",".join([f"{key}:{parse_value(value)}" for key, value in properties.items()])
|
||||||
|
|
||||||
|
|
@ -185,13 +197,12 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
|
||||||
Returns:
|
Returns:
|
||||||
--------
|
--------
|
||||||
|
|
||||||
A string containing the query to be executed for the data point.
|
A tuple containing the query string and parameters dictionary.
|
||||||
"""
|
"""
|
||||||
node_label = type(data_point).__name__
|
node_label = type(data_point).__name__
|
||||||
property_names = DataPoint.get_embeddable_property_names(data_point)
|
property_names = DataPoint.get_embeddable_property_names(data_point)
|
||||||
|
|
||||||
node_properties = await self.stringify_properties(
|
properties = {
|
||||||
{
|
|
||||||
**data_point.model_dump(),
|
**data_point.model_dump(),
|
||||||
**(
|
**(
|
||||||
{
|
{
|
||||||
|
|
@ -204,16 +215,59 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
|
||||||
}
|
}
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
)
|
|
||||||
|
|
||||||
return dedent(
|
# Clean the properties - remove None values and handle special types
|
||||||
|
clean_properties = {}
|
||||||
|
for key, value in properties.items():
|
||||||
|
if value is not None:
|
||||||
|
if isinstance(value, UUID):
|
||||||
|
clean_properties[key] = str(value)
|
||||||
|
elif isinstance(value, dict):
|
||||||
|
clean_properties[key] = json.dumps(value)
|
||||||
|
elif isinstance(value, list) and len(value) > 0 and isinstance(value[0], float):
|
||||||
|
# This is likely a vector - convert to string representation
|
||||||
|
clean_properties[key] = f"vecf32({value})"
|
||||||
|
else:
|
||||||
|
clean_properties[key] = value
|
||||||
|
|
||||||
|
query = dedent(
|
||||||
f"""
|
f"""
|
||||||
MERGE (node:{node_label} {{id: '{str(data_point.id)}'}})
|
MERGE (node:{node_label} {{id: $node_id}})
|
||||||
ON CREATE SET node += ({{{node_properties}}}), node.updated_at = timestamp()
|
SET node += $properties, node.updated_at = timestamp()
|
||||||
ON MATCH SET node += ({{{node_properties}}}), node.updated_at = timestamp()
|
|
||||||
"""
|
"""
|
||||||
).strip()
|
).strip()
|
||||||
|
|
||||||
|
params = {
|
||||||
|
"node_id": str(data_point.id),
|
||||||
|
"properties": clean_properties
|
||||||
|
}
|
||||||
|
|
||||||
|
return query, params
|
||||||
|
|
||||||
|
def sanitize_relationship_name(self, relationship_name: str) -> str:
|
||||||
|
"""
|
||||||
|
Sanitize relationship name to be valid for Cypher queries.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
- relationship_name (str): The original relationship name
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
--------
|
||||||
|
- str: A sanitized relationship name valid for Cypher
|
||||||
|
"""
|
||||||
|
# Replace hyphens, spaces, and other special characters with underscores
|
||||||
|
import re
|
||||||
|
sanitized = re.sub(r'[^\w]', '_', relationship_name)
|
||||||
|
# Remove consecutive underscores
|
||||||
|
sanitized = re.sub(r'_+', '_', sanitized)
|
||||||
|
# Remove leading/trailing underscores
|
||||||
|
sanitized = sanitized.strip('_')
|
||||||
|
# Ensure it starts with a letter or underscore
|
||||||
|
if sanitized and not sanitized[0].isalpha() and sanitized[0] != '_':
|
||||||
|
sanitized = '_' + sanitized
|
||||||
|
return sanitized or 'RELATIONSHIP'
|
||||||
|
|
||||||
async def create_edge_query(self, edge: tuple[str, str, str, dict]) -> str:
|
async def create_edge_query(self, edge: tuple[str, str, str, dict]) -> str:
|
||||||
"""
|
"""
|
||||||
Generate a query to create or update an edge between two nodes in the graph.
|
Generate a query to create or update an edge between two nodes in the graph.
|
||||||
|
|
@ -229,14 +283,19 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
|
||||||
|
|
||||||
- str: A string containing the query to be executed for creating the edge.
|
- str: A string containing the query to be executed for creating the edge.
|
||||||
"""
|
"""
|
||||||
properties = await self.stringify_properties(edge[3])
|
# Sanitize the relationship name for Cypher compatibility
|
||||||
|
sanitized_relationship = self.sanitize_relationship_name(edge[2])
|
||||||
|
|
||||||
|
# Add the original relationship name to properties
|
||||||
|
edge_properties = {**edge[3], "relationship_name": edge[2]}
|
||||||
|
properties = await self.stringify_properties(edge_properties)
|
||||||
properties = f"{{{properties}}}"
|
properties = f"{{{properties}}}"
|
||||||
|
|
||||||
return dedent(
|
return dedent(
|
||||||
f"""
|
f"""
|
||||||
MERGE (source {{id:'{edge[0]}'}})
|
MERGE (source {{id:'{edge[0]}'}})
|
||||||
MERGE (target {{id: '{edge[1]}'}})
|
MERGE (target {{id: '{edge[1]}'}})
|
||||||
MERGE (source)-[edge:{edge[2]} {properties}]->(target)
|
MERGE (source)-[edge:{sanitized_relationship} {properties}]->(target)
|
||||||
ON MATCH SET edge.updated_at = timestamp()
|
ON MATCH SET edge.updated_at = timestamp()
|
||||||
ON CREATE SET edge.updated_at = timestamp()
|
ON CREATE SET edge.updated_at = timestamp()
|
||||||
"""
|
"""
|
||||||
|
|
@ -302,21 +361,16 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
|
||||||
|
|
||||||
vectorized_values = await self.embed_data(embeddable_values)
|
vectorized_values = await self.embed_data(embeddable_values)
|
||||||
|
|
||||||
queries = [
|
for data_point in data_points:
|
||||||
await self.create_data_point_query(
|
vectorized_data = [
|
||||||
data_point,
|
|
||||||
[
|
|
||||||
vectorized_values[vector_map[str(data_point.id)][property_name]]
|
vectorized_values[vector_map[str(data_point.id)][property_name]]
|
||||||
if vector_map[str(data_point.id)][property_name] is not None
|
if vector_map[str(data_point.id)][property_name] is not None
|
||||||
else None
|
else None
|
||||||
for property_name in DataPoint.get_embeddable_property_names(data_point)
|
for property_name in DataPoint.get_embeddable_property_names(data_point)
|
||||||
],
|
|
||||||
)
|
|
||||||
for data_point in data_points
|
|
||||||
]
|
]
|
||||||
|
|
||||||
for query in queries:
|
query, params = await self.create_data_point_query(data_point, vectorized_data)
|
||||||
self.query(query)
|
self.query(query, params)
|
||||||
|
|
||||||
async def create_vector_index(self, index_name: str, index_property_name: str):
|
async def create_vector_index(self, index_name: str, index_property_name: str):
|
||||||
"""
|
"""
|
||||||
|
|
@ -383,7 +437,83 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
async def add_node(self, node: DataPoint):
|
async def add_node(self, node_id: str, properties: Dict[str, Any]) -> None:
|
||||||
|
"""
|
||||||
|
Add a single node with specified properties to the graph.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- node_id (str): Unique identifier for the node being added.
|
||||||
|
- properties (Dict[str, Any]): A dictionary of properties associated with the node.
|
||||||
|
"""
|
||||||
|
# Clean the properties - remove None values and handle special types
|
||||||
|
clean_properties = {"id": node_id}
|
||||||
|
for key, value in properties.items():
|
||||||
|
if value is not None:
|
||||||
|
if isinstance(value, UUID):
|
||||||
|
clean_properties[key] = str(value)
|
||||||
|
elif isinstance(value, dict):
|
||||||
|
clean_properties[key] = json.dumps(value)
|
||||||
|
elif isinstance(value, list) and len(value) > 0 and isinstance(value[0], float):
|
||||||
|
# This is likely a vector - convert to string representation
|
||||||
|
clean_properties[key] = f"vecf32({value})"
|
||||||
|
else:
|
||||||
|
clean_properties[key] = value
|
||||||
|
|
||||||
|
query = "MERGE (node {id: $node_id}) SET node += $properties, node.updated_at = timestamp()"
|
||||||
|
params = {
|
||||||
|
"node_id": node_id,
|
||||||
|
"properties": clean_properties
|
||||||
|
}
|
||||||
|
|
||||||
|
self.query(query, params)
|
||||||
|
|
||||||
|
# Keep the original create_data_points method for VectorDBInterface compatibility
|
||||||
|
async def create_data_points(self, data_points: list[DataPoint]):
|
||||||
|
"""
|
||||||
|
Add a list of data points to the graph database via batching.
|
||||||
|
|
||||||
|
Can raise exceptions if there are issues during the database operations.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- data_points (list[DataPoint]): A list of DataPoint instances to be inserted into
|
||||||
|
the database.
|
||||||
|
"""
|
||||||
|
embeddable_values = []
|
||||||
|
vector_map = {}
|
||||||
|
|
||||||
|
for data_point in data_points:
|
||||||
|
property_names = DataPoint.get_embeddable_property_names(data_point)
|
||||||
|
key = str(data_point.id)
|
||||||
|
vector_map[key] = {}
|
||||||
|
|
||||||
|
for property_name in property_names:
|
||||||
|
property_value = getattr(data_point, property_name, None)
|
||||||
|
|
||||||
|
if property_value is not None:
|
||||||
|
vector_map[key][property_name] = len(embeddable_values)
|
||||||
|
embeddable_values.append(property_value)
|
||||||
|
else:
|
||||||
|
vector_map[key][property_name] = None
|
||||||
|
|
||||||
|
vectorized_values = await self.embed_data(embeddable_values)
|
||||||
|
|
||||||
|
for data_point in data_points:
|
||||||
|
vectorized_data = [
|
||||||
|
vectorized_values[vector_map[str(data_point.id)][property_name]]
|
||||||
|
if vector_map[str(data_point.id)][property_name] is not None
|
||||||
|
else None
|
||||||
|
for property_name in DataPoint.get_embeddable_property_names(data_point)
|
||||||
|
]
|
||||||
|
|
||||||
|
query, params = await self.create_data_point_query(data_point, vectorized_data)
|
||||||
|
self.query(query, params)
|
||||||
|
|
||||||
|
# Helper methods for DataPoint compatibility
|
||||||
|
async def add_data_point_node(self, node: DataPoint):
|
||||||
"""
|
"""
|
||||||
Add a single data point as a node in the graph.
|
Add a single data point as a node in the graph.
|
||||||
|
|
||||||
|
|
@ -394,7 +524,7 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
|
||||||
"""
|
"""
|
||||||
await self.create_data_points([node])
|
await self.create_data_points([node])
|
||||||
|
|
||||||
async def add_nodes(self, nodes: list[DataPoint]):
|
async def add_data_point_nodes(self, nodes: list[DataPoint]):
|
||||||
"""
|
"""
|
||||||
Add multiple data points as nodes in the graph.
|
Add multiple data points as nodes in the graph.
|
||||||
|
|
||||||
|
|
@ -405,35 +535,72 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
|
||||||
"""
|
"""
|
||||||
await self.create_data_points(nodes)
|
await self.create_data_points(nodes)
|
||||||
|
|
||||||
async def add_edge(self, edge: tuple[str, str, str, dict]):
|
@record_graph_changes
|
||||||
|
async def add_nodes(self, nodes: Union[List[Node], List[DataPoint]]) -> None:
|
||||||
"""
|
"""
|
||||||
Add an edge between two existing nodes in the graph based on the provided details.
|
Add multiple nodes to the graph in a single operation.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
-----------
|
-----------
|
||||||
|
|
||||||
- edge (tuple[str, str, str, dict]): A tuple containing details of the edge to be
|
- nodes (Union[List[Node], List[DataPoint]]): A list of Node tuples or DataPoint objects to be added to the graph.
|
||||||
added.
|
|
||||||
"""
|
"""
|
||||||
query = await self.create_edge_query(edge)
|
for node in nodes:
|
||||||
|
if isinstance(node, tuple) and len(node) == 2:
|
||||||
|
# Node is in (node_id, properties) format
|
||||||
|
node_id, properties = node
|
||||||
|
await self.add_node(node_id, properties)
|
||||||
|
elif hasattr(node, 'id') and hasattr(node, 'model_dump'):
|
||||||
|
# Node is a DataPoint object
|
||||||
|
await self.add_node(str(node.id), node.model_dump())
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Invalid node format: {node}. Expected tuple (node_id, properties) or DataPoint object.")
|
||||||
|
|
||||||
self.query(query)
|
async def add_edge(
|
||||||
|
self,
|
||||||
async def add_edges(self, edges: list[tuple[str, str, str, dict]]):
|
source_id: str,
|
||||||
|
target_id: str,
|
||||||
|
relationship_name: str,
|
||||||
|
properties: Optional[Dict[str, Any]] = None,
|
||||||
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Add multiple edges to the graph in a batch operation.
|
Create a new edge between two nodes in the graph.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
-----------
|
-----------
|
||||||
|
|
||||||
- edges (list[tuple[str, str, str, dict]]): A list of tuples, each containing
|
- source_id (str): The unique identifier of the source node.
|
||||||
details of the edges to be added.
|
- target_id (str): The unique identifier of the target node.
|
||||||
|
- relationship_name (str): The name of the relationship to be established by the
|
||||||
|
edge.
|
||||||
|
- properties (Optional[Dict[str, Any]]): Optional dictionary of properties
|
||||||
|
associated with the edge. (default None)
|
||||||
"""
|
"""
|
||||||
queries = [await self.create_edge_query(edge) for edge in edges]
|
if properties is None:
|
||||||
|
properties = {}
|
||||||
|
|
||||||
for query in queries:
|
edge_tuple = (source_id, target_id, relationship_name, properties)
|
||||||
|
query = await self.create_edge_query(edge_tuple)
|
||||||
self.query(query)
|
self.query(query)
|
||||||
|
|
||||||
|
@record_graph_changes
|
||||||
|
async def add_edges(self, edges: List[EdgeData]) -> None:
|
||||||
|
"""
|
||||||
|
Add multiple edges to the graph in a single operation.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- edges (List[EdgeData]): A list of EdgeData objects representing edges to be added.
|
||||||
|
"""
|
||||||
|
for edge in edges:
|
||||||
|
if isinstance(edge, tuple) and len(edge) == 4:
|
||||||
|
# Edge is in (source_id, target_id, relationship_name, properties) format
|
||||||
|
source_id, target_id, relationship_name, properties = edge
|
||||||
|
await self.add_edge(source_id, target_id, relationship_name, properties)
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Invalid edge format: {edge}. Expected tuple (source_id, target_id, relationship_name, properties).")
|
||||||
|
|
||||||
async def has_edges(self, edges):
|
async def has_edges(self, edges):
|
||||||
"""
|
"""
|
||||||
Check if the specified edges exist in the graph based on their attributes.
|
Check if the specified edges exist in the graph based on their attributes.
|
||||||
|
|
@ -446,31 +613,14 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
|
||||||
Returns:
|
Returns:
|
||||||
--------
|
--------
|
||||||
|
|
||||||
Returns a list of boolean values indicating the existence of each edge.
|
Returns a list of edge tuples that exist in the graph.
|
||||||
"""
|
"""
|
||||||
query = dedent(
|
existing_edges = []
|
||||||
"""
|
for edge in edges:
|
||||||
UNWIND $edges AS edge
|
exists = await self.has_edge(str(edge[0]), str(edge[1]), edge[2])
|
||||||
MATCH (a)-[r]->(b)
|
if exists:
|
||||||
WHERE id(a) = edge.from_node AND id(b) = edge.to_node AND type(r) = edge.relationship_name
|
existing_edges.append(edge)
|
||||||
RETURN edge.from_node AS from_node, edge.to_node AS to_node, edge.relationship_name AS relationship_name, count(r) > 0 AS edge_exists
|
return existing_edges
|
||||||
"""
|
|
||||||
).strip()
|
|
||||||
|
|
||||||
params = {
|
|
||||||
"edges": [
|
|
||||||
{
|
|
||||||
"from_node": str(edge[0]),
|
|
||||||
"to_node": str(edge[1]),
|
|
||||||
"relationship_name": edge[2],
|
|
||||||
}
|
|
||||||
for edge in edges
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
results = self.query(query, params).result_set
|
|
||||||
|
|
||||||
return [result["edge_exists"] for result in results]
|
|
||||||
|
|
||||||
async def retrieve(self, data_point_ids: list[UUID]):
|
async def retrieve(self, data_point_ids: list[UUID]):
|
||||||
"""
|
"""
|
||||||
|
|
@ -607,22 +757,38 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
|
||||||
if query_text and not query_vector:
|
if query_text and not query_vector:
|
||||||
query_vector = (await self.embed_data([query_text]))[0]
|
query_vector = (await self.embed_data([query_text]))[0]
|
||||||
|
|
||||||
|
# For FalkorDB, let's do a simple property-based search instead of vector search for now
|
||||||
|
# since the vector index might not be set up correctly
|
||||||
|
if "." in collection_name:
|
||||||
[label, attribute_name] = collection_name.split(".")
|
[label, attribute_name] = collection_name.split(".")
|
||||||
|
else:
|
||||||
|
# If no dot, treat the whole thing as a property search
|
||||||
|
label = ""
|
||||||
|
attribute_name = collection_name
|
||||||
|
|
||||||
query = dedent(
|
# Simple text-based search if we have query_text
|
||||||
f"""
|
if query_text:
|
||||||
CALL db.idx.vector.queryNodes(
|
if label:
|
||||||
'{label}',
|
query = f"""
|
||||||
'{attribute_name}',
|
MATCH (n:{label})
|
||||||
{limit},
|
WHERE toLower(toString(n.{attribute_name})) CONTAINS toLower($query_text)
|
||||||
vecf32({query_vector})
|
RETURN n, 1.0 as score
|
||||||
) YIELD node, score
|
LIMIT $limit
|
||||||
|
"""
|
||||||
|
else:
|
||||||
|
query = f"""
|
||||||
|
MATCH (n)
|
||||||
|
WHERE toLower(toString(n.{attribute_name})) CONTAINS toLower($query_text)
|
||||||
|
RETURN n, 1.0 as score
|
||||||
|
LIMIT $limit
|
||||||
"""
|
"""
|
||||||
).strip()
|
|
||||||
|
|
||||||
result = self.query(query)
|
|
||||||
|
|
||||||
|
params = {"query_text": query_text, "limit": limit}
|
||||||
|
result = self.query(query, params)
|
||||||
return result.result_set
|
return result.result_set
|
||||||
|
else:
|
||||||
|
# For vector search, return empty for now since vector indexing needs proper setup
|
||||||
|
return []
|
||||||
|
|
||||||
async def batch_search(
|
async def batch_search(
|
||||||
self,
|
self,
|
||||||
|
|
@ -726,37 +892,29 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
async def delete_node(self, collection_name: str, data_point_id: str):
|
async def delete_node(self, node_id: str) -> None:
|
||||||
"""
|
"""
|
||||||
Delete a single node specified by its data point ID from the database.
|
Delete a specified node from the graph by its ID.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
-----------
|
-----------
|
||||||
|
|
||||||
- collection_name (str): The name of the collection containing the node to be
|
- node_id (str): Unique identifier for the node to delete.
|
||||||
deleted.
|
|
||||||
- data_point_id (str): The ID of the data point to delete.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
--------
|
|
||||||
|
|
||||||
Returns the result of the deletion operation from the database.
|
|
||||||
"""
|
"""
|
||||||
return await self.delete_data_points([data_point_id])
|
query = f"MATCH (node {{id: '{node_id}'}}) DETACH DELETE node"
|
||||||
|
self.query(query)
|
||||||
|
|
||||||
async def delete_nodes(self, collection_name: str, data_point_ids: list[str]):
|
async def delete_nodes(self, node_ids: List[str]) -> None:
|
||||||
"""
|
"""
|
||||||
Delete multiple nodes specified by their IDs from the database.
|
Delete multiple nodes from the graph by their identifiers.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
-----------
|
-----------
|
||||||
|
|
||||||
- collection_name (str): The name of the collection containing the nodes to be
|
- node_ids (List[str]): A list of unique identifiers for the nodes to delete.
|
||||||
deleted.
|
|
||||||
- data_point_ids (list[str]): A list of IDs of the data points to delete from the
|
|
||||||
collection.
|
|
||||||
"""
|
"""
|
||||||
self.delete_data_points(data_point_ids)
|
for node_id in node_ids:
|
||||||
|
await self.delete_node(node_id)
|
||||||
|
|
||||||
async def delete_graph(self):
|
async def delete_graph(self):
|
||||||
"""
|
"""
|
||||||
|
|
@ -774,6 +932,309 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error deleting graph: {e}")
|
print(f"Error deleting graph: {e}")
|
||||||
|
|
||||||
|
async def get_node(self, node_id: str) -> Optional[NodeData]:
|
||||||
|
"""
|
||||||
|
Retrieve a single node from the graph using its ID.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- node_id (str): Unique identifier of the node to retrieve.
|
||||||
|
"""
|
||||||
|
result = self.query(
|
||||||
|
"MATCH (node) WHERE node.id = $node_id RETURN node",
|
||||||
|
{"node_id": node_id},
|
||||||
|
)
|
||||||
|
|
||||||
|
if result.result_set and len(result.result_set) > 0:
|
||||||
|
# FalkorDB returns node objects as first element in the result list
|
||||||
|
return result.result_set[0][0].properties
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def get_nodes(self, node_ids: List[str]) -> List[NodeData]:
|
||||||
|
"""
|
||||||
|
Retrieve multiple nodes from the graph using their IDs.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- node_ids (List[str]): A list of unique identifiers for the nodes to retrieve.
|
||||||
|
"""
|
||||||
|
result = self.query(
|
||||||
|
"MATCH (node) WHERE node.id IN $node_ids RETURN node",
|
||||||
|
{"node_ids": node_ids},
|
||||||
|
)
|
||||||
|
|
||||||
|
nodes = []
|
||||||
|
if result.result_set:
|
||||||
|
for record in result.result_set:
|
||||||
|
# FalkorDB returns node objects as first element in each record
|
||||||
|
nodes.append(record[0].properties)
|
||||||
|
return nodes
|
||||||
|
|
||||||
|
async def get_neighbors(self, node_id: str) -> List[NodeData]:
|
||||||
|
"""
|
||||||
|
Get all neighboring nodes connected to the specified node.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- node_id (str): Unique identifier of the node for which to retrieve neighbors.
|
||||||
|
"""
|
||||||
|
result = self.query(
|
||||||
|
"MATCH (node)-[]-(neighbor) WHERE node.id = $node_id RETURN DISTINCT neighbor",
|
||||||
|
{"node_id": node_id},
|
||||||
|
)
|
||||||
|
|
||||||
|
neighbors = []
|
||||||
|
if result.result_set:
|
||||||
|
for record in result.result_set:
|
||||||
|
# FalkorDB returns neighbor objects as first element in each record
|
||||||
|
neighbors.append(record[0].properties)
|
||||||
|
return neighbors
|
||||||
|
|
||||||
|
async def get_edges(self, node_id: str) -> List[EdgeData]:
|
||||||
|
"""
|
||||||
|
Retrieve all edges that are connected to the specified node.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- node_id (str): Unique identifier of the node whose edges are to be retrieved.
|
||||||
|
"""
|
||||||
|
result = self.query(
|
||||||
|
"""
|
||||||
|
MATCH (n)-[r]-(m)
|
||||||
|
WHERE n.id = $node_id
|
||||||
|
RETURN n.id AS source_id, m.id AS target_id, type(r) AS relationship_name, properties(r) AS properties
|
||||||
|
""",
|
||||||
|
{"node_id": node_id},
|
||||||
|
)
|
||||||
|
|
||||||
|
edges = []
|
||||||
|
if result.result_set:
|
||||||
|
for record in result.result_set:
|
||||||
|
# FalkorDB returns values by index: source_id, target_id, relationship_name, properties
|
||||||
|
edges.append((
|
||||||
|
record[0], # source_id
|
||||||
|
record[1], # target_id
|
||||||
|
record[2], # relationship_name
|
||||||
|
record[3] # properties
|
||||||
|
))
|
||||||
|
return edges
|
||||||
|
|
||||||
|
async def has_edge(self, source_id: str, target_id: str, relationship_name: str) -> bool:
|
||||||
|
"""
|
||||||
|
Verify if an edge exists between two specified nodes.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- source_id (str): Unique identifier of the source node.
|
||||||
|
- target_id (str): Unique identifier of the target node.
|
||||||
|
- relationship_name (str): Name of the relationship to verify.
|
||||||
|
"""
|
||||||
|
# Check both the sanitized relationship type and the original name in properties
|
||||||
|
sanitized_relationship = self.sanitize_relationship_name(relationship_name)
|
||||||
|
|
||||||
|
result = self.query(
|
||||||
|
f"""
|
||||||
|
MATCH (source)-[r:{sanitized_relationship}]->(target)
|
||||||
|
WHERE source.id = $source_id AND target.id = $target_id
|
||||||
|
AND (r.relationship_name = $relationship_name OR NOT EXISTS(r.relationship_name))
|
||||||
|
RETURN COUNT(r) > 0 AS edge_exists
|
||||||
|
""",
|
||||||
|
{"source_id": source_id, "target_id": target_id, "relationship_name": relationship_name},
|
||||||
|
)
|
||||||
|
|
||||||
|
if result.result_set and len(result.result_set) > 0:
|
||||||
|
# FalkorDB returns scalar results as a list, access by index instead of key
|
||||||
|
return result.result_set[0][0]
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def get_graph_metrics(self, include_optional: bool = False) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Fetch metrics and statistics of the graph, possibly including optional details.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- include_optional (bool): Flag indicating whether to include optional metrics or
|
||||||
|
not. (default False)
|
||||||
|
"""
|
||||||
|
# Get basic node and edge counts
|
||||||
|
node_result = self.query("MATCH (n) RETURN count(n) AS node_count")
|
||||||
|
edge_result = self.query("MATCH ()-[r]->() RETURN count(r) AS edge_count")
|
||||||
|
|
||||||
|
# FalkorDB returns scalar results as a list, access by index instead of key
|
||||||
|
num_nodes = node_result.result_set[0][0] if node_result.result_set else 0
|
||||||
|
num_edges = edge_result.result_set[0][0] if edge_result.result_set else 0
|
||||||
|
|
||||||
|
metrics = {
|
||||||
|
"num_nodes": num_nodes,
|
||||||
|
"num_edges": num_edges,
|
||||||
|
"mean_degree": (2 * num_edges) / num_nodes if num_nodes > 0 else 0,
|
||||||
|
"edge_density": num_edges / (num_nodes * (num_nodes - 1)) if num_nodes > 1 else 0,
|
||||||
|
"num_connected_components": 1, # Simplified for now
|
||||||
|
"sizes_of_connected_components": [num_nodes] if num_nodes > 0 else [],
|
||||||
|
}
|
||||||
|
|
||||||
|
if include_optional:
|
||||||
|
# Add optional metrics - simplified implementation
|
||||||
|
metrics.update({
|
||||||
|
"num_selfloops": 0, # Simplified
|
||||||
|
"diameter": -1, # Not implemented
|
||||||
|
"avg_shortest_path_length": -1, # Not implemented
|
||||||
|
"avg_clustering": -1, # Not implemented
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
metrics.update({
|
||||||
|
"num_selfloops": -1,
|
||||||
|
"diameter": -1,
|
||||||
|
"avg_shortest_path_length": -1,
|
||||||
|
"avg_clustering": -1,
|
||||||
|
})
|
||||||
|
|
||||||
|
return metrics
|
||||||
|
|
||||||
|
async def get_document_subgraph(self, content_hash: str):
|
||||||
|
"""
|
||||||
|
Get a subgraph related to a specific document by content hash.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- content_hash (str): The content hash of the document to find.
|
||||||
|
"""
|
||||||
|
query = """
|
||||||
|
MATCH (d) WHERE d.id CONTAINS $content_hash
|
||||||
|
OPTIONAL MATCH (d)<-[:CHUNK_OF]-(c)
|
||||||
|
OPTIONAL MATCH (c)-[:HAS_ENTITY]->(e)
|
||||||
|
OPTIONAL MATCH (e)-[:IS_INSTANCE_OF]->(et)
|
||||||
|
RETURN d AS document,
|
||||||
|
COLLECT(DISTINCT c) AS chunks,
|
||||||
|
COLLECT(DISTINCT e) AS orphan_entities,
|
||||||
|
COLLECT(DISTINCT c) AS made_from_nodes,
|
||||||
|
COLLECT(DISTINCT et) AS orphan_types
|
||||||
|
"""
|
||||||
|
|
||||||
|
result = self.query(query, {"content_hash": f"text_{content_hash}"})
|
||||||
|
|
||||||
|
if not result.result_set or not result.result_set[0]:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Convert result to dictionary format
|
||||||
|
# FalkorDB returns values by index: document, chunks, orphan_entities, made_from_nodes, orphan_types
|
||||||
|
record = result.result_set[0]
|
||||||
|
return {
|
||||||
|
"document": record[0],
|
||||||
|
"chunks": record[1],
|
||||||
|
"orphan_entities": record[2],
|
||||||
|
"made_from_nodes": record[3],
|
||||||
|
"orphan_types": record[4],
|
||||||
|
}
|
||||||
|
|
||||||
|
async def get_degree_one_nodes(self, node_type: str):
|
||||||
|
"""
|
||||||
|
Get all nodes that have only one connection.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- node_type (str): The type of nodes to filter by, must be 'Entity' or 'EntityType'.
|
||||||
|
"""
|
||||||
|
if not node_type or node_type not in ["Entity", "EntityType"]:
|
||||||
|
raise ValueError("node_type must be either 'Entity' or 'EntityType'")
|
||||||
|
|
||||||
|
result = self.query(
|
||||||
|
f"""
|
||||||
|
MATCH (n:{node_type})
|
||||||
|
WITH n, COUNT {{ MATCH (n)--() }} as degree
|
||||||
|
WHERE degree = 1
|
||||||
|
RETURN n
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
|
# FalkorDB returns node objects as first element in each record
|
||||||
|
return [record[0] for record in result.result_set] if result.result_set else []
|
||||||
|
|
||||||
|
async def get_nodeset_subgraph(
|
||||||
|
self, node_type: Type[Any], node_name: List[str]
|
||||||
|
) -> Tuple[List[Tuple[int, dict]], List[Tuple[int, int, str, dict]]]:
|
||||||
|
"""
|
||||||
|
Fetch a subgraph consisting of a specific set of nodes and their relationships.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
-----------
|
||||||
|
|
||||||
|
- node_type (Type[Any]): The type of nodes to include in the subgraph.
|
||||||
|
- node_name (List[str]): A list of names of the nodes to include in the subgraph.
|
||||||
|
"""
|
||||||
|
label = node_type.__name__
|
||||||
|
|
||||||
|
# Find primary nodes of the specified type and names
|
||||||
|
primary_query = f"""
|
||||||
|
UNWIND $names AS wantedName
|
||||||
|
MATCH (n:{label})
|
||||||
|
WHERE n.name = wantedName
|
||||||
|
RETURN DISTINCT n.id, properties(n) AS properties
|
||||||
|
"""
|
||||||
|
|
||||||
|
primary_result = self.query(primary_query, {"names": node_name})
|
||||||
|
if not primary_result.result_set:
|
||||||
|
return [], []
|
||||||
|
|
||||||
|
# FalkorDB returns values by index: id, properties
|
||||||
|
primary_ids = [record[0] for record in primary_result.result_set]
|
||||||
|
|
||||||
|
# Find neighbors of primary nodes
|
||||||
|
neighbor_query = """
|
||||||
|
MATCH (n)-[]-(neighbor)
|
||||||
|
WHERE n.id IN $ids
|
||||||
|
RETURN DISTINCT neighbor.id, properties(neighbor) AS properties
|
||||||
|
"""
|
||||||
|
|
||||||
|
neighbor_result = self.query(neighbor_query, {"ids": primary_ids})
|
||||||
|
# FalkorDB returns values by index: id, properties
|
||||||
|
neighbor_ids = [record[0] for record in neighbor_result.result_set] if neighbor_result.result_set else []
|
||||||
|
|
||||||
|
all_ids = list(set(primary_ids + neighbor_ids))
|
||||||
|
|
||||||
|
# Get all nodes in the subgraph
|
||||||
|
nodes_query = """
|
||||||
|
MATCH (n)
|
||||||
|
WHERE n.id IN $ids
|
||||||
|
RETURN n.id, properties(n) AS properties
|
||||||
|
"""
|
||||||
|
|
||||||
|
nodes_result = self.query(nodes_query, {"ids": all_ids})
|
||||||
|
nodes = []
|
||||||
|
if nodes_result.result_set:
|
||||||
|
for record in nodes_result.result_set:
|
||||||
|
# FalkorDB returns values by index: id, properties
|
||||||
|
nodes.append((record[0], record[1]))
|
||||||
|
|
||||||
|
# Get edges between these nodes
|
||||||
|
edges_query = """
|
||||||
|
MATCH (a)-[r]->(b)
|
||||||
|
WHERE a.id IN $ids AND b.id IN $ids
|
||||||
|
RETURN a.id AS source_id, b.id AS target_id, type(r) AS relationship_name, properties(r) AS properties
|
||||||
|
"""
|
||||||
|
|
||||||
|
edges_result = self.query(edges_query, {"ids": all_ids})
|
||||||
|
edges = []
|
||||||
|
if edges_result.result_set:
|
||||||
|
for record in edges_result.result_set:
|
||||||
|
# FalkorDB returns values by index: source_id, target_id, relationship_name, properties
|
||||||
|
edges.append((
|
||||||
|
record[0], # source_id
|
||||||
|
record[1], # target_id
|
||||||
|
record[2], # relationship_name
|
||||||
|
record[3] # properties
|
||||||
|
))
|
||||||
|
|
||||||
|
return nodes, edges
|
||||||
|
|
||||||
async def prune(self):
|
async def prune(self):
|
||||||
"""
|
"""
|
||||||
Prune the graph by deleting the entire graph structure.
|
Prune the graph by deleting the entire graph structure.
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,202 @@ from cognee.modules.engine.utils import (
|
||||||
from cognee.shared.data_models import KnowledgeGraph
|
from cognee.shared.data_models import KnowledgeGraph
|
||||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
|
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
|
||||||
|
|
||||||
|
# Constants for node key suffixes
|
||||||
|
TYPE_SUFFIX = "_type"
|
||||||
|
ENTITY_SUFFIX = "_entity"
|
||||||
|
|
||||||
|
|
||||||
|
def _create_ontology_node(ont_node, category: str, data_chunk: DocumentChunk):
|
||||||
|
"""Create an ontology node based on category (classes or individuals)."""
|
||||||
|
ont_node_id = generate_node_id(ont_node.name)
|
||||||
|
ont_node_name = generate_node_name(ont_node.name)
|
||||||
|
|
||||||
|
if category == "classes":
|
||||||
|
return f"{ont_node_id}{TYPE_SUFFIX}", EntityType(
|
||||||
|
id=ont_node_id,
|
||||||
|
name=ont_node_name,
|
||||||
|
description=ont_node_name,
|
||||||
|
ontology_valid=True,
|
||||||
|
)
|
||||||
|
elif category == "individuals":
|
||||||
|
return f"{ont_node_id}{ENTITY_SUFFIX}", Entity(
|
||||||
|
id=ont_node_id,
|
||||||
|
name=ont_node_name,
|
||||||
|
description=ont_node_name,
|
||||||
|
ontology_valid=True,
|
||||||
|
belongs_to_set=data_chunk.belongs_to_set,
|
||||||
|
)
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
|
||||||
|
def _process_ontology_nodes(
|
||||||
|
ontology_nodes, data_chunk: DocumentChunk, added_nodes_map: dict, added_ontology_nodes_map: dict
|
||||||
|
):
|
||||||
|
"""Process and add ontology nodes to the appropriate maps."""
|
||||||
|
for ont_node in ontology_nodes:
|
||||||
|
ont_node_key, ont_node_obj = _create_ontology_node(ont_node, ont_node.category, data_chunk)
|
||||||
|
if (
|
||||||
|
ont_node_key
|
||||||
|
and ont_node_obj
|
||||||
|
and ont_node_key not in added_nodes_map
|
||||||
|
and ont_node_key not in added_ontology_nodes_map
|
||||||
|
):
|
||||||
|
added_ontology_nodes_map[ont_node_key] = ont_node_obj
|
||||||
|
|
||||||
|
|
||||||
|
def _process_ontology_edges(
|
||||||
|
ontology_edges,
|
||||||
|
existing_edges_map: dict,
|
||||||
|
ontology_relationships: list,
|
||||||
|
ontology_valid: bool = True,
|
||||||
|
):
|
||||||
|
"""Process ontology edges and add them to relationships if not already existing."""
|
||||||
|
for source, relation, target in ontology_edges:
|
||||||
|
source_node_id = generate_node_id(source)
|
||||||
|
target_node_id = generate_node_id(target)
|
||||||
|
relationship_name = generate_edge_name(relation)
|
||||||
|
edge_key = f"{source_node_id}_{target_node_id}_{relationship_name}"
|
||||||
|
|
||||||
|
if edge_key not in existing_edges_map:
|
||||||
|
ontology_relationships.append(
|
||||||
|
(
|
||||||
|
source_node_id,
|
||||||
|
target_node_id,
|
||||||
|
relationship_name,
|
||||||
|
{
|
||||||
|
"relationship_name": relationship_name,
|
||||||
|
"source_node_id": source_node_id,
|
||||||
|
"target_node_id": target_node_id,
|
||||||
|
"ontology_valid": ontology_valid,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
existing_edges_map[edge_key] = True
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_ontology_mapping(node_name: str, node_type: str, ontology_resolver: OntologyResolver):
|
||||||
|
"""Resolve ontology mapping for a node and return validation result and closest match."""
|
||||||
|
ontology_nodes, ontology_edges, closest_node = ontology_resolver.get_subgraph(
|
||||||
|
node_name=node_name, node_type=node_type
|
||||||
|
)
|
||||||
|
|
||||||
|
ontology_validated = bool(closest_node)
|
||||||
|
mapped_name = closest_node.name if closest_node else node_name
|
||||||
|
|
||||||
|
return ontology_nodes, ontology_edges, ontology_validated, mapped_name
|
||||||
|
|
||||||
|
|
||||||
|
def _get_or_create_type_node(
|
||||||
|
node,
|
||||||
|
data_chunk: DocumentChunk,
|
||||||
|
ontology_resolver: OntologyResolver,
|
||||||
|
added_nodes_map: dict,
|
||||||
|
added_ontology_nodes_map: dict,
|
||||||
|
name_mapping: dict,
|
||||||
|
key_mapping: dict,
|
||||||
|
existing_edges_map: dict,
|
||||||
|
ontology_relationships: list,
|
||||||
|
):
|
||||||
|
"""Get or create a type node with ontology validation."""
|
||||||
|
node_id = generate_node_id(node.id)
|
||||||
|
node_name = generate_node_name(node.name)
|
||||||
|
type_node_id = generate_node_id(node.type)
|
||||||
|
type_node_name = generate_node_name(node.type)
|
||||||
|
type_node_key = f"{type_node_id}{TYPE_SUFFIX}"
|
||||||
|
|
||||||
|
# Check if node already exists
|
||||||
|
if type_node_key in added_nodes_map or type_node_key in key_mapping:
|
||||||
|
return added_nodes_map.get(type_node_key) or added_nodes_map.get(
|
||||||
|
key_mapping.get(type_node_key)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Resolve ontology for type
|
||||||
|
ontology_nodes, ontology_edges, ontology_validated, mapped_type_name = (
|
||||||
|
_resolve_ontology_mapping(type_node_name, "classes", ontology_resolver)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Update mappings if ontology validation succeeded
|
||||||
|
if ontology_validated:
|
||||||
|
name_mapping[type_node_name] = mapped_type_name
|
||||||
|
old_key = type_node_key
|
||||||
|
type_node_id = generate_node_id(mapped_type_name)
|
||||||
|
type_node_key = f"{type_node_id}{TYPE_SUFFIX}"
|
||||||
|
type_node_name = generate_node_name(mapped_type_name)
|
||||||
|
key_mapping[old_key] = type_node_key
|
||||||
|
|
||||||
|
# Create type node
|
||||||
|
type_node = EntityType(
|
||||||
|
id=type_node_id,
|
||||||
|
name=type_node_name,
|
||||||
|
type=type_node_name,
|
||||||
|
description=type_node_name,
|
||||||
|
ontology_valid=ontology_validated,
|
||||||
|
)
|
||||||
|
added_nodes_map[type_node_key] = type_node
|
||||||
|
|
||||||
|
# Process ontology nodes and edges
|
||||||
|
_process_ontology_nodes(ontology_nodes, data_chunk, added_nodes_map, added_ontology_nodes_map)
|
||||||
|
_process_ontology_edges(ontology_edges, existing_edges_map, ontology_relationships)
|
||||||
|
|
||||||
|
return type_node
|
||||||
|
|
||||||
|
|
||||||
|
def _get_or_create_entity_node(
|
||||||
|
node,
|
||||||
|
type_node,
|
||||||
|
data_chunk: DocumentChunk,
|
||||||
|
ontology_resolver: OntologyResolver,
|
||||||
|
added_nodes_map: dict,
|
||||||
|
added_ontology_nodes_map: dict,
|
||||||
|
name_mapping: dict,
|
||||||
|
key_mapping: dict,
|
||||||
|
existing_edges_map: dict,
|
||||||
|
ontology_relationships: list,
|
||||||
|
):
|
||||||
|
"""Get or create an entity node with ontology validation."""
|
||||||
|
node_id = generate_node_id(node.id)
|
||||||
|
node_name = generate_node_name(node.name)
|
||||||
|
entity_node_key = f"{node_id}{ENTITY_SUFFIX}"
|
||||||
|
|
||||||
|
# Check if node already exists
|
||||||
|
if entity_node_key in added_nodes_map or entity_node_key in key_mapping:
|
||||||
|
return added_nodes_map.get(entity_node_key) or added_nodes_map.get(
|
||||||
|
key_mapping.get(entity_node_key)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Resolve ontology for entity
|
||||||
|
ontology_nodes, ontology_edges, ontology_validated, mapped_entity_name = (
|
||||||
|
_resolve_ontology_mapping(node_name, "individuals", ontology_resolver)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Update mappings if ontology validation succeeded
|
||||||
|
if ontology_validated:
|
||||||
|
name_mapping[node_name] = mapped_entity_name
|
||||||
|
old_key = entity_node_key
|
||||||
|
node_id = generate_node_id(mapped_entity_name)
|
||||||
|
entity_node_key = f"{node_id}{ENTITY_SUFFIX}"
|
||||||
|
node_name = generate_node_name(mapped_entity_name)
|
||||||
|
key_mapping[old_key] = entity_node_key
|
||||||
|
|
||||||
|
# Create entity node
|
||||||
|
entity_node = Entity(
|
||||||
|
id=node_id,
|
||||||
|
name=node_name,
|
||||||
|
is_a=type_node,
|
||||||
|
description=node.description,
|
||||||
|
ontology_valid=ontology_validated,
|
||||||
|
belongs_to_set=data_chunk.belongs_to_set,
|
||||||
|
)
|
||||||
|
added_nodes_map[entity_node_key] = entity_node
|
||||||
|
|
||||||
|
# Process ontology nodes and edges
|
||||||
|
_process_ontology_nodes(ontology_nodes, data_chunk, added_nodes_map, added_ontology_nodes_map)
|
||||||
|
_process_ontology_edges(
|
||||||
|
ontology_edges, existing_edges_map, ontology_relationships, ontology_valid=True
|
||||||
|
)
|
||||||
|
|
||||||
|
return entity_node
|
||||||
|
|
||||||
|
|
||||||
def expand_with_nodes_and_edges(
|
def expand_with_nodes_and_edges(
|
||||||
data_chunks: list[DocumentChunk],
|
data_chunks: list[DocumentChunk],
|
||||||
|
|
@ -17,17 +213,25 @@ def expand_with_nodes_and_edges(
|
||||||
ontology_resolver: OntologyResolver = None,
|
ontology_resolver: OntologyResolver = None,
|
||||||
existing_edges_map: Optional[dict[str, bool]] = None,
|
existing_edges_map: Optional[dict[str, bool]] = None,
|
||||||
):
|
):
|
||||||
if existing_edges_map is None:
|
"""
|
||||||
existing_edges_map = {}
|
Expand chunk graphs with nodes and edges, applying ontology validation.
|
||||||
|
|
||||||
if ontology_resolver is None:
|
Args:
|
||||||
ontology_resolver = OntologyResolver()
|
data_chunks: List of document chunks
|
||||||
|
chunk_graphs: List of knowledge graphs corresponding to chunks
|
||||||
|
ontology_resolver: Optional ontology resolver for validation
|
||||||
|
existing_edges_map: Optional map of existing edges to avoid duplicates
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (graph_nodes, graph_edges)
|
||||||
|
"""
|
||||||
|
existing_edges_map = existing_edges_map or {}
|
||||||
|
ontology_resolver = ontology_resolver or OntologyResolver()
|
||||||
|
|
||||||
added_nodes_map = {}
|
added_nodes_map = {}
|
||||||
added_ontology_nodes_map = {}
|
added_ontology_nodes_map = {}
|
||||||
relationships = []
|
relationships = []
|
||||||
ontology_relationships = []
|
ontology_relationships = []
|
||||||
|
|
||||||
name_mapping = {}
|
name_mapping = {}
|
||||||
key_mapping = {}
|
key_mapping = {}
|
||||||
|
|
||||||
|
|
@ -35,184 +239,41 @@ def expand_with_nodes_and_edges(
|
||||||
if not graph:
|
if not graph:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Process nodes
|
||||||
for node in graph.nodes:
|
for node in graph.nodes:
|
||||||
node_id = generate_node_id(node.id)
|
# Get or create type node
|
||||||
node_name = generate_node_name(node.name)
|
type_node = _get_or_create_type_node(
|
||||||
type_node_id = generate_node_id(node.type)
|
node,
|
||||||
type_node_name = generate_node_name(node.type)
|
data_chunk,
|
||||||
|
ontology_resolver,
|
||||||
ontology_validated_source_type = False
|
added_nodes_map,
|
||||||
ontology_validated_source_ent = False
|
added_ontology_nodes_map,
|
||||||
|
name_mapping,
|
||||||
type_node_key = f"{type_node_id}_type"
|
key_mapping,
|
||||||
|
existing_edges_map,
|
||||||
if type_node_key not in added_nodes_map and type_node_key not in key_mapping:
|
ontology_relationships,
|
||||||
(
|
|
||||||
ontology_entity_type_nodes,
|
|
||||||
ontology_entity_type_edges,
|
|
||||||
ontology_closest_class_node,
|
|
||||||
) = ontology_resolver.get_subgraph(node_name=type_node_name, node_type="classes")
|
|
||||||
|
|
||||||
if ontology_closest_class_node:
|
|
||||||
name_mapping[type_node_name] = ontology_closest_class_node.name
|
|
||||||
ontology_validated_source_type = True
|
|
||||||
old_key = type_node_key
|
|
||||||
type_node_id = generate_node_id(ontology_closest_class_node.name)
|
|
||||||
type_node_key = f"{type_node_id}_type"
|
|
||||||
type_node_name = generate_node_name(ontology_closest_class_node.name)
|
|
||||||
key_mapping[old_key] = type_node_key
|
|
||||||
|
|
||||||
type_node = EntityType(
|
|
||||||
id=type_node_id,
|
|
||||||
name=type_node_name,
|
|
||||||
type=type_node_name,
|
|
||||||
description=type_node_name,
|
|
||||||
ontology_valid=ontology_validated_source_type,
|
|
||||||
)
|
|
||||||
added_nodes_map[type_node_key] = type_node
|
|
||||||
|
|
||||||
for ontology_node_to_store in ontology_entity_type_nodes:
|
|
||||||
ont_node_id = generate_node_id(ontology_node_to_store.name)
|
|
||||||
ont_node_name = generate_node_name(ontology_node_to_store.name)
|
|
||||||
|
|
||||||
if ontology_node_to_store.category == "classes":
|
|
||||||
ont_node_key = f"{ont_node_id}_type"
|
|
||||||
if (ont_node_key not in added_nodes_map) and (
|
|
||||||
ont_node_key not in added_ontology_nodes_map
|
|
||||||
):
|
|
||||||
added_ontology_nodes_map[ont_node_key] = EntityType(
|
|
||||||
id=ont_node_id,
|
|
||||||
name=ont_node_name,
|
|
||||||
description=ont_node_name,
|
|
||||||
ontology_valid=True,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
elif ontology_node_to_store.category == "individuals":
|
# Get or create entity node
|
||||||
ont_node_key = f"{ont_node_id}_entity"
|
entity_node = _get_or_create_entity_node(
|
||||||
if (ont_node_key not in added_nodes_map) and (
|
node,
|
||||||
ont_node_key not in added_ontology_nodes_map
|
type_node,
|
||||||
):
|
data_chunk,
|
||||||
added_ontology_nodes_map[ont_node_key] = Entity(
|
ontology_resolver,
|
||||||
id=ont_node_id,
|
added_nodes_map,
|
||||||
name=ont_node_name,
|
added_ontology_nodes_map,
|
||||||
description=ont_node_name,
|
name_mapping,
|
||||||
ontology_valid=True,
|
key_mapping,
|
||||||
belongs_to_set=data_chunk.belongs_to_set,
|
existing_edges_map,
|
||||||
)
|
ontology_relationships,
|
||||||
|
|
||||||
for source, relation, target in ontology_entity_type_edges:
|
|
||||||
source_node_id = generate_node_id(source)
|
|
||||||
target_node_id = generate_node_id(target)
|
|
||||||
relationship_name = generate_edge_name(relation)
|
|
||||||
edge_key = f"{source_node_id}_{target_node_id}_{relationship_name}"
|
|
||||||
|
|
||||||
if edge_key not in existing_edges_map:
|
|
||||||
ontology_relationships.append(
|
|
||||||
(
|
|
||||||
source_node_id,
|
|
||||||
target_node_id,
|
|
||||||
relationship_name,
|
|
||||||
dict(
|
|
||||||
relationship_name=relationship_name,
|
|
||||||
source_node_id=source_node_id,
|
|
||||||
target_node_id=target_node_id,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
existing_edges_map[edge_key] = True
|
|
||||||
else:
|
|
||||||
type_node = added_nodes_map.get(type_node_key) or added_nodes_map.get(
|
|
||||||
key_mapping.get(type_node_key)
|
|
||||||
)
|
|
||||||
|
|
||||||
entity_node_key = f"{node_id}_entity"
|
|
||||||
|
|
||||||
if entity_node_key not in added_nodes_map and entity_node_key not in key_mapping:
|
|
||||||
ontology_entity_nodes, ontology_entity_edges, start_ent_ont = (
|
|
||||||
ontology_resolver.get_subgraph(node_name=node_name, node_type="individuals")
|
|
||||||
)
|
|
||||||
|
|
||||||
if start_ent_ont:
|
|
||||||
name_mapping[node_name] = start_ent_ont.name
|
|
||||||
ontology_validated_source_ent = True
|
|
||||||
old_key = entity_node_key
|
|
||||||
node_id = generate_node_id(start_ent_ont.name)
|
|
||||||
entity_node_key = f"{node_id}_entity"
|
|
||||||
node_name = generate_node_name(start_ent_ont.name)
|
|
||||||
key_mapping[old_key] = entity_node_key
|
|
||||||
|
|
||||||
entity_node = Entity(
|
|
||||||
id=node_id,
|
|
||||||
name=node_name,
|
|
||||||
is_a=type_node,
|
|
||||||
description=node.description,
|
|
||||||
ontology_valid=ontology_validated_source_ent,
|
|
||||||
belongs_to_set=data_chunk.belongs_to_set,
|
|
||||||
)
|
|
||||||
|
|
||||||
added_nodes_map[entity_node_key] = entity_node
|
|
||||||
|
|
||||||
for ontology_node_to_store in ontology_entity_nodes:
|
|
||||||
ont_node_id = generate_node_id(ontology_node_to_store.name)
|
|
||||||
ont_node_name = generate_node_name(ontology_node_to_store.name)
|
|
||||||
|
|
||||||
if ontology_node_to_store.category == "classes":
|
|
||||||
ont_node_key = f"{ont_node_id}_type"
|
|
||||||
if (ont_node_key not in added_nodes_map) and (
|
|
||||||
ont_node_key not in added_ontology_nodes_map
|
|
||||||
):
|
|
||||||
added_ontology_nodes_map[ont_node_key] = EntityType(
|
|
||||||
id=ont_node_id,
|
|
||||||
name=ont_node_name,
|
|
||||||
description=ont_node_name,
|
|
||||||
ontology_valid=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
elif ontology_node_to_store.category == "individuals":
|
|
||||||
ont_node_key = f"{ont_node_id}_entity"
|
|
||||||
if (ont_node_key not in added_nodes_map) and (
|
|
||||||
ont_node_key not in added_ontology_nodes_map
|
|
||||||
):
|
|
||||||
added_ontology_nodes_map[ont_node_key] = Entity(
|
|
||||||
id=ont_node_id,
|
|
||||||
name=ont_node_name,
|
|
||||||
description=ont_node_name,
|
|
||||||
ontology_valid=True,
|
|
||||||
belongs_to_set=data_chunk.belongs_to_set,
|
|
||||||
)
|
|
||||||
|
|
||||||
for source, relation, target in ontology_entity_edges:
|
|
||||||
source_node_id = generate_node_id(source)
|
|
||||||
target_node_id = generate_node_id(target)
|
|
||||||
relationship_name = generate_edge_name(relation)
|
|
||||||
edge_key = f"{source_node_id}_{target_node_id}_{relationship_name}"
|
|
||||||
|
|
||||||
if edge_key not in existing_edges_map:
|
|
||||||
ontology_relationships.append(
|
|
||||||
(
|
|
||||||
source_node_id,
|
|
||||||
target_node_id,
|
|
||||||
relationship_name,
|
|
||||||
dict(
|
|
||||||
relationship_name=relationship_name,
|
|
||||||
source_node_id=source_node_id,
|
|
||||||
target_node_id=target_node_id,
|
|
||||||
ontology_valid=True,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
existing_edges_map[edge_key] = True
|
|
||||||
|
|
||||||
else:
|
|
||||||
entity_node = added_nodes_map.get(entity_node_key) or added_nodes_map.get(
|
|
||||||
key_mapping.get(entity_node_key)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Add entity to chunk
|
||||||
if data_chunk.contains is None:
|
if data_chunk.contains is None:
|
||||||
data_chunk.contains = []
|
data_chunk.contains = []
|
||||||
|
|
||||||
data_chunk.contains.append(entity_node)
|
data_chunk.contains.append(entity_node)
|
||||||
|
|
||||||
|
# Process edges
|
||||||
for edge in graph.edges:
|
for edge in graph.edges:
|
||||||
source_node_id = generate_node_id(
|
source_node_id = generate_node_id(
|
||||||
name_mapping.get(edge.source_node_id, edge.source_node_id)
|
name_mapping.get(edge.source_node_id, edge.source_node_id)
|
||||||
|
|
@ -229,12 +290,12 @@ def expand_with_nodes_and_edges(
|
||||||
source_node_id,
|
source_node_id,
|
||||||
target_node_id,
|
target_node_id,
|
||||||
relationship_name,
|
relationship_name,
|
||||||
dict(
|
{
|
||||||
relationship_name=relationship_name,
|
"relationship_name": relationship_name,
|
||||||
source_node_id=source_node_id,
|
"source_node_id": source_node_id,
|
||||||
target_node_id=target_node_id,
|
"target_node_id": target_node_id,
|
||||||
ontology_valid=False,
|
"ontology_valid": False,
|
||||||
),
|
},
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
existing_edges_map[edge_key] = True
|
existing_edges_map[edge_key] = True
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,48 @@ from cognee.modules.search.types import SearchType
|
||||||
logger = get_logger()
|
logger = get_logger()
|
||||||
|
|
||||||
|
|
||||||
|
async def check_falkordb_connection():
|
||||||
|
"""Check if FalkorDB is available at localhost:6379"""
|
||||||
|
try:
|
||||||
|
from falkordb import FalkorDB
|
||||||
|
client = FalkorDB(host="localhost", port=6379)
|
||||||
|
# Try to list graphs to check connection
|
||||||
|
client.list_graphs()
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"FalkorDB not available at localhost:6379: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
|
# Check if FalkorDB is available
|
||||||
|
if not await check_falkordb_connection():
|
||||||
|
print("⚠️ FalkorDB is not available at localhost:6379")
|
||||||
|
print(" To run this test, start FalkorDB server:")
|
||||||
|
print(" docker run -p 6379:6379 falkordb/falkordb:latest")
|
||||||
|
print(" Skipping FalkorDB test...")
|
||||||
|
return
|
||||||
|
|
||||||
|
print("✅ FalkorDB connection successful, running test...")
|
||||||
|
|
||||||
|
# Configure FalkorDB as the graph database provider
|
||||||
|
cognee.config.set_graph_db_config(
|
||||||
|
{
|
||||||
|
"graph_database_url": "localhost", # FalkorDB URL (using Redis protocol)
|
||||||
|
"graph_database_port": 6379,
|
||||||
|
"graph_database_provider": "falkordb",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Configure FalkorDB as the vector database provider too since it's a hybrid adapter
|
||||||
|
cognee.config.set_vector_db_config(
|
||||||
|
{
|
||||||
|
"vector_db_url": "localhost",
|
||||||
|
"vector_db_port": 6379,
|
||||||
|
"vector_db_provider": "falkordb",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
data_directory_path = str(
|
data_directory_path = str(
|
||||||
pathlib.Path(
|
pathlib.Path(
|
||||||
os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_falkordb")
|
os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_falkordb")
|
||||||
|
|
@ -86,9 +127,23 @@ async def main():
|
||||||
# Assert relational, vector and graph databases have been cleaned properly
|
# Assert relational, vector and graph databases have been cleaned properly
|
||||||
await cognee.prune.prune_system(metadata=True)
|
await cognee.prune.prune_system(metadata=True)
|
||||||
|
|
||||||
|
# For FalkorDB vector engine, check if collections are empty
|
||||||
|
# Since FalkorDB is a hybrid adapter, we can check if the graph is empty
|
||||||
|
# as the vector data is stored in the same graph
|
||||||
|
if hasattr(vector_engine, 'driver'):
|
||||||
|
# This is FalkorDB - check if graphs exist
|
||||||
|
collections = vector_engine.driver.list_graphs()
|
||||||
|
# The graph should be deleted, so either no graphs or empty graph
|
||||||
|
if vector_engine.graph_name in collections:
|
||||||
|
# Graph exists but should be empty
|
||||||
|
vector_graph_data = await vector_engine.get_graph_data()
|
||||||
|
vector_nodes, vector_edges = vector_graph_data
|
||||||
|
assert len(vector_nodes) == 0 and len(vector_edges) == 0, "FalkorDB vector database is not empty"
|
||||||
|
else:
|
||||||
|
# Fallback for other vector engines like LanceDB
|
||||||
connection = await vector_engine.get_connection()
|
connection = await vector_engine.get_connection()
|
||||||
collection_names = await connection.table_names()
|
collection_names = await connection.table_names()
|
||||||
assert len(collection_names) == 0, "LanceDB vector database is not empty"
|
assert len(collection_names) == 0, "Vector database is not empty"
|
||||||
|
|
||||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||||
|
|
||||||
|
|
@ -96,10 +151,19 @@ async def main():
|
||||||
"SQLite relational database is not empty"
|
"SQLite relational database is not empty"
|
||||||
)
|
)
|
||||||
|
|
||||||
from cognee.infrastructure.databases.graph import get_graph_config
|
# For FalkorDB, check if the graph database is empty
|
||||||
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||||
|
|
||||||
graph_config = get_graph_config()
|
graph_engine = get_graph_engine()
|
||||||
assert not os.path.exists(graph_config.graph_file_path), "Networkx graph database is not empty"
|
graph_data = await graph_engine.get_graph_data()
|
||||||
|
nodes, edges = graph_data
|
||||||
|
assert len(nodes) == 0 and len(edges) == 0, "FalkorDB graph database is not empty"
|
||||||
|
|
||||||
|
print("🎉 FalkorDB test completed successfully!")
|
||||||
|
print(" ✓ Data ingestion worked")
|
||||||
|
print(" ✓ Cognify processing worked")
|
||||||
|
print(" ✓ Search operations worked")
|
||||||
|
print(" ✓ Cleanup worked")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
0
debug_cognee/databases/cognee_db
Normal file
0
debug_cognee/databases/cognee_db
Normal file
126
debug_falkordb.py
Normal file
126
debug_falkordb.py
Normal file
|
|
@ -0,0 +1,126 @@
|
||||||
|
import os
|
||||||
|
import cognee
|
||||||
|
import pathlib
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
|
||||||
|
async def debug_falkordb():
|
||||||
|
"""Debug script to see what's actually stored in FalkorDB"""
|
||||||
|
|
||||||
|
# Check if FalkorDB is available
|
||||||
|
try:
|
||||||
|
from falkordb import FalkorDB
|
||||||
|
client = FalkorDB(host="localhost", port=6379)
|
||||||
|
client.list_graphs()
|
||||||
|
print("✅ FalkorDB connection successful")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"❌ FalkorDB not available: {e}")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Configure FalkorDB
|
||||||
|
cognee.config.set_graph_db_config({
|
||||||
|
"graph_database_url": "localhost",
|
||||||
|
"graph_database_port": 6379,
|
||||||
|
"graph_database_provider": "falkordb",
|
||||||
|
})
|
||||||
|
|
||||||
|
cognee.config.set_vector_db_config({
|
||||||
|
"vector_db_url": "localhost",
|
||||||
|
"vector_db_port": 6379,
|
||||||
|
"vector_db_provider": "falkordb",
|
||||||
|
})
|
||||||
|
|
||||||
|
# Set up directories
|
||||||
|
data_directory_path = str(pathlib.Path("./debug_data").resolve())
|
||||||
|
cognee_directory_path = str(pathlib.Path("./debug_cognee").resolve())
|
||||||
|
|
||||||
|
cognee.config.data_root_directory(data_directory_path)
|
||||||
|
cognee.config.system_root_directory(cognee_directory_path)
|
||||||
|
|
||||||
|
# Clean up first
|
||||||
|
await cognee.prune.prune_data()
|
||||||
|
await cognee.prune.prune_system(metadata=True)
|
||||||
|
|
||||||
|
# Add simple text
|
||||||
|
simple_text = "Artificial Intelligence (AI) is a fascinating technology."
|
||||||
|
dataset_name = "test_dataset"
|
||||||
|
|
||||||
|
print("📝 Adding data...")
|
||||||
|
await cognee.add([simple_text], dataset_name)
|
||||||
|
|
||||||
|
print("🧠 Running cognify...")
|
||||||
|
await cognee.cognify([dataset_name])
|
||||||
|
|
||||||
|
# Debug: Check what's in the database
|
||||||
|
print("\n🔍 Checking what's in the database...")
|
||||||
|
|
||||||
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||||
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||||
|
|
||||||
|
vector_engine = get_vector_engine()
|
||||||
|
graph_engine = await get_graph_engine()
|
||||||
|
|
||||||
|
# Get all graph data
|
||||||
|
print("\n📊 Graph data:")
|
||||||
|
graph_data = await graph_engine.get_graph_data()
|
||||||
|
nodes, edges = graph_data
|
||||||
|
|
||||||
|
print(f"Total nodes: {len(nodes)}")
|
||||||
|
print(f"Total edges: {len(edges)}")
|
||||||
|
|
||||||
|
if nodes:
|
||||||
|
print("\n🏷️ Sample nodes:")
|
||||||
|
for i, (node_id, node_props) in enumerate(nodes[:3]):
|
||||||
|
print(f" Node {i+1}: ID={node_id}")
|
||||||
|
print(f" Properties: {node_props}")
|
||||||
|
|
||||||
|
if edges:
|
||||||
|
print("\n🔗 Sample edges:")
|
||||||
|
for i, edge in enumerate(edges[:3]):
|
||||||
|
print(f" Edge {i+1}: {edge}")
|
||||||
|
|
||||||
|
# Try different search variations
|
||||||
|
print("\n🔍 Testing different search queries...")
|
||||||
|
|
||||||
|
# Get available graphs and collections
|
||||||
|
if hasattr(vector_engine, 'driver'):
|
||||||
|
graphs = vector_engine.driver.list_graphs()
|
||||||
|
print(f"Available graphs: {graphs}")
|
||||||
|
|
||||||
|
# Try to query directly to see node labels
|
||||||
|
try:
|
||||||
|
result = vector_engine.query("MATCH (n) RETURN DISTINCT labels(n) as labels LIMIT 10")
|
||||||
|
print(f"Node labels found: {result.result_set}")
|
||||||
|
|
||||||
|
result = vector_engine.query("MATCH (n) RETURN n LIMIT 5")
|
||||||
|
print(f"Sample nodes raw: {result.result_set}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Direct query error: {e}")
|
||||||
|
|
||||||
|
# Try searching with different queries
|
||||||
|
search_queries = [
|
||||||
|
("entity.name + AI", "entity.name", "AI"),
|
||||||
|
("Entity.name + AI", "Entity.name", "AI"),
|
||||||
|
("text + AI", "text", "AI"),
|
||||||
|
("content + AI", "content", "AI"),
|
||||||
|
("name + AI", "name", "AI"),
|
||||||
|
]
|
||||||
|
|
||||||
|
for query_desc, collection_name, query_text in search_queries:
|
||||||
|
try:
|
||||||
|
results = await vector_engine.search(collection_name=collection_name, query_text=query_text)
|
||||||
|
print(f" {query_desc}: {len(results)} results")
|
||||||
|
if results:
|
||||||
|
print(f" First result: {results[0]}")
|
||||||
|
except Exception as e:
|
||||||
|
print(f" {query_desc}: Error - {e}")
|
||||||
|
|
||||||
|
# Clean up
|
||||||
|
await cognee.prune.prune_data()
|
||||||
|
await cognee.prune.prune_system(metadata=True)
|
||||||
|
print("\n✅ Debug completed!")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(debug_falkordb())
|
||||||
44
poetry.lock
generated
44
poetry.lock
generated
|
|
@ -1,4 +1,4 @@
|
||||||
# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand.
|
# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand.
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aiobotocore"
|
name = "aiobotocore"
|
||||||
|
|
@ -452,10 +452,10 @@ files = [
|
||||||
name = "async-timeout"
|
name = "async-timeout"
|
||||||
version = "5.0.1"
|
version = "5.0.1"
|
||||||
description = "Timeout context manager for asyncio programs"
|
description = "Timeout context manager for asyncio programs"
|
||||||
optional = true
|
optional = false
|
||||||
python-versions = ">=3.8"
|
python-versions = ">=3.8"
|
||||||
groups = ["main"]
|
groups = ["main"]
|
||||||
markers = "extra == \"falkordb\" and python_full_version < \"3.11.3\" and python_version == \"3.11\""
|
markers = "python_version == \"3.11\" and python_full_version < \"3.11.3\""
|
||||||
files = [
|
files = [
|
||||||
{file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"},
|
{file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"},
|
||||||
{file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"},
|
{file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"},
|
||||||
|
|
@ -1406,6 +1406,27 @@ files = [
|
||||||
{file = "coverage-7.9.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0b3496922cb5f4215bf5caaef4cf12364a26b0be82e9ed6d050f3352cf2d7ef0"},
|
{file = "coverage-7.9.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0b3496922cb5f4215bf5caaef4cf12364a26b0be82e9ed6d050f3352cf2d7ef0"},
|
||||||
{file = "coverage-7.9.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:9565c3ab1c93310569ec0d86b017f128f027cab0b622b7af288696d7ed43a16d"},
|
{file = "coverage-7.9.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:9565c3ab1c93310569ec0d86b017f128f027cab0b622b7af288696d7ed43a16d"},
|
||||||
{file = "coverage-7.9.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2241ad5dbf79ae1d9c08fe52b36d03ca122fb9ac6bca0f34439e99f8327ac89f"},
|
{file = "coverage-7.9.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2241ad5dbf79ae1d9c08fe52b36d03ca122fb9ac6bca0f34439e99f8327ac89f"},
|
||||||
|
{file = "coverage-7.9.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3bb5838701ca68b10ebc0937dbd0eb81974bac54447c55cd58dea5bca8451029"},
|
||||||
|
{file = "coverage-7.9.1-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b30a25f814591a8c0c5372c11ac8967f669b97444c47fd794926e175c4047ece"},
|
||||||
|
{file = "coverage-7.9.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2d04b16a6062516df97969f1ae7efd0de9c31eb6ebdceaa0d213b21c0ca1a683"},
|
||||||
|
{file = "coverage-7.9.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:7931b9e249edefb07cd6ae10c702788546341d5fe44db5b6108a25da4dca513f"},
|
||||||
|
{file = "coverage-7.9.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:52e92b01041151bf607ee858e5a56c62d4b70f4dac85b8c8cb7fb8a351ab2c10"},
|
||||||
|
{file = "coverage-7.9.1-cp313-cp313t-win32.whl", hash = "sha256:684e2110ed84fd1ca5f40e89aa44adf1729dc85444004111aa01866507adf363"},
|
||||||
|
{file = "coverage-7.9.1-cp313-cp313t-win_amd64.whl", hash = "sha256:437c576979e4db840539674e68c84b3cda82bc824dd138d56bead1435f1cb5d7"},
|
||||||
|
{file = "coverage-7.9.1-cp313-cp313t-win_arm64.whl", hash = "sha256:18a0912944d70aaf5f399e350445738a1a20b50fbea788f640751c2ed9208b6c"},
|
||||||
|
{file = "coverage-7.9.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6f424507f57878e424d9a95dc4ead3fbdd72fd201e404e861e465f28ea469951"},
|
||||||
|
{file = "coverage-7.9.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:535fde4001b2783ac80865d90e7cc7798b6b126f4cd8a8c54acfe76804e54e58"},
|
||||||
|
{file = "coverage-7.9.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:02532fd3290bb8fa6bec876520842428e2a6ed6c27014eca81b031c2d30e3f71"},
|
||||||
|
{file = "coverage-7.9.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:56f5eb308b17bca3bbff810f55ee26d51926d9f89ba92707ee41d3c061257e55"},
|
||||||
|
{file = "coverage-7.9.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfa447506c1a52271f1b0de3f42ea0fa14676052549095e378d5bff1c505ff7b"},
|
||||||
|
{file = "coverage-7.9.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:9ca8e220006966b4a7b68e8984a6aee645a0384b0769e829ba60281fe61ec4f7"},
|
||||||
|
{file = "coverage-7.9.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:49f1d0788ba5b7ba65933f3a18864117c6506619f5ca80326b478f72acf3f385"},
|
||||||
|
{file = "coverage-7.9.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:68cd53aec6f45b8e4724c0950ce86eacb775c6be01ce6e3669fe4f3a21e768ed"},
|
||||||
|
{file = "coverage-7.9.1-cp39-cp39-win32.whl", hash = "sha256:95335095b6c7b1cc14c3f3f17d5452ce677e8490d101698562b2ffcacc304c8d"},
|
||||||
|
{file = "coverage-7.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:e1b5191d1648acc439b24721caab2fd0c86679d8549ed2c84d5a7ec1bedcc244"},
|
||||||
|
{file = "coverage-7.9.1-pp39.pp310.pp311-none-any.whl", hash = "sha256:db0f04118d1db74db6c9e1cb1898532c7dcc220f1d2718f058601f7c3f499514"},
|
||||||
|
{file = "coverage-7.9.1-py3-none-any.whl", hash = "sha256:66b974b145aa189516b6bf2d8423e888b742517d37872f6ee4c5be0073bd9a3c"},
|
||||||
|
{file = "coverage-7.9.1.tar.gz", hash = "sha256:6cf43c78c4282708a28e466316935ec7489a9c487518a77fa68f716c67909cec"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
|
|
@ -2034,17 +2055,18 @@ tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipyth
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "falkordb"
|
name = "falkordb"
|
||||||
version = "1.0.9"
|
version = "1.2.0"
|
||||||
description = "Python client for interacting with FalkorDB database"
|
description = "Python client for interacting with FalkorDB database"
|
||||||
optional = true
|
optional = false
|
||||||
python-versions = "<4.0,>=3.8"
|
python-versions = "<4.0,>=3.8"
|
||||||
groups = ["main"]
|
groups = ["main"]
|
||||||
markers = "extra == \"falkordb\""
|
|
||||||
files = [
|
files = [
|
||||||
{file = "falkordb-1.0.9.tar.gz", hash = "sha256:177008e63c7e4d9ebbdfeb8cad24b0e49175bb0f6e96cac9b4ffb641c0eff0f1"},
|
{file = "falkordb-1.2.0-py3-none-any.whl", hash = "sha256:7572d9cc377735d22efc52fe6fe73c7a435422c827b6ea3ca223a850a77be12e"},
|
||||||
|
{file = "falkordb-1.2.0.tar.gz", hash = "sha256:ce57365b86722d538e75aa5d438de67ecd8eb9478da612506d9812cd7f182d0b"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
|
python-dateutil = ">=2.9.0,<3.0.0"
|
||||||
redis = ">=5.0.1,<6.0.0"
|
redis = ">=5.0.1,<6.0.0"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -3988,6 +4010,8 @@ python-versions = "*"
|
||||||
groups = ["main"]
|
groups = ["main"]
|
||||||
files = [
|
files = [
|
||||||
{file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"},
|
{file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"},
|
||||||
|
{file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"},
|
||||||
|
{file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
|
|
@ -7665,6 +7689,7 @@ files = [
|
||||||
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"},
|
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"},
|
||||||
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"},
|
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"},
|
||||||
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"},
|
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"},
|
||||||
|
{file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"},
|
||||||
{file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"},
|
{file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"},
|
||||||
{file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"},
|
{file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"},
|
||||||
{file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"},
|
{file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"},
|
||||||
|
|
@ -9137,10 +9162,9 @@ md = ["cmarkgfm (>=0.8.0)"]
|
||||||
name = "redis"
|
name = "redis"
|
||||||
version = "5.2.1"
|
version = "5.2.1"
|
||||||
description = "Python client for Redis database and key-value store"
|
description = "Python client for Redis database and key-value store"
|
||||||
optional = true
|
optional = false
|
||||||
python-versions = ">=3.8"
|
python-versions = ">=3.8"
|
||||||
groups = ["main"]
|
groups = ["main"]
|
||||||
markers = "extra == \"falkordb\""
|
|
||||||
files = [
|
files = [
|
||||||
{file = "redis-5.2.1-py3-none-any.whl", hash = "sha256:ee7e1056b9aea0f04c6c2ed59452947f34c4940ee025f5dd83e6a6418b6989e4"},
|
{file = "redis-5.2.1-py3-none-any.whl", hash = "sha256:ee7e1056b9aea0f04c6c2ed59452947f34c4940ee025f5dd83e6a6418b6989e4"},
|
||||||
{file = "redis-5.2.1.tar.gz", hash = "sha256:16f2e22dff21d5125e8481515e386711a34cbec50f0e44413dd7d9c060a54e0f"},
|
{file = "redis-5.2.1.tar.gz", hash = "sha256:16f2e22dff21d5125e8481515e386711a34cbec50f0e44413dd7d9c060a54e0f"},
|
||||||
|
|
@ -12215,4 +12239,4 @@ weaviate = ["weaviate-client"]
|
||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.1"
|
lock-version = "2.1"
|
||||||
python-versions = ">=3.10,<=3.13"
|
python-versions = ">=3.10,<=3.13"
|
||||||
content-hash = "ca2a3e8260092933419793efe202d50ae7b1c6ce738750876fd5f64a31718790"
|
content-hash = "863cd298459e4e1bc628c99449e2b2b4b897d7f64c8cceb91c9bc7400cdfb339"
|
||||||
|
|
|
||||||
|
|
@ -59,7 +59,8 @@ dependencies = [
|
||||||
"pympler>=1.1",
|
"pympler>=1.1",
|
||||||
"onnxruntime<=1.21.1",
|
"onnxruntime<=1.21.1",
|
||||||
"pylance==0.22.0",
|
"pylance==0.22.0",
|
||||||
"kuzu==0.9.0"
|
"kuzu==0.9.0",
|
||||||
|
"falkordb (>=1.2.0,<2.0.0)"
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue