Merge branch 'dev' into feature/cog-3187-feedback-enrichment
This commit is contained in:
commit
cccf523eea
10 changed files with 21 additions and 89 deletions
|
|
@ -71,7 +71,7 @@ Build dynamic memory for Agents and replace RAG using scalable, modular ECL (Ext
|
||||||
|
|
||||||
## Get Started
|
## Get Started
|
||||||
|
|
||||||
Get started quickly with a Google Colab <a href="https://colab.research.google.com/drive/1jHbWVypDgCLwjE71GSXhRL3YxYhCZzG1?usp=sharing">notebook</a> , <a href="https://deepnote.com/workspace/cognee-382213d0-0444-4c89-8265-13770e333c02/project/cognee-demo-78ffacb9-5832-4611-bb1a-560386068b30/notebook/Notebook-1-75b24cda566d4c24ab348f7150792601?utm_source=share-modal&utm_medium=product-shared-content&utm_campaign=notebook&utm_content=78ffacb9-5832-4611-bb1a-560386068b30">Deepnote notebook</a> or <a href="https://github.com/topoteretes/cognee/tree/main/cognee-starter-kit">starter repo</a>
|
Get started quickly with a Google Colab <a href="https://colab.research.google.com/drive/12Vi9zID-M3fpKpKiaqDBvkk98ElkRPWy?usp=sharing">notebook</a>, <a href="https://deepnote.com/workspace/cognee-382213d0-0444-4c89-8265-13770e333c02/project/cognee-demo-78ffacb9-5832-4611-bb1a-560386068b30/notebook/Notebook-1-75b24cda566d4c24ab348f7150792601?utm_source=share-modal&utm_medium=product-shared-content&utm_campaign=notebook&utm_content=78ffacb9-5832-4611-bb1a-560386068b30">Deepnote notebook</a> or <a href="https://github.com/topoteretes/cognee/tree/main/cognee-starter-kit">starter repo</a>
|
||||||
|
|
||||||
|
|
||||||
## About cognee
|
## About cognee
|
||||||
|
|
@ -224,12 +224,12 @@ We now have a paper you can cite:
|
||||||
|
|
||||||
```bibtex
|
```bibtex
|
||||||
@misc{markovic2025optimizinginterfaceknowledgegraphs,
|
@misc{markovic2025optimizinginterfaceknowledgegraphs,
|
||||||
title={Optimizing the Interface Between Knowledge Graphs and LLMs for Complex Reasoning},
|
title={Optimizing the Interface Between Knowledge Graphs and LLMs for Complex Reasoning},
|
||||||
author={Vasilije Markovic and Lazar Obradovic and Laszlo Hajdu and Jovan Pavlovic},
|
author={Vasilije Markovic and Lazar Obradovic and Laszlo Hajdu and Jovan Pavlovic},
|
||||||
year={2025},
|
year={2025},
|
||||||
eprint={2505.24478},
|
eprint={2505.24478},
|
||||||
archivePrefix={arXiv},
|
archivePrefix={arXiv},
|
||||||
primaryClass={cs.AI},
|
primaryClass={cs.AI},
|
||||||
url={https://arxiv.org/abs/2505.24478},
|
url={https://arxiv.org/abs/2505.24478},
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
from typing import Union, Optional, List, Type
|
from typing import Union, Optional, List, Type
|
||||||
|
|
||||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
||||||
from cognee.modules.engine.models.node_set import NodeSet
|
from cognee.modules.engine.models.node_set import NodeSet
|
||||||
from cognee.modules.users.models import User
|
from cognee.modules.users.models import User
|
||||||
from cognee.modules.search.types import SearchResult, SearchType, CombinedSearchResult
|
from cognee.modules.search.types import SearchResult, SearchType, CombinedSearchResult
|
||||||
|
|
@ -9,9 +8,6 @@ from cognee.modules.users.methods import get_default_user
|
||||||
from cognee.modules.search.methods import search as search_function
|
from cognee.modules.search.methods import search as search_function
|
||||||
from cognee.modules.data.methods import get_authorized_existing_datasets
|
from cognee.modules.data.methods import get_authorized_existing_datasets
|
||||||
from cognee.modules.data.exceptions import DatasetNotFoundError
|
from cognee.modules.data.exceptions import DatasetNotFoundError
|
||||||
from cognee.shared.logging_utils import get_logger
|
|
||||||
|
|
||||||
logger = get_logger()
|
|
||||||
|
|
||||||
|
|
||||||
async def search(
|
async def search(
|
||||||
|
|
|
||||||
|
|
@ -159,11 +159,6 @@ class GraphDBInterface(ABC):
|
||||||
- get_connections
|
- get_connections
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
async def is_empty(self) -> bool:
|
|
||||||
logger.warning("is_empty() is not implemented")
|
|
||||||
return True
|
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def query(self, query: str, params: dict) -> List[Any]:
|
async def query(self, query: str, params: dict) -> List[Any]:
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -198,15 +198,6 @@ class KuzuAdapter(GraphDBInterface):
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
logger.warning(f"Kuzu S3 storage file not found: {self.db_path}")
|
logger.warning(f"Kuzu S3 storage file not found: {self.db_path}")
|
||||||
|
|
||||||
async def is_empty(self) -> bool:
|
|
||||||
query = """
|
|
||||||
MATCH (n)
|
|
||||||
RETURN true
|
|
||||||
LIMIT 1;
|
|
||||||
"""
|
|
||||||
query_result = await self.query(query)
|
|
||||||
return len(query_result) == 0
|
|
||||||
|
|
||||||
async def query(self, query: str, params: Optional[dict] = None) -> List[Tuple]:
|
async def query(self, query: str, params: Optional[dict] = None) -> List[Tuple]:
|
||||||
"""
|
"""
|
||||||
Execute a Kuzu query asynchronously with automatic reconnection.
|
Execute a Kuzu query asynchronously with automatic reconnection.
|
||||||
|
|
|
||||||
|
|
@ -87,15 +87,6 @@ class Neo4jAdapter(GraphDBInterface):
|
||||||
async with self.driver.session(database=self.graph_database_name) as session:
|
async with self.driver.session(database=self.graph_database_name) as session:
|
||||||
yield session
|
yield session
|
||||||
|
|
||||||
async def is_empty(self) -> bool:
|
|
||||||
query = """
|
|
||||||
RETURN EXISTS {
|
|
||||||
MATCH (n)
|
|
||||||
} AS node_exists;
|
|
||||||
"""
|
|
||||||
query_result = await self.query(query)
|
|
||||||
return not query_result[0]["node_exists"]
|
|
||||||
|
|
||||||
@deadlock_retry()
|
@deadlock_retry()
|
||||||
async def query(
|
async def query(
|
||||||
self,
|
self,
|
||||||
|
|
@ -1076,7 +1067,7 @@ class Neo4jAdapter(GraphDBInterface):
|
||||||
query_nodes = f"""
|
query_nodes = f"""
|
||||||
MATCH (n)
|
MATCH (n)
|
||||||
WHERE {where_clause}
|
WHERE {where_clause}
|
||||||
RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties
|
RETURN n.id AS id, labels(n) AS labels, properties(n) AS properties
|
||||||
"""
|
"""
|
||||||
result_nodes = await self.query(query_nodes)
|
result_nodes = await self.query(query_nodes)
|
||||||
|
|
||||||
|
|
@ -1091,7 +1082,7 @@ class Neo4jAdapter(GraphDBInterface):
|
||||||
query_edges = f"""
|
query_edges = f"""
|
||||||
MATCH (n)-[r]->(m)
|
MATCH (n)-[r]->(m)
|
||||||
WHERE {where_clause} AND {where_clause.replace("n.", "m.")}
|
WHERE {where_clause} AND {where_clause.replace("n.", "m.")}
|
||||||
RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties
|
RETURN n.id AS source, n.id AS target, TYPE(r) AS type, properties(r) AS properties
|
||||||
"""
|
"""
|
||||||
result_edges = await self.query(query_edges)
|
result_edges = await self.query(query_edges)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -124,6 +124,12 @@ def guess_file_type(file: BinaryIO) -> filetype.Type:
|
||||||
"""
|
"""
|
||||||
file_type = filetype.guess(file)
|
file_type = filetype.guess(file)
|
||||||
|
|
||||||
|
# If the file type could not be determined, treat it as plain text: plain-text files have no magic number to detect
|
||||||
|
if file_type is None:
|
||||||
|
from filetype.types.base import Type
|
||||||
|
|
||||||
|
file_type = Type("text/plain", "txt")
|
||||||
|
|
||||||
if file_type is None:
|
if file_type is None:
|
||||||
raise FileTypeException(f"Unknown file detected: {file.name}.")
|
raise FileTypeException(f"Unknown file detected: {file.name}.")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -88,6 +88,7 @@ async def run_tasks_distributed(
|
||||||
pipeline_name: str = "unknown_pipeline",
|
pipeline_name: str = "unknown_pipeline",
|
||||||
context: dict = None,
|
context: dict = None,
|
||||||
incremental_loading: bool = False,
|
incremental_loading: bool = False,
|
||||||
|
data_per_batch: int = 20,
|
||||||
):
|
):
|
||||||
if not user:
|
if not user:
|
||||||
user = await get_default_user()
|
user = await get_default_user()
|
||||||
|
|
|
||||||
|
|
@ -47,26 +47,10 @@ async def main():
|
||||||
pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
|
pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
|
||||||
)
|
)
|
||||||
|
|
||||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
||||||
|
|
||||||
graph_engine = await get_graph_engine()
|
|
||||||
|
|
||||||
is_empty = await graph_engine.is_empty()
|
|
||||||
|
|
||||||
assert is_empty, "Kuzu graph database is not empty"
|
|
||||||
|
|
||||||
await cognee.add([explanation_file_path_quantum], dataset_name)
|
await cognee.add([explanation_file_path_quantum], dataset_name)
|
||||||
|
|
||||||
is_empty = await graph_engine.is_empty()
|
|
||||||
|
|
||||||
assert is_empty, "Kuzu graph database should be empty before cognify"
|
|
||||||
|
|
||||||
await cognee.cognify([dataset_name])
|
await cognee.cognify([dataset_name])
|
||||||
|
|
||||||
is_empty = await graph_engine.is_empty()
|
|
||||||
|
|
||||||
assert not is_empty, "Kuzu graph database should not be empty"
|
|
||||||
|
|
||||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||||
|
|
||||||
vector_engine = get_vector_engine()
|
vector_engine = get_vector_engine()
|
||||||
|
|
@ -130,10 +114,11 @@ async def main():
|
||||||
assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
|
assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
|
||||||
|
|
||||||
await cognee.prune.prune_system(metadata=True)
|
await cognee.prune.prune_system(metadata=True)
|
||||||
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||||
|
|
||||||
is_empty = await graph_engine.is_empty()
|
graph_engine = await get_graph_engine()
|
||||||
|
nodes, edges = await graph_engine.get_graph_data()
|
||||||
assert is_empty, "Kuzu graph database is not empty"
|
assert len(nodes) == 0 and len(edges) == 0, "Kuzu graph database is not empty"
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
# Ensure cleanup even if tests fail
|
# Ensure cleanup even if tests fail
|
||||||
|
|
|
||||||
|
|
@ -35,14 +35,6 @@ async def main():
|
||||||
explanation_file_path_nlp = os.path.join(
|
explanation_file_path_nlp = os.path.join(
|
||||||
pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
|
pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
|
||||||
)
|
)
|
||||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
||||||
|
|
||||||
graph_engine = await get_graph_engine()
|
|
||||||
|
|
||||||
is_empty = await graph_engine.is_empty()
|
|
||||||
|
|
||||||
assert is_empty, "Graph has to be empty"
|
|
||||||
|
|
||||||
await cognee.add([explanation_file_path_nlp], dataset_name)
|
await cognee.add([explanation_file_path_nlp], dataset_name)
|
||||||
|
|
||||||
explanation_file_path_quantum = os.path.join(
|
explanation_file_path_quantum = os.path.join(
|
||||||
|
|
@ -50,16 +42,9 @@ async def main():
|
||||||
)
|
)
|
||||||
|
|
||||||
await cognee.add([explanation_file_path_quantum], dataset_name)
|
await cognee.add([explanation_file_path_quantum], dataset_name)
|
||||||
is_empty = await graph_engine.is_empty()
|
|
||||||
|
|
||||||
assert is_empty, "Graph has to be empty before cognify"
|
|
||||||
|
|
||||||
await cognee.cognify([dataset_name])
|
await cognee.cognify([dataset_name])
|
||||||
|
|
||||||
is_empty = await graph_engine.is_empty()
|
|
||||||
|
|
||||||
assert not is_empty, "Graph shouldn't be empty"
|
|
||||||
|
|
||||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||||
|
|
||||||
vector_engine = get_vector_engine()
|
vector_engine = get_vector_engine()
|
||||||
|
|
@ -132,8 +117,11 @@ async def main():
|
||||||
assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
|
assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
|
||||||
|
|
||||||
await cognee.prune.prune_system(metadata=True)
|
await cognee.prune.prune_system(metadata=True)
|
||||||
is_empty = await graph_engine.is_empty()
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||||
assert is_empty, "Neo4j graph database is not empty"
|
|
||||||
|
graph_engine = await get_graph_engine()
|
||||||
|
nodes, edges = await graph_engine.get_graph_data()
|
||||||
|
assert len(nodes) == 0 and len(edges) == 0, "Neo4j graph database is not empty"
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
|
|
@ -1,21 +0,0 @@
|
||||||
import pytest
|
|
||||||
import cognee
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_empty_search_raises_SearchOnEmptyGraphError_on_empty_graph():
|
|
||||||
await cognee.prune.prune_data()
|
|
||||||
await cognee.prune.prune_system(metadata=True)
|
|
||||||
await cognee.add("Sample input")
|
|
||||||
result = await cognee.search("Sample query")
|
|
||||||
assert result == []
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_empty_search_doesnt_raise_SearchOnEmptyGraphError():
|
|
||||||
await cognee.prune.prune_data()
|
|
||||||
await cognee.prune.prune_system(metadata=True)
|
|
||||||
await cognee.add("Sample input")
|
|
||||||
await cognee.cognify()
|
|
||||||
result = await cognee.search("Sample query")
|
|
||||||
assert result != []
|
|
||||||
Loading…
Add table
Reference in a new issue