Merge branch 'dev' into feature/cog-3187-feedback-enrichment

This commit is contained in:
lxobr 2025-10-21 01:31:58 +02:00
commit cccf523eea
10 changed files with 21 additions and 89 deletions

View file

@ -71,7 +71,7 @@ Build dynamic memory for Agents and replace RAG using scalable, modular ECL (Ext
## Get Started
Get started quickly with a Google Colab <a href="https://colab.research.google.com/drive/1jHbWVypDgCLwjE71GSXhRL3YxYhCZzG1?usp=sharing">notebook</a> , <a href="https://deepnote.com/workspace/cognee-382213d0-0444-4c89-8265-13770e333c02/project/cognee-demo-78ffacb9-5832-4611-bb1a-560386068b30/notebook/Notebook-1-75b24cda566d4c24ab348f7150792601?utm_source=share-modal&utm_medium=product-shared-content&utm_campaign=notebook&utm_content=78ffacb9-5832-4611-bb1a-560386068b30">Deepnote notebook</a> or <a href="https://github.com/topoteretes/cognee/tree/main/cognee-starter-kit">starter repo</a>
Get started quickly with a Google Colab <a href="https://colab.research.google.com/drive/12Vi9zID-M3fpKpKiaqDBvkk98ElkRPWy?usp=sharing">notebook</a> , <a href="https://deepnote.com/workspace/cognee-382213d0-0444-4c89-8265-13770e333c02/project/cognee-demo-78ffacb9-5832-4611-bb1a-560386068b30/notebook/Notebook-1-75b24cda566d4c24ab348f7150792601?utm_source=share-modal&utm_medium=product-shared-content&utm_campaign=notebook&utm_content=78ffacb9-5832-4611-bb1a-560386068b30">Deepnote notebook</a> or <a href="https://github.com/topoteretes/cognee/tree/main/cognee-starter-kit">starter repo</a>
## About cognee
@ -224,12 +224,12 @@ We now have a paper you can cite:
```bibtex
@misc{markovic2025optimizinginterfaceknowledgegraphs,
title={Optimizing the Interface Between Knowledge Graphs and LLMs for Complex Reasoning},
title={Optimizing the Interface Between Knowledge Graphs and LLMs for Complex Reasoning},
author={Vasilije Markovic and Lazar Obradovic and Laszlo Hajdu and Jovan Pavlovic},
year={2025},
eprint={2505.24478},
archivePrefix={arXiv},
primaryClass={cs.AI},
url={https://arxiv.org/abs/2505.24478},
url={https://arxiv.org/abs/2505.24478},
}
```

View file

@ -1,7 +1,6 @@
from uuid import UUID
from typing import Union, Optional, List, Type
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.modules.engine.models.node_set import NodeSet
from cognee.modules.users.models import User
from cognee.modules.search.types import SearchResult, SearchType, CombinedSearchResult
@ -9,9 +8,6 @@ from cognee.modules.users.methods import get_default_user
from cognee.modules.search.methods import search as search_function
from cognee.modules.data.methods import get_authorized_existing_datasets
from cognee.modules.data.exceptions import DatasetNotFoundError
from cognee.shared.logging_utils import get_logger
logger = get_logger()
async def search(

View file

@ -159,11 +159,6 @@ class GraphDBInterface(ABC):
- get_connections
"""
@abstractmethod
async def is_empty(self) -> bool:
    """
    Base implementation of the graph emptiness check.

    No lookup is performed here: a warning is logged saying the check is
    not implemented, and True is returned.

    Returns:
        bool: Always True in this base implementation.
    """
    not_implemented_notice = "is_empty() is not implemented"
    logger.warning(not_implemented_notice)
    return True
@abstractmethod
async def query(self, query: str, params: dict) -> List[Any]:
"""

View file

@ -198,15 +198,6 @@ class KuzuAdapter(GraphDBInterface):
except FileNotFoundError:
logger.warning(f"Kuzu S3 storage file not found: {self.db_path}")
async def is_empty(self) -> bool:
    """
    Tell whether the graph has no nodes.

    Issues a query that matches any node and returns at most one row, then
    reports True when that query produced no rows.

    Returns:
        bool: True if the query result is empty, False otherwise.
    """
    # Same query text as before, assembled from adjacent literals.
    single_node_probe = (
        "\n"
        "MATCH (n)\n"
        "RETURN true\n"
        "LIMIT 1;\n"
    )
    rows = await self.query(single_node_probe)
    row_count = len(rows)
    return row_count == 0
async def query(self, query: str, params: Optional[dict] = None) -> List[Tuple]:
"""
Execute a Kuzu query asynchronously with automatic reconnection.

View file

@ -87,15 +87,6 @@ class Neo4jAdapter(GraphDBInterface):
async with self.driver.session(database=self.graph_database_name) as session:
yield session
async def is_empty(self) -> bool:
    """
    Tell whether the graph has no nodes.

    Runs a single query returning an EXISTS flag aliased as `node_exists`
    and negates the flag read from the first result row.

    Returns:
        bool: True when `node_exists` is falsy, False otherwise.
    """
    # Same query text as before, assembled from adjacent literals.
    node_exists_probe = (
        "\n"
        "RETURN EXISTS {\n"
        "MATCH (n)\n"
        "} AS node_exists;\n"
    )
    rows = await self.query(node_exists_probe)
    first_row = rows[0]
    return not first_row["node_exists"]
@deadlock_retry()
async def query(
self,
@ -1076,7 +1067,7 @@ class Neo4jAdapter(GraphDBInterface):
query_nodes = f"""
MATCH (n)
WHERE {where_clause}
RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties
RETURN n.id AS id, labels(n) AS labels, properties(n) AS properties
"""
result_nodes = await self.query(query_nodes)
@ -1091,7 +1082,7 @@ class Neo4jAdapter(GraphDBInterface):
query_edges = f"""
MATCH (n)-[r]->(m)
WHERE {where_clause} AND {where_clause.replace("n.", "m.")}
RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties
RETURN n.id AS source, n.id AS target, TYPE(r) AS type, properties(r) AS properties
"""
result_edges = await self.query(query_edges)

View file

@ -124,6 +124,12 @@ def guess_file_type(file: BinaryIO) -> filetype.Type:
"""
file_type = filetype.guess(file)
# If file type could not be determined consider it a plain text file as they don't have magic number encoding
if file_type is None:
from filetype.types.base import Type
file_type = Type("text/plain", "txt")
if file_type is None:
raise FileTypeException(f"Unknown file detected: {file.name}.")

View file

@ -88,6 +88,7 @@ async def run_tasks_distributed(
pipeline_name: str = "unknown_pipeline",
context: dict = None,
incremental_loading: bool = False,
data_per_batch: int = 20,
):
if not user:
user = await get_default_user()

View file

@ -47,26 +47,10 @@ async def main():
pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
)
from cognee.infrastructure.databases.graph import get_graph_engine
graph_engine = await get_graph_engine()
is_empty = await graph_engine.is_empty()
assert is_empty, "Kuzu graph database is not empty"
await cognee.add([explanation_file_path_quantum], dataset_name)
is_empty = await graph_engine.is_empty()
assert is_empty, "Kuzu graph database should be empty before cognify"
await cognee.cognify([dataset_name])
is_empty = await graph_engine.is_empty()
assert not is_empty, "Kuzu graph database should not be empty"
from cognee.infrastructure.databases.vector import get_vector_engine
vector_engine = get_vector_engine()
@ -130,10 +114,11 @@ async def main():
assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
await cognee.prune.prune_system(metadata=True)
from cognee.infrastructure.databases.graph import get_graph_engine
is_empty = await graph_engine.is_empty()
assert is_empty, "Kuzu graph database is not empty"
graph_engine = await get_graph_engine()
nodes, edges = await graph_engine.get_graph_data()
assert len(nodes) == 0 and len(edges) == 0, "Kuzu graph database is not empty"
finally:
# Ensure cleanup even if tests fail

View file

@ -35,14 +35,6 @@ async def main():
explanation_file_path_nlp = os.path.join(
pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
)
from cognee.infrastructure.databases.graph import get_graph_engine
graph_engine = await get_graph_engine()
is_empty = await graph_engine.is_empty()
assert is_empty, "Graph has to be empty"
await cognee.add([explanation_file_path_nlp], dataset_name)
explanation_file_path_quantum = os.path.join(
@ -50,16 +42,9 @@ async def main():
)
await cognee.add([explanation_file_path_quantum], dataset_name)
is_empty = await graph_engine.is_empty()
assert is_empty, "Graph has to be empty before cognify"
await cognee.cognify([dataset_name])
is_empty = await graph_engine.is_empty()
assert not is_empty, "Graph shouldn't be empty"
from cognee.infrastructure.databases.vector import get_vector_engine
vector_engine = get_vector_engine()
@ -132,8 +117,11 @@ async def main():
assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
await cognee.prune.prune_system(metadata=True)
is_empty = await graph_engine.is_empty()
assert is_empty, "Neo4j graph database is not empty"
from cognee.infrastructure.databases.graph import get_graph_engine
graph_engine = await get_graph_engine()
nodes, edges = await graph_engine.get_graph_data()
assert len(nodes) == 0 and len(edges) == 0, "Neo4j graph database is not empty"
if __name__ == "__main__":

View file

@ -1,21 +0,0 @@
import pytest
import cognee
@pytest.mark.asyncio
async def test_empty_search_raises_SearchOnEmptyGraphError_on_empty_graph():
    """
    Searching after `add` but without `cognify` yields an empty list.

    NOTE(review): the test name mentions SearchOnEmptyGraphError, but the
    body asserts an empty result rather than a raised exception.
    """
    # Start from a clean slate: wipe stored data, then system state and metadata.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    # Data is added but cognify is never called before searching.
    await cognee.add("Sample input")

    search_hits = await cognee.search("Sample query")
    assert search_hits == []
@pytest.mark.asyncio
async def test_empty_search_doesnt_raise_SearchOnEmptyGraphError():
    """
    Searching after `add` followed by `cognify` yields a non-empty result.
    """
    # Start from a clean slate: wipe stored data, then system state and metadata.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    # Add data and run cognify before searching.
    await cognee.add("Sample input")
    await cognee.cognify()

    search_hits = await cognee.search("Sample query")
    assert search_hits != []