Merge branch 'dev' into feature/cog-3187-feedback-enrichment
This commit is contained in:
commit
cccf523eea
10 changed files with 21 additions and 89 deletions
|
|
@ -71,7 +71,7 @@ Build dynamic memory for Agents and replace RAG using scalable, modular ECL (Ext
|
|||
|
||||
## Get Started
|
||||
|
||||
Get started quickly with a Google Colab <a href="https://colab.research.google.com/drive/1jHbWVypDgCLwjE71GSXhRL3YxYhCZzG1?usp=sharing">notebook</a> , <a href="https://deepnote.com/workspace/cognee-382213d0-0444-4c89-8265-13770e333c02/project/cognee-demo-78ffacb9-5832-4611-bb1a-560386068b30/notebook/Notebook-1-75b24cda566d4c24ab348f7150792601?utm_source=share-modal&utm_medium=product-shared-content&utm_campaign=notebook&utm_content=78ffacb9-5832-4611-bb1a-560386068b30">Deepnote notebook</a> or <a href="https://github.com/topoteretes/cognee/tree/main/cognee-starter-kit">starter repo</a>
|
||||
Get started quickly with a Google Colab <a href="https://colab.research.google.com/drive/12Vi9zID-M3fpKpKiaqDBvkk98ElkRPWy?usp=sharing">notebook</a> , <a href="https://deepnote.com/workspace/cognee-382213d0-0444-4c89-8265-13770e333c02/project/cognee-demo-78ffacb9-5832-4611-bb1a-560386068b30/notebook/Notebook-1-75b24cda566d4c24ab348f7150792601?utm_source=share-modal&utm_medium=product-shared-content&utm_campaign=notebook&utm_content=78ffacb9-5832-4611-bb1a-560386068b30">Deepnote notebook</a> or <a href="https://github.com/topoteretes/cognee/tree/main/cognee-starter-kit">starter repo</a>
|
||||
|
||||
|
||||
## About cognee
|
||||
|
|
@ -224,12 +224,12 @@ We now have a paper you can cite:
|
|||
|
||||
```bibtex
|
||||
@misc{markovic2025optimizinginterfaceknowledgegraphs,
|
||||
title={Optimizing the Interface Between Knowledge Graphs and LLMs for Complex Reasoning},
|
||||
title={Optimizing the Interface Between Knowledge Graphs and LLMs for Complex Reasoning},
|
||||
author={Vasilije Markovic and Lazar Obradovic and Laszlo Hajdu and Jovan Pavlovic},
|
||||
year={2025},
|
||||
eprint={2505.24478},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.AI},
|
||||
url={https://arxiv.org/abs/2505.24478},
|
||||
url={https://arxiv.org/abs/2505.24478},
|
||||
}
|
||||
```
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
from uuid import UUID
|
||||
from typing import Union, Optional, List, Type
|
||||
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
from cognee.modules.engine.models.node_set import NodeSet
|
||||
from cognee.modules.users.models import User
|
||||
from cognee.modules.search.types import SearchResult, SearchType, CombinedSearchResult
|
||||
|
|
@ -9,9 +8,6 @@ from cognee.modules.users.methods import get_default_user
|
|||
from cognee.modules.search.methods import search as search_function
|
||||
from cognee.modules.data.methods import get_authorized_existing_datasets
|
||||
from cognee.modules.data.exceptions import DatasetNotFoundError
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
async def search(
|
||||
|
|
|
|||
|
|
@ -159,11 +159,6 @@ class GraphDBInterface(ABC):
|
|||
- get_connections
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
async def is_empty(self) -> bool:
|
||||
logger.warning("is_empty() is not implemented")
|
||||
return True
|
||||
|
||||
@abstractmethod
|
||||
async def query(self, query: str, params: dict) -> List[Any]:
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -198,15 +198,6 @@ class KuzuAdapter(GraphDBInterface):
|
|||
except FileNotFoundError:
|
||||
logger.warning(f"Kuzu S3 storage file not found: {self.db_path}")
|
||||
|
||||
async def is_empty(self) -> bool:
|
||||
query = """
|
||||
MATCH (n)
|
||||
RETURN true
|
||||
LIMIT 1;
|
||||
"""
|
||||
query_result = await self.query(query)
|
||||
return len(query_result) == 0
|
||||
|
||||
async def query(self, query: str, params: Optional[dict] = None) -> List[Tuple]:
|
||||
"""
|
||||
Execute a Kuzu query asynchronously with automatic reconnection.
|
||||
|
|
|
|||
|
|
@ -87,15 +87,6 @@ class Neo4jAdapter(GraphDBInterface):
|
|||
async with self.driver.session(database=self.graph_database_name) as session:
|
||||
yield session
|
||||
|
||||
async def is_empty(self) -> bool:
|
||||
query = """
|
||||
RETURN EXISTS {
|
||||
MATCH (n)
|
||||
} AS node_exists;
|
||||
"""
|
||||
query_result = await self.query(query)
|
||||
return not query_result[0]["node_exists"]
|
||||
|
||||
@deadlock_retry()
|
||||
async def query(
|
||||
self,
|
||||
|
|
@ -1076,7 +1067,7 @@ class Neo4jAdapter(GraphDBInterface):
|
|||
query_nodes = f"""
|
||||
MATCH (n)
|
||||
WHERE {where_clause}
|
||||
RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties
|
||||
RETURN n.id AS id, labels(n) AS labels, properties(n) AS properties
|
||||
"""
|
||||
result_nodes = await self.query(query_nodes)
|
||||
|
||||
|
|
@ -1091,7 +1082,7 @@ class Neo4jAdapter(GraphDBInterface):
|
|||
query_edges = f"""
|
||||
MATCH (n)-[r]->(m)
|
||||
WHERE {where_clause} AND {where_clause.replace("n.", "m.")}
|
||||
RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties
|
||||
RETURN n.id AS source, n.id AS target, TYPE(r) AS type, properties(r) AS properties
|
||||
"""
|
||||
result_edges = await self.query(query_edges)
|
||||
|
||||
|
|
|
|||
|
|
@ -124,6 +124,12 @@ def guess_file_type(file: BinaryIO) -> filetype.Type:
|
|||
"""
|
||||
file_type = filetype.guess(file)
|
||||
|
||||
# If file type could not be determined consider it a plain text file as they don't have magic number encoding
|
||||
if file_type is None:
|
||||
from filetype.types.base import Type
|
||||
|
||||
file_type = Type("text/plain", "txt")
|
||||
|
||||
if file_type is None:
|
||||
raise FileTypeException(f"Unknown file detected: {file.name}.")
|
||||
|
||||
|
|
|
|||
|
|
@ -88,6 +88,7 @@ async def run_tasks_distributed(
|
|||
pipeline_name: str = "unknown_pipeline",
|
||||
context: dict = None,
|
||||
incremental_loading: bool = False,
|
||||
data_per_batch: int = 20,
|
||||
):
|
||||
if not user:
|
||||
user = await get_default_user()
|
||||
|
|
|
|||
|
|
@ -47,26 +47,10 @@ async def main():
|
|||
pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
|
||||
)
|
||||
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
|
||||
graph_engine = await get_graph_engine()
|
||||
|
||||
is_empty = await graph_engine.is_empty()
|
||||
|
||||
assert is_empty, "Kuzu graph database is not empty"
|
||||
|
||||
await cognee.add([explanation_file_path_quantum], dataset_name)
|
||||
|
||||
is_empty = await graph_engine.is_empty()
|
||||
|
||||
assert is_empty, "Kuzu graph database should be empty before cognify"
|
||||
|
||||
await cognee.cognify([dataset_name])
|
||||
|
||||
is_empty = await graph_engine.is_empty()
|
||||
|
||||
assert not is_empty, "Kuzu graph database should not be empty"
|
||||
|
||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||
|
||||
vector_engine = get_vector_engine()
|
||||
|
|
@ -130,10 +114,11 @@ async def main():
|
|||
assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
|
||||
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
|
||||
is_empty = await graph_engine.is_empty()
|
||||
|
||||
assert is_empty, "Kuzu graph database is not empty"
|
||||
graph_engine = await get_graph_engine()
|
||||
nodes, edges = await graph_engine.get_graph_data()
|
||||
assert len(nodes) == 0 and len(edges) == 0, "Kuzu graph database is not empty"
|
||||
|
||||
finally:
|
||||
# Ensure cleanup even if tests fail
|
||||
|
|
|
|||
|
|
@ -35,14 +35,6 @@ async def main():
|
|||
explanation_file_path_nlp = os.path.join(
|
||||
pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
|
||||
)
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
|
||||
graph_engine = await get_graph_engine()
|
||||
|
||||
is_empty = await graph_engine.is_empty()
|
||||
|
||||
assert is_empty, "Graph has to be empty"
|
||||
|
||||
await cognee.add([explanation_file_path_nlp], dataset_name)
|
||||
|
||||
explanation_file_path_quantum = os.path.join(
|
||||
|
|
@ -50,16 +42,9 @@ async def main():
|
|||
)
|
||||
|
||||
await cognee.add([explanation_file_path_quantum], dataset_name)
|
||||
is_empty = await graph_engine.is_empty()
|
||||
|
||||
assert is_empty, "Graph has to be empty before cognify"
|
||||
|
||||
await cognee.cognify([dataset_name])
|
||||
|
||||
is_empty = await graph_engine.is_empty()
|
||||
|
||||
assert not is_empty, "Graph shouldn't be empty"
|
||||
|
||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||
|
||||
vector_engine = get_vector_engine()
|
||||
|
|
@ -132,8 +117,11 @@ async def main():
|
|||
assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
|
||||
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
is_empty = await graph_engine.is_empty()
|
||||
assert is_empty, "Neo4j graph database is not empty"
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
|
||||
graph_engine = await get_graph_engine()
|
||||
nodes, edges = await graph_engine.get_graph_data()
|
||||
assert len(nodes) == 0 and len(edges) == 0, "Neo4j graph database is not empty"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -1,21 +0,0 @@
|
|||
import pytest
|
||||
import cognee
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_empty_search_raises_SearchOnEmptyGraphError_on_empty_graph():
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
await cognee.add("Sample input")
|
||||
result = await cognee.search("Sample query")
|
||||
assert result == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_empty_search_doesnt_raise_SearchOnEmptyGraphError():
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
await cognee.add("Sample input")
|
||||
await cognee.cognify()
|
||||
result = await cognee.search("Sample query")
|
||||
assert result != []
|
||||
Loading…
Add table
Reference in a new issue