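"""Integration test for cognee backed by a ChromaDB vector store.

Covers dataset ingestion and cognify, the search APIs, permission-scoped
document retrieval, vector search with no result limit, and pruning of
local data and system metadata.
"""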
import os
import pathlib

import cognee
from cognee.shared.logging_utils import get_logger
from cognee.infrastructure.files.storage import get_storage_config
from cognee.modules.data.models import Data
from cognee.modules.users.methods import get_default_user
from cognee.modules.search.types import SearchType
from cognee.modules.search.operations import get_history

logger = get_logger()


async def test_local_file_deletion(data_text, file_location):
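    """Check that deleting a Data entity removes files created by cognee,
    while files supplied by the user are left on disk."""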
    from sqlalchemy import select
    import hashlib
    from cognee.infrastructure.databases.relational import get_relational_engine

    engine = get_relational_engine()

    async with engine.get_async_session() as session:
        # Get hash of data contents
        encoded_text = data_text.encode("utf-8")
        data_hash = hashlib.md5(encoded_text).hexdigest()
        # Get data entry from database based on content hash
        data = (await session.scalars(select(Data).where(Data.content_hash == data_hash))).one()
        assert os.path.isfile(data.raw_data_location.replace("file://", "")), (
            f"Data location doesn't exist: {data.raw_data_location}"
        )
        # Test deletion of data along with local files created by cognee
        await engine.delete_data_entity(data.id)
        assert not os.path.exists(data.raw_data_location.replace("file://", "")), (
            f"Data location still exists after deletion: {data.raw_data_location}"
        )

    async with engine.get_async_session() as session:
        # Get data entry from database based on file path
        data = (
            await session.scalars(select(Data).where(Data.raw_data_location == file_location))
        ).one()
        assert os.path.isfile(data.raw_data_location.replace("file://", "")), (
            f"Data location doesn't exist: {data.raw_data_location}"
        )
        # Test that local files not created by cognee don't get deleted
        await engine.delete_data_entity(data.id)
        assert os.path.exists(data.raw_data_location.replace("file://", "")), (
            f"Data location doesn't exist: {data.raw_data_location}"
        )


async def test_getting_of_documents(dataset_name_1):
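    """Check that document lookup for a user is scoped to the given dataset,
    and covers all accessible documents when no dataset is provided."""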
    # Test getting of documents for search per dataset
    from cognee.modules.users.permissions.methods import get_document_ids_for_user

    user = await get_default_user()
    document_ids = await get_document_ids_for_user(user.id, [dataset_name_1])
    assert len(document_ids) == 1, (
        f"Number of expected documents doesn't match: {len(document_ids)} != 1"
    )

    # Test getting of documents for search when no dataset is provided
    user = await get_default_user()
    document_ids = await get_document_ids_for_user(user.id)
    assert len(document_ids) == 2, (
        f"Number of expected documents doesn't match: {len(document_ids)} != 2"
    )


async def test_vector_engine_search_none_limit():
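    """Check that vector search with limit=None returns all matches instead
    of silently applying a default result limit."""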
    file_path_quantum = os.path.join(
        pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
    )

    file_path_nlp = os.path.join(
        pathlib.Path(__file__).parent,
        "test_data/Natural_language_processing.txt",
    )

    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    await cognee.add(file_path_quantum)
    await cognee.add(file_path_nlp)

    await cognee.cognify()

    query_text = "Tell me about Quantum computers"

    from cognee.infrastructure.databases.vector import get_vector_engine

    vector_engine = get_vector_engine()

    collection_name = "Entity_name"

    query_vector = (await vector_engine.embedding_engine.embed_text([query_text]))[0]

    result = await vector_engine.search(
        collection_name=collection_name, query_vector=query_vector, limit=None, include_payload=True
    )

    # Check that we did not accidentally use any default value for limit
    # in vector search along the way (like 5, 10, or 15)
    assert len(result) > 15


async def main():
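    """Run the ChromaDB integration flow end to end."""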
    # Configure ChromaDB as the vector store for this test run
    cognee.config.set_vector_db_config(
        {
            "vector_db_url": "http://localhost:3002",
            "vector_db_key": "test-token",
            "vector_db_provider": "chromadb",
        }
    )

    # Keep test data and system state in dedicated local directories
    data_directory_path = str(
        pathlib.Path(
            os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_chromadb")
        ).resolve()
    )
    cognee.config.data_root_directory(data_directory_path)
    cognee_directory_path = str(
        pathlib.Path(
            os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_chromadb")
        ).resolve()
    )
    cognee.config.system_root_directory(cognee_directory_path)

    # Start from a clean slate
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    dataset_name_1 = "natural_language"
    dataset_name_2 = "quantum"

    explanation_file_path_nlp = os.path.join(
        pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
    )
    await cognee.add([explanation_file_path_nlp], dataset_name_1)

    explanation_file_path_quantum = os.path.join(
        pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
    )
    await cognee.add([explanation_file_path_quantum], dataset_name_2)

    await cognee.cognify([dataset_name_2, dataset_name_1])

    from cognee.infrastructure.databases.vector import get_vector_engine

    await test_getting_of_documents(dataset_name_1)

    # Pick an arbitrary entity from the graph as a search anchor
    vector_engine = get_vector_engine()
    random_node = (await vector_engine.search("Entity_name", "Quantum computer"))[0]
    random_node_name = random_node.payload["text"]

    search_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name
    )
    assert len(search_results) != 0, "The search results list is empty."
    print("\n\nExtracted sentences are:\n")
    for result in search_results:
        print(f"{result}\n")

    search_results = await cognee.search(
        query_type=SearchType.CHUNKS, query_text=random_node_name, datasets=[dataset_name_2]
    )
    assert len(search_results) != 0, "The search results list is empty."
    print("\n\nExtracted chunks are:\n")
    for result in search_results:
        print(f"{result}\n")

    graph_completion = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION,
        query_text=random_node_name,
        datasets=[dataset_name_2],
    )
    assert len(graph_completion) != 0, "Completion result is empty."
    print("Completion result is:")
    print(graph_completion)

    search_results = await cognee.search(
        query_type=SearchType.SUMMARIES, query_text=random_node_name
    )
    assert len(search_results) != 0, "Query related summaries don't exist."
    print("\n\nExtracted summaries are:\n")
    for result in search_results:
        print(f"{result}\n")

    user = await get_default_user()
    history = await get_history(user.id)
    assert len(history) == 8, "Search history is not correct."

    # Prune data and verify the local data files are gone
    await cognee.prune.prune_data()
    data_root_directory = get_storage_config()["data_root_directory"]
    assert not os.path.isdir(data_root_directory), "Local data files were not deleted"

    # Prune system metadata and verify the vector store is empty
    await cognee.prune.prune_system(metadata=True)
    tables_in_database = await vector_engine.get_collection_names()
    assert len(tables_in_database) == 0, "ChromaDB database is not empty"

    await test_vector_engine_search_none_limit()


if __name__ == "__main__":
    import asyncio

    asyncio.run(main())