From 935763b08d644babb7d700687e704d68895a87e8 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 16 Jan 2025 17:32:44 +0100 Subject: [PATCH 1/9] fix: fixing changed lancedb search + pruning --- .../databases/vector/lancedb/LanceDBAdapter.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py b/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py index 2caa8be1e..7dc59151f 100644 --- a/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +++ b/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py @@ -152,7 +152,9 @@ class LanceDBAdapter(VectorDBInterface): connection = await self.get_connection() collection = await connection.open_table(collection_name) - results = await collection.vector_search(query_vector).to_pandas() + collection_size = await collection.count_rows() + + results = await collection.vector_search(query_vector).limit(collection_size).to_pandas() result_values = list(results.to_dict("index").values()) @@ -250,9 +252,16 @@ class LanceDBAdapter(VectorDBInterface): ) async def prune(self): - # Clean up the database if it was set up as temporary + connection = await self.get_connection() + collection_names = await connection.table_names() + + for collection_name in collection_names: + collection = await connection.open_table(collection_name) + await collection.delete("id IS NOT NULL") + await connection.drop_table(collection_name) + if self.url.startswith("/"): - LocalStorage.remove_all(self.url) # Remove the temporary directory and files inside + LocalStorage.remove_all(self.url) def get_data_point_schema(self, model_type): return copy_model( From bd6aafe9b77af930fefca4e86616e08757e4e786 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Thu, 16 Jan 2025 18:17:11 +0100 Subject: [PATCH 2/9] fix: fixes event loop handling on windows in dynamic steps example --- examples/python/dynamic_steps_example.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/examples/python/dynamic_steps_example.py b/examples/python/dynamic_steps_example.py index 4422dd39d..75881c5be 100644 --- a/examples/python/dynamic_steps_example.py +++ b/examples/python/dynamic_steps_example.py @@ -204,4 +204,9 @@ if __name__ == "__main__": "retriever": retrieve, } - asyncio.run(main(steps_to_enable)) + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + loop.run_until_complete(main(steps_to_enable)) + finally: + loop.run_until_complete(loop.shutdown_asyncgens()) From 704f2c68e20bb0026a5e15878ba61288585725ca Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Fri, 17 Jan 2025 09:25:05 +0100 Subject: [PATCH 3/9] fix: fixes old 0.8.6 ruff format to 0.9.2 --- .../documents/AudioDocument_test.py | 18 +++++----- .../documents/ImageDocument_test.py | 18 +++++----- .../integration/documents/PdfDocument_test.py | 18 +++++----- .../documents/TextDocument_test.py | 18 +++++----- .../documents/UnstructuredDocument_test.py | 30 ++++++++-------- cognee/tests/test_deduplication.py | 12 +++---- cognee/tests/test_falkordb.py | 6 ++-- cognee/tests/test_library.py | 6 ++-- cognee/tests/test_pgvector.py | 36 +++++++++---------- .../chunks/chunk_by_paragraph_2_test.py | 18 +++++----- .../chunks/chunk_by_paragraph_test.py | 6 ++-- .../chunks/chunk_by_sentence_test.py | 12 +++---- .../processing/chunks/chunk_by_word_test.py | 6 ++-- 13 files changed, 102 insertions(+), 102 deletions(-) diff --git a/cognee/tests/integration/documents/AudioDocument_test.py b/cognee/tests/integration/documents/AudioDocument_test.py index dbd43ddda..e07a2431b 100644 --- a/cognee/tests/integration/documents/AudioDocument_test.py +++ b/cognee/tests/integration/documents/AudioDocument_test.py @@ -36,12 +36,12 @@ def test_AudioDocument(): for ground_truth, paragraph_data in zip( GROUND_TRUTH, document.read(chunk_size=64, chunker="text_chunker") ): - assert ( - ground_truth["word_count"] == paragraph_data.word_count - ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }' - assert ground_truth["len_text"] == len( - paragraph_data.text - ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }' - assert ( - ground_truth["cut_type"] == paragraph_data.cut_type - ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }' + assert ground_truth["word_count"] == paragraph_data.word_count, ( + f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }' + ) + assert ground_truth["len_text"] == len(paragraph_data.text), ( + f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }' + ) + assert ground_truth["cut_type"] == paragraph_data.cut_type, ( + f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }' + ) diff --git a/cognee/tests/integration/documents/ImageDocument_test.py b/cognee/tests/integration/documents/ImageDocument_test.py index c0877ae99..b8d585419 100644 --- a/cognee/tests/integration/documents/ImageDocument_test.py +++ b/cognee/tests/integration/documents/ImageDocument_test.py @@ -25,12 +25,12 @@ def test_ImageDocument(): for ground_truth, paragraph_data in zip( GROUND_TRUTH, document.read(chunk_size=64, chunker="text_chunker") ): - assert ( - ground_truth["word_count"] == paragraph_data.word_count - ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }' - assert ground_truth["len_text"] == len( - paragraph_data.text - ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }' - assert ( - ground_truth["cut_type"] == paragraph_data.cut_type - ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }' + assert ground_truth["word_count"] == paragraph_data.word_count, ( + f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }' + ) + assert ground_truth["len_text"] == len(paragraph_data.text), ( + f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }' + ) + assert ground_truth["cut_type"] == paragraph_data.cut_type, ( + f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }' + ) diff --git a/cognee/tests/integration/documents/PdfDocument_test.py b/cognee/tests/integration/documents/PdfDocument_test.py index 8f28815d3..fc4307846 100644 --- a/cognee/tests/integration/documents/PdfDocument_test.py +++ b/cognee/tests/integration/documents/PdfDocument_test.py @@ -27,12 +27,12 @@ def test_PdfDocument(): for ground_truth, paragraph_data in zip( GROUND_TRUTH, document.read(chunk_size=1024, chunker="text_chunker") ): - assert ( - ground_truth["word_count"] == paragraph_data.word_count - ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }' - assert ground_truth["len_text"] == len( - paragraph_data.text - ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }' - assert ( - ground_truth["cut_type"] == paragraph_data.cut_type - ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }' + assert ground_truth["word_count"] == paragraph_data.word_count, ( + f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }' + ) + assert ground_truth["len_text"] == len(paragraph_data.text), ( + f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }' + ) + assert ground_truth["cut_type"] == paragraph_data.cut_type, ( + f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }' + ) diff --git a/cognee/tests/integration/documents/TextDocument_test.py b/cognee/tests/integration/documents/TextDocument_test.py index 1e143d563..6daec62b7 100644 --- a/cognee/tests/integration/documents/TextDocument_test.py +++ b/cognee/tests/integration/documents/TextDocument_test.py @@ -39,12 +39,12 @@ def test_TextDocument(input_file, chunk_size): for ground_truth, paragraph_data in zip( GROUND_TRUTH[input_file], document.read(chunk_size=chunk_size, chunker="text_chunker") ): - assert ( - ground_truth["word_count"] == paragraph_data.word_count - ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }' - assert ground_truth["len_text"] == len( - paragraph_data.text - ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }' - assert ( - ground_truth["cut_type"] == paragraph_data.cut_type - ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }' + assert ground_truth["word_count"] == paragraph_data.word_count, ( + f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }' + ) + assert ground_truth["len_text"] == len(paragraph_data.text), ( + f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }' + ) + assert ground_truth["cut_type"] == paragraph_data.cut_type, ( + f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }' + ) diff --git a/cognee/tests/integration/documents/UnstructuredDocument_test.py b/cognee/tests/integration/documents/UnstructuredDocument_test.py index e0278de81..773dc2293 100644 --- a/cognee/tests/integration/documents/UnstructuredDocument_test.py +++ b/cognee/tests/integration/documents/UnstructuredDocument_test.py @@ -71,32 +71,32 @@ def test_UnstructuredDocument(): for paragraph_data in pptx_document.read(chunk_size=1024, chunker="text_chunker"): assert 19 == paragraph_data.word_count, f" 19 != {paragraph_data.word_count = }" assert 104 == len(paragraph_data.text), f" 104 != {len(paragraph_data.text) = }" - assert ( - "sentence_cut" == paragraph_data.cut_type - ), f" sentence_cut != {paragraph_data.cut_type = }" + assert "sentence_cut" == paragraph_data.cut_type, ( + f" sentence_cut != {paragraph_data.cut_type = }" + ) # Test DOCX for paragraph_data in docx_document.read(chunk_size=1024, chunker="text_chunker"): assert 16 == paragraph_data.word_count, f" 16 != {paragraph_data.word_count = }" assert 145 == len(paragraph_data.text), f" 145 != {len(paragraph_data.text) = }" - assert ( - "sentence_end" == paragraph_data.cut_type - ), f" sentence_end != {paragraph_data.cut_type = }" + assert "sentence_end" == paragraph_data.cut_type, ( + f" sentence_end != {paragraph_data.cut_type = }" + ) # TEST CSV for paragraph_data in csv_document.read(chunk_size=1024, chunker="text_chunker"): assert 15 == paragraph_data.word_count, f" 15 != {paragraph_data.word_count = }" - assert ( - "A A A A A A A A A,A A A A A A,A A" == paragraph_data.text - ), f"Read text doesn't match expected text: {paragraph_data.text}" - assert ( - "sentence_cut" == paragraph_data.cut_type - ), f" sentence_cut != {paragraph_data.cut_type = }" + assert "A A A A A A A A A,A A A A A A,A A" == paragraph_data.text, ( + f"Read text doesn't match expected text: {paragraph_data.text}" + ) + assert "sentence_cut" == paragraph_data.cut_type, ( + f" sentence_cut != {paragraph_data.cut_type = }" + ) # Test XLSX for paragraph_data in xlsx_document.read(chunk_size=1024, chunker="text_chunker"): assert 36 == paragraph_data.word_count, f" 36 != {paragraph_data.word_count = }" assert 171 == len(paragraph_data.text), f" 171 != {len(paragraph_data.text) = }" - assert ( - "sentence_cut" == paragraph_data.cut_type - ), f" sentence_cut != {paragraph_data.cut_type = }" + assert "sentence_cut" == paragraph_data.cut_type, ( + f" sentence_cut != {paragraph_data.cut_type = }" + ) diff --git a/cognee/tests/test_deduplication.py b/cognee/tests/test_deduplication.py index 9c2df032d..89c866f12 100644 --- a/cognee/tests/test_deduplication.py +++ b/cognee/tests/test_deduplication.py @@ -30,9 +30,9 @@ async def test_deduplication(): result = await relational_engine.get_all_data_from_table("data") assert len(result) == 1, "More than one data entity was found." - assert ( - result[0]["name"] == "Natural_language_processing_copy" - ), "Result name does not match expected value." + assert result[0]["name"] == "Natural_language_processing_copy", ( + "Result name does not match expected value." + ) result = await relational_engine.get_all_data_from_table("datasets") assert len(result) == 2, "Unexpected number of datasets found." @@ -61,9 +61,9 @@ async def test_deduplication(): result = await relational_engine.get_all_data_from_table("data") assert len(result) == 1, "More than one data entity was found." - assert ( - hashlib.md5(text.encode("utf-8")).hexdigest() in result[0]["name"] - ), "Content hash is not a part of file name." + assert hashlib.md5(text.encode("utf-8")).hexdigest() in result[0]["name"], ( + "Content hash is not a part of file name." + ) await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) diff --git a/cognee/tests/test_falkordb.py b/cognee/tests/test_falkordb.py index 07ece9eb2..af0e87916 100755 --- a/cognee/tests/test_falkordb.py +++ b/cognee/tests/test_falkordb.py @@ -85,9 +85,9 @@ async def main(): from cognee.infrastructure.databases.relational import get_relational_engine - assert not os.path.exists( - get_relational_engine().db_path - ), "SQLite relational database is not empty" + assert not os.path.exists(get_relational_engine().db_path), ( + "SQLite relational database is not empty" + ) from cognee.infrastructure.databases.graph import get_graph_config diff --git a/cognee/tests/test_library.py b/cognee/tests/test_library.py index 8352b4161..192b67506 100755 --- a/cognee/tests/test_library.py +++ b/cognee/tests/test_library.py @@ -82,9 +82,9 @@ async def main(): from cognee.infrastructure.databases.relational import get_relational_engine - assert not os.path.exists( - get_relational_engine().db_path - ), "SQLite relational database is not empty" + assert not os.path.exists(get_relational_engine().db_path), ( + "SQLite relational database is not empty" + ) from cognee.infrastructure.databases.graph import get_graph_config diff --git a/cognee/tests/test_pgvector.py b/cognee/tests/test_pgvector.py index c241177f0..73b6be974 100644 --- a/cognee/tests/test_pgvector.py +++ b/cognee/tests/test_pgvector.py @@ -24,28 +24,28 @@ async def test_local_file_deletion(data_text, file_location): data_hash = hashlib.md5(encoded_text).hexdigest() # Get data entry from database based on hash contents data = (await session.scalars(select(Data).where(Data.content_hash == data_hash))).one() - assert os.path.isfile( - data.raw_data_location - ), f"Data location doesn't exist: {data.raw_data_location}" + assert os.path.isfile(data.raw_data_location), ( + f"Data location doesn't exist: {data.raw_data_location}" + ) # Test deletion of data along with local files created by cognee await engine.delete_data_entity(data.id) - assert not os.path.exists( - data.raw_data_location - ), f"Data location still exists after deletion: {data.raw_data_location}" + assert not os.path.exists(data.raw_data_location), ( + f"Data location still exists after deletion: {data.raw_data_location}" + ) async with engine.get_async_session() as session: # Get data entry from database based on file path data = ( await session.scalars(select(Data).where(Data.raw_data_location == file_location)) ).one() - assert os.path.isfile( - data.raw_data_location - ), f"Data location doesn't exist: {data.raw_data_location}" + assert os.path.isfile(data.raw_data_location), ( + f"Data location doesn't exist: {data.raw_data_location}" + ) # Test local files not created by cognee won't get deleted await engine.delete_data_entity(data.id) - assert os.path.exists( - data.raw_data_location - ), f"Data location doesn't exists: {data.raw_data_location}" + assert os.path.exists(data.raw_data_location), ( + f"Data location doesn't exists: {data.raw_data_location}" + ) async def test_getting_of_documents(dataset_name_1): @@ -54,16 +54,16 @@ async def test_getting_of_documents(dataset_name_1): user = await get_default_user() document_ids = await get_document_ids_for_user(user.id, [dataset_name_1]) - assert ( - len(document_ids) == 1 - ), f"Number of expected documents doesn't match {len(document_ids)} != 1" + assert len(document_ids) == 1, ( + f"Number of expected documents doesn't match {len(document_ids)} != 1" + ) # Test getting of documents for search when no dataset is provided user = await get_default_user() document_ids = await get_document_ids_for_user(user.id) - assert ( - len(document_ids) == 2 - ), f"Number of expected documents doesn't match {len(document_ids)} != 2" + assert len(document_ids) == 2, ( + f"Number of expected documents doesn't match {len(document_ids)} != 2" + ) async def main(): diff --git a/cognee/tests/unit/processing/chunks/chunk_by_paragraph_2_test.py b/cognee/tests/unit/processing/chunks/chunk_by_paragraph_2_test.py index 53098fc67..d8680a604 100644 --- a/cognee/tests/unit/processing/chunks/chunk_by_paragraph_2_test.py +++ b/cognee/tests/unit/processing/chunks/chunk_by_paragraph_2_test.py @@ -17,9 +17,9 @@ batch_paragraphs_vals = [True, False] def test_chunk_by_paragraph_isomorphism(input_text, paragraph_length, batch_paragraphs): chunks = chunk_by_paragraph(input_text, paragraph_length, batch_paragraphs) reconstructed_text = "".join([chunk["text"] for chunk in chunks]) - assert ( - reconstructed_text == input_text - ), f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }" + assert reconstructed_text == input_text, ( + f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }" + ) @pytest.mark.parametrize( @@ -36,9 +36,9 @@ def test_paragraph_chunk_length(input_text, paragraph_length, batch_paragraphs): chunk_lengths = np.array([len(list(chunk_by_word(chunk["text"]))) for chunk in chunks]) larger_chunks = chunk_lengths[chunk_lengths > paragraph_length] - assert np.all( - chunk_lengths <= paragraph_length - ), f"{paragraph_length = }: {larger_chunks} are too large" + assert np.all(chunk_lengths <= paragraph_length), ( + f"{paragraph_length = }: {larger_chunks} are too large" + ) @pytest.mark.parametrize( @@ -50,6 +50,6 @@ def test_chunk_by_paragraph_chunk_numbering(input_text, paragraph_length, batch_ data=input_text, paragraph_length=paragraph_length, batch_paragraphs=batch_paragraphs ) chunk_indices = np.array([chunk["chunk_index"] for chunk in chunks]) - assert np.all( - chunk_indices == np.arange(len(chunk_indices)) - ), f"{chunk_indices = } are not monotonically increasing" + assert np.all(chunk_indices == np.arange(len(chunk_indices))), ( + f"{chunk_indices = } are not monotonically increasing" + ) diff --git a/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test.py b/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test.py index e7d9a54ba..e420b2e9f 100644 --- a/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test.py +++ b/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test.py @@ -58,9 +58,9 @@ def run_chunking_test(test_text, expected_chunks): for expected_chunks_item, chunk in zip(expected_chunks, chunks): for key in ["text", "word_count", "cut_type"]: - assert ( - chunk[key] == expected_chunks_item[key] - ), f"{key = }: {chunk[key] = } != {expected_chunks_item[key] = }" + assert chunk[key] == expected_chunks_item[key], ( + f"{key = }: {chunk[key] = } != {expected_chunks_item[key] = }" + ) def test_chunking_whole_text(): diff --git a/cognee/tests/unit/processing/chunks/chunk_by_sentence_test.py b/cognee/tests/unit/processing/chunks/chunk_by_sentence_test.py index d1c75d7ed..efa053077 100644 --- a/cognee/tests/unit/processing/chunks/chunk_by_sentence_test.py +++ b/cognee/tests/unit/processing/chunks/chunk_by_sentence_test.py @@ -16,9 +16,9 @@ maximum_length_vals = [None, 8, 64] def test_chunk_by_sentence_isomorphism(input_text, maximum_length): chunks = chunk_by_sentence(input_text, maximum_length) reconstructed_text = "".join([chunk[1] for chunk in chunks]) - assert ( - reconstructed_text == input_text - ), f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }" + assert reconstructed_text == input_text, ( + f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }" + ) @pytest.mark.parametrize( @@ -36,6 +36,6 @@ def test_paragraph_chunk_length(input_text, maximum_length): chunk_lengths = np.array([len(list(chunk_by_word(chunk[1]))) for chunk in chunks]) larger_chunks = chunk_lengths[chunk_lengths > maximum_length] - assert np.all( - chunk_lengths <= maximum_length - ), f"{maximum_length = }: {larger_chunks} are too large" + assert np.all(chunk_lengths <= maximum_length), ( + f"{maximum_length = }: {larger_chunks} are too large" + ) diff --git a/cognee/tests/unit/processing/chunks/chunk_by_word_test.py b/cognee/tests/unit/processing/chunks/chunk_by_word_test.py index fb26638cb..d79fcdbc8 100644 --- a/cognee/tests/unit/processing/chunks/chunk_by_word_test.py +++ b/cognee/tests/unit/processing/chunks/chunk_by_word_test.py @@ -17,9 +17,9 @@ from cognee.tests.unit.processing.chunks.test_input import INPUT_TEXTS def test_chunk_by_word_isomorphism(input_text): chunks = chunk_by_word(input_text) reconstructed_text = "".join([chunk[0] for chunk in chunks]) - assert ( - reconstructed_text == input_text - ), f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }" + assert reconstructed_text == input_text, ( + f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }" + ) @pytest.mark.parametrize( From 981f35c1e00afb2b8457f9108dc8e99120303c75 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Fri, 17 Jan 2025 09:28:10 +0100 Subject: [PATCH 4/9] fix: fixes windows compatibility in examples --- examples/python/graphiti_example.py | 7 ++++++- examples/python/multimedia_example.py | 10 +++++++++- examples/python/simple_example.py | 10 +++++++++- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/examples/python/graphiti_example.py b/examples/python/graphiti_example.py index 8a5c8e5f0..2a79110ea 100644 --- a/examples/python/graphiti_example.py +++ b/examples/python/graphiti_example.py @@ -69,4 +69,9 @@ async def main(): if __name__ == "__main__": setup_logging(logging.ERROR) - asyncio.run(main()) + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + loop.run_until_complete(main()) + finally: + loop.run_until_complete(loop.shutdown_asyncgens()) diff --git a/examples/python/multimedia_example.py b/examples/python/multimedia_example.py index 6c8bc5995..7355dd09f 100644 --- a/examples/python/multimedia_example.py +++ b/examples/python/multimedia_example.py @@ -1,9 +1,11 @@ import os import asyncio import pathlib +import logging import cognee from cognee.api.v1.search import SearchType +from cognee.shared.utils import setup_logging # Prerequisites: # 1. Copy `.env.template` and rename it to `.env`. @@ -45,4 +47,10 @@ async def main(): if __name__ == "__main__": - asyncio.run(main()) + setup_logging(logging.ERROR) + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + loop.run_until_complete(main) + finally: + loop.run_until_complete(loop.shutdown_asyncgens()) diff --git a/examples/python/simple_example.py b/examples/python/simple_example.py index bf3c95de1..27441bb45 100644 --- a/examples/python/simple_example.py +++ b/examples/python/simple_example.py @@ -1,6 +1,8 @@ import asyncio import cognee +import logging from cognee.api.v1.search import SearchType +from cognee.shared.utils import setup_logging # Prerequisites: # 1. Copy `.env.template` and rename it to `.env`. @@ -66,4 +68,10 @@ async def main(): if __name__ == "__main__": - asyncio.run(main()) + setup_logging(logging.ERROR) + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + loop.run_until_complete(main()) + finally: + loop.run_until_complete(loop.shutdown_asyncgens()) From 08c22a542aad999cf3a22a58dcb4a9c392e81f74 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Fri, 17 Jan 2025 09:31:48 +0100 Subject: [PATCH 5/9] fix: fixes typo in multimedia example --- examples/python/multimedia_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/python/multimedia_example.py b/examples/python/multimedia_example.py index 7355dd09f..5b7a581fc 100644 --- a/examples/python/multimedia_example.py +++ b/examples/python/multimedia_example.py @@ -51,6 +51,6 @@ if __name__ == "__main__": loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) try: - loop.run_until_complete(main) + loop.run_until_complete(main()) finally: loop.run_until_complete(loop.shutdown_asyncgens()) From 4ea01b9d304fc23400d233a7bd96327989db855b Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Fri, 17 Jan 2025 09:52:05 +0100 Subject: [PATCH 6/9] fix: fixes cognee backend on windows --- entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/entrypoint.sh b/entrypoint.sh index ea9da9dd6..ec32ba08d 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -30,4 +30,4 @@ if [ "$ENVIRONMENT" = "dev" ]; then else gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level error cognee.api.client:app # python ./cognee/api/client.py -fi +fi \ No newline at end of file From 0b56e4b6880634207ccc4bcb48017a4eccdb3f95 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Fri, 17 Jan 2025 11:22:34 +0100 Subject: [PATCH 7/9] feat: Adds OS information to README --- README.md | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 8ff2d71d5..2c48fcdc2 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,7 @@ import os os.environ["LLM_API_KEY"] = "YOUR OPENAI_API_KEY" ``` -or +or ``` import cognee cognee.config.set_llm_api_key("YOUR_OPENAI_API_KEY") @@ -115,7 +115,7 @@ DB_PORT=5432 DB_NAME=cognee_db DB_USERNAME=cognee DB_PASSWORD=cognee -``` +``` ### Simple example @@ -140,14 +140,14 @@ async def main(): Natural language processing (NLP) is an interdisciplinary subfield of computer science and information retrieval. """ - + print("Adding text to cognee:") - print(text.strip()) + print(text.strip()) # Add the text, and make it available for cognify await cognee.add(text) print("Text added successfully.\n") - + print("Running cognify to create knowledge graph...\n") print("Cognify process steps:") print("1. Classifying the document: Determining the type and category of the input text.") @@ -156,19 +156,19 @@ async def main(): print("4. Adding data points: Storing the extracted chunks for processing.") print("5. Generating knowledge graph: Extracting entities and relationships to form a knowledge graph.") print("6. Summarizing text: Creating concise summaries of the content for quick insights.\n") - + # Use LLMs and cognee to create knowledge graph await cognee.cognify() print("Cognify process complete.\n") - + query_text = 'Tell me about NLP' print(f"Searching cognee for insights with query: '{query_text}'") # Query cognee for insights on the added text search_results = await cognee.search( SearchType.INSIGHTS, query_text=query_text ) - + print("Search results:") # Display results for result_text in search_results: @@ -212,7 +212,7 @@ Cognee supports a variety of tools and services for different operations: - **Language Models (LLMs)**: You can use either Anyscale or Ollama as your LLM provider. - **Graph Stores**: In addition to NetworkX, Neo4j is also supported for graph storage. - + - **User management**: Create individual user graphs and manage permissions ## Demo @@ -258,13 +258,13 @@ pip install cognee -| Name | Type | Current state | Known Issues | -|----------|--------------------|-------------------|--------------| -| Qdrant | Vector | Stable ✅ | | -| Weaviate | Vector | Stable ✅ | | -| LanceDB | Vector | Stable ✅ | | -| Neo4j | Graph | Stable ✅ | | -| NetworkX | Graph | Stable ✅ | | -| FalkorDB | Vector/Graph | Unstable ❌ | | -| PGVector | Vector | Stable ✅ | | -| Milvus | Vector | Stable ✅ | | +| Name | Type | Current state (Mac/Linux) | Known Issues | Current state (Win) | Known Issues | +|----------|--------------------|---------------------------|--------------|---------------------|--------------| +| Qdrant | Vector | Stable ✅ | | Untested ⏳ | | +| Weaviate | Vector | Stable ✅ | | Untested ⏳ | | +| LanceDB | Vector | Stable ✅ | | Stable ✅ | | +| Neo4j | Graph | Stable ✅ | | Stable ✅ | | +| NetworkX | Graph | Stable ✅ | | Stable ✅ | | +| FalkorDB | Vector/Graph | Unstable ❌ | | Unstable ❌ | | +| PGVector | Vector | Stable ✅ | | Unstable ❌ | | +| Milvus | Vector | Stable ✅ | | Untested ⏳ | | From 6f5d2bad4712e58e079b7a3faee7fe29192831e5 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Fri, 17 Jan 2025 11:29:51 +0100 Subject: [PATCH 8/9] Fix: Updates README --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 2c48fcdc2..5290ebe5c 100644 --- a/README.md +++ b/README.md @@ -260,11 +260,11 @@ pip install cognee | Name | Type | Current state (Mac/Linux) | Known Issues | Current state (Win) | Known Issues | |----------|--------------------|---------------------------|--------------|---------------------|--------------| -| Qdrant | Vector | Stable ✅ | | Untested ⏳ | | -| Weaviate | Vector | Stable ✅ | | Untested ⏳ | | +| Qdrant | Vector | Stable ✅ | | Unstable ❌ | | +| Weaviate | Vector | Stable ✅ | | Unstable ❌ | | | LanceDB | Vector | Stable ✅ | | Stable ✅ | | | Neo4j | Graph | Stable ✅ | | Stable ✅ | | | NetworkX | Graph | Stable ✅ | | Stable ✅ | | -| FalkorDB | Vector/Graph | Unstable ❌ | | Unstable ❌ | | +| FalkorDB | Vector/Graph | Stable ✅ | | Unstable ❌ | | | PGVector | Vector | Stable ✅ | | Unstable ❌ | | -| Milvus | Vector | Stable ✅ | | Untested ⏳ | | +| Milvus | Vector | Stable ✅ | | Unstable ❌ | | From b0634da43e121de40d26f2b61b47b6a8d4f71ac0 Mon Sep 17 00:00:00 2001 From: hajdul88 <52442977+hajdul88@users.noreply.github.com> Date: Fri, 17 Jan 2025 11:30:45 +0100 Subject: [PATCH 9/9] fix: fixes typo in README --- README.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 5290ebe5c..a14ddebc2 100644 --- a/README.md +++ b/README.md @@ -258,13 +258,13 @@ pip install cognee -| Name | Type | Current state (Mac/Linux) | Known Issues | Current state (Win) | Known Issues | -|----------|--------------------|---------------------------|--------------|---------------------|--------------| -| Qdrant | Vector | Stable ✅ | | Unstable ❌ | | -| Weaviate | Vector | Stable ✅ | | Unstable ❌ | | -| LanceDB | Vector | Stable ✅ | | Stable ✅ | | -| Neo4j | Graph | Stable ✅ | | Stable ✅ | | -| NetworkX | Graph | Stable ✅ | | Stable ✅ | | -| FalkorDB | Vector/Graph | Stable ✅ | | Unstable ❌ | | -| PGVector | Vector | Stable ✅ | | Unstable ❌ | | -| Milvus | Vector | Stable ✅ | | Unstable ❌ | | +| Name | Type | Current state (Mac/Linux) | Known Issues | Current state (Windows) | Known Issues | +|----------|--------------------|---------------------------|--------------|-------------------------|--------------| +| Qdrant | Vector | Stable ✅ | | Unstable ❌ | | +| Weaviate | Vector | Stable ✅ | | Unstable ❌ | | +| LanceDB | Vector | Stable ✅ | | Stable ✅ | | +| Neo4j | Graph | Stable ✅ | | Stable ✅ | | +| NetworkX | Graph | Stable ✅ | | Stable ✅ | | +| FalkorDB | Vector/Graph | Stable ✅ | | Unstable ❌ | | +| PGVector | Vector | Stable ✅ | | Unstable ❌ | | +| Milvus | Vector | Stable ✅ | | Unstable ❌ | |