From bbd51e8c27c4105edefc415ae4750b9399b9ad9b Mon Sep 17 00:00:00 2001
From: vasilije
Date: Thu, 16 Jan 2025 21:08:54 +0100
Subject: [PATCH] Fix for now

---
 cognee-mcp/cognee_mcp/server.py                |  1 +
 cognee/shared/utils.py                         | 16 ---------
 .../documents/AudioDocument_test.py            | 18 +++++-----
 .../documents/ImageDocument_test.py            | 18 +++++-----
 .../integration/documents/PdfDocument_test.py  | 18 +++++-----
 .../documents/TextDocument_test.py             | 18 +++++-----
 .../documents/UnstructuredDocument_test.py     | 30 ++++++++--------
 cognee/tests/test_deduplication.py             | 12 +++----
 cognee/tests/test_falkordb.py                  |  6 ++--
 cognee/tests/test_library.py                   |  6 ++--
 cognee/tests/test_pgvector.py                  | 36 +++++++++----------
 .../chunks/chunk_by_paragraph_2_test.py        | 18 +++++-----
 .../chunks/chunk_by_paragraph_test.py          |  6 ++--
 .../chunks/chunk_by_sentence_test.py           | 12 +++----
 .../processing/chunks/chunk_by_word_test.py    |  6 ++--
 15 files changed, 103 insertions(+), 118 deletions(-)

diff --git a/cognee-mcp/cognee_mcp/server.py b/cognee-mcp/cognee_mcp/server.py
index 37238a783..087411fa5 100644
--- a/cognee-mcp/cognee_mcp/server.py
+++ b/cognee-mcp/cognee_mcp/server.py
@@ -130,6 +130,7 @@ def get_freshest_png(directory: str) -> Image.Image:
     except (IOError, OSError) as e:
         raise IOError(f"Failed to open PNG file {freshest_path}") from e
 
+
 @server.call_tool()
 async def handle_call_tool(
     name: str, arguments: dict | None
diff --git a/cognee/shared/utils.py b/cognee/shared/utils.py
index 944ae798e..f1eae1ace 100644
--- a/cognee/shared/utils.py
+++ b/cognee/shared/utils.py
@@ -11,8 +11,6 @@ import networkx as nx
 import pandas as pd
 import matplotlib.pyplot as plt
 import tiktoken
-import nltk
-import base64
 import time
 import logging
 
@@ -30,7 +28,6 @@ from cognee.shared.exceptions import IngestionError
 
 proxy_url = "https://test.prometh.ai"
 
-
 def get_entities(tagged_tokens):
     nltk.download("maxent_ne_chunker", quiet=True)
     from nltk.chunk import ne_chunk
@@ -271,11 +268,6 @@ async def render_graph(
 
 # return df.replace([np.inf, -np.inf, np.nan], None)
 
-
-
-
-
-
 logging.basicConfig(level=logging.INFO)
 
 
@@ -450,14 +442,6 @@ async def create_cognee_style_network_with_logo(
     )
     p.add_tools(hover_tool)
 
-    # Get the latest Unix timestamp as an integer
-    timestamp = int(time.time())
-
-    # Construct your filename
-    filename = f"{timestamp}.png"
-
-
-
     logging.info(f"Saving visualization to {output_filename}...")
     html_content = file_html(p, CDN, title)
     with open(output_filename, "w") as f:
diff --git a/cognee/tests/integration/documents/AudioDocument_test.py b/cognee/tests/integration/documents/AudioDocument_test.py
index e07a2431b..dbd43ddda 100644
--- a/cognee/tests/integration/documents/AudioDocument_test.py
+++ b/cognee/tests/integration/documents/AudioDocument_test.py
@@ -36,12 +36,12 @@ def test_AudioDocument():
     for ground_truth, paragraph_data in zip(
         GROUND_TRUTH, document.read(chunk_size=64, chunker="text_chunker")
     ):
-        assert ground_truth["word_count"] == paragraph_data.word_count, (
-            f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
-        )
-        assert ground_truth["len_text"] == len(paragraph_data.text), (
-            f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
-        )
-        assert ground_truth["cut_type"] == paragraph_data.cut_type, (
-            f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'
-        )
+        assert (
+            ground_truth["word_count"] == paragraph_data.word_count
+        ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
+        assert ground_truth["len_text"] == len(
+            paragraph_data.text
+        ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
+        assert (
+            ground_truth["cut_type"] == paragraph_data.cut_type
+        ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'
diff --git a/cognee/tests/integration/documents/ImageDocument_test.py b/cognee/tests/integration/documents/ImageDocument_test.py
index b8d585419..c0877ae99 100644
--- a/cognee/tests/integration/documents/ImageDocument_test.py
+++ b/cognee/tests/integration/documents/ImageDocument_test.py
@@ -25,12 +25,12 @@ def test_ImageDocument():
     for ground_truth, paragraph_data in zip(
         GROUND_TRUTH, document.read(chunk_size=64, chunker="text_chunker")
     ):
-        assert ground_truth["word_count"] == paragraph_data.word_count, (
-            f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
-        )
-        assert ground_truth["len_text"] == len(paragraph_data.text), (
-            f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
-        )
-        assert ground_truth["cut_type"] == paragraph_data.cut_type, (
-            f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'
-        )
+        assert (
+            ground_truth["word_count"] == paragraph_data.word_count
+        ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
+        assert ground_truth["len_text"] == len(
+            paragraph_data.text
+        ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
+        assert (
+            ground_truth["cut_type"] == paragraph_data.cut_type
+        ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'
diff --git a/cognee/tests/integration/documents/PdfDocument_test.py b/cognee/tests/integration/documents/PdfDocument_test.py
index fc4307846..8f28815d3 100644
--- a/cognee/tests/integration/documents/PdfDocument_test.py
+++ b/cognee/tests/integration/documents/PdfDocument_test.py
@@ -27,12 +27,12 @@ def test_PdfDocument():
     for ground_truth, paragraph_data in zip(
         GROUND_TRUTH, document.read(chunk_size=1024, chunker="text_chunker")
     ):
-        assert ground_truth["word_count"] == paragraph_data.word_count, (
-            f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
-        )
-        assert ground_truth["len_text"] == len(paragraph_data.text), (
-            f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
-        )
-        assert ground_truth["cut_type"] == paragraph_data.cut_type, (
-            f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'
-        )
+        assert (
+            ground_truth["word_count"] == paragraph_data.word_count
+        ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
+        assert ground_truth["len_text"] == len(
+            paragraph_data.text
+        ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
+        assert (
+            ground_truth["cut_type"] == paragraph_data.cut_type
+        ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'
diff --git a/cognee/tests/integration/documents/TextDocument_test.py b/cognee/tests/integration/documents/TextDocument_test.py
index 6daec62b7..1e143d563 100644
--- a/cognee/tests/integration/documents/TextDocument_test.py
+++ b/cognee/tests/integration/documents/TextDocument_test.py
@@ -39,12 +39,12 @@ def test_TextDocument(input_file, chunk_size):
     for ground_truth, paragraph_data in zip(
         GROUND_TRUTH[input_file], document.read(chunk_size=chunk_size, chunker="text_chunker")
     ):
-        assert ground_truth["word_count"] == paragraph_data.word_count, (
-            f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
-        )
-        assert ground_truth["len_text"] == len(paragraph_data.text), (
-            f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
-        )
-        assert ground_truth["cut_type"] == paragraph_data.cut_type, (
-            f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'
-        )
+        assert (
+            ground_truth["word_count"] == paragraph_data.word_count
+        ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
+        assert ground_truth["len_text"] == len(
+            paragraph_data.text
+        ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
+        assert (
+            ground_truth["cut_type"] == paragraph_data.cut_type
+        ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'
diff --git a/cognee/tests/integration/documents/UnstructuredDocument_test.py b/cognee/tests/integration/documents/UnstructuredDocument_test.py
index 773dc2293..e0278de81 100644
--- a/cognee/tests/integration/documents/UnstructuredDocument_test.py
+++ b/cognee/tests/integration/documents/UnstructuredDocument_test.py
@@ -71,32 +71,32 @@ def test_UnstructuredDocument():
     for paragraph_data in pptx_document.read(chunk_size=1024, chunker="text_chunker"):
         assert 19 == paragraph_data.word_count, f" 19 != {paragraph_data.word_count = }"
         assert 104 == len(paragraph_data.text), f" 104 != {len(paragraph_data.text) = }"
-        assert "sentence_cut" == paragraph_data.cut_type, (
-            f" sentence_cut != {paragraph_data.cut_type = }"
-        )
+        assert (
+            "sentence_cut" == paragraph_data.cut_type
+        ), f" sentence_cut != {paragraph_data.cut_type = }"
 
     # Test DOCX
     for paragraph_data in docx_document.read(chunk_size=1024, chunker="text_chunker"):
         assert 16 == paragraph_data.word_count, f" 16 != {paragraph_data.word_count = }"
         assert 145 == len(paragraph_data.text), f" 145 != {len(paragraph_data.text) = }"
-        assert "sentence_end" == paragraph_data.cut_type, (
-            f" sentence_end != {paragraph_data.cut_type = }"
-        )
+        assert (
+            "sentence_end" == paragraph_data.cut_type
+        ), f" sentence_end != {paragraph_data.cut_type = }"
 
     # TEST CSV
     for paragraph_data in csv_document.read(chunk_size=1024, chunker="text_chunker"):
         assert 15 == paragraph_data.word_count, f" 15 != {paragraph_data.word_count = }"
-        assert "A A A A A A A A A,A A A A A A,A A" == paragraph_data.text, (
-            f"Read text doesn't match expected text: {paragraph_data.text}"
-        )
-        assert "sentence_cut" == paragraph_data.cut_type, (
-            f" sentence_cut != {paragraph_data.cut_type = }"
-        )
+        assert (
+            "A A A A A A A A A,A A A A A A,A A" == paragraph_data.text
+        ), f"Read text doesn't match expected text: {paragraph_data.text}"
+        assert (
+            "sentence_cut" == paragraph_data.cut_type
+        ), f" sentence_cut != {paragraph_data.cut_type = }"
 
     # Test XLSX
     for paragraph_data in xlsx_document.read(chunk_size=1024, chunker="text_chunker"):
         assert 36 == paragraph_data.word_count, f" 36 != {paragraph_data.word_count = }"
         assert 171 == len(paragraph_data.text), f" 171 != {len(paragraph_data.text) = }"
-        assert "sentence_cut" == paragraph_data.cut_type, (
-            f" sentence_cut != {paragraph_data.cut_type = }"
-        )
+        assert (
+            "sentence_cut" == paragraph_data.cut_type
+        ), f" sentence_cut != {paragraph_data.cut_type = }"
diff --git a/cognee/tests/test_deduplication.py b/cognee/tests/test_deduplication.py
index 89c866f12..9c2df032d 100644
--- a/cognee/tests/test_deduplication.py
+++ b/cognee/tests/test_deduplication.py
@@ -30,9 +30,9 @@ async def test_deduplication():
 
     result = await relational_engine.get_all_data_from_table("data")
     assert len(result) == 1, "More than one data entity was found."
-    assert result[0]["name"] == "Natural_language_processing_copy", (
-        "Result name does not match expected value."
-    )
+    assert (
+        result[0]["name"] == "Natural_language_processing_copy"
+    ), "Result name does not match expected value."
 
     result = await relational_engine.get_all_data_from_table("datasets")
     assert len(result) == 2, "Unexpected number of datasets found."
@@ -61,9 +61,9 @@ async def test_deduplication():
 
     result = await relational_engine.get_all_data_from_table("data")
     assert len(result) == 1, "More than one data entity was found."
-    assert hashlib.md5(text.encode("utf-8")).hexdigest() in result[0]["name"], (
-        "Content hash is not a part of file name."
-    )
+    assert (
+        hashlib.md5(text.encode("utf-8")).hexdigest() in result[0]["name"]
+    ), "Content hash is not a part of file name."
 
     await cognee.prune.prune_data()
     await cognee.prune.prune_system(metadata=True)
diff --git a/cognee/tests/test_falkordb.py b/cognee/tests/test_falkordb.py
index af0e87916..07ece9eb2 100755
--- a/cognee/tests/test_falkordb.py
+++ b/cognee/tests/test_falkordb.py
@@ -85,9 +85,9 @@ async def main():
 
     from cognee.infrastructure.databases.relational import get_relational_engine
 
-    assert not os.path.exists(get_relational_engine().db_path), (
-        "SQLite relational database is not empty"
-    )
+    assert not os.path.exists(
+        get_relational_engine().db_path
+    ), "SQLite relational database is not empty"
 
     from cognee.infrastructure.databases.graph import get_graph_config
 
diff --git a/cognee/tests/test_library.py b/cognee/tests/test_library.py
index 192b67506..8352b4161 100755
--- a/cognee/tests/test_library.py
+++ b/cognee/tests/test_library.py
@@ -82,9 +82,9 @@ async def main():
 
     from cognee.infrastructure.databases.relational import get_relational_engine
 
-    assert not os.path.exists(get_relational_engine().db_path), (
-        "SQLite relational database is not empty"
-    )
+    assert not os.path.exists(
+        get_relational_engine().db_path
+    ), "SQLite relational database is not empty"
 
     from cognee.infrastructure.databases.graph import get_graph_config
 
diff --git a/cognee/tests/test_pgvector.py b/cognee/tests/test_pgvector.py
index 73b6be974..c241177f0 100644
--- a/cognee/tests/test_pgvector.py
+++ b/cognee/tests/test_pgvector.py
@@ -24,28 +24,28 @@ async def test_local_file_deletion(data_text, file_location):
         data_hash = hashlib.md5(encoded_text).hexdigest()
         # Get data entry from database based on hash contents
         data = (await session.scalars(select(Data).where(Data.content_hash == data_hash))).one()
-        assert os.path.isfile(data.raw_data_location), (
-            f"Data location doesn't exist: {data.raw_data_location}"
-        )
+        assert os.path.isfile(
+            data.raw_data_location
+        ), f"Data location doesn't exist: {data.raw_data_location}"
 
         # Test deletion of data along with local files created by cognee
         await engine.delete_data_entity(data.id)
-        assert not os.path.exists(data.raw_data_location), (
-            f"Data location still exists after deletion: {data.raw_data_location}"
-        )
+        assert not os.path.exists(
+            data.raw_data_location
+        ), f"Data location still exists after deletion: {data.raw_data_location}"
 
     async with engine.get_async_session() as session:
         # Get data entry from database based on file path
        data = (
             await session.scalars(select(Data).where(Data.raw_data_location == file_location))
         ).one()
-        assert os.path.isfile(data.raw_data_location), (
-            f"Data location doesn't exist: {data.raw_data_location}"
-        )
+        assert os.path.isfile(
+            data.raw_data_location
+        ), f"Data location doesn't exist: {data.raw_data_location}"
 
         # Test local files not created by cognee won't get deleted
         await engine.delete_data_entity(data.id)
-        assert os.path.exists(data.raw_data_location), (
-            f"Data location doesn't exists: {data.raw_data_location}"
-        )
+        assert os.path.exists(
+            data.raw_data_location
+        ), f"Data location doesn't exist: {data.raw_data_location}"
 
@@ -54,16 +54,16 @@ async def test_local_file_deletion(data_text, file_location):
 async def test_getting_of_documents(dataset_name_1):
     user = await get_default_user()
     document_ids = await get_document_ids_for_user(user.id, [dataset_name_1])
-    assert len(document_ids) == 1, (
-        f"Number of expected documents doesn't match {len(document_ids)} != 1"
-    )
+    assert (
+        len(document_ids) == 1
+    ), f"Number of expected documents doesn't match {len(document_ids)} != 1"
 
     # Test getting of documents for search when no dataset is provided
     user = await get_default_user()
     document_ids = await get_document_ids_for_user(user.id)
-    assert len(document_ids) == 2, (
-        f"Number of expected documents doesn't match {len(document_ids)} != 2"
-    )
+    assert (
+        len(document_ids) == 2
+    ), f"Number of expected documents doesn't match {len(document_ids)} != 2"
 
 
 async def main():
diff --git a/cognee/tests/unit/processing/chunks/chunk_by_paragraph_2_test.py b/cognee/tests/unit/processing/chunks/chunk_by_paragraph_2_test.py
index d8680a604..53098fc67 100644
--- a/cognee/tests/unit/processing/chunks/chunk_by_paragraph_2_test.py
+++ b/cognee/tests/unit/processing/chunks/chunk_by_paragraph_2_test.py
@@ -17,9 +17,9 @@ batch_paragraphs_vals = [True, False]
 def test_chunk_by_paragraph_isomorphism(input_text, paragraph_length, batch_paragraphs):
     chunks = chunk_by_paragraph(input_text, paragraph_length, batch_paragraphs)
     reconstructed_text = "".join([chunk["text"] for chunk in chunks])
-    assert reconstructed_text == input_text, (
-        f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
-    )
+    assert (
+        reconstructed_text == input_text
+    ), f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
 
 
 @pytest.mark.parametrize(
@@ -36,9 +36,9 @@ def test_paragraph_chunk_length(input_text, paragraph_length, batch_paragraphs):
     chunk_lengths = np.array([len(list(chunk_by_word(chunk["text"]))) for chunk in chunks])
 
     larger_chunks = chunk_lengths[chunk_lengths > paragraph_length]
-    assert np.all(chunk_lengths <= paragraph_length), (
-        f"{paragraph_length = }: {larger_chunks} are too large"
-    )
+    assert np.all(
+        chunk_lengths <= paragraph_length
+    ), f"{paragraph_length = }: {larger_chunks} are too large"
 
 
 @pytest.mark.parametrize(
@@ -50,6 +50,6 @@ def test_chunk_by_paragraph_chunk_numbering(input_text, paragraph_length, batch_
         data=input_text, paragraph_length=paragraph_length, batch_paragraphs=batch_paragraphs
     )
     chunk_indices = np.array([chunk["chunk_index"] for chunk in chunks])
-    assert np.all(chunk_indices == np.arange(len(chunk_indices))), (
-        f"{chunk_indices = } are not monotonically increasing"
-    )
+    assert np.all(
+        chunk_indices == np.arange(len(chunk_indices))
+    ), f"{chunk_indices = } are not monotonically increasing"
diff --git a/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test.py b/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test.py
index e420b2e9f..e7d9a54ba 100644
--- a/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test.py
+++ b/cognee/tests/unit/processing/chunks/chunk_by_paragraph_test.py
@@ -58,9 +58,9 @@ def run_chunking_test(test_text, expected_chunks):
 
     for expected_chunks_item, chunk in zip(expected_chunks, chunks):
         for key in ["text", "word_count", "cut_type"]:
-            assert chunk[key] == expected_chunks_item[key], (
-                f"{key = }: {chunk[key] = } != {expected_chunks_item[key] = }"
-            )
+            assert (
+                chunk[key] == expected_chunks_item[key]
+            ), f"{key = }: {chunk[key] = } != {expected_chunks_item[key] = }"
 
 
 def test_chunking_whole_text():
diff --git a/cognee/tests/unit/processing/chunks/chunk_by_sentence_test.py b/cognee/tests/unit/processing/chunks/chunk_by_sentence_test.py
index efa053077..d1c75d7ed 100644
--- a/cognee/tests/unit/processing/chunks/chunk_by_sentence_test.py
+++ b/cognee/tests/unit/processing/chunks/chunk_by_sentence_test.py
@@ -16,9 +16,9 @@ maximum_length_vals = [None, 8, 64]
 def test_chunk_by_sentence_isomorphism(input_text, maximum_length):
     chunks = chunk_by_sentence(input_text, maximum_length)
     reconstructed_text = "".join([chunk[1] for chunk in chunks])
-    assert reconstructed_text == input_text, (
-        f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
-    )
+    assert (
+        reconstructed_text == input_text
+    ), f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
 
 
 @pytest.mark.parametrize(
@@ -36,6 +36,6 @@ def test_paragraph_chunk_length(input_text, maximum_length):
     chunk_lengths = np.array([len(list(chunk_by_word(chunk[1]))) for chunk in chunks])
 
     larger_chunks = chunk_lengths[chunk_lengths > maximum_length]
-    assert np.all(chunk_lengths <= maximum_length), (
-        f"{maximum_length = }: {larger_chunks} are too large"
-    )
+    assert np.all(
+        chunk_lengths <= maximum_length
+    ), f"{maximum_length = }: {larger_chunks} are too large"
diff --git a/cognee/tests/unit/processing/chunks/chunk_by_word_test.py b/cognee/tests/unit/processing/chunks/chunk_by_word_test.py
index d79fcdbc8..fb26638cb 100644
--- a/cognee/tests/unit/processing/chunks/chunk_by_word_test.py
+++ b/cognee/tests/unit/processing/chunks/chunk_by_word_test.py
@@ -17,9 +17,9 @@ from cognee.tests.unit.processing.chunks.test_input import INPUT_TEXTS
 def test_chunk_by_word_isomorphism(input_text):
     chunks = chunk_by_word(input_text)
     reconstructed_text = "".join([chunk[0] for chunk in chunks])
-    assert reconstructed_text == input_text, (
-        f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
-    )
+    assert (
+        reconstructed_text == input_text
+    ), f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
 
 
 @pytest.mark.parametrize(