From 76a0aa7e8b9d698b744c845db9870a7d77ea4350 Mon Sep 17 00:00:00 2001 From: vasilije Date: Sun, 5 Jan 2025 19:48:35 +0100 Subject: [PATCH] Fix linter issues --- alembic/env.py | 3 +- cognee/api/client.py | 1 - cognee/api/v1/cognify/cognify_v2.py | 2 +- .../datasets/routers/get_datasets_router.py | 3 +- cognee/api/v1/search/search.legacy.py | 2 +- cognee/api/v1/search/search_v2.py | 2 +- .../databases/graph/networkx/adapter.py | 6 ++- .../hybrid/falkordb/FalkorDBAdapter.py | 4 +- .../databases/vector/milvus/MilvusAdapter.py | 2 +- .../databases/vector/pinecone/adapter.py | 16 ++++---- cognee/infrastructure/llm/openai/adapter.py | 1 - .../add_model_class_to_graph.py | 2 +- cognee/modules/storage/utils/__init__.py | 4 +- cognee/modules/users/get_user_db.py | 2 - cognee/modules/users/get_user_manager.py | 4 +- cognee/modules/users/models/User.py | 2 +- cognee/shared/utils.py | 2 - cognee/tasks/ingestion/transform_data.py | 4 +- .../processing/chunks/chunk_by_word_test.py | 4 +- .../tests/unit/processing/utils/utils_test.py | 41 ++++++++----------- evals/generate_test_set.py | 9 ++-- evals/simple_rag_vs_cognee_eval.py | 7 +--- .../benchmark_function.py | 5 --- pyproject.toml | 7 +++- 24 files changed, 57 insertions(+), 78 deletions(-) diff --git a/alembic/env.py b/alembic/env.py index caeae0fdc..60af9c0dd 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -6,6 +6,7 @@ from sqlalchemy.engine import Connection from sqlalchemy.ext.asyncio import async_engine_from_config from cognee.infrastructure.databases.relational import Base from alembic import context +from cognee.infrastructure.databases.relational import get_relational_engine, get_relational_config # this is the Alembic Config object, which provides # access to the values within the .ini file in use. 
@@ -83,8 +84,6 @@ def run_migrations_online() -> None: asyncio.run(run_async_migrations()) -from cognee.infrastructure.databases.relational import get_relational_engine, get_relational_config - db_engine = get_relational_engine() if db_engine.engine.dialect.name == "sqlite": diff --git a/cognee/api/client.py b/cognee/api/client.py index 02ab94cb0..c39eef121 100644 --- a/cognee/api/client.py +++ b/cognee/api/client.py @@ -74,7 +74,6 @@ app.add_middleware( ) - @app.exception_handler(RequestValidationError) async def request_validation_exception_handler(request: Request, exc: RequestValidationError): if request.url.path == "/api/v1/auth/login": diff --git a/cognee/api/v1/cognify/cognify_v2.py b/cognee/api/v1/cognify/cognify_v2.py index f93cf1912..2071c7645 100644 --- a/cognee/api/v1/cognify/cognify_v2.py +++ b/cognee/api/v1/cognify/cognify_v2.py @@ -46,7 +46,7 @@ async def cognify( # If no datasets are provided, cognify all existing datasets. datasets = existing_datasets - if type(datasets[0]) == str: + if isinstance(datasets[0], str): datasets = await get_datasets_by_name(datasets, user.id) existing_datasets_map = { diff --git a/cognee/api/v1/datasets/routers/get_datasets_router.py b/cognee/api/v1/datasets/routers/get_datasets_router.py index 210ef7b77..ba65cd94c 100644 --- a/cognee/api/v1/datasets/routers/get_datasets_router.py +++ b/cognee/api/v1/datasets/routers/get_datasets_router.py @@ -107,7 +107,8 @@ def get_datasets_router() -> APIRouter: status_code=200, content=str(graph_url), ) - except: + except Exception as error: + print(error) return JSONResponse( status_code=409, content="Graphistry credentials are not set. 
Please set them in your .env file.", diff --git a/cognee/api/v1/search/search.legacy.py b/cognee/api/v1/search/search.legacy.py index af2a4cc61..c4e490f01 100644 --- a/cognee/api/v1/search/search.legacy.py +++ b/cognee/api/v1/search/search.legacy.py @@ -64,7 +64,7 @@ async def search(search_type: str, params: Dict[str, Any], user: User = None) -> for search_result in search_results: document_id = search_result["document_id"] if "document_id" in search_result else None - document_id = UUID(document_id) if type(document_id) == str else document_id + document_id = UUID(document_id) if isinstance(document_id, str) else document_id if document_id is None or document_id in own_document_ids: filtered_search_results.append(search_result) diff --git a/cognee/api/v1/search/search_v2.py b/cognee/api/v1/search/search_v2.py index 19ccc11b1..52fc565c7 100644 --- a/cognee/api/v1/search/search_v2.py +++ b/cognee/api/v1/search/search_v2.py @@ -49,7 +49,7 @@ async def search( for search_result in search_results: document_id = search_result["document_id"] if "document_id" in search_result else None - document_id = UUID(document_id) if type(document_id) == str else document_id + document_id = UUID(document_id) if isinstance(document_id, str) else document_id if document_id is None or document_id in own_document_ids: filtered_search_results.append(search_result) diff --git a/cognee/infrastructure/databases/graph/networkx/adapter.py b/cognee/infrastructure/databases/graph/networkx/adapter.py index c84de7612..75969c798 100644 --- a/cognee/infrastructure/databases/graph/networkx/adapter.py +++ b/cognee/infrastructure/databases/graph/networkx/adapter.py @@ -266,7 +266,8 @@ class NetworkXAdapter(GraphDBInterface): for node in graph_data["nodes"]: try: node["id"] = UUID(node["id"]) - except: + except Exception as e: + print(e) pass if "updated_at" in node: node["updated_at"] = datetime.strptime( @@ -282,7 +283,8 @@ class NetworkXAdapter(GraphDBInterface): edge["target"] = target_id 
edge["source_node_id"] = source_id edge["target_node_id"] = target_id - except: + except Exception as e: + print(e) pass if "updated_at" in edge: diff --git a/cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py b/cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py index 871bae4f0..77bfd74e6 100644 --- a/cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +++ b/cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py @@ -167,7 +167,8 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface): for index in indices.result_set ] ) - except: + except Exception as e: + print(e) return False async def index_data_points( diff --git a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py index ed584fdd4..aecd4650d 100644 --- a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py +++ b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py @@ -32,7 +32,7 @@ class MilvusAdapter(VectorDBInterface): self.embedding_engine = embedding_engine - def get_milvus_client(self) -> "MilvusClient": + def get_milvus_client(self): from pymilvus import MilvusClient if self.api_key: diff --git a/cognee/infrastructure/databases/vector/pinecone/adapter.py b/cognee/infrastructure/databases/vector/pinecone/adapter.py index 2a8e628f9..00dd7daf1 100644 --- a/cognee/infrastructure/databases/vector/pinecone/adapter.py +++ b/cognee/infrastructure/databases/vector/pinecone/adapter.py @@ -1,8 +1,8 @@ -class PineconeVectorDB(VectorDB): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.init_pinecone(self.index_name) - - def init_pinecone(self, index_name): - # Pinecone initialization logic - pass +# class PineconeVectorDB(VectorDB): +# def __init__(self, *args, **kwargs): +# 
super().__init__(*args, **kwargs) +# self.init_pinecone(self.index_name) +# +# def init_pinecone(self, index_name): +# # Pinecone initialization logic +# pass diff --git a/cognee/infrastructure/llm/openai/adapter.py b/cognee/infrastructure/llm/openai/adapter.py index c452924c9..d45662380 100644 --- a/cognee/infrastructure/llm/openai/adapter.py +++ b/cognee/infrastructure/llm/openai/adapter.py @@ -43,7 +43,6 @@ class OpenAIAdapter(LLMInterface): self.api_version = api_version self.streaming = streaming - @observe(as_type="generation") async def acreate_structured_output( self, text_input: str, system_prompt: str, response_model: Type[BaseModel] diff --git a/cognee/modules/data/extraction/knowledge_graph/add_model_class_to_graph.py b/cognee/modules/data/extraction/knowledge_graph/add_model_class_to_graph.py index 2522015a8..9da226541 100644 --- a/cognee/modules/data/extraction/knowledge_graph/add_model_class_to_graph.py +++ b/cognee/modules/data/extraction/knowledge_graph/add_model_class_to_graph.py @@ -37,7 +37,7 @@ if hasattr(field_type, "model_fields"): # Check if field type is a Pydantic model await add_model_class_to_graph(field_type, graph, model_name, field_name) - elif get_origin(field.annotation) == list: + elif get_origin(field.annotation) is list: list_types = get_args(field_type) for item_type in list_types: await add_model_class_to_graph(item_type, graph, model_name, field_name) diff --git a/cognee/modules/storage/utils/__init__.py b/cognee/modules/storage/utils/__init__.py index 132e509f6..a877826db 100644 --- a/cognee/modules/storage/utils/__init__.py +++ b/cognee/modules/storage/utils/__init__.py @@ -2,6 +2,7 @@ import json from uuid import UUID from datetime import datetime from pydantic_core import PydanticUndefined +from pydantic import create_model from cognee.infrastructure.engine import DataPoint @@ -16,9 +17,6 @@ class JSONEncoder(json.JSONEncoder): return json.JSONEncoder.default(self, obj) -from
pydantic import create_model - - def copy_model(model: DataPoint, include_fields: dict = {}, exclude_fields: list = []): fields = { name: (field.annotation, field.default if field.default is not None else PydanticUndefined) diff --git a/cognee/modules/users/get_user_db.py b/cognee/modules/users/get_user_db.py index 9eae0503e..80a6f7ba3 100644 --- a/cognee/modules/users/get_user_db.py +++ b/cognee/modules/users/get_user_db.py @@ -17,6 +17,4 @@ async def get_user_db(session: AsyncSession = Depends(get_async_session)): yield SQLAlchemyUserDatabase(session, User) - - get_user_db_context = asynccontextmanager(get_user_db) diff --git a/cognee/modules/users/get_user_manager.py b/cognee/modules/users/get_user_manager.py index 9dfa0a3e7..a1bd9e174 100644 --- a/cognee/modules/users/get_user_manager.py +++ b/cognee/modules/users/get_user_manager.py @@ -4,7 +4,7 @@ from typing import Optional from fastapi import Depends, Request from fastapi_users import BaseUserManager, UUIDIDMixin, models from fastapi_users.db import SQLAlchemyUserDatabase - +from contextlib import asynccontextmanager from .get_user_db import get_user_db from .models import User from .methods import get_user @@ -50,6 +50,4 @@ async def get_user_manager(user_db: SQLAlchemyUserDatabase = Depends(get_user_db yield UserManager(user_db) -from contextlib import asynccontextmanager - get_user_manager_context = asynccontextmanager(get_user_manager) diff --git a/cognee/modules/users/models/User.py b/cognee/modules/users/models/User.py index f5af05662..0cbe75037 100644 --- a/cognee/modules/users/models/User.py +++ b/cognee/modules/users/models/User.py @@ -5,6 +5,7 @@ from fastapi_users.db import SQLAlchemyBaseUserTableUUID from .Principal import Principal from .UserGroup import UserGroup from .Group import Group +from fastapi_users import schemas class User(SQLAlchemyBaseUserTableUUID, Principal): @@ -23,7 +24,6 @@ class User(SQLAlchemyBaseUserTableUUID, Principal): # Keep these schemas in sync with User model -from 
fastapi_users import schemas class UserRead(schemas.BaseUser[uuid_UUID]): diff --git a/cognee/shared/utils.py b/cognee/shared/utils.py index f257d450c..5b2931ed8 100644 --- a/cognee/shared/utils.py +++ b/cognee/shared/utils.py @@ -274,8 +274,6 @@ def extract_pos_tags(sentence): return pos_tags - - logging.basicConfig(level=logging.INFO) diff --git a/cognee/tasks/ingestion/transform_data.py b/cognee/tasks/ingestion/transform_data.py index 004b5dd95..cc75c7a65 100644 --- a/cognee/tasks/ingestion/transform_data.py +++ b/cognee/tasks/ingestion/transform_data.py @@ -6,13 +6,13 @@ from typing import Union def get_data_from_llama_index(data_point: Union[Document, ImageDocument], dataset_name: str) -> str: # Specific type checking is used to ensure it's not a child class from Document - if type(data_point) == Document: + if type(data_point) is Document: file_path = data_point.metadata.get("file_path") if file_path is None: file_path = save_data_to_file(data_point.text) return file_path return file_path - elif type(data_point) == ImageDocument: + elif type(data_point) is ImageDocument: if data_point.image_path is None: file_path = save_data_to_file(data_point.text) return file_path diff --git a/cognee/tests/unit/processing/chunks/chunk_by_word_test.py b/cognee/tests/unit/processing/chunks/chunk_by_word_test.py index 42523c106..fb26638cb 100644 --- a/cognee/tests/unit/processing/chunks/chunk_by_word_test.py +++ b/cognee/tests/unit/processing/chunks/chunk_by_word_test.py @@ -35,6 +35,4 @@ def test_chunk_by_word_splits(input_text): chunks = np.array(list(chunk_by_word(input_text))) space_test = np.array([" " not in chunk[0].strip() for chunk in chunks]) - assert np.all( - space_test - ), f"These chunks contain spaces within them: {chunks[space_test == False]}" + assert np.all(space_test), f"These chunks contain spaces within them: {chunks[~space_test]}" diff --git 
a/cognee/tests/unit/processing/utils/utils_test.py b/cognee/tests/unit/processing/utils/utils_test.py index 3abd46f38..c6b808bae 100644 --- a/cognee/tests/unit/processing/utils/utils_test.py +++ b/cognee/tests/unit/processing/utils/utils_test.py @@ -102,7 +102,6 @@ def test_prepare_nodes(): assert len(nodes_df) == 1 - def test_create_cognee_style_network_with_logo(): import networkx as nx from unittest.mock import patch @@ -117,29 +116,23 @@ def test_create_cognee_style_network_with_logo(): # Convert the graph to a tuple format for serialization graph_tuple = graph_to_tuple(graph) - print(graph_tuple) + original_open = open - # Define the output filename - output_filename = "test_network.html" + def mock_open_read_side_effect(*args, **kwargs): + if "cognee-logo.png" in args[0]: + return BytesIO(b"mock_png_data") + return original_open(*args, **kwargs) - with patch("bokeh.plotting.from_networkx") as mock_from_networkx: - original_open = open + with patch("builtins.open", side_effect=mock_open_read_side_effect): + result = create_cognee_style_network_with_logo( + graph_tuple, + title="Test Network", + node_attribute="group", + layout_func=nx.spring_layout, + layout_scale=3.0, + logo_alpha=0.5, + ) - def mock_open_read_side_effect(*args, **kwargs): - if "cognee-logo.png" in args[0]: - return BytesIO(b"mock_png_data") - return original_open(*args, **kwargs) - - with patch("builtins.open", side_effect=mock_open_read_side_effect): - result = create_cognee_style_network_with_logo( - graph_tuple, - title="Test Network", - node_attribute="group", - layout_func=nx.spring_layout, - layout_scale=3.0, - logo_alpha=0.5, - ) - - assert result is not None - assert isinstance(result, str) - assert len(result) > 0 + assert result is not None + assert isinstance(result, str) + assert len(result) > 0 diff --git a/evals/generate_test_set.py b/evals/generate_test_set.py index 92c74bd1e..ccac8d0e9 100644 --- a/evals/generate_test_set.py +++ b/evals/generate_test_set.py @@ -3,6 +3,10 @@ 
from deepeval.synthesizer import Synthesizer import dotenv from deepeval.test_case import LLMTestCase +# import pytest +# from deepeval import assert_test +from deepeval.metrics import AnswerRelevancyMetric + dotenv.load_dotenv() # synthesizer = Synthesizer() @@ -33,11 +37,6 @@ print(dataset.goldens) print(dataset) -# import pytest -# from deepeval import assert_test -from deepeval.metrics import AnswerRelevancyMetric - - answer_relevancy_metric = AnswerRelevancyMetric(threshold=0.5) # from deepeval import evaluate diff --git a/evals/simple_rag_vs_cognee_eval.py b/evals/simple_rag_vs_cognee_eval.py index 823099c26..82b1df600 100644 --- a/evals/simple_rag_vs_cognee_eval.py +++ b/evals/simple_rag_vs_cognee_eval.py @@ -44,8 +44,6 @@ print(dataset.goldens) print(dataset) - - class AnswerModel(BaseModel): response: str @@ -76,10 +74,7 @@ async def run_cognify_base_rag(): await add("data://test_datasets", "initial_test") graph = await cognify("initial_test") - - pass - - + return graph async def cognify_search_base_rag(content: str, context: str): diff --git a/profiling/graph_pydantic_conversion/benchmark_function.py b/profiling/graph_pydantic_conversion/benchmark_function.py index 413d0cde4..a4f5c839b 100644 --- a/profiling/graph_pydantic_conversion/benchmark_function.py +++ b/profiling/graph_pydantic_conversion/benchmark_function.py @@ -28,13 +28,10 @@ def benchmark_function(func: Callable, *args, num_runs: int = 5) -> Dict[str, An # Start memory tracking tracemalloc.start() - # Measure execution time and CPU usage start_time = time.perf_counter() start_cpu_time = process.cpu_times() - - end_cpu_time = process.cpu_times() end_time = time.perf_counter() @@ -45,8 +42,6 @@ def benchmark_function(func: Callable, *args, num_runs: int = 5) -> Dict[str, An ) current, peak = tracemalloc.get_traced_memory() - - # Store results execution_times.append(execution_time) peak_memory_usages.append(peak / 1024 / 1024) # Convert to MB diff --git a/pyproject.toml b/pyproject.toml index 
6a43bb603..73fbc8e77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -119,7 +119,12 @@ line-length = 100 exclude = [ "migrations/", # Ignore migrations directory "notebooks/", # Ignore notebook files - "build/", # Ignore build directory + "build/", # Ignore build directory + "cognee/pipelines.py", + "cognee/modules/users/models/Group.py", + "cognee/modules/users/models/ACL.py", + "cognee/modules/pipelines/models/Task.py", + "cognee/modules/data/models/Dataset.py" ] [tool.ruff.lint]