From 66fb2948f8a31e8010f872432288eca1b05be135 Mon Sep 17 00:00:00 2001 From: Leon Luithlen Date: Tue, 12 Nov 2024 15:37:03 +0100 Subject: [PATCH] Small cleanup pull request --- cognee/tasks/documents/classify_documents.py | 13 +++++++++---- cognee/tasks/graph/infer_data_ontology.py | 2 +- .../models/__pycache__ => graph}/models.py | 0 cognee/tasks/search_evaluate/__init__.py | 0 .../{models/TextSummary.py => models.py} | 0 cognee/tasks/summarization/summarize_text.py | 2 +- 6 files changed, 11 insertions(+), 6 deletions(-) rename cognee/tasks/{infer_data_ontology/models/__pycache__ => graph}/models.py (100%) delete mode 100644 cognee/tasks/search_evaluate/__init__.py rename cognee/tasks/summarization/{models/TextSummary.py => models.py} (100%) diff --git a/cognee/tasks/documents/classify_documents.py b/cognee/tasks/documents/classify_documents.py index 64ed808d6..e83fe8917 100644 --- a/cognee/tasks/documents/classify_documents.py +++ b/cognee/tasks/documents/classify_documents.py @@ -1,12 +1,17 @@ from cognee.modules.data.models import Data from cognee.modules.data.processing.document_types import Document, PdfDocument, AudioDocument, ImageDocument, TextDocument +EXTENSION_TO_DOCUMENT_CLASS = { + "pdf": PdfDocument, + "audio": AudioDocument, + "image": ImageDocument, + "pdf": TextDocument, + "txt": TextDocument +} + def classify_documents(data_documents: list[Data]) -> list[Document]: documents = [ - PdfDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "pdf" else - AudioDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "audio" else - ImageDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "image" else - TextDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) + EXTENSION_TO_DOCUMENT_CLASS[data_item.extension](id = data_item.id, title=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) for data_item in data_documents ] diff --git a/cognee/tasks/graph/infer_data_ontology.py b/cognee/tasks/graph/infer_data_ontology.py index e1a710fa2..eea378eb1 100644 --- a/cognee/tasks/graph/infer_data_ontology.py +++ b/cognee/tasks/graph/infer_data_ontology.py @@ -18,7 +18,7 @@ from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_eng from cognee.infrastructure.files.utils.extract_text_from_file import extract_text_from_file from cognee.infrastructure.files.utils.guess_file_type import guess_file_type, FileTypeException from cognee.modules.data.extraction.knowledge_graph.add_model_class_to_graph import add_model_class_to_graph -from cognee.tasks.infer_data_ontology.models.models import NodeModel, GraphOntology +from cognee.tasks.graph.models import NodeModel, GraphOntology from cognee.shared.data_models import KnowledgeGraph from cognee.modules.engine.utils import generate_node_id, generate_node_name diff --git a/cognee/tasks/infer_data_ontology/models/__pycache__/models.py b/cognee/tasks/graph/models.py similarity index 100% rename from cognee/tasks/infer_data_ontology/models/__pycache__/models.py rename to cognee/tasks/graph/models.py diff --git a/cognee/tasks/search_evaluate/__init__.py b/cognee/tasks/search_evaluate/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/cognee/tasks/summarization/models/TextSummary.py b/cognee/tasks/summarization/models.py similarity index 100% rename from cognee/tasks/summarization/models/TextSummary.py rename to cognee/tasks/summarization/models.py diff --git a/cognee/tasks/summarization/summarize_text.py b/cognee/tasks/summarization/summarize_text.py index 47d6946bb..2dab3b0f8 100644 --- a/cognee/tasks/summarization/summarize_text.py +++ b/cognee/tasks/summarization/summarize_text.py @@ -5,7 +5,7 @@ from pydantic import BaseModel from cognee.modules.data.extraction.extract_summary import extract_summary from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.tasks.storage import add_data_points -from .models.TextSummary import TextSummary +from .models import TextSummary async def summarize_text(data_chunks: list[DocumentChunk], summarization_model: Type[BaseModel]): if len(data_chunks) == 0: