From b15a073db74f7f95f39da94543b9e3a4d1a5ec68 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 29 Oct 2025 19:32:28 +0100 Subject: [PATCH 1/2] fix: resolve issue with txt file type handling --- .../files/utils/guess_file_type.py | 36 ------------------- 1 file changed, 36 deletions(-) diff --git a/cognee/infrastructure/files/utils/guess_file_type.py b/cognee/infrastructure/files/utils/guess_file_type.py index f30bd5963..4e3ff6824 100644 --- a/cognee/infrastructure/files/utils/guess_file_type.py +++ b/cognee/infrastructure/files/utils/guess_file_type.py @@ -22,42 +22,6 @@ class FileTypeException(Exception): self.message = message -class TxtFileType(filetype.Type): - """ - Represents a text file type with specific MIME and extension properties. - - Public methods: - - match: Determines whether a given buffer matches the text file type. - """ - - MIME = "text/plain" - EXTENSION = "txt" - - def __init__(self): - super(TxtFileType, self).__init__(mime=TxtFileType.MIME, extension=TxtFileType.EXTENSION) - - def match(self, buf): - """ - Determine if the given buffer contains text content. - - Parameters: - ----------- - - - buf: The buffer to check for text content. - - Returns: - -------- - - Returns True if the buffer is identified as text content, otherwise False. - """ - return is_text_content(buf) - - -txt_file_type = TxtFileType() - -filetype.add_type(txt_file_type) - - def guess_file_type(file: BinaryIO) -> filetype.Type: """ Guess the file type from the given binary file stream. From a38853ae936d7e56fccd992fbaa99d7e9a0a100a Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 29 Oct 2025 19:45:17 +0100 Subject: [PATCH 2/2] refactor: use same mechanism to determine file type --- cognee/infrastructure/loaders/LoaderEngine.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cognee/infrastructure/loaders/LoaderEngine.py b/cognee/infrastructure/loaders/LoaderEngine.py index 725f37b14..f9511e7c5 100644 --- a/cognee/infrastructure/loaders/LoaderEngine.py +++ b/cognee/infrastructure/loaders/LoaderEngine.py @@ -1,6 +1,7 @@ import filetype from typing import Dict, List, Optional, Any from .LoaderInterface import LoaderInterface +from cognee.infrastructure.files.utils.guess_file_type import guess_file_type from cognee.shared.logging_utils import get_logger logger = get_logger(__name__) @@ -80,7 +81,7 @@ class LoaderEngine: """ from pathlib import Path - file_info = filetype.guess(file_path) + file_info = guess_file_type(file_path) path_extension = Path(file_path).suffix.lstrip(".")