fix: resolve issue with txt file type handling (#1697)
<!-- .github/pull_request_template.md -->

## Description
Remove the custom text file type resolution, because it sometimes wrongly identifies other file types as txt.

## Type of Change
<!-- Please check the relevant option -->
- [ ] Bug fix (non-breaking change that fixes an issue)
- [ ] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)
- [ ] Documentation update
- [x] Code refactoring
- [ ] Performance improvement
- [ ] Other (please specify):

## Screenshots/Videos (if applicable)
<!-- Add screenshots or videos to help explain your changes -->

## Pre-submission Checklist
<!-- Please check all boxes that apply before submitting your PR -->
- [ ] **I have tested my changes thoroughly before submitting this PR**
- [ ] **This PR contains minimal changes necessary to address the issue/feature**
- [ ] My code follows the project's coding standards and style guidelines
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] I have added necessary documentation (if applicable)
- [ ] All new and existing tests pass
- [ ] I have searched existing PRs to ensure this change hasn't been submitted already
- [ ] I have linked any relevant issues in the description
- [ ] My commits have clear and descriptive messages

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
commit
6b2666352a
2 changed files with 2 additions and 37 deletions
|
|
@ -22,42 +22,6 @@ class FileTypeException(Exception):
|
||||||
self.message = message
|
self.message = message
|
||||||
|
|
||||||
|
|
||||||
class TxtFileType(filetype.Type):
|
|
||||||
"""
|
|
||||||
Represents a text file type with specific MIME and extension properties.
|
|
||||||
|
|
||||||
Public methods:
|
|
||||||
- match: Determines whether a given buffer matches the text file type.
|
|
||||||
"""
|
|
||||||
|
|
||||||
MIME = "text/plain"
|
|
||||||
EXTENSION = "txt"
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
super(TxtFileType, self).__init__(mime=TxtFileType.MIME, extension=TxtFileType.EXTENSION)
|
|
||||||
|
|
||||||
def match(self, buf):
|
|
||||||
"""
|
|
||||||
Determine if the given buffer contains text content.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
-----------
|
|
||||||
|
|
||||||
- buf: The buffer to check for text content.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
--------
|
|
||||||
|
|
||||||
Returns True if the buffer is identified as text content, otherwise False.
|
|
||||||
"""
|
|
||||||
return is_text_content(buf)
|
|
||||||
|
|
||||||
|
|
||||||
txt_file_type = TxtFileType()
|
|
||||||
|
|
||||||
filetype.add_type(txt_file_type)
|
|
||||||
|
|
||||||
|
|
||||||
def guess_file_type(file: BinaryIO) -> filetype.Type:
|
def guess_file_type(file: BinaryIO) -> filetype.Type:
|
||||||
"""
|
"""
|
||||||
Guess the file type from the given binary file stream.
|
Guess the file type from the given binary file stream.
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
import filetype
|
import filetype
|
||||||
from typing import Dict, List, Optional, Any
|
from typing import Dict, List, Optional, Any
|
||||||
from .LoaderInterface import LoaderInterface
|
from .LoaderInterface import LoaderInterface
|
||||||
|
from cognee.infrastructure.files.utils.guess_file_type import guess_file_type
|
||||||
from cognee.shared.logging_utils import get_logger
|
from cognee.shared.logging_utils import get_logger
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
@ -80,7 +81,7 @@ class LoaderEngine:
|
||||||
"""
|
"""
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
file_info = filetype.guess(file_path)
|
file_info = guess_file_type(file_path)
|
||||||
|
|
||||||
path_extension = Path(file_path).suffix.lstrip(".")
|
path_extension = Path(file_path).suffix.lstrip(".")
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue