diff --git a/cognee/modules/data/processing/document_types/Document.py b/cognee/modules/data/processing/document_types/Document.py
index 773fc30c8..45441dcce 100644
--- a/cognee/modules/data/processing/document_types/Document.py
+++ b/cognee/modules/data/processing/document_types/Document.py
@@ -6,6 +6,7 @@ class Document(DataPoint):
     name: str
     raw_data_location: str
     metadata_id: UUID
+    mime_type: str
 
     def read(self, chunk_size: int) -> str:
         pass
diff --git a/cognee/modules/data/processing/document_types/UnstructuredDocument.py b/cognee/modules/data/processing/document_types/UnstructuredDocument.py
index c48423653..68ccbe1f2 100644
--- a/cognee/modules/data/processing/document_types/UnstructuredDocument.py
+++ b/cognee/modules/data/processing/document_types/UnstructuredDocument.py
@@ -9,7 +9,7 @@ class UnstructuredDocument(Document):
     def read(self, chunk_size: int):
         def get_text():
             from unstructured.partition.auto import partition
-            elements = partition(self.raw_data_location)
+            elements = partition(self.raw_data_location, content_type=self.mime_type)
             in_memory_file = StringIO("\n\n".join([str(el) for el in elements]))
             in_memory_file.seek(0)
 
diff --git a/cognee/tasks/documents/classify_documents.py b/cognee/tasks/documents/classify_documents.py
index abef4ea9e..47beeb917 100644
--- a/cognee/tasks/documents/classify_documents.py
+++ b/cognee/tasks/documents/classify_documents.py
@@ -13,6 +13,14 @@ EXTENSION_TO_DOCUMENT_CLASS = {
     "pdf": PdfDocument,  # Text documents
     "txt": TextDocument,
     "docx": UnstructuredDocument,
+    "doc": UnstructuredDocument,
+    "odt": UnstructuredDocument,
+    "xls": UnstructuredDocument,
+    "xlsx": UnstructuredDocument,
+    "ppt": UnstructuredDocument,
+    "pptx": UnstructuredDocument,
+    "odp": UnstructuredDocument,
+    "ods": UnstructuredDocument,
     "png": ImageDocument,  # Image documents
     "dwg": ImageDocument,
     "xcf": ImageDocument,
@@ -50,6 +58,7 @@ async def classify_documents(data_documents: list[Data]) -> list[Document]:
             title = f"{data_item.name}.{data_item.extension}",
             raw_data_location = data_item.raw_data_location,
             name = data_item.name,
+            mime_type = data_item.mime_type,
             metadata_id = metadata.id
         )
         documents.append(document)