Add transcribe_image and create_transcript methods

2024-11-14 11:59:46 +01:00 · 2024-11-14 11:59:46 +01:00 · 8b3b2f8156
commit 8b3b2f8156
parent c905510f30
2 changed files with 12 additions and 4 deletions
--- a/cognee/modules/data/processing/document_types/AudioDocument.py
+++ b/cognee/modules/data/processing/document_types/AudioDocument.py
@@ -5,10 +5,14 @@ from .Document import Document
 class AudioDocument(Document):
    type: str = "audio"

+    def create_transcript(self):
+        result = get_llm_client().create_transcript(self.raw_data_location)
+        return(result.text)
+
    def read(self, chunk_size: int):
        # Transcribe the audio file
-        result = get_llm_client().create_transcript(self.raw_data_location)
-        text = result.text
+        
+        text = self.create_transcript()

        chunker = TextChunker(self, chunk_size = chunk_size, get_text = lambda: text)

--- a/cognee/modules/data/processing/document_types/ImageDocument.py
+++ b/cognee/modules/data/processing/document_types/ImageDocument.py
@@ -5,10 +5,14 @@ from .Document import Document
 class ImageDocument(Document):
    type: str = "image"

+
+    def transcribe_image(self):
+        result = get_llm_client().transcribe_image(self.raw_data_location)
+        return(result.choices[0].message.content)
+
    def read(self, chunk_size: int):
        # Transcribe the image file
-        result = get_llm_client().transcribe_image(self.raw_data_location)
-        text = result.choices[0].message.content
+        text = self.transcribe_image()

        chunker = TextChunker(self, chunk_size = chunk_size, get_text = lambda: text)