From 8b3b2f8156844ea3a10e729cf202ec24fb9b6f45 Mon Sep 17 00:00:00 2001 From: Leon Luithlen Date: Thu, 14 Nov 2024 11:59:46 +0100 Subject: [PATCH] Add transcribe_image and create_transcript methods --- .../data/processing/document_types/AudioDocument.py | 8 ++++++-- .../data/processing/document_types/ImageDocument.py | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/cognee/modules/data/processing/document_types/AudioDocument.py b/cognee/modules/data/processing/document_types/AudioDocument.py index d3ae0974d..989c881a1 100644 --- a/cognee/modules/data/processing/document_types/AudioDocument.py +++ b/cognee/modules/data/processing/document_types/AudioDocument.py @@ -5,10 +5,14 @@ from .Document import Document class AudioDocument(Document): type: str = "audio" + def create_transcript(self): + result = get_llm_client().create_transcript(self.raw_data_location) + return(result.text) + def read(self, chunk_size: int): # Transcribe the audio file - result = get_llm_client().create_transcript(self.raw_data_location) - text = result.text + + text = self.create_transcript() chunker = TextChunker(self, chunk_size = chunk_size, get_text = lambda: text) diff --git a/cognee/modules/data/processing/document_types/ImageDocument.py b/cognee/modules/data/processing/document_types/ImageDocument.py index 5571b3bd8..7338217a5 100644 --- a/cognee/modules/data/processing/document_types/ImageDocument.py +++ b/cognee/modules/data/processing/document_types/ImageDocument.py @@ -5,10 +5,14 @@ from .Document import Document class ImageDocument(Document): type: str = "image" + + def transcribe_image(self): + result = get_llm_client().transcribe_image(self.raw_data_location) + return(result.choices[0].message.content) + def read(self, chunk_size: int): # Transcribe the image file - result = get_llm_client().transcribe_image(self.raw_data_location) - text = result.choices[0].message.content + text = self.transcribe_image() chunker = TextChunker(self, chunk_size = chunk_size, get_text = lambda: text)