Add transcribe_image and create_transcript methods

This commit is contained in:
Leon Luithlen 2024-11-14 11:59:46 +01:00
parent c905510f30
commit 8b3b2f8156
2 changed files with 12 additions and 4 deletions

View file

@ -5,10 +5,14 @@ from .Document import Document
class AudioDocument(Document):
type: str = "audio"
def create_transcript(self):
    """Return the transcript text for this document's raw data.

    Passes ``self.raw_data_location`` to the LLM client's
    ``create_transcript`` call and returns the ``text`` attribute of
    the result it hands back.
    """
    llm_client = get_llm_client()
    transcript = llm_client.create_transcript(self.raw_data_location)
    return transcript.text
def read(self, chunk_size: int):
# Transcribe the audio file
result = get_llm_client().create_transcript(self.raw_data_location)
text = result.text
text = self.create_transcript()
chunker = TextChunker(self, chunk_size = chunk_size, get_text = lambda: text)

View file

@ -5,10 +5,14 @@ from .Document import Document
class ImageDocument(Document):
type: str = "image"
def transcribe_image(self):
    """Return the transcription text for this document's raw data.

    Passes ``self.raw_data_location`` to the LLM client's
    ``transcribe_image`` call and returns the message content of the
    first entry in the response's ``choices``.
    """
    llm_client = get_llm_client()
    response = llm_client.transcribe_image(self.raw_data_location)
    first_choice = response.choices[0]
    return first_choice.message.content
def read(self, chunk_size: int):
# Transcribe the image file
result = get_llm_client().transcribe_image(self.raw_data_location)
text = result.choices[0].message.content
text = self.transcribe_image()
chunker = TextChunker(self, chunk_size = chunk_size, get_text = lambda: text)