extend langchain splitter with character splitter
This commit is contained in:
parent
e69571b306
commit
6bf38e0784
3 changed files with 13 additions and 0 deletions
|
|
@ -29,6 +29,9 @@ class LangchainChunkEngine():
|
|||
|
||||
if chunk_strategy == ChunkStrategy.CODE:
|
||||
chunked_data = LangchainChunkEngine.chunk_data_by_code(source_data,chunk_size, chunk_overlap)
|
||||
|
||||
elif chunk_strategy == ChunkStrategy.LANGCHAIN_CHARACTER:
|
||||
chunked_data = LangchainChunkEngine.chunk_data_by_character(source_data,chunk_size, chunk_overlap)
|
||||
else:
|
||||
chunked_data = DefaultChunkEngine.chunk_data_by_paragraph(source_data,chunk_size, chunk_overlap)
|
||||
return chunked_data
|
||||
|
|
@ -50,3 +53,12 @@ class LangchainChunkEngine():
|
|||
|
||||
return only_content
|
||||
|
||||
@staticmethod
def chunk_data_by_character(data_chunks, chunk_size, chunk_overlap):
    """Split text into chunks using LangChain's RecursiveCharacterTextSplitter.

    Declared static because the dispatcher invokes it on the class itself
    (``LangchainChunkEngine.chunk_data_by_character(data, size, overlap)``);
    calls through an instance keep working as well.

    Args:
        data_chunks: Text to split. A single string is the expected input;
            an iterable of strings is also accepted.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        list[str]: The ``page_content`` of every chunk the splitter produced.
    """
    from langchain_text_splitters import RecursiveCharacterTextSplitter

    # The sizes must be passed by keyword: the constructor's leading
    # positional parameters are `separators` / `keep_separator`, so
    # positional sizes would be silently misinterpreted.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )

    # create_documents() expects a list of texts and returns Document
    # objects exposing `.page_content`.
    texts = [data_chunks] if isinstance(data_chunks, str) else list(data_chunks)
    documents = splitter.create_documents(texts)

    return [document.page_content for document in documents]
|
||||
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ class ChunkStrategy(Enum):
|
|||
PARAGRAPH = "paragraph"
|
||||
SENTENCE = "sentence"
|
||||
CODE = "code"
|
||||
LANGCHAIN_CHARACTER = "langchain_character"
|
||||
|
||||
class MemorySummary(BaseModel):
|
||||
""" Memory summary. """
|
||||
|
|
|
|||
0
docs/blog/posts/llmops-and-knowledge-graphs.md
Normal file
0
docs/blog/posts/llmops-and-knowledge-graphs.md
Normal file
Loading…
Add table
Reference in a new issue