Adjust chunking parameters to match the default environment variable settings
(cherry picked from commit e77340d4a1)
This commit is contained in:
parent
b8dc5de81a
commit
6e3ff18570
1 changed file with 2 additions and 2 deletions
|
|
@@ -72,8 +72,8 @@ def chunking_by_token_size(
|
|||
content: str,
|
||||
split_by_character: str | None = None,
|
||||
split_by_character_only: bool = False,
|
||||
- chunk_overlap_token_size: int = 128,
|
||||
- chunk_token_size: int = 1024,
|
||||
+ chunk_overlap_token_size: int = 100,
|
||||
+ chunk_token_size: int = 1200,
|
||||
) -> list[dict[str, Any]]:
|
||||
tokens = tokenizer.encode(content)
|
||||
results: list[dict[str, Any]] = []
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue