Adjust chunking parameters to match the default environment variable settings
This commit is contained in:
parent
24423c9215
commit
e77340d4a1
1 changed file with 2 additions and 2 deletions
|
|
@@ -98,8 +98,8 @@ def chunking_by_token_size(
|
||||||
content: str,
|
content: str,
|
||||||
split_by_character: str | None = None,
|
split_by_character: str | None = None,
|
||||||
split_by_character_only: bool = False,
|
split_by_character_only: bool = False,
|
||||||
chunk_overlap_token_size: int = 128,
|
chunk_overlap_token_size: int = 100,
|
||||||
chunk_token_size: int = 1024,
|
chunk_token_size: int = 1200,
|
||||||
) -> list[dict[str, Any]]:
|
) -> list[dict[str, Any]]:
|
||||||
tokens = tokenizer.encode(content)
|
tokens = tokenizer.encode(content)
|
||||||
results: list[dict[str, Any]] = []
|
results: list[dict[str, Any]] = []
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue