added typing
This commit is contained in:
parent
31fe96d74a
commit
f4287804ce
1 changed file with 3 additions and 3 deletions
|
|
@@ -2,7 +2,7 @@ import asyncio
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from tqdm.asyncio import tqdm as tqdm_async
|
from tqdm.asyncio import tqdm as tqdm_async
|
||||||
from typing import Union
|
from typing import Any, Union
|
||||||
from collections import Counter, defaultdict
|
from collections import Counter, defaultdict
|
||||||
from .utils import (
|
from .utils import (
|
||||||
logger,
|
logger,
|
||||||
|
|
@@ -42,9 +42,9 @@ def chunking_by_token_size(
|
||||||
max_token_size=1024,
|
max_token_size=1024,
|
||||||
tiktoken_model="gpt-4o",
|
tiktoken_model="gpt-4o",
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
) -> list[dict[str, Any]]:
|
||||||
tokens = encode_string_by_tiktoken(content, model_name=tiktoken_model)
|
tokens = encode_string_by_tiktoken(content, model_name=tiktoken_model)
|
||||||
results = []
|
results: list[dict[str, Any]] = []
|
||||||
if split_by_character:
|
if split_by_character:
|
||||||
raw_chunks = content.split(split_by_character)
|
raw_chunks = content.split(split_by_character)
|
||||||
new_chunks = []
|
new_chunks = []
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue