fix: improve timing accuracy and variable scoping in OllamaAPI
parent 52f4d97172
commit f83bd765ea

1 changed file with 12 additions and 12 deletions
@@ -203,15 +203,15 @@ class OllamaAPI:
         )

         async def stream_generator():
-            first_chunk_time = None
-            last_chunk_time = time.time_ns()
-            total_response = ""
-
             try:
+                first_chunk_time = None
+                last_chunk_time = time.time_ns()
+                total_response = ""
+
                 # Ensure response is an async generator
                 if isinstance(response, str):
                     # If it's a string, send in two parts
-                    first_chunk_time = last_chunk_time
+                    first_chunk_time = start_time
                     last_chunk_time = time.time_ns()
                     total_response = response

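The timing variables now live inside the try block, and the string fast path anchors first_chunk_time at start_time (the moment the request began) rather than at the generator's own creation timestamp. Below is a minimal sketch of what such a stream generator could look like after the change; response and start_time follow the names in the diff, while the handler shape, the chunk payload, and the error handling are assumptions, not this repository's actual code.

import json
import time

async def stream_generator_sketch(response, start_time: int):
    # Sketch only: timing state is initialized inside the try block,
    # mirroring the scoping introduced by this commit.
    try:
        first_chunk_time = None
        last_chunk_time = time.time_ns()
        total_response = ""

        if isinstance(response, str):
            # Non-streaming fast path: there is no real "first chunk",
            # so anchor the first-chunk timestamp at the request start.
            first_chunk_time = start_time
            last_chunk_time = time.time_ns()
            total_response = response
            yield json.dumps({"response": response, "done": False},
                             ensure_ascii=False) + "\n"
        else:
            async for chunk in response:
                if first_chunk_time is None:
                    first_chunk_time = time.time_ns()
                last_chunk_time = time.time_ns()
                total_response += chunk
                yield json.dumps({"response": chunk, "done": False},
                                 ensure_ascii=False) + "\n"
    except Exception as exc:
        yield json.dumps({"error": str(exc)}, ensure_ascii=False) + "\n"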
@@ -284,7 +284,7 @@ class OllamaAPI:
                     yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
                     return
                 if first_chunk_time is None:
-                    first_chunk_time = last_chunk_time
+                    first_chunk_time = start_time
                 completion_tokens = estimate_tokens(total_response)
                 total_time = last_chunk_time - start_time
                 prompt_eval_time = first_chunk_time - start_time
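When a stream finishes without ever producing a chunk, first_chunk_time now falls back to start_time, so the prompt-eval delta is zero instead of reflecting setup time from before the generator existed. A small sketch of how the nanosecond deltas could feed the final payload follows; estimate_tokens is the helper named in the diff (its body here is a stand-in), and the actual contents of final_data are not visible in this diff, so the Ollama-style field names (eval_count, total_duration, prompt_eval_duration, eval_duration) are an assumption.

import json
import time
from typing import Optional

def estimate_tokens(text: str) -> int:
    # Rough stand-in for the project's estimate_tokens helper (assumption):
    # approximate tokens as about four characters per token.
    return max(1, len(text) // 4)

def build_final_data(start_time: int, first_chunk_time: Optional[int],
                     last_chunk_time: int, total_response: str) -> dict:
    if first_chunk_time is None:
        # No chunk ever arrived: anchor at the request start so the
        # prompt-eval delta below is zero rather than negative or inflated.
        first_chunk_time = start_time
    completion_tokens = estimate_tokens(total_response)
    total_time = last_chunk_time - start_time          # nanoseconds
    prompt_eval_time = first_chunk_time - start_time   # nanoseconds
    eval_time = last_chunk_time - first_chunk_time     # nanoseconds
    return {
        "done": True,
        "eval_count": completion_tokens,
        "total_duration": total_time,
        "prompt_eval_duration": prompt_eval_time,
        "eval_duration": eval_time,
    }

# Usage: all timestamps come from time.time_ns(), so durations stay integers.
start = time.time_ns()
print(json.dumps(build_final_data(start, None, time.time_ns(), "hello world")))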
@@ -409,15 +409,15 @@ class OllamaAPI:
         )

         async def stream_generator():
-            first_chunk_time = None
-            last_chunk_time = time.time_ns()
-            total_response = ""
-
             try:
+                first_chunk_time = None
+                last_chunk_time = time.time_ns()
+                total_response = ""
+
                 # Ensure response is an async generator
                 if isinstance(response, str):
                     # If it's a string, send in two parts
-                    first_chunk_time = last_chunk_time
+                    first_chunk_time = start_time
                     last_chunk_time = time.time_ns()
                     total_response = response

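This hunk applies the same change to the file's second stream_generator, presumably a second endpoint handler. The tiny sketch below uses hypothetical timings to illustrate why the anchor matters on the string fast path: the old last_chunk_time anchor was taken when the generator started, after a non-streaming response had already been produced, so that work was counted as prompt evaluation; anchoring at start_time reports zero instead.

import time

# Hypothetical illustration, not code from this repository.
start_time = time.time_ns()
time.sleep(0.05)                        # stand-in for non-streaming generation
generator_created = time.time_ns()

old_prompt_eval = generator_created - start_time   # old anchor: ~50 ms of setup
new_prompt_eval = start_time - start_time          # new anchor: 0 ns

print(f"old anchor reports {old_prompt_eval / 1e6:.1f} ms of prompt eval")
print(f"new anchor reports {new_prompt_eval / 1e6:.1f} ms of prompt eval")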
@@ -503,7 +503,7 @@ class OllamaAPI:
                     return

                 if first_chunk_time is None:
-                    first_chunk_time = last_chunk_time
+                    first_chunk_time = start_time
                 completion_tokens = estimate_tokens(total_response)
                 total_time = last_chunk_time - start_time
                 prompt_eval_time = first_chunk_time - start_time
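One consequence of the start_time fallback is that a derived eval duration can legitimately be zero. The helper below is not part of the commit, just an illustration of how a client might turn the nanosecond fields into tokens per second while guarding against that case.

def tokens_per_second(eval_count: int, eval_duration_ns: int) -> float:
    # Guard against a zero duration, which the start_time fallback makes
    # possible when no chunks were streamed.
    if eval_duration_ns <= 0:
        return 0.0
    return eval_count / (eval_duration_ns / 1_000_000_000)

# Example: 42 tokens generated over 1.5 s of eval time -> 28.0 tokens/s.
print(tokens_per_second(42, 1_500_000_000))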