Merge pull request #2078 from danielaskdd/update-cons
Increase default Gunicorn worker timeout from 210 to 300 seconds
This commit is contained in:
commit
14eb3d6a16
2 changed files with 3 additions and 3 deletions
|
|
@@ -69,7 +69,7 @@ DEFAULT_EMBEDDING_FUNC_MAX_ASYNC = 8 # Default max async for embedding function
|
|||
DEFAULT_EMBEDDING_BATCH_NUM = 10 # Default batch size for embedding computations
|
||||
|
||||
# Gunicorn worker timeout
|
||||
DEFAULT_TIMEOUT = 210
|
||||
DEFAULT_TIMEOUT = 300
|
||||
|
||||
# Default llm and embedding timeout
|
||||
DEFAULT_LLM_TIMEOUT = 180
|
||||
|
|
|
|||
|
|
@@ -188,7 +188,7 @@ export default function RetrievalTesting() {
|
|||
if (thinkEndIndex !== -1) {
|
||||
// Thinking has finished for this chunk
|
||||
assistantMessage.isThinking = false
|
||||
|
||||
|
||||
// Only calculate time and extract thinking content once
|
||||
if (!thinkingProcessed.current) {
|
||||
if (thinkingStartTime.current && !assistantMessage.thinkingTime) {
|
||||
|
|
@@ -200,7 +200,7 @@ export default function RetrievalTesting() {
|
|||
.trim()
|
||||
thinkingProcessed.current = true
|
||||
}
|
||||
|
||||
|
||||
// Always update display content as content after </think> may grow
|
||||
assistantMessage.displayContent = assistantMessage.content.substring(thinkEndIndex + thinkEndTag.length).trim()
|
||||
} else {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue