From 5b626870d05167026049c6ccf122494971459ee5 Mon Sep 17 00:00:00 2001
From: Kevin Hu
Date: Fri, 9 May 2025 17:51:49 +0800
Subject: [PATCH] Refa: remove ollama keep alive. (#7560)

### What problem does this PR solve?

#7518

### Type of change

- [x] Refactoring
---
 rag/llm/chat_model.py | 4 ++--
 rag/llm/cv_model.py   | 6 ++----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index cbfa858f7..3d92f378f 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -915,7 +915,7 @@ class OllamaChat(Base):
         if "frequency_penalty" in gen_conf:
             options["frequency_penalty"] = gen_conf["frequency_penalty"]

-        response = self.client.chat(model=self.model_name, messages=history, options=options, keep_alive=10)
+        response = self.client.chat(model=self.model_name, messages=history, options=options)
         ans = response["message"]["content"].strip()
         token_count = response.get("eval_count", 0) + response.get("prompt_eval_count", 0)
         return ans, token_count
@@ -944,7 +944,7 @@ class OllamaChat(Base):
         ans = ""
         try:
-            response = self.client.chat(model=self.model_name, messages=history, stream=True, options=options, keep_alive=10)
+            response = self.client.chat(model=self.model_name, messages=history, stream=True, options=options)
             for resp in response:
                 if resp["done"]:
                     token_count = resp.get("prompt_eval_count", 0) + resp.get("eval_count", 0)
diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
index e0dbea2e5..f9d4e67c1 100644
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@@ -500,8 +500,7 @@ class OllamaCV(Base):
             response = self.client.chat(
                 model=self.model_name,
                 messages=history,
-                options=options,
-                keep_alive=-1
+                options=options
             )
             ans = response["message"]["content"].strip()
@@ -531,8 +530,7 @@
                 model=self.model_name,
                 messages=history,
                 stream=True,
-                options=options,
-                keep_alive=-1
+                options=options
             )
             for resp in response:
                 if resp["done"]:
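
Note (not part of the patch): after this change the Ollama client calls no longer pin a hard-coded `keep_alive` (10 seconds for the chat model, -1, i.e. never unload, for the CV model), so how long a model stays resident is left to the Ollama server's own default or its `OLLAMA_KEEP_ALIVE` setting. The sketch below illustrates the resulting call shape, assuming the `ollama` Python client that these classes wrap; the host and model name are hypothetical.

```python
# Minimal sketch, assuming the `ollama` Python client package.
# It mirrors the call as it looks after this change: no keep_alive argument,
# so model residency follows the server default (or OLLAMA_KEEP_ALIVE).
from ollama import Client

client = Client(host="http://localhost:11434")  # hypothetical local Ollama host

options = {"temperature": 0.7, "top_p": 0.9}    # sampling settings still go through `options`

response = client.chat(
    model="llama3",                              # hypothetical model name
    messages=[{"role": "user", "content": "Hello"}],
    options=options,
    # keep_alive deliberately omitted: the server-side setting applies
)
print(response["message"]["content"].strip())
```

Operators who relied on the old hard-coded values can set `OLLAMA_KEEP_ALIVE` on the Ollama server instead (for example a longer duration, or -1 to keep models loaded).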