From 5b626870d05167026049c6ccf122494971459ee5 Mon Sep 17 00:00:00 2001
From: Kevin Hu
Date: Fri, 9 May 2025 17:51:49 +0800
Subject: [PATCH] Refa: remove ollama keep alive. (#7560)

### What problem does this PR solve?

#7518

### Type of change

- [x] Refactoring
---
 rag/llm/chat_model.py | 4 ++--
 rag/llm/cv_model.py   | 6 ++----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index cbfa858f7..3d92f378f 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -915,7 +915,7 @@ class OllamaChat(Base):
         if "frequency_penalty" in gen_conf:
             options["frequency_penalty"] = gen_conf["frequency_penalty"]

-        response = self.client.chat(model=self.model_name, messages=history, options=options, keep_alive=10)
+        response = self.client.chat(model=self.model_name, messages=history, options=options)
         ans = response["message"]["content"].strip()
         token_count = response.get("eval_count", 0) + response.get("prompt_eval_count", 0)
         return ans, token_count
@@ -944,7 +944,7 @@ class OllamaChat(Base):
         ans = ""
         try:
-            response = self.client.chat(model=self.model_name, messages=history, stream=True, options=options, keep_alive=10)
+            response = self.client.chat(model=self.model_name, messages=history, stream=True, options=options)
             for resp in response:
                 if resp["done"]:
                     token_count = resp.get("prompt_eval_count", 0) + resp.get("eval_count", 0)
diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
index e0dbea2e5..f9d4e67c1 100644
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@@ -500,8 +500,7 @@ class OllamaCV(Base):
             response = self.client.chat(
                 model=self.model_name,
                 messages=history,
-                options=options,
-                keep_alive=-1
+                options=options
             )
             ans = response["message"]["content"].strip()
@@ -531,8 +530,7 @@
                 model=self.model_name,
                 messages=history,
                 stream=True,
-                options=options,
-                keep_alive=-1
+                options=options
             )
             for resp in response:
                 if resp["done"]:
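
Note (not part of the patch): after this change the Ollama client calls no longer pin a hard-coded `keep_alive` (10 seconds for the chat model, -1, i.e. never unload, for the CV model), so how long a model stays resident is left to the Ollama server's own default or its `OLLAMA_KEEP_ALIVE` setting. The sketch below illustrates the resulting call shape, assuming the `ollama` Python client that these classes wrap; the host and model name are hypothetical.

```python
# Minimal sketch, assuming the `ollama` Python client package.
# It mirrors the call as it looks after this change: no keep_alive argument,
# so model residency follows the server default (or OLLAMA_KEEP_ALIVE).
from ollama import Client

client = Client(host="http://localhost:11434")  # hypothetical local Ollama host

options = {"temperature": 0.7, "top_p": 0.9}    # sampling settings still go through `options`

response = client.chat(
    model="llama3",                              # hypothetical model name
    messages=[{"role": "user", "content": "Hello"}],
    options=options,
    # keep_alive deliberately omitted: the server-side setting applies
)
print(response["message"]["content"].strip())
```

Operators who relied on the old hard-coded values can set `OLLAMA_KEEP_ALIVE` on the Ollama server instead (for example a longer duration, or -1 to keep models loaded).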