From 0a303d9ae18282a00878ed9d2543e8a6e74e2744 Mon Sep 17 00:00:00 2001
From: Stephen Hu
Date: Tue, 5 Aug 2025 17:47:00 +0800
Subject: [PATCH] Refactor: Improve the chat stream logic for NvidiaCV (#9242)

### What problem does this PR solve?

Improve the chat stream logic for NvidiaCV. `chat_streamly` now initializes `total_tokens` to 0 before the request and only reads the count from the response `usage` field when it is present, so the trailing token-count yield no longer references an undefined `response` (or a missing `usage` key) after a failed request.

### Type of change

- [x] Refactoring
---
 rag/llm/cv_model.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
index 328591b6d..758d471bc 100644
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@@ -623,15 +623,18 @@ class NvidiaCV(Base):
             return "**ERROR**: " + str(e), 0
 
     def chat_streamly(self, system, history, gen_conf, images=[], **kwargs):
+        total_tokens = 0
         try:
             response = self._request(self._form_history(system, history, images), gen_conf)
             cnt = response["choices"][0]["message"]["content"]
+            if "usage" in response and "total_tokens" in response["usage"]:
+                total_tokens += response["usage"]["total_tokens"]
             for resp in cnt:
                 yield resp
         except Exception as e:
             yield "\n**ERROR**: " + str(e)
 
-        yield response["usage"]["total_tokens"]
+        yield total_tokens
 
 
 class AnthropicCV(Base):
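
Below is a minimal consumer sketch, not part of the patch, illustrating the generator contract the refactor preserves: string items are content (or error) chunks, and the final integer is the token count, which with this change is 0 when the request fails or the response carries no `usage` field. The `collect_answer` helper and the `cv_model` argument are hypothetical names introduced only for this example.

```python
# Hypothetical helper for illustration; assumes `cv_model` is an
# already-initialized NvidiaCV instance from rag/llm/cv_model.py.
def collect_answer(cv_model, system, history, gen_conf):
    answer, token_count = "", 0
    for item in cv_model.chat_streamly(system, history, gen_conf):
        if isinstance(item, int):
            # With this patch the final yield is always an int:
            # the usage total when available, otherwise 0.
            token_count = item
        else:
            # Content (or "**ERROR**: ...") chunks arrive as strings.
            answer += item
    return answer, token_count
```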