From 0a303d9ae18282a00878ed9d2543e8a6e74e2744 Mon Sep 17 00:00:00 2001
From: Stephen Hu
Date: Tue, 5 Aug 2025 17:47:00 +0800
Subject: [PATCH] Refactor: Improve the chat stream logic for NvidiaCV (#9242)

### What problem does this PR solve?

Improve the chat stream logic for NvidiaCV. `chat_streamly` now initializes `total_tokens` to 0 before the request and only reads the count from the response `usage` field when it is present, so the trailing token-count yield no longer references an undefined `response` (or a missing `usage` key) after a failed request.

### Type of change

- [x] Refactoring
---
 rag/llm/cv_model.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
index 328591b6d..758d471bc 100644
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@@ -623,15 +623,18 @@ class NvidiaCV(Base):
             return "**ERROR**: " + str(e), 0
 
     def chat_streamly(self, system, history, gen_conf, images=[], **kwargs):
+        total_tokens = 0
         try:
             response = self._request(self._form_history(system, history, images), gen_conf)
             cnt = response["choices"][0]["message"]["content"]
+            if "usage" in response and "total_tokens" in response["usage"]:
+                total_tokens += response["usage"]["total_tokens"]
             for resp in cnt:
                 yield resp
         except Exception as e:
             yield "\n**ERROR**: " + str(e)
 
-        yield response["usage"]["total_tokens"]
+        yield total_tokens
 
 
 class AnthropicCV(Base):
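
Below is a minimal consumer sketch, not part of the patch, illustrating the generator contract the refactor preserves: string items are content (or error) chunks, and the final integer is the token count, which with this change is 0 when the request fails or the response carries no `usage` field. The `collect_answer` helper and the `cv_model` argument are hypothetical names introduced only for this example.

```python
# Hypothetical helper for illustration; assumes `cv_model` is an
# already-initialized NvidiaCV instance from rag/llm/cv_model.py.
def collect_answer(cv_model, system, history, gen_conf):
    answer, token_count = "", 0
    for item in cv_model.chat_streamly(system, history, gen_conf):
        if isinstance(item, int):
            # With this patch the final yield is always an int:
            # the usage total when available, otherwise 0.
            token_count = item
        else:
            # Content (or "**ERROR**: ...") chunks arrive as strings.
            answer += item
    return answer, token_count
```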