final main updates

2025-11-18 11:37:57 +08:00 · 2025-11-18 11:37:57 +08:00 · e0f321bea0
commit e0f321bea0
parent 118334e4e5
1 changed files with 105 additions and 32 deletions
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@ -33,7 +33,8 @@ from rag.nlp import is_english
 from rag.prompts.generator import vision_llm_describe_prompt
 from common.token_utils import num_tokens_from_string, total_token_count_from_response
-
+from rag.llm.chat_model import LENGTH_NOTIFICATION_CN, LENGTH_NOTIFICATION_EN
 from rag.nlp import is_chinese
 class Base(ABC):
    def __init__(self, **kwargs):
        # Configure retry parameters
@ -209,6 +210,7 @@ class GptV4(Base):
            model=self.model_name,
            messages=self.prompt(b64),
            extra_body=self.extra_body,
            unused = None,
        )
        return res.choices[0].message.content.strip(), total_token_count_from_response(res)
@ -320,30 +322,106 @@ class Zhipu4V(GptV4):
    def __init__(self, key, model_name="glm-4v", lang="Chinese", **kwargs):
        """Set up the ZhipuAI client and remember the model settings.

        Args:
            key: API key; stored as ``self.api_key`` and passed to ``ZhipuAI``.
            model_name: Model identifier stored as ``self.model_name``.
            lang: Language label stored as ``self.lang``.
            **kwargs: Forwarded unchanged to ``Base.__init__``.
        """
        # Plain attributes first; none of them depend on the client object.
        self.api_key = key
        self.model_name = model_name
        self.lang = lang
        self.client = ZhipuAI(api_key=key)
        # Base.__init__ is called explicitly (not via super()), so only the
        # Base initializer runs here.
        Base.__init__(self, **kwargs)
    def _clean_conf(self, gen_conf):
        if "max_tokens" in gen_conf:
            del gen_conf["max_tokens"]
        gen_conf = self._clean_conf_plealty(gen_conf)
        return gen_conf
    def _clean_conf_plealty(self, gen_conf):
        if "presence_penalty" in gen_conf:
            del gen_conf["presence_penalty"]
        if "frequency_penalty" in gen_conf:
            del gen_conf["frequency_penalty"]
        return gen_conf
    def _request(self, msg, stream, gen_conf={}):
        response = requests.post(
            self.base_url,
            json={
                "model": self.model_name,
                "messages": msg,
                "stream": stream,
                **gen_conf
            },
            headers= {
            "Authorization": f"Bearer {self.api_key}",  
            "Content-Type": "application/json",
            }
        )
        return response.json()
    def chat(self, system, history, gen_conf, images=None, stream=False, **kwargs):
        """Run one non-streaming chat completion and return ``(text, token_count)``.

        Args:
            system: System prompt. When both it and ``history`` are non-empty
                and the first history entry is not a system message, it is
                inserted at the front of ``history`` (the list is mutated).
            history: List of message dicts.
            gen_conf: Generation options; cleaned by ``_clean_conf`` and then
                passed as keyword arguments to the completion call.
            images: Passed through to ``_form_history``.
            stream: Accepted but not used; the request is always sent with
                ``stream=False``.
            **kwargs: Accepted and ignored.

        Returns:
            A tuple of the reply text with the ``<|begin_of_box|>`` /
            ``<|end_of_box|>`` markers removed, and the token count reported
            by ``total_token_count_from_response``.
        """
        if system and history:
            first_role = history[0].get("role")
            if first_role != "system":
                history.insert(0, {"role": "system", "content": system})

        gen_conf = self._clean_conf(gen_conf)
        logging.info(json.dumps(history, ensure_ascii=False, indent=2))

        messages = self._form_history(system, history, images)
        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=messages,
            stream=False,
            **gen_conf,
        )

        raw_text = response.choices[0].message.content.strip()
        answer = re.sub(r"<\|(begin_of_box|end_of_box)\|>", "", raw_text).strip()
        return answer, total_token_count_from_response(response)
    def chat_streamly(self, system, history, gen_conf, images=None, **kwargs):
        """Stream a chat completion, yielding text chunks and finally the token count.

        Args:
            system: System prompt. When both it and ``history`` are non-empty
                and the first history entry is not a system message, it is
                inserted at the front of ``history`` (the list is mutated).
            history: List of message dicts.
            gen_conf: Generation options; cleaned by ``_clean_conf`` and then
                passed as keyword arguments to the completion call.
            images: Passed through to ``_form_history``.
            **kwargs: Accepted and ignored.

        Yields:
            Each non-empty text delta as it arrives (the delta itself, not the
            accumulated answer). A chunk that finishes with reason ``"length"``
            has the length notification appended. If an exception occurs, the
            most recent chunk text followed by ``"\\n**ERROR**: <message>"`` is
            yielded. The last value yielded is always the integer token count.
        """
        from rag.llm.chat_model import LENGTH_NOTIFICATION_CN, LENGTH_NOTIFICATION_EN
        from rag.nlp import is_chinese
        if system and history and history[0].get("role") != "system":
            history.insert(0, {"role": "system", "content": system})
        gen_conf = self._clean_conf(gen_conf)
        ans = ""
        last_text = ""  # most recent non-empty delta, used for language detection
        tk_count = 0
        try:
            logging.info(json.dumps(history, ensure_ascii=False, indent=2))
            response = self.client.chat.completions.create(model=self.model_name, messages=self._form_history(system, history, images), stream=True, **gen_conf)
            for resp in response:
                choice = resp.choices[0]
                delta = choice.delta.content or ""
                finish_reason = choice.finish_reason
                finished = finish_reason in ("length", "stop")
                # Skip chunks that carry neither text nor a terminating
                # finish_reason. The finish_reason must be inspected even when
                # the chunk has no text; otherwise a text-less final chunk
                # would leave tk_count at 0 and drop the truncation notice.
                if not delta and not finished:
                    continue
                ans = delta
                if delta:
                    last_text = delta
                if finish_reason == "length":
                    if last_text and is_chinese(last_text):
                        ans += LENGTH_NOTIFICATION_CN
                    else:
                        ans += LENGTH_NOTIFICATION_EN
                if finished:
                    tk_count = total_token_count_from_response(resp)
                if ans:
                    yield ans
        except Exception as e:
            yield ans + "\n**ERROR**: " + str(e)
        yield tk_count
    def describe(self, image):
        """Describe ``image`` by delegating to ``describe_with_prompt`` without a prompt.

        Returns whatever ``describe_with_prompt(image)`` returns.
        """
        return self.describe_with_prompt(image)
    def describe_with_prompt(self, image, prompt=None):
        b64 = self.image2base64(image)
        if prompt is None:
            prompt = "Describe this image."
-        payload = {
+        # Chat messages
-            "model": self.model_name,
+        messages = [
            "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
-                            "image_url": {
+                        "image_url": { "url": b64 }
                                "url": b64
                            }
                    },
                    {
                        "type": "text",
@ -352,21 +430,16 @@ class Zhipu4V(GptV4):
                ]
            }
        ]
        }
-        headers = {
+        resp = self.client.chat.completions.create(
-            "Authorization": f"Bearer {self.api_key}",
+            model=self.model_name,
-            "Content-Type": "application/json"
+            messages=messages,
-        }
+            stream=False
        resp = requests.post(
            "https://open.bigmodel.cn/api/paas/v4/chat/completions",
            json=payload,
            headers=headers
        )
-        content = resp.json()["choices"][0]["message"]["content"].strip()
+        content = resp.choices[0].message.content.strip()
        cleaned = re.sub(r"<\|(begin_of_box|end_of_box)\|>", "", content).strip()
        return cleaned, num_tokens_from_string(cleaned)