From 3f8ea34c24ba308006118b96ea9bd6a647774076 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Thu, 27 Mar 2025 09:32:48 +0800
Subject: [PATCH] Add /aichat/process_image endpoint with automatic OCR/vision model selection

---
 AI/WxMini/Start.py                            | 118 ++++++++++++++++--
 AI/WxMini/Test/OCR1_Image_Recognition.py      |  16 ++-
 .../Test/OCR1_Image_Recognition_Stream.py     |  26 ----
 AI/WxMini/Test/TestAutoSelect.py              | 109 ++++++++++++++++
 AI/WxMini/Test/X1_ReadImage.py                |  45 +++----
 5 files changed, 248 insertions(+), 66 deletions(-)
 delete mode 100644 AI/WxMini/Test/OCR1_Image_Recognition_Stream.py
 create mode 100644 AI/WxMini/Test/TestAutoSelect.py

diff --git a/AI/WxMini/Start.py b/AI/WxMini/Start.py
index 9262a160..ca10724f 100644
--- a/AI/WxMini/Start.py
+++ b/AI/WxMini/Start.py
@@ -1,5 +1,6 @@
 import asyncio
 import logging
+import re
 import time
 import uuid
 from contextlib import asynccontextmanager
@@ -11,6 +12,7 @@ from fastapi.security import OAuth2PasswordBearer
 from jose import JWTError, jwt
 from openai import AsyncOpenAI
 from passlib.context import CryptContext
+from starlette.responses import StreamingResponse
 
 from WxMini.Milvus.Config.MulvusConfig import *
 from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager
@@ -267,7 +269,7 @@ async def reply(person_id: str = Form(...),
                         # 查询非向量字段
                         record = await asyncio.to_thread(collection_manager.query_by_id, hit.id)
                         if record:
-                            #logger.info(f"查询到的记录: {record}")
+                            # logger.info(f"查询到的记录: {record}")
                             # 添加历史交互
                             history_prompt += f"用户: {record['user_input']}\n大模型: {record['model_response']}\n"
                     except Exception as e:
@@ -275,7 +277,7 @@ async def reply(person_id: str = Form(...),
 
         # 限制历史交互提示词长度
         history_prompt = history_prompt[:2000]
-        #logger.info(f"历史交互提示词: {history_prompt}")
+        # logger.info(f"历史交互提示词: {history_prompt}")
 
         # 调用大模型，将历史交互作为提示词
         try:
@@ -314,7 +316,7 @@ async def reply(person_id: str = Form(...),
             if len(result) > 500:
                 logger.warning(f"大模型回复被截断，原始长度: {len(result)}")
             await asyncio.to_thread(collection_manager.insert_data, entities)
-            #logger.info("用户输入和大模型反馈已记录到向量数据库。")
+            # logger.info("用户输入和大模型反馈已记录到向量数据库。")
 
             # 调用 TTS 生成 MP3
             uuid_str = str(uuid.uuid4())
@@ -327,7 +329,7 @@ async def reply(person_id: str = Form(...),
             t = TTS(None)  # 传入 None 表示不保存到本地文件
             audio_data, duration = await asyncio.to_thread(t.generate_audio,
                                                            result)  # 假设 TTS 类有一个 generate_audio 方法返回音频数据
-            #print(f"音频时长: {duration} 秒")
+            # print(f"音频时长: {duration} 秒")
 
             # 将音频数据直接上传到 OSS
             await asyncio.to_thread(upload_mp3_to_oss_from_memory, tts_file, audio_data)
@@ -338,7 +340,7 @@ async def reply(person_id: str = Form(...),
 
             # 记录聊天数据到 MySQL
             await save_chat_to_mysql(app.state.mysql_pool, person_id, prompt, result, url, duration)
-            #logger.info("用户输入和大模型反馈已记录到 MySQL 数据库。")
+            # logger.info("用户输入和大模型反馈已记录到 MySQL 数据库。")
 
             # 调用会话检查机制，异步执行
             asyncio.create_task(on_session_end(person_id))
@@ -402,7 +404,8 @@ async def get_risk_chat_logs(
     offset = (page - 1) * page_size
 
     # 调用 get_chat_logs_by_risk_flag 方法
-    logs, total = await get_chat_logs_by_risk_flag(app.state.mysql_pool, risk_flag,current_user["person_id"], offset, page_size)
+    logs, total = await get_chat_logs_by_risk_flag(app.state.mysql_pool, risk_flag, current_user["person_id"], offset,
+                                                   page_size)
     if not logs:
         return {
             "success": False,
@@ -425,7 +428,6 @@ async def get_risk_chat_logs(
     }
 
 
-
 # 获取风险统计接口
 @app.get("/aichat/chat_logs_summary")
 async def chat_logs_summary(
@@ -484,6 +486,7 @@ async def chat_logs_summary(
         }
     }
 
+
 # 获取上传OSS的授权Token
 @app.get("/aichat/get_oss_upload_token")
 async def get_oss_upload_token(current_user: dict = Depends(get_current_user)):
@@ -504,6 +507,107 @@ async def get_oss_upload_token(current_user: dict = Depends(get_current_user)):
     }
 
 
+async def is_text_dominant(image_url):
+    """
+    Decide whether an image is mainly text by running it through the OCR model.
+    :param image_url: URL of the image to classify
+    :return: False when OCR yields nothing or only ASCII letters/digits/whitespace
+             (such images go to content recognition); True otherwise
+    """
+    completion = await client.chat.completions.create(
+        model="qwen-vl-ocr",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image_url",
+                        "image_url": image_url,
+                        "min_pixels": 28 * 28 * 4,
+                        "max_pixels": 28 * 28 * 1280
+                    },
+                    {"type": "text", "text": "Read all the text in the image."},
+                ]
+            }
+        ],
+        stream=False
+    )
+    # content may be None/empty when nothing is recognised; a text-free image is not text-dominant.
+    text = completion.choices[0].message.content or ""
+    if not text or re.match(r'^[A-Za-z0-9\s]+$', text):
+        print("识别到的内容只有英文和数字，可能是无意义的字符，调用识别内容功能。")
+        return False
+    return True
+
+
+async def recognize_text(image_url):
+    """Stream the text recognised in the image (qwen-vl-ocr) one character at a time, skipping spaces."""
+    completion = await client.chat.completions.create(
+        model="qwen-vl-ocr",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image_url",
+                        "image_url": image_url,
+                        "min_pixels": 28 * 28 * 4,
+                        "max_pixels": 28 * 28 * 1280
+                    },
+                    {"type": "text", "text": "Read all the text in the image."},
+                ]
+            }
+        ],
+        stream=True
+    )
+    async for chunk in completion:
+        # Guard against chunks with no choices (e.g. a trailing usage-only chunk).
+        delta = chunk.choices[0].delta.content if chunk.choices else None
+        for char in delta or "":
+            if char != ' ':
+                yield char
+                # Non-blocking pause: time.sleep() here would freeze the whole event loop.
+                await asyncio.sleep(0.1)
+
+
+async def recognize_content(image_url):
+    """Stream a description of the image from qwen-vl-plus, one character at a time."""
+    completion = await client.chat.completions.create(
+        model="qwen-vl-plus",
+        messages=[{"role": "user", "content": [
+            {"type": "text", "text": "这是什么"},
+            {"type": "image_url", "image_url": {"url": image_url}}
+        ]}],
+        stream=True
+    )
+    async for chunk in completion:
+        # Guard against chunks with no choices (e.g. a trailing usage-only chunk).
+        delta = chunk.choices[0].delta.content if chunk.choices else None
+        for char in delta or "":
+            yield char
+            # Non-blocking pause: time.sleep() here would freeze the whole event loop.
+            await asyncio.sleep(0.1)
+
+
+@app.get("/aichat/process_image")
+async def process_image(image_url: str, current_user: dict = Depends(get_current_user)):
+    """
+    Classify the image, then stream text recognition or content recognition accordingly.
+    :param image_url: URL of the image to process
+    :return: StreamingResponse (text/plain); HTTP 500 if classification fails
+    """
+    logger.info(f"current_user:{current_user['login_name']}")
+    try:
+        if await is_text_dominant(image_url):
+            print("检测到图片主要是文字内容，开始识别文字：")
+            return StreamingResponse(recognize_text(image_url), media_type="text/plain")
+        else:
+            print("检测到图片主要是物体/场景，开始识别内容：")
+            return StreamingResponse(recognize_content(image_url), media_type="text/plain")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+
 # 运行 FastAPI 应用
 if __name__ == "__main__":
     import uvicorn
diff --git a/AI/WxMini/Test/OCR1_Image_Recognition.py b/AI/WxMini/Test/OCR1_Image_Recognition.py
index dd0e9b97..47f9d2f4 100644
--- a/AI/WxMini/Test/OCR1_Image_Recognition.py
+++ b/AI/WxMini/Test/OCR1_Image_Recognition.py
@@ -1,3 +1,4 @@
+import time
 from openai import OpenAI
 from WxMini.Milvus.Config.MulvusConfig import *
 
@@ -5,6 +6,8 @@ client = OpenAI(
     api_key=MODEL_API_KEY,
     base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
 )
+image_url = "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg"
+
 completion = client.chat.completions.create(
     model="qwen-vl-ocr",
     messages=[
@@ -13,7 +16,7 @@ completion = client.chat.completions.create(
             "content": [
                 {
                     "type": "image_url",
-                    "image_url": "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg",
+                    "image_url": image_url,
                     "min_pixels": 28 * 28 * 4,
                     "max_pixels": 28 * 28 * 1280
                 },
@@ -22,12 +25,17 @@ completion = client.chat.completions.create(
             ]
         }
     ],
-    stream=True)
+    stream=True
+)
+
 full_content = ""
 print("流式输出内容为：")
 for chunk in completion:
     if chunk.choices[0].delta.content is None:
         continue
+    # Emit the chunk one character at a time (spaces are skipped)
+    for char in chunk.choices[0].delta.content:
+        if char!=' ':
+            print(char, end="", flush=True)  # print each character without a newline
+            time.sleep(0.1)  # throttle the output speed
     full_content += chunk.choices[0].delta.content
-    print(chunk.choices[0].delta.content)
-print(f"完整内容为：{full_content}")
\ No newline at end of file
diff --git a/AI/WxMini/Test/OCR1_Image_Recognition_Stream.py b/AI/WxMini/Test/OCR1_Image_Recognition_Stream.py
deleted file mode 100644
index 55c451a4..00000000
--- a/AI/WxMini/Test/OCR1_Image_Recognition_Stream.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from openai import OpenAI
-from WxMini.Milvus.Config.MulvusConfig import *
-
-client = OpenAI(
-    api_key=MODEL_API_KEY,
-    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
-)
-completion = client.chat.completions.create(
-    model="qwen-vl-ocr",
-    messages=[
-        {
-            "role": "user",
-            "content": [
-                {
-                    "type": "image_url",
-                    "image_url": "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg",
-                    "min_pixels": 28 * 28 * 4,
-                    "max_pixels": 28 * 28 * 1280
-                },
-                # 为保证识别效果，目前模型内部会统一使用"Read all the text in the image."进行识别，用户输入的文本不会生效。
-                {"type": "text", "text": "Read all the text in the image."},
-            ]
-        }
-    ])
-
-print(completion.choices[0].message.content)
\ No newline at end of file
diff --git a/AI/WxMini/Test/TestAutoSelect.py b/AI/WxMini/Test/TestAutoSelect.py
new file mode 100644
index 00000000..639abdd9
--- /dev/null
+++ b/AI/WxMini/Test/TestAutoSelect.py
@@ -0,0 +1,109 @@
+import re
+import time
+from openai import OpenAI
+from WxMini.Milvus.Config.MulvusConfig import *
+
+client = OpenAI(
+    api_key=MODEL_API_KEY,
+    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
+)
+
+def is_text_dominant(image_url):
+    """
+    Decide whether an image is mainly text by running it through the OCR model.
+    :param image_url: URL of the image to classify
+    :return: False when OCR yields nothing or only ASCII letters/digits/whitespace; True otherwise
+    """
+    completion = client.chat.completions.create(
+        model="qwen-vl-ocr",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image_url",
+                        "image_url": image_url,
+                        "min_pixels": 28 * 28 * 4,
+                        "max_pixels": 28 * 28 * 1280
+                    },
+                    {"type": "text", "text": "Read all the text in the image."},
+                ]
+            }
+        ],
+        stream=False
+    )
+    text = completion.choices[0].message.content or ""
+    # Decision rules:
+    # 1. Any character outside ASCII letters/digits/whitespace (e.g. Chinese) => text-dominant.
+    # 2. Only ASCII letters/digits/whitespace => treated as stray characters, not text.
+    # 3. Empty or missing OCR output => no text at all, so not text-dominant.
+    if not text or re.match(r'^[A-Za-z0-9\s]+$', text):
+        print("识别到的内容只有英文和数字，可能是无意义的字符，调用识别内容功能。")
+        return False
+    return True
+
+def recognize_text(image_url):
+    """
+    Run OCR on the image and print the streamed text to stdout.
+
+    Characters are printed one at a time with a 0.1 s pause; spaces are skipped.
+    """
+    # Image part of the message, with the pixel bounds passed to the OCR model
+    ocr_image = {
+        "type": "image_url",
+        "image_url": image_url,
+        "min_pixels": 28 * 28 * 4,
+        "max_pixels": 28 * 28 * 1280,
+    }
+    ocr_prompt = {"type": "text", "text": "Read all the text in the image."}
+    stream = client.chat.completions.create(
+        model="qwen-vl-ocr",
+        messages=[{"role": "user", "content": [ocr_image, ocr_prompt]}],
+        stream=True,
+    )
+    print("流式输出内容为：")
+    for piece in stream:
+        fragment = piece.choices[0].delta.content
+        if fragment is None:
+            continue
+        for ch in fragment:
+            if ch == ' ':
+                continue
+            print(ch, end="", flush=True)
+            time.sleep(0.1)
+
+def recognize_content(image_url):
+    """Ask qwen-vl-plus what the image shows and print the streamed answer char by char."""
+    question = {"type": "text", "text": "这是什么"}
+    picture = {"type": "image_url", "image_url": {"url": image_url}}
+    stream = client.chat.completions.create(
+        model="qwen-vl-plus",
+        messages=[{"role": "user", "content": [question, picture]}],
+        stream=True,
+    )
+    print("流式输出结果：")
+    for piece in stream:
+        fragment = piece.choices[0].delta.content
+        if fragment is None:
+            continue
+        # Print one character at a time with a short pause
+        for ch in fragment:
+            print(ch, end="", flush=True)
+            time.sleep(0.1)
+
+def process_image(image_url):
+    """Route the image to text recognition or content recognition based on is_text_dominant()."""
+    if not is_text_dominant(image_url):
+        # Objects/scenes: describe the picture with the vision model
+        print("检测到图片主要是物体/场景，开始识别内容：")
+        recognize_content(image_url)
+        return
+    # Mostly text: run OCR
+    print("检测到图片主要是文字内容，开始识别文字：")
+    recognize_text(image_url)
+
+# Example invocation
+#image_url = "https://ylt.oss-cn-hangzhou.aliyuncs.com/Temp/james.png"
+image_url = "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg"
+
+process_image(image_url)
\ No newline at end of file
diff --git a/AI/WxMini/Test/X1_ReadImage.py b/AI/WxMini/Test/X1_ReadImage.py
index f9f88fd2..a4399ae7 100644
--- a/AI/WxMini/Test/X1_ReadImage.py
+++ b/AI/WxMini/Test/X1_ReadImage.py
@@ -1,4 +1,4 @@
-import json
+import time  # 导入 time 模块
 from openai import OpenAI
 from WxMini.Milvus.Config.MulvusConfig import *
 
@@ -6,37 +6,24 @@ client = OpenAI(
     api_key=MODEL_API_KEY,
     base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
 )
-# 一盆花
-# photo_url = 'https://ylt.oss-cn-hangzhou.aliyuncs.com/Temp/mudan.jpg'
 
-# 狗与女主人
-# photo_url = "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg"
-
-# 哪吒
-# photo_url = 'https://ylt.oss-cn-hangzhou.aliyuncs.com/Temp/nezha.jpg'
-
-# 锅包肉
-# photo_url = 'https://ylt.oss-cn-hangzhou.aliyuncs.com/Temp/gbr.jpg'
-
-# 儿童看图说话
-# photo_url = 'https://ylt.oss-cn-hangzhou.aliyuncs.com/Temp/xiaoxiong.jpg'
-
-# 詹姆斯与库里
-photo_url='https://ylt.oss-cn-hangzhou.aliyuncs.com/Temp/james.png'
+# Image URL
+image_url = 'https://ylt.oss-cn-hangzhou.aliyuncs.com/Temp/james.png'
 
+# Create the streaming request
 completion = client.chat.completions.create(
-    model="qwen-vl-plus",
-    # 此处以qwen-vl-plus为例，可按需更换模型名称。模型列表：https://help.aliyun.com/zh/model-studio/getting-started/models
+    model="qwen-vl-plus",  # use the qwen-vl-plus model
     messages=[{"role": "user", "content": [
         {"type": "text", "text": "这是什么"},
-        {"type": "image_url",
-         "image_url": {"url": photo_url}}
-    ]}]
+        {"type": "image_url", "image_url": {"url": image_url}}
+    ]}],
+    stream=True  # enable streaming output
 )
-json_data = completion.model_dump_json()
-# 解析 JSON 数据
-data = json.loads(json_data)
-# 提取 content 内容
-content = data['choices'][0]['message']['content']
-# 打印 content
-print(content)
+
+# Stream the result to stdout
+print("流式输出结果：")
+for chunk in completion:
+    if chunk.choices[0].delta.content is not None:
+        for char in chunk.choices[0].delta.content:  # iterate character by character
+            print(char, end="", flush=True)  # print each character without a newline
+            time.sleep(0.1)  # throttle output: 0.1 s delay per character
\ No newline at end of file