main
HuangHai 4 months ago
parent 7a5f225571
commit 3f8ea34c24

@@ -1,5 +1,6 @@
import asyncio
import logging
+import re
import time
import uuid
from contextlib import asynccontextmanager
@@ -11,6 +12,7 @@ from fastapi.security import OAuth2PasswordBearer
from jose import JWTError, jwt
from openai import AsyncOpenAI
from passlib.context import CryptContext
+from starlette.responses import StreamingResponse

from WxMini.Milvus.Config.MulvusConfig import *
from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager
@@ -267,7 +269,7 @@ async def reply(person_id: str = Form(...),
            # Query the non-vector fields
            record = await asyncio.to_thread(collection_manager.query_by_id, hit.id)
            if record:
-                #logger.info(f"查询到的记录: {record}")
+                # logger.info(f"查询到的记录: {record}")
                # Append the historical interaction
                history_prompt += f"用户: {record['user_input']}\n大模型: {record['model_response']}\n"
    except Exception as e:
@@ -275,7 +277,7 @@ async def reply(person_id: str = Form(...),
    # Limit the length of the history prompt
    history_prompt = history_prompt[:2000]
-    #logger.info(f"历史交互提示词: {history_prompt}")
+    # logger.info(f"历史交互提示词: {history_prompt}")

    # Call the LLM, passing the historical interactions as part of the prompt
    try:
@@ -314,7 +316,7 @@ async def reply(person_id: str = Form(...),
        if len(result) > 500:
            logger.warning(f"大模型回复被截断,原始长度: {len(result)}")
        await asyncio.to_thread(collection_manager.insert_data, entities)
-        #logger.info("用户输入和大模型反馈已记录到向量数据库。")
+        # logger.info("用户输入和大模型反馈已记录到向量数据库。")

        # Call TTS to generate the MP3
        uuid_str = str(uuid.uuid4())
@@ -327,7 +329,7 @@ async def reply(person_id: str = Form(...),
        t = TTS(None)  # passing None means the audio is not saved to a local file
        audio_data, duration = await asyncio.to_thread(t.generate_audio,
                                                       result)  # assumes the TTS class has a generate_audio method that returns the audio data
-        #print(f"音频时长: {duration} 秒")
+        # print(f"音频时长: {duration} 秒")

        # Upload the audio data to OSS directly from memory
        await asyncio.to_thread(upload_mp3_to_oss_from_memory, tts_file, audio_data)
@@ -338,7 +340,7 @@ async def reply(person_id: str = Form(...),
        # Save the chat record to MySQL
        await save_chat_to_mysql(app.state.mysql_pool, person_id, prompt, result, url, duration)
-        #logger.info("用户输入和大模型反馈已记录到 MySQL 数据库。")
+        # logger.info("用户输入和大模型反馈已记录到 MySQL 数据库。")

        # Run the session-end check asynchronously
        asyncio.create_task(on_session_end(person_id))
@@ -402,7 +404,8 @@ async def get_risk_chat_logs(
    offset = (page - 1) * page_size

    # Call the get_chat_logs_by_risk_flag method
-    logs, total = await get_chat_logs_by_risk_flag(app.state.mysql_pool, risk_flag,current_user["person_id"], offset, page_size)
+    logs, total = await get_chat_logs_by_risk_flag(app.state.mysql_pool, risk_flag, current_user["person_id"], offset,
+                                                   page_size)
    if not logs:
        return {
            "success": False,
@@ -425,7 +428,6 @@
    }

# Endpoint for risk statistics
@app.get("/aichat/chat_logs_summary")
async def chat_logs_summary(
@@ -484,6 +486,7 @@ async def chat_logs_summary(
        }
    }

# Endpoint that returns an OSS upload authorization token
@app.get("/aichat/get_oss_upload_token")
async def get_oss_upload_token(current_user: dict = Depends(get_current_user)):
@@ -504,6 +507,107 @@ async def get_oss_upload_token(current_user: dict = Depends(get_current_user)):
    }
async def is_text_dominant(image_url):
    """
    Determine whether an image is mainly text content
    :param image_url: image URL
    :return: True - mainly text / False - mainly objects or a scene
    """
    completion = await client.chat.completions.create(
        model="qwen-vl-ocr",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": image_url,
                        "min_pixels": 28 * 28 * 4,
                        "max_pixels": 28 * 28 * 1280
                    },
                    {"type": "text", "text": "Read all the text in the image."},
                ]
            }
        ],
        stream=False
    )
    text = completion.choices[0].message.content
    # Check whether the recognized text contains only English letters and digits
    if re.match(r'^[A-Za-z0-9\s]+$', text):
        print("识别到的内容只有英文和数字,可能是无意义的字符,调用识别内容功能。")
        return False
    return True


async def recognize_text(image_url):
    """
    Recognize the text in an image (streaming output)
    """
    completion = await client.chat.completions.create(
        model="qwen-vl-ocr",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": image_url,
                        "min_pixels": 28 * 28 * 4,
                        "max_pixels": 28 * 28 * 1280
                    },
                    {"type": "text", "text": "Read all the text in the image."},
                ]
            }
        ],
        stream=True
    )
    async for chunk in completion:
        if chunk.choices[0].delta.content is not None:
            for char in chunk.choices[0].delta.content:
                if char != ' ':
                    yield char
                    await asyncio.sleep(0.1)  # non-blocking delay; time.sleep() would block the event loop


async def recognize_content(image_url):
    """
    Recognize the content of an image (streaming output)
    """
    completion = await client.chat.completions.create(
        model="qwen-vl-plus",
        messages=[{"role": "user", "content": [
            {"type": "text", "text": "这是什么"},
            {"type": "image_url", "image_url": {"url": image_url}}
        ]}],
        stream=True
    )
    async for chunk in completion:
        if chunk.choices[0].delta.content is not None:
            for char in chunk.choices[0].delta.content:
                yield char
                await asyncio.sleep(0.1)  # non-blocking delay; time.sleep() would block the event loop


@app.get("/aichat/process_image")
async def process_image(image_url: str, current_user: dict = Depends(get_current_user)):
    """
    Process an image and automatically decide which capability to call
    :param image_url: image URL
    :return: streaming response
    """
    logger.info(f"current_user:{current_user['login_name']}")
    try:
        if await is_text_dominant(image_url):
            print("检测到图片主要是文字内容,开始识别文字:")
            return StreamingResponse(recognize_text(image_url), media_type="text/plain")
        else:
            print("检测到图片主要是物体/场景,开始识别内容:")
            return StreamingResponse(recognize_content(image_url), media_type="text/plain")
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# Run the FastAPI application
if __name__ == "__main__":
    import uvicorn
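For reference, a minimal client-side sketch for consuming the new /aichat/process_image endpoint added above. Only the path, the image_url query parameter, the Bearer-token auth, and the plain-text streaming response come from the diff; the base URL, port, httpx dependency, and the example token/image values are assumptions for illustration.

import asyncio

import httpx  # assumed to be available; any HTTP client with streaming support works


async def stream_process_image(image_url: str, token: str) -> None:
    # http://localhost:8000 is a placeholder base URL; adjust to the real deployment.
    headers = {"Authorization": f"Bearer {token}"}
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("GET", "http://localhost:8000/aichat/process_image",
                                 params={"image_url": image_url}, headers=headers) as resp:
            resp.raise_for_status()
            # The endpoint streams plain text character by character; print it as it arrives.
            async for piece in resp.aiter_text():
                print(piece, end="", flush=True)


if __name__ == "__main__":
    asyncio.run(stream_process_image("https://example.com/some.jpg", token="<jwt-from-login>"))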

@@ -1,3 +1,4 @@
+import time
from openai import OpenAI
from WxMini.Milvus.Config.MulvusConfig import *
@@ -5,6 +6,8 @@ client = OpenAI(
    api_key=MODEL_API_KEY,
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
)
+image_url = "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg"
completion = client.chat.completions.create(
    model="qwen-vl-ocr",
    messages=[
@@ -13,7 +16,7 @@ completion = client.chat.completions.create(
            "content": [
                {
                    "type": "image_url",
-                    "image_url": "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg",
+                    "image_url": image_url,
                    "min_pixels": 28 * 28 * 4,
                    "max_pixels": 28 * 28 * 1280
                },
@@ -22,12 +25,17 @@ completion = client.chat.completions.create(
            ]
        }
    ],
-    stream=True)
+    stream=True
+)
full_content = ""
print("流式输出内容为:")
for chunk in completion:
    if chunk.choices[0].delta.content is None:
        continue
+    # Iterate over the characters and print them one by one
+    for char in chunk.choices[0].delta.content:
+        if char != ' ':
+            print(char, end="", flush=True)  # print each character without a newline
+            time.sleep(0.1)  # throttle the output speed
    full_content += chunk.choices[0].delta.content
-    print(chunk.choices[0].delta.content)
+print(f"完整内容为:{full_content}")
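A side note on the min_pixels / max_pixels values passed to qwen-vl-ocr above: 28 * 28 * 4 = 3,136 and 28 * 28 * 1280 = 1,003,520, so the image is handled within roughly a 3K-to-1M pixel budget. A small helper (a sketch only, not part of the commit; Pillow and requests are assumed dependencies) to see where a given image falls relative to those bounds:

from io import BytesIO

import requests
from PIL import Image


def pixel_count_in_bounds(image_url: str,
                          min_pixels: int = 28 * 28 * 4,
                          max_pixels: int = 28 * 28 * 1280) -> bool:
    # Download the image and compare its raw pixel count against the bounds used above.
    img = Image.open(BytesIO(requests.get(image_url, timeout=10).content))
    width, height = img.size
    return min_pixels <= width * height <= max_pixels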

@@ -1,26 +0,0 @@
from openai import OpenAI
from WxMini.Milvus.Config.MulvusConfig import *

client = OpenAI(
    api_key=MODEL_API_KEY,
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
)
completion = client.chat.completions.create(
    model="qwen-vl-ocr",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg",
                    "min_pixels": 28 * 28 * 4,
                    "max_pixels": 28 * 28 * 1280
                },
                # To guarantee recognition quality, the model currently always uses "Read all the text in the image." internally; user-supplied text has no effect.
                {"type": "text", "text": "Read all the text in the image."},
            ]
        }
    ])
print(completion.choices[0].message.content)

@@ -0,0 +1,109 @@
import re
import time

from openai import OpenAI
from WxMini.Milvus.Config.MulvusConfig import *

client = OpenAI(
    api_key=MODEL_API_KEY,
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
)


def is_text_dominant(image_url):
    """
    Determine whether an image is mainly text content
    :param image_url: image URL
    :return: True - mainly text / False - mainly objects or a scene
    """
    completion = client.chat.completions.create(
        model="qwen-vl-ocr",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": image_url,
                        "min_pixels": 28 * 28 * 4,
                        "max_pixels": 28 * 28 * 1280
                    },
                    {"type": "text", "text": "Read all the text in the image."},
                ]
            }
        ],
        stream=False
    )
    text = completion.choices[0].message.content
    # Decision criteria:
    # 1. If Chinese characters appear, the image is treated as text.
    # 2. If the recognized content contains only English letters and digits, fall back to content recognition.
    if re.match(r'^[A-Za-z0-9\s]+$', text):
        print("识别到的内容只有英文和数字,可能是无意义的字符,调用识别内容功能。")
        return False
    return True


def recognize_text(image_url):
    """
    Recognize the text in an image
    """
    completion = client.chat.completions.create(
        model="qwen-vl-ocr",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": image_url,
                        "min_pixels": 28 * 28 * 4,
                        "max_pixels": 28 * 28 * 1280
                    },
                    {"type": "text", "text": "Read all the text in the image."},
                ]
            }
        ],
        stream=True
    )
    print("流式输出内容为:")
    for chunk in completion:
        if chunk.choices[0].delta.content is not None:
            for char in chunk.choices[0].delta.content:
                if char != ' ':
                    print(char, end="", flush=True)
                    time.sleep(0.1)


def recognize_content(image_url):
    """
    Recognize the content of an image
    """
    completion = client.chat.completions.create(
        model="qwen-vl-plus",
        messages=[{"role": "user", "content": [
            {"type": "text", "text": "这是什么"},
            {"type": "image_url", "image_url": {"url": image_url}}
        ]}],
        stream=True
    )
    print("流式输出结果:")
    for chunk in completion:
        if chunk.choices[0].delta.content is not None:
            for char in chunk.choices[0].delta.content:
                print(char, end="", flush=True)
                time.sleep(0.1)


def process_image(image_url):
    """
    Process an image and automatically decide which capability to call
    """
    if is_text_dominant(image_url):
        print("检测到图片主要是文字内容,开始识别文字:")
        recognize_text(image_url)
    else:
        print("检测到图片主要是物体/场景,开始识别内容:")
        recognize_content(image_url)


# Example call
# image_url = "https://ylt.oss-cn-hangzhou.aliyuncs.com/Temp/james.png"
image_url = "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg"
process_image(image_url)
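The decision criteria noted in is_text_dominant above mention Chinese characters, but only the English-and-digits check is actually implemented; everything else falls through to "text". A sketch of a stricter variant that checks criterion 1 explicitly, assuming the same recognized text string as input (the CJK code-point range used here is an illustrative assumption, not from the commit):

import re


def looks_text_dominant(text: str) -> bool:
    # Criterion 1: any CJK ideograph strongly suggests the image is mainly text.
    if re.search(r'[\u4e00-\u9fff]', text):
        return True
    # Criterion 2: only ASCII letters, digits and whitespace -> likely stray characters,
    # so fall back to general content recognition.
    if re.fullmatch(r'[A-Za-z0-9\s]+', text):
        return False
    return True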

@@ -1,4 +1,4 @@
-import json
+import time  # import the time module
from openai import OpenAI
from WxMini.Milvus.Config.MulvusConfig import *
@@ -6,37 +6,24 @@ client = OpenAI(
    api_key=MODEL_API_KEY,
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
)
-# A potted flower
-# photo_url = 'https://ylt.oss-cn-hangzhou.aliyuncs.com/Temp/mudan.jpg'
-# A dog and its owner
-# photo_url = "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg"
-# Nezha
-# photo_url = 'https://ylt.oss-cn-hangzhou.aliyuncs.com/Temp/nezha.jpg'
-# Guobaorou (a pork dish)
-# photo_url = 'https://ylt.oss-cn-hangzhou.aliyuncs.com/Temp/gbr.jpg'
-# A children's picture-description image
-# photo_url = 'https://ylt.oss-cn-hangzhou.aliyuncs.com/Temp/xiaoxiong.jpg'
-# James and Curry
-photo_url='https://ylt.oss-cn-hangzhou.aliyuncs.com/Temp/james.png'
+# Image URL
+image_url = 'https://ylt.oss-cn-hangzhou.aliyuncs.com/Temp/james.png'
+# Create the streaming request
completion = client.chat.completions.create(
-    model="qwen-vl-plus",
-    # qwen-vl-plus is used here as an example; change the model name as needed. Model list: https://help.aliyun.com/zh/model-studio/getting-started/models
+    model="qwen-vl-plus",  # use the qwen-vl-plus model
    messages=[{"role": "user", "content": [
        {"type": "text", "text": "这是什么"},
-        {"type": "image_url",
-         "image_url": {"url": photo_url}}
-    ]}]
+        {"type": "image_url", "image_url": {"url": image_url}}
+    ]}],
+    stream=True  # enable streaming output
)
-json_data = completion.model_dump_json()
-# Parse the JSON data
-data = json.loads(json_data)
-# Extract the content field
-content = data['choices'][0]['message']['content']
-# Print the content
-print(content)
+# Stream the result
+print("流式输出结果:")
+for chunk in completion:
+    if chunk.choices[0].delta.content is not None:
+        for char in chunk.choices[0].delta.content:  # iterate character by character
+            print(char, end="", flush=True)  # print each character without a newline
+            time.sleep(0.1)  # throttle the output speed, 0.1 s per character
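One practical note on the per-character delay used in both streaming scripts: at 0.1 s per character, a 600-character answer takes about a minute to print. If the typewriter effect is not needed, printing each chunk as it arrives keeps the output responsive; a sketch of the alternative loop, assuming the same streaming completion object as above:

# Print whole chunks as they arrive instead of sleeping per character.
full_content = ""
for chunk in completion:
    delta = chunk.choices[0].delta.content
    if delta is None:
        continue
    print(delta, end="", flush=True)
    full_content += delta
print()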