94 lines
3.2 KiB
Python
94 lines
3.2 KiB
Python
from openai import OpenAI
|
|
from Config.Config import *
|
|
import sys
|
|
|
|
from openai import AsyncOpenAI # 新增异步客户端导入
|
|
|
|
# 保留原同步函数,添加异步版本
|
|
async def get_llm_response_async(query_text: str, stream: bool = True):
    """
    Asynchronously fetch a response from the LLM.

    @param query_text: the user query text sent as the user message
    @param stream: whether to request streaming output
    @return: an async generator yielding response text chunks (streaming
             mode) or the complete response text as a single chunk
             (non-streaming mode); on failure yields one error string
    """
    client = AsyncOpenAI(
        api_key=LLM_API_KEY,
        base_url=LLM_BASE_URL,
    )

    try:
        # Issue the chat-completion request.
        completion = await client.chat.completions.create(
            model=LLM_MODEL_NAME,
            messages=[
                {'role': 'system', 'content': 'You are a helpful assistant.'},
                {'role': 'user', 'content': query_text}
            ],
            stream=stream
        )

        if stream:
            # Streaming mode: yield each content delta as it arrives.
            async for chunk in completion:
                # Guard: the API can send keep-alive chunks with no choices.
                if chunk and chunk.choices and len(chunk.choices) > 0:
                    delta = chunk.choices[0].delta
                    if delta:
                        content = delta.content
                        # BUGFIX: the previous check used `content.strip()`,
                        # which silently dropped whitespace-only chunks
                        # (e.g. bare newlines) and corrupted the streamed
                        # text. Only skip None/empty-string deltas, matching
                        # the sync get_llm_response() behavior.
                        if content:
                            # Mirror the chunk to stdout for live feedback.
                            print(content, end='', flush=True)
                            yield content
        else:
            # Non-streaming mode: yield the full message content once.
            if completion and completion.choices and len(completion.choices) > 0:
                message = completion.choices[0].message
                # message.content may be None (e.g. tool-call responses);
                # yield only real text.
                if message and message.content:
                    yield message.content

    except Exception as e:
        # Log to stderr and surface the error to the consumer as a text
        # chunk so downstream display code still receives something.
        print(f"大模型请求异常: {str(e)}", file=sys.stderr)
        yield f"处理请求时发生异常: {str(e)}"
|
|
|
|
# 保留原同步函数
|
|
def get_llm_response(query_text: str, stream: bool = True):
    """
    Synchronously fetch a response from the LLM.

    @param query_text: the user query text sent as the user message
    @param stream: whether to request streaming output (chunks are echoed
                   to stdout as they arrive)
    @return: the complete response text (accumulated across chunks when
             streaming); empty string if the model returned no content
    """
    client = OpenAI(
        api_key=LLM_API_KEY,
        base_url=LLM_BASE_URL,
    )

    # Issue the chat-completion request.
    completion = client.chat.completions.create(
        model=LLM_MODEL_NAME,
        messages=[
            {'role': 'system', 'content': 'You are a helpful assistant.'},
            {'role': 'user', 'content': query_text}
        ],
        stream=stream
    )

    full_response = []

    if stream:
        for chunk in completion:
            # Skip keep-alive chunks / empty deltas.
            if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
                content = chunk.choices[0].delta.content
                full_response.append(content)
                # Echo chunks in real time without a trailing newline.
                print(content, end='', flush=True)
    else:
        # BUGFIX: message.content may be None (e.g. tool-call responses),
        # and appending None would make ''.join() raise TypeError below.
        # Also guard against an empty choices list, as the async variant does.
        if completion.choices:
            content = completion.choices[0].message.content
            if content:
                full_response.append(content)

    return ''.join(full_response)
|