import base64
import logging
import mimetypes

from openai import OpenAI

from Config.Config import ALY_LLM_API_KEY, ALY_LLM_MODEL_NAME, ALY_LLM_BASE_URL

# Module-specific logger.
logger = logging.getLogger(__name__)

# MIME type used when the file extension does not identify an image type.
_DEFAULT_IMAGE_MIME = "image/png"

# Instruction sent to the model together with the image. This text is part of
# the program's runtime behaviour and must not be reworded casually.
_EXTRACTION_PROMPT = (
    "【任务】仅提取图像中的文字内容(排除几何图形中的文字),并用【】包裹结果。"
    "【要求】1. 不添加任何解释、分析或额外说明;2. 不包含思考过程;3. 仅返回提取的文本内容。"
    "【2】如上面提到的信息外,其它信息一概不要输出!\n"
)


def _image_to_data_url(image_path):
    """Read the image at *image_path* and return it as a base64 ``data:`` URL."""
    # Declare the real image type when the extension reveals it (e.g. .jpg);
    # fall back to PNG for unknown or non-image extensions.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = _DEFAULT_IMAGE_MIME

    with open(image_path, "rb") as image_file:
        encoded = base64.b64encode(image_file.read()).decode("utf-8")
    return f"data:{mime_type};base64,{encoded}"


def extract_text_from_image(image_path):
    """
    Extract the text content of an image (excluding text inside geometric figures).

    The image is sent as a base64 data URL, together with a fixed extraction
    prompt, to the ``qvq-max`` model through the OpenAI-compatible client, and
    the streamed answer is concatenated into one string.

    Args:
        image_path: Absolute path of the image file.

    Returns:
        str: The extracted text. If the stream carried no answer content at
        all, the model's reasoning text is returned instead as a best-effort
        fallback. Returns an empty string when the file does not exist or any
        other error occurs (the error is logged, not raised).
    """
    try:
        image_url = _image_to_data_url(image_path)

        # OpenAI-compatible client (QWen QVQ endpoint).
        client = OpenAI(
            api_key=ALY_LLM_API_KEY,
            base_url=ALY_LLM_BASE_URL,
        )

        # NOTE(review): the model name is hard-coded while ALY_LLM_MODEL_NAME
        # is imported but unused -- confirm whether the configured model
        # should be used here instead.
        completion = client.chat.completions.create(
            model="qvq-max",
            messages=[{
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {"url": image_url}},
                    {"type": "text", "text": _EXTRACTION_PROMPT},
                ],
            }],
            stream=True,
        )

        # Keep the final answer and the reasoning apart so that the model's
        # thinking process never ends up mixed into the extracted text.
        answer_parts = []
        reasoning_parts = []
        for chunk in completion:
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta

            content = getattr(delta, "content", None)
            if content:
                answer_parts.append(content)

            reasoning = getattr(delta, "reasoning_content", None)
            if reasoning:
                reasoning_parts.append(reasoning)

        logger.info("文字提取完成")
        # Prefer the answer; fall back to the reasoning text only when the
        # model produced no answer content at all.
        return "".join(answer_parts) or "".join(reasoning_parts)

    except FileNotFoundError:
        logger.error("图片文件不存在: %s", image_path)
    except Exception as e:
        # Top-level boundary: log with traceback and return "" so callers get
        # the documented empty-string failure value.
        logger.exception("文字提取失败: %s", e)
    return ""


# Example usage
if __name__ == "__main__":
    # Configure logging (a real project should do this once in the main program).
    logging.basicConfig(level=logging.INFO)

    # Path of the test image.
    test_image = r"D:\dsWork\dsProject\dsLightRag\Test\extracted\a62dce9d67c818accf94113aabefe172\1_1_TXT.png"

    # Run the extraction.
    text = extract_text_from_image(test_image)
    print("提取结果:\n", text)