"""Extract the text of a math problem from an image via a GLM vision model."""
import base64
import json
import logging
import mimetypes

import requests

from Config.Config import GLM_API_KEY, GLM_MODEL_NAME, GLM_BASE_URL

# Module-specific logger.
logger = logging.getLogger(__name__)

# (connect, read) timeouts in seconds. The read timeout applies to the gap
# between two received chunks, so it bounds a stalled stream rather than the
# total duration of a long answer.
_REQUEST_TIMEOUT = (10, 120)

# Framing of the server-sent-event lines produced by the streaming endpoint.
_SSE_DATA_PREFIX = "data:"
_SSE_DONE_MARKER = "[DONE]"

# MIME type used when the file extension does not identify an image type.
_DEFAULT_IMAGE_MIME = "image/png"


def _build_image_data_url(image_path):
    """Read the image file and return it as a base64 ``data:`` URL."""
    with open(image_path, "rb") as image_file:
        encoded = base64.b64encode(image_file.read()).decode("utf-8")

    # Declare the real image type instead of always claiming PNG; fall back to
    # PNG (the previous hard-coded value) when the extension is unknown.
    mime_type, _ = mimetypes.guess_type(str(image_path))
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = _DEFAULT_IMAGE_MIME
    return f"data:{mime_type};base64,{encoded}"


def _parse_stream_line(raw_line):
    """Decode one non-empty line of the streamed response.

    Args:
        raw_line: One line of the response body as bytes.

    Returns:
        tuple: ``(done, content)`` where ``done`` is True for the end-of-stream
        marker and ``content`` is the text fragment carried by the line, or
        None when the line carries no text.

    Raises:
        ValueError: If the line is not valid UTF-8 or its payload is not JSON.
        AttributeError, TypeError, KeyError, IndexError: If the JSON payload
            does not have the expected ``choices[0].delta`` structure.
    """
    line = raw_line.decode("utf-8").strip()
    if line.startswith(_SSE_DATA_PREFIX):
        payload = line[len(_SSE_DATA_PREFIX):].strip()
    elif line.startswith(_SSE_DONE_MARKER):
        # Tolerate a bare terminator without the "data:" prefix.
        payload = line
    else:
        # Other SSE fields or comments carry no text.
        return False, None

    if payload.startswith(_SSE_DONE_MARKER):
        return True, None

    event = json.loads(payload)
    choices = event.get("choices") or []
    if not choices:
        return False, None
    delta = choices[0].get("delta") or {}
    return False, delta.get("content") or None


def extract_text_from_image(image_path: str) -> str:
    """Extract the text of a math problem from an image.

    The image is sent to the configured GLM model together with a prompt that
    asks for the problem text only (text inside geometric figures excluded).
    The streamed answer fragments are concatenated in arrival order.

    Args:
        image_path: Path of the image file.

    Returns:
        str: The extracted text, or an empty string if the file is missing,
        the request fails, or the server answers with an HTTP error status.
    """
    try:
        image_url = _build_image_data_url(image_path)

        # Build the API request: one task sentence plus numbered format rules.
        prompt = "【任务】提取图像中的数学题文字内容(排除几何图形),用【】包裹结果。"
        # Optional terminology hint, currently disabled:
        #prompt += "【专业术语】需准确识别:将军饮马、动点、等量代换、两点之间线段最短、垂线段最短"
        prompt += "【格式】1. 术语错误需修正;2. 保证句子语法通顺;3. 不添加解释;4. 不包含思考过程"

        headers = {
            "Authorization": f"Bearer {GLM_API_KEY}",
            "Content-Type": "application/json",
        }
        data = {
            "model": GLM_MODEL_NAME,
            "messages": [{
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {"url": image_url}},
                ],
            }],
            "stream": True,
        }

        # Send the request and consume the streamed response.
        extracted_text = []
        with requests.post(
            GLM_BASE_URL,
            headers=headers,
            json=data,
            stream=True,
            timeout=_REQUEST_TIMEOUT,
        ) as response:
            # Surface HTTP errors instead of parsing an error body as a stream.
            response.raise_for_status()

            for raw_line in response.iter_lines():
                if not raw_line:
                    continue
                try:
                    done, content = _parse_stream_line(raw_line)
                except (ValueError, AttributeError, TypeError,
                        KeyError, IndexError) as e:
                    # A single malformed chunk must not abort the whole stream.
                    logger.warning("解析响应块失败: %s", e)
                    continue

                if done:
                    logger.info("文字提取完成")
                    break
                # Every fragment is kept: identical fragments (punctuation,
                # repeated symbols) are legitimate parts of the text.
                if content:
                    extracted_text.append(content)

        # Join the fragments and return the result.
        return "".join(extracted_text)

    except FileNotFoundError:
        logger.error("图片文件不存在: %s", image_path)
    except Exception as e:
        # Boundary handler: callers rely on "" rather than an exception.
        logger.error("文字提取失败: %s", e, exc_info=True)

    return ""


# Example usage
if __name__ == "__main__":
    # Configure logging (a real project should configure it once in the main program).
    logging.basicConfig(level=logging.INFO)

    # Test image path
    test_image = r'D:\dsWork\dsProject\dsLightRag\Test\extracted\a62dce9d67c818accf94113aabefe172\1_1_TXT.png'

    # Call the extraction function
    text = extract_text_from_image(test_image)
    print("提取结果:\n", text)