diff --git a/dsLightRag/ShiTi/Docx/SourceWithPhoto.jpg b/dsLightRag/ShiTi/Docx/SourceWithPhoto.jpg new file mode 100644 index 00000000..c42ed73c Binary files /dev/null and b/dsLightRag/ShiTi/Docx/SourceWithPhoto.jpg differ diff --git a/dsLightRag/ShiTi/T1_UploadImage.py b/dsLightRag/ShiTi/T1_UploadImage.py index 26b070ae..3fc603fa 100644 --- a/dsLightRag/ShiTi/T1_UploadImage.py +++ b/dsLightRag/ShiTi/T1_UploadImage.py @@ -8,7 +8,7 @@ ALY_ENDPOINT = 'https://oss-cn-hangzhou.aliyuncs.com' ALY_BUCKET_NAME = 'ylt' # 目标键值 -key = "HuangHai/Test/Source.jpg" +key = "HuangHai/Test/SourceWithPhoto.jpg" def main(): # 使用硬编码参数替换命令行参数 @@ -57,7 +57,7 @@ def main(): key=args.key, progress_fn=_progress_fn, ), - r"D:\dsWork\dsProject\dsLightRag\ShiTi\Docx\Source.jpg", # 指定本地文件路径 + r"D:\dsWork\dsProject\dsLightRag\ShiTi\Docx\SourceWithPhoto.jpg", # 指定本地文件路径 ) # 输出请求的结果状态码、请求ID、内容MD5、ETag、CRC64校验码和版本ID,用于检查请求是否成功 diff --git a/dsLightRag/ShiTi/T2_ImageToMd.py b/dsLightRag/ShiTi/T2_ImageToMd.py index 2066762a..059980e1 100644 --- a/dsLightRag/ShiTi/T2_ImageToMd.py +++ b/dsLightRag/ShiTi/T2_ImageToMd.py @@ -1,5 +1,4 @@ from openai import OpenAI -from Util.LightRagUtil import format_exam_content from Config.Config import * # 一、调用OCR整理出试题 @@ -17,7 +16,7 @@ completion = client.chat.completions.create( "content": [ { "type": "image_url", - "image_url": "https://ylt.oss-cn-hangzhou.aliyuncs.com/HuangHai/Test/Source.jpg", + "image_url": "https://ylt.oss-cn-hangzhou.aliyuncs.com/HuangHai/Test/SourceWithPhoto.jpg", "min_pixels": 28 * 28 * 4, "max_pixels": 28 * 28 * 8192 }, @@ -28,6 +27,33 @@ completion = client.chat.completions.create( ocr_text = completion.choices[0].message.content -# 二、调用格式化函数处理内容 -format_exam_content(raw_text=ocr_text, output_path="../output/数学OCR整理后的结果.md") + +prompt = """ + 我将提供一份markdown格式的试卷,请帮我整理出每道题的以下内容: + 1. 题目序号 + 2. 题目内容(自动识别并添加$或$$包裹数学公式) + 3. 选项(如果有) + 4. 答案 + 5. 解析 + + 要求: + - 一道题一道题输出,不要使用表格 + - 自动检测数学表达式并用$或$$正确包裹 + - 确保公式中的特殊字符正确转义 + - 除题目内容外,不要输出其它无关信息 + + 内容如下: + """ +prompt += ocr_text +completion = client.chat.completions.create( + model="deepseek-v3", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", + "content": prompt}, + ], +) +print(completion.choices[0].message.content) +with open("../output/数学OCR整理后的结果.md", "w", encoding="utf-8") as f: + f.write(completion.choices[0].message.content) print("保存成功!") diff --git a/dsLightRag/Topic/JiHe/kv_store_llm_response_cache.json b/dsLightRag/Topic/JiHe/kv_store_llm_response_cache.json index 7c992bd2..abfbd39c 100644 --- a/dsLightRag/Topic/JiHe/kv_store_llm_response_cache.json +++ b/dsLightRag/Topic/JiHe/kv_store_llm_response_cache.json @@ -151,6 +151,16 @@ "embedding_min": null, "embedding_max": null, "original_prompt": "求证:在三角形ABC中,P为其内部任意一点。请证明:∠BPC > ∠A。" + }, + "c66d70f780cadf26dbefdacbf2a5ba8d": { + "return": "{\"high_level_keywords\": [\"\\u4e09\\u89d2\\u5f62\", \"\\u51e0\\u4f55\\u8bc1\\u660e\", \"\\u89d2\\u5ea6\\u5173\\u7cfb\"], \"low_level_keywords\": [\"\\u4e09\\u89d2\\u5f62ABC\", \"\\u70b9P\", \"\\u2220BPC\", \"\\u2220A\"]}", + "cache_type": "keywords", + "chunk_id": null, + "embedding": null, + "embedding_shape": null, + "embedding_min": null, + "embedding_max": null, + "original_prompt": "在三角形ABC中,P为其内部任意一点。请证明:∠BPC > ∠A。" } } } \ No newline at end of file