main
HuangHai 2 weeks ago
parent 0355495ebd
commit 3271b93341

@ -29,5 +29,5 @@ completion = client.chat.completions.create(
ocr_text = completion.choices[0].message.content
# 二、调用格式化函数处理内容
format_exam_content(client=client, raw_text=ocr_text, output_path="../output/数学OCR整理后的结果.md")
format_exam_content(raw_text=ocr_text, output_path="../output/数学OCR整理后的结果.md")
print("保存成功!")

@ -90,11 +90,7 @@ async def main():
f.write(content)
# 新增:使用大模型整理内容
format_exam_content(
client=llm_model_func(),
raw_text=content,
output_path=os.path.join(output_dir, "物理Docx整理后的结果.md")
)
format_exam_content(raw_text=content, output_path=os.path.join(output_dir, "物理Docx整理后的结果.md"))
logger.info(f"内容整理完成,保存至: {os.path.join(output_dir, '物理Docx整理后的结果.md')}")

@ -8,6 +8,8 @@ from lightrag import LightRAG
from lightrag.kg.shared_storage import initialize_pipeline_status
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc, logger, set_verbose_debug
from openai import OpenAI
from Config.Config import *
@ -181,7 +183,11 @@ def create_vision_model_func(llm_model_func):
return vision_model_func
def format_exam_content(client, raw_text, output_path):
def format_exam_content(raw_text, output_path):
client = OpenAI(
api_key=LLM_API_KEY,
base_url=LLM_BASE_URL,
)
"""
将OCR识别的原始试卷内容格式化为标准试题格式

Loading…
Cancel
Save