From f7817ea7f4a2ab8ebc47ac502bcfa4e1341fe7fa Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Sat, 28 Jun 2025 20:29:40 +0800 Subject: [PATCH] 'commit' --- dsRag/Config/Config.py | 4 ++-- dsRag/Test/TestGongShi.py | 12 +++++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/dsRag/Config/Config.py b/dsRag/Config/Config.py index 8c01224e..ec1827fe 100644 --- a/dsRag/Config/Config.py +++ b/dsRag/Config/Config.py @@ -7,8 +7,8 @@ MYSQL_DB_NAME = "base_db" # Elasticsearch配置 ES_CONFIG = { - "hosts": "https://10.10.14.206:9200", - #"hosts": "https://127.0.0.1:9200", + #"hosts": "https://10.10.14.206:9200", + "hosts": "https://127.0.0.1:9200", "basic_auth": ("elastic", "jv9h8uwRrRxmDi1dq6u8"), "verify_certs": False, "ssl_show_warn": False, diff --git a/dsRag/Test/TestGongShi.py b/dsRag/Test/TestGongShi.py index c2e20498..9e9cae25 100644 --- a/dsRag/Test/TestGongShi.py +++ b/dsRag/Test/TestGongShi.py @@ -8,7 +8,17 @@ import pypandoc def docx_to_latex(docx_path): latex_content = pypandoc.convert_file(docx_path, 'latex') import re - return re.sub(r'\\\[(.*?)\\\]', r'$$\1$$', latex_content) + # 替换公式格式 + latex_content = re.sub(r'\\\[(.*?)\\\]', r'$$\1$$', latex_content) + # 替换图片路径为【图片X】格式 + img_count = 1 + def replacer(match): + nonlocal img_count + result = f'【图片{img_count}】' + img_count += 1 + return result + latex_content = re.sub(r'\\includegraphics\[.*?\]\{.*?\}', replacer, latex_content) + return latex_content latex_content = docx_to_latex('带公式的WORD文档.docx') print(latex_content) # 包含LaTeX格式公式 \ No newline at end of file