'commit'

3 weeks ago · 4a78606de0
parent 0bfba12fb7
commit 4a78606de0
7 changed files with 0 additions and 125 deletions
--- a/dsRag/Test/T2_SplitTxtTest.py
+++ b/dsRag/Test/T2_SplitTxtTest.py
@ -1,37 +0,0 @@
-import zipfile
-import xml.etree.ElementTree as ET
-
-def parse_docx(docx_path):
-    with zipfile.ZipFile(docx_path) as z:
-        with z.open('word/document.xml') as f:
-            tree = ET.parse(f)
-            root = tree.getroot()
-            ns = {
-                'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
-                'm': 'http://schemas.openxmlformats.org/officeDocument/2006/math'
-            }
-
-            # 查找所有公式(包括浮动和内联公式)
-            formula_count = 0
-            for oMath in root.findall('.//m:oMath', ns):
-
-                print(oMath)
-                formula_count += 1
-                formula_text = ''
-                # 处理公式中的文本节点
-                for t in oMath.findall('.//m:t', ns):
-                    if t.text:
-                        formula_text += t.text
-
-                # 处理公式中的特殊符号
-                for e in oMath.findall('.//m:e', ns):
-                    if e.text:
-                        formula_text += e.text
-
-                print(f"公式{formula_count}内容: {formula_text}")
-
-            print(f"共找到{formula_count}个公式")
-
-if __name__ == "__main__":
-    docx_path = r'D:\dsWork\dsProject\dsRag\Test\化学方程式_CHEMISTRY_1.docx'
-    parse_docx(docx_path)
--- a/dsRag/Test/TestConvertHmtlToDocx.py
+++ b/dsRag/Test/TestConvertHmtlToDocx.py
@ -1,14 +0,0 @@
-import subprocess
-
-def markdown_to_docx_with_pandoc(md_path, docx_path):
-    subprocess.run([
-            'pandoc',
-            '-s',
-            md_path,
-            '-o',
-            docx_path,
-            '--mathml'
-        ], check=True)
-
-# 使用示例
-markdown_to_docx_with_pandoc('input.md', 'output.docx')
--- a/dsRag/Test/TestGongShi.py
+++ b/dsRag/Test/TestGongShi.py
@ -1,33 +0,0 @@
-"""
-conda activate rag
-pip install pypandoc
-"""
-
-import pypandoc
-
-
-def docx_to_latex(docx_path):
-    latex_content = pypandoc.convert_file(docx_path, 'latex')
-    import re
-    # 替换公式格式
-    latex_content = re.sub(r'\\\[(.*?)\\\]', r'$$\1$$', latex_content)
-    latex_content = re.sub(r'\\\((.*?)\\\)', r'$\1$', latex_content)
-    # 替换图片路径为【图片X】格式
-    img_count = 1
-
-    def replacer(match):
-        nonlocal img_count
-        result = f'【图片{img_count}】'
-        img_count += 1
-        return result
-
-    latex_content = re.sub(r'\\includegraphics\[.*?\]\{.*?\}', replacer, latex_content)
-    return latex_content
-
-
-latex_content = docx_to_latex('带公式的WORD文档.docx')
-
-# 遍历字符串的每一行
-for line in latex_content.split('\n'):
-    if len(line.strip()) > 0:
-        print(line.strip())
--- a/dsRag/Test/TestReadGongShi.py
+++ b/dsRag/Test/TestReadGongShi.py
@ -1,23 +0,0 @@
-from docx import Document
-
-def run_has_ole_object(run):
-    """
-    检查run对象是否包含OLE对象
-    :param run: docx.text.run.Run对象
-    :return: bool
-    """
-    # 检查run的XML中是否包含OLE对象标签
-    run_element = run._r
-    for child in run_element.iterchildren():
-        if child.tag.endswith('object') or child.tag.endswith('OLEObject'):
-            print(str(child))
-            return True
-    return False
-
-
-# 测试代码
-doc = Document(r'D:\dsWork\dsProject\dsRag\static\Txt\化学方程式_CHEMISTRY_1.docx')
-for paragraph in doc.paragraphs:
-    for run in paragraph.runs:
-        if run_has_ole_object(run):
-            print("Found Ole")
--- a/dsRag/Test/TestReadMathType.py
+++ b/dsRag/Test/TestReadMathType.py
@ -1,18 +0,0 @@
-import os
-
-
-# 可执行文件路径
-mtef = r'D:\dsWork\dsProject\dsRag\mtef-go-3\mtef-go.exe'
-# 源文件路径
-sourceDocx = r'D:\dsWork\dsProject\dsRag\static\Txt\化学方程式_CHEMISTRY_1.docx'
-# 输出文件路径
-output_file = r"d:\output.txt"  # 可修改为您需要的路径
-
-# 构建命令
-command = mtef + r" -w " + sourceDocx + " -o " + output_file
-os.system(command)
-# 把output.txt里的内容打印出来看看
-# 加上行号
-with open(output_file, 'r', encoding='utf-8') as file:
-    for i, line in enumerate(file):
-        print(f"{i+1}: {line.strip()}")
--- a/dsRag/Test/带公式的WORD文档.docx
+++ b/dsRag/Test/带公式的WORD文档.docx
--- a/dsRag/Test/带图的WORD文档.docx
+++ b/dsRag/Test/带图的WORD文档.docx