'commit'

3 weeks ago · e975510fa2
parent 43b3d4f602
commit e975510fa2
12 changed files with 128 additions and 0 deletions
--- a/dsRag/MathType/MathType-win-zh-7.8.2.441.exe
+++ b/dsRag/MathType/MathType-win-zh-7.8.2.441.exe
--- a/dsRag/MathType/MathType_v7.x_Patch.exe
+++ b/dsRag/MathType/MathType_v7.x_Patch.exe
--- a/dsRag/MathType/录入化学方程式的办法.md
+++ b/dsRag/MathType/录入化学方程式的办法.md
@ -0,0 +1,32 @@
+### 一、安装 $MathType$ $7.8$
+
+由于$Word$自带的公式编辑器在编辑数学、化学、生物等公式时并不是特别方便，所以可以使用插件$MathType$来快速录入$Word$中的公式。
+
+安装、破解、使用，不再赘述。
+
+
+
+### 二、手动调整公式格式 
+
+但由于格式的差别，其它软件并不能正确读取$MathType$制作的公式，为了让其它软件也能正确读取$Word$中$MathType$制作的公式，还需要一些手动的调整办法。
+
+![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250630104228950.png)
+
+![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250630104322174.png)
+
+- 鼠标左键点击，然后按 Ctrl+C 复制到剪贴板
+
+![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250630104444709.png)
+
+- 按 Ctrl+V 粘贴即可
+
+![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250630104536077.png)
+
+### 三、测试一下
+
+````cmd
+pandoc -f docx -t markdown --extract-media ./images -o c:/1.md D:\dsWork\dsProject\dsRag\Test\化学方程式_CHEMISTRY_1.docx
+````
+
+
+
--- a/dsRag/MathType/激活MathType.docx
+++ b/dsRag/MathType/激活MathType.docx
--- a/dsRag/Test/T2_SplitTxtTest.py
+++ b/dsRag/Test/T2_SplitTxtTest.py
@ -0,0 +1,37 @@
+import zipfile
+import xml.etree.ElementTree as ET
+
+def parse_docx(docx_path):
+    """Print the text of every Office Math (``m:oMath``) formula in a .docx file.
+
+    Opens the archive at ``docx_path``, parses ``word/document.xml`` and, for
+    each ``m:oMath`` element, prints the element object followed by the
+    concatenated text of its ``m:t`` descendants and then of its ``m:e``
+    descendants. A final line reports how many formulas were found.
+
+    Args:
+        docx_path: Path to the .docx file to inspect.
+
+    Returns:
+        None. All results are written to standard output.
+    """
+    namespaces = {
+        'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
+        'm': 'http://schemas.openxmlformats.org/officeDocument/2006/math'
+    }
+
+    with zipfile.ZipFile(docx_path) as archive:
+        with archive.open('word/document.xml') as document_xml:
+            document_root = ET.parse(document_xml).getroot()
+
+    # Search the whole tree so formulas are found wherever they are nested
+    # (the original note says this is meant to cover floating and inline ones).
+    math_nodes = document_root.findall('.//m:oMath', namespaces)
+
+    formula_count = 0
+    for formula_count, math_node in enumerate(math_nodes, start=1):
+        print(math_node)
+
+        # Text runs come first, then any text attached directly to m:e nodes.
+        pieces = [node.text for node in math_node.findall('.//m:t', namespaces) if node.text]
+        pieces += [node.text for node in math_node.findall('.//m:e', namespaces) if node.text]
+        formula_text = ''.join(pieces)
+
+        print(f"公式{formula_count}内容: {formula_text}")
+
+    print(f"共找到{formula_count}个公式")
+
+if __name__ == "__main__":
+    # Run the formula dump against the sample chemistry document.
+    parse_docx(r'D:\dsWork\dsProject\dsRag\Test\化学方程式_CHEMISTRY_1.docx')
--- a/dsRag/Test/TestReadGongShi.py
+++ b/dsRag/Test/TestReadGongShi.py
@ -0,0 +1,19 @@
+from docx import Document
+
+
+def extract_text_from_docx(file_path):
+    """Print every non-empty text run found in the paragraphs of a .docx file.
+
+    Each run's text is printed on its own line, numbered from 1 and labelled
+    "公式".
+
+    Args:
+        file_path: Path to the .docx file to read.
+
+    Returns:
+        None. All results are written to standard output.
+    """
+    document = Document(file_path)
+
+    # Keep only the runs that actually carry text.
+    run_texts = [
+        run.text
+        for paragraph in document.paragraphs
+        for run in paragraph.runs
+        if run.text
+    ]
+
+    # Print what was collected, numbering from 1.
+    for number, text in enumerate(run_texts, start=1):
+        print(f'公式 {number}: {text}')
+
+
+# The path can be replaced with the path to your own Word document.
+# It is a raw string so the backslashes in the Windows path are never
+# interpreted as escape sequences.
+extract_text_from_docx(r'D:\dsWork\dsProject\dsRag\Test\化学方程式_CHEMISTRY_1.docx')
--- a/dsRag/Test/convert_math_type_to_word_formula.py
+++ b/dsRag/Test/convert_math_type_to_word_formula.py
@ -0,0 +1,37 @@
+import win32com.client
+import re
+
+def convert_to_latex(formula_xml):
+    """Reduce a formula's XML to its plain text and wrap it in ``$...$``.
+
+    This is a deliberately simple rule, not a real OMML-to-LaTeX translation:
+    every XML tag is removed and the remaining character data is entity-decoded.
+
+    Args:
+        formula_xml: XML markup of the formula, as a string.
+
+    Returns:
+        The tag-free, entity-decoded text surrounded by dollar signs.
+    """
+    from html import unescape
+
+    text_only = re.sub(r'<[^>]+>', '', formula_xml)  # strip XML tags
+    # Decode every entity in one pass (&lt; &gt; &amp; &quot; &apos; and numeric
+    # references such as &#8594;), so "&amp;lt;" correctly becomes "&lt;".
+    return f"${unescape(text_only)}$"
+
+def convert_math_type_to_word_formula(doc_path):
+    """Print a simplified LaTeX rendering of every equation in a Word document.
+
+    Starts Word through COM, opens ``doc_path``, walks ``doc.OMaths`` and passes
+    each equation's ``Range.WordOpenXML`` through ``convert_to_latex``. A preview
+    (first 100 characters) is printed per equation, then the full list.
+    The document is always closed and Word is always quit, even on failure.
+
+    Args:
+        doc_path: Path of the document to open in Word.
+
+    Returns:
+        None. All results are written to standard output.
+    """
+    word = win32com.client.Dispatch("Word.Application")
+    try:
+        doc = word.Documents.Open(doc_path)
+        try:
+            formula_count = 0
+            latex_formulas = []
+
+            for omath in doc.OMaths:
+                formula_count += 1
+                # Best effort: one unreadable equation must not abort the rest.
+                try:
+                    formula_xml = omath.Range.WordOpenXML
+                    if formula_xml:
+                        latex = convert_to_latex(formula_xml)
+                        latex_formulas.append(latex)
+                        print(f"公式{formula_count} LaTeX: {latex[:100]}...")
+                except Exception as e:
+                    print(f"公式{formula_count}转换错误: {str(e)}")
+
+            print("\n最终LaTeX输出:")
+            for i, latex in enumerate(latex_formulas, 1):
+                print(f"公式{i}: {latex}")
+        finally:
+            # False is the first argument (SaveChanges in the Word object model).
+            doc.Close(False)
+    finally:
+        # Without this, a failure above would leave a Word process running.
+        word.Quit()
+
+if __name__ == '__main__':
+    # Sample chemistry document used for the conversion run.
+    sample_docx = r'D:\dsWork\dsProject\dsRag\Test\化学方程式_CHEMISTRY_1.docx'
+    convert_math_type_to_word_formula(sample_docx)
--- a/dsRag/Test/化学方程式_CHEMISTRY_1.docx
+++ b/dsRag/Test/化学方程式_CHEMISTRY_1.docx
--- a/dsRag/Txt/CHEMISTRY_1_1.txt
+++ b/dsRag/Txt/CHEMISTRY_1_1.txt
@ -0,0 +1 @@
+：氢气与氧气燃烧的方程式
--- a/dsRag/Txt/CHEMISTRY_1_2.txt
+++ b/dsRag/Txt/CHEMISTRY_1_2.txt
@ -0,0 +1 @@
+：硝酸光照分解的方程式
--- a/dsRag/mtef-go-3
+++ b/dsRag/mtef-go-3
@ -0,0 +1 @@
+Subproject commit 5ede23b5d20dbd12d04f5142828e7f6386763454
--- a/dsRag/static/Txt/化学方程式_CHEMISTRY_1.docx
+++ b/dsRag/static/Txt/化学方程式_CHEMISTRY_1.docx
				`@ -0,0 +1 @@`
				`Subproject commit 5ede23b5d20dbd12d04f5142828e7f6386763454`