82 lines
3.0 KiB
Python
82 lines
3.0 KiB
Python
from docx import Document
|
||
from docx.shared import Pt
|
||
from docx.oxml.ns import qn
|
||
import re
|
||
|
||
def markdown_to_docx(markdown_text, output_file="report.docx"):
    """
    Convert a Markdown string into a Word document (.docx).

    The input is processed line by line. The supported subset is:

    * headings with one to six leading ``#`` characters (``#`` is added
      with Word heading level 0, ``##`` with level 1, and so on),
    * unordered list items starting with ``- `` or ``* ``,
    * ordered list items starting with ``<digits>. ``,
    * plain paragraphs (blank lines are skipped).

    List items and paragraphs are rendered through ``add_formatted_text``;
    heading text is inserted unchanged.

    Args:
        markdown_text (str): Markdown source text.
        output_file (str): Path of the .docx file to write
            (default ``"report.docx"``).
    """
    # Start from an empty Word document.
    doc = Document()

    # Make SimSun the default font. The East Asian font slot is set on the
    # underlying XML element in addition to the regular font name.
    normal_style = doc.styles['Normal']
    normal_style.font.name = '宋体'
    normal_style._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')

    # splitlines() also copes with "\r\n" line endings.
    for line in markdown_text.splitlines():
        # Only the leading run of '#' decides the heading level; counting
        # every '#' in the line would misjudge headings like "# C# notes".
        hashes = len(line) - len(line.lstrip("#"))
        ordered_item = re.match(r"\d+\. ", line)

        if 1 <= hashes <= 6:
            # Markdown level n maps to Word heading level n - 1.
            doc.add_heading(line[hashes:].strip(), level=hashes - 1)
        elif line.startswith(("- ", "* ")):
            # Remove exactly the two-character marker so that an item which
            # begins with "**bold**" keeps its opening asterisks.
            paragraph = doc.add_paragraph(style='List Bullet')
            add_formatted_text(paragraph, line[2:].strip())
        elif ordered_item:
            paragraph = doc.add_paragraph(style='List Number')
            add_formatted_text(paragraph, line[ordered_item.end():].strip())
        elif line.strip():
            # Any other non-blank line (including one with more than six
            # leading '#') becomes an ordinary paragraph.
            paragraph = doc.add_paragraph()
            add_formatted_text(paragraph, line.strip())

    # Write the document to disk and report where it went.
    doc.save(output_file)
    print(f"Word 文档已生成: {output_file}")
|
||
|
||
|
||
def add_formatted_text(paragraph, text):
    """
    Append Markdown text to a paragraph, rendering ``**bold**`` as bold runs.

    Complete ``**...**`` pairs are resolved first, so text that merely
    precedes or follows a bold span stays regular. Within the remaining
    text a dangling marker is still tolerated: ``**xx`` (never closed) and
    ``xx**`` (never opened) both render ``xx`` in bold. Empty pieces do not
    produce a run.

    Args:
        paragraph: Object exposing ``add_run(text)`` whose returned run
            accepts a ``bold`` attribute (presumably a python-docx
            paragraph; confirm against callers).
        text (str): Text to append.
    """

    def emit(content, bold=False):
        # re.split yields empty strings around adjacent matches; skip them
        # instead of adding empty runs.
        if not content:
            return
        run = paragraph.add_run(content)
        if bold:
            run.bold = True

    # With one capture group, re.split puts the matched pairs at odd indexes
    # and the text between them at even indexes.
    for index, piece in enumerate(re.split(r"(\*\*[^*]+\*\*)", text)):
        if index % 2:
            emit(piece[2:-2], bold=True)
            continue

        # No complete pair is left in this piece; only dangling markers can
        # still match here.
        dangling = re.split(r"(\*\*[^*]+|[^*]+\*\*)", piece)
        for position, fragment in enumerate(dangling):
            if position % 2 == 0:
                emit(fragment)
            elif fragment.startswith("**"):
                emit(fragment[2:], bold=True)
            else:
                emit(fragment[:-2], bold=True)