"""Convert simple HTML into a Word (.docx) document.

Install note carried over from the original script:

    pip install python-docx html2text beautifulsoup4

Only ``bs4`` (beautifulsoup4) and ``docx`` (python-docx) are imported here.
"""

from bs4 import BeautifulSoup
from docx import Document

# Heading tag name -> Word heading level.
_HEADING_LEVELS = {f"h{level}": level for level in range(1, 7)}

# Block-level tags whose text is copied into the Word document.
_BLOCK_TAGS = ["p", "div", *_HEADING_LEVELS]


def _own_text(element):
    """Return the text that belongs to *element* itself.

    Text living inside a nested block-level tag (one of ``_BLOCK_TAGS``) is
    left out, because that nested tag is visited on its own by the caller;
    including it here would write the same text twice. Inline children such
    as ``<b>`` or ``<a>`` are kept. Runs of whitespace are collapsed to a
    single space so that inline fragments are not glued together.
    """
    fragments = [
        str(string)
        for string in element.strings
        # The closest enclosing block tag decides who "owns" the string.
        if string.find_parent(_BLOCK_TAGS) is element
    ]
    return " ".join("".join(fragments).split())


def html_to_word(html_content, word_path):
    """Write the block-level text of an HTML document to a Word file.

    Every ``<p>``, ``<div>`` and ``<h1>``-``<h6>`` element is visited in
    document order. Headings become Word headings of the matching level;
    paragraphs and divs become plain paragraphs. Elements without any text
    of their own are skipped. A container's own text is emitted as a single
    paragraph before the blocks nested inside it.

    Args:
        html_content: The HTML markup to convert.
        word_path: Destination passed to ``Document.save`` (a ``.docx`` path).

    Returns:
        None. The document is saved to ``word_path`` and a confirmation
        message is printed.
    """
    # Parse the HTML.
    soup = BeautifulSoup(html_content, "html.parser")

    # Create the Word document.
    doc = Document()

    # Walk every block-level element of the HTML.
    for element in soup.find_all(_BLOCK_TAGS):
        text = _own_text(element)
        if not text:
            continue

        heading_level = _HEADING_LEVELS.get(element.name)
        if heading_level is not None:
            doc.add_heading(text, level=heading_level)
        else:
            doc.add_paragraph(text)

    # Save the Word document.
    doc.save(word_path)
    print(f"HTML content saved to {word_path}")


# Sample HTML content.
html_content = """
<html>
<head><title>Sample HTML</title></head>
<body>

<h1>Heading 1</h1>

<p>This is a paragraph.</p>

<h2>Heading 2</h2>

<div>Content inside a div.</div>

</body>
</html>
"""

if __name__ == "__main__":
    # Run the conversion on the sample content.
    html_to_word(html_content, "output.docx")