# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
#
# 44 lines
# 1008 B

from bs4 import BeautifulSoup
from docx import Document
"""
pip install python-docx html2text beautifulsoup4
"""
def html_to_word(html_content, word_path):
    """Convert the block-level text of an HTML document into a Word file.

    Every ``<p>``, ``<h1>``-``<h6>`` and ``<div>`` element that carries text
    of its own becomes one paragraph in the generated document, in document
    order. Text that belongs to a nested block element (for example a
    ``<p>`` inside a ``<div>``) is emitted once, for the innermost block
    only, instead of being repeated for every enclosing container.

    Args:
        html_content: HTML markup to convert.
        word_path: Destination path handed to ``Document.save``.

    Returns:
        None. The document is written to ``word_path`` and a confirmation
        line is printed.
    """
    block_tags = ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div']

    # Parse the HTML.
    soup = BeautifulSoup(html_content, 'html.parser')
    # Create the Word document.
    doc = Document()

    # Walk every block-level element in document order.
    for element in soup.find_all(block_tags):
        # Keep only the strings whose nearest block-level ancestor is this
        # element. Strings owned by a nested block are replaced by a single
        # space so the text on either side of that block does not fuse; the
        # nested block gets its own paragraph when the loop reaches it.
        pieces = [
            string if string.find_parent(block_tags) is element else ' '
            for string in element.strings
        ]
        # Normalize whitespace after joining. Stripping each fragment
        # separately (get_text(strip=True)) would glue inline markup such as
        # "Hello <b>world</b>" into "Helloworld".
        text = ' '.join(''.join(pieces).split())
        if text:
            # Append to the Word document.
            doc.add_paragraph(text)

    # Save the Word document.
    doc.save(word_path)
    print(f"HTML content saved to {word_path}")
# Sample HTML content used to demonstrate the conversion.
html_content = """
<!DOCTYPE html>
<html>
<head>
<title>Sample HTML</title>
</head>
<body>
<h1>Heading 1</h1>
<p>This is a paragraph.</p>
<h2>Heading 2</h2>
<div>Content inside a div.</div>
</body>
</html>
"""
# Run the conversion on the sample; the result is saved as "output.docx".
# NOTE: this call sits at module level, so it also runs when the file is imported.
html_to_word(html_content, "output.docx")