You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
38 lines
1.0 KiB
38 lines
1.0 KiB
from docx import Document
|
|
import os
|
|
|
|
|
|
def read_word_file(file_path: "str | os.PathLike[str]") -> "str | None":
    """
    Read the text content of a Word (.docx) document.

    Paragraph text is collected first, followed by the text of every table
    cell (table by table, row by row); the pieces are joined with newlines.

    Any failure -- missing file, unsupported extension, or an error raised
    while opening/reading the document -- is reported with ``print`` and
    results in ``None`` instead of an exception reaching the caller.

    :param file_path: path to the Word document, given as a string or any
        ``os.PathLike`` object (e.g. ``pathlib.Path``)
    :return: the document text, or ``None`` if the document could not be read
    """
    try:
        # Normalise path-like objects to a plain path so the extension check
        # below works for pathlib.Path as well as str.
        path = os.fspath(file_path)

        # Make sure the file exists
        if not os.path.exists(path):
            raise FileNotFoundError(f"文件 {path} 不存在")

        # Only .docx documents are supported (case-insensitive check)
        if not path.lower().endswith('.docx'):
            raise ValueError("仅支持.docx格式的Word文档")

        doc = Document(path)

        # Paragraph text first
        full_text = [para.text for para in doc.paragraphs]

        # Then table content, one entry per cell.
        # NOTE(review): python-docx may return the same cell more than once
        # in ``row.cells`` for merged cells, which would duplicate its text
        # here -- confirm whether de-duplication is wanted.
        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    full_text.append(cell.text)

        return '\n'.join(full_text)

    except Exception as e:
        # Deliberate best-effort contract: report the problem and hand back
        # None rather than propagating the exception.
        print(f"读取Word文档时出错: {str(e)}")
        return None