You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

38 lines
1.0 KiB

from docx import Document
import os
def read_word_file(file_path):
    """Read the text content of a Word (.docx) document.

    Paragraph text is collected first, followed by the text of every table
    cell (table by table, row by row); the pieces are joined with newlines.
    A merged table cell contributes its text only once, even though
    python-docx reports it once per grid position it spans.

    :param file_path: Path of the Word document (``str``, ``bytes`` or
        ``os.PathLike``).
    :return: The document text, or ``None`` when the file does not exist,
        is not a ``.docx`` file, or cannot be read. In those cases the
        error message is printed instead of being raised.
    """
    try:
        # Normalise str / bytes / os.PathLike to str so that the suffix
        # check below works for every path type, not only for str.
        path = os.fsdecode(file_path)

        # Check that the file exists.
        if not os.path.exists(path):
            raise FileNotFoundError(f"文件 {path} 不存在")
        # Check that the file is a Word document.
        if not path.lower().endswith('.docx'):
            raise ValueError("仅支持.docx格式的Word文档")

        doc = Document(path)

        # Read paragraph content.
        full_text = [para.text for para in doc.paragraphs]

        # Read table content. ``row.cells`` repeats a merged cell for each
        # grid position it covers, so remember the underlying XML element of
        # every cell already emitted. The elements are stored (not just
        # their ids) so they stay alive and the ids cannot be reused.
        seen_elements = {}
        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    element = getattr(cell, "_tc", cell)
                    if id(element) in seen_elements:
                        continue
                    seen_elements[id(element)] = element
                    full_text.append(cell.text)

        return '\n'.join(full_text)
    except Exception as e:
        # Deliberate best-effort contract: any failure (bad path, wrong
        # format, corrupt document) is reported and signalled with None.
        print(f"读取Word文档时出错: {str(e)}")
        return None