You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

78 lines
3.4 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import os
import win32com
from win32com.client import Dispatch
# pip install pywin32
# Scan every Word report (.docx) in ``working_dir``: derive the city/prefecture
# name from the file name, create an output folder for it under ``Excel``,
# print the figure captions found in the document, and print the size of the
# data sheet behind every embedded chart.

# Folder that holds the Word reports to scan.
working_dir = r"D:/dsWork/YunNanDsBase/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/"
# Output root: an "Excel" folder inside the working directory.
excel_dir = os.path.join(working_dir, 'Excel')
os.makedirs(excel_dir, exist_ok=True)
# Every report file name is "<city name><keyword><report date>.docx"; the part
# before the keyword is used as the city name.
keyword = '人口变化及其对教育的影响'

# Numeric value of Word's WdInlineShapeType.wdInlineShapeChart. It is spelled
# out because win32com.client.constants is only populated when type-library
# support has been generated (makepy / gencache), which plain Dispatch() does
# not guarantee.
WD_INLINE_SHAPE_CHART = 12
# Word's WdSaveOptions.wdDoNotSaveChanges: the script only reads documents.
WD_DO_NOT_SAVE_CHANGES = 0

docApp = win32com.client.Dispatch('Word.Application')
try:
    # Keep Word hidden and suppress its dialogs while batch processing.
    docApp.Visible = False
    docApp.DisplayAlerts = 0

    for file in os.listdir(working_dir):
        # Only real reports: skip other file types and "~..." temporary files.
        if not file.endswith('.docx') or file.startswith('~'):
            continue

        # splitext keeps names that contain additional dots intact.
        file_name = os.path.splitext(file)[0]
        if keyword not in file_name:
            # A report without the keyword cannot be mapped to a city: abort.
            # The finally clause below still shuts Word down.
            print('Error: ' + file_name + ' 文件名称中并不包含:' + keyword)
            raise SystemExit()

        # Text before the keyword is the city name; its Excel files will be
        # placed in a sub-folder of that name.
        city_name = file_name.split(keyword)[0]
        city_dir = os.path.join(excel_dir, city_name)
        os.makedirs(city_dir, exist_ok=True)

        # Hand Word a normalised path (no doubled separators).
        doc_path = os.path.normpath(os.path.join(working_dir, file))
        doc = docApp.Documents.Open(doc_path)
        try:
            # Print the figure captions: paragraphs that start with "图"
            # followed by a number.
            for para in doc.Paragraphs:
                text = para.Range.Text
                # NOTE(review): the original replace() arguments were invisible
                # characters that did not survive in the reviewed copy; the
                # clean-up below (control characters, non-breaking and
                # ideographic spaces) is the presumed intent - confirm.
                text = ''.join(ch for ch in text if ch >= ' ' or ch == '\t')
                text = text.replace('\u00a0', ' ').replace('\u3000', ' ').strip()
                if text.startswith('图') and text[1:].lstrip()[:1].isdigit():
                    print(text)

            # Inspect the data sheet behind every embedded chart.
            chart_count = 0
            for inline_shape in doc.InlineShapes:
                if inline_shape.Type != WD_INLINE_SHAPE_CHART:
                    continue
                chart_count += 1
                chart_data = inline_shape.Chart.ChartData
                # The workbook is only available after the chart data has
                # been activated.
                chart_data.Activate()
                workbook = chart_data.Workbook
                try:
                    used_range = workbook.Worksheets(1).UsedRange
                    row_size = used_range.Rows.Count
                    col_size = used_range.Columns.Count
                    # Cell values can be read with sheet.Cells(row, col).Value
                    # for row in 1..row_size and col in 1..col_size.
                    print(row_size, col_size)
                finally:
                    # Close the chart workbook so Excel instances do not pile up.
                    workbook.Close()
            # Number of charts found in this document.
            print(chart_count)
        finally:
            doc.Close(WD_DO_NOT_SAVE_CHANGES)
finally:
    # Always shut Word down, even on errors, so no hidden WINWORD process
    # is left behind.
    docApp.Quit()