# pip install pywin32
# https://blog.csdn.net/weixin_42927998/article/details/115086797
"""Print figure captions and embedded chart data from a Word report.

The script drives Microsoft Word through COM (pywin32). It opens one
report document, prints every paragraph that starts with "图" (figure
captions), then prints the cell values of worksheet "Sheet1" behind every
inline chart, followed by the number of charts found.

NOTE: an unresolved git merge conflict was removed here. The incoming side
was kept because it contains everything the HEAD side did (the chart loop)
plus the caption scan and the data dump; the HEAD side opened "c:/b.docx"
instead of the report under ``working_dir``.
"""
import win32com
from win32com.client import Dispatch

# Value of Word's WdInlineShapeType.wdInlineShapeChart. A literal is used
# because win32com.client.constants is only populated when early-bound
# (makepy/gencache) wrappers exist, which a plain Dispatch() does not
# guarantee.
WD_INLINE_SHAPE_CHART = 12

working_dir = r"D:/dsWork/YunNanDsBase/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/"


def print_figure_captions(doc):
    """Print each paragraph of *doc* whose normalized text starts with "图"."""
    for para in doc.Paragraphs:
        # NOTE(review): both arguments of the second replace() look identical
        # in the reviewed copy of this file; the first was possibly a double,
        # full-width or non-breaking space originally -- confirm.
        text = para.Range.Text.strip().replace("图 ", "图").replace(" ", " ")
        if text.startswith("图"):
            print(text)


def print_chart_data(doc):
    """Print the "Sheet1" cells of every inline chart in *doc*.

    Each worksheet row is printed on one line with values separated by a
    space, and a blank line follows each chart.

    Returns:
        The number of inline charts that were found.
    """
    chart_count = 0
    for inline_shape in doc.InlineShapes:
        if inline_shape.Type != WD_INLINE_SHAPE_CHART:
            continue
        # Use the iterated shape itself: indexing doc.InlineShapes with a
        # chart counter picks the wrong shape as soon as a non-chart inline
        # shape (e.g. a picture) precedes a chart.
        # The charts in this project have no title, so it is not read.
        # NOTE(review): the Word object model documentation appears to
        # require ChartData.Activate() before Workbook is referenced --
        # confirm whether that call is needed here.
        sheet = inline_shape.Chart.ChartData.Workbook.Worksheets("Sheet1")
        used_range = sheet.UsedRange
        row_size = used_range.Rows.Count
        col_size = used_range.Columns.Count
        # Excel cells are addressed 1-based.
        for i in range(1, row_size + 1):
            for j in range(1, col_size + 1):
                print(sheet.Cells(i, j).Value, end=" ")
            print("")
        print("")
        chart_count += 1
    return chart_count


docApp = Dispatch('Word.Application')
try:
    # Keep the Word window hidden and suppress its dialogs.
    docApp.Visible = False
    docApp.DisplayAlerts = 0

    # doc = docApp.Documents.Open('c:/1.docx')
    # doc = docApp.Documents.Open('c:/昭通市人口变化及其对教育的影响20240416.docx')
    # doc = docApp.Documents.Open('c:/昆明市人口变化及其对教育的影响20240419.docx')
    doc = docApp.Documents.Open(working_dir + '红河哈尼族彝族自治州人口变化及其对教育的影响20240419.docx')
    try:
        print_figure_captions(doc)
        print(print_chart_data(doc))
    finally:
        # Close the document even if reading it failed.
        doc.Close()
finally:
    # Always quit Word so no hidden WINWORD.EXE process is left behind.
    docApp.Quit()