main
黄海 9 months ago
parent e4e9f1a74c
commit e8a80fc81d

@ -7,18 +7,44 @@ docApp = win32com.client.Dispatch('Word.Application')
# 是否显示Word文档
docApp.Visible = False
docApp.DisplayAlerts = 0
working_dir = r"D:/dsWork/YunNanDsBase/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/"
doc = docApp.Documents.Open("c:/b.docx")
#
# # 遍历文档中的所有内嵌形状
# doc = docApp.Documents.Open('c:/1.docx')
# doc = docApp.Documents.Open('c:/昭通市人口变化及其对教育的影响20240416.docx')
# doc = docApp.Documents.Open('c:/昆明市人口变化及其对教育的影响20240419.docx')
doc = docApp.Documents.Open(working_dir+'红河哈尼族彝族自治州人口变化及其对教育的影响20240419.docx')
# 遍历文档中所有的文字段落,判断是不是以 图+数字开头
idx = 1
for para in doc.Paragraphs:
x = para.Range.Text.strip().replace("", "").replace(" ", " ")
if x.startswith(""):
print(x)
idx = idx + 1
# 遍历文档中的所有内嵌形状
idx = 1
for inline_shape in doc.InlineShapes:
if inline_shape.Type == win32com.client.constants.wdInlineShapeChart: # 检查是否为内嵌图表
# 获取图表的标题,此项目中图表没有标题
shape = doc.InlineShapes(idx)
# 获取图表的标题,此项目中图表没有标题
# print(shape.Chart.ChartTitle.Text)
sheet = shape.Chart.ChartData.Workbook.Worksheets("Sheet1")
# 行数
row_size = sheet.UsedRange.rows.Count
# 列数
col_size = sheet.UsedRange.columns.Count
# 遍历获取表格中的数据
for i in range(1, row_size + 1):
for j in range(1, col_size + 1):
print(sheet.Cells(i, j).Value, end=" ")
print("")
print("")
# 下一个图表的索引号
idx = idx + 1
print(idx-1)
# 关闭文档和Word应用
doc.Close()

@ -26,6 +26,7 @@ def repairWord(docPath):
if inline_shape.Type == win32com.client.constants.wdInlineShapeChart: # 检查是否为内嵌图表
shape = doc.InlineShapes(idx)
sheet = shape.Chart.ChartData.Workbook.Worksheets("Sheet1")
print(sheet.Name)
# 下一个图表的索引号
idx = idx + 1

@ -0,0 +1,65 @@
# pip install pywin32
# https://blog.csdn.net/weixin_42927998/article/details/115086797
import os
import win32com
from win32com.client import constants, Dispatch
from openpyxl import Workbook
def _read_task(task_path):
    """Return ``(docx_path, chart_number)`` read from the task file.

    The first line of the file is the path of the .docx document and the
    second line is the 1-based ordinal of the chart to export.

    Raises:
        OSError: if the task file cannot be opened.
        ValueError: if the second line is not an integer.
    """
    with open(task_path, 'r', encoding='utf-8') as f:
        doc_path = f.readline().strip()
        chart_number = int(f.readline().strip())
    return doc_path, chart_number


def _read_chart_rows(doc, chart_number):
    """Return the "Sheet1" cell values of the chart_number-th inline chart.

    Only inline shapes whose type is ``wdInlineShapeChart`` are counted, so
    pictures or other inline shapes placed before the chart do not shift the
    numbering. The result is a list of rows (each a list of cell values)
    covering the sheet's used range; an empty list means the document has
    fewer than ``chart_number`` charts.
    """
    # ``constants`` is only populated when makepy support for Word has been
    # generated; fall back to the documented enum value (12) otherwise.
    chart_type = getattr(constants, 'wdInlineShapeChart', 12)

    charts_seen = 0
    for inline_shape in doc.InlineShapes:
        if inline_shape.Type != chart_type:
            continue
        charts_seen += 1
        if charts_seen != chart_number:
            continue
        # Use the shape we are iterating over. Re-indexing doc.InlineShapes
        # with the chart counter picks the wrong shape as soon as the
        # document contains a non-chart inline shape.
        sheet = inline_shape.Chart.ChartData.Workbook.Worksheets("Sheet1")
        used_range = sheet.UsedRange
        row_size = used_range.Rows.Count
        col_size = used_range.Columns.Count
        return [
            [sheet.Cells(i, j).Value for j in range(1, col_size + 1)]
            for i in range(1, row_size + 1)
        ]
    return []


def _export_chart_data():
    """Export the data sheet of one Word chart to ``C:/task.xlsx``.

    The document path and the chart number come from ``c:/task.txt``.
    Word is driven through COM with its window hidden and alerts disabled.
    """
    # Task file location
    task_path = r'c:/task.txt'
    doc_path, chart_number = _read_task(task_path)

    doc_app = win32com.client.Dispatch('Word.Application')
    try:
        # Keep Word hidden and suppress its dialogs
        doc_app.Visible = False
        doc_app.DisplayAlerts = 0
        doc = doc_app.Documents.Open(doc_path)
        try:
            data = _read_chart_rows(doc, chart_number)
        finally:
            # Close the document even when reading the chart fails
            doc.Close()
    finally:
        # Always quit Word so no hidden WINWORD process is left behind
        doc_app.Quit()

    if not data:
        print(f"Warning: chart #{chart_number} not found in {doc_path}; "
              "writing an empty workbook.")

    # Create a new workbook and write the rows to its default sheet
    wb = Workbook()
    ws = wb.active
    for row in data:
        ws.append(row)
    # Save the workbook to disk
    wb.save("C:/task.xlsx")


if __name__ == '__main__':
    _export_chart_data()

@ -76,14 +76,22 @@ public class C9 {
int firstChartNumber = 1;
XSSFWorkbook workbook = charts.get(firstChartNumber).getWorkbook();
// if(cityName.contains("丽江")){
// System.out.println("丽江");
// }
List<List<String>> source1 = ExcelKit.readSheet(workbook, 6);//从2017年开始
System.out.println(source1);
//遍历source1
for (List<String> r : source1) {
Row outRow = outSheet.createRow(++rowIndex);
// 导出数据
//上级行政区划,行政区划,年份,总人口变化,总人口预测
int year = Integer.parseInt(r.getFirst());
double value = Double.parseDouble(r.get(1));
if (year <= 2023) {
ExcelKit.putData(outRow, new ArrayList<>(Arrays.asList(cityName, "云南省", r.getFirst(), String.format("%.2f", value), "")), dataStyle);

@ -141,6 +141,8 @@ public class ExcelKit {
int rowIndex = 0;
// 遍历工作表中的所有行
if (sheet == null) return array;
System.out.println("Sheet rows="+sheet.getPhysicalNumberOfRows());
for (Row row : sheet) {
rowIndex++;
if (rowIndex <= skipRowCount) continue;//跳过指定的行数

Loading…
Cancel
Save