diff --git a/.idea/libraries/aspose_words_24_10_jdk17.xml b/.idea/libraries/aspose_words_24_10_jdk17.xml new file mode 100644 index 00000000..e3602cd5 --- /dev/null +++ b/.idea/libraries/aspose_words_24_10_jdk17.xml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/临沧市人口变化及其对教育的影响20240419.docx b/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/临沧市人口变化及其对教育的影响20240419.docx new file mode 100644 index 00000000..47af4c39 Binary files /dev/null and b/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/临沧市人口变化及其对教育的影响20240419.docx differ diff --git a/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/丽江市人口变化及其对教育的影响20240418.docx b/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/丽江市人口变化及其对教育的影响20240418.docx new file mode 100644 index 00000000..360d818c Binary files /dev/null and b/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/丽江市人口变化及其对教育的影响20240418.docx differ diff --git a/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/怒江傈僳族自治州人口变化及其对教育的影响20240420.docx b/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/怒江傈僳族自治州人口变化及其对教育的影响20240420.docx new file mode 100644 index 00000000..c6c47871 Binary files /dev/null and b/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/怒江傈僳族自治州人口变化及其对教育的影响20240420.docx differ diff --git a/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/文山壮族苗族自治州人口变化及其对教育的影响20240424.docx b/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/文山壮族苗族自治州人口变化及其对教育的影响20240424.docx new file mode 100644 index 00000000..80d64cef Binary files /dev/null and b/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/文山壮族苗族自治州人口变化及其对教育的影响20240424.docx differ diff --git a/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/普洱市人口变化及其对教育的影响20240419.docx b/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/普洱市人口变化及其对教育的影响20240419.docx new file mode 100644 index 00000000..f695155a Binary files /dev/null and b/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/普洱市人口变化及其对教育的影响20240419.docx differ diff --git a/Doc/全省及州市县区人口与教育报告集20241023/2023年数据更新表/16州市数据更新表汇总20241021/(10.21)2023年红河州各县市人口与教育数据更新表(未完善).xls b/Doc/全省及州市县区人口与教育报告集20241023/2023年数据更新表/16州市数据更新表汇总20241021/(10.21)2023年红河州各县市人口与教育数据更新表(未完善).xls new file mode 100644 index 00000000..7a173ee7 Binary files /dev/null and b/Doc/全省及州市县区人口与教育报告集20241023/2023年数据更新表/16州市数据更新表汇总20241021/(10.21)2023年红河州各县市人口与教育数据更新表(未完善).xls differ diff --git a/Doc/待处理/市/【10】城镇&乡村人口变化及预测-双/城镇&乡村人口变化及预测-双.xlsx b/Doc/待处理/市/【10】城镇&乡村人口变化及预测-双/城镇&乡村人口变化及预测-双.xlsx new file mode 100644 index 00000000..25bc46aa Binary files /dev/null and b/Doc/待处理/市/【10】城镇&乡村人口变化及预测-双/城镇&乡村人口变化及预测-双.xlsx differ diff --git a/Doc/待处理/市/【10】城镇&乡村人口变化及预测-双/城镇&乡村人口变化及预测-双【成果】.xlsx b/Doc/待处理/市/【10】城镇&乡村人口变化及预测-双/城镇&乡村人口变化及预测-双【成果】.xlsx new file mode 100644 index 00000000..d4f93c92 Binary files /dev/null and b/Doc/待处理/市/【10】城镇&乡村人口变化及预测-双/城镇&乡村人口变化及预测-双【成果】.xlsx differ diff --git a/Doc/待处理/市/【11】教育资源配置发展预测/教育资源配置发展预测(人).xlsx b/Doc/待处理/市/【11】教育资源配置发展预测/教育资源配置发展预测(人).xlsx new file mode 100644 index 00000000..58266e80 Binary files /dev/null and b/Doc/待处理/市/【11】教育资源配置发展预测/教育资源配置发展预测(人).xlsx differ diff --git a/Doc/待处理/市/【11】教育资源配置发展预测/教育资源配置发展预测(人)【成果】.xlsx b/Doc/待处理/市/【11】教育资源配置发展预测/教育资源配置发展预测(人)【成果】.xlsx new file mode 100644 index 00000000..3d8b7493 Binary files /dev/null and b/Doc/待处理/市/【11】教育资源配置发展预测/教育资源配置发展预测(人)【成果】.xlsx differ diff --git a/Doc/待处理/市/【9】总人口变化及预测-双/总人口变化及预测-双【成果】.xlsx b/Doc/待处理/市/【9】总人口变化及预测-双/总人口变化及预测-双【成果】.xlsx new file mode 100644 index 00000000..30f84c15 Binary files /dev/null and b/Doc/待处理/市/【9】总人口变化及预测-双/总人口变化及预测-双【成果】.xlsx differ diff --git a/ExtendJar/aspose-words-21.6.0-jdk17.jar b/ExtendJar/aspose-words-21.6.0-jdk17.jar new file mode 100644 index 00000000..4f9163ba Binary files /dev/null and b/ExtendJar/aspose-words-21.6.0-jdk17.jar differ diff --git a/Py/.idea/.gitignore b/Py/.idea/.gitignore new file mode 100644 index 00000000..38a1d4cd --- /dev/null +++ b/Py/.idea/.gitignore @@ -0,0 +1,8 @@ +# 默认忽略的文件 +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/Py/.idea/Py.iml b/Py/.idea/Py.iml new file mode 100644 index 00000000..d81667ac --- /dev/null +++ b/Py/.idea/Py.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/Py/.idea/encodings.xml b/Py/.idea/encodings.xml new file mode 100644 index 00000000..345bb0bc --- /dev/null +++ b/Py/.idea/encodings.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/Py/.idea/inspectionProfiles/Project_Default.xml b/Py/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 00000000..2432abe9 --- /dev/null +++ b/Py/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,14 @@ + + + + \ No newline at end of file diff --git a/Py/.idea/inspectionProfiles/profiles_settings.xml b/Py/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 00000000..105ce2da --- /dev/null +++ b/Py/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/Py/.idea/misc.xml b/Py/.idea/misc.xml new file mode 100644 index 00000000..80b3409a --- /dev/null +++ b/Py/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/Py/.idea/modules.xml b/Py/.idea/modules.xml new file mode 100644 index 00000000..5d26daad --- /dev/null +++ b/Py/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/Py/.idea/vcs.xml b/Py/.idea/vcs.xml new file mode 100644 index 00000000..2e3f6920 --- /dev/null +++ b/Py/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/Py/RepairTuBiao.py b/Py/RepairTuBiao.py new file mode 100644 index 00000000..3d50d9d4 --- /dev/null +++ b/Py/RepairTuBiao.py @@ -0,0 +1,51 @@ +# pip install pywin32 +# https://blog.csdn.net/weixin_42927998/article/details/115086797 +import os + +import win32com +from win32com.client import Dispatch + +# 工作目录 +workingPath = r'D:\dsWork\YunNanDsBase\Doc\全省及州市县区人口与教育报告集20241023\16个州市报告2022\分析报告20240510' + + +# 修复Word文档 +# 经过反复测试发现,WORD文档中的图表,有些POI是无法正确读取的,本来是Sheet1,结果它不认识,说只有一个Sheet0,此时就无法正确读取数据了。 +# 而我通过python+win32com.client.Dispatch可以读取到,直接保存,就修复了这个BUG,真是太神奇了! +def repairWord(docPath): + docApp = win32com.client.Dispatch('Word.Application') + # 是不是打Word显示 + docApp.Visible = False + docApp.DisplayAlerts = 0 + + doc = docApp.Documents.Open(docPath) + # + # # 遍历文档中的所有内嵌形状 + idx = 1 + for inline_shape in doc.InlineShapes: + if inline_shape.Type == win32com.client.constants.wdInlineShapeChart: # 检查是否为内嵌图表 + shape = doc.InlineShapes(idx) + sheet = shape.Chart.ChartData.Workbook.Worksheets("Sheet1") + # 下一个图表的索引号 + idx = idx + 1 + + # 关闭文档和Word应用 + doc.Close() + docApp.Quit() + + +if __name__ == '__main__': + # 1、修复两层扩展名.docx + for file in os.listdir(workingPath): + if file.endswith('.docx.docx'): + # 完整的路径名称 + docPath = os.path.join(workingPath, file) + print("文件名有误,已修复:" + docPath) + os.rename(docPath, docPath.replace('.docx.docx', '.docx')) + + # 2、修复图表异常问题 + for file in os.listdir(workingPath): + if file.endswith('.docx'): + # 开始修复文档 + repairWord(docPath) + print("修复完成") diff --git a/Py/Test/DoAreaDocx.py b/Py/Test/DoAreaDocx.py new file mode 100644 index 00000000..b581f6fe --- /dev/null +++ b/Py/Test/DoAreaDocx.py @@ -0,0 +1,150 @@ +import os +import time + +import win32com +from win32com.client import Dispatch +import re +import logging + +# pip install pywin32 openpyxl +# pip install pywin32 + +logging.basicConfig( + level=logging.DEBUG, # 设置日志级别 + filename='app.log', # 设置日志文件名 + filemode='w', # 文件模式,'w'表示写模式,每次运行都会覆盖旧文件;'a'表示追加模式 + format='%(name)s - %(levelname)s - %(message)s' # 设置日志格式 +) + +working_dir = r"D:\dsWork\YunNanDsBase\Doc\全省及州市县区人口与教育报告集20241023\133个县区报告2022\县区研究报告" +import openpyxl + +# 声明Word应用程序 +docApp = win32com.client.Dispatch('Word.Application') +docApp.Visible = True +docApp.DisplayAlerts = 0 + +# 有问题的县区列表 +errorArea = [] +# 读取ErrorArea.txt,将每一行的县区名称读入列表中 +with open('ErrorArea.txt', 'r', encoding='utf-8') as f: + for line in f: + # 去除每行前后的空白字符,包括空格、制表符和换行符 + line = line.strip() + # 将文本中的关键字替换为空字符串 + errorArea.append(line) + +# 在工作目录下创建Excel目录 +excel_dir = r'D:\dsWork\YunNanDsBase\Doc\全省及州市县区人口与教育报告集20241023\133个县区报告2022\Excel' +if not os.path.exists(excel_dir): + os.mkdir(excel_dir) +# 遍历working_dir目录下的所有子文件夹 +for root, dirs, files in os.walk(working_dir): + for dir in dirs: + # 获取县区名称 + county_name = dir + + # 获取县区文件夹路径 + county_dir = os.path.join(root, dir) + # 遍历县区文件夹下的所有文件 + for file in os.listdir(county_dir): + # 获取文件路径 + file_path = os.path.join(county_dir, file) + cityName = "" + # 判断文件是否是Word文档 + if file_path.endswith('.docx') and not file.startswith('~'): + cityName = file_path.replace(working_dir, '')[1:].split("各县")[0] + areaName = file + + areaName = re.sub(r'[^\u4e00-\u9fa5]', '', areaName) + if '市' not in areaName and '县' not in areaName and '区' not in areaName: + continue + # 打开文件文件,按行读取 + with open('replaceBlank.txt', 'r', encoding='utf-8') as f: + for line in f: + # 去除每行前后的空白字符,包括空格、制表符和换行符 + line = line.strip() + # 将文本中的关键字替换为空字符串 + areaName = areaName.replace(line, '') + # 打开文件文件,按行读取 + with open('replaceText.txt', 'r', encoding='utf-8') as f: + for line in f: + # 去除每行前后的空白字符,包括空格、制表符和换行符 + line = line.strip() + # 将文本中的关键字替换为空字符串 + areaName = areaName.replace(line.split(' ')[0], line.split(' ')[1]) + # 检查Excel目录下是不是存在这个城市的文件夹,如果不存在,则创建 + city_dir = os.path.join(excel_dir, cityName) + if not os.path.exists(city_dir): + os.mkdir(city_dir) + + # 在城市文件夹下,查看是不是存在县区的子文件夹,如果不存在则创建 + county_sub_dir = os.path.join(city_dir, areaName) + if not os.path.exists(county_sub_dir): + os.mkdir(county_sub_dir) + else: # 如果存在,就跳过 + # 查看一下这个文件夹下有多少个文件 + file_count = len([name for name in os.listdir(county_sub_dir)]) + if file_count > 10: + print(county_sub_dir + " 文件夹下有超过10个文件,跳过") + continue + # 跳过错误县区 + flag = False + for e in errorArea: + if e in areaName: + flag = True + if flag: + print(county_sub_dir + " 跳过") + continue + print("正在处理" + cityName + "-" + areaName + "...") + # 使用word读取图表的技术,保存EXCEL文件到城市的目录下 + # 休息3秒,防止WORD打开频繁造成错误 + time.sleep(3) + doc = docApp.Documents.Open(file_path) + # 遍历文档中所有的文字段落,判断是不是以 图+数字开头 + idx = 1 + # 图表的名称列表 + tb_list = [] + for para in doc.Paragraphs: + x = para.Range.Text.strip().replace("图 ", "图").replace(" ", " ") + if x.startswith("图"): + tb_list.append(x) + idx = idx + 1 + + # 遍历文档中的所有内嵌形状 + idx = 1 + for inline_shape in doc.InlineShapes: + if inline_shape.Type == win32com.client.constants.wdInlineShapeChart: + shape = doc.InlineShapes(idx) + sheet = shape.Chart.ChartData.Workbook.Worksheets(1) + # 创建一个新的Excel工作簿 + wb = openpyxl.Workbook() + ws = wb.active + + # 遍历Excel工作表中的所有单元格,并将其写入新的工作簿 + for row in range(1, sheet.UsedRange.Rows.Count + 1): + for col in range(1, sheet.UsedRange.Columns.Count + 1): + cell_value = sheet.Cells(row, col).Value + ws.cell(row=row, column=col, value=cell_value) + # 保存新的Excel文件 + try: + original_string = tb_list[idx - 1] + # 使用正则表达式过滤,只保留中文、英文和数字 + original_string = original_string[1:] + if ' ' in original_string: + original_string = original_string.split(" ")[1] + filtered_string = re.sub(r'[^\u4e00-\u9fa5a-zA-Z0-9]', '', original_string) + fileName = '【' + str(idx) + '】' + filtered_string + ".xlsx" + + wb.save(county_sub_dir + '/' + fileName) + except: + pass + wb.close() + print("保存文件:" + fileName) + # 下一个图表的索引号 + idx = idx + 1 + # 关闭文档和Word应用 + doc.Close() + print(f"县区处理完成:{cityName}{areaName}") +docApp.Quit() +print("恭喜,所有县区数据整理工作成功完成!") diff --git a/Py/Test/DoCityDocx.py b/Py/Test/DoCityDocx.py new file mode 100644 index 00000000..0b95129c --- /dev/null +++ b/Py/Test/DoCityDocx.py @@ -0,0 +1,84 @@ +import os +import win32com +from win32com.client import Dispatch +import re + +# pip install pywin32 openpyxl +# pip install pywin32 +working_dir = r"D:/dsWork/YunNanDsBase/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/" +import openpyxl + +# 在工作目录下创建Excel目录 +excel_dir = working_dir + 'Excel' +if not os.path.exists(excel_dir): + os.mkdir(excel_dir) + +# 关键词 +keyword = '人口变化及其对教育的影响' + +# 是不是打Word显示 +docApp = win32com.client.Dispatch('Word.Application') +# 是不是打Word显示 +docApp.Visible = False +docApp.DisplayAlerts = 0 + +# 遍历工作目录下所有的docx文件,将文件名用keyword进行分隔,前一半是州市名称,后一半是上报的时间,我们取前一半的州市名称 +for file in os.listdir(working_dir): + if file.endswith('.docx') and not file.startswith('~'): + file_name = file.split('.')[0] + # 判断一下file_name中是不是存在keyword,如果不存在,则输出错误,并结束程序 + if keyword not in file_name: + print('Error: ' + file_name + ' 文件名称中并不包含:' + keyword) + exit() + # 确认包含后,提取出前半部分作为城市名称 + city_name = file_name.split(keyword)[0] + # 在excel_dir目录下创建这个城市的子目录,准备将生成的excel文件放在这个子目录下 + city_dir = excel_dir + '/' + city_name + if not os.path.exists(city_dir): + os.mkdir(city_dir) + # 将当前docx进行读取其中的每一个段落,要求以 "图"+数字开头,这是图例的意思 + doc_path = working_dir + '/' + file + # print(doc_path) + doc = docApp.Documents.Open(doc_path) + # 遍历文档中所有的文字段落,判断是不是以 图+数字开头 + idx = 1 + # 图表的名称列表 + tb_list = [] + for para in doc.Paragraphs: + x = para.Range.Text.strip().replace("图 ", "图").replace(" ", " ") + if x.startswith("图"): + tb_list.append(x) + idx = idx + 1 + + # 遍历文档中的所有内嵌形状 + idx = 1 + for inline_shape in doc.InlineShapes: + if inline_shape.Type == win32com.client.constants.wdInlineShapeChart: # 检查是否为内嵌图表 + shape = doc.InlineShapes(idx) + sheet = shape.Chart.ChartData.Workbook.Worksheets(1) + # 创建一个新的Excel工作簿 + wb = openpyxl.Workbook() + ws = wb.active + + # 遍历Excel工作表中的所有单元格,并将其写入新的工作簿 + for row in range(1, sheet.UsedRange.Rows.Count + 1): + for col in range(1, sheet.UsedRange.Columns.Count + 1): + cell_value = sheet.Cells(row, col).Value + ws.cell(row=row, column=col, value=cell_value) + # 保存新的Excel文件 + original_string = tb_list[idx - 1] + # 使用正则表达式过滤,只保留中文、英文和数字 + original_string = original_string[1:] + if ' ' in original_string: + original_string = original_string.split(" ")[1] + filtered_string = re.sub(r'[^\u4e00-\u9fa5a-zA-Z0-9]', '', original_string) + fileName = '【' + str(idx) + '】' + filtered_string + ".xlsx" + wb.save(city_dir + '/' + fileName) + print("保存文件:" + fileName) + # 下一个图表的索引号 + idx = idx + 1 + # print(idx - 1) + # 关闭文档和Word应用 + doc.Close() +docApp.Quit() +print("恭喜,所有市州数据整理工作成功完成!") diff --git a/Py/Test/DocxTuBiaoAdd.py b/Py/Test/DocxTuBiaoAdd.py new file mode 100644 index 00000000..5990f959 --- /dev/null +++ b/Py/Test/DocxTuBiaoAdd.py @@ -0,0 +1,61 @@ +# pip install pywin32 +# https://blog.csdn.net/weixin_42927998/article/details/115086797 +import win32com +from win32com.client import Dispatch + +docApp = win32com.client.Dispatch('Word.Application') +docApp.Visible = True +docApp.DisplayAlerts = 0 +doc = docApp.Documents.Open('c:/1.docx') + +# 创建图表,图表的插入位置为预先在word文档中插入的书签,书签名为“插入图表位置” +shape_chart = doc.Shapes.AddChart2(Style=201, Type=51, Top=doc.Bookmarks("插入图表位置").Select()) +shape_chart.WrapFormat.Type = 7 # 设置图表为嵌入型 + +# 设置Word中的图表 +chart = shape_chart.Chart +# 图表数据对应的工作表 +worksheet = chart.ChartData.Workbook.Worksheets(1) +chart.SetSourceData("Sheet1!$A$1:$C$4") # 设置数据源范围 + +# 簇状柱形图测试数据 +chart_data = [["", "系列A", "系列B", "系列C", "系列D"], + [2020, 2, 4, 2, 3], + [2019, 4, 5, 3, 2]] + +# 清空工作表默认数据 +worksheet.Range("A1:D5").value = None + +# 填入测试数据 +for row_index, row in enumerate(chart_data): + for column_index, value in enumerate(row): + worksheet.Cells(row_index + 1, column_index + 1).Value = value + +chart.SetSourceData("Sheet1!$A$1:$E$3") # 设置数据源范围 + +# 设置图表样式示例 +chart.ChartTitle.Text = '测试标题' # 设置标题 +chart.FullSeriesCollection(2).Format.Fill.ForeColor.ObjectThemeColor = 10 # 设置系列2的填充颜色 + +chart.ChartData.Workbook.Close() # 关闭workbook窗口 + +doc.Save() +doc.Close() +docApp.Quit() + + +''' +Type +1:柱形图(Column) +2:折线图(Line) +3:饼图(Pie) +51:堆叠柱形图(Stacked Column) +52:堆叠线图(Stacked Line) +53:堆叠区域图(Stacked Area) +55:雷达图(Radar) +65:树状图(Treemap) +73:旭日图(Sunburst) +77:水桶图(Funnel) +109:散点图(Scatter) +183:气泡图(Bubble) +''' \ No newline at end of file diff --git a/Py/Test/DocxTuBiaoRead.py b/Py/Test/DocxTuBiaoRead.py new file mode 100644 index 00000000..e7359dab --- /dev/null +++ b/Py/Test/DocxTuBiaoRead.py @@ -0,0 +1,25 @@ +# pip install pywin32 +# https://blog.csdn.net/weixin_42927998/article/details/115086797 +import win32com +from win32com.client import Dispatch + +docApp = win32com.client.Dispatch('Word.Application') +# 是不是打Word显示 +docApp.Visible = False +docApp.DisplayAlerts = 0 + +doc = docApp.Documents.Open("c:/b.docx") +# +# # 遍历文档中的所有内嵌形状 +idx = 1 +for inline_shape in doc.InlineShapes: + if inline_shape.Type == win32com.client.constants.wdInlineShapeChart: # 检查是否为内嵌图表 + # 获取图表的标题,此项目中图表没有标题 + shape = doc.InlineShapes(idx) + sheet = shape.Chart.ChartData.Workbook.Worksheets("Sheet1") + # 下一个图表的索引号 + idx = idx + 1 + +# 关闭文档和Word应用 +doc.Close() +docApp.Quit() diff --git a/Py/Test/ErrorArea.txt b/Py/Test/ErrorArea.txt new file mode 100644 index 00000000..cf334611 --- /dev/null +++ b/Py/Test/ErrorArea.txt @@ -0,0 +1,12 @@ +寻甸县 +禄劝县 +嵩明县 +富民县 +宁蒗县 +永胜县 +洱源县 +文山市 +西山区 +昭阳区 +鲁甸县 +宁洱县 \ No newline at end of file diff --git a/Py/Test/YunNan.py b/Py/Test/YunNan.py new file mode 100644 index 00000000..96af6f67 --- /dev/null +++ b/Py/Test/YunNan.py @@ -0,0 +1,68 @@ +# pip install pymysql +# pip install requests beautifulsoup4 + +# 查看结果 +# select * from t_dm_area where province_id='FD61813E-70A1-42AB-9A8E-141ED4D47B98' order by level_id; + +import time + +import pymysql +import requests +from bs4 import BeautifulSoup +import re + +if __name__ == '__main__': + # 遍历 mysql数据库,然后开启爬虫 + # 建立数据库连接 + conn = pymysql.connect( + host='10.10.14.203', # 主机名(或IP地址) + port=3306, # 端口号,默认为3306 + user='root', # 用户名 + password='Password123@mysql', # 密码 + charset='utf8mb4' # 设置字符编码 + ) + + # 创建游标对象 + cursor = conn.cursor() + # 选择数据库 + conn.select_db("ds_db") + # 执行查询操作 + cursor.execute( + "SELECT id,full_name FROM t_dm_area where province_id='FD61813E-70A1-42AB-9A8E-141ED4D47B98' order by level_id") + + # 获取查询结果,返回元组 + result: tuple = cursor.fetchall() + + for e in result: + id = e[0] + area_name = e[1] + url = "https://baike.baidu.com/item/" + area_name + "?fromModule=lemma_search-box" + + print(url) + # 发送HTTP GET请求 + response = requests.get(url) + # 检查请求是否成功 + if response.status_code == 200: + # 使用BeautifulSoup解析HTML内容 + soup = BeautifulSoup(response.text, 'html.parser') + # 假设我们要抓取的是

标签中的文字 + # 你可以根据需要修改选择器来抓取不同的内容 + specific_divs = soup.select('div.para_YYuCh.summary_nfAdr.MARK_MODULE') + # 遍历找到的所有特定div标签,并打印它们的文本内容 + for div in specific_divs: + text = div.get_text(strip=True) # 使用get_text()方法获取文本,并去除 + # 使用正则表达式移除所有形如[数字]和[数字-数字]的字符串 + cleaned_text = re.sub(r'\[\d+(?:-\d+)?\]', '', text) + sql = "update t_dm_area set memo=%s where id=%s" + cursor.execute(sql, (cleaned_text, id)) + conn.commit() + print("更新"+area_name+"数据成功") + break + else: + print('Failed to retrieve the webpage') + + time.sleep(2) + # 关闭游标和连接 + cursor.close() + conn.close() + print("结束") \ No newline at end of file diff --git a/Py/Test/replaceBlank.txt b/Py/Test/replaceBlank.txt new file mode 100644 index 00000000..9d706bec --- /dev/null +++ b/Py/Test/replaceBlank.txt @@ -0,0 +1,33 @@ +人口变化及其对教育的影响 +辖区人口变化趋势对基础教育的影响 +样稿 +市教育数据统计 +区报告 +人口变化趋势对基础教育的影响修改 +人口变化趋势对基础教育的影响审稿 +人口变化趋势对基础教育的影响陈副改终稿 +县区最终版 +人口变化趋势对基础教育的影响 +人口变化及其对基础教育影响的报告 +修改终稿 +人口变化及其对教育影响的报告 +正确 +附件 +定稿 +省级课题 +人口变化及其对基础教育的影响报告 +县区 +人口变化对教育的影响 +报告 +研究报告 +文本 +修改稿 +已审核 +已经审核 +报告 +总人口数常住人口数统计局提供 +初稿 +人口变化及其对教育影响的研究 +人口变化趋势对基础教育影响的研究 +研究 +的 \ No newline at end of file diff --git a/Py/Test/replaceText.txt b/Py/Test/replaceText.txt new file mode 100644 index 00000000..43de5f36 --- /dev/null +++ b/Py/Test/replaceText.txt @@ -0,0 +1,11 @@ +县县 县 +曲靖市马龙区 马龙区 +曲靖市麒麟区 麒麟区 +曲靖市沾益区 沾益区 +江城江城县 江城县 +墨江 墨江县 +盐津 盐津县 +盈江 盈江县 +芒市 芒市县 +宾川 宾川县 +镇康 镇康县 \ No newline at end of file diff --git a/src/main/java/com/dsideal/base/Tools/FillData/City/C10.java b/src/main/java/com/dsideal/base/Tools/FillData/City/C10.java new file mode 100644 index 00000000..95e55b22 --- /dev/null +++ b/src/main/java/com/dsideal/base/Tools/FillData/City/C10.java @@ -0,0 +1,124 @@ +package com.dsideal.base.Tools.FillData.City; + +import cn.hutool.core.io.FileUtil; +import com.dsideal.base.Tools.FillData.ExcelKit.ExcelKit; +import com.dsideal.base.Tools.Util.LocalMysqlConnectUtil; +import com.dsideal.base.Tools.Util.ReadDocxUtil; +import com.jfinal.kit.StrKit; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.util.ZipSecureFile; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.xssf.usermodel.XSSFCellStyle; +import org.apache.poi.xssf.usermodel.XSSFSheet; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.apache.poi.xwpf.usermodel.XWPFChart; +import org.apache.poi.xwpf.usermodel.XWPFDocument; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class C10 { + //开始读取市州word文档 + static String parentPath = "D:\\dsWork\\YunNanDsBase\\Doc\\全省及州市县区人口与教育报告集20241023\\16个州市报告2022\\分析报告20240510"; + + //示例Excel + static String sampleExcelPath = "D:\\dsWork\\YunNanDsBase\\Doc\\待处理\\市\\【10】城镇&乡村人口变化及预测-双\\城镇&乡村人口变化及预测-双.xlsx"; + + public static void main(String[] args) throws IOException, InvalidFormatException { + //初始化数据库连接 + LocalMysqlConnectUtil.Init(); + //实例化 + ReadDocxUtil ru = new ReadDocxUtil(); + + //目标Excel,就是把文件名解析出来后,后面添加上【成果】,需要动态计算获取,不能写死 + String excelPath = sampleExcelPath.replace(".xlsx", "【成果】.xlsx"); + ExcelKit.delExcel(excelPath); + + //结果Excel + XSSFWorkbook outWorkbook = new XSSFWorkbook(); + //结果Sheet + XSSFSheet outSheet = ExcelKit.CreateSheet(outWorkbook); + //样式 + XSSFCellStyle headerStyle = ExcelKit.getHeaderStyle(outWorkbook); + XSSFCellStyle dataStyle = ExcelKit.getDataStyle(outWorkbook); + + //拷贝文件头 + ExcelKit.CopyHead(sampleExcelPath, outSheet, headerStyle); + + //找到parentPath下一级目录中所有文件 + List files = FileUtil.loopFiles(parentPath, file -> true); + int rowIndex = 0; + //处理这个目录 + if (files != null) { + for (File file : files) { + //判断file是不是目录,是目录的需要跳过 + if (file.isDirectory()) continue; + //城市名称 + String cityName = ru.getCityOrAreaName(file.getName()); + String fileName = file.getName(); + + //判断是否为docx文件 + if (fileName.endsWith(".docx") && !fileName.startsWith("~")) { + System.out.println("正在处理" + cityName + "市州文件:" + fileName); + //读取文件 + String inputUrl = file.getAbsolutePath(); + InputStream is = new FileInputStream(inputUrl); + ZipSecureFile.setMinInflateRatio(-1.0d); + XWPFDocument doc = new XWPFDocument(is); + //排序后的图表 + List charts = ExcelKit.getSortListForXWPFChart(doc.getCharts()); + + //数据在图表2,图3 + int firstChartNumber = 2, secondChartNumber = 3; + + XSSFWorkbook workbook = charts.get(firstChartNumber - 1).getWorkbook(); + List> source1 = ExcelKit.readSheet(workbook, 6);//从2017年开始 + + workbook = charts.get(secondChartNumber - 1).getWorkbook(); + List> source3 = ExcelKit.readSheet(workbook, 2);//从2023年开始 + + //遍历source1 + for (List r : source1) { + // 导出数据 + //上级行政区划,行政区划,年份,城镇人口变化,城镇人口预测,乡村人口变化,乡村人口预测 + int year = Integer.parseInt(r.getFirst()); + //城镇 + double cvalue = Double.parseDouble(r.get(1)); + //乡村 + double xvalue = Double.parseDouble(r.get(2)); + if (year < 2023) { + Row outRow = outSheet.createRow(++rowIndex); + ExcelKit.putData(outRow, new ArrayList<>(Arrays.asList("云南省", cityName, r.getFirst(), String.format("%.2f", cvalue), "", String.format("%.2f", xvalue), "")), dataStyle); + } + } + + //遍历source3 + for (List r : source3) { + // 导出数据 + //上级行政区划,行政区划,年份,城镇人口变化,城镇人口预测,乡村人口变化,乡村人口预测 + int year = Integer.parseInt(r.getFirst()); + //城镇 + double cvalue = 0; + if (!StrKit.isBlank(r.get(1))) cvalue = Double.parseDouble(r.get(1)); + //乡村 + double xvalue = 0; + if (!StrKit.isBlank(r.get(2))) xvalue = Double.parseDouble(r.get(2)); + + if (year >= 2023) { + Row outRow = outSheet.createRow(++rowIndex); + ExcelKit.putData(outRow, new ArrayList<>(Arrays.asList("云南省", cityName, r.getFirst(), "", String.format("%.2f", cvalue), "", String.format("%.2f", xvalue))), dataStyle); + } + } + } + } + } + //保存文件 + ExcelKit.saveExcel(excelPath, outWorkbook); + System.out.println("市州所有文件处理完成!"); + } +} diff --git a/src/main/java/com/dsideal/base/Tools/FillData/City/C11.java b/src/main/java/com/dsideal/base/Tools/FillData/City/C11.java new file mode 100644 index 00000000..2279379e --- /dev/null +++ b/src/main/java/com/dsideal/base/Tools/FillData/City/C11.java @@ -0,0 +1,131 @@ +package com.dsideal.base.Tools.FillData.City; + +import cn.hutool.core.io.FileUtil; +import com.dsideal.base.Tools.FillData.ExcelKit.ExcelKit; +import com.dsideal.base.Tools.Util.LocalMysqlConnectUtil; +import com.dsideal.base.Tools.Util.ReadDocxUtil; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.util.ZipSecureFile; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.xssf.usermodel.XSSFCellStyle; +import org.apache.poi.xssf.usermodel.XSSFSheet; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.apache.poi.xwpf.usermodel.XWPFChart; +import org.apache.poi.xwpf.usermodel.XWPFDocument; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class C11 { + //开始读取市州word文档 + static String parentPath = "D:\\dsWork\\YunNanDsBase\\Doc\\全省及州市县区人口与教育报告集20241023\\16个州市报告2022\\分析报告20240510"; + + //示例Excel + static String sampleExcelPath = "D:\\dsWork\\YunNanDsBase\\Doc\\待处理\\市\\【11】教育资源配置发展预测\\教育资源配置发展预测(人).xlsx"; + + public static void main(String[] args) throws IOException, InvalidFormatException { + //初始化数据库连接 + LocalMysqlConnectUtil.Init(); + //实例化 + ReadDocxUtil ru = new ReadDocxUtil(); + + //目标Excel,就是把文件名解析出来后,后面添加上【成果】,需要动态计算获取,不能写死 + String excelPath = sampleExcelPath.replace(".xlsx", "【成果】.xlsx"); + ExcelKit.delExcel(excelPath); + + //结果Excel + XSSFWorkbook outWorkbook = new XSSFWorkbook(); + //结果Sheet + XSSFSheet outSheet = ExcelKit.CreateSheet(outWorkbook); + //样式 + XSSFCellStyle headerStyle = ExcelKit.getHeaderStyle(outWorkbook); + XSSFCellStyle dataStyle = ExcelKit.getDataStyle(outWorkbook); + + //拷贝文件头 + ExcelKit.CopyHead(sampleExcelPath, outSheet, headerStyle); + + //找到parentPath下一级目录中所有文件 + List files = FileUtil.loopFiles(parentPath, file -> true); + int rowIndex = 0; + //处理这个目录 + if (files != null) { + for (File file : files) { + //判断file是不是目录,是目录的需要跳过 + if (file.isDirectory()) continue; + //城市名称 + String cityName = ru.getCityOrAreaName(file.getName()); + String fileName = file.getName(); + + //判断是否为docx文件 + if (fileName.endsWith(".docx") && !fileName.startsWith("~")) { + System.out.println("正在处理" + cityName + "市州文件..."); + //读取文件 + String inputUrl = file.getAbsolutePath(); + InputStream is = new FileInputStream(inputUrl); + ZipSecureFile.setMinInflateRatio(-1.0d); + XWPFDocument doc = new XWPFDocument(is); + //排序后的图表 + List charts = ExcelKit.getSortListForXWPFChart(doc.getCharts()); + + //数据在图表36,教职工总量 + int firstChartNumber = 36, secondChartNumber = 37; + + //占地面积37 + XSSFWorkbook workbook = charts.get(firstChartNumber - 1).getWorkbook(); + List> source1 = ExcelKit.readSheet(workbook, 1); + //年份,学前,小学,初中,高中 + // 0 1 2 3 4 + //所有相关2022基数的列都使用2022的数据 + //学前教职工2022基数 小学教职工2022基数 初中教职工2022基数 高中用房2022年基数 + //取出第一条数据,即2022年数据 + List firRow = source1.getFirst(); + String xqjzg = firRow.get(1);//学前教职工2022基数 + String xxjzg = firRow.get(2);//小学教职工2022基数 + String zxjzg = firRow.get(3);//初中教职工2022基数 + String gxjzg = firRow.get(4);//高中教职工2022基数 + + + workbook = charts.get(secondChartNumber - 1).getWorkbook(); + List> source2 = ExcelKit.readSheet(workbook, 1); + //年份,学前,小学,初中,高中 + // 0 1 2 3 4 + List secRow = source2.getFirst(); + String xqjzgMJ = secRow.get(1);//学前面积2023年基数 + String xxjzgMJ = secRow.get(2);//小学面积2023年基数 + String zxjzgMJ = secRow.get(3);//初中面积2023年基数 + String gxjzgMJ = secRow.get(4);//高中面积2023年基数 + + + //遍历source1 + for (int i = 0; i < source1.size(); i++) { + List r1 = source1.get(i); + List r2 = source2.get(i); + Row outRow = outSheet.createRow(++rowIndex); + ExcelKit.putData(outRow, new ArrayList<>( + Arrays.asList(cityName, r1.getFirst(), + r1.get(1), xqjzg, String.format("%.2f",Double.parseDouble(xqjzg) - Double.parseDouble(r1.get(1))),//学前教职工数,2022基数减去当前值,得到预测值 + r2.get(1), xqjzgMJ, String.format("%.2f",Double.parseDouble(xqjzgMJ) - Double.parseDouble(r2.get(1))),//学前占地面积,2023基数减去当前值,得到预测值 + + r1.get(2), xxjzg, String.format("%.2f",Double.parseDouble(xxjzg) - Double.parseDouble(r1.get(2))),//小学教职工数,2022基数减去当前值,得到预测值 + r2.get(2), xxjzgMJ, String.format("%.2f",Double.parseDouble(xxjzgMJ) - Double.parseDouble(r2.get(2))),//小学占地面积,2023基数减去当前值,得到预测值 + + r1.get(3), zxjzg, String.format("%.2f",Double.parseDouble(zxjzg) - Double.parseDouble(r1.get(3))),//初中教职工数,2022基数减去当前值,得到预测值 + r2.get(3), zxjzgMJ, String.format("%.2f",Double.parseDouble(zxjzgMJ) - Double.parseDouble(r2.get(3))),//初中占地面积,2023基数减去当前值,得到预测值 + + r1.get(4), gxjzg, String.format("%.2f",Double.parseDouble(gxjzg) - Double.parseDouble(r1.get(4))),//高中教职工数,2022基数减去当前值,得到预测值 + r2.get(4), gxjzgMJ, String.format("%.2f",Double.parseDouble(gxjzgMJ) - Double.parseDouble(r2.get(4))),//高中占地面积,2023基数减去当前值,得到预测值 + "", "", "", "", "", "", "云南省")), dataStyle); + } + } + } + } + //保存文件 + ExcelKit.saveExcel(excelPath, outWorkbook); + System.out.println("市州所有文件处理完成!"); + } +} diff --git a/src/main/java/com/dsideal/base/Tools/FillData/ExcelKit/ExcelCoreUtil.java b/src/main/java/com/dsideal/base/Tools/FillData/ExcelKit/ExcelCoreUtil.java new file mode 100644 index 00000000..a23701ba --- /dev/null +++ b/src/main/java/com/dsideal/base/Tools/FillData/ExcelKit/ExcelCoreUtil.java @@ -0,0 +1,239 @@ +package com.dsideal.base.Tools.FillData.ExcelKit; + +import org.apache.poi.hssf.usermodel.HSSFSheet; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.ss.usermodel.*; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; + +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +public class ExcelCoreUtil { + /** + * 将xls的转换为xlsx,数据样式和格式一同转换,并输出为文件流 + * + * @param inputFilePath 待转换Excel + * @param outputFilePath 转换后的Excel存储路径 + * @throws IOException + */ + public static void xlsChangeXlsx(String inputFilePath, String outputFilePath) throws IOException { + + // 使用 try-with-resources 自动管理资源 + try (FileInputStream fis = new FileInputStream(inputFilePath)) { + + // 获得xls模板 + HSSFWorkbook wb = new HSSFWorkbook(fis); + XSSFWorkbook swb = new XSSFWorkbook(); + for (int i = 0; i < wb.getNumberOfSheets(); ++i) { + HSSFSheet sheet = wb.getSheetAt(i); + + // 复制sheet,合并栏和冻结窗格之类 + Sheet sheet1 = copySheet(wb, swb, i); + // 写入xls模板 + ExcelCoreUtil builder = new ExcelCoreUtil(wb); + int rowNum = sheet.getLastRowNum(); + // 复制单元格值与样式 + builder.copyRows(swb, sheet, sheet1, 0, rowNum + 1, 0); + } + + + try (FileOutputStream fos = new FileOutputStream(outputFilePath)) { + swb.write(fos); + fos.flush(); + System.out.println("转换成功,文件已保存至:" + outputFilePath); + } + } + } + + + private Workbook template = null; + private final Map fonts = new HashMap<>(); + private final Map styles = new HashMap<>(); + + public ExcelCoreUtil(Workbook template) { + this.template = template; + } + + public void switchStyles(Workbook dstWorkbook, CellStyle[] styles) { + for (int i = 0; i < styles.length; i++) { + styles[i] = getStyle(dstWorkbook, styles[i]); + } + } + + private Font getFont(Workbook dstWorkbook, Font font) { + return fonts.computeIfAbsent(font.hashCode(), k -> cloneFont(dstWorkbook, font)); + } + + private CellStyle getStyle(Workbook dstWorkbook, CellStyle style) { + Font font = getFont(dstWorkbook, template.getFontAt(style.getFontIndexAsInt())); + return styles.computeIfAbsent(style.hashCode(), k -> cloneStyle(dstWorkbook, style, dstWorkbook.createDataFormat(), font)); + } + + public void copyRows(Workbook dstWorkbook, Sheet srcSheet, Sheet dstSheet, int from, int to, int offset) { + for (int r = from; r < to; r++) { + Row srcRow = srcSheet.getRow(r); + if (srcRow != null) { + CellStyle style = srcRow.getRowStyle(); + Row dstRow = dstSheet.createRow(r + offset); + dstRow.setHeight(srcRow.getHeight()); + if (style != null) { + dstRow.setRowStyle(getStyle(dstWorkbook, style)); + } + for (int c = 0; c < srcRow.getLastCellNum(); c++) { + Cell srcCell = srcRow.getCell(c); + if (srcCell != null) { + CellType type = getCellType(srcCell); + Object value = getCellValue(srcCell); + style = srcCell.getCellStyle(); + Cell newCell = dstRow.createCell(c, type); + setCellValue(newCell, value, type); + newCell.setCellStyle(getStyle(dstWorkbook, style)); + } + } + } + } + } + + public static Sheet copySheet(Workbook srcWorkbook, Workbook dstWorkbook, int sheetIndex) { + Sheet srcSheet = srcWorkbook.getSheetAt(sheetIndex); + Sheet dstSheet = dstWorkbook.createSheet(srcSheet.getSheetName()); + dstSheet.setDisplayFormulas(srcSheet.isDisplayFormulas()); + dstSheet.setDisplayGridlines(srcSheet.isDisplayGridlines()); + dstSheet.setDisplayGuts(srcSheet.getDisplayGuts()); + dstSheet.setDisplayRowColHeadings(srcSheet.isDisplayRowColHeadings()); + dstSheet.setDisplayZeros(srcSheet.isDisplayZeros()); + dstSheet.setFitToPage(srcSheet.getFitToPage()); + dstSheet.setForceFormulaRecalculation(srcSheet.getForceFormulaRecalculation()); + dstSheet.setHorizontallyCenter(srcSheet.getHorizontallyCenter()); + dstSheet.setMargin(Sheet.BottomMargin, srcSheet.getMargin(Sheet.BottomMargin)); + dstSheet.setMargin(Sheet.FooterMargin, srcSheet.getMargin(Sheet.FooterMargin)); + dstSheet.setMargin(Sheet.HeaderMargin, srcSheet.getMargin(Sheet.HeaderMargin)); + dstSheet.setMargin(Sheet.LeftMargin, srcSheet.getMargin(Sheet.LeftMargin)); + dstSheet.setMargin(Sheet.RightMargin, srcSheet.getMargin(Sheet.RightMargin)); + dstSheet.setMargin(Sheet.TopMargin, srcSheet.getMargin(Sheet.TopMargin)); + dstSheet.setPrintGridlines(srcSheet.isPrintGridlines()); + dstSheet.setRightToLeft(srcSheet.isRightToLeft()); + dstSheet.setRowSumsBelow(srcSheet.getRowSumsBelow()); + dstSheet.setRowSumsRight(srcSheet.getRowSumsRight()); + dstSheet.setVerticallyCenter(srcSheet.getVerticallyCenter()); + for (int i = 0; i < 20; i++) { + dstSheet.setColumnWidth(i, srcSheet.getColumnWidth(i)); + dstSheet.setColumnHidden(i, srcSheet.isColumnHidden(i)); + } + srcSheet.getMergedRegions().forEach(dstSheet::addMergedRegion); + Drawing d1 = srcSheet.getDrawingPatriarch(); + if (d1 != null) { + Drawing d2 = dstSheet.getDrawingPatriarch(); + if (d2 == null) { + d2 = dstSheet.createDrawingPatriarch(); + } + for (Shape shape : d1) { + if (shape instanceof Picture) { + Picture p = (Picture) shape; + ClientAnchor a1 = p.getClientAnchor(); + int pictureId = dstWorkbook.addPicture(p.getPictureData().getData(), p.getPictureData().getPictureType()); + ClientAnchor a2 = d2.createAnchor(a1.getDx1(), a1.getDy1(), a1.getDx2(), a1.getDy2(), a1.getCol1(), a1.getRow1(), a1.getCol2(), a1.getRow2()); + d2.createPicture(a2, pictureId); + } + } + } + return dstSheet; + } + + public static Font cloneFont(Workbook dstWorkbook, Font font) { + Font clone = dstWorkbook.createFont(); + clone.setBold(font.getBold()); + clone.setCharSet(font.getCharSet()); + clone.setColor(font.getColor()); + clone.setFontHeight(font.getFontHeight()); + clone.setFontName(font.getFontName()); + clone.setItalic(font.getItalic()); + clone.setStrikeout(font.getStrikeout()); + clone.setTypeOffset(font.getTypeOffset()); + clone.setUnderline(font.getUnderline()); + return clone; + } + + public static CellStyle cloneStyle(Workbook dstWorkbook, CellStyle style, DataFormat formatter, Font font) { + CellStyle clone = dstWorkbook.createCellStyle(); + clone.setAlignment(style.getAlignment()); + clone.setBorderBottom(style.getBorderBottom()); + clone.setBorderLeft(style.getBorderLeft()); + clone.setBorderRight(style.getBorderRight()); + clone.setBorderTop(style.getBorderTop()); + + // 复制数据格式 + String formatString = style.getDataFormatString(); + DataFormat targetDataFormat = dstWorkbook.createDataFormat(); + short targetFormatIndex = targetDataFormat.getFormat(formatString); + clone.setDataFormat(targetFormatIndex); + + clone.setDataFormat(formatter.getFormat(style.getDataFormatString())); + clone.setFillBackgroundColor(style.getFillBackgroundColor()); + clone.setFillForegroundColor(style.getFillForegroundColor()); + clone.setFillPattern(style.getFillPattern()); + clone.setFont(font); + clone.setHidden(style.getHidden()); + clone.setIndention(style.getIndention()); + clone.setLocked(style.getLocked()); + clone.setVerticalAlignment(style.getVerticalAlignment()); + clone.setWrapText(style.getWrapText()); + return clone; + } + + protected static CellType getCellType(Cell cell) { + CellType cellType = cell.getCellType(); + if (cellType == CellType.FORMULA) { + cellType = cell.getSheet().getWorkbook().getCreationHelper().createFormulaEvaluator() + .evaluateFormulaCell(cell); + } + return cellType; + } + + protected static Object getCellValue(Cell cell) { + switch (getCellType(cell)) { + case BLANK: + case STRING: + return cell.getStringCellValue(); + case BOOLEAN: + return cell.getBooleanCellValue(); + case ERROR: + return cell.getErrorCellValue(); + case NUMERIC: + return cell.getNumericCellValue(); + } + return null; + } + + protected static void setCellValue(Cell cell, Object value, CellType type) { + switch (type) { + case BLANK: + return; + case STRING: + cell.setCellValue((String) value); + return; + case BOOLEAN: + cell.setCellValue((Boolean) value); + return; + case ERROR: + cell.setCellErrorValue((Byte) value); + return; + case NUMERIC: + if (value instanceof Double) { + Double d = (Double) value; + // 判断是否为整数,如果是整数,转换为 long,否则保留小数 + if (d == Math.floor(d)) { + cell.setCellValue(String.valueOf(d.longValue())); // 整数时去掉 .0 + } else { + cell.setCellValue(d); // 保留小数 + } + } + return; + default: + break; + } + } +}