class BaseConfig:
    """Central configuration shared by the data-extraction tools.

    Holds the project directory layout, the location of the source Excel
    workbook, the list of years covered by the data set and the display
    names of the education stages.

    Args:
        root_dir: Project root directory. When omitted, the parent of the
            directory that contains this file is used, which is the value
            the class always used before this parameter existed.
    """

    # Logical key -> worksheet title inside the Excel workbook.
    SHEET_NAMES = {
        'population': '人口',
        'enrollment_rate': '毛入学率',
        'school_count': '学校数',
    }

    def __init__(self, root_dir: "str | None" = None):
        # Base paths. Every other path is derived from root_dir so that an
        # alternative tree (for example a temporary directory) can be used.
        if root_dir is None:
            root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        self.root_dir = root_dir
        self.data_dir = os.path.join(self.root_dir, 'Data')
        self.excel_path = os.path.join(self.root_dir, 'Doc', '数据库-2015-2024-v2.xlsx')
        self.json_output_suffix = '.json'
        self.start_row = 5  # first worksheet row holding real data

        # Years covered by the data set, as strings: '2015' .. '2024'.
        self.years = [str(year) for year in range(2015, 2025)]

        # Education stage key -> display name.
        self.education_stages = {
            'preschool': '学前教育',
            'primary': '小学教育',
            'junior': '初中教育',
            'senior': '高中教育',
            'vocational': '中职教育',
        }

    def get_output_path(self, filename: str) -> str:
        """Return the JSON output path for *filename* inside the data directory.

        The configured suffix (``.json``) is appended to *filename*. The
        directory itself is not created here.
        """
        return os.path.join(self.data_dir, f'{filename}{self.json_output_suffix}')

    def get_log_path(self) -> str:
        """Return today's log file path, creating the ``Log`` directory if needed.

        The file name is the current local date formatted as ``YYYYMMDD``
        followed by ``.log``.
        """
        log_dir = os.path.join(self.root_dir, 'Log')
        os.makedirs(log_dir, exist_ok=True)
        return os.path.join(log_dir, f'{datetime.now().strftime("%Y%m%d")}.log')
query_area_info -# 创建数据保存目录 +# ======================= 配置常量 ======================= DATA_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'Data') -os.makedirs(DATA_DIR, exist_ok=True) JSON_PATH = os.path.join(DATA_DIR, 'RenKou.json') +SHEET_NAME = '人口' # 工作表名称 +REGION_NAME_COLUMN = 'A' # 区域名称所在列 +START_ROW = 3 # 数据起始行 +YEAR_RANGE = range(2015, 2025) # 年份范围 -file_name = EXCEL_PATH -population_data = [] -name_conversion_errors = [] # 记录转换失败的名称 -conversion_records = [] # 新增:定义转换记录变量 +# 数据列配置 (指标: (起始列, 结束列)) +DATA_COLUMNS = { + 'total_population': ('B', 'K'), # 年末总人口 + 'urban_population': ('L', 'U'), # 城镇人口 + 'rural_population': ('V', 'AE'), # 乡村人口 + 'urbanization_rate': ('AF', 'AO'), # 城镇化率 + 'birth_population': ('AP', 'AY') # 出生人口 +} -try: - # 加载工作簿并选择人口Sheet - workbook = openpyxl.load_workbook(file_name, read_only=True) - if '人口' not in workbook.sheetnames: - print("❌ 错误:未找到'人口'Sheet") - exit(1) - sheet = workbook['人口'] +# ======================= 工具函数 ======================= +def init_directories() -> None: + """初始化数据目录""" + os.makedirs(DATA_DIR, exist_ok=True) - # 定义数据列范围与英文属性映射 - data_columns = { - 'total_population': {'start_col': 'B', 'end_col': 'K', 'year_start': 2015}, # 年末总人口 - 'urban_population': {'start_col': 'L', 'end_col': 'U', 'year_start': 2015}, # 城镇人口 - 'rural_population': {'start_col': 'V', 'end_col': 'AE', 'year_start': 2015}, # 乡村人口 - 'urbanization_rate': {'start_col': 'AF', 'end_col': 'AO', 'year_start': 2015}, # 城镇化率 - 'birth_population': {'start_col': 'AP', 'end_col': 'AY', 'year_start': 2015} # 出生人口 - } - # 遍历数据行(跳过前2行表头) - for row_num, row in enumerate(sheet.iter_rows(min_row=3, values_only=True), start=3): - raw_name = row[0] if row[0] else '未知地区' - if not raw_name: # 跳过空行 +def process_value(value: Any) -> int | float | int: + """处理单元格值,转换为合适的数值类型""" + if value is None or str(value).strip() == '': + return 0 + try: + if isinstance(value, str): + value = value.replace(',', '').strip() + return float(value) if '.' 
in str(value) else int(value) + except (ValueError, TypeError): + return 0 + +# ======================= 核心逻辑 ======================= +def extract_area_data(sheet: openpyxl.worksheet.worksheet.Worksheet) -> List[Dict[str, Any]]: + """从工作表提取区域数据""" + population_data: List[Dict[str, Any]] = [] + conversion_records: List[Dict[str, str]] = [] + name_conversion_errors: List[str] = [] + + # 遍历数据行 + for row_num in range(START_ROW, sheet.max_row + 1): + row = sheet[row_num] + raw_name = str(row[openpyxl.utils.column_index_from_string(REGION_NAME_COLUMN)-1].value or '未知地区').strip() + if not raw_name: continue - # 区域名称转换(核心修改) - area_info = query_area_info(raw_name.strip()) + # 区域名称转换 + area_info = query_area_info(raw_name) if area_info: area_name = area_info['full_name'] area_code = area_info['area_code'] - # 新增:检查是否发生实际转换 if raw_name != area_name: conversion_records.append({ 'row': row_num, @@ -55,58 +69,77 @@ try: area_code = 'unknown' name_conversion_errors.append(f"行 {row_num}: '{raw_name}'") + # 构建区域数据 area_data = { 'area_name': area_name, 'area_code': area_code, - 'raw_name': raw_name # 保留原始名称用于调试 + 'raw_name': raw_name } # 提取各指标年度数据 - for metric, config in data_columns.items(): - start_col = openpyxl.utils.column_index_from_string(config['start_col']) - 1 - end_col = openpyxl.utils.column_index_from_string(config['end_col']) - 1 + for metric, (start_col, end_col) in DATA_COLUMNS.items(): + start_idx = openpyxl.utils.column_index_from_string(start_col) - 1 + end_idx = openpyxl.utils.column_index_from_string(end_col) - 1 year_data = {} - for col_idx, year in zip(range(start_col, end_col + 1), range(config['year_start'], 2025)): - value = row[col_idx] - # 处理空值和非数值 - if value is None or str(value).strip() == '': - year_data[str(year)] = 0 - else: - try: - year_data[str(year)] = float(value) if '.' 
def main() -> None:
    """Extract the population worksheet and write it to ``JSON_PATH``.

    Steps: make sure the data directory exists, open the workbook at
    ``EXCEL_PATH`` in read-only / values-only mode, extract the rows of the
    ``SHEET_NAME`` worksheet and dump them as UTF-8 JSON.

    Errors are reported on stdout rather than raised. The workbook is closed
    on every path on which it was successfully opened.
    """
    init_directories()
    # Sentinel so the cleanup below can tell "never opened" from "opened".
    workbook = None
    try:
        workbook = openpyxl.load_workbook(EXCEL_PATH, read_only=True, data_only=True)
        if SHEET_NAME not in workbook.sheetnames:
            print(f"❌ 错误:未找到'{SHEET_NAME}'工作表")
            return

        population_data = extract_area_data(workbook[SHEET_NAME])

        with open(JSON_PATH, 'w', encoding='utf-8') as f:
            json.dump(population_data, f, ensure_ascii=False, indent=2)

        print(f"✅ 人口数据提取完成,已保存至:{JSON_PATH}")
        print(f"📊 共处理 {len(population_data)} 条地区数据")

    except FileNotFoundError:
        print(f"🔴 错误:Excel文件 '{EXCEL_PATH}' 不存在")
    except Exception as e:
        # Top-level boundary of the script: report and exit normally.
        print(f"🔴 处理数据时发生错误:{str(e)}")
    finally:
        # A read-only workbook keeps the source file open until closed.
        if workbook is not None:
            try:
                workbook.close()
            except Exception as close_error:
                # Cleanup stays best-effort, but a failure is reported
                # instead of being silently discarded.
                print(f"⚠️ 关闭工作簿时发生错误:{close_error}")
'G', 'I', 'K', 'M', 'O', 'Q', 'S', 'U', 'W'], - 'years': [2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024] - }, - - # 小学教育(X-AQ列,交替列逻辑) - 'primary_enrollment': { - 'columns': ['X', 'Z', 'AB', 'AD', 'AF', 'AH', 'AJ', 'AL', 'AN', 'AP'], - 'years': [2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024] - }, - 'primary_enrollment_rate': { - 'columns': ['Y', 'AA', 'AC', 'AE', 'AG', 'AI', 'AK', 'AM', 'AO', 'AQ'], - 'years': [2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024] - }, - - # 初中教育(AR-BK列,交替列逻辑) - 'junior_high_enrollment': { - 'columns': ['AR', 'AT', 'AV', 'AX', 'AZ', 'BB', 'BD', 'BF', 'BH', 'BJ'], - 'years': [2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024] - }, - 'junior_high_enrollment_rate': { - 'columns': ['AS', 'AU', 'AW', 'AY', 'BA', 'BC', 'BE', 'BG', 'BI', 'BK'], - 'years': [2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024] - }, - - # 普通高中教育(BL-CE列,交替列逻辑) - 'senior_high_enrollment': { - 'columns': ['BL', 'BN', 'BP', 'BR', 'BT', 'BV', 'BX', 'BZ', 'CB', 'CD'], - 'years': [2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024] - }, - 'senior_high_enrollment_rate': { - 'columns': ['BM', 'BO', 'BQ', 'BS', 'BU', 'BW', 'BY', 'CA', 'CC', 'CE'], - 'years': [2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024] - }, - - # 中职教育(CF-CY列,交替列逻辑) - 'vocational_enrollment': { - 'columns': ['CF', 'CH', 'CJ', 'CL', 'CN', 'CP', 'CR', 'CT', 'CV', 'CX'], - 'years': [2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024] - }, - 'vocational_enrollment_rate': { - 'columns': ['CG', 'CI', 'CK', 'CM', 'CO', 'CQ', 'CS', 'CU', 'CW', 'CY'], - 'years': [2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024] - } +# 数据列映射配置 +DATA_COLUMNS = { + # 学前教育 - 交替列映射 + 'preschool_enrollment': { + 'columns': ['D', 'F', 'H', 'J', 'L', 'N', 'P', 'R', 'T', 'V'], + 'years': YEAR_RANGE + }, + 'preschool_enrollment_rate': { + 'columns': ['E', 'G', 'I', 'K', 'M', 'O', 'Q', 'S', 'U', 'W'], + 'years': YEAR_RANGE + }, + + # 小学教育 + 
'primary_enrollment': { + 'columns': ['X', 'Z', 'AB', 'AD', 'AF', 'AH', 'AJ', 'AL', 'AN', 'AP'], + 'years': YEAR_RANGE + }, + 'primary_enrollment_rate': { + 'columns': ['Y', 'AA', 'AC', 'AE', 'AG', 'AI', 'AK', 'AM', 'AO', 'AQ'], + 'years': YEAR_RANGE + }, + + # 初中教育 + 'junior_high_enrollment': { + 'columns': ['AR', 'AT', 'AV', 'AX', 'AZ', 'BB', 'BD', 'BF', 'BH', 'BJ'], + 'years': YEAR_RANGE + }, + 'junior_high_enrollment_rate': { + 'columns': ['AS', 'AU', 'AW', 'AY', 'BA', 'BC', 'BE', 'BG', 'BI', 'BK'], + 'years': YEAR_RANGE + }, + + # 普通高中教育 + 'senior_high_enrollment': { + 'columns': ['BL', 'BN', 'BP', 'BR', 'BT', 'BV', 'BX', 'BZ', 'CB', 'CD'], + 'years': YEAR_RANGE + }, + 'senior_high_enrollment_rate': { + 'columns': ['BM', 'BO', 'BQ', 'BS', 'BU', 'BW', 'BY', 'CA', 'CC', 'CE'], + 'years': YEAR_RANGE + }, + + # 中职教育 + 'vocational_enrollment': { + 'columns': ['CF', 'CH', 'CJ', 'CL', 'CN', 'CP', 'CR', 'CT', 'CV', 'CX'], + 'years': YEAR_RANGE + }, + 'vocational_enrollment_rate': { + 'columns': ['CG', 'CI', 'CK', 'CM', 'CO', 'CQ', 'CS', 'CU', 'CW', 'CY'], + 'years': YEAR_RANGE } +} - # 遍历数据行(跳过前4行表头) - for row_num, row in enumerate(sheet.iter_rows(min_row=5, values_only=True), start=5): - # 区域名称从B列获取(索引1),原代码是从A列(索引0)获取 - raw_name = row[1] if (len(row) > 1 and row[1] is not None) else '未知地区' - if not raw_name: # 跳过空行 - continue +# ======================= 工具函数 ======================= +def init_directories() -> None: + """初始化数据目录 + 创建数据保存目录,如果目录已存在则不执行操作 + """ + os.makedirs(DATA_DIR, exist_ok=True) - # 区域名称转换(核心修改) - # 确保raw_name为字符串类型再调用strip() - str_raw_name = str(raw_name).strip() if raw_name is not None else '未知地区' - area_info = query_area_info(str_raw_name) - if area_info: - area_name = area_info['full_name'] - area_code = area_info['area_code'] - # 检查是否发生实际转换 - if raw_name != area_name: - conversion_records.append({ - 'row': row_num, - 'raw_name': raw_name, - 'converted_name': area_name - }) + +def process_value(value: Any) -> int | float | int: + 
"""处理单元格值,转换为合适的数值类型 + + Args: + value: 原始单元格值 + + Returns: + int | float | int: 转换后的数值,无法转换时返回0 + """ + if value is None: + return 0 + + # 统一转换为字符串处理 + str_value = str(value).strip() + if str_value == '' or str_value == '####': + return 0 + + try: + if '%' in str_value: + # 移除百分号并转换为小数 + return float(str_value.replace('%', '')) + elif '.' in str_value: + return float(str_value) else: - area_name = raw_name - area_code = 'unknown' - name_conversion_errors.append(f"行 {row_num}: '{raw_name}'") + return int(str_value) + except (ValueError, TypeError): + return 0 - area_data = { - 'area_name': area_name, - 'area_code': area_code, - 'raw_name': raw_name # 保留原始名称用于调试 - } - # 提取各指标年度数据 - for metric, config in data_columns.items(): - year_data = {} - # 仅保留显式列名映射处理逻辑(完全移除旧格式代码) - if 'columns' in config and 'years' in config: - # 遍历预设的列名和年份对应关系 - for col_name, year in zip(config['columns'], config['years']): - col_idx = openpyxl.utils.column_index_from_string(col_name) - 1 - if col_idx < len(row): - value = row[col_idx] - # 处理空值和非数值(增强版) - if value is None: - year_data[str(year)] = 0 - else: - # 统一转换为字符串处理 - str_value = str(value).strip() - if str_value == '' or str_value == '####': - year_data[str(year)] = 0 - else: - try: - if '%' in str_value: - # 移除百分号并转换为小数 - year_data[str(year)] = float(str_value.replace('%', '')) - else: - year_data[str(year)] = float(str_value) if '.' 
def extract_enrollment_data(sheet: openpyxl.worksheet.worksheet.Worksheet) -> Tuple[List[Dict[str, Any]], List[Dict[str, str]], List[str]]:
    """Extract per-region enrollment figures from the worksheet.

    Rows are read from ``START_ROW`` onward. The region name is taken from
    ``REGION_NAME_COLUMN`` and resolved through ``query_area_info``; the
    metric columns listed in ``DATA_COLUMNS`` are converted with
    ``process_value``.

    Args:
        sheet: Worksheet holding the gross-enrollment-rate table.

    Returns:
        A tuple ``(enrollment_data, conversion_records, name_conversion_errors)``:
          - enrollment_data: one dict per region with ``area_name``,
            ``area_code``, ``raw_name`` and one ``{year: value}`` dict per
            metric.
          - conversion_records: rows whose resolved full name differs from
            the name in the sheet.
          - name_conversion_errors: messages for names that could not be
            resolved.

    Rows that are entirely blank, or whose name cell is empty text, are
    skipped. A row with data but no name cell is kept as ``'未知地区'``.
    """
    enrollment_data: List[Dict[str, Any]] = []
    conversion_records: List[Dict[str, str]] = []
    name_conversion_errors: List[str] = []

    to_index = openpyxl.utils.column_index_from_string
    region_col_idx = to_index(REGION_NAME_COLUMN) - 1

    # Resolve column letters to 0-based indices once, not per cell per row.
    # Metrics lacking 'columns'/'years' keep an empty layout, so they still
    # yield an (empty) year dict exactly as before.
    metric_layout: Dict[str, List[Tuple[int, str]]] = {}
    for metric, config in DATA_COLUMNS.items():
        if 'columns' in config and 'years' in config:
            metric_layout[metric] = [
                (to_index(col_name) - 1, str(year))
                for col_name, year in zip(config['columns'], config['years'])
            ]
        else:
            metric_layout[metric] = []

    for row_num, row in enumerate(sheet.iter_rows(min_row=START_ROW, values_only=True), start=START_ROW):
        name_cell = row[region_col_idx] if region_col_idx < len(row) else None

        if name_cell is None:
            # A row without any content is padding, not a region.
            if all(cell is None or str(cell).strip() == '' for cell in row):
                continue
            str_raw_name = '未知地区'
        else:
            str_raw_name = str(name_cell).strip()
            if not str_raw_name:
                continue

        area_info = query_area_info(str_raw_name)
        if area_info:
            area_name = area_info['full_name']
            area_code = area_info['area_code']
            # Record only real renames (sheet name differs from full name).
            if str_raw_name != area_name:
                conversion_records.append({
                    'row': row_num,
                    'raw_name': str_raw_name,
                    'converted_name': area_name
                })
        else:
            area_name = str_raw_name
            area_code = 'unknown'
            name_conversion_errors.append(f"行 {row_num}: '{str_raw_name}'")

        area_data = {
            'area_name': area_name,
            'area_code': area_code,
            'raw_name': str_raw_name  # original sheet name, kept for debugging
        }

        for metric, layout in metric_layout.items():
            # Columns beyond the end of a short row are omitted.
            area_data[metric] = {
                year: process_value(row[col_idx])
                for col_idx, year in layout
                if col_idx < len(row)
            }

        enrollment_data.append(area_data)

    return enrollment_data, conversion_records, name_conversion_errors
name_conversion_errors) + + except FileNotFoundError: + print(f"🔴 错误:Excel文件 '{EXCEL_PATH}' 不存在") + except Exception as e: + print(f"🔴 处理数据时发生错误:{str(e)}") + +if __name__ == '__main__': + main() diff --git a/Util/__pycache__/AreaUtil.cpython-310.pyc b/Util/__pycache__/AreaUtil.cpython-310.pyc index d6f1e1f..6c00559 100644 Binary files a/Util/__pycache__/AreaUtil.cpython-310.pyc and b/Util/__pycache__/AreaUtil.cpython-310.pyc differ