Files
YunNanProject/Tools/T6_ZaiXiaoShengCount.py
2025-09-10 14:09:58 +08:00

282 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import json
import traceback
from typing import List, Dict, Any, Tuple
from openpyxl.utils import column_index_from_string
from openpyxl.workbook import Workbook
from openpyxl.worksheet.worksheet import Worksheet
from Config.Config import EXCEL_PATH
from Util.AreaUtil import query_area_info
from Util.DataUtil import (
init_directories,
process_value,
print_conversion_stats,
convert_area_name,
save_to_json,
load_workbook_sheet
)
# ======================== Configuration constants ======================== #
# Output directory and JSON file: a 'Data' folder located one level above
# the directory that contains this script.
DATA_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'Data')
JSON_PATH = os.path.join(DATA_DIR, 'ZaiXiaoShengCount.json')
# Worksheet name
SHEET_NAME = '在校生数'
# Column holding the region name
REGION_NAME_COLUMN = 'B'
# First row (1-based) that carries data
START_ROW = 5
# Year range: 2015 through 2024 inclusive
YEAR_RANGE = range(2015, 2025)

# Education-stage configuration - enrolled students (2015-2024).
#
# The sheet lays the stages out back to back starting at column D: for each
# stage, one group of columns per year, and within a group one column per
# category. The table is therefore generated from the layout below instead
# of being spelled out letter by letter.
_FIRST_DATA_COLUMN = 4  # 1-based index of column 'D'
_AREA_KEYS = ('urban', 'town', 'rural', 'total')
_STAGE_LAYOUT = (
    # (name, chinese_name, per-year category keys in column order)
    ('preschool', '学前教育', _AREA_KEYS),
    ('primary', '小学教育', _AREA_KEYS),
    ('junior', '初中教育', _AREA_KEYS),
    ('senior', '高中教育', _AREA_KEYS),
    # Vocational education only has a single total per year.
    ('vocational', '中职教育', ('total',)),
)


def _column_letter(index):
    """Convert a 1-based column index to its spreadsheet letter (1 -> 'A', 27 -> 'AA')."""
    letters = ''
    while index > 0:
        index, remainder = divmod(index - 1, 26)
        letters = chr(ord('A') + remainder) + letters
    return letters


def _build_education_stages():
    """Expand _STAGE_LAYOUT into the list of stage dicts with explicit column letters."""
    stages = []
    cursor = _FIRST_DATA_COLUMN
    for name, chinese_name, keys in _STAGE_LAYOUT:
        columns = []
        for year in YEAR_RANGE:
            entry = {'year': year}
            for key in keys:
                entry[key] = _column_letter(cursor)
                cursor += 1
            columns.append(entry)
        stages.append({
            'name': name,
            'chinese_name': chinese_name,
            'columns': columns,
        })
    return stages


EDUCATION_STAGES = _build_education_stages()
# ======================== 核心逻辑 ======================== #
def extract_stage_data(row: Tuple[Any, ...], stage: Dict[str, Any]) -> Dict[str, Dict[str, int]]:
    """
    Collect the per-year figures of one education stage from a worksheet row.

    :param row: the cell values of one Excel row, in column order
    :param stage: one stage configuration; ``stage['columns']`` lists, per
        year, the column letter of every category to read
    :return: mapping of year (as a string) to a dict of category -> value,
        where each value is the cell content passed through process_value;
        cells beyond the end of the row are read as None
    """
    row_width = len(row)
    result = {}
    for spec in stage['columns']:
        year_key = spec['year']
        # Pre-school through senior high carry urban/town/rural/total;
        # any other entry (vocational) carries only a single total.
        if 'urban' in spec:
            categories = ('urban', 'town', 'rural', 'total')
        else:
            categories = ('total',)
        per_year = {}
        for category in categories:
            # Column letters are 1-based; the row tuple is 0-based.
            position = column_index_from_string(spec[category]) - 1
            cell = row[position] if row_width > position else None
            per_year[category] = process_value(cell)
        result[str(year_key)] = per_year
    return result
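# extract_student_data returns four values: the data list, the conversion records, the conversion errors and the processed count.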
def extract_student_data(sheet: Worksheet) -> Tuple[List[Dict[str, Any]], List[Any], List[str], int]:
    """
    Extract the enrolment data of every region row in the worksheet.

    Rows before START_ROW are skipped. For each remaining row the region
    name is read from REGION_NAME_COLUMN and handed to convert_area_name;
    rows for which it reports errors are recorded and skipped, all other
    rows are expanded with extract_stage_data for every entry of
    EDUCATION_STAGES. A row that raises is reported and skipped so that one
    bad row does not abort the whole run.

    :param sheet: Excel worksheet object (``max_row`` and
        ``iter_rows(values_only=True)`` are used)
    :return: a 4-tuple
        ``(student_data, conversion_records, name_conversion_errors, processed_count)``:
        the list of per-region dicts, the conversion records (whatever
        convert_area_name returned plus one status dict per row), the
        conversion error messages, and the number of rows that were
        fully extracted
    """
    student_data = []
    name_conversion_errors = []
    conversion_records = []
    processed_count = 0
    region_col_index = column_index_from_string(REGION_NAME_COLUMN) - 1
    print(f"✅ 开始处理在校生数数据,共{sheet.max_row}行数据")

    for row_idx, row in enumerate(sheet.iter_rows(values_only=True), start=1):
        # Skip the header rows.
        if row_idx < START_ROW:
            continue
        try:
            # The row must be wide enough to contain the region-name column.
            if len(row) <= region_col_index:
                print(f"⚠️ 第{row_idx}行数据不足,跳过")
                continue

            raw_name = row[region_col_index]
            area_name, area_code, new_conversion, new_errors = convert_area_name(raw_name, row_idx)
            conversion_records.extend(new_conversion)
            name_conversion_errors.extend(new_errors)

            if new_errors:
                # Name could not be converted: record the failure and move on.
                # NOTE(review): new_errors was already merged above, so this
                # adds a second entry for the same row - confirm whether
                # convert_area_name's own messages make this one redundant.
                name_conversion_errors.append(f"{row_idx}行: {raw_name}")
                conversion_records.append({
                    'row': row_idx,
                    'raw_name': raw_name,
                    'converted_name': None,
                    'status': 'error'
                })
                continue

            conversion_records.append({
                'row': row_idx,
                'raw_name': raw_name,
                'converted_name': area_name,
                'status': 'success'
            })

            area_data = {
                'area_name': area_name,
                'area_code': area_code,
                'raw_name': str(raw_name).strip(),
                'student_data': {}
            }
            for stage in EDUCATION_STAGES:
                area_data['student_data'][stage['name']] = extract_stage_data(row, stage)
            student_data.append(area_data)

            # Count the row only once its data has actually been stored, so
            # a failure during stage extraction is not reported as processed.
            processed_count += 1
            if processed_count % 10 == 0:
                print(f"🔄 已处理{processed_count}条数据...")
        except Exception as e:
            print(f"🔴 处理第{row_idx}行时发生错误:{str(e)}")

    return student_data, conversion_records, name_conversion_errors, processed_count
def main() -> None:
    """
    Run the enrolment-data pipeline end to end.

    Ensures the data directory exists, loads the SHEET_NAME worksheet from
    EXCEL_PATH, extracts the per-region data, writes it to JSON_PATH and
    prints the name-conversion statistics. Errors are reported on stdout
    rather than propagated.
    """
    try:
        init_directories(DATA_DIR)

        sheet = load_workbook_sheet(EXCEL_PATH, SHEET_NAME)
        if not sheet:
            print(f"❌ 错误:未找到'{SHEET_NAME}'工作表")
            return

        # The processed count is only used for progress output inside the
        # extractor; it is not needed here.
        student_data, conversion_records, name_conversion_errors, _processed_count = extract_student_data(sheet)

        save_to_json(student_data, JSON_PATH)
        print_conversion_stats(conversion_records, name_conversion_errors)
        print(f"💾 数据已保存至 {JSON_PATH}")
    except FileNotFoundError:
        print(f"🔴 错误Excel文件 '{EXCEL_PATH}' 不存在")
    except Exception as e:
        # Put the traceback on its own line; previously it was glued
        # directly onto the end of the error message.
        print(f"🔴 处理数据时发生错误:{e}\n{traceback.format_exc()}")


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()