Files
YunNanProject/Tools/T4_ClassCount.py
2025-09-10 13:58:30 +08:00

212 lines
8.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import traceback
from typing import List, Dict, Any, Tuple
import openpyxl
from Config.Config import EXCEL_PATH
from Util.DataUtil import (
init_directories, process_value, print_conversion_stats,
convert_area_name, save_to_json, load_workbook_sheet
)
# ======================== 配置常量 ========================
# 数据目录和JSON路径
DATA_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'Data')
JSON_PATH = os.path.join(DATA_DIR, 'ClassCount.json')
# 工作表名称
SHEET_NAME = '招生数'
# 区域名称所在列
REGION_NAME_COLUMN = 'B'
# 起始行(跳过表头)
START_ROW = 4
# 年份范围
YEAR_RANGE = [2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024]
# 数据列映射配置
DATA_COLUMNS = {
# 学前教育每年份3列城区/镇区/乡村)
'preschool_classes': {
'years': YEAR_RANGE,
'columns': [
{'year': 2015, 'urban': 'D', 'town': 'E', 'rural': 'F'},
{'year': 2016, 'urban': 'H', 'town': 'I', 'rural': 'J'},
{'year': 2017, 'urban': 'L', 'town': 'M', 'rural': 'N'},
{'year': 2018, 'urban': 'P', 'town': 'Q', 'rural': 'R'},
{'year': 2019, 'urban': 'T', 'town': 'U', 'rural': 'V'},
{'year': 2020, 'urban': 'X', 'town': 'Y', 'rural': 'Z'},
{'year': 2021, 'urban': 'AB', 'town': 'AC', 'rural': 'AD'},
{'year': 2022, 'urban': 'AF', 'town': 'AG', 'rural': 'AH'},
{'year': 2023, 'urban': 'AJ', 'town': 'AK', 'rural': 'AL'},
{'year': 2024, 'urban': 'AN', 'town': 'AO', 'rural': 'AP'}
],
'categories': ['urban', 'town', 'rural']
},
# 小学教育
'primary_classes': {
'years': YEAR_RANGE,
'columns': [
{'year': 2015, 'urban': 'AR', 'town': 'AS', 'rural': 'AT'},
{'year': 2016, 'urban': 'AV', 'town': 'AW', 'rural': 'AX'},
{'year': 2017, 'urban': 'AZ', 'town': 'BA', 'rural': 'BB'},
{'year': 2018, 'urban': 'BD', 'town': 'BE', 'rural': 'BF'},
{'year': 2019, 'urban': 'BH', 'town': 'BI', 'rural': 'BJ'},
{'year': 2020, 'urban': 'BL', 'town': 'BM', 'rural': 'BN'},
{'year': 2021, 'urban': 'BP', 'town': 'BQ', 'rural': 'BR'},
{'year': 2022, 'urban': 'BT', 'town': 'BU', 'rural': 'BV'},
{'year': 2023, 'urban': 'BX', 'town': 'BY', 'rural': 'BZ'},
{'year': 2024, 'urban': 'CB', 'town': 'CC', 'rural': 'CD'}
],
'categories': ['urban', 'town', 'rural']
},
# 初中教育
'junior_high_classes': {
'years': YEAR_RANGE,
'columns': [
{'year': 2015, 'urban': 'CF', 'town': 'CG', 'rural': 'CH'},
{'year': 2016, 'urban': 'CJ', 'town': 'CK', 'rural': 'CL'},
{'year': 2017, 'urban': 'CN', 'town': 'CO', 'rural': 'CP'},
{'year': 2018, 'urban': 'CR', 'town': 'CS', 'rural': 'CT'},
{'year': 2019, 'urban': 'CV', 'town': 'CW', 'rural': 'CX'},
{'year': 2020, 'urban': 'CZ', 'town': 'DA', 'rural': 'DB'},
{'year': 2021, 'urban': 'DD', 'town': 'DE', 'rural': 'DF'},
{'year': 2022, 'urban': 'DH', 'town': 'DI', 'rural': 'DJ'},
{'year': 2023, 'urban': 'DL', 'town': 'DM', 'rural': 'DN'},
{'year': 2024, 'urban': 'DP', 'town': 'DQ', 'rural': 'DR'}
],
'categories': ['urban', 'town', 'rural']
},
# 高中教育
'senior_high_classes': {
'years': YEAR_RANGE,
'columns': [
{'year': 2015, 'urban': 'DT', 'town': 'DU', 'rural': 'DV'},
{'year': 2016, 'urban': 'DX', 'town': 'DY', 'rural': 'DZ'},
{'year': 2017, 'urban': 'EB', 'town': 'EC', 'rural': 'ED'},
{'year': 2018, 'urban': 'EF', 'town': 'EG', 'rural': 'EH'},
{'year': 2019, 'urban': 'EJ', 'town': 'EK', 'rural': 'EL'},
{'year': 2020, 'urban': 'EN', 'town': 'EO', 'rural': 'EP'},
{'year': 2021, 'urban': 'ER', 'town': 'ES', 'rural': 'ET'},
{'year': 2022, 'urban': 'EV', 'town': 'EW', 'rural': 'EX'},
{'year': 2023, 'urban': 'EZ', 'town': 'FA', 'rural': 'FB'},
{'year': 2024, 'urban': 'FD', 'town': 'FE', 'rural': 'FF'}
],
'categories': ['urban', 'town', 'rural']
},
# 中职教育(单值列)
'vocational_classes': {
'years': YEAR_RANGE,
'columns': [
{'year': 2015, 'column': 'FH'},
{'year': 2016, 'column': 'FI'},
{'year': 2017, 'column': 'FJ'},
{'year': 2018, 'column': 'FK'},
{'year': 2019, 'column': 'FL'},
{'year': 2020, 'column': 'FM'},
{'year': 2021, 'column': 'FN'},
{'year': 2022, 'column': 'FO'},
{'year': 2023, 'column': 'FP'},
{'year': 2024, 'column': 'FQ'}
]
}
}
# ======================== 核心逻辑 ========================
def extract_class_data(sheet: openpyxl.worksheet.worksheet.Worksheet) -> Tuple[List[Dict[str, Any]], List[Dict[str, str]], List[str]]:
"""提取班级数据并处理区域名称转换
Args:
sheet: 工作表对象
Returns:
Tuple包含:
- class_data: 提取的班级数据列表
- conversion_records: 区域名称转换记录
- errors: 错误信息列表
"""
class_data: List[Dict[str, Any]] = []
conversion_records: List[Dict[str, str]] = []
errors: List[str] = []
for row_num, row in enumerate(sheet.iter_rows(min_row=START_ROW, values_only=True), start=START_ROW):
# 获取区域名称B列
raw_name = row[1] if (len(row) > 1 and row[1] is not None) else '未知地区'
if not raw_name: # 跳过空行
continue
# 转换区域名称
area_name, area_code, row_conversions, row_errors = convert_area_name(raw_name, row_num)
conversion_records.extend(row_conversions)
errors.extend(row_errors)
# 创建区域数据对象
area_info = {
'area_name': area_name,
'area_code': area_code,
'raw_name': raw_name
}
# 提取各教育阶段班级数据
for stage, config in DATA_COLUMNS.items():
stage_data = {}
# 处理多类别教育阶段(学前/小学/初中/高中)
if 'categories' in config:
for year_config in config['columns']:
year = year_config['year']
year_data = {}
for category in config['categories']:
col_name = year_config[category]
col_idx = openpyxl.utils.column_index_from_string(col_name) - 1
value = row[col_idx] if col_idx < len(row) else None
year_data[category] = process_value(value)
stage_data[str(year)] = year_data
# 处理中职教育(单值)
else:
for year_config in config['columns']:
year = year_config['year']
col_name = year_config['column']
col_idx = openpyxl.utils.column_index_from_string(col_name) - 1
value = row[col_idx] if col_idx < len(row) else None
stage_data[str(year)] = process_value(value)
area_info[stage] = stage_data
class_data.append(area_info)
return class_data, conversion_records, errors
# ======================== 主函数 ========================
def main() -> None:
"""主函数:加载数据并执行提取流程"""
try:
# 初始化目录
init_directories(DATA_DIR)
# 加载工作表
sheet = load_workbook_sheet(EXCEL_PATH, SHEET_NAME)
if not sheet:
return
# 提取班级数据
class_data, conversion_records, errors = extract_class_data(sheet)
# 打印转换统计
print_conversion_stats(conversion_records, errors)
# 保存数据到JSON
save_to_json(class_data, JSON_PATH)
except Exception as e:
print(f"🔴 处理数据时发生错误:{str(e)}")
print(traceback.format_exc())
if __name__ == '__main__':
main()