Files
YunNanProject/Tools/T4_7_TeacherCount.py
2025-09-10 13:08:18 +08:00

256 lines
17 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import openpyxl # 添加缺少的导入
import json
import os
import traceback
from Config.Config import EXCEL_PATH
from Util.AreaUtil import query_area_info
# Output directory: the 'Data' folder located beside this script's own folder
# (i.e. under the parent of the directory that contains this file).
_SCRIPT_DIR = os.path.dirname(__file__)
DATA_DIR = os.path.join(os.path.dirname(_SCRIPT_DIR), 'Data')
os.makedirs(DATA_DIR, exist_ok=True)

# Destination JSON file for the extracted teacher counts.
JSON_PATH = os.path.join(DATA_DIR, 'TeacherCount.json')

# The source workbook path comes from the project configuration.
file_name = EXCEL_PATH

# Accumulators filled while the sheet is processed.
teacher_data = []            # one record per processed area row
conversion_records = []      # rows whose name was converted to a different full name
name_conversion_errors = []  # rows whose area name could not be resolved
# Reporting years covered by the sheet; every year owns one group of columns
# inside each education stage.
REPORT_YEARS = tuple(range(2015, 2025))

# Staff and full-time-teacher figures, each split into total/urban/town/rural.
STAFF_FIELDS = ('total_staff', 'urban_staff', 'town_staff', 'rural_staff')
TEACHER_FIELDS = ('total_teacher', 'urban_teacher', 'town_teacher', 'rural_teacher')

# Education stages in sheet order, with the fields stored for every year.
# Secondary vocational education is special: only the two totals per year.
STAGE_FIELDS = (
    ('preschool_teachers', STAFF_FIELDS + TEACHER_FIELDS),
    ('primary_teachers', STAFF_FIELDS + TEACHER_FIELDS),
    ('junior_high_teachers', STAFF_FIELDS + TEACHER_FIELDS),
    ('senior_high_teachers', STAFF_FIELDS + TEACHER_FIELDS),
    ('vocational_teachers', ('total_staff', 'total_teacher')),
)

# 1-based index of Excel column D, which holds the first data value.
FIRST_DATA_COLUMN = 4


def build_data_columns(letter_of, first_column=FIRST_DATA_COLUMN):
    """Build the stage -> year -> column-label layout of the sheet.

    The data columns are contiguous: stage after stage, year after year,
    field after field, starting at ``first_column``.  Generating the table
    from that rule reproduces the hand-written mapping (D..CE, CF..FG,
    FH..II, IJ..LK, LL..ME) without 340 manually typed labels.

    Args:
        letter_of: Callable turning a 1-based column index into its label.
        first_column: 1-based index of the first data column.

    Returns:
        A dict ``{stage: {'years': [...], 'columns': [entry, ...]}}`` where
        each entry is ``{'year': year, field: label, ...}``.
    """
    layout = {}
    next_column = first_column
    for stage, fields in STAGE_FIELDS:
        year_entries = []
        for year in REPORT_YEARS:
            entry = {'year': year}
            for field in fields:
                entry[field] = letter_of(next_column)
                next_column += 1
            year_entries.append(entry)
        layout[stage] = {'years': list(REPORT_YEARS), 'columns': year_entries}
    return layout


def clean_count(value):
    """Convert a raw cell value into an integer head count.

    Blank cells, the '####' placeholder, booleans, negative numbers and
    anything that is not numeric yield 0.  Numeric cells are accepted
    directly, so a float such as ``123.0`` is no longer discarded the way
    the former ``str(value).isdigit()`` test discarded it.  Fractional
    values are rounded to the nearest integer.
    """
    if value is None or isinstance(value, bool):
        return 0
    if not isinstance(value, (int, float)):
        text = str(value).strip()
        if text in ('', '####'):
            return 0
        try:
            value = int(text)
        except ValueError:
            try:
                value = float(text)
            except ValueError:
                return 0
    try:
        number = round(value)
    except (ValueError, OverflowError):
        # NaN and infinity cannot be represented as an integer.
        return 0
    return number if number >= 0 else 0


def extract_year_counts(row, field_indexes):
    """Return ``{field: count}`` for one year of one stage.

    Args:
        row: Tuple of cell values for the current sheet row.
        field_indexes: Iterable of ``(field, zero_based_index)`` pairs.

    Fields whose column lies beyond the end of ``row`` are omitted.
    """
    return {
        field: clean_count(row[index])
        for field, index in field_indexes
        if index < len(row)
    }


def is_blank_row(row):
    """Return True when every cell of ``row`` is None or whitespace-only text."""
    return all(
        cell is None or (isinstance(cell, str) and not cell.strip())
        for cell in row
    )


# Initialised up front so the ``finally`` clause can tell whether the workbook
# was ever opened, instead of hiding a NameError behind a bare ``except``.
workbook = None
try:
    # Load the workbook and select the staff / full-time-teacher sheet.
    workbook = openpyxl.load_workbook(file_name, data_only=True)
    if '教职工数、专任教师数' not in workbook.sheetnames:
        print("❌ 错误：未找到'教职工数、专任教师数'Sheet")
        # Same effect as exit(1) without depending on the site module.
        raise SystemExit(1)
    sheet = workbook['教职工数、专任教师数']

    # Column ranges mapped to English attribute names.
    data_columns = build_data_columns(openpyxl.utils.get_column_letter)

    # Resolve every column label to a 0-based tuple index once, rather than
    # once per cell of every row.
    stage_plans = []
    for stage, config in data_columns.items():
        year_plans = []
        for year_config in config['columns']:
            field_indexes = [
                (field, openpyxl.utils.column_index_from_string(label) - 1)
                for field, label in year_config.items()
                if field != 'year'
            ]
            year_plans.append((str(year_config['year']), field_indexes))
        stage_plans.append((stage, year_plans))

    # Walk the data rows; the first 4 rows are headers, data starts at row 5.
    for row_num, row in enumerate(sheet.iter_rows(min_row=5, values_only=True), start=5):
        # The area name lives in column B (index 1).
        name_cell = row[1] if len(row) > 1 else None
        name_text = '' if name_cell is None else str(name_cell).strip()
        if name_text:
            raw_name = name_cell
        elif is_blank_row(row):
            # A completely empty row carries no data: skip it.
            continue
        else:
            # Data without an area name is kept under a placeholder name.
            raw_name = name_text = '未知地区'

        # Resolve the area name to its full name and code.
        area_info = query_area_info(name_text)
        if area_info and isinstance(area_info, dict) and 'full_name' in area_info and 'area_code' in area_info:
            area_name = area_info['full_name']
            area_code = area_info['area_code']
            if raw_name != area_name:
                conversion_records.append({
                    'row': row_num,
                    'raw_name': raw_name,
                    'converted_name': area_name
                })
        else:
            area_name = raw_name
            area_code = 'unknown'
            name_conversion_errors.append(f"{row_num}: '{raw_name}'")

        area_data = {
            'area_name': area_name,
            'area_code': area_code,
            'raw_name': raw_name
        }

        # Collect the counts of every education stage, keyed by year.
        for stage, year_plans in stage_plans:
            area_data[stage] = {
                year_key: extract_year_counts(row, field_indexes)
                for year_key, field_indexes in year_plans
            }
        teacher_data.append(area_data)

    # Save the JSON file.
    with open(JSON_PATH, 'w', encoding='utf-8') as f:
        json.dump(teacher_data, f, ensure_ascii=False, indent=2)

    # Print summary statistics.
    print(f"✅ 教师数据提取完成，已保存至：{JSON_PATH}")
    print(f"📊 共处理 {len(teacher_data)} 条地区数据")
    print("\n=== 名称转换记录 ===")
    if conversion_records:
        for record in conversion_records:
            # A separator keeps the raw and converted names from running together.
            print(f"🔄 行 {record['row']}: {record['raw_name']} → {record['converted_name']}")
        print(f"📊 共检测到 {len(conversion_records)} 项名称转换")
    else:
        print("📝 不存在名称转换的情况")
    if name_conversion_errors:
        print(f"⚠️ 发现 {len(name_conversion_errors)} 个区域名称转换失败:")
        for error in name_conversion_errors:
            print(f" - {error}")
except FileNotFoundError:
    print(f"🔴 错误Excel文件 '{file_name}' 不存在")
except Exception as e:
    # Top-level boundary of the script: report the failure with its traceback
    # on a separate line.
    print(f"🔴 处理数据时发生错误：{str(e)}\n{traceback.format_exc()}")
finally:
    # Close the workbook only if it was actually opened.
    if workbook is not None:
        workbook.close()