Files
YunNanProject/Tools/T4_6_ZaiXiaoShengCount.py
2025-09-10 11:56:56 +08:00

269 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import openpyxl
import json
import os
import traceback
from openpyxl.utils import column_index_from_string
from Config.Config import EXCEL_PATH
from Util.AreaUtil import query_area_info
# Output directory for generated data files: a "Data" folder located in the
# parent of this file's directory.  abspath() anchors the result to the
# script's real location even when __file__ is a relative path (e.g. the
# script is launched from inside its own directory on older interpreters);
# without it both dirname() calls collapse to '' and "Data" would be
# resolved against the current working directory instead.
DATA_DIR = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    'Data',
)
# Create the directory at import time so later writes cannot fail on a missing folder.
os.makedirs(DATA_DIR, exist_ok=True)
# Path of the JSON file that receives the enrolled-student counts.
JSON_PATH = os.path.join(DATA_DIR, 'ZaiXiaoShengCount.json')
# Education-stage configuration for enrolled-student counts (2015-2024).
#
# Every stage occupies one contiguous run of worksheet columns: one group of
# columns per year, in the order given by the stage's category tuple.  The
# table is therefore generated from each stage's first column rather than
# spelled out cell by cell.
def _column_letter(index):
    """Return the Excel column letters for a 1-based column index (27 -> 'AA')."""
    letters = ''
    while index > 0:
        index, offset = divmod(index - 1, 26)
        letters = chr(ord('A') + offset) + letters
    return letters


def _stage_columns(first_index, categories, years=range(2015, 2025)):
    """Build the per-year ``{'year': ..., category: column letter}`` entries of one stage."""
    width = len(categories)
    entries = []
    for position, year in enumerate(years):
        base = first_index + position * width
        entry = {'year': year}
        for offset, category in enumerate(categories):
            entry[category] = _column_letter(base + offset)
        entries.append(entry)
    return entries


# Stages from preschool to senior high report urban / town / rural / total.
_AREA_SPLIT = ('urban', 'town', 'rural', 'total')

education_stages = [
    {
        'name': name,
        'chinese_name': chinese_name,
        'columns': _stage_columns(first_index, categories),
    }
    for name, chinese_name, first_index, categories in (
        ('preschool', '学前教育', 4, _AREA_SPLIT),     # columns D .. AQ
        ('primary', '小学教育', 44, _AREA_SPLIT),      # columns AR .. CE
        ('junior', '初中教育', 84, _AREA_SPLIT),       # columns CF .. DS
        ('senior', '高中教育', 124, _AREA_SPLIT),      # columns DT .. FG
        ('vocational', '中职教育', 164, ('total',)),   # columns FH .. FQ (single value per year)
    )
]
def process_value(value):
    """Convert a raw worksheet cell value to an integer count.

    Args:
        value: The cell content; may be ``None``, a number, or text.

    Returns:
        The value truncated to ``int``.  ``0`` is returned for ``None``,
        blank text, the placeholder markers ``'####'``, ``'NA'`` and
        ``'N/A'``, and for anything that cannot be read as a finite number.
    """
    if value is None:
        return 0
    text = str(value).strip()
    # Blank cells and placeholder markers count as "no data".
    if not text or text in ('####', 'NA', 'N/A'):
        return 0
    try:
        # Drop thousands separators, then go through float() so that values
        # such as "12.0" or 12.7 are accepted and truncated.
        return int(float(text.replace(',', '')))
    except (ValueError, OverflowError):
        # ValueError: non-numeric text or NaN.  OverflowError: float() also
        # accepts "inf"/"infinity", which int() cannot represent.
        return 0
def _resolve_stage_columns(stages):
    """Convert the column letters in *stages* to 0-based row-tuple indices.

    Returns a list of ``(stage_name, years)`` pairs, where ``years`` is a list
    of ``(year_key, {category: index})`` pairs and ``year_key`` is the year as
    a string (the key used in the JSON output).
    """
    resolved = []
    for stage in stages:
        years = []
        for year_config in stage['columns']:
            # Stages with an urban/town/rural split carry four columns per
            # year; the remaining ones carry only a total.
            if 'urban' in year_config:
                categories = ('urban', 'town', 'rural', 'total')
            else:
                categories = ('total',)
            indices = {
                category: column_index_from_string(year_config[category]) - 1
                for category in categories
            }
            years.append((str(year_config['year']), indices))
        resolved.append((stage['name'], years))
    return resolved


def _extract_student_data(row, stage_columns):
    """Read every configured count from *row*; columns beyond the row's end are treated as empty."""
    result = {}
    for stage_name, years in stage_columns:
        stage_data = {}
        for year_key, indices in years:
            stage_data[year_key] = {
                category: process_value(row[index] if index < len(row) else None)
                for category, index in indices.items()
            }
        result[stage_name] = stage_data
    return result


def main():
    """Export the enrolled-student counts from the Excel workbook to JSON.

    Loads the '在校生数' sheet of ``EXCEL_PATH``, skips the four header rows
    and, for every row with a non-empty name in column B, builds one record
    holding the resolved area name and code, the raw name, and the counts per
    education stage and year.  All records are written to ``JSON_PATH`` and a
    summary is printed.

    Rows whose name ``query_area_info`` cannot resolve are still exported,
    using the raw name and the area code ``'unknown'``; they are listed as
    conversion failures in the summary.  A row that raises while being
    processed is reported and skipped.  Nothing is written when the sheet is
    missing or the workbook cannot be loaded.
    """
    file_name = EXCEL_PATH
    student_data = []
    name_conversion_errors = []
    workbook = None
    try:
        # Load the workbook (cached values, not formulas) and pick the sheet.
        workbook = openpyxl.load_workbook(file_name, data_only=True)
        if '在校生数' not in workbook.sheetnames:
            print("❌ 错误:未找到'在校生数'Sheet")
            return
        sheet = workbook['在校生数']
        print(f"✅ 成功加载Excel文件{file_name}")
        print(f"✅ 开始处理在校生数数据,共{sheet.max_row}行数据")

        # Resolve column letters once instead of for every cell of every row.
        stage_columns = _resolve_stage_columns(education_stages)

        for row_idx, row in enumerate(sheet.iter_rows(values_only=True), start=1):
            # The first four rows are table headers.
            if row_idx < 5:
                continue
            try:
                # The area name lives in column B (index 1).
                if len(row) < 2:
                    print(f"⚠️ 第{row_idx}行数据不足,跳过")
                    continue
                raw_name = row[1]
                if raw_name is None:
                    print(f"⚠️ 第{row_idx}行B列区域名称为空跳过该行")
                    continue
                raw_name = str(raw_name).strip()
                if not raw_name:
                    print(f"⚠️ 第{row_idx}行B列区域名称为空字符串跳过该行")
                    continue

                # Resolve the canonical name/code; fall back to the raw name.
                area_info = query_area_info(raw_name)
                area_name = raw_name
                area_code = 'unknown'
                conversion_error = None
                if isinstance(area_info, dict):
                    if 'full_name' in area_info and 'area_code' in area_info:
                        area_name = area_info['full_name']
                        area_code = area_info['area_code']
                    else:
                        conversion_error = f"{row_idx}行: {raw_name} (缺少必要字段)"
                else:
                    conversion_error = f"{row_idx}行: {raw_name}"

                student_data.append({
                    'area_name': area_name,
                    'area_code': area_code,
                    'raw_name': raw_name,
                    'student_data': _extract_student_data(row, stage_columns),
                })
                # Record the conversion outcome only after the row has really
                # been exported, so the summary always matches the JSON file.
                if conversion_error is not None:
                    name_conversion_errors.append(conversion_error)

                # Progress is based on exported rows, so each multiple of ten
                # is announced exactly once.
                if len(student_data) % 10 == 0:
                    print(f"🔄 已处理{len(student_data)}条数据...")
            except Exception as e:
                # Best effort: one malformed row must not abort the export.
                print(f"🔴 处理第{row_idx}行时发生错误:{str(e)}")
                continue

        # Persist the collected records.
        with open(JSON_PATH, 'w', encoding='utf-8') as f:
            json.dump(student_data, f, ensure_ascii=False, indent=2)

        exported_count = len(student_data)
        print("\n=== 数据处理完成 ===")
        print(f"📊 共处理 {exported_count} 条地区数据")
        print(f"✅ 区域名称转换成功: {exported_count - len(name_conversion_errors)}")
        if name_conversion_errors:
            print(f"❌ 区域名称转换失败: {len(name_conversion_errors)}")
            # Show at most five failures to keep the summary short.
            for error in name_conversion_errors[:5]:
                print(f" - {error}")
            if len(name_conversion_errors) > 5:
                print(f" - ... 等{len(name_conversion_errors)-5}个错误")
        print(f"💾 数据已保存至 {JSON_PATH}")
    except FileNotFoundError:
        print(f"🔴 错误Excel文件 '{file_name}' 不存在")
    except Exception as e:
        print(f"🔴 处理数据时发生错误:{str(e)}{traceback.format_exc()}")
    finally:
        # workbook stays None when loading itself failed.
        if workbook is not None:
            workbook.close()
# Run the export only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()