Files
YunNanProject/Tools/T8_SchoolMianJi.py
2025-09-10 14:26:55 +08:00

278 lines
20 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import openpyxl # 添加缺少的导入
import json
import os
import traceback
from Config.Config import EXCEL_PATH
from Util.AreaUtil import query_area_info
def main():
"""主函数:提取学校面积数据"""
# 创建数据保存目录
DATA_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'Data')
os.makedirs(DATA_DIR, exist_ok=True)
JSON_PATH = os.path.join(DATA_DIR, 'SchoolArea.json') # 修改为学校面积的JSON路径
file_name = EXCEL_PATH
area_data = []
name_conversion_errors = [] # 记录转换失败的名称
conversion_records = [] # 定义转换记录变量
workbook = None # 初始化workbook变量确保在finally块中可以访问
try:
# 加载工作簿并选择学校面积、教学辅助房面积Sheet
workbook = openpyxl.load_workbook(file_name, data_only=True)
if '学校面积、教学辅助房面积' not in workbook.sheetnames:
print("❌ 错误:未找到'学校面积、教学辅助房面积'Sheet")
return
sheet = workbook['学校面积、教学辅助房面积']
# 定义数据列范围与英文属性映射
data_columns = {
# 学前教育每年份8列学校占地面积[总计/城区/镇区/乡村],教学及辅助用房[总计/城区/镇区/乡村]
'preschool_area': {
'years': [2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024],
'columns': [
{'year': 2015, 'school_total': 'D', 'school_urban': 'E', 'school_town': 'F', 'school_rural': 'G', 'teaching_total': 'H', 'teaching_urban': 'I', 'teaching_town': 'J', 'teaching_rural': 'K'},
{'year': 2016, 'school_total': 'L', 'school_urban': 'M', 'school_town': 'N', 'school_rural': 'O', 'teaching_total': 'P', 'teaching_urban': 'Q', 'teaching_town': 'R', 'teaching_rural': 'S'},
{'year': 2017, 'school_total': 'T', 'school_urban': 'U', 'school_town': 'V', 'school_rural': 'W', 'teaching_total': 'X', 'teaching_urban': 'Y', 'teaching_town': 'Z', 'teaching_rural': 'AA'},
{'year': 2018, 'school_total': 'AB', 'school_urban': 'AC', 'school_town': 'AD', 'school_rural': 'AE', 'teaching_total': 'AF', 'teaching_urban': 'AG', 'teaching_town': 'AH', 'teaching_rural': 'AI'},
{'year': 2019, 'school_total': 'AJ', 'school_urban': 'AK', 'school_town': 'AL', 'school_rural': 'AM', 'teaching_total': 'AN', 'teaching_urban': 'AO', 'teaching_town': 'AP', 'teaching_rural': 'AQ'},
{'year': 2020, 'school_total': 'AR', 'school_urban': 'AS', 'school_town': 'AT', 'school_rural': 'AU', 'teaching_total': 'AV', 'teaching_urban': 'AW', 'teaching_town': 'AX', 'teaching_rural': 'AY'},
{'year': 2021, 'school_total': 'AZ', 'school_urban': 'BA', 'school_town': 'BB', 'school_rural': 'BC', 'teaching_total': 'BD', 'teaching_urban': 'BE', 'teaching_town': 'BF', 'teaching_rural': 'BG'},
{'year': 2022, 'school_total': 'BH', 'school_urban': 'BI', 'school_town': 'BJ', 'school_rural': 'BK', 'teaching_total': 'BL', 'teaching_urban': 'BM', 'teaching_town': 'BN', 'teaching_rural': 'BO'},
{'year': 2023, 'school_total': 'BP', 'school_urban': 'BQ', 'school_town': 'BR', 'school_rural': 'BS', 'teaching_total': 'BT', 'teaching_urban': 'BU', 'teaching_town': 'BV', 'teaching_rural': 'BW'},
{'year': 2024, 'school_total': 'BX', 'school_urban': 'BY', 'school_town': 'BZ', 'school_rural': 'CA', 'teaching_total': 'CB', 'teaching_urban': 'CC', 'teaching_town': 'CD', 'teaching_rural': 'CE'}
]
},
# 小学教育
'primary_area': {
'years': [2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024],
'columns': [
{'year': 2015, 'school_total': 'CF', 'school_urban': 'CG', 'school_town': 'CH', 'school_rural': 'CI', 'teaching_total': 'CJ', 'teaching_urban': 'CK', 'teaching_town': 'CL', 'teaching_rural': 'CM'},
{'year': 2016, 'school_total': 'CN', 'school_urban': 'CO', 'school_town': 'CP', 'school_rural': 'CQ', 'teaching_total': 'CR', 'teaching_urban': 'CS', 'teaching_town': 'CT', 'teaching_rural': 'CU'},
{'year': 2017, 'school_total': 'CV', 'school_urban': 'CW', 'school_town': 'CX', 'school_rural': 'CY', 'teaching_total': 'CZ', 'teaching_urban': 'DA', 'teaching_town': 'DB', 'teaching_rural': 'DC'},
{'year': 2018, 'school_total': 'DD', 'school_urban': 'DE', 'school_town': 'DF', 'school_rural': 'DG', 'teaching_total': 'DH', 'teaching_urban': 'DI', 'teaching_town': 'DJ', 'teaching_rural': 'DK'},
{'year': 2019, 'school_total': 'DL', 'school_urban': 'DM', 'school_town': 'DN', 'school_rural': 'DO', 'teaching_total': 'DP', 'teaching_urban': 'DQ', 'teaching_town': 'DR', 'teaching_rural': 'DS'},
{'year': 2020, 'school_total': 'DT', 'school_urban': 'DU', 'school_town': 'DV', 'school_rural': 'DW', 'teaching_total': 'DX', 'teaching_urban': 'DY', 'teaching_town': 'DZ', 'teaching_rural': 'EA'},
{'year': 2021, 'school_total': 'EB', 'school_urban': 'EC', 'school_town': 'ED', 'school_rural': 'EE', 'teaching_total': 'EF', 'teaching_urban': 'EG', 'teaching_town': 'EH', 'teaching_rural': 'EI'},
{'year': 2022, 'school_total': 'EJ', 'school_urban': 'EK', 'school_town': 'EL', 'school_rural': 'EM', 'teaching_total': 'EN', 'teaching_urban': 'EO', 'teaching_town': 'EP', 'teaching_rural': 'EQ'},
{'year': 2023, 'school_total': 'ER', 'school_urban': 'ES', 'school_town': 'ET', 'school_rural': 'EU', 'teaching_total': 'EV', 'teaching_urban': 'EW', 'teaching_town': 'EX', 'teaching_rural': 'EY'},
{'year': 2024, 'school_total': 'EZ', 'school_urban': 'FA', 'school_town': 'FB', 'school_rural': 'FC', 'teaching_total': 'FD', 'teaching_urban': 'FE', 'teaching_town': 'FF', 'teaching_rural': 'FG'}
]
},
# 初中教育
'junior_high_area': {
'years': [2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024],
'columns': [
{'year': 2015, 'school_total': 'FH', 'school_urban': 'FI', 'school_town': 'FJ', 'school_rural': 'FK', 'teaching_total': 'FL', 'teaching_urban': 'FM', 'teaching_town': 'FN', 'teaching_rural': 'FO'},
{'year': 2016, 'school_total': 'FP', 'school_urban': 'FQ', 'school_town': 'FR', 'school_rural': 'FS', 'teaching_total': 'FT', 'teaching_urban': 'FU', 'teaching_town': 'FV', 'teaching_rural': 'FW'},
{'year': 2017, 'school_total': 'FX', 'school_urban': 'FY', 'school_town': 'FZ', 'school_rural': 'GA', 'teaching_total': 'GB', 'teaching_urban': 'GC', 'teaching_town': 'GD', 'teaching_rural': 'GE'},
{'year': 2018, 'school_total': 'GF', 'school_urban': 'GG', 'school_town': 'GH', 'school_rural': 'GI', 'teaching_total': 'GJ', 'teaching_urban': 'GK', 'teaching_town': 'GL', 'teaching_rural': 'GM'},
{'year': 2019, 'school_total': 'GN', 'school_urban': 'GO', 'school_town': 'GP', 'school_rural': 'GQ', 'teaching_total': 'GR', 'teaching_urban': 'GS', 'teaching_town': 'GT', 'teaching_rural': 'GU'},
{'year': 2020, 'school_total': 'GV', 'school_urban': 'GW', 'school_town': 'GX', 'school_rural': 'GY', 'teaching_total': 'GZ', 'teaching_urban': 'HA', 'teaching_town': 'HB', 'teaching_rural': 'HC'},
{'year': 2021, 'school_total': 'HD', 'school_urban': 'HE', 'school_town': 'HF', 'school_rural': 'HG', 'teaching_total': 'HH', 'teaching_urban': 'HI', 'teaching_town': 'HJ', 'teaching_rural': 'HK'},
{'year': 2022, 'school_total': 'HL', 'school_urban': 'HM', 'school_town': 'HN', 'school_rural': 'HO', 'teaching_total': 'HP', 'teaching_urban': 'HQ', 'teaching_town': 'HR', 'teaching_rural': 'HS'},
{'year': 2023, 'school_total': 'HT', 'school_urban': 'HU', 'school_town': 'HV', 'school_rural': 'HW', 'teaching_total': 'HX', 'teaching_urban': 'HY', 'teaching_town': 'HZ', 'teaching_rural': 'IA'},
{'year': 2024, 'school_total': 'IB', 'school_urban': 'IC', 'school_town': 'ID', 'school_rural': 'IE', 'teaching_total': 'IF', 'teaching_urban': 'IG', 'teaching_town': 'IH', 'teaching_rural': 'II'}
]
},
# 高中教育
'senior_high_area': {
'years': [2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024],
'columns': [
{'year': 2015, 'school_total': 'IJ', 'school_urban': 'IK', 'school_town': 'IL', 'school_rural': 'IM', 'teaching_total': 'IN', 'teaching_urban': 'IO', 'teaching_town': 'IP', 'teaching_rural': 'IQ'},
{'year': 2016, 'school_total': 'IR', 'school_urban': 'IS', 'school_town': 'IT', 'school_rural': 'IU', 'teaching_total': 'IV', 'teaching_urban': 'IW', 'teaching_town': 'IX', 'teaching_rural': 'IY'},
{'year': 2017, 'school_total': 'IZ', 'school_urban': 'JA', 'school_town': 'JB', 'school_rural': 'JC', 'teaching_total': 'JD', 'teaching_urban': 'JE', 'teaching_town': 'JF', 'teaching_rural': 'JG'},
{'year': 2018, 'school_total': 'JH', 'school_urban': 'JI', 'school_town': 'JJ', 'school_rural': 'JK', 'teaching_total': 'JL', 'teaching_urban': 'JM', 'teaching_town': 'JN', 'teaching_rural': 'JO'},
{'year': 2019, 'school_total': 'JP', 'school_urban': 'JQ', 'school_town': 'JR', 'school_rural': 'JS', 'teaching_total': 'JT', 'teaching_urban': 'JU', 'teaching_town': 'JV', 'teaching_rural': 'JW'},
{'year': 2020, 'school_total': 'JX', 'school_urban': 'JY', 'school_town': 'JZ', 'school_rural': 'KA', 'teaching_total': 'KB', 'teaching_urban': 'KC', 'teaching_town': 'KD', 'teaching_rural': 'KE'},
{'year': 2021, 'school_total': 'KF', 'school_urban': 'KG', 'school_town': 'KH', 'school_rural': 'KI', 'teaching_total': 'KJ', 'teaching_urban': 'KK', 'teaching_town': 'KL', 'teaching_rural': 'KM'},
{'year': 2022, 'school_total': 'KN', 'school_urban': 'KO', 'school_town': 'KP', 'school_rural': 'KQ', 'teaching_total': 'KR', 'teaching_urban': 'KS', 'teaching_town': 'KT', 'teaching_rural': 'KU'},
{'year': 2023, 'school_total': 'KV', 'school_urban': 'KW', 'school_town': 'KX', 'school_rural': 'KY', 'teaching_total': 'KZ', 'teaching_urban': 'LA', 'teaching_town': 'LB', 'teaching_rural': 'LC'},
{'year': 2024, 'school_total': 'LD', 'school_urban': 'LE', 'school_town': 'LF', 'school_rural': 'LG', 'teaching_total': 'LH', 'teaching_urban': 'LI', 'teaching_town': 'LJ', 'teaching_rural': 'LK'}
]
},
# 中职教育特殊每年2列
'vocational_area': {
'years': [2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024],
'columns': [
{'year': 2015, 'school_total': 'LL', 'teaching_total': 'LM'},
{'year': 2016, 'school_total': 'LN', 'teaching_total': 'LO'},
{'year': 2017, 'school_total': 'LP', 'teaching_total': 'LQ'},
{'year': 2018, 'school_total': 'LR', 'teaching_total': 'LS'},
{'year': 2019, 'school_total': 'LT', 'teaching_total': 'LU'},
{'year': 2020, 'school_total': 'LV', 'teaching_total': 'LW'},
{'year': 2021, 'school_total': 'LX', 'teaching_total': 'LY'},
{'year': 2022, 'school_total': 'LZ', 'teaching_total': 'MA'},
{'year': 2023, 'school_total': 'MB', 'teaching_total': 'MC'},
{'year': 2024, 'school_total': 'MD', 'teaching_total': 'ME'}
]
}
}
# 遍历数据行跳过前4行表头从第5行开始
for row_num, row in enumerate(sheet.iter_rows(min_row=5, values_only=True), start=5):
# 区域名称从B列获取索引1
raw_name = row[1] if (len(row) > 1 and row[1] is not None) else '未知地区'
if not raw_name: # 跳过空行
continue
# 区域名称转换
str_raw_name = str(raw_name).strip() if raw_name is not None else '未知地区'
area_info = query_area_info(str_raw_name)
if area_info and isinstance(area_info, dict) and 'full_name' in area_info and 'area_code' in area_info:
area_name = area_info['full_name']
area_code = area_info['area_code']
if raw_name != area_name:
conversion_records.append({
'row': row_num,
'raw_name': raw_name,
'converted_name': area_name
})
else:
area_name = raw_name
area_code = 'unknown'
name_conversion_errors.append(f"{row_num}: '{raw_name}'")
# 循环中创建区域数据字典(重命名变量)
current_area_data = {
'area_name': area_name,
'area_code': area_code,
'raw_name': raw_name
}
# 提取各教育阶段面积数据
for stage, config in data_columns.items():
stage_data = {}
for year_config in config['columns']:
year = year_config['year']
year_data = {}
# 处理学校占地面积数据
school_cols = ['school_total', 'school_urban', 'school_town', 'school_rural']
has_school_categories = all(col in year_config for col in school_cols)
if has_school_categories:
# 处理分类学校面积数据
for col in school_cols:
col_name = year_config[col]
col_idx = openpyxl.utils.column_index_from_string(col_name) - 1
if col_idx < len(row):
value = row[col_idx]
# 数据清洗与转换
if value is None:
year_data[col] = 0
else:
str_value = str(value).strip()
if str_value in ['', '####']:
year_data[col] = 0
else:
try:
year_data[col] = float(str_value)
except ValueError:
year_data[col] = 0
else:
# 处理中职学校总面积
col_name = year_config['school_total']
col_idx = openpyxl.utils.column_index_from_string(col_name) - 1
if col_idx < len(row):
value = row[col_idx]
if value is None:
year_data['school_total'] = 0
else:
str_value = str(value).strip()
if str_value in ['', '####']:
year_data['school_total'] = 0
else:
try:
year_data['school_total'] = float(str_value)
except ValueError:
year_data['school_total'] = 0
# 处理教学辅助用房面积数据
teaching_cols = ['teaching_total', 'teaching_urban', 'teaching_town', 'teaching_rural']
has_teaching_categories = all(col in year_config for col in teaching_cols)
if has_teaching_categories:
# 处理分类教学辅助用房面积数据
for col in teaching_cols:
col_name = year_config[col]
col_idx = openpyxl.utils.column_index_from_string(col_name) - 1
if col_idx < len(row):
value = row[col_idx]
# 数据清洗与转换
if value is None:
year_data[col] = 0
else:
str_value = str(value).strip()
if str_value in ['', '####']:
year_data[col] = 0
else:
try:
year_data[col] = float(str_value)
except ValueError:
year_data[col] = 0
else:
# 处理中职教学辅助用房总面积
col_name = year_config['teaching_total']
col_idx = openpyxl.utils.column_index_from_string(col_name) - 1
if col_idx < len(row):
value = row[col_idx]
if value is None:
year_data['teaching_total'] = 0
else:
str_value = str(value).strip()
if str_value in ['', '####']:
year_data['teaching_total'] = 0
else:
try:
year_data['teaching_total'] = float(str_value)
except ValueError:
year_data['teaching_total'] = 0
stage_data[str(year)] = year_data
# 修复:将阶段数据添加到当前区域字典,而非区域列表
current_area_data[stage] = stage_data
# 将当前区域数据添加到列表
area_data.append(current_area_data)
# 保存JSON文件
with open(JSON_PATH, 'w', encoding='utf-8') as f:
json.dump(area_data, f, ensure_ascii=False, indent=2)
# 输出统计信息
print(f"✅ 学校面积数据提取完成,已保存至:{JSON_PATH}")
print(f"📊 共处理 {len(area_data)} 条地区数据")
print("\n=== 名称转换记录 ===")
if conversion_records:
for record in conversion_records:
print(f"🔄 行 {record['row']}: {record['raw_name']}{record['converted_name']}")
print(f"📊 共检测到 {len(conversion_records)} 项名称转换")
else:
print("📝 不存在名称转换的情况")
if name_conversion_errors:
print(f"⚠️ 发现 {len(name_conversion_errors)} 个区域名称转换失败:")
for error in name_conversion_errors:
print(f" - {error}")
except FileNotFoundError:
print(f"🔴 错误Excel文件 '{file_name}' 不存在")
except Exception as e:
print(f"🔴 处理数据时发生错误:{str(e)}{traceback.format_exc()}")
finally:
try:
if workbook:
workbook.close()
except:
pass
if __name__ == "__main__":
main()