This commit is contained in:
2025-09-10 11:38:49 +08:00
parent 9d091e7ffb
commit ce6aa6339b
3 changed files with 33101 additions and 8 deletions

32852
Data/ClassCount.json Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,7 @@
import openpyxl # 添加缺少的导入
import json
import os
import traceback
from Config.Config import EXCEL_PATH
from Util.AreaUtil import query_area_info
@@ -8,13 +9,14 @@ from Util.AreaUtil import query_area_info
# Create the output directory: a 'Data' folder next to the directory that contains this script.
DATA_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'Data')
os.makedirs(DATA_DIR, exist_ok=True)
# NOTE(review): the two JSON_PATH lines below look like the removed and added sides of a diff
# shown together. If both are really present, the first assignment is overwritten immediately
# and only ClassCount.json is ever written.
JSON_PATH = os.path.join(DATA_DIR, 'MaoRuXueLv.json') # changed to the gross-enrollment-rate JSON path
JSON_PATH = os.path.join(DATA_DIR, 'ClassCount.json') # class-count JSON path
# Path of the Excel workbook to read, taken from the project configuration.
file_name = EXCEL_PATH
# NOTE(review): enrollment_data is not referenced anywhere in the visible code — presumably the
# removed side of the diff; confirm before relying on it.
enrollment_data = []
# One dict per processed worksheet row; this list is what gets dumped to JSON_PATH.
class_data = []
name_conversion_errors = [] # names for which the area lookup returned nothing
conversion_records = [] # rows whose raw name differs from the resolved full name
try:
# 加载工作簿并选择招生数Sheet
workbook = openpyxl.load_workbook(file_name, read_only=True)
if '招生数' not in workbook.sheetnames:
@@ -22,3 +24,190 @@ if '招生数' not in workbook.sheetnames:
exit(1)
sheet = workbook['招生数']
# Worksheet column layout, keyed by English attribute names.
def _build_data_columns():
    """Build the stage -> year -> column-letter mapping used to read the sheet.

    The layout is regular: data starts at column D; each of the four tiered
    stages has ten yearly groups of urban/town/rural columns followed by one
    column that is not mapped here; the vocational stage follows with a single
    column per year.
    """

    def column_letters(index):
        # 1-based column index -> Excel label (1 -> 'A', 27 -> 'AA', ...).
        label = ''
        while index > 0:
            index, offset = divmod(index - 1, 26)
            label = chr(ord('A') + offset) + label
        return label

    years = range(2015, 2025)
    area_kinds = ('urban', 'town', 'rural')
    first_data_column = 4  # column D
    year_stride = 4  # three mapped columns plus one unmapped column per year
    tiered_stages = (
        'preschool_classes',    # preschool education
        'primary_classes',      # primary education
        'junior_high_classes',  # junior high education
        'senior_high_classes',  # senior high education
    )

    layout = {}
    for stage_pos, stage in enumerate(tiered_stages):
        stage_start = first_data_column + stage_pos * len(years) * year_stride
        yearly = []
        for year_pos, year in enumerate(years):
            entry = {'year': year}
            for kind_pos, kind in enumerate(area_kinds):
                entry[kind] = column_letters(stage_start + year_pos * year_stride + kind_pos)
            yearly.append(entry)
        layout[stage] = {
            'years': list(years),
            'columns': yearly,
            'categories': list(area_kinds),
        }

    # Vocational education is the special case: one column per year, no area categories.
    vocational_start = first_data_column + len(tiered_stages) * len(years) * year_stride
    layout['vocational_classes'] = {
        'years': list(years),
        'columns': [
            {'year': year, 'column': column_letters(vocational_start + year_pos)}
            for year_pos, year in enumerate(years)
        ],
    }
    return layout


data_columns = _build_data_columns()
def _parse_count(value):
    """Return a cell value as a non-negative whole number, or 0 if it is not one.

    Handles the value types a worksheet cell can yield: None, numbers and text.
    Whole-number floats such as 12.0 are kept (a plain ``str(value).isdigit()``
    check would turn them into 0); placeholders like '' or '####' become 0.
    """
    if value is None or isinstance(value, bool):
        return 0
    if isinstance(value, int):
        return value if value >= 0 else 0
    if isinstance(value, float):
        # is_integer() is False for NaN and infinities, so those fall back to 0.
        return int(value) if value.is_integer() and value >= 0 else 0
    text = str(value).strip()
    # isdecimal() only accepts characters int() can parse; isdigit() also accepts
    # e.g. superscript digits, which would make int() raise.
    return int(text) if text.isdecimal() else 0


def _cell_count(row, col_idx):
    """Read one count from a row tuple; columns beyond the end of the row count as 0."""
    return _parse_count(row[col_idx]) if col_idx < len(row) else 0


# Resolve column letters to 0-based tuple indexes once, instead of once per cell.
_to_index = openpyxl.utils.column_index_from_string
stage_layouts = []
for stage, config in data_columns.items():
    yearly_layout = []
    for year_config in config['columns']:
        if 'categories' in config:
            # Tiered stage: one column per category for each year.
            position = {
                category: _to_index(year_config[category]) - 1
                for category in config['categories']
            }
        else:
            # Vocational stage: a single column per year.
            position = _to_index(year_config['column']) - 1
        yearly_layout.append((str(year_config['year']), position))
    stage_layouts.append((stage, yearly_layout))

# Walk the data rows, skipping the first 3 header rows (adjust to the actual sheet).
for row_num, row in enumerate(sheet.iter_rows(min_row=4, values_only=True), start=4):
    # The area name lives in column B (index 1).
    name_cell = row[1] if len(row) > 1 else None
    if name_cell is None:
        # A row with no values at all is an empty row: skip it instead of
        # emitting an all-zero record for an unknown area.
        if all(cell is None for cell in row):
            continue
        raw_name = '未知地区'
    elif not name_cell or not str(name_cell).strip():
        continue  # blank name cell
    else:
        raw_name = name_cell

    # Resolve the raw name to its canonical full name and area code.
    area_info = query_area_info(str(raw_name).strip())
    if area_info:
        area_name = area_info['full_name']
        area_code = area_info['area_code']
        if raw_name != area_name:
            conversion_records.append({
                'row': row_num,
                'raw_name': raw_name,
                'converted_name': area_name
            })
    else:
        area_name = raw_name
        area_code = 'unknown'
        name_conversion_errors.append(f"{row_num}: '{raw_name}'")

    area_data = {
        'area_name': area_name,
        'area_code': area_code,
        'raw_name': raw_name
    }

    # Extract the class counts for every education stage; every year (and every
    # category) is always present so all records share the same shape.
    for stage, yearly_layout in stage_layouts:
        stage_data = {}
        for year_key, position in yearly_layout:
            if isinstance(position, dict):
                stage_data[year_key] = {
                    category: _cell_count(row, col_idx)
                    for category, col_idx in position.items()
                }
            else:
                stage_data[year_key] = _cell_count(row, position)
        area_data[stage] = stage_data

    class_data.append(area_data)
# All rows have been read; release the workbook before writing the output.
workbook.close()

# Save the extracted records as human-readable UTF-8 JSON.
with open(JSON_PATH, 'w', encoding='utf-8') as f:
    json.dump(class_data, f, ensure_ascii=False, indent=2)

# Print summary statistics.
print(f"✅ 班级数数据提取完成,已保存至：{JSON_PATH}")
print(f"📊 共处理 {len(class_data)} 条地区数据")

print("\n=== 名称转换记录 ===")
if conversion_records:
    for record in conversion_records:
        # Separate the raw and converted names; without the arrow the two
        # names run together and the record is unreadable.
        print(f"🔄 行 {record['row']}: {record['raw_name']} → {record['converted_name']}")
    print(f"📊 共检测到 {len(conversion_records)} 项名称转换")
else:
    print("📝 不存在名称转换的情况")

# List every area name the lookup could not resolve.
if name_conversion_errors:
    print(f"⚠️ 发现 {len(name_conversion_errors)} 个区域名称转换失败:")
    for error in name_conversion_errors:
        print(f" - {error}")
# Handler for a missing file; the message assumes it is the Excel workbook at file_name.
except FileNotFoundError:
print(f"🔴 错误Excel文件 '{file_name}' 不存在")
# Catch-all boundary for the script: report any other failure with its traceback.
# NOTE(review): the message and the traceback are concatenated with no separator between them.
except Exception as e:
print(f"🔴 处理数据时发生错误：{str(e)}{traceback.format_exc()}")

52
Tools/prompt.txt Normal file
View File

@@ -0,0 +1,52 @@
省市县区名称在B列
学前教育
2015 城区 镇区 乡村
D E F G
2016 城区 镇区 乡村
H I J K
...
2024 城区 镇区 乡村
AN AO AP AQ
小学教育
2015 城区 镇区 乡村
AR AS AT AU
2016 城区 镇区 乡村
AV AW AX AY
...
2024 城区 镇区 乡村
CB CC CD CE
初中教育
2015 城区 镇区 乡村
CF CG CH CI
2016 城区 镇区 乡村
CJ CK CL CM
...
2024 城区 镇区 乡村
DP DQ DR DS
高中教育
2015 城区 镇区 乡村
DT DU DV DW
2016 城区 镇区 乡村
DX DY DZ EA
...
2024 城区 镇区 乡村
FD FE FF FG
中职教育
2015 2016 2017 。。。 2024
FH FI FJ FK FL FM FN FO FP FQ