Files
YunNanProject/Tools/T4_1_RenKou.py
2025-09-10 10:46:36 +08:00

96 lines
3.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import openpyxl
import json
import os
from Config.Config import EXCEL_PATH
from Util.AreaUtil import query_area_info
# Output location: a 'Data' directory one level above this script's directory
# (created on demand), holding the extracted population data as RenKou.json.
DATA_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'Data')
os.makedirs(DATA_DIR, exist_ok=True)
JSON_PATH = os.path.join(DATA_DIR, 'RenKou.json')
file_name = EXCEL_PATH
population_data = []
name_conversion_errors = []  # one "<row>: '<raw name>'" entry per failed name lookup

# Metric key -> column span (Excel letters, inclusive) and the year that the
# first column of the span represents; each following column is the next year.
data_columns = {
    'total_population': {'start_col': 'B', 'end_col': 'K', 'year_start': 2015},    # year-end total population
    'urban_population': {'start_col': 'L', 'end_col': 'U', 'year_start': 2015},    # urban population
    'rural_population': {'start_col': 'V', 'end_col': 'AE', 'year_start': 2015},   # rural population
    'urbanization_rate': {'start_col': 'AF', 'end_col': 'AO', 'year_start': 2015},  # urbanization rate
    'birth_population': {'start_col': 'AP', 'end_col': 'AY', 'year_start': 2015},  # births
}


def _to_number(value):
    """Convert one cell value to a number.

    Returns 0 for None, blank, or non-numeric cells; a float when the textual
    form contains a '.', otherwise an int.
    """
    if value is None:
        return 0
    text = str(value).strip()
    if text == '':
        return 0
    try:
        return float(value) if '.' in text else int(value)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int() of an infinite float; treat like any bad cell.
        return 0


def _extract_metric(row, first_idx, last_idx, year_start):
    """Build {"<year>": number} for the 0-based column span [first_idx, last_idx].

    The year is derived from the column offset, so the span width alone decides
    how many years are produced. Cells missing from a short row count as empty.
    """
    series = {}
    for offset, col_idx in enumerate(range(first_idx, last_idx + 1)):
        cell = row[col_idx] if col_idx < len(row) else None
        series[str(year_start + offset)] = _to_number(cell)
    return series


def _is_blank_row(row):
    """Return True when the row has no cells or every cell is None/whitespace."""
    return not row or all(
        cell is None or str(cell).strip() == '' for cell in row
    )


workbook = None
try:
    # Load the workbook and select the population sheet.
    workbook = openpyxl.load_workbook(file_name, read_only=True)
    if '人口' not in workbook.sheetnames:
        print("❌ 错误:未找到'人口'Sheet")
        # Equivalent to exit(1) but does not rely on the `site` helper; the
        # `finally` clause below still closes the workbook.
        raise SystemExit(1)
    sheet = workbook['人口']

    # Resolve column letters to 0-based tuple indexes once, not once per row.
    column_spans = {
        metric: (
            openpyxl.utils.column_index_from_string(cfg['start_col']) - 1,
            openpyxl.utils.column_index_from_string(cfg['end_col']) - 1,
            cfg['year_start'],
        )
        for metric, cfg in data_columns.items()
    }

    # Walk the data rows, skipping the two header rows.
    for row_num, row in enumerate(sheet.iter_rows(min_row=3, values_only=True), start=3):
        if _is_blank_row(row):
            continue  # completely empty row: nothing to record

        # A row with data but no name is kept under a placeholder name.
        # str() guards against non-text cells (e.g. a numeric first column).
        raw_name = str(row[0]) if row[0] else '未知地区'

        # Resolve the raw name to its full name and area code.
        area_info = query_area_info(raw_name.strip())
        if area_info:
            area_name = area_info['full_name']
            area_code = area_info['area_code']
        else:
            area_name = raw_name
            area_code = 'unknown'
            name_conversion_errors.append(f"{row_num}: '{raw_name}'")

        area_data = {
            'area_name': area_name,
            'area_code': area_code,
            'raw_name': raw_name,  # original name kept for debugging
        }

        # Extract the per-year series of every metric.
        for metric, (first_idx, last_idx, year_start) in column_spans.items():
            area_data[metric] = _extract_metric(row, first_idx, last_idx, year_start)

        population_data.append(area_data)

    # Save as a JSON file.
    with open(JSON_PATH, 'w', encoding='utf-8') as f:
        json.dump(population_data, f, ensure_ascii=False, indent=2)

    # Report the conversion summary.
    print(f"✅ 人口数据提取完成,已保存至:{JSON_PATH}")
    print(f"📊 共处理 {len(population_data)} 条地区数据")
    if name_conversion_errors:
        print(f"⚠️ 发现 {len(name_conversion_errors)} 个区域名称转换失败:")
        for error in name_conversion_errors:
            print(f" - {error}")
    else:
        print("✅ 所有区域名称均成功转换为全称")
except FileNotFoundError:
    print(f"🔴 错误Excel文件 '{file_name}' 不存在")
except Exception as e:
    print(f"🔴 处理数据时发生错误:{str(e)}")
finally:
    # Read-only workbooks keep the file open until closed explicitly, so
    # release it on every path (success, error, or early exit).
    if workbook is not None:
        workbook.close()