Files
YunNanProject/Tools/T1_RenKou.py
2025-09-10 13:50:18 +08:00

145 lines
5.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import math
import os
from typing import List, Dict, Any

import openpyxl

from Config.Config import EXCEL_PATH
from Util.AreaUtil import query_area_info
# ======================= Configuration constants =======================
# The output directory 'Data' sits next to the directory containing this file.
_PROJECT_ROOT = os.path.dirname(os.path.dirname(__file__))
DATA_DIR = os.path.join(_PROJECT_ROOT, 'Data')
JSON_PATH = os.path.join(DATA_DIR, 'RenKou.json')

SHEET_NAME = '人口'  # name of the worksheet to read
REGION_NAME_COLUMN = 'A'  # column holding the region name
START_ROW = 3  # first row that contains data
YEAR_RANGE = range(2015, 2025)  # years covered: 2015 through 2024

# Column span per metric, as (first column, last column); one column per year.
DATA_COLUMNS = {
    'total_population': ('B', 'K'),  # year-end total population
    'urban_population': ('L', 'U'),  # urban population
    'rural_population': ('V', 'AE'),  # rural population
    'urbanization_rate': ('AF', 'AO'),  # urbanization rate
    'birth_population': ('AP', 'AY'),  # births
}
# ======================= 工具函数 =======================
def init_directories() -> None:
    """Create the output data directory (DATA_DIR); an existing directory is not an error."""
    os.makedirs(name=DATA_DIR, exist_ok=True)
def process_value(value: Any) -> int | float | int:
"""处理单元格值,转换为合适的数值类型"""
if value is None or str(value).strip() == '':
return 0
try:
if isinstance(value, str):
value = value.replace(',', '').strip()
return float(value) if '.' in str(value) else int(value)
except (ValueError, TypeError):
return 0
# ======================= 核心逻辑 =======================
def extract_area_data(sheet: openpyxl.worksheet.worksheet.Worksheet) -> List[Dict[str, Any]]:
    """Extract the per-region yearly indicators from the worksheet.

    Rows are read in one pass starting at START_ROW. Each region name is
    looked up with ``query_area_info``; when the lookup succeeds its
    'full_name' and 'area_code' are used, otherwise the raw name is kept and
    the code is set to 'unknown'. A summary of conversions and failures is
    printed through ``print_conversion_stats`` before returning.

    Args:
        sheet: Worksheet containing the region names in REGION_NAME_COLUMN and
            the metric columns described by DATA_COLUMNS.

    Returns:
        One dict per region row with the keys 'area_name', 'area_code',
        'raw_name' and, for every metric in DATA_COLUMNS, a mapping from the
        year (as a string) to the processed cell value.
    """
    to_index = openpyxl.utils.column_index_from_string
    name_idx = to_index(REGION_NAME_COLUMN) - 1
    # Resolve the column letters once instead of for every row and metric.
    metric_spans = {
        metric: (to_index(start_col) - 1, to_index(end_col) - 1)
        for metric, (start_col, end_col) in DATA_COLUMNS.items()
    }
    # Rightmost (1-based) column needed; passing it as max_col makes every
    # yielded row at least this wide, so short rows cannot raise IndexError.
    last_col = 1 + max([name_idx, *(end_idx for _, end_idx in metric_spans.values())])

    population_data: List[Dict[str, Any]] = []
    conversion_records: List[Dict[str, Any]] = []
    name_conversion_errors: List[str] = []

    # A single iter_rows pass avoids re-reading the sheet for each sheet[row]
    # lookup and does not depend on sheet.max_row, which can be None for
    # read-only workbooks that lack dimension information.
    rows = sheet.iter_rows(min_row=START_ROW, max_col=last_col)
    for row_num, row in enumerate(rows, start=START_ROW):
        # Completely blank rows carry no region at all; skip them instead of
        # emitting a placeholder entry filled with zeros.
        if all(cell.value is None for cell in row):
            continue
        raw_name = str(row[name_idx].value or '未知地区').strip()
        if not raw_name:
            continue

        # Map the raw name to its full name and area code.
        area_info = query_area_info(raw_name)
        if area_info:
            area_name = area_info['full_name']
            area_code = area_info['area_code']
            if raw_name != area_name:
                conversion_records.append({
                    'row': row_num,
                    'raw_name': raw_name,
                    'converted_name': area_name
                })
        else:
            area_name = raw_name
            area_code = 'unknown'
            name_conversion_errors.append(f"{row_num}: '{raw_name}'")

        area_data: Dict[str, Any] = {
            'area_name': area_name,
            'area_code': area_code,
            'raw_name': raw_name
        }
        # Pair each metric's columns with the years, left to right.
        for metric, (start_idx, end_idx) in metric_spans.items():
            columns = range(start_idx, end_idx + 1)
            area_data[metric] = {
                str(year): process_value(row[col_idx].value)
                for col_idx, year in zip(columns, YEAR_RANGE)
            }
        population_data.append(area_data)

    # Report the name conversions that took place and those that failed.
    print_conversion_stats(conversion_records, name_conversion_errors)
    return population_data
def print_conversion_stats(conversion_records: List[Dict[str, Any]], errors: List[str]) -> None:
    """Print a summary of the region-name conversions.

    Args:
        conversion_records: One dict per converted name with the keys 'row',
            'raw_name' and 'converted_name'.
        errors: Descriptions of the names that could not be converted.
    """
    print("\n=== 名称转换记录 ===")
    for record in conversion_records:
        # Separate the two names with an arrow; without it they run together
        # and the line cannot be read.
        print(f"🔄 行 {record['row']}: {record['raw_name']} → {record['converted_name']}")
    print(f"📊 共检测到 {len(conversion_records)} 项名称转换")

    if not errors:
        print("✅ 所有区域名称均成功转换为全称")
        return
    print(f"⚠️ 发现 {len(errors)} 个区域名称转换失败:")
    for error in errors:
        print(f" - {error}")
# ======================= 主函数 =======================
def main() -> None:
    """Extract the population data from the Excel workbook and save it as JSON.

    Loads the workbook at EXCEL_PATH, reads the SHEET_NAME worksheet with
    ``extract_area_data`` and writes the result to JSON_PATH. Problems are
    reported on stdout rather than raised.
    """
    init_directories()
    # Stays None when loading fails, so the cleanup below knows there is
    # nothing to close (instead of relying on a swallowed NameError).
    workbook = None
    try:
        workbook = openpyxl.load_workbook(EXCEL_PATH, read_only=True, data_only=True)
        if SHEET_NAME not in workbook.sheetnames:
            print(f"❌ 错误:未找到'{SHEET_NAME}'工作表")
            return

        # Extract and process the data.
        population_data = extract_area_data(workbook[SHEET_NAME])

        # Save the result.
        with open(JSON_PATH, 'w', encoding='utf-8') as f:
            json.dump(population_data, f, ensure_ascii=False, indent=2)
        print(f"✅ 人口数据提取完成,已保存至:{JSON_PATH}")
        print(f"📊 共处理 {len(population_data)} 条地区数据")
    except FileNotFoundError:
        print(f"🔴 错误Excel文件 '{EXCEL_PATH}' 不存在")
    except Exception as e:
        # Top-level boundary: report the failure instead of crashing the script.
        print(f"🔴 处理数据时发生错误:{e}")
    finally:
        if workbook is not None:
            workbook.close()
if __name__ == '__main__':
main()