You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

115 lines
4.1 KiB

import sys
import os
import logging
from tqdm import tqdm
from ClickHouse.utils.logger import configure_logger
logger = configure_logger()
class SyncService:
    """Reload tables from MySQL into ClickHouse.

    For each configured table the ClickHouse table is dropped and recreated
    from its DDL file, after which every MySQL row is read, passed through
    the mapper and inserted into ClickHouse in batches.
    """

    def __init__(self, mysql_conn, ch_conn, mapper, table_config):
        """Store the collaborators and the sync configuration.

        :param mysql_conn: object whose ``connect()`` result provides
            ``cursor()``.
        :param ch_conn: object whose ``connect()`` result provides
            ``execute()``.
        :param mapper: object providing
            ``map_row(columns, row, date_columns, uint_columns)``.
        :param table_config: configuration dict. ``sync_all_tables`` reads
            its ``tables`` list; each per-table dict contains:
            - name: table name
            - schema_path: path of the DDL file
            - date_columns: list of date columns
            - uint_columns: list of unsigned-integer columns
            - batch_size: batch size (optional)
            A top-level ``batch_size`` (default 5000) is used for tables
            that do not define their own.
        """
        self.mysql_conn = mysql_conn
        self.ch_conn = ch_conn
        self.mapper = mapper
        self.table_config = table_config
        self.batch_size = table_config.get('batch_size', 5000)

    def sync_all_tables(self):
        """Sync every table listed under ``table_config['tables']``.

        A failure in one table is logged with its traceback and does not
        stop the remaining tables.

        :return: tuple ``(success_tables, failed_tables)`` of table names.
        """
        success_tables = []
        failed_tables = []
        for table in self.table_config['tables']:
            try:
                self.sync_single_table(table)
            except Exception as e:
                # Boundary handler: record the failure and keep going.
                logger.error(f"{table['name']} 同步失败: {str(e)}", exc_info=True)
                failed_tables.append(table['name'])
            else:
                success_tables.append(table['name'])
        logger.info(f"同步完成!成功:{len(success_tables)} 表,失败:{len(failed_tables)}")
        return success_tables, failed_tables

    def sync_single_table(self, table_config):
        """Recreate one ClickHouse table and copy all MySQL rows into it.

        :param table_config: per-table dict; ``name`` and ``schema_path``
            are required, ``date_columns``, ``uint_columns`` and
            ``batch_size`` are optional.
        :raises Exception: anything raised by the connections, the mapper
            or the schema file is propagated to the caller.
        """
        name = table_config['name']
        logger.info(f"开始同步表:{name}")

        # Drop and recreate the target table.
        self._init_table(table_config)

        # Row count is only used to size the progress bar.
        total = self._get_total_count(name)

        # The column list cannot change while rows are being copied, so it
        # is fetched once instead of issuing a DESCRIBE for every row.
        columns = self._get_table_columns(name)
        date_columns = table_config.get('date_columns', [])
        uint_columns = table_config.get('uint_columns', [])
        # A per-table batch size overrides the service-wide default.
        batch_size = table_config.get('batch_size', self.batch_size)

        with self.mysql_conn.connect().cursor() as cursor:
            cursor.execute(f"SELECT * FROM {name} ORDER BY id")
            progress = tqdm(total=total, desc=f"同步 {name}", unit="rec")
            try:
                batch = []
                while True:
                    row = cursor.fetchone()
                    if not row:
                        break
                    batch.append(
                        self.mapper.map_row(
                            columns=columns,
                            row=row,
                            date_columns=date_columns,
                            uint_columns=uint_columns,
                        )
                    )
                    if len(batch) >= batch_size:
                        self._insert_batch(name, batch)
                        progress.update(len(batch))
                        batch = []
                # Flush the final, partially filled batch.
                if batch:
                    self._insert_batch(name, batch)
                    progress.update(len(batch))
            finally:
                # Close the bar even when mapping or inserting fails.
                progress.close()
        logger.info(f"{name} 同步完成")

    def _init_table(self, table_config):
        """Drop the ClickHouse table if it exists and recreate it from DDL.

        The DDL file is read before anything is dropped, so an unreadable
        schema file leaves the existing table untouched.
        """
        with open(table_config['schema_path'], 'r', encoding='utf-8') as f:
            create_sql = f.read()
        client = self.ch_conn.connect()
        client.execute(f"DROP TABLE IF EXISTS {table_config['name']}")
        client.execute(create_sql)

    def _get_total_count(self, table_name):
        """Return the number of rows in the MySQL table."""
        with self.mysql_conn.connect().cursor() as cursor:
            cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
            return cursor.fetchone()[0]

    def _get_table_columns(self, table_name):
        """Return the ClickHouse table's column names.

        Takes the first field of every row returned by ``DESCRIBE TABLE``.
        """
        result = self.ch_conn.connect().execute(f"DESCRIBE TABLE {table_name}")
        return [row[0] for row in result]

    def _insert_batch(self, table_name, batch):
        """Insert one batch of mapped rows into the ClickHouse table."""
        self.ch_conn.connect().execute(
            f'INSERT INTO {table_name} VALUES',
            batch,
            types_check=True,
            settings={
                'date_time_input_format': 'best_effort',
                'allow_experimental_analyzer': 0,
                'input_format_null_as_default': 1
            }
        )