You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
115 lines
4.1 KiB
115 lines
4.1 KiB
import sys
|
|
import os
|
|
import logging
|
|
|
|
from tqdm import tqdm
|
|
|
|
from ClickHouse.utils.logger import configure_logger
|
|
|
|
logger = configure_logger()
|
|
|
|
class SyncService:
    """Synchronize MySQL tables into ClickHouse according to a table configuration."""

    def __init__(self, mysql_conn, ch_conn, mapper, table_config):
        """
        :param mysql_conn: MySQL connection factory (exposes ``.connect()`` returning
            an object with ``.cursor()``)
        :param ch_conn: ClickHouse connection factory (exposes ``.connect()`` returning
            a client with ``.execute()``)
        :param mapper: row mapper exposing
            ``map_row(columns, row, date_columns, uint_columns)``
        :param table_config: top-level configuration dict containing:
            - tables: list of per-table config dicts, each with:
                - name: table name
                - schema_path: path to the DDL file
                - date_columns: list of date column names
                - uint_columns: list of unsigned-integer column names
            - batch_size: rows per INSERT batch (optional, default 5000)
        """
        self.mysql_conn = mysql_conn
        self.ch_conn = ch_conn
        self.mapper = mapper
        self.table_config = table_config
        # Default to 5000 rows per ClickHouse INSERT when not configured.
        self.batch_size = table_config.get('batch_size', 5000)

    def sync_all_tables(self):
        """Sync every configured table.

        :return: ``(success_tables, failed_tables)`` — two lists of table names.
        """
        success_tables = []
        failed_tables = []

        for table in self.table_config['tables']:
            try:
                self.sync_single_table(table)
                success_tables.append(table['name'])
            except Exception as e:
                # One failing table must not abort the whole run; record and continue.
                logger.error(f"表 {table['name']} 同步失败: {str(e)}", exc_info=True)
                failed_tables.append(table['name'])

        logger.info(f"同步完成!成功:{len(success_tables)} 表,失败:{len(failed_tables)} 表")
        return success_tables, failed_tables

    def sync_single_table(self, table_config):
        """Sync one table: recreate it in ClickHouse, then stream MySQL rows over.

        :param table_config: per-table config dict (see ``__init__``).
        :raises Exception: propagates any connection/SQL error to the caller.
        """
        logger.info(f"开始同步表:{table_config['name']}")

        # Recreate the target table from its DDL file.
        self._init_table(table_config)

        # Total row count drives the progress bar.
        total = self._get_total_count(table_config['name'])

        # Hoisted out of the fetch loop: the original called
        # _get_table_columns() for every row, issuing one DESCRIBE TABLE
        # query per MySQL record.
        columns = self._get_table_columns(table_config['name'])
        date_columns = table_config.get('date_columns', [])
        uint_columns = table_config.get('uint_columns', [])

        # NOTE(review): table names are interpolated into SQL via f-strings.
        # They come from trusted config files, but keep those files out of
        # untrusted hands (no parameterization is possible for identifiers).
        with self.mysql_conn.connect().cursor() as cursor:
            cursor.execute(f"SELECT * FROM {table_config['name']} ORDER BY id")
            progress = tqdm(total=total, desc=f"同步 {table_config['name']}", unit="rec")
            try:
                batch = []
                while True:
                    row = cursor.fetchone()
                    if not row:
                        break

                    mapped = self.mapper.map_row(
                        columns=columns,
                        row=row,
                        date_columns=date_columns,
                        uint_columns=uint_columns
                    )
                    batch.append(mapped)

                    if len(batch) >= self.batch_size:
                        self._insert_batch(table_config['name'], batch)
                        progress.update(len(batch))
                        batch = []

                # Flush the trailing partial batch.
                if batch:
                    self._insert_batch(table_config['name'], batch)
                    progress.update(len(batch))
            finally:
                # The original leaked the bar on error; always restore the terminal.
                progress.close()

        logger.info(f"表 {table_config['name']} 同步完成")

    def _init_table(self, table_config):
        """Drop and recreate the ClickHouse table from its DDL file."""
        with open(table_config['schema_path'], 'r', encoding='utf-8') as f:
            create_sql = f.read()

        client = self.ch_conn.connect()
        client.execute(f"DROP TABLE IF EXISTS {table_config['name']}")
        client.execute(create_sql)

    def _get_total_count(self, table_name):
        """Return the MySQL table's row count (used to size the progress bar)."""
        with self.mysql_conn.connect().cursor() as cursor:
            cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
            return cursor.fetchone()[0]

    def _get_table_columns(self, table_name):
        """Return the ClickHouse table's column names, in DDL order."""
        result = self.ch_conn.connect().execute(f"DESCRIBE TABLE {table_name}")
        # DESCRIBE rows are tuples; column name is the first field.
        return [row[0] for row in result]

    def _insert_batch(self, table_name, batch):
        """Bulk-insert one batch of mapped rows into ClickHouse.

        ``types_check=True`` makes clickhouse-driver validate Python values
        against column types instead of failing deep inside serialization.
        """
        self.ch_conn.connect().execute(
            f'INSERT INTO {table_name} VALUES',
            batch,
            types_check=True,
            settings={
                'date_time_input_format': 'best_effort',
                'allow_experimental_analyzer': 0,
                'input_format_null_as_default': 1
            }
        )