You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

75 lines
2.5 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

from tqdm import tqdm
class SyncService:
    """Copy the rows of a MySQL table into a ClickHouse table in batches.

    The collaborators are injected and only used through the calls visible
    in this class:

    * ``mysql_conn`` -- must provide ``connect()`` and
      ``create_count_connection()``; both results must provide ``cursor()``
      usable as a context manager.
    * ``ch_conn`` -- must provide ``connect()`` returning an object with an
      ``execute(...)`` method (presumably a clickhouse-driver ``Client``;
      confirm against the connection wrapper).
    * ``mapper`` -- must provide ``map_row(columns, row)``.
    * ``schema_path`` -- path of a UTF-8 text file containing the
      ClickHouse DDL statement.

    NOTE: table names are interpolated directly into SQL text by every
    method here, so callers must only pass trusted identifiers.
    """

    def __init__(self, mysql_conn, ch_conn, mapper, schema_path):
        self.mysql_conn = mysql_conn
        self.ch_conn = ch_conn
        self.mapper = mapper
        self.schema_path = schema_path
        # Populated by sync_data() from the ClickHouse table description.
        self.columns = None

    def create_ch_table(self, table):
        """Drop ``table`` in ClickHouse if it exists, then run the schema DDL.

        The DDL is read verbatim from ``self.schema_path`` and executed
        as-is; this method does not check that it creates ``table``.
        """
        with open(self.schema_path, 'r', encoding='utf-8') as f:
            create_sql = f.read()

        client = self.ch_conn.connect()
        client.execute(f"DROP TABLE IF EXISTS {table}")
        client.execute(create_sql)

    def _load_table_columns(self, table):
        """Return the first field of every ``DESCRIBE TABLE`` row, in order."""
        result = self.ch_conn.connect().execute(f"DESCRIBE TABLE {table}")
        return [row[0] for row in result]

    def sync_data(self, table, batch_size):
        """Stream every row of MySQL ``table`` into ClickHouse ``table``.

        Rows are read one at a time ordered by ``id``, converted with
        ``self.mapper.map_row(self.columns, row)`` and flushed to ClickHouse
        whenever ``batch_size`` rows have accumulated; a final partial batch
        is flushed at the end.  Progress is shown with a tqdm bar whose total
        is the ``COUNT(*)`` taken before reading starts.

        Args:
            table: Table name, used unchanged on both the MySQL and the
                ClickHouse side.
            batch_size: Number of mapped rows per ClickHouse insert.
        """
        self.columns = self._load_table_columns(table)

        # Separate connections: one for the row count, one for the data read.
        count_conn = self.mysql_conn.create_count_connection()
        mysql_conn = self.mysql_conn.connect()

        with count_conn.cursor() as count_cursor:
            count_cursor.execute(f"SELECT COUNT(*) FROM {table}")
            total = count_cursor.fetchone()[0]

        with mysql_conn.cursor() as cursor:
            cursor.execute(f"SELECT * FROM {table} ORDER BY id")
            # The context manager closes the bar even when a fetch, mapping
            # or insert raises, so a failure does not leave it open.
            with tqdm(total=total, desc="同步进度", unit="rec") as progress:
                batch = []
                while True:
                    row = cursor.fetchone()
                    if not row:
                        break
                    batch.append(self.mapper.map_row(self.columns, row))
                    if len(batch) >= batch_size:
                        self._insert_batch(batch, table)
                        progress.update(len(batch))
                        batch = []
                # Flush whatever is left after the last full batch.
                if batch:
                    self._insert_batch(batch, table)
                    progress.update(len(batch))

    def _insert_batch(self, batch, table):
        """Insert ``batch`` (a list of mapped rows) into ClickHouse ``table``.

        No column list is given in the statement, so each row must supply
        values for the table's columns in table order.
        """
        self.ch_conn.connect().execute(
            f'INSERT INTO {table} VALUES',
            batch,
            types_check=True,
            settings={
                'date_time_input_format': 'best_effort',
                'allow_experimental_analyzer': 0,
                'input_format_null_as_default': 1,
            },
        )