|
|
@ -1,13 +1,40 @@
|
|
|
|
from datetime import datetime
|
|
|
|
from datetime import datetime
|
|
|
|
|
|
|
|
from utils.logger import configure_logger
|
|
|
|
|
|
|
|
# Module-level logger, obtained from utils.logger.configure_logger().
logger = configure_logger()
|
|
|
|
|
|
|
|
|
|
|
|
class DataMapper:
|
|
|
|
class DataMapper:
|
|
|
|
def __init__(self, mysql_conn, table):
    """Create a mapper for one table and detect its special columns.

    Args:
        mysql_conn: Object whose ``connect()`` result provides ``cursor()``;
            ``_analyze_schema`` uses it to read the table's column metadata.
        table: Name of the table whose columns are inspected.
    """
    self.mysql_conn = mysql_conn
    self.table = table

    # Both lists start empty and are filled by _analyze_schema() below.
    self.date_columns = []
    self.uint64_columns = []
    self._analyze_schema()

    # Lower/upper datetime bounds kept on the instance.
    # NOTE(review): presumably the date range accepted by the target
    # store -- confirm against the code that reads min_date/max_date.
    self.min_date = datetime(1970, 1, 1)
    self.max_date = datetime(2105, 12, 31, 23, 59, 59)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _analyze_schema(self):
    """Inspect the table's columns and record which ones need special mapping.

    Queries ``INFORMATION_SCHEMA.COLUMNS`` for ``self.table`` in the current
    database (``DATABASE()``) and rebuilds two lists on the instance:

    * ``self.date_columns``: columns whose DATA_TYPE is ``datetime``,
      ``timestamp`` or ``date``.
    * ``self.uint64_columns``: columns whose DATA_TYPE contains ``int`` and
      whose COLUMN_TYPE contains ``unsigned`` (e.g. ``BIGINT UNSIGNED``).

    The lists are rebuilt from scratch on every call, so calling this method
    again does not accumulate duplicate column names.
    """
    # The table name is bound as a query parameter instead of being
    # interpolated into the SQL text, so unusual or hostile table names
    # cannot alter the statement.
    schema_query = """
        SELECT COLUMN_NAME, DATA_TYPE, COLUMN_TYPE
        FROM INFORMATION_SCHEMA.COLUMNS
        WHERE TABLE_SCHEMA = DATABASE()
        AND TABLE_NAME = %s
    """
    date_types = ('datetime', 'timestamp', 'date')
    date_columns = []
    uint64_columns = []

    # NOTE(review): the object returned by connect() is never closed here.
    # Whether it is a fresh or a shared connection is not visible from this
    # method -- confirm ownership before adding a close().
    with self.mysql_conn.connect().cursor() as cursor:
        cursor.execute(schema_query, (self.table,))
        for col_name, data_type, col_type in cursor.fetchall():
            # Compare both type strings case-insensitively.
            data_type = data_type.lower()

            # Date/time columns.
            if data_type in date_types:
                date_columns.append(col_name)

            # Unsigned integer columns (BIGINT UNSIGNED and similar).
            if 'int' in data_type and 'unsigned' in col_type.lower():
                uint64_columns.append(col_name)

    self.date_columns = date_columns
    self.uint64_columns = uint64_columns
|
|
|
|
def map_row(self, columns, row):
    """Pair column names with a row's values and map each value.

    Args:
        columns: Column names, in the same order as the values in ``row``.
        row: One row of values.

    Returns:
        A dict from column name to the result of ``self._map_value`` for
        that column's value. Pairing stops at the shorter of the two inputs.
    """
    # Build the name -> raw value mapping first so that a repeated column
    # name is passed to _map_value only once, with its last value.
    paired = dict(zip(columns, row))

    mapped = {}
    for name, raw in paired.items():
        mapped[name] = self._map_value(name, raw)
    return mapped
|
|
|
|