from datetime import date, datetime

from utils.logger import configure_logger

logger = configure_logger()


class DataMapper:
    """Normalize rows of one MySQL table according to its column types.

    On construction the table's schema is read from
    ``INFORMATION_SCHEMA.COLUMNS``; date/time columns and unsigned integer
    columns are recorded, and ``map_row`` then coerces each value of a row:

    * unsigned integer columns -> ``int`` (``0`` when empty or unparseable)
    * date/time columns        -> ``datetime`` clamped to
      ``[min_date, max_date]`` (``min_date`` when empty or unparseable)
    * any other ``str`` value  -> stripped of surrounding whitespace
    * everything else          -> returned unchanged
    """

    # Values treated as "no date"; compared with ``==`` so 0, 0.0 and False
    # all match the ``0`` entry.
    _EMPTY_DATE_VALUES = (None, 0, '0', '0.0', '0.00', '', 'null', 'NULL')

    # Values treated as "no integer".
    _EMPTY_INT_VALUES = (None, '', 'NULL')

    # Text layouts tried in order when parsing a date/time value.
    _DATETIME_FORMATS = (
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S.%f',
        '%Y-%m-%d',
        '%Y%m%d%H%M%S',
        '%Y/%m/%d %H:%M:%S',
    )

    def __init__(self, mysql_conn, table):
        """Store the connection wrapper and table name, then read the schema.

        Args:
            mysql_conn: Object whose ``connect()`` returns a connection that
                provides ``cursor()`` usable as a context manager.
            table: Name of the table (in the current database) to analyze.
        """
        self.mysql_conn = mysql_conn
        self.table = table
        self.date_columns = []
        self.uint64_columns = []
        # Bounds applied to every parsed date/time value.
        self.min_date = datetime(1970, 1, 1)
        self.max_date = datetime(2105, 12, 31, 23, 59, 59)
        self._analyze_schema()

    def _analyze_schema(self):
        """Inspect the table schema and record date and unsigned-int columns."""
        # The table name is passed as a bound parameter rather than being
        # interpolated into the SQL text.
        schema_query = """
            SELECT COLUMN_NAME, DATA_TYPE, COLUMN_TYPE
            FROM INFORMATION_SCHEMA.COLUMNS
            WHERE TABLE_SCHEMA = DATABASE()
              AND TABLE_NAME = %s
        """
        # NOTE(review): the connection returned by connect() is never closed
        # here; confirm whether mysql_conn manages its lifetime.
        with self.mysql_conn.connect().cursor() as cursor:
            cursor.execute(schema_query, (self.table,))
            for col_name, data_type, col_type in cursor.fetchall():
                # Date/time columns.
                if data_type in ('datetime', 'timestamp', 'date'):
                    self.date_columns.append(col_name)
                # Unsigned integer columns (e.g. BIGINT UNSIGNED).
                if 'unsigned' in col_type.lower() and 'int' in data_type:
                    self.uint64_columns.append(col_name)

    def map_row(self, columns, row):
        """Return ``{column: normalized value}`` for one row.

        Args:
            columns: Column names, in the same order as ``row``.
            row: Raw values of one record.

        Returns:
            dict mapping each column name to its normalized value. Pairs are
            formed with ``zip``, so extra items on either side are ignored.
        """
        return {col: self._map_value(col, val) for col, val in zip(columns, row)}

    def _map_value(self, col, value):
        """Normalize a single value according to the type of its column."""
        if col in self.uint64_columns:
            return self._handle_uint64(value)
        if col in self.date_columns:
            return self._handle_datetime(value)
        if isinstance(value, str):
            return value.strip()
        return value

    def _handle_uint64(self, value):
        """Convert ``value`` to ``int``; empty or unparseable input gives 0.

        An exact integer conversion is attempted first. Going through
        ``float`` unconditionally would corrupt integers above 2**53, which
        are valid for BIGINT UNSIGNED columns.
        """
        if value in self._EMPTY_INT_VALUES:
            return 0
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            pass
        # Fallback for decimal/scientific text such as '12.0' or '1e3'.
        try:
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            return 0

    def _handle_datetime(self, value):
        """Return ``value`` as a clamped ``datetime`` (``min_date`` fallback)."""
        return self._parse_datetime(value)

    def _parse_datetime(self, value):
        """Parse ``value`` into a ``datetime`` within ``[min_date, max_date]``.

        Accepted inputs:
            * ``datetime`` / ``date`` objects (used directly, sub-second
              precision preserved)
            * text in one of ``_DATETIME_FORMATS``
            * all-digit Unix timestamps in seconds (10 digits) or
              milliseconds (13 digits), interpreted in local time by
              ``datetime.fromtimestamp``

        Returns:
            The clamped ``datetime``, or ``min_date`` when the value is
            empty, zero-like, or cannot be interpreted.
        """
        if value in self._EMPTY_DATE_VALUES:
            return self.min_date
        try:
            # datetime is checked first because it is a subclass of date.
            if isinstance(value, datetime):
                return self._clamp_datetime(value)
            if isinstance(value, date):
                return self._clamp_datetime(
                    datetime(value.year, value.month, value.day)
                )

            text = str(value).strip()
            for fmt in self._DATETIME_FORMATS:
                try:
                    parsed = datetime.strptime(text, fmt)
                except ValueError:
                    continue
                return self._clamp_datetime(parsed)

            if text.isdigit():
                ts = int(text)
                if 1e12 < ts < 1e13:  # millisecond timestamp
                    return self._clamp_datetime(datetime.fromtimestamp(ts / 1000))
                if 1e9 < ts < 1e10:  # second timestamp
                    return self._clamp_datetime(datetime.fromtimestamp(ts))
        except (TypeError, ValueError, OverflowError, OSError):
            # TypeError covers timezone-aware datetimes, which cannot be
            # compared with the naive bounds; the rest cover bad digits and
            # out-of-range timestamps.
            return self.min_date
        return self.min_date

    def _clamp_datetime(self, dt):
        """Limit ``dt`` to the inclusive range ``[min_date, max_date]``."""
        if dt < self.min_date:
            return self.min_date
        if dt > self.max_date:
            return self.max_date
        return dt