You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
64 lines
2.2 KiB
64 lines
2.2 KiB
from datetime import datetime
|
|
|
|
|
|
class DataMapper:
    """Convert raw row values into cleaned Python values.

    Columns listed in ``uint64_columns`` are coerced to ``int`` (0 when the
    value is missing or unparseable). Columns listed in ``date_columns`` are
    coerced to naive ``datetime`` objects clamped to
    ``[min_date, max_date]`` (``min_date`` when missing or unparseable).
    Any other string value is stripped of surrounding whitespace; all other
    values pass through unchanged.
    """

    # strptime layouts tried, in order, for textual date values.
    _DATE_FORMATS = (
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d',
        '%Y%m%d%H%M%S',
        '%Y/%m/%d %H:%M:%S',
    )

    # Raw values that mean "no date" and map straight to ``min_date``.
    _EMPTY_DATE_VALUES = (None, 0, '0', '0.0', '0.00', '', 'null', 'NULL')

    # Raw values that mean "no number" and map straight to 0.
    _EMPTY_UINT64_VALUES = (None, '', 'NULL')

    def __init__(self, date_columns, uint64_columns):
        """Store the column-name collections and the allowed date range.

        Args:
            date_columns: Container of column names handled as datetimes.
            uint64_columns: Container of column names handled as integers.
        """
        self.date_columns = date_columns
        self.uint64_columns = uint64_columns
        self.min_date = datetime(1970, 1, 1)
        self.max_date = datetime(2105, 12, 31, 23, 59, 59)

    def map_row(self, columns, row) -> dict:
        """Return ``{column: mapped value}`` for one row.

        ``columns`` and ``row`` are paired positionally with ``zip``, so any
        surplus items in the longer one are ignored. If a column name
        repeats, the last value wins.
        """
        row_dict = dict(zip(columns, row))
        return {col: self._map_value(col, val) for col, val in row_dict.items()}

    def _map_value(self, col, value):
        """Dispatch *value* to the converter that matches column *col*."""
        # uint64 takes precedence if a column is listed in both collections.
        if col in self.uint64_columns:
            return self._handle_uint64(value)
        if col in self.date_columns:
            return self._handle_datetime(value)
        if isinstance(value, str):
            return value.strip()
        return value

    def _handle_uint64(self, value) -> int:
        """Coerce *value* to ``int``, returning 0 when it cannot be converted.

        Integer-like input is converted exactly. Going through ``float``
        first would lose precision above 2**53 and, for example, turn
        ``"18446744073709551615"`` into 18446744073709551616. Only input that
        is not a plain integer literal (``"12.7"``, ``"1e3"``) falls back to
        float parsing and truncation toward zero.

        NOTE(review): the result is not range-checked; negative or oversized
        values are returned as-is.
        """
        try:
            if value in self._EMPTY_UINT64_VALUES:
                return 0
            if isinstance(value, float):
                # int() raises for nan/inf; handled below.
                return int(value)
            try:
                return int(value)
            except (TypeError, ValueError):
                return int(float(value))
        except (TypeError, ValueError, OverflowError):
            return 0

    def _handle_datetime(self, value) -> datetime:
        """Return the parsed, clamped datetime for *value* (never ``None``)."""
        dt = self._parse_datetime(value)
        return dt if dt is not None else self.min_date

    def _parse_datetime(self, value) -> datetime:
        """Parse *value* into a naive datetime within the allowed range.

        Accepted inputs:
          * naive ``datetime`` instances (clamped and returned directly, so
            microseconds are preserved),
          * all-digit Unix timestamps in seconds (10 digits) or
            milliseconds (13 digits), converted to local time,
          * strings in one of ``_DATE_FORMATS``.

        Anything else, including timezone-aware datetimes, yields
        ``min_date``.
        """
        if value in self._EMPTY_DATE_VALUES:
            return self.min_date

        if isinstance(value, datetime):
            if value.tzinfo is None:
                return self._clamp_datetime(value)
            # Aware values cannot be compared with the naive bounds.
            return self.min_date

        text = str(value).strip()

        # Timestamps must be recognised BEFORE the strptime formats:
        # '%Y%m%d%H%M%S' accepts single-digit fields, so an epoch value such
        # as "1711111111" would otherwise parse as a date in the year 1711
        # and be clamped to min_date.
        epoch_seconds = self._epoch_seconds(text)
        if epoch_seconds is not None:
            try:
                parsed = datetime.fromtimestamp(epoch_seconds)
            except (OverflowError, OSError, ValueError):
                # Platform cannot represent this timestamp.
                return self.min_date
            return self._clamp_datetime(parsed)

        for fmt in self._DATE_FORMATS:
            try:
                parsed = datetime.strptime(text, fmt)
            except ValueError:
                continue
            return self._clamp_datetime(parsed)

        return self.min_date

    @staticmethod
    def _epoch_seconds(text):
        """Return *text* as Unix seconds, or ``None`` if it is not a timestamp.

        Only all-digit strings whose magnitude matches a 13-digit
        (millisecond) or 10-digit (second) timestamp are accepted.
        """
        if not text.isdigit():
            return None
        try:
            ts = int(text)
        except ValueError:
            # str.isdigit() accepts characters (e.g. superscripts) int() rejects.
            return None
        if 10 ** 12 <= ts < 10 ** 13:  # millisecond timestamp
            return ts / 1000
        if 10 ** 9 <= ts < 10 ** 10:  # second timestamp
            return ts
        return None

    def _clamp_datetime(self, dt) -> datetime:
        """Limit *dt* to the inclusive range ``[min_date, max_date]``."""
        if dt < self.min_date:
            return self.min_date
        if dt > self.max_date:
            return self.max_date
        return dt