diff --git a/ETL/Mars/fake.py b/ETL/Mars/fake.py
index 4d7099f..88828bd 100644
--- a/ETL/Mars/fake.py
+++ b/ETL/Mars/fake.py
@@ -1,5 +1,6 @@
 # pip install Faker -i https://pypi.douban.com/simple/
 import random
+import uuid
 
 # 导入生成数据类
 from faker import Faker
@@ -43,17 +44,16 @@ for i in range(maxN):
 db.executemany(sql, data)
 logInfo("成功完成作业信息填充,共%s条!" % maxN)
 
-
 sql = 'truncate table t_zy_score'
 db.execute(sql)
 
 
 # 存在的关系
-sql = "insert into t_zy_score(zy_id,person_id,score)values(%s,%s,%s)"
+sql = "insert into t_zy_score(id,zy_id,person_id,score)values(%s,%s,%s,%s)"
 data = []
 for x in range(1, 1001):  # 作业
     for y in range(1, 1001):  # 人员
-        obj = [x, y, random.randint(1, 100)]
+        obj = [str(uuid.uuid1()), x, y, random.randint(1, 100)]
         data.append(obj)
 db.executemany(sql, data)
 logInfo("成功完成作业与人员关系信息填充,共%s条!" % len(data))
@@ -62,7 +62,7 @@ logInfo("成功完成作业与人员关系信息填充,共%s条!" % len(data))
 data = []
 for x in range(1 + maxN, 301 + maxN):  # 作业
     for y in range(1, 301):  # 人员
-        obj = [x, y, random.randint(1, 100)]
+        obj = [str(uuid.uuid1()), x, y, random.randint(1, 100)]
         data.append(obj)
 db.executemany(sql, data)
 logInfo("成功完成作业不存在,人员存在的关系A填充,共%s条!" % len(data))
@@ -71,7 +71,7 @@ logInfo("成功完成作业不存在,人员存在的关系A填充,共%s条!"
 data = []
 for x in range(1, 301):  # 作业
     for y in range(1 + maxN, 301 + maxN):  # 人员
-        obj = [x, y, random.randint(1, 100)]
+        obj = [str(uuid.uuid1()), x, y, random.randint(1, 100)]
         data.append(obj)
 db.executemany(sql, data)
 logInfo("成功完成作业存在,人员不存在的关系B填充,共%s条!" % len(data))
diff --git a/ETL/Mars/pack.py b/ETL/Mars/pack.py
index 6ed430b..20a5586 100644
--- a/ETL/Mars/pack.py
+++ b/ETL/Mars/pack.py
@@ -18,14 +18,28 @@ if __name__ == '__main__':
     db = MySQLHelper()
 
     for _bean in _dict:
-        actions = _dict[_bean]
-        for action in actions:
+        # Fetch the id range once per table so each UPDATE can be limited to a
+        # bounded window of _id_int values.
+        sql = "select min(_id_int) as mi,max(_id_int) as mx from %s" % _bean
+        rows = db.query(sql)
+        mi = rows[0]['mi']
+        mx = rows[0]['mx']
+        if mi is None or mx is None:
+            # min()/max() are NULL for an empty table: nothing to clean here.
+            continue
+
+        for action in _dict[_bean]:
             memo = action["memo"]
             sql = action["sql"]
-            while True:
-                cnt = db.execute(sql)
-                if cnt == 0:
-                    break
-                logInfo(memo + ",更新%s条" % cnt)
+            start = mi
+            # "<=" so the window holding the row with the largest id always runs.
+            while start <= mx:
+                _sql = sql + " and t1._id_int >=%s and t1._id_int <%s" % (start, start + 1000)
+                cnt = db.execute(_sql)
+                if cnt > 0:
+                    logInfo(memo + ",更新%s条" % cnt)
+                else:
+                    logInfo(memo + ",这个号段,我在空跑!")
+                start = start + 1000
     db.close()
     logInfo("恭喜,所有清洗工作成功完成!")