diff --git a/src/main/java/Tools/Crawler/CrawlerStart.java b/src/main/java/Tools/Crawler/CrawlerStart.java
index 88f7f95e..e15645f7 100644
--- a/src/main/java/Tools/Crawler/CrawlerStart.java
+++ b/src/main/java/Tools/Crawler/CrawlerStart.java
@@ -8,6 +8,7 @@ public class CrawlerStart {
     public static void main(String[] args) {
         PgInit.Init();
         BookLesson.Start();
+        //BookLesson.fixPatch();
         KnowledgeLesson.Start();
     }
 }
diff --git a/src/main/java/Tools/Crawler/Util/BookLesson.java b/src/main/java/Tools/Crawler/Util/BookLesson.java
index 4a8d3309..3bd0f4c7 100644
--- a/src/main/java/Tools/Crawler/Util/BookLesson.java
+++ b/src/main/java/Tools/Crawler/Util/BookLesson.java
@@ -489,9 +489,6 @@ public class BookLesson {
             String lesson_id = record.getStr("lesson_id");
             if (lessonAlreadyMatch.contains(lesson_id)) continue;//如果记录过此课程的学校关系,本次就不再记录了
 
-            if (original_school_name.equals("--")) {
-                System.out.println("--");
-            }
             if (bzSchoolNameList.containsKey(original_school_name)) {
                 //100%命中的名称
                 Kv kv = bzSchoolNameList.get(original_school_name);
@@ -532,9 +529,80 @@ public class BookLesson {
         print("爬取数据完成!");
         print("总共收集资源:" + lessonList.size() + "个");
         print("总共收集节点:" + structureList.size() + "个");
+
+        print("开始记录补丁...");
+        fixPatch();
+        print("补丁记录完成!");
+
         //记录结束时间,并输出两者的差值是多少分钟多少少
         long endTime = System.currentTimeMillis();
         print("爬取数据耗时:" + (endTime - startTime) / 1000 + "秒");
     }
 
+    /**
+     * Applies the manual corrections stored in t_crawler_fixdata to crawled lessons whose
+     * teacher_name is the placeholder "--".
+     * <p>
+     * Step 1 updates t_crawler_lesson_school: original_school_name is replaced by the corrected
+     * school name and, when that name is a key of bzSchoolNameList (match_type 1) or of
+     * handMatchSchoolList (match_type 2), the organization columns and teacher_name are filled in.
+     * Step 2 updates teacher_name and teacher_school_name on t_crawler_lesson.
+     * <p>
+     * Step 1 has to run first: its query selects lessons by teacher_name='--', which step 2
+     * overwrites. Every row returned by either query is passed to Db.batchUpdate, changed or not.
+     */
+    public static void fixPatch() {
+        // Index the correction rows by lesson_id.
+        String sql = "select * from t_crawler_fixdata";
+        List<Record> fixList = Db.find(sql);
+        Map<String, Record> fixMap = new HashMap<>();
+        for (Record record : fixList) {
+            fixMap.put(record.getStr("lesson_id"), record);
+        }
+
+        // Step 1: lesson-to-school relations of the lessons that need patching.
+        sql = "select * from t_crawler_lesson_school where lesson_id in (select lesson_id from t_crawler_lesson where teacher_name='--')";
+        List<Record> toFixList = Db.find(sql);
+        for (Record record : toFixList) {
+            Record fix = fixMap.get(record.getStr("lesson_id"));
+            if (fix == null) {
+                continue;
+            }
+            String teacherName = fix.getStr("teacher_name");
+            String teacherSchoolName = fix.getStr("teacher_school_name");
+            record.set("original_school_name", teacherSchoolName);
+
+            // bzSchoolNameList is consulted before handMatchSchoolList; match_type records which one hit.
+            Kv kv = null;
+            int matchType = 0;
+            if (bzSchoolNameList.containsKey(teacherSchoolName)) {
+                kv = bzSchoolNameList.get(teacherSchoolName);
+                matchType = 1;
+            } else if (handMatchSchoolList.containsKey(teacherSchoolName)) {
+                kv = handMatchSchoolList.get(teacherSchoolName);
+                matchType = 2;
+            }
+            if (kv != null) {
+                record.set("organization_name", kv.getStr("organization_name"));
+                record.set("organization_no", kv.getStr("organization_no"));
+                record.set("gather_regionc", kv.getStr("gather_regionc"));
+                record.set("school_running_type", kv.getStr("school_running_type"));
+                record.set("match_type", matchType);
+                record.set("teacher_name", teacherName);
+            }
+        }
+        Db.batchUpdate("t_crawler_lesson_school", "lesson_id", toFixList, batchSize);
+
+        // Step 2: teacher name and teacher school on the lessons themselves.
+        sql = "select * from t_crawler_lesson where teacher_name='--'";
+        List<Record> toFixList2 = Db.find(sql);
+        for (Record record : toFixList2) {
+            Record fix = fixMap.get(record.getStr("lesson_id"));
+            if (fix != null) {
+                record.set("teacher_school_name", fix.getStr("teacher_school_name"));
+                record.set("teacher_name", fix.getStr("teacher_name"));
+            }
+        }
+        Db.batchUpdate("t_crawler_lesson", "lesson_id", toFixList2, batchSize);
+    }
 }