main
黄海 5 months ago
parent 8fa55f4102
commit 2494120688

@ -8,6 +8,7 @@ public class CrawlerStart {
/**
 * Entry point of the crawler: runs {@code PgInit.Init()}, then {@code BookLesson.Start()},
 * then {@code KnowledgeLesson.Start()}, strictly in that order.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
// Must run before either crawl (presumably sets up the database connection used by Db — confirm).
PgInit.Init();
BookLesson.Start();
// Manual patch run, kept disabled here; BookLesson calls fixPatch() itself once crawling is done
// (presumably from Start() — confirm).
//BookLesson.fixPatch();
KnowledgeLesson.Start();
}
}

@ -489,9 +489,6 @@ public class BookLesson {
String lesson_id = record.getStr("lesson_id");
if (lessonAlreadyMatch.contains(lesson_id)) continue;//如果记录过此课程的学校关系,本次就不再记录了
if (original_school_name.equals("--")) {
System.out.println("--");
}
if (bzSchoolNameList.containsKey(original_school_name)) {
//100%命中的名称
Kv kv = bzSchoolNameList.get(original_school_name);
@ -532,9 +529,71 @@ public class BookLesson {
print("爬取数据完成!");
print("总共收集资源:" + lessonList.size() + "个");
print("总共收集节点:" + structureList.size() + "个");
print("开始记录补丁...");
fixPatch();
print("补丁记录完成!");
// Record the end time and print the elapsed crawl time in seconds
long endTime = System.currentTimeMillis();
print("爬取数据耗时:" + (endTime - startTime) / 1000 + "秒");
}
/**
 * Applies the manually collected corrections ("patches") to lessons whose teacher name was
 * stored as the placeholder {@code "--"}.
 *
 * <p>Corrections are read from {@code t_crawler_fixdata} and indexed by {@code lesson_id}.
 * Two tables are then patched in memory and written back with {@code Db.batchUpdate},
 * keyed on {@code lesson_id}:
 * <ol>
 *   <li>{@code t_crawler_lesson_school}: {@code original_school_name} is overwritten with the
 *       corrected school name. If that name is found in {@code bzSchoolNameList}
 *       ({@code match_type} 1) or, failing that, in {@code handMatchSchoolList}
 *       ({@code match_type} 2), the organization columns and {@code teacher_name} are
 *       filled in as well.</li>
 *   <li>{@code t_crawler_lesson}: {@code teacher_school_name} and {@code teacher_name} are
 *       overwritten with the corrected values.</li>
 * </ol>
 * Rows that have no correction are not modified in memory, but they are still part of the
 * list handed to {@code Db.batchUpdate}.
 */
public static void fixPatch() {
    // Index the corrections by lesson_id; on duplicate ids the later row wins.
    Map<String, Record> fixMap = new HashMap<>();
    for (Record fix : Db.find("select * from t_crawler_fixdata")) {
        fixMap.put(fix.getStr("lesson_id"), fix);
    }

    // Step 1: patch the lesson <-> school relation rows of every lesson with a placeholder teacher.
    String sql = "select * from t_crawler_lesson_school where lesson_id in (select lesson_id from t_crawler_lesson where teacher_name='--')";
    List<Record> toFixList = Db.find(sql);
    for (Record record : toFixList) {
        Record fix = fixMap.get(record.getStr("lesson_id"));
        if (fix == null) {
            continue; // no correction recorded for this lesson
        }
        String teacherName = fix.getStr("teacher_name");
        String teacherSchoolName = fix.getStr("teacher_school_name");
        // The corrected name replaces the raw school name even when no school can be matched.
        record.set("original_school_name", teacherSchoolName);
        if (bzSchoolNameList.containsKey(teacherSchoolName)) {
            // Hit in bzSchoolNameList -> match_type 1.
            applySchoolMatch(record, bzSchoolNameList.get(teacherSchoolName), 1, teacherName);
        } else if (handMatchSchoolList.containsKey(teacherSchoolName)) {
            // Hit in handMatchSchoolList only -> match_type 2.
            applySchoolMatch(record, handMatchSchoolList.get(teacherSchoolName), 2, teacherName);
        }
    }
    // NOTE(review): teacher_name is set only on matched rows; confirm that
    // t_crawler_lesson_school really has this column, otherwise the records in this
    // batch do not all carry the same column set.
    Db.batchUpdate("t_crawler_lesson_school", "lesson_id", toFixList, batchSize);

    // Step 2: patch the lesson rows themselves (teacher name and teacher school name).
    sql = "select * from t_crawler_lesson where teacher_name='--'";
    List<Record> toFixList2 = Db.find(sql);
    for (Record record : toFixList2) {
        Record fix = fixMap.get(record.getStr("lesson_id"));
        if (fix == null) {
            continue; // no correction recorded for this lesson
        }
        record.set("teacher_school_name", fix.getStr("teacher_school_name"));
        record.set("teacher_name", fix.getStr("teacher_name"));
    }
    Db.batchUpdate("t_crawler_lesson", "lesson_id", toFixList2, batchSize);
}

/**
 * Copies the matched school's organization columns onto a lesson-school row and records
 * how the match was obtained.
 *
 * @param record      the t_crawler_lesson_school row being patched
 * @param school      the matched school entry supplying the organization columns
 * @param matchType   value written to {@code match_type} (1 or 2 in {@link #fixPatch()})
 * @param teacherName corrected teacher name written to {@code teacher_name}
 */
private static void applySchoolMatch(Record record, Kv school, int matchType, String teacherName) {
    record.set("organization_name", school.getStr("organization_name"));
    record.set("organization_no", school.getStr("organization_no"));
    record.set("gather_regionc", school.getStr("gather_regionc"));
    record.set("school_running_type", school.getStr("school_running_type"));
    record.set("match_type", matchType);
    record.set("teacher_name", teacherName);
}
}

Loading…
Cancel
Save