|
|
|
@ -489,9 +489,6 @@ public class BookLesson {
|
|
|
|
|
String lesson_id = record.getStr("lesson_id");
|
|
|
|
|
if (lessonAlreadyMatch.contains(lesson_id)) continue;//如果记录过此课程的学校关系,本次就不再记录了
|
|
|
|
|
|
|
|
|
|
if (original_school_name.equals("--")) {
|
|
|
|
|
System.out.println("--");
|
|
|
|
|
}
|
|
|
|
|
if (bzSchoolNameList.containsKey(original_school_name)) {
|
|
|
|
|
//100%命中的名称
|
|
|
|
|
Kv kv = bzSchoolNameList.get(original_school_name);
|
|
|
|
@ -532,9 +529,71 @@ public class BookLesson {
|
|
|
|
|
print("爬取数据完成!");
|
|
|
|
|
print("总共收集资源:" + lessonList.size() + "个");
|
|
|
|
|
print("总共收集节点:" + structureList.size() + "个");
|
|
|
|
|
|
|
|
|
|
print("开始记录补丁...");
|
|
|
|
|
fixPatch();
|
|
|
|
|
print("补丁记录完成!");
|
|
|
|
|
|
|
|
|
|
// Record the end time and print the elapsed crawl time in seconds
|
|
|
|
|
long endTime = System.currentTimeMillis();
|
|
|
|
|
print("爬取数据耗时:" + (endTime - startTime) / 1000 + "秒");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 打补丁
|
|
|
|
|
*/
|
|
|
|
|
public static void fixPatch() {
|
|
|
|
|
//开始打补丁
|
|
|
|
|
String sql = "select * from t_crawler_fixdata";
|
|
|
|
|
List<Record> fixList = Db.find(sql);
|
|
|
|
|
Map<String, Record> fixMap = new HashMap<>();//生成一个HashMap
|
|
|
|
|
for (Record record : fixList) {
|
|
|
|
|
String key = record.getStr("lesson_id");
|
|
|
|
|
fixMap.put(key, record);
|
|
|
|
|
}
|
|
|
|
|
//找出所有需要补丁的课程【课程与学校的对应关系】
|
|
|
|
|
sql = "select * from t_crawler_lesson_school where lesson_id in (select lesson_id from t_crawler_lesson where teacher_name='--')";
|
|
|
|
|
List<Record> toFixList = Db.find(sql);
|
|
|
|
|
for (Record record : toFixList) {
|
|
|
|
|
String lesson_id = record.getStr("lesson_id");
|
|
|
|
|
if (fixMap.containsKey(lesson_id)) {
|
|
|
|
|
Record r = fixMap.get(lesson_id);
|
|
|
|
|
String teacherName = r.getStr("teacher_name");
|
|
|
|
|
String teacher_school_name = r.getStr("teacher_school_name");
|
|
|
|
|
record.set("original_school_name", teacher_school_name);//原始名称
|
|
|
|
|
if (bzSchoolNameList.containsKey(teacher_school_name)) {
|
|
|
|
|
Kv kv = bzSchoolNameList.get(teacher_school_name);
|
|
|
|
|
record.set("organization_name", kv.getStr("organization_name"));
|
|
|
|
|
record.set("organization_no", kv.getStr("organization_no"));
|
|
|
|
|
record.set("gather_regionc", kv.getStr("gather_regionc"));
|
|
|
|
|
record.set("school_running_type", kv.getStr("school_running_type"));
|
|
|
|
|
record.set("match_type", 1);
|
|
|
|
|
record.set("teacher_name", teacherName);
|
|
|
|
|
} else if (handMatchSchoolList.containsKey(teacher_school_name)) {
|
|
|
|
|
Kv kv = handMatchSchoolList.get(teacher_school_name);
|
|
|
|
|
record.set("organization_name", kv.getStr("organization_name"));
|
|
|
|
|
record.set("organization_no", kv.getStr("organization_no"));
|
|
|
|
|
record.set("gather_regionc", kv.getStr("gather_regionc"));
|
|
|
|
|
record.set("school_running_type", kv.getStr("school_running_type"));
|
|
|
|
|
record.set("match_type", 2);
|
|
|
|
|
record.set("teacher_name", teacherName);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
Db.batchUpdate("t_crawler_lesson_school","lesson_id", toFixList, batchSize);
|
|
|
|
|
// 还需要继续打补丁【课程与教师的名称对应关系】
|
|
|
|
|
sql = "select * from t_crawler_lesson where teacher_name='--'";
|
|
|
|
|
List<Record> toFixList2 = Db.find(sql);
|
|
|
|
|
for (Record record : toFixList2) {
|
|
|
|
|
String lesson_id = record.getStr("lesson_id");
|
|
|
|
|
if (fixMap.containsKey(lesson_id)) {
|
|
|
|
|
Record r = fixMap.get(lesson_id);
|
|
|
|
|
String teacherName = r.getStr("teacher_name");
|
|
|
|
|
String teacher_school_name = r.getStr("teacher_school_name");
|
|
|
|
|
record.set("teacher_school_name", teacher_school_name);//原始名称
|
|
|
|
|
record.set("teacher_name", teacherName);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
Db.batchUpdate("t_crawler_lesson","lesson_id", toFixList2, batchSize);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|