main
黄海 5 months ago
parent 2494120688
commit 564c458fb0

@ -7,8 +7,11 @@ import Tools.Crawler.Util.PgInit;
/**
 * Command-line entry point for the crawler.
 * Runs, in order: PgInit.Init(), BookLesson.Start(), KnowledgeLesson.Start()
 * and finally BookLesson.fixPatch().
 */
public class CrawlerStart {
/**
 * Executes the crawler steps sequentially.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
// NOTE(review): presumably sets up the database connection - confirm in PgInit
PgInit.Init();
// Sync the chapter directory (table of contents)
BookLesson.Start();
//BookLesson.fixPatch();
// Sync the knowledge points
KnowledgeLesson.Start();
// Apply the patches
BookLesson.fixPatch();
}
}

@ -42,6 +42,14 @@ public class BookLesson {
// Lessons that have already been matched
public static Set<String> lessonAlreadyMatch = new HashSet<>();
/**
 * Initializes the three shared static lookup structures:
 * {@code bzSchoolNameList}, {@code handMatchSchoolList} and
 * {@code lessonAlreadyMatch}.
 * Each one is loaded through its getter only while it is empty; a structure
 * that already holds entries is left untouched.
 */
public static void init() {
    if (bzSchoolNameList.isEmpty()) {
        bzSchoolNameList = getBzSchoolNameList();
    }
    if (handMatchSchoolList.isEmpty()) {
        handMatchSchoolList = getHandMatchSchoolList();
    }
    if (lessonAlreadyMatch.isEmpty()) {
        lessonAlreadyMatch = getLessonAlreadyMatch();
    }
}
public static void traverseTree(JSONArray treeArray, JSONArray tempTree) {
for (Object item : treeArray) {
JSONObject node = (JSONObject) item;
@ -396,11 +404,8 @@ public class BookLesson {
map.put("2", "小学");
map.put("3", "初中");
map.put("4", "高中");
//初始化三个全局量
bzSchoolNameList = getBzSchoolNameList();
handMatchSchoolList = getHandMatchSchoolList();
lessonAlreadyMatch = getLessonAlreadyMatch();
//初始化
init();
print("开始爬取数据!");
@ -529,11 +534,6 @@ public class BookLesson {
print("爬取数据完成!");
print("总共收集资源:" + lessonList.size() + "个");
print("总共收集节点:" + structureList.size() + "个");
print("开始记录补丁...");
fixPatch();
print("补丁记录完成!");
//记录结束时间,并输出两者的差值是多少分钟多少少
long endTime = System.currentTimeMillis();
print("爬取数据耗时:" + (endTime - startTime) / 1000 + "秒");
@ -543,8 +543,11 @@ public class BookLesson {
*
*/
public static void fixPatch() {
// NOTE(review): this span looks like a rendered diff rather than plain source:
// hunk headers ("@ -552,38 +555,46 @@ ...") appear between statements and several
// statements are present in two variants (apparently the removed and the added
// version). It does not compile as shown - confirm against the real file.
//
// Visible flow:
//   1. init() the shared lookup structures.
//   2. Load rows of t_crawler_fixdata into fixMap.
//   3. Patch rows of t_crawler_lesson_school using fixMap plus the
//      bzSchoolNameList / handMatchSchoolList lookups, then batch-update.
//   4. Patch rows of t_crawler_lesson whose teacher_name is '--', then batch-update.
// Initialize the shared lookup structures
init();
print("开始打补丁...");
// Start applying the patches
// NOTE(review): two declarations of sql follow - presumably old and new variant;
// the second one additionally excludes rows whose teacher_name is '--'.
String sql = "select * from t_crawler_fixdata";
String sql = "select * from t_crawler_fixdata where teacher_name<>'--'";
List<Record> fixList = Db.find(sql);
Map<String, Record> fixMap = new HashMap<>();// Build a HashMap of the fix records
for (Record record : fixList) {
// NOTE(review): the statement that defines `key` falls in the gap before the next
// hunk and is not visible here; the lookups below use lesson_id as the map key.
@ -552,38 +555,46 @@ public class BookLesson {
fixMap.put(key, record);
}
// Find every lesson that needs patching [lesson-to-school mapping]
// NOTE(review): two assignments follow - the first selects by lessons whose
// teacher_name is '--', the second by empty/null organization_name.
sql = "select * from t_crawler_lesson_school where lesson_id in (select lesson_id from t_crawler_lesson where teacher_name='--')";
sql = "select * from t_crawler_lesson_school where (organization_name='' or organization_name is null)";
List<Record> toFixList = Db.find(sql);
for (Record record : toFixList) {
String lesson_id = record.getStr("lesson_id");
// Collects only the records that were actually modified
List<Record> writeList = new ArrayList<>();
for (Record toFixRecord : toFixList) {
String lesson_id = toFixRecord.getStr("lesson_id");
if (fixMap.containsKey(lesson_id)) {
Record r = fixMap.get(lesson_id);
String teacherName = r.getStr("teacher_name");
String teacher_school_name = r.getStr("teacher_school_name");
record.set("original_school_name", teacher_school_name);// original name
Record fixRecord = fixMap.get(lesson_id);
String teacherName = fixRecord.getStr("teacher_name");
String teacher_school_name = fixRecord.getStr("teacher_school_name");
toFixRecord.set("original_school_name", teacher_school_name);// original name
// NOTE(review): looks like a leftover debugging probe for one specific school
// name - confirm whether it should stay. It would also throw if
// teacher_school_name were null.
if (teacher_school_name.equals("长春市第二中学")) {
System.out.println("Here!");
}
// School name found in bzSchoolNameList: copy its attributes, match_type = 1
if (bzSchoolNameList.containsKey(teacher_school_name)) {
Kv kv = bzSchoolNameList.get(teacher_school_name);
record.set("organization_name", kv.getStr("organization_name"));
record.set("organization_no", kv.getStr("organization_no"));
record.set("gather_regionc", kv.getStr("gather_regionc"));
record.set("school_running_type", kv.getStr("school_running_type"));
record.set("match_type", 1);
record.set("teacher_name", teacherName);
// NOTE(review): this variant stores teacher_school_name itself as
// organization_name, whereas the variant above reads it from kv - confirm intent.
toFixRecord.set("organization_name", teacher_school_name);
toFixRecord.set("organization_no", kv.getStr("organization_no"));
toFixRecord.set("gather_regionc", kv.getStr("gather_regionc"));
toFixRecord.set("school_running_type", kv.getStr("school_running_type"));
toFixRecord.set("match_type", 1);
toFixRecord.set("teacher_name", teacherName);
writeList.add(toFixRecord);
// Otherwise, school name found in handMatchSchoolList: match_type = 2
} else if (handMatchSchoolList.containsKey(teacher_school_name)) {
Kv kv = handMatchSchoolList.get(teacher_school_name);
record.set("organization_name", kv.getStr("organization_name"));
record.set("organization_no", kv.getStr("organization_no"));
record.set("gather_regionc", kv.getStr("gather_regionc"));
record.set("school_running_type", kv.getStr("school_running_type"));
record.set("match_type", 2);
record.set("teacher_name", teacherName);
toFixRecord.set("organization_name", kv.getStr("organization_name"));
toFixRecord.set("organization_no", kv.getStr("organization_no"));
toFixRecord.set("gather_regionc", kv.getStr("gather_regionc"));
toFixRecord.set("school_running_type", kv.getStr("school_running_type"));
toFixRecord.set("match_type", 2);
toFixRecord.set("teacher_name", teacherName);
writeList.add(toFixRecord);
}
}
}
// NOTE(review): two batch updates follow - the first passes every selected row
// (toFixList), the second only the rows collected in writeList.
Db.batchUpdate("t_crawler_lesson_school","lesson_id", toFixList, batchSize);
Db.batchUpdate("t_crawler_lesson_school", "lesson_id", writeList, batchSize);
// Further patching is still needed [lesson-to-teacher-name mapping]
sql = "select * from t_crawler_lesson where teacher_name='--'";
List<Record> toFixList2 = Db.find(sql);
writeList = new ArrayList<>();
for (Record record : toFixList2) {
String lesson_id = record.getStr("lesson_id");
if (fixMap.containsKey(lesson_id)) {
// NOTE(review): the statements that define `r` and `teacherName` for this loop
// fall in the gap before the next hunk and are not visible here.
@ -592,8 +603,10 @@ public class BookLesson {
String teacher_school_name = r.getStr("teacher_school_name");
record.set("teacher_school_name", teacher_school_name);// original name
record.set("teacher_name", teacherName);
writeList.add(record);
}
}
// NOTE(review): again two variants - all selected rows (toFixList2) vs. only the
// modified ones (writeList).
Db.batchUpdate("t_crawler_lesson","lesson_id", toFixList2, batchSize);
Db.batchUpdate("t_crawler_lesson", "lesson_id", writeList, batchSize);
print("打补丁完成!");
}
}

Loading…
Cancel
Save