|
|
|
@ -1,22 +1,28 @@
|
|
|
|
|
package Tools.Crawler;
|
|
|
|
|
|
|
|
|
|
import cn.hutool.core.date.DateTime;
|
|
|
|
|
import com.alibaba.fastjson.JSONArray;
|
|
|
|
|
import com.alibaba.fastjson.JSONObject;
|
|
|
|
|
import com.jfinal.plugin.activerecord.Db;
|
|
|
|
|
import com.jfinal.plugin.activerecord.Record;
|
|
|
|
|
|
|
|
|
|
import java.time.LocalDate;
|
|
|
|
|
import java.time.LocalDateTime;
|
|
|
|
|
import java.time.format.DateTimeFormatter;
|
|
|
|
|
import java.util.*;
|
|
|
|
|
import java.util.concurrent.CompletableFuture;
|
|
|
|
|
import java.util.concurrent.ExecutorService;
|
|
|
|
|
import java.util.concurrent.Executors;
|
|
|
|
|
import java.util.concurrent.atomic.AtomicInteger;
|
|
|
|
|
|
|
|
|
|
public class YunXiaoKnowledge {
|
|
|
|
|
// Synchronized list that accumulates the lesson records built by collectLesson; main later passes it to batchSaveData
|
|
|
|
|
public static List<Record> lessonList = Collections.synchronizedList(new ArrayList<>());
|
|
|
|
|
// NOTE(review): presumably the chunk size used when batch-saving records; its use is not visible in this excerpt, confirm
public static int batchSize = 100;
|
|
|
|
|
|
|
|
|
|
// Progress counter: collectLesson increments it once per node whose request returned a response; main resets it to 0 before the lesson phase
|
|
|
|
|
private static final AtomicInteger processedCount = new AtomicInteger(0);
|
|
|
|
|
private static int totalCount = 0; // Number of knowledge nodes to process; main sets it from the size of the t_crawler_structure_knowledge query result
|
|
|
|
|
|
|
|
|
|
// 定义线程池
|
|
|
|
|
private static final ExecutorService executorService = Executors.newFixedThreadPool(
|
|
|
|
|
Math.max(4, Runtime.getRuntime().availableProcessors())
|
|
|
|
@ -54,6 +60,7 @@ public class YunXiaoKnowledge {
|
|
|
|
|
|
|
|
|
|
return tasks;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 递归收集知识点节点信息
|
|
|
|
|
*/
|
|
|
|
@ -86,61 +93,74 @@ public class YunXiaoKnowledge {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 收集课程信息
|
|
|
|
|
*/
|
|
|
|
|
public static void collectLesson(String nodeId, String stageCode, String subjectCode) {
|
|
|
|
|
String url = "https://yx.ccsjy.cn/api/cloud-school/v1/cloudLesson/getOnDemandLessonPage";
|
|
|
|
|
JSONObject argBook = new JSONObject();
|
|
|
|
|
argBook.put("nodeId", nodeId);
|
|
|
|
|
argBook.put("nodeType", 2);
|
|
|
|
|
argBook.put("pageNum", 1);
|
|
|
|
|
argBook.put("pageSize", 10000);
|
|
|
|
|
argBook.put("sortType", 2);
|
|
|
|
|
argBook.put("stageCode", stageCode);
|
|
|
|
|
argBook.put("subjectCode", subjectCode);
|
|
|
|
|
String respBook = YunXiaoBook.doRequestWithRetry(url, argBook.toString(), false, 3);
|
|
|
|
|
|
|
|
|
|
if (respBook == null) return;
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
JSONArray jsonArrSource = JSONObject.parseObject(respBook)
|
|
|
|
|
.getJSONObject("data")
|
|
|
|
|
.getJSONArray("rows");
|
|
|
|
|
|
|
|
|
|
for (int m = 0; m < jsonArrSource.size(); m++) {
|
|
|
|
|
JSONObject jsonSource = jsonArrSource.getJSONObject(m);
|
|
|
|
|
LocalDateTime dateTime = LocalDateTime.parse(
|
|
|
|
|
jsonSource.getString("publishTime"),
|
|
|
|
|
DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")
|
|
|
|
|
);
|
|
|
|
|
String url = "https://yx.ccsjy.cn/api/cloud-school/v1/cloudLesson/getOnDemandLessonPage";
|
|
|
|
|
JSONObject argBook = new JSONObject();
|
|
|
|
|
argBook.put("nodeId", nodeId);
|
|
|
|
|
argBook.put("nodeType", 2);
|
|
|
|
|
argBook.put("pageNum", 1);
|
|
|
|
|
argBook.put("pageSize", 10000);
|
|
|
|
|
argBook.put("sortType", 2);
|
|
|
|
|
argBook.put("stageCode", stageCode);
|
|
|
|
|
argBook.put("subjectCode", subjectCode);
|
|
|
|
|
String respBook = YunXiaoBook.doRequestWithRetry(url, argBook.toString(), false, 3);
|
|
|
|
|
|
|
|
|
|
Record record = new Record()
|
|
|
|
|
.set("lesson_id", jsonSource.getString("lessonId"))
|
|
|
|
|
.set("lesson_name", jsonSource.getString("lessonName"))
|
|
|
|
|
.set("node_id", nodeId)
|
|
|
|
|
.set("teacher_id", jsonSource.getString("teacherId"))
|
|
|
|
|
.set("teacher_school_id", jsonSource.getString("teacherSchoolId"))
|
|
|
|
|
.set("teacher_school_name", jsonSource.getString("teacherSchoolName"))
|
|
|
|
|
.set("teacher_name", jsonSource.getString("teacherName"))
|
|
|
|
|
.set("book_id", null)
|
|
|
|
|
.set("scheme_id", null)
|
|
|
|
|
.set("subject_id", subjectCode)
|
|
|
|
|
.set("grade_code", jsonSource.getString("gradeCode"))
|
|
|
|
|
.set("publish_time", dateTime.toLocalDate())
|
|
|
|
|
.set("stage_id", stageCode)
|
|
|
|
|
.set("preview_count", jsonSource.getIntValue("previewCount"))
|
|
|
|
|
.set("learning_person_count", jsonSource.getIntValue("learningPersonCount"))
|
|
|
|
|
.set("learning_person_times", jsonSource.getIntValue("learningPersonTimes"))
|
|
|
|
|
.set("id", UUID.randomUUID().toString())
|
|
|
|
|
.set("node_type", 2);
|
|
|
|
|
|
|
|
|
|
lessonList.add(record);
|
|
|
|
|
if (respBook != null) {
|
|
|
|
|
JSONArray jsonArrSource = JSONObject.parseObject(respBook)
|
|
|
|
|
.getJSONObject("data")
|
|
|
|
|
.getJSONArray("rows");
|
|
|
|
|
|
|
|
|
|
// 添加进度输出
|
|
|
|
|
int current = processedCount.incrementAndGet();
|
|
|
|
|
if (current % 10 == 0 || current == totalCount) { // 每处理10个节点输出一次进度
|
|
|
|
|
System.out.println(DateTime.now()+" "+String.format("进度: %d/%d (%.2f%%), 当前节点课程数: %d",
|
|
|
|
|
current, totalCount,
|
|
|
|
|
(current * 100.0 / totalCount),
|
|
|
|
|
jsonArrSource == null ? 0 : jsonArrSource.size()));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (jsonArrSource != null && !jsonArrSource.isEmpty()) {
|
|
|
|
|
for (int m = 0; m < jsonArrSource.size(); m++) {
|
|
|
|
|
JSONObject jsonSource = jsonArrSource.getJSONObject(m);
|
|
|
|
|
LocalDateTime dateTime = LocalDateTime.parse(
|
|
|
|
|
jsonSource.getString("publishTime"),
|
|
|
|
|
DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
Record record = new Record()
|
|
|
|
|
.set("lesson_id", jsonSource.getString("lessonId"))
|
|
|
|
|
.set("lesson_name", jsonSource.getString("lessonName"))
|
|
|
|
|
.set("node_id", nodeId)
|
|
|
|
|
.set("teacher_id", jsonSource.getString("teacherId"))
|
|
|
|
|
.set("teacher_school_id", jsonSource.getString("teacherSchoolId"))
|
|
|
|
|
.set("teacher_school_name", jsonSource.getString("teacherSchoolName"))
|
|
|
|
|
.set("teacher_name", jsonSource.getString("teacherName"))
|
|
|
|
|
.set("book_id", null)
|
|
|
|
|
.set("scheme_id", null)
|
|
|
|
|
.set("subject_id", subjectCode)
|
|
|
|
|
.set("grade_code", jsonSource.getString("gradeCode"))
|
|
|
|
|
.set("publish_time", dateTime.toLocalDate())
|
|
|
|
|
.set("stage_id", stageCode)
|
|
|
|
|
.set("preview_count", jsonSource.getIntValue("previewCount"))
|
|
|
|
|
.set("learning_person_count", jsonSource.getIntValue("learningPersonCount"))
|
|
|
|
|
.set("learning_person_times", jsonSource.getIntValue("learningPersonTimes"))
|
|
|
|
|
.set("id", UUID.randomUUID().toString())
|
|
|
|
|
.set("node_type", 2);
|
|
|
|
|
|
|
|
|
|
lessonList.add(record);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} catch (Exception e) {
|
|
|
|
|
System.err.println("处理课程数据出错: " + e.getMessage());
|
|
|
|
|
System.out.println(DateTime.now()+" 处理课程数据出错 [nodeId=" + nodeId + "]: " + e.getMessage());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 批量保存数据
|
|
|
|
|
*/
|
|
|
|
@ -155,6 +175,7 @@ public class YunXiaoKnowledge {
|
|
|
|
|
Db.batchSave(tableName, batch, batch.size());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static void main(String[] args) {
|
|
|
|
|
PgInit.Init();
|
|
|
|
|
// 清空知识点表
|
|
|
|
@ -166,7 +187,7 @@ public class YunXiaoKnowledge {
|
|
|
|
|
Map<String, String> subjectMap = loadSubjectMap();
|
|
|
|
|
List<Map<String, String>> tasks = loadTasks();
|
|
|
|
|
|
|
|
|
|
System.out.println("加载到 " + stageMap.size() + " 个学段, " + subjectMap.size() + " 个学科");
|
|
|
|
|
System.out.println(DateTime.now()+" 加载到 " + stageMap.size() + " 个学段, " + subjectMap.size() + " 个学科");
|
|
|
|
|
|
|
|
|
|
for (Map<String, String> task : tasks) {
|
|
|
|
|
String stageCode = task.get("stageCode");
|
|
|
|
@ -178,7 +199,7 @@ public class YunXiaoKnowledge {
|
|
|
|
|
argBook.put("stageCode", stageCode);
|
|
|
|
|
argBook.put("subjectCode", subjectCode);
|
|
|
|
|
|
|
|
|
|
System.out.printf("开始获取学段[%s-%s]学科[%s-%s]的数据...\n",
|
|
|
|
|
System.out.printf(DateTime.now() + " 开始获取学段[%s-%s]学科[%s-%s]的数据...\n",
|
|
|
|
|
stageCode, stageMap.get(stageCode),
|
|
|
|
|
subjectCode, subjectMap.get(subjectCode));
|
|
|
|
|
String url = "https://yx.ccsjy.cn/api/business/v1/knowledge/tree";
|
|
|
|
@ -193,26 +214,32 @@ public class YunXiaoKnowledge {
|
|
|
|
|
|
|
|
|
|
collectNodes(tree, allRecords, stageCode, subjectCode, stageMap, subjectMap);
|
|
|
|
|
|
|
|
|
|
System.out.printf("学段[%s]学科[%s]共收集到 %d 条记录\n",
|
|
|
|
|
System.out.printf(DateTime.now() + " 学段[%s]学科[%s]共收集到 %d 条记录\n",
|
|
|
|
|
stageMap.get(stageCode),
|
|
|
|
|
subjectMap.get(subjectCode),
|
|
|
|
|
allRecords.size());
|
|
|
|
|
|
|
|
|
|
Db.batchSave("t_crawler_structure_knowledge", allRecords, allRecords.size());
|
|
|
|
|
} catch (Exception e) {
|
|
|
|
|
System.err.println("处理数据时出错: " + e.getMessage());
|
|
|
|
|
System.err.println(DateTime.now() + " 处理数据时出错: " + e.getMessage());
|
|
|
|
|
e.printStackTrace();
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
System.err.printf("获取学段[%s]学科[%s]数据失败!\n",
|
|
|
|
|
System.err.printf(DateTime.now() + " 获取学段[%s]学科[%s]数据失败!\n",
|
|
|
|
|
stageMap.get(stageCode),
|
|
|
|
|
subjectMap.get(subjectCode));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
//遍历所有的知识点
|
|
|
|
|
// 第二阶段:多线程收集课程信息
|
|
|
|
|
System.out.println("开始收集课程信息...");
|
|
|
|
|
System.out.println(DateTime.now()+" 开始收集课程信息...");
|
|
|
|
|
List<Record> listKnowledge = Db.find("select * from t_crawler_structure_knowledge");
|
|
|
|
|
totalCount = listKnowledge.size(); // 设置总数
|
|
|
|
|
System.out.println(DateTime.now()+" 共有 " + totalCount + " 个知识点需要处理");
|
|
|
|
|
|
|
|
|
|
// 重置计数器
|
|
|
|
|
processedCount.set(0);
|
|
|
|
|
|
|
|
|
|
// 创建任务列表
|
|
|
|
|
List<CompletableFuture<Void>> futures = new ArrayList<>();
|
|
|
|
|
|
|
|
|
@ -227,16 +254,17 @@ public class YunXiaoKnowledge {
|
|
|
|
|
futures.add(future);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// 等待所有任务完成
|
|
|
|
|
CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join();
|
|
|
|
|
|
|
|
|
|
// 清理旧数据并保存新数据
|
|
|
|
|
System.out.println("开始保存课程数据...");
|
|
|
|
|
System.out.println(DateTime.now() + " 开始保存课程数据...");
|
|
|
|
|
Db.update("delete from t_crawler_lesson where node_type=2");
|
|
|
|
|
batchSaveData(lessonList, "t_crawler_lesson");
|
|
|
|
|
|
|
|
|
|
// 关闭线程池
|
|
|
|
|
executorService.shutdown();
|
|
|
|
|
System.out.println("数据处理完成! 共处理 " + lessonList.size() + " 条课程数据");
|
|
|
|
|
System.out.println(DateTime.now() + " 数据处理完成! 共处理 " + lessonList.size() + " 条课程数据");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|