|
|
|
@ -1,243 +0,0 @@
|
|
|
|
|
package Tools.Crawler.Backup;
|
|
|
|
|
|
|
|
|
|
import Tools.Crawler.Util;
|
|
|
|
|
import cn.hutool.core.date.DateTime;
|
|
|
|
|
import com.alibaba.fastjson.JSONArray;
|
|
|
|
|
import com.alibaba.fastjson.JSONObject;
|
|
|
|
|
import com.jfinal.kit.PropKit;
|
|
|
|
|
import com.jfinal.plugin.activerecord.ActiveRecordPlugin;
|
|
|
|
|
import com.jfinal.plugin.activerecord.CaseInsensitiveContainerFactory;
|
|
|
|
|
import com.jfinal.plugin.activerecord.Db;
|
|
|
|
|
import com.jfinal.plugin.activerecord.dialect.PostgreSqlDialect;
|
|
|
|
|
import com.jfinal.plugin.hikaricp.HikariCpPlugin;
|
|
|
|
|
import com.jfinal.plugin.activerecord.Record;
|
|
|
|
|
|
|
|
|
|
import java.util.ArrayList;
|
|
|
|
|
import java.util.HashMap;
|
|
|
|
|
import java.util.Map;
|
|
|
|
|
import java.util.UUID;
|
|
|
|
|
import java.util.List;
|
|
|
|
|
|
|
|
|
|
// 爬取一次大约需要20分钟
|
|
|
|
|
public class YunXiaoOneByOne {
|
|
|
|
|
public static JSONArray tempTree = new JSONArray();
|
|
|
|
|
|
|
|
|
|
public static void traverseTree(JSONArray treeArray) {
|
|
|
|
|
for (Object item : treeArray) {
|
|
|
|
|
JSONObject node = (JSONObject) item;
|
|
|
|
|
JSONObject jsonObj = new JSONObject();
|
|
|
|
|
jsonObj.put("nodeId", node.getString("key"));
|
|
|
|
|
jsonObj.put("nodeName", node.getString("title"));
|
|
|
|
|
jsonObj.put("isLeaf", node.getBoolean("isLeaf"));
|
|
|
|
|
jsonObj.put("parentValue", node.getString("parentValue"));
|
|
|
|
|
tempTree.add(jsonObj);
|
|
|
|
|
|
|
|
|
|
// 判断是否有children并且不为空
|
|
|
|
|
if (node.containsKey("children") && !node.getJSONArray("children").isEmpty()) {
|
|
|
|
|
JSONArray children = node.getJSONArray("children");
|
|
|
|
|
traverseTree(children);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static void print(String msg) {
|
|
|
|
|
//先输出时间,再输出内容
|
|
|
|
|
System.out.println(DateTime.now() + " " + msg);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public static void main(String[] args) {
|
|
|
|
|
PropKit.use("application.properties");
|
|
|
|
|
HikariCpPlugin hp = new HikariCpPlugin(PropKit.get("jdbcUrl"), PropKit.get("user"),
|
|
|
|
|
PropKit.get("password").trim(), PropKit.get("driverClassName"));
|
|
|
|
|
hp.start();
|
|
|
|
|
// 配置ActiveRecord插件
|
|
|
|
|
ActiveRecordPlugin arp = new ActiveRecordPlugin(hp);
|
|
|
|
|
//配置默认小写
|
|
|
|
|
arp.setContainerFactory(new CaseInsensitiveContainerFactory(true));
|
|
|
|
|
arp.setDialect(new PostgreSqlDialect());
|
|
|
|
|
arp.start();
|
|
|
|
|
|
|
|
|
|
//清空爬虫表
|
|
|
|
|
String truncatSql = "truncate table t_crawler_subject";
|
|
|
|
|
Db.update(truncatSql);
|
|
|
|
|
truncatSql = "truncate table t_crawler_scheme";
|
|
|
|
|
Db.update(truncatSql);
|
|
|
|
|
truncatSql = "truncate table t_crawler_book";
|
|
|
|
|
Db.update(truncatSql);
|
|
|
|
|
truncatSql = "truncate table t_crawler_structure";
|
|
|
|
|
Db.update(truncatSql);
|
|
|
|
|
truncatSql = "truncate table t_crawler_lesson";
|
|
|
|
|
Db.update(truncatSql);
|
|
|
|
|
|
|
|
|
|
Map<String, String> map = new HashMap<>();
|
|
|
|
|
map.put("1", "学前");
|
|
|
|
|
map.put("2", "小学");
|
|
|
|
|
map.put("3", "初中");
|
|
|
|
|
map.put("4", "高中");
|
|
|
|
|
|
|
|
|
|
print("开始爬取数据!");
|
|
|
|
|
//资源写入的数据集
|
|
|
|
|
List<Record> subjectList = new ArrayList<>();
|
|
|
|
|
List<Record> lessonList = new ArrayList<>();
|
|
|
|
|
List<Record> schemeList = new ArrayList<>();
|
|
|
|
|
List<Record> bookList = new ArrayList<>();
|
|
|
|
|
List<Record> structureList = new ArrayList<>();
|
|
|
|
|
|
|
|
|
|
map.forEach((key, value) -> {
|
|
|
|
|
String respSubject = null;
|
|
|
|
|
try {
|
|
|
|
|
respSubject = Util.doGet("https://yx.ccsjy.cn/api/business/v1/subject/list/" + key);
|
|
|
|
|
} catch (InterruptedException e) {
|
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
|
}
|
|
|
|
|
JSONObject jsonObj = JSONObject.parseObject(respSubject);
|
|
|
|
|
JSONArray jsonArr = jsonObj.getJSONObject("data").getJSONArray("rows");
|
|
|
|
|
for (int i = 0; i < jsonArr.size(); i++) {
|
|
|
|
|
JSONObject jsonSubject = jsonArr.getJSONObject(i);
|
|
|
|
|
String subjectCode = jsonSubject.getString("subjectCode");
|
|
|
|
|
String subjectName = jsonSubject.getString("subjectName");
|
|
|
|
|
|
|
|
|
|
Record rSubject = new Record();
|
|
|
|
|
rSubject.set("subject_id", subjectCode);
|
|
|
|
|
rSubject.set("subject_name", subjectName);
|
|
|
|
|
rSubject.set("stage_id", key);
|
|
|
|
|
subjectList.add(rSubject);
|
|
|
|
|
|
|
|
|
|
JSONObject argScheme = new JSONObject();
|
|
|
|
|
argScheme.put("subjectCode", subjectCode);
|
|
|
|
|
argScheme.put("systemId", 1);
|
|
|
|
|
|
|
|
|
|
String respScheme = Util.doPost("https://yx.ccsjy.cn/api/business/v1/edition/list", argScheme.toString());
|
|
|
|
|
JSONObject jsonObjScheme = JSONObject.parseObject(respScheme);
|
|
|
|
|
JSONArray jsonArrScheme = jsonObjScheme.getJSONArray("data");
|
|
|
|
|
|
|
|
|
|
for (int j = 0; j < jsonArrScheme.size(); j++) {
|
|
|
|
|
JSONObject jsonScheme = jsonArrScheme.getJSONObject(j);
|
|
|
|
|
String businessEditionId = jsonScheme.getString("businessEditionId");
|
|
|
|
|
String editionName = jsonScheme.getString("editionName");
|
|
|
|
|
|
|
|
|
|
Record rScheme = new Record();
|
|
|
|
|
rScheme.set("scheme_id", businessEditionId);
|
|
|
|
|
rScheme.set("scheme_name", editionName);
|
|
|
|
|
rScheme.set("subject_id", subjectCode);
|
|
|
|
|
rScheme.set("stage_id", key);
|
|
|
|
|
rScheme.set("id", UUID.randomUUID().toString());
|
|
|
|
|
schemeList.add(rScheme);
|
|
|
|
|
|
|
|
|
|
JSONObject argBook = new JSONObject();
|
|
|
|
|
argBook.put("stageCode", key);
|
|
|
|
|
argBook.put("subjectCode", subjectCode);
|
|
|
|
|
argBook.put("businessEditionId", businessEditionId);
|
|
|
|
|
|
|
|
|
|
String respBook = Util.doPost("https://yx.ccsjy.cn/api/business/v1/book/list", argBook.toString());
|
|
|
|
|
JSONObject jsonObjBook = JSONObject.parseObject(respBook);
|
|
|
|
|
JSONArray jsonArrBook = jsonObjBook.getJSONArray("data");
|
|
|
|
|
|
|
|
|
|
for (int k = 0; k < jsonArrBook.size(); k++) {
|
|
|
|
|
JSONObject jsonBook = jsonArrBook.getJSONObject(k);
|
|
|
|
|
String businessBookId = jsonBook.getString("businessBookId");
|
|
|
|
|
String bookName = jsonBook.getString("bookName");
|
|
|
|
|
|
|
|
|
|
Record rBook = new Record();
|
|
|
|
|
rBook.set("book_id", businessBookId);
|
|
|
|
|
rBook.set("book_name", bookName);
|
|
|
|
|
rBook.set("scheme_id", businessEditionId);
|
|
|
|
|
rBook.set("subject_id", subjectCode);
|
|
|
|
|
rBook.set("stage_id", key);
|
|
|
|
|
rBook.set("id", UUID.randomUUID().toString());
|
|
|
|
|
bookList.add(rBook);
|
|
|
|
|
|
|
|
|
|
JSONObject argTree = new JSONObject();
|
|
|
|
|
argTree.put("businessBookId", businessBookId);
|
|
|
|
|
argTree.put("childrensFlag", 1);
|
|
|
|
|
argTree.put("parentId", -1);
|
|
|
|
|
argTree.put("searchKeyword", "");
|
|
|
|
|
|
|
|
|
|
String respTree = Util.doPost("https://yx.ccsjy.cn/api/business/v1/chapter/tree", argTree.toString());
|
|
|
|
|
JSONObject jsonObjTree = JSONObject.parseObject(respTree);
|
|
|
|
|
JSONArray jsonArrTree = jsonObjTree.getJSONObject("data").getJSONArray("tree");
|
|
|
|
|
|
|
|
|
|
tempTree = new JSONArray();
|
|
|
|
|
traverseTree(jsonArrTree);
|
|
|
|
|
|
|
|
|
|
for (int n = 0; n < tempTree.size(); n++) {
|
|
|
|
|
JSONObject jsonTree = tempTree.getJSONObject(n);
|
|
|
|
|
String nodeId = jsonTree.getString("nodeId");
|
|
|
|
|
String nodeName = jsonTree.getString("nodeName");
|
|
|
|
|
Boolean isLeaf = jsonTree.getBoolean("isLeaf");
|
|
|
|
|
String parentValue = jsonTree.getString("parentValue");
|
|
|
|
|
int is_leaf = 0;
|
|
|
|
|
if (isLeaf) {
|
|
|
|
|
is_leaf = 1;
|
|
|
|
|
}
|
|
|
|
|
Record rStructure = new Record();
|
|
|
|
|
rStructure.set("node_id", nodeId);
|
|
|
|
|
rStructure.set("node_name", nodeName);
|
|
|
|
|
rStructure.set("parent_id", parentValue);
|
|
|
|
|
rStructure.set("is_leaf", is_leaf);
|
|
|
|
|
rStructure.set("book_id", businessBookId);
|
|
|
|
|
rStructure.set("scheme_id", businessEditionId);
|
|
|
|
|
rStructure.set("subject_id", subjectCode);
|
|
|
|
|
rStructure.set("stage_id", key);
|
|
|
|
|
rStructure.set("id", UUID.randomUUID().toString());
|
|
|
|
|
structureList.add(rStructure);
|
|
|
|
|
|
|
|
|
|
JSONObject argSource = new JSONObject();
|
|
|
|
|
argSource.put("pageNum", 1);
|
|
|
|
|
argSource.put("pageSize", 100);
|
|
|
|
|
argSource.put("businessBookId", businessBookId);
|
|
|
|
|
argSource.put("nodeId", nodeId);
|
|
|
|
|
argSource.put("stageCode", key);
|
|
|
|
|
argSource.put("subjectCode", subjectCode);
|
|
|
|
|
|
|
|
|
|
argSource.put("excellentFlag", "");
|
|
|
|
|
argSource.put("nodeType", 1);
|
|
|
|
|
argSource.put("sortType", 2);
|
|
|
|
|
argSource.put("source", "");
|
|
|
|
|
argSource.put("searchKeyword", "");
|
|
|
|
|
|
|
|
|
|
String respSource = Util.doPost("https://yx.ccsjy.cn/api/cloud-school/v1/cloudLesson/getOnDemandLessonPage", argSource.toString());
|
|
|
|
|
JSONObject jsonObjSource = JSONObject.parseObject(respSource);
|
|
|
|
|
JSONArray jsonArrSource = jsonObjSource.getJSONObject("data").getJSONArray("rows");
|
|
|
|
|
|
|
|
|
|
print("正在爬取:【" + value + "," + subjectName + "," + editionName + "," + bookName + "," + nodeName + "】下的资源!");
|
|
|
|
|
|
|
|
|
|
for (int m = 0; m < jsonArrSource.size(); m++) {
|
|
|
|
|
JSONObject jsonSource = jsonArrSource.getJSONObject(m);
|
|
|
|
|
String lessonId = jsonSource.getString("lessonId");
|
|
|
|
|
String lessonName = jsonSource.getString("lessonName");
|
|
|
|
|
String teacherSchoolId = jsonSource.getString("teacherSchoolId");
|
|
|
|
|
String teacherSchoolName = jsonSource.getString("teacherSchoolName");
|
|
|
|
|
String teacherId = jsonSource.getString("teacherId");
|
|
|
|
|
String teacherName = jsonSource.getString("teacherName");
|
|
|
|
|
Record record = new Record();
|
|
|
|
|
record.set("lesson_id", lessonId);
|
|
|
|
|
record.set("lesson_name", lessonName);
|
|
|
|
|
record.set("node_id", nodeId);
|
|
|
|
|
record.set("teacher_id", teacherId);
|
|
|
|
|
record.set("teacher_school_id", teacherSchoolId);
|
|
|
|
|
record.set("teacher_school_name", teacherSchoolName);
|
|
|
|
|
record.set("teacher_name", teacherName);
|
|
|
|
|
record.set("book_id", businessBookId);
|
|
|
|
|
record.set("scheme_id", businessEditionId);
|
|
|
|
|
record.set("subject_id", subjectCode);
|
|
|
|
|
record.set("stage_id", key);
|
|
|
|
|
record.set("id", UUID.randomUUID().toString());
|
|
|
|
|
lessonList.add(record);
|
|
|
|
|
}
|
|
|
|
|
print("已收集资源数量:"+lessonList.size()+"个。");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
print("开始保存数据...");
|
|
|
|
|
Db.batchSave("t_crawler_subject", subjectList, 300);
|
|
|
|
|
Db.batchSave("t_crawler_scheme", schemeList, 300);
|
|
|
|
|
Db.batchSave("t_crawler_book", bookList, 300);
|
|
|
|
|
Db.batchSave("t_crawler_structure", structureList, 300);
|
|
|
|
|
Db.batchSave("t_crawler_lesson", lessonList, 300);
|
|
|
|
|
print("爬取数据完成!");
|
|
|
|
|
}
|
|
|
|
|
}
|