|
|
|
@ -3,25 +3,41 @@ package Tools.Crawler;
|
|
|
|
|
import cn.hutool.core.date.DateTime;
|
|
|
|
|
import com.alibaba.fastjson.JSONArray;
|
|
|
|
|
import com.alibaba.fastjson.JSONObject;
|
|
|
|
|
import com.dsideal.QingLong.Start;
|
|
|
|
|
import com.jfinal.kit.PropKit;
|
|
|
|
|
import com.jfinal.plugin.activerecord.ActiveRecordPlugin;
|
|
|
|
|
import com.jfinal.plugin.activerecord.CaseInsensitiveContainerFactory;
|
|
|
|
|
import com.jfinal.plugin.activerecord.Db;
|
|
|
|
|
import com.jfinal.plugin.activerecord.Record;
|
|
|
|
|
import com.jfinal.plugin.activerecord.dialect.PostgreSqlDialect;
|
|
|
|
|
import com.jfinal.plugin.hikaricp.HikariCpPlugin;
|
|
|
|
|
import com.jfinal.plugin.activerecord.Record;
|
|
|
|
|
|
|
|
|
|
import java.util.ArrayList;
|
|
|
|
|
import java.io.File;
|
|
|
|
|
import java.util.HashMap;
|
|
|
|
|
import java.util.Map;
|
|
|
|
|
import java.util.UUID;
|
|
|
|
|
import java.util.List;
|
|
|
|
|
import java.util.concurrent.*;
|
|
|
|
|
import java.util.concurrent.atomic.AtomicInteger;
|
|
|
|
|
|
|
|
|
|
// A full crawl takes roughly 20 minutes.
|
|
|
|
|
public class YunXiao {
|
|
|
|
|
public static JSONArray tempTree = new JSONArray();
|
|
|
|
|
// Thread-safe result lists (CopyOnWriteArrayList). The process* methods append one Record per
// crawled subject / lesson / scheme / book / chapter node to these lists.
|
|
|
|
|
private static final CopyOnWriteArrayList<Record> subjectList = new CopyOnWriteArrayList<>();
|
|
|
|
|
private static final CopyOnWriteArrayList<Record> lessonList = new CopyOnWriteArrayList<>();
|
|
|
|
|
private static final CopyOnWriteArrayList<Record> schemeList = new CopyOnWriteArrayList<>();
|
|
|
|
|
private static final CopyOnWriteArrayList<Record> bookList = new CopyOnWriteArrayList<>();
|
|
|
|
|
private static final CopyOnWriteArrayList<Record> structureList = new CopyOnWriteArrayList<>();
|
|
|
|
|
|
|
|
|
|
// Progress counter: incremented once per collected lesson record in processResource.
|
|
|
|
|
private static final AtomicInteger resourceCount = new AtomicInteger(0);
|
|
|
|
|
|
|
|
|
|
public static void traverseTree(JSONArray treeArray) {
|
|
|
|
|
// Size of each fixed thread pool. Note that the process* methods create a separate pool of this
// size at every nesting level, so the number of live threads can be much larger than this value.
|
|
|
|
|
private static final int ThreadCount = 4;
|
|
|
|
|
// Maximum number of attempts passed to doRequestWithRetry.
|
|
|
|
|
private static final int MaxRetryCount = 3;
|
|
|
|
|
// Number of records per batch passed to Db.batchSave.
|
|
|
|
|
private static final int batchSize = 300;
|
|
|
|
|
|
|
|
|
|
public static void traverseTree(JSONArray treeArray, JSONArray tempTree) {
|
|
|
|
|
for (Object item : treeArray) {
|
|
|
|
|
JSONObject node = (JSONObject) item;
|
|
|
|
|
JSONObject jsonObj = new JSONObject();
|
|
|
|
@ -31,43 +47,333 @@ public class YunXiao {
|
|
|
|
|
jsonObj.put("parentValue", node.getString("parentValue"));
|
|
|
|
|
tempTree.add(jsonObj);
|
|
|
|
|
|
|
|
|
|
// 判断是否有children并且不为空
|
|
|
|
|
if (node.containsKey("children") && !node.getJSONArray("children").isEmpty()) {
|
|
|
|
|
JSONArray children = node.getJSONArray("children");
|
|
|
|
|
traverseTree(children);
|
|
|
|
|
traverseTree(node.getJSONArray("children"), tempTree);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 输出信息
|
|
|
|
|
*
|
|
|
|
|
* @param msg
|
|
|
|
|
*/
|
|
|
|
|
public static void print(String msg) {
|
|
|
|
|
//先输出时间,再输出内容
|
|
|
|
|
System.out.println(DateTime.now() + " " + msg);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 带有重试机制的请求方法
|
|
|
|
|
*
|
|
|
|
|
* @param url
|
|
|
|
|
* @param jsonBody
|
|
|
|
|
* @param isGet
|
|
|
|
|
* @param maxRetries
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
private static String doRequestWithRetry(String url, String jsonBody, boolean isGet, int maxRetries) {
|
|
|
|
|
int retries = 0;
|
|
|
|
|
while (retries < maxRetries) {
|
|
|
|
|
try {
|
|
|
|
|
if (isGet) {
|
|
|
|
|
return Util.doGet(url);
|
|
|
|
|
} else {
|
|
|
|
|
return Util.doPost(url, jsonBody);
|
|
|
|
|
}
|
|
|
|
|
} catch (Exception e) {
|
|
|
|
|
retries++;
|
|
|
|
|
if (retries == maxRetries) {
|
|
|
|
|
print("请求失败: " + url + " 重试次数: " + retries);
|
|
|
|
|
throw new RuntimeException("Failed after " + maxRetries + " retries", e);
|
|
|
|
|
}
|
|
|
|
|
try {
|
|
|
|
|
Thread.sleep(1000);
|
|
|
|
|
} catch (InterruptedException ie) {
|
|
|
|
|
Thread.currentThread().interrupt();
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 处理科目
|
|
|
|
|
*
|
|
|
|
|
* @param subject
|
|
|
|
|
* @param key
|
|
|
|
|
* @param value
|
|
|
|
|
*/
|
|
|
|
|
private static void processSubject(JSONObject subject, String key, String value) throws InterruptedException {
|
|
|
|
|
String subjectCode = subject.getString("subjectCode");
|
|
|
|
|
String subjectName = subject.getString("subjectName");
|
|
|
|
|
|
|
|
|
|
Record rSubject = new Record().set("subject_id", subjectCode).set("subject_name", subjectName).set("stage_id", key);
|
|
|
|
|
subjectList.add(rSubject);
|
|
|
|
|
|
|
|
|
|
JSONObject argScheme = new JSONObject();
|
|
|
|
|
argScheme.put("subjectCode", subjectCode);
|
|
|
|
|
argScheme.put("systemId", 1);
|
|
|
|
|
|
|
|
|
|
String respScheme = doRequestWithRetry("https://yx.ccsjy.cn/api/business/v1/edition/list",
|
|
|
|
|
argScheme.toString(), false, MaxRetryCount);
|
|
|
|
|
|
|
|
|
|
if (respScheme != null) {
|
|
|
|
|
//处理科目后,处理此科目下的版本
|
|
|
|
|
processSchemes(JSONObject.parseObject(respScheme).getJSONArray("data"), subjectCode, key, value);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 处理版本
|
|
|
|
|
*
|
|
|
|
|
* @param schemes
|
|
|
|
|
* @param subjectCode
|
|
|
|
|
* @param key
|
|
|
|
|
* @param value
|
|
|
|
|
*/
|
|
|
|
|
private static void processSchemes(JSONArray schemes, String subjectCode, String key, String value) throws InterruptedException {
|
|
|
|
|
ExecutorService schemeExecutor = Executors.newFixedThreadPool(ThreadCount);
|
|
|
|
|
CountDownLatch schemeLatch = new CountDownLatch(schemes.size());
|
|
|
|
|
|
|
|
|
|
for (int j = 0; j < schemes.size(); j++) {
|
|
|
|
|
JSONObject scheme = schemes.getJSONObject(j);
|
|
|
|
|
schemeExecutor.submit(() -> {
|
|
|
|
|
try {
|
|
|
|
|
processScheme(scheme, subjectCode, key, value);
|
|
|
|
|
} catch (InterruptedException e) {
|
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
|
} finally {
|
|
|
|
|
schemeLatch.countDown();
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
schemeLatch.await();
|
|
|
|
|
schemeExecutor.shutdown();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 处理某一个版本
|
|
|
|
|
*
|
|
|
|
|
* @param scheme
|
|
|
|
|
* @param subjectCode
|
|
|
|
|
* @param key
|
|
|
|
|
* @param value
|
|
|
|
|
*/
|
|
|
|
|
private static void processScheme(JSONObject scheme, String subjectCode, String key, String value) throws InterruptedException {
|
|
|
|
|
String businessEditionId = scheme.getString("businessEditionId");
|
|
|
|
|
String editionName = scheme.getString("editionName");
|
|
|
|
|
|
|
|
|
|
Record rScheme = new Record().set("scheme_id", businessEditionId).set("scheme_name", editionName).set("subject_id", subjectCode)
|
|
|
|
|
.set("stage_id", key).set("id", UUID.randomUUID().toString());
|
|
|
|
|
schemeList.add(rScheme);
|
|
|
|
|
|
|
|
|
|
JSONObject argBook = new JSONObject();
|
|
|
|
|
argBook.put("stageCode", key);
|
|
|
|
|
argBook.put("subjectCode", subjectCode);
|
|
|
|
|
argBook.put("businessEditionId", businessEditionId);
|
|
|
|
|
|
|
|
|
|
String respBook = doRequestWithRetry("https://yx.ccsjy.cn/api/business/v1/book/list", argBook.toString(), false, 3);
|
|
|
|
|
if (respBook != null) {
|
|
|
|
|
JSONArray books = JSONObject.parseObject(respBook).getJSONArray("data");
|
|
|
|
|
// 使用线程池处理每本书
|
|
|
|
|
ExecutorService bookExecutor = Executors.newFixedThreadPool(ThreadCount);
|
|
|
|
|
CountDownLatch bookLatch = new CountDownLatch(books.size());
|
|
|
|
|
for (int k = 0; k < books.size(); k++) {
|
|
|
|
|
JSONObject book = books.getJSONObject(k);
|
|
|
|
|
bookExecutor.submit(() -> {
|
|
|
|
|
try {
|
|
|
|
|
processBook(book, businessEditionId, subjectCode, key, value, editionName);
|
|
|
|
|
} catch (InterruptedException e) {
|
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
|
} finally {
|
|
|
|
|
bookLatch.countDown();
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
bookLatch.await();
|
|
|
|
|
bookExecutor.shutdown();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 处理册
|
|
|
|
|
*
|
|
|
|
|
* @param book
|
|
|
|
|
* @param businessEditionId
|
|
|
|
|
* @param subjectCode
|
|
|
|
|
* @param key
|
|
|
|
|
* @param value
|
|
|
|
|
* @param editionName
|
|
|
|
|
*/
|
|
|
|
|
private static void processBook(JSONObject book, String businessEditionId, String subjectCode,
|
|
|
|
|
String key, String value, String editionName) throws InterruptedException {
|
|
|
|
|
String businessBookId = book.getString("businessBookId");
|
|
|
|
|
String bookName = book.getString("bookName");
|
|
|
|
|
|
|
|
|
|
Record rBook = new Record().set("book_id", businessBookId)
|
|
|
|
|
.set("book_name", bookName).set("scheme_id", businessEditionId)
|
|
|
|
|
.set("subject_id", subjectCode).set("stage_id", key)
|
|
|
|
|
.set("id", UUID.randomUUID().toString());
|
|
|
|
|
bookList.add(rBook);
|
|
|
|
|
|
|
|
|
|
JSONObject argTree = new JSONObject();
|
|
|
|
|
argTree.put("businessBookId", businessBookId);
|
|
|
|
|
argTree.put("childrensFlag", 1);
|
|
|
|
|
argTree.put("parentId", -1);
|
|
|
|
|
argTree.put("searchKeyword", "");
|
|
|
|
|
|
|
|
|
|
String respTree = doRequestWithRetry("https://yx.ccsjy.cn/api/business/v1/chapter/tree",
|
|
|
|
|
argTree.toString(), false, MaxRetryCount);
|
|
|
|
|
|
|
|
|
|
if (respTree != null) {
|
|
|
|
|
JSONArray jsonArrTree = JSONObject.parseObject(respTree)
|
|
|
|
|
.getJSONObject("data")
|
|
|
|
|
.getJSONArray("tree");
|
|
|
|
|
|
|
|
|
|
JSONArray tempTree = new JSONArray();
|
|
|
|
|
traverseTree(jsonArrTree, tempTree);
|
|
|
|
|
|
|
|
|
|
// 使用线程池处理每个节点
|
|
|
|
|
ExecutorService nodeExecutor = Executors.newFixedThreadPool(ThreadCount);
|
|
|
|
|
CountDownLatch nodeLatch = new CountDownLatch(tempTree.size());
|
|
|
|
|
|
|
|
|
|
for (int n = 0; n < tempTree.size(); n++) {
|
|
|
|
|
JSONObject jsonTree = tempTree.getJSONObject(n);
|
|
|
|
|
nodeExecutor.submit(() -> {
|
|
|
|
|
try {
|
|
|
|
|
processNode(jsonTree, businessBookId, businessEditionId, subjectCode,
|
|
|
|
|
key, value, editionName, bookName);
|
|
|
|
|
} finally {
|
|
|
|
|
nodeLatch.countDown();
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
nodeLatch.await();
|
|
|
|
|
nodeExecutor.shutdown();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 处理节点
|
|
|
|
|
*
|
|
|
|
|
* @param jsonTree
|
|
|
|
|
* @param businessBookId
|
|
|
|
|
* @param businessEditionId
|
|
|
|
|
* @param subjectCode
|
|
|
|
|
* @param key
|
|
|
|
|
* @param value
|
|
|
|
|
* @param editionName
|
|
|
|
|
* @param bookName
|
|
|
|
|
*/
|
|
|
|
|
private static void processNode(JSONObject jsonTree, String businessBookId, String businessEditionId,
|
|
|
|
|
String subjectCode, String key, String value, String editionName,
|
|
|
|
|
String bookName) {
|
|
|
|
|
String nodeId = jsonTree.getString("nodeId");
|
|
|
|
|
String nodeName = jsonTree.getString("nodeName");
|
|
|
|
|
Boolean isLeaf = jsonTree.getBoolean("isLeaf");
|
|
|
|
|
String parentValue = jsonTree.getString("parentValue");
|
|
|
|
|
|
|
|
|
|
Record rStructure = new Record()
|
|
|
|
|
.set("node_id", nodeId).set("node_name", nodeName).set("parent_id", parentValue)
|
|
|
|
|
.set("is_leaf", isLeaf ? 1 : 0).set("book_id", businessBookId)
|
|
|
|
|
.set("scheme_id", businessEditionId).set("subject_id", subjectCode)
|
|
|
|
|
.set("stage_id", key).set("id", UUID.randomUUID().toString());
|
|
|
|
|
structureList.add(rStructure);
|
|
|
|
|
processResource(nodeId, businessBookId, subjectCode, key, value,
|
|
|
|
|
editionName, bookName, nodeName);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 处理资源
|
|
|
|
|
*
|
|
|
|
|
* @param nodeId
|
|
|
|
|
* @param businessBookId
|
|
|
|
|
* @param subjectCode
|
|
|
|
|
* @param key
|
|
|
|
|
* @param value
|
|
|
|
|
* @param editionName
|
|
|
|
|
* @param bookName
|
|
|
|
|
* @param nodeName
|
|
|
|
|
*/
|
|
|
|
|
private static void processResource(String nodeId, String businessBookId, String subjectCode,
|
|
|
|
|
String key, String value, String editionName, String bookName, String nodeName) {
|
|
|
|
|
JSONObject argSource = new JSONObject();
|
|
|
|
|
argSource.put("pageNum", 1);
|
|
|
|
|
argSource.put("pageSize", 100);
|
|
|
|
|
argSource.put("businessBookId", businessBookId);
|
|
|
|
|
argSource.put("nodeId", nodeId);
|
|
|
|
|
argSource.put("stageCode", key);
|
|
|
|
|
argSource.put("subjectCode", subjectCode);
|
|
|
|
|
argSource.put("excellentFlag", "");
|
|
|
|
|
argSource.put("nodeType", 1);
|
|
|
|
|
argSource.put("sortType", 2);
|
|
|
|
|
argSource.put("source", "");
|
|
|
|
|
argSource.put("searchKeyword", "");
|
|
|
|
|
|
|
|
|
|
String respSource = doRequestWithRetry("https://yx.ccsjy.cn/api/cloud-school/v1/cloudLesson/getOnDemandLessonPage",
|
|
|
|
|
argSource.toString(), false, MaxRetryCount);
|
|
|
|
|
|
|
|
|
|
if (respSource != null) {
|
|
|
|
|
JSONObject jsonObjSource = JSONObject.parseObject(respSource);
|
|
|
|
|
JSONArray jsonArrSource = jsonObjSource.getJSONObject("data").getJSONArray("rows");
|
|
|
|
|
|
|
|
|
|
print("正在爬取:【" + value + "," + editionName + "," + bookName + "," + nodeName + "】下的资源!");
|
|
|
|
|
|
|
|
|
|
for (int m = 0; m < jsonArrSource.size(); m++) {
|
|
|
|
|
JSONObject jsonSource = jsonArrSource.getJSONObject(m);
|
|
|
|
|
Record record = new Record()
|
|
|
|
|
.set("lesson_id", jsonSource.getString("lessonId"))
|
|
|
|
|
.set("lesson_name", jsonSource.getString("lessonName"))
|
|
|
|
|
.set("node_id", nodeId)
|
|
|
|
|
.set("teacher_id", jsonSource.getString("teacherId"))
|
|
|
|
|
.set("teacher_school_id", jsonSource.getString("teacherSchoolId"))
|
|
|
|
|
.set("teacher_school_name", jsonSource.getString("teacherSchoolName"))
|
|
|
|
|
.set("teacher_name", jsonSource.getString("teacherName"))
|
|
|
|
|
.set("book_id", businessBookId)
|
|
|
|
|
.set("scheme_id", editionName)
|
|
|
|
|
.set("subject_id", subjectCode)
|
|
|
|
|
.set("stage_id", key)
|
|
|
|
|
.set("id", UUID.randomUUID().toString());
|
|
|
|
|
lessonList.add(record);
|
|
|
|
|
int count = resourceCount.incrementAndGet();
|
|
|
|
|
if (count % 100 == 0) {
|
|
|
|
|
print("已收集资源数量:" + count + "个。");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static void main(String[] args) {
|
|
|
|
|
// 初始化数据库连接
|
|
|
|
|
PropKit.use("application.properties");
|
|
|
|
|
HikariCpPlugin hp = new HikariCpPlugin(PropKit.get("jdbcUrl"), PropKit.get("user"),
|
|
|
|
|
PropKit.get("password").trim(), PropKit.get("driverClassName"));
|
|
|
|
|
hp.start();
|
|
|
|
|
// 配置ActiveRecord插件
|
|
|
|
|
ActiveRecordPlugin arp = new ActiveRecordPlugin(hp);
|
|
|
|
|
//配置默认小写
|
|
|
|
|
arp.setContainerFactory(new CaseInsensitiveContainerFactory(true));
|
|
|
|
|
arp.setDialect(new PostgreSqlDialect());
|
|
|
|
|
|
|
|
|
|
String basePath = Start.class.getClassLoader().getResource(".").getPath();
|
|
|
|
|
File sqlDir = new File(basePath + "/Sql");
|
|
|
|
|
File[] sqlFiles = sqlDir.listFiles();
|
|
|
|
|
File[] var10 = sqlFiles != null ? sqlFiles : new File[0];
|
|
|
|
|
int var11 = var10.length;
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < var11; ++i) {
|
|
|
|
|
File sqlFile = var10[i];
|
|
|
|
|
if (sqlFile.getName().indexOf(".sql") > 0) {
|
|
|
|
|
arp.addSqlTemplate("/Sql/" + sqlFile.getName());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
arp.start();
|
|
|
|
|
|
|
|
|
|
//清空爬虫表
|
|
|
|
|
String truncatSql = "truncate table t_crawler_subject";
|
|
|
|
|
Db.update(truncatSql);
|
|
|
|
|
truncatSql = "truncate table t_crawler_scheme";
|
|
|
|
|
Db.update(truncatSql);
|
|
|
|
|
truncatSql = "truncate table t_crawler_book";
|
|
|
|
|
Db.update(truncatSql);
|
|
|
|
|
truncatSql = "truncate table t_crawler_structure";
|
|
|
|
|
Db.update(truncatSql);
|
|
|
|
|
truncatSql = "truncate table t_crawler_resource";
|
|
|
|
|
Db.update(truncatSql);
|
|
|
|
|
// 清空表
|
|
|
|
|
String[] tables = {"t_crawler_subject", "t_crawler_scheme", "t_crawler_book",
|
|
|
|
|
"t_crawler_structure", "t_crawler_resource"};
|
|
|
|
|
for (String table : tables) {
|
|
|
|
|
Db.update("truncate table " + table);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Map<String, String> map = new HashMap<>();
|
|
|
|
|
map.put("1", "学前");
|
|
|
|
@ -76,162 +382,69 @@ public class YunXiao {
|
|
|
|
|
map.put("4", "高中");
|
|
|
|
|
|
|
|
|
|
print("开始爬取数据!");
|
|
|
|
|
//资源写入的数据集
|
|
|
|
|
List<Record> subjectList = new ArrayList<>();
|
|
|
|
|
List<Record> lessonList = new ArrayList<>();
|
|
|
|
|
List<Record> schemeList = new ArrayList<>();
|
|
|
|
|
List<Record> bookList = new ArrayList<>();
|
|
|
|
|
List<Record> structureList = new ArrayList<>();
|
|
|
|
|
|
|
|
|
|
//记录开始时间
|
|
|
|
|
long startTime = System.currentTimeMillis();
|
|
|
|
|
|
|
|
|
|
// 创建线程池
|
|
|
|
|
ExecutorService executorService = Executors.newFixedThreadPool(10);
|
|
|
|
|
CountDownLatch mainLatch = new CountDownLatch(map.size());
|
|
|
|
|
|
|
|
|
|
map.forEach((key, value) -> {
|
|
|
|
|
String respSubject = Util.doGet("https://yx.ccsjy.cn/api/business/v1/subject/list/" + key);
|
|
|
|
|
JSONObject jsonObj = JSONObject.parseObject(respSubject);
|
|
|
|
|
JSONArray jsonArr = jsonObj.getJSONObject("data").getJSONArray("rows");
|
|
|
|
|
for (int i = 0; i < jsonArr.size(); i++) {
|
|
|
|
|
JSONObject jsonSubject = jsonArr.getJSONObject(i);
|
|
|
|
|
String subjectCode = jsonSubject.getString("subjectCode");
|
|
|
|
|
String subjectName = jsonSubject.getString("subjectName");
|
|
|
|
|
|
|
|
|
|
Record rSubject = new Record();
|
|
|
|
|
rSubject.set("subject_id", subjectCode);
|
|
|
|
|
rSubject.set("subject_name", subjectName);
|
|
|
|
|
rSubject.set("stage_id", key);
|
|
|
|
|
subjectList.add(rSubject);
|
|
|
|
|
|
|
|
|
|
JSONObject argScheme = new JSONObject();
|
|
|
|
|
argScheme.put("subjectCode", subjectCode);
|
|
|
|
|
argScheme.put("systemId", 1);
|
|
|
|
|
|
|
|
|
|
String respScheme = Util.doPost("https://yx.ccsjy.cn/api/business/v1/edition/list", argScheme.toString());
|
|
|
|
|
JSONObject jsonObjScheme = JSONObject.parseObject(respScheme);
|
|
|
|
|
JSONArray jsonArrScheme = jsonObjScheme.getJSONArray("data");
|
|
|
|
|
|
|
|
|
|
for (int j = 0; j < jsonArrScheme.size(); j++) {
|
|
|
|
|
JSONObject jsonScheme = jsonArrScheme.getJSONObject(j);
|
|
|
|
|
String businessEditionId = jsonScheme.getString("businessEditionId");
|
|
|
|
|
String editionName = jsonScheme.getString("editionName");
|
|
|
|
|
|
|
|
|
|
Record rScheme = new Record();
|
|
|
|
|
rScheme.set("scheme_id", businessEditionId);
|
|
|
|
|
rScheme.set("scheme_name", editionName);
|
|
|
|
|
rScheme.set("subject_id", subjectCode);
|
|
|
|
|
rScheme.set("stage_id", key);
|
|
|
|
|
rScheme.set("id", UUID.randomUUID().toString());
|
|
|
|
|
schemeList.add(rScheme);
|
|
|
|
|
|
|
|
|
|
JSONObject argBook = new JSONObject();
|
|
|
|
|
argBook.put("stageCode", key);
|
|
|
|
|
argBook.put("subjectCode", subjectCode);
|
|
|
|
|
argBook.put("businessEditionId", businessEditionId);
|
|
|
|
|
|
|
|
|
|
String respBook = Util.doPost("https://yx.ccsjy.cn/api/business/v1/book/list", argBook.toString());
|
|
|
|
|
JSONObject jsonObjBook = JSONObject.parseObject(respBook);
|
|
|
|
|
JSONArray jsonArrBook = jsonObjBook.getJSONArray("data");
|
|
|
|
|
|
|
|
|
|
for (int k = 0; k < jsonArrBook.size(); k++) {
|
|
|
|
|
JSONObject jsonBook = jsonArrBook.getJSONObject(k);
|
|
|
|
|
String businessBookId = jsonBook.getString("businessBookId");
|
|
|
|
|
String bookName = jsonBook.getString("bookName");
|
|
|
|
|
|
|
|
|
|
Record rBook = new Record();
|
|
|
|
|
rBook.set("book_id", businessBookId);
|
|
|
|
|
rBook.set("book_name", bookName);
|
|
|
|
|
rBook.set("scheme_id", businessEditionId);
|
|
|
|
|
rBook.set("subject_id", subjectCode);
|
|
|
|
|
rBook.set("stage_id", key);
|
|
|
|
|
rBook.set("id", UUID.randomUUID().toString());
|
|
|
|
|
bookList.add(rBook);
|
|
|
|
|
|
|
|
|
|
JSONObject argTree = new JSONObject();
|
|
|
|
|
argTree.put("businessBookId", businessBookId);
|
|
|
|
|
argTree.put("childrensFlag", 1);
|
|
|
|
|
argTree.put("parentId", -1);
|
|
|
|
|
argTree.put("searchKeyword", "");
|
|
|
|
|
|
|
|
|
|
String respTree = Util.doPost("https://yx.ccsjy.cn/api/business/v1/chapter/tree", argTree.toString());
|
|
|
|
|
JSONObject jsonObjTree = JSONObject.parseObject(respTree);
|
|
|
|
|
JSONArray jsonArrTree = jsonObjTree.getJSONObject("data").getJSONArray("tree");
|
|
|
|
|
|
|
|
|
|
tempTree = new JSONArray();
|
|
|
|
|
traverseTree(jsonArrTree);
|
|
|
|
|
|
|
|
|
|
for (int n = 0; n < tempTree.size(); n++) {
|
|
|
|
|
JSONObject jsonTree = tempTree.getJSONObject(n);
|
|
|
|
|
String nodeId = jsonTree.getString("nodeId");
|
|
|
|
|
String nodeName = jsonTree.getString("nodeName");
|
|
|
|
|
Boolean isLeaf = jsonTree.getBoolean("isLeaf");
|
|
|
|
|
String parentValue = jsonTree.getString("parentValue");
|
|
|
|
|
int is_leaf = 0;
|
|
|
|
|
if (isLeaf) {
|
|
|
|
|
is_leaf = 1;
|
|
|
|
|
}
|
|
|
|
|
Record rStructure = new Record();
|
|
|
|
|
rStructure.set("node_id", nodeId);
|
|
|
|
|
rStructure.set("node_name", nodeName);
|
|
|
|
|
rStructure.set("parent_id", parentValue);
|
|
|
|
|
rStructure.set("is_leaf", is_leaf);
|
|
|
|
|
rStructure.set("book_id", businessBookId);
|
|
|
|
|
rStructure.set("scheme_id", businessEditionId);
|
|
|
|
|
rStructure.set("subject_id", subjectCode);
|
|
|
|
|
rStructure.set("stage_id", key);
|
|
|
|
|
rStructure.set("id", UUID.randomUUID().toString());
|
|
|
|
|
structureList.add(rStructure);
|
|
|
|
|
|
|
|
|
|
JSONObject argSource = new JSONObject();
|
|
|
|
|
argSource.put("pageNum", 1);
|
|
|
|
|
argSource.put("pageSize", 100);
|
|
|
|
|
argSource.put("businessBookId", businessBookId);
|
|
|
|
|
argSource.put("nodeId", nodeId);
|
|
|
|
|
argSource.put("stageCode", key);
|
|
|
|
|
argSource.put("subjectCode", subjectCode);
|
|
|
|
|
|
|
|
|
|
argSource.put("excellentFlag", "");
|
|
|
|
|
argSource.put("nodeType", 1);
|
|
|
|
|
argSource.put("sortType", 2);
|
|
|
|
|
argSource.put("source", "");
|
|
|
|
|
argSource.put("searchKeyword", "");
|
|
|
|
|
|
|
|
|
|
String respSource = Util.doPost("https://yx.ccsjy.cn/api/cloud-school/v1/cloudLesson/getOnDemandLessonPage", argSource.toString());
|
|
|
|
|
JSONObject jsonObjSource = JSONObject.parseObject(respSource);
|
|
|
|
|
JSONArray jsonArrSource = jsonObjSource.getJSONObject("data").getJSONArray("rows");
|
|
|
|
|
|
|
|
|
|
print("正在爬取:【" + value + "," + subjectName + "," + editionName + "," + bookName + "," + nodeName + "】下的资源!");
|
|
|
|
|
|
|
|
|
|
for (int m = 0; m < jsonArrSource.size(); m++) {
|
|
|
|
|
JSONObject jsonSource = jsonArrSource.getJSONObject(m);
|
|
|
|
|
String lessonId = jsonSource.getString("lessonId");
|
|
|
|
|
String lessonName = jsonSource.getString("lessonName");
|
|
|
|
|
String teacherSchoolId = jsonSource.getString("teacherSchoolId");
|
|
|
|
|
String teacherSchoolName = jsonSource.getString("teacherSchoolName");
|
|
|
|
|
String teacherId = jsonSource.getString("teacherId");
|
|
|
|
|
String teacherName = jsonSource.getString("teacherName");
|
|
|
|
|
Record record = new Record();
|
|
|
|
|
record.set("lesson_id", lessonId);
|
|
|
|
|
record.set("lesson_name", lessonName);
|
|
|
|
|
record.set("node_id", nodeId);
|
|
|
|
|
record.set("teacher_id", teacherId);
|
|
|
|
|
record.set("teacher_school_id", teacherSchoolId);
|
|
|
|
|
record.set("teacher_school_name", teacherSchoolName);
|
|
|
|
|
record.set("teacher_name", teacherName);
|
|
|
|
|
record.set("book_id", businessBookId);
|
|
|
|
|
record.set("scheme_id", businessEditionId);
|
|
|
|
|
record.set("subject_id", subjectCode);
|
|
|
|
|
record.set("stage_id", key);
|
|
|
|
|
record.set("id", UUID.randomUUID().toString());
|
|
|
|
|
lessonList.add(record);
|
|
|
|
|
}
|
|
|
|
|
print("已收集资源数量:"+lessonList.size()+"个。");
|
|
|
|
|
executorService.submit(() -> {
|
|
|
|
|
try {
|
|
|
|
|
String respSubject = doRequestWithRetry("https://yx.ccsjy.cn/api/business/v1/subject/list/" + key,
|
|
|
|
|
null, true, MaxRetryCount);
|
|
|
|
|
if (respSubject != null) {
|
|
|
|
|
JSONObject jsonObj = JSONObject.parseObject(respSubject);
|
|
|
|
|
JSONArray subjects = jsonObj.getJSONObject("data").getJSONArray("rows");
|
|
|
|
|
|
|
|
|
|
ExecutorService subjectExecutor = Executors.newFixedThreadPool(ThreadCount);
|
|
|
|
|
CountDownLatch subjectLatch = new CountDownLatch(subjects.size());
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < subjects.size(); i++) {
|
|
|
|
|
JSONObject subject = subjects.getJSONObject(i);
|
|
|
|
|
subjectExecutor.submit(() -> {
|
|
|
|
|
try {
|
|
|
|
|
processSubject(subject, key, value);
|
|
|
|
|
} catch (InterruptedException e) {
|
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
|
} finally {
|
|
|
|
|
subjectLatch.countDown();
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
subjectLatch.await();
|
|
|
|
|
subjectExecutor.shutdown();
|
|
|
|
|
}
|
|
|
|
|
} catch (Exception e) {
|
|
|
|
|
e.printStackTrace();
|
|
|
|
|
} finally {
|
|
|
|
|
mainLatch.countDown();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
mainLatch.await();
|
|
|
|
|
} catch (InterruptedException e) {
|
|
|
|
|
e.printStackTrace();
|
|
|
|
|
}
|
|
|
|
|
executorService.shutdown();
|
|
|
|
|
// 保存数据
|
|
|
|
|
print("开始保存数据...");
|
|
|
|
|
Db.batchSave("t_crawler_subject", subjectList, 300);
|
|
|
|
|
Db.batchSave("t_crawler_scheme", schemeList, 300);
|
|
|
|
|
Db.batchSave("t_crawler_book", bookList, 300);
|
|
|
|
|
Db.batchSave("t_crawler_structure", structureList, 300);
|
|
|
|
|
Db.batchSave("t_crawler_resource", lessonList, 300);
|
|
|
|
|
Db.batchSave("t_crawler_subject", subjectList, batchSize);
|
|
|
|
|
Db.batchSave("t_crawler_scheme", schemeList, batchSize);
|
|
|
|
|
Db.batchSave("t_crawler_book", bookList, batchSize);
|
|
|
|
|
Db.batchSave("t_crawler_structure", structureList, batchSize);
|
|
|
|
|
Db.batchSave("t_crawler_resource", lessonList, batchSize);
|
|
|
|
|
|
|
|
|
|
print("爬取数据完成!");
|
|
|
|
|
print("总共收集资源:" + lessonList.size() + "个");
|
|
|
|
|
print("总共收集节点:" + structureList.size() + "个");
|
|
|
|
|
//记录结束时间,并输出两者的差值是多少分钟多少少
|
|
|
|
|
long endTime = System.currentTimeMillis();
|
|
|
|
|
print("爬取数据耗时:" + (endTime - startTime) / 1000 + "秒");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|