|
|
package Tools.Crawler.Util;
|
|
|
|
|
|
import cn.hutool.core.date.DateTime;
|
|
|
import com.alibaba.fastjson.JSONArray;
|
|
|
import com.alibaba.fastjson.JSONObject;
|
|
|
import com.jfinal.kit.Kv;
|
|
|
import com.jfinal.plugin.activerecord.*;
|
|
|
import com.jfinal.plugin.activerecord.Record;
|
|
|
|
|
|
import java.time.LocalDate;
|
|
|
import java.time.LocalDateTime;
|
|
|
import java.time.format.DateTimeFormatter;
|
|
|
import java.util.*;
|
|
|
import java.util.concurrent.*;
|
|
|
import java.util.concurrent.atomic.AtomicInteger;
|
|
|
import java.util.stream.Collectors;
|
|
|
|
|
|
import static Tools.Crawler.Util.Util.doRequestWithRetry;
|
|
|
|
|
|
public class BookLesson {
|
|
|
// Thread-safe lists that accumulate the crawled records; they are appended to by the
// process* methods and written to the database in Start()
|
|
|
private static final CopyOnWriteArrayList<Record> subjectList = new CopyOnWriteArrayList<>();
|
|
|
private static final CopyOnWriteArrayList<Record> lessonList = new CopyOnWriteArrayList<>();
|
|
|
private static final CopyOnWriteArrayList<Record> schemeList = new CopyOnWriteArrayList<>();
|
|
|
private static final CopyOnWriteArrayList<Record> bookList = new CopyOnWriteArrayList<>();
|
|
|
private static final CopyOnWriteArrayList<Record> structureList = new CopyOnWriteArrayList<>();
|
|
|
|
|
|
// Counter used to report progress (number of lesson resources collected so far)
|
|
|
private static final AtomicInteger resourceCount = new AtomicInteger(0);
|
|
|
|
|
|
// Size of each worker thread pool
|
|
|
private static final int ThreadCount = 4;
|
|
|
// Maximum retry count passed to doRequestWithRetry
|
|
|
private static final int MaxRetryCount = 10;
|
|
|
// Number of records written per batch in Db.batchSave
|
|
|
private static final int batchSize = 300;
|
|
|
|
|
|
// Standard school names, keyed by name (loaded by getBzSchoolNameList)
|
|
|
public static Map<String, Kv> bzSchoolNameList = new HashMap<>();
|
|
|
// School names that were mapped by hand, keyed by the original name (loaded by getHandMatchSchoolList)
|
|
|
public static Map<String, Kv> handMatchSchoolList = new HashMap<>();
|
|
|
// Ids of the lessons that have already been matched to a school (loaded by getLessonAlreadyMatch)
|
|
|
public static Set<String> lessonAlreadyMatch = new HashSet<>();
|
|
|
|
|
|
public static void traverseTree(JSONArray treeArray, JSONArray tempTree) {
|
|
|
for (Object item : treeArray) {
|
|
|
JSONObject node = (JSONObject) item;
|
|
|
JSONObject jsonObj = new JSONObject();
|
|
|
jsonObj.put("nodeId", node.getString("key"));
|
|
|
jsonObj.put("nodeName", node.getString("title"));
|
|
|
jsonObj.put("isLeaf", node.getBoolean("isLeaf"));
|
|
|
jsonObj.put("parentValue", node.getString("parentValue"));
|
|
|
tempTree.add(jsonObj);
|
|
|
|
|
|
if (node.containsKey("children") && !node.getJSONArray("children").isEmpty()) {
|
|
|
traverseTree(node.getJSONArray("children"), tempTree);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* 输出信息
|
|
|
*
|
|
|
* @param msg
|
|
|
*/
|
|
|
public static void print(String msg) {
|
|
|
System.out.println(DateTime.now() + " " + msg);
|
|
|
}
|
|
|
|
|
|
|
|
|
/**
|
|
|
* 处理科目
|
|
|
*
|
|
|
* @param subject
|
|
|
* @param key
|
|
|
* @param value
|
|
|
*/
|
|
|
private static void processSubject(JSONObject subject, String key, String value) throws InterruptedException {
|
|
|
String subjectCode = subject.getString("subjectCode");
|
|
|
String subjectName = subject.getString("subjectName");
|
|
|
|
|
|
Record rSubject = new Record().set("subject_id", subjectCode).set("subject_name", subjectName).set("stage_id", key);
|
|
|
subjectList.add(rSubject);
|
|
|
|
|
|
JSONObject argScheme = new JSONObject();
|
|
|
argScheme.put("subjectCode", subjectCode);
|
|
|
argScheme.put("systemId", 1);
|
|
|
|
|
|
String respScheme = doRequestWithRetry("https://yx.ccsjy.cn/api/business/v1/edition/list",
|
|
|
argScheme.toString(), false, MaxRetryCount);
|
|
|
|
|
|
if (respScheme != null) {
|
|
|
//处理科目后,处理此科目下的版本
|
|
|
processSchemes(JSONObject.parseObject(respScheme).getJSONArray("data"), subjectCode, key, value);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* 处理版本
|
|
|
*
|
|
|
* @param schemes
|
|
|
* @param subjectCode
|
|
|
* @param key
|
|
|
* @param value
|
|
|
*/
|
|
|
private static void processSchemes(JSONArray schemes, String subjectCode, String key, String value) throws InterruptedException {
|
|
|
ExecutorService schemeExecutor = Executors.newFixedThreadPool(ThreadCount);
|
|
|
CountDownLatch schemeLatch = new CountDownLatch(schemes.size());
|
|
|
|
|
|
for (int j = 0; j < schemes.size(); j++) {
|
|
|
JSONObject scheme = schemes.getJSONObject(j);
|
|
|
schemeExecutor.submit(() -> {
|
|
|
try {
|
|
|
processScheme(scheme, subjectCode, key, value);
|
|
|
} catch (InterruptedException e) {
|
|
|
throw new RuntimeException(e);
|
|
|
} finally {
|
|
|
schemeLatch.countDown();
|
|
|
}
|
|
|
});
|
|
|
}
|
|
|
schemeLatch.await();
|
|
|
schemeExecutor.shutdown();
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* 处理某一个版本
|
|
|
*
|
|
|
* @param scheme
|
|
|
* @param subjectCode
|
|
|
* @param key
|
|
|
* @param value
|
|
|
*/
|
|
|
private static void processScheme(JSONObject scheme, String subjectCode, String key, String value) throws InterruptedException {
|
|
|
String businessEditionId = scheme.getString("businessEditionId");
|
|
|
String editionName = scheme.getString("editionName");
|
|
|
|
|
|
Record rScheme = new Record().set("scheme_id", businessEditionId).set("scheme_name", editionName).set("subject_id", subjectCode)
|
|
|
.set("stage_id", key).set("id", UUID.randomUUID().toString());
|
|
|
schemeList.add(rScheme);
|
|
|
|
|
|
JSONObject argBook = new JSONObject();
|
|
|
argBook.put("stageCode", key);
|
|
|
argBook.put("subjectCode", subjectCode);
|
|
|
argBook.put("businessEditionId", businessEditionId);
|
|
|
|
|
|
String respBook = doRequestWithRetry("https://yx.ccsjy.cn/api/business/v1/book/list", argBook.toString(), false, 3);
|
|
|
if (respBook != null) {
|
|
|
JSONArray books = JSONObject.parseObject(respBook).getJSONArray("data");
|
|
|
// 使用线程池处理每本书
|
|
|
ExecutorService bookExecutor = Executors.newFixedThreadPool(ThreadCount);
|
|
|
CountDownLatch bookLatch = new CountDownLatch(books.size());
|
|
|
for (int k = 0; k < books.size(); k++) {
|
|
|
JSONObject book = books.getJSONObject(k);
|
|
|
bookExecutor.submit(() -> {
|
|
|
try {
|
|
|
//章节目录
|
|
|
processBook(book, businessEditionId, subjectCode, key, value, editionName);
|
|
|
//知识点
|
|
|
|
|
|
} catch (InterruptedException e) {
|
|
|
throw new RuntimeException(e);
|
|
|
} finally {
|
|
|
bookLatch.countDown();
|
|
|
}
|
|
|
});
|
|
|
}
|
|
|
bookLatch.await();
|
|
|
bookExecutor.shutdown();
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* 处理册
|
|
|
*
|
|
|
* @param book
|
|
|
* @param businessEditionId
|
|
|
* @param subjectCode
|
|
|
* @param key
|
|
|
* @param value
|
|
|
* @param editionName
|
|
|
*/
|
|
|
private static void processBook(JSONObject book, String businessEditionId, String subjectCode,
|
|
|
String key, String value, String editionName) throws InterruptedException {
|
|
|
String businessBookId = book.getString("businessBookId");
|
|
|
String bookName = book.getString("bookName");
|
|
|
|
|
|
Record rBook = new Record().set("book_id", businessBookId)
|
|
|
.set("book_name", bookName).set("scheme_id", businessEditionId)
|
|
|
.set("subject_id", subjectCode).set("stage_id", key)
|
|
|
.set("id", UUID.randomUUID().toString());
|
|
|
bookList.add(rBook);
|
|
|
|
|
|
JSONObject argTree = new JSONObject();
|
|
|
argTree.put("businessBookId", businessBookId);
|
|
|
argTree.put("childrensFlag", 1);
|
|
|
argTree.put("parentId", -1);
|
|
|
argTree.put("searchKeyword", "");
|
|
|
|
|
|
String respTree = doRequestWithRetry("https://yx.ccsjy.cn/api/business/v1/chapter/tree",
|
|
|
argTree.toString(), false, MaxRetryCount);
|
|
|
|
|
|
if (respTree != null) {
|
|
|
JSONArray jsonArrTree = JSONObject.parseObject(respTree)
|
|
|
.getJSONObject("data")
|
|
|
.getJSONArray("tree");
|
|
|
|
|
|
JSONArray tempTree = new JSONArray();
|
|
|
traverseTree(jsonArrTree, tempTree);
|
|
|
|
|
|
// 使用线程池处理每个节点
|
|
|
ExecutorService nodeExecutor = Executors.newFixedThreadPool(ThreadCount);
|
|
|
CountDownLatch nodeLatch = new CountDownLatch(tempTree.size());
|
|
|
|
|
|
for (int n = 0; n < tempTree.size(); n++) {
|
|
|
JSONObject jsonTree = tempTree.getJSONObject(n);
|
|
|
nodeExecutor.submit(() -> {
|
|
|
try {
|
|
|
processNode(jsonTree, businessBookId, businessEditionId, subjectCode,
|
|
|
key, value, editionName, bookName);
|
|
|
} finally {
|
|
|
nodeLatch.countDown();
|
|
|
}
|
|
|
});
|
|
|
}
|
|
|
nodeLatch.await();
|
|
|
nodeExecutor.shutdown();
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* 处理节点
|
|
|
*
|
|
|
* @param jsonTree
|
|
|
* @param businessBookId
|
|
|
* @param businessEditionId
|
|
|
* @param subjectCode
|
|
|
* @param key
|
|
|
* @param value
|
|
|
* @param editionName
|
|
|
* @param bookName
|
|
|
*/
|
|
|
private static void processNode(JSONObject jsonTree, String businessBookId, String businessEditionId,
|
|
|
String subjectCode, String key, String value, String editionName,
|
|
|
String bookName) {
|
|
|
String nodeId = jsonTree.getString("nodeId");
|
|
|
String nodeName = jsonTree.getString("nodeName");
|
|
|
Boolean isLeaf = jsonTree.getBoolean("isLeaf");
|
|
|
String parentValue = jsonTree.getString("parentValue");
|
|
|
|
|
|
Record rStructure = new Record()
|
|
|
.set("node_id", nodeId).set("node_name", nodeName).set("parent_id", parentValue)
|
|
|
.set("is_leaf", isLeaf ? 1 : 0).set("book_id", businessBookId)
|
|
|
.set("scheme_id", businessEditionId).set("subject_id", subjectCode)
|
|
|
.set("stage_id", key).set("id", UUID.randomUUID().toString());
|
|
|
structureList.add(rStructure);
|
|
|
processResource(nodeId, businessBookId, subjectCode, key, value,
|
|
|
editionName, bookName, nodeName);
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* 处理资源
|
|
|
*
|
|
|
* @param nodeId
|
|
|
* @param businessBookId
|
|
|
* @param subjectCode
|
|
|
* @param key
|
|
|
* @param value
|
|
|
* @param editionName
|
|
|
* @param bookName
|
|
|
* @param nodeName
|
|
|
*/
|
|
|
private static void processResource(String nodeId, String businessBookId, String subjectCode,
|
|
|
String key, String value, String editionName, String bookName, String nodeName) {
|
|
|
JSONObject argSource = new JSONObject();
|
|
|
argSource.put("pageNum", 1);
|
|
|
argSource.put("pageSize", 100);
|
|
|
argSource.put("businessBookId", businessBookId);
|
|
|
argSource.put("nodeId", nodeId);
|
|
|
argSource.put("stageCode", key);
|
|
|
argSource.put("subjectCode", subjectCode);
|
|
|
argSource.put("excellentFlag", "");
|
|
|
argSource.put("nodeType", 1);//教材体系 2:知识点体系
|
|
|
argSource.put("sortType", 2);
|
|
|
argSource.put("source", "");
|
|
|
argSource.put("searchKeyword", "");
|
|
|
|
|
|
String respSource = doRequestWithRetry("https://yx.ccsjy.cn/api/cloud-school/v1/cloudLesson/getOnDemandLessonPage",
|
|
|
argSource.toString(), false, MaxRetryCount);
|
|
|
|
|
|
if (respSource != null) {
|
|
|
JSONObject jsonObjSource = JSONObject.parseObject(respSource);
|
|
|
JSONArray jsonArrSource = jsonObjSource.getJSONObject("data").getJSONArray("rows");
|
|
|
|
|
|
print("正在爬取:【" + value + "," + editionName + "," + bookName + "," + nodeName + "】下的资源!");
|
|
|
|
|
|
for (int m = 0; m < jsonArrSource.size(); m++) {
|
|
|
JSONObject jsonSource = jsonArrSource.getJSONObject(m);
|
|
|
|
|
|
// 解析日期时间字符串
|
|
|
String publishTimeStr = jsonSource.getString("publishTime");
|
|
|
LocalDateTime dateTime = LocalDateTime.parse(publishTimeStr,
|
|
|
DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
|
|
|
LocalDate date = dateTime.toLocalDate();
|
|
|
|
|
|
Record record = new Record()
|
|
|
.set("lesson_id", jsonSource.getString("lessonId"))
|
|
|
.set("lesson_name", jsonSource.getString("lessonName"))
|
|
|
.set("node_id", nodeId)
|
|
|
.set("teacher_id", jsonSource.getString("teacherId"))
|
|
|
.set("teacher_school_id", jsonSource.getString("teacherSchoolId"))
|
|
|
.set("teacher_school_name", jsonSource.getString("teacherSchoolName"))
|
|
|
.set("teacher_name", jsonSource.getString("teacherName"))
|
|
|
.set("book_id", businessBookId)
|
|
|
.set("scheme_id", editionName)
|
|
|
.set("subject_id", subjectCode)
|
|
|
.set("grade_code", jsonSource.getString("gradeCode"))
|
|
|
.set("publish_time", date)
|
|
|
.set("stage_id", key)
|
|
|
//增加学习次数
|
|
|
.set("preview_count", jsonSource.getIntValue("previewCount"))
|
|
|
.set("learning_person_count", jsonSource.getIntValue("learningPersonCount"))
|
|
|
.set("learning_person_times", jsonSource.getIntValue("learningPersonTimes"))
|
|
|
.set("id", UUID.randomUUID().toString());
|
|
|
lessonList.add(record);
|
|
|
int count = resourceCount.incrementAndGet();
|
|
|
if (count % 100 == 0) {
|
|
|
print("已收集资源数量:" + count + "个。");
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* 获取标准学校名称列表
|
|
|
*
|
|
|
* @return
|
|
|
*/
|
|
|
public static Map<String, Kv> getBzSchoolNameList() {
|
|
|
SqlPara sqlPara = Db.getSqlPara("YunXiao.getBzSchoolNameList");
|
|
|
List<Record> list = Db.find(sqlPara);
|
|
|
Map<String, Kv> map = new HashMap<>();
|
|
|
for (Record record : list) {
|
|
|
String bz_school_name = record.getStr("organization_name");
|
|
|
Kv kv = Kv.create();
|
|
|
kv.set("organization_no", record.getStr("organization_no"));
|
|
|
kv.set("gather_regionc", record.getStr("gather_regionc"));
|
|
|
kv.set("school_running_type", record.getStr("school_running_type"));
|
|
|
map.put(bz_school_name, kv);
|
|
|
}
|
|
|
return map;
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* 获取手动匹配学校名称列表
|
|
|
*
|
|
|
* @return
|
|
|
*/
|
|
|
public static Map<String, Kv> getHandMatchSchoolList() {
|
|
|
String sql = "select * from t_crawler_lesson_school where match_type=2";
|
|
|
List<Record> list = Db.find(sql);
|
|
|
Map<String, Kv> map = new HashMap<>();
|
|
|
for (Record record : list) {
|
|
|
String original_school_name = record.getStr("original_school_name");
|
|
|
Kv kv = Kv.create();
|
|
|
String organization_name = record.getStr("organization_name");
|
|
|
String organization_no = record.getStr("organization_no");
|
|
|
String gather_regionc = record.getStr("gather_regionc");
|
|
|
kv.set("organization_name", organization_name);
|
|
|
kv.set("organization_no", organization_no);
|
|
|
kv.set("gather_regionc", gather_regionc);
|
|
|
map.put(original_school_name, kv);
|
|
|
}
|
|
|
return map;
|
|
|
}
|
|
|
|
|
|
|
|
|
/**
|
|
|
* 获取已匹配的课程
|
|
|
*
|
|
|
* @return
|
|
|
*/
|
|
|
public static Set<String> getLessonAlreadyMatch() {
|
|
|
String sql = "select * from t_crawler_lesson_school where match_type>0";
|
|
|
List<Record> list = Db.find(sql);
|
|
|
Set<String> set = new HashSet<>();
|
|
|
for (Record record : list) {
|
|
|
set.add(record.getStr("lesson_id"));
|
|
|
}
|
|
|
return set;
|
|
|
}
|
|
|
|
|
|
public static void Start() {
|
|
|
Map<String, String> map = new HashMap<>();
|
|
|
map.put("1", "学前");
|
|
|
map.put("2", "小学");
|
|
|
map.put("3", "初中");
|
|
|
map.put("4", "高中");
|
|
|
|
|
|
//初始化三个全局量
|
|
|
bzSchoolNameList = getBzSchoolNameList();
|
|
|
handMatchSchoolList = getHandMatchSchoolList();
|
|
|
lessonAlreadyMatch = getLessonAlreadyMatch();
|
|
|
|
|
|
print("开始爬取数据!");
|
|
|
|
|
|
//记录开始时间
|
|
|
long startTime = System.currentTimeMillis();
|
|
|
|
|
|
// 创建线程池
|
|
|
ExecutorService executorService = Executors.newFixedThreadPool(10);
|
|
|
CountDownLatch mainLatch = new CountDownLatch(map.size());
|
|
|
|
|
|
map.forEach((key, value) -> {
|
|
|
executorService.submit(() -> {
|
|
|
try {
|
|
|
String respSubject = doRequestWithRetry("https://yx.ccsjy.cn/api/business/v1/subject/list/" + key,
|
|
|
null, true, MaxRetryCount);
|
|
|
if (respSubject != null) {
|
|
|
JSONObject jsonObj = JSONObject.parseObject(respSubject);
|
|
|
JSONArray subjects = jsonObj.getJSONObject("data").getJSONArray("rows");
|
|
|
|
|
|
ExecutorService subjectExecutor = Executors.newFixedThreadPool(ThreadCount);
|
|
|
CountDownLatch subjectLatch = new CountDownLatch(subjects.size());
|
|
|
|
|
|
for (int i = 0; i < subjects.size(); i++) {
|
|
|
JSONObject subject = subjects.getJSONObject(i);
|
|
|
subjectExecutor.submit(() -> {
|
|
|
try {
|
|
|
processSubject(subject, key, value);
|
|
|
} catch (InterruptedException e) {
|
|
|
throw new RuntimeException(e);
|
|
|
} finally {
|
|
|
subjectLatch.countDown();
|
|
|
}
|
|
|
});
|
|
|
}
|
|
|
|
|
|
subjectLatch.await();
|
|
|
subjectExecutor.shutdown();
|
|
|
}
|
|
|
} catch (Exception e) {
|
|
|
e.printStackTrace();
|
|
|
} finally {
|
|
|
mainLatch.countDown();
|
|
|
}
|
|
|
});
|
|
|
});
|
|
|
|
|
|
try {
|
|
|
mainLatch.await();
|
|
|
} catch (InterruptedException e) {
|
|
|
e.printStackTrace();
|
|
|
}
|
|
|
executorService.shutdown();
|
|
|
|
|
|
// 清空表
|
|
|
String[] tables = {"t_crawler_subject", "t_crawler_scheme", "t_crawler_book",
|
|
|
"t_crawler_structure", "t_crawler_lesson"};
|
|
|
for (String table : tables) {
|
|
|
Db.update("truncate table " + table);
|
|
|
}
|
|
|
String sql = "delete from t_crawler_lesson_school where match_type=0;";
|
|
|
Db.update(sql);
|
|
|
|
|
|
// 保存数据
|
|
|
print("开始保存数据...");
|
|
|
Db.batchSave("t_crawler_subject", subjectList, batchSize);
|
|
|
Db.batchSave("t_crawler_scheme", schemeList, batchSize);
|
|
|
Db.batchSave("t_crawler_book", bookList, batchSize);
|
|
|
Db.batchSave("t_crawler_structure", structureList, batchSize);
|
|
|
Db.batchSave("t_crawler_lesson", lessonList, batchSize);
|
|
|
|
|
|
print("开始记录课程与学校的关系...");
|
|
|
//保存课程与学校的关联表
|
|
|
List<Record> writeList = new ArrayList<>();
|
|
|
for (Record record : lessonList) {
|
|
|
String original_school_name = record.getStr("teacher_school_name");//原始学校名称
|
|
|
|
|
|
if (original_school_name.equals("东北师范大学东安实验学校")) {
|
|
|
System.out.println("Here!");
|
|
|
}
|
|
|
String teacher_name = record.getStr("teacher_name");
|
|
|
String organization_name = "";
|
|
|
String organization_no = "";
|
|
|
String gather_regionc = "";
|
|
|
String school_running_type = "";
|
|
|
int match_type;
|
|
|
String lesson_id = record.getStr("lesson_id");
|
|
|
if (lessonAlreadyMatch.contains(lesson_id)) continue;//如果记录过此课程的学校关系,本次就不再记录了
|
|
|
|
|
|
if (original_school_name.equals("--")) {
|
|
|
System.out.println("--");
|
|
|
}
|
|
|
if (bzSchoolNameList.containsKey(original_school_name)) {
|
|
|
//100%命中的名称
|
|
|
Kv kv = bzSchoolNameList.get(original_school_name);
|
|
|
organization_name = original_school_name;
|
|
|
organization_no = kv.getStr("organization_no");
|
|
|
gather_regionc = kv.getStr("gather_regionc");
|
|
|
school_running_type = kv.getStr("school_running_type");
|
|
|
match_type = 1;
|
|
|
} else if (handMatchSchoolList.containsKey(original_school_name)) {//手动映射过的名称
|
|
|
Kv kv = handMatchSchoolList.get(original_school_name);
|
|
|
organization_name = kv.getStr("organization_name");
|
|
|
organization_no = kv.getStr("organization_no");
|
|
|
gather_regionc = kv.getStr("gather_regionc");
|
|
|
school_running_type = kv.getStr("school_running_type");
|
|
|
match_type = 2;
|
|
|
} else {
|
|
|
match_type = 0;//待匹配
|
|
|
}
|
|
|
Record rWrite = new Record();
|
|
|
rWrite.set("lesson_id", lesson_id);
|
|
|
rWrite.set("original_school_name", original_school_name);
|
|
|
rWrite.set("organization_name", organization_name);
|
|
|
rWrite.set("organization_no", organization_no);
|
|
|
rWrite.set("gather_regionc", gather_regionc);
|
|
|
rWrite.set("match_type", match_type);
|
|
|
rWrite.set("teacher_name", teacher_name);
|
|
|
rWrite.set("school_running_type", school_running_type);
|
|
|
writeList.add(rWrite);
|
|
|
}
|
|
|
//对writeList根据lesson_id去重
|
|
|
writeList = writeList.stream().collect(
|
|
|
Collectors.collectingAndThen(Collectors.toCollection(() -> new TreeSet<>(
|
|
|
Comparator.comparing(r -> r.getStr("lesson_id"))
|
|
|
)
|
|
|
), ArrayList::new));
|
|
|
|
|
|
Db.batchSave("t_crawler_lesson_school", writeList, batchSize);
|
|
|
print("爬取数据完成!");
|
|
|
print("总共收集资源:" + lessonList.size() + "个");
|
|
|
print("总共收集节点:" + structureList.size() + "个");
|
|
|
//记录结束时间,并输出两者的差值是多少分钟多少少
|
|
|
long endTime = System.currentTimeMillis();
|
|
|
print("爬取数据耗时:" + (endTime - startTime) / 1000 + "秒");
|
|
|
}
|
|
|
|
|
|
}
|