From c95089a7627df6f1c7f11e0006b02578098b6850 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=84=E6=B5=B7?= <10402852@qq.com>
Date: Thu, 1 Feb 2024 15:29:41 +0800
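Subject: [PATCH] Add HanLP tokenizer-based org name check to Study, remove file logging, make SimilarayUtil number converters private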
---
pom.xml | 5 +
src/main/java/com/YunXiao/Study.java | 93 +++++++++++--------
.../dsideal/QingLong/Util/SimilarayUtil.java | 4 +-
3 files changed, 60 insertions(+), 42 deletions(-)
diff --git a/pom.xml b/pom.xml
index 18cdcb0c..be2d1bcb 100644
--- a/pom.xml
+++ b/pom.xml
@@ -379,6 +379,11 @@
jakarta.json
2.0.1
+
+ com.hankcs
+ hanlp
+ portable-1.8.4
+
diff --git a/src/main/java/com/YunXiao/Study.java b/src/main/java/com/YunXiao/Study.java
index dbd89312..8557e954 100644
--- a/src/main/java/com/YunXiao/Study.java
+++ b/src/main/java/com/YunXiao/Study.java
@@ -1,7 +1,5 @@
package com.YunXiao;
-import cn.hutool.core.date.DateTime;
-import cn.hutool.core.io.file.FileWriter;
import com.YunXiao.Model.BaseModel;
import com.YunXiao.Util.SyncUtil;
import com.alibaba.fastjson.JSONArray;
@@ -9,57 +7,69 @@ import com.alibaba.fastjson.JSONObject;
import com.dsideal.QingLong.Util.RedisKit;
import com.dsideal.QingLong.Util.SimilarayUtil;
+import com.hankcs.hanlp.seg.common.Term;
+import com.hankcs.hanlp.tokenizer.StandardTokenizer;
import com.jfinal.plugin.activerecord.Db;
import com.jfinal.plugin.activerecord.Record;
-import java.io.File;
-import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
+import java.util.*;
public class Study {
//Model层的实例,在此层,不允许调用Dao层,只能对接Model层
public static BaseModel bm = new BaseModel();
-
- static long start;
- static boolean logFirst = false;
- static String LOG_FILE = null;
- static String directoryPath = null;
+ public static String KEY = "TY_BUREAU";
/**
- * 功能:记录日志,每个任务一个单独文件
+ * Segments both names with HanLP's StandardTokenizer, passes every token through SimilarayUtil.Convert, tallies the tokens of both names in one map, and uses the tallies to judge whether the two organization names denote the same organization.
*
- * @param msg
+ * @param s1 first name to compare (main passes the QingLong org_name here)
+ * @param s2 second name to compare (main passes the ty_org_name here)
+ * @return false if any tallied token ends with a count of exactly 1; true otherwise
*/
- public static void log(String msg) {
- if (!logFirst) {
- // 记录到日志文件
- LOG_FILE = SyncUtil.WEBPATH + "/Logs/{start_time}.log";
- // 创建 File 对象
- File file = new File(LOG_FILE);
- // 获取目录部分
- directoryPath = file.getParent();
- // 判断目录是否存在,如果不存在则创建
- File directory = new File(directoryPath);
- if (!directory.exists()) {
- if (directory.mkdirs()) {
- System.out.println(DateTime.now() + " 目录已创建:" + directoryPath);
- } else {
- System.out.println("创建日志目录失败,请检查!");
- System.exit(0);
+ public static boolean check(String s1, String s2) {
+ // Area names; a token that is a prefix of one of them is replaced by the full area name in the loops below. NOTE(review): this is a local array rebuilt on every call, not a static field.
+ String[] area = {
+ "南关区", "宽城区", "朝阳区", "二道区", "绿园区", "双阳区", "九台市",
+ "农安县", "经开区", "德惠市", "高新区", "净月区", "汽开区", "榆树市", "公主岭市", "长春市"
+ };
+ List areaList = Arrays.stream(area).toList();
+
+ List termList1 = StandardTokenizer.segment(s1);
+ Map map = new HashMap<>();
+ for (Term term : termList1) {
+ String key = SimilarayUtil.Convert(term.word);
+
+ for (int i = 0; i < areaList.size(); i++) {
+ if (areaList.get(i).startsWith(key)) {
+ key = areaList.get(i);
+ break;
}
}
- LOG_FILE = LOG_FILE.replace("{start_time}", String.valueOf(start));
- logFirst = true;
+ int x = 0;
+ if (map.containsKey(key)) x = map.get(key);
+ map.put(key, x + 1);
}
- System.out.println(DateTime.now() + " " + msg);
- FileWriter writer = new FileWriter(LOG_FILE, StandardCharsets.UTF_8);
- writer.append(DateTime.now() + " " + msg + "\n");
- }
- public static String KEY = "TY_BUREAU";
+ List termList2 = StandardTokenizer.segment(s2);
+ for (Term term : termList2) {
+ String key = SimilarayUtil.Convert(term.word);
+ for (int i = 0; i < areaList.size(); i++) {
+ if (areaList.get(i).startsWith(key)) {
+ key = areaList.get(i);
+ break;
+ }
+ }
+ int x = 0;
+ if (map.containsKey(key)) x = map.get(key);
+ map.put(key, x + 1);
+ }
+ // Reject when any token has a combined count of exactly 1. NOTE(review): counts are pooled across both names, so a token occurring twice in one name and never in the other also passes - confirm this is intended.
+ for (Map.Entry entry : map.entrySet()) {
+ if (entry.getValue() == 1) return false;
+ }
+ return true;
+ }
public static void main(String[] args) throws Exception {
//初始化
@@ -108,9 +118,12 @@ public class Study {
sql = "update t_base_organization set third_party_id=? where org_id=?";
Db.update(sql, ty_org_id, org_id);
} else {
- System.out.println("QingLong系统 org_id:" + org_id + ",org_name:" + org_name + "\n天喻平台 ty_org_id:" + ty_org_id + ",ty_org_name:" + ty_org_name );
- System.out.println("相似度:" + similary);
- System.out.println();
+ boolean success = check(org_name, ty_org_name);
+ if (success) {
+ System.out.println("QingLong系统 org_id:" + org_id + ",org_name:" + org_name + "\n天喻平台 ty_org_id:" + ty_org_id + ",ty_org_name:" + ty_org_name);
+ } else {
+ System.out.println("QingLong系统 org_id:" + org_id + ",org_name:" + org_name + " 无法推荐出合适的单位名,请人工检索!");
+ }
}
}
// 关闭连接
diff --git a/src/main/java/com/dsideal/QingLong/Util/SimilarayUtil.java b/src/main/java/com/dsideal/QingLong/Util/SimilarayUtil.java
index b4cf26f6..4fcf6a16 100644
--- a/src/main/java/com/dsideal/QingLong/Util/SimilarayUtil.java
+++ b/src/main/java/com/dsideal/QingLong/Util/SimilarayUtil.java
@@ -35,7 +35,7 @@ public class SimilarayUtil {
* @param chineseNumber
* @return
*/
- public static String ChineseToArabic(String chineseNumber) {
+ private static String ChineseToArabic(String chineseNumber) {
int arabicNumber = 0;
for (int i = 0; i < chineseNumber.length(); i++) {
char chineseChar = chineseNumber.charAt(i);
@@ -51,7 +51,7 @@ public class SimilarayUtil {
* @param arabicNumberStr
* @return
*/
- public static String ArabicToChinese(String arabicNumberStr) {
+ private static String ArabicToChinese(String arabicNumberStr) {
String chineseNumberStr = "";
for (int i = 0; i < arabicNumberStr.length(); i++) {
char arabicChar = arabicNumberStr.charAt(i);