From c95089a7627df6f1c7f11e0006b02578098b6850 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E6=B5=B7?= <10402852@qq.com> Date: Thu, 1 Feb 2024 15:29:41 +0800 Subject: [PATCH] 'commit' --- pom.xml | 5 + src/main/java/com/YunXiao/Study.java | 93 +++++++++++-------- .../dsideal/QingLong/Util/SimilarayUtil.java | 4 +- 3 files changed, 60 insertions(+), 42 deletions(-) diff --git a/pom.xml b/pom.xml index 18cdcb0c..be2d1bcb 100644 --- a/pom.xml +++ b/pom.xml @@ -379,6 +379,11 @@ jakarta.json 2.0.1 + + com.hankcs + hanlp + portable-1.8.4 + diff --git a/src/main/java/com/YunXiao/Study.java b/src/main/java/com/YunXiao/Study.java index dbd89312..8557e954 100644 --- a/src/main/java/com/YunXiao/Study.java +++ b/src/main/java/com/YunXiao/Study.java @@ -1,7 +1,5 @@ package com.YunXiao; -import cn.hutool.core.date.DateTime; -import cn.hutool.core.io.file.FileWriter; import com.YunXiao.Model.BaseModel; import com.YunXiao.Util.SyncUtil; import com.alibaba.fastjson.JSONArray; @@ -9,57 +7,69 @@ import com.alibaba.fastjson.JSONObject; import com.dsideal.QingLong.Util.RedisKit; import com.dsideal.QingLong.Util.SimilarayUtil; +import com.hankcs.hanlp.seg.common.Term; +import com.hankcs.hanlp.tokenizer.StandardTokenizer; import com.jfinal.plugin.activerecord.Db; import com.jfinal.plugin.activerecord.Record; -import java.io.File; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; +import java.util.*; public class Study { //Model层的实例,在此层,不允许调用Dao层,只能对接Model层 public static BaseModel bm = new BaseModel(); - - static long start; - static boolean logFirst = false; - static String LOG_FILE = null; - static String directoryPath = null; + public static String KEY = "TY_BUREAU"; /** - * 功能:记录日志,每个任务一个单独文件 + * 功能:利用中文分词技术,将两个字符串都进行分词,通过对比完全命中的数量来评估是不是两个单位名称是同一个单位 * - * @param msg + * @param s1 + * @param s2 + * @return */ - public static void log(String msg) { - if (!logFirst) { - // 记录到日志文件 - LOG_FILE = SyncUtil.WEBPATH + "/Logs/{start_time}.log"; - // 创建 File 对象 - File file = new File(LOG_FILE); - // 获取目录部分 - directoryPath = file.getParent(); - // 判断目录是否存在,如果不存在则创建 - File directory = new File(directoryPath); - if (!directory.exists()) { - if (directory.mkdirs()) { - System.out.println(DateTime.now() + " 目录已创建:" + directoryPath); - } else { - System.out.println("创建日志目录失败,请检查!"); - System.exit(0); + public static boolean check(String s1, String s2) { + //静态变量 + String[] area = { + "南关区", "宽城区", "朝阳区", "二道区", "绿园区", "双阳区", "九台市", + "农安县", "经开区", "德惠市", "高新区", "净月区", "汽开区", "榆树市", "公主岭市", "长春市" + }; + List areaList = Arrays.stream(area).toList(); + + List termList1 = StandardTokenizer.segment(s1); + Map map = new HashMap<>(); + for (Term term : termList1) { + String key = SimilarayUtil.Convert(term.word); + + for (int i = 0; i < areaList.size(); i++) { + if (areaList.get(i).startsWith(key)) { + key = areaList.get(i); + break; } } - LOG_FILE = LOG_FILE.replace("{start_time}", String.valueOf(start)); - logFirst = true; + int x = 0; + if (map.containsKey(key)) x = map.get(key); + map.put(key, x + 1); } - System.out.println(DateTime.now() + " " + msg); - FileWriter writer = new FileWriter(LOG_FILE, StandardCharsets.UTF_8); - writer.append(DateTime.now() + " " + msg + "\n"); - } - public static String KEY = "TY_BUREAU"; + List termList2 = StandardTokenizer.segment(s2); + for (Term term : termList2) { + String key = SimilarayUtil.Convert(term.word); + for (int i = 0; i < areaList.size(); i++) { + if (areaList.get(i).startsWith(key)) { + key = areaList.get(i); + break; + } + } + int x = 0; + if (map.containsKey(key)) x = map.get(key); + map.put(key, x + 1); + } + // 使用 for-each 循环遍历 Map + for (Map.Entry entry : map.entrySet()) { + if (entry.getValue() == 1) return false; + } + return true; + } public static void main(String[] args) throws Exception { //初始化 @@ -108,9 +118,12 @@ public class Study { sql = "update t_base_organization set third_party_id=? where org_id=?"; Db.update(sql, ty_org_id, org_id); } else { - System.out.println("QingLong系统 org_id:" + org_id + ",org_name:" + org_name + "\n天喻平台 ty_org_id:" + ty_org_id + ",ty_org_name:" + ty_org_name ); - System.out.println("相似度:" + similary); - System.out.println(); + boolean success = check(org_name, ty_org_name); + if (success) { + System.out.println("QingLong系统 org_id:" + org_id + ",org_name:" + org_name + "\n天喻平台 ty_org_id:" + ty_org_id + ",ty_org_name:" + ty_org_name); + } else { + System.out.println("QingLong系统 org_id:" + org_id + ",org_name:" + org_name + " 无法推荐出合适的单位名,请人工检索!"); + } } } // 关闭连接 diff --git a/src/main/java/com/dsideal/QingLong/Util/SimilarayUtil.java b/src/main/java/com/dsideal/QingLong/Util/SimilarayUtil.java index b4cf26f6..4fcf6a16 100644 --- a/src/main/java/com/dsideal/QingLong/Util/SimilarayUtil.java +++ b/src/main/java/com/dsideal/QingLong/Util/SimilarayUtil.java @@ -35,7 +35,7 @@ public class SimilarayUtil { * @param chineseNumber * @return */ - public static String ChineseToArabic(String chineseNumber) { + private static String ChineseToArabic(String chineseNumber) { int arabicNumber = 0; for (int i = 0; i < chineseNumber.length(); i++) { char chineseChar = chineseNumber.charAt(i); @@ -51,7 +51,7 @@ public class SimilarayUtil { * @param arabicNumberStr * @return */ - public static String ArabicToChinese(String arabicNumberStr) { + private static String ArabicToChinese(String arabicNumberStr) { String chineseNumberStr = ""; for (int i = 0; i < arabicNumberStr.length(); i++) { char arabicChar = arabicNumberStr.charAt(i);