From 9324b5b153f71a9bc29772cd91cf611218177c64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E6=B5=B7?= <10402852@qq.com> Date: Mon, 30 Dec 2024 10:12:40 +0800 Subject: [PATCH] 'commit' --- .../dsideal/base/Tools/DeleteNotUnique.java | 119 ++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 src/main/java/com/dsideal/base/Tools/DeleteNotUnique.java diff --git a/src/main/java/com/dsideal/base/Tools/DeleteNotUnique.java b/src/main/java/com/dsideal/base/Tools/DeleteNotUnique.java new file mode 100644 index 00000000..177d937d --- /dev/null +++ b/src/main/java/com/dsideal/base/Tools/DeleteNotUnique.java @@ -0,0 +1,119 @@ +package com.dsideal.base.Tools; + +import java.sql.SQLOutput; +import java.util.List; +import java.util.Map; +import java.util.HashMap; +import java.util.Set; +import java.util.HashSet; +import java.util.stream.Collectors; + +import com.dsideal.base.Tools.Util.LocalMysqlConnectUtil; +import com.jfinal.plugin.activerecord.Record; +import com.jfinal.plugin.activerecord.Db; + +import static com.dsideal.base.DataEase.Model.DataEaseModel.DB_NAME; + +public class DeleteNotUnique { + + public static void main(String[] args) { + LocalMysqlConnectUtil.Init(); + DeleteNotUnique deleteNotUnique = new DeleteNotUnique(); + + String sql = "select * from t_dp_dataset where b_use=1"; + List list = Db.find(sql); + + for (Record record : list) { + String table_name = record.getStr("table_name"); + deleteNotUnique.removeDuplicates(table_name); + System.out.println("删除完成:" + table_name); + } + System.out.println("所有表清理完成! "); + } + + /** + * 删除重复数据,保留ID最小的记录 + * + * @param tableName 表名 + */ + public void removeDuplicates(String tableName) { + // 1. 获取表的所有字段名(除id外) + List columnNames = getColumnNames(tableName); + System.out.println("表字段:" + columnNames); + + // 2. 获取所有数据 + List allRecords = Db.use(DB_NAME).find("SELECT * FROM `" + tableName + "` ORDER BY id"); + + // 3. 使用Map存储唯一记录 + Map uniqueRecords = new HashMap<>(); + Set idsToDelete = new HashSet<>(); + + for (Record record : allRecords) { + // 生成数据特征 + String dataKey = generateDataKey(record, columnNames); + + if (uniqueRecords.containsKey(dataKey)) { + // 比较ID,保留较小的 + Record existingRecord = uniqueRecords.get(dataKey); + int existingId = existingRecord.getInt("id"); + int currentId = record.getInt("id"); + + if (currentId < existingId) { + idsToDelete.add(existingId); + uniqueRecords.put(dataKey, record); + } else { + idsToDelete.add(currentId); + } + } else { + uniqueRecords.put(dataKey, record); + } + } + + // 4. 删除重复记录 + if (!idsToDelete.isEmpty()) { + Db.use(DB_NAME).batch("DELETE FROM " + tableName + " WHERE id = ?", + idsToDelete.stream() + .map(id -> new Object[]{id}) + .collect(Collectors.toList()).toArray(new Object[0][]), + 100); + + System.out.println("已删除 " + idsToDelete.size() + " 条重复数据"); + } else { + System.out.println("没有发现重复数据"); + } + } + + /** + * 获取表的所有字段名(除id外) + */ + private List getColumnNames(String tableName) { + String sql = "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS " + + "WHERE TABLE_NAME = ? " + + "AND COLUMN_NAME != 'id' " + + "ORDER BY ORDINAL_POSITION"; + + return Db.use(DB_NAME).find(sql, tableName) + .stream() + .map(record -> record.getStr("COLUMN_NAME")) + .collect(Collectors.toList()); + } + + /** + * 生成数据特征 + */ + private String generateDataKey(Record record, List columnNames) { + StringBuilder key = new StringBuilder(); + + for (String columnName : columnNames) { + // 添加分隔符避免值的混淆 + key.append("#").append(columnName).append("="); + + // 获取字段值,处理null情况 + Object value = record.get(columnName); + key.append(value == null ? "NULL" : value.toString()); + } + + return key.toString(); + } + +}