|
|
|
@ -0,0 +1,119 @@
|
|
|
|
|
package com.dsideal.base.Tools;
|
|
|
|
|
|
|
|
|
|
import java.sql.SQLOutput;
|
|
|
|
|
import java.util.List;
|
|
|
|
|
import java.util.Map;
|
|
|
|
|
import java.util.HashMap;
|
|
|
|
|
import java.util.Set;
|
|
|
|
|
import java.util.HashSet;
|
|
|
|
|
import java.util.stream.Collectors;
|
|
|
|
|
|
|
|
|
|
import com.dsideal.base.Tools.Util.LocalMysqlConnectUtil;
|
|
|
|
|
import com.jfinal.plugin.activerecord.Record;
|
|
|
|
|
import com.jfinal.plugin.activerecord.Db;
|
|
|
|
|
|
|
|
|
|
import static com.dsideal.base.DataEase.Model.DataEaseModel.DB_NAME;
|
|
|
|
|
|
|
|
|
|
public class DeleteNotUnique {
|
|
|
|
|
|
|
|
|
|
public static void main(String[] args) {
|
|
|
|
|
LocalMysqlConnectUtil.Init();
|
|
|
|
|
DeleteNotUnique deleteNotUnique = new DeleteNotUnique();
|
|
|
|
|
|
|
|
|
|
String sql = "select * from t_dp_dataset where b_use=1";
|
|
|
|
|
List<Record> list = Db.find(sql);
|
|
|
|
|
|
|
|
|
|
for (Record record : list) {
|
|
|
|
|
String table_name = record.getStr("table_name");
|
|
|
|
|
deleteNotUnique.removeDuplicates(table_name);
|
|
|
|
|
System.out.println("删除完成:" + table_name);
|
|
|
|
|
}
|
|
|
|
|
System.out.println("所有表清理完成! ");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 删除重复数据,保留ID最小的记录
|
|
|
|
|
*
|
|
|
|
|
* @param tableName 表名
|
|
|
|
|
*/
|
|
|
|
|
public void removeDuplicates(String tableName) {
|
|
|
|
|
// 1. 获取表的所有字段名(除id外)
|
|
|
|
|
List<String> columnNames = getColumnNames(tableName);
|
|
|
|
|
System.out.println("表字段:" + columnNames);
|
|
|
|
|
|
|
|
|
|
// 2. 获取所有数据
|
|
|
|
|
List<Record> allRecords = Db.use(DB_NAME).find("SELECT * FROM `" + tableName + "` ORDER BY id");
|
|
|
|
|
|
|
|
|
|
// 3. 使用Map存储唯一记录
|
|
|
|
|
Map<String, Record> uniqueRecords = new HashMap<>();
|
|
|
|
|
Set<Integer> idsToDelete = new HashSet<>();
|
|
|
|
|
|
|
|
|
|
for (Record record : allRecords) {
|
|
|
|
|
// 生成数据特征
|
|
|
|
|
String dataKey = generateDataKey(record, columnNames);
|
|
|
|
|
|
|
|
|
|
if (uniqueRecords.containsKey(dataKey)) {
|
|
|
|
|
// 比较ID,保留较小的
|
|
|
|
|
Record existingRecord = uniqueRecords.get(dataKey);
|
|
|
|
|
int existingId = existingRecord.getInt("id");
|
|
|
|
|
int currentId = record.getInt("id");
|
|
|
|
|
|
|
|
|
|
if (currentId < existingId) {
|
|
|
|
|
idsToDelete.add(existingId);
|
|
|
|
|
uniqueRecords.put(dataKey, record);
|
|
|
|
|
} else {
|
|
|
|
|
idsToDelete.add(currentId);
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
uniqueRecords.put(dataKey, record);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// 4. 删除重复记录
|
|
|
|
|
if (!idsToDelete.isEmpty()) {
|
|
|
|
|
Db.use(DB_NAME).batch("DELETE FROM " + tableName + " WHERE id = ?",
|
|
|
|
|
idsToDelete.stream()
|
|
|
|
|
.map(id -> new Object[]{id})
|
|
|
|
|
.collect(Collectors.toList()).toArray(new Object[0][]),
|
|
|
|
|
100);
|
|
|
|
|
|
|
|
|
|
System.out.println("已删除 " + idsToDelete.size() + " 条重复数据");
|
|
|
|
|
} else {
|
|
|
|
|
System.out.println("没有发现重复数据");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 获取表的所有字段名(除id外)
|
|
|
|
|
*/
|
|
|
|
|
private List<String> getColumnNames(String tableName) {
|
|
|
|
|
String sql = "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS " +
|
|
|
|
|
"WHERE TABLE_NAME = ? " +
|
|
|
|
|
"AND COLUMN_NAME != 'id' " +
|
|
|
|
|
"ORDER BY ORDINAL_POSITION";
|
|
|
|
|
|
|
|
|
|
return Db.use(DB_NAME).find(sql, tableName)
|
|
|
|
|
.stream()
|
|
|
|
|
.map(record -> record.getStr("COLUMN_NAME"))
|
|
|
|
|
.collect(Collectors.toList());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 生成数据特征
|
|
|
|
|
*/
|
|
|
|
|
private String generateDataKey(Record record, List<String> columnNames) {
|
|
|
|
|
StringBuilder key = new StringBuilder();
|
|
|
|
|
|
|
|
|
|
for (String columnName : columnNames) {
|
|
|
|
|
// 添加分隔符避免值的混淆
|
|
|
|
|
key.append("#").append(columnName).append("=");
|
|
|
|
|
|
|
|
|
|
// 获取字段值,处理null情况
|
|
|
|
|
Object value = record.get(columnName);
|
|
|
|
|
key.append(value == null ? "NULL" : value.toString());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return key.toString();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|