package com.dsideal.base.AI; import com.dsideal.base.DataEase.Model.DataEaseModel; import com.dsideal.base.Util.LocalMysqlConnectUtil; import com.jfinal.plugin.activerecord.Db; import com.jfinal.plugin.activerecord.Record; import java.util.List; import java.util.Map; import java.util.Set; import java.util.LinkedHashSet; import java.util.ArrayList; public class TestMax32K { private static final int MAX_CHUNK_SIZE = 30000; // 30K字符限制 public static void main(String[] args) { LocalMysqlConnectUtil.Init(); String[] regions = {"文山州", "楚雄州"}; String sql = "select table_name as TABLE_NAME from core_dataset_table where dataset_group_id in (select id from core_dataset_group where pid='1036317909951057920')"; List tableList = Db.use(DataEaseModel.DB_NAME).find(sql); // 使用字符串数组存储分块数据 List dataChunks = new ArrayList<>(); StringBuilder currentChunk = new StringBuilder(); // 添加数据说明头部 String header = "数据说明: 以下是云南省教育数据的压缩格式\n" + "格式: 表名 -> 字段列表 -> 数据行(数组格式)\n" + "地区范围: " + String.join(",", regions) + "\n\n"; currentChunk.append(header); // 遍历所有相关数据表 for (Record record : tableList) { String tableName = record.getStr("TABLE_NAME"); // 为当前表收集所有数据 List allTableData = new ArrayList<>(); Set fieldNames = new LinkedHashSet<>(); // 为每个地区收集数据 for (String region : regions) { sql = "select * from `" + tableName + "` where `行政区划`=?"; List listContent = Db.use(DataEaseModel.DB_NAME).find(sql, region); if (!listContent.isEmpty()) { allTableData.addAll(listContent); // 收集字段名(使用第一条记录的字段结构) if (fieldNames.isEmpty()) { fieldNames.addAll(listContent.get(0).getColumns().keySet()); } } } if (!allTableData.isEmpty()) { // 构建当前表的完整数据块 StringBuilder tableData = new StringBuilder(); tableData.append("\n表: ").append(tableName).append("\n"); tableData.append("字段: ").append(String.join(",", fieldNames)).append("\n"); // 输出压缩格式的数据 for (Record dataRecord : allTableData) { Map columns = dataRecord.getColumns(); tableData.append("["); boolean first = true; for (String fieldName : fieldNames) { if (!first) tableData.append(","); Object value = columns.get(fieldName); if (value instanceof String) { tableData.append("\"").append(value).append("\""); } else { tableData.append(value); } first = false; } tableData.append("]\n"); } // 检查是否需要分块 String tableDataStr = tableData.toString(); if (currentChunk.length() + tableDataStr.length() > MAX_CHUNK_SIZE) { // 当前块已满,保存并开始新块 if (currentChunk.length() > header.length()) { dataChunks.add(currentChunk.toString()); currentChunk = new StringBuilder(); currentChunk.append(header); } // 如果单个表数据超过限制,需要进一步分割 if (tableDataStr.length() > MAX_CHUNK_SIZE - header.length()) { List tableChunks = splitLargeTable(tableName, fieldNames, allTableData, MAX_CHUNK_SIZE - header.length()); for (int i = 0; i < tableChunks.size(); i++) { StringBuilder chunkBuilder = new StringBuilder(); chunkBuilder.append(header); chunkBuilder.append("\n[续] 表: ").append(tableName).append(" (第").append(i + 1).append("部分)\n"); chunkBuilder.append("字段: ").append(String.join(",", fieldNames)).append("\n"); chunkBuilder.append(tableChunks.get(i)); dataChunks.add(chunkBuilder.toString()); } } else { currentChunk.append(tableDataStr); } } else { currentChunk.append(tableDataStr); } } } // 添加最后一个块 if (currentChunk.length() > header.length()) { dataChunks.add(currentChunk.toString()); } // 输出分块结果统计 System.out.println("总共分成 " + dataChunks.size() + " 个数据块:"); for (int i = 0; i < dataChunks.size(); i++) { String chunk = dataChunks.get(i); System.out.println("数据块 " + (i + 1) + " 长度: " + chunk.length() + " 字符"); } // 返回分块数据数组供后续使用 String[] chunksArray = dataChunks.toArray(new String[0]); // 示例:如何使用分块数据 System.out.println("\n=== 可以这样使用分块数据 ==="); for (int i = 0; i < chunksArray.length; i++) { System.out.println("处理第 " + (i + 1) + " 个数据块..."); // 这里可以调用DeepSeek API处理每个块 // String result = CallDeepSeek.callDeepSeek(chunksArray[i]); System.out.println("块 " + (i + 1) + " 内容预览: " + chunksArray[i].substring(0, Math.min(200, chunksArray[i].length())) + "..."); } } /** * 分割过大的单表数据 */ private static List splitLargeTable(String tableName, Set fieldNames, List allTableData, int maxSize) { List chunks = new ArrayList<>(); StringBuilder currentTableChunk = new StringBuilder(); for (Record dataRecord : allTableData) { Map columns = dataRecord.getColumns(); StringBuilder rowData = new StringBuilder(); rowData.append("["); boolean first = true; for (String fieldName : fieldNames) { if (!first) rowData.append(","); Object value = columns.get(fieldName); if (value instanceof String) { rowData.append("\"").append(value).append("\""); } else { rowData.append(value); } first = false; } rowData.append("]\n"); // 检查是否超过限制 if (currentTableChunk.length() + rowData.length() > maxSize) { if (currentTableChunk.length() > 0) { chunks.add(currentTableChunk.toString()); currentTableChunk = new StringBuilder(); } } currentTableChunk.append(rowData); } if (currentTableChunk.length() > 0) { chunks.add(currentTableChunk.toString()); } return chunks; } /** * 获取分块数据的方法(可供其他类调用) */ public static String[] getDataChunks() { // 这里可以将main方法中的逻辑提取出来,返回分块数据 // 为了简化,这里只是示例 return new String[]{"示例数据块1", "示例数据块2"}; } }