From 3638caaed6bb75967ffce0f5ebb1b4d9e11f7730 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Mon, 16 Jun 2025 08:23:54 +0800 Subject: [PATCH] 'commit' --- .../java/com/dsideal/base/AI/TestMax32K.java | 219 +++++++++--------- 1 file changed, 108 insertions(+), 111 deletions(-) diff --git a/src/main/java/com/dsideal/base/AI/TestMax32K.java b/src/main/java/com/dsideal/base/AI/TestMax32K.java index c33f9479..c9bdf935 100644 --- a/src/main/java/com/dsideal/base/AI/TestMax32K.java +++ b/src/main/java/com/dsideal/base/AI/TestMax32K.java @@ -5,7 +5,6 @@ import com.dsideal.base.Util.LocalMysqlConnectUtil; import com.jfinal.plugin.activerecord.Db; import com.jfinal.plugin.activerecord.Record; import com.dsideal.base.Util.CallDeepSeek; - import java.util.List; import java.util.Map; import java.util.Set; @@ -14,9 +13,7 @@ import java.util.ArrayList; import java.util.concurrent.CountDownLatch; import java.text.SimpleDateFormat; import java.util.Date; - import cn.hutool.core.io.FileUtil; - import java.io.File; public class TestMax32K { @@ -24,27 +21,27 @@ public class TestMax32K { public static void main(String[] args) { LocalMysqlConnectUtil.Init(); - + // 直接调用生成综合报告 String report = generateComprehensiveReport(); System.out.println("\n=== 最终报告 ==="); System.out.println(report); } - + /** * 分割过大的单表数据 */ - private static List splitLargeTable(Set fieldNames, - List allTableData, int maxSize) { + private static List splitLargeTable(String tableName, Set fieldNames, + List allTableData, int maxSize) { List chunks = new ArrayList<>(); StringBuilder currentTableChunk = new StringBuilder(); - + for (Record dataRecord : allTableData) { Map columns = dataRecord.getColumns(); StringBuilder rowData = new StringBuilder(); rowData.append("["); - + boolean first = true; for (String fieldName : fieldNames) { if (!first) rowData.append(","); @@ -57,159 +54,159 @@ public class TestMax32K { first = false; } rowData.append("]\n"); - + // 检查是否超过限制 if (currentTableChunk.length() + rowData.length() > maxSize) { - if (!currentTableChunk.isEmpty()) { + if (currentTableChunk.length() > 0) { chunks.add(currentTableChunk.toString()); currentTableChunk = new StringBuilder(); } } currentTableChunk.append(rowData); } - - if (!currentTableChunk.isEmpty()) { + + if (currentTableChunk.length() > 0) { chunks.add(currentTableChunk.toString()); } - + return chunks; } - + + /** + * 获取分块数据的方法(可供其他类调用) + */ + public static String[] getDataChunks() { + // 这里可以将main方法中的逻辑提取出来,返回分块数据 + // 为了简化,这里只是示例 + return new String[]{"示例数据块1", "示例数据块2"}; + } + public static String generateComprehensiveReport() { - + LocalMysqlConnectUtil.Init(); + String[] regions = {"文山州", "楚雄州"}; String sql = "select table_name as TABLE_NAME from core_dataset_table where dataset_group_id in (select id from core_dataset_group where pid='1036317909951057920')"; List tableList = Db.use(DataEaseModel.DB_NAME).find(sql); - + // 获取分块数据 String[] dataChunks = getDataChunks(regions, tableList); - List chunkAnalyses = new ArrayList<>(); - - System.out.println("开始分析 " + dataChunks.length + " 个数据块..."); - - // 第一阶段:流式分析各个数据块 + + System.out.println("开始逐步提交 " + dataChunks.length + " 个数据块..."); + + final StringBuilder finalReport = new StringBuilder(); + final CountDownLatch finalLatch = new CountDownLatch(1); + + // 逐步提交数据块 for (int i = 0; i < dataChunks.length; i++) { final int chunkIndex = i; - final StringBuilder chunkResult = new StringBuilder(); - final CountDownLatch latch = new CountDownLatch(1); - - String prompt = "请对以下教育数据进行简要分析,重点关注关键指标和趋势,控制在500字以内:\n" + dataChunks[i]; - - System.out.println("\n=== 正在分析第 " + (i + 1) + " 个数据块 ==="); - + final boolean isLastChunk = (i == dataChunks.length - 1); + + String prompt; + if (isLastChunk) { + // 最后一个数据块:要求返回完整分析报告 + prompt = "这是最后一部分教育数据,请基于之前提交的所有数据生成一份完整的综合分析报告(3000字以内):\n" + dataChunks[i]; + } else { + // 中间数据块:只提交数据,不要求返回分析 + prompt = "这是第" + (i + 1) + "部分教育数据,共" + dataChunks.length + "部分,请接收并记录,暂不需要分析:\n" + dataChunks[i]; + } + + System.out.println("\n=== 提交第 " + (i + 1) + "/" + dataChunks.length + " 个数据块 ==="); + + final CountDownLatch chunkLatch = new CountDownLatch(1); + CallDeepSeek.callDeepSeekStream(prompt, new CallDeepSeek.SSEListener() { @Override public void onData(String data) { - System.out.print(data); - chunkResult.append(data); + if (isLastChunk) { + // 只有最后一个数据块才显示和保存返回内容 + System.out.print(data); + finalReport.append(data); + } else { + // 中间数据块的响应不显示(或只显示确认信息) + // System.out.print("."); // 可选:显示进度点 + } } - + @Override public void onComplete(String fullResponse) { - System.out.println("\n--- 第 " + (chunkIndex + 1) + " 个数据块分析完成 ---\n"); - chunkAnalyses.add(chunkResult.toString()); - latch.countDown(); + if (isLastChunk) { + System.out.println("\n\n=== 综合分析报告生成完成 ==="); + + // 保存报告到文件 + try { + String timestamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date()); + String fileName = "教育数据综合分析报告_" + timestamp + ".txt"; + String filePath = "WebRoot/upload/" + fileName; + + FileUtil.writeString(finalReport.toString(), new File(filePath), "UTF-8"); + System.out.println("报告已保存到: " + filePath); + } catch (Exception e) { + System.err.println("保存报告时出错: " + e.getMessage()); + } + + finalLatch.countDown(); + } else { + System.out.println("第 " + (chunkIndex + 1) + " 个数据块已提交"); + } + chunkLatch.countDown(); } - + @Override public void onError(String error) { - System.err.println("分析第 " + (chunkIndex + 1) + " 个数据块时出错: " + error); - chunkAnalyses.add("分析失败: " + error); - latch.countDown(); + System.err.println("提交第 " + (chunkIndex + 1) + " 个数据块时出错: " + error); + if (isLastChunk) { + finalReport.append("生成失败: ").append(error); + finalLatch.countDown(); + } + chunkLatch.countDown(); } }); - + try { - // 等待当前块分析完成 - latch.await(); - Thread.sleep(1000); // 稍微延迟,避免API调用过于频繁 + // 等待当前块处理完成 + chunkLatch.await(); + if (!isLastChunk) { + Thread.sleep(1000); // 中间块之间稍微延迟 + } } catch (InterruptedException e) { - System.err.println("等待分析结果时被中断: " + e.getMessage()); + System.err.println("等待数据块处理时被中断: " + e.getMessage()); } } - - // 第二阶段:流式生成综合报告 - System.out.println("\n=== 开始生成综合分析报告 ==="); - - StringBuilder combinedAnalysis = new StringBuilder(); - combinedAnalysis.append("基于以下分块分析结果,请生成一份完整的教育数据综合分析报告(3000字以内):\n\n"); - - for (int i = 0; i < chunkAnalyses.size(); i++) { - combinedAnalysis.append("数据块").append(i + 1).append("分析:\n"); - combinedAnalysis.append(chunkAnalyses.get(i)).append("\n\n"); - } - - final StringBuilder finalReport = new StringBuilder(); - final CountDownLatch finalLatch = new CountDownLatch(1); - - CallDeepSeek.callDeepSeekStream(combinedAnalysis.toString(), new CallDeepSeek.SSEListener() { - @Override - public void onData(String data) { - System.out.print(data); - finalReport.append(data); - } - - @Override - public void onComplete(String fullResponse) { - System.out.println("\n\n=== 综合分析报告生成完成 ==="); - - // 保存报告到文件 - try { - String timestamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date()); - String fileName = "教育数据综合分析报告_" + timestamp + ".txt"; - String filePath = "WebRoot/upload/" + fileName; - - FileUtil.writeString(finalReport.toString(), new File(filePath), "UTF-8"); - System.out.println("报告已保存到: " + filePath); - } catch (Exception e) { - System.err.println("保存报告时出错: " + e.getMessage()); - } - - finalLatch.countDown(); - } - - @Override - public void onError(String error) { - System.err.println("生成综合报告时出错: " + error); - finalReport.append("生成失败: ").append(error); - finalLatch.countDown(); - } - }); - + try { finalLatch.await(); } catch (InterruptedException e) { System.err.println("等待最终报告时被中断: " + e.getMessage()); } - + return finalReport.toString(); } - + /** * 提取数据分块逻辑为独立方法 */ private static String[] getDataChunks(String[] regions, List tableList) { List dataChunks = new ArrayList<>(); StringBuilder currentChunk = new StringBuilder(); - + String header = "数据说明: 以下是云南省教育数据的压缩格式\n" + - "格式: 表名 -> 字段列表 -> 数据行(数组格式)\n" + - "地区范围: " + String.join(",", regions) + "\n\n"; + "格式: 表名 -> 字段列表 -> 数据行(数组格式)\n" + + "地区范围: " + String.join(",", regions) + "\n\n"; currentChunk.append(header); - + // 遍历所有相关数据表 for (Record record : tableList) { String tableName = record.getStr("TABLE_NAME"); - + // 为当前表收集所有数据 List allTableData = new ArrayList<>(); Set fieldNames = new LinkedHashSet<>(); - + // 为每个地区收集数据 for (String region : regions) { String sql = "select * from `" + tableName + "` where `行政区划`=?"; List listContent = Db.use(DataEaseModel.DB_NAME).find(sql, region); - + if (!listContent.isEmpty()) { allTableData.addAll(listContent); // 收集字段名(使用第一条记录的字段结构) @@ -218,18 +215,18 @@ public class TestMax32K { } } } - + if (!allTableData.isEmpty()) { // 构建当前表的完整数据块 StringBuilder tableData = new StringBuilder(); tableData.append("\n表: ").append(tableName).append("\n"); tableData.append("字段: ").append(String.join(",", fieldNames)).append("\n"); - + // 输出压缩格式的数据 for (Record dataRecord : allTableData) { Map columns = dataRecord.getColumns(); tableData.append("["); - + boolean first = true; for (String fieldName : fieldNames) { if (!first) tableData.append(","); @@ -241,10 +238,10 @@ public class TestMax32K { } first = false; } - + tableData.append("]\n"); } - + // 检查是否需要分块 String tableDataStr = tableData.toString(); if (currentChunk.length() + tableDataStr.length() > MAX_CHUNK_SIZE) { @@ -254,10 +251,10 @@ public class TestMax32K { currentChunk = new StringBuilder(); currentChunk.append(header); } - + // 如果单个表数据超过限制,需要进一步分割 if (tableDataStr.length() > MAX_CHUNK_SIZE - header.length()) { - List tableChunks = splitLargeTable(fieldNames, allTableData, MAX_CHUNK_SIZE - header.length()); + List tableChunks = splitLargeTable(tableName, fieldNames, allTableData, MAX_CHUNK_SIZE - header.length()); for (int i = 0; i < tableChunks.size(); i++) { StringBuilder chunkBuilder = new StringBuilder(); chunkBuilder.append(header); @@ -274,12 +271,12 @@ public class TestMax32K { } } } - + // 添加最后一个块 if (currentChunk.length() > header.length()) { dataChunks.add(currentChunk.toString()); } - + return dataChunks.toArray(new String[0]); } }