From 28b8b8abbebdefdaf8bfbc56d2004b46b1ca1537 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Mon, 16 Jun 2025 11:52:35 +0800 Subject: [PATCH] 'commit' --- .../AI32K/Controller/AiController32K.java | 6 +- .../base/AI32K/Model/YunNanModel32K.java | 625 +++++++++++- .../com/dsideal/base/AI32K/TestMax32K.java | 936 ------------------ 3 files changed, 600 insertions(+), 967 deletions(-) delete mode 100644 src/main/java/com/dsideal/base/AI32K/TestMax32K.java diff --git a/src/main/java/com/dsideal/base/AI32K/Controller/AiController32K.java b/src/main/java/com/dsideal/base/AI32K/Controller/AiController32K.java index a827f7ba..72bec64f 100644 --- a/src/main/java/com/dsideal/base/AI32K/Controller/AiController32K.java +++ b/src/main/java/com/dsideal/base/AI32K/Controller/AiController32K.java @@ -35,10 +35,12 @@ public class AiController32K extends Controller { */ @Before({GET.class}) public void compareShiZhouWord(String shiZhouA, String shiZhouB) throws Exception { - // 数据获取 - String content = ym.collectEducationData(new String[]{shiZhouA, shiZhouB}); + //创建sse final SseEmitter sseEmitter = new SseEmitter(getResponse()); + // 数据获取 + ym.submitAllDataChunks(); + // 发送开始分析的消息 sseEmitter.sendMessage("data: 开始数据分析...\n\n"); diff --git a/src/main/java/com/dsideal/base/AI32K/Model/YunNanModel32K.java b/src/main/java/com/dsideal/base/AI32K/Model/YunNanModel32K.java index 57ede09d..36f2f45d 100644 --- a/src/main/java/com/dsideal/base/AI32K/Model/YunNanModel32K.java +++ b/src/main/java/com/dsideal/base/AI32K/Model/YunNanModel32K.java @@ -1,64 +1,631 @@ package com.dsideal.base.AI32K.Model; import cn.hutool.json.JSONUtil; +import com.dsideal.base.AI.Generator.WordGenerator; import com.dsideal.base.DataEase.Model.DataEaseModel; +import com.dsideal.base.Util.CallDeepSeek; +import com.dsideal.base.Util.LocalMysqlConnectUtil; import com.jfinal.plugin.activerecord.Db; import com.jfinal.plugin.activerecord.Record; -import java.util.List; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.text.SimpleDateFormat; +import java.util.*; +import java.util.concurrent.CountDownLatch; public class YunNanModel32K { + /** - * 收集指定地区的教育资源配置数据 + * 获取云南省下所有城市名称 * - * @param regions 要对比的地区数组 - * @return 格式化的数据内容 + * @return + */ + public List getYunNanCity() { + String sql = "select id,area_code,area_name,full_name from t_dm_area where parent_id='FD61813E-70A1-42AB-9A8E-141ED4D47B98'"; + return Db.find(sql); + } + + private static final int MAX_CHUNK_SIZE = 30000; // 调整为30K字符,充分利用32K输入限制 + private static boolean dataSubmitted = false; // 标记数据是否已提交 + private static final String[] regions = {"楚雄州", "文山州"}; // 或者根据需要设置具体的地区 + + public static void main(String[] args) { + Scanner scanner = new Scanner(System.in); + LocalMysqlConnectUtil.Init(); + + try { + // 第一步:提交所有数据块 + System.out.println("开始提交数据到大模型..."); + //submitAllDataChunks(); + dataSubmitted = true; + System.out.println("\n数据提交完成!"); + + // 第二步:询问用户想生成哪种格式 + while (true) { + System.out.println("\n=== 报告生成选项 ==="); + System.out.println("现在可以基于上传的数据生成以下格式的报告:"); + System.out.println("1. WORD文档 (.docx)"); + System.out.println("2. 退出程序"); + System.out.print("\n请选择您想生成的报告格式 (1-4): "); + + String choice = scanner.nextLine().trim(); + + switch (choice) { + case "1": + generateWordReport(); + break; + case "2": + System.out.println("程序退出。"); + return; + default: + System.out.println("无效选择,请输入 1-2之间的数字。"); + } + } + + } catch (Exception e) { + System.err.println("程序执行出错: " + e.getMessage()); + e.printStackTrace(); + } finally { + scanner.close(); + } + } + + /** + * 提交所有数据块到大模型 */ - public String collectEducationData(String[] regions) { - // 查询教育资源配置发展预测相关表【低于32K】 - String sql = "SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = 'dataease' AND TABLE_NAME LIKE 'excel_报告-教育资源配置发展预测%';"; + public void submitAllDataChunks() throws Exception { + if (dataSubmitted) { + System.out.println("数据已经提交过了,无需重复提交。"); + return; + } - // 超过32K - //String sql="select table_name as TABLE_NAME from core_dataset_table where dataset_group_id in (select id from core_dataset_group where pid='1036317909951057920')"; + // 添加获取参数的代码 + String sql = "select table_name as TABLE_NAME from core_dataset_table where dataset_group_id in (select id from core_dataset_group where pid='1036317909951057920')"; List tableList = Db.use(DataEaseModel.DB_NAME).find(sql); - StringBuilder dataContent = new StringBuilder(); + // 修改这一行,传递正确的参数 + String[] dataChunks = getDataChunks(regions, tableList); + System.out.println("总共需要提交 " + dataChunks.length + " 个数据块"); + + // 添加调试功能:保存所有数据块到文件 + saveDataChunksToFile(dataChunks); + + CountDownLatch latch = new CountDownLatch(dataChunks.length); + + for (int i = 0; i < dataChunks.length; i++) { + final int chunkIndex = i; + final boolean isLastChunk = (i == dataChunks.length - 1); + + String prompt; + if (isLastChunk) { + prompt = "这是最后一个数据块(第" + (chunkIndex + 1) + "/" + dataChunks.length + "个)。\n" + + "请确认已接收所有数据块,现在数据提交完成。请回复'数据接收完成'以确认。\n\n" + + "数据内容:\n" + dataChunks[chunkIndex]; + } else { + prompt = "这是第" + (chunkIndex + 1) + "/" + dataChunks.length + "个数据块,后续还有更多数据。\n" + + "请接收此数据块,无需分析,等待所有数据提交完成。\n\n" + + "数据内容:\n" + dataChunks[chunkIndex]; + } + + // 保存每个提示词到单独文件(可选) + savePromptToFile(prompt, chunkIndex + 1); + + CallDeepSeek.callDeepSeekStream(prompt, new CallDeepSeek.SSEListener() { + @Override + public void onData(String data) { + if (isLastChunk) { + System.out.println("\n大模型确认: " + data); + } + } + + @Override + public void onComplete(String fullResponse) { + System.out.println("数据块 " + (chunkIndex + 1) + "/" + dataChunks.length + " 提交完成"); + latch.countDown(); + } + + @Override + public void onError(String error) { + System.err.println("数据块 " + (chunkIndex + 1) + " 提交失败: " + error); + latch.countDown(); + } + }); + + // 避免请求过于频繁 + Thread.sleep(1000); + } + + latch.await(); + } + + /** + * 保存所有数据块到文件进行调试 + */ + private static void saveDataChunksToFile(String[] dataChunks) { + try { + String timestamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date()); + + // 保存所有数据块到一个文件 + StringBuilder allData = new StringBuilder(); + allData.append("=== 提交给大模型的所有数据块 ===").append("\n"); + allData.append("生成时间: ").append(timestamp).append("\n"); + allData.append("总数据块数: ").append(dataChunks.length).append("\n"); + allData.append("地区: ").append(String.join(", ", regions)).append("\n"); + allData.append("\n"); + + for (int i = 0; i < dataChunks.length; i++) { + allData.append("\n").append("=".repeat(80)).append("\n"); + allData.append("数据块 ").append(i + 1).append("/").append(dataChunks.length).append("\n"); + allData.append("数据块大小: ").append(dataChunks[i].length()).append(" 字符\n"); + allData.append("=".repeat(80)).append("\n"); + allData.append(dataChunks[i]); + allData.append("\n"); + } + + String debugFileName = "debug_data_chunks_" + timestamp + ".txt"; + String debugFilePath = "Log/" + debugFileName; + Files.write(Paths.get(debugFilePath), allData.toString().getBytes("UTF-8")); + + System.out.println("调试文件已保存: " + debugFilePath); + System.out.println("文件包含 " + dataChunks.length + " 个数据块,总大小: " + allData.length() + " 字符"); + + // 另外保存每个数据块到单独文件 + for (int i = 0; i < dataChunks.length; i++) { + String chunkFileName = "debug_chunk_" + (i + 1) + "_" + timestamp + ".txt"; + String chunkFilePath = "Log/" + chunkFileName; + Files.write(Paths.get(chunkFilePath), dataChunks[i].getBytes("UTF-8")); + } + + System.out.println("每个数据块也已保存为单独文件: Log/debug_chunk_*_" + timestamp + ".txt"); + + } catch (Exception e) { + System.err.println("保存调试文件失败: " + e.getMessage()); + e.printStackTrace(); + } + } + + /** + * 保存提示词到文件(可选) + */ + private static void savePromptToFile(String prompt, int chunkIndex) { + try { + String timestamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date()); + String promptFileName = "debug_prompt_" + chunkIndex + "_" + timestamp + ".txt"; + String promptFilePath = "Log/" + promptFileName; + Files.write(Paths.get(promptFilePath), prompt.getBytes("UTF-8")); + } catch (Exception e) { + System.err.println("保存提示词文件失败: " + e.getMessage()); + } + } + + /** + * 生成Word报告(分批次生成) + */ + private static void generateWordReport() { + if (!dataSubmitted) { + System.out.println("请先提交数据!"); + return; + } + + System.out.println("\n开始分批次生成Word报告..."); + + try { + // 动态生成地区名称字符串 + String regionNames = String.join("与", regions); + String reportTitle = regionNames + "教育资源配置对比分析报告"; + + // 定义报告的4个部分 + String[] reportSections = { + "摘要和数据概览", + generateRegionAnalysisSections(regions), + "对比分析", + "结论与建议" + }; + + StringBuilder fullReport = new StringBuilder(); + fullReport.append("# ").append(reportTitle).append("\n\n"); + + // 分4次生成报告内容 + for (int i = 0; i < 4; i++) { + System.out.println("正在生成第" + (i + 1) + "/4部分: " + getSectionTitle(i)); + String sectionContent = generateReportSection(i, regionNames); + if (sectionContent != null && !sectionContent.trim().isEmpty()) { + fullReport.append(sectionContent).append("\n\n"); + } + + // 添加延迟避免API限制 + if (i < 3) { + Thread.sleep(2000); + } + } + + // 保存完整报告 + saveCompleteReport(fullReport.toString(), reportTitle); + + } catch (Exception e) { + System.err.println("生成Word报告时出错: " + e.getMessage()); + } + } + + /** + * 构建各部分的提示词(优化版) + */ + private static String buildSectionPrompt(int sectionIndex, String regionNames) { + String basePrompt = "基于之前提交的" + regionNames + "的完整教育数据,"; + + switch (sectionIndex) { + case 0: // 摘要和数据概览 + return basePrompt + "请生成报告的摘要和数据概览部分。\n\n" + + "**严格格式要求:**\n" + + "1. 直接从## 摘要开始,绝对不要重复报告主标题\n" + + "2. 摘要内容2-3个段落,每段之间空一行\n" + + "3. 使用## 数据概览作为标题\n" + + "4. 数据概览用简洁的段落描述,不使用表格\n" + + "5. 段落间必须有空行\n" + + "6. 不要使用复杂的格式符号\n" + + "7. 控制在2500字符以内\n" + + "8. 必须基于实际数据,不得编造\n" + + "9. 【重要】报告主标题已存在,请勿重复添加任何形式的主标题\n" + + "10. 【示例格式】直接以下面格式开始:\n" + + "## 摘要\n" + + "根据楚雄州与文山州的教育数据分析...\n\n" + + "## 数据概览\n" + + "本次分析涵盖了..."; + + case 1: // 各地区分析 + return basePrompt + "请生成各地区的详细分析部分。\n\n" + + "**严格格式要求:**\n" + + "1. 为每个地区使用## [地区名]教育资源分析作为标题\n" + + "2. 每个地区分析包含:\n" + + " - 学前教育情况(一个段落)\n" + + " - 义务教育情况(一个段落)\n" + + " - 高中教育情况(一个段落)\n" + + "3. 段落间用空行分隔\n" + + "4. 使用简单的Markdown表格展示关键数据(最多2个表格)\n" + + "5. 表格格式:| 指标 | 数值 | 说明 |\n" + + "6. 控制在3500字符以内"; + + case 2: // 对比分析 + return basePrompt + "请生成地区间的对比分析部分。\n\n" + + "**严格格式要求:**\n" + + "1. 使用## 对比分析作为标题\n" + + "2. 分为以下子部分:\n" + + " ### 教育资源配置对比\n" + + " ### 教育质量差异分析\n" + + " ### 发展趋势对比\n" + + "3. 每个子部分2-3个段落\n" + + "4. 段落间用空行分隔\n" + + "5. 可使用一个对比表格,格式简洁\n" + + "6. 控制在3000字符以内"; + + case 3: // 结论与建议 + return basePrompt + "请生成结论与建议部分。\n\n" + + "**严格格式要求:**\n" + + "1. 使用## 结论与建议作为主标题\n" + + "2. 分为两个子部分:\n" + + " ### 主要结论\n" + + " ### 政策建议\n" + + "3. 主要结论部分:\n" + + " - 使用1. 2. 3. 4.编号列表\n" + + " - 每条结论一个段落\n" + + " - 结论间用空行分隔\n" + + "4. 政策建议部分:\n" + + " - 使用1. 2. 3. 4. 5.编号列表\n" + + " - 每条建议包含具体措施\n" + + " - 建议间用空行分隔\n" + + "5. 不要使用过多的子标题\n" + + "6. 控制在2800字符以内\n" + + "7. 必须基于前面的数据分析"; + + default: + return basePrompt + "请生成报告内容。"; + } + } + + /** + * 格式验证和修正方法 + */ + private static String validateAndFixFormat(String content) { + if (content == null || content.trim().isEmpty()) { + return content; + } + + StringBuilder fixed = new StringBuilder(); + String[] lines = content.split("\n"); + + for (int i = 0; i < lines.length; i++) { + String line = lines[i]; + + // 修正标题格式 + if (line.trim().startsWith("#")) { + // 确保标题前后有空行 + if (i > 0 && !lines[i - 1].trim().isEmpty()) { + fixed.append("\n"); + } + fixed.append(line).append("\n\n"); + } + // 修正列表格式 + else if (line.trim().matches("^[0-9]+\\.") || line.trim().startsWith("-")) { + fixed.append(line).append("\n\n"); + } + // 普通段落 + else if (!line.trim().isEmpty()) { + fixed.append(line).append("\n\n"); + } + // 保留空行但不重复 + else if (line.trim().isEmpty() && i > 0 && !lines[i - 1].trim().isEmpty()) { + // 空行已经在上面添加了,这里跳过 + } + } + + return fixed.toString().trim(); + } + + /** + * 修改后的生成报告的单个部分方法 + */ + private static String generateReportSection(int sectionIndex, String regionNames) { + try { + String prompt = buildSectionPrompt(sectionIndex, regionNames); + + StringBuilder sectionContent = new StringBuilder(); + CountDownLatch latch = new CountDownLatch(1); + + CallDeepSeek.callDeepSeekStream(prompt, new CallDeepSeek.SSEListener() { + @Override + public void onData(String data) { + sectionContent.append(data); + System.out.print("."); + } + + @Override + public void onComplete(String fullResponse) { + System.out.println(" 完成!"); + latch.countDown(); + } + + @Override + public void onError(String error) { + System.err.println("\n生成第" + (sectionIndex + 1) + "部分失败: " + error); + latch.countDown(); + } + }); + + latch.await(); + + // 格式验证和修正 + String rawContent = sectionContent.toString(); + String fixedContent = validateAndFixFormat(rawContent); + + // 保存原始和修正后的内容用于调试 + saveDebugContent(rawContent, fixedContent, sectionIndex); + + return fixedContent; + + } catch (Exception e) { + System.err.println("生成第" + (sectionIndex + 1) + "部分时出错: " + e.getMessage()); + return ""; + } + } + + /** + * 保存调试内容 + */ + private static void saveDebugContent(String rawContent, String fixedContent, int sectionIndex) { + try { + String timestamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date()); + + // 保存原始内容 + String rawFileName = "debug_section_" + (sectionIndex + 1) + "_raw_" + timestamp + ".txt"; + Files.write(Paths.get("Doc/" + rawFileName), rawContent.getBytes("UTF-8")); + + // 保存修正后内容 + String fixedFileName = "debug_section_" + (sectionIndex + 1) + "_fixed_" + timestamp + ".txt"; + Files.write(Paths.get("Doc/" + fixedFileName), fixedContent.getBytes("UTF-8")); + + System.out.println("第" + (sectionIndex + 1) + "部分调试文件已保存"); + + } catch (Exception e) { + System.err.println("保存调试文件失败: " + e.getMessage()); + } + } + + /** + * 获取部分标题 + */ + private static String getSectionTitle(int index) { + switch (index) { + case 0: + return "摘要和数据概览"; + case 1: + return "各地区详细分析"; + case 2: + return "对比分析"; + case 3: + return "结论与建议"; + default: + return "未知部分"; + } + } + + /** + * 生成地区分析部分的信息 + */ + private static String generateRegionAnalysisSections(String[] regions) { + return "各地区分析(" + String.join("、", regions) + ")"; + } + + /** + * 保存完整报告 + */ + private static void saveCompleteReport(String fullContent, String reportTitle) { + try { + String timestamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date()); + + // 保存为文本文件用于调试 + String txtFileName = "complete_report_" + timestamp + ".txt"; + String txtFilePath = "Log/" + txtFileName; + Files.write(Paths.get(txtFilePath), fullContent.getBytes("UTF-8")); + System.out.println("完整报告文本已保存: " + txtFilePath); + + // 生成Word文档 + String docxFileName = "analysis_report_" + timestamp + ".docx"; + WordGenerator.generateWordDocument(fullContent, docxFileName, regions); + System.out.println("Word报告已保存至: WebRoot/upload/" + docxFileName); + + System.out.println("\n=== 报告生成完成 ==="); + System.out.println("总字符数: " + fullContent.length()); + System.out.println("报告标题: " + reportTitle); - // 构建数据标题 - dataContent.append("教育资源配置发展预测数据对比分析\n\n"); - dataContent.append("对比州市:").append(String.join(" vs ", regions)).append("\n\n"); + } catch (Exception e) { + System.err.println("保存完整报告失败: " + e.getMessage()); + } + } + + + /** + * 分割过大的单表数据 + */ + private static List splitLargeTable(Set fieldNames, + List allTableData, int maxSize) { + List chunks = new ArrayList<>(); + StringBuilder currentTableChunk = new StringBuilder(); + + for (Record dataRecord : allTableData) { + Map columns = dataRecord.getColumns(); + StringBuilder rowData = new StringBuilder(); + rowData.append("["); + + boolean first = true; + for (String fieldName : fieldNames) { + if (!first) rowData.append(","); + Object value = columns.get(fieldName); + if (value instanceof String) { + rowData.append("\"").append(value).append("\""); + } else { + rowData.append(value); + } + first = false; + } + rowData.append("]\n"); + + // 检查是否超过限制 + if (currentTableChunk.length() + rowData.length() > maxSize) { + if (!currentTableChunk.isEmpty()) { + chunks.add(currentTableChunk.toString()); + currentTableChunk = new StringBuilder(); + } + } + currentTableChunk.append(rowData); + } + + if (!currentTableChunk.isEmpty()) { + chunks.add(currentTableChunk.toString()); + } + + return chunks; + } + + /** + * 提取数据分块逻辑为独立方法 + */ + private static String[] getDataChunks(String[] regions, List tableList) { + List dataChunks = new ArrayList<>(); + StringBuilder currentChunk = new StringBuilder(); + + String header = "数据说明: 以下是云南省教育数据的压缩格式\n" + + "格式: 表名 -> 字段列表 -> 数据行(数组格式)\n" + + "地区范围: " + String.join(",", regions) + "\n\n"; + currentChunk.append(header); // 遍历所有相关数据表 for (Record record : tableList) { String tableName = record.getStr("TABLE_NAME"); - dataContent.append("数据表:").append(tableName).append("\n"); + + // 为当前表收集所有数据 + List allTableData = new ArrayList<>(); + Set fieldNames = new LinkedHashSet<>(); // 为每个地区收集数据 for (String region : regions) { - sql = "select * from `" + tableName + "` where `行政区划`=?"; + String sql = "select * from `" + tableName + "` where `行政区划`=?"; List listContent = Db.use(DataEaseModel.DB_NAME).find(sql, region); if (!listContent.isEmpty()) { - dataContent.append("\n").append(region).append("数据:\n"); - for (Record dataRecord : listContent) { - dataContent.append(JSONUtil.toJsonPrettyStr(dataRecord.getColumns())).append("\n"); + allTableData.addAll(listContent); + // 收集字段名(使用第一条记录的字段结构) + if (fieldNames.isEmpty()) { + fieldNames.addAll(listContent.get(0).getColumns().keySet()); + } + } + } + + if (!allTableData.isEmpty()) { + // 构建当前表的完整数据块 + StringBuilder tableData = new StringBuilder(); + tableData.append("\n表: ").append(tableName).append("\n"); + tableData.append("字段: ").append(String.join(",", fieldNames)).append("\n"); + + // 输出压缩格式的数据 + for (Record dataRecord : allTableData) { + Map columns = dataRecord.getColumns(); + tableData.append("["); + + boolean first = true; + for (String fieldName : fieldNames) { + if (!first) tableData.append(","); + Object value = columns.get(fieldName); + if (value instanceof String) { + tableData.append("\"").append(value).append("\""); + } else { + tableData.append(value); + } + first = false; + } + + tableData.append("]\n"); + } + + // 检查是否需要分块 + String tableDataStr = tableData.toString(); + if (currentChunk.length() + tableDataStr.length() > MAX_CHUNK_SIZE) { + // 当前块已满,保存并开始新块 + if (currentChunk.length() > header.length()) { + dataChunks.add(currentChunk.toString()); + currentChunk = new StringBuilder(); + currentChunk.append(header); + } + + // 如果单个表数据超过限制,需要进一步分割 + if (tableDataStr.length() > MAX_CHUNK_SIZE - header.length()) { + List tableChunks = splitLargeTable(fieldNames, allTableData, MAX_CHUNK_SIZE - header.length()); + for (int i = 0; i < tableChunks.size(); i++) { + StringBuilder chunkBuilder = new StringBuilder(); + chunkBuilder.append(header); + chunkBuilder.append("\n[续] 表: ").append(tableName).append(" (第").append(i + 1).append("部分)\n"); + chunkBuilder.append("字段: ").append(String.join(",", fieldNames)).append("\n"); + chunkBuilder.append(tableChunks.get(i)); + dataChunks.add(chunkBuilder.toString()); + } + } else { + currentChunk.append(tableDataStr); } } else { - dataContent.append("\n").append(region).append(":无相关数据\n"); + currentChunk.append(tableDataStr); } } - dataContent.append("\n----------------------------------------\n\n"); } - return dataContent.toString(); - } + // 添加最后一个块 + if (currentChunk.length() > header.length()) { + dataChunks.add(currentChunk.toString()); + } + + return dataChunks.toArray(new String[0]); + } /** - * 获取云南省下所有城市名称 - * - * @return + * 保存完整的HTML文件 + * @param htmlContent HTML内容 */ - public List getYunNanCity() { - String sql = "select id,area_code,area_name,full_name from t_dm_area where parent_id='FD61813E-70A1-42AB-9A8E-141ED4D47B98'"; - return Db.find(sql); - } } diff --git a/src/main/java/com/dsideal/base/AI32K/TestMax32K.java b/src/main/java/com/dsideal/base/AI32K/TestMax32K.java deleted file mode 100644 index 4979a88a..00000000 --- a/src/main/java/com/dsideal/base/AI32K/TestMax32K.java +++ /dev/null @@ -1,936 +0,0 @@ -package com.dsideal.base.AI32K; - -import com.dsideal.base.AI.Generator.PptGenerator; -import com.dsideal.base.DataEase.Model.DataEaseModel; -import com.dsideal.base.Util.LocalMysqlConnectUtil; -import com.dsideal.base.Util.PptAIKit; -import com.jfinal.plugin.activerecord.Db; -import com.jfinal.plugin.activerecord.Record; -import com.dsideal.base.Util.CallDeepSeek; - -import java.io.FileWriter; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.LinkedHashSet; -import java.util.ArrayList; -import java.util.concurrent.CountDownLatch; -import java.text.SimpleDateFormat; -import java.util.Date; - -import java.util.Scanner; - -import com.dsideal.base.AI.Generator.WordGenerator; - -public class TestMax32K { - private static final int MAX_CHUNK_SIZE = 30000; // 调整为30K字符,充分利用32K输入限制 - private static boolean dataSubmitted = false; // 标记数据是否已提交 - private static final String[] regions = {"楚雄州", "文山州"}; // 或者根据需要设置具体的地区 - - public static void main(String[] args) { - Scanner scanner = new Scanner(System.in); - LocalMysqlConnectUtil.Init(); - - try { - // 第一步:提交所有数据块 - System.out.println("开始提交数据到大模型..."); - submitAllDataChunks(); - dataSubmitted = true; - System.out.println("\n数据提交完成!"); - - // 第二步:询问用户想生成哪种格式 - while (true) { - System.out.println("\n=== 报告生成选项 ==="); - System.out.println("现在可以基于上传的数据生成以下格式的报告:"); - System.out.println("1. WORD文档 (.docx)"); - System.out.println("2. HTML网页 (.html)"); - System.out.println("3. PPT演示文稿 (.pptx)"); - System.out.println("4. 退出程序"); - System.out.print("\n请选择您想生成的报告格式 (1-4): "); - - String choice = scanner.nextLine().trim(); - - switch (choice) { - case "1": - generateWordReport(); - break; - case "2": - generateHtmlReport(); - break; - case "3": - generatePptReport(); - break; - case "4": - System.out.println("程序退出。"); - return; - default: - System.out.println("无效选择,请输入 1-4 之间的数字。"); - } - } - - } catch (Exception e) { - System.err.println("程序执行出错: " + e.getMessage()); - e.printStackTrace(); - } finally { - scanner.close(); - } - } - - /** - * 提交所有数据块到大模型 - */ - private static void submitAllDataChunks() throws Exception { - if (dataSubmitted) { - System.out.println("数据已经提交过了,无需重复提交。"); - return; - } - - // 添加获取参数的代码 - String sql = "select table_name as TABLE_NAME from core_dataset_table where dataset_group_id in (select id from core_dataset_group where pid='1036317909951057920')"; - List tableList = Db.use(DataEaseModel.DB_NAME).find(sql); - - // 修改这一行,传递正确的参数 - String[] dataChunks = getDataChunks(regions, tableList); - System.out.println("总共需要提交 " + dataChunks.length + " 个数据块"); - - // 添加调试功能:保存所有数据块到文件 - saveDataChunksToFile(dataChunks); - - CountDownLatch latch = new CountDownLatch(dataChunks.length); - - for (int i = 0; i < dataChunks.length; i++) { - final int chunkIndex = i; - final boolean isLastChunk = (i == dataChunks.length - 1); - - String prompt; - if (isLastChunk) { - prompt = "这是最后一个数据块(第" + (chunkIndex + 1) + "/" + dataChunks.length + "个)。\n" + - "请确认已接收所有数据块,现在数据提交完成。请回复'数据接收完成'以确认。\n\n" + - "数据内容:\n" + dataChunks[chunkIndex]; - } else { - prompt = "这是第" + (chunkIndex + 1) + "/" + dataChunks.length + "个数据块,后续还有更多数据。\n" + - "请接收此数据块,无需分析,等待所有数据提交完成。\n\n" + - "数据内容:\n" + dataChunks[chunkIndex]; - } - - // 保存每个提示词到单独文件(可选) - savePromptToFile(prompt, chunkIndex + 1); - - CallDeepSeek.callDeepSeekStream(prompt, new CallDeepSeek.SSEListener() { - @Override - public void onData(String data) { - if (isLastChunk) { - System.out.println("\n大模型确认: " + data); - } - } - - @Override - public void onComplete(String fullResponse) { - System.out.println("数据块 " + (chunkIndex + 1) + "/" + dataChunks.length + " 提交完成"); - latch.countDown(); - } - - @Override - public void onError(String error) { - System.err.println("数据块 " + (chunkIndex + 1) + " 提交失败: " + error); - latch.countDown(); - } - }); - - // 避免请求过于频繁 - Thread.sleep(1000); - } - - latch.await(); - } - - /** - * 保存所有数据块到文件进行调试 - */ - private static void saveDataChunksToFile(String[] dataChunks) { - try { - String timestamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date()); - - // 保存所有数据块到一个文件 - StringBuilder allData = new StringBuilder(); - allData.append("=== 提交给大模型的所有数据块 ===").append("\n"); - allData.append("生成时间: ").append(timestamp).append("\n"); - allData.append("总数据块数: ").append(dataChunks.length).append("\n"); - allData.append("地区: ").append(String.join(", ", regions)).append("\n"); - allData.append("\n"); - - for (int i = 0; i < dataChunks.length; i++) { - allData.append("\n").append("=".repeat(80)).append("\n"); - allData.append("数据块 ").append(i + 1).append("/").append(dataChunks.length).append("\n"); - allData.append("数据块大小: ").append(dataChunks[i].length()).append(" 字符\n"); - allData.append("=".repeat(80)).append("\n"); - allData.append(dataChunks[i]); - allData.append("\n"); - } - - String debugFileName = "debug_data_chunks_" + timestamp + ".txt"; - String debugFilePath = "Log/" + debugFileName; - Files.write(Paths.get(debugFilePath), allData.toString().getBytes("UTF-8")); - - System.out.println("调试文件已保存: " + debugFilePath); - System.out.println("文件包含 " + dataChunks.length + " 个数据块,总大小: " + allData.length() + " 字符"); - - // 另外保存每个数据块到单独文件 - for (int i = 0; i < dataChunks.length; i++) { - String chunkFileName = "debug_chunk_" + (i + 1) + "_" + timestamp + ".txt"; - String chunkFilePath = "Log/" + chunkFileName; - Files.write(Paths.get(chunkFilePath), dataChunks[i].getBytes("UTF-8")); - } - - System.out.println("每个数据块也已保存为单独文件: Log/debug_chunk_*_" + timestamp + ".txt"); - - } catch (Exception e) { - System.err.println("保存调试文件失败: " + e.getMessage()); - e.printStackTrace(); - } - } - - /** - * 保存提示词到文件(可选) - */ - private static void savePromptToFile(String prompt, int chunkIndex) { - try { - String timestamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date()); - String promptFileName = "debug_prompt_" + chunkIndex + "_" + timestamp + ".txt"; - String promptFilePath = "Log/" + promptFileName; - Files.write(Paths.get(promptFilePath), prompt.getBytes("UTF-8")); - } catch (Exception e) { - System.err.println("保存提示词文件失败: " + e.getMessage()); - } - } - - /** - * 生成Word报告(分批次生成) - */ - private static void generateWordReport() { - if (!dataSubmitted) { - System.out.println("请先提交数据!"); - return; - } - - System.out.println("\n开始分批次生成Word报告..."); - - try { - // 动态生成地区名称字符串 - String regionNames = String.join("与", regions); - String reportTitle = regionNames + "教育资源配置对比分析报告"; - - // 定义报告的4个部分 - String[] reportSections = { - "摘要和数据概览", - generateRegionAnalysisSections(regions), - "对比分析", - "结论与建议" - }; - - StringBuilder fullReport = new StringBuilder(); - fullReport.append("# ").append(reportTitle).append("\n\n"); - - // 分4次生成报告内容 - for (int i = 0; i < 4; i++) { - System.out.println("正在生成第" + (i + 1) + "/4部分: " + getSectionTitle(i)); - String sectionContent = generateReportSection(i, regionNames); - if (sectionContent != null && !sectionContent.trim().isEmpty()) { - fullReport.append(sectionContent).append("\n\n"); - } - - // 添加延迟避免API限制 - if (i < 3) { - Thread.sleep(2000); - } - } - - // 保存完整报告 - saveCompleteReport(fullReport.toString(), reportTitle); - - } catch (Exception e) { - System.err.println("生成Word报告时出错: " + e.getMessage()); - } - } - - /** - * 构建各部分的提示词(优化版) - */ - private static String buildSectionPrompt(int sectionIndex, String regionNames) { - String basePrompt = "基于之前提交的" + regionNames + "的完整教育数据,"; - - switch (sectionIndex) { - case 0: // 摘要和数据概览 - return basePrompt + "请生成报告的摘要和数据概览部分。\n\n" + - "**严格格式要求:**\n" + - "1. 直接从## 摘要开始,绝对不要重复报告主标题\n" + - "2. 摘要内容2-3个段落,每段之间空一行\n" + - "3. 使用## 数据概览作为标题\n" + - "4. 数据概览用简洁的段落描述,不使用表格\n" + - "5. 段落间必须有空行\n" + - "6. 不要使用复杂的格式符号\n" + - "7. 控制在2500字符以内\n" + - "8. 必须基于实际数据,不得编造\n" + - "9. 【重要】报告主标题已存在,请勿重复添加任何形式的主标题\n" + - "10. 【示例格式】直接以下面格式开始:\n" + - "## 摘要\n" + - "根据楚雄州与文山州的教育数据分析...\n\n" + - "## 数据概览\n" + - "本次分析涵盖了..."; - - case 1: // 各地区分析 - return basePrompt + "请生成各地区的详细分析部分。\n\n" + - "**严格格式要求:**\n" + - "1. 为每个地区使用## [地区名]教育资源分析作为标题\n" + - "2. 每个地区分析包含:\n" + - " - 学前教育情况(一个段落)\n" + - " - 义务教育情况(一个段落)\n" + - " - 高中教育情况(一个段落)\n" + - "3. 段落间用空行分隔\n" + - "4. 使用简单的Markdown表格展示关键数据(最多2个表格)\n" + - "5. 表格格式:| 指标 | 数值 | 说明 |\n" + - "6. 控制在3500字符以内"; - - case 2: // 对比分析 - return basePrompt + "请生成地区间的对比分析部分。\n\n" + - "**严格格式要求:**\n" + - "1. 使用## 对比分析作为标题\n" + - "2. 分为以下子部分:\n" + - " ### 教育资源配置对比\n" + - " ### 教育质量差异分析\n" + - " ### 发展趋势对比\n" + - "3. 每个子部分2-3个段落\n" + - "4. 段落间用空行分隔\n" + - "5. 可使用一个对比表格,格式简洁\n" + - "6. 控制在3000字符以内"; - - case 3: // 结论与建议 - return basePrompt + "请生成结论与建议部分。\n\n" + - "**严格格式要求:**\n" + - "1. 使用## 结论与建议作为主标题\n" + - "2. 分为两个子部分:\n" + - " ### 主要结论\n" + - " ### 政策建议\n" + - "3. 主要结论部分:\n" + - " - 使用1. 2. 3. 4.编号列表\n" + - " - 每条结论一个段落\n" + - " - 结论间用空行分隔\n" + - "4. 政策建议部分:\n" + - " - 使用1. 2. 3. 4. 5.编号列表\n" + - " - 每条建议包含具体措施\n" + - " - 建议间用空行分隔\n" + - "5. 不要使用过多的子标题\n" + - "6. 控制在2800字符以内\n" + - "7. 必须基于前面的数据分析"; - - default: - return basePrompt + "请生成报告内容。"; - } - } - - /** - * 格式验证和修正方法 - */ - private static String validateAndFixFormat(String content) { - if (content == null || content.trim().isEmpty()) { - return content; - } - - StringBuilder fixed = new StringBuilder(); - String[] lines = content.split("\n"); - - for (int i = 0; i < lines.length; i++) { - String line = lines[i]; - - // 修正标题格式 - if (line.trim().startsWith("#")) { - // 确保标题前后有空行 - if (i > 0 && !lines[i - 1].trim().isEmpty()) { - fixed.append("\n"); - } - fixed.append(line).append("\n\n"); - } - // 修正列表格式 - else if (line.trim().matches("^[0-9]+\\.") || line.trim().startsWith("-")) { - fixed.append(line).append("\n\n"); - } - // 普通段落 - else if (!line.trim().isEmpty()) { - fixed.append(line).append("\n\n"); - } - // 保留空行但不重复 - else if (line.trim().isEmpty() && i > 0 && !lines[i - 1].trim().isEmpty()) { - // 空行已经在上面添加了,这里跳过 - } - } - - return fixed.toString().trim(); - } - - /** - * 修改后的生成报告的单个部分方法 - */ - private static String generateReportSection(int sectionIndex, String regionNames) { - try { - String prompt = buildSectionPrompt(sectionIndex, regionNames); - - StringBuilder sectionContent = new StringBuilder(); - CountDownLatch latch = new CountDownLatch(1); - - CallDeepSeek.callDeepSeekStream(prompt, new CallDeepSeek.SSEListener() { - @Override - public void onData(String data) { - sectionContent.append(data); - System.out.print("."); - } - - @Override - public void onComplete(String fullResponse) { - System.out.println(" 完成!"); - latch.countDown(); - } - - @Override - public void onError(String error) { - System.err.println("\n生成第" + (sectionIndex + 1) + "部分失败: " + error); - latch.countDown(); - } - }); - - latch.await(); - - // 格式验证和修正 - String rawContent = sectionContent.toString(); - String fixedContent = validateAndFixFormat(rawContent); - - // 保存原始和修正后的内容用于调试 - saveDebugContent(rawContent, fixedContent, sectionIndex); - - return fixedContent; - - } catch (Exception e) { - System.err.println("生成第" + (sectionIndex + 1) + "部分时出错: " + e.getMessage()); - return ""; - } - } - - /** - * 保存调试内容 - */ - private static void saveDebugContent(String rawContent, String fixedContent, int sectionIndex) { - try { - String timestamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date()); - - // 保存原始内容 - String rawFileName = "debug_section_" + (sectionIndex + 1) + "_raw_" + timestamp + ".txt"; - Files.write(Paths.get("Doc/" + rawFileName), rawContent.getBytes("UTF-8")); - - // 保存修正后内容 - String fixedFileName = "debug_section_" + (sectionIndex + 1) + "_fixed_" + timestamp + ".txt"; - Files.write(Paths.get("Doc/" + fixedFileName), fixedContent.getBytes("UTF-8")); - - System.out.println("第" + (sectionIndex + 1) + "部分调试文件已保存"); - - } catch (Exception e) { - System.err.println("保存调试文件失败: " + e.getMessage()); - } - } - - /** - * 获取部分标题 - */ - private static String getSectionTitle(int index) { - switch (index) { - case 0: - return "摘要和数据概览"; - case 1: - return "各地区详细分析"; - case 2: - return "对比分析"; - case 3: - return "结论与建议"; - default: - return "未知部分"; - } - } - - /** - * 生成地区分析部分的信息 - */ - private static String generateRegionAnalysisSections(String[] regions) { - return "各地区分析(" + String.join("、", regions) + ")"; - } - - /** - * 保存完整报告 - */ - private static void saveCompleteReport(String fullContent, String reportTitle) { - try { - String timestamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date()); - - // 保存为文本文件用于调试 - String txtFileName = "complete_report_" + timestamp + ".txt"; - String txtFilePath = "Log/" + txtFileName; - Files.write(Paths.get(txtFilePath), fullContent.getBytes("UTF-8")); - System.out.println("完整报告文本已保存: " + txtFilePath); - - // 生成Word文档 - String docxFileName = "analysis_report_" + timestamp + ".docx"; - WordGenerator.generateWordDocument(fullContent, docxFileName, regions); - System.out.println("Word报告已保存至: WebRoot/upload/" + docxFileName); - - System.out.println("\n=== 报告生成完成 ==="); - System.out.println("总字符数: " + fullContent.length()); - System.out.println("报告标题: " + reportTitle); - - } catch (Exception e) { - System.err.println("保存完整报告失败: " + e.getMessage()); - } - } - - /** - * 生成HTML报告 - */ - private static void generateHtmlReport() throws IOException { - if (!dataSubmitted) { - System.out.println("请先提交数据!"); - return; - } - - System.out.println("\n开始分段生成HTML报告..."); - - StringBuilder fullHtmlContent = new StringBuilder(); - - // 第一段:HTML基础结构 + 前3个图表 - generateHtmlPart1(fullHtmlContent); - - // 第二段:中间3个图表 - generateHtmlPart2(fullHtmlContent); - - // 第三段:最后3个图表 + 结束标签 - generateHtmlPart3(fullHtmlContent); - - // 保存完整的HTML文件 - saveCompleteHtmlFile(fullHtmlContent.toString()); - } - - private static void generateHtmlPart1(StringBuilder fullContent) { - String prompt1 = "生成HTML报告的第一部分,要求:\n" + - "1. 包含完整的HTML文档开头:、、等\n" + - "2. 在中引入ECharts:\n" + - "3. 包含CSS样式\n" + - "4. 包含报告标题和摘要部分\n" + - "5. 生成前3个图表的完整代码:\n" + - " - 人口变化趋势图(折线图,id: populationTrend)\n" + - " - 教育规模分布图(柱状图,id: educationScale)\n" + - " - 城乡人口对比图(饼图,id: urbanRuralComparison)\n" + - "6. 每个图表包含div容器和完整的JavaScript初始化代码\n" + - "7. 使用window.addEventListener('DOMContentLoaded', function() {包装JavaScript\n" + - "8. 【重要】不要添加和结束标签,这是第一部分\n" + - "9. 基于之前提交的数据生成真实图表"; - - try { - StringBuilder part1Content = new StringBuilder(); - CountDownLatch latch = new CountDownLatch(1); - - CallDeepSeek.callDeepSeekStream(prompt1, new CallDeepSeek.SSEListener() { - @Override - public void onData(String data) { - part1Content.append(data); - System.out.print("."); // 显示进度 - } - - @Override - public void onComplete(String fullResponse) { - fullContent.append(fullResponse); - System.out.println("\n第一部分生成完成"); - latch.countDown(); - } - - @Override - public void onError(String error) { - System.err.println("生成第一部分时出错: " + error); - latch.countDown(); - } - }); - - latch.await(); - } catch (Exception e) { - System.err.println("生成HTML第一部分失败: " + e.getMessage()); - e.printStackTrace(); - } - } - - private static void generateHtmlPart2(StringBuilder fullContent) { - String prompt2 = "生成HTML报告的第二部分,要求:\n" + - "1. 【重要】不要包含HTML文档头部,直接从图表内容开始\n" + - "2. 生成中间3个图表的完整代码:\n" + - " - 学龄人口预测图(面积图,id: schoolAgePrediction)\n" + - " - 教育资源配置图(雷达图,id: educationResources)\n" + - " - 区域教育发展对比图(条形图,id: regionalComparison)\n" + - "3. 每个图表包含div容器和完整的JavaScript初始化代码\n" + - "4. JavaScript代码要与第一部分的DOMContentLoaded事件兼容\n" + - "5. 【重要】不要添加和结束标签\n" + - "6. 基于之前提交的数据生成真实图表"; - - try { - StringBuilder part2Content = new StringBuilder(); - CountDownLatch latch = new CountDownLatch(1); - - CallDeepSeek.callDeepSeekStream(prompt2, new CallDeepSeek.SSEListener() { - @Override - public void onData(String data) { - part2Content.append(data); - System.out.print("."); // 显示进度 - } - - @Override - public void onComplete(String fullResponse) { - fullContent.append(fullResponse); - System.out.println("\n第二部分生成完成"); - latch.countDown(); - } - - @Override - public void onError(String error) { - System.err.println("生成第二部分时出错: " + error); - latch.countDown(); - } - }); - - latch.await(); - } catch (Exception e) { - System.err.println("生成HTML第二部分失败: " + e.getMessage()); - e.printStackTrace(); - } - } - - private static void generateHtmlPart3(StringBuilder fullContent) { - String prompt3 = "生成HTML报告的第三部分,要求:\n" + - "1. 【重要】不要包含HTML文档头部,直接从图表内容开始\n" + - "2. 生成最后3个图表的完整代码:\n" + - " - 教育投入产出分析图(散点图,id: inputOutputAnalysis)\n" + - " - 师资力量分布图(热力图,id: teacherDistribution)\n" + - " - 综合发展指数图(仪表盘图,id: comprehensiveIndex)\n" + - "3. 每个图表包含div容器和完整的JavaScript初始化代码\n" + - "4. 添加结论和建议部分\n" + - "5. 添加窗口大小变化处理代码\n" + - "6. 【重要】必须以完整的结束\n" + - "7. 基于之前提交的数据生成真实图表"; - - try { - StringBuilder part3Content = new StringBuilder(); - CountDownLatch latch = new CountDownLatch(1); - - CallDeepSeek.callDeepSeekStream(prompt3, new CallDeepSeek.SSEListener() { - @Override - public void onData(String data) { - part3Content.append(data); - System.out.print("."); // 显示进度 - } - - @Override - public void onComplete(String fullResponse) { - fullContent.append(fullResponse); - System.out.println("\n第三部分生成完成"); - latch.countDown(); - } - - @Override - public void onError(String error) { - System.err.println("生成第三部分时出错: " + error); - latch.countDown(); - } - }); - - latch.await(); - } catch (Exception e) { - System.err.println("生成HTML第三部分失败: " + e.getMessage()); - e.printStackTrace(); - } - } - - /** - * 生成PPT报告 - */ - private static void generatePptReport() { - if (!dataSubmitted) { - System.out.println("请先提交数据!"); - return; - } - - System.out.println("\n开始生成PPT报告..."); - - try { - String prompt = "基于之前提交的所有数据,请生成一份PPT演示文稿的详细内容大纲。\n" + - "要求:\n" + - "1. 提供完整的PPT结构和每页内容\n" + - "2. 包含标题页、目录、数据分析、图表说明、结论建议等\n" + - "3. 每页PPT都要有明确的标题和要点\n" + - "4. 适合制作成专业的演示文稿\n" + - "5. 内容要简洁明了,重点突出"; - - StringBuilder pptContent = new StringBuilder(); - CountDownLatch latch = new CountDownLatch(1); - - CallDeepSeek.callDeepSeekStream(prompt, new CallDeepSeek.SSEListener() { - @Override - public void onData(String data) { - pptContent.append(data); - System.out.print("."); // 显示进度 - } - - @Override - public void onComplete(String fullResponse) { - System.out.println("\nPPT内容生成完成!"); - - // 调用PptGenerator生成PPT - try { - String timestamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date()); - String fileName = "analysis_report_" + timestamp + ".pptx"; - - // 使用PptGenerator生成PPT - String token = PptAIKit.createApiToken("dsideal", 1000); - String pptInfo = PptGenerator.generatePptFromMarkdown(pptContent.toString(), token); - - // 保存PPT信息到文本文件 - String infoFilePath = "WebRoot/upload/ppt_info_" + timestamp + ".txt"; - if (pptInfo != null) { - Files.write(Paths.get(infoFilePath), pptInfo.getBytes("UTF-8")); - System.out.println("PPT生成信息已保存至: " + infoFilePath); - } else { - Files.write(Paths.get(infoFilePath), "PPT生成失败".getBytes("UTF-8")); - System.out.println("PPT生成失败,错误信息已保存至: " + infoFilePath); - } - - } catch (Exception e) { - System.err.println("生成PPT失败: " + e.getMessage()); - } - - latch.countDown(); - } - - @Override - public void onError(String error) { - System.err.println("\n生成PPT报告失败: " + error); - latch.countDown(); - } - }); - - latch.await(); - - } catch (Exception e) { - System.err.println("生成PPT报告时出错: " + e.getMessage()); - } - } - - /** - * 分割过大的单表数据 - */ - private static List splitLargeTable(Set fieldNames, - List allTableData, int maxSize) { - List chunks = new ArrayList<>(); - StringBuilder currentTableChunk = new StringBuilder(); - - for (Record dataRecord : allTableData) { - Map columns = dataRecord.getColumns(); - StringBuilder rowData = new StringBuilder(); - rowData.append("["); - - boolean first = true; - for (String fieldName : fieldNames) { - if (!first) rowData.append(","); - Object value = columns.get(fieldName); - if (value instanceof String) { - rowData.append("\"").append(value).append("\""); - } else { - rowData.append(value); - } - first = false; - } - rowData.append("]\n"); - - // 检查是否超过限制 - if (currentTableChunk.length() + rowData.length() > maxSize) { - if (!currentTableChunk.isEmpty()) { - chunks.add(currentTableChunk.toString()); - currentTableChunk = new StringBuilder(); - } - } - currentTableChunk.append(rowData); - } - - if (!currentTableChunk.isEmpty()) { - chunks.add(currentTableChunk.toString()); - } - - return chunks; - } - - /** - * 提取数据分块逻辑为独立方法 - */ - private static String[] getDataChunks(String[] regions, List tableList) { - List dataChunks = new ArrayList<>(); - StringBuilder currentChunk = new StringBuilder(); - - String header = "数据说明: 以下是云南省教育数据的压缩格式\n" + - "格式: 表名 -> 字段列表 -> 数据行(数组格式)\n" + - "地区范围: " + String.join(",", regions) + "\n\n"; - currentChunk.append(header); - - // 遍历所有相关数据表 - for (Record record : tableList) { - String tableName = record.getStr("TABLE_NAME"); - - // 为当前表收集所有数据 - List allTableData = new ArrayList<>(); - Set fieldNames = new LinkedHashSet<>(); - - // 为每个地区收集数据 - for (String region : regions) { - String sql = "select * from `" + tableName + "` where `行政区划`=?"; - List listContent = Db.use(DataEaseModel.DB_NAME).find(sql, region); - - if (!listContent.isEmpty()) { - allTableData.addAll(listContent); - // 收集字段名(使用第一条记录的字段结构) - if (fieldNames.isEmpty()) { - fieldNames.addAll(listContent.get(0).getColumns().keySet()); - } - } - } - - if (!allTableData.isEmpty()) { - // 构建当前表的完整数据块 - StringBuilder tableData = new StringBuilder(); - tableData.append("\n表: ").append(tableName).append("\n"); - tableData.append("字段: ").append(String.join(",", fieldNames)).append("\n"); - - // 输出压缩格式的数据 - for (Record dataRecord : allTableData) { - Map columns = dataRecord.getColumns(); - tableData.append("["); - - boolean first = true; - for (String fieldName : fieldNames) { - if (!first) tableData.append(","); - Object value = columns.get(fieldName); - if (value instanceof String) { - tableData.append("\"").append(value).append("\""); - } else { - tableData.append(value); - } - first = false; - } - - tableData.append("]\n"); - } - - // 检查是否需要分块 - String tableDataStr = tableData.toString(); - if (currentChunk.length() + tableDataStr.length() > MAX_CHUNK_SIZE) { - // 当前块已满,保存并开始新块 - if (currentChunk.length() > header.length()) { - dataChunks.add(currentChunk.toString()); - currentChunk = new StringBuilder(); - currentChunk.append(header); - } - - // 如果单个表数据超过限制,需要进一步分割 - if (tableDataStr.length() > MAX_CHUNK_SIZE - header.length()) { - List tableChunks = splitLargeTable(fieldNames, allTableData, MAX_CHUNK_SIZE - header.length()); - for (int i = 0; i < tableChunks.size(); i++) { - StringBuilder chunkBuilder = new StringBuilder(); - chunkBuilder.append(header); - chunkBuilder.append("\n[续] 表: ").append(tableName).append(" (第").append(i + 1).append("部分)\n"); - chunkBuilder.append("字段: ").append(String.join(",", fieldNames)).append("\n"); - chunkBuilder.append(tableChunks.get(i)); - dataChunks.add(chunkBuilder.toString()); - } - } else { - currentChunk.append(tableDataStr); - } - } else { - currentChunk.append(tableDataStr); - } - } - } - - // 添加最后一个块 - if (currentChunk.length() > header.length()) { - dataChunks.add(currentChunk.toString()); - } - - return dataChunks.toArray(new String[0]); - } - /** - * 保存完整的HTML文件 - * @param htmlContent HTML内容 - */ - private static void saveCompleteHtmlFile(String htmlContent) { - try { - // 生成文件名(带时间戳) - SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd_HHmmss"); - String timestamp = sdf.format(new Date()); - String fileName = "analysis_report_" + timestamp + ".html"; - String outputPath = "WebRoot/upload/" + fileName; - - // 清理HTML内容 - htmlContent = cleanHtmlContent(htmlContent); - - // 保存HTML文件 - try (FileWriter writer = new FileWriter(outputPath, StandardCharsets.UTF_8)) { - writer.write(htmlContent); - } - System.out.println("HTML文件保存成功: " + outputPath); - } catch (IOException e) { - System.err.println("保存HTML文件时出错: " + e.getMessage()); - e.printStackTrace(); - throw new RuntimeException("保存HTML文件失败", e); - } - } - - /** - * 清理HTML内容 - */ - private static String cleanHtmlContent(String htmlContent) { - // 移除可能的markdown代码块标记 - htmlContent = htmlContent.replaceAll("```html\\s*", ""); - htmlContent = htmlContent.replaceAll("```\\s*$", ""); - htmlContent = htmlContent.replaceAll("^```\\s*", ""); - - // 移除DeepSeek可能添加的说明文本 - htmlContent = htmlContent.replaceAll("Here's the complete HTML report.*?:", ""); - htmlContent = htmlContent.replaceAll("Here's.*?HTML.*?:", ""); - htmlContent = htmlContent.replaceAll("以下是.*?HTML.*?:", ""); - - // 查找真正的HTML文档开始位置 - int doctypeIndex = htmlContent.toLowerCase().indexOf(""); - int htmlIndex = htmlContent.toLowerCase().indexOf("开始截取 - htmlContent = htmlContent.substring(doctypeIndex); - } else if (htmlIndex != -1) { - // 从\n"; - } - - // 移除可能存在的重复DOCTYPE和html标签 - htmlContent = htmlContent.replaceAll("(?i)\\s*", ""); - htmlContent = htmlContent.replaceAll("(?i)]*>\\s*]*>", ""); - - return htmlContent.trim(); - } -} - - -