class id_card_ocr():
    """Batch-recognise Chinese ID-card images and export the fields to Excel.

    Every image file found in ``self.images`` is passed through EasyOCR, the
    recognised text is concatenated, and name / gender / ethnicity / address /
    ID number are extracted with regular expressions keyed on the labels
    printed on the card.
    """

    # Only files with these extensions are sent to OCR.  Without this filter
    # the result workbook written into the same folder by read_to_excel()
    # would be picked up as an "image" on the next run.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')

    # Name of the workbook written by read_to_excel(), inside ``self.images``.
    RESULT_FILE_NAME = '识别结果.xlsx'

    def __init__(self):
        # Folder containing the ID-card images.
        # NOTE: image file names must not contain Chinese characters,
        # otherwise reading the image fails.
        self.images = r'D:/id_card'
        # Lazily created EasyOCR reader (see ocr_reader()).
        self._reader = None

    def ocr_reader(self):
        """Return the EasyOCR reader for simplified Chinese and English.

        The reader is created on first use and then reused, so the
        recognition models are not reloaded for every image.
        """
        # getattr() keeps this working for instances built without __init__.
        if getattr(self, '_reader', None) is None:
            self._reader = easyocr.Reader(['ch_sim', 'en'], gpu=True)
        return self._reader

    @staticmethod
    def _first_group(pattern, text):
        """Return group 1 of the first match of *pattern* in *text*, or ''."""
        match = re.search(pattern, text)
        return match.group(1) if match else ''

    @staticmethod
    def _parse_fields(text):
        """Extract ``[name, gender, nation, address, number]`` from OCR text.

        Args:
            text: The recognised card text joined into a single string.

        Returns:
            A list of five strings; a field that cannot be located is ''.
        """
        first = id_card_ocr._first_group
        compact = re.sub(r'\s+', '', text)  # OCR output contains stray blanks

        name = first(r'名(.*?)性', compact)
        # Stops at the first "民", which covers both "民族" and a label whose
        # second character was lost by the OCR.
        gender = first(r'别(.*?)民', compact)
        # Prefer the full "民族" label; fall back to a lone "族".
        nation = first(r'民族(.*?)出', compact) or first(r'族(.*?)出', compact)
        # Anchor on the full "公民身份" label so an address that itself
        # contains "公" (e.g. "公园路") is not truncated; fall back to the
        # looser pattern when the label was recognised only partially.
        address = (first(r'址(.*?)公民身份', compact)
                   or first(r'址(.*?)公', compact))
        # The 18th character of an ID number may be the check letter X.
        number = first(r'身份号码(\d{17}[\dXx]|\d+)', compact).upper()
        if len(number) == 17:
            # Only 17 digits were recognised: assume the check character is
            # an X that the OCR failed to read.
            number = number + "X"
        return [name, gender, nation, address, number]

    def read_content(self):
        """Run OCR over every image in ``self.images``.

        Returns:
            A list with one ``[name, gender, nation, address, number]`` row
            per image, in sorted file-name order.
        """
        reader = self.ocr_reader()  # build once, reuse for all images
        data = []
        for image in sorted(os.listdir(self.images)):
            if not image.lower().endswith(self.IMAGE_EXTENSIONS):
                continue  # skip the result workbook and other non-images
            print(f'正在识别:{image}')
            content = reader.readtext(os.path.join(self.images, image), detail=0)
            data.append(self._parse_fields(''.join(content)))
            print(f'完成识别:{image}')
        print(data)
        return data

    def read_to_excel(self):
        """Recognise all images and write the result table to Excel.

        The workbook is written into ``self.images``.

        Returns:
            The pandas DataFrame that was written.
        """
        df = pd.DataFrame(self.read_content(), columns=['姓名', '性别', '民族', '地址', '身份证号码'])
        print('识别结果如下:')
        print(df)
        df.to_excel(os.path.join(self.images, self.RESULT_FILE_NAME), index=False)
        return df


if __name__ == '__main__':
    info = id_card_ocr()
    # read_to_excel() calls read_content() itself; calling both would run the
    # whole OCR pass twice.
    info.read_to_excel()
end="", flush=True) # 实时打印到控制台 summary += chunk_content # 将内容拼接到 summary 中 + # 保存markdown + with open("d:/report.md", "w", encoding="utf-8") as file: + file.write(summary) + # 最终 summary 为完整的 Markdown 内容 print("\n\n流式输出完成,summary 已拼接为完整字符串。") # 生成 Word 文档 diff --git a/src/main/java/Tools/Zysy/ExportFenShuXian.java b/src/main/java/Tools/Zysy/ExportFenShuXian.java new file mode 100644 index 00000000..26e005ff --- /dev/null +++ b/src/main/java/Tools/Zysy/ExportFenShuXian.java @@ -0,0 +1,115 @@ +package Tools.Zysy; + +import com.jfinal.kit.PropKit; +import com.jfinal.plugin.activerecord.ActiveRecordPlugin; +import com.jfinal.plugin.activerecord.Db; +import com.jfinal.plugin.activerecord.Record; +import com.jfinal.plugin.activerecord.dialect.MysqlDialect; +import com.jfinal.plugin.druid.DruidPlugin; + +import java.io.FileWriter; +import java.io.IOException; +import java.util.List; + +public class ExportFenShuXian { + public static void Init() { + //加载配置文件 + String configFile = "197.properties"; + PropKit.use(configFile); + + // 数据库配置 + DruidPlugin druidPlugin = new DruidPlugin(PropKit.get("jdbcUrl"), PropKit.get("user"), + PropKit.get("password").trim(), PropKit.get("driverClassName")); + druidPlugin.start(); + + ActiveRecordPlugin arp = new ActiveRecordPlugin(druidPlugin); + arp.setDialect(new MysqlDialect()); + arp.start(); + } + + /** + * 根据省份名称获取省份ID + * + * @param provinceName + * @return + */ + public static int getProvinceId(String provinceName) { + //云南省=100025 + String sql = "select * from t_gov_province where PROVINCENAME=?"; + Record record = Db.findFirst(sql, provinceName); + return record.getInt("ID"); + } + + /** + * 导出 SQL 数据 + * + * @param provinceId 省份 ID + * @param outputFile 输出文件路径 + */ + public static void exportSql(int provinceId, String outputFile) { + // 查询大学分数数据 + String sql = "select * from t_zygh_university_score where year=2023 and source_province_id=?"; + List listUniversityScore = Db.find(sql, provinceId); + + // 查询专业分数数据 + sql = "select * 
from t_zygh_major_score where year=2023 and source_province_id=?"; + List listMajorScore = Db.find(sql, provinceId); + + // 将数据写入 SQL 文件 + try (FileWriter writer = new FileWriter(outputFile)) { + // 导出大学分数数据 + for (Record record : listUniversityScore) { + String insertSql = generateInsertSql("t_zygh_university_score", record); + writer.write(insertSql + ";\n"); + } + // 导出专业分数数据 + for (Record record : listMajorScore) { + String insertSql = generateInsertSql("t_zygh_major_score", record); + writer.write(insertSql + ";\n"); + } + System.out.println("数据已成功导出到文件:" + outputFile); + } catch (IOException e) { + System.err.println("导出数据时发生错误:" + e.getMessage()); + } + } + + /** + * 生成 INSERT SQL 语句 + * + * @param tableName 表名 + * @param record 数据记录 + * @return INSERT SQL 语句 + */ + private static String generateInsertSql(String tableName, Record record) { + StringBuilder sql = new StringBuilder("INSERT INTO " + tableName + " ("); + StringBuilder values = new StringBuilder("VALUES ("); + + // 遍历记录中的字段 + for (String column : record.getColumnNames()) { + sql.append(column).append(", "); + Object value = record.get(column); + if (value instanceof String) { + values.append("'").append(value).append("', "); + } else { + values.append(value).append(", "); + } + } + + // 去除最后的逗号和空格 + sql.delete(sql.length() - 2, sql.length()); + values.delete(values.length() - 2, values.length()); + + // 拼接完整的 SQL 语句 + sql.append(") ").append(values).append(")"); + return sql.toString(); + } + + public static void main(String[] args) { + Init(); + //云南省的省份ID + int provinceId = getProvinceId("云南省"); + //System.out.println(provinceId); + exportSql(provinceId, "d:\\output.sql"); + System.out.println("恭喜,数据生成成功!"); + } +} diff --git a/src/main/java/Tools/Zysy/Sql/t_zygh_major_score.sql b/src/main/java/Tools/Zysy/Sql/t_zygh_major_score.sql new file mode 100644 index 00000000..34887e3c --- /dev/null +++ b/src/main/java/Tools/Zysy/Sql/t_zygh_major_score.sql @@ -0,0 +1,29 @@ +CREATE TABLE 
`t_zygh_major_score` ( + `id` int NOT NULL AUTO_INCREMENT, + `university_id` int NULL DEFAULT NULL COMMENT '大学ID', + `major_id` int NULL DEFAULT NULL COMMENT '专业ID', + `source_province_id` int NULL DEFAULT NULL COMMENT '生源地', + `type` int NULL DEFAULT NULL COMMENT '1文科 2理科 3艺术类 4体育类 5综合', + `year` int NULL DEFAULT NULL, + `batch` int NULL DEFAULT NULL COMMENT '1本科一批2本科二批3本科三批4专科一批5专科二批6本科提前批7专科提前批8高职专科批9本科批10专科批11一段12二段13三段14本科A15本科B', + `max_score` int NULL DEFAULT NULL, + `average_score` int NULL DEFAULT NULL, + `min_score` int NULL DEFAULT NULL, + `seating` int NULL DEFAULT NULL COMMENT '最低位次', + `recruit_count` int NULL DEFAULT NULL COMMENT '招生人数', + `recruit_students_direction` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '招生方向', + `other_major` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '专业备注', + `first_subject` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '首选科目', + `second_subjects` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '再选学科', + `tuition` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '学费', + `university_code` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '院校代码', + `major_code` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '专业代码', + `educational_system` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '学制', + `major_remark` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '专业备注(招生方向/院校名称备注)', + `major_describe` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '描述(备注)', + `level2_major_id` int NULL DEFAULT NULL COMMENT '二级专业ID(学科门类)', + `major_name` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '专业名称', + PRIMARY KEY (`id`) USING BTREE, + INDEX 
`source_province_id`(`source_province_id` ASC, `year` ASC) USING BTREE, + INDEX `university_major_id`(`university_id` ASC, `major_id` ASC) USING BTREE + ) ENGINE = InnoDB AUTO_INCREMENT = 3626054 CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = COMPACT; diff --git a/src/main/java/Tools/Zysy/Sql/t_zygh_university_score.sql b/src/main/java/Tools/Zysy/Sql/t_zygh_university_score.sql new file mode 100644 index 00000000..944914e6 --- /dev/null +++ b/src/main/java/Tools/Zysy/Sql/t_zygh_university_score.sql @@ -0,0 +1,49 @@ +/* + Navicat Premium Dump SQL + + Source Server : 10.10.14.197【旧版云平台】 + Source Server Type : MySQL + Source Server Version : 100522 (10.5.22-MariaDB-log) + Source Host : 10.10.14.197:22066 + Source Schema : dsideal_db + + Target Server Type : MySQL + Target Server Version : 100522 (10.5.22-MariaDB-log) + File Encoding : 65001 + + Date: 12/03/2025 13:52:02 +*/ + +SET NAMES utf8mb4; +SET FOREIGN_KEY_CHECKS = 0; + +-- ---------------------------- +-- Table structure for t_zygh_university_score +-- ---------------------------- +DROP TABLE IF EXISTS `t_zygh_university_score`; +CREATE TABLE `t_zygh_university_score` ( + `id` int NOT NULL AUTO_INCREMENT, + `university_id` int NULL DEFAULT NULL COMMENT '学校ID', + `type` int NULL DEFAULT NULL COMMENT '1文科 2理科 3艺术类 4体育类 5综合 6物理 7历史', + `source_province_id` int NULL DEFAULT NULL COMMENT '生源地省份', + `year` int NULL DEFAULT NULL, + `batch` int NULL DEFAULT NULL COMMENT '1本科一批2本科二批3本科三批4专科一批5专科二批6本科提前批7专科提前批8高职专科批9本科批10专科批11一段12二段13三段14本科A15本科B', + `batch_line` int NULL DEFAULT NULL COMMENT '批次线', + `max_score` int NULL DEFAULT NULL COMMENT '最高分', + `min_score` int NULL DEFAULT NULL COMMENT '最低分', + `average_score` int NULL DEFAULT NULL COMMENT '平均分', + `seating` int NULL DEFAULT NULL COMMENT '最低位次', + `recruit_count` int NULL DEFAULT NULL COMMENT '招生人数', + `is_subtype` int(11) UNSIGNED ZEROFILL NOT NULL COMMENT '是否是子类学校:0非子类,1子类', + `subtype` int(11) UNSIGNED ZEROFILL NOT NULL COMMENT 
'子类型code,对应t_tncee_dic_item表ZYGH_UNIVERSITY_SUBCODE', + `subtype_name` varchar(63) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '子类名称', + `xk_subject` varchar(64) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '限考学科', + `major_group_code` varchar(64) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '专业组代码', + `remark` varchar(128) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '备注', + `new_examination` int NULL DEFAULT NULL COMMENT '0-新高考,1-老高考', + PRIMARY KEY (`id`) USING BTREE, + INDEX `source_province_id`(`year` ASC, `source_province_id` ASC) USING BTREE, + INDEX `university_id`(`university_id` ASC) USING BTREE +) ENGINE = InnoDB AUTO_INCREMENT = 936221 CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact; + +SET FOREIGN_KEY_CHECKS = 1; diff --git a/src/main/java/Tools/Zysy/文本.txt b/src/main/java/Tools/Zysy/文本.txt new file mode 100644 index 00000000..7821a141 --- /dev/null +++ b/src/main/java/Tools/Zysy/文本.txt @@ -0,0 +1,18 @@ +------------------------------------------------------------ +# 小萌给的办法 +院校分数线表t_zygh_university_score,专业分数线t_zygh_major_score,把需要的省份年份数据导出更新就行了,还得检查一下学科类别和批次数据,那两个表我记不清叫啥了 +看调用的那个接口能查到 +# 用这个地址进入 +http://10.10.14.199//dsideal_yy/html/zysy/grzx_check_score.html?area_id=300529 + +select * from t_zygh_university_score where year=2023 and source_province_id=100025; +select * from t_zygh_major_score where year=2023 and source_province_id=100025; +select * from t_gov_province where PROVINCENAME='云南省';-- 100025 + +-- select * from t_zygh_switch_batch + +------------------------------------------------------------ +# 验证 + + +------------------------------------------------------------ \ No newline at end of file diff --git a/src/main/resources/197.properties b/src/main/resources/197.properties new file mode 100644 index 00000000..729b7c34 --- /dev/null +++ b/src/main/resources/197.properties @@ -0,0 +1,6 @@ +# 数据库信息 
+driverClassName=com.mysql.cj.jdbc.Driver +user=root +password=DsideaL147258369 +jdbcUrl=jdbc:mysql://10.10.14.197:22066/dsideal_db?rewriteBatchedStatements=true&useUnicode=true&zeroDateTimeBehavior=CONVERT_TO_NULL&useSSL=true&characterEncoding=UTF-8&serverTimezone=Asia/Shanghai&autoReconnect=true&failOverReadOnly=false +