From f7e40dea81b3c07c10ff336bc8555ce60b33f209 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Sat, 8 Mar 2025 14:10:31 +0800 Subject: [PATCH] 'commit' --- AI/Text2Sql/Sql/CreateTable.sql | 63 ++++--------------- AI/Text2Sql/YunXiao_Deepseek.py | 10 +-- AI/Text2Sql/YunXiao_Vanna.py | 10 +-- .../java/Tools/Crawler/Util/BookLesson.java | 17 ++++- 4 files changed, 36 insertions(+), 64 deletions(-) diff --git a/AI/Text2Sql/Sql/CreateTable.sql b/AI/Text2Sql/Sql/CreateTable.sql index cfe0c90f..c3a52d1a 100644 --- a/AI/Text2Sql/Sql/CreateTable.sql +++ b/AI/Text2Sql/Sql/CreateTable.sql @@ -30,26 +30,11 @@ COMMENT ON COLUMN "public"."t_crawler_scheme"."stage_id" IS '学段ID'; COMMENT ON COLUMN "public"."t_crawler_scheme"."id" IS '主键'; COMMENT ON TABLE "public"."t_crawler_scheme" IS '教材版本,目前一般一个学科一个版本'; -CREATE TABLE "public"."t_crawler_stage" ( - "stage_id" varchar(255) COLLATE "pg_catalog"."default" NOT NULL, - "stage_name" varchar(255) COLLATE "pg_catalog"."default" -) -; -COMMENT ON COLUMN "public"."t_crawler_stage"."stage_id" IS '学段ID'; -COMMENT ON COLUMN "public"."t_crawler_stage"."stage_name" IS '学段名称'; -COMMENT ON TABLE "public"."t_crawler_stage" IS '学段表'; - -CREATE TABLE "public"."t_crawler_subject" ( - "subject_id" varchar(255) COLLATE "pg_catalog"."default" NOT NULL, - "subject_name" varchar(255) COLLATE "pg_catalog"."default", - "stage_id" varchar(255) COLLATE "pg_catalog"."default" -) -; -COMMENT ON COLUMN "public"."t_crawler_subject"."subject_id" IS '科目ID'; -COMMENT ON COLUMN "public"."t_crawler_subject"."subject_name" IS '科目名称'; -COMMENT ON COLUMN "public"."t_crawler_subject"."stage_id" IS '学段ID'; -COMMENT ON TABLE "public"."t_crawler_subject" IS '学科表'; +-- ---------------------------- +-- Table structure for t_crawler_lesson +-- ---------------------------- +DROP TABLE IF EXISTS "public"."t_crawler_lesson"; CREATE TABLE "public"."t_crawler_lesson" ( "lesson_id" varchar(255) COLLATE "pg_catalog"."default", "lesson_name" varchar(255) COLLATE "pg_catalog"."default", @@ -68,7 +53,10 @@ CREATE TABLE "public"."t_crawler_lesson" ( "learning_person_times" int4 DEFAULT 0, "grade_code" varchar(255) COLLATE "pg_catalog"."default", "publish_time" date, - "node_type" int2 DEFAULT 1 + "node_type" int2 DEFAULT 1, + "gather_regionc" varchar(255) COLLATE "pg_catalog"."default", + "school_running_type" varchar(255) COLLATE "pg_catalog"."default", + "stage_name" varchar(255) COLLATE "pg_catalog"."default" ) ; COMMENT ON COLUMN "public"."t_crawler_lesson"."lesson_id" IS '课程ID'; @@ -77,7 +65,7 @@ COMMENT ON COLUMN "public"."t_crawler_lesson"."node_id" IS '隶属哪个章节 COMMENT ON COLUMN "public"."t_crawler_lesson"."teacher_id" IS '教师ID'; COMMENT ON COLUMN "public"."t_crawler_lesson"."teacher_name" IS '教师姓名'; COMMENT ON COLUMN "public"."t_crawler_lesson"."teacher_school_id" IS '教师学校ID,这个无用,处理关联学校时,使用t_crawler_lesson_school表'; -COMMENT ON COLUMN "public"."t_crawler_lesson"."teacher_school_name" IS '教师学校名称,这个无用,处理关联学校时,使用t_crawler_lesson_school中数据'; +COMMENT ON COLUMN "public"."t_crawler_lesson"."teacher_school_name" IS '教师学校名称'; COMMENT ON COLUMN "public"."t_crawler_lesson"."id" IS '主键'; COMMENT ON COLUMN "public"."t_crawler_lesson"."book_id" IS '册ID'; COMMENT ON COLUMN "public"."t_crawler_lesson"."scheme_id" IS '版本ID'; @@ -87,31 +75,11 @@ COMMENT ON COLUMN "public"."t_crawler_lesson"."learning_person_times" IS '学习 COMMENT ON COLUMN "public"."t_crawler_lesson"."grade_code" IS '学段代码'; COMMENT ON COLUMN "public"."t_crawler_lesson"."publish_time" IS '发布时间'; COMMENT ON COLUMN "public"."t_crawler_lesson"."node_type" IS '节点类型 1:章节目录 2:知识点'; +COMMENT ON COLUMN "public"."t_crawler_lesson"."gather_regionc" IS '行政区域名称'; +COMMENT ON COLUMN "public"."t_crawler_lesson"."school_running_type" IS '学校类型'; +COMMENT ON COLUMN "public"."t_crawler_lesson"."stage_name" IS '学段名称'; COMMENT ON TABLE "public"."t_crawler_lesson" IS '课程,resource资源表'; -CREATE TABLE "public"."t_crawler_lesson_school" ( - "lesson_id" varchar(255) COLLATE "pg_catalog"."default" NOT NULL, - "original_school_name" varchar(255) COLLATE "pg_catalog"."default", - "organization_name" varchar(255) COLLATE "pg_catalog"."default", - "match_type" int4, - "organization_no" varchar(255) COLLATE "pg_catalog"."default", - "gather_regionc" varchar(255) COLLATE "pg_catalog"."default", - "teacher_name" varchar(255) COLLATE "pg_catalog"."default", - "update_ts" timestamp(6) DEFAULT now(), - "school_running_type" varchar(255) COLLATE "pg_catalog"."default" -) -; -COMMENT ON COLUMN "public"."t_crawler_lesson_school"."lesson_id" IS '资源ID,通过资源ID与t_crawler_lesson中lesson_id关联,所有资源的学校、教师信息以本表为准'; -COMMENT ON COLUMN "public"."t_crawler_lesson_school"."original_school_name" IS '原学校名称'; -COMMENT ON COLUMN "public"."t_crawler_lesson_school"."organization_name" IS '正式学校名称'; -COMMENT ON COLUMN "public"."t_crawler_lesson_school"."match_type" IS '0:未处理 1:名称完全一至 2:手工对应名称'; -COMMENT ON COLUMN "public"."t_crawler_lesson_school"."organization_no" IS '正式学校代码'; -COMMENT ON COLUMN "public"."t_crawler_lesson_school"."gather_regionc" IS '隶属行政区域名称'; -COMMENT ON COLUMN "public"."t_crawler_lesson_school"."teacher_name" IS '教师名称'; -COMMENT ON COLUMN "public"."t_crawler_lesson_school"."update_ts" IS '修改时间'; -COMMENT ON COLUMN "public"."t_crawler_lesson_school"."school_running_type" IS '学校类型'; -COMMENT ON TABLE "public"."t_crawler_lesson_school" IS '资源所属学校的原名和正式名对应表'; - CREATE TABLE "public"."t_crawler_structure" ( "node_id" varchar(255) COLLATE "pg_catalog"."default" NOT NULL, @@ -163,11 +131,6 @@ COMMENT ON COLUMN "public"."t_crawler_structure_knowledge"."subject_id" IS '科 COMMENT ON COLUMN "public"."t_crawler_structure_knowledge"."subject_name" IS '科目名称'; 特别注意: -1、如果用户需要检索学段,比如小学,初中,需要先到 - select stage_id from t_crawler_stage where stage_name='小学' - 用得到stage_id再与其它表进行关联查询,不要直接使用学段名称进行查询。 -2、如果用户需要检索科目,比如语文,数学,需要先到 +1、如果用户需要检索科目,比如语文,数学,需要先到 select subject_id from t_crawler_subject where subject_name='语文' 用得到subject_id再与其它表进行关联查询,不要直接使用科目名称进行查询。 -3、凡是涉及到行政区划,也就是gather_regionc字段的,不能直接从t_crawler_lesson表中读取,它没有这个列, -需要通过lesson_id 关联到t_crawler_lesson_school表中,再从t_crawler_lesson_school表中读取。 \ No newline at end of file diff --git a/AI/Text2Sql/YunXiao_Deepseek.py b/AI/Text2Sql/YunXiao_Deepseek.py index 57225e89..3aed7871 100644 --- a/AI/Text2Sql/YunXiao_Deepseek.py +++ b/AI/Text2Sql/YunXiao_Deepseek.py @@ -47,19 +47,19 @@ if __name__ == '__main__': ddl = file.read() # 自然语言描述 - prompt = "查询 2024 年每个学段下,上传课程数量排名前 10 的学校,并按行政区名称和上传课程数量排序。" + prompt = "查询 2024 年每个学段下,上传课程数量排名前 10 的学校,显示排名,并按上传课程数量排序。" common_prompt=''' 要求: - 1、对于学校名称和行政区划名称为空的不要进行统计。 - 2、有行政区划列返回时,先按行政区划排序 - 3、有课程数量时,再按课程数量由高到低排序 + 1、只返回可以运行的SQL,不要描述信息和```sql 还有``` + 2、对于学校名称和行政区划名称等于NULL 或者为空的不要进行统计 + 3、有行政区划列返回时,先按行政区划排序 + 4、有课程数量时,再按课程数量由高到低排序 ''' prompt = prompt + common_prompt # 生成 SQL try: sql = generate_sql_from_prompt(ddl, prompt) - print("生成的 SQL 查询:") print(sql) except Exception as e: print(f"生成 SQL 时出错:{e}") \ No newline at end of file diff --git a/AI/Text2Sql/YunXiao_Vanna.py b/AI/Text2Sql/YunXiao_Vanna.py index ba7b81a4..c18229f4 100644 --- a/AI/Text2Sql/YunXiao_Vanna.py +++ b/AI/Text2Sql/YunXiao_Vanna.py @@ -110,20 +110,14 @@ if __name__ == "__main__": # 自然语言提问 # ''' - question1 = ''' - 查询发布时间是2024年度,每个行政区划每个学校都上传了多少课程数量, - 返回:行政区名称,学校名称,上传课程数量等属性. - ''' question = ''' - 查询发布时间是2024年度,按学段分组,比如小学、初中、高中,每个学段中上传课程数量前10名的都是些学校, - 注意:排名是指分组内部排名,不是整体排名 + 查询发布时间是2024年度,每个行政区划每个学校都上传了多少课程数量, 返回: 学段,排名,行政区名称,学校名称,上传课程数量等属性. ''' common_prompt = ''' 要求: 1、行政区划为NULL 或者是空字符的不参加统计工作, - 2、有行政区划列返回时,先按行政区划排序 - 3、有课程数量时,再按课程数量由高到低排序''' + ''' question = question + common_prompt # 开始查询 print("开始查询...") diff --git a/src/main/java/Tools/Crawler/Util/BookLesson.java b/src/main/java/Tools/Crawler/Util/BookLesson.java index 8e22e035..5a0d848d 100644 --- a/src/main/java/Tools/Crawler/Util/BookLesson.java +++ b/src/main/java/Tools/Crawler/Util/BookLesson.java @@ -582,7 +582,7 @@ public class BookLesson { toFixRecord.set("match_type", 2); toFixRecord.set("teacher_name", teacherName); writeList.add(toFixRecord); - }else{ + } else { toFixRecord.set("teacher_name", teacherName); toFixRecord.set("match_type", 0); writeList.add(toFixRecord); @@ -606,6 +606,21 @@ public class BookLesson { } } Db.batchUpdate("t_crawler_lesson", "lesson_id", writeList, batchSize); + + + sql = "UPDATE t_crawler_lesson AS tcl\n" + + "SET \n" + + " teacher_name = tcls.teacher_name,\n" + + " teacher_school_name = tcls.organization_name,\n" + + " gather_regionc = tcls.gather_regionc,\n" + + " school_running_type = tcls.school_running_type,\n" + + " stage_name = tcs.stage_name,\n" + + " subject_name = tsub.subject_name\n" + + "FROM t_crawler_lesson_school AS tcls, t_crawler_stage AS tcs, t_crawler_subject AS tsub\n" + + "WHERE tcl.lesson_id = tcls.lesson_id\n" + + " AND tcl.stage_id = tcs.stage_id\n" + + " AND tcl.subject_id = tsub.subject_id;"; + Db.update(sql); print("打补丁完成!"); } }