'commit'

6 days ago · daed5693f3
parent 1ac102f688
commit daed5693f3
17 changed files with 2660 additions and 21 deletions
--- a/dsLightRag/.idea/misc.xml
+++ b/dsLightRag/.idea/misc.xml
@ -3,5 +3,5 @@
  <component name="Black">
    <option name="sdkName" value="D:\anaconda3\envs\lightrag" />
  </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="D:\anaconda3\envs\py310" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (5)" project-jdk-type="Python SDK" />
 </project>
--- a/dsLightRag/File_Start.py
+++ b/dsLightRag/File_Start.py
--- a/dsLightRag/Backup/PG_Start.py
+++ b/dsLightRag/Backup/PG_Start.py
@ -34,7 +34,7 @@ async def lifespan(app: FastAPI):
 app = FastAPI(lifespan=lifespan)
 # 挂载静态文件目录
-app.mount("../static", StaticFiles(directory="Static"), name="static")
+app.mount("/static", StaticFiles(directory="Static"), name="static")
 # 访问根的跳转
@ -285,10 +285,10 @@ async def render_html(request: fastapi.Request):
    html_content = html_content.replace("```", "")
    # 创建临时文件
    filename = f"relation_{uuid.uuid4().hex}.html"
-    filepath = os.path.join('../static/temp', filename)
+    filepath = os.path.join('static/temp', filename)
    # 确保temp目录存在
-    os.makedirs('../static/temp', exist_ok=True)
+    os.makedirs('static/temp', exist_ok=True)
    # 写入文件
    with open(filepath, 'w', encoding='utf-8') as f:
--- a/dsLightRag/SpliteDocx.py
+++ b/dsLightRag/SpliteDocx.py
@ -0,0 +1,64 @@
 from docx import Document
 import os
 # Split a .docx file into one document per level-1 heading containing "少年读史记".
 def split_docx_by_heading(input_path):
    """Split a Word document into chapter files at matching level-1 headings.

    A new chapter starts at every paragraph whose style name is ``Heading 1``
    and whose text contains the keyword "少年读史记". Paragraphs before the
    first matching heading are discarded. Each chapter is written next to the
    input file as ``ShiJi_<n>.docx`` (n starts at 1).

    Args:
        input_path: Path of the .docx file to split.

    Returns:
        The number of chapter files written.
    """
    doc = Document(input_path)
    split_keyword = "少年读史记"

    # Group paragraphs into chapters; ``current_section`` stays None until the
    # first matching heading is seen so that leading content is skipped.
    sections = []
    current_section = None
    for para in doc.paragraphs:
        if para.style.name == 'Heading 1' and split_keyword in para.text:
            current_section = [para]
            sections.append(current_section)
        elif current_section is not None:
            current_section.append(para)

    # Write each chapter into the same directory as the input file.
    output_dir = os.path.dirname(input_path)
    for i, section in enumerate(sections, 1):
        new_doc = Document()
        for para in section:
            # Create the paragraph EMPTY: passing para.text here and then
            # re-adding every run below would write the text twice.
            new_para = new_doc.add_paragraph()
            new_para.style = para.style
            if para.runs:
                # Copy run by run so basic character formatting is preserved.
                for run in para.runs:
                    new_run = new_para.add_run(run.text)
                    new_run.bold = run.bold
                    new_run.italic = run.italic
                    new_run.underline = run.underline
                    new_run.font.size = run.font.size
                    new_run.font.name = run.font.name
            elif para.text:
                # No runs to copy from: keep at least the plain text.
                new_para.add_run(para.text)
        output_filename = f"ShiJi_{i}.docx"
        output_path = os.path.join(output_dir, output_filename)
        new_doc.save(output_path)
        print(f"已保存拆分文档: {output_path}")
    return len(sections)
 # Script entry point: split the ShiJi source document when run directly.
 if __name__ == "__main__":
    # Machine-specific absolute path of the document to split.
    file = r'D:\dsWork\dsProject\dsLightRag\static\Txt\ShiJi.docx'
    if os.path.exists(file):
        # Input is present: split it and report how many chapter files were written.
        section_count = split_docx_by_heading(file)
        print(f"文档拆分完成，共生成 {section_count} 个章节文件")
    else:
        # Input is missing: report the problem instead of raising.
        print(f"错误: 文件不存在 - {file}")
--- a/dsLightRag/T1_Train.py
+++ b/dsLightRag/T1_Train.py
@ -10,6 +10,7 @@ KEMU = 'ShiJi'  # JiHe,Math,SuShi,Chemistry,ShiJi,ChangChun
 WORKING_DIR = "./Topic/" + KEMU
 docx_file = 'static/Txt/'
 async def main():
    # 注释掉或删除以下清理代码
    files_to_delete = [
@ -21,12 +22,6 @@ async def main():
        "vdb_entities.json",
        "vdb_relationships.json",
    ]
    # 在docx_file 目录下遍历所有以KEMU开头的文件
    for filename in os.listdir(docx_file):
        if filename.startswith(KEMU):
            file_path = os.path.join(docx_file, filename)
            # 获取docx文件的内容
            content = get_docx_content_by_pandoc(file_path)
    # 删除文件
    for file in files_to_delete:
@ -38,8 +33,15 @@ async def main():
    try:
        # 注意：默认设置使用NetworkX
        rag = await initialize_rag(WORKING_DIR)
-        await rag.ainsert(content)
+
-        print("\nIndexing completed successfully!")
+        # 在docx_file 目录下遍历所有以KEMU开头的文件
        for filename in os.listdir(docx_file):
            if filename.startswith(KEMU):
                file_path = os.path.join(docx_file, filename)
                # 获取docx文件的内容
                content = get_docx_content_by_pandoc(file_path)
                await rag.ainsert(content, file_paths=[filename])
                print(f"Inserted content from {filename}")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
--- a/dsLightRag/Topic/Math/kv_store_llm_response_cache.json
+++ b/dsLightRag/Topic/Math/kv_store_llm_response_cache.json
@ -411,6 +411,96 @@
      "embedding_min": null,
      "embedding_max": null,
      "original_prompt": "小学数学中有哪些常见模型？"
    },
    "fe8f9d5f1f3819dd5ebc978ba772b236": {
      "return": "{\"high_level_keywords\": [\"\\u5b66\\u6821\\u4ecb\\u7ecd\", \"\\u6559\\u80b2\\u673a\\u6784\", \"\\u57fa\\u7840\\u6559\\u80b2\"], \"low_level_keywords\": [\"\\u901a\\u8fbe\\u5c0f\\u5b66\", \"\\u5b66\\u6821\\u5386\\u53f2\", \"\\u6559\\u5b66\\u7279\\u8272\", \"\\u5e08\\u8d44\\u529b\\u91cf\", \"\\u6821\\u56ed\\u8bbe\\u65bd\"]}",
      "cache_type": "keywords",
      "chunk_id": null,
      "embedding": null,
      "embedding_shape": null,
      "embedding_min": null,
      "embedding_max": null,
      "original_prompt": "通达小学介绍"
    },
    "1d11d89fe9a102a889364fbd16c8afb2": {
      "return": "{\"high_level_keywords\": [\"\\u5b66\\u6821\\u8868\\u73b0\", \"730\\u5206\\u5360\\u6bd4\", \"\\u6559\\u80b2\\u8d28\\u91cf\"], \"low_level_keywords\": [\"\\u4f18\\u79c0\\u5b66\\u6821\", \"\\u9ad8\\u5206\\u5360\\u6bd4\", \"\\u5f55\\u53d6\\u5206\\u6570\\u7ebf\"]}",
      "cache_type": "keywords",
      "chunk_id": null,
      "embedding": null,
      "embedding_shape": null,
      "embedding_min": null,
      "embedding_max": null,
      "original_prompt": "在730分占比中，哪些学校表现优秀？"
    },
    "21207bf8247331ee0f7d943bf72cdff5": {
      "return": "{\"high_level_keywords\": [\"\\u9ad8\\u8003\", \"\\u5f55\\u53d6\\u5206\\u6570\\u7ebf\", \"\\u5e08\\u8303\\u5927\\u5b66\"], \"low_level_keywords\": [\"2024\\u5e74\", \"\\u5e08\\u5927\\u81ea\\u7531\\u6821\\u533a\", \"\\u5206\\u6570\\u8981\\u6c42\"]}",
      "cache_type": "keywords",
      "chunk_id": null,
      "embedding": null,
      "embedding_shape": null,
      "embedding_min": null,
      "embedding_max": null,
      "original_prompt": "2024年考师大自由校区需要多少分？"
    },
    "e4fa0b1977d1998840b34a739fe1f21a": {
      "return": "{\"high_level_keywords\": [\"\\u9ad8\\u8003\\u5206\\u6570\\u7ebf\", \"\\u5f55\\u53d6\\u6807\\u51c6\", \"\\u6559\\u80b2\\u653f\\u7b56\"], \"low_level_keywords\": [\"2025\\u5e74\", \"\\u5404\\u6279\\u6b21\", \"\\u6700\\u4f4e\\u5206\\u6570\\u7ebf\"]}",
      "cache_type": "keywords",
      "chunk_id": null,
      "embedding": null,
      "embedding_shape": null,
      "embedding_min": null,
      "embedding_max": null,
      "original_prompt": "2025年各批次最低分数线是多少？"
    },
    "312828f7ddccf7eb1f0514f449555003": {
      "return": "{\"high_level_keywords\": [\"\\u5e08\\u8303\\u5927\\u5b66\", \"\\u6821\\u533a\\u4ecb\\u7ecd\", \"\\u6559\\u80b2\\u73af\\u5883\"], \"low_level_keywords\": [\"\\u81ea\\u7531\\u6821\\u533a\", \"\\u5730\\u7406\\u4f4d\\u7f6e\", \"\\u6821\\u56ed\\u8bbe\\u65bd\", \"\\u5b66\\u9662\\u8bbe\\u7f6e\", \"\\u5386\\u53f2\\u80cc\\u666f\"]}",
      "cache_type": "keywords",
      "chunk_id": null,
      "embedding": null,
      "embedding_shape": null,
      "embedding_min": null,
      "embedding_max": null,
      "original_prompt": "介绍一下师大自由校区？"
    },
    "700cf7455202607abf835a157b8c8bb8": {
      "return": "{\"high_level_keywords\": [\"\\u4e2d\\u8003\\u6210\\u7ee9\", \"\\u6559\\u80b2\\u8d28\\u91cf\", \"\\u5b66\\u6821\\u8868\\u73b0\"], \"low_level_keywords\": [\"\\u529b\\u65fa\\u5b9e\\u9a8c\\u4e2d\\u5b66\", \"2023\\u5e74\", \"\\u5347\\u5b66\\u7387\", \"\\u5e73\\u5747\\u5206\", \"\\u6392\\u540d\"]}",
      "cache_type": "keywords",
      "chunk_id": null,
      "embedding": null,
      "embedding_shape": null,
      "embedding_min": null,
      "embedding_max": null,
      "original_prompt": "力旺实验中学今年的中考成绩怎么样？"
    },
    "29669f536e2df4aa01e70b9fcb45af82": {
      "return": "{\"high_level_keywords\": [\"\\u5de5\\u7a0b\\u6a21\\u578b\", \"\\u5de5\\u7a0b\\u5b66\", \"\\u6a21\\u578b\\u7406\\u8bba\"], \"low_level_keywords\": [\"\\u7ed3\\u6784\\u6a21\\u578b\", \"\\u673a\\u68b0\\u6a21\\u578b\", \"\\u7535\\u5b50\\u6a21\\u578b\", \"\\u5efa\\u7b51\\u6a21\\u578b\", \"\\u4eff\\u771f\\u6a21\\u578b\"]}",
      "cache_type": "keywords",
      "chunk_id": null,
      "embedding": null,
      "embedding_shape": null,
      "embedding_min": null,
      "embedding_max": null,
      "original_prompt": "工程模型是什么"
    },
    "631cc8bcf403b3379af06ac0af3e57e5": {
      "return": "{\"high_level_keywords\": [\"\\u5de5\\u7a0b\\u6a21\\u578b\", \"\\u5de5\\u7a0b\\u5b66\", \"\\u6a21\\u578b\\u6784\\u5efa\"], \"low_level_keywords\": [\"\\u5b9e\\u4f8b\", \"\\u7ed3\\u6784\\u6a21\\u578b\", \"\\u6d41\\u4f53\\u529b\\u5b66\\u6a21\\u578b\", \"\\u673a\\u68b0\\u7cfb\\u7edf\\u6a21\\u578b\", \"\\u5efa\\u7b51\\u6a21\\u578b\"]}",
      "cache_type": "keywords",
      "chunk_id": null,
      "embedding": null,
      "embedding_shape": null,
      "embedding_min": null,
      "embedding_max": null,
      "original_prompt": "工程模型是什么，请举例说明"
    },
    "58c5e62add9b9b5ed797cff23284715b": {
      "return": "{\"high_level_keywords\": [\"\\u5de5\\u7a0b\\u6a21\\u578b\", \"\\u65bd\\u5de5\\u95ee\\u9898\", \"\\u8ba1\\u7b97\\u65b9\\u6cd5\"], \"low_level_keywords\": [\"\\u7ed3\\u6784\\u5206\\u6790\", \"\\u6750\\u6599\\u529b\\u5b66\", \"\\u6709\\u9650\\u5143\\u6cd5\", \"\\u8377\\u8f7d\\u8ba1\\u7b97\", \"\\u65bd\\u5de5\\u6a21\\u62df\"]}",
      "cache_type": "keywords",
      "chunk_id": null,
      "embedding": null,
      "embedding_shape": null,
      "embedding_min": null,
      "embedding_max": null,
      "original_prompt": "工程模型是什么，请举例说明，比如施工问题给出具体的计算方法"
    }
  }
 }
--- a/dsLightRag/Topic/ShiJi/kv_store_doc_status.json
+++ b/dsLightRag/Topic/ShiJi/kv_store_doc_status.json
--- a/dsLightRag/Topic/ShiJi/kv_store_llm_response_cache.json
+++ b/dsLightRag/Topic/ShiJi/kv_store_llm_response_cache.json
--- a/dsLightRag/Util/pycache/DocxUtil.cpython-310.pyc
+++ b/dsLightRag/Util/pycache/DocxUtil.cpython-310.pyc
--- a/dsLightRag/static/Txt/ShiJi.docx
+++ b/dsLightRag/static/Txt/ShiJi.docx
--- a/dsLightRag/static/Txt/ShiJi_1.docx
+++ b/dsLightRag/static/Txt/ShiJi_1.docx
--- a/dsLightRag/static/Txt/ShiJi_2.docx
+++ b/dsLightRag/static/Txt/ShiJi_2.docx
--- a/dsLightRag/static/Txt/ShiJi_3.docx
+++ b/dsLightRag/static/Txt/ShiJi_3.docx
--- a/dsLightRag/static/Txt/ShiJi_4.docx
+++ b/dsLightRag/static/Txt/ShiJi_4.docx
--- a/dsLightRag/static/Txt/ShiJi_5.docx
+++ b/dsLightRag/static/Txt/ShiJi_5.docx
--- a/dsLightRag/static/ai.html
+++ b/dsLightRag/static/ai.html
@ -213,7 +213,7 @@
            <a href="https://blog.csdn.net/xixiaoyaoww/article/details/141364224" class="btn" target="_blank">进入 <i class="fas fa-arrow-right"></i></a>
        </div>
        <div class="card">
-            <i class="fas fa-project-diagram"></i>
+            <i class="fas fa-newspaper"></i>
            <h3>知识图谱</h3>
            <p>小学数学知识图谱(开发中)</p>
            <a href="tree.html" class="btn" target="_blank">进入 <i class="fas fa-arrow-right"></i></a>
--- a/dsLightRag/static/markdown/ShiJi_1.md
+++ b/dsLightRag/static/markdown/ShiJi_1.md