main
HuangHai 6 days ago
parent 1ac102f688
commit daed5693f3

@ -3,5 +3,5 @@
<component name="Black">
<option name="sdkName" value="D:\anaconda3\envs\lightrag" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="D:\anaconda3\envs\py310" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (5)" project-jdk-type="Python SDK" />
</project>

@ -34,7 +34,7 @@ async def lifespan(app: FastAPI):
app = FastAPI(lifespan=lifespan)
# 挂载静态文件目录
app.mount("../static", StaticFiles(directory="Static"), name="static")
app.mount("/static", StaticFiles(directory="Static"), name="static")
# 访问根的跳转
@ -285,10 +285,10 @@ async def render_html(request: fastapi.Request):
html_content = html_content.replace("```", "")
# 创建临时文件
filename = f"relation_{uuid.uuid4().hex}.html"
filepath = os.path.join('../static/temp', filename)
filepath = os.path.join('static/temp', filename)
# 确保temp目录存在
os.makedirs('../static/temp', exist_ok=True)
os.makedirs('static/temp', exist_ok=True)
# 写入文件
with open(filepath, 'w', encoding='utf-8') as f:

@ -0,0 +1,64 @@
from docx import Document
import os
def _group_paragraphs_by_heading(paragraphs, split_keyword, heading_style):
    """Group paragraphs into sections that each start at a matching heading.

    A paragraph starts a new section when its style name equals
    ``heading_style`` and its text contains ``split_keyword``. Paragraphs
    that appear before the first matching heading are dropped.
    """
    sections = []
    current_section = None  # None until the first matching heading is seen
    for para in paragraphs:
        if para.style.name == heading_style and split_keyword in para.text:
            if current_section is not None:
                sections.append(current_section)
            current_section = [para]
        elif current_section is not None:
            current_section.append(para)
    # Flush the trailing section, which has no following heading to close it.
    if current_section is not None:
        sections.append(current_section)
    return sections


def _copy_paragraph(target_doc, para):
    """Append a copy of ``para`` (text, style, basic run formatting) to ``target_doc``."""
    # Start from an EMPTY paragraph: passing para.text to add_paragraph() and
    # then re-adding every run would write the text twice.
    new_para = target_doc.add_paragraph()
    new_para.style = para.style

    runs = para.runs
    if "".join(run.text for run in runs) != para.text:
        # The runs do not account for the whole paragraph text (presumably
        # some text lives outside direct runs, e.g. in hyperlinks - verify).
        # Keep the full text rather than lose content; formatting is dropped.
        new_para.add_run(para.text)
        return new_para

    for run in runs:
        new_run = new_para.add_run(run.text)
        new_run.bold = run.bold
        new_run.italic = run.italic
        new_run.underline = run.underline
        new_run.font.size = run.font.size
        new_run.font.name = run.font.name
    return new_para


def split_docx_by_heading(input_path, split_keyword="少年读史记",
                          heading_style='Heading 1', output_prefix="ShiJi"):
    """Split a .docx file into one document per matching heading.

    The document at ``input_path`` is read, its paragraphs are grouped into
    sections that begin at every paragraph whose style name is
    ``heading_style`` and whose text contains ``split_keyword``, and each
    section is saved as ``<output_prefix>_<n>.docx`` (n starting at 1) in the
    same directory as ``input_path``. Only paragraphs are copied.

    Args:
        input_path: Path of the source .docx file.
        split_keyword: Text a heading must contain to start a new section.
        heading_style: Style name that identifies a splitting heading.
        output_prefix: File-name prefix of the generated documents.

    Returns:
        The number of section files written (0 if no heading matched).
    """
    doc = Document(input_path)
    sections = _group_paragraphs_by_heading(
        doc.paragraphs, split_keyword, heading_style
    )

    output_dir = os.path.dirname(input_path)
    for i, section in enumerate(sections, 1):
        new_doc = Document()
        for para in section:
            _copy_paragraph(new_doc, para)
        output_filename = f"{output_prefix}_{i}.docx"
        output_path = os.path.join(output_dir, output_filename)
        new_doc.save(output_path)
        print(f"已保存拆分文档: {output_path}")
    return len(sections)
# Script entry point: split the ShiJi document if it is present on disk.
if __name__ == "__main__":
    source_docx = r'D:\dsWork\dsProject\dsLightRag\static\Txt\ShiJi.docx'
    if os.path.exists(source_docx):
        # Split the document and report how many section files were produced.
        section_count = split_docx_by_heading(source_docx)
        print(f"文档拆分完成,共生成 {section_count} 个章节文件")
    else:
        # Nothing to split; report the missing path.
        print(f"错误: 文件不存在 - {source_docx}")

@ -10,6 +10,7 @@ KEMU = 'ShiJi' # JiHe,Math,SuShi,Chemistry,ShiJi,ChangChun
WORKING_DIR = "./Topic/" + KEMU
docx_file = 'static/Txt/'
async def main():
# 注释掉或删除以下清理代码
files_to_delete = [
@ -21,12 +22,6 @@ async def main():
"vdb_entities.json",
"vdb_relationships.json",
]
# 在docx_file 目录下遍历所有以KEMU开头的文件
for filename in os.listdir(docx_file):
if filename.startswith(KEMU):
file_path = os.path.join(docx_file, filename)
# 获取docx文件的内容
content = get_docx_content_by_pandoc(file_path)
# 删除文件
for file in files_to_delete:
@ -38,8 +33,15 @@ async def main():
try:
# 注意默认设置使用NetworkX
rag = await initialize_rag(WORKING_DIR)
await rag.ainsert(content)
print("\nIndexing completed successfully!")
# 在docx_file 目录下遍历所有以KEMU开头的文件
for filename in os.listdir(docx_file):
if filename.startswith(KEMU):
file_path = os.path.join(docx_file, filename)
# 获取docx文件的内容
content = get_docx_content_by_pandoc(file_path)
await rag.ainsert(content, file_paths=[filename])
print(f"Inserted content from {filename}")
except Exception as e:
print(f"An error occurred: {e}")
finally:

@ -411,6 +411,96 @@
"embedding_min": null,
"embedding_max": null,
"original_prompt": "小学数学中有哪些常见模型?"
},
"fe8f9d5f1f3819dd5ebc978ba772b236": {
"return": "{\"high_level_keywords\": [\"\\u5b66\\u6821\\u4ecb\\u7ecd\", \"\\u6559\\u80b2\\u673a\\u6784\", \"\\u57fa\\u7840\\u6559\\u80b2\"], \"low_level_keywords\": [\"\\u901a\\u8fbe\\u5c0f\\u5b66\", \"\\u5b66\\u6821\\u5386\\u53f2\", \"\\u6559\\u5b66\\u7279\\u8272\", \"\\u5e08\\u8d44\\u529b\\u91cf\", \"\\u6821\\u56ed\\u8bbe\\u65bd\"]}",
"cache_type": "keywords",
"chunk_id": null,
"embedding": null,
"embedding_shape": null,
"embedding_min": null,
"embedding_max": null,
"original_prompt": "通达小学介绍"
},
"1d11d89fe9a102a889364fbd16c8afb2": {
"return": "{\"high_level_keywords\": [\"\\u5b66\\u6821\\u8868\\u73b0\", \"730\\u5206\\u5360\\u6bd4\", \"\\u6559\\u80b2\\u8d28\\u91cf\"], \"low_level_keywords\": [\"\\u4f18\\u79c0\\u5b66\\u6821\", \"\\u9ad8\\u5206\\u5360\\u6bd4\", \"\\u5f55\\u53d6\\u5206\\u6570\\u7ebf\"]}",
"cache_type": "keywords",
"chunk_id": null,
"embedding": null,
"embedding_shape": null,
"embedding_min": null,
"embedding_max": null,
"original_prompt": "在730分占比中哪些学校表现优秀"
},
"21207bf8247331ee0f7d943bf72cdff5": {
"return": "{\"high_level_keywords\": [\"\\u9ad8\\u8003\", \"\\u5f55\\u53d6\\u5206\\u6570\\u7ebf\", \"\\u5e08\\u8303\\u5927\\u5b66\"], \"low_level_keywords\": [\"2024\\u5e74\", \"\\u5e08\\u5927\\u81ea\\u7531\\u6821\\u533a\", \"\\u5206\\u6570\\u8981\\u6c42\"]}",
"cache_type": "keywords",
"chunk_id": null,
"embedding": null,
"embedding_shape": null,
"embedding_min": null,
"embedding_max": null,
"original_prompt": "2024年考师大自由校区需要多少分"
},
"e4fa0b1977d1998840b34a739fe1f21a": {
"return": "{\"high_level_keywords\": [\"\\u9ad8\\u8003\\u5206\\u6570\\u7ebf\", \"\\u5f55\\u53d6\\u6807\\u51c6\", \"\\u6559\\u80b2\\u653f\\u7b56\"], \"low_level_keywords\": [\"2025\\u5e74\", \"\\u5404\\u6279\\u6b21\", \"\\u6700\\u4f4e\\u5206\\u6570\\u7ebf\"]}",
"cache_type": "keywords",
"chunk_id": null,
"embedding": null,
"embedding_shape": null,
"embedding_min": null,
"embedding_max": null,
"original_prompt": "2025年各批次最低分数线是多少"
},
"312828f7ddccf7eb1f0514f449555003": {
"return": "{\"high_level_keywords\": [\"\\u5e08\\u8303\\u5927\\u5b66\", \"\\u6821\\u533a\\u4ecb\\u7ecd\", \"\\u6559\\u80b2\\u73af\\u5883\"], \"low_level_keywords\": [\"\\u81ea\\u7531\\u6821\\u533a\", \"\\u5730\\u7406\\u4f4d\\u7f6e\", \"\\u6821\\u56ed\\u8bbe\\u65bd\", \"\\u5b66\\u9662\\u8bbe\\u7f6e\", \"\\u5386\\u53f2\\u80cc\\u666f\"]}",
"cache_type": "keywords",
"chunk_id": null,
"embedding": null,
"embedding_shape": null,
"embedding_min": null,
"embedding_max": null,
"original_prompt": "介绍一下师大自由校区?"
},
"700cf7455202607abf835a157b8c8bb8": {
"return": "{\"high_level_keywords\": [\"\\u4e2d\\u8003\\u6210\\u7ee9\", \"\\u6559\\u80b2\\u8d28\\u91cf\", \"\\u5b66\\u6821\\u8868\\u73b0\"], \"low_level_keywords\": [\"\\u529b\\u65fa\\u5b9e\\u9a8c\\u4e2d\\u5b66\", \"2023\\u5e74\", \"\\u5347\\u5b66\\u7387\", \"\\u5e73\\u5747\\u5206\", \"\\u6392\\u540d\"]}",
"cache_type": "keywords",
"chunk_id": null,
"embedding": null,
"embedding_shape": null,
"embedding_min": null,
"embedding_max": null,
"original_prompt": "力旺实验中学今年的中考成绩怎么样?"
},
"29669f536e2df4aa01e70b9fcb45af82": {
"return": "{\"high_level_keywords\": [\"\\u5de5\\u7a0b\\u6a21\\u578b\", \"\\u5de5\\u7a0b\\u5b66\", \"\\u6a21\\u578b\\u7406\\u8bba\"], \"low_level_keywords\": [\"\\u7ed3\\u6784\\u6a21\\u578b\", \"\\u673a\\u68b0\\u6a21\\u578b\", \"\\u7535\\u5b50\\u6a21\\u578b\", \"\\u5efa\\u7b51\\u6a21\\u578b\", \"\\u4eff\\u771f\\u6a21\\u578b\"]}",
"cache_type": "keywords",
"chunk_id": null,
"embedding": null,
"embedding_shape": null,
"embedding_min": null,
"embedding_max": null,
"original_prompt": "工程模型是什么"
},
"631cc8bcf403b3379af06ac0af3e57e5": {
"return": "{\"high_level_keywords\": [\"\\u5de5\\u7a0b\\u6a21\\u578b\", \"\\u5de5\\u7a0b\\u5b66\", \"\\u6a21\\u578b\\u6784\\u5efa\"], \"low_level_keywords\": [\"\\u5b9e\\u4f8b\", \"\\u7ed3\\u6784\\u6a21\\u578b\", \"\\u6d41\\u4f53\\u529b\\u5b66\\u6a21\\u578b\", \"\\u673a\\u68b0\\u7cfb\\u7edf\\u6a21\\u578b\", \"\\u5efa\\u7b51\\u6a21\\u578b\"]}",
"cache_type": "keywords",
"chunk_id": null,
"embedding": null,
"embedding_shape": null,
"embedding_min": null,
"embedding_max": null,
"original_prompt": "工程模型是什么,请举例说明"
},
"58c5e62add9b9b5ed797cff23284715b": {
"return": "{\"high_level_keywords\": [\"\\u5de5\\u7a0b\\u6a21\\u578b\", \"\\u65bd\\u5de5\\u95ee\\u9898\", \"\\u8ba1\\u7b97\\u65b9\\u6cd5\"], \"low_level_keywords\": [\"\\u7ed3\\u6784\\u5206\\u6790\", \"\\u6750\\u6599\\u529b\\u5b66\", \"\\u6709\\u9650\\u5143\\u6cd5\", \"\\u8377\\u8f7d\\u8ba1\\u7b97\", \"\\u65bd\\u5de5\\u6a21\\u62df\"]}",
"cache_type": "keywords",
"chunk_id": null,
"embedding": null,
"embedding_shape": null,
"embedding_min": null,
"embedding_max": null,
"original_prompt": "工程模型是什么,请举例说明,比如施工问题给出具体的计算方法"
}
}
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Binary file not shown.

@ -213,7 +213,7 @@
<a href="https://blog.csdn.net/xixiaoyaoww/article/details/141364224" class="btn" target="_blank">进入 <i class="fas fa-arrow-right"></i></a>
</div>
<div class="card">
<i class="fas fa-project-diagram"></i>
<i class="fas fa-newspaper"></i>
<h3>知识图谱</h3>
<p>小学数学知识图谱(开发中)</p>
<a href="tree.html" class="btn" target="_blank">进入 <i class="fas fa-arrow-right"></i></a>

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save