|
|
|
@ -4,6 +4,8 @@ pip install faiss-cpu
|
|
|
|
|
import re
|
|
|
|
|
from Util.SplitDocxUtil import SplitDocxUtil
|
|
|
|
|
|
|
|
|
|
# 源文件
|
|
|
|
|
input_file = '../Txt/小学数学(史校长).docx'
|
|
|
|
|
|
|
|
|
|
def split_into_blocks(text):
|
|
|
|
|
"""使用正则表达式匹配问题和话题的标题及内容"""
|
|
|
|
@ -12,12 +14,10 @@ def split_into_blocks(text):
|
|
|
|
|
return [(i + 1, title + content) for i, (title, content) in enumerate(blocks)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 源文件
|
|
|
|
|
input_file = '../Txt/小学数学(史校长).docx'
|
|
|
|
|
|
|
|
|
|
"""处理文档主函数"""
|
|
|
|
|
text = SplitDocxUtil.read_docx(input_file)
|
|
|
|
|
|
|
|
|
|
# 切开块
|
|
|
|
|
blocks = split_into_blocks(text)
|
|
|
|
|
|
|
|
|
|
# 将块编码并添加到向量数据库
|
|
|
|
|