""" pip install faiss-cpu """ import re from Util.SplitDocxUtil import SplitDocxUtil # 源文件 input_file = '../Txt/小学数学(史校长).docx' def split_into_blocks(text): """使用正则表达式匹配问题和话题的标题及内容""" pattern = r'(问题\d+|话题\d+)([\s\S]+?)(?=问题\d+|话题\d+|$)' blocks = re.findall(pattern, text, re.DOTALL) return [(i + 1, title + content) for i, (title, content) in enumerate(blocks)] """处理文档主函数""" text = SplitDocxUtil.read_docx(input_file) # 切开块 blocks = split_into_blocks(text) # 将块编码并添加到向量数据库 for block in blocks: if (len(block[1]) > 10): print(block[1]) print("\r\n") # 初始化 Milvus 连接池 milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS) # 初始化集合管理器 collection_name = MS_COLLECTION_NAME collection_manager = MilvusCollectionManager(collection_name)