Files
dsProject/dsLightRag/Volcengine/T2_CreateIndex.py
2025-09-07 08:58:11 +08:00

100 lines
4.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import logging
import time
from Config.Config import VOLC_ACCESSKEY, VOLC_SECRETKEY
from VikingDBMemoryService import VikingDBMemoryService, MEMORY_COLLECTION_NAME, VikingDBMemoryException
# Log line layout: timestamp, logger name, level, message.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

# Logging control: a dedicated logger for this script, emitting INFO and above.
logger = logging.getLogger('CollectionMemory')
logger.setLevel(logging.INFO)

# Attach the stream handler only once, to avoid duplicated log lines.
if not logger.handlers:
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter(_LOG_FORMAT))
    logger.addHandler(handler)
# Substrings of the service error text that are treated as "index not ready yet".
_INDEX_NOT_READY_MARKERS = ("index not exist", "need to add messages")


def _wait_for_index_ready(memory_service, collection_name, max_retries, retry_interval):
    """Poll a probe search until it stops failing with an index-not-ready error.

    Returns True as soon as ``search_memory`` completes without raising, and
    False on any other error or after ``max_retries`` unsuccessful attempts.
    """
    for attempt in range(1, max_retries + 1):
        # Probe with a search, mirroring the search logic in chat.py.
        # Built per attempt so the service always receives a fresh dict.
        filter_params = {
            "memory_type": ["sys_event_v1"],
            "user_id": "test_user",  # test user id, supplied to satisfy the API
        }
        try:
            memory_service.search_memory(
                collection_name=collection_name,
                query="test",
                filter=filter_params,
                limit=1,
            )
        except VikingDBMemoryException as e:
            error_msg = str(e)
            if not any(marker in error_msg for marker in _INDEX_NOT_READY_MARKERS):
                logger.error(f"检查索引就绪状态时发生错误: {e}")
                return False
            logger.info(f"索引尚未就绪,等待中... (重试 {attempt}/{max_retries})")
            # No point sleeping after the last attempt; report the timeout right away.
            if attempt < max_retries:
                time.sleep(retry_interval)
        except Exception as e:
            logger.error(f"检查过程发生意外错误: {e}")
            return False
        else:
            # No index-related error was raised, so the collection is considered ready.
            logger.info(f"集合 '{collection_name}' 索引构建完成,已就绪")
            return True

    logger.error(f"集合 '{collection_name}' 索引构建超时")
    return False


def create_memory_collection(collection_name, description="情感陪伴记忆库", *,
                             max_retries=30, retry_interval=10, initial_delay=30):
    """Create a memory collection if it does not exist and wait for its index.

    Args:
        collection_name: Name of the collection to check for and create.
        description: Description passed to ``create_collection``.
        max_retries: Maximum number of readiness probes after creation.
        retry_interval: Seconds to sleep between readiness probes.
        initial_delay: Seconds to sleep after creation before the first probe.

    Returns:
        True when the collection was created and a probe search succeeded.
        False when the collection already exists, when a probe fails with an
        error that is not an index-not-ready error, or when the index is still
        not ready after ``max_retries`` probes.

    Raises:
        Exception: Re-raises whatever ``get_collection`` raised when its message
            does not contain "collection not exist", and whatever
            ``create_collection`` (or logging its response) raised.
    """
    # Initialise the memory service client.
    memory_service = VikingDBMemoryService(
        ak=VOLC_ACCESSKEY,
        sk=VOLC_SECRETKEY,
        host="api-knowledgebase.mlp.cn-beijing.volces.com",
        region="cn-beijing",
    )

    # Check whether the collection already exists.
    logger.info(f"正在检查集合 '{collection_name}'...")
    try:
        memory_service.get_collection(collection_name)
    except Exception as e:
        # Only a "collection not exist" error means we should go on and create it.
        if "collection not exist" not in str(e):
            logger.error(f"检查集合时发生错误: {e}")
            raise
    else:
        logger.info(f"集合 '{collection_name}' 已存在,无需重复创建")
        return False

    # Create the new collection.
    logger.info(f"开始创建集合 '{collection_name}'...")
    try:
        response = memory_service.create_collection(
            collection_name=collection_name,
            description=description,
            builtin_event_types=["sys_event_v1", "sys_profile_collect_v1"],
            builtin_entity_types=["sys_profile_v1"],
        )
        logger.info(f"创建响应: {json.dumps(response, ensure_ascii=False, indent=2)}")
    except Exception as e:
        logger.error(f"创建集合失败: {e}")
        raise
    logger.info(f"集合 '{collection_name}' 创建成功")

    # Wait for the collection to become ready, using the same kind of index
    # readiness check as chat.py. The initial delay avoids probing immediately
    # after creation.
    logger.info("等待集合初始化完成...")
    logger.info(f"初始延迟{initial_delay}秒等待索引构建...")
    time.sleep(initial_delay)
    return _wait_for_index_ready(memory_service, collection_name, max_retries, retry_interval)
# Script entry point: create the default memory collection when run directly.
if __name__ == "__main__":
    create_memory_collection(collection_name=MEMORY_COLLECTION_NAME)