Files
dsProject/dsLightRag/Volcengine/T2_CreateIndex.py

114 lines
3.9 KiB
Python
Raw Normal View History

2025-09-07 09:19:08 +08:00
import logging
2025-09-07 13:13:21 +08:00
import sys
import time
2025-09-07 13:00:43 +08:00
2025-09-07 13:13:21 +08:00
from volcenginesdkarkruntime import Ark
2025-09-07 13:15:38 +08:00
2025-09-07 13:13:21 +08:00
from Config.Config import VOLC_ACCESSKEY, VOLC_SECRETKEY, VOLC_API_KEY
from VikingDBMemoryService import VikingDBMemoryService, MEMORY_COLLECTION_NAME
2025-09-07 08:56:35 +08:00
2025-09-07 09:19:08 +08:00
# Configure the logger used by this script.
logger = logging.getLogger('CollectionMemory')
logger.setLevel(logging.INFO)

# Attach the stream handler only once so that log records are not duplicated
# if this module-level setup runs again against the same named logger.
if not logger.handlers:
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    logger.addHandler(handler)
2025-09-07 08:00:15 +08:00
2025-09-07 09:19:08 +08:00
def _wait_for_index(memory_service, collection_name, user_id, max_retries, retry_interval):
    """Poll ``search_memory`` until a query succeeds or the attempts run out.

    Args:
        memory_service: Object exposing ``search_memory(collection_name=...,
            query=..., filter=..., limit=...)``.
        collection_name: Name of the collection to query.
        user_id: Value placed in the ``user_id`` field of the search filter.
        max_retries: Maximum number of search attempts.
        retry_interval: Seconds to sleep between two attempts.

    Returns:
        True as soon as one search call succeeds, False when every attempt
        failed with an error whose text contains ``1000023``.

    Raises:
        Exception: Any search error whose text does not contain ``1000023``
            is logged and re-raised unchanged.
    """
    filter_params = {
        "user_id": user_id,
        "memory_type": ["sys_event_v1", "sys_profile_v1"],
    }
    for attempt in range(1, max_retries + 1):
        try:
            response = memory_service.search_memory(
                collection_name=collection_name,
                query="测试查询",
                filter=filter_params,
                limit=1,
            )
        except Exception as e:
            error_message = str(e)
            # 1000023 is treated as "index still being built"; the original
            # comment notes it is the same error code used in chat.py.backup.
            if "1000023" not in error_message:
                logger.error(f"搜索时发生错误: {error_message}")
                raise
            # Only announce a retry and sleep when another attempt will
            # actually follow; otherwise the timeout is reported a full
            # interval late for no benefit.
            if attempt < max_retries:
                logger.info(f"记忆索引正在构建中。将在{retry_interval}秒后重试... (尝试次数 {attempt})")
                time.sleep(retry_interval)
        else:
            # A successful search means the index is ready. Kept outside the
            # ``try`` so a problem reading the response is not reported as a
            # search error.
            logger.info(f"索引已就绪,找到 {response.get('data', {}).get('count', 0)} 条记录")
            return True
    return False


def main():
    """Create the memory collection index and wait until it is searchable.

    Steps: make sure the collection exists, add one small test session to
    trigger index building, then poll the search API until it answers.
    Exits the process with status 1 when the index is not ready after all
    attempts or when any step raises.
    """
    logger.info("开始创建索引...")
    # Initialise the memory service client.
    memory_service = VikingDBMemoryService(
        ak=VOLC_ACCESSKEY,
        sk=VOLC_SECRETKEY,
        host="api-knowledgebase.mlp.cn-beijing.volces.com",
        region="cn-beijing"
    )
    # Initialise the LLM client.
    # NOTE(review): llm_client is never used below; kept so that the
    # constructor call (and any failure it may raise) is preserved. Confirm
    # whether it can be removed.
    llm_client = Ark(
        base_url="https://ark.cn-beijing.volces.com/api/v3",
        api_key=VOLC_API_KEY
    )
    try:
        collection_name = MEMORY_COLLECTION_NAME
        user_id = "system"
        assistant_id = "assistant"

        # Make sure the collection exists.
        logger.info("检查/创建集合...")
        memory_service.ensure_collection_exists(collection_name)

        # Add test data to trigger index building.
        logger.info("添加测试数据...")
        test_messages = [
            {"role": "user", "content": "你好,我是测试用户"},
            {"role": "assistant", "content": "你好,我是测试助手"}
        ]
        # Read the clock once so the metadata timestamp (milliseconds) and
        # the session id (seconds) describe the same instant.
        now = time.time()
        test_metadata = {
            "default_user_id": user_id,
            "default_assistant_id": assistant_id,
            "time": int(now * 1000)
        }
        session_id = f"test_session_{int(now)}"
        memory_service.add_session(
            collection_name=collection_name,
            session_id=session_id,
            messages=test_messages,
            metadata=test_metadata
        )
        logger.info("测试数据添加成功,等待索引构建...")

        max_retries = 30
        retry_interval = 60  # seconds
        index_ready = _wait_for_index(
            memory_service, collection_name, user_id, max_retries, retry_interval
        )
        if not index_ready:
            # Every attempt reported that the index is still building.
            logger.error(f"索引构建超时,已尝试 {max_retries}")
            # SystemExit is not an Exception subclass, so the handler below
            # does not intercept it.
            sys.exit(1)
        logger.info("索引创建和测试完成!")
    except Exception as e:
        logger.error(f"操作失败: {e}")
        sys.exit(1)
2025-09-07 09:19:08 +08:00
2025-09-07 08:00:15 +08:00
# Run the index-creation routine only when this file is executed as a script.
if __name__ == "__main__":
    main()