From 25eb08b1db7d24801b1ae540384c80007acc68f3 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Sun, 7 Sep 2025 08:56:35 +0800 Subject: [PATCH] 'commit' --- dsLightRag/Volcengine/State.py | 53 +++ dsLightRag/Volcengine/T1_DropIndex.py | 9 +- dsLightRag/Volcengine/T2_CreateIndex.py | 59 ++- dsLightRag/Volcengine/T3_Chat.py | 107 +++++ .../Volcengine/VikingDBMemoryService.py | 15 +- .../VikingDBMemoryService.cpython-310.pyc | Bin 14840 -> 14960 bytes dsLightRag/Volcengine/chat.py | 403 ++++++++++++++++++ 7 files changed, 626 insertions(+), 20 deletions(-) create mode 100644 dsLightRag/Volcengine/State.py create mode 100644 dsLightRag/Volcengine/T3_Chat.py create mode 100644 dsLightRag/Volcengine/chat.py diff --git a/dsLightRag/Volcengine/State.py b/dsLightRag/Volcengine/State.py new file mode 100644 index 00000000..44a1aaf6 --- /dev/null +++ b/dsLightRag/Volcengine/State.py @@ -0,0 +1,53 @@ +import json +import requests +from volcengine.base.Request import Request +from volcengine.Credentials import Credentials +from volcengine.auth.SignerV4 import SignerV4 + +from Config.Config import VOLC_ACCESSKEY, VOLC_SECRETKEY +from Volcengine.VikingDBMemoryService import MEMORY_COLLECTION_NAME + +AK = VOLC_ACCESSKEY +SK = VOLC_SECRETKEY +Domain = "api-knowledgebase.mlp.cn-beijing.volces.com" + +def prepare_request(method, path, ak, sk, data=None): + r = Request() + r.set_shema("http") # 注意:这里用 http,因为 SignerV4 内部会拼 host + r.set_method(method) + r.set_host(Domain) + r.set_path(path) + + if data is not None: + r.set_body(json.dumps(data)) + + # 使用 air 服务和 cn-north-1 区域 + credentials = Credentials(ak, sk, 'air', 'cn-north-1') + SignerV4.sign(r, credentials) + return r + +def internal_request(method, api, payload, params=None): + req = prepare_request( + method=method, + path=api, + ak=AK, + sk=SK, + data=payload + ) + + r = requests.request( + method=req.method, + url="{}://{}{}".format(req.schema, req.host, req.path), + headers=req.headers, + data=req.body, + params=params, + ) + return r + +# 查询记忆库信息 +path = '/api/memory/collection/info' +payload = { + "CollectionName": MEMORY_COLLECTION_NAME +} +rsp = internal_request("POST", path, payload) +print(rsp.json()) \ No newline at end of file diff --git a/dsLightRag/Volcengine/T1_DropIndex.py b/dsLightRag/Volcengine/T1_DropIndex.py index 9db0cbfd..c61b9454 100644 --- a/dsLightRag/Volcengine/T1_DropIndex.py +++ b/dsLightRag/Volcengine/T1_DropIndex.py @@ -7,9 +7,12 @@ from Config.Config import VOLC_ACCESSKEY, VOLC_SECRETKEY # 控制日志输出 logger = logging.getLogger('CollectionMemory') logger.setLevel(logging.INFO) -handler = logging.StreamHandler() -handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')) -logger.addHandler(handler) + +# 只添加一次处理器,避免重复日志 +if not logger.handlers: + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')) + logger.addHandler(handler) def drop_existing_collection(collection_name): # 初始化记忆库服务 diff --git a/dsLightRag/Volcengine/T2_CreateIndex.py b/dsLightRag/Volcengine/T2_CreateIndex.py index 4148e9c4..13135c9a 100644 --- a/dsLightRag/Volcengine/T2_CreateIndex.py +++ b/dsLightRag/Volcengine/T2_CreateIndex.py @@ -1,14 +1,20 @@ import json import logging +import time from Config.Config import VOLC_ACCESSKEY, VOLC_SECRETKEY -from VikingDBMemoryService import VikingDBMemoryService, MEMORY_COLLECTION_NAME +from VikingDBMemoryService import VikingDBMemoryService, MEMORY_COLLECTION_NAME, VikingDBMemoryException + + # 控制日志输出 logger = logging.getLogger('CollectionMemory') logger.setLevel(logging.INFO) -handler = logging.StreamHandler() -handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')) -logger.addHandler(handler) + +# 只添加一次处理器,避免重复日志 +if not logger.handlers: + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')) + logger.addHandler(handler) def create_memory_collection(collection_name, description="情感陪伴记忆库"): @@ -44,15 +50,44 @@ def create_memory_collection(collection_name, description="情感陪伴记忆库 logger.info(f"创建响应: {json.dumps(response, ensure_ascii=False, indent=2)}") logger.info(f"集合 '{collection_name}' 创建成功") - # 等待集合就绪 + # 等待集合就绪 - 修改为模拟chat.py的索引就绪检查机制 logger.info("等待集合初始化完成...") - # 将独立函数调用改为实例方法调用 - if memory_service.wait_for_collection_ready(): - logger.info(f"集合 '{collection_name}' 已就绪,可以开始使用") - return True - else: - logger.info(f"集合 '{collection_name}' 初始化超时") - return False + max_retries = 30 # 最多重试30次 + retry_interval = 10 # 每10秒重试一次 + retry_count = 0 + + # 增加初始延迟,避免创建后立即检查 + logger.info(f"初始延迟30秒,等待索引构建...") + time.sleep(30) + + while retry_count < max_retries: + try: + # 尝试执行需要索引的操作(模拟chat.py中的搜索逻辑) + filter_params = {"memory_type": ["sys_event_v1"]} + memory_service.search_memory( + collection_name=collection_name, + query="test", + filter=filter_params, + limit=1 + ) + # 如果没有抛出索引错误,则认为就绪 + logger.info(f"集合 '{collection_name}' 索引构建完成,已就绪") + return True + except VikingDBMemoryException as e: + error_msg = str(e) + # 检查是否是索引未就绪相关错误 + if "index not exist" in error_msg or "need to add messages" in error_msg: + retry_count += 1 + logger.info(f"索引尚未就绪,等待中... (重试 {retry_count}/{max_retries})") + time.sleep(retry_interval) + else: + logger.error(f"检查索引就绪状态时发生错误: {str(e)}") + return False + except Exception as e: + logger.error(f"检查过程发生意外错误: {str(e)}") + return False + logger.error(f"集合 '{collection_name}' 索引构建超时") + return False except Exception as e: logger.error(f"创建集合失败: {str(e)}") diff --git a/dsLightRag/Volcengine/T3_Chat.py b/dsLightRag/Volcengine/T3_Chat.py new file mode 100644 index 00000000..b0bbee72 --- /dev/null +++ b/dsLightRag/Volcengine/T3_Chat.py @@ -0,0 +1,107 @@ +import json +import logging +import time +from Volcengine.VikingDBMemoryService import initialize_services, MEMORY_COLLECTION_NAME +from volcenginesdkarkruntime import Ark +from Config.Config import VOLC_API_KEY + +# 配置日志 +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger('MemoryChatSimulator') + +def simulate_chat_session(memory_service, llm_client, user_id, session_topic): + """模拟完整对话会话:创建对话→存储记忆→搜索记忆""" + # 初始化对话历史 + conversation_history = [] + logger.info(f"\n=== 开始新对话会话: {session_topic} ===") + + # 模拟两轮对话 + user_messages = [ + f"你好,我是{user_id},{session_topic}", + "我最近感觉压力很大,不知道该怎么办" + ] + + for message in user_messages: + # 处理对话轮次 + assistant_reply = memory_service.handle_conversation_turn( + llm_client=llm_client, + user_id=user_id, + user_message=message, + conversation_history=conversation_history + ) + time.sleep(1) # 模拟思考延迟 + + # 归档对话记忆 + logger.info("\n=== 归档对话记忆 ===") + archive_success = memory_service.archive_conversation( + user_id=user_id, + assistant_id="simulated_assistant", + conversation_history=conversation_history, + topic_name=session_topic + ) + + # 搜索相关记忆 + if archive_success: + logger.info("\n=== 搜索相关记忆 ===") + # 等待索引更新 + logger.info("等待记忆库索引更新...") + time.sleep(5) # 实际环境可能需要更长时间 + + # 搜索与当前主题相关的记忆 + search_query = f"{session_topic}相关的问题" + relevant_memories = memory_service.search_relevant_memories( + collection_name=MEMORY_COLLECTION_NAME, + user_id=user_id, + query=search_query, + limit=3 + ) + + logger.info(f"\n搜索到 '{search_query}' 的相关记忆 ({len(relevant_memories)}条):") + for i, memory in enumerate(relevant_memories, 1): + logger.info(f"\n记忆 {i} (相关度: {memory['score']:.3f}):") + logger.info(f"内容: {memory['memory_info']}") + + return archive_success + +def main(): + """主函数:初始化服务并执行模拟测试""" + logger.info("===== 记忆库存储与搜索模拟测试 ====") + + try: + # 初始化服务 + logger.info("初始化记忆库服务和LLM客户端...") + memory_service, llm_client = initialize_services() + + # 确保集合就绪 + logger.info(f"检查集合 '{MEMORY_COLLECTION_NAME}' 是否就绪...") + + # 先验证集合是否存在 + try: + collection_info = memory_service.get_collection(MEMORY_COLLECTION_NAME) + logger.info(f"集合 '{MEMORY_COLLECTION_NAME}' 存在,详细信息: {json.dumps(collection_info, ensure_ascii=False)}...") + except Exception as e: + logger.error(f"集合 '{MEMORY_COLLECTION_NAME}' 不存在: {str(e)}") + logger.error("请先运行T2_CreateIndex.py创建集合") + return + + if not memory_service.wait_for_collection_ready(timeout=120): + logger.error("集合未就绪,无法继续测试") + return + + # 模拟两个不同用户的对话会话 + user_sessions = [ + ("student_001", "学习压力问题"), + ("teacher_001", "教学方法讨论") + ] + + for user_id, topic in user_sessions: + simulate_chat_session(memory_service, llm_client, user_id, topic) + time.sleep(2) # 会话间隔 + + logger.info("\n===== 模拟测试完成 ====") + + except Exception as e: + logger.error(f"测试过程中发生错误: {str(e)}", exc_info=True) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/dsLightRag/Volcengine/VikingDBMemoryService.py b/dsLightRag/Volcengine/VikingDBMemoryService.py index 9dd09ca1..b9b5f990 100644 --- a/dsLightRag/Volcengine/VikingDBMemoryService.py +++ b/dsLightRag/Volcengine/VikingDBMemoryService.py @@ -377,17 +377,22 @@ class VikingDBMemoryService(Service): start_time = time.time() while time.time() - start_time < timeout: try: - # 使用类中定义的集合名称常量 collection_info = self.get_collection(MEMORY_COLLECTION_NAME) - status = collection_info.get("Status", "UNKNOWN") + # 打印完整集合信息用于调试 + logger.info(f"集合详细信息: {json.dumps(collection_info, ensure_ascii=False)}") + + # 尝试多种可能的状态字段名 + status = collection_info.get("Status") or collection_info.get("status") or "UNKNOWN" logger.info(f"集合 '{MEMORY_COLLECTION_NAME}' 当前状态: {status}") - if status == "READY": + + # 检查是否为就绪状态(可能的值:READY, RUNNING, ACTIVE等) + if status in ["READY", "RUNNING", "ACTIVE"]: return True time.sleep(interval) except Exception as e: - logger.info(f"检查集合状态失败: {e}") + logger.error(f"检查集合状态失败: {str(e)}") time.sleep(interval) - logger.info(f"集合 '{MEMORY_COLLECTION_NAME}' 在{timeout}秒内未就绪") + logger.error(f"集合 '{MEMORY_COLLECTION_NAME}' 在{timeout}秒内未就绪") return False def setup_memory_collection(self): diff --git a/dsLightRag/Volcengine/__pycache__/VikingDBMemoryService.cpython-310.pyc b/dsLightRag/Volcengine/__pycache__/VikingDBMemoryService.cpython-310.pyc index 1fb44e15827b19b57cd1ffb8a2fc11ad19ae4620..bfa155546c44355d85d951febfd231474c085017 100644 GIT binary patch delta 3167 zcmai0TWl298J;sUJG)-5Z{WKD)6p)98$>d=jcpNEgNKa{Fggo=2(~hIkMM;aXln+ig}~LFgy%e~~l>mxW%f3`soP=YMv<9)PnrMOgfmu(hy`E|mIiXD*=QV46VmPpyI9_Qn> zYfPTOcWvOL!;gx`E6%Yx@lM6N%oTSw^^k)PH}&Qbh-=uE6N^uJUwhK6z;D~K?PMk$ zb25`w+6{~o>tx29;Sa)HH$WB^vGRTZN*oT#tX+K~V>_Bek;>@g%4IOu<;?C^GkfHuxmy6aYe=24nyUz?xye&y$5; zRrRV(G0++Txy14t)y%@~+C9vef#{q_*0r!U@l;(yr_@Ca-LfpVvT<(3xb=l;%Xava z;Oes6eo5NN@ib3}Kh`zn55Uh#U~aMP#GPq7mdH>&R(eCDZaqN_u+rd%1726OcsA)8 zv{HP^=2ziImWy#$o8~E3=a!w_82u?y`#JsD*o#3HD4eU`#EP%L_;Em7ywK28zF|Yu ziC;7fybx&%rMu;m-rshe7Ic>#j!d0+g^ge-ZG%*Db+Tv7Sg+ zStpqhL)&iChAV2cnrF#$L0j#S3}SgL@5~d=G~VBWLi0v|%wDd&J~h5W+->~g-j|?r z9`JR71>K8Udiq4mCh^+#k)D@HKdcRL=^?-DfxiN3{Zek3$un7+u2jlOP(JBWMmpaT zJx#e|()tE$z6O{B%mXd}5Yy7&c_;ce!2KrR|70kt?pYX>X?RC6Oj-dK0n)F3O|DW& z%I|~x6I1UJ70q*WOT5wC)_rNkleaaz2ww$v5%3z|7T{yu6&hRKQQJF6-nYQ`Ui#8G z@*vc^CGDzhyIpN+{rOXf?CfAgu5XHVWQs2ebJsN%eKy?IBJYsz!CIaM2JKucZH7Z$ zj#Usu#7KCIEsDi(Q@NbOHC-ya8{VoK2cdRTu-0_`Hpn{!u9i$YTuNTY&+5VG;PA-t zSnu$_K(zPK{^6n6P|sjgF1y^S2oytf3@7Pep{qG*@*fauEa+si$wVw2pR%~DlPuB3 zSYRyPZ+$WU0a1=Jr_3v}n)l6VwMvdn(#d*uUrw20jylg~bw``iP3HJc89CilP3=qS zHRU=y`KJT>0~x^MAc~>%jIt z{su~FjLM^%u3b^lzPmoGjrm5RJr5lp^N)OXXsCba(3sIfT{;>S54U&s(>xp`E8Yb7 zF<_<7tE1}?=bi02C^dJGH@iWk1gd6xU=tlrY z2;3rPg46RPnI3obflOkOKMK)NzyM$xN+I0a$;6bEcP26>KM8>eI0g7B;2K~DK=%CG zpcVjK0PN`1^vwfvxxjn2ZX27uQTU9IA!eciq-<(v3s@uz)N zyTZf`x4QmV%*-Un5~rH$Pi4l(k&l|O`P(o)D7Nko)5UgZe;47!{g>HoVRRiiBpY7~ q4LJuu(%a6`wmZJGi z*DtrW6T?%yh;!aC?Xz~?HP7z$Du2=6Qd?W0yB*LC=mGSK?+1G}Z38KxP5dGFIW|xk z4eeHqPO!T~I=r227gxfM8GFFkTlrIXkgdL_A*qamO);9s^8mR65a++6HEn@#&4SKQ`=VGorb1 zU{4I9$JfjK#esK};1c~DgCL}YStP)ZlH(TfY~vl)BAlk{gw4%62A&{=sOD+OtmEkk zKJR!&<}3Kt22MKsggDiFk#&ds&j66(Xjrb=(^Cb<^$c#$lL_v30Ey#uj_a&i zG@NA07Pq(T>_WCzm{(_-$!x-LOmnr)Fd7qD%j=UfFs<_zaBKq4NZAR?=KM<#1pG`F zAqB(JGWK~-PrG)mHVxvhEmx;INFLV`X-@UYV6w`QWF7!*@HFVB01)ylAP2AjHO+uw zvhqRegxY%wv_?QC;eMlRWg4-syzkJ4a zT>dn1Jzb_dC`l(hpXZkNb9-<35n@yWi*It=gnPy@lLgAh7-XmowYq(d3Sj5KkB0oJ zXo+IlGssgS=kUwWmFZ&K)6Vd$r*qpWZj3%fYVXs}!LAImP~~FBX13`$Oiln!i))>| zO&c~uhxkqBR2ftKCK)5j3uFrk%i(d*hGoluk3!{c_i(IjwT1W#5PcPZNIi{$vKa3zoshGyVa}_7Zvqwp3joBl+IaqnejVH!fF8iTS14NVUzwB3@Q-9@4*@O# zq}^c6TBVYd4?}*>r5_N@eM@vp-0B-TSiWn?-x_{|Z-9Ff@Frjd@ITE}y8Ay=hjG;$ zrcM7aed!z-gZj9nJ#}cr({k(2pUS|_-K;Ji$kC3>@|(iiwaAWKjSlrsVd9SfI1Omi zo^$QI6^;07+zP>r- zi<}=U88ov?s?;igr5{-!GhJK>OjX808 zcqAAP%ubF!dQ!YKd~ok!%9BiJA1#o-3wQ)@H|Kl;LliQ)+N95jy(0tdp5Y(V{xMQh zKH-k^_1{nWpEi;=6nIru^zsNp}7XM!CnI` z-vBDV4H}GSCqE8)7BENP)wwB7&w_M*-qWWF$qfH2L?-}v=5Pl}5tJdF%-Ll(Rj_yx z0vnJ5yaM<-U?)Hp`8%M#3pfD4PO9dE%Y@66{h9<`{puqoX=k%*4;h(}Jf^h)@QS&j zD57QG!9&}cHp{UWGR<5foi|N7mSZtm^Cw6vI_L#W(<&s%(in}sV74$nkM*eqhrdr8 zen`BsFG?58?S1X5cF7S?XbbF(tTyT 0: + for result in response['data']['result_list']: + if 'memory_info' in result and result['memory_info']: + memories.append({ + 'memory_info': result['memory_info'], + 'score': result['score'] + }) + + if memories: + if retry_attempt > 0: + print("重试后搜索成功。") + print(f"找到 {len(memories)} 条相关记忆:") + for i, memory in enumerate(memories, 1): + print( + f" {i}. (相关度: {memory['score']:.3f}): {json.dumps(memory['memory_info'], ensure_ascii=False, indent=2)}") + else: + print("未找到相关记忆。") + return memories + + except Exception as e: + error_message = str(e) + if "1000023" in error_message: + retry_attempt += 1 + print(f"记忆索引正在构建中。将在60秒后重试... (尝试次数 {retry_attempt})") + time.sleep(60) + else: + print(f"搜索记忆时出错 (不可重试): {e}") + return [] + + +def handle_conversation_turn(memory_service, llm_client, collection_name, user_id, user_message, conversation_history): + """处理一轮对话,包括记忆搜索和LLM响应。""" + print("\n" + "=" * 60) + print(f"用户: {user_message}") + + relevant_memories = search_relevant_memories(memory_service, collection_name, user_id, user_message) + + system_prompt = "你是一个富有同情心、善于倾听的AI伙伴,拥有长期记忆能力。你的目标是为用户提供情感支持和温暖的陪伴。" + if relevant_memories: + memory_context = "\n".join( + [f"- {json.dumps(mem['memory_info'], ensure_ascii=False)}" for mem in relevant_memories]) + system_prompt += f"\n\n这是我们过去的一些对话记忆,请参考:\n{memory_context}\n\n请利用这些信息来更好地理解和回应用户。" + + print("AI正在思考...") + + try: + messages = [{"role": "system", "content": system_prompt}] + conversation_history + [ + {"role": "user", "content": user_message}] + completion = llm_client.chat.completions.create( + model="doubao-seed-1-6-flash-250715", + messages=messages + ) + assistant_reply = completion.choices[0].message.content + except Exception as e: + print(f"LLM调用失败: {e}") + assistant_reply = "抱歉,我现在有点混乱,无法回应。我们可以稍后再聊吗?" + + print(f"伙伴: {assistant_reply}") + + conversation_history.extend([ + {"role": "user", "content": user_message}, + {"role": "assistant", "content": assistant_reply} + ]) + return assistant_reply + + +def archive_conversation(memory_service, collection_name, user_id, assistant_id, conversation_history, topic_name): + """将对话历史归档到记忆数据库。""" + if not conversation_history: + print("没有对话可以归档。") + return False + + print(f"\n正在归档关于 '{topic_name}' 的对话...") + session_id = f"{topic_name}_{int(time.time())}" + metadata = { + "default_user_id": user_id, + "default_assistant_id": assistant_id, + "time": int(time.time() * 1000) + } + + try: + memory_service.add_session( + collection_name=collection_name, + session_id=session_id, + messages=conversation_history, + metadata=metadata + ) + print(f"对话已成功归档,会话ID: {session_id}") + print("正在等待记忆索引更新...") + return True + except Exception as e: + print(f"归档对话失败: {e}") + return False + + +def main(): + print("开始端到端记忆测试...") + + try: + memory_service, llm_client = initialize_services() + collection_name = "emotional_support" + user_id = "xiaoming" + assistant_id = "assistant" + ensure_collection_exists(memory_service, collection_name) + except Exception as e: + print(f"初始化失败: {e}") + return + + print("\n--- 阶段 1: 初始对话 ---") + initial_conversation_history = [] + handle_conversation_turn( + memory_service, llm_client, collection_name, user_id, + "你好,我是小明,今年18岁,但压力好大。", + initial_conversation_history + ) + handle_conversation_turn( + memory_service, llm_client, collection_name, user_id, + "马上就要高考了,家里人的期待好高。", + initial_conversation_history + ) + + print("\n--- 阶段 2: 归档记忆 ---") + archive_conversation( + memory_service, collection_name, user_id, assistant_id, + initial_conversation_history, "study_stress_discussion" + ) + + print("\n--- 阶段 3: 验证记忆 ---") + verification_conversation_history = [] + handle_conversation_turn( + memory_service, llm_client, collection_name, user_id, + "我最近很焦虑,不知道该怎么办。", + verification_conversation_history + ) + + print("\n端到端记忆测试完成!") + + +if __name__ == "__main__": + main() \ No newline at end of file