From 5e08b8c53e804036b0ec66abc3a3e5202ef4605a Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Fri, 27 Jun 2025 14:35:03 +0800 Subject: [PATCH] 'commit' --- dsRag/ElasticSearch/T6_XiangLiangQuery.py | 53 +++++++++++------- dsRag/Util/EsSearchUtil.py | 2 +- .../__pycache__/EsSearchUtil.cpython-310.pyc | Bin 0 -> 2627 bytes 3 files changed, 33 insertions(+), 22 deletions(-) create mode 100644 dsRag/Util/__pycache__/EsSearchUtil.cpython-310.pyc diff --git a/dsRag/ElasticSearch/T6_XiangLiangQuery.py b/dsRag/ElasticSearch/T6_XiangLiangQuery.py index 07a27b4e..4d6a8466 100644 --- a/dsRag/ElasticSearch/T6_XiangLiangQuery.py +++ b/dsRag/ElasticSearch/T6_XiangLiangQuery.py @@ -35,23 +35,11 @@ def init_es_pool(): return es_pool -# 将文本转换为嵌入向量 -def text_to_embedding(text): - words = jieba.lcut(text) # 使用 jieba 分词 - print(f"文本: {text}, 分词结果: {words}") - try: - embeddings = [model[word] for word in words if word in model] - logger.info(f"有效词向量数量: {len(embeddings)}") - if embeddings: - avg_embedding = sum(embeddings) / len(embeddings) - logger.info(f"生成的平均向量: {avg_embedding[:5]}...") # 打印前 5 维 - return avg_embedding - else: - logger.warning("未找到有效词,返回零向量") - return [0.0] * model.vector_size - except Exception as e: - logger.error(f"向量转换失败: {str(e)}") - return [0.0] * model.vector_size +# 导入EsSearchUtil +from Util.EsSearchUtil import EsSearchUtil + +# 初始化EsSearchUtil +es_search_util = EsSearchUtil(ES_CONFIG) def main(): @@ -60,6 +48,7 @@ def main(): # 测试查询 query = "小学数学中有哪些模型" + query_tags = ["MATH_1"] # 默认搜索标签,可修改 print(f"\n=== 开始执行查询 ===") print(f"原始查询文本: {query}") @@ -69,7 +58,7 @@ def main(): # 向量搜索 print("\n=== 向量搜索阶段 ===") print("1. 文本分词和向量化处理中...") - query_embedding = text_to_embedding(query) + query_embedding = es_search_util.text_to_embedding(query) print(f"2. 生成的查询向量维度: {len(query_embedding)}") print(f"3. 前5维向量值: {query_embedding[:5]}") @@ -79,7 +68,18 @@ def main(): body={ "query": { "script_score": { - "query": {"match_all": {}}, + "query": { + "bool": { + "should": [ + { + "terms": { + "tags.tags": query_tags + } + } + ], + "minimum_should_match": 1 + } + }, "script": { "source": "double score = cosineSimilarity(params.query_vector, 'embedding'); return score >= 0 ? score : 0", "params": {"query_vector": query_embedding} @@ -98,8 +98,19 @@ def main(): index=ES_CONFIG['index_name'], body={ "query": { - "match": { - "user_input": query + "bool": { + "must": [ + { + "match": { + "user_input": query + } + }, + { + "terms": { + "tags.tags": query_tags + } + } + ] } }, "size": 5 diff --git a/dsRag/Util/EsSearchUtil.py b/dsRag/Util/EsSearchUtil.py index 5be19c8a..c0e436df 100644 --- a/dsRag/Util/EsSearchUtil.py +++ b/dsRag/Util/EsSearchUtil.py @@ -11,7 +11,7 @@ class EsSearchUtil: self.es_config = es_config self.es_conn = Elasticsearch( hosts=es_config['hosts'], - basic_auth=(es_config['username'], es_config['password']), + basic_auth=es_config['basic_auth'], verify_certs=False ) diff --git a/dsRag/Util/__pycache__/EsSearchUtil.cpython-310.pyc b/dsRag/Util/__pycache__/EsSearchUtil.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b0a8cb27fee52a7b6c3b705436dac01b565a8512 GIT binary patch literal 2627 zcmb7G&2Jk;6yKTsu;Z-LG!$qnQC1-2nnqC%oLt%>(ZVf4R0zqcST;M`W>eei&g_;t zawQ}ZHB}QD1QY?GO$t?2LHUr7QmI88LE=A{3kPbuIrWaz!kZa;W2YBbYiHlQdGqn! z@Auv~%I9+$Jm(L*GyTVqru`*4{f`cvSKwt6&=8HFidIEsgoKPMxQyjjFB`9GL?_0a zMvM<|*(CUwRx~$Yj#flGf3)JUIP_WIQGa^U7wY~e-Q);p1FejR1`8vC=d`k}mMt4( zlvpG)hhXthk|jeRSs>-eFi07)o#e>~NLjKCwjSg5QFcsj_f{NMeB8I4kWF}Bg_k`C zO%pXWSBp^-5wr_>1n-j0Ea{-f#Dw*%~$9tdZcXE3}_({xhte@Qt%LTn%XK558TrQ?L#d@g7!@vzUB%h)(Q^MIisn~pcxFUPBK7L zqlrm&Nw_~W$!+G-upYb2k7!UHs(P_M?Ru5U?7JjNrYeCW2b_bBAF;3&91E*q#iL=| z*eeOjCQc^-ZMZ4~<4z%Uwop9a&>&7|t-HjbgU){EaJN%(_H(;`DiFwGQ9}KITWV#{ zn9>3UOcoT0jq{gOcVGMR`S!|py|K+Q)~%HiyON{E+gGpO z|KXaNk#5m76l5tm_kaDQ_2re$osZjBR@$Hb2-|GhDdLtKRafub{-$$dNv*5wJ73JH z+M7-RwvKdhrU6STG2^xjR<~Eg^ab#i?uF(tK2i~Q`l4t=^!tiPe!4M5L(-+*ei)Sa z*clsP9q@Uyg=z!l+)`a%Ak%wl`IFzUwqr#r1H7l92072yKZ84=rp z(_Xmj6qJ*N2X`N1#D536AKdil^eOO>?tlieC`FI%6AE<^sOmGUvPYg-)|V|P%4|Fg zvm|!etPR;^BOA{?F_2qomB}KaJH-x|j>AXEL*i}}x3}x^oaN!6vh$BmdYYOq3>D?1IMdzjoy6ivR!s literal 0 HcmV?d00001