diff --git a/dsRag/ElasticSearch/T6_XiangLiangQuery.py b/dsRag/ElasticSearch/T6_XiangLiangQuery.py index 07a27b4e..4d6a8466 100644 --- a/dsRag/ElasticSearch/T6_XiangLiangQuery.py +++ b/dsRag/ElasticSearch/T6_XiangLiangQuery.py @@ -35,23 +35,11 @@ def init_es_pool(): return es_pool -# 将文本转换为嵌入向量 -def text_to_embedding(text): - words = jieba.lcut(text) # 使用 jieba 分词 - print(f"文本: {text}, 分词结果: {words}") - try: - embeddings = [model[word] for word in words if word in model] - logger.info(f"有效词向量数量: {len(embeddings)}") - if embeddings: - avg_embedding = sum(embeddings) / len(embeddings) - logger.info(f"生成的平均向量: {avg_embedding[:5]}...") # 打印前 5 维 - return avg_embedding - else: - logger.warning("未找到有效词,返回零向量") - return [0.0] * model.vector_size - except Exception as e: - logger.error(f"向量转换失败: {str(e)}") - return [0.0] * model.vector_size +# 导入EsSearchUtil +from Util.EsSearchUtil import EsSearchUtil + +# 初始化EsSearchUtil +es_search_util = EsSearchUtil(ES_CONFIG) def main(): @@ -60,6 +48,7 @@ def main(): # 测试查询 query = "小学数学中有哪些模型" + query_tags = ["MATH_1"] # 默认搜索标签,可修改 print(f"\n=== 开始执行查询 ===") print(f"原始查询文本: {query}") @@ -69,7 +58,7 @@ def main(): # 向量搜索 print("\n=== 向量搜索阶段 ===") print("1. 文本分词和向量化处理中...") - query_embedding = text_to_embedding(query) + query_embedding = es_search_util.text_to_embedding(query) print(f"2. 生成的查询向量维度: {len(query_embedding)}") print(f"3. 前5维向量值: {query_embedding[:5]}") @@ -79,7 +68,18 @@ def main(): body={ "query": { "script_score": { - "query": {"match_all": {}}, + "query": { + "bool": { + "should": [ + { + "terms": { + "tags.tags": query_tags + } + } + ], + "minimum_should_match": 1 + } + }, "script": { "source": "double score = cosineSimilarity(params.query_vector, 'embedding'); return score >= 0 ? score : 0", "params": {"query_vector": query_embedding} @@ -98,8 +98,19 @@ def main(): index=ES_CONFIG['index_name'], body={ "query": { - "match": { - "user_input": query + "bool": { + "must": [ + { + "match": { + "user_input": query + } + }, + { + "terms": { + "tags.tags": query_tags + } + } + ] } }, "size": 5 diff --git a/dsRag/Util/EsSearchUtil.py b/dsRag/Util/EsSearchUtil.py index 5be19c8a..c0e436df 100644 --- a/dsRag/Util/EsSearchUtil.py +++ b/dsRag/Util/EsSearchUtil.py @@ -11,7 +11,7 @@ class EsSearchUtil: self.es_config = es_config self.es_conn = Elasticsearch( hosts=es_config['hosts'], - basic_auth=(es_config['username'], es_config['password']), + basic_auth=es_config['basic_auth'], verify_certs=False ) diff --git a/dsRag/Util/__pycache__/EsSearchUtil.cpython-310.pyc b/dsRag/Util/__pycache__/EsSearchUtil.cpython-310.pyc new file mode 100644 index 00000000..b0a8cb27 Binary files /dev/null and b/dsRag/Util/__pycache__/EsSearchUtil.cpython-310.pyc differ