main
HuangHai 1 month ago
parent bd28335d5d
commit e9de960359

@ -4,6 +4,7 @@ from elasticsearch import Elasticsearch
from Config.Config import ES_CONFIG
from T2_Txt2Vec import text_to_embedding
from Util.EsMappingUtil import create_vector_index # 导入工具函数
# 初始化ES连接
es = Elasticsearch(
@ -13,34 +14,25 @@ es = Elasticsearch(
ssl_show_warn=ES_CONFIG["ssl_show_warn"]
)
def create_vector_index(index_name="knowledge_base"):
"""创建带有向量字段的索引(适配200维腾讯词向量)"""
mapping = {
"mappings": {
"properties": {
"text": {"type": "text", "analyzer": "ik_max_word"},
"vector": {
"type": "dense_vector",
"dims": 200, # 修改为腾讯词向量实际维度
"index": True,
"similarity": "cosine"
},
"timestamp": {"type": "date"}
def save_to_es(text, index_name="knowledge_base"):
"""将文本向量化后保存到ES"""
# 检查是否已存在相同文本
query = {
"query": {
"term": {
"text.keyword": {
"value": text
}
}
}
}
try:
if es.indices.exists(index=index_name):
es.indices.delete(index=index_name)
es.indices.create(index=index_name, body=mapping)
print(f"索引 {index_name} 创建成功(200维)")
except Exception as e:
print(f"创建索引失败: {str(e)}")
raise
def save_to_es(text, index_name="knowledge_base"):
"""将文本向量化后保存到ES"""
exists = es.search(index=index_name, body=query)
if exists["hits"]["total"]["value"] > 0:
print(f"文档已存在,跳过保存: {text}")
return exists["hits"]["hits"][0]["_id"] # 返回现有文档ID
# 保存新文档
vector = text_to_embedding(text)
doc = {
"text": text,
@ -51,12 +43,12 @@ def save_to_es(text, index_name="knowledge_base"):
try:
res = es.index(index=index_name, document=doc)
print(f"文档已保存ID: {res['_id']}")
return res
return res["_id"]
except Exception as e:
print(f"保存到ES失败: {str(e)}")
raise
# 使用示例
if __name__ == "__main__":
create_vector_index() # 首次运行前执行
create_vector_index(dims=200) # 使用工具函数创建索引
save_to_es("如何更换支付宝绑定银行卡")

@ -0,0 +1,49 @@
from elasticsearch import Elasticsearch
from Config.Config import ES_CONFIG
# Module-level Elasticsearch client, built once at import time from ES_CONFIG
# (hosts, basic-auth credentials, certificate verification and the SSL-warning
# toggle). The index helpers in this module use this shared instance.
es = Elasticsearch(
    hosts=ES_CONFIG["hosts"],
    basic_auth=ES_CONFIG["basic_auth"],
    verify_certs=ES_CONFIG["verify_certs"],
    ssl_show_warn=ES_CONFIG["ssl_show_warn"]
)
def get_vector_mapping(dims=200, *, similarity="cosine", analyzer="ik_max_word"):
    """Build the index-creation body for a text + dense-vector index.

    The returned structure declares three fields: ``text`` (full-text,
    analyzed), ``vector`` (an indexed ``dense_vector``) and ``timestamp``
    (a date).

    Args:
        dims: Number of dimensions of the ``vector`` field.
        similarity: Similarity metric stored on the ``vector`` field.
            Defaults to ``"cosine"``, the value that used to be hard-coded.
        analyzer: Analyzer name for the ``text`` field. Defaults to
            ``"ik_max_word"``, the value that used to be hard-coded.

    Returns:
        A freshly built dict with a single top-level ``"mappings"`` key; a
        new object is created on every call, so callers may mutate it.
    """
    vector_field = {
        "type": "dense_vector",
        "dims": dims,
        "index": True,
        "similarity": similarity,
    }
    return {
        "mappings": {
            "properties": {
                "text": {"type": "text", "analyzer": analyzer},
                "vector": vector_field,
                "timestamp": {"type": "date"},
            }
        }
    }
def _existing_vector_dims(mapping_response, index_name):
    """Return the ``dims`` of the ``vector`` field in a get-mapping response.

    Returns 0 when the index entry, its ``properties``, the ``vector`` field
    or its ``dims`` setting cannot be found, so the caller reports a
    dimension mismatch instead of failing with ``KeyError``.
    """
    if index_name in mapping_response:
        index_entry = mapping_response[index_name]
    elif len(mapping_response) == 1:
        # The get-mapping response is keyed by concrete index name, so a
        # request made through an alias comes back under a different key.
        index_entry = mapping_response[next(iter(mapping_response))]
    else:
        return 0
    properties = index_entry.get("mappings", {}).get("properties", {})
    return properties.get("vector", {}).get("dims", 0)


def create_vector_index(index_name="knowledge_base", dims=200):
    """Create the vector index if it is missing, or validate an existing one.

    An existing index is never deleted or modified; only its ``vector``
    field dimension is compared against ``dims``.

    Args:
        index_name: Name of the index to create or check.
        dims: Expected number of dimensions of the ``vector`` field.

    Returns:
        True if the index was created, or already exists with a matching
        dimension; False if it exists with a different (or undeterminable)
        dimension.

    Raises:
        Exception: Any error raised by the Elasticsearch client is printed
            and then re-raised unchanged.
    """
    mapping = get_vector_mapping(dims)
    try:
        if not es.indices.exists(index=index_name):
            es.indices.create(index=index_name, body=mapping)
            print(f"索引 {index_name} 创建成功({dims}维)")
            return True

        current_mapping = es.indices.get_mapping(index=index_name)
        current_dims = _existing_vector_dims(current_mapping, index_name)
        if current_dims == dims:
            print(f"索引 {index_name} 已存在且维度正确({dims}维),无需操作")
            return True

        print(f"警告:索引 {index_name} 已存在但维度不匹配(当前:{current_dims}维,需要:{dims}维)")
        return False
    except Exception as e:
        print(f"操作索引失败: {str(e)}")
        raise
Loading…
Cancel
Save