You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

62 lines
1.9 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import datetime
from elasticsearch import Elasticsearch
from Config.Config import ES_CONFIG
from T2_Txt2Vec import text_to_embedding
# Initialise the Elasticsearch client from the project configuration.
# Only these ES_CONFIG entries are forwarded, each under the same keyword name.
_ES_CLIENT_OPTIONS = ("hosts", "basic_auth", "verify_certs", "ssl_show_warn")
es = Elasticsearch(**{option: ES_CONFIG[option] for option in _ES_CLIENT_OPTIONS})
def create_vector_index(index_name: str = "knowledge_base", dims: int = 200) -> None:
    """Create (or recreate) an index with text, dense-vector and date fields.

    WARNING: if ``index_name`` already exists it is deleted before being
    created again, so every document stored in it is lost.

    Args:
        index_name: Name of the index to create.
        dims: Dimensionality declared for the ``vector`` field. Defaults to
            200 (the original comment gives this as the dimension of the
            Tencent word vectors in use). Vectors indexed later must have
            exactly this length.

    Raises:
        Exception: Any error raised by the Elasticsearch client is printed
            and then re-raised unchanged.
    """
    mappings = {
        "properties": {
            # The analyzer name is passed through as-is; it must be available
            # on the target cluster.
            "text": {"type": "text", "analyzer": "ik_max_word"},
            "vector": {
                "type": "dense_vector",
                "dims": dims,
                "index": True,
                "similarity": "cosine",
            },
            "timestamp": {"type": "date"},
        }
    }
    try:
        # Drop any existing index first so the new mapping always applies.
        if es.indices.exists(index=index_name):
            es.indices.delete(index=index_name)
        # Use the typed ``mappings`` keyword rather than the legacy ``body``
        # keyword, matching the keyword-style call used for ``es.index``.
        es.indices.create(index=index_name, mappings=mappings)
        print(f"索引 {index_name} 创建成功({dims}维)")
    except Exception as e:
        print(f"创建索引失败: {e}")
        raise
def save_to_es(text: str, index_name: str = "knowledge_base"):
    """Embed ``text`` and store it with its vector and a timestamp in ES.

    Args:
        text: Text to embed via ``text_to_embedding`` and to store verbatim
            in the ``text`` field.
        index_name: Index that receives the document.

    Returns:
        The response returned by ``es.index``; its ``_id`` entry is printed.

    Raises:
        Exception: Errors raised while indexing are printed and re-raised
            unchanged. Errors raised by ``text_to_embedding`` propagate
            without being printed, because that call is outside the ``try``.
    """
    vector = text_to_embedding(text)
    doc = {
        "text": text,
        "vector": vector,
        # Timezone-aware UTC timestamp. A naive local ``now()`` has no offset
        # and Elasticsearch reads offset-less dates as UTC, which would shift
        # the stored instant by the host's UTC offset.
        "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
    }
    try:
        res = es.index(index=index_name, document=doc)
        print(f"文档已保存ID: {res['_id']}")
        return res
    except Exception as e:
        print(f"保存到ES失败: {e}")
        raise
# Usage example
if __name__ == "__main__":
    sample_question = "如何更换支付宝绑定银行卡"
    # Run before first use so that the index exists.
    create_vector_index()
    save_to_es(sample_question)