main
HuangHai 1 month ago
parent a145af2490
commit a6a04529ec

@ -1,7 +1,7 @@
from elasticsearch import Elasticsearch
from Config.Config import ES_CONFIG
from Util.EsMappingUtil import create_vector_index, delete_index # 导入工具函数
from Util.EsMappingUtil import create_vector_index, delete_index, create_text_index, delete_text_index
# 初始化ES连接
es = Elasticsearch(
@ -11,22 +11,37 @@ es = Elasticsearch(
ssl_show_warn=ES_CONFIG["ssl_show_warn"]
)
def manage_index(action, index_type="vector", index_name=None, dims=200):
    """Create or delete an Elasticsearch index of the given type.

    :param action: 'create' or 'delete'
    :param index_type: 'vector' or 'text'
    :param index_name: index name; when None it defaults to "knowledge_base"
        for a vector index and "raw_texts" for a text index
    :param dims: vector dimension (only used when creating a vector index)
    :return: the return value of the underlying create/delete helper
    :raises ValueError: if ``action`` or ``index_type`` is not an allowed value
    """
    # Validate both selectors up front so a typo in index_type cannot fall
    # through to the text-index branch and touch the wrong index.
    if action not in ("create", "delete"):
        raise ValueError("action参数必须是'create'或'delete'")
    if index_type not in ("vector", "text"):
        raise ValueError("index_type参数必须是'vector'或'text'")

    is_vector = index_type == "vector"
    if index_name is None:
        index_name = "knowledge_base" if is_vector else "raw_texts"

    if action == "create":
        if is_vector:
            return create_vector_index(index_name, dims)
        return create_text_index(index_name)

    # action == "delete"
    if is_vector:
        return delete_index(index_name)
    return delete_text_index(index_name)
# Usage example
if __name__ == "__main__":
    # Delete the existing indices first (if they exist)
    manage_index("delete", "vector")
    manage_index("delete", "text")

    # Create a new vector index
    manage_index("create", "vector", dims=200)

    # Create a new raw-text index
    manage_index("create", "text")

@ -1,10 +1,9 @@
import datetime
from datetime import datetime
from elasticsearch import Elasticsearch
from Config.Config import ES_CONFIG
from T2_Txt2Vec import text_to_embedding
from Util.EsMappingUtil import create_vector_index # 导入工具函数
from Util.EsMappingUtil import create_vector_index
# 初始化ES连接
es = Elasticsearch(
@ -14,7 +13,54 @@ es = Elasticsearch(
ssl_show_warn=ES_CONFIG["ssl_show_warn"]
)
def save_vector(text, index_name="knowledge_base"):
    """Embed ``text`` and index it in ES together with its vector.

    :param text: text to embed and store
    :param index_name: target index name
    :return: the ``_id`` reported by ES for the indexed document
    :raises Exception: any failure is printed and then re-raised unchanged
    """
    try:
        # Compute the embedding before building the document.
        embedding = text_to_embedding(text)
        response = es.index(
            index=index_name,
            document={
                "text": text,
                "vector": embedding,
                "timestamp": datetime.now(),
            },
        )
        doc_id = response["_id"]
        print(f"向量化文档已保存ID: {doc_id}")
        return doc_id
    except Exception as exc:
        print(f"保存失败: {str(exc)}")
        raise
def save_raw_text(text, index_name="raw_texts"):
    """Index ``text`` in ES as a raw-text document with a timestamp.

    :param text: text to store
    :param index_name: target index name
    :return: the ``_id`` reported by ES for the indexed document
    :raises Exception: any failure is printed and then re-raised unchanged
    """
    try:
        payload = {"text": text, "timestamp": datetime.now()}
        response = es.index(index=index_name, document=payload)
        doc_id = response["_id"]
        print(f"原始文本已保存ID: {doc_id}")
        return doc_id
    except Exception as exc:
        print(f"保存失败: {str(exc)}")
        raise
# Usage example
if __name__ == "__main__":
    # Create the vector index via the helper (called once)
    create_vector_index(dims=200)

    # Sample text
    sample_text = "如何更换支付宝绑定银行卡"

    # Save the vectorized document
    save_vector(sample_text)

    # Save the raw text
    # NOTE(review): nothing in this script creates the raw-text index;
    # presumably it is created beforehand elsewhere — confirm.
    save_raw_text(sample_text)

@ -61,4 +61,49 @@ def delete_index(index_name):
return False
except Exception as e:
print(f"删除索引失败: {str(e)}")
raise
def create_text_index(index_name="raw_texts"):
    """Create the raw-text index unless it already exists.

    The mapping declares ``text`` as a full-text field (``ik_max_word``
    analyzer, ``ik_smart`` search analyzer) and ``timestamp`` as a date.

    :param index_name: name of the index to create
    :return: True if the index was created, False if it already existed
    :raises Exception: any failure is printed and then re-raised unchanged
    """
    text_field = {
        "type": "text",
        "analyzer": "ik_max_word",
        "search_analyzer": "ik_smart",
    }
    index_body = {
        "mappings": {
            "properties": {
                "text": text_field,
                "timestamp": {"type": "date"},
            }
        }
    }

    try:
        # Guard clause: leave an existing index untouched.
        if es.indices.exists(index=index_name):
            print(f"原始文本索引 {index_name} 已存在")
            return False

        es.indices.create(index=index_name, body=index_body)
        print(f"原始文本索引 {index_name} 创建成功")
        return True
    except Exception as exc:
        print(f"创建原始文本索引失败: {str(exc)}")
        raise
def delete_text_index(index_name="raw_texts"):
    """Delete the raw-text index if it exists.

    :param index_name: name of the index to delete
    :return: True if the index was deleted, False if it did not exist
    :raises Exception: any failure is printed and then re-raised unchanged
    """
    try:
        # Guard clause: nothing to delete when the index is absent.
        if not es.indices.exists(index=index_name):
            print(f"原始文本索引 {index_name} 不存在")
            return False

        es.indices.delete(index=index_name)
        print(f"原始文本索引 {index_name} 删除成功")
        return True
    except Exception as exc:
        print(f"删除原始文本索引失败: {str(exc)}")
        raise
Loading…
Cancel
Save