|
|
|
from elasticsearch import Elasticsearch
|
|
|
|
from Config.Config import ES_CONFIG
|
|
|
|
|
|
|
|
# Shared module-level Elasticsearch client used by every helper in this file.
# All connection settings are read from ES_CONFIG; each listed key must be
# present there (a missing key raises KeyError at import time).
_ES_CLIENT_OPTIONS = ("hosts", "basic_auth", "verify_certs", "ssl_show_warn")
es = Elasticsearch(**{option: ES_CONFIG[option] for option in _ES_CLIENT_OPTIONS})
|
|
|
|
|
|
|
|
def get_vector_mapping(dims=200):
    """Return the index-creation body for the vector index.

    The body declares three fields: ``text`` (full text, ``ik_max_word``
    analyzer), ``vector`` (an indexed ``dense_vector`` compared with cosine
    similarity) and ``timestamp`` (a date).

    Args:
        dims: Number of dimensions declared for the ``vector`` field.

    Returns:
        A freshly built dict of the form ``{"mappings": {"properties": ...}}``.
    """
    text_field = {"type": "text", "analyzer": "ik_max_word"}
    vector_field = {
        "type": "dense_vector",
        "dims": dims,
        "index": True,
        "similarity": "cosine",
    }
    timestamp_field = {"type": "date"}

    properties = {
        "text": text_field,
        "vector": vector_field,
        "timestamp": timestamp_field,
    }
    return {"mappings": {"properties": properties}}
|
|
|
|
|
|
|
|
def _existing_vector_dims(mapping_response, index_name):
    """Extract the ``dims`` of the ``vector`` field from a get-mapping response.

    Args:
        mapping_response: Mapping-like result of ``es.indices.get_mapping``;
            only ``in``, ``[]`` and iteration over its keys are used.
        index_name: The name that was passed to ``get_mapping``.

    Returns:
        The declared dimension, or 0 when it cannot be determined (no
        ``properties``, no ``vector`` field, no ``dims`` entry, or an
        ambiguous response that is not keyed by ``index_name``).
    """
    if index_name in mapping_response:
        index_entry = mapping_response[index_name]
    else:
        # The response is keyed by concrete index name, so a lookup made
        # through an alias will not contain `index_name` itself. Accept the
        # response only when it describes exactly one index.
        entries = [mapping_response[name] for name in mapping_response]
        if len(entries) != 1:
            return 0
        index_entry = entries[0]

    properties = index_entry.get("mappings", {}).get("properties", {})
    return properties.get("vector", {}).get("dims", 0)


def create_vector_index(index_name="knowledge_base", dims=200):
    """Create the vector index unless it already exists.

    Args:
        index_name: Name of the index to create or check.
        dims: Required dimension of the ``vector`` field.

    Returns:
        True if the index was created, or already exists with a ``vector``
        field of exactly ``dims`` dimensions. False if the index exists but
        its ``vector`` dimension differs or cannot be found; the existing
        index is left untouched in that case.

    Raises:
        Exception: Any error raised by the Elasticsearch client is printed
            and re-raised unchanged.
    """
    mapping = get_vector_mapping(dims)

    try:
        if es.indices.exists(index=index_name):
            current_mapping = es.indices.get_mapping(index=index_name)
            # Tolerant lookup: an index without a `vector` field is reported
            # as a dimension mismatch instead of failing with KeyError.
            current_dims = _existing_vector_dims(current_mapping, index_name)

            if current_dims == dims:
                print(f"索引 {index_name} 已存在且维度正确({dims}维),无需操作")
                return True

            print(f"警告:索引 {index_name} 已存在但维度不匹配(当前:{current_dims}维,需要:{dims}维)")
            return False

        es.indices.create(index=index_name, body=mapping)
        print(f"索引 {index_name} 创建成功({dims}维)")
        return True
    except Exception as e:
        print(f"操作索引失败: {str(e)}")
        raise
|
|
|
|
|
|
|
|
|
|
|
|
def delete_index(index_name):
    """Delete an Elasticsearch index if it exists.

    Args:
        index_name: Name of the index to remove.

    Returns:
        True if the index existed and was deleted, False if it did not exist.

    Raises:
        Exception: Any error raised by the Elasticsearch client is printed
            and re-raised unchanged.
    """
    try:
        # Guard clause: nothing to delete when the index is absent.
        if not es.indices.exists(index=index_name):
            print(f"索引 {index_name} 不存在")
            return False

        es.indices.delete(index=index_name)
        print(f"索引 {index_name} 删除成功")
        return True
    except Exception as err:
        print(f"删除索引失败: {str(err)}")
        raise
|
|
|
|
|
|
|
|
|
|
|
|
def create_text_index(index_name="raw_texts"):
    """Create the raw-text index if it does not exist yet.

    The index declares a ``text`` field (indexed with ``ik_max_word``,
    searched with ``ik_smart``) and a ``timestamp`` date field.

    Args:
        index_name: Name of the index to create.

    Returns:
        True if the index was created, False if it already existed.

    Raises:
        Exception: Any error raised by the Elasticsearch client is printed
            and re-raised unchanged.
    """
    text_field = {
        "type": "text",
        "analyzer": "ik_max_word",
        "search_analyzer": "ik_smart",
    }
    timestamp_field = {"type": "date"}
    mapping = {
        "mappings": {
            "properties": {
                "text": text_field,
                "timestamp": timestamp_field,
            }
        }
    }

    try:
        # Guard clause: an existing index is left as it is.
        if es.indices.exists(index=index_name):
            print(f"原始文本索引 {index_name} 已存在")
            return False

        es.indices.create(index=index_name, body=mapping)
        print(f"原始文本索引 {index_name} 创建成功")
        return True
    except Exception as err:
        print(f"创建原始文本索引失败: {str(err)}")
        raise
|
|
|
|
|
|
|
|
|
|
|
|
def delete_text_index(index_name="raw_texts"):
    """Delete the raw-text index if it exists.

    Args:
        index_name: Name of the index to remove.

    Returns:
        True if the index existed and was deleted, False if it did not exist.

    Raises:
        Exception: Any error raised by the Elasticsearch client is printed
            and re-raised unchanged.
    """
    try:
        # Guard clause: nothing to delete when the index is absent.
        if not es.indices.exists(index=index_name):
            print(f"原始文本索引 {index_name} 不存在")
            return False

        es.indices.delete(index=index_name)
        print(f"原始文本索引 {index_name} 删除成功")
        return True
    except Exception as err:
        print(f"删除原始文本索引失败: {str(err)}")
        raise
|