You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

96 lines
2.9 KiB

from elasticsearch import Elasticsearch
from Config.Config import ES_CONFIG
from Util.EsMappingUtil import create_vector_index, delete_index, create_text_index, delete_text_index
# Initialize the Elasticsearch connection from the project-level ES_CONFIG.
# Only these four options are forwarded to the client, in this order.
es = Elasticsearch(
    **{
        option: ES_CONFIG[option]
        for option in ("hosts", "basic_auth", "verify_certs", "ssl_show_warn")
    }
)
def manage_index(action, index_type="vector", index_name=None, dims=200):
    """Create or delete an Elasticsearch index.

    :param action: 'create' or 'delete'.
    :param index_type: 'vector' or 'text'.
    :param index_name: Index name. When None, it defaults to "knowledge_base"
        for vector indexes and "raw_texts" for text indexes.
    :param dims: Vector dimensionality; only used when creating a vector index.
    :return: Whatever the underlying create/delete helper returns.
    :raises ValueError: If ``action`` or ``index_type`` is not a supported value.
    """
    # Reject unknown index types up front. Without this check, anything other
    # than "vector" falls through to the text branch, so a typo combined with
    # action="delete" would silently remove the raw-text index.
    if index_type not in ("vector", "text"):
        raise ValueError("index_type参数必须是'vector'或'text'")

    is_vector = index_type == "vector"
    if index_name is None:
        index_name = "knowledge_base" if is_vector else "raw_texts"

    if action == "create":
        if is_vector:
            return create_vector_index(index_name, dims)
        return create_text_index(index_name)

    if action == "delete":
        if is_vector:
            return delete_index(index_name)
        return delete_text_index(index_name)

    raise ValueError("action参数必须是'create''delete'")
# Usage example: rebuild both indexes when this file is run as a script.
if __name__ == "__main__":
    # Delete the existing indexes first (if any): vector, then text.
    for stale_type in ("vector", "text"):
        manage_index("delete", stale_type)

    # Create a fresh vector index, then a fresh raw-text index.
    manage_index("create", "vector", dims=200)
    manage_index("create", "text")
# Updated mapping for the knowledge_base index.
knowledge_base_mapping = {
    "properties": {
        # Text field indexed with the "ik_max_word" analyzer and searched
        # with "ik_smart".
        "content": {
            "type": "text",
            "analyzer": "ik_max_word",
            "search_analyzer": "ik_smart",
            # Keyword sub-field; the ignore_above limit can be tuned
            # (the original note suggests values such as 1024 / 2048).
            "fields": {"keyword": {"type": "keyword", "ignore_above": 8192}},
        },
        # NOTE(review): the original comment says this field is meant to be
        # added to the raw_texts mapping -- confirm it is intended here too.
        "raw_text": {
            "type": "text",
            "analyzer": "ik_max_word",
            # Allows aggregations on the long text field.
            "fielddata": True,
        },
        "vector": {
            "type": "dense_vector",
            "dims": 200,
            "index": True,
            "similarity": "cosine",
        },
        "timestamp": {
            "type": "date",
            "format": "strict_date_optional_time||epoch_millis",
        },
    },
}
# Updated mapping for the raw_texts index: the raw text itself plus a
# timestamp.
raw_texts_mapping = {
    "properties": {
        "raw_text": {"type": "text", "analyzer": "ik_max_word"},
        "timestamp": {
            "type": "date",
            "format": "strict_date_optional_time||epoch_millis",
        },
    },
}