Files
dsProject/dsRag/ElasticSearch/T1_RebuildMapping.py
2025-08-14 15:45:08 +08:00

47 lines
1.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import warnings
from elasticsearch import Elasticsearch
from Config import Config
# Silence the insecure-TLS warnings that the verify_certs=False connection
# below would otherwise emit.
for _tls_warning in (
    'Connecting to .* using TLS with verify_certs=False is insecure',
    'Unverified HTTPS request is being made to host',
):
    warnings.filterwarnings('ignore', message=_tls_warning)

# Build the Elasticsearch client from the project configuration.
# Certificate verification is switched off for this connection.
_es_settings = Config.ES_CONFIG
es = Elasticsearch(
    hosts=_es_settings['hosts'],
    basic_auth=_es_settings['basic_auth'],
    verify_certs=False,
)
# Index mapping, assembled from the per-field definitions below.

# Vector field holding the embedding of each document.
_embedding_field = {
    "type": "dense_vector",
    "dims": 200,  # embedding vectors have 200 dimensions
    "index": True,
    "similarity": "l2_norm",  # compare vectors by L2 distance
}

# Object field grouping the keyword tags with the full text content.
_tags_field = {
    "type": "object",
    "properties": {
        "tags": {"type": "keyword"},
        "full_content": {"type": "text"},
    },
}

mapping = {
    "mappings": {
        "properties": {
            "embedding": _embedding_field,
            "user_input": {"type": "text"},
            "tags": _tags_field,
        }
    }
}
# Rebuild the index from scratch: drop any existing index with the configured
# name, then create it again with the mapping defined in this script.
# NOTE: deleting the index also discards every document stored in it.
index_name = Config.ES_CONFIG['index_name']
if es.indices.exists(index=index_name):
    es.indices.delete(index=index_name)
    print(f"删除已存在的索引 '{index_name}'")
# Pass each top-level section of `mapping` (currently only "mappings") as its
# own keyword argument instead of the catch-all `body=` parameter, which the
# 8.x client deprecated in favour of per-field arguments.
es.indices.create(index=index_name, **mapping)
print(f"索引 '{index_name}' 创建成功mapping结构已设置。")