'commit'
This commit is contained in:
@@ -1,35 +1,16 @@
|
|||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
from elasticsearch import Elasticsearch
|
|
||||||
|
|
||||||
from Config import Config
|
from Config import Config
|
||||||
|
from ElasticSearch.Utils.EsSearchUtil import EsSearchUtil
|
||||||
|
|
||||||
# 抑制HTTPS相关警告
|
# 创建EsSearchUtil实例
|
||||||
warnings.filterwarnings('ignore', message='Connecting to .* using TLS with verify_certs=False is insecure')
|
search_util = EsSearchUtil(Config.ES_CONFIG)
|
||||||
warnings.filterwarnings('ignore', message='Unverified HTTPS request is being made to host')
|
|
||||||
|
|
||||||
|
|
||||||
# 初始化Elasticsearch连接
|
|
||||||
es = Elasticsearch(
|
|
||||||
hosts=Config.ES_CONFIG['hosts'],
|
|
||||||
basic_auth=Config.ES_CONFIG['basic_auth'],
|
|
||||||
verify_certs=False
|
|
||||||
)
|
|
||||||
|
|
||||||
# 查询所有数据
|
# 查询所有数据
|
||||||
def select_all_data(index_name):
|
def select_all_data(index_name):
|
||||||
try:
|
try:
|
||||||
# 构建查询条件 - 匹配所有文档
|
# 调用EsSearchUtil中的select_all_data方法
|
||||||
# 修改查询条件为获取前10条数据
|
response = search_util.select_all_data()
|
||||||
query = {
|
|
||||||
"query": {
|
|
||||||
"match_all": {}
|
|
||||||
},
|
|
||||||
"size": 1000 # 仅获取10条数据
|
|
||||||
}
|
|
||||||
|
|
||||||
# 执行查询
|
|
||||||
response = es.search(index=index_name, body=query)
|
|
||||||
hits = response['hits']['hits']
|
hits = response['hits']['hits']
|
||||||
|
|
||||||
if not hits:
|
if not hits:
|
||||||
|
@@ -108,6 +108,37 @@ class EsSearchUtil:
|
|||||||
# 释放连接回连接池
|
# 释放连接回连接池
|
||||||
self.es_pool.release_connection(conn)
|
self.es_pool.release_connection(conn)
|
||||||
|
|
||||||
|
def select_all_data(self, size=1000, index_name=None):
    """
    Fetch documents from an index using a ``match_all`` query.

    Args:
        size: Maximum number of hits to request; defaults to 1000.
        index_name: Index to search. When left as ``None`` the index
            configured under ``self.es_config['index_name']`` is used,
            which is the behavior callers had before this parameter
            existed.

    Returns:
        The response object returned by the connection's ``search`` call.

    Raises:
        Exception: Any error raised while building or running the search
            is logged and then re-raised unchanged.
    """
    # Borrow a connection from the pool; it is handed back in ``finally``.
    conn = self.es_pool.get_connection()
    try:
        # Resolve the target index inside the ``try`` so that a missing
        # config key is logged and the connection is still released.
        if index_name is None:
            index_name = self.es_config['index_name']

        # Match every document, capped at ``size`` hits.
        query = {
            "query": {
                "match_all": {}
            },
            "size": size
        }

        # Run the search and hand the raw response back to the caller.
        return conn.search(index=index_name, body=query)
    except Exception as e:
        # Lazy %-formatting renders the same message text as before.
        logger.error("查询所有数据失败: %s", e)
        raise
    finally:
        # Always return the connection to the pool, success or failure.
        self.es_pool.release_connection(conn)
|
||||||
|
|
||||||
def split_text_into_chunks(self,text: str, chunk_size: int = 200, chunk_overlap: int = 0) -> list:
|
def split_text_into_chunks(self,text: str, chunk_size: int = 200, chunk_overlap: int = 0) -> list:
|
||||||
"""
|
"""
|
||||||
将文本切割成块
|
将文本切割成块
|
||||||
|
Binary file not shown.
Reference in New Issue
Block a user