This commit is contained in:
2025-08-19 09:33:09 +08:00
parent 1a7c5944be
commit 1044d5cf48
3 changed files with 36 additions and 24 deletions

View File

@@ -1,35 +1,16 @@
import warnings import warnings
from elasticsearch import Elasticsearch
from Config import Config from Config import Config
from ElasticSearch.Utils.EsSearchUtil import EsSearchUtil
# 抑制HTTPS相关警告 # 创建EsSearchUtil实例
warnings.filterwarnings('ignore', message='Connecting to .* using TLS with verify_certs=False is insecure') search_util = EsSearchUtil(Config.ES_CONFIG)
warnings.filterwarnings('ignore', message='Unverified HTTPS request is being made to host')
# 初始化Elasticsearch连接
es = Elasticsearch(
hosts=Config.ES_CONFIG['hosts'],
basic_auth=Config.ES_CONFIG['basic_auth'],
verify_certs=False
)
# 查询所有数据 # 查询所有数据
def select_all_data(index_name): def select_all_data(index_name):
try: try:
# 构建查询条件 - 匹配所有文档 # 调用EsSearchUtil中的select_all_data方法
# 修改查询条件为获取前10条数据 response = search_util.select_all_data()
query = {
"query": {
"match_all": {}
},
"size": 1000 # 仅获取10条数据
}
# 执行查询
response = es.search(index=index_name, body=query)
hits = response['hits']['hits'] hits = response['hits']['hits']
if not hits: if not hits:

View File

@@ -108,6 +108,37 @@ class EsSearchUtil:
# 释放连接回连接池 # 释放连接回连接池
self.es_pool.release_connection(conn) self.es_pool.release_connection(conn)
def select_all_data(self, size=1000, index_name=None):
    """
    Fetch documents from an index using a ``match_all`` query.

    Args:
        size: Maximum number of hits to request (default 1000).
        index_name: Name of the index to search. When ``None`` (the
            default), the index stored under ``self.es_config['index_name']``
            is used, which matches the behaviour before this parameter
            existed.

    Returns:
        The response object returned by the connection's ``search`` call,
        passed through unmodified.

    Raises:
        Exception: Any error raised while building or running the search is
            logged and then re-raised to the caller.
    """
    # Borrow a connection from the pool; it is always handed back in `finally`.
    conn = self.es_pool.get_connection()
    try:
        # Resolve the index inside the try block so that a missing config key
        # is logged like any other failure.
        if index_name is None:
            target_index = self.es_config['index_name']
        else:
            target_index = index_name

        # Match every document, capped at `size` hits.
        query = {
            "query": {
                "match_all": {}
            },
            "size": size
        }
        return conn.search(index=target_index, body=query)
    except Exception as e:
        logger.error(f"查询所有数据失败: {str(e)}")
        raise
    finally:
        # Return the connection to the pool on success and on failure alike.
        self.es_pool.release_connection(conn)
def split_text_into_chunks(self,text: str, chunk_size: int = 200, chunk_overlap: int = 0) -> list: def split_text_into_chunks(self,text: str, chunk_size: int = 200, chunk_overlap: int = 0) -> list:
""" """
将文本切割成块 将文本切割成块