'commit'
This commit is contained in:
@@ -1,12 +1,12 @@
|
||||
import warnings
|
||||
import os
|
||||
import time
|
||||
import warnings
|
||||
|
||||
from Config import Config
|
||||
from elasticsearch import Elasticsearch
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from Config import Config
|
||||
from Util.VectorUtil import text_to_vector_db # 导入向量化工具函数
|
||||
from langchain_openai import OpenAIEmbeddings # 直接导入嵌入模型
|
||||
from pydantic import SecretStr # 用于包装API密钥
|
||||
|
||||
# 抑制HTTPS相关警告
|
||||
warnings.filterwarnings('ignore', message='Connecting to .* using TLS with verify_certs=False is insecure')
|
||||
@@ -60,8 +60,7 @@ def insert_long_text_to_es(long_text: str, tags: list = None) -> bool:
|
||||
es.indices.create(index=index_name, body=mapping)
|
||||
print(f"索引 '{index_name}' 创建成功")
|
||||
|
||||
# 3. 使用VectorUtil处理文本
|
||||
# 这里我们创建一个临时的Document对象
|
||||
# 3. 创建文档对象
|
||||
docs = [Document(page_content=long_text, metadata={"source": "user_provided_text"})]
|
||||
|
||||
# 4. 获取当前时间
|
||||
@@ -72,14 +71,15 @@ def insert_long_text_to_es(long_text: str, tags: list = None) -> bool:
|
||||
tags = ["general_text"]
|
||||
tags_dict = {"tags": tags, "full_content": long_text}
|
||||
|
||||
# 6. 使用VectorUtil中的功能获取嵌入向量
|
||||
# 注意:我们需要修改text_to_vector_db函数或创建新函数来获取单个文档的嵌入
|
||||
# 这里为了演示,我们直接调用text_to_vector_db并获取第一个文档的嵌入
|
||||
vector_store, _, _ = text_to_vector_db(long_text)
|
||||
# 6. 直接创建嵌入模型并生成向量
|
||||
embeddings = OpenAIEmbeddings(
|
||||
model=Config.EMBED_MODEL_NAME,
|
||||
base_url=Config.EMBED_BASE_URL,
|
||||
api_key=SecretStr(Config.EMBED_API_KEY)
|
||||
)
|
||||
|
||||
# 7. 提取嵌入向量
|
||||
# 注意:这里假设我们只处理一个文档
|
||||
embedding = vector_store._embeddings.embed_documents([long_text])[0]
|
||||
# 7. 生成文本嵌入向量
|
||||
embedding = embeddings.embed_documents([long_text])[0]
|
||||
|
||||
# 8. 插入数据到Elasticsearch
|
||||
doc = {
|
||||
|
Reference in New Issue
Block a user