diff --git a/dsSchoolBuddy/ElasticSearch/T2_BgeM3.py b/dsSchoolBuddy/ElasticSearch/T2_BgeM3.py
index 2d79e644..44799a92 100644
--- a/dsSchoolBuddy/ElasticSearch/T2_BgeM3.py
+++ b/dsSchoolBuddy/ElasticSearch/T2_BgeM3.py
@@ -1,15 +1,11 @@
-import os
-
+# pip install pydantic
 from langchain_core.documents import Document
 from langchain_core.vectorstores import InMemoryVectorStore
 from langchain_openai import OpenAIEmbeddings
 from langchain_text_splitters import RecursiveCharacterTextSplitter
-
+from pydantic import SecretStr  # import SecretStr
 from Config.Config import EMBED_MODEL_NAME, EMBED_BASE_URL, EMBED_API_KEY
 
-# 设置环境变量
-os.environ["OPENAI_BASE_URL"] = EMBED_BASE_URL
-os.environ["OPENAI_API_KEY"] = EMBED_API_KEY
 
 # 模拟长字符串文档内容
 long_text = """混凝土是一种广泛使用的建筑材料,由水泥、砂、石子和水混合而成。它具有高强度、耐久性和良好的可塑性,被广泛应用于建筑、桥梁、道路等土木工程领域。
@@ -37,7 +33,11 @@ all_splits = text_splitter.split_documents(docs)
 print(f"切割后的文档块数量:{len(all_splits)}")
 
 # 嵌入模型
-embeddings = OpenAIEmbeddings(model=EMBED_MODEL_NAME)
+embeddings = OpenAIEmbeddings(
+    model=EMBED_MODEL_NAME,
+    base_url=EMBED_BASE_URL,
+    api_key=SecretStr(EMBED_API_KEY)  # wrap the key in the SecretStr type
+)
 
 # 向量存储
 vector_store = InMemoryVectorStore(embeddings)