# Patch summary (three paths under dsRag/):
#
#   1. Config/Config.py
#      The single hunk removes all 32 existing lines and adds 38. The first
#      32 added lines show the same visible text as the removed ones; the
#      new material is a blank line followed by JIEBA_CUSTOM_WORDS, a list
#      holding one custom word and a placeholder comment for further
#      entries.
#      NOTE(review): the remove-and-re-add of otherwise identical lines is
#      presumably a line-ending or encoding change -- confirm.
#
#   2. Config/__pycache__/Config.cpython-310.pyc
#      Reported only as "Binary files ... differ".
#      NOTE(review): compiled bytecode appears to be tracked; consider
#      ignoring __pycache__/ -- confirm with the repository owners.
#
#   3. Test/TestJieba.py (new file, 12 lines)
#      Imports jieba and JIEBA_CUSTOM_WORDS, passes every configured word to
#      jieba.add_word(), then prints the segmentation of one fixed sample
#      string three ways: jieba.cut(text), jieba.cut(text, cut_all=True) and
#      jieba.cut_for_search(text).
#      NOTE(review): `from Config.Config import ...` presumably resolves
#      only when dsRag/ is on sys.path -- verify how the script is launched.
#
# NOTE(review): Config.py carries literal credential values (MYSQL_PASSWORD,
# DEEPSEEK_API_KEY, MODEL_API_KEY). Consider loading them from the
# environment and rotating the committed values.
# NOTE(review): both text files end without a trailing newline (see the
# "\ No newline at end of file" markers).
# NOTE(review): indentation of the JIEBA_CUSTOM_WORDS entries and of the
# for-loop body is shown as four spaces -- confirm against the original
# patch.
diff --git a/dsRag/Config/Config.py b/dsRag/Config/Config.py
index d5365f11..2588e16a 100644
--- a/dsRag/Config/Config.py
+++ b/dsRag/Config/Config.py
@@ -1,32 +1,38 @@
-# MYSQL配置信息
-MYSQL_HOST = "10.10.14.210"
-MYSQL_PORT = 22066
-MYSQL_USER = "root"
-MYSQL_PASSWORD = "DsideaL147258369"
-MYSQL_DB_NAME = "base_db"
-
-# Milvus 服务器的主机地址
-MS_HOST = "10.10.14.207"
-# Milvus 服务器的端口号
-MS_PORT = "19530"
-# Milvus 集合的名称
-MS_COLLECTION_NAME = "ds_collection"
-# Milvus 连接池的最大连接数
-MS_MAX_CONNECTIONS = 50
-# 腾讯 AI Lab 中文词向量模型的路径
-MS_MODEL_PATH = "D:/Tencent_AILab_ChineseEmbedding/Tencent_AILab_ChineseEmbedding.txt"
-# 加载词向量模型时限制的词汇数量
-MS_MODEL_LIMIT = 10000
-# 词向量的维度(腾讯 AI Lab 中文词向量模型的维度为 200)
-MS_DIMENSION = 200
-# Milvus 搜索时的 nprobe 参数,用于控制搜索的精度和性能
-MS_NPROBE = 100
-
-# DeepSeek
-DEEPSEEK_API_KEY = 'sk-44ae895eeb614aa1a9c6460579e322f1'
-DEEPSEEK_URL = 'https://api.deepseek.com'
-
-# 阿里云中用来调用 deepseek v3 的密钥【驿来特】
-MODEL_API_KEY = "sk-f6da0c787eff4b0389e4ad03a35a911f"
-MODEL_NAME = "qwen-plus"
-#MODEL_NAME = "deepseek-v3"
\ No newline at end of file
+# MYSQL配置信息
+MYSQL_HOST = "10.10.14.210"
+MYSQL_PORT = 22066
+MYSQL_USER = "root"
+MYSQL_PASSWORD = "DsideaL147258369"
+MYSQL_DB_NAME = "base_db"
+
+# Milvus 服务器的主机地址
+MS_HOST = "10.10.14.207"
+# Milvus 服务器的端口号
+MS_PORT = "19530"
+# Milvus 集合的名称
+MS_COLLECTION_NAME = "ds_collection"
+# Milvus 连接池的最大连接数
+MS_MAX_CONNECTIONS = 50
+# 腾讯 AI Lab 中文词向量模型的路径
+MS_MODEL_PATH = "D:/Tencent_AILab_ChineseEmbedding/Tencent_AILab_ChineseEmbedding.txt"
+# 加载词向量模型时限制的词汇数量
+MS_MODEL_LIMIT = 10000
+# 词向量的维度(腾讯 AI Lab 中文词向量模型的维度为 200)
+MS_DIMENSION = 200
+# Milvus 搜索时的 nprobe 参数,用于控制搜索的精度和性能
+MS_NPROBE = 100
+
+# DeepSeek
+DEEPSEEK_API_KEY = 'sk-44ae895eeb614aa1a9c6460579e322f1'
+DEEPSEEK_URL = 'https://api.deepseek.com'
+
+# 阿里云中用来调用 deepseek v3 的密钥【驿来特】
+MODEL_API_KEY = "sk-f6da0c787eff4b0389e4ad03a35a911f"
+MODEL_NAME = "qwen-plus"
+#MODEL_NAME = "deepseek-v3"
+
+# Jieba分词自定义词典配置
+JIEBA_CUSTOM_WORDS = [
+    '文言虚词',
+    # 可在此添加更多自定义词语
+]
\ No newline at end of file
diff --git a/dsRag/Config/__pycache__/Config.cpython-310.pyc b/dsRag/Config/__pycache__/Config.cpython-310.pyc
index a5b9b101..dd5b8039 100644
Binary files a/dsRag/Config/__pycache__/Config.cpython-310.pyc and b/dsRag/Config/__pycache__/Config.cpython-310.pyc differ
diff --git a/dsRag/Test/TestJieba.py b/dsRag/Test/TestJieba.py
new file mode 100644
index 00000000..eeb5b5ca
--- /dev/null
+++ b/dsRag/Test/TestJieba.py
@@ -0,0 +1,12 @@
+import jieba
+from Config.Config import JIEBA_CUSTOM_WORDS
+
+# 从配置文件加载自定义词典
+for word in JIEBA_CUSTOM_WORDS:
+    jieba.add_word(word)
+
+# 分词演示
+text = "文言虚词"
+print("默认模式:", list(jieba.cut(text)))
+print("全模式:", list(jieba.cut(text, cut_all=True)))
+print("搜索引擎模式:", list(jieba.cut_for_search(text)))
\ No newline at end of file