parent
ca984af410
commit
56e6db5e92
@ -1,94 +1,18 @@
|
||||
D:\anaconda3\envs\rag\python.exe D:\dsWork\dsProject\dsRag\T8_RAG.py
|
||||
D:\anaconda3\envs\rag\lib\site-packages\jieba\_compat.py:18: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
|
||||
import pkg_resources
|
||||
2025-06-23 20:13:47,913 - INFO - loading projection weights from D:\Tencent_AILab_ChineseEmbedding\Tencent_AILab_ChineseEmbedding.txt
|
||||
2025-06-23 20:13:49,382 - INFO - KeyedVectors lifecycle event {'msg': 'loaded (10000, 200) matrix of type float32 from D:\\Tencent_AILab_ChineseEmbedding\\Tencent_AILab_ChineseEmbedding.txt', 'binary': False, 'encoding': 'utf8', 'datetime': '2025-06-23T20:13:49.361378', 'gensim': '4.3.3', 'python': '3.10.18 | packaged by conda-forge | (main, Jun 4 2025, 14:42:04) [MSC v.1943 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.19044-SP0', 'event': 'load_word2vec_format'}
|
||||
2025-06-23 20:13:49,382 - INFO - 模型加载成功,词向量维度: 200
|
||||
D:\anaconda3\envs\rag\lib\site-packages\elasticsearch\_sync\client\__init__.py:311: SecurityWarning: Connecting to 'https://10.10.14.206:9200' using TLS with verify_certs=False is insecure
|
||||
_transport = transport_class(
|
||||
正在搜索与'整理云南省初中在校生情况文档'相关的数据...
|
||||
Building prefix dict from the default dictionary ...
|
||||
2025-06-23 20:13:50,520 - DEBUG - Building prefix dict from the default dictionary ...
|
||||
Loading model from cache C:\Users\ADMINI~1\AppData\Local\Temp\jieba.cache
|
||||
2025-06-23 20:13:50,520 - DEBUG - Loading model from cache C:\Users\ADMINI~1\AppData\Local\Temp\jieba.cache
|
||||
Loading model cost 0.648 seconds.
|
||||
2025-06-23 20:13:51,168 - DEBUG - Loading model cost 0.648 seconds.
|
||||
Prefix dict has been built successfully.
|
||||
2025-06-23 20:13:51,169 - DEBUG - Prefix dict has been built successfully.
|
||||
2025-06-23 20:13:51,169 - INFO - 文本: 整理云南省初中在校生情况文档, 分词结果: ['整理', '云南省', '初中', '在校生', '情况', '文档']
|
||||
2025-06-23 20:13:51,169 - INFO - 有效词向量数量: 3
|
||||
2025-06-23 20:13:51,169 - INFO - 生成的平均向量: [ 0.18687634 -0.19447033 -0.04333766 -0.05068566 0.09433967]...
|
||||
D:\anaconda3\envs\rag\lib\site-packages\urllib3\connectionpool.py:1097: InsecureRequestWarning: Unverified HTTPS request is being made to host '10.10.14.206'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings
|
||||
warnings.warn(
|
||||
2025-06-23 20:13:51,222 - INFO - POST https://10.10.14.206:9200/knowledge_base/_search [status:200 duration:0.051s]
|
||||
D:\anaconda3\envs\rag\lib\site-packages\urllib3\connectionpool.py:1097: InsecureRequestWarning: Unverified HTTPS request is being made to host '10.10.14.206'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings
|
||||
warnings.warn(
|
||||
2025-06-23 20:13:51,231 - INFO - POST https://10.10.14.206:9200/raw_texts/_search [status:200 duration:0.009s]
|
||||
找到10条相关数据
|
||||
正在生成报告...
|
||||
2025-06-23 20:13:56,582 - INFO - Retrying request to /chat/completions in 0.469314 seconds
|
||||
2025-06-23 20:14:02,067 - INFO - Retrying request to /chat/completions in 0.762535 seconds
|
||||
2025-06-23 21:08:46,327 - INFO - POST https://10.10.14.206:9200/knowledge_base/_doc [status:400 duration:0.004s]
|
||||
处理进度: 8%|▊ | 40/503 [00:01<00:17, 26.02句/s]
|
||||
Traceback (most recent call last):
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpx\_transports\default.py", line 101, in map_httpcore_exceptions
|
||||
yield
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpx\_transports\default.py", line 250, in handle_request
|
||||
resp = self._pool.handle_request(req)
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpcore\_sync\connection_pool.py", line 256, in handle_request
|
||||
raise exc from None
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpcore\_sync\connection_pool.py", line 236, in handle_request
|
||||
response = connection.handle_request(
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpcore\_sync\connection.py", line 101, in handle_request
|
||||
raise exc
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpcore\_sync\connection.py", line 78, in handle_request
|
||||
stream = self._connect(request)
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpcore\_sync\connection.py", line 124, in _connect
|
||||
stream = self._network_backend.connect_tcp(**kwargs)
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpcore\_backends\sync.py", line 207, in connect_tcp
|
||||
with map_exceptions(exc_map):
|
||||
File "D:\anaconda3\envs\rag\lib\contextlib.py", line 153, in __exit__
|
||||
self.gen.throw(typ, value, traceback)
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpcore\_exceptions.py", line 14, in map_exceptions
|
||||
raise to_exc(exc) from exc
|
||||
httpcore.ConnectTimeout: timed out
|
||||
|
||||
The above exception was the direct cause of the following exception:
|
||||
|
||||
Traceback (most recent call last):
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\openai\_base_client.py", line 972, in request
|
||||
response = self._client.send(
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpx\_client.py", line 914, in send
|
||||
response = self._send_handling_auth(
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpx\_client.py", line 942, in _send_handling_auth
|
||||
response = self._send_handling_redirects(
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpx\_client.py", line 979, in _send_handling_redirects
|
||||
response = self._send_single_request(request)
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpx\_client.py", line 1014, in _send_single_request
|
||||
response = transport.handle_request(request)
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpx\_transports\default.py", line 249, in handle_request
|
||||
with map_httpcore_exceptions():
|
||||
File "D:\anaconda3\envs\rag\lib\contextlib.py", line 153, in __exit__
|
||||
self.gen.throw(typ, value, traceback)
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpx\_transports\default.py", line 118, in map_httpcore_exceptions
|
||||
raise mapped_exc(message) from exc
|
||||
httpx.ConnectTimeout: timed out
|
||||
|
||||
The above exception was the direct cause of the following exception:
|
||||
|
||||
Traceback (most recent call last):
|
||||
File "D:\dsWork\dsProject\dsRag\T8_RAG.py", line 98, in <module>
|
||||
report = process_query(user_query)
|
||||
File "D:\dsWork\dsProject\dsRag\T8_RAG.py", line 91, in process_query
|
||||
report = generate_report(query, context)
|
||||
File "D:\dsWork\dsProject\dsRag\T8_RAG.py", line 73, in generate_report
|
||||
response = client.chat.completions.create(
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\openai\_utils\_utils.py", line 287, in wrapper
|
||||
return func(*args, **kwargs)
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\openai\resources\chat\completions\completions.py", line 925, in create
|
||||
return self._post(
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\openai\_base_client.py", line 1249, in post
|
||||
return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\openai\_base_client.py", line 990, in request
|
||||
raise APITimeoutError(request=request) from err
|
||||
openai.APITimeoutError: Request timed out.
|
||||
|
||||
进程已结束,退出代码为 1
|
||||
File "D:\dsWork\dsProject\dsRag\T2_ImportTxt.py", line 51, in <module>
|
||||
process_file(file_path)
|
||||
File "D:\dsWork\dsProject\dsRag\T2_ImportTxt.py", line 38, in process_file
|
||||
save_to_es(sentence)
|
||||
File "D:\dsWork\dsProject\dsRag\T2_ImportTxt.py", line 26, in save_to_es
|
||||
es.index(index='knowledge_base', body=doc)
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\elasticsearch\_sync\client\utils.py", line 415, in wrapped
|
||||
return api(*args, **kwargs)
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\elasticsearch\_sync\client\__init__.py", line 2951, in index
|
||||
return self.perform_request( # type: ignore[return-value]
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\elasticsearch\_sync\client\_base.py", line 271, in perform_request
|
||||
response = self._perform_request(
|
||||
File "D:\anaconda3\envs\rag\lib\site-packages\elasticsearch\_sync\client\_base.py", line 351, in _perform_request
|
||||
raise HTTP_EXCEPTIONS.get(meta.status, ApiError)(
|
||||
elasticsearch.BadRequestError: BadRequestError(400, 'document_parsing_exception', '[1:828] failed to parse: The [cosine] similarity does not support vectors with zero magnitude. Preview of invalid vector: [0.0, 0.0, 0.0, 0.0, 0.0, ...]', The [cosine] similarity does not support vectors with zero magnitude. Preview of invalid vector: [0.0, 0.0, 0.0, 0.0, 0.0, ...])
|
@ -1,113 +0,0 @@
|
||||
from elasticsearch import Elasticsearch
|
||||
from Config.Config import ES_CONFIG
|
||||
|
||||
es = Elasticsearch(
|
||||
hosts=ES_CONFIG["hosts"],
|
||||
basic_auth=ES_CONFIG["basic_auth"],
|
||||
verify_certs=ES_CONFIG["verify_certs"],
|
||||
ssl_show_warn=ES_CONFIG["ssl_show_warn"]
|
||||
)
|
||||
|
||||
def get_vector_mapping(dims=200):
|
||||
"""获取向量索引的mapping结构"""
|
||||
return {
|
||||
"mappings": {
|
||||
"properties": {
|
||||
"text": {"type": "text", "analyzer": "ik_max_word"},
|
||||
"vector": {
|
||||
"type": "dense_vector",
|
||||
"dims": dims,
|
||||
"index": True,
|
||||
"similarity": "cosine"
|
||||
},
|
||||
"timestamp": {"type": "date"}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def create_vector_index(index_name="knowledge_base", dims=200):
|
||||
"""创建带有向量字段的索引"""
|
||||
mapping = get_vector_mapping(dims)
|
||||
|
||||
try:
|
||||
if es.indices.exists(index=index_name):
|
||||
current_mapping = es.indices.get_mapping(index=index_name)
|
||||
current_dims = current_mapping[index_name]["mappings"]["properties"]["vector"].get("dims", 0)
|
||||
|
||||
if current_dims == dims:
|
||||
print(f"索引 {index_name} 已存在且维度正确({dims}维),无需操作")
|
||||
return True
|
||||
else:
|
||||
print(f"警告:索引 {index_name} 已存在但维度不匹配(当前:{current_dims}维,需要:{dims}维)")
|
||||
return False
|
||||
|
||||
es.indices.create(index=index_name, body=mapping)
|
||||
print(f"索引 {index_name} 创建成功({dims}维)")
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"操作索引失败: {str(e)}")
|
||||
raise
|
||||
|
||||
|
||||
def delete_index(index_name):
|
||||
"""删除Elasticsearch索引"""
|
||||
try:
|
||||
if es.indices.exists(index=index_name):
|
||||
es.indices.delete(index=index_name)
|
||||
print(f"索引 {index_name} 删除成功")
|
||||
return True
|
||||
else:
|
||||
print(f"索引 {index_name} 不存在")
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"删除索引失败: {str(e)}")
|
||||
raise
|
||||
|
||||
|
||||
def create_text_index(index_name="raw_texts"):
|
||||
"""创建原始文本索引"""
|
||||
mapping = {
|
||||
"mappings": {
|
||||
"properties": {
|
||||
"text": {
|
||||
"type": "text",
|
||||
"fields": {
|
||||
"keyword": {
|
||||
"type": "keyword",
|
||||
"ignore_above": 256
|
||||
}
|
||||
}
|
||||
},
|
||||
"timestamp": {
|
||||
"type": "date"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
try:
|
||||
if not es.indices.exists(index=index_name):
|
||||
es.indices.create(index=index_name, body=mapping)
|
||||
print(f"原始文本索引 {index_name} 创建成功")
|
||||
return True
|
||||
else:
|
||||
print(f"原始文本索引 {index_name} 已存在")
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"创建原始文本索引失败: {str(e)}")
|
||||
raise
|
||||
|
||||
|
||||
def delete_text_index(index_name="raw_texts"):
|
||||
"""删除原始文本索引"""
|
||||
try:
|
||||
if es.indices.exists(index=index_name):
|
||||
es.indices.delete(index=index_name)
|
||||
print(f"原始文本索引 {index_name} 删除成功")
|
||||
return True
|
||||
else:
|
||||
print(f"原始文本索引 {index_name} 不存在")
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"删除原始文本索引失败: {str(e)}")
|
||||
raise
|
Binary file not shown.
Binary file not shown.
Loading…
Reference in new issue