parent
ca984af410
commit
56e6db5e92
@ -1,94 +1,18 @@
|
|||||||
D:\anaconda3\envs\rag\python.exe D:\dsWork\dsProject\dsRag\T8_RAG.py
|
2025-06-23 21:08:46,327 - INFO - POST https://10.10.14.206:9200/knowledge_base/_doc [status:400 duration:0.004s]
|
||||||
D:\anaconda3\envs\rag\lib\site-packages\jieba\_compat.py:18: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
|
处理进度: 8%|▊ | 40/503 [00:01<00:17, 26.02句/s]
|
||||||
import pkg_resources
|
|
||||||
2025-06-23 20:13:47,913 - INFO - loading projection weights from D:\Tencent_AILab_ChineseEmbedding\Tencent_AILab_ChineseEmbedding.txt
|
|
||||||
2025-06-23 20:13:49,382 - INFO - KeyedVectors lifecycle event {'msg': 'loaded (10000, 200) matrix of type float32 from D:\\Tencent_AILab_ChineseEmbedding\\Tencent_AILab_ChineseEmbedding.txt', 'binary': False, 'encoding': 'utf8', 'datetime': '2025-06-23T20:13:49.361378', 'gensim': '4.3.3', 'python': '3.10.18 | packaged by conda-forge | (main, Jun 4 2025, 14:42:04) [MSC v.1943 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.19044-SP0', 'event': 'load_word2vec_format'}
|
|
||||||
2025-06-23 20:13:49,382 - INFO - 模型加载成功,词向量维度: 200
|
|
||||||
D:\anaconda3\envs\rag\lib\site-packages\elasticsearch\_sync\client\__init__.py:311: SecurityWarning: Connecting to 'https://10.10.14.206:9200' using TLS with verify_certs=False is insecure
|
|
||||||
_transport = transport_class(
|
|
||||||
正在搜索与'整理云南省初中在校生情况文档'相关的数据...
|
|
||||||
Building prefix dict from the default dictionary ...
|
|
||||||
2025-06-23 20:13:50,520 - DEBUG - Building prefix dict from the default dictionary ...
|
|
||||||
Loading model from cache C:\Users\ADMINI~1\AppData\Local\Temp\jieba.cache
|
|
||||||
2025-06-23 20:13:50,520 - DEBUG - Loading model from cache C:\Users\ADMINI~1\AppData\Local\Temp\jieba.cache
|
|
||||||
Loading model cost 0.648 seconds.
|
|
||||||
2025-06-23 20:13:51,168 - DEBUG - Loading model cost 0.648 seconds.
|
|
||||||
Prefix dict has been built successfully.
|
|
||||||
2025-06-23 20:13:51,169 - DEBUG - Prefix dict has been built successfully.
|
|
||||||
2025-06-23 20:13:51,169 - INFO - 文本: 整理云南省初中在校生情况文档, 分词结果: ['整理', '云南省', '初中', '在校生', '情况', '文档']
|
|
||||||
2025-06-23 20:13:51,169 - INFO - 有效词向量数量: 3
|
|
||||||
2025-06-23 20:13:51,169 - INFO - 生成的平均向量: [ 0.18687634 -0.19447033 -0.04333766 -0.05068566 0.09433967]...
|
|
||||||
D:\anaconda3\envs\rag\lib\site-packages\urllib3\connectionpool.py:1097: InsecureRequestWarning: Unverified HTTPS request is being made to host '10.10.14.206'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings
|
|
||||||
warnings.warn(
|
|
||||||
2025-06-23 20:13:51,222 - INFO - POST https://10.10.14.206:9200/knowledge_base/_search [status:200 duration:0.051s]
|
|
||||||
D:\anaconda3\envs\rag\lib\site-packages\urllib3\connectionpool.py:1097: InsecureRequestWarning: Unverified HTTPS request is being made to host '10.10.14.206'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings
|
|
||||||
warnings.warn(
|
|
||||||
2025-06-23 20:13:51,231 - INFO - POST https://10.10.14.206:9200/raw_texts/_search [status:200 duration:0.009s]
|
|
||||||
找到10条相关数据
|
|
||||||
正在生成报告...
|
|
||||||
2025-06-23 20:13:56,582 - INFO - Retrying request to /chat/completions in 0.469314 seconds
|
|
||||||
2025-06-23 20:14:02,067 - INFO - Retrying request to /chat/completions in 0.762535 seconds
|
|
||||||
Traceback (most recent call last):
|
Traceback (most recent call last):
|
||||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpx\_transports\default.py", line 101, in map_httpcore_exceptions
|
File "D:\dsWork\dsProject\dsRag\T2_ImportTxt.py", line 51, in <module>
|
||||||
yield
|
process_file(file_path)
|
||||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpx\_transports\default.py", line 250, in handle_request
|
File "D:\dsWork\dsProject\dsRag\T2_ImportTxt.py", line 38, in process_file
|
||||||
resp = self._pool.handle_request(req)
|
save_to_es(sentence)
|
||||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpcore\_sync\connection_pool.py", line 256, in handle_request
|
File "D:\dsWork\dsProject\dsRag\T2_ImportTxt.py", line 26, in save_to_es
|
||||||
raise exc from None
|
es.index(index='knowledge_base', body=doc)
|
||||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpcore\_sync\connection_pool.py", line 236, in handle_request
|
File "D:\anaconda3\envs\rag\lib\site-packages\elasticsearch\_sync\client\utils.py", line 415, in wrapped
|
||||||
response = connection.handle_request(
|
return api(*args, **kwargs)
|
||||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpcore\_sync\connection.py", line 101, in handle_request
|
File "D:\anaconda3\envs\rag\lib\site-packages\elasticsearch\_sync\client\__init__.py", line 2951, in index
|
||||||
raise exc
|
return self.perform_request( # type: ignore[return-value]
|
||||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpcore\_sync\connection.py", line 78, in handle_request
|
File "D:\anaconda3\envs\rag\lib\site-packages\elasticsearch\_sync\client\_base.py", line 271, in perform_request
|
||||||
stream = self._connect(request)
|
response = self._perform_request(
|
||||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpcore\_sync\connection.py", line 124, in _connect
|
File "D:\anaconda3\envs\rag\lib\site-packages\elasticsearch\_sync\client\_base.py", line 351, in _perform_request
|
||||||
stream = self._network_backend.connect_tcp(**kwargs)
|
raise HTTP_EXCEPTIONS.get(meta.status, ApiError)(
|
||||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpcore\_backends\sync.py", line 207, in connect_tcp
|
elasticsearch.BadRequestError: BadRequestError(400, 'document_parsing_exception', '[1:828] failed to parse: The [cosine] similarity does not support vectors with zero magnitude. Preview of invalid vector: [0.0, 0.0, 0.0, 0.0, 0.0, ...]', The [cosine] similarity does not support vectors with zero magnitude. Preview of invalid vector: [0.0, 0.0, 0.0, 0.0, 0.0, ...])
|
||||||
with map_exceptions(exc_map):
|
|
||||||
File "D:\anaconda3\envs\rag\lib\contextlib.py", line 153, in __exit__
|
|
||||||
self.gen.throw(typ, value, traceback)
|
|
||||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpcore\_exceptions.py", line 14, in map_exceptions
|
|
||||||
raise to_exc(exc) from exc
|
|
||||||
httpcore.ConnectTimeout: timed out
|
|
||||||
|
|
||||||
The above exception was the direct cause of the following exception:
|
|
||||||
|
|
||||||
Traceback (most recent call last):
|
|
||||||
File "D:\anaconda3\envs\rag\lib\site-packages\openai\_base_client.py", line 972, in request
|
|
||||||
response = self._client.send(
|
|
||||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpx\_client.py", line 914, in send
|
|
||||||
response = self._send_handling_auth(
|
|
||||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpx\_client.py", line 942, in _send_handling_auth
|
|
||||||
response = self._send_handling_redirects(
|
|
||||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpx\_client.py", line 979, in _send_handling_redirects
|
|
||||||
response = self._send_single_request(request)
|
|
||||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpx\_client.py", line 1014, in _send_single_request
|
|
||||||
response = transport.handle_request(request)
|
|
||||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpx\_transports\default.py", line 249, in handle_request
|
|
||||||
with map_httpcore_exceptions():
|
|
||||||
File "D:\anaconda3\envs\rag\lib\contextlib.py", line 153, in __exit__
|
|
||||||
self.gen.throw(typ, value, traceback)
|
|
||||||
File "D:\anaconda3\envs\rag\lib\site-packages\httpx\_transports\default.py", line 118, in map_httpcore_exceptions
|
|
||||||
raise mapped_exc(message) from exc
|
|
||||||
httpx.ConnectTimeout: timed out
|
|
||||||
|
|
||||||
The above exception was the direct cause of the following exception:
|
|
||||||
|
|
||||||
Traceback (most recent call last):
|
|
||||||
File "D:\dsWork\dsProject\dsRag\T8_RAG.py", line 98, in <module>
|
|
||||||
report = process_query(user_query)
|
|
||||||
File "D:\dsWork\dsProject\dsRag\T8_RAG.py", line 91, in process_query
|
|
||||||
report = generate_report(query, context)
|
|
||||||
File "D:\dsWork\dsProject\dsRag\T8_RAG.py", line 73, in generate_report
|
|
||||||
response = client.chat.completions.create(
|
|
||||||
File "D:\anaconda3\envs\rag\lib\site-packages\openai\_utils\_utils.py", line 287, in wrapper
|
|
||||||
return func(*args, **kwargs)
|
|
||||||
File "D:\anaconda3\envs\rag\lib\site-packages\openai\resources\chat\completions\completions.py", line 925, in create
|
|
||||||
return self._post(
|
|
||||||
File "D:\anaconda3\envs\rag\lib\site-packages\openai\_base_client.py", line 1249, in post
|
|
||||||
return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
|
|
||||||
File "D:\anaconda3\envs\rag\lib\site-packages\openai\_base_client.py", line 990, in request
|
|
||||||
raise APITimeoutError(request=request) from err
|
|
||||||
openai.APITimeoutError: Request timed out.
|
|
||||||
|
|
||||||
进程已结束,退出代码为 1
|
|
@ -1,113 +0,0 @@
|
|||||||
from elasticsearch import Elasticsearch
|
|
||||||
from Config.Config import ES_CONFIG
|
|
||||||
|
|
||||||
# Module-level Elasticsearch client; every connection option is read from
# ES_CONFIG and forwarded as a keyword argument of the same name.
es = Elasticsearch(
    **{
        option: ES_CONFIG[option]
        for option in ("hosts", "basic_auth", "verify_certs", "ssl_show_warn")
    }
)
|
|
||||||
|
|
||||||
def get_vector_mapping(dims=200):
    """Build the index body (mappings only) used for the vector index.

    Args:
        dims: Number of dimensions declared on the ``vector`` field.

    Returns:
        A dict with a single ``mappings`` key declaring three properties:
        ``text`` (text analyzed with ``ik_max_word``), ``vector`` (an indexed
        ``dense_vector`` with cosine similarity) and ``timestamp`` (date).
    """
    text_field = {"type": "text", "analyzer": "ik_max_word"}
    vector_field = {
        "type": "dense_vector",
        "dims": dims,
        "index": True,
        "similarity": "cosine",
    }
    timestamp_field = {"type": "date"}

    properties = {
        "text": text_field,
        "vector": vector_field,
        "timestamp": timestamp_field,
    }
    return {"mappings": {"properties": properties}}
|
|
||||||
|
|
||||||
def create_vector_index(index_name="knowledge_base", dims=200):
    """Create the vector index unless an index of that name already exists.

    Args:
        index_name: Name of the index to check or create.
        dims: Dimensionality expected on the index's ``vector`` field.

    Returns:
        True when the index was created, or when it already exists and its
        ``vector`` field declares exactly ``dims`` dimensions. False when the
        index exists but declares a different dimensionality; an existing
        index without a ``vector`` field is reported as 0 dimensions.

    Raises:
        Exception: Any error raised by the Elasticsearch client is printed
            and then re-raised unchanged.
    """
    mapping = get_vector_mapping(dims)

    try:
        if es.indices.exists(index=index_name):
            current_mapping = es.indices.get_mapping(index=index_name)
            try:
                vector_field = current_mapping[index_name]["mappings"]["properties"]["vector"]
            except KeyError:
                # An index with an empty mapping has no "properties" key, and
                # an unrelated index has no "vector" field. Treat both as
                # "0 dims" so the caller gets the mismatch warning instead of
                # an opaque KeyError.
                vector_field = {}
            current_dims = vector_field.get("dims", 0)

            if current_dims == dims:
                print(f"索引 {index_name} 已存在且维度正确({dims}维),无需操作")
                return True

            print(f"警告:索引 {index_name} 已存在但维度不匹配(当前:{current_dims}维,需要:{dims}维)")
            return False

        es.indices.create(index=index_name, body=mapping)
        print(f"索引 {index_name} 创建成功({dims}维)")
        return True
    except Exception as e:
        print(f"操作索引失败: {str(e)}")
        raise
|
|
||||||
|
|
||||||
|
|
||||||
def delete_index(index_name):
    """Delete an Elasticsearch index if it exists.

    Args:
        index_name: Name of the index to delete.

    Returns:
        True when the index existed and was deleted, False when it did not
        exist.

    Raises:
        Exception: Any error raised by the Elasticsearch client is printed
            and then re-raised unchanged.
    """
    try:
        # Guard clause: nothing to delete when the index is absent.
        if not es.indices.exists(index=index_name):
            print(f"索引 {index_name} 不存在")
            return False

        es.indices.delete(index=index_name)
        print(f"索引 {index_name} 删除成功")
        return True
    except Exception as err:
        print(f"删除索引失败: {err!s}")
        raise
|
|
||||||
|
|
||||||
|
|
||||||
def create_text_index(index_name="raw_texts"):
    """Create the raw-text index unless it already exists.

    The index declares a ``text`` field (type ``text`` with a ``keyword``
    sub-field capped at 256 characters via ``ignore_above``) and a
    ``timestamp`` field of type ``date``.

    Args:
        index_name: Name of the index to create.

    Returns:
        True when the index was created, False when it already existed.

    Raises:
        Exception: Any error raised by the Elasticsearch client is printed
            and then re-raised unchanged.
    """
    keyword_subfield = {"type": "keyword", "ignore_above": 256}
    properties = {
        "text": {"type": "text", "fields": {"keyword": keyword_subfield}},
        "timestamp": {"type": "date"},
    }
    index_body = {"mappings": {"properties": properties}}

    try:
        # Guard clause: leave an existing index untouched.
        if es.indices.exists(index=index_name):
            print(f"原始文本索引 {index_name} 已存在")
            return False

        es.indices.create(index=index_name, body=index_body)
        print(f"原始文本索引 {index_name} 创建成功")
        return True
    except Exception as err:
        print(f"创建原始文本索引失败: {err!s}")
        raise
|
|
||||||
|
|
||||||
|
|
||||||
def delete_text_index(index_name="raw_texts"):
    """Delete the raw-text index if it exists.

    Args:
        index_name: Name of the index to delete.

    Returns:
        True when the index existed and was deleted, False when it did not
        exist.

    Raises:
        Exception: Any error raised by the Elasticsearch client is printed
            and then re-raised unchanged.
    """
    try:
        index_present = bool(es.indices.exists(index=index_name))
        if index_present:
            es.indices.delete(index=index_name)
            print(f"原始文本索引 {index_name} 删除成功")
        else:
            print(f"原始文本索引 {index_name} 不存在")
        return index_present
    except Exception as err:
        print(f"删除原始文本索引失败: {err!s}")
        raise
|
|
Binary file not shown.
Binary file not shown.
Loading…
Reference in new issue