diff --git a/dsRag/Doc/10、下载NLTK库.md b/dsRag/Doc/10、下载NLTK库.md new file mode 100644 index 00000000..fc9a8fdf --- /dev/null +++ b/dsRag/Doc/10、下载NLTK库.md @@ -0,0 +1,5 @@ +### 手动下载nltk库 +https://blog.csdn.net/QM19900420/article/details/128100591 + +### Python NLTK库【NLP核心库】全面解析 +https://blog.csdn.net/webcai_3/article/details/147347144 \ No newline at end of file diff --git a/dsRag/Start.py b/dsRag/Start.py index d9b158c6..8731daf0 100644 --- a/dsRag/Start.py +++ b/dsRag/Start.py @@ -121,6 +121,13 @@ async def generate_stream(client, milvus_pool, collection_manager, query, docume # 获取完整内容 full_content = record['tags'].get('full_content', record['user_input']) context = context + full_content + else: + logger.warning(f"距离太远,忽略此结果: {hit.id}") + logger.info(f"标签: {record['tags']}") + logger.info(f"用户问题: {record['user_input']}") + logger.info(f"时间: {record['timestamp']}") + logger.info(f"距离: {hit.distance}") + continue except Exception as e: logger.error(f"查询失败: {e}") diff --git a/dsRag/Test/expand_with_synonyms.py b/dsRag/Test/expand_with_synonyms.py new file mode 100644 index 00000000..492baee2 --- /dev/null +++ b/dsRag/Test/expand_with_synonyms.py @@ -0,0 +1,21 @@ +from nltk.corpus import wordnet +import jieba + +def expand_with_synonyms(query): + words = jieba.lcut(query) + expanded = [] + for word in words: + synonyms = set() + for syn in wordnet.synsets(word, lang='cmn'): + for lemma in syn.lemma_names('cmn'): + synonyms.add(lemma) + if synonyms: + expanded.append(f"({'|'.join(synonyms)})") + else: + expanded.append(word) + return ' '.join(expanded) + +original_query = "微积分的基本定理是什么?" +expanded_query = expand_with_synonyms(original_query) +print(f"原始查询: {original_query}") +print(f"扩展后查询: {expanded_query}") \ No newline at end of file diff --git a/dsRag/static/ai.html b/dsRag/static/ai.html index 6b1ff779..0afd627f 100644 --- a/dsRag/static/ai.html +++ b/dsRag/static/ai.html @@ -3,7 +3,7 @@
-