You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
21 lines
683 B
21 lines
683 B
from nltk.corpus import wordnet
|
|
import jieba
|
|
|
|
def expand_with_synonyms(query: str) -> str:
    """Expand each token of a query with its Mandarin WordNet synonyms.

    The query is segmented with ``jieba.lcut``. For every token, the
    ``'cmn'`` lemma names of all WordNet synsets matching that token are
    collected. A token with at least one lemma is rendered as an
    alternation group ``(a|b|c)``; a token with none is kept unchanged.
    The rendered tokens are joined with single spaces.

    Inside a group the original token always comes first, followed by the
    remaining lemmas in sorted order, so the result is reproducible from
    run to run and never loses the term the user actually typed.

    NOTE(review): assumes the NLTK WordNet data with Mandarin ('cmn')
    lemmas is installed -- confirm in the deployment environment.

    Args:
        query: Query text to segment and expand.

    Returns:
        The expanded query string; an empty string when segmentation
        yields no tokens.
    """
    expanded = []
    for word in jieba.lcut(query):
        # Gather every Mandarin lemma from every synset of this token.
        synonyms = {
            lemma
            for syn in wordnet.synsets(word, lang='cmn')
            for lemma in syn.lemma_names('cmn')
        }
        if not synonyms:
            # No WordNet entry: pass the token through untouched.
            expanded.append(word)
            continue
        # Set iteration order of strings is not stable across interpreter
        # runs, so order the group explicitly: original token first, then
        # the other lemmas sorted.
        synonyms.discard(word)
        alternatives = [word, *sorted(synonyms)]
        expanded.append(f"({'|'.join(alternatives)})")
    return ' '.join(expanded)
|
|
|
|
# Demo run: expand a sample query and show it next to the original.
original_query = "微积分的基本定理是什么?"
expanded_query = expand_with_synonyms(original_query)

# A single print call with a newline separator emits the same two lines
# as two consecutive print calls would.
print(
    f"原始查询: {original_query}",
    f"扩展后查询: {expanded_query}",
    sep="\n",
)