You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
|
# pip install numpy-1.21.4+mkl-cp39-cp39-win_amd64.whl
|
|
|
|
|
# pip install wordcloud-1.8.1-cp39-cp39-win_amd64.whl
|
|
|
|
|
# pip install imageio
|
|
|
|
|
# pip install jieba
|
|
|
|
|
|
|
|
|
|
from wordcloud import WordCloud
|
|
|
|
|
from imageio import imread
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
import jieba
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def read_deal_text(src_path: str = "ciyun.txt", dst_path: str = "txt_save.txt") -> None:
    """Segment a UTF-8 text file with jieba and save the tokens to disk.

    The source text is read in full, a fixed set of punctuation/whitespace
    characters is stripped, the remainder is segmented with ``jieba.lcut``
    and the tokens are written to ``dst_path``, each followed by one space.

    Args:
        src_path: Path of the UTF-8 text to process. Defaults to the
            original hard-coded ``"ciyun.txt"``.
        dst_path: Path the space-separated tokens are written to (the file
            is overwritten). Defaults to the original ``"txt_save.txt"``.

    Raises:
        OSError: If ``src_path`` cannot be read or ``dst_path`` cannot be
            written.
    """
    with open(src_path, "r", encoding='utf-8') as f:  # load the text to process
        txt = f.read()

    # Characters treated as noise and removed before segmentation.
    re_move = [",", "。", '\n', '\xa0', ' ', '\u3000', '…']
    # Every entry is a single character, so one translate() pass deletes them
    # all and yields the same result as replacing them one at a time.
    txt = txt.translate(str.maketrans("", "", "".join(re_move)))

    word = jieba.lcut(txt)  # segment the cleaned text into a list of tokens
    print(word)

    with open(dst_path, 'w', encoding='utf-8') as file:
        # Each token is followed by a single space (including the last one),
        # emitted in one write instead of one write per token.
        file.write("".join(f"{token} " for token in word))
    print("文本处理完成")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def img_grearte(
    text_path: str = "txt_save.txt",
    mask_path: str = "boy.png",
    out_path: str = "test.png",
    font_path: str = "simhei.ttf",
) -> None:
    """Render a word cloud from a space-separated token file.

    The function name is kept exactly as-is so existing callers keep working.

    Args:
        text_path: UTF-8 file holding the text to render. Defaults to the
            original hard-coded ``"txt_save.txt"``.
        mask_path: Image loaded with ``imread`` and passed to ``WordCloud``
            as its ``mask``. Defaults to ``"boy.png"``.
        out_path: Where the rendered image is saved. Defaults to
            ``"test.png"``.
        font_path: Font file handed to ``WordCloud``. Defaults to
            ``"simhei.ttf"``.

    Raises:
        OSError: If ``text_path`` cannot be read.
    """
    mask = imread(mask_path)
    with open(text_path, "r", encoding='utf-8') as file:
        txt = file.read()

    # NOTE(review): the wordcloud documentation states that width/height are
    # ignored when a mask is supplied; they are kept here unchanged.
    word = WordCloud(
        background_color="white",
        width=800,
        height=800,
        font_path=font_path,
        mask=mask,
    ).generate(txt)

    word.to_file(out_path)
    print("词云图片已保存")

    # Show the generated image with matplotlib, without axis ticks.
    plt.imshow(word)
    plt.axis("off")
    plt.show()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Run the two stages in order: segment the source text into txt_save.txt,
# then build and display the word cloud from that file.
for _stage in (read_deal_text, img_grearte):
    _stage()
|