You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.
# pip install numpy-1.21.4+mkl-cp39-cp39-win_amd64.whl
# pip install wordcloud-1.8.1-cp39-cp39-win_amd64.whl
# pip install imageio
# pip install jieba
from wordcloud import WordCloud
from imageio import imread
import matplotlib . pyplot as plt
import jieba
def read_deal_text(src_path="ciyun.txt", dst_path="txt_save.txt"):
    """Clean a raw text file, segment it with jieba, and save the tokens.

    Reads ``src_path`` as UTF-8, replaces punctuation and whitespace
    variants with a plain space, segments the result with ``jieba.lcut``,
    prints the token list, and writes the tokens to ``dst_path`` with a
    single space after each token.

    Args:
        src_path: Path of the text file to process.
        dst_path: Path of the file that receives the space-separated tokens.

    Raises:
        OSError: If ``src_path`` cannot be read or ``dst_path`` cannot be
            written.
    """
    # Characters treated as noise.  Both the ASCII and the full-width comma
    # are listed so Chinese text is handled alongside the ideographic full
    # stop; the plain space of the original list is omitted because mapping
    # a space to a space is a no-op.
    noise = ",，。\n\xa0\u3000…"
    # One translation pass instead of one str.replace() call per character.
    to_space = str.maketrans(noise, " " * len(noise))

    with open(src_path, "r", encoding="utf-8") as src:
        text = src.read().translate(to_space)

    # jieba.lcut returns the segmentation result as a list of tokens.
    words = jieba.lcut(text)
    print(words)

    with open(dst_path, "w", encoding="utf-8") as dst:
        # Every token is followed by a space (including the last one), which
        # keeps the output format of the original per-token write loop.
        dst.writelines(f"{word} " for word in words)
    print("文本处理完成")
def img_grearte(text_path="txt_save.txt", mask_path="boy.png",
                font_path="simhei.ttf", out_path="test.png"):
    """Render a word cloud from a token file, save it, and display it.

    Loads the mask image with ``imread``, reads the space-separated tokens
    from ``text_path``, builds a ``WordCloud`` on a white background, writes
    the image to ``out_path``, and finally shows it in a matplotlib window.

    Args:
        text_path: UTF-8 file containing the text to visualise.
        mask_path: Image passed to ``WordCloud`` as its ``mask``.
        font_path: Font file used to draw the words.
        out_path: Destination of the rendered image.

    Raises:
        OSError: If ``text_path`` cannot be read.
    """
    mask = imread(mask_path)
    with open(text_path, "r", encoding="utf-8") as src:
        text = src.read()

    # NOTE(review): the wordcloud documentation states that width/height are
    # ignored when a mask is supplied; they are kept so the call matches the
    # original configuration.
    cloud = WordCloud(
        background_color="white",
        width=800,
        height=800,
        font_path=font_path,
        mask=mask,
    ).generate(text)
    cloud.to_file(out_path)
    print("词云图片已保存")

    # Display the rendered cloud with matplotlib, hiding the axes.
    plt.imshow(cloud)
    plt.axis("off")
    plt.show()
# Run the pipeline only when this file is executed as a script, so that
# importing the module does not read or write any files.
if __name__ == "__main__":
    read_deal_text()
    img_grearte()