import newspaper
# Word frequency counting
import collections
# numpy library
import numpy as np
# jieba Chinese word segmentation
import jieba
# Word cloud library
import wordcloud
# Image processing library
from PIL import Image
# Plotting library
import matplotlib.pyplot as plt

# Point to the article
article = newspaper.Article('https://news.sina.com.cn/o/2019-11-28/doc-iihnzahi3991780.shtml')
# Download the article
article.download()
# Parse the article
article.parse()
# Run NLP processing (keyword extraction, etc.)
article.nlp()
# Join the NLP-extracted keywords into a single string
article_words = "".join(article.keywords)
# Precise-mode segmentation (the default mode)
seg_list_exact = jieba.cut(article_words, cut_all=False)
# Store the segmentation results
object_list = []
# Words to filter out (stopwords)
rm_words = [' To meet ', ' since ', ' take ']
# Iterate over the segmented words, dropping the stopwords
for word in seg_list_exact:
    if word not in rm_words:
        object_list.append(word)
# Word frequency statistics
word_counts = collections.Counter(object_list)
# Get the 10 most frequent words
word_top10 = word_counts.most_common(10)
# Print each word and its count
for w, c in word_top10:
    print(w, c)

# Word cloud display
# Load the background image to use as the mask
mask = np.array(Image.open('bg.jpg'))
wc = wordcloud.WordCloud(
    # Set the font (required for Chinese characters)
    font_path='C:/Windows/Fonts/simhei.ttf',
    # Background mask
    mask=mask,
    # Maximum number of words displayed
    max_words=100,
    # Maximum font size
    max_font_size=80
)
# Generate the word cloud from the frequency dictionary
wc.generate_from_frequencies(word_counts)
# Create a color scheme from the background image
image_colors = wordcloud.ImageColorGenerator(mask)
# Recolor the word cloud to match the background image colors
wc.recolor(color_func=image_colors)
# Show the word cloud
plt.imshow(wc)
# Turn off the axes
plt.axis('off')
plt.savefig('wc.jpg')
# Display the image
plt.show()
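
# Optional variant, a minimal sketch rather than part of the original tutorial:
# segment the full article body (article.text) instead of the joined keywords,
# which usually gives richer frequencies, and save the image directly with
# WordCloud.to_file so the output keeps the mask's native resolution instead of
# going through plt.savefig. The output name 'wc_fulltext.png' is an arbitrary
# example; everything else reuses the objects defined above.
full_counts = collections.Counter(
    w for w in jieba.cut(article.text) if w.strip() and w not in rm_words
)
wc.generate_from_frequencies(full_counts)
wc.to_file('wc_fulltext.png')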