| 知乎专栏 |
https://github.com/amueller/word_cloud
pip install wordcloud
演示
import wordcloud

# Minimal demo: build a word cloud with the default WordCloud() settings from a
# space-separated list of words and write the rendered image to a file.
text = "Netkiller Neo Linux Nginx SSH Ubuntu CentOS MySQL PostgreSQL Java Python"

w = wordcloud.WordCloud()
w.generate(text)
w.to_file("wordcloud.png")
更多演示代码 https://amueller.github.io/word_cloud/auto_examples/index.html#example-gallery
neo@MacBook-Pro-Neo ~ % wordcloud_cli -h
usage: wordcloud_cli [-h] [--text file] [--regexp regexp] [--stopwords file] [--imagefile file]
[--fontfile path] [--mask file] [--colormask file] [--contour_width width]
[--contour_color color] [--relative_scaling rs] [--margin width]
[--width width] [--height height] [--color color] [--background color]
[--no_collocations] [--include_numbers] [--min_word_length min_word_length]
[--prefer_horizontal ratio] [--scale scale] [--colormap map] [--mode mode]
[--max_words N] [--min_font_size size] [--max_font_size size]
[--font_step step] [--random_state seed] [--no_normalize_plurals] [--repeat]
[--version]
A simple command line interface for wordcloud module.
optional arguments:
-h, --help show this help message and exit
--text file specify file of words to build the word cloud (default: stdin)
--regexp regexp override the regular expression defining what constitutes a word
--stopwords file specify file of stopwords (containing one word per line) to remove from
the given text after parsing
--imagefile file file the completed PNG image should be written to (default: stdout)
--fontfile path path to font file you wish to use (default: DroidSansMono)
--mask file mask to use for the image form
--colormask file color mask to use for image coloring
--contour_width width
if greater than 0, draw mask contour (default: 0)
--contour_color color
use given color as mask contour color - accepts any value from
PIL.ImageColor.getcolor
--relative_scaling rs
scaling of words by frequency (0 - 1)
--margin width spacing to leave around words
--width width define output image width
--height height define output image height
--color color use given color as coloring for the image - accepts any value from
PIL.ImageColor.getcolor
--background color use given color as background color for the image - accepts any value from
PIL.ImageColor.getcolor
--no_collocations do not add collocations (bigrams) to word cloud (default: add unigrams and
bigrams)
--include_numbers include numbers in wordcloud?
--min_word_length min_word_length
only include words with more than X letters
--prefer_horizontal ratio
ratio of times to try horizontal fitting as opposed to vertical
--scale scale scaling between computation and drawing
--colormap map matplotlib colormap name
--mode mode use RGB or RGBA for transparent background
--max_words N maximum number of words
--min_font_size size smallest font size to use
--max_font_size size maximum font size for the largest word
--font_step step step size for the font
--random_state seed random seed
--no_normalize_plurals
whether to remove trailing 's' from words
--repeat whether to repeat words and phrases
--version show program's version number and exit
w = wordcloud.WordCloud(<参数>)
参数 / 描述:
width:指定词云对象生成图片的宽度,默认400像素
height:指定词云对象生成图片的高度,默认200像素
min_font_size:指定词云中字体的最小字号,默认4号
max_font_size:指定词云中字体的最大字号,根据高度自动调节
font_step:指定词云中字体字号的步进间隔,默认为1
font_path:指定字体文件的路径,默认None
max_words:指定词云显示的最大单词数量,默认200
stopwords:指定词云的排除词列表,即不显示的单词列表
mask:指定词云形状,默认为长方形,需要引用imread()函数
background_color:指定词云图片的背景颜色,默认为黑色
import wordcloud

# Same demo as before, but rendered on a white background instead of the
# library default.
text = "Netkiller Neo Linux Nginx SSH Ubuntu CentOS MySQL PostgreSQL Java Python"

# generate() returns the WordCloud instance, so construction and generation
# can be chained while still keeping a handle for saving.
w = wordcloud.WordCloud(background_color="white").generate(text)
w.to_file("wordcloud.png")
# encoding=utf-8
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import jieba
import jieba.analyse

# Optional: load an image to use as the mask that shapes the cloud.
# background = plt.imread('background.jpg')

# Read the document the word cloud is generated from. The context manager
# closes the file handle, and the explicit encoding keeps Chinese text from
# being decoded with a platform-dependent default.
with open('text.txt', 'r', encoding='utf-8') as source_file:
    content = source_file.read()

# Alternative: plain segmentation with jieba.lcut(content) (precise mode by
# default) instead of keyword extraction.
# Extract at most 50 keywords (topK=50) from the document.
tags = jieba.analyse.extract_tags(content, topK=50)
text = ", ".join(tags)
print(text)

wordcloud = WordCloud(
    background_color='white',  # background colour; the default is black
    # mask=background,  # uncomment to shape the cloud with the mask image
    # A font that contains Chinese glyphs is required for Chinese words to
    # be displayed.
    font_path='/Library/Fonts/AdobeSongStd-Light.otf',
    width=1024,  # image width
    height=768,  # image height
    margin=5,  # blank space left around words
).generate(text)

# Preview the result in a matplotlib window without axes.
plt.imshow(wordcloud)
plt.axis('off')
plt.show()
plt.close()

# Save the image.
wordcloud.to_file('wordcloud.jpg')
# encoding=utf-8
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from wordcloud import WordCloud
import jieba
import jieba.analyse

# Read the source document. The context manager closes the file handle, and
# the explicit encoding keeps Chinese text from being decoded with a
# platform-dependent default.
with open('text.txt', 'r', encoding='utf-8') as source_file:
    content = source_file.read()

# Extract at most 50 keywords (topK=50) and join them into the text that is
# fed to the word cloud.
tags = jieba.analyse.extract_tags(content, topK=50)
text = ", ".join(tags)
# print(text)

# Load the mask image as an array; the image file is closed once the pixel
# data has been copied. Another mask such as "stormtrooper_mask.png" can be
# substituted here.
with Image.open("background.png") as mask_image:
    mask = np.array(mask_image)

wordcloud = WordCloud(
    background_color='white',
    font_path='/Library/Fonts/AdobeSongStd-Light.otf',
    mask=mask,  # mask image that determines the cloud's shape
    # NOTE: per the wordcloud documentation, width and height are ignored
    # when a mask is supplied; the mask's own shape is used instead.
    width=1024,  # image width
    height=768,  # image height
    margin=5,  # blank space left around words
).generate(text)

# Save the image.
wordcloud.to_file('wordcloud.jpg')

# Preview the result in a matplotlib window without axes.
plt.imshow(wordcloud)
plt.axis('off')
plt.show()
plt.close()