File tree Expand file tree Collapse file tree 1 file changed +17
-7
lines changed
Expand file tree Collapse file tree 1 file changed +17
-7
lines changed Original file line number Diff line number Diff line change 66from collections import Counter
77from wordcloud import WordCloud
88import matplotlib .pyplot as plt
9-
9+ import nltk
10+ from nltk .corpus import stopwords
11+ import re
12+ from collections import Counter
1013
# --- Scraping settings ---
URL = "https://healthguides.cnn.com/finding-the-right-migraine-treatment/how-to-ensure-your-doctor-understands-your-migraine-severity"
ARTICLE_SELECTOR = "article.ArticlePage-mainContent"
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get can block forever

# --- Word splitting settings ---
# Only the tokenizer and POS-tagger models are needed; downloading 'all'
# pulls the entire NLTK data collection on every run.  Both the legacy and
# the current resource names are requested because which pair is required
# depends on the installed NLTK release.
NLTK_RESOURCES = (
    "punkt",
    "punkt_tab",
    "averaged_perceptron_tagger",
    "averaged_perceptron_tagger_eng",
)
# Drop every character that is not sentence punctuation (. ? !), a word
# character or whitespace.  \w already covers digits, so \d is not needed.
DISALLOWED_CHARS = re.compile(r"[^.?!\w\s]")

# --- Word cloud settings ---
# NOTE(review): the reviewed copy of this path carried a space after every
# backslash, which cannot resolve to a real file; confirm the font location.
FONT_PATH = r"C:\Windows\Fonts\Gothic.ttf"
OUTPUT_FILE = "wordcloud.png"


def fetch_article_text(url=URL, selector=ARTICLE_SELECTOR):
    """Download *url* and return the text of the first element matching *selector*.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if no element on the page matches *selector*.
    """
    webpage = requests.get(url, timeout=REQUEST_TIMEOUT)
    webpage.raise_for_status()
    soup = BeautifulSoup(webpage.content, "html.parser")
    item = soup.select_one(selector)
    if item is None:
        # select_one returns None when nothing matches; fail with a clear
        # message instead of an AttributeError on `.text`.
        raise ValueError(f"No element matching {selector!r} found at {url}")
    return item.text


def clean_text(text: str) -> str:
    """Return *text* lower-cased with all punctuation except . ? ! removed."""
    return DISALLOWED_CHARS.sub("", str(text).lower())


def tag_words(text: str):
    """Tokenize *text* with NLTK and return a list of (word, POS tag) pairs."""
    for resource in NLTK_RESOURCES:
        nltk.download(resource, quiet=True)
    return nltk.pos_tag(nltk.word_tokenize(text))


def count_nouns(tagged_tokens) -> Counter:
    """Count the words whose POS tag starts with 'NN'.

    *tagged_tokens* is an iterable of (word, tag) pairs such as the output
    of ``nltk.pos_tag``.
    """
    return Counter(word for word, pos in tagged_tokens if pos.startswith("NN"))


def main():
    """Scrape the article, count its nouns and render them as a word cloud."""
    # Scraping
    text = clean_text(fetch_article_text())

    # Word splitting
    noun_counts = count_nouns(tag_words(text))
    if not noun_counts:
        raise SystemExit("No nouns found in the article; nothing to draw.")

    # Word cloud
    wordcloud = WordCloud(
        font_path=FONT_PATH,
        background_color="white",
        max_font_size=100,
    ).generate_from_frequencies(noun_counts)

    # Save before showing: plt.show() blocks until the window is closed, so
    # saving afterwards loses the image if the run is interrupted.
    wordcloud.to_file(OUTPUT_FILE)

    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.show()


if __name__ == "__main__":
    main()
You can’t perform that action at this time.
0 commit comments