From 6f51591c13946828c80da342d2d16842a90c6e7e Mon Sep 17 00:00:00 2001 From: momerio <84517709+momerio@users.noreply.github.com> Date: Wed, 22 Dec 2021 04:15:47 +0900 Subject: [PATCH] =?UTF-8?q?=E5=90=8D=E8=A9=9E=E9=80=A3=E7=B5=90=E9=96=A2?= =?UTF-8?q?=E6=95=B0=E4=BD=9C=E6=88=90=EF=BC=8E=E3=81=A0=E3=81=8C=E3=81=97?= =?UTF-8?q?=E3=81=8B=E3=81=97=EF=BC=8C=20=E5=90=8D=E8=A9=9E=E9=80=A3?= =?UTF-8?q?=E7=B5=90=E3=81=97=E3=81=AA=E3=81=84=E3=81=BB=E3=81=86=E3=81=8C?= =?UTF-8?q?=E8=89=AF=E3=81=84=E3=81=A8=E6=80=9D=E3=81=86=EF=BC=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/cmd_message.py | 78 +++++++++++++++++++++-------------- src/project/RepoMainForPy.git | 1 + 2 files changed, 49 insertions(+), 30 deletions(-) create mode 160000 src/project/RepoMainForPy.git diff --git a/src/cmd_message.py b/src/cmd_message.py index 1f2768b..ccca3cd 100644 --- a/src/cmd_message.py +++ b/src/cmd_message.py @@ -5,6 +5,7 @@ from tqdm import tqdm import seaborn as sns import matplotlib.pyplot as plt +import re def _mecab_wakati(text): @@ -30,6 +31,43 @@ def _mecab(text): return node_list +def _get_linked_nouns(mecab_all_list): + """ + 名詞連結する + + @param: + meab_all_list: _mecab関数の戻り値 + """ + + # 英数記号のみ + re_alphabet_number_and_symbol = re.compile(r"^[a-zA-Z0-9!-/:-@¥[-`{-~]*$") + + nouns = [] + linked_nouns_temp = [] + for idx in mecab_all_list: + if re_alphabet_number_and_symbol.fullmatch(idx[0]) is not None: + # すべて英語,数字,記号の場合は名詞連結しない + linked_nouns_temp.append([idx[0]]) + elif idx[1] == "名詞": + # 品詞が名詞であれば + nouns.append(idx[0]) + else: + linked_nouns_temp.append(nouns) + nouns = [] + else: + linked_nouns_temp.append(nouns) + + # 結合と空リストを削除 + linked_nouns = [] + for idx in linked_nouns_temp: + if len(idx) >= 1: + linked_nouns.append("".join(idx)) + else: + pass + + return linked_nouns + + # キーワード解析 def keyword_analy(text): key_out = [] @@ -129,21 +167,10 @@ def _wordcloud_all_messages(repo, wordCloudGenerator): mecab_all = _mecab(text) # 形態素解析 # 名詞及び名詞連結を取得# - """ - 名詞連結は,現状うまく動かないので,一旦コメントアウト - """ - # mecab_linking_noun = [] - # for m in range(len(mecab_all)-1): - # if mecab_all[m][1] == "名詞" and mecab_all[m+1][1] == "名詞": - # mecab_linking_noun.append( - # mecab_all[m][0]+mecab_all[m+1][0]) - # elif mecab_all[m][1] == "名詞": - # mecab_linking_noun.append(mecab_all[m][0]) - # else: - # pass + # mecab_only_nouns = [m[0] for m in mecab_all if m[1] == "名詞"] # 名詞のみ取得 + mecab_only_nouns = _get_linked_nouns(mecab_all) # 名詞連結で取得 - mecab_only_noun = [m[0] for m in mecab_all if m[1] == "名詞"] # 名詞のみ取得 - wakati = " ".join(mecab_only_noun) # 分かち書き + wakati = " ".join(mecab_only_nouns) # 分かち書き wordCloudGenerator.out_file_name = OUT_FILE_NAME # 出力ファイル名 wordCloudGenerator.wordcloud_draw(wakati) # 出力 @@ -186,23 +213,13 @@ def _wordcloud_by_author(repo, wordCloudGenerator): continue # 名詞及び名詞連結を取得# - """ - 名詞連結は,現状うまく動かないので,一旦コメントアウト - """ - # mecab_linking_noun = [] - # for m in range(len(mecab_all)-1): - # if mecab_all[m][1] == "名詞" and mecab_all[m+1][1] == "名詞": - # mecab_linking_noun.append( - # mecab_all[m][0]+mecab_all[m+1][0]) - # elif mecab_all[m][1] == "名詞": - # mecab_linking_noun.append(mecab_all[m][0]) - # else: - # pass + # mecab_only_nouns = [m[0] for m in mecab_all if m[1] == "名詞"] # 名詞のみ取得 + mecab_only_nouns = _get_linked_nouns(mecab_all) # 名詞連結で取得 - mecab_only_noun = [m[0] for m in mecab_all if m[1] == "名詞"] # 名詞のみ取得 - wakati = " ".join(mecab_only_noun) # 分かち書き + wakati = " ".join(mecab_only_nouns) # 分かち書き - wordCloudGenerator.out_file_name = OUT_FILE_NAME.format(author) # 出力ファイル名 + wordCloudGenerator.out_file_name = OUT_FILE_NAME.format( + author) # 出力ファイル名 wordCloudGenerator.wordcloud_draw(wakati) # 出力 @@ -281,7 +298,8 @@ def run(repo): mecab_only_noun = [m[0] for m in mecab_all if m[1] == "名詞"] # 名詞のみ取得 wakati = " ".join(mecab_only_noun) # 分かち書き - frequency_words = wordCloudGenerator.frequency_count(wakati).most_common(30) + frequency_words = wordCloudGenerator.frequency_count( + wakati).most_common(30) sns.set(context="talk", font='Yu Gothic') fig = plt.subplots(figsize=(18, 8)) sns.countplot(y=mecab_only_noun, order=[i[0] for i in frequency_words]) diff --git a/src/project/RepoMainForPy.git b/src/project/RepoMainForPy.git new file mode 160000 index 0000000..168f71d --- /dev/null +++ b/src/project/RepoMainForPy.git @@ -0,0 +1 @@ +Subproject commit 168f71d97b1ac7f60417b6abf6c79b85ba4c611e