「Mecab」の版間の差分
ナビゲーションに移動
検索に移動
Sufee Admin (トーク | 投稿記録) ページの作成:「== 概要 == 文章を配列等に変換させたり、詞のカテゴリに分けたりする優れもの == 使用例 == === 文章を配列化 ===」 |
Sufee Admin (トーク | 投稿記録) 編集の要約なし |
||
(同じ利用者による、間の4版が非表示) | |||
1行目: | 1行目: | ||
== 概要 == | == 概要 == | ||
文章を配列等に変換させたり、詞のカテゴリに分けたりする優れもの | 文章を配列等に変換させたり、詞のカテゴリに分けたりする優れもの | ||
== インストール == | |||
* mecab | |||
pip install mecab-python3 | |||
* kanjize | |||
pip install kanjize | |||
== 使用例 == | == 使用例 == | ||
=== 文章を配列化 === | === 文章を配列化 === | ||
<syntaxhighlight lang="python3"> | |||
from typing import Any | |||
import MeCab | |||
from kanjize import int2kanji, kanji2int | |||
from datetime import datetime | |||
from dateutil.relativedelta import relativedelta | |||
def strToNumber(num,i): | |||
try: | |||
print(num[i-1]) | |||
if "." == num[i-1][0]["other"]: | |||
print(str(num[i-2][0]) + str(num[i-1][0]["other"]) + str(num[i][0])) | |||
num[i][0] = float(str(num[i-2][0]) + str(num[i-1][0]["other"]) + str(num[i][0])) | |||
num[i-2][0] = None | |||
num[i-1][0] = None | |||
except Exception as e: | |||
print(e) | |||
def mecab_list(text): | |||
tagger = MeCab.Tagger("-Ochasen") | |||
tagger.parse('') | |||
number = ["0","1","2","3","4","5","6","7","8","9"] | |||
for i in range(len(number)): | |||
text = text.replace(number[i],str(i)) | |||
node = tagger.parseToNode(text) | |||
word_class = [] | |||
while node: | |||
word = node.surface | |||
wclass = node.feature.split(',') | |||
#print(wclass) | |||
if wclass[0] != u'BOS/EOS': | |||
if wclass[6] == None: | |||
word_class.append([word,wclass[0],wclass[1],wclass[2],"",""]) | |||
elif wclass[5]: | |||
word_class.append([word,wclass[0],wclass[1],wclass[2],wclass[5],wclass[6]]) | |||
else: | |||
word_class.append([word,wclass[0],wclass[1],wclass[2],"",wclass[6]]) | |||
node = node.next | |||
return word_class | |||
def strToint(num): | |||
try: | |||
return int(num) | |||
except: | |||
return kanji2int(num) | |||
msg = input("text>>") | |||
keitaiso = mecab_list(msg) | |||
print("--------------------------\n") | |||
print(keitaiso) | |||
print("--------------------------") | |||
</syntaxhighlight> | |||
[[カテゴリ:Python]] |
2022年2月22日 (火) 16:09時点における最新版
概要
文章を単語の配列などに変換したり、単語を品詞ごとに分類したりできる優れもの
インストール
- mecab
pip install mecab-python3
- kanjize
pip install kanjize
- python-dateutil (使用例で dateutil.relativedelta を import しているため)
pip install python-dateutil
使用例
文章を配列化
from typing import Any
import MeCab
from kanjize import int2kanji, kanji2int
from datetime import datetime
from dateutil.relativedelta import relativedelta
def strToNumber(num, i):
    """Merge ``num[i-2]``, a "." entry at ``num[i-1]`` and ``num[i]`` into a float.

    Every entry of *num* is indexed with ``[0]``. When ``num[i-1][0]`` is a
    mapping whose ``"other"`` value equals ``"."``, the first items of the
    three neighbouring entries are concatenated as text, converted with
    ``float`` and stored in ``num[i][0]``; ``num[i-2][0]`` and
    ``num[i-1][0]`` are then set to ``None``.

    The list is modified in place and nothing is returned. Entries that do
    not have the expected shape are reported with ``print`` and left as they
    are (best effort, no exception escapes for shape problems).

    Args:
        num: List of entries to update in place.
        i: Index of the entry that holds the part after the ".".
    """
    if i < 2:
        # With i < 2 the indices i-1 / i-2 would be negative and silently
        # wrap around to the END of the list, merging unrelated entries.
        return
    try:
        # Trace output kept from the original example.
        print(num[i - 1])
        separator = num[i - 1][0]["other"]
        if separator == ".":
            literal = str(num[i - 2][0]) + str(separator) + str(num[i][0])
            print(literal)
            # Convert before touching any slot so a failed conversion
            # leaves all three entries unchanged.
            num[i][0] = float(literal)
            num[i - 2][0] = None
            num[i - 1][0] = None
    except (IndexError, KeyError, TypeError, ValueError) as e:
        # Only the errors that indexing / float() can raise for malformed
        # entries are treated as "not a decimal number here".
        print(e)
def mecab_list(text):
    """Tokenize *text* with MeCab and return one row per token.

    Each row is ``[surface, f0, f1, f2, f5, f6]`` where ``fN`` is the N-th
    comma-separated field of the node's ``feature`` string. Nodes whose
    first field is ``"BOS/EOS"`` are skipped.

    When the feature string has fewer than seven fields, the last two
    columns are empty strings; missing ``f1`` / ``f2`` are also returned as
    empty strings instead of raising ``IndexError``.

    NOTE(review): the meaning of each field depends on the installed
    dictionary (presumably an IPAdic-style layout was assumed) — confirm.

    Args:
        text: The sentence to analyse.

    Returns:
        A list of 6-element lists, in token order.
    """
    tagger = MeCab.Tagger("-Ochasen")
    # NOTE(review): parsing an empty string first looks like the commonly
    # used workaround for unreadable ``node.surface`` values in older
    # bindings — confirm whether it is still required.
    tagger.parse('')

    # Normalise full-width digits (U+FF10..U+FF19) to ASCII in one pass.
    # The previous loop replaced each ASCII digit with itself, i.e. it was
    # a no-op; presumably the full-width forms were intended.
    text = text.translate({0xFF10 + digit: str(digit) for digit in range(10)})

    rows = []
    node = tagger.parseToNode(text)
    while node:
        fields = node.feature.split(',')
        if fields[0] != u'BOS/EOS':
            # str.split never yields None, so a missing field shows up as
            # a short list, not as a None entry.
            if len(fields) > 6:
                fifth, sixth = fields[5], fields[6]
            else:
                fifth, sixth = "", ""
            # Pad so that fields[1] / fields[2] always exist.
            fields += [""] * (3 - len(fields))
            rows.append(
                [node.surface, fields[0], fields[1], fields[2], fifth, sixth]
            )
        node = node.next
    return rows
def strToint(num):
    """Convert *num* to ``int``, falling back to ``kanji2int``.

    ``int(num)`` is tried first. Only when that fails because the value is
    not a valid integer literal (``ValueError``) or has an unsupported type
    (``TypeError``) is *num* handed to ``kanji2int``. Other exceptions,
    including ``KeyboardInterrupt``, are no longer swallowed.

    Args:
        num: Value to convert.

    Returns:
        The result of ``int(num)``, or of ``kanji2int(num)`` on fallback.
    """
    try:
        return int(num)
    except (TypeError, ValueError):
        return kanji2int(num)
# Interactive demo: read one line from the user, analyse it with
# mecab_list and show the resulting rows between two separator lines.
msg = input("text>>")
keitaiso = mecab_list(msg)
# A single print call; sep="\n" reproduces the line breaks that three
# consecutive print calls would emit.
print(
    "--------------------------\n",
    keitaiso,
    "--------------------------",
    sep="\n",
)