MeCab
ナビゲーションに移動
検索に移動
概要
文章を単語(形態素)の配列などに変換したり、品詞ごとに分類したりできる優れもの
インストール
- mecab
pip install mecab-python3
- kanjize
pip install kanjize
使用例
文章を配列化
from typing import Any
import MeCab
from kanjize import int2kanji, kanji2int
from datetime import datetime
from dateutil.relativedelta import relativedelta
def strToNumber(num,i):
    """Merge an ``<integer> . <fraction>`` token triple ending at *i* into a float.

    ``num`` is a mutable sequence whose entries are read and written as
    ``num[k][0]``.  When ``num[i-1][0]["other"]`` equals ``"."``, the values
    at ``i-2``, ``i-1`` and ``i`` are concatenated as text, parsed with
    ``float`` and stored in ``num[i][0]``; the two earlier slots are then set
    to ``None``.

    The sequence is modified in place and nothing is returned.  Lookup, type
    and parse failures are reported with ``print`` and otherwise ignored, so
    a malformed entry leaves ``num`` untouched.
    """
    # For i == 0 or i == 1 the offsets i-1 / i-2 become negative and would
    # wrap around to the END of the list, merging an unrelated trailing token
    # and overwriting it with None.  There is no complete triple to merge.
    if 0 <= i < 2:
        return
    try:
        separator = num[i - 1]
        print(separator)
        if "." == separator[0]["other"]:
            merged = str(num[i - 2][0]) + str(separator[0]["other"]) + str(num[i][0])
            print(merged)
            # Parse before clearing anything so a ValueError changes nothing.
            num[i][0] = float(merged)
            num[i - 2][0] = None
            num[i - 1][0] = None
    except (LookupError, TypeError, ValueError) as e:
        # Best effort: entries that are not shaped like a decimal triple
        # (missing index/key, non-dict separator, unparsable text) are
        # reported and skipped rather than aborting the caller.
        print(e)
def mecab_list(text):
    """Tokenize *text* with MeCab and return one row per token.

    Each row is ``[surface, f0, f1, f2, f5, f6]`` where ``f0``..``f6`` are
    the comma-separated fields of the node's feature string.  The BOS/EOS
    sentinel nodes are skipped.  When a feature string has fewer than seven
    fields the last two columns are ``""``, and missing leading fields are
    likewise ``""``.

    NOTE(review): with the IPADIC feature layout, fields 0-2 are the part of
    speech and its sub-categories, field 5 the conjugated form and field 6
    the base form; other dictionaries use a different layout — confirm
    against the installed dictionary.
    """
    # Normalise full-width digits to ASCII before tokenizing.  The original
    # table mapped "0".."9" onto themselves, which did nothing.
    text = text.translate(str.maketrans("０１２３４５６７８９", "0123456789"))

    tagger = MeCab.Tagger("-Ochasen")
    # NOTE(review): presumably the usual workaround for older bindings in
    # which node.surface is unreliable unless parse() ran first — confirm
    # whether it is still needed.
    tagger.parse('')
    node = tagger.parseToNode(text)

    word_class = []
    while node:
        wclass = node.feature.split(',')
        if wclass[0] != 'BOS/EOS':
            # str.split never yields None, so a short feature list has to be
            # detected by length; indexing wclass[6] directly would raise.
            pos = (wclass + ["", "", ""])[:3]
            tail = wclass[5:7] if len(wclass) >= 7 else ["", ""]
            word_class.append([node.surface, *pos, *tail])
        node = node.next
    return word_class
def strToint(num):
    """Convert *num* to an ``int``.

    ``int(num)`` is tried first; if it rejects the value, the result of
    ``kanji2int(num)`` (imported from ``kanjize``) is returned instead.
    Whatever ``kanji2int`` raises propagates to the caller.
    """
    try:
        return int(num)
    except (TypeError, ValueError):
        # Only conversion failures trigger the fallback; a bare ``except``
        # would also swallow KeyboardInterrupt and SystemExit.
        return kanji2int(num)
# Demo: read one line from stdin, tokenize it, and print the resulting rows
# between two separator lines.
msg = input("text>>")
keitaiso = mecab_list(msg)
for line in ("--------------------------\n", keitaiso, "--------------------------"):
    print(line)