Mecab

提供:sufeeWiki
ナビゲーションに移動 検索に移動

概要

文章を単語(形態素)の配列などに分割したり、各単語を品詞ごとに分類したりできる優れもの(形態素解析エンジン)

インストール

  • mecab
pip install mecab-python3
  • kanjize
pip install kanjize
  • python-dateutil(使用例のコードが dateutil をインポートしているため)
pip install python-dateutil

使用例

文章を配列化

from typing import Any
import MeCab
from kanjize import int2kanji, kanji2int
from datetime import datetime
from dateutil.relativedelta import relativedelta


def strToNumber(num,i):
    """Merge an ``<integer> . <fraction>`` token triple ending at ``i`` into a float.

    ``num`` is a mutable sequence of token records whose element 0 holds the
    token value. When the value at ``i-1`` is a mapping whose ``"other"``
    entry equals ``"."``, the values at ``i-2``, ``i-1`` and ``i`` are
    concatenated, parsed with ``float`` and stored in ``num[i][0]``; the two
    consumed slots (``num[i-2][0]`` and ``num[i-1][0]``) are set to ``None``.

    ``num`` is modified in place and nothing is returned. Records that do not
    have that shape are left untouched and the reason is printed.
    """
    # A decimal needs two tokens in front of position i. For i == 0 or
    # i == 1 the indices i-1 / i-2 would go negative and wrap around to the
    # END of the list, merging unrelated tokens, so there is nothing to do.
    if i in (0, 1):
        return
    try:
        print(num[i-1])  # debug trace of the candidate separator record
        separator = num[i-1][0]["other"]
        if separator == ".":
            literal = str(num[i-2][0]) + str(separator) + str(num[i][0])
            print(literal)  # debug trace of the text about to be parsed
            # Parse before clearing anything so a failed parse leaves num intact.
            num[i][0] = float(literal)
            num[i-2][0] = None
            num[i-1][0] = None
    except (IndexError, KeyError, TypeError, ValueError) as e:
        # Best effort: records of another shape (plain strings, no "other"
        # key, out-of-range index, non-numeric text) are reported, not fatal.
        print(e)

def mecab_list(text):
    """Tokenize ``text`` with MeCab and return one record per morpheme.

    Full-width digits (U+FF10..U+FF19) are converted to ASCII digits before
    parsing.

    Returns a list of 6-element lists
    ``[surface, f0, f1, f2, f5, f6]`` where ``fN`` is the N-th comma-separated
    field of the node's feature string. Nodes whose first field is
    ``BOS/EOS`` are skipped. ``f5`` is ``""`` when that field is empty, and
    both ``f5`` and ``f6`` are ``""`` when the feature string has fewer than
    seven fields.

    NOTE(review): with the IPA dictionary the fields are presumably part of
    speech, two sub-categories, conjugation form and base form -- confirm
    against the dictionary that is actually installed.
    """
    tagger = MeCab.Tagger("-Ochasen")
    # NOTE(review): parsing an empty string first is presumably the usual
    # workaround for empty node.surface values in some mecab-python3
    # releases -- confirm before removing.
    tagger.parse('')
    # Map each full-width digit onto its ASCII counterpart in a single pass.
    fullwidth_to_ascii = {0xFF10 + digit: str(digit) for digit in range(10)}
    text = text.translate(fullwidth_to_ascii)
    node = tagger.parseToNode(text)
    word_class = []
    while node:
        word = node.surface
        wclass = node.feature.split(',')
        if wclass[0] != 'BOS/EOS':
            if len(wclass) < 7:
                # Short feature string (e.g. unknown word or another
                # dictionary): pad the leading fields instead of raising
                # IndexError, and leave the trailing two slots empty.
                head = wclass + [""] * (3 - len(wclass))
                word_class.append([word, head[0], head[1], head[2], "", ""])
            elif wclass[5]:
                word_class.append([word, wclass[0], wclass[1], wclass[2], wclass[5], wclass[6]])
            else:
                word_class.append([word, wclass[0], wclass[1], wclass[2], "", wclass[6]])
        node = node.next
    return word_class

def strToint(num):
    """Convert ``num`` to an ``int``, falling back to kanji numerals.

    ``int(num)`` is tried first. If it rejects the value with ``ValueError``
    or ``TypeError``, the value is passed to ``kanji2int`` and that result is
    returned; any exception raised by ``kanji2int`` propagates to the caller.
    """
    try:
        return int(num)
    except (ValueError, TypeError):
        # Only a failed conversion triggers the fallback; KeyboardInterrupt
        # and SystemExit are no longer swallowed as a bare ``except`` would.
        return kanji2int(num)
    
    
# Interactive demo: read one line of text, analyse it with mecab_list and
# print the resulting token records between two dashed rules.
msg = input("text>>")
keitaiso = mecab_list(msg)

# One print call; sep="\n" reproduces the line breaks of three separate calls.
print("--------------------------\n", keitaiso, "--------------------------", sep="\n")