Mecab

提供:sufeeWiki
2022年2月22日 (火) 16:07時点におけるSufee Admin (トーク | 投稿記録)による版 (概要)
ナビゲーションに移動 検索に移動

概要

文章を単語の配列などに変換したり、単語を品詞のカテゴリごとに分類したりできる優れもの

インストール

pip install kanjize

使用例

文章を配列化

from typing import Any
import MeCab
from kanjize import int2kanji, kanji2int
from datetime import datetime
from dateutil.relativedelta import relativedelta


def strToNumber(num,i):
    """Merge an "<integer part> . <fractional part>" triple into one float, in place.

    ``num`` is indexed as ``num[k][0]``, so it is expected to be a list of
    mutable sequences whose first element is the token value. When the entry
    just before ``i`` holds a mapping whose ``"other"`` value is ``"."``, the
    values at ``i-2``, ``i-1`` and ``i`` are concatenated, converted with
    ``float`` and stored in ``num[i][0]``; ``num[i-2][0]`` and ``num[i-1][0]``
    are then set to ``None``.

    Args:
        num: Token list as described above; modified in place.
        i: Index of the entry holding the fractional part.

    Returns:
        None. Entries that do not form such a triple are left untouched; the
        reason is printed instead of being raised.
    """
    # A decimal needs two entries before position i. Without this guard the
    # negative indices i-1 / i-2 would wrap around to the end of the list and
    # merge (and blank out) unrelated entries.
    if i < 2:
        return
    try:
        print(num[i-1])
        if "." == num[i-1][0]["other"]:
            joined = str(num[i-2][0]) + str(num[i-1][0]["other"]) + str(num[i][0])
            print(joined)
            # Convert first: if float() fails nothing has been modified yet.
            num[i][0] = float(joined)
            num[i-2][0] = None
            num[i-1][0] = None
    except (IndexError, KeyError, TypeError, ValueError) as e:
        # Best effort: most entries are not a decimal-point mapping (plain
        # strings, None, missing "other" key, non-numeric parts). Report the
        # reason and leave the list as it is.
        print(e)

def mecab_list(text):
    """Tokenize ``text`` with MeCab and return one row per token.

    Args:
        text: The string to analyse. Full-width digits are converted to
            ASCII digits before parsing.

    Returns:
        A list of six-element lists
        ``[surface, f[0], f[1], f[2], f[5], f[6]]`` where ``f`` is the node's
        comma-separated ``feature`` string split into fields. Nodes whose
        first feature field is ``BOS/EOS`` are skipped. Feature fields that
        the dictionary does not provide are returned as ``""``.
        NOTE(review): the meaning of each field (part of speech, conjugation,
        base form, ...) depends on the installed dictionary - confirm.
    """
    tagger = MeCab.Tagger("-Ochasen")
    # NOTE(review): parsing an empty string first looks like the usual
    # workaround for unreadable node.surface values in some MeCab bindings
    # - confirm it is still needed with the installed version.
    tagger.parse('')

    # The previous per-digit replace loop mapped each ASCII digit onto itself
    # and therefore did nothing; presumably the intent was to normalise
    # full-width digits, which is what this translation does.
    text = text.translate(str.maketrans("０１２３４５６７８９", "0123456789"))

    word_class = []
    node = tagger.parseToNode(text)
    while node:
        features = node.feature.split(',')
        if features[0] != 'BOS/EOS':
            # split() never yields None, so a missing field shows up as a
            # short list; pad it so indices 5 and 6 always exist.
            features += [""] * (7 - len(features))
            word_class.append([
                node.surface,
                features[0],
                features[1],
                features[2],
                features[5],
                features[6],
            ])
        node = node.next
    return word_class

def strToint(num):
    """Convert ``num`` to an int, falling back to kanji numeral parsing.

    Args:
        num: Value accepted by ``int`` or a kanji numeral string understood
            by ``kanjize.kanji2int``.

    Returns:
        The result of ``int(num)``; when ``int`` rejects the value, the
        result of ``kanji2int(num)``.

    Raises:
        Whatever ``kanji2int`` raises when it cannot parse ``num`` either.
    """
    try:
        return int(num)
    except (TypeError, ValueError):
        # Only a failed int() conversion triggers the fallback; a bare
        # ``except`` would also swallow KeyboardInterrupt and SystemExit.
        return kanji2int(num)
    
    
# Interactive demo: read one line from the user, run it through mecab_list
# and print the resulting rows between two separator lines.
msg = input("text>>")
keitaiso = mecab_list(msg)

# A single print call; sep="\n" reproduces the line breaks that three
# consecutive print calls would emit (the first separator keeps its own
# trailing newline, which yields the blank line before the result).
print(
    "--------------------------\n",
    keitaiso,
    "--------------------------",
    sep="\n",
)