「Mecab」の版間の差分
ナビゲーションに移動
検索に移動
Sufee Admin (トーク | 投稿記録) ページの作成:「== 概要 == 文章を配列等に変換させたり、詞のカテゴリに分けたりする優れもの == 使用例 == === 文章を配列化 ===」 |
Sufee Admin (トーク | 投稿記録) |
||
4行目: | 4行目: | ||
== 使用例 == | == 使用例 == | ||
=== 文章を配列化 === | === 文章を配列化 === | ||
<syntaxhighlight lang="python3"> | |||
from typing import Any | |||
import MeCab | |||
from kanjize import int2kanji, kanji2int | |||
from datetime import datetime | |||
from dateutil.relativedelta import relativedelta | |||
def strToNumber(num,i): | |||
try: | |||
print(num[i-1]) | |||
if "." == num[i-1][0]["other"]: | |||
print(str(num[i-2][0]) + str(num[i-1][0]["other"]) + str(num[i][0])) | |||
num[i][0] = float(str(num[i-2][0]) + str(num[i-1][0]["other"]) + str(num[i][0])) | |||
num[i-2][0] = None | |||
num[i-1][0] = None | |||
except Exception as e: | |||
print(e) | |||
def mecab_list(text): | |||
tagger = MeCab.Tagger("-Ochasen") | |||
tagger.parse('') | |||
number = ["0","1","2","3","4","5","6","7","8","9"] | |||
for i in range(len(number)): | |||
text = text.replace(number[i],str(i)) | |||
node = tagger.parseToNode(text) | |||
word_class = [] | |||
while node: | |||
word = node.surface | |||
wclass = node.feature.split(',') | |||
#print(wclass) | |||
if wclass[0] != u'BOS/EOS': | |||
if wclass[6] == None: | |||
word_class.append([word,wclass[0],wclass[1],wclass[2],"",""]) | |||
elif wclass[5]: | |||
word_class.append([word,wclass[0],wclass[1],wclass[2],wclass[5],wclass[6]]) | |||
else: | |||
word_class.append([word,wclass[0],wclass[1],wclass[2],"",wclass[6]]) | |||
node = node.next | |||
return word_class | |||
def strToint(num): | |||
try: | |||
return int(num) | |||
except: | |||
return kanji2int(num) | |||
msg = input("text>>") | |||
keitaiso = mecab_list(msg) | |||
print("--------------------------\n") | |||
print(keitaiso) | |||
print("--------------------------") | |||
</syntaxhighlight> |
2022年2月22日 (火) 16:06時点における版
概要
文章を単語の配列などに変換したり、品詞のカテゴリに分けたりできる優れもの
使用例
文章を配列化
from typing import Any
import MeCab
from kanjize import int2kanji, kanji2int
from datetime import datetime
from dateutil.relativedelta import relativedelta
def strToNumber(num, i):
    """Merge an ``<int> "." <frac>`` token triple into a single float, in place.

    ``num`` is indexed as ``num[k][0]``.  When the entry before ``i`` holds a
    mapping whose ``"other"`` value is ``"."``, the values at ``i-2``, ``i-1``
    and ``i`` are concatenated, parsed with ``float`` and stored in
    ``num[i][0]``; the two consumed slots are then set to ``None``.

    The function is best-effort: malformed entries are reported with
    ``print`` and leave ``num`` untouched.  It always returns ``None``.

    Args:
        num: Mutable sequence of mutable sequences; only element ``[0]`` of
            each entry is read or written.
        i: Index of the fractional-part entry.
    """
    # Two preceding entries are required.  Without this guard i-2 / i-1 become
    # negative and silently wrap around to the end of the list, merging
    # unrelated tokens.
    if i < 2:
        return
    try:
        print(num[i - 1])
        separator = num[i - 1][0]["other"]
        if separator == ".":
            literal = str(num[i - 2][0]) + str(separator) + str(num[i][0])
            print(literal)
            # Convert first so that a parse failure leaves every slot as-is.
            num[i][0] = float(literal)
            num[i - 2][0] = None
            num[i - 1][0] = None
    except (IndexError, KeyError, TypeError, ValueError) as e:
        # Out-of-range index, entry without "other", non-subscriptable entry,
        # or a literal that float() rejects.
        print(e)
# Maps full-width digits to their ASCII counterparts in one pass.
# NOTE(review): the original loop replaced "0".."9" with themselves (a no-op);
# it presumably held full-width digits before the page text was normalised.
_FULLWIDTH_DIGITS = str.maketrans("０１２３４５６７８９", "0123456789")


def mecab_list(text):
    """Tokenise ``text`` with MeCab and return one row per morpheme.

    Each row is ``[surface, f0, f1, f2, f5, f6]`` where ``fN`` is the N-th
    comma-separated field of the node's ``feature`` string.  When a node
    carries fewer than seven feature fields, the last two columns are empty
    strings.  BOS/EOS marker nodes are skipped.

    Args:
        text: Sentence to analyse.  Full-width digits are converted to ASCII
            digits before parsing.

    Returns:
        list[list[str]]: The rows, in the order MeCab yields the nodes.
    """
    tagger = MeCab.Tagger("-Ochasen")
    # NOTE(review): presumably the usual workaround of parsing an empty string
    # once so that node.surface is populated correctly; kept as in the original.
    tagger.parse('')
    text = text.translate(_FULLWIDTH_DIGITS)
    node = tagger.parseToNode(text)
    word_class = []
    while node:
        features = node.feature.split(',')
        if features[0] != 'BOS/EOS':
            # Pad so that short feature strings cannot raise IndexError.
            head = (features[:3] + ["", "", ""])[:3]
            if len(features) > 6:
                tail = [features[5], features[6]]
            else:
                tail = ["", ""]
            word_class.append([node.surface] + head + tail)
        # Advance on every iteration, including skipped BOS/EOS nodes.
        node = node.next
    return word_class
def strToint(num):
    """Convert ``num`` to ``int``, falling back to ``kanji2int``.

    ``int(num)`` is tried first; if it rejects the value, the value is handed
    to ``kanji2int`` and that result is returned instead.

    Args:
        num: Value to convert.

    Returns:
        The result of ``int(num)``, or of ``kanji2int(num)`` when ``int``
        raises ``ValueError`` or ``TypeError``.
    """
    try:
        return int(num)
    except (ValueError, TypeError):
        # Only conversion failures trigger the fallback; a bare ``except``
        # would also swallow KeyboardInterrupt and SystemExit.
        return kanji2int(num)
# Interactive demo: read one sentence from stdin, tokenise it with
# mecab_list, and print the resulting rows framed by two dashed rules.
msg = input("text>>")
keitaiso = mecab_list(msg)
# The opening rule carries an extra newline, so a blank line follows it.
for line in ("--------------------------\n", keitaiso, "--------------------------"):
    print(line)