Add some helpers; the Irish part of UD only has 2500 sentences, so this will need another source of morphology

This commit is contained in:
Jim O'Regan 2017-06-28 00:42:00 +01:00
parent 559e03605a
commit 1ba38b2036

View File

@ -0,0 +1,33 @@
# coding: utf8
from __future__ import unicode_literals
class IrishMorph:
    """Namespace of small orthographic helpers for Irish word forms.

    All helpers are static: call them as ``IrishMorph.helper(word)``.
    They operate on plain strings and compare against the lowercase
    letter lists below, so matching is case-sensitive.
    """

    # Letter inventories used by the helpers. Only lowercase forms are listed.
    consonants = ['b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'z']
    broad_vowels = ['a', 'á', 'o', 'ó', 'u', 'ú']
    slender_vowels = ['e', 'é', 'i', 'í']
    vowels = broad_vowels + slender_vowels

    @staticmethod
    def ends_dentals(word):
        """Return True if ``word`` ends in 'd', 'n', 't' or 's'.

        An empty string yields False.
        """
        # Slicing (rather than indexing) keeps the empty string safe.
        return word[-1:] in ['d', 'n', 't', 's']

    @staticmethod
    def devoice(word):
        """Return ``word`` with a final 'sd' rewritten as 'st'.

        Words that do not end in 'sd' (including words shorter than two
        characters) are returned unchanged.
        """
        if word.endswith('sd'):
            return word[:-1] + 't'
        return word

    @staticmethod
    def ends_with_vowel(word):
        """Return True if the last character of ``word`` is in ``vowels``.

        An empty string yields False.
        """
        # Class attributes are not visible as bare names inside a method,
        # so the list has to be reached through the class.
        return word[-1:] in IrishMorph.vowels

    @staticmethod
    def starts_with_vowel(word):
        """Return True if the first character of ``word`` is in ``vowels``.

        An empty string yields False.
        """
        return word[:1] in IrishMorph.vowels

    @staticmethod
    def deduplicate(word):
        """Return ``word`` with a doubled final consonant reduced to one.

        If the last two characters are the same letter and that letter is
        in ``consonants``, the final character is dropped; otherwise
        (including for words shorter than two characters) ``word`` is
        returned unchanged.
        """
        if (len(word) >= 2
                and word[-2] == word[-1]
                and word[-1] in IrishMorph.consonants):
            return word[:-1]
        return word