spaCy/spacy/lang/ga/irish_morphology_helpers.py

34 lines
984 B
Python
Raw Normal View History

# coding: utf8
from __future__ import unicode_literals
class IrishMorph:
    """Namespace of small string helpers for Irish morphology.

    The class holds letter inventories as class attributes and exposes
    stateless helpers as static methods, so they can be called either as
    ``IrishMorph.helper(word)`` or on an instance. All comparisons are
    case-sensitive and only look at lowercase letters, because the letter
    inventories below contain lowercase letters only.
    """

    # Lowercase consonant letters recognised by ``deduplicate``.
    consonants = [
        'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm',
        'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'z',
    ]
    # Vowel letters, split into the "broad" and "slender" groups.
    broad_vowels = ['a', 'á', 'o', 'ó', 'u', 'ú']
    slender_vowels = ['e', 'é', 'i', 'í']
    # Every vowel letter: broad vowels first, then slender vowels.
    vowels = broad_vowels + slender_vowels

    @staticmethod
    def ends_dentals(word):
        """Return True if ``word`` ends in one of 'd', 'n', 't' or 's'.

        An empty string yields False.
        """
        # str.endswith accepts a tuple of suffixes and is False for "".
        return word.endswith(('d', 'n', 't', 's'))

    @staticmethod
    def devoice(word):
        """Return ``word`` with a final 'sd' rewritten to 'st'.

        Only words longer than two characters are changed; anything else
        is returned unmodified.
        """
        if len(word) > 2 and word.endswith('sd'):
            return word[:-1] + 't'
        return word

    @staticmethod
    def ends_with_vowel(word):
        """Return True if the last character of ``word`` is in ``vowels``.

        An empty string yields False.
        """
        # Class attributes are not visible as bare names inside a function
        # body, so the list must be reached through the class.
        return bool(word) and word[-1] in IrishMorph.vowels

    @staticmethod
    def starts_with_vowel(word):
        """Return True if the first character of ``word`` is in ``vowels``.

        An empty string yields False.
        """
        return bool(word) and word[0] in IrishMorph.vowels

    @staticmethod
    def deduplicate(word):
        """Drop the last letter of ``word`` if it ends in a doubled consonant.

        Only words longer than two characters are changed; anything else
        is returned unmodified.
        """
        if (
            len(word) > 2
            and word[-2] == word[-1]
            and word[-1] in IrishMorph.consonants
        ):
            return word[:-1]
        return word