some helpers; the Irish part of UD only has 2500 sentences, so this will need a source of morphology

2025-12-17 07:04:29 +03:00 · 2017-06-28 00:42:00 +01:00 · 2017-06-28 00:42:00 +01:00 · 1ba38b2036
commit 1ba38b2036
parent 559e03605a
1 changed files with 33 additions and 0 deletions
--- a/spacy/lang/ga/irish_morphology_helpers.py
+++ b/spacy/lang/ga/irish_morphology_helpers.py
@ -0,0 +1,33 @@
 # coding: utf8
 from __future__ import unicode_literals
 class IrishMorph:
    """Namespace of small string helpers for Irish morphology.

    All helpers are static: they take a single ``word`` string and never
    need an instance, so they can be called as ``IrishMorph.devoice(w)``
    or on an instance interchangeably.

    The letter tables below contain lower-case letters only, so every
    membership test performed by the helpers is case-sensitive.
    """

    # Letters treated as consonants by ``deduplicate``.
    consonants = ['b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'z']
    # Broad and slender vowels, including the long (accented) forms.
    broad_vowels = ['a', 'á', 'o', 'ó', 'u', 'ú']
    slender_vowels = ['e', 'é', 'i', 'í']
    # Every vowel: broad first, then slender.
    vowels = broad_vowels + slender_vowels

    @staticmethod
    def ends_dentals(word):
        """Return True if ``word`` ends in one of 'd', 'n', 't' or 's'.

        An empty string yields False.
        """
        # str.endswith accepts a tuple and is safe on empty input.
        return word.endswith(('d', 'n', 't', 's'))

    @staticmethod
    def devoice(word):
        """Return ``word`` with a final 'sd' rewritten as 'st'.

        Words that do not end in 'sd' (including empty and one-letter
        strings) are returned unchanged.
        """
        if word.endswith('sd'):
            return word[:-1] + 't'
        return word

    @staticmethod
    def ends_with_vowel(word):
        """Return True if the last character of ``word`` is in ``vowels``.

        An empty string yields False.
        """
        # Slicing (rather than indexing) avoids IndexError on ''. The class
        # attribute must be qualified: class scope is not visible from
        # inside a function body.
        return word[-1:] in IrishMorph.vowels

    @staticmethod
    def starts_with_vowel(word):
        """Return True if the first character of ``word`` is in ``vowels``.

        An empty string yields False.
        """
        return word[:1] in IrishMorph.vowels

    @staticmethod
    def deduplicate(word):
        """Return ``word`` with a doubled final consonant reduced to one.

        If the last two characters are the same letter and that letter is
        in ``consonants``, the final character is dropped; otherwise the
        word (including empty and one-letter strings) is returned unchanged.
        """
        if (
            len(word) >= 2
            and word[-2] == word[-1]
            and word[-1] in IrishMorph.consonants
        ):
            return word[:-1]
        return word