# coding: utf8
from __future__ import unicode_literals

# NOTE: per the original commit message, the Irish part of UD only has
# 2500 sentences, so this module will need a separate source of morphology.


class IrishMorph:
    """Small string helpers for Irish morphology.

    All helpers are callable directly on the class, e.g.
    ``IrishMorph.ends_with_vowel(word)``. Letter comparisons are
    case-sensitive: the letter lists below contain lowercase letters only,
    so callers should lowercase ``word`` first if that matters to them.
    """

    # Lowercase consonant letters recognised by the helpers.
    consonants = ['b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n',
                  'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'z']
    # Broad vowels, with and without the acute accent.
    broad_vowels = ['a', 'á', 'o', 'ó', 'u', 'ú']
    # Slender vowels, with and without the acute accent.
    slender_vowels = ['e', 'é', 'i', 'í']
    # Every vowel: broad first, then slender.
    vowels = broad_vowels + slender_vowels
    # Final letters treated as dentals by ``ends_dentals``.
    dentals = ['d', 'n', 't', 's']

    @classmethod
    def ends_dentals(cls, word):
        """Return True if ``word`` ends in one of ``d``, ``n``, ``t``, ``s``.

        An empty ``word`` yields False.
        """
        # Slicing (not indexing) gives '' for an empty word, which is not
        # a member of the list, so no IndexError is possible.
        return word[-1:] in cls.dentals

    @staticmethod
    def devoice(word):
        """Return ``word`` with a final ``sd`` rewritten as ``st``.

        Words that do not end in ``sd`` (including words shorter than two
        characters) are returned unchanged.
        """
        if word.endswith('sd'):
            return word[:-1] + 't'
        return word

    @classmethod
    def ends_with_vowel(cls, word):
        """Return True if the last character of ``word`` is in ``vowels``.

        An empty ``word`` yields False.
        """
        # Class attributes are not visible as bare names inside a method,
        # so the vowel list must be reached through ``cls``.
        return word[-1:] in cls.vowels

    @classmethod
    def starts_with_vowel(cls, word):
        """Return True if the first character of ``word`` is in ``vowels``.

        An empty ``word`` yields False.
        """
        return word[:1] in cls.vowels

    @classmethod
    def deduplicate(cls, word):
        """Return ``word`` with a doubled final consonant reduced to one.

        If the last two characters are the same consonant, the final one is
        dropped; otherwise (including for words shorter than two
        characters) ``word`` is returned unchanged.
        """
        # The length guard prevents an IndexError on '' and on
        # single-character words.
        if (len(word) >= 2 and word[-2] == word[-1]
                and word[-1] in cls.consonants):
            return word[:-1]
        return word