mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
some helpers; the Irish part of UD only has 2500 sentences so this will need source of morphology
This commit is contained in:
parent
559e03605a
commit
1ba38b2036
33
spacy/lang/ga/irish_morphology_helpers.py
Normal file
33
spacy/lang/ga/irish_morphology_helpers.py
Normal file
|
@ -0,0 +1,33 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
class IrishMorph(object):
    """Letter classes and small string helpers for Irish morphology.

    Every helper takes a single word (a string) and is a static method, so
    it can be called directly on the class, e.g. ``IrishMorph.devoice(word)``,
    or on an instance. The letter lists contain lower-case letters only, so
    the helpers only recognise lower-case input.
    """

    # Letters treated as consonants by the helpers below.
    consonants = ['b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'z']
    # Vowel letters, split into the "broad" and "slender" groups.
    broad_vowels = ['a', 'á', 'o', 'ó', 'u', 'ú']
    slender_vowels = ['e', 'é', 'i', 'í']
    # All vowel letters: broad first, then slender.
    vowels = broad_vowels + slender_vowels

    @staticmethod
    def ends_dentals(word):
        """Return True if ``word`` ends in 'd', 'n', 't' or 's'.

        An empty word returns False.
        """
        # Slicing (rather than indexing) yields '' for an empty word, which
        # is simply not a member of the tuple.
        return word[-1:] in ('d', 'n', 't', 's')

    @staticmethod
    def devoice(word):
        """Return ``word`` with a final 'sd' rewritten as 'st'.

        Words that do not end in 'sd' (including empty and one-letter
        words) are returned unchanged.
        """
        if word.endswith('sd'):
            return word[:-1] + 't'
        return word

    @staticmethod
    def ends_with_vowel(word):
        """Return True if the last letter of ``word`` is in ``vowels``.

        An empty word returns False.
        """
        # Class attributes are not visible as bare names inside a method
        # body, so the list has to be reached through the class.
        return word[-1:] in IrishMorph.vowels

    @staticmethod
    def starts_with_vowel(word):
        """Return True if the first letter of ``word`` is in ``vowels``.

        An empty word returns False.
        """
        return word[:1] in IrishMorph.vowels

    @staticmethod
    def deduplicate(word):
        """Return ``word`` with a doubled final consonant reduced to one.

        The last letter is dropped only when the final two letters are
        identical and that letter is in ``consonants``; otherwise (including
        for words shorter than two letters) ``word`` is returned unchanged.
        """
        if len(word) >= 2 and word[-2] == word[-1] and word[-1] in IrishMorph.consonants:
            return word[:-1]
        return word
|
||||
|
Loading…
Reference in New Issue
Block a user