mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
some helpers; the Irish part of UD only has 2500 sentences, so this will need a source of morphology
This commit is contained in:
parent
559e03605a
commit
1ba38b2036
33
spacy/lang/ga/irish_morphology_helpers.py
Normal file
33
spacy/lang/ga/irish_morphology_helpers.py
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
# coding: utf8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
class IrishMorph:
    """Letter classes and small string helpers for Irish morphology.

    The helpers are static methods and are meant to be called on the class,
    e.g. ``IrishMorph.devoice(word)``. All comparisons are case-sensitive;
    the letter lists below contain lowercase characters only. Every helper
    accepts the empty string without raising.
    """

    # Lowercase consonant letters.
    consonants = ['b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'z']
    # Broad vowels, plain and acute-accented.
    broad_vowels = ['a', 'á', 'o', 'ó', 'u', 'ú']
    # Slender vowels, plain and acute-accented.
    slender_vowels = ['e', 'é', 'i', 'í']
    # All vowels: broad first, then slender.
    vowels = broad_vowels + slender_vowels

    @staticmethod
    def ends_dentals(word):
        """Return True if ``word`` ends in one of 'd', 'n', 't', 's'."""
        # Slicing (rather than indexing) yields '' for an empty word, which
        # is simply not a member of the list.
        return word[-1:] in ['d', 'n', 't', 's']

    @staticmethod
    def devoice(word):
        """Return ``word`` with a final 'sd' rewritten to 'st'.

        Words that do not end in 'sd' (including words shorter than two
        characters) are returned unchanged.
        """
        if word.endswith('sd'):
            return word[:-1] + 't'
        return word

    @staticmethod
    def ends_with_vowel(word):
        """Return True if the last character of ``word`` is in ``vowels``."""
        # Class attributes are not visible as bare names inside a method
        # body, so the list must be reached through the class.
        return word[-1:] in IrishMorph.vowels

    @staticmethod
    def starts_with_vowel(word):
        """Return True if the first character of ``word`` is in ``vowels``."""
        return word[:1] in IrishMorph.vowels

    @staticmethod
    def deduplicate(word):
        """Drop the last character when ``word`` ends in a doubled consonant.

        Returns ``word`` unchanged when its last two characters differ, when
        the repeated character is not in ``consonants``, or when the word is
        shorter than two characters.
        """
        if (
            len(word) >= 2
            and word[-2] == word[-1]
            and word[-1] in IrishMorph.consonants
        ):
            return word[:-1]
        return word
|
||||||
|
|
Loading…
Reference in New Issue
Block a user