Lookup table for languages in spaCy.

Need to find another name for lemmatizerlookup. I was not inspired.
Trying to use the new files for the fr language.
This commit is contained in:
ghoward 2017-04-24 16:39:00 +02:00
parent 1b12f342e4
commit 55c6910f90
10 changed files with 2961774 additions and 1 deletions

354974
spacy/de/lemmatization.py Normal file

File diff suppressed because it is too large Load Diff

41588
spacy/en/lemmatization.py Normal file

File diff suppressed because it is too large Load Diff

491552
spacy/es/lemmatization.py Normal file

File diff suppressed because it is too large Load Diff

View File

@ -6,7 +6,8 @@ from ..attrs import LANG
from .language_data import *
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
from ..lemmatizerlookup import Lemmatizer
from .lemmatization import LOOK_UP
class FrenchDefaults(BaseDefaults):
lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
@ -17,13 +18,21 @@ class FrenchDefaults(BaseDefaults):
suffixes = tuple(TOKENIZER_SUFFIXES)
token_match = TOKEN_MATCH
# Build the tokenizer for French.
# The tokenizer exceptions are (re)computed and stored on the class right
# before delegating to the parent class's create_tokenizer, which receives
# the same `nlp` argument.
@classmethod
def create_tokenizer(cls, nlp=None):
# Assigned on the class itself (not an instance), so the value is visible to
# the parent implementation called on the next line.
# NOTE(review): get_tokenizer_exceptions is presumably provided by the
# star import from .language_data -- confirm.
cls.tokenizer_exceptions = get_tokenizer_exceptions()
return super(FrenchDefaults, cls).create_tokenizer(nlp)
# Build the lemmatizer for French.
# Returns a lookup-based Lemmatizer (imported from ..lemmatizerlookup)
# constructed from the LOOK_UP object imported from .lemmatization.
# The `nlp` argument is accepted but not used here.
@classmethod
def create_lemmatizer(cls, nlp=None):
return Lemmatizer(LOOK_UP)
# Language subclass for French: sets the language code and plugs in the
# French-specific defaults class.
class French(Language):
# Language identifier for this class.
lang = 'fr'
# Defaults class used for French (FrenchDefaults, defined earlier in this module).
Defaults = FrenchDefaults

217121
spacy/fr/lemmatization.py Normal file

File diff suppressed because it is too large Load Diff

37736
spacy/hu/lemmatization.py Normal file

File diff suppressed because it is too large Load Diff

333687
spacy/it/lemmatization.py Normal file

File diff suppressed because it is too large Load Diff

21
spacy/lemmatizerlookup.py Normal file
View File

@ -0,0 +1,21 @@
# coding: utf8
from __future__ import unicode_literals
from .lemmatizer import Lemmatizer
class Lemmatizer(Lemmatizer):
    """Lemmatizer that resolves lemmas through a plain lookup table.

    The class subclasses (and intentionally keeps the same name as) the
    ``Lemmatizer`` imported from ``.lemmatizer``, so callers importing
    ``Lemmatizer`` from this module keep working.

    The table is any object supporting ``table[string]``; a string that is
    not found in the table is returned unchanged.
    """

    @classmethod
    def load(cls, path, lookup):
        """Create a lemmatizer from ``lookup``.

        ``path`` is accepted for signature compatibility but is not used.
        A falsy ``lookup`` (``None``, empty mapping) is replaced by an
        empty dict.
        """
        return cls(lookup or {})

    def __init__(self, lookup):
        """Store ``lookup`` as the table used by ``__call__``.

        NOTE(review): the base class initializer is not called here, as in
        the original code -- confirm that no base-class state is required.
        """
        self.lookup = lookup

    def __call__(self, string, univ_pos, morphology=None):
        """Return the lemma stored for ``string``, or ``string`` itself.

        ``univ_pos`` and ``morphology`` are accepted but ignored: the
        lookup is keyed on the surface form only.
        """
        try:
            return self.lookup[string]
        except (KeyError, TypeError):
            # KeyError: no entry for this form.
            # TypeError: unhashable key or a non-subscriptable table.
            # In both cases fall back to the input, as before, but without
            # swallowing KeyboardInterrupt/SystemExit like a bare except.
            return string

824772
spacy/pt/lemmatization.py Normal file

File diff suppressed because it is too large Load Diff

660313
spacy/sv/lemmatization.py Normal file

File diff suppressed because it is too large Load Diff