mirror of
https://github.com/explosion/spaCy.git
synced 2025-06-30 01:43:21 +03:00
Lookup tables for languages in spaCy.
Need to find another name for lemmatizerlookup; I was not inspired. Trying to use the new files for the French (fr) language.
This commit is contained in:
parent
1b12f342e4
commit
55c6910f90
354974
spacy/de/lemmatization.py
Normal file
354974
spacy/de/lemmatization.py
Normal file
File diff suppressed because it is too large
Load Diff
41588
spacy/en/lemmatization.py
Normal file
41588
spacy/en/lemmatization.py
Normal file
File diff suppressed because it is too large
Load Diff
491552
spacy/es/lemmatization.py
Normal file
491552
spacy/es/lemmatization.py
Normal file
File diff suppressed because it is too large
Load Diff
|
@ -6,7 +6,8 @@ from ..attrs import LANG
|
||||||
|
|
||||||
from .language_data import *
|
from .language_data import *
|
||||||
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
|
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
|
||||||
|
from ..lemmatizerlookup import Lemmatizer
|
||||||
|
from .lemmatization import LOOK_UP
|
||||||
|
|
||||||
class FrenchDefaults(BaseDefaults):
|
class FrenchDefaults(BaseDefaults):
|
||||||
lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
|
lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
|
||||||
|
@ -17,13 +18,21 @@ class FrenchDefaults(BaseDefaults):
|
||||||
suffixes = tuple(TOKENIZER_SUFFIXES)
|
suffixes = tuple(TOKENIZER_SUFFIXES)
|
||||||
token_match = TOKEN_MATCH
|
token_match = TOKEN_MATCH
|
||||||
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_tokenizer(cls, nlp=None):
|
def create_tokenizer(cls, nlp=None):
|
||||||
cls.tokenizer_exceptions = get_tokenizer_exceptions()
|
cls.tokenizer_exceptions = get_tokenizer_exceptions()
|
||||||
return super(FrenchDefaults, cls).create_tokenizer(nlp)
|
return super(FrenchDefaults, cls).create_tokenizer(nlp)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def create_lemmatizer(cls, nlp=None):
|
||||||
|
return Lemmatizer(LOOK_UP)
|
||||||
|
|
||||||
|
|
||||||
class French(Language):
|
class French(Language):
|
||||||
lang = 'fr'
|
lang = 'fr'
|
||||||
|
|
||||||
Defaults = FrenchDefaults
|
Defaults = FrenchDefaults
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
217121
spacy/fr/lemmatization.py
Normal file
217121
spacy/fr/lemmatization.py
Normal file
File diff suppressed because it is too large
Load Diff
37736
spacy/hu/lemmatization.py
Normal file
37736
spacy/hu/lemmatization.py
Normal file
File diff suppressed because it is too large
Load Diff
333687
spacy/it/lemmatization.py
Normal file
333687
spacy/it/lemmatization.py
Normal file
File diff suppressed because it is too large
Load Diff
21
spacy/lemmatizerlookup.py
Normal file
21
spacy/lemmatizerlookup.py
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
# coding: utf8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .lemmatizer import Lemmatizer
|
||||||
|
|
||||||
|
|
||||||
|
class Lemmatizer(Lemmatizer):
    """Lemmatizer backed by a plain lookup table.

    Subclasses the ``Lemmatizer`` imported from ``.lemmatizer`` (and
    deliberately keeps the same name, since callers import it from this
    module). A lemma is obtained by indexing ``self.lookup`` with the input
    string; a string that has no entry is returned unchanged.
    """

    @classmethod
    def load(cls, path, lookup):
        """Build a lemmatizer from a lookup table.

        path: not used by this implementation.
        lookup: table indexed by string that yields the lemma; a falsy
            value (e.g. ``None``) is replaced by an empty dict.
        RETURNS: a new instance of ``cls``.
        """
        return cls(lookup or {})

    def __init__(self, lookup):
        """Store the lookup table.

        The base-class initialiser is intentionally not called here: the
        table is the only state this class sets up.

        lookup: table indexed by string that yields the lemma.
        """
        self.lookup = lookup

    def __call__(self, string, univ_pos, morphology=None):
        """Return the lemma recorded for ``string``, or ``string`` itself.

        string: the form to lemmatize.
        univ_pos: accepted but ignored by the lookup.
        morphology: accepted but ignored by the lookup.
        RETURNS: ``self.lookup[string]`` when present, otherwise ``string``.
        """
        try:
            return self.lookup[string]
        except (KeyError, TypeError):
            # KeyError: no entry for this string. TypeError: the key or the
            # table cannot be used for indexing. Both keep the original
            # best-effort fallback without hiding KeyboardInterrupt,
            # SystemExit or unrelated errors the way a bare ``except`` did.
            return string
|
824772
spacy/pt/lemmatization.py
Normal file
824772
spacy/pt/lemmatization.py
Normal file
File diff suppressed because it is too large
Load Diff
660313
spacy/sv/lemmatization.py
Normal file
660313
spacy/sv/lemmatization.py
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user