mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	Lookup table for languages in spaCy.
Need to find another name for lemmatizerlookup; I was not inspired. Trying to use the new files for the French (fr) language.
This commit is contained in:
		
							parent
							
								
									1b12f342e4
								
							
						
					
					
						commit
						55c6910f90
					
				
							
								
								
									
										354974
									
								
								spacy/de/lemmatization.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										354974
									
								
								spacy/de/lemmatization.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										41588
									
								
								spacy/en/lemmatization.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										41588
									
								
								spacy/en/lemmatization.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										491552
									
								
								spacy/es/lemmatization.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										491552
									
								
								spacy/es/lemmatization.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| 
						 | 
					@ -6,7 +6,8 @@ from ..attrs import LANG
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .language_data import *
 | 
					from .language_data import *
 | 
				
			||||||
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
 | 
					from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
 | 
				
			||||||
 | 
					from ..lemmatizerlookup import Lemmatizer
 | 
				
			||||||
 | 
					from .lemmatization import LOOK_UP
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class FrenchDefaults(BaseDefaults):
 | 
					class FrenchDefaults(BaseDefaults):
 | 
				
			||||||
    lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
 | 
					    lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
 | 
				
			||||||
| 
						 | 
					@ -17,13 +18,21 @@ class FrenchDefaults(BaseDefaults):
 | 
				
			||||||
    suffixes = tuple(TOKENIZER_SUFFIXES)
 | 
					    suffixes = tuple(TOKENIZER_SUFFIXES)
 | 
				
			||||||
    token_match = TOKEN_MATCH
 | 
					    token_match = TOKEN_MATCH
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @classmethod
 | 
					    @classmethod
 | 
				
			||||||
    def create_tokenizer(cls, nlp=None):
 | 
					    def create_tokenizer(cls, nlp=None):
 | 
				
			||||||
        cls.tokenizer_exceptions = get_tokenizer_exceptions()
 | 
					        cls.tokenizer_exceptions = get_tokenizer_exceptions()
 | 
				
			||||||
        return super(FrenchDefaults, cls).create_tokenizer(nlp)
 | 
					        return super(FrenchDefaults, cls).create_tokenizer(nlp)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @classmethod
 | 
				
			||||||
 | 
					    def create_lemmatizer(cls, nlp=None):
 | 
				
			||||||
 | 
					        return Lemmatizer(LOOK_UP)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class French(Language):
 | 
					class French(Language):
 | 
				
			||||||
    lang = 'fr'
 | 
					    lang = 'fr'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Defaults = FrenchDefaults
 | 
					    Defaults = FrenchDefaults
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										217121
									
								
								spacy/fr/lemmatization.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										217121
									
								
								spacy/fr/lemmatization.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										37736
									
								
								spacy/hu/lemmatization.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37736
									
								
								spacy/hu/lemmatization.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										333687
									
								
								spacy/it/lemmatization.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										333687
									
								
								spacy/it/lemmatization.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										21
									
								
								spacy/lemmatizerlookup.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								spacy/lemmatizerlookup.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,21 @@
 | 
				
			||||||
 | 
					# coding: utf8
 | 
				
			||||||
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from .lemmatizer import Lemmatizer
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Lemmatizer(Lemmatizer):
 | 
				
			||||||
 | 
					    @classmethod
 | 
				
			||||||
 | 
					    def load(cls, path, lookup):
 | 
				
			||||||
 | 
					        return cls(lookup or {})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def __init__(self, lookup):
 | 
				
			||||||
 | 
					        print("Mon lemmatizer")
 | 
				
			||||||
 | 
					        self.lookup = lookup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def __call__(self, string, univ_pos, morphology=None):
 | 
				
			||||||
 | 
					        print("call")
 | 
				
			||||||
 | 
					        try:
 | 
				
			||||||
 | 
					            return self.lookup[string]
 | 
				
			||||||
 | 
					        except:
 | 
				
			||||||
 | 
					            return string
 | 
				
			||||||
							
								
								
									
										824772
									
								
								spacy/pt/lemmatization.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										824772
									
								
								spacy/pt/lemmatization.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										660313
									
								
								spacy/sv/lemmatization.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										660313
									
								
								spacy/sv/lemmatization.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Loading…
	
		Reference in New Issue
	
	Block a user