mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	* Restore merge_mwe in English class
This commit is contained in:
		
							parent
							
								
									3c270fc8ff
								
							
						
					
					
						commit
						6859f6adac
					
				|  | @ -36,6 +36,7 @@ def get_lex_props(string): | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| if_model_present = -1 | if_model_present = -1 | ||||||
|  | LOCAL_DATA_DIR = path.join(path.dirname(__file__), 'data') | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class English(object): | class English(object): | ||||||
|  | @ -64,7 +65,7 @@ class English(object): | ||||||
|     EntityTransitionSystem = BiluoPushDown |     EntityTransitionSystem = BiluoPushDown | ||||||
| 
 | 
 | ||||||
|     def __init__(self, |     def __init__(self, | ||||||
|       data_dir=path.join(path.dirname(__file__), 'data'), |       data_dir=LOCAL_DATA_DIR | ||||||
|       Tokenizer=Tokenizer.from_dir, |       Tokenizer=Tokenizer.from_dir, | ||||||
|       Tagger=EnPosTagger, |       Tagger=EnPosTagger, | ||||||
|       Parser=ParserFactory(ParserTransitionSystem), |       Parser=ParserFactory(ParserTransitionSystem), | ||||||
|  | @ -106,7 +107,7 @@ class English(object): | ||||||
|             ('NNP', 'DATE', regexes.DAYS_RE), |             ('NNP', 'DATE', regexes.DAYS_RE), | ||||||
|             ('CD', 'MONEY', regexes.MONEY_RE)]) |             ('CD', 'MONEY', regexes.MONEY_RE)]) | ||||||
| 
 | 
 | ||||||
|     def __call__(self, text, tag=True, parse=True, entity=True): |     def __call__(self, text, tag=True, parse=True, entity=True, merge_mwes=False): | ||||||
|         """Apply the pipeline to some text.  The text can span multiple sentences, |         """Apply the pipeline to some text.  The text can span multiple sentences, | ||||||
|         and can contain arbitrary whitespace.  Alignment into the original string |         and can contain arbitrary whitespace.  Alignment into the original string | ||||||
|         is preserved. |         is preserved. | ||||||
|  | @ -130,6 +131,8 @@ class English(object): | ||||||
|             self.parser(tokens) |             self.parser(tokens) | ||||||
|         if self.entity and entity: |         if self.entity and entity: | ||||||
|             self.entity(tokens) |             self.entity(tokens) | ||||||
|  |         if merge_mwes and self.mwe_merger is not None: | ||||||
|  |             self.mwe_merger(tokens) | ||||||
|         return tokens |         return tokens | ||||||
| 
 | 
 | ||||||
|     @property |     @property | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user