mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	* Filter out phrases that consist of common, lower-case words.
This commit is contained in:
		
							parent
							
								
									4bbc8f45c6
								
							
						
					
					
						commit
						5af4b62fe7
					
				| 
						 | 
				
			
			@ -45,6 +45,8 @@ def read_gazetteer(tokenizer, loc, n=-1):
 | 
			
		|||
        if i >= n:
 | 
			
		||||
            break
 | 
			
		||||
        phrase = tokenizer(phrase)
 | 
			
		||||
        if all((t.is_lower and t.prob >= -10) for t in phrase):
 | 
			
		||||
            continue
 | 
			
		||||
        if len(phrase) >= 2:
 | 
			
		||||
            yield phrase
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue
	
	Block a user