mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	* Add tests for Unicode punctuation character lemmatization
This commit is contained in:
		
							parent
							
								
									c5b2c4ead8
								
							
						
					
					
						commit
						b71ba2eed5
					
				| 
						 | 
				
			
			@ -1,3 +1,4 @@
 | 
			
		|||
# -*- coding: utf-8 -*-
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from spacy.lemmatizer import Lemmatizer, read_index, read_exc
 | 
			
		||||
| 
						 | 
				
			
			@ -34,3 +35,14 @@ def test_noun_lemmas(lemmatizer):
 | 
			
		|||
    assert do('planets') == set(['planet'])
 | 
			
		||||
    assert do('ring') == set(['ring'])
 | 
			
		||||
    assert do('axes') == set(['axis', 'axe', 'ax'])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_smart_quotes(lemmatizer):
 | 
			
		||||
    do = lemmatizer.punct
 | 
			
		||||
    assert do('“') == set(['``'])
 | 
			
		||||
    assert do('“') == set(['``'])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_smart_quotes(lemmatizer):
 | 
			
		||||
    do = lemmatizer.punct
 | 
			
		||||
    assert do('–') == set(["--"])
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue
	
	Block a user