mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			24 lines
		
	
	
		
			579 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			24 lines
		
	
	
		
			579 B
		
	
	
	
		
			Python
		
	
	
	
	
	
# encoding: utf8
 | 
						|
from __future__ import unicode_literals
 | 
						|
 | 
						|
from ..symbols import ORTH, LEMMA, POS, ADV, ADJ, NOUN
 | 
						|
 | 
						|
 | 
						|
# Working table of tokenizer exceptions: maps an ORTH string to a list of
# token-attribute dicts. It is copied into TOKENIZER_EXCEPTIONS at the end
# of the module.
_exc = {}

# Abbreviations whose entry carries a LEMMA spelling out the abbreviation
# and a POS tag, in addition to the ORTH string itself.
for exc_data in [
    {ORTH: "m.in.", LEMMA: "między innymi", POS: ADV},
    {ORTH: "inż.", LEMMA: "inżynier", POS: NOUN},
    {ORTH: "mgr.", LEMMA: "magister", POS: NOUN},
    {ORTH: "tzn.", LEMMA: "to znaczy", POS: ADV},
    {ORTH: "tj.", LEMMA: "to jest", POS: ADV},
    {ORTH: "tzw.", LEMMA: "tak zwany", POS: ADJ},
]:
    # The value must be a plain list holding one attribute dict, the same
    # shape used for the "w." / "r." entries further down. A stray trailing
    # comma here used to wrap that list in a 1-tuple. dict() makes a copy so
    # the stored entry does not alias the literal above.
    _exc[exc_data[ORTH]] = [dict(exc_data)]
 | 
						|
 | 
						|
# Abbreviations registered with their ORTH string only (no LEMMA or POS).
for orth in ("w.", "r."):
    _exc.update({orth: [{ORTH: orth}]})


# Exported table: a shallow copy of the private working dict, so later
# rebinding or key changes on _exc do not affect the exported mapping.
TOKENIZER_EXCEPTIONS = _exc.copy()
 |