mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			59 lines
		
	
	
		
			1.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			59 lines
		
	
	
		
			1.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from ..tokenizer_exceptions import BASE_EXCEPTIONS
 | 
						|
from ...symbols import ORTH
 | 
						|
from ...util import update_exc
 | 
						|
 | 
						|
 | 
						|
_exc = {
 | 
						|
    "all'art.": [{ORTH: "all'"}, {ORTH: "art."}],
 | 
						|
    "dall'art.": [{ORTH: "dall'"}, {ORTH: "art."}],
 | 
						|
    "dell'art.": [{ORTH: "dell'"}, {ORTH: "art."}],
 | 
						|
    "L'art.": [{ORTH: "L'"}, {ORTH: "art."}],
 | 
						|
    "l'art.": [{ORTH: "l'"}, {ORTH: "art."}],
 | 
						|
    "nell'art.": [{ORTH: "nell'"}, {ORTH: "art."}],
 | 
						|
    "po'": [{ORTH: "po'"}],
 | 
						|
    "sett..": [{ORTH: "sett."}, {ORTH: "."}],
 | 
						|
}
 | 
						|
 | 
						|
for orth in [
 | 
						|
    "..",
 | 
						|
    "....",
 | 
						|
    "al.",
 | 
						|
    "all-path",
 | 
						|
    "art.",
 | 
						|
    "Art.",
 | 
						|
    "artt.",
 | 
						|
    "att.",
 | 
						|
    "by-pass",
 | 
						|
    "c.d.",
 | 
						|
    "centro-sinistra",
 | 
						|
    "check-up",
 | 
						|
    "Civ.",
 | 
						|
    "cm.",
 | 
						|
    "Cod.",
 | 
						|
    "col.",
 | 
						|
    "Cost.",
 | 
						|
    "d.C.",
 | 
						|
    'de"',
 | 
						|
    "distr.",
 | 
						|
    "E'",
 | 
						|
    "ecc.",
 | 
						|
    "e-mail",
 | 
						|
    "e/o",
 | 
						|
    "etc.",
 | 
						|
    "Jr.",
 | 
						|
    "n°",
 | 
						|
    "nord-est",
 | 
						|
    "pag.",
 | 
						|
    "Proc.",
 | 
						|
    "prof.",
 | 
						|
    "sett.",
 | 
						|
    "s.p.a.",
 | 
						|
    "ss.",
 | 
						|
    "St.",
 | 
						|
    "tel.",
 | 
						|
    "week-end",
 | 
						|
]:
 | 
						|
    _exc[orth] = [{ORTH: orth}]
 | 
						|
 | 
						|
TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
 |