mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-01 00:17:44 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			114 lines
		
	
	
		
			2.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			114 lines
		
	
	
		
			2.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # coding: utf8
 | |
| from __future__ import unicode_literals
 | |
| 
 | |
| from ..symbols import *
 | |
| from ..language_data import PRON_LEMMA, DET_LEMMA
 | |
| 
 | |
| 
 | |
# Maps a surface string to the list of attribute dicts describing the
# token(s) it is split into. The ORTH pieces of every entry concatenate back
# to the key itself; expanded or canonical forms belong in LEMMA / NORM so
# the original text is never rewritten.
TOKENIZER_EXCEPTIONS = {
    # --- Contractions: preposition + article / pronoun ---
    "al": [
        {ORTH: "a", LEMMA: "a", TAG: ADP},
        # ORTH must be the literal remainder "l" (not "el"), consistent with
        # "del" / "pel" / "pal" below: "a" + "l" == "al".
        {ORTH: "l", LEMMA: "el", TAG: DET},
    ],

    "consigo": [
        {ORTH: "con", LEMMA: "con"},
        {ORTH: "sigo", LEMMA: PRON_LEMMA, NORM: "sí"},
    ],

    "conmigo": [
        {ORTH: "con", LEMMA: "con"},
        {ORTH: "migo", LEMMA: PRON_LEMMA, NORM: "mí"},
    ],

    "contigo": [
        {ORTH: "con", LEMMA: "con"},
        {ORTH: "tigo", LEMMA: PRON_LEMMA, NORM: "ti"},
    ],

    "del": [
        {ORTH: "de", LEMMA: "de", TAG: ADP},
        {ORTH: "l", LEMMA: "el", TAG: DET},
    ],

    "pel": [
        {ORTH: "pe", LEMMA: "per", TAG: ADP},
        {ORTH: "l", LEMMA: "el", TAG: DET},
    ],

    "pal": [
        {ORTH: "pa", LEMMA: "para"},
        {ORTH: "l", LEMMA: DET_LEMMA, NORM: "el"},
    ],

    "pala": [
        {ORTH: "pa", LEMMA: "para"},
        {ORTH: "la", LEMMA: DET_LEMMA},
    ],

    # --- Abbreviations kept as one token, with their expanded lemma ---
    "aprox.": [
        {ORTH: "aprox.", LEMMA: "aproximadamente"},
    ],

    "dna.": [
        {ORTH: "dna.", LEMMA: "docena"},
    ],

    "esq.": [
        {ORTH: "esq.", LEMMA: "esquina"},
    ],

    "pág.": [
        {ORTH: "pág.", LEMMA: "página"},
    ],

    "p.ej.": [
        {ORTH: "p.ej.", LEMMA: "por ejemplo"},
    ],

    # --- Abbreviated pronouns, normalised to their full form ---
    "Ud.": [
        {ORTH: "Ud.", LEMMA: PRON_LEMMA, NORM: "usted"},
    ],

    "Vd.": [
        {ORTH: "Vd.", LEMMA: PRON_LEMMA, NORM: "usted"},
    ],

    "Uds.": [
        {ORTH: "Uds.", LEMMA: PRON_LEMMA, NORM: "ustedes"},
    ],

    "Vds.": [
        {ORTH: "Vds.", LEMMA: PRON_LEMMA, NORM: "ustedes"},
    ],
}
 | |
| 
 | |
| 
 | |
# Abbreviations listed by surface form only, with no extra attributes.
# NOTE(review): presumably the consumer of this list registers each string as
# a single-token exception so the trailing period stays attached — confirm
# against the code that reads ORTH_ONLY.
#
# Every element needs its own trailing comma: adjacent string literals are
# silently concatenated by Python, which would fuse two abbreviations into
# one bogus entry.
ORTH_ONLY = [
    "a.C.",
    "a.J.C.",
    "apdo.",
    "Av.",
    "Avda.",
    "Cía.",
    "etc.",
    "Gob.",
    "Gral.",
    "Ing.",
    "J.C.",
    "Lic.",
    "m.n.",
    "no.",
    "núm.",
    "P.D.",
    "Prof.",
    "Profa.",
    "q.e.p.d.",
    "S.A.",
    "S.L.",
    "s.s.s.",
    "Sr.",
    "Sra.",
    "Srta.",
]
 |