mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			53 lines
		
	
	
		
			1.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			53 lines
		
	
	
		
			1.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
# coding: utf8
 | 
						||
from __future__ import unicode_literals
 | 
						||
from ...symbols import ORTH, LEMMA
 | 
						||
 | 
						||
# Maps an exact surface string to the list of token descriptions it should be
# tokenized into.
_exc = {}

# Hyphenated forms grouped by the lemma they share. Each form is registered
# twice -- as written and capitalized -- and maps to a one-element list, i.e.
# a single token description carrying its own ORTH and the group's LEMMA.
for lemma, forms in (
    ("a-o", ("a-a", "a-e", "a-o", "a-i")),
    ("co-o", ("co-a", "co-e", "co-i", "co-o")),
    ("da-o", ("da-a", "da-e", "da-i", "da-o")),
    ("pe-o", ("pe-a", "pe-e", "pe-i", "pe-o")),
):
    for form in forms:
        _exc[form] = [{ORTH: form, LEMMA: lemma}]
        capitalized = form.capitalize()
        _exc[capitalized] = [{ORTH: capitalized, LEMMA: lemma}]
 | 
						||
 | 
						||
# Elided prefix directly followed by a preposition with "à" (e.g. "sott'a-o").
# Every combination is split into two token descriptions: the prefix and the
# preposition, each with its own lemma.

# (surface form, lemma) of the prepositions that may follow a prefix.
_a_preps = (
    ("a-a", "a-o"),
    ("a-e", "a-o"),
    ("a-o", "a-o"),
    ("a-i", "a-o"),
)

# (surface form, lemma) of the elided prefixes, listed once with the straight
# apostrophe (') and once with the typographic one (’).
_elided_prefixes = (
    ("sott'", "sotta"),
    ("sott’", "sotta"),
    ("contr'", "contra"),
    ("contr’", "contra"),
    ("ch'", "che"),
    ("ch’", "che"),
    ("s'", "se"),
    ("s’", "se"),
)

for second, second_lemma in _a_preps:
    for elided, elided_lemma in _elided_prefixes:
        # Only the prefix gets a capitalized variant; the preposition is
        # always used exactly as listed.
        for first in (elided, elided.capitalize()):
            first_token = {ORTH: first, LEMMA: elided_lemma}
            second_token = {ORTH: second, LEMMA: second_lemma}
            _exc[first + second] = [first_token, second_token]

# Public name under which the assembled exception table is exposed.
TOKENIZER_EXCEPTIONS = _exc
 |