mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	Remove time from German language data
This commit is contained in:
		
							parent
							
								
									e8ae588be9
								
							
						
					
					
						commit
						70b51ed7c8
					
				| 
						 | 
				
			
			@ -10,7 +10,6 @@ from ..util import update_exc
 | 
			
		|||
from ..language_data import EMOTICONS
 | 
			
		||||
from .language_data import ORTH_ONLY
 | 
			
		||||
from .language_data import strings_to_exc
 | 
			
		||||
from .language_data import get_time_exc
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)
 | 
			
		||||
| 
						 | 
				
			
			@ -23,7 +22,6 @@ STOP_WORDS = set(language_data.STOP_WORDS)
 | 
			
		|||
 | 
			
		||||
update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(EMOTICONS))
 | 
			
		||||
update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(ORTH_ONLY))
 | 
			
		||||
update_exc(TOKENIZER_EXCEPTIONS, get_time_exc(range(1, 24 + 1)))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class German(Language):
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -9,17 +9,6 @@ def strings_to_exc(orths):
 | 
			
		|||
    return {orth: [{ORTH: orth}] for orth in orths}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_time_exc(hours):
    """Build exception entries for abbreviated clock times such as "10h".

    hours: iterable of integer hour values (formatted with "%d").
    Returns a dict mapping each "<hour>h" string to a list of two token
    attribute dicts: the hour digits, then "h" with the lemma "Uhr".
    """
    exc = {}
    for hour in hours:
        # currently only supporting formats like "10h", not "10 Uhr"
        exc["%dh" % hour] = [
            # Format the hour as text: the key and the "h" piece are strings,
            # so the hour piece must be one too (not a raw int) for the two
            # pieces to concatenate back to the key.
            {ORTH: "%d" % hour},
            {ORTH: "h", LEMMA: "Uhr"}
        ]
    return exc
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
PRON_LEMMA = "-PRON-"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue
	
	Block a user