mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	Implement like_num getter for Dutch (via #1177)
This commit is contained in:
		
							parent
							
								
									5ee10379db
								
							
						
					
					
						commit
						adda08fe14
					
				| 
						 | 
					@ -2,6 +2,7 @@
 | 
				
			||||||
from __future__ import unicode_literals
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .stop_words import STOP_WORDS
 | 
					from .stop_words import STOP_WORDS
 | 
				
			||||||
 | 
					from .lex_attrs import LEX_ATTRS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
 | 
					from ..tokenizer_exceptions import BASE_EXCEPTIONS
 | 
				
			||||||
from ..norm_exceptions import BASE_NORMS
 | 
					from ..norm_exceptions import BASE_NORMS
 | 
				
			||||||
| 
						 | 
					@ -12,6 +13,7 @@ from ...util import update_exc, add_lookups
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class DutchDefaults(Language.Defaults):
 | 
					class DutchDefaults(Language.Defaults):
 | 
				
			||||||
    lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
 | 
					    lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
 | 
				
			||||||
 | 
					    lex_attr_getters.update(LEX_ATTRS)
 | 
				
			||||||
    lex_attr_getters[LANG] = lambda text: 'nl'
 | 
					    lex_attr_getters[LANG] = lambda text: 'nl'
 | 
				
			||||||
    lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
 | 
					    lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM], BASE_NORMS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										36
									
								
								spacy/lang/nl/lex_attrs.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								spacy/lang/nl/lex_attrs.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,36 @@
 | 
				
			||||||
 | 
					# coding: utf8
 | 
				
			||||||
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from ...attrs import LIKE_NUM
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Cardinal number words recognised by like_num. Only single-word numerals
					# are listed; compound numerals are not covered by this simple lookup.
					_num_words = set("""
					nul een één twee drie vier vijf zes zeven acht negen tien elf twaalf dertien
					veertien vijftien zestien zeventien achttien negentien twintig dertig veertig
					vijftig zestig zeventig tachtig negentig honderd duizend miljoen miljard
					biljoen biljard triljoen triljard
					""".split())

					# Ordinal counterparts of the cardinal words above.
					_ordinal_words = set("""
					eerste tweede derde vierde vijfde zesde zevende achtste negende tiende elfde
					twaalfde dertiende veertiende vijftiende zestiende zeventiende achttiende
					negentiende twintigste dertigste veertigste vijftigste zestigste zeventigste
					tachtigste negentigste honderdste duizendste miljoenste miljardste biljoenste
					biljardste triljoenste triljardste
					""".split())


					def like_num(text):
					    """Check whether a token text looks like a number.

					    A text is number-like when, after removing ',' and '.' separators, it
					    consists only of digits, is a simple digit/digit fraction, or is one of
					    the known Dutch cardinal or ordinal number words (case-insensitive).

					    text (unicode): The token text to check.
					    RETURNS (bool): True if the text resembles a number, False otherwise.
					    """
					    # Drop thousands/decimal separators so "1.000,50" is treated as digits.
					    text = text.replace(',', '').replace('.', '')
					    if text.isdigit():
					        return True
					    if text.count('/') == 1:
					        num, denom = text.split('/')
					        if num.isdigit() and denom.isdigit():
					            return True
					    # The word tables are lower-case, so normalise the text first; this
					    # makes capitalised (e.g. sentence-initial) number words match as well.
					    lowered = text.lower()
					    if lowered in _num_words or lowered in _ordinal_words:
					        return True
					    return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Lexical attribute getters exported by this module: the LIKE_NUM attribute
					# is computed by like_num.
					LEX_ATTRS = {LIKE_NUM: like_num}
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user