mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	* Temporarily import flag attributes in matcher
This commit is contained in:
		
							parent
							
								
									7cc56ada6e
								
							
						
					
					
						commit
						6427a3fcac
					
				|  | @ -8,6 +8,7 @@ from cymem.cymem cimport Pool | ||||||
| from libcpp.vector cimport vector | from libcpp.vector cimport vector | ||||||
| 
 | 
 | ||||||
| from .attrs cimport LENGTH, ENT_TYPE, ORTH, NORM, LEMMA, LOWER, SHAPE | from .attrs cimport LENGTH, ENT_TYPE, ORTH, NORM, LEMMA, LOWER, SHAPE | ||||||
|  | from .attrs cimport FLAG13, FLAG14, FLAG15, FLAG16, FLAG17, FLAG18, FLAG19, FLAG20, FLAG21, FLAG22, FLAG23, FLAG24, FLAG25 | ||||||
| from .tokens.doc cimport get_token_attr | from .tokens.doc cimport get_token_attr | ||||||
| from .tokens.doc cimport Doc | from .tokens.doc cimport Doc | ||||||
| from .vocab cimport Vocab | from .vocab cimport Vocab | ||||||
|  | @ -53,6 +54,8 @@ cdef int match(const Pattern* pattern, const TokenC* token) except -1: | ||||||
|     cdef int i |     cdef int i | ||||||
|     for i in range(pattern.length): |     for i in range(pattern.length): | ||||||
|         if get_token_attr(token, pattern.spec[i].attr) != pattern.spec[i].value: |         if get_token_attr(token, pattern.spec[i].attr) != pattern.spec[i].value: | ||||||
|  |             print "Pattern fail", pattern.spec[i].attr, pattern.spec[i].value | ||||||
|  |             print get_token_attr(token, pattern.spec[i].attr) | ||||||
|             return False |             return False | ||||||
|     return True |     return True | ||||||
| 
 | 
 | ||||||
|  | @ -76,7 +79,10 @@ def _convert_strings(token_specs, string_store): | ||||||
|                 attr = map_attr_name(attr) |                 attr = map_attr_name(attr) | ||||||
|             if isinstance(value, basestring): |             if isinstance(value, basestring): | ||||||
|                 value = string_store[value] |                 value = string_store[value] | ||||||
|  |             if isinstance(value, bool): | ||||||
|  |                 value = int(value) | ||||||
|             converted[-1].append((attr, value)) |             converted[-1].append((attr, value)) | ||||||
|  |             print "Converted", converted[-1] | ||||||
|     return converted |     return converted | ||||||
|      |      | ||||||
| 
 | 
 | ||||||
|  | @ -92,6 +98,32 @@ def map_attr_name(attr): | ||||||
|         return SHAPE |         return SHAPE | ||||||
|     elif attr == 'NORM': |     elif attr == 'NORM': | ||||||
|         return NORM |         return NORM | ||||||
|  |     elif attr == 'FLAG13': | ||||||
|  |         return FLAG13 | ||||||
|  |     elif attr == 'FLAG14': | ||||||
|  |         return FLAG14 | ||||||
|  |     elif attr == 'FLAG15': | ||||||
|  |         return FLAG15 | ||||||
|  |     elif attr == 'FLAG16': | ||||||
|  |         return FLAG16 | ||||||
|  |     elif attr == 'FLAG17': | ||||||
|  |         return FLAG17 | ||||||
|  |     elif attr == 'FLAG18': | ||||||
|  |         return FLAG18 | ||||||
|  |     elif attr == 'FLAG19': | ||||||
|  |         return FLAG19 | ||||||
|  |     elif attr == 'FLAG20': | ||||||
|  |         return FLAG20 | ||||||
|  |     elif attr == 'FLAG21': | ||||||
|  |         return FLAG21 | ||||||
|  |     elif attr == 'FLAG22': | ||||||
|  |         return FLAG22 | ||||||
|  |     elif attr == 'FLAG23': | ||||||
|  |         return FLAG23 | ||||||
|  |     elif attr == 'FLAG24': | ||||||
|  |         return FLAG24 | ||||||
|  |     elif attr == 'FLAG25': | ||||||
|  |         return FLAG25 | ||||||
|     else: |     else: | ||||||
|         raise Exception("TODO: Finish supporting attr mapping %s" % attr) |         raise Exception("TODO: Finish supporting attr mapping %s" % attr) | ||||||
| 
 | 
 | ||||||
|  | @ -130,6 +162,7 @@ cdef class Matcher: | ||||||
|         # TODO: Do something more clever about multiple patterns for single |         # TODO: Do something more clever about multiple patterns for single | ||||||
|         # entity |         # entity | ||||||
|         for spec in specs: |         for spec in specs: | ||||||
|  |             assert len(spec) >= 1 | ||||||
|             spec = _convert_strings(spec, self.vocab.strings) |             spec = _convert_strings(spec, self.vocab.strings) | ||||||
|             self.patterns.push_back(init_pattern(self.mem, spec, etype)) |             self.patterns.push_back(init_pattern(self.mem, spec, etype)) | ||||||
| 
 | 
 | ||||||
|  | @ -142,11 +175,13 @@ cdef class Matcher: | ||||||
|         cdef Pattern* state |         cdef Pattern* state | ||||||
|         matches = [] |         matches = [] | ||||||
|         for token_i in range(doc.length): |         for token_i in range(doc.length): | ||||||
|  |             print 'check', doc[token_i].orth_ | ||||||
|             token = &doc.data[token_i] |             token = &doc.data[token_i] | ||||||
|             q = 0 |             q = 0 | ||||||
|             for i in range(partials.size()): |             for i in range(partials.size()): | ||||||
|                 state = partials.at(i) |                 state = partials.at(i) | ||||||
|                 if match(state, token): |                 if match(state, token): | ||||||
|  |                     print 'match!' | ||||||
|                     if is_final(state): |                     if is_final(state): | ||||||
|                         matches.append(get_entity(state, token, token_i)) |                         matches.append(get_entity(state, token, token_i)) | ||||||
|                     else: |                     else: | ||||||
|  | @ -156,6 +191,7 @@ cdef class Matcher: | ||||||
|             for i in range(self.n_patterns): |             for i in range(self.n_patterns): | ||||||
|                 state = self.patterns[i] |                 state = self.patterns[i] | ||||||
|                 if match(state, token): |                 if match(state, token): | ||||||
|  |                     print 'match!' | ||||||
|                     if is_final(state): |                     if is_final(state): | ||||||
|                         matches.append(get_entity(state, token, token_i)) |                         matches.append(get_entity(state, token, token_i)) | ||||||
|                     else: |                     else: | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user