mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Have the matcher return character offsets instead of token indices, to handle the match better.
This commit is contained in:
		
							parent
							
								
									9b60186266
								
							
						
					
					
						commit
						049c937540
					
				| 
						 | 
					@ -291,8 +291,8 @@ cdef class Matcher:
 | 
				
			||||||
                elif action == ACCEPT:
 | 
					                elif action == ACCEPT:
 | 
				
			||||||
                    # TODO: What to do about patterns starting with ZERO? Need to
 | 
					                    # TODO: What to do about patterns starting with ZERO? Need to
 | 
				
			||||||
                    # adjust the start position.
 | 
					                    # adjust the start position.
 | 
				
			||||||
                    start = state.first
 | 
					                    start = doc.c[state.first].idx
 | 
				
			||||||
                    end = token_i+1
 | 
					                    end = doc.c[token_i].idx + doc.c[token_i].lex.length
 | 
				
			||||||
                    ent_id = state.second[1].attrs[0].value
 | 
					                    ent_id = state.second[1].attrs[0].value
 | 
				
			||||||
                    label = state.second[1].attrs[1].value
 | 
					                    label = state.second[1].attrs[1].value
 | 
				
			||||||
                    acceptor = self._acceptors.get(ent_id)
 | 
					                    acceptor = self._acceptors.get(ent_id)
 | 
				
			||||||
| 
						 | 
					@ -319,12 +319,16 @@ cdef class Matcher:
 | 
				
			||||||
                    state.second = pattern + 1
 | 
					                    state.second = pattern + 1
 | 
				
			||||||
                    partials.push_back(state)
 | 
					                    partials.push_back(state)
 | 
				
			||||||
                elif action == ACCEPT:
 | 
					                elif action == ACCEPT:
 | 
				
			||||||
                    start = token_i
 | 
					                    start = token.idx
 | 
				
			||||||
                    end = token_i+1
 | 
					                    end = token.idx + token.lex.length
 | 
				
			||||||
                    ent_id = pattern[1].attrs[0].value
 | 
					                    ent_id = pattern[1].attrs[0].value
 | 
				
			||||||
                    label = pattern[1].attrs[1].value
 | 
					                    label = pattern[1].attrs[1].value
 | 
				
			||||||
                    if acceptor is None or acceptor(doc, ent_id, label, start, end):
 | 
					                    acceptor = self._acceptors.get(ent_id)
 | 
				
			||||||
                        matches.append((ent_id, label, start, end))
 | 
					                    if acceptor is not None:
 | 
				
			||||||
 | 
					                        match = acceptor(doc, ent_id, label, start, end)
 | 
				
			||||||
 | 
					                        if match:
 | 
				
			||||||
 | 
					                            ent_id, label, start, end = match
 | 
				
			||||||
 | 
					                    matches.append((ent_id, label, start, end))
 | 
				
			||||||
        for i, (ent_id, label, start, end) in enumerate(matches):
 | 
					        for i, (ent_id, label, start, end) in enumerate(matches):
 | 
				
			||||||
            on_match = self._callbacks.get(ent_id)
 | 
					            on_match = self._callbacks.get(ent_id)
 | 
				
			||||||
            if on_match is not None:
 | 
					            if on_match is not None:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user