mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Fix PhraseMatcher.remove for overlapping patterns (#4437)
This commit is contained in:
		
							parent
							
								
									f8f68bb062
								
							
						
					
					
						commit
						98a961a60e
					
				| 
						 | 
					@ -102,8 +102,10 @@ cdef class PhraseMatcher:
 | 
				
			||||||
        cdef vector[MapStruct*] path_nodes
 | 
					        cdef vector[MapStruct*] path_nodes
 | 
				
			||||||
        cdef vector[key_t] path_keys
 | 
					        cdef vector[key_t] path_keys
 | 
				
			||||||
        cdef key_t key_to_remove
 | 
					        cdef key_t key_to_remove
 | 
				
			||||||
        for keyword in self._docs[key]:
 | 
					        for keyword in sorted(self._docs[key], key=lambda x: len(x), reverse=True):
 | 
				
			||||||
            current_node = self.c_map
 | 
					            current_node = self.c_map
 | 
				
			||||||
 | 
					            path_nodes.clear()
 | 
				
			||||||
 | 
					            path_keys.clear()
 | 
				
			||||||
            for token in keyword:
 | 
					            for token in keyword:
 | 
				
			||||||
                result = map_get(current_node, token)
 | 
					                result = map_get(current_node, token)
 | 
				
			||||||
                if result:
 | 
					                if result:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -226,3 +226,13 @@ def test_phrase_matcher_callback(en_vocab):
 | 
				
			||||||
    matcher.add("COMPANY", mock, pattern)
 | 
					    matcher.add("COMPANY", mock, pattern)
 | 
				
			||||||
    matches = matcher(doc)
 | 
					    matches = matcher(doc)
 | 
				
			||||||
    mock.assert_called_once_with(matcher, doc, 0, matches)
 | 
					    mock.assert_called_once_with(matcher, doc, 0, matches)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def test_phrase_matcher_remove_overlapping_patterns(en_vocab):
 | 
				
			||||||
 | 
					    matcher = PhraseMatcher(en_vocab)
 | 
				
			||||||
 | 
					    pattern1 = Doc(en_vocab, words=["this"])
 | 
				
			||||||
 | 
					    pattern2 = Doc(en_vocab, words=["this", "is"])
 | 
				
			||||||
 | 
					    pattern3 = Doc(en_vocab, words=["this", "is", "a"])
 | 
				
			||||||
 | 
					    pattern4 = Doc(en_vocab, words=["this", "is", "a", "word"])
 | 
				
			||||||
 | 
					    matcher.add("THIS", None, pattern1, pattern2, pattern3, pattern4)
 | 
				
			||||||
 | 
					    matcher.remove("THIS")
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user