Mirror of https://github.com/explosion/spaCy.git (synced 2025-10-31 16:07:41 +03:00)
			
		
		
		
	Revert "Update matcher test, to reflect character offset return instead of token offset."
This reverts commit f8d3e3bcfe.
			
			
This commit is contained in:
		
Parent: 4bf2c53c13
Commit: 7d446e5094
					
				|  | @@ -31,32 +31,27 @@ def test_no_match(matcher): | ||||||
| def test_match_start(matcher): | def test_match_start(matcher): | ||||||
|     doc = Doc(matcher.vocab, words=['JavaScript', 'is', 'good']) |     doc = Doc(matcher.vocab, words=['JavaScript', 'is', 'good']) | ||||||
|     assert matcher(doc) == [(matcher.vocab.strings['JS'], |     assert matcher(doc) == [(matcher.vocab.strings['JS'], | ||||||
|                              matcher.vocab.strings['PRODUCT'], 0, len('JavaScript'))] |                              matcher.vocab.strings['PRODUCT'], 0, 1)] | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def test_match_end(matcher): | def test_match_end(matcher): | ||||||
|     doc = Doc(matcher.vocab, words=['I', 'like', 'java']) |     doc = Doc(matcher.vocab, words=['I', 'like', 'java']) | ||||||
|     assert matcher(doc) == [(doc.vocab.strings['Java'], |     assert matcher(doc) == [(doc.vocab.strings['Java'], | ||||||
|                              doc.vocab.strings['PRODUCT'], |                              doc.vocab.strings['PRODUCT'], 2, 3)] | ||||||
|                              len('I like '), len('I like java'))] |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def test_match_middle(matcher): | def test_match_middle(matcher): | ||||||
|     doc = Doc(matcher.vocab, words=['I', 'like', 'Google', 'Now', 'best']) |     doc = Doc(matcher.vocab, words=['I', 'like', 'Google', 'Now', 'best']) | ||||||
|     assert matcher(doc) == [(doc.vocab.strings['GoogleNow'], |     assert matcher(doc) == [(doc.vocab.strings['GoogleNow'], | ||||||
|                              doc.vocab.strings['PRODUCT'], len('I like '), |                              doc.vocab.strings['PRODUCT'], 2, 4)] | ||||||
|                              len('I like Google Now'))] |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def test_match_multi(matcher): | def test_match_multi(matcher): | ||||||
|     doc = Doc(matcher.vocab, words='I like Google Now and java best'.split()) |     doc = Doc(matcher.vocab, words='I like Google Now and java best'.split()) | ||||||
|     assert matcher(doc) == [(doc.vocab.strings['GoogleNow'], |     assert matcher(doc) == [(doc.vocab.strings['GoogleNow'], | ||||||
|                              doc.vocab.strings['PRODUCT'], len('I like '), |                              doc.vocab.strings['PRODUCT'], 2, 4), | ||||||
|                              len('I like Google Now')), |  | ||||||
|                             (doc.vocab.strings['Java'], |                             (doc.vocab.strings['Java'], | ||||||
|                              doc.vocab.strings['PRODUCT'], |                              doc.vocab.strings['PRODUCT'], 5, 6)] | ||||||
|                              len('I like Google Now and '), |  | ||||||
|                              len('I like Google Now and java'))] |  | ||||||
| 
 | 
 | ||||||
| def test_match_zero(matcher): | def test_match_zero(matcher): | ||||||
|     matcher.add('Quote', '', {}, [ |     matcher.add('Quote', '', {}, [ | ||||||
|  | @@ -92,20 +87,20 @@ def test_match_zero_plus(matcher): | ||||||
|     assert len(matcher(doc)) == 1 |     assert len(matcher(doc)) == 1 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @pytest.mark.models | #@pytest.mark.models | ||||||
| def test_match_preserved(EN): | #def test_match_preserved(EN): | ||||||
|     patterns = { | #    patterns = { | ||||||
|         'JS': ['PRODUCT', {}, [[{'ORTH': 'JavaScript'}]]], | #        'JS': ['PRODUCT', {}, [[{'ORTH': 'JavaScript'}]]], | ||||||
|         'GoogleNow':  ['PRODUCT', {}, [[{'ORTH': 'Google'}, {'ORTH': 'Now'}]]], | #        'GoogleNow':  ['PRODUCT', {}, [[{'ORTH': 'Google'}, {'ORTH': 'Now'}]]], | ||||||
|         'Java':       ['PRODUCT', {}, [[{'LOWER': 'java'}]]], | #        'Java':       ['PRODUCT', {}, [[{'LOWER': 'java'}]]], | ||||||
|     } | #    } | ||||||
|     matcher = Matcher(EN.vocab, patterns) | #    matcher = Matcher(EN.vocab, patterns) | ||||||
|     doc = EN.tokenizer('I like java.') | #    doc = EN.tokenizer('I like java.') | ||||||
|     EN.tagger(doc) | #    EN.tagger(doc) | ||||||
|     assert len(doc.ents) == 0 | #    assert len(doc.ents) == 0 | ||||||
|     doc = EN.tokenizer('I like java.') | #    doc = EN.tokenizer('I like java.') | ||||||
|     doc.ents += tuple(matcher(doc)) | #    doc.ents += tuple(matcher(doc)) | ||||||
|     assert len(doc.ents) == 1 | #    assert len(doc.ents) == 1 | ||||||
|     EN.tagger(doc) | #    EN.tagger(doc) | ||||||
|     EN.entity(doc) | #    EN.entity(doc) | ||||||
|     assert len(doc.ents) == 1 | #    assert len(doc.ents) == 1 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user