mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			82 lines
		
	
	
		
			3.0 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			82 lines
		
	
	
		
			3.0 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| //- ----------------------------------
 | |
| //- 💫 DOCS > API > MATCHER
 | |
| //- ----------------------------------
 | |
| 
 | |
| +section("matcher")
 | |
|     +h(2, "matcher", "https://github.com/" + SOCIAL.github + "/spaCy/blob/master/spacy/matcher.pyx")
 | |
|         | #[+tag class] Matcher
 | |
| 
 | |
|     p A full example can be found #[a(href="https://github.com/" + SOCIAL.github + "/spaCy/blob/master/examples/matcher_example.py") here].
 | |
| 
 | |
|     +table(["Usage", "Description"])
 | |
|         +row
 | |
|             +cell #[code.lang-python nlp(doc)]
 | |
|             +cell As part of the annotation pipeline.
 | |
| 
 | |
|         +row
 | |
|             +cell #[code.lang-python nlp.matcher(doc)]
 | |
|             +cell Explicit invocation.
 | |
| 
 | |
|         +row
 | |
|             +cell #[code.lang-python nlp.matcher.add(u'FooCorp', u'ORG', {}, [[{u'ORTH': u'Foo'}]])]
 | |
|             +cell Add a pattern to match.
 | |
| 
 | |
|     +section("matcher-init")
 | |
|         +h(3, "matcher-init") __init__(self, vocab, patterns)
 | |
| 
 | |
|         +table(["Name", "Type", "Description"])
 | |
|             +row
 | |
|                 +cell vocab
 | |
|                 +cell #[code.lang-python spacy.vocab.Vocab]
 | |
|                 +cell Reference to the shared vocabulary object.
 | |
| 
 | |
|             +row
 | |
|                 +cell patterns
 | |
|                 +cell #[code {entity_key: (etype, attrs, specs)}]
 | |
|                 +cell.
 | |
|                     Initial patterns to match. See #[code Matcher.add] for details on each field.
 | |
| 
 | |
|     +section("matcher-add")
 | |
|         +h(3, "matcher-add") add(self, entity_key, etype, attrs, specs)
 | |
| 
 | |
|         +table(["Name", "Type", "Description"])
 | |
|             +row
 | |
|                 +cell entity_key
 | |
|                 +cell unicode or int
 | |
|                 +cell Your arbitrary ID string (or its integer encoding).
 | |
|             +row
 | |
|                 +cell etype
 | |
|                 +cell unicode or int
 | |
|                 +cell A pre-registered entity type, e.g. #[code u'PERSON'] or #[code u'ORG'].
 | |
|             +row
 | |
|                 +cell attrs
 | |
|                 +cell #[code dict]
 | |
|                 +cell Placeholder for future support of entity attributes.
 | |
|             +row
 | |
|                 +cell specs
 | |
|                 +cell #[code [[{int: unicode}]]]
 | |
|                 +cell A list of surface forms, where each surface form is defined as a list of token definitions, and each token definition is a dictionary mapping attribute IDs to attribute values.
 | |
| 
 | |
|     +section("matcher-saveload")
 | |
|         +h(3, "matcher-saveload")
 | |
|             | Save and Load
 | |
| 
 | |
|         +section("matcher-saveload-dump")
 | |
|             +h(4, "matcher-saveload-dump") dump(loc)
 | |
| 
 | |
|             +table(["Name", "Type", "Description"])
 | |
|                     +row
 | |
|                         +cell loc
 | |
|                         +cell #[+a(link_unicode) unicode]
 | |
|                         +cell Path to save the gazetteer.json file to.
 | |
| 
 | |
|         +section("matcher-saveload-load")
 | |
|             +h(4, "matcher-saveload-load") load(loc)
 | |
| 
 | |
|             +table(["Name", "Type", "Description"])
 | |
|                     +row
 | |
|                         +cell loc
 | |
|                         +cell #[+a(link_unicode) unicode]
 | |
|                         +cell.
 | |
|                             Path to load the gazetteer.json file from.
 |