mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			83 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			83 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
//-  Docs > API > Matcher
 | 
						|
//- ============================================================================
 | 
						|
 | 
						|
+section('matcher')
 | 
						|
    +h2('matcher', 'https://github.com/' + profiles.github + '/spaCy/blob/master/spacy/matcher.pyx#L165')
 | 
						|
        | #[+label('tag') class] Matcher
 | 
						|
 | 
						|
    p A full example can be found #[a(href="https://github.com/" + profiles.github + "/spaCy/blob/master/examples/matcher_example.py") here].
 | 
						|
 | 
						|
    +table(['Usage', 'Description'], 'code')
 | 
						|
            +row
 | 
						|
                +cell #[code.lang-python nlp(doc)]
 | 
						|
                +cell.
 | 
						|
                    As part of the annotation pipeline.
 | 
						|
 | 
						|
            +row
 | 
						|
                +cell #[code.lang-python nlp.matcher(doc)]
 | 
						|
                +cell.
 | 
						|
                    Explicit invocation.
 | 
						|
 | 
						|
            +row
 | 
						|
                +cell #[code.lang-python nlp.matcher.add(u'FooCorp', u'ORG', {}, [[{u'ORTH': u'Foo'}]])]
 | 
						|
                +cell.
 | 
						|
                    Add a pattern to match.
 | 
						|
 | 
						|
    +section('matcher-init')
 | 
						|
        +h3('matcher-init') __init__(self, vocab, patterns)
 | 
						|
            +table(['Name', 'Type', 'Description'], 'params')
 | 
						|
                +row
 | 
						|
                    +cell vocab
 | 
						|
                    +cell #[code.lang-python spacy.vocab.Vocab]
 | 
						|
                    +cell Reference to the shared vocabulary object.
 | 
						|
 | 
						|
                +row
 | 
						|
                    +cell patterns
 | 
						|
                    +cell #[code {entity_key: (etype, attrs, specs)}]
 | 
						|
                    +cell.
 | 
						|
                        Initial patterns to match. See #[code Matcher.add].
 | 
						|
                               
 | 
						|
    +section('matcher-add')
 | 
						|
        +h3('matcher-add') add(self, entity_key, etype, attrs, specs)
 | 
						|
            +table(['Name', 'Type', 'Description'], 'params')
 | 
						|
                +row
 | 
						|
                    +cell entity_key
 | 
						|
                    +cell unicode or int
 | 
						|
                    +cell Your arbitrary ID string (or its integer encoding).
 | 
						|
                +row
 | 
						|
                    +cell etype
 | 
						|
                    +cell unicode or int
 | 
						|
                    +cell A pre-registered entity type, e.g. u'PERSON', u'ORG', etc.
 | 
						|
                +row
 | 
						|
                    +cell attrs
 | 
						|
                    +cell #[code dict]
 | 
						|
                    +cell Placeholder for future support of entity attributes.
 | 
						|
                +row
 | 
						|
                    +cell specs
 | 
						|
                    +cell #[code [[{int: unicode}]]]
 | 
						|
                    +cell A list of surface forms, where each surface form is defined as a list of token definitions, and each token definition is a dictionary mapping attribute IDs to attribute values.
 | 
						|
 | 
						|
    +section('matcher-saveload')
 | 
						|
        +h3('matcher-saveload')
 | 
						|
            | Save and Load
 | 
						|
 | 
						|
        +section('matcher-saveload-dump')
 | 
						|
            +h4('matcher-saveload-dump') dump(loc)
 | 
						|
 | 
						|
            +table(['Name', 'Type', 'Description'], 'params')
 | 
						|
                    +row
 | 
						|
                        +cell loc
 | 
						|
                        +cell #[a(href=link_unicode target='_blank') unicode]
 | 
						|
                        +cell.
 | 
						|
                            Path to save the gazetteer.json file to.
 | 
						|
 | 
						|
        +section('matcher-saveload-load')
 | 
						|
            +h4('matcher-saveload-load') load(loc)
 | 
						|
 | 
						|
            +table(['Name', 'Type', 'Description'], 'params')
 | 
						|
                    +row
 | 
						|
                        +cell loc
 | 
						|
                        +cell #[a(href=link_unicode target='_blank') unicode]
 | 
						|
                        +cell.
 | 
						|
                            Path to load the gazetteer.json file from.
 |