mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	Update Matcher API docs
This commit is contained in:
		
							parent
							
								
									e39ad78267
								
							
						
					
					
						commit
						9edc7fb0ba
					
				|  | @ -45,7 +45,7 @@ p | ||||||
| 
 | 
 | ||||||
|     nlp = spacy.load('en') |     nlp = spacy.load('en') | ||||||
|     matcher = Matcher(nlp.vocab) |     matcher = Matcher(nlp.vocab) | ||||||
|     matcher.add_pattern('HelloWorld', [{LOWER: 'hello'}, {IS_PUNCT: True}, {LOWER: 'world'}]) |     matcher.add('HelloWorld', [{LOWER: 'hello'}, {IS_PUNCT: True}, {LOWER: 'world'}]) | ||||||
| 
 | 
 | ||||||
|     doc = nlp(u'Hello, world! Hello world!') |     doc = nlp(u'Hello, world! Hello world!') | ||||||
|     matches = matcher(doc) |     matches = matcher(doc) | ||||||
|  | @ -58,7 +58,7 @@ p | ||||||
|     |  without punctuation between "hello" and "world": |     |  without punctuation between "hello" and "world": | ||||||
| 
 | 
 | ||||||
| +code. | +code. | ||||||
|     matcher.add_pattern('HelloWorld', [{LOWER: 'hello'}, {IS_PUNCT: True}, {LOWER: 'world'}], |     matcher.add('HelloWorld', [{LOWER: 'hello'}, {IS_PUNCT: True}, {LOWER: 'world'}], | ||||||
|                               [{LOWER: 'hello'}, {LOWER: 'world'}]) |                               [{LOWER: 'hello'}, {LOWER: 'world'}]) | ||||||
| 
 | 
 | ||||||
| p | p | ||||||
|  | @ -81,7 +81,7 @@ p | ||||||
|     |  To be safe, you only match on the uppercase versions, in case someone has |     |  To be safe, you only match on the uppercase versions, in case someone has | ||||||
|     |  written it as "Google i/o". You also add a second pattern with an added |     |  written it as "Google i/o". You also add a second pattern with an added | ||||||
|     |  #[code {IS_DIGIT: True}] token – this will make sure you also match on |     |  #[code {IS_DIGIT: True}] token – this will make sure you also match on | ||||||
|     |  "Google I/O 2017". If this pattern matches, spaCy should execute your |     |  "Google I/O 2017". If your pattern matches, spaCy should execute your | ||||||
|     |  custom callback function #[code add_event_ent]. |     |  custom callback function #[code add_event_ent]. | ||||||
| 
 | 
 | ||||||
| +code. | +code. | ||||||
|  | @ -92,7 +92,7 @@ p | ||||||
|     nlp = spacy.load('en') |     nlp = spacy.load('en') | ||||||
|     matcher = Matcher(nlp.vocab) |     matcher = Matcher(nlp.vocab) | ||||||
| 
 | 
 | ||||||
|     matcher.add_pattern('GoogleIO', [{ORTH: 'Google'}, {UPPER: 'I'}, {ORTH: '/'}, {UPPER: 'O'}], |     matcher.add('GoogleIO', [{ORTH: 'Google'}, {UPPER: 'I'}, {ORTH: '/'}, {UPPER: 'O'}], | ||||||
|                             [{ORTH: 'Google'}, {UPPER: 'I'}, {ORTH: '/'}, {UPPER: 'O'}, {IS_DIGIT: True}], |                             [{ORTH: 'Google'}, {UPPER: 'I'}, {ORTH: '/'}, {UPPER: 'O'}, {IS_DIGIT: True}], | ||||||
|                             on_match=add_event_ent) |                             on_match=add_event_ent) | ||||||
| 
 | 
 | ||||||
|  | @ -101,8 +101,7 @@ p | ||||||
| 
 | 
 | ||||||
|     def add_event_ent(matcher, doc, i, matches): |     def add_event_ent(matcher, doc, i, matches): | ||||||
|         # Get the current match and create a tuple of entity label, start and end. |         # Get the current match and create a tuple of entity label, start and end. | ||||||
|         # Append entity to the doc's entities. (Don't overwrite doc.ents, in case |         # Append entity to the doc's entities. (Don't overwrite doc.ents!) | ||||||
|         # it already has other entities!) |  | ||||||
|         match_id, start, end = matches[i] |         match_id, start, end = matches[i] | ||||||
|         doc.ents += ((EVENT, start, end),) |         doc.ents += ((EVENT, start, end),) | ||||||
| 
 | 
 | ||||||
|  | @ -115,12 +114,12 @@ p | ||||||
|     |  function #[code merge_and_flag]: |     |  function #[code merge_and_flag]: | ||||||
| 
 | 
 | ||||||
| +code. | +code. | ||||||
|     matcher.add_pattern('BAD_HTML', [{ORTH: '<'}, {LOWER: 'br'}, {ORTH: '>'}], |     matcher.add('BAD_HTML', [{ORTH: '<'}, {LOWER: 'br'}, {ORTH: '>'}], | ||||||
|                             [{ORTH: '<'}, {LOWER: 'br/'}, {ORTH: '>'}], |                             [{ORTH: '<'}, {LOWER: 'br/'}, {ORTH: '>'}], | ||||||
|                             on_match=merge_and_flag) |                             on_match=merge_and_flag) | ||||||
| 
 | 
 | ||||||
|     # Add a new custom flag to the vocab, which is always False by default. |     # Add a new custom flag to the vocab, which is always False by default. | ||||||
|     # BAD_HTML will be the flag ID, which we can use to set it to True on the span. |     # BAD_HTML_FLAG will be the flag ID, which we can use to set it to True on the span. | ||||||
|     BAD_HTML_FLAG = doc.vocab.add_flag(lambda text: False) |     BAD_HTML_FLAG = doc.vocab.add_flag(lambda text: False) | ||||||
| 
 | 
 | ||||||
|     def merge_and_flag(matcher, doc, i, matches): |     def merge_and_flag(matcher, doc, i, matches): | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user