Update Matcher example

This commit is contained in:
ines 2017-05-29 01:08:47 +02:00
parent 7b1ddcc04d
commit 42cf414138

View File

@@ -352,8 +352,7 @@ p
p p
| By default, spaCy's tokenizer will split emoji into separate tokens. This | By default, spaCy's tokenizer will split emoji into separate tokens. This
| means that you can create a pattern for one or more emoji tokens. In this | means that you can create a pattern for one or more emoji tokens.
| case, a sequence of identical emoji should be treated as one instance.
| Valid hashtags usually consist of a #[code #], plus a sequence of | Valid hashtags usually consist of a #[code #], plus a sequence of
| ASCII characters with no whitespace, making them easy to match as well. | ASCII characters with no whitespace, making them easy to match as well.
@@ -368,8 +367,8 @@ p
neg_emoji = [u'😞', u'😠', u'😩', u'😢', u'😭', u'😒'] # negative emoji neg_emoji = [u'😞', u'😠', u'😩', u'😢', u'😭', u'😒'] # negative emoji
# add patterns to match one or more emoji tokens # add patterns to match one or more emoji tokens
pos_patterns = [[{'ORTH': emoji, 'OP': '+'}] for emoji in pos_emoji] pos_patterns = [[{'ORTH': emoji}] for emoji in pos_emoji]
neg_patterns = [[{'ORTH': emoji, 'OP': '+'}] for emoji in neg_emoji] neg_patterns = [[{'ORTH': emoji}] for emoji in neg_emoji]
matcher.add('HAPPY', label_sentiment, *pos_patterns) # add positive pattern matcher.add('HAPPY', label_sentiment, *pos_patterns) # add positive pattern
matcher.add('SAD', label_sentiment, *neg_patterns) # add negative pattern matcher.add('SAD', label_sentiment, *neg_patterns) # add negative pattern
@@ -397,9 +396,9 @@ p
def label_sentiment(matcher, doc, i, matches): def label_sentiment(matcher, doc, i, matches):
match_id, start, end = matches[i] match_id, start, end = matches[i]
if match_id is 'HAPPY': if doc.vocab.strings[match_id] == 'HAPPY': # don't forget to get string!
doc.sentiment += 0.1 # add 0.1 for positive sentiment doc.sentiment += 0.1 # add 0.1 for positive sentiment
elif match_id is 'SAD': elif doc.vocab.strings[match_id] == 'SAD':
doc.sentiment -= 0.1 # subtract 0.1 for negative sentiment doc.sentiment -= 0.1 # subtract 0.1 for negative sentiment
span = doc[start : end] span = doc[start : end]
emoji = Emojipedia.search(span[0].text) # get data for emoji emoji = Emojipedia.search(span[0].text) # get data for emoji