Update example documents

This commit is contained in:
ines 2017-11-26 18:04:04 +01:00
parent 65d66b81f1
commit 89f8b1fba0
2 changed files with 8 additions and 7 deletions

View File

@@ -60,8 +60,8 @@ include _includes/_mixins
# Load English tokenizer, tagger, parser, NER and word vectors # Load English tokenizer, tagger, parser, NER and word vectors
nlp = spacy.load('en') nlp = spacy.load('en')
# Process a document, of any size # Process whole documents
text = open('war_and_peace.txt').read() text = open('customer_feedback_627.txt').read()
doc = nlp(text) doc = nlp(text)
# Find named entities, phrases and concepts # Find named entities, phrases and concepts

View File

@@ -183,11 +183,11 @@ p
from spacy.vocab import Vocab from spacy.vocab import Vocab
nlp = spacy.load('en') nlp = spacy.load('en')
moby_dick = open('moby_dick.txt', 'r').read() customer_feedback = open('customer_feedback_627.txt').read()
doc = nlp(moby_dick) doc = nlp(customer_feedback)
doc.to_disk('/moby_dick.bin') doc.to_disk('/tmp/customer_feedback_627.bin')
new_doc = Doc(Vocab()).from_disk('/moby_dick.bin') new_doc = Doc(Vocab()).from_disk('/tmp/customer_feedback_627.bin')
+infobox +infobox
| #[+label-inline API:] #[+api("language") #[code Language]], | #[+label-inline API:] #[+api("language") #[code Language]],
@@ -210,7 +210,8 @@ p
pattern2 = [[{'ORTH': emoji, 'OP': '+'}] for emoji in ['😀', '😂', '🤣', '😍']] pattern2 = [[{'ORTH': emoji, 'OP': '+'}] for emoji in ['😀', '😂', '🤣', '😍']]
matcher.add('GoogleIO', None, pattern1) # match "Google I/O" or "Google i/o" matcher.add('GoogleIO', None, pattern1) # match "Google I/O" or "Google i/o"
matcher.add('HAPPY', set_sentiment, *pattern2) # match one or more happy emoji matcher.add('HAPPY', set_sentiment, *pattern2) # match one or more happy emoji
matches = nlp(LOTS_OF TEXT) text = open('customer_feedback_627.txt').read()
matches = nlp(text)
+infobox +infobox
| #[+label-inline API:] #[+api("matcher") #[code Matcher]] | #[+label-inline API:] #[+api("matcher") #[code Matcher]]