diff --git a/website/index.jade b/website/index.jade index 8a77ae5fe..cd8eecfa9 100644 --- a/website/index.jade +++ b/website/index.jade @@ -60,8 +60,8 @@ include _includes/_mixins # Load English tokenizer, tagger, parser, NER and word vectors nlp = spacy.load('en') - # Process a document, of any size - text = open('war_and_peace.txt').read() + # Process whole documents + text = open('customer_feedback_627.txt').read() doc = nlp(text) # Find named entities, phrases and concepts diff --git a/website/usage/_spacy-101/_lightning-tour.jade b/website/usage/_spacy-101/_lightning-tour.jade index 3046527b6..d80665eb1 100644 --- a/website/usage/_spacy-101/_lightning-tour.jade +++ b/website/usage/_spacy-101/_lightning-tour.jade @@ -183,11 +183,11 @@ p from spacy.vocab import Vocab nlp = spacy.load('en') - moby_dick = open('moby_dick.txt', 'r').read() - doc = nlp(moby_dick) - doc.to_disk('/moby_dick.bin') + customer_feedback = open('customer_feedback_627.txt').read() + doc = nlp(customer_feedback) + doc.to_disk('/tmp/customer_feedback_627.bin') - new_doc = Doc(Vocab()).from_disk('/moby_dick.bin') + new_doc = Doc(Vocab()).from_disk('/tmp/customer_feedback_627.bin') +infobox | #[+label-inline API:] #[+api("language") #[code Language]], @@ -210,7 +210,8 @@ p pattern2 = [[{'ORTH': emoji, 'OP': '+'}] for emoji in ['😀', '😂', '🤣', '😍']] matcher.add('GoogleIO', None, pattern1) # match "Google I/O" or "Google i/o" matcher.add('HAPPY', set_sentiment, *pattern2) # match one or more happy emoji - matches = nlp(LOTS_OF TEXT) + text = open('customer_feedback_627.txt').read() + matches = nlp(text) +infobox | #[+label-inline API:] #[+api("matcher") #[code Matcher]]