From b03fb2d7b068f4752fda7cb5783d3c08dd0adb63 Mon Sep 17 00:00:00 2001 From: ines Date: Sun, 28 May 2017 00:03:16 +0200 Subject: [PATCH] Update 101 and usage docs --- website/assets/img/docs/pipeline.svg | 2 +- website/docs/usage/_spacy-101/_vocab-stringstore.jade | 4 +++- website/docs/usage/lightning-tour.jade | 2 ++ website/docs/usage/rule-based-matching.jade | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/website/assets/img/docs/pipeline.svg b/website/assets/img/docs/pipeline.svg index e42c2362f..2ff00d787 100644 --- a/website/assets/img/docs/pipeline.svg +++ b/website/assets/img/docs/pipeline.svg @@ -2,7 +2,7 @@ diff --git a/website/docs/usage/_spacy-101/_vocab-stringstore.jade b/website/docs/usage/_spacy-101/_vocab-stringstore.jade index 3f551c9e1..dd300b5b9 100644 --- a/website/docs/usage/_spacy-101/_vocab-stringstore.jade +++ b/website/docs/usage/_spacy-101/_vocab-stringstore.jade @@ -89,4 +89,6 @@ p p | Even though both #[code Doc] objects contain the same words, the internal - | integer IDs are very different. + | integer IDs are very different. The same applies to all other strings, + | like the annotation scheme. To avoid mismatched IDs, spaCy will always + | export the vocab if you save a #[code Doc] or #[code nlp] object. 
diff --git a/website/docs/usage/lightning-tour.jade b/website/docs/usage/lightning-tour.jade index 7de486070..8cf651be0 100644 --- a/website/docs/usage/lightning-tour.jade +++ b/website/docs/usage/lightning-tour.jade @@ -139,6 +139,8 @@ p new_doc = Doc(Vocab()).from_disk('/moby_dick.bin') +infobox + | #[strong API:] #[+api("language") #[code Language]], + | #[+api("doc") #[code Doc]] | #[strong Usage:] #[+a("/docs/usage/saving-loading") Saving and loading] +h(2, "rule-matcher") Match text with token rules diff --git a/website/docs/usage/rule-based-matching.jade b/website/docs/usage/rule-based-matching.jade index fde6da6ef..1fd398ad9 100644 --- a/website/docs/usage/rule-based-matching.jade +++ b/website/docs/usage/rule-based-matching.jade @@ -345,7 +345,7 @@ p | account and check the #[code subtree] for intensifiers like "very", to | increase the sentiment score. At some point, you might also want to train | a sentiment model. However, the approach described in this example is - | very useful for #[strong bootstrapping rules to gather training data]. + | very useful for #[strong bootstrapping rules to collect training data]. | It's also an incredibly fast way to gather first insights into your data | – with about 1 million tweets, you'd be looking at a processing time of | #[strong under 1 minute].