Fix typos, wording and formatting

ines 2017-05-28 01:30:12 +02:00
parent eb5a8be9ad
commit 10d05c2b92
4 changed files with 49 additions and 50 deletions


@@ -5,7 +5,7 @@ p
     | #[strong how similar they are]. Predicting similarity is useful for
     | building recommendation systems or flagging duplicates. For example, you
     | can suggest a user content that's similar to what they're currently
-    | looking at, or label a support ticket as a duplicate, if it's very
+    | looking at, or label a support ticket as a duplicate if it's very
     | similar to an already existing one.
 p
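For context on the duplicate-flagging use case described above, here's a minimal sketch using Doc.similarity; the model name, example texts and the 0.9 cutoff are illustrative assumptions, and a model with word vectors is required:

    import spacy

    # assumes a vectors-equipped model like en_core_web_md is installed
    nlp = spacy.load('en_core_web_md')

    ticket = nlp(u"My application crashes when I start it up.")
    existing = nlp(u"The app keeps crashing on startup.")

    # Doc.similarity compares averaged word vectors and returns a float
    if ticket.similarity(existing) > 0.9:  # 0.9 is an arbitrary cutoff
        print('Possible duplicate of an existing ticket')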


@@ -144,7 +144,7 @@ p
 +table(["Argument", "Type", "Description"])
     +row
         +cell #[code vocab]
-        +cell #[coce Vocab]
+        +cell #[code Vocab]
         +cell
             | Shared data between components, including strings, morphology,
             | vectors etc.
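As a minimal sketch of what sharing the vocab means in practice (illustrative, not part of the commit): objects constructed with the same Vocab resolve strings and flags consistently.

    import spacy
    from spacy.tokens import Doc

    nlp = spacy.load('en')
    # a Doc created directly still shares the pipeline's Vocab
    doc = Doc(nlp.vocab, words=[u'Hello', u'world'])
    assert doc.vocab is nlp.vocab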


@@ -65,7 +65,7 @@ p
     | spaCy provides a variety of linguistic annotations to give you insights
     | into a text's grammatical structure. This includes the word types,
     | i.e. the parts of speech, and how the words are related to each other.
-    | For example, if you're analysing text, it makes a #[em huge] difference
+    | For example, if you're analysing text, it makes a huge difference
     | whether a noun is the subject of a sentence, or the object or whether
     | "google" is used as a verb, or refers to the website or company in a
     | specific context.
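A minimal sketch of inspecting those annotations, assuming an installed English model (the example sentence is illustrative):

    import spacy

    nlp = spacy.load('en')
    doc = nlp(u"Google is googling the Google website")
    for token in doc:
        # pos_ is the coarse part-of-speech tag, dep_ the syntactic relation
        print(token.text, token.pos_, token.dep_)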
@@ -119,9 +119,11 @@ include _spacy-101/_named-entities
 +infobox
     | To learn more about entity recognition in spaCy, how to
-    | #[strong add your own entities] to a document and how to train and update
-    | the entity predictions of a model, see the usage guide on
-    | #[+a("/docs/usage/entity-recognition") named entity recognition].
+    | #[strong add your own entities] to a document and how to
+    | #[strong train and update] the entity predictions of a model, see the
+    | usage guides on
+    | #[+a("/docs/usage/entity-recognition") named entity recognition] and
+    | #[+a("/docs/usage/training-ner") training the named entity recognizer].
 
 +h(2, "vectors-similarity") Word vectors and similarity
 +tag-model("vectors")
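A minimal sketch of reading the entity predictions discussed in the infobox above, assuming an installed English model:

    import spacy

    nlp = spacy.load('en')
    doc = nlp(u"Apple is looking at buying U.K. startup for $1 billion")
    for ent in doc.ents:
        # each entity is a Span with a label and character offsets
        print(ent.text, ent.label_, ent.start_char, ent.end_char)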


@@ -20,19 +20,18 @@ p
     nlp = Language(pipeline=['my_factory', mycomponent])
 
 p
-    | It's now much easier to customise the pipeline with your own components.
-    | Components are functions that receive a #[code Doc] object, modify and
-    | return it. If your component is stateful, you'll want to create a new one
-    | for each pipeline. You can do that by defining and registering a factory
-    | which receives the shared #[code Vocab] object and returns a component.
-p
-    | spaCy's default components, the vectorizer, tagger, parser and entity
-    | recognizer, can be added to your pipeline by using their string IDs.
-    | This way, you won't have to worry about finding and implementing them –
-    | to use the default tagger, simply add #[code "tagger"] to the pipeline,
+    | It's now much easier to #[strong customise the pipeline] with your own
+    | components, functions that receive a #[code Doc] object, modify and
+    | return it. If your component is stateful, you can define and register a
+    | factory which receives the shared #[code Vocab] object and returns a
+    | component. spaCy's default components can be added to your pipeline by
+    | using their string IDs. This way, you won't have to worry about finding
+    | and implementing them – simply add #[code "tagger"] to the pipeline,
     | and spaCy will know what to do.
 
 +image
     include ../../assets/img/docs/pipeline.svg
 
 +infobox
     | #[strong API:] #[+api("language") #[code Language]]
     | #[strong Usage:] #[+a("/docs/usage/language-processing-pipeline") Processing text]
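A minimal sketch of the component pattern described above, mirroring the pipeline=[...] call from the aside; the component itself is an illustrative stand-in:

    from spacy.lang.en import English

    def my_component(doc):
        # a component is just a callable: Doc in, modified Doc out
        print('Processing', len(doc), 'tokens')
        return doc

    # string IDs pull in spaCy's default components; callables are used as-is
    nlp = English(pipeline=['tagger', my_component])
    doc = nlp(u"This is a sentence.")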
@@ -96,11 +95,10 @@ p
     | #[code Language] class, or load a model that initialises one. This allows
     | languages to contain more custom data, e.g. lemmatizer lookup tables, or
     | complex regular expressions. The language data has also been tidied up
-    | and simplified. It's now also possible to overwrite the functions that
-    | compute lexical attributes like #[code like_num], and supply
-    | language-specific syntax iterators, e.g. to determine noun chunks. spaCy
-    | now also supports simple lookup-based lemmatization. The data is stored
-    | in a dictionary mapping a string to its lemma.
+    | and simplified. spaCy now also supports simple lookup-based lemmatization.
 
 +image
     include ../../assets/img/docs/language_data.svg
 
 +infobox
     | #[strong API:] #[+api("language") #[code Language]]
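The removed lines spell out what the lookup data looks like: a plain dictionary mapping a string to its lemma. A minimal sketch with illustrative entries:

    # lookup lemmatization table: surface form -> lemma (entries are illustrative)
    LOOKUP = {
        u"aren't": u"be",
        u"went": u"go",
        u"mice": u"mouse"
    }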
@@ -111,13 +109,10 @@ p
 +aside-code("Example").
     from spacy.matcher import Matcher
-    from spacy.attrs import LOWER, IS_PUNCT
     matcher = Matcher(nlp.vocab)
-    matcher.add('HelloWorld', None,
-        [{LOWER: 'hello'}, {IS_PUNCT: True}, {LOWER: 'world'}],
-        [{LOWER: 'hello'}, {LOWER: 'world'}])
+    matcher.add('HEARTS', None, [{'ORTH': '❤️', 'OP': '+'}])
     assert len(matcher) == 1
-    assert 'HelloWorld' in matcher
+    assert 'HEARTS' in matcher
 
 p
     | Patterns can now be added to the matcher by calling
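A minimal sketch of the v2-style add() call from the aside above, with an optional match callback; the callback body and example text are illustrative:

    import spacy
    from spacy.matcher import Matcher

    nlp = spacy.load('en')
    matcher = Matcher(nlp.vocab)

    def on_match(matcher, doc, i, matches):
        # optional callback, invoked once per match; pass None to disable
        match_id, start, end = matches[i]
        print('Matched:', doc[start:end].text)

    # add() takes a match ID, an optional callback and one or more patterns
    matcher.add('HEARTS', on_match, [{'ORTH': '❤️', 'OP': '+'}])
    matches = matcher(nlp(u'I ❤️ spaCy'))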
@@ -157,28 +152,8 @@ p
         +cell #[+api("language#to_disk") #[code Language.to_disk]]
 
     +row
-        +cell #[code Tokenizer.load]
-        +cell
-            | #[+api("tokenizer#from_disk") #[code Tokenizer.from_disk]]
-            | #[+api("tokenizer#from_bytes") #[code Tokenizer.from_bytes]]
-
-    +row
-        +cell #[code Tagger.load]
-        +cell
-            | #[+api("tagger#from_disk") #[code Tagger.from_disk]]
-            | #[+api("tagger#from_bytes") #[code Tagger.from_bytes]]
-
-    +row
-        +cell #[code DependencyParser.load]
-        +cell
-            | #[+api("dependencyparser#from_disk") #[code DependencyParser.from_disk]]
-            | #[+api("dependencyparser#from_bytes") #[code DependencyParser.from_bytes]]
-
-    +row
-        +cell #[code EntityRecognizer.load]
-        +cell
-            | #[+api("entityrecognizer#from_disk") #[code EntityRecognizer.from_disk]]
-            | #[+api("entityrecognizer#from_bytes") #[code EntityRecognizer.from_bytes]]
         +cell #[code Language.create_make_doc]
         +cell #[+api("language#attributes") #[code Language.tokenizer]]
 
     +row
         +cell
@@ -212,6 +187,28 @@ p
             | #[+api("stringstore#to_disk") #[code StringStore.to_disk]]
             | #[+api("stringstore#to_bytes") #[code StringStore.to_bytes]]
 
     +row
+        +cell #[code Tokenizer.load]
+        +cell -
+
+    +row
+        +cell #[code Tagger.load]
+        +cell
+            | #[+api("tagger#from_disk") #[code Tagger.from_disk]]
+            | #[+api("tagger#from_bytes") #[code Tagger.from_bytes]]
+
+    +row
+        +cell #[code DependencyParser.load]
+        +cell
+            | #[+api("dependencyparser#from_disk") #[code DependencyParser.from_disk]]
+            | #[+api("dependencyparser#from_bytes") #[code DependencyParser.from_bytes]]
+
+    +row
+        +cell #[code EntityRecognizer.load]
+        +cell
+            | #[+api("entityrecognizer#from_disk") #[code EntityRecognizer.from_disk]]
+            | #[+api("entityrecognizer#from_bytes") #[code EntityRecognizer.from_bytes]]
+
+    +row
         +cell #[code Matcher.load]
         +cell -
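The rows above map the old load() methods to the new symmetrical serialization API. A minimal sketch at the Language level; the path is illustrative:

    import spacy

    nlp = spacy.load('en')
    # the new API is symmetrical: to_disk/from_disk and to_bytes/from_bytes
    nlp.to_disk('/tmp/my_model')

    data = nlp.to_bytes()
    nlp2 = spacy.load('en')
    nlp2.from_bytes(data)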
@@ -232,7 +229,7 @@ p
     +row
         +cell #[code Doc.read_bytes]
-        +cell
+        +cell #[+api("binder") #[code Binder]]
 
     +row
         +cell #[code Token.is_ancestor_of]