Port over docs changes [ci skip]

ines 2018-03-24 17:12:48 +01:00
parent 74cc6bb06a
commit 53680642af
13 changed files with 67 additions and 21 deletions

View File

@@ -208,7 +208,7 @@ p
+row
+cell #[code word_spacing]
+cell int
+cell Horizontal spacing between words and arcs in px.
+cell Vertical spacing between words and arcs in px.
+cell #[code 45]
+row
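As context for the corrected description, here is a minimal sketch of passing word_spacing to displaCy; the model name, text and value are only illustrative, assuming spaCy 2.x with an installed English model.

import spacy
from spacy import displacy

nlp = spacy.load('en_core_web_sm')  # assumes this model is installed
doc = nlp(u'This is a sentence.')
# word_spacing sets the vertical gap (in px) between the words and the arcs
html = displacy.render(doc, style='dep', options={'word_spacing': 45})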

View File

@@ -674,7 +674,7 @@ p
| token vectors.
+aside-code("Example").
apples = nlp(u'I like apples')
doc = nlp(u'I like apples')
assert doc.vector.dtype == 'float32'
assert doc.vector.shape == (300,)

View File

@@ -12,11 +12,24 @@ p Create a #[code GoldCorpus].
+table(["Name", "Type", "Description"])
+row
+cell #[code train_path]
+cell unicode or #[code Path]
+cell File or directory of training data.
+cell #[code train]
+cell unicode or #[code Path] or iterable
+cell
| Training data, as a path (file or directory) or iterable. If an
| iterable, each item should be a #[code (text, paragraphs)]
| tuple, where each paragraph is a tuple
| #[code.u-break (sentences, brackets)], and each sentence is a
| tuple #[code.u-break (ids, words, tags, heads, ner)]. See the
| implementation of
| #[+src(gh("spacy", "spacy/gold.pyx")) #[code gold.read_json_file]]
| for further details.
+row
+cell #[code dev_path]
+cell unicode or #[code Path]
+cell File or directory of development data.
+cell #[code dev]
+cell unicode or #[code Path] or iterable
+cell Development data, as a path (file or directory) or iterable.
+row("foot")
+cell returns
+cell #[code GoldCorpus]
+cell The newly constructed object.
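A minimal sketch of constructing a GoldCorpus with the renamed train/dev arguments, assuming the spaCy 2.x API; the paths are hypothetical, and each argument can also be an iterable of (text, paragraphs) tuples as described above.

import spacy
from spacy.gold import GoldCorpus

nlp = spacy.blank('en')
# each argument may be a JSON file, a directory of files, or an iterable
corpus = GoldCorpus('/path/to/train.json', '/path/to/dev.json')
train_docs = corpus.train_docs(nlp)  # yields (doc, gold) pairs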

View File

@@ -325,6 +325,12 @@ p The L2 norm of the lexeme's vector representation.
+cell bool
+cell Is the lexeme a quotation mark?
+row
+cell #[code is_currency]
+tag-new("2.0.8")
+cell bool
+cell Is the lexeme a currency symbol?
+row
+cell #[code like_url]
+cell bool
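A small usage sketch for the new is_currency flag on Lexeme; it assumes spaCy 2.0.8+, and a blank English pipeline is enough since this is a lexical attribute.

import spacy

nlp = spacy.blank('en')
assert nlp.vocab[u'$'].is_currency
assert not nlp.vocab[u'apple'].is_currency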

View File

@@ -111,6 +111,25 @@ p Match a stream of documents, yielding them in turn.
| parallel, if the #[code Matcher] implementation supports
| multi-threading.
+row
+cell #[code return_matches]
+tag-new(2.1)
+cell bool
+cell
| Yield the match lists along with the docs, making results
| #[code (doc, matches)] tuples.
+row
+cell #[code as_tuples]
+tag-new(2.1)
+cell bool
+cell
| Interpret the input stream as #[code (doc, context)] tuples, and
| yield #[code (result, context)] tuples out. If both
| #[code return_matches] and #[code as_tuples] are #[code True],
| the output will be a sequence of
| #[code ((doc, matches), context)] tuples.
+row("foot")
+cell yields
+cell #[code Doc]
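A short sketch of the return_matches flag on Matcher.pipe; the flag is documented above as new in v2.1, and the pattern and texts here are only examples.

import spacy
from spacy.matcher import Matcher

nlp = spacy.blank('en')
matcher = Matcher(nlp.vocab)
matcher.add('HELLO', None, [{'LOWER': 'hello'}])
docs = nlp.pipe([u'hello world', u'nothing to see'])
# with return_matches=True, the matcher yields (doc, matches) tuples
for doc, matches in matcher.pipe(docs, return_matches=True):
    print(doc.text, matches)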

View File

@@ -209,7 +209,7 @@ p
+row
+cell #[code drop]
+cell int
+cell float
+cell The dropout rate.
+row
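The surrounding context is not shown in this hunk, so as an assumption the corrected drop parameter refers to the dropout rate passed to an update() call such as Language.update in spaCy 2.x; a minimal sketch of a single training step with a float value, where the label and offsets are made up.

import spacy

nlp = spacy.blank('en')
ner = nlp.create_pipe('ner')
nlp.add_pipe(ner)
ner.add_label('FRUIT')  # hypothetical label
optimizer = nlp.begin_training()
# drop is a float between 0 and 1, not an int
nlp.update([u'I like apples'], [{'entities': [(7, 13, 'FRUIT')]}],
           drop=0.5, sgd=optimizer)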

View File

@@ -740,6 +740,12 @@ p The L2 norm of the token's vector representation.
+cell bool
+cell Is the token a quotation mark?
+row
+cell #[code is_currency]
+tag-new("2.0.8")
+cell bool
+cell Is the token a currency symbol?
+row
+cell #[code like_url]
+cell bool
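And the equivalent check on Token, again a sketch assuming spaCy 2.0.8+ and a blank English pipeline.

import spacy

nlp = spacy.blank('en')
doc = nlp(u'Dinner cost $20.')
assert doc[2].is_currency       # the '$' token
assert not doc[0].is_currency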

Binary file not shown.


View File

@@ -76,13 +76,15 @@
},
"MODEL_LICENSES": {
"CC BY-SA": "https://creativecommons.org/licenses/by-sa/3.0/",
"CC BY-SA 3.0": "https://creativecommons.org/licenses/by-sa/3.0/",
"CC BY-SA 4.0": "https://creativecommons.org/licenses/by-sa/4.0/",
"CC BY-NC": "https://creativecommons.org/licenses/by-nc/3.0/",
"CC BY-NC 3.0": "https://creativecommons.org/licenses/by-nc/3.0/",
"GPL": "https://www.gnu.org/licenses/gpl.html",
"LGPL": "https://www.gnu.org/licenses/lgpl.html"
"CC BY 4.0": "https://creativecommons.org/licenses/by/4.0/",
"CC BY-SA": "https://creativecommons.org/licenses/by-sa/3.0/",
"CC BY-SA 3.0": "https://creativecommons.org/licenses/by-sa/3.0/",
"CC BY-SA 4.0": "https://creativecommons.org/licenses/by-sa/4.0/",
"CC BY-NC": "https://creativecommons.org/licenses/by-nc/3.0/",
"CC BY-NC 3.0": "https://creativecommons.org/licenses/by-nc/3.0/",
"CC-BY-NC-SA 3.0": "https://creativecommons.org/licenses/by-nc-sa/3.0/",
"GPL": "https://www.gnu.org/licenses/gpl.html",
"LGPL": "https://www.gnu.org/licenses/lgpl.html"
},
"MODEL_BENCHMARKS": {

View File

@@ -40,7 +40,7 @@ p
+item
| Make the #[strong model data] available to the #[code Language] class
| by calling #[+api("language#from_disk") #[code from_disk]] with the
| path to the model data ditectory.
| path to the model data directory.
p
| So when you call this...
@@ -53,7 +53,7 @@ p
| pipeline #[code.u-break ["tagger", "parser", "ner"]]. spaCy will then
| initialise #[code spacy.lang.en.English], and create each pipeline
| component and add it to the processing pipeline. It'll then load in the
| model's data from its data ditectory and return the modified
| model's data from its data directory and return the modified
| #[code Language] class for you to use as the #[code nlp] object.
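As a rough sketch of the steps described above, under the assumption that the pipeline names and data path are illustrative rather than taken from a real model:

import spacy

cls = spacy.util.get_lang_class('en')        # spacy.lang.en.English
nlp = cls()
for name in ['tagger', 'parser', 'ner']:
    nlp.add_pipe(nlp.create_pipe(name))
nlp = nlp.from_disk('/path/to/model-data')   # hypothetical data directory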
p

View File

@@ -37,7 +37,7 @@ p
+cell.u-text-label.u-color-theme=label
for cell in cells
+cell.u-text-center
- var result = cell > 0.5 ? ["yes", "similar"] : cell != 1 ? ["no", "dissimilar"] : ["neutral", "identical"]
- var result = cell < 0.5 ? ["no", "dissimilar"] : cell != 1 ? ["yes", "similar"] : ["neutral", "identical"]
| #[code=cell.toFixed(2)] #[+procon(...result)]
p

View File

@@ -163,7 +163,7 @@ p
nlp = English().from_disk('/path/to/nlp')
p
| spay's serialization API has been made consistent across classes and
| spaCy's serialization API has been made consistent across classes and
| objects. All container classes, i.e. #[code Language], #[code Doc],
| #[code Vocab] and #[code StringStore] now have a #[code to_bytes()],
| #[code from_bytes()], #[code to_disk()] and #[code from_disk()] method
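A brief sketch of the consistent serialization methods mentioned above, using a blank pipeline and a hypothetical output directory.

import spacy
from spacy.tokens import Doc

nlp = spacy.blank('en')
doc = nlp(u'Give it back! He pleaded.')

doc_bytes = doc.to_bytes()
new_doc = Doc(nlp.vocab).from_bytes(doc_bytes)

nlp.to_disk('/tmp/my_nlp')                   # hypothetical path
nlp2 = spacy.blank('en').from_disk('/tmp/my_nlp')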

View File

@@ -68,7 +68,7 @@ p
+item #[strong spaCy is not research software].
| It's built on the latest research, but it's designed to get
| things done. This leads to fairly different design decisions than
| #[+a("https://github./nltk/nltk") NLTK]
| #[+a("https://github.com/nltk/nltk") NLTK]
| or #[+a("https://stanfordnlp.github.io/CoreNLP/") CoreNLP], which were
| created as platforms for teaching and research. The main difference
| is that spaCy is integrated and opinionated. spaCy tries to avoid asking