From dfbed07d3b824aaa5e5619c6a67179cfffe244b3 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Sun, 24 Feb 2019 22:26:08 +0100
Subject: [PATCH 1/4] Remove unused temp errors

---
 spacy/errors.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index 45f3fea76..2a501089d 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -342,13 +342,8 @@ class Errors(object):
 
 @add_codes
 class TempErrors(object):
-    T001 = ("Max length currently 10 for phrase matching")
-    T002 = ("Pattern length ({doc_len}) >= phrase_matcher.max_length "
-            "({max_len}). Length can be set on initialization, up to 10.")
     T003 = ("Resizing pre-trained Tagger models is not currently supported.")
     T004 = ("Currently parser depth is hard-coded to 1. Received: {value}.")
-    T005 = ("Currently history size is hard-coded to 0. Received: {value}.")
-    T006 = ("Currently history width is hard-coded to 0. Received: {value}.")
     T007 = ("Can't yet set {attr} from Span. Vote for this feature on the "
             "issue tracker: http://github.com/explosion/spaCy/issues")
     T008 = ("Bad configuration of Tagger. This is probably a bug within "

From 55bb570f51dff3966d1704558d646610a01b7887 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Mon, 25 Feb 2019 09:37:05 +0100
Subject: [PATCH 2/4] Add [ja] to extras_require

---
 setup.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/setup.py b/setup.py
index 3070985f1..34c92ad2b 100755
--- a/setup.py
+++ b/setup.py
@@ -244,6 +244,8 @@ def setup_package():
                 "cuda91": ["cupy-cuda91>=4.0"],
                 "cuda92": ["cupy-cuda92>=4.0"],
                 "cuda100": ["cupy-cuda100>=4.0"],
+                # Language tokenizers with external dependencies
+                "ja": ["mecab-python3==0.7"],
             },
             python_requires=">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*",
             classifiers=[

From 1981b194cc579ae3e7561ee37c0b828e5934e4d1 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Mon, 25 Feb 2019 10:03:20 +0100
Subject: [PATCH 3/4] Fix recomputing of :target [ci skip]

Prevents additional history entry
---
 website/gatsby-browser.js | 1 -
 1 file changed, 1 deletion(-)

diff --git a/website/gatsby-browser.js b/website/gatsby-browser.js
index 25fedd4b4..3b570703d 100644
--- a/website/gatsby-browser.js
+++ b/website/gatsby-browser.js
@@ -42,7 +42,6 @@ export const onRouteUpdate = ({ location }) => {
                 // Navigate to targeted element
                 el.scrollIntoView()
                 // Force recomputing :target pseudo class with pushState/popState
-                window.location.hash = ''
                 window.location.hash = location.hash
             }
         }, 0)

From 1b6238101ae5c2623ae1411ffbd2d0cdcdad7a49 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Mon, 25 Feb 2019 10:03:43 +0100
Subject: [PATCH 4/4] Add table explaining training metrics [closes #2644]

---
 website/docs/usage/training.md | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/website/docs/usage/training.md b/website/docs/usage/training.md
index bc541e345..33bdf8266 100644
--- a/website/docs/usage/training.md
+++ b/website/docs/usage/training.md
@@ -39,6 +39,33 @@ mkdir models
 python -m spacy train es models ancora-json/es_ancora-ud-train.json ancora-json/es_ancora-ud-dev.json
 ```
 
+#### Understanding the training output
+
+When you train a model using the [`spacy train`](/api/cli#train) command, you'll
+see a table showing metrics after each pass over the data. Here's what those
+metrics mean:
+
+> #### Tokenization metrics
+>
+> Note that if the development data has raw text, some of the gold-standard
+> entities might not align to the predicted tokenization. These tokenization
+> errors are **excluded from the NER evaluation**. If your tokenization makes it
+> impossible for the model to predict 50% of your entities, your NER F-score
+> might still look good.
+
+| Name       | Description                                                                                       |
+| ---------- | ------------------------------------------------------------------------------------------------- |
+| `Dep Loss` | Training loss for dependency parser. Should decrease, but usually not to 0.                       |
+| `NER Loss` | Training loss for named entity recognizer. Should decrease, but usually not to 0.                 |
+| `UAS`      | Unlabeled attachment score for parser. The percentage of unlabeled correct arcs. Should increase. |
+| `NER P.`   | NER precision on development data. Should increase.                                               |
+| `NER R.`   | NER recall on development data. Should increase.                                                  |
+| `NER F.`   | NER F-score on development data. Should increase.                                                 |
+| `Tag %`    | Fine-grained part-of-speech tag accuracy on development data. Should increase.                    |
+| `Token %`  | Tokenization accuracy on development data.                                                        |
+| `CPU WPS`  | Prediction speed on CPU in words per second, if available. Should stay stable.                    |
+| `GPU WPS`  | Prediction speed on GPU in words per second, if available. Should stay stable.                    |
+
 ### Improving accuracy with transfer learning {#transfer-learning new="2.1"}
 
 In most projects, you'll usually have a small amount of labelled data, and