From 3a9710f35698a55822d80ded5d66239e6dcdba8a Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sun, 23 Apr 2017 15:57:53 +0200
Subject: [PATCH 1/2] Pass dev_scores to print_progress correctly (resolves
 #1008)

Only read the scores attribute if the command is used with dev_data;
otherwise default dev_scores to an empty dict.
---
 spacy/cli/train.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index 3900c7f39..8557019c6 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -62,10 +62,10 @@ def train_model(Language, train_data, dev_data, output_path, tagger_cfg, parser_
         for itn, epoch in enumerate(trainer.epochs(n_iter, augment_data=None)):
             for doc, gold in epoch:
                 trainer.update(doc, gold)
-            dev_scores = trainer.evaluate(dev_data) if dev_data else []
+            dev_scores = trainer.evaluate(dev_data).scores if dev_data else {}
             print_progress(itn, trainer.nlp.parser.model.nr_weight,
                            trainer.nlp.parser.model.nr_active_feat,
-                           **dev_scores.scores)
+                           **dev_scores)
 
 
 def evaluate(Language, gold_tuples, output_path):

From 2bfec1a4f8b0c3218b731084a1a1619c72d69967 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sun, 23 Apr 2017 15:58:38 +0200
Subject: [PATCH 2/2] Add note on languages with non-latin characters (see
 #996)

---
 website/docs/usage/adding-languages.jade | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/website/docs/usage/adding-languages.jade b/website/docs/usage/adding-languages.jade
index 0c98cc5ca..50b626b99 100644
--- a/website/docs/usage/adding-languages.jade
+++ b/website/docs/usage/adding-languages.jade
@@ -98,6 +98,17 @@ p
     |  so that Python functions can be used to help you generalise and combine
     |  the data as you require.
 
++infobox("For languages with non-Latin characters")
+    |  In order for the tokenizer to split suffixes, prefixes and infixes, spaCy
+    |  needs to know the language's character set. If the language you're adding
+    |  uses non-Latin characters, you might need to add the required character
+    |  classes to the global
+    |  #[+src(gh("spacy", "spacy/language_data/punctuation.py")) punctuation.py].
+    |  spaCy uses the #[+a("https://pypi.python.org/pypi/regex/") #[code regex] library]
+    |  to keep this simple and readable. If the language requires very specific
+    |  punctuation rules, you should consider overwriting the default regular
+    |  expressions with your own in the language's #[code Defaults].
+
 +h(3, "stop-words") Stop words
 
 p