From db2dbc8e59d1a61a0d16fea371925e4667c8dec9 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Fri, 14 Aug 2020 14:58:03 +0200
Subject: [PATCH 1/6] Remove unused warning

---
 spacy/errors.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index c4eb4af28..26c0dba29 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -55,12 +55,6 @@ class Warnings:
             "loaded. (Shape: {shape})")
     W021 = ("Unexpected hash collision in PhraseMatcher. Matches may be "
             "incorrect. Modify PhraseMatcher._terminal_hash to fix.")
-    W022 = ("Training a new part-of-speech tagger using a model with no "
-            "lemmatization rules or data. This means that the trained model "
-            "may not be able to lemmatize correctly. If this is intentional "
-            "or the language you're using doesn't have lemmatization data, "
-            "you can ignore this warning. If this is surprising, make sure you "
-            "have the spacy-lookups-data package installed.")
     W024 = ("Entity '{entity}' - Alias '{alias}' combination already exists in "
             "the Knowledge Base.")
     W026 = ("Unable to set all sentence boundaries from dependency parses.")

From cef97e4b6352be205e68a9c7f0d10b4c44151b88 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Fri, 14 Aug 2020 14:58:18 +0200
Subject: [PATCH 2/6] Fix path check

---
 spacy/gold/corpus.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/gold/corpus.py b/spacy/gold/corpus.py
index 745d52e0e..774c3b840 100644
--- a/spacy/gold/corpus.py
+++ b/spacy/gold/corpus.py
@@ -62,7 +62,7 @@ class Corpus:
             if str(path) in seen:
                 continue
             seen.add(str(path))
-            if path.parts[-1].startswith("."):
+            if path.parts and path.parts[-1].startswith("."):
                 continue
             elif path.is_dir():
                 paths.extend(path.iterdir())

From e4d0990857bb813a55e9f95de6c77c1026460006 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Fri, 14 Aug 2020 14:58:48 +0200
Subject: [PATCH 3/6] Only receive from listener if listener exists

---
 spacy/pipeline/tok2vec.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/spacy/pipeline/tok2vec.py b/spacy/pipeline/tok2vec.py
index 44cd457e4..f2d138cf7 100644
--- a/spacy/pipeline/tok2vec.py
+++ b/spacy/pipeline/tok2vec.py
@@ -193,7 +193,8 @@ class Tok2Vec(Pipe):
         batch_id = Tok2VecListener.get_batch_id(docs)
         for listener in self.listeners[:-1]:
             listener.receive(batch_id, tokvecs, accumulate_gradient)
-        self.listeners[-1].receive(batch_id, tokvecs, backprop)
+        if self.listeners:
+            self.listeners[-1].receive(batch_id, tokvecs, backprop)
         if set_annotations:
             self.set_annotations(docs, tokvecs)
         return losses

From ab1d165bba1ea77c723295cfff3c11772c057218 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Fri, 14 Aug 2020 14:59:22 +0200
Subject: [PATCH 4/6] Pass optimizer defined in config to resume/begin_training

Otherwise, this would create a default optimizer, which isn't what we want?
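
A sketch of the intended call sites after this change (assuming
`optimizer` has already been resolved from the [training.optimizer]
section of the config):

    if resume_components:
        with nlp.select_pipes(enable=resume_components):
            nlp.resume_training(sgd=optimizer)
    with nlp.select_pipes(disable=[*frozen_components, *resume_components]):
        nlp.begin_training(lambda: train_corpus(nlp), sgd=optimizer)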
---
 spacy/cli/train.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index fbdb91ab9..c69fb92ea 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -102,9 +102,9 @@ def train(
     if resume_components:
         with nlp.select_pipes(enable=resume_components):
             msg.info(f"Resuming training for: {resume_components}")
-            nlp.resume_training()
+            nlp.resume_training(sgd=optimizer)
     with nlp.select_pipes(disable=[*frozen_components, *resume_components]):
-        nlp.begin_training(lambda: train_corpus(nlp))
+        nlp.begin_training(lambda: train_corpus(nlp), sgd=optimizer)

     if tag_map:
         # Replace tag map with provided mapping

From 37814b608d57fc9982d8dc84812a8688a01360c1 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Fri, 14 Aug 2020 14:59:54 +0200
Subject: [PATCH 5/6] Remove env_opt and simplify default Optimizer
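
Settings that were previously picked up from SPACY_* environment
variables are now expected to come from the training config instead. A
minimal sketch of the equivalent config block (assuming Thinc's
registered Adam.v1 optimizer; the values shown are the old env_opt
defaults):

    [training.optimizer]
    @optimizers = "Adam.v1"
    learn_rate = 0.001
    beta1 = 0.9
    beta2 = 0.999
    eps = 1e-8
    L2 = 1e-6
    grad_clip = 10.0
    L2_is_weight_decay = false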
---
 spacy/cli/evaluate.py |  1 -
 spacy/cli/train.py    |  1 -
 spacy/util.py         | 46 +------------------------------------------
 3 files changed, 1 insertion(+), 47 deletions(-)

diff --git a/spacy/cli/evaluate.py b/spacy/cli/evaluate.py
index cf77fecfd..cf8f513fc 100644
--- a/spacy/cli/evaluate.py
+++ b/spacy/cli/evaluate.py
@@ -60,7 +60,6 @@ def evaluate(
     fix_random_seed()
     if use_gpu >= 0:
         require_gpu(use_gpu)
-    util.set_env_log(False)
    data_path = util.ensure_path(data_path)
     output_path = util.ensure_path(output)
     displacy_path = util.ensure_path(displacy_path)
diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index c69fb92ea..f04387b30 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -48,7 +48,6 @@ def train_cli(
     used to register custom functions and architectures that can then be
     referenced in the config.
     """
-    util.set_env_log(verbose)
     verify_cli_args(config_path, output_path)
     overrides = parse_config_overrides(ctx.args)
     import_code(code_path)
diff --git a/spacy/util.py b/spacy/util.py
index d10f83789..1e7703613 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -54,7 +54,6 @@ if TYPE_CHECKING:
     from .vocab import Vocab  # noqa: F401


-_PRINT_ENV = False
 OOV_RANK = numpy.iinfo(numpy.uint64).max
 LEXEME_NORM_LANGS = ["da", "de", "el", "en", "id", "lb", "pt", "ru", "sr", "ta", "th"]

@@ -109,11 +108,6 @@ class SimpleFrozenDict(dict):
         raise NotImplementedError(self.error)


-def set_env_log(value: bool) -> None:
-    global _PRINT_ENV
-    _PRINT_ENV = value
-
-
 def lang_class_is_loaded(lang: str) -> bool:
     """Check whether a Language class is already loaded. Language classes
     are loaded lazily, to avoid expensive setup code associated with the language
@@ -602,27 +596,6 @@ def get_async(stream, numpy_array):
     return array


-def env_opt(name: str, default: Optional[Any] = None) -> Optional[Any]:
-    if type(default) is float:
-        type_convert = float
-    else:
-        type_convert = int
-    if "SPACY_" + name.upper() in os.environ:
-        value = type_convert(os.environ["SPACY_" + name.upper()])
-        if _PRINT_ENV:
-            print(name, "=", repr(value), "via", "$SPACY_" + name.upper())
-        return value
-    elif name in os.environ:
-        value = type_convert(os.environ[name])
-        if _PRINT_ENV:
-            print(name, "=", repr(value), "via", "$" + name)
-        return value
-    else:
-        if _PRINT_ENV:
-            print(name, "=", repr(default), "by default")
-        return default
-
-
 def read_regex(path: Union[str, Path]) -> Pattern:
     path = ensure_path(path)
     with path.open(encoding="utf8") as file_:
@@ -1067,24 +1040,7 @@ class DummyTokenizer:


 def create_default_optimizer() -> Optimizer:
-    # TODO: Do we still want to allow env_opt?
-    learn_rate = env_opt("learn_rate", 0.001)
-    beta1 = env_opt("optimizer_B1", 0.9)
-    beta2 = env_opt("optimizer_B2", 0.999)
-    eps = env_opt("optimizer_eps", 1e-8)
-    L2 = env_opt("L2_penalty", 1e-6)
-    grad_clip = env_opt("grad_norm_clip", 10.0)
-    L2_is_weight_decay = env_opt("L2_is_weight_decay", False)
-    optimizer = Adam(
-        learn_rate,
-        L2=L2,
-        beta1=beta1,
-        beta2=beta2,
-        eps=eps,
-        grad_clip=grad_clip,
-        L2_is_weight_decay=L2_is_weight_decay,
-    )
-    return optimizer
+    return Adam()


 def minibatch(items, size):

From 8128e5eb354d534692d13c29c47fce3a764cde84 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Fri, 14 Aug 2020 15:00:52 +0200
Subject: [PATCH 6/6] Replace lexeme_norm warning with logging
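
W033 is now emitted through the shared `spacy` logger defined in
spacy/util.py rather than via warnings.warn(). A minimal sketch of how
to surface it again (the logger inherits the root level, so debug
messages stay hidden unless enabled explicitly):

    import logging
    import spacy

    spacy.logger.setLevel(logging.DEBUG)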
""" + util.logger.setLevel(logging.DEBUG if verbose else logging.ERROR) verify_cli_args(config_path, output_path) overrides = parse_config_overrides(ctx.args) import_code(code_path) diff --git a/spacy/pipeline/transition_parser.pyx b/spacy/pipeline/transition_parser.pyx index 443f7f6a0..2eadfa6aa 100644 --- a/spacy/pipeline/transition_parser.pyx +++ b/spacy/pipeline/transition_parser.pyx @@ -409,7 +409,7 @@ cdef class Parser(Pipe): lexeme_norms = self.vocab.lookups.get_table("lexeme_norm", {}) if len(lexeme_norms) == 0 and self.vocab.lang in util.LEXEME_NORM_LANGS: langs = ", ".join(util.LEXEME_NORM_LANGS) - warnings.warn(Warnings.W033.format(model="parser or NER", langs=langs)) + util.logger.debug(Warnings.W033.format(model="parser or NER", langs=langs)) actions = self.moves.get_actions( examples=get_examples(), min_freq=self.cfg['min_action_freq'], diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py index 0ffe74273..c7a1ed0d2 100644 --- a/spacy/tests/parser/test_ner.py +++ b/spacy/tests/parser/test_ner.py @@ -1,17 +1,17 @@ import pytest - from spacy import util from spacy.lang.en import English - from spacy.language import Language from spacy.lookups import Lookups from spacy.pipeline._parser_internals.ner import BiluoPushDown from spacy.gold import Example from spacy.tokens import Doc from spacy.vocab import Vocab +import logging from ..util import make_tempdir + TRAIN_DATA = [ ("Who is Shaka Khan?", {"entities": [(7, 17, "PERSON")]}), ("I like London and Berlin.", {"entities": [(7, 13, "LOC"), (18, 24, "LOC")]}), @@ -56,6 +56,7 @@ def test_get_oracle_moves(tsys, doc, entity_annots): assert names == ["U-PERSON", "O", "O", "B-GPE", "L-GPE", "O"] +@pytest.mark.filterwarnings("ignore::UserWarning") def test_get_oracle_moves_negative_entities(tsys, doc, entity_annots): entity_annots = [(s, e, "!" 
+ label) for s, e, label in entity_annots] example = Example.from_dict(doc, {"entities": entity_annots}) @@ -332,19 +333,21 @@ def test_overfitting_IO(): assert ents2[0].label_ == "LOC" -def test_ner_warns_no_lookups(): +def test_ner_warns_no_lookups(caplog): nlp = English() assert nlp.lang in util.LEXEME_NORM_LANGS nlp.vocab.lookups = Lookups() assert not len(nlp.vocab.lookups) nlp.add_pipe("ner") - with pytest.warns(UserWarning): + with caplog.at_level(logging.DEBUG): nlp.begin_training() + assert "W033" in caplog.text + caplog.clear() nlp.vocab.lookups.add_table("lexeme_norm") nlp.vocab.lookups.get_table("lexeme_norm")["a"] = "A" - with pytest.warns(None) as record: + with caplog.at_level(logging.DEBUG): nlp.begin_training() - assert not record.list + assert "W033" not in caplog.text @Language.factory("blocker") diff --git a/spacy/tests/regression/test_issue2001-2500.py b/spacy/tests/regression/test_issue2001-2500.py index a09c6f4fb..259ca9b0c 100644 --- a/spacy/tests/regression/test_issue2001-2500.py +++ b/spacy/tests/regression/test_issue2001-2500.py @@ -25,7 +25,6 @@ def test_issue2070(): assert len(doc) == 11 -@pytest.mark.filterwarnings("ignore::UserWarning") def test_issue2179(): """Test that spurious 'extra_labels' aren't created when initializing NER.""" nlp = Italian() @@ -135,7 +134,6 @@ def test_issue2464(en_vocab): assert len(matches) == 3 -@pytest.mark.filterwarnings("ignore::UserWarning") def test_issue2482(): """Test we can serialize and deserialize a blank NER or parser model.""" nlp = Italian() diff --git a/spacy/tests/regression/test_issue2501-3000.py b/spacy/tests/regression/test_issue2501-3000.py index 2b0f9f427..3882df0a6 100644 --- a/spacy/tests/regression/test_issue2501-3000.py +++ b/spacy/tests/regression/test_issue2501-3000.py @@ -136,7 +136,6 @@ def test_issue2782(text, lang_cls): assert doc[0].like_num -@pytest.mark.filterwarnings("ignore::UserWarning") def test_issue2800(): """Test issue that arises when too many labels are added to NER model. Used to cause segfault. 
diff --git a/spacy/tests/regression/test_issue3001-3500.py b/spacy/tests/regression/test_issue3001-3500.py
index 98a6b9aa0..3059eb5ab 100644
--- a/spacy/tests/regression/test_issue3001-3500.py
+++ b/spacy/tests/regression/test_issue3001-3500.py
@@ -90,7 +90,6 @@ def test_issue3199():
     assert list(doc[0:3].noun_chunks) == []


-@pytest.mark.filterwarnings("ignore::UserWarning")
 def test_issue3209():
     """Test issue that occurred in spaCy nightly where NER labels were being
     mapped to classes incorrectly after loading the model, when the labels
diff --git a/spacy/tests/regression/test_issue3501-4000.py b/spacy/tests/regression/test_issue3501-4000.py
index de554a5ec..fc2a3ed7c 100644
--- a/spacy/tests/regression/test_issue3501-4000.py
+++ b/spacy/tests/regression/test_issue3501-4000.py
@@ -91,7 +91,6 @@ def test_issue_3526_3(en_vocab):
     assert new_ruler.overwrite is not ruler.overwrite


-@pytest.mark.filterwarnings("ignore::UserWarning")
 def test_issue_3526_4(en_vocab):
     nlp = Language(vocab=en_vocab)
     patterns = [{"label": "ORG", "pattern": "Apple"}]
@@ -252,7 +251,6 @@ def test_issue3803():
     assert [t.like_num for t in doc] == [True, True, True, True, True, True]


-@pytest.mark.filterwarnings("ignore::UserWarning")
 def test_issue3830_no_subtok():
     """Test that the parser doesn't have subtok label if not learn_tokens"""
     config = {
@@ -270,7 +268,6 @@ def test_issue3830_no_subtok():
     assert "subtok" not in parser.labels


-@pytest.mark.filterwarnings("ignore::UserWarning")
 def test_issue3830_with_subtok():
     """Test that the parser does have subtok label if learn_tokens=True."""
     config = {
@@ -333,7 +330,6 @@ def test_issue3879(en_vocab):
     assert len(matcher(doc)) == 2  # fails because of a FP match 'is a test'


-@pytest.mark.filterwarnings("ignore::UserWarning")
 def test_issue3880():
     """Test that `nlp.pipe()` works when an empty string ends the batch.

diff --git a/spacy/tests/regression/test_issue4001-4500.py b/spacy/tests/regression/test_issue4001-4500.py
index 2fae3484b..1789973e9 100644
--- a/spacy/tests/regression/test_issue4001-4500.py
+++ b/spacy/tests/regression/test_issue4001-4500.py
@@ -81,7 +81,6 @@ def test_issue4030():
     assert doc.cats["inoffensive"] == 0.0


-@pytest.mark.filterwarnings("ignore::UserWarning")
 def test_issue4042():
     """Test that serialization of an EntityRuler before NER works fine."""
     nlp = English()
@@ -110,7 +109,6 @@ def test_issue4042():
     assert doc2.ents[0].label_ == "MY_ORG"


-@pytest.mark.filterwarnings("ignore::UserWarning")
 def test_issue4042_bug2():
     """
     Test that serialization of an NER works fine when new labels were added.
@@ -242,7 +240,6 @@ def test_issue4190():
     assert result_1b == result_2


-@pytest.mark.filterwarnings("ignore::UserWarning")
 def test_issue4267():
     """ Test that running an entity_ruler after ner gives consistent results"""
     nlp = English()
@@ -324,7 +321,6 @@ def test_issue4313():
         entity_scores[(start, end, label)] += score


-@pytest.mark.filterwarnings("ignore::UserWarning")
 def test_issue4348():
     """Test that training the tagger with empty data, doesn't throw errors"""
     nlp = English()
diff --git a/spacy/tests/regression/test_issue4501-5000.py b/spacy/tests/regression/test_issue4501-5000.py
index 96d4e1ca4..1e655851f 100644
--- a/spacy/tests/regression/test_issue4501-5000.py
+++ b/spacy/tests/regression/test_issue4501-5000.py
@@ -179,7 +179,6 @@ def test_issue4707():
     assert "entity_ruler" in new_nlp.pipe_names


-@pytest.mark.filterwarnings("ignore::UserWarning")
 def test_issue4725_1():
     """ Ensure the pickling of the NER goes well"""
     vocab = Vocab(vectors_name="test_vocab_add_vector")
@@ -198,7 +197,6 @@ def test_issue4725_1():
     assert ner2.cfg["update_with_oracle_cut_size"] == 111


-@pytest.mark.filterwarnings("ignore::UserWarning")
 def test_issue4725_2():
     # ensures that this runs correctly and doesn't hang or crash because of the global vectors
     # if it does crash, it's usually because of calling 'spawn' for multiprocessing (e.g. on Windows),
diff --git a/spacy/tests/regression/test_issue5152.py b/spacy/tests/regression/test_issue5152.py
index 3c1cee5c3..c7a70a99c 100644
--- a/spacy/tests/regression/test_issue5152.py
+++ b/spacy/tests/regression/test_issue5152.py
@@ -1,8 +1,7 @@
-import pytest
 from spacy.lang.en import English
+import pytest


-@pytest.mark.filterwarnings("ignore::UserWarning")
 def test_issue5152():
     # Test that the comparison between a Span and a Token, goes well
     # There was a bug when the number of tokens in the span equaled the number of characters in the token (!)
@@ -14,6 +13,8 @@ def test_issue5152():
     span_2 = text[0:3]  # Talk about being
     span_3 = text_var[0:3]  # Talk of being
     token = y[0]  # Let
-    assert span.similarity(token) == 0.0
+    with pytest.warns(UserWarning):
+        assert span.similarity(token) == 0.0
     assert span.similarity(span_2) == 1.0
-    assert span_2.similarity(span_3) < 1.0
+    with pytest.warns(UserWarning):
+        assert span_2.similarity(span_3) < 1.0
diff --git a/spacy/tests/test_gold.py b/spacy/tests/test_gold.py
index 708c57837..334d9fc24 100644
--- a/spacy/tests/test_gold.py
+++ b/spacy/tests/test_gold.py
@@ -154,6 +154,7 @@ def test_example_from_dict_some_ner(en_vocab):
     assert ner_tags == ["U-LOC", None, None, None]


+@pytest.mark.filterwarnings("ignore::UserWarning")
 def test_json2docs_no_ner(en_vocab):
     data = [
         {
@@ -506,6 +507,7 @@ def test_roundtrip_docs_to_docbin(doc):
     assert cats["BAKING"] == reloaded_example.reference.cats["BAKING"]


+@pytest.mark.filterwarnings("ignore::UserWarning")
 def test_make_orth_variants(doc):
     nlp = English()
     with make_tempdir() as tmpdir:
@@ -586,7 +588,7 @@ def test_tuple_format_implicit():
         ("Uber blew through $1 million a week", {"entities": [(0, 4, "ORG")]}),
         (
             "Spotify steps up Asia expansion",
-            {"entities": [(0, 8, "ORG"), (17, 21, "LOC")]},
+            {"entities": [(0, 7, "ORG"), (17, 21, "LOC")]},
         ),
         ("Google rebrands its business apps", {"entities": [(0, 6, "ORG")]}),
     ]
@@ -601,7 +603,7 @@ def test_tuple_format_implicit_invalid():
         ("Uber blew through $1 million a week", {"frumble": [(0, 4, "ORG")]}),
         (
             "Spotify steps up Asia expansion",
-            {"entities": [(0, 8, "ORG"), (17, 21, "LOC")]},
+            {"entities": [(0, 7, "ORG"), (17, 21, "LOC")]},
         ),
         ("Google rebrands its business apps", {"entities": [(0, 6, "ORG")]}),
     ]
diff --git a/spacy/tests/test_new_example.py b/spacy/tests/test_new_example.py
index df6489aa8..321eaae95 100644
--- a/spacy/tests/test_new_example.py
+++ b/spacy/tests/test_new_example.py
@@ -46,6 +46,7 @@ def test_Example_from_dict_with_tags(pred_words, annots):
     assert aligned_tags == ["NN" for _ in predicted]


+@pytest.mark.filterwarnings("ignore::UserWarning")
 def test_aligned_tags():
     pred_words = ["Apply", "some", "sunscreen", "unless", "you", "can", "not"]
     gold_words = ["Apply", "some", "sun", "screen", "unless", "you", "cannot"]
@@ -198,8 +199,8 @@ def test_Example_from_dict_with_entities(annots):
 def test_Example_from_dict_with_entities_invalid(annots):
     vocab = Vocab()
     predicted = Doc(vocab, words=annots["words"])
-    example = Example.from_dict(predicted, annots)
-    # TODO: shouldn't this throw some sort of warning ?
+    with pytest.warns(UserWarning):
+        example = Example.from_dict(predicted, annots)
     assert len(list(example.reference.ents)) == 0
diff --git a/spacy/util.py b/spacy/util.py
index 1e7703613..09b117952 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -24,6 +24,7 @@ import tempfile
 import shutil
 import shlex
 import inspect
+import logging

 try:
     import cupy.random
@@ -58,6 +59,10 @@ OOV_RANK = numpy.iinfo(numpy.uint64).max
 LEXEME_NORM_LANGS = ["da", "de", "el", "en", "id", "lb", "pt", "ru", "sr", "ta", "th"]


+logging.basicConfig()
+logger = logging.getLogger("spacy")
+
+
 class registry(thinc.registry):
     languages = catalogue.create("spacy", "languages", entry_points=True)
     architectures = catalogue.create("spacy", "architectures", entry_points=True)
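
Note for reviewers: downstream tests that previously asserted on the old
UserWarning can switch to pytest's caplog fixture, following the pattern
in test_ner.py above. A self-contained sketch (hypothetical test name,
assuming a blank English pipeline with no lexeme_norm data):

    import logging

    from spacy.lang.en import English
    from spacy.lookups import Lookups

    def test_w033_via_logger(caplog):
        nlp = English()
        nlp.vocab.lookups = Lookups()  # no lexeme_norm table, so W033 applies
        nlp.add_pipe("ner")
        with caplog.at_level(logging.DEBUG):  # capture DEBUG records during begin_training
            nlp.begin_training()
        assert "W033" in caplog.text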