Merge pull request #5367 from adrianeboyd/feature/simplify-warnings-v2

Commit efec28ce70 by Ines Montani, 2020-04-29 12:55:37 +02:00 (committed by GitHub)
20 changed files with 80 additions and 131 deletions

View File: spacy/__init__.py

@@ -13,7 +13,7 @@ from . import pipeline
 from .cli.info import info as cli_info
 from .glossary import explain
 from .about import __version__
-from .errors import Errors, Warnings, deprecation_warning
+from .errors import Errors, Warnings
 from . import util
 from .util import registry
 from .language import component
@@ -26,7 +26,7 @@ if sys.maxunicode == 65535:
 def load(name, **overrides):
     depr_path = overrides.get("path")
     if depr_path not in (True, False, None):
-        deprecation_warning(Warnings.W001.format(path=depr_path))
+        warnings.warn(Warnings.W001.format(path=depr_path), DeprecationWarning)
     return util.load_model(name, **overrides)

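[Review note, not part of the diff] The pattern above repeats across the whole commit: deprecation_warning(msg) becomes warnings.warn(msg, DeprecationWarning), and user_warning(msg)/models_warning(msg) become plain warnings.warn(msg). A minimal self-contained sketch of the new call-site behaviour — the message text and the load() stub below are illustrative stand-ins, not spaCy's real code:

import warnings

def load(name, **overrides):
    # Mirrors the new spacy.load guard: stdlib warning, explicit category.
    if overrides.get("path") not in (True, False, None):
        warnings.warn("[W001] The 'path' keyword is deprecated.", DeprecationWarning)
    return name  # placeholder for util.load_model(name, **overrides)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")  # DeprecationWarning is hidden by default
    load("en_core_web_sm", path="/tmp/old-style-path")
assert any(issubclass(w.category, DeprecationWarning) for w in caught)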
View File: spacy/_ml.py

@@ -2,6 +2,7 @@
 from __future__ import unicode_literals

 import numpy
+import warnings
 from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu
 from thinc.t2t import ExtractWindow, ParametricAttention
 from thinc.t2v import Pooling, sum_pool, mean_pool
@@ -22,7 +23,7 @@ from thinc.neural._classes.affine import _set_dimensions_if_needed
 import thinc.extra.load_nlp
 from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE
-from .errors import Errors, user_warning, Warnings
+from .errors import Errors, Warnings
 from . import util
 from . import ml as new_ml
 from .ml import _legacy_tok2vec
@@ -283,7 +284,7 @@ def link_vectors_to_models(vocab):
     if vectors.name is None:
         vectors.name = VECTORS_KEY
         if vectors.data.size != 0:
-            user_warning(Warnings.W020.format(shape=vectors.data.shape))
+            warnings.warn(Warnings.W020.format(shape=vectors.data.shape))
     ops = Model.ops
     for word in vocab:
         if word.orth in vectors.key2row:
@@ -299,7 +300,7 @@ def link_vectors_to_models(vocab):
                 # This is a hack to avoid the problem in #3853.
                 old_name = vectors.name
                 new_name = vectors.name + "_%d" % data.shape[0]
-                user_warning(Warnings.W019.format(old=old_name, new=new_name))
+                warnings.warn(Warnings.W019.format(old=old_name, new=new_name))
                 vectors.name = new_name
                 key = (ops.device, vectors.name)
         thinc.extra.load_nlp.VECTORS[key] = data

View File: spacy/analysis.py

@@ -1,11 +1,13 @@
 # coding: utf8
 from __future__ import unicode_literals

+import warnings
+
 from collections import OrderedDict
 from wasabi import Printer

 from .tokens import Doc, Token, Span
-from .errors import Errors, Warnings, user_warning
+from .errors import Errors, Warnings


 def analyze_pipes(pipeline, name, pipe, index, warn=True):
@@ -34,7 +36,7 @@ def analyze_pipes(pipeline, name, pipe, index, warn=True):
         if not fulfilled:
             problems.append(annot)
             if warn:
-                user_warning(Warnings.W025.format(name=name, attr=annot))
+                warnings.warn(Warnings.W025.format(name=name, attr=annot))
     return problems

View File: spacy/cli/init_model.py

@@ -12,10 +12,11 @@ import tarfile
 import gzip
 import zipfile
 import srsly
+import warnings
 from wasabi import msg

 from ..vectors import Vectors
-from ..errors import Errors, Warnings, user_warning
+from ..errors import Errors, Warnings
 from ..util import ensure_path, get_lang_class, OOV_RANK

 try:
@@ -246,7 +247,7 @@ def read_freqs(freqs_loc, max_length=100, min_doc_freq=5, min_freq=50):
 def read_clusters(clusters_loc):
     clusters = {}
     if ftfy is None:
-        user_warning(Warnings.W004)
+        warnings.warn(Warnings.W004)
     with clusters_loc.open() as f:
         for line in tqdm(f):
             try:

View File: spacy/displacy/__init__.py

@@ -7,10 +7,12 @@ USAGE: https://spacy.io/usage/visualizers
 """
 from __future__ import unicode_literals

+import warnings
+
 from .render import DependencyRenderer, EntityRenderer
 from ..tokens import Doc, Span
 from ..compat import b_to_str
-from ..errors import Errors, Warnings, user_warning
+from ..errors import Errors, Warnings
 from ..util import is_in_jupyter
@@ -89,7 +91,7 @@ def serve(
     from wsgiref import simple_server

     if is_in_jupyter():
-        user_warning(Warnings.W011)
+        warnings.warn(Warnings.W011)

     render(docs, style=style, page=page, minify=minify, options=options, manual=manual)
     httpd = simple_server.make_server(host, port, app)
@@ -119,7 +121,7 @@ def parse_deps(orig_doc, options={}):
     """
     doc = Doc(orig_doc.vocab).from_bytes(orig_doc.to_bytes(exclude=["user_data"]))
     if not doc.is_parsed:
-        user_warning(Warnings.W005)
+        warnings.warn(Warnings.W005)
     if options.get("collapse_phrases", False):
         with doc.retokenize() as retokenizer:
             for np in list(doc.noun_chunks):
@@ -184,7 +186,7 @@ def parse_ents(doc, options={}):
         for ent in doc.ents
     ]
     if not ents:
-        user_warning(Warnings.W006)
+        warnings.warn(Warnings.W006)
     title = doc.user_data.get("title", None) if hasattr(doc, "user_data") else None
     settings = get_doc_settings(doc)
     return {"text": doc.text, "ents": ents, "title": title, "settings": settings}

View File: spacy/errors.py

@@ -1,11 +1,6 @@
 # coding: utf8
 from __future__ import unicode_literals

-import os
-import warnings
-import inspect


 def add_codes(err_cls):
     """Add error codes to string messages via class attribute names."""
@@ -93,8 +88,7 @@ class Warnings(object):
     W022 = ("Training a new part-of-speech tagger using a model with no "
             "lemmatization rules or data. This means that the trained model "
             "may not be able to lemmatize correctly. If this is intentional "
-            "or the language you're using doesn't have lemmatization data, "
-            "you can ignore this warning by setting SPACY_WARNING_IGNORE=W022. "
+            "or the language you're using doesn't have lemmatization data. "
             "If this is surprising, make sure you have the spacy-lookups-data "
             "package installed.")
     W023 = ("Multiprocessing of Language.pipe is not supported in Python 2. "
@@ -595,64 +589,3 @@ class MatchPatternError(ValueError):

 class AlignmentError(ValueError):
     pass
-
-
-class ModelsWarning(UserWarning):
-    pass
-
-
-WARNINGS = {
-    "user": UserWarning,
-    "deprecation": DeprecationWarning,
-    "models": ModelsWarning,
-}
-
-
-def _get_warn_types(arg):
-    if arg == "":  # don't show any warnings
-        return []
-    if not arg or arg == "all":  # show all available warnings
-        return WARNINGS.keys()
-    return [w_type.strip() for w_type in arg.split(",") if w_type.strip() in WARNINGS]
-
-
-def _get_warn_excl(arg):
-    if not arg:
-        return []
-    return [w_id.strip() for w_id in arg.split(",")]
-
-
-SPACY_WARNING_FILTER = os.environ.get("SPACY_WARNING_FILTER")
-SPACY_WARNING_TYPES = _get_warn_types(os.environ.get("SPACY_WARNING_TYPES"))
-SPACY_WARNING_IGNORE = _get_warn_excl(os.environ.get("SPACY_WARNING_IGNORE"))
-
-
-def user_warning(message):
-    _warn(message, "user")
-
-
-def deprecation_warning(message):
-    _warn(message, "deprecation")
-
-
-def models_warning(message):
-    _warn(message, "models")
-
-
-def _warn(message, warn_type="user"):
-    """
-    message (unicode): The message to display.
-    category (Warning): The Warning to show.
-    """
-    if message.startswith("["):
-        w_id = message.split("[", 1)[1].split("]", 1)[0]  # get ID from string
-    else:
-        w_id = None
-    ignore_warning = w_id and w_id in SPACY_WARNING_IGNORE
-    if warn_type in SPACY_WARNING_TYPES and not ignore_warning:
-        category = WARNINGS[warn_type]
-        stack = inspect.stack()[-1]
-        with warnings.catch_warnings():
-            if SPACY_WARNING_FILTER:
-                warnings.simplefilter(SPACY_WARNING_FILTER, category)
-            warnings.warn_explicit(message, category, stack[1], stack[2])

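[Review note, not part of the diff] This deletion removes spaCy's private filtering layer (SPACY_WARNING_FILTER, SPACY_WARNING_TYPES, SPACY_WARNING_IGNORE and the _warn() dispatcher) in favour of the standard library's own filters. A sketch of roughly equivalent stdlib incantations, assuming only that spaCy messages keep their leading [Wxxx] codes:

import warnings

# SPACY_WARNING_IGNORE=W022  ->  ignore one warning by the code its message starts with
warnings.filterwarnings("ignore", message=r"\[W022\]")

# SPACY_WARNING_FILTER=error  ->  escalate matching warnings to exceptions
warnings.simplefilter("error", UserWarning)

# SPACY_WARNING_TYPES=deprecation  ->  tune one category independently of the rest
warnings.simplefilter("default", DeprecationWarning)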
View File: spacy/gold.pyx

@@ -10,10 +10,11 @@ import shutil
 import itertools
 from pathlib import Path
 import srsly
+import warnings

 from .syntax import nonproj
 from .tokens import Doc, Span
-from .errors import Errors, AlignmentError, user_warning, Warnings
+from .errors import Errors, AlignmentError, Warnings
 from .compat import path2str
 from . import util
 from .util import minibatch, itershuffle
@@ -508,7 +509,7 @@ def _json_iterate(loc):
     py_raw = file_.read()
     cdef long file_length = len(py_raw)
     if file_length > 2 ** 30:
-        user_warning(Warnings.W027.format(size=file_length))
+        warnings.warn(Warnings.W027.format(size=file_length))

     raw = <char*>py_raw
     cdef int square_depth = 0
@@ -690,7 +691,7 @@ cdef class GoldParse:
                 else:
                     words_offset -= 1
             if len(entities) != len(words):
-                user_warning(Warnings.W029.format(text=doc.text))
+                warnings.warn(Warnings.W029.format(text=doc.text))
                 entities = ["-" for _ in words]

         # These are filled by the tagger/parser/entity recogniser

View File: spacy/kb.pyx

@@ -1,7 +1,9 @@
 # cython: infer_types=True
 # cython: profile=True
 # coding: utf8
-from spacy.errors import Errors, Warnings, user_warning
+import warnings
+
+from spacy.errors import Errors, Warnings
 from pathlib import Path
 from cymem.cymem cimport Pool
@@ -115,7 +117,7 @@ cdef class KnowledgeBase:
         # Return if this entity was added before
         if entity_hash in self._entry_index:
-            user_warning(Warnings.W018.format(entity=entity))
+            warnings.warn(Warnings.W018.format(entity=entity))
             return

         # Raise an error if the provided entity vector is not of the correct length
@@ -147,7 +149,7 @@ cdef class KnowledgeBase:
             # only process this entity if its unique ID hadn't been added before
             entity_hash = self.vocab.strings.add(entity_list[i])
             if entity_hash in self._entry_index:
-                user_warning(Warnings.W018.format(entity=entity_list[i]))
+                warnings.warn(Warnings.W018.format(entity=entity_list[i]))
             else:
                 entity_vector = vector_list[i]
@@ -195,7 +197,7 @@ cdef class KnowledgeBase:
         # Check whether this alias was added before
         if alias_hash in self._alias_index:
-            user_warning(Warnings.W017.format(alias=alias))
+            warnings.warn(Warnings.W017.format(alias=alias))
             return

         cdef vector[int64_t] entry_indices
@@ -252,7 +254,7 @@ cdef class KnowledgeBase:
                 if is_present:
                     if not ignore_warnings:
-                        user_warning(Warnings.W024.format(entity=entity, alias=alias))
+                        warnings.warn(Warnings.W024.format(entity=entity, alias=alias))
                 else:
                     entry_indices.push_back(int(entry_index))
             alias_entry.entry_indices = entry_indices

View File: spacy/language.py

@@ -3,6 +3,7 @@ from __future__ import absolute_import, unicode_literals

 import random
 import itertools
+import warnings

 from thinc.extra import load_nlp
@@ -34,7 +35,7 @@ from .lang.tokenizer_exceptions import TOKEN_MATCH
 from .lang.tag_map import TAG_MAP
 from .tokens import Doc
 from .lang.lex_attrs import LEX_ATTRS, is_stop
-from .errors import Errors, Warnings, deprecation_warning, user_warning
+from .errors import Errors, Warnings
 from . import util
 from . import about
@@ -758,10 +759,10 @@ class Language(object):
         DOCS: https://spacy.io/api/language#pipe
         """
         if is_python2 and n_process != 1:
-            user_warning(Warnings.W023)
+            warnings.warn(Warnings.W023)
             n_process = 1
         if n_threads != -1:
-            deprecation_warning(Warnings.W016)
+            warnings.warn(Warnings.W016, DeprecationWarning)
         if n_process == -1:
             n_process = mp.cpu_count()
         if as_tuples:
@@ -896,7 +897,7 @@ class Language(object):
         DOCS: https://spacy.io/api/language#to_disk
         """
         if disable is not None:
-            deprecation_warning(Warnings.W014)
+            warnings.warn(Warnings.W014, DeprecationWarning)
             exclude = disable
         path = util.ensure_path(path)
         serializers = OrderedDict()
@@ -929,7 +930,7 @@ class Language(object):
         DOCS: https://spacy.io/api/language#from_disk
         """
         if disable is not None:
-            deprecation_warning(Warnings.W014)
+            warnings.warn(Warnings.W014, DeprecationWarning)
             exclude = disable
         path = util.ensure_path(path)
         deserializers = OrderedDict()
@@ -964,7 +965,7 @@ class Language(object):
         DOCS: https://spacy.io/api/language#to_bytes
         """
         if disable is not None:
-            deprecation_warning(Warnings.W014)
+            warnings.warn(Warnings.W014, DeprecationWarning)
             exclude = disable
         serializers = OrderedDict()
         serializers["vocab"] = lambda: self.vocab.to_bytes()
@@ -989,7 +990,7 @@ class Language(object):
         DOCS: https://spacy.io/api/language#from_bytes
         """
         if disable is not None:
-            deprecation_warning(Warnings.W014)
+            warnings.warn(Warnings.W014, DeprecationWarning)
             exclude = disable
         deserializers = OrderedDict()
         deserializers["meta.json"] = lambda b: self.meta.update(srsly.json_loads(b))

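[Review note, not part of the diff] All four serialization methods now emit a standard DeprecationWarning for the old disable keyword. A sketch of how a downstream suite can turn that into a hard failure to flush out old-style calls; the to_disk stub and the W014 message text below are illustrative:

import warnings

def to_disk(path, disable=None, exclude=tuple()):
    # Mirrors the new Language.to_disk guard.
    if disable is not None:
        warnings.warn("[W014] 'disable' is deprecated; use 'exclude'.", DeprecationWarning)
        exclude = disable
    return exclude

warnings.simplefilter("error", DeprecationWarning)  # deprecations become exceptions
try:
    to_disk("/tmp/model", disable=["ner"])
except DeprecationWarning as err:
    print("caught:", err)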
View File: spacy/lexeme.pyx

@@ -9,6 +9,7 @@ cimport numpy as np
 np.import_array()

 import numpy
+import warnings
 from thinc.neural.util import get_array_module

 from libc.stdint cimport UINT64_MAX
@@ -19,7 +20,7 @@ from .attrs cimport IS_BRACKET, IS_QUOTE, IS_LEFT_PUNCT, IS_RIGHT_PUNCT
 from .attrs cimport IS_CURRENCY, IS_OOV, PROB
 from .attrs import intify_attrs
-from .errors import Errors, Warnings, user_warning
+from .errors import Errors, Warnings

 OOV_RANK = UINT64_MAX
@@ -130,7 +131,7 @@ cdef class Lexeme:
         if self.c.orth == other[0].orth:
             return 1.0
         if self.vector_norm == 0 or other.vector_norm == 0:
-            user_warning(Warnings.W008.format(obj="Lexeme"))
+            warnings.warn(Warnings.W008.format(obj="Lexeme"))
             return 0.0
         vector = self.vector
         xp = get_array_module(vector)

View File: spacy/matcher/matcher.pyx

@@ -9,6 +9,7 @@ from murmurhash.mrmr cimport hash64

 import re
 import srsly
+import warnings

 from ..typedefs cimport attr_t
 from ..structs cimport TokenC
@@ -20,7 +21,7 @@ from ..attrs cimport ID, attr_id_t, NULL_ATTR, ORTH, POS, TAG, DEP, LEMMA
 from ._schemas import TOKEN_PATTERN_SCHEMA
 from ..util import get_json_validator, validate_json
-from ..errors import Errors, MatchPatternError, Warnings, deprecation_warning
+from ..errors import Errors, MatchPatternError, Warnings
 from ..strings import get_string_id
 from ..attrs import IDS
@@ -195,7 +196,7 @@ cdef class Matcher:
         YIELDS (Doc): Documents, in order.
         """
         if n_threads != -1:
-            deprecation_warning(Warnings.W016)
+            warnings.warn(Warnings.W016, DeprecationWarning)
         if as_tuples:
             for doc, context in docs:

View File: spacy/matcher/phrasematcher.pyx

@@ -6,13 +6,15 @@ from libc.stdint cimport uintptr_t

 from preshed.maps cimport map_init, map_set, map_get, map_clear, map_iter

+import warnings
+
 from ..attrs cimport ORTH, POS, TAG, DEP, LEMMA
 from ..structs cimport TokenC
 from ..tokens.token cimport Token
 from ..typedefs cimport attr_t

 from ._schemas import TOKEN_PATTERN_SCHEMA
-from ..errors import Errors, Warnings, deprecation_warning, user_warning
+from ..errors import Errors, Warnings


 cdef class PhraseMatcher:
@@ -39,7 +41,7 @@ cdef class PhraseMatcher:
         DOCS: https://spacy.io/api/phrasematcher#init
         """
         if max_length != 0:
-            deprecation_warning(Warnings.W010)
+            warnings.warn(Warnings.W010, DeprecationWarning)
         self.vocab = vocab
         self._callbacks = {}
         self._docs = {}
@@ -195,7 +197,7 @@ cdef class PhraseMatcher:
         if self._validate and (doc.is_tagged or doc.is_parsed) \
                 and self.attr not in (DEP, POS, TAG, LEMMA):
             string_attr = self.vocab.strings[self.attr]
-            user_warning(Warnings.W012.format(key=key, attr=string_attr))
+            warnings.warn(Warnings.W012.format(key=key, attr=string_attr))
             keyword = self._convert_to_array(doc)
         else:
             keyword = doc
@@ -204,7 +206,7 @@ cdef class PhraseMatcher:
         current_node = self.c_map
         for token in keyword:
             if token == self._terminal_hash:
-                user_warning(Warnings.W021)
+                warnings.warn(Warnings.W021)
                 break
             result = <MapStruct*>map_get(current_node, token)
             if not result:
@@ -306,7 +308,7 @@ cdef class PhraseMatcher:
         DOCS: https://spacy.io/api/phrasematcher#pipe
         """
         if n_threads != -1:
-            deprecation_warning(Warnings.W016)
+            warnings.warn(Warnings.W016, DeprecationWarning)
         if as_tuples:
             for doc, context in stream:
                 matches = self(doc)

View File: spacy/pipeline/pipes.pyx

@@ -6,6 +6,7 @@ from __future__ import unicode_literals
 import numpy
 import srsly
 import random
+import warnings
 from collections import OrderedDict
 from thinc.api import chain
 from thinc.v2v import Affine, Maxout, Softmax
@@ -32,7 +33,7 @@ from .._ml import build_text_classifier, build_simple_cnn_text_classifier
 from .._ml import build_bow_text_classifier, build_nel_encoder
 from .._ml import link_vectors_to_models, zero_init, flatten
 from .._ml import masked_language_model, create_default_optimizer, get_cossim_loss
-from ..errors import Errors, TempErrors, user_warning, Warnings
+from ..errors import Errors, TempErrors, Warnings
 from .. import util
@@ -514,7 +515,7 @@ class Tagger(Pipe):
                        **kwargs):
         lemma_tables = ["lemma_rules", "lemma_index", "lemma_exc", "lemma_lookup"]
         if not any(table in self.vocab.lookups for table in lemma_tables):
-            user_warning(Warnings.W022)
+            warnings.warn(Warnings.W022)
         orig_tag_map = dict(self.vocab.morphology.tag_map)
         new_tag_map = OrderedDict()
         for raw_text, annots_brackets in get_gold_tuples():

View File: spacy/tests/doc/test_doc_api.py

@@ -6,7 +6,6 @@ import pytest
 import numpy
 from spacy.tokens import Doc, Span
 from spacy.vocab import Vocab
-from spacy.errors import ModelsWarning
 from spacy.attrs import ENT_TYPE, ENT_IOB, SENT_START, HEAD, DEP

 from ..util import get_doc
@@ -216,7 +215,7 @@ def test_doc_api_similarity_match():
     assert doc.similarity(doc[0]) == 1.0
     assert doc.similarity(doc.vocab["a"]) == 1.0
     doc2 = Doc(doc.vocab, words=["a", "b", "c"])
-    with pytest.warns(ModelsWarning):
+    with pytest.warns(UserWarning):
         assert doc.similarity(doc2[:1]) == 1.0
         assert doc.similarity(doc2) == 0.0

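[Review note, not part of the diff] ModelsWarning subclassed UserWarning (see its definition in the errors.py deletion above), and warnings.warn() defaults to UserWarning, so switching the tests to pytest.warns(UserWarning) keeps the same assertions passing. A self-contained sketch of the pattern; the message text is illustrative:

import warnings
import pytest

def similarity_without_vectors():
    # Stand-in for Doc.similarity on a vocab with no vectors loaded.
    warnings.warn("[W007] The model you're using has no word vectors loaded.")
    return 0.0

def test_warns_plain_user_warning():
    with pytest.warns(UserWarning):
        similarity_without_vectors()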
View File: spacy/tests/doc/test_span.py

@@ -5,7 +5,6 @@ import pytest
 from spacy.attrs import ORTH, LENGTH
 from spacy.tokens import Doc, Span
 from spacy.vocab import Vocab
-from spacy.errors import ModelsWarning
 from spacy.util import filter_spans

 from ..util import get_doc
@@ -124,7 +123,7 @@ def test_span_similarity_match():
     doc = Doc(Vocab(), words=["a", "b", "a", "b"])
     span1 = doc[:2]
     span2 = doc[2:]
-    with pytest.warns(ModelsWarning):
+    with pytest.warns(UserWarning):
         assert span1.similarity(span2) == 1.0
         assert span1.similarity(doc) == 0.0
         assert span1[:1].similarity(doc.vocab["a"]) == 1.0

View File: spacy/tokenizer.pyx

@@ -11,6 +11,7 @@ cimport cython

 from collections import OrderedDict
 import re
+import warnings

 from .tokens.doc cimport Doc
 from .strings cimport hash_string
@@ -18,7 +19,7 @@ from .compat import unescape_unicode, basestring_
 from .attrs import intify_attrs
 from .symbols import ORTH
-from .errors import Errors, Warnings, deprecation_warning
+from .errors import Errors, Warnings
 from . import util
@@ -115,7 +116,7 @@ cdef class Tokenizer:
         return (self.__class__, args, None, None)

     cpdef Doc tokens_from_list(self, list strings):
-        deprecation_warning(Warnings.W002)
+        warnings.warn(Warnings.W002, DeprecationWarning)
         return Doc(self.vocab, words=strings)

     @cython.boundscheck(False)
@@ -181,7 +182,7 @@ cdef class Tokenizer:
         DOCS: https://spacy.io/api/tokenizer#pipe
         """
         if n_threads != -1:
-            deprecation_warning(Warnings.W016)
+            warnings.warn(Warnings.W016, DeprecationWarning)
         for text in texts:
             yield self(text)

View File: spacy/tokens/doc.pyx

@@ -16,6 +16,7 @@ import numpy.linalg
 import struct
 import srsly
 from thinc.neural.util import get_array_module, copy_array
+import warnings

 from .span cimport Span
 from .token cimport Token
@@ -29,7 +30,6 @@ from ..parts_of_speech cimport CCONJ, PUNCT, NOUN, univ_pos_t
 from ..attrs import intify_attrs, IDS
 from ..util import normalize_slice
 from ..compat import is_config, copy_reg, pickle, basestring_
-from ..errors import deprecation_warning, models_warning, user_warning
 from ..errors import Errors, Warnings
 from .. import util
 from .underscore import Underscore, get_ext_args
@@ -396,9 +396,9 @@ cdef class Doc:
         if similar:
             return 1.0
         if self.vocab.vectors.n_keys == 0:
-            models_warning(Warnings.W007.format(obj="Doc"))
+            warnings.warn(Warnings.W007.format(obj="Doc"))
         if self.vector_norm == 0 or other.vector_norm == 0:
-            user_warning(Warnings.W008.format(obj="Doc"))
+            warnings.warn(Warnings.W008.format(obj="Doc"))
             return 0.0
         vector = self.vector
         xp = get_array_module(vector)
@@ -787,7 +787,7 @@ cdef class Doc:
         attrs = [(IDS[id_.upper()] if hasattr(id_, "upper") else id_)
                  for id_ in attrs]
         if array.dtype != numpy.uint64:
-            user_warning(Warnings.W028.format(type=array.dtype))
+            warnings.warn(Warnings.W028.format(type=array.dtype))

         if SENT_START in attrs and HEAD in attrs:
             raise ValueError(Errors.E032)
@@ -1040,10 +1040,10 @@ cdef class Doc:
             indices did not fall at token boundaries.
         """
         cdef unicode tag, lemma, ent_type
-        deprecation_warning(Warnings.W013.format(obj="Doc"))
+        warnings.warn(Warnings.W013.format(obj="Doc"), DeprecationWarning)
         # TODO: ENT_KB_ID ?
         if len(args) == 3:
-            deprecation_warning(Warnings.W003)
+            warnings.warn(Warnings.W003, DeprecationWarning)
             tag, lemma, ent_type = args
             attributes[TAG] = tag
             attributes[LEMMA] = lemma
@@ -1183,7 +1183,7 @@ cdef int set_children_from_heads(TokenC* tokens, int length) except -1:
     while not heads_within_sents:
         heads_within_sents = _set_lr_kids_and_edges(tokens, length, loop_count)
         if loop_count > 10:
-            user_warning(Warnings.W026)
+            warnings.warn(Warnings.W026)
             break
         loop_count += 1
     # Set sentence starts

View File: spacy/tokens/span.pyx

@@ -6,6 +6,7 @@ from libc.math cimport sqrt

 import numpy
 import numpy.linalg
+import warnings
 from thinc.neural.util import get_array_module
 from collections import defaultdict
@@ -21,8 +22,7 @@ from ..symbols cimport dep

 from ..util import normalize_slice
 from ..compat import is_config, basestring_
-from ..errors import Errors, TempErrors, Warnings, user_warning, models_warning
-from ..errors import deprecation_warning
+from ..errors import Errors, TempErrors, Warnings
 from .underscore import Underscore, get_ext_args
@@ -292,7 +292,7 @@ cdef class Span:
         attributes are inherited from the syntactic root token of the span.
         RETURNS (Token): The newly merged token.
         """
-        deprecation_warning(Warnings.W013.format(obj="Span"))
+        warnings.warn(Warnings.W013.format(obj="Span"), DeprecationWarning)
         return self.doc.merge(self.start_char, self.end_char, *args,
                               **attributes)
@@ -333,9 +333,9 @@ cdef class Span:
         if similar:
             return 1.0
         if self.vocab.vectors.n_keys == 0:
-            models_warning(Warnings.W007.format(obj="Span"))
+            warnings.warn(Warnings.W007.format(obj="Span"))
         if self.vector_norm == 0.0 or other.vector_norm == 0.0:
-            user_warning(Warnings.W008.format(obj="Span"))
+            warnings.warn(Warnings.W008.format(obj="Span"))
             return 0.0
         vector = self.vector
         xp = get_array_module(vector)

View File: spacy/tokens/token.pyx

@@ -10,6 +10,7 @@ cimport numpy as np
 np.import_array()

 import numpy
+import warnings
 from thinc.neural.util import get_array_module

 from ..typedefs cimport hash_t
@@ -24,7 +25,7 @@ from ..symbols cimport conj
 from .. import parts_of_speech
 from .. import util
 from ..compat import is_config
-from ..errors import Errors, Warnings, user_warning, models_warning
+from ..errors import Errors, Warnings
 from .underscore import Underscore, get_ext_args
 from .morphanalysis cimport MorphAnalysis
@@ -211,9 +212,9 @@ cdef class Token:
         if self.c.lex.orth == other.orth:
             return 1.0
         if self.vocab.vectors.n_keys == 0:
-            models_warning(Warnings.W007.format(obj="Token"))
+            warnings.warn(Warnings.W007.format(obj="Token"))
         if self.vector_norm == 0 or other.vector_norm == 0:
-            user_warning(Warnings.W008.format(obj="Token"))
+            warnings.warn(Warnings.W008.format(obj="Token"))
             return 0.0
         vector = self.vector
         xp = get_array_module(vector)

View File: spacy/util.py

@@ -16,6 +16,7 @@ import numpy
 import srsly
 import catalogue
 import sys
+import warnings

 try:
     import jsonschema
@@ -30,7 +31,7 @@ except ImportError:
 from .symbols import ORTH
 from .compat import cupy, CudaStream, path2str, basestring_, unicode_
 from .compat import import_file
-from .errors import Errors, Warnings, deprecation_warning
+from .errors import Errors, Warnings

 _data_path = Path(__file__).parent / "data"
@@ -749,7 +750,7 @@ def get_serialization_exclude(serializers, exclude, kwargs):
     options = [name.split(".")[0] for name in serializers]
     for key, value in kwargs.items():
         if key in ("vocab",) and value is False:
-            deprecation_warning(Warnings.W015.format(arg=key))
+            warnings.warn(Warnings.W015.format(arg=key), DeprecationWarning)
             exclude.append(key)
         elif key.split(".")[0] in options:
             raise ValueError(Errors.E128.format(arg=key))