Simplify warnings

Ines Montani 2020-02-28 12:20:23 +01:00
parent 5da3ad682a
commit 37691e6d5d
21 changed files with 82 additions and 133 deletions

View File

@@ -11,7 +11,7 @@ from . import pipeline
 from .cli.info import info as cli_info
 from .glossary import explain
 from .about import __version__
-from .errors import Errors, Warnings, deprecation_warning
+from .errors import Errors, Warnings
 from . import util
 from .util import registry
 from .language import component
@@ -27,7 +27,7 @@ config = registry
 def load(name, **overrides):
     depr_path = overrides.get("path")
     if depr_path not in (True, False, None):
-        deprecation_warning(Warnings.W001.format(path=depr_path))
+        warnings.warn(Warnings.W001.format(path=depr_path), DeprecationWarning)
     return util.load_model(name, **overrides)
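
Note: the hunk above shows the pattern repeated across the rest of this commit. The custom helpers from spacy.errors (user_warning, deprecation_warning, models_warning) become plain warnings.warn calls, with the category passed explicitly wherever it is not the default UserWarning. A minimal sketch of the resulting behaviour, with an illustrative message rather than spaCy's real W001 string:

import warnings

def load(name, path=None):
    # After this commit: a standard-library call with an explicit category
    if path is not None:
        warnings.warn("[W001] the `path` keyword is deprecated", DeprecationWarning)
    return name

# DeprecationWarning is hidden by default outside __main__, so callers
# surface it with the stdlib filter machinery:
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    load("en_core_web_sm", path="/tmp/model")
assert caught[0].category is DeprecationWarning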

View File

@@ -1,7 +1,8 @@
 from wasabi import Printer
+import warnings

 from .tokens import Doc, Token, Span
-from .errors import Errors, Warnings, user_warning
+from .errors import Errors, Warnings


 def analyze_pipes(pipeline, name, pipe, index, warn=True):
@@ -30,7 +31,7 @@ def analyze_pipes(pipeline, name, pipe, index, warn=True):
         if not fulfilled:
             problems.append(annot)
             if warn:
-                user_warning(Warnings.W025.format(name=name, attr=annot))
+                warnings.warn(Warnings.W025.format(name=name, attr=annot))
     return problems

View File

@@ -9,9 +9,10 @@ import gzip
 import zipfile
 import srsly
 from wasabi import msg
+import warnings

 from ..vectors import Vectors
-from ..errors import Errors, Warnings, user_warning
+from ..errors import Errors, Warnings
 from ..util import ensure_path, get_lang_class

 try:
@@ -227,7 +228,7 @@ def read_freqs(freqs_loc, max_length=100, min_doc_freq=5, min_freq=50):
 def read_clusters(clusters_loc):
     clusters = {}
     if ftfy is None:
-        user_warning(Warnings.W004)
+        warnings.warn(Warnings.W004)
     with clusters_loc.open() as f:
         for line in tqdm(f):
             try:

View File

@@ -4,9 +4,11 @@ spaCy's built in visualization suite for dependencies and named entities.
 DOCS: https://spacy.io/api/top-level#displacy
 USAGE: https://spacy.io/usage/visualizers
 """
+import warnings

 from .render import DependencyRenderer, EntityRenderer
 from ..tokens import Doc, Span
-from ..errors import Errors, Warnings, user_warning
+from ..errors import Errors, Warnings
 from ..util import is_in_jupyter
@@ -85,7 +87,7 @@ def serve(
     from wsgiref import simple_server

     if is_in_jupyter():
-        user_warning(Warnings.W011)
+        warnings.warn(Warnings.W011)

     render(docs, style=style, page=page, minify=minify, options=options, manual=manual)
     httpd = simple_server.make_server(host, port, app)
@@ -115,7 +117,7 @@ def parse_deps(orig_doc, options={}):
     """
     doc = Doc(orig_doc.vocab).from_bytes(orig_doc.to_bytes(exclude=["user_data"]))
     if not doc.is_parsed:
-        user_warning(Warnings.W005)
+        warnings.warn(Warnings.W005)
     if options.get("collapse_phrases", False):
         with doc.retokenize() as retokenizer:
             for np in list(doc.noun_chunks):
@@ -173,7 +175,7 @@ def parse_ents(doc, options={}):
         for ent in doc.ents
     ]
     if not ents:
-        user_warning(Warnings.W006)
+        warnings.warn(Warnings.W006)
     title = doc.user_data.get("title", None) if hasattr(doc, "user_data") else None
     settings = get_doc_settings(doc)
     return {"text": doc.text, "ents": ents, "title": title, "settings": settings}

View File

@@ -1,8 +1,3 @@
-import os
-import warnings
-import inspect
-
-
 def add_codes(err_cls):
     """Add error codes to string messages via class attribute names."""
@@ -583,64 +578,3 @@ class MatchPatternError(ValueError):

 class AlignmentError(ValueError):
     pass
-
-
-class ModelsWarning(UserWarning):
-    pass
-
-
-WARNINGS = {
-    "user": UserWarning,
-    "deprecation": DeprecationWarning,
-    "models": ModelsWarning,
-}
-
-
-def _get_warn_types(arg):
-    if arg == "":  # don't show any warnings
-        return []
-    if not arg or arg == "all":  # show all available warnings
-        return WARNINGS.keys()
-    return [w_type.strip() for w_type in arg.split(",") if w_type.strip() in WARNINGS]
-
-
-def _get_warn_excl(arg):
-    if not arg:
-        return []
-    return [w_id.strip() for w_id in arg.split(",")]
-
-
-SPACY_WARNING_FILTER = os.environ.get("SPACY_WARNING_FILTER")
-SPACY_WARNING_TYPES = _get_warn_types(os.environ.get("SPACY_WARNING_TYPES"))
-SPACY_WARNING_IGNORE = _get_warn_excl(os.environ.get("SPACY_WARNING_IGNORE"))
-
-
-def user_warning(message):
-    _warn(message, "user")
-
-
-def deprecation_warning(message):
-    _warn(message, "deprecation")
-
-
-def models_warning(message):
-    _warn(message, "models")
-
-
-def _warn(message, warn_type="user"):
-    """
-    message (unicode): The message to display.
-    category (Warning): The Warning to show.
-    """
-    if message.startswith("["):
-        w_id = message.split("[", 1)[1].split("]", 1)[0]  # get ID from string
-    else:
-        w_id = None
-    ignore_warning = w_id and w_id in SPACY_WARNING_IGNORE
-    if warn_type in SPACY_WARNING_TYPES and not ignore_warning:
-        category = WARNINGS[warn_type]
-        stack = inspect.stack()[-1]
-        with warnings.catch_warnings():
-            if SPACY_WARNING_FILTER:
-                warnings.simplefilter(SPACY_WARNING_FILTER, category)
-            warnings.warn_explicit(message, category, stack[1], stack[2])
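
The block removed above was spaCy's bespoke warning machinery: three wrapper functions, a category registry, and filtering driven by the SPACY_WARNING_FILTER, SPACY_WARNING_TYPES and SPACY_WARNING_IGNORE environment variables. With plain warnings.warn, the same control is available through the standard library. A rough equivalent of the removed env-var filtering, assuming spaCy's convention that messages start with a code like "[W008]":

import warnings

# Ignore one warning ID (replaces SPACY_WARNING_IGNORE="W008"); the
# message argument is a regex matched against the start of the message.
warnings.filterwarnings("ignore", message=r"\[W008\]")

# Escalate a whole category to an error (replaces SPACY_WARNING_FILTER="error"):
warnings.filterwarnings("error", category=DeprecationWarning)

The same filters can be set without code via the -W flag or the PYTHONWARNINGS environment variable, e.g. python -W "ignore::DeprecationWarning" app.py.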

View File

@@ -7,10 +7,11 @@ import shutil
 import itertools
 from pathlib import Path
 import srsly
+import warnings

 from .syntax import nonproj
 from .tokens import Doc, Span
-from .errors import Errors, AlignmentError, user_warning, Warnings
+from .errors import Errors, AlignmentError, Warnings
 from . import util
@@ -550,7 +551,7 @@ def _json_iterate(loc):
     py_raw = file_.read()
     cdef long file_length = len(py_raw)
     if file_length > 2 ** 30:
-        user_warning(Warnings.W027.format(size=file_length))
+        warnings.warn(Warnings.W027.format(size=file_length))

     raw = <char*>py_raw
     cdef int square_depth = 0

View File

@@ -1,16 +1,18 @@
 # cython: infer_types=True
 # cython: profile=True
-from pathlib import Path
 from cymem.cymem cimport Pool
 from preshed.maps cimport PreshMap
 from cpython.exc cimport PyErr_SetFromErrno
 from libc.stdio cimport fopen, fclose, fread, fwrite, feof, fseek
 from libc.stdint cimport int32_t, int64_t
-from os import path
 from libcpp.vector cimport vector
+from pathlib import Path
+import warnings
+from os import path

 from .typedefs cimport hash_t
-from .errors import Errors, Warnings, user_warning
+from .errors import Errors, Warnings

 cdef class Candidate:
@@ -110,7 +112,7 @@ cdef class KnowledgeBase:
         # Return if this entity was added before
         if entity_hash in self._entry_index:
-            user_warning(Warnings.W018.format(entity=entity))
+            warnings.warn(Warnings.W018.format(entity=entity))
             return

         # Raise an error if the provided entity vector is not of the correct length
@@ -142,7 +144,7 @@ cdef class KnowledgeBase:
         # only process this entity if its unique ID hadn't been added before
         entity_hash = self.vocab.strings.add(entity_list[i])
         if entity_hash in self._entry_index:
-            user_warning(Warnings.W018.format(entity=entity_list[i]))
+            warnings.warn(Warnings.W018.format(entity=entity_list[i]))
         else:
             entity_vector = vector_list[i]
@@ -190,7 +192,7 @@ cdef class KnowledgeBase:
         # Check whether this alias was added before
         if alias_hash in self._alias_index:
-            user_warning(Warnings.W017.format(alias=alias))
+            warnings.warn(Warnings.W017.format(alias=alias))
             return

         cdef vector[int64_t] entry_indices
@@ -247,7 +249,7 @@ cdef class KnowledgeBase:
             if is_present:
                 if not ignore_warnings:
-                    user_warning(Warnings.W024.format(entity=entity, alias=alias))
+                    warnings.warn(Warnings.W024.format(entity=entity, alias=alias))
             else:
                 entry_indices.push_back(int(entry_index))
         alias_entry.entry_indices = entry_indices

View File

@@ -5,6 +5,7 @@ import functools
 from contextlib import contextmanager
 from copy import copy, deepcopy
 from pathlib import Path
+import warnings

 from thinc.api import get_current_ops, Config
 import srsly
@@ -26,7 +27,7 @@ from .lang.tokenizer_exceptions import TOKEN_MATCH
 from .lang.tag_map import TAG_MAP
 from .tokens import Doc
 from .lang.lex_attrs import LEX_ATTRS, is_stop
-from .errors import Errors, Warnings, deprecation_warning, user_warning
+from .errors import Errors, Warnings
 from . import util
 from . import about
@@ -340,11 +341,11 @@ class Language(object):
         if "model" in config:
             model_cfg = config["model"]
             if not isinstance(model_cfg, dict):
-                user_warning(Warnings.W099.format(type=type(model_cfg), pipe=name))
+                warnings.warn(Warnings.W099.format(type=type(model_cfg), pipe=name))
                 model_cfg = None
             del config["model"]
         if model_cfg is None and default_config is not None:
-            user_warning(Warnings.W098.format(name=name))
+            warnings.warn(Warnings.W098.format(name=name))
             model_cfg = default_config["model"]
         model = None
         if model_cfg is not None:
@@ -779,7 +780,7 @@ class Language(object):
         # raw_texts will be used later to stop iterator.
         texts, raw_texts = itertools.tee(texts)
         if n_threads != -1:
-            deprecation_warning(Warnings.W016)
+            warnings.warn(Warnings.W016, DeprecationWarning)
         if n_process == -1:
             n_process = mp.cpu_count()
         if as_tuples:
@@ -915,7 +916,7 @@ class Language(object):
         DOCS: https://spacy.io/api/language#to_disk
         """
         if disable is not None:
-            deprecation_warning(Warnings.W014)
+            warnings.warn(Warnings.W014, DeprecationWarning)
             exclude = disable
         path = util.ensure_path(path)
         serializers = {}
@@ -949,7 +950,7 @@ class Language(object):
         DOCS: https://spacy.io/api/language#from_disk
         """
         if disable is not None:
-            deprecation_warning(Warnings.W014)
+            warnings.warn(Warnings.W014, DeprecationWarning)
             exclude = disable
         path = util.ensure_path(path)
         deserializers = {}
@@ -987,7 +988,7 @@ class Language(object):
         DOCS: https://spacy.io/api/language#to_bytes
         """
         if disable is not None:
-            deprecation_warning(Warnings.W014)
+            warnings.warn(Warnings.W014, DeprecationWarning)
             exclude = disable
         serializers = {}
         serializers["vocab"] = lambda: self.vocab.to_bytes()
@@ -1013,7 +1014,7 @@ class Language(object):
         DOCS: https://spacy.io/api/language#from_bytes
         """
         if disable is not None:
-            deprecation_warning(Warnings.W014)
+            warnings.warn(Warnings.W014, DeprecationWarning)
             exclude = disable
         deserializers = {}
         deserializers["config.cfg"] = lambda b: self.config.from_bytes(b)

View File

@@ -7,6 +7,7 @@ np.import_array()
 import numpy
 from thinc.api import get_array_module
+import warnings

 from .typedefs cimport attr_t, flags_t
 from .attrs cimport IS_ALPHA, IS_ASCII, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_SPACE
@@ -15,7 +16,7 @@ from .attrs cimport IS_BRACKET, IS_QUOTE, IS_LEFT_PUNCT, IS_RIGHT_PUNCT
 from .attrs cimport IS_CURRENCY, IS_OOV, PROB
 from .attrs import intify_attrs
-from .errors import Errors, Warnings, user_warning
+from .errors import Errors, Warnings

 memset(&EMPTY_LEXEME, 0, sizeof(LexemeC))
@@ -124,7 +125,7 @@ cdef class Lexeme:
         if self.c.orth == other[0].orth:
             return 1.0
         if self.vector_norm == 0 or other.vector_norm == 0:
-            user_warning(Warnings.W008.format(obj="Lexeme"))
+            warnings.warn(Warnings.W008.format(obj="Lexeme"))
             return 0.0
         vector = self.vector
         xp = get_array_module(vector)

View File

@@ -7,6 +7,7 @@ from murmurhash.mrmr cimport hash64

 import re
 import srsly
+import warnings

 from ..typedefs cimport attr_t
 from ..structs cimport TokenC
@@ -16,7 +17,7 @@ from ..tokens.token cimport Token
 from ..attrs cimport ID, attr_id_t, NULL_ATTR, ORTH, POS, TAG, DEP, LEMMA
 from ..schemas import validate_token_pattern
-from ..errors import Errors, MatchPatternError, Warnings, deprecation_warning
+from ..errors import Errors, MatchPatternError, Warnings
 from ..strings import get_string_id
 from ..attrs import IDS
@@ -188,7 +189,7 @@ cdef class Matcher:
         YIELDS (Doc): Documents, in order.
         """
         if n_threads != -1:
-            deprecation_warning(Warnings.W016)
+            warnings.warn(Warnings.W016, DeprecationWarning)
         if as_tuples:
             for doc, context in docs:

View File

@@ -1,16 +1,17 @@
 # cython: infer_types=True
 # cython: profile=True
 from libc.stdint cimport uintptr_t
 from preshed.maps cimport map_init, map_set, map_get, map_clear, map_iter
+import warnings

 from ..attrs cimport ORTH, POS, TAG, DEP, LEMMA
 from ..structs cimport TokenC
 from ..tokens.token cimport Token
 from ..typedefs cimport attr_t
 from ..schemas import TokenPattern
-from ..errors import Errors, Warnings, deprecation_warning, user_warning
+from ..errors import Errors, Warnings

 cdef class PhraseMatcher:
@@ -37,7 +38,7 @@ cdef class PhraseMatcher:
         DOCS: https://spacy.io/api/phrasematcher#init
         """
         if max_length != 0:
-            deprecation_warning(Warnings.W010)
+            warnings.warn(Warnings.W010, DeprecationWarning)
         self.vocab = vocab
         self._callbacks = {}
         self._docs = {}
@@ -193,7 +194,7 @@ cdef class PhraseMatcher:
         if self._validate and (doc.is_tagged or doc.is_parsed) \
                 and self.attr not in (DEP, POS, TAG, LEMMA):
             string_attr = self.vocab.strings[self.attr]
-            user_warning(Warnings.W012.format(key=key, attr=string_attr))
+            warnings.warn(Warnings.W012.format(key=key, attr=string_attr))
             keyword = self._convert_to_array(doc)
         else:
             keyword = doc
@@ -202,7 +203,7 @@ cdef class PhraseMatcher:
         current_node = self.c_map
         for token in keyword:
             if token == self._terminal_hash:
-                user_warning(Warnings.W021)
+                warnings.warn(Warnings.W021)
                 break
             result = <MapStruct*>map_get(current_node, token)
             if not result:
@@ -304,7 +305,7 @@ cdef class PhraseMatcher:
         DOCS: https://spacy.io/api/phrasematcher#pipe
         """
         if n_threads != -1:
-            deprecation_warning(Warnings.W016)
+            warnings.warn(Warnings.W016, DeprecationWarning)
         if as_tuples:
             for doc, context in stream:
                 matches = self(doc)

View File

@@ -3,6 +3,7 @@ from libc.string cimport memset

 import srsly
 from collections import Counter
 import numpy
+import warnings

 from .strings import get_string_id
 from . import symbols
@@ -11,7 +12,7 @@ from .attrs import LEMMA, intify_attrs
 from .parts_of_speech cimport SPACE
 from .parts_of_speech import IDS as POS_IDS
 from .lexeme cimport Lexeme
-from .errors import Errors, Warnings, user_warning
+from .errors import Errors, Warnings
 from .util import ensure_path
@@ -39,7 +40,7 @@ def _normalize_props(props):
         elif isinstance(key, (int, str)) and isinstance(value, (int, str)):
             out[key] = value
         else:
-            user_warning(Warnings.W028.format(feature={key: value}))
+            warnings.warn(Warnings.W028.format(feature={key: value}))
     return out
@@ -109,7 +110,7 @@ cdef class Morphology:
             return tag_ptr.key
         features = self.feats_to_dict(features)
         if not isinstance(features, dict):
-            user_warning(Warnings.W028.format(feature=features))
+            warnings.warn(Warnings.W028.format(feature=features))
             features = {}
         features = _normalize_props(features)
         string_features = {self.strings.as_string(field): self.strings.as_string(values) for field, values in features.items()}

View File

@@ -5,6 +5,7 @@ import srsly
 import random
 from thinc.api import CosineDistance, to_categorical, get_array_module
 from thinc.api import set_dropout_rate
+import warnings

 from ..tokens.doc cimport Doc
 from ..syntax.nn_parser cimport Parser
@@ -21,7 +22,7 @@ from ..attrs import POS, ID
 from ..util import link_vectors_to_models, create_default_optimizer
 from ..parts_of_speech import X
 from ..kb import KnowledgeBase
-from ..errors import Errors, TempErrors, user_warning, Warnings
+from ..errors import Errors, TempErrors, Warnings
 from .. import util
@@ -525,7 +526,7 @@ class Tagger(Pipe):
                 **kwargs):
         lemma_tables = ["lemma_rules", "lemma_index", "lemma_exc", "lemma_lookup"]
         if not any(table in self.vocab.lookups for table in lemma_tables):
-            user_warning(Warnings.W022)
+            warnings.warn(Warnings.W022)
         orig_tag_map = dict(self.vocab.morphology.tag_map)
         new_tag_map = {}
         for example in get_examples():

View File

@@ -19,6 +19,7 @@ from itertools import islice
 import srsly
 import numpy.random
 import numpy
+import warnings

 from ..gold import Example
 from ..typedefs cimport weight_t, class_t, hash_t
@@ -31,7 +32,7 @@ from ..util import link_vectors_to_models, create_default_optimizer, registry
 from ..compat import copy_array
 from ..tokens.doc cimport Doc
 from ..gold cimport GoldParse
-from ..errors import Errors, user_warning, Warnings
+from ..errors import Errors, Warnings
 from .. import util
 from .stateclass cimport StateClass
 from ._state cimport StateC

View File

@@ -2,7 +2,6 @@ import pytest
 import numpy
 from spacy.tokens import Doc, Span
 from spacy.vocab import Vocab
-from spacy.errors import ModelsWarning
 from spacy.attrs import ENT_TYPE, ENT_IOB, SENT_START, HEAD, DEP

 from ..util import get_doc
@@ -213,7 +212,7 @@ def test_doc_api_similarity_match():
     assert doc.similarity(doc[0]) == 1.0
     assert doc.similarity(doc.vocab["a"]) == 1.0
     doc2 = Doc(doc.vocab, words=["a", "b", "c"])
-    with pytest.warns(ModelsWarning):
+    with pytest.warns(UserWarning):
         assert doc.similarity(doc2[:1]) == 1.0
         assert doc.similarity(doc2) == 0.0

View File

@@ -2,7 +2,6 @@ import pytest
 from spacy.attrs import ORTH, LENGTH
 from spacy.tokens import Doc, Span
 from spacy.vocab import Vocab
-from spacy.errors import ModelsWarning
 from spacy.util import filter_spans

 from ..util import get_doc
@@ -121,7 +120,7 @@ def test_span_similarity_match():
     doc = Doc(Vocab(), words=["a", "b", "a", "b"])
     span1 = doc[:2]
     span2 = doc[2:]
-    with pytest.warns(ModelsWarning):
+    with pytest.warns(UserWarning):
         assert span1.similarity(span2) == 1.0
         assert span1.similarity(doc) == 0.0
         assert span1[:1].similarity(doc.vocab["a"]) == 1.0
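
Both test changes above leave the asserted behaviour untouched: ModelsWarning was defined as a subclass of UserWarning (see the block removed from errors.py), and warnings.warn defaults to UserWarning when no category is given, so pytest.warns(UserWarning) matches the similarity warnings both before and after this commit. A minimal sketch of why the looser assertion passes, using an illustrative stand-in for the W008 code path:

import warnings
import pytest

def similarity_stub():
    # No category argument, so this is raised as a UserWarning
    warnings.warn("[W008] similarity based on empty vectors")
    return 0.0

def test_warns_userwarning():
    with pytest.warns(UserWarning):
        assert similarity_stub() == 0.0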

View File

@@ -11,13 +11,14 @@ from preshed.maps cimport PreshMap
 cimport cython

 import re
+import warnings

 from .tokens.doc cimport Doc
 from .strings cimport hash_string
 from .attrs import intify_attrs
 from .symbols import ORTH
-from .errors import Errors, Warnings, deprecation_warning
+from .errors import Errors, Warnings
 from . import util
 from .attrs import intify_attrs
 from .lexeme cimport EMPTY_LEXEME
@@ -128,7 +129,7 @@ cdef class Tokenizer:
         return (self.__class__, args, None, None)

     cpdef Doc tokens_from_list(self, list strings):
-        deprecation_warning(Warnings.W002)
+        warnings.warn(Warnings.W002, DeprecationWarning)
         return Doc(self.vocab, words=strings)

     def __call__(self, unicode string):
@@ -216,7 +217,7 @@ cdef class Tokenizer:
         DOCS: https://spacy.io/api/tokenizer#pipe
         """
         if n_threads != -1:
-            deprecation_warning(Warnings.W016)
+            warnings.warn(Warnings.W016, DeprecationWarning)
         for text in texts:
             yield self(text)

View File

@@ -13,6 +13,7 @@ import struct
 import srsly
 from thinc.api import get_array_module
 from thinc.util import copy_array
+import warnings

 from .span cimport Span
 from .token cimport Token
@@ -26,7 +27,6 @@ from ..parts_of_speech cimport CCONJ, PUNCT, NOUN, univ_pos_t
 from ..attrs import intify_attrs, IDS
 from ..util import normalize_slice
 from ..compat import copy_reg, pickle
-from ..errors import deprecation_warning, models_warning, user_warning
 from ..errors import Errors, Warnings
 from .. import util
 from .underscore import Underscore, get_ext_args
@@ -388,9 +388,9 @@ cdef class Doc:
             else:
                 return 1.0
         if self.vocab.vectors.n_keys == 0:
-            models_warning(Warnings.W007.format(obj="Doc"))
+            warnings.warn(Warnings.W007.format(obj="Doc"))
         if self.vector_norm == 0 or other.vector_norm == 0:
-            user_warning(Warnings.W008.format(obj="Doc"))
+            warnings.warn(Warnings.W008.format(obj="Doc"))
             return 0.0
         vector = self.vector
         xp = get_array_module(vector)
@@ -1024,10 +1024,10 @@ cdef class Doc:
             indices did not fall at token boundaries.
         """
         cdef unicode tag, lemma, ent_type
-        deprecation_warning(Warnings.W013.format(obj="Doc"))
+        warnings.warn(Warnings.W013.format(obj="Doc"), DeprecationWarning)
         # TODO: ENT_KB_ID ?
         if len(args) == 3:
-            deprecation_warning(Warnings.W003)
+            warnings.warn(Warnings.W003, DeprecationWarning)
             tag, lemma, ent_type = args
             attributes[TAG] = tag
             attributes[LEMMA] = lemma
@@ -1167,7 +1167,7 @@ cdef int set_children_from_heads(TokenC* tokens, int length) except -1:
     while not heads_within_sents:
         heads_within_sents = _set_lr_kids_and_edges(tokens, length, loop_count)
         if loop_count > 10:
-            user_warning(Warnings.W026)
+            warnings.warn(Warnings.W026)
         loop_count += 1
     # Set sentence starts
     for i in range(length):

View File

@@ -6,6 +6,7 @@ import numpy
 import numpy.linalg
 from thinc.api import get_array_module
 from collections import defaultdict
+import warnings

 from .doc cimport token_by_start, token_by_end, get_token_attr, _get_lca_matrix
 from .token cimport TokenC
@@ -18,8 +19,7 @@ from ..lexeme cimport Lexeme
 from ..symbols cimport dep

 from ..util import normalize_slice
-from ..errors import Errors, TempErrors, Warnings, user_warning, models_warning
-from ..errors import deprecation_warning
+from ..errors import Errors, TempErrors, Warnings
 from .underscore import Underscore, get_ext_args
@@ -287,7 +287,7 @@ cdef class Span:
         attributes are inherited from the syntactic root token of the span.
         RETURNS (Token): The newly merged token.
         """
-        deprecation_warning(Warnings.W013.format(obj="Span"))
+        warnings.warn(Warnings.W013.format(obj="Span"), DeprecationWarning)
         return self.doc.merge(self.start_char, self.end_char, *args,
                               **attributes)
@@ -326,9 +326,9 @@ cdef class Span:
             else:
                 return 1.0
         if self.vocab.vectors.n_keys == 0:
-            models_warning(Warnings.W007.format(obj="Span"))
+            warnings.warn(Warnings.W007.format(obj="Span"))
         if self.vector_norm == 0.0 or other.vector_norm == 0.0:
-            user_warning(Warnings.W008.format(obj="Span"))
+            warnings.warn(Warnings.W008.format(obj="Span"))
             return 0.0
         vector = self.vector
         xp = get_array_module(vector)

View File

@@ -8,6 +8,7 @@ np.import_array()
 import numpy
 from thinc.api import get_array_module
+import warnings

 from ..typedefs cimport hash_t
 from ..lexeme cimport Lexeme
@@ -20,7 +21,7 @@ from ..symbols cimport conj
 from .. import parts_of_speech
 from .. import util
-from ..errors import Errors, Warnings, user_warning, models_warning
+from ..errors import Errors, Warnings
 from .underscore import Underscore, get_ext_args
 from .morphanalysis cimport MorphAnalysis
@@ -205,9 +206,9 @@ cdef class Token:
         if self.c.lex.orth == other.orth:
             return 1.0
         if self.vocab.vectors.n_keys == 0:
-            models_warning(Warnings.W007.format(obj="Token"))
+            warnings.warn(Warnings.W007.format(obj="Token"))
         if self.vector_norm == 0 or other.vector_norm == 0:
-            user_warning(Warnings.W008.format(obj="Token"))
+            warnings.warn(Warnings.W008.format(obj="Token"))
             return 0.0
         vector = self.vector
         xp = get_array_module(vector)

View File

@@ -13,6 +13,7 @@ import numpy.random
 import srsly
 import catalogue
 import sys
+import warnings

 try:
@@ -22,7 +23,7 @@ except ImportError:
 from .symbols import ORTH
 from .compat import cupy, CudaStream
-from .errors import Errors, Warnings, deprecation_warning, user_warning
+from .errors import Errors, Warnings

 _PRINT_ENV = False
@@ -731,7 +732,7 @@ def get_serialization_exclude(serializers, exclude, kwargs):
     options = [name.split(".")[0] for name in serializers]
     for key, value in kwargs.items():
         if key in ("vocab",) and value is False:
-            deprecation_warning(Warnings.W015.format(arg=key))
+            warnings.warn(Warnings.W015.format(arg=key), DeprecationWarning)
             exclude.append(key)
         elif key.split(".")[0] in options:
             raise ValueError(Errors.E128.format(arg=key))
@@ -776,7 +777,7 @@ def link_vectors_to_models(vocab):
     if vectors.name is None:
         vectors.name = VECTORS_KEY
     if vectors.data.size != 0:
-        user_warning(Warnings.W020.format(shape=vectors.data.shape))
+        warnings.warn(Warnings.W020.format(shape=vectors.data.shape))
     for word in vocab:
         if word.orth in vectors.key2row:
             word.rank = vectors.key2row[word.orth]