mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
Merge pull request #5367 from adrianeboyd/feature/simplify-warnings-v2
This commit is contained in:
commit
efec28ce70
|
@ -13,7 +13,7 @@ from . import pipeline
|
|||
from .cli.info import info as cli_info
|
||||
from .glossary import explain
|
||||
from .about import __version__
|
||||
from .errors import Errors, Warnings, deprecation_warning
|
||||
from .errors import Errors, Warnings
|
||||
from . import util
|
||||
from .util import registry
|
||||
from .language import component
|
||||
|
@ -26,7 +26,7 @@ if sys.maxunicode == 65535:
|
|||
def load(name, **overrides):
|
||||
depr_path = overrides.get("path")
|
||||
if depr_path not in (True, False, None):
|
||||
deprecation_warning(Warnings.W001.format(path=depr_path))
|
||||
warnings.warn(Warnings.W001.format(path=depr_path), DeprecationWarning)
|
||||
return util.load_model(name, **overrides)
|
||||
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import numpy
|
||||
import warnings
|
||||
from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu
|
||||
from thinc.t2t import ExtractWindow, ParametricAttention
|
||||
from thinc.t2v import Pooling, sum_pool, mean_pool
|
||||
|
@ -22,7 +23,7 @@ from thinc.neural._classes.affine import _set_dimensions_if_needed
|
|||
import thinc.extra.load_nlp
|
||||
|
||||
from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE
|
||||
from .errors import Errors, user_warning, Warnings
|
||||
from .errors import Errors, Warnings
|
||||
from . import util
|
||||
from . import ml as new_ml
|
||||
from .ml import _legacy_tok2vec
|
||||
|
@ -283,7 +284,7 @@ def link_vectors_to_models(vocab):
|
|||
if vectors.name is None:
|
||||
vectors.name = VECTORS_KEY
|
||||
if vectors.data.size != 0:
|
||||
user_warning(Warnings.W020.format(shape=vectors.data.shape))
|
||||
warnings.warn(Warnings.W020.format(shape=vectors.data.shape))
|
||||
ops = Model.ops
|
||||
for word in vocab:
|
||||
if word.orth in vectors.key2row:
|
||||
|
@ -299,7 +300,7 @@ def link_vectors_to_models(vocab):
|
|||
# This is a hack to avoid the problem in #3853.
|
||||
old_name = vectors.name
|
||||
new_name = vectors.name + "_%d" % data.shape[0]
|
||||
user_warning(Warnings.W019.format(old=old_name, new=new_name))
|
||||
warnings.warn(Warnings.W019.format(old=old_name, new=new_name))
|
||||
vectors.name = new_name
|
||||
key = (ops.device, vectors.name)
|
||||
thinc.extra.load_nlp.VECTORS[key] = data
|
||||
|
|
|
@ -1,11 +1,13 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import warnings
|
||||
|
||||
from collections import OrderedDict
|
||||
from wasabi import Printer
|
||||
|
||||
from .tokens import Doc, Token, Span
|
||||
from .errors import Errors, Warnings, user_warning
|
||||
from .errors import Errors, Warnings
|
||||
|
||||
|
||||
def analyze_pipes(pipeline, name, pipe, index, warn=True):
|
||||
|
@ -34,7 +36,7 @@ def analyze_pipes(pipeline, name, pipe, index, warn=True):
|
|||
if not fulfilled:
|
||||
problems.append(annot)
|
||||
if warn:
|
||||
user_warning(Warnings.W025.format(name=name, attr=annot))
|
||||
warnings.warn(Warnings.W025.format(name=name, attr=annot))
|
||||
return problems
|
||||
|
||||
|
||||
|
|
|
@ -12,10 +12,11 @@ import tarfile
|
|||
import gzip
|
||||
import zipfile
|
||||
import srsly
|
||||
import warnings
|
||||
from wasabi import msg
|
||||
|
||||
from ..vectors import Vectors
|
||||
from ..errors import Errors, Warnings, user_warning
|
||||
from ..errors import Errors, Warnings
|
||||
from ..util import ensure_path, get_lang_class, OOV_RANK
|
||||
|
||||
try:
|
||||
|
@ -246,7 +247,7 @@ def read_freqs(freqs_loc, max_length=100, min_doc_freq=5, min_freq=50):
|
|||
def read_clusters(clusters_loc):
|
||||
clusters = {}
|
||||
if ftfy is None:
|
||||
user_warning(Warnings.W004)
|
||||
warnings.warn(Warnings.W004)
|
||||
with clusters_loc.open() as f:
|
||||
for line in tqdm(f):
|
||||
try:
|
||||
|
|
|
@ -7,10 +7,12 @@ USAGE: https://spacy.io/usage/visualizers
|
|||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import warnings
|
||||
|
||||
from .render import DependencyRenderer, EntityRenderer
|
||||
from ..tokens import Doc, Span
|
||||
from ..compat import b_to_str
|
||||
from ..errors import Errors, Warnings, user_warning
|
||||
from ..errors import Errors, Warnings
|
||||
from ..util import is_in_jupyter
|
||||
|
||||
|
||||
|
@ -89,7 +91,7 @@ def serve(
|
|||
from wsgiref import simple_server
|
||||
|
||||
if is_in_jupyter():
|
||||
user_warning(Warnings.W011)
|
||||
warnings.warn(Warnings.W011)
|
||||
|
||||
render(docs, style=style, page=page, minify=minify, options=options, manual=manual)
|
||||
httpd = simple_server.make_server(host, port, app)
|
||||
|
@ -119,7 +121,7 @@ def parse_deps(orig_doc, options={}):
|
|||
"""
|
||||
doc = Doc(orig_doc.vocab).from_bytes(orig_doc.to_bytes(exclude=["user_data"]))
|
||||
if not doc.is_parsed:
|
||||
user_warning(Warnings.W005)
|
||||
warnings.warn(Warnings.W005)
|
||||
if options.get("collapse_phrases", False):
|
||||
with doc.retokenize() as retokenizer:
|
||||
for np in list(doc.noun_chunks):
|
||||
|
@ -184,7 +186,7 @@ def parse_ents(doc, options={}):
|
|||
for ent in doc.ents
|
||||
]
|
||||
if not ents:
|
||||
user_warning(Warnings.W006)
|
||||
warnings.warn(Warnings.W006)
|
||||
title = doc.user_data.get("title", None) if hasattr(doc, "user_data") else None
|
||||
settings = get_doc_settings(doc)
|
||||
return {"text": doc.text, "ents": ents, "title": title, "settings": settings}
|
||||
|
|
|
@ -1,11 +1,6 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import warnings
|
||||
import inspect
|
||||
|
||||
|
||||
def add_codes(err_cls):
|
||||
"""Add error codes to string messages via class attribute names."""
|
||||
|
||||
|
@ -93,8 +88,7 @@ class Warnings(object):
|
|||
W022 = ("Training a new part-of-speech tagger using a model with no "
|
||||
"lemmatization rules or data. This means that the trained model "
|
||||
"may not be able to lemmatize correctly. If this is intentional "
|
||||
"or the language you're using doesn't have lemmatization data, "
|
||||
"you can ignore this warning by setting SPACY_WARNING_IGNORE=W022. "
|
||||
"or the language you're using doesn't have lemmatization data. "
|
||||
"If this is surprising, make sure you have the spacy-lookups-data "
|
||||
"package installed.")
|
||||
W023 = ("Multiprocessing of Language.pipe is not supported in Python 2. "
|
||||
|
@ -595,64 +589,3 @@ class MatchPatternError(ValueError):
|
|||
|
||||
class AlignmentError(ValueError):
|
||||
pass
|
||||
|
||||
|
||||
class ModelsWarning(UserWarning):
|
||||
pass
|
||||
|
||||
|
||||
WARNINGS = {
|
||||
"user": UserWarning,
|
||||
"deprecation": DeprecationWarning,
|
||||
"models": ModelsWarning,
|
||||
}
|
||||
|
||||
|
||||
def _get_warn_types(arg):
|
||||
if arg == "": # don't show any warnings
|
||||
return []
|
||||
if not arg or arg == "all": # show all available warnings
|
||||
return WARNINGS.keys()
|
||||
return [w_type.strip() for w_type in arg.split(",") if w_type.strip() in WARNINGS]
|
||||
|
||||
|
||||
def _get_warn_excl(arg):
|
||||
if not arg:
|
||||
return []
|
||||
return [w_id.strip() for w_id in arg.split(",")]
|
||||
|
||||
|
||||
SPACY_WARNING_FILTER = os.environ.get("SPACY_WARNING_FILTER")
|
||||
SPACY_WARNING_TYPES = _get_warn_types(os.environ.get("SPACY_WARNING_TYPES"))
|
||||
SPACY_WARNING_IGNORE = _get_warn_excl(os.environ.get("SPACY_WARNING_IGNORE"))
|
||||
|
||||
|
||||
def user_warning(message):
|
||||
_warn(message, "user")
|
||||
|
||||
|
||||
def deprecation_warning(message):
|
||||
_warn(message, "deprecation")
|
||||
|
||||
|
||||
def models_warning(message):
|
||||
_warn(message, "models")
|
||||
|
||||
|
||||
def _warn(message, warn_type="user"):
|
||||
"""
|
||||
message (unicode): The message to display.
|
||||
category (Warning): The Warning to show.
|
||||
"""
|
||||
if message.startswith("["):
|
||||
w_id = message.split("[", 1)[1].split("]", 1)[0] # get ID from string
|
||||
else:
|
||||
w_id = None
|
||||
ignore_warning = w_id and w_id in SPACY_WARNING_IGNORE
|
||||
if warn_type in SPACY_WARNING_TYPES and not ignore_warning:
|
||||
category = WARNINGS[warn_type]
|
||||
stack = inspect.stack()[-1]
|
||||
with warnings.catch_warnings():
|
||||
if SPACY_WARNING_FILTER:
|
||||
warnings.simplefilter(SPACY_WARNING_FILTER, category)
|
||||
warnings.warn_explicit(message, category, stack[1], stack[2])
|
||||
|
|
|
@ -10,10 +10,11 @@ import shutil
|
|||
import itertools
|
||||
from pathlib import Path
|
||||
import srsly
|
||||
import warnings
|
||||
|
||||
from .syntax import nonproj
|
||||
from .tokens import Doc, Span
|
||||
from .errors import Errors, AlignmentError, user_warning, Warnings
|
||||
from .errors import Errors, AlignmentError, Warnings
|
||||
from .compat import path2str
|
||||
from . import util
|
||||
from .util import minibatch, itershuffle
|
||||
|
@ -508,7 +509,7 @@ def _json_iterate(loc):
|
|||
py_raw = file_.read()
|
||||
cdef long file_length = len(py_raw)
|
||||
if file_length > 2 ** 30:
|
||||
user_warning(Warnings.W027.format(size=file_length))
|
||||
warnings.warn(Warnings.W027.format(size=file_length))
|
||||
|
||||
raw = <char*>py_raw
|
||||
cdef int square_depth = 0
|
||||
|
@ -690,7 +691,7 @@ cdef class GoldParse:
|
|||
else:
|
||||
words_offset -= 1
|
||||
if len(entities) != len(words):
|
||||
user_warning(Warnings.W029.format(text=doc.text))
|
||||
warnings.warn(Warnings.W029.format(text=doc.text))
|
||||
entities = ["-" for _ in words]
|
||||
|
||||
# These are filled by the tagger/parser/entity recogniser
|
||||
|
|
12
spacy/kb.pyx
12
spacy/kb.pyx
|
@ -1,7 +1,9 @@
|
|||
# cython: infer_types=True
|
||||
# cython: profile=True
|
||||
# coding: utf8
|
||||
from spacy.errors import Errors, Warnings, user_warning
|
||||
import warnings
|
||||
|
||||
from spacy.errors import Errors, Warnings
|
||||
|
||||
from pathlib import Path
|
||||
from cymem.cymem cimport Pool
|
||||
|
@ -115,7 +117,7 @@ cdef class KnowledgeBase:
|
|||
|
||||
# Return if this entity was added before
|
||||
if entity_hash in self._entry_index:
|
||||
user_warning(Warnings.W018.format(entity=entity))
|
||||
warnings.warn(Warnings.W018.format(entity=entity))
|
||||
return
|
||||
|
||||
# Raise an error if the provided entity vector is not of the correct length
|
||||
|
@ -147,7 +149,7 @@ cdef class KnowledgeBase:
|
|||
# only process this entity if its unique ID hadn't been added before
|
||||
entity_hash = self.vocab.strings.add(entity_list[i])
|
||||
if entity_hash in self._entry_index:
|
||||
user_warning(Warnings.W018.format(entity=entity_list[i]))
|
||||
warnings.warn(Warnings.W018.format(entity=entity_list[i]))
|
||||
|
||||
else:
|
||||
entity_vector = vector_list[i]
|
||||
|
@ -195,7 +197,7 @@ cdef class KnowledgeBase:
|
|||
|
||||
# Check whether this alias was added before
|
||||
if alias_hash in self._alias_index:
|
||||
user_warning(Warnings.W017.format(alias=alias))
|
||||
warnings.warn(Warnings.W017.format(alias=alias))
|
||||
return
|
||||
|
||||
cdef vector[int64_t] entry_indices
|
||||
|
@ -252,7 +254,7 @@ cdef class KnowledgeBase:
|
|||
|
||||
if is_present:
|
||||
if not ignore_warnings:
|
||||
user_warning(Warnings.W024.format(entity=entity, alias=alias))
|
||||
warnings.warn(Warnings.W024.format(entity=entity, alias=alias))
|
||||
else:
|
||||
entry_indices.push_back(int(entry_index))
|
||||
alias_entry.entry_indices = entry_indices
|
||||
|
|
|
@ -3,6 +3,7 @@ from __future__ import absolute_import, unicode_literals
|
|||
|
||||
import random
|
||||
import itertools
|
||||
import warnings
|
||||
|
||||
from thinc.extra import load_nlp
|
||||
|
||||
|
@ -34,7 +35,7 @@ from .lang.tokenizer_exceptions import TOKEN_MATCH
|
|||
from .lang.tag_map import TAG_MAP
|
||||
from .tokens import Doc
|
||||
from .lang.lex_attrs import LEX_ATTRS, is_stop
|
||||
from .errors import Errors, Warnings, deprecation_warning, user_warning
|
||||
from .errors import Errors, Warnings
|
||||
from . import util
|
||||
from . import about
|
||||
|
||||
|
@ -758,10 +759,10 @@ class Language(object):
|
|||
DOCS: https://spacy.io/api/language#pipe
|
||||
"""
|
||||
if is_python2 and n_process != 1:
|
||||
user_warning(Warnings.W023)
|
||||
warnings.warn(Warnings.W023)
|
||||
n_process = 1
|
||||
if n_threads != -1:
|
||||
deprecation_warning(Warnings.W016)
|
||||
warnings.warn(Warnings.W016, DeprecationWarning)
|
||||
if n_process == -1:
|
||||
n_process = mp.cpu_count()
|
||||
if as_tuples:
|
||||
|
@ -896,7 +897,7 @@ class Language(object):
|
|||
DOCS: https://spacy.io/api/language#to_disk
|
||||
"""
|
||||
if disable is not None:
|
||||
deprecation_warning(Warnings.W014)
|
||||
warnings.warn(Warnings.W014, DeprecationWarning)
|
||||
exclude = disable
|
||||
path = util.ensure_path(path)
|
||||
serializers = OrderedDict()
|
||||
|
@ -929,7 +930,7 @@ class Language(object):
|
|||
DOCS: https://spacy.io/api/language#from_disk
|
||||
"""
|
||||
if disable is not None:
|
||||
deprecation_warning(Warnings.W014)
|
||||
warnings.warn(Warnings.W014, DeprecationWarning)
|
||||
exclude = disable
|
||||
path = util.ensure_path(path)
|
||||
deserializers = OrderedDict()
|
||||
|
@ -964,7 +965,7 @@ class Language(object):
|
|||
DOCS: https://spacy.io/api/language#to_bytes
|
||||
"""
|
||||
if disable is not None:
|
||||
deprecation_warning(Warnings.W014)
|
||||
warnings.warn(Warnings.W014, DeprecationWarning)
|
||||
exclude = disable
|
||||
serializers = OrderedDict()
|
||||
serializers["vocab"] = lambda: self.vocab.to_bytes()
|
||||
|
@ -989,7 +990,7 @@ class Language(object):
|
|||
DOCS: https://spacy.io/api/language#from_bytes
|
||||
"""
|
||||
if disable is not None:
|
||||
deprecation_warning(Warnings.W014)
|
||||
warnings.warn(Warnings.W014, DeprecationWarning)
|
||||
exclude = disable
|
||||
deserializers = OrderedDict()
|
||||
deserializers["meta.json"] = lambda b: self.meta.update(srsly.json_loads(b))
|
||||
|
|
|
@ -9,6 +9,7 @@ cimport numpy as np
|
|||
np.import_array()
|
||||
|
||||
import numpy
|
||||
import warnings
|
||||
from thinc.neural.util import get_array_module
|
||||
|
||||
from libc.stdint cimport UINT64_MAX
|
||||
|
@ -19,7 +20,7 @@ from .attrs cimport IS_BRACKET, IS_QUOTE, IS_LEFT_PUNCT, IS_RIGHT_PUNCT
|
|||
from .attrs cimport IS_CURRENCY, IS_OOV, PROB
|
||||
|
||||
from .attrs import intify_attrs
|
||||
from .errors import Errors, Warnings, user_warning
|
||||
from .errors import Errors, Warnings
|
||||
|
||||
|
||||
OOV_RANK = UINT64_MAX
|
||||
|
@ -130,7 +131,7 @@ cdef class Lexeme:
|
|||
if self.c.orth == other[0].orth:
|
||||
return 1.0
|
||||
if self.vector_norm == 0 or other.vector_norm == 0:
|
||||
user_warning(Warnings.W008.format(obj="Lexeme"))
|
||||
warnings.warn(Warnings.W008.format(obj="Lexeme"))
|
||||
return 0.0
|
||||
vector = self.vector
|
||||
xp = get_array_module(vector)
|
||||
|
|
|
@ -9,6 +9,7 @@ from murmurhash.mrmr cimport hash64
|
|||
|
||||
import re
|
||||
import srsly
|
||||
import warnings
|
||||
|
||||
from ..typedefs cimport attr_t
|
||||
from ..structs cimport TokenC
|
||||
|
@ -20,7 +21,7 @@ from ..attrs cimport ID, attr_id_t, NULL_ATTR, ORTH, POS, TAG, DEP, LEMMA
|
|||
|
||||
from ._schemas import TOKEN_PATTERN_SCHEMA
|
||||
from ..util import get_json_validator, validate_json
|
||||
from ..errors import Errors, MatchPatternError, Warnings, deprecation_warning
|
||||
from ..errors import Errors, MatchPatternError, Warnings
|
||||
from ..strings import get_string_id
|
||||
from ..attrs import IDS
|
||||
|
||||
|
@ -195,7 +196,7 @@ cdef class Matcher:
|
|||
YIELDS (Doc): Documents, in order.
|
||||
"""
|
||||
if n_threads != -1:
|
||||
deprecation_warning(Warnings.W016)
|
||||
warnings.warn(Warnings.W016, DeprecationWarning)
|
||||
|
||||
if as_tuples:
|
||||
for doc, context in docs:
|
||||
|
|
|
@ -6,13 +6,15 @@ from libc.stdint cimport uintptr_t
|
|||
|
||||
from preshed.maps cimport map_init, map_set, map_get, map_clear, map_iter
|
||||
|
||||
import warnings
|
||||
|
||||
from ..attrs cimport ORTH, POS, TAG, DEP, LEMMA
|
||||
from ..structs cimport TokenC
|
||||
from ..tokens.token cimport Token
|
||||
from ..typedefs cimport attr_t
|
||||
|
||||
from ._schemas import TOKEN_PATTERN_SCHEMA
|
||||
from ..errors import Errors, Warnings, deprecation_warning, user_warning
|
||||
from ..errors import Errors, Warnings
|
||||
|
||||
|
||||
cdef class PhraseMatcher:
|
||||
|
@ -39,7 +41,7 @@ cdef class PhraseMatcher:
|
|||
DOCS: https://spacy.io/api/phrasematcher#init
|
||||
"""
|
||||
if max_length != 0:
|
||||
deprecation_warning(Warnings.W010)
|
||||
warnings.warn(Warnings.W010, DeprecationWarning)
|
||||
self.vocab = vocab
|
||||
self._callbacks = {}
|
||||
self._docs = {}
|
||||
|
@ -195,7 +197,7 @@ cdef class PhraseMatcher:
|
|||
if self._validate and (doc.is_tagged or doc.is_parsed) \
|
||||
and self.attr not in (DEP, POS, TAG, LEMMA):
|
||||
string_attr = self.vocab.strings[self.attr]
|
||||
user_warning(Warnings.W012.format(key=key, attr=string_attr))
|
||||
warnings.warn(Warnings.W012.format(key=key, attr=string_attr))
|
||||
keyword = self._convert_to_array(doc)
|
||||
else:
|
||||
keyword = doc
|
||||
|
@ -204,7 +206,7 @@ cdef class PhraseMatcher:
|
|||
current_node = self.c_map
|
||||
for token in keyword:
|
||||
if token == self._terminal_hash:
|
||||
user_warning(Warnings.W021)
|
||||
warnings.warn(Warnings.W021)
|
||||
break
|
||||
result = <MapStruct*>map_get(current_node, token)
|
||||
if not result:
|
||||
|
@ -306,7 +308,7 @@ cdef class PhraseMatcher:
|
|||
DOCS: https://spacy.io/api/phrasematcher#pipe
|
||||
"""
|
||||
if n_threads != -1:
|
||||
deprecation_warning(Warnings.W016)
|
||||
warnings.warn(Warnings.W016, DeprecationWarning)
|
||||
if as_tuples:
|
||||
for doc, context in stream:
|
||||
matches = self(doc)
|
||||
|
|
|
@ -6,6 +6,7 @@ from __future__ import unicode_literals
|
|||
import numpy
|
||||
import srsly
|
||||
import random
|
||||
import warnings
|
||||
from collections import OrderedDict
|
||||
from thinc.api import chain
|
||||
from thinc.v2v import Affine, Maxout, Softmax
|
||||
|
@ -32,7 +33,7 @@ from .._ml import build_text_classifier, build_simple_cnn_text_classifier
|
|||
from .._ml import build_bow_text_classifier, build_nel_encoder
|
||||
from .._ml import link_vectors_to_models, zero_init, flatten
|
||||
from .._ml import masked_language_model, create_default_optimizer, get_cossim_loss
|
||||
from ..errors import Errors, TempErrors, user_warning, Warnings
|
||||
from ..errors import Errors, TempErrors, Warnings
|
||||
from .. import util
|
||||
|
||||
|
||||
|
@ -514,7 +515,7 @@ class Tagger(Pipe):
|
|||
**kwargs):
|
||||
lemma_tables = ["lemma_rules", "lemma_index", "lemma_exc", "lemma_lookup"]
|
||||
if not any(table in self.vocab.lookups for table in lemma_tables):
|
||||
user_warning(Warnings.W022)
|
||||
warnings.warn(Warnings.W022)
|
||||
orig_tag_map = dict(self.vocab.morphology.tag_map)
|
||||
new_tag_map = OrderedDict()
|
||||
for raw_text, annots_brackets in get_gold_tuples():
|
||||
|
|
|
@ -6,7 +6,6 @@ import pytest
|
|||
import numpy
|
||||
from spacy.tokens import Doc, Span
|
||||
from spacy.vocab import Vocab
|
||||
from spacy.errors import ModelsWarning
|
||||
from spacy.attrs import ENT_TYPE, ENT_IOB, SENT_START, HEAD, DEP
|
||||
|
||||
from ..util import get_doc
|
||||
|
@ -216,7 +215,7 @@ def test_doc_api_similarity_match():
|
|||
assert doc.similarity(doc[0]) == 1.0
|
||||
assert doc.similarity(doc.vocab["a"]) == 1.0
|
||||
doc2 = Doc(doc.vocab, words=["a", "b", "c"])
|
||||
with pytest.warns(ModelsWarning):
|
||||
with pytest.warns(UserWarning):
|
||||
assert doc.similarity(doc2[:1]) == 1.0
|
||||
assert doc.similarity(doc2) == 0.0
|
||||
|
||||
|
|
|
@ -5,7 +5,6 @@ import pytest
|
|||
from spacy.attrs import ORTH, LENGTH
|
||||
from spacy.tokens import Doc, Span
|
||||
from spacy.vocab import Vocab
|
||||
from spacy.errors import ModelsWarning
|
||||
from spacy.util import filter_spans
|
||||
|
||||
from ..util import get_doc
|
||||
|
@ -124,7 +123,7 @@ def test_span_similarity_match():
|
|||
doc = Doc(Vocab(), words=["a", "b", "a", "b"])
|
||||
span1 = doc[:2]
|
||||
span2 = doc[2:]
|
||||
with pytest.warns(ModelsWarning):
|
||||
with pytest.warns(UserWarning):
|
||||
assert span1.similarity(span2) == 1.0
|
||||
assert span1.similarity(doc) == 0.0
|
||||
assert span1[:1].similarity(doc.vocab["a"]) == 1.0
|
||||
|
|
|
@ -11,6 +11,7 @@ cimport cython
|
|||
|
||||
from collections import OrderedDict
|
||||
import re
|
||||
import warnings
|
||||
|
||||
from .tokens.doc cimport Doc
|
||||
from .strings cimport hash_string
|
||||
|
@ -18,7 +19,7 @@ from .compat import unescape_unicode, basestring_
|
|||
from .attrs import intify_attrs
|
||||
from .symbols import ORTH
|
||||
|
||||
from .errors import Errors, Warnings, deprecation_warning
|
||||
from .errors import Errors, Warnings
|
||||
from . import util
|
||||
|
||||
|
||||
|
@ -115,7 +116,7 @@ cdef class Tokenizer:
|
|||
return (self.__class__, args, None, None)
|
||||
|
||||
cpdef Doc tokens_from_list(self, list strings):
|
||||
deprecation_warning(Warnings.W002)
|
||||
warnings.warn(Warnings.W002, DeprecationWarning)
|
||||
return Doc(self.vocab, words=strings)
|
||||
|
||||
@cython.boundscheck(False)
|
||||
|
@ -181,7 +182,7 @@ cdef class Tokenizer:
|
|||
DOCS: https://spacy.io/api/tokenizer#pipe
|
||||
"""
|
||||
if n_threads != -1:
|
||||
deprecation_warning(Warnings.W016)
|
||||
warnings.warn(Warnings.W016, DeprecationWarning)
|
||||
for text in texts:
|
||||
yield self(text)
|
||||
|
||||
|
|
|
@ -16,6 +16,7 @@ import numpy.linalg
|
|||
import struct
|
||||
import srsly
|
||||
from thinc.neural.util import get_array_module, copy_array
|
||||
import warnings
|
||||
|
||||
from .span cimport Span
|
||||
from .token cimport Token
|
||||
|
@ -29,7 +30,6 @@ from ..parts_of_speech cimport CCONJ, PUNCT, NOUN, univ_pos_t
|
|||
from ..attrs import intify_attrs, IDS
|
||||
from ..util import normalize_slice
|
||||
from ..compat import is_config, copy_reg, pickle, basestring_
|
||||
from ..errors import deprecation_warning, models_warning, user_warning
|
||||
from ..errors import Errors, Warnings
|
||||
from .. import util
|
||||
from .underscore import Underscore, get_ext_args
|
||||
|
@ -396,9 +396,9 @@ cdef class Doc:
|
|||
if similar:
|
||||
return 1.0
|
||||
if self.vocab.vectors.n_keys == 0:
|
||||
models_warning(Warnings.W007.format(obj="Doc"))
|
||||
warnings.warn(Warnings.W007.format(obj="Doc"))
|
||||
if self.vector_norm == 0 or other.vector_norm == 0:
|
||||
user_warning(Warnings.W008.format(obj="Doc"))
|
||||
warnings.warn(Warnings.W008.format(obj="Doc"))
|
||||
return 0.0
|
||||
vector = self.vector
|
||||
xp = get_array_module(vector)
|
||||
|
@ -787,7 +787,7 @@ cdef class Doc:
|
|||
attrs = [(IDS[id_.upper()] if hasattr(id_, "upper") else id_)
|
||||
for id_ in attrs]
|
||||
if array.dtype != numpy.uint64:
|
||||
user_warning(Warnings.W028.format(type=array.dtype))
|
||||
warnings.warn(Warnings.W028.format(type=array.dtype))
|
||||
|
||||
if SENT_START in attrs and HEAD in attrs:
|
||||
raise ValueError(Errors.E032)
|
||||
|
@ -1040,10 +1040,10 @@ cdef class Doc:
|
|||
indices did not fall at token boundaries.
|
||||
"""
|
||||
cdef unicode tag, lemma, ent_type
|
||||
deprecation_warning(Warnings.W013.format(obj="Doc"))
|
||||
warnings.warn(Warnings.W013.format(obj="Doc"), DeprecationWarning)
|
||||
# TODO: ENT_KB_ID ?
|
||||
if len(args) == 3:
|
||||
deprecation_warning(Warnings.W003)
|
||||
warnings.warn(Warnings.W003, DeprecationWarning)
|
||||
tag, lemma, ent_type = args
|
||||
attributes[TAG] = tag
|
||||
attributes[LEMMA] = lemma
|
||||
|
@ -1183,7 +1183,7 @@ cdef int set_children_from_heads(TokenC* tokens, int length) except -1:
|
|||
while not heads_within_sents:
|
||||
heads_within_sents = _set_lr_kids_and_edges(tokens, length, loop_count)
|
||||
if loop_count > 10:
|
||||
user_warning(Warnings.W026)
|
||||
warnings.warn(Warnings.W026)
|
||||
break
|
||||
loop_count += 1
|
||||
# Set sentence starts
|
||||
|
|
|
@ -6,6 +6,7 @@ from libc.math cimport sqrt
|
|||
|
||||
import numpy
|
||||
import numpy.linalg
|
||||
import warnings
|
||||
from thinc.neural.util import get_array_module
|
||||
from collections import defaultdict
|
||||
|
||||
|
@ -21,8 +22,7 @@ from ..symbols cimport dep
|
|||
|
||||
from ..util import normalize_slice
|
||||
from ..compat import is_config, basestring_
|
||||
from ..errors import Errors, TempErrors, Warnings, user_warning, models_warning
|
||||
from ..errors import deprecation_warning
|
||||
from ..errors import Errors, TempErrors, Warnings
|
||||
from .underscore import Underscore, get_ext_args
|
||||
|
||||
|
||||
|
@ -292,7 +292,7 @@ cdef class Span:
|
|||
attributes are inherited from the syntactic root token of the span.
|
||||
RETURNS (Token): The newly merged token.
|
||||
"""
|
||||
deprecation_warning(Warnings.W013.format(obj="Span"))
|
||||
warnings.warn(Warnings.W013.format(obj="Span"), DeprecationWarning)
|
||||
return self.doc.merge(self.start_char, self.end_char, *args,
|
||||
**attributes)
|
||||
|
||||
|
@ -333,9 +333,9 @@ cdef class Span:
|
|||
if similar:
|
||||
return 1.0
|
||||
if self.vocab.vectors.n_keys == 0:
|
||||
models_warning(Warnings.W007.format(obj="Span"))
|
||||
warnings.warn(Warnings.W007.format(obj="Span"))
|
||||
if self.vector_norm == 0.0 or other.vector_norm == 0.0:
|
||||
user_warning(Warnings.W008.format(obj="Span"))
|
||||
warnings.warn(Warnings.W008.format(obj="Span"))
|
||||
return 0.0
|
||||
vector = self.vector
|
||||
xp = get_array_module(vector)
|
||||
|
|
|
@ -10,6 +10,7 @@ cimport numpy as np
|
|||
np.import_array()
|
||||
|
||||
import numpy
|
||||
import warnings
|
||||
from thinc.neural.util import get_array_module
|
||||
|
||||
from ..typedefs cimport hash_t
|
||||
|
@ -24,7 +25,7 @@ from ..symbols cimport conj
|
|||
from .. import parts_of_speech
|
||||
from .. import util
|
||||
from ..compat import is_config
|
||||
from ..errors import Errors, Warnings, user_warning, models_warning
|
||||
from ..errors import Errors, Warnings
|
||||
from .underscore import Underscore, get_ext_args
|
||||
from .morphanalysis cimport MorphAnalysis
|
||||
|
||||
|
@ -211,9 +212,9 @@ cdef class Token:
|
|||
if self.c.lex.orth == other.orth:
|
||||
return 1.0
|
||||
if self.vocab.vectors.n_keys == 0:
|
||||
models_warning(Warnings.W007.format(obj="Token"))
|
||||
warnings.warn(Warnings.W007.format(obj="Token"))
|
||||
if self.vector_norm == 0 or other.vector_norm == 0:
|
||||
user_warning(Warnings.W008.format(obj="Token"))
|
||||
warnings.warn(Warnings.W008.format(obj="Token"))
|
||||
return 0.0
|
||||
vector = self.vector
|
||||
xp = get_array_module(vector)
|
||||
|
|
|
@ -16,6 +16,7 @@ import numpy
|
|||
import srsly
|
||||
import catalogue
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
try:
|
||||
import jsonschema
|
||||
|
@ -30,7 +31,7 @@ except ImportError:
|
|||
from .symbols import ORTH
|
||||
from .compat import cupy, CudaStream, path2str, basestring_, unicode_
|
||||
from .compat import import_file
|
||||
from .errors import Errors, Warnings, deprecation_warning
|
||||
from .errors import Errors, Warnings
|
||||
|
||||
|
||||
_data_path = Path(__file__).parent / "data"
|
||||
|
@ -749,7 +750,7 @@ def get_serialization_exclude(serializers, exclude, kwargs):
|
|||
options = [name.split(".")[0] for name in serializers]
|
||||
for key, value in kwargs.items():
|
||||
if key in ("vocab",) and value is False:
|
||||
deprecation_warning(Warnings.W015.format(arg=key))
|
||||
warnings.warn(Warnings.W015.format(arg=key), DeprecationWarning)
|
||||
exclude.append(key)
|
||||
elif key.split(".")[0] in options:
|
||||
raise ValueError(Errors.E128.format(arg=key))
|
||||
|
|
Loading…
Reference in New Issue
Block a user