diff --git a/spacy/__init__.py b/spacy/__init__.py
index 9acc566ad..36ff2dd1a 100644
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@@ -8,8 +8,14 @@ from . import util
 
 
 def load(name, **overrides):
-    from .deprecated import resolve_load_name
-    name = resolve_load_name(name, **overrides)
+    depr_path = overrides.get('path')
+    if depr_path not in (True, False, None):
+        util.deprecated(
+            "As of spaCy v2.0, the keyword argument `path=` is deprecated. "
+            "You can now call spacy.load with the path as its first argument, "
+            "and the model's meta.json will be used to determine the language "
+            "to load. For example:\nnlp = spacy.load('{}')".format(depr_path),
+            'error')
     return util.load_model(name, **overrides)
 
 
diff --git a/spacy/deprecated.py b/spacy/deprecated.py
index a1143474a..eafdbe580 100644
--- a/spacy/deprecated.py
+++ b/spacy/deprecated.py
@@ -1,40 +1,4 @@
 # coding: utf8
 from __future__ import unicode_literals
 
-from .util import prints
-from .cli import download
-from . import about
-
-
 PRON_LEMMA = "-PRON-"
-
-
-def depr_model_download(lang):
-    """Replace en/de download modules within, warn and ownload default models.
-
-    lang (unicode): Language shortcut, 'en' or 'de'.
-    """
-    prints("The spacy.%s.download command is now deprecated. Please use "
-           "spacy download [model name or shortcut] instead. For "
-           "more info, see the documentation:" % lang,
-           about.__docs_models__,
-           "Downloading default '%s' model now..." % lang,
-           title="Warning: deprecated command")
-    download(lang)
-
-
-def resolve_load_name(name, **overrides):
-    """Resolve model loading if deprecated path kwarg in overrides.
-
-    name (unicode): Name of model to load.
-    **overrides: Overrides specified in spacy.load().
-    RETURNS: Model name or value of path kwarg.
-    """
-    if overrides.get('path') not in (None, False, True):
-        name = overrides.get('path')
-        prints("To load a model from a path, you can now use the first "
-               "argument. The model meta is used to load the Language class.",
-               "OLD: spacy.load('en', path='/some/path')",
-               "NEW: spacy.load('/some/path')",
-               title="Warning: deprecated argument 'path'")
-    return name
diff --git a/spacy/lang/de/download.py b/spacy/lang/de/download.py
deleted file mode 100644
index d9ff3652a..000000000
--- a/spacy/lang/de/download.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ..deprecated import depr_model_download
-
-
-if __name__ == '__main__':
-    depr_model_download('de')
diff --git a/spacy/lang/en/download.py b/spacy/lang/en/download.py
deleted file mode 100644
index 73133fb48..000000000
--- a/spacy/lang/en/download.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ..deprecated import depr_model_download
-
-
-if __name__ == '__main__':
-    depr_model_download('en')
diff --git a/spacy/tests/doc/test_doc_api.py b/spacy/tests/doc/test_doc_api.py
index 2c90572e3..e4d57cbb0 100644
--- a/spacy/tests/doc/test_doc_api.py
+++ b/spacy/tests/doc/test_doc_api.py
@@ -127,7 +127,8 @@ def test_doc_api_merge(en_tokenizer):
     # merge 'The Beach Boys'
     doc = en_tokenizer(text)
     assert len(doc) == 9
-    doc.merge(doc[4].idx, doc[6].idx + len(doc[6]), 'NAMED', 'LEMMA', 'TYPE')
+    doc.merge(doc[4].idx, doc[6].idx + len(doc[6]), tag='NAMED', lemma='LEMMA',
+              ent_type='TYPE')
     assert len(doc) == 7
     assert doc[4].text == 'the beach boys'
     assert doc[4].text_with_ws == 'the beach boys '
@@ -136,7 +137,8 @@ def test_doc_api_merge(en_tokenizer):
     # merge 'all night'
     doc = en_tokenizer(text)
     assert len(doc) == 9
-    doc.merge(doc[7].idx, doc[8].idx + len(doc[8]), 'NAMED', 'LEMMA', 'TYPE')
+    doc.merge(doc[7].idx, doc[8].idx + len(doc[8]), tag='NAMED', lemma='LEMMA',
+              ent_type='TYPE')
     assert len(doc) == 8
     assert doc[7].text == 'all night'
     assert doc[7].text_with_ws == 'all night'
@@ -147,7 +149,8 @@ def test_doc_api_merge_children(en_tokenizer):
     text = "WKRO played songs by the beach boys all night"
     doc = en_tokenizer(text)
     assert len(doc) == 9
-    doc.merge(doc[4].idx, doc[6].idx + len(doc[6]), 'NAMED', 'LEMMA', 'TYPE')
+    doc.merge(doc[4].idx, doc[6].idx + len(doc[6]), tag='NAMED', lemma='LEMMA',
+              ent_type='TYPE')
 
     for word in doc:
         if word.i < word.head.i:
@@ -159,8 +162,8 @@
 def test_doc_api_merge_hang(en_tokenizer):
     text = "through North and South Carolina"
     doc = en_tokenizer(text)
-    doc.merge(18, 32, '', '', 'ORG')
-    doc.merge(8, 32, '', '', 'ORG')
+    doc.merge(18, 32, tag='', lemma='', ent_type='ORG')
+    doc.merge(8, 32, tag='', lemma='', ent_type='ORG')
 
 
 def test_doc_api_sents_empty_string(en_tokenizer):
@@ -188,7 +191,8 @@ def test_doc_api_runtime_error(en_tokenizer):
         if len(np) > 1:
             nps.append((np.start_char, np.end_char, np.root.tag_, np.text, np.root.ent_type_))
     for np in nps:
-        doc.merge(*np)
+        start, end, tag, lemma, ent_type = np
+        doc.merge(start, end, tag=tag, lemma=lemma, ent_type=ent_type)
 
 
 def test_doc_api_right_edge(en_tokenizer):
diff --git a/spacy/tests/spans/test_span.py b/spacy/tests/doc/test_span.py
similarity index 100%
rename from spacy/tests/spans/test_span.py
rename to spacy/tests/doc/test_span.py
diff --git a/spacy/tests/spans/test_merge.py b/spacy/tests/doc/test_span_merge.py
similarity index 86%
rename from spacy/tests/spans/test_merge.py
rename to spacy/tests/doc/test_span_merge.py
index 29cc917fe..61f8ca50d 100644
--- a/spacy/tests/spans/test_merge.py
+++ b/spacy/tests/doc/test_span_merge.py
@@ -14,7 +14,7 @@ def test_spans_merge_tokens(en_tokenizer):
     assert len(doc) == 4
     assert doc[0].head.text == 'Angeles'
     assert doc[1].head.text == 'start'
-    doc.merge(0, len('Los Angeles'), 'NNP', 'Los Angeles', 'GPE')
+    doc.merge(0, len('Los Angeles'), tag='NNP', lemma='Los Angeles', ent_type='GPE')
     assert len(doc) == 3
     assert doc[0].text == 'Los Angeles'
     assert doc[0].head.text == 'start'
@@ -36,7 +36,8 @@ def test_spans_merge_heads(en_tokenizer):
     doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads)
 
     assert len(doc) == 8
-    doc.merge(doc[3].idx, doc[4].idx + len(doc[4]), doc[4].tag_, 'pilates class', 'O')
+    doc.merge(doc[3].idx, doc[4].idx + len(doc[4]), tag=doc[4].tag_,
+              lemma='pilates class', ent_type='O')
     assert len(doc) == 7
     assert doc[0].head.i == 1
     assert doc[1].head.i == 1
@@ -53,7 +54,8 @@ def test_span_np_merges(en_tokenizer):
     doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads)
 
     assert doc[4].head.i == 1
-    doc.merge(doc[2].idx, doc[4].idx + len(doc[4]), 'NP', 'tool', 'O')
+    doc.merge(doc[2].idx, doc[4].idx + len(doc[4]), tag='NP', lemma='tool',
+              ent_type='O')
     assert doc[2].head.i == 1
 
     text = "displaCy is a lightweight and modern dependency parse tree visualization tool built with CSS3 and JavaScript."
@@ -63,7 +65,7 @@ def test_span_np_merges(en_tokenizer):
 
     ents = [(e[0].idx, e[-1].idx + len(e[-1]), e.label_, e.lemma_) for e in doc.ents]
     for start, end, label, lemma in ents:
-        merged = doc.merge(start, end, label, lemma, label)
+        merged = doc.merge(start, end, tag=label, lemma=lemma, ent_type=label)
         assert merged != None, (start, end, label, lemma)
 
 
@@ -88,7 +90,7 @@ def test_spans_entity_merge(en_tokenizer):
     assert len(doc) == 17
     for ent in doc.ents:
         label, lemma, type_ = (ent.root.tag_, ent.root.lemma_, max(w.ent_type_ for w in ent))
-        ent.merge(label, lemma, type_)
+        ent.merge(label=label, lemma=lemma, ent_type=type_)
     # check looping is ok
     assert len(doc) == 15
 
@@ -105,8 +107,8 @@ def test_spans_sentence_update_after_merge(en_tokenizer):
     sent1, sent2 = list(doc.sents)
     init_len = len(sent1)
    init_len2 = len(sent2)
-    doc[0:2].merge('none', 'none', 'none')
-    doc[-2:].merge('none', 'none', 'none')
+    doc[0:2].merge(label='none', lemma='none', ent_type='none')
+    doc[-2:].merge(label='none', lemma='none', ent_type='none')
     assert len(sent1) == init_len - 1
     assert len(sent2) == init_len2 - 1
 
@@ -122,5 +124,5 @@ def test_spans_subtree_size_check(en_tokenizer):
     doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads, deps=deps)
     sent1 = list(doc.sents)[0]
     init_len = len(list(sent1.root.subtree))
-    doc[0:2].merge('none', 'none', 'none')
-    assert len(list(sent1.root.subtree)) == init_len - 1
\ No newline at end of file
+    doc[0:2].merge(label='none', lemma='none', ent_type='none')
+    assert len(list(sent1.root.subtree)) == init_len - 1
diff --git a/spacy/tests/doc/test_token_api.py b/spacy/tests/doc/test_token_api.py
index 77a15fd43..ec5c32a79 100644
--- a/spacy/tests/doc/test_token_api.py
+++ b/spacy/tests/doc/test_token_api.py
@@ -161,7 +161,7 @@ def test_is_sent_start(en_tokenizer):
     doc[5].is_sent_start = True
     assert doc[5].is_sent_start is True
     # Backwards compatibility
-    assert doc[0].sent_start is False
+    with pytest.warns(DeprecationWarning):
+        assert doc[0].sent_start is False
     doc.is_parsed = True
     assert len(list(doc.sents)) == 2
-
diff --git a/spacy/tests/spans/__init__.py b/spacy/tests/spans/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index ef31a5d5c..3996819ff 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -62,7 +62,11 @@ cdef class Tokenizer:
         return (self.__class__, args, None, None)
 
     cpdef Doc tokens_from_list(self, list strings):
-        # TODO: deprecation warning
+        util.deprecated(
+            "Tokenizer.tokens_from_list is now deprecated. Create a new Doc "
+            "object instead and pass in the strings as the `words` keyword "
+            "argument, for example:\nfrom spacy.tokens import Doc\n"
+            "doc = Doc(nlp.vocab, words=[...])")
         return Doc(self.vocab, words=strings)
 
     @cython.boundscheck(False)
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index fb15323f5..4c3dfc49f 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -842,17 +842,20 @@ cdef class Doc:
         """
         cdef unicode tag, lemma, ent_type
         if len(args) == 3:
-            # TODO: Warn deprecation
+            util.deprecated(
+                "Positional arguments to Doc.merge are deprecated. Instead, "
+                "use the keyword arguments, for example tag=, lemma= or "
+                "ent_type=.")
             tag, lemma, ent_type = args
             attributes[TAG] = tag
             attributes[LEMMA] = lemma
             attributes[ENT_TYPE] = ent_type
         elif not args:
-            if "label" in attributes and 'ent_type' not in attributes:
-                if isinstance(attributes["label"], int):
-                    attributes[ENT_TYPE] = attributes["label"]
+            if 'label' in attributes and 'ent_type' not in attributes:
+                if isinstance(attributes['label'], int):
+                    attributes[ENT_TYPE] = attributes['label']
                 else:
-                    attributes[ENT_TYPE] = self.vocab.strings[attributes["label"]]
+                    attributes[ENT_TYPE] = self.vocab.strings[attributes['label']]
             if 'ent_type' in attributes:
                 attributes[ENT_TYPE] = attributes['ent_type']
         elif args:
diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx
index af88872fb..3253fa738 100644
--- a/spacy/tokens/token.pyx
+++ b/spacy/tokens/token.pyx
@@ -19,6 +19,7 @@ from ..attrs cimport IS_OOV, IS_TITLE, IS_UPPER, LIKE_URL, LIKE_NUM, LIKE_EMAIL
 from ..attrs cimport IS_STOP, ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX
 from ..attrs cimport LENGTH, CLUSTER, LEMMA, POS, TAG, DEP
 from ..compat import is_config
+from .. import util
 from .. import about
 from .underscore import Underscore
 
@@ -330,8 +331,14 @@ cdef class Token:
             return self.c.r_kids
 
     property sent_start:
-        # TODO deprecation warning
         def __get__(self):
+            util.deprecated(
+                "Token.sent_start is now deprecated. Use Token.is_sent_start "
+                "instead, which returns a boolean value or None if the answer "
+                "is unknown – instead of a misleading 0 for False and 1 for "
+                "True. It also fixes a quirk in the old logic that would "
+                "always set the property to 0 for the first word of the "
+                "document.")
             # Handle broken backwards compatibility case: doc[0].sent_start
             # was False.
             if self.i == 0:
diff --git a/spacy/util.py b/spacy/util.py
index 3fbd22aaf..460954b8f 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -11,6 +11,8 @@ import sys
 import textwrap
 import random
 from collections import OrderedDict
+import inspect
+import warnings
 from thinc.neural._classes.model import Model
 import functools
 
@@ -501,6 +503,18 @@ def from_disk(path, readers, exclude):
     return path
 
 
+def deprecated(message, filter='always'):
+    """Show a deprecation warning.
+
+    message (unicode): The message to display.
+    filter (unicode): Filter value.
+    """
+    stack = inspect.stack()[-1]
+    with warnings.catch_warnings():
+        warnings.simplefilter(filter, DeprecationWarning)
+        warnings.warn_explicit(message, DeprecationWarning, stack[1], stack[2])
+
+
 def print_table(data, title=None):
     """Print data in table format.
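Reviewer note on the util.py hunk above: `deprecated()` takes the outermost frame from `inspect.stack()` and reports through `warnings.warn_explicit`, so the `DeprecationWarning` is attributed to the user's own script rather than to a line inside spaCy, and the `simplefilter` call makes it fire even when the caller's filters would normally hide it. A minimal sketch of the behaviour at one of the call sites this patch adds; the `English()` pipeline and the token list are illustrative only, not part of the patch:

    import warnings
    from spacy.lang.en import English

    nlp = English()
    with warnings.catch_warnings(record=True) as caught:
        # tokens_from_list now routes through util.deprecated, so the
        # DeprecationWarning lands in `caught` instead of passing silently.
        doc = nlp.tokenizer.tokens_from_list(['hello', 'world'])

    assert any(issubclass(w.category, DeprecationWarning) for w in caught)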
diff --git a/website/assets/js/models.js b/website/assets/js/models.js
index 8df49acc2..2d371ee1f 100644
--- a/website/assets/js/models.js
+++ b/website/assets/js/models.js
@@ -108,6 +108,7 @@ export class ModelLoader {
         this.renderCompat(tpl, modelId);
         tpl.get('download').setAttribute('href', `${this.repo}/releases/tag/${model}`);
         tpl.get('table').removeAttribute('data-loading');
+        tpl.get('error').style.display = 'none';
     }
 
     renderDetails(tpl, { version, size, description, notes, author, url,
@@ -241,7 +242,7 @@ export class ModelComparer {
     }
 
     showError(err) {
-        console.error(err);
+        console.error(err || 'Error');
         this.tpl.get('result').style.display = 'none';
         this.tpl.get('error').style.display = 'block';
     }
@@ -286,6 +287,8 @@ export class ModelComparer {
         this.chart.update();
         [model1, model2].forEach((model, i) => this.renderTable(metaKeys, i + 1, model));
         this.tpl.get('result').removeAttribute('data-loading');
+        this.tpl.get('error').style.display = 'none';
+        this.tpl.get('result').style.display = 'block';
     }
 
     renderTable(metaKeys, i, { lang, name, version, size, description,
diff --git a/website/usage/_v2/_incompat.jade b/website/usage/_v2/_incompat.jade
index e7546e73c..daaa6141b 100644
--- a/website/usage/_v2/_incompat.jade
+++ b/website/usage/_v2/_incompat.jade
@@ -1,6 +1,14 @@
 //- 💫 DOCS > USAGE > WHAT'S NEW IN V2.0 > BACKWARDS INCOMPATIBILITIES
 
+p
+    |  The following modules, classes and methods have changed between v1.x
+    |  and v2.0.
+
 +table(["Old", "New"])
     +row
+        +cell #[code spacy.download.en], #[code spacy.download.de]
+        +cell #[+api("cli#download") #[code cli.download]]
+
+    +row
         +cell
             |  #[code spacy.en] etc.
@@ -136,6 +144,17 @@
         +cell #[code Token.is_ancestor_of]
         +cell #[+api("token#is_ancestor") #[code Token.is_ancestor]]
 
++h(3, "deprecated") Deprecated
+
+p
+    |  The following methods are deprecated. They can still be used,
+    |  but should be replaced.
+
++table(["Old", "New"])
+    +row
+        +cell #[code Tokenizer.tokens_from_list]
+        +cell #[+api("doc") #[code Doc]]
+
     +row
         +cell #[code Span.sent_start]
         +cell #[+api("span#is_sent_start") #[code Span.is_sent_start]]
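For reference, the deprecations in this patch map old call sites onto the new APIs roughly as follows. This is a sketch for reviewers, not part of the patch: the sample text, the character offsets and the `English()` pipeline are placeholders, and '/some/path' stands in for a real model directory.

    from spacy.lang.en import English
    from spacy.tokens import Doc

    # OLD: nlp = spacy.load('en', path='/some/path')
    # NEW: nlp = spacy.load('/some/path')  # meta.json determines the language

    nlp = English()

    # OLD: doc.merge(start, end, 'NNP', 'Los Angeles', 'GPE')
    # NEW: the same attributes, passed as keyword arguments
    doc = nlp(u'I like Los Angeles')
    doc.merge(doc[2].idx, doc[3].idx + len(doc[3]), tag='NNP',
              lemma='Los Angeles', ent_type='GPE')

    # OLD: nlp.tokenizer.tokens_from_list(['hello', 'world'])
    # NEW: create the Doc directly
    doc = Doc(nlp.vocab, words=['hello', 'world'])

    # OLD: doc[0].sent_start, which returned 0/1 and was always 0 for the
    #      first token
    # NEW: doc[0].is_sent_start, which returns True, False or None
    first_starts_sentence = doc[0].is_sent_start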