Mirror of https://github.com/explosion/spaCy.git, synced 2024-12-24 17:06:29 +03:00

Merge branch 'develop' of https://github.com/explosion/spaCy into develop

Commit 1ae40b50b4
@@ -8,8 +8,14 @@ from . import util

 def load(name, **overrides):
-    from .deprecated import resolve_load_name
-    name = resolve_load_name(name, **overrides)
+    depr_path = overrides.get('path')
+    if depr_path not in (True, False, None):
+        util.deprecated(
+            "As of spaCy v2.0, the keyword argument `path=` is deprecated. "
+            "You can now call spacy.load with the path as its first argument, "
+            "and the model's meta.json will be used to determine the language "
+            "to load. For example:\nnlp = spacy.load('{}')".format(depr_path),
+            'error')
     return util.load_model(name, **overrides)
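A minimal sketch of the loading patterns this change enforces; the model name and path below are illustrative, not from the commit:

    import spacy

    nlp = spacy.load('en_core_web_sm')    # load by package name or shortcut link
    nlp = spacy.load('/path/to/model')    # load by path; meta.json picks the Language class
    # OLD, now rejected with a deprecation error:
    # nlp = spacy.load('en', path='/path/to/model')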
@@ -1,40 +1,4 @@
 # coding: utf8
 from __future__ import unicode_literals

-from .util import prints
-from .cli import download
-from . import about
-

 PRON_LEMMA = "-PRON-"
-
-
-def depr_model_download(lang):
-    """Replace the en/de download modules: warn and download default models.
-
-    lang (unicode): Language shortcut, 'en' or 'de'.
-    """
-    prints("The spacy.%s.download command is now deprecated. Please use "
-           "spacy download [model name or shortcut] instead. For "
-           "more info, see the documentation:" % lang,
-           about.__docs_models__,
-           "Downloading default '%s' model now..." % lang,
-           title="Warning: deprecated command")
-    download(lang)
-
-
-def resolve_load_name(name, **overrides):
-    """Resolve model loading if deprecated path kwarg in overrides.
-
-    name (unicode): Name of model to load.
-    **overrides: Overrides specified in spacy.load().
-    RETURNS: Model name or value of path kwarg.
-    """
-    if overrides.get('path') not in (None, False, True):
-        name = overrides.get('path')
-        prints("To load a model from a path, you can now use the first "
-               "argument. The model meta is used to load the Language class.",
-               "OLD: spacy.load('en', path='/some/path')",
-               "NEW: spacy.load('/some/path')",
-               title="Warning: deprecated argument 'path'")
-    return name
@@ -1,8 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ..deprecated import depr_model_download
-
-
-if __name__ == '__main__':
-    depr_model_download('de')
@@ -1,8 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ..deprecated import depr_model_download
-
-
-if __name__ == '__main__':
-    depr_model_download('en')
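Both deleted files were thin wrappers around depr_model_download. Their replacement is the unified download CLI; a hedged sketch of the invocations, written as Python comments since the commands live outside the library:

    # OLD: python -m spacy.en.download
    # NEW: python -m spacy download en   # or any model name/shortcut, per the warnings above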
@@ -127,7 +127,8 @@ def test_doc_api_merge(en_tokenizer):
     # merge 'The Beach Boys'
     doc = en_tokenizer(text)
     assert len(doc) == 9
-    doc.merge(doc[4].idx, doc[6].idx + len(doc[6]), 'NAMED', 'LEMMA', 'TYPE')
+    doc.merge(doc[4].idx, doc[6].idx + len(doc[6]), tag='NAMED', lemma='LEMMA',
+              ent_type='TYPE')
     assert len(doc) == 7
     assert doc[4].text == 'the beach boys'
     assert doc[4].text_with_ws == 'the beach boys '
@@ -136,7 +137,8 @@ def test_doc_api_merge(en_tokenizer):
     # merge 'all night'
     doc = en_tokenizer(text)
     assert len(doc) == 9
-    doc.merge(doc[7].idx, doc[8].idx + len(doc[8]), 'NAMED', 'LEMMA', 'TYPE')
+    doc.merge(doc[7].idx, doc[8].idx + len(doc[8]), tag='NAMED', lemma='LEMMA',
+              ent_type='TYPE')
     assert len(doc) == 8
     assert doc[7].text == 'all night'
     assert doc[7].text_with_ws == 'all night'
@@ -147,7 +149,8 @@ def test_doc_api_merge_children(en_tokenizer):
     text = "WKRO played songs by the beach boys all night"
     doc = en_tokenizer(text)
     assert len(doc) == 9
-    doc.merge(doc[4].idx, doc[6].idx + len(doc[6]), 'NAMED', 'LEMMA', 'TYPE')
+    doc.merge(doc[4].idx, doc[6].idx + len(doc[6]), tag='NAMED', lemma='LEMMA',
+              ent_type='TYPE')

     for word in doc:
         if word.i < word.head.i:
@@ -159,8 +162,8 @@ def test_doc_api_merge_children(en_tokenizer):
 def test_doc_api_merge_hang(en_tokenizer):
     text = "through North and South Carolina"
     doc = en_tokenizer(text)
-    doc.merge(18, 32, '', '', 'ORG')
-    doc.merge(8, 32, '', '', 'ORG')
+    doc.merge(18, 32, tag='', lemma='', ent_type='ORG')
+    doc.merge(8, 32, tag='', lemma='', ent_type='ORG')


 def test_doc_api_sents_empty_string(en_tokenizer):
@@ -188,7 +191,8 @@ def test_doc_api_runtime_error(en_tokenizer):
         if len(np) > 1:
             nps.append((np.start_char, np.end_char, np.root.tag_, np.text, np.root.ent_type_))
     for np in nps:
-        doc.merge(*np)
+        start, end, tag, lemma, ent_type = np
+        doc.merge(start, end, tag=tag, lemma=lemma, ent_type=ent_type)


 def test_doc_api_right_edge(en_tokenizer):
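For reference, a minimal sketch of the keyword-style Doc.merge call these tests now use; taking the offsets from Span.start_char/end_char is an illustrative convenience, not how the tests themselves are written:

    doc = en_tokenizer("WKRO played songs by the beach boys all night")
    span = doc[4:7]                       # 'the beach boys'
    doc.merge(span.start_char, span.end_char, tag='NAMED', lemma='LEMMA',
              ent_type='TYPE')            # keyword arguments instead of positional
    assert doc[4].text == 'the beach boys'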
@@ -14,7 +14,7 @@ def test_spans_merge_tokens(en_tokenizer):
     assert len(doc) == 4
     assert doc[0].head.text == 'Angeles'
     assert doc[1].head.text == 'start'
-    doc.merge(0, len('Los Angeles'), 'NNP', 'Los Angeles', 'GPE')
+    doc.merge(0, len('Los Angeles'), tag='NNP', lemma='Los Angeles', ent_type='GPE')
     assert len(doc) == 3
     assert doc[0].text == 'Los Angeles'
     assert doc[0].head.text == 'start'
@@ -36,7 +36,8 @@ def test_spans_merge_heads(en_tokenizer):
     doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads)

     assert len(doc) == 8
-    doc.merge(doc[3].idx, doc[4].idx + len(doc[4]), doc[4].tag_, 'pilates class', 'O')
+    doc.merge(doc[3].idx, doc[4].idx + len(doc[4]), tag=doc[4].tag_,
+              lemma='pilates class', ent_type='O')
     assert len(doc) == 7
     assert doc[0].head.i == 1
     assert doc[1].head.i == 1
@@ -53,7 +54,8 @@ def test_span_np_merges(en_tokenizer):
     doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads)

     assert doc[4].head.i == 1
-    doc.merge(doc[2].idx, doc[4].idx + len(doc[4]), 'NP', 'tool', 'O')
+    doc.merge(doc[2].idx, doc[4].idx + len(doc[4]), tag='NP', lemma='tool',
+              ent_type='O')
     assert doc[2].head.i == 1

     text = "displaCy is a lightweight and modern dependency parse tree visualization tool built with CSS3 and JavaScript."
@@ -63,7 +65,7 @@ def test_span_np_merges(en_tokenizer):

     ents = [(e[0].idx, e[-1].idx + len(e[-1]), e.label_, e.lemma_) for e in doc.ents]
     for start, end, label, lemma in ents:
-        merged = doc.merge(start, end, label, lemma, label)
+        merged = doc.merge(start, end, tag=label, lemma=lemma, ent_type=label)
         assert merged != None, (start, end, label, lemma)

@@ -88,7 +90,7 @@ def test_spans_entity_merge(en_tokenizer):
     assert len(doc) == 17
     for ent in doc.ents:
         label, lemma, type_ = (ent.root.tag_, ent.root.lemma_, max(w.ent_type_ for w in ent))
-        ent.merge(label, lemma, type_)
+        ent.merge(label=label, lemma=lemma, ent_type=type_)
     # check looping is ok
     assert len(doc) == 15

@@ -105,8 +107,8 @@ def test_spans_sentence_update_after_merge(en_tokenizer):
     sent1, sent2 = list(doc.sents)
     init_len = len(sent1)
     init_len2 = len(sent2)
-    doc[0:2].merge('none', 'none', 'none')
-    doc[-2:].merge('none', 'none', 'none')
+    doc[0:2].merge(label='none', lemma='none', ent_type='none')
+    doc[-2:].merge(label='none', lemma='none', ent_type='none')
     assert len(sent1) == init_len - 1
     assert len(sent2) == init_len2 - 1

@@ -122,5 +124,5 @@ def test_spans_subtree_size_check(en_tokenizer):
     doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads, deps=deps)
     sent1 = list(doc.sents)[0]
     init_len = len(list(sent1.root.subtree))
-    doc[0:2].merge('none', 'none', 'none')
+    doc[0:2].merge(label='none', lemma='none', ent_type='none')
     assert len(list(sent1.root.subtree)) == init_len - 1
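The slice doc[0:2] is a Span, so these tests also exercise Span.merge, which takes the same keywords plus label; a one-line sketch mirroring the updated calls:

    doc[0:2].merge(label='none', lemma='none', ent_type='none')   # merge the first two tokens in place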
@@ -161,7 +161,7 @@ def test_is_sent_start(en_tokenizer):
     doc[5].is_sent_start = True
     assert doc[5].is_sent_start is True
-    # Backwards compatibility
-    assert doc[0].sent_start is False
+    with pytest.warns(DeprecationWarning):
+        assert doc[0].sent_start is False
     doc.is_parsed = True
     assert len(list(doc.sents)) == 2

@@ -62,7 +62,11 @@ cdef class Tokenizer:
         return (self.__class__, args, None, None)

     cpdef Doc tokens_from_list(self, list strings):
-        # TODO: deprecation warning
+        util.deprecated(
+            "Tokenizer.tokens_from_list is now deprecated. Create a new Doc "
+            "object instead and pass in the strings as the `words` keyword "
+            "argument, for example:\nfrom spacy.tokens import Doc\n"
+            "doc = Doc(nlp.vocab, words=[...])")
         return Doc(self.vocab, words=strings)

     @cython.boundscheck(False)
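The replacement the warning suggests, as a runnable sketch (an existing nlp object is assumed):

    from spacy.tokens import Doc

    words = ['Hello', ',', 'world', '!']
    doc = Doc(nlp.vocab, words=words)   # pre-tokenized input instead of Tokenizer.tokens_from_list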
@@ -842,17 +842,20 @@ cdef class Doc:
         """
         cdef unicode tag, lemma, ent_type
         if len(args) == 3:
-            # TODO: Warn deprecation
+            util.deprecated(
+                "Positional arguments to Doc.merge are deprecated. Instead, "
+                "use the keyword arguments, for example tag=, lemma= or "
+                "ent_type=.")
             tag, lemma, ent_type = args
             attributes[TAG] = tag
             attributes[LEMMA] = lemma
             attributes[ENT_TYPE] = ent_type
         elif not args:
-            if "label" in attributes and 'ent_type' not in attributes:
-                if isinstance(attributes["label"], int):
-                    attributes[ENT_TYPE] = attributes["label"]
+            if 'label' in attributes and 'ent_type' not in attributes:
+                if isinstance(attributes['label'], int):
+                    attributes[ENT_TYPE] = attributes['label']
                 else:
-                    attributes[ENT_TYPE] = self.vocab.strings[attributes["label"]]
+                    attributes[ENT_TYPE] = self.vocab.strings[attributes['label']]
             if 'ent_type' in attributes:
                 attributes[ENT_TYPE] = attributes['ent_type']
         elif args:
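To illustrate the fallback above: when Doc.merge is called without positional arguments, a label keyword stands in for ent_type (integer labels are used as-is, string labels resolved through the StringStore). The character offsets below are hypothetical placeholders:

    # equivalent under the fallback logic shown in the hunk
    doc.merge(start_char, end_char, label='ORG')
    doc.merge(start_char, end_char, ent_type='ORG')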
@@ -19,6 +19,7 @@ from ..attrs cimport IS_OOV, IS_TITLE, IS_UPPER, LIKE_URL, LIKE_NUM, LIKE_EMAIL
 from ..attrs cimport IS_STOP, ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX
 from ..attrs cimport LENGTH, CLUSTER, LEMMA, POS, TAG, DEP
 from ..compat import is_config
+from .. import util
 from .. import about
 from .underscore import Underscore

@@ -330,8 +331,14 @@ cdef class Token:
             return self.c.r_kids

     property sent_start:
-        # TODO deprecation warning
         def __get__(self):
+            util.deprecated(
+                "Token.sent_start is now deprecated. Use Token.is_sent_start "
+                "instead, which returns a boolean value or None if the answer "
+                "is unknown – instead of a misleading 0 for False and 1 for "
+                "True. It also fixes a quirk in the old logic that would "
+                "always set the property to 0 for the first word of the "
+                "document.")
             # Handle broken backwards compatibility case: doc[0].sent_start
             # was False.
             if self.i == 0:
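The replacement attribute described in the warning, sketched with its three-valued semantics (assuming a freshly tokenized, unparsed doc):

    assert doc[1].is_sent_start is None   # no boundary information set yet
    doc[1].is_sent_start = True           # mark token 1 as a sentence start
    assert doc[1].is_sent_start is True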
@@ -11,6 +11,8 @@ import sys
 import textwrap
 import random
 from collections import OrderedDict
+import inspect
+import warnings
 from thinc.neural._classes.model import Model
 import functools

@@ -501,6 +503,18 @@ def from_disk(path, readers, exclude):
     return path


+def deprecated(message, filter='always'):
+    """Show a deprecation warning.
+
+    message (unicode): The message to display.
+    filter (unicode): Filter value.
+    """
+    stack = inspect.stack()[-1]
+    with warnings.catch_warnings():
+        warnings.simplefilter(filter, DeprecationWarning)
+        warnings.warn_explicit(message, DeprecationWarning, stack[1], stack[2])
+
+
 def print_table(data, title=None):
     """Print data in table format.

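Since the helper goes through warnings.warn_explicit with a real DeprecationWarning, callers' tests can assert on it with pytest, as the updated token test above does; a minimal sketch:

    import pytest

    with pytest.warns(DeprecationWarning):
        util.deprecated("Token.sent_start is now deprecated. Use Token.is_sent_start instead.")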
@@ -108,6 +108,7 @@ export class ModelLoader {
         this.renderCompat(tpl, modelId);
         tpl.get('download').setAttribute('href', `${this.repo}/releases/tag/${model}`);
         tpl.get('table').removeAttribute('data-loading');
+        tpl.get('error').style.display = 'none';
     }

     renderDetails(tpl, { version, size, description, notes, author, url,
@@ -241,7 +242,7 @@ export class ModelComparer {
     }

     showError(err) {
-        console.error(err);
+        console.error(err || 'Error');
         this.tpl.get('result').style.display = 'none';
         this.tpl.get('error').style.display = 'block';
     }
@@ -286,6 +287,8 @@ export class ModelComparer {
         this.chart.update();
         [model1, model2].forEach((model, i) => this.renderTable(metaKeys, i + 1, model));
         this.tpl.get('result').removeAttribute('data-loading');
+        this.tpl.get('error').style.display = 'none';
+        this.tpl.get('result').style.display = 'block';
     }

     renderTable(metaKeys, i, { lang, name, version, size, description,
@@ -1,6 +1,14 @@
 //- 💫 DOCS > USAGE > WHAT'S NEW IN V2.0 > BACKWARDS INCOMPATIBILITIES

+p
+    | The following modules, classes and methods have changed between v1.x
+    | and v2.0.
+
 +table(["Old", "New"])
     +row
         +cell #[code spacy.download.en], #[code spacy.download.de]
         +cell #[+api("cli#download") #[code cli.download]]
+
+    +row
+        +cell
+            | #[code spacy.en] etc.
@@ -136,6 +144,17 @@
     +cell #[code Token.is_ancestor_of]
     +cell #[+api("token#is_ancestor") #[code Token.is_ancestor]]

++h(3, "deprecated") Deprecated
+
+p
+    | The following methods are deprecated. They can still be used,
+    | but should be replaced.
+
++table(["Old", "New"])
+    +row
+        +cell #[code Tokenizer.tokens_from_list]
+        +cell #[+api("doc") #[code Doc]]
+
     +row
         +cell #[code Token.sent_start]
         +cell #[+api("token#is_sent_start") #[code Token.is_sent_start]]