mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-26 21:51:24 +03:00 
			
		
		
		
	Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
		
						commit
						2ef7b59eb0
					
				|  | @ -8,8 +8,14 @@ from . import util | |||
| 
 | ||||
| 
 | ||||
| def load(name, **overrides): | ||||
|     from .deprecated import resolve_load_name | ||||
|     name = resolve_load_name(name, **overrides) | ||||
|     depr_path = overrides.get('path') | ||||
|     if depr_path not in (True, False, None): | ||||
|         util.deprecated( | ||||
|             "As of spaCy v2.0, the keyword argument `path=` is deprecated. " | ||||
|             "You can now call spacy.load with the path as its first argument, " | ||||
|             "and the model's meta.json will be used to determine the language " | ||||
|             "to load. For example:\nnlp = spacy.load('{}')".format(depr_path), | ||||
|             'error') | ||||
|     return util.load_model(name, **overrides) | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,40 +1,4 @@ | |||
| # coding: utf8 | ||||
| from __future__ import unicode_literals | ||||
| 
 | ||||
| from .util import prints | ||||
| from .cli import download | ||||
| from . import about | ||||
| 
 | ||||
| 
 | ||||
| PRON_LEMMA = "-PRON-" | ||||
| 
 | ||||
| 
 | ||||
def depr_model_download(lang):
    """Print a deprecation warning, then run the download for `lang`.

    Used by the legacy per-language download modules: the user is told to
    switch to `spacy download`, pointed at the models documentation, and the
    download is then started via cli.download with the given shortcut.

    lang (unicode): Language shortcut, 'en' or 'de'.
    """
    deprecation_notice = (
        "The spacy.%s.download command is now deprecated. Please use "
        "spacy download [model name or shortcut] instead. For "
        "more info, see the documentation:" % lang)
    progress_notice = "Downloading default '%s' model now..." % lang
    prints(deprecation_notice, about.__docs_models__, progress_notice,
           title="Warning: deprecated command")
    download(lang)
| 
 | ||||
| 
 | ||||
def resolve_load_name(name, **overrides):
    """Pick the model identifier to load, honouring the legacy path kwarg.

    name (unicode): Name of model to load.
    **overrides: Overrides specified in spacy.load().
    RETURNS: The value of the deprecated `path` override if one was given
        (anything other than None, False or True), otherwise `name`.
    """
    legacy_path = overrides.get('path')
    # Nothing to resolve unless a real path value was passed.
    if legacy_path in (None, False, True):
        return name
    prints("To load a model from a path, you can now use the first "
           "argument. The model meta is used to load the Language class.",
           "OLD: spacy.load('en', path='/some/path')",
           "NEW: spacy.load('/some/path')",
           title="Warning: deprecated argument 'path'")
    return legacy_path
|  |  | |||
|  | @ -1,8 +0,0 @@ | |||
| # coding: utf8 | ||||
| from __future__ import unicode_literals | ||||
| 
 | ||||
| from ..deprecated import depr_model_download | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     depr_model_download('de') | ||||
|  | @ -1,8 +0,0 @@ | |||
| # coding: utf8 | ||||
| from __future__ import unicode_literals | ||||
| 
 | ||||
| from ..deprecated import depr_model_download | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     depr_model_download('en') | ||||
|  | @ -127,7 +127,8 @@ def test_doc_api_merge(en_tokenizer): | |||
|     # merge 'The Beach Boys' | ||||
|     doc = en_tokenizer(text) | ||||
|     assert len(doc) == 9 | ||||
|     doc.merge(doc[4].idx, doc[6].idx + len(doc[6]), 'NAMED', 'LEMMA', 'TYPE') | ||||
|     doc.merge(doc[4].idx, doc[6].idx + len(doc[6]), tag='NAMED', lemma='LEMMA', | ||||
|               ent_type='TYPE') | ||||
|     assert len(doc) == 7 | ||||
|     assert doc[4].text == 'the beach boys' | ||||
|     assert doc[4].text_with_ws == 'the beach boys ' | ||||
|  | @ -136,7 +137,8 @@ def test_doc_api_merge(en_tokenizer): | |||
|     # merge 'all night' | ||||
|     doc = en_tokenizer(text) | ||||
|     assert len(doc) == 9 | ||||
|     doc.merge(doc[7].idx, doc[8].idx + len(doc[8]), 'NAMED', 'LEMMA', 'TYPE') | ||||
|     doc.merge(doc[7].idx, doc[8].idx + len(doc[8]), tag='NAMED', lemma='LEMMA', | ||||
|               ent_type='TYPE') | ||||
|     assert len(doc) == 8 | ||||
|     assert doc[7].text == 'all night' | ||||
|     assert doc[7].text_with_ws == 'all night' | ||||
|  | @ -147,7 +149,8 @@ def test_doc_api_merge_children(en_tokenizer): | |||
|     text = "WKRO played songs by the beach boys all night" | ||||
|     doc = en_tokenizer(text) | ||||
|     assert len(doc) == 9 | ||||
|     doc.merge(doc[4].idx, doc[6].idx + len(doc[6]), 'NAMED', 'LEMMA', 'TYPE') | ||||
|     doc.merge(doc[4].idx, doc[6].idx + len(doc[6]), tag='NAMED', lemma='LEMMA', | ||||
|               ent_type='TYPE') | ||||
| 
 | ||||
|     for word in doc: | ||||
|         if word.i < word.head.i: | ||||
|  | @ -159,8 +162,8 @@ def test_doc_api_merge_children(en_tokenizer): | |||
| def test_doc_api_merge_hang(en_tokenizer): | ||||
|     text = "through North and South Carolina" | ||||
|     doc = en_tokenizer(text) | ||||
|     doc.merge(18, 32, '', '', 'ORG') | ||||
|     doc.merge(8, 32, '', '', 'ORG') | ||||
|     doc.merge(18, 32, tag='', lemma='', ent_type='ORG') | ||||
|     doc.merge(8, 32, tag='', lemma='', ent_type='ORG') | ||||
| 
 | ||||
| 
 | ||||
| def test_doc_api_sents_empty_string(en_tokenizer): | ||||
|  | @ -188,7 +191,8 @@ def test_doc_api_runtime_error(en_tokenizer): | |||
|         if len(np) > 1: | ||||
|             nps.append((np.start_char, np.end_char, np.root.tag_, np.text, np.root.ent_type_)) | ||||
|     for np in nps: | ||||
|         doc.merge(*np) | ||||
|         start, end, tag, lemma, ent_type = np | ||||
|         doc.merge(start, end, tag=tag, lemma=lemma, ent_type=ent_type) | ||||
| 
 | ||||
| 
 | ||||
| def test_doc_api_right_edge(en_tokenizer): | ||||
|  |  | |||
|  | @ -161,7 +161,7 @@ def test_is_sent_start(en_tokenizer): | |||
|     doc[5].is_sent_start = True | ||||
|     assert doc[5].is_sent_start is True | ||||
|     # Backwards compatibility | ||||
|     with pytest.warns(DeprecationWarning): | ||||
|         assert doc[0].sent_start is False | ||||
|     doc.is_parsed = True | ||||
|     assert len(list(doc.sents)) == 2 | ||||
| 
 | ||||
|  |  | |||
|  | @ -14,7 +14,7 @@ def test_spans_merge_tokens(en_tokenizer): | |||
|     assert len(doc) == 4 | ||||
|     assert doc[0].head.text == 'Angeles' | ||||
|     assert doc[1].head.text == 'start' | ||||
|     doc.merge(0, len('Los Angeles'), 'NNP', 'Los Angeles', 'GPE') | ||||
|     doc.merge(0, len('Los Angeles'), tag='NNP', lemma='Los Angeles', ent_type='GPE') | ||||
|     assert len(doc) == 3 | ||||
|     assert doc[0].text == 'Los Angeles' | ||||
|     assert doc[0].head.text == 'start' | ||||
|  | @ -36,7 +36,8 @@ def test_spans_merge_heads(en_tokenizer): | |||
|     doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads) | ||||
| 
 | ||||
|     assert len(doc) == 8 | ||||
|     doc.merge(doc[3].idx, doc[4].idx + len(doc[4]), doc[4].tag_, 'pilates class', 'O') | ||||
|     doc.merge(doc[3].idx, doc[4].idx + len(doc[4]), tag=doc[4].tag_, | ||||
|               lemma='pilates class', ent_type='O') | ||||
|     assert len(doc) == 7 | ||||
|     assert doc[0].head.i == 1 | ||||
|     assert doc[1].head.i == 1 | ||||
|  | @ -53,7 +54,8 @@ def test_span_np_merges(en_tokenizer): | |||
|     doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads) | ||||
| 
 | ||||
|     assert doc[4].head.i == 1 | ||||
|     doc.merge(doc[2].idx, doc[4].idx + len(doc[4]), 'NP', 'tool', 'O') | ||||
|     doc.merge(doc[2].idx, doc[4].idx + len(doc[4]), tag='NP', lemma='tool', | ||||
|               ent_type='O') | ||||
|     assert doc[2].head.i == 1 | ||||
| 
 | ||||
|     text = "displaCy is a lightweight and modern dependency parse tree visualization tool built with CSS3 and JavaScript." | ||||
|  | @ -63,7 +65,7 @@ def test_span_np_merges(en_tokenizer): | |||
| 
 | ||||
|     ents = [(e[0].idx, e[-1].idx + len(e[-1]), e.label_, e.lemma_) for e in doc.ents] | ||||
|     for start, end, label, lemma in ents: | ||||
|         merged = doc.merge(start, end, label, lemma, label) | ||||
|         merged = doc.merge(start, end, tag=label, lemma=lemma, ent_type=label) | ||||
|         assert merged != None, (start, end, label, lemma) | ||||
| 
 | ||||
| 
 | ||||
|  | @ -88,7 +90,7 @@ def test_spans_entity_merge(en_tokenizer): | |||
|     assert len(doc) == 17 | ||||
|     for ent in doc.ents: | ||||
|         label, lemma, type_ = (ent.root.tag_, ent.root.lemma_, max(w.ent_type_ for w in ent)) | ||||
|         ent.merge(label, lemma, type_) | ||||
|         ent.merge(label=label, lemma=lemma, ent_type=type_) | ||||
|     # check looping is ok | ||||
|     assert len(doc) == 15 | ||||
| 
 | ||||
|  | @ -105,8 +107,8 @@ def test_spans_sentence_update_after_merge(en_tokenizer): | |||
|     sent1, sent2 = list(doc.sents) | ||||
|     init_len = len(sent1) | ||||
|     init_len2 = len(sent2) | ||||
|     doc[0:2].merge('none', 'none', 'none') | ||||
|     doc[-2:].merge('none', 'none', 'none') | ||||
|     doc[0:2].merge(label='none', lemma='none', ent_type='none') | ||||
|     doc[-2:].merge(label='none', lemma='none', ent_type='none') | ||||
|     assert len(sent1) == init_len - 1 | ||||
|     assert len(sent2) == init_len2 - 1 | ||||
| 
 | ||||
|  | @ -122,5 +124,5 @@ def test_spans_subtree_size_check(en_tokenizer): | |||
|     doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads, deps=deps) | ||||
|     sent1 = list(doc.sents)[0] | ||||
|     init_len = len(list(sent1.root.subtree)) | ||||
|     doc[0:2].merge('none', 'none', 'none') | ||||
|     doc[0:2].merge(label='none', lemma='none', ent_type='none') | ||||
|     assert len(list(sent1.root.subtree)) == init_len - 1 | ||||
|  | @ -62,7 +62,11 @@ cdef class Tokenizer: | |||
|         return (self.__class__, args, None, None) | ||||
| 
 | ||||
|     cpdef Doc tokens_from_list(self, list strings): | ||||
|         # TODO: deprecation warning | ||||
|         util.deprecated( | ||||
|             "Tokenizer.from_from list is now deprecated. Create a new Doc " | ||||
|             "object instead and pass in the strings as the `words` keyword " | ||||
|             "argument, for example:\nfrom spacy.tokens import Doc\n" | ||||
|             "doc = Doc(nlp.vocab, words=[...])") | ||||
|         return Doc(self.vocab, words=strings) | ||||
| 
 | ||||
|     @cython.boundscheck(False) | ||||
|  |  | |||
|  | @ -842,17 +842,20 @@ cdef class Doc: | |||
|         """ | ||||
|         cdef unicode tag, lemma, ent_type | ||||
|         if len(args) == 3: | ||||
|             # TODO: Warn deprecation | ||||
|             util.deprecated( | ||||
|                 "Positional arguments to Doc.merge are deprecated. Instead, " | ||||
|                 "use the keyword arguments, for example tag=, lemma= or " | ||||
|                 "ent_type=.") | ||||
|             tag, lemma, ent_type = args | ||||
|             attributes[TAG] = tag | ||||
|             attributes[LEMMA] = lemma | ||||
|             attributes[ENT_TYPE] = ent_type | ||||
|         elif not args: | ||||
|             if "label" in attributes and 'ent_type' not in attributes: | ||||
|                 if isinstance(attributes["label"], int): | ||||
|                     attributes[ENT_TYPE] = attributes["label"] | ||||
|             if 'label' in attributes and 'ent_type' not in attributes: | ||||
|                 if isinstance(attributes['label'], int): | ||||
|                     attributes[ENT_TYPE] = attributes['label'] | ||||
|                 else: | ||||
|                     attributes[ENT_TYPE] = self.vocab.strings[attributes["label"]] | ||||
|                     attributes[ENT_TYPE] = self.vocab.strings[attributes['label']] | ||||
|             if 'ent_type' in attributes: | ||||
|                 attributes[ENT_TYPE] = attributes['ent_type'] | ||||
|         elif args: | ||||
|  |  | |||
|  | @ -19,6 +19,7 @@ from ..attrs cimport IS_OOV, IS_TITLE, IS_UPPER, LIKE_URL, LIKE_NUM, LIKE_EMAIL | |||
| from ..attrs cimport IS_STOP, ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX | ||||
| from ..attrs cimport LENGTH, CLUSTER, LEMMA, POS, TAG, DEP | ||||
| from ..compat import is_config | ||||
| from .. import util | ||||
| from .. import about | ||||
| from .underscore import Underscore | ||||
| 
 | ||||
|  | @ -330,8 +331,14 @@ cdef class Token: | |||
|             return self.c.r_kids | ||||
| 
 | ||||
|     property sent_start: | ||||
|         # TODO deprecation warning | ||||
|         def __get__(self): | ||||
|             util.deprecated( | ||||
|                 "Token.sent_start is now deprecated. Use Token.is_sent_start " | ||||
|                 "instead, which returns a boolean value or None if the answer " | ||||
|                 "is unknown – instead of a misleading 0 for False and 1 for " | ||||
|                 "True. It also fixes a quirk in the old logic that would " | ||||
|                 "always set the property to 0 for the first word of the " | ||||
|                 "document.") | ||||
|             # Handle broken backwards compatibility case: doc[0].sent_start | ||||
|             # was False. | ||||
|             if self.i == 0: | ||||
|  |  | |||
|  | @ -11,6 +11,8 @@ import sys | |||
| import textwrap | ||||
| import random | ||||
| from collections import OrderedDict | ||||
| import inspect | ||||
| import warnings | ||||
| from thinc.neural._classes.model import Model | ||||
| import functools | ||||
| 
 | ||||
|  | @ -501,6 +503,18 @@ def from_disk(path, readers, exclude): | |||
|     return path | ||||
| 
 | ||||
| 
 | ||||
def deprecated(message, filter='always'):
    """Show a deprecation warning.

    The warning is issued as a DeprecationWarning and attributed to the file
    name and line number of the outermost frame on the call stack (the last
    entry of inspect.stack()), not to this helper. The warnings filter is
    only changed inside a warnings.catch_warnings() block, so the previous
    filter configuration is restored afterwards.

    message (unicode): The message to display.
    filter (unicode): Action passed to warnings.simplefilter for
        DeprecationWarning, e.g. 'always' to display the warning or 'error'
        to raise it as an exception.
    """
    # context=0: only the file name and line number are used, so don't read
    # source lines for every frame on the stack.
    outermost = inspect.stack(0)[-1]
    with warnings.catch_warnings():
        warnings.simplefilter(filter, DeprecationWarning)
        warnings.warn_explicit(message, DeprecationWarning, outermost[1],
                               outermost[2])
| 
 | ||||
| 
 | ||||
| def print_table(data, title=None): | ||||
|     """Print data in table format. | ||||
| 
 | ||||
|  |  | |||
|  | @ -108,6 +108,7 @@ export class ModelLoader { | |||
|         this.renderCompat(tpl, modelId); | ||||
|         tpl.get('download').setAttribute('href', `${this.repo}/releases/tag/${model}`); | ||||
|         tpl.get('table').removeAttribute('data-loading'); | ||||
|         tpl.get('error').style.display = 'none'; | ||||
|     } | ||||
| 
 | ||||
|     renderDetails(tpl, { version, size, description, notes, author, url, | ||||
|  | @ -241,7 +242,7 @@ export class ModelComparer { | |||
|     } | ||||
| 
 | ||||
|     showError(err) { | ||||
|         console.error(err); | ||||
|         console.error(err || 'Error'); | ||||
|         this.tpl.get('result').style.display = 'none'; | ||||
|         this.tpl.get('error').style.display = 'block'; | ||||
|     } | ||||
|  | @ -286,6 +287,8 @@ export class ModelComparer { | |||
|         this.chart.update(); | ||||
|         [model1, model2].forEach((model, i) => this.renderTable(metaKeys, i + 1, model)); | ||||
|         this.tpl.get('result').removeAttribute('data-loading'); | ||||
|         this.tpl.get('error').style.display = 'none'; | ||||
|         this.tpl.get('result').style.display = 'block'; | ||||
|     } | ||||
| 
 | ||||
|     renderTable(metaKeys, i, { lang, name, version, size, description, | ||||
|  |  | |||
|  | @ -1,6 +1,14 @@ | |||
| //- 💫 DOCS > USAGE > WHAT'S NEW IN V2.0 > BACKWARDS INCOMPATIBILITIES | ||||
| 
 | ||||
| p | ||||
|     |  The following modules, classes and methods have changed between v1.x | ||||
|     |  and v2.0. | ||||
| 
 | ||||
| +table(["Old", "New"]) | ||||
|     +row | ||||
|         +cell #[code spacy.download.en], #[code spacy.download.de] | ||||
|         +cell #[+api("cli#download") #[code cli.download]] | ||||
| 
 | ||||
|     +row | ||||
|         +cell | ||||
|             |  #[code spacy.en] etc. | ||||
|  | @ -136,6 +144,17 @@ | |||
|         +cell #[code Token.is_ancestor_of] | ||||
|         +cell #[+api("token#is_ancestor") #[code Token.is_ancestor]] | ||||
| 
 | ||||
| +h(3, "deprecated") Deprecated | ||||
| 
 | ||||
| p | ||||
|     |  The following methods are deprecated. They can still be used, | ||||
|     |  but should be replaced. | ||||
| 
 | ||||
| +table(["Old", "New"]) | ||||
|     +row | ||||
|         +cell #[code Tokenizer.tokens_from_list] | ||||
|         +cell #[+api("doc") #[code Doc]] | ||||
| 
 | ||||
|     +row | ||||
|         +cell #[code Token.sent_start] | ||||
|         +cell #[+api("token") #[code Token.is_sent_start]] | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user