From 16aa092fb5cffb5ec7079951ea0c04cb96733b3e Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sat, 21 Sep 2019 14:37:06 +0200 Subject: [PATCH 01/13] Improve Morphology errors (#4314) * Improve Morphology errors * Also clean up some other errors * Update errors.py --- spacy/errors.py | 7 +++++++ spacy/morphology.pyx | 6 +++--- spacy/syntax/transition_system.pyx | 3 +-- spacy/util.py | 2 +- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/spacy/errors.py b/spacy/errors.py index b03bd6d23..a6b199a50 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -469,6 +469,13 @@ class Errors(object): "that case.") E166 = ("Can only merge DocBins with the same pre-defined attributes.\n" "Current DocBin: {current}\nOther DocBin: {other}") + E167 = ("Unknown morphological feature: '{feat}' ({feat_id}). This can " + "happen if the tagger was trained with a different set of " + "morphological features. If you're using a pre-trained model, make " + "sure that your models are up to date:\npython -m spacy validate") + E168 = ("Unknown field: {field}") + E169 = ("Can't find module: {module}") + E170 = ("Cannot apply transition {name}: invalid for the current state.") @add_codes diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx index 190ca8d00..c146094a9 100644 --- a/spacy/morphology.pyx +++ b/spacy/morphology.pyx @@ -197,7 +197,7 @@ cdef class Morphology: cdef attr_t feature for feature in features: if feature != 0 and feature not in self._feat_map.id2feat: - raise KeyError("Unknown feature: %s" % self.strings[feature]) + raise ValueError(Errors.E167.format(feat=self.strings[feature], feat_id=feature)) cdef MorphAnalysisC tag tag = create_rich_tag(features) cdef hash_t key = self.insert(tag) @@ -531,7 +531,7 @@ cdef attr_t get_field(const MorphAnalysisC* tag, int field_id) nogil: elif field == Field_VerbType: return tag.verb_type else: - raise ValueError("Unknown field: (%d)" % field_id) + raise ValueError(Errors.E168.format(field=field_id)) cdef int check_feature(const MorphAnalysisC* tag, attr_t feature) nogil: @@ -726,7 +726,7 @@ cdef int set_feature(MorphAnalysisC* tag, elif field == Field_VerbType: tag.verb_type = value_ else: - raise ValueError("Unknown feature: %s (%d)" % (FEATURE_NAMES.get(feature), feature)) + raise ValueError(Errors.E167.format(field=FEATURE_NAMES.get(feature), field_id=feature)) FIELDS = { diff --git a/spacy/syntax/transition_system.pyx b/spacy/syntax/transition_system.pyx index 523cd6699..fede704b5 100644 --- a/spacy/syntax/transition_system.pyx +++ b/spacy/syntax/transition_system.pyx @@ -96,8 +96,7 @@ cdef class TransitionSystem: def apply_transition(self, StateClass state, name): if not self.is_valid(state, name): - raise ValueError( - "Cannot apply transition {name}: invalid for the current state.".format(name=name)) + raise ValueError(Errors.E170.format(name=name)) action = self.lookup_transition(name) action.do(state.c, action.label) diff --git a/spacy/util.py b/spacy/util.py index e88d66452..dbe965392 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -136,7 +136,7 @@ def load_language_data(path): def get_module_path(module): if not hasattr(module, "__module__"): - raise ValueError("Can't find module {}".format(repr(module))) + raise ValueError(Errors.E169.format(module=repr(module))) return Path(sys.modules[module.__module__].__file__).parent From 42340740e3a6c7628a3457134db2a682f92cec34 Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Tue, 24 Sep 2019 10:47:17 +0200 Subject: [PATCH 02/13] update neuralcoref example (#4317) --- 
website/meta/universe.json | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/website/meta/universe.json b/website/meta/universe.json index f9dae7ead..32afcfa59 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -432,17 +432,21 @@ { "id": "neuralcoref", "slogan": "State-of-the-art coreference resolution based on neural nets and spaCy", - "description": "This coreference resolution module is based on the super fast [spaCy](https://spacy.io/) parser and uses the neural net scoring model described in [Deep Reinforcement Learning for Mention-Ranking Coreference Models](http://cs.stanford.edu/people/kevclark/resources/clark-manning-emnlp2016-deep.pdf) by Kevin Clark and Christopher D. Manning, EMNLP 2016. With ✨Neuralcoref v2.0, you should now be able to train the coreference resolution system on your own dataset — e.g., another language than English! — **provided you have an annotated dataset**.", + "description": "This coreference resolution module is based on the super fast [spaCy](https://spacy.io/) parser and uses the neural net scoring model described in [Deep Reinforcement Learning for Mention-Ranking Coreference Models](http://cs.stanford.edu/people/kevclark/resources/clark-manning-emnlp2016-deep.pdf) by Kevin Clark and Christopher D. Manning, EMNLP 2016. Since ✨Neuralcoref v2.0, you can train the coreference resolution system on your own dataset — e.g., another language than English! — **provided you have an annotated dataset**. Note that to use neuralcoref with spaCy > 2.1.0, you'll have to install neuralcoref from source.", "github": "huggingface/neuralcoref", "thumb": "https://i.imgur.com/j6FO9O6.jpg", "code_example": [ - "from neuralcoref import Coref", + "import spacy", + "import neuralcoref", "", - "coref = Coref()", - "clusters = coref.one_shot_coref(utterances=u\"She loves him.\", context=u\"My sister has a dog.\")", - "mentions = coref.get_mentions()", - "utterances = coref.get_utterances()", - "resolved_utterance_text = coref.get_resolved_utterances()" + "nlp = spacy.load('en')", + "neuralcoref.add_to_pipe(nlp)", + "doc1 = nlp('My sister has a dog. She loves him.')", + "print(doc1._.coref_clusters)", + "", + "doc2 = nlp('Angela lives in Boston. She is quite happy in that city.')", + "for ent in doc2.ents:", + " print(ent._.coref_cluster)" ], "author": "Hugging Face", "author_links": { From 38de08c7a99d5d8c490223126071afe7dd4f4b67 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Tue, 24 Sep 2019 14:31:09 +0200 Subject: [PATCH 03/13] Update README.md [ci skip] --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 104dc2bdf..6bdbc7e46 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ It's commercial open-source software, released under the MIT license. | [Contribute] | How to contribute to the spaCy project and code base. 
| [spacy 101]: https://spacy.io/usage/spacy-101 -[new in v2.2]: https://spacy.io/usage/v2-1 +[new in v2.1]: https://spacy.io/usage/v2-1 [usage guides]: https://spacy.io/usage/ [api reference]: https://spacy.io/api/ [models]: https://spacy.io/models From 52904b72700a3f301a26563d3f94493bad96a446 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Tue, 24 Sep 2019 23:06:24 +0200 Subject: [PATCH 04/13] Raise if on_match is not callable or None --- spacy/errors.py | 2 ++ spacy/matcher/matcher.pyx | 2 ++ spacy/tests/matcher/test_matcher_api.py | 8 ++++++++ 3 files changed, 12 insertions(+) diff --git a/spacy/errors.py b/spacy/errors.py index a6b199a50..02656e0e7 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -476,6 +476,8 @@ class Errors(object): E168 = ("Unknown field: {field}") E169 = ("Can't find module: {module}") E170 = ("Cannot apply transition {name}: invalid for the current state.") + E171 = ("Matcher.add received invalid on_match callback argument: expected " + "callable or None, but got: {arg_type}") @add_codes diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx index fe6ccc781..950a7b977 100644 --- a/spacy/matcher/matcher.pyx +++ b/spacy/matcher/matcher.pyx @@ -103,6 +103,8 @@ cdef class Matcher: *patterns (list): List of token descriptions. """ errors = {} + if on_match is not None and not hasattr(on_match, "__call__"): + raise ValueError(Errors.E171.format(arg_type=type(on_match))) for i, pattern in enumerate(patterns): if len(pattern) == 0: raise ValueError(Errors.E012.format(key=key)) diff --git a/spacy/tests/matcher/test_matcher_api.py b/spacy/tests/matcher/test_matcher_api.py index df35a1be2..0d640e1a2 100644 --- a/spacy/tests/matcher/test_matcher_api.py +++ b/spacy/tests/matcher/test_matcher_api.py @@ -410,3 +410,11 @@ def test_matcher_schema_token_attributes(en_vocab, pattern, text): assert len(matcher) == 1 matches = matcher(doc) assert len(matches) == 1 + + +def test_matcher_valid_callback(en_vocab): + """Test that on_match can only be None or callable.""" + matcher = Matcher(en_vocab) + with pytest.raises(ValueError): + matcher.add("TEST", [], [{"TEXT": "test"}]) + matcher(Doc(en_vocab, words=["test"])) From 09816f8323ef4298eee3c8b334d51b8ff4a6d2b1 Mon Sep 17 00:00:00 2001 From: Eric Semeniuc <3838856+esemeniuc@users.noreply.github.com> Date: Wed, 25 Sep 2019 03:17:54 -0700 Subject: [PATCH 05/13] update sense2vec version (#4320) --- website/meta/universe.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/meta/universe.json b/website/meta/universe.json index 32afcfa59..4571f595b 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -739,7 +739,7 @@ "slogan": "Use NLP to go beyond vanilla word2vec", "description": "sense2vec ([Trask et. al](https://arxiv.org/abs/1511.06388), 2015) is a nice twist on [word2vec](https://en.wikipedia.org/wiki/Word2vec) that lets you learn more interesting, detailed and context-sensitive word vectors. 
For an interactive example of the technology, see our [sense2vec demo](https://explosion.ai/demos/sense2vec) that lets you explore semantic similarities across all Reddit comments of 2015.", "github": "explosion/sense2vec", - "pip": "sense2vec==1.0.0a0", + "pip": "sense2vec==1.0.0a1", "thumb": "https://i.imgur.com/awfdhX6.jpg", "image": "https://explosion.ai/assets/img/demos/sense2vec.png", "url": "https://explosion.ai/demos/sense2vec", From 92ed4dc5e07d3eb5df067c4d7e5fc55ed50b0dfc Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 25 Sep 2019 13:11:00 +0200 Subject: [PATCH 06/13] Allow vectors name to be set in init-model (#4321) * Allow vectors name to be specified in init-model * Document --vectors-name argument to init-model * Update website/docs/api/cli.md Co-Authored-By: Ines Montani --- spacy/cli/init_model.py | 11 ++++++++--- website/docs/api/cli.md | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/spacy/cli/init_model.py b/spacy/cli/init_model.py index 955b420aa..8953ac6be 100644 --- a/spacy/cli/init_model.py +++ b/spacy/cli/init_model.py @@ -35,6 +35,7 @@ msg = Printer() clusters_loc=("Optional location of brown clusters data", "option", "c", str), vectors_loc=("Optional vectors file in Word2Vec format", "option", "v", str), prune_vectors=("Optional number of vectors to prune to", "option", "V", int), + vectors_name=("Optional name for the word vectors, e.g. en_core_web_lg.vectors", "vn", str) ) def init_model( lang, @@ -44,6 +45,7 @@ def init_model( jsonl_loc=None, vectors_loc=None, prune_vectors=-1, + vectors_name=None ): """ Create a new model from raw data, like word frequencies, Brown clusters @@ -78,7 +80,7 @@ def init_model( nlp = create_model(lang, lex_attrs) msg.good("Successfully created model") if vectors_loc is not None: - add_vectors(nlp, vectors_loc, prune_vectors) + add_vectors(nlp, vectors_loc, prune_vectors, vectors_name) vec_added = len(nlp.vocab.vectors) lex_added = len(nlp.vocab) msg.good( @@ -160,7 +162,7 @@ def create_model(lang, lex_attrs): return nlp -def add_vectors(nlp, vectors_loc, prune_vectors): +def add_vectors(nlp, vectors_loc, prune_vectors, name=None): vectors_loc = ensure_path(vectors_loc) if vectors_loc and vectors_loc.parts[-1].endswith(".npz"): nlp.vocab.vectors = Vectors(data=numpy.load(vectors_loc.open("rb"))) @@ -181,7 +183,10 @@ def add_vectors(nlp, vectors_loc, prune_vectors): lexeme.is_oov = False if vectors_data is not None: nlp.vocab.vectors = Vectors(data=vectors_data, keys=vector_keys) - nlp.vocab.vectors.name = "%s_model.vectors" % nlp.meta["lang"] + if name is None: + nlp.vocab.vectors.name = "%s_model.vectors" % nlp.meta["lang"] + else: + nlp.vocab.vectors.name = name nlp.meta["vectors"]["name"] = nlp.vocab.vectors.name if prune_vectors >= 1: nlp.vocab.prune_vectors(prune_vectors) diff --git a/website/docs/api/cli.md b/website/docs/api/cli.md index 5d42f6fb8..8c6caa443 100644 --- a/website/docs/api/cli.md +++ b/website/docs/api/cli.md @@ -538,6 +538,7 @@ $ python -m spacy init-model [lang] [output_dir] [--jsonl-loc] [--vectors-loc] | `--jsonl-loc`, `-j` | option | Optional location of JSONL-formatted [vocabulary file](/api/annotation#vocab-jsonl) with lexical attributes. | | `--vectors-loc`, `-v` | option | Optional location of vectors. Should be a file where the first row contains the dimensions of the vectors, followed by a space-separated Word2Vec table. File can be provided in `.txt` format or as a zipped text file in `.zip` or `.tar.gz` format. 
| | `--prune-vectors`, `-V` | flag | Number of vectors to prune the vocabulary to. Defaults to `-1` for no pruning. | +| `--vectors-name`, `-vn` | option | Name to assign to the word vectors in the `meta.json`, e.g. `en_core_web_md.vectors`. | | **CREATES** | model | A spaCy model containing the vocab and vectors. | ## Evaluate {#evaluate new="2"} From 1251b57dbbfd61be272bf76f4710ac58289e016a Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 25 Sep 2019 14:21:27 +0200 Subject: [PATCH 07/13] Fix vectors name arg to init-model --- spacy/cli/init_model.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/spacy/cli/init_model.py b/spacy/cli/init_model.py index 8953ac6be..8f77f749c 100644 --- a/spacy/cli/init_model.py +++ b/spacy/cli/init_model.py @@ -35,7 +35,12 @@ msg = Printer() clusters_loc=("Optional location of brown clusters data", "option", "c", str), vectors_loc=("Optional vectors file in Word2Vec format", "option", "v", str), prune_vectors=("Optional number of vectors to prune to", "option", "V", int), - vectors_name=("Optional name for the word vectors, e.g. en_core_web_lg.vectors", "vn", str) + vectors_name=( + "Optional name for the word vectors, e.g. en_core_web_lg.vectors", + "option", + "vn", + str, + ), ) def init_model( lang, @@ -45,7 +50,7 @@ def init_model( jsonl_loc=None, vectors_loc=None, prune_vectors=-1, - vectors_name=None + vectors_name=None, ): """ Create a new model from raw data, like word frequencies, Brown clusters From aafa091541d8420717265baf5bf403d52e32fde9 Mon Sep 17 00:00:00 2001 From: Em Zhan Date: Wed, 25 Sep 2019 13:42:18 -0400 Subject: [PATCH 08/13] Fix typo in documentation (#4322) * Fix typo 'probj' instead of 'pobj' * Add spaCy contributor agreement for zqianem --- .github/contributors/zqianem.md | 106 ++++++++++++++++++++++++++++ website/docs/usage/101/_pos-deps.md | 2 +- 2 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 .github/contributors/zqianem.md diff --git a/.github/contributors/zqianem.md b/.github/contributors/zqianem.md new file mode 100644 index 000000000..13f6ab214 --- /dev/null +++ b/.github/contributors/zqianem.md @@ -0,0 +1,106 @@ +# spaCy contributor agreement + +This spaCy Contributor Agreement (**"SCA"**) is based on the +[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf). +The SCA applies to any contribution that you make to any product or project +managed by us (the **"project"**), and sets out the intellectual property rights +you grant to us in the contributed materials. The term **"us"** shall mean +[ExplosionAI GmbH](https://explosion.ai/legal). The term +**"you"** shall mean the person or entity identified below. + +If you agree to be bound by these terms, fill in the information requested +below and include the filled-in version with your first pull request, under the +folder [`.github/contributors/`](/.github/contributors/). The name of the file +should be your GitHub username, with the extension `.md`. For example, the user +example_user would create the file `.github/contributors/example_user.md`. + +Read this agreement carefully before signing. These terms and conditions +constitute a binding legal agreement. + +## Contributor Agreement + +1. The term "contribution" or "contributed materials" means any source code, +object code, patch, tool, sample, graphic, specification, manual, +documentation, or any other material posted or submitted by you to the project. + +2. 
With respect to any worldwide copyrights, or copyright applications and +registrations, in your contribution: + + * you hereby assign to us joint ownership, and to the extent that such + assignment is or becomes invalid, ineffective or unenforceable, you hereby + grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge, + royalty-free, unrestricted license to exercise all rights under those + copyrights. This includes, at our option, the right to sublicense these same + rights to third parties through multiple levels of sublicensees or other + licensing arrangements; + + * you agree that each of us can do all things in relation to your + contribution as if each of us were the sole owners, and if one of us makes + a derivative work of your contribution, the one who makes the derivative + work (or has it made will be the sole owner of that derivative work; + + * you agree that you will not assert any moral rights in your contribution + against us, our licensees or transferees; + + * you agree that we may register a copyright in your contribution and + exercise all ownership rights associated with it; and + + * you agree that neither of us has any duty to consult with, obtain the + consent of, pay or render an accounting to the other for any use or + distribution of your contribution. + +3. With respect to any patents you own, or that you can license without payment +to any third party, you hereby grant to us a perpetual, irrevocable, +non-exclusive, worldwide, no-charge, royalty-free license to: + + * make, have made, use, sell, offer to sell, import, and otherwise transfer + your contribution in whole or in part, alone or in combination with or + included in any product, work or materials arising out of the project to + which your contribution was submitted, and + + * at our option, to sublicense these same rights to third parties through + multiple levels of sublicensees or other licensing arrangements. + +4. Except as set out above, you keep all right, title, and interest in your +contribution. The rights that you grant to us under these terms are effective +on the date you first submitted a contribution to us, even if your submission +took place before the date you sign these terms. + +5. You covenant, represent, warrant and agree that: + + * Each contribution that you submit is and shall be an original work of + authorship and you can legally grant the rights set out in this SCA; + + * to the best of your knowledge, each contribution will not violate any + third party's copyrights, trademarks, patents, or other intellectual + property rights; and + + * each contribution shall be in compliance with U.S. export control laws and + other applicable export and import laws. You agree to notify us if you + become aware of any circumstance which would make any of the foregoing + representations inaccurate in any respect. We may publicly disclose your + participation in the project, including the fact that you have signed the SCA. + +6. This SCA is governed by the laws of the State of California and applicable +U.S. Federal law. Any choice of law rules will not apply. + +7. Please place an “x” on one of the applicable statement below. Please do NOT +mark both statements: + + * [x] I am signing on behalf of myself as an individual and no other person + or entity, including my employer, has or will have rights with respect to my + contributions. + + * [ ] I am signing on behalf of my employer or a legal entity and I have the + actual authority to contractually bind that entity. 
+ +## Contributor Details + +| Field | Entry | +|------------------------------- | -------------------- | +| Name | Em Zhan | +| Company name (if applicable) | | +| Title or role (if applicable) | | +| Date | 2019-09-25 | +| GitHub username | zqianem | +| Website (optional) | | diff --git a/website/docs/usage/101/_pos-deps.md b/website/docs/usage/101/_pos-deps.md index b0e2b33b8..9d04d6ffc 100644 --- a/website/docs/usage/101/_pos-deps.md +++ b/website/docs/usage/101/_pos-deps.md @@ -45,7 +45,7 @@ for token in doc: | for | for | `ADP` | `IN` | `prep` | `xxx` | `True` | `True` | | \$ | \$ | `SYM` | `$` | `quantmod` | `$` | `False` | `False` | | 1 | 1 | `NUM` | `CD` | `compound` | `d` | `False` | `False` | -| billion | billion | `NUM` | `CD` | `probj` | `xxxx` | `True` | `False` | +| billion | billion | `NUM` | `CD` | `pobj` | `xxxx` | `True` | `False` | > #### Tip: Understanding tags and labels > From eced2f32116df0b01c423cb169d7d2c775b46597 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 25 Sep 2019 21:14:07 +0200 Subject: [PATCH 09/13] Set version to v2.2.0.dev9 --- spacy/about.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/about.py b/spacy/about.py index 2fa3f95da..adcac75dc 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -1,6 +1,6 @@ # fmt: off __title__ = "spacy" -__version__ = "2.2.0.dev8" +__version__ = "2.2.0.dev9" __summary__ = "Industrial-strength Natural Language Processing (NLP) in Python" __uri__ = "https://spacy.io" __author__ = "Explosion" From 27ace84f4a97bba18fa17a956622f0eedbff1157 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Thu, 26 Sep 2019 03:01:32 +0200 Subject: [PATCH 10/13] Support model name in init-model --- spacy/cli/init_model.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/spacy/cli/init_model.py b/spacy/cli/init_model.py index 8f77f749c..c285a12a6 100644 --- a/spacy/cli/init_model.py +++ b/spacy/cli/init_model.py @@ -41,6 +41,7 @@ msg = Printer() "vn", str, ), + model_name=("Optional name for the model meta", "option", "mn", str), ) def init_model( lang, @@ -51,6 +52,7 @@ def init_model( vectors_loc=None, prune_vectors=-1, vectors_name=None, + model_name=None, ): """ Create a new model from raw data, like word frequencies, Brown clusters @@ -82,7 +84,7 @@ def init_model( lex_attrs = read_attrs_from_deprecated(freqs_loc, clusters_loc) with msg.loading("Creating model..."): - nlp = create_model(lang, lex_attrs) + nlp = create_model(lang, lex_attrs, name=model_name) msg.good("Successfully created model") if vectors_loc is not None: add_vectors(nlp, vectors_loc, prune_vectors, vectors_name) @@ -145,7 +147,7 @@ def read_attrs_from_deprecated(freqs_loc, clusters_loc): return lex_attrs -def create_model(lang, lex_attrs): +def create_model(lang, lex_attrs, name=None): lang_class = get_lang_class(lang) nlp = lang_class() for lexeme in nlp.vocab: @@ -164,6 +166,8 @@ def create_model(lang, lex_attrs): else: oov_prob = DEFAULT_OOV_PROB nlp.vocab.cfg.update({"oov_prob": oov_prob}) + if name: + nlp.meta["name"] = name return nlp From 58533f01bf926546337ad2868abe7fc8f0a3b3ae Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Thu, 26 Sep 2019 03:03:50 +0200 Subject: [PATCH 11/13] Set version to v2.2.0.dev10 --- spacy/about.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/about.py b/spacy/about.py index adcac75dc..f2f43a946 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -1,6 +1,6 @@ # fmt: off __title__ = "spacy" -__version__ = "2.2.0.dev9" +__version__ = 
"2.2.0.dev10" __summary__ = "Industrial-strength Natural Language Processing (NLP) in Python" __uri__ = "https://spacy.io" __author__ = "Explosion" From da9a869d3fb7f51291164bac6ca1e5bd61e44541 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Thu, 26 Sep 2019 16:21:32 +0200 Subject: [PATCH 12/13] Update vectors name docs [ci skip] --- spacy/vectors.pyx | 2 +- spacy/vocab.pyx | 1 + website/docs/api/vectors.md | 1 + website/docs/api/vocab.md | 15 ++++++++------- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx index 2cb5b077f..3c238fe2d 100644 --- a/spacy/vectors.pyx +++ b/spacy/vectors.pyx @@ -63,7 +63,7 @@ cdef class Vectors: shape (tuple): Size of the table, as (# entries, # columns) data (numpy.ndarray): The vector data. keys (iterable): A sequence of keys, aligned with the data. - name (string): A name to identify the vectors table. + name (unicode): A name to identify the vectors table. RETURNS (Vectors): The newly created object. DOCS: https://spacy.io/api/vectors#init diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index 9c9f85d05..62c1791b9 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -45,6 +45,7 @@ cdef class Vocab: strings (StringStore): StringStore that maps strings to integers, and vice versa. lookups (Lookups): Container for large lookup tables and dictionaries. + name (unicode): Optional name to identify the vectors table. RETURNS (Vocab): The newly constructed object. """ lex_attr_getters = lex_attr_getters if lex_attr_getters is not None else {} diff --git a/website/docs/api/vectors.md b/website/docs/api/vectors.md index bfe0e5f3f..73300ee53 100644 --- a/website/docs/api/vectors.md +++ b/website/docs/api/vectors.md @@ -35,6 +35,7 @@ you can add vectors to later. | `data` | `ndarray[ndim=1, dtype='float32']` | The vector data. | | `keys` | iterable | A sequence of keys aligned with the data. | | `shape` | tuple | Size of the table as `(n_entries, n_columns)`, the number of entries and number of columns. Not required if you're initializing the object with `data` and `keys`. | +| `name` | unicode | A name to identify the vectors table. | | **RETURNS** | `Vectors` | The newly created object. | ## Vectors.\_\_getitem\_\_ {#getitem tag="method"} diff --git a/website/docs/api/vocab.md b/website/docs/api/vocab.md index 78e5f7541..ea0c2d219 100644 --- a/website/docs/api/vocab.md +++ b/website/docs/api/vocab.md @@ -21,13 +21,14 @@ Create the vocabulary. > vocab = Vocab(strings=["hello", "world"]) > ``` -| Name | Type | Description | -| ------------------ | -------------------- | ------------------------------------------------------------------------------------------------------------------ | -| `lex_attr_getters` | dict | A dictionary mapping attribute IDs to functions to compute them. Defaults to `None`. | -| `tag_map` | dict | A dictionary mapping fine-grained tags to coarse-grained parts-of-speech, and optionally morphological attributes. | -| `lemmatizer` | object | A lemmatizer. Defaults to `None`. | -| `strings` | `StringStore` / list | A [`StringStore`](/api/stringstore) that maps strings to hash values, and vice versa, or a list of strings. | -| **RETURNS** | `Vocab` | The newly constructed object. | +| Name | Type | Description | +| ------------------------------------------- | -------------------- | ------------------------------------------------------------------------------------------------------------------ | +| `lex_attr_getters` | dict | A dictionary mapping attribute IDs to functions to compute them. 
Defaults to `None`. | +| `tag_map` | dict | A dictionary mapping fine-grained tags to coarse-grained parts-of-speech, and optionally morphological attributes. | +| `lemmatizer` | object | A lemmatizer. Defaults to `None`. | +| `strings` | `StringStore` / list | A [`StringStore`](/api/stringstore) that maps strings to hash values, and vice versa, or a list of strings. | +| `vectors_name` 2.2 | unicode | A name to identify the vectors table. | +| **RETURNS** | `Vocab` | The newly constructed object. | ## Vocab.\_\_len\_\_ {#len tag="method"} From eb0649e38ef89055860221d7fbc98eea01bd2c46 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Thu, 26 Sep 2019 16:22:33 +0200 Subject: [PATCH 13/13] Fix tag [ci skip] --- website/docs/api/vectors.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/api/vectors.md b/website/docs/api/vectors.md index 73300ee53..ae62d8cfc 100644 --- a/website/docs/api/vectors.md +++ b/website/docs/api/vectors.md @@ -212,7 +212,7 @@ Iterate over `(key, vector)` pairs, in order. | ---------- | ----- | -------------------------------- | | **YIELDS** | tuple | `(key, vector)` pairs, in order. | -## Vectors.find (#find tag="method") +## Vectors.find {#find tag="method"} Look up one or more keys by row, or vice versa.
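
To make the behavior of [PATCH 04/13] concrete: the new check rejects any `on_match` argument to `Matcher.add` that is neither `None` nor callable, raising `E171` up front instead of failing later when the callback would be invoked. A minimal sketch, assuming spaCy v2.2's `Matcher.add(key, on_match, *patterns)` call signature; the key, pattern and strings below are invented for illustration:

```python
import spacy
from spacy.matcher import Matcher

nlp = spacy.blank("en")
matcher = Matcher(nlp.vocab)
pattern = [{"LOWER": "hello"}, {"LOWER": "world"}]

# A non-callable, non-None on_match argument now raises ValueError (E171).
try:
    matcher.add("HelloWorld", "not a callback", pattern)
except ValueError as err:
    print(err)

# None (no callback) and callables are still accepted.
matcher.add("HelloWorld", None, pattern)
doc = nlp("hello world")
print(matcher(doc))  # list of (match_id, start, end) tuples
```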
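
Similarly, the `name` argument documented in [PATCH 12/13] can be passed when constructing a vectors table directly. A minimal sketch, assuming the v2.2 `Vectors` constructor as described in the updated `vectors.md` parameter table; the shape, keys and name are made up for illustration:

```python
import numpy
from spacy.vectors import Vectors

# Three 5-dimensional vectors with an explicit table name.
data = numpy.zeros((3, 5), dtype="f")
keys = ["apple", "orange", "pear"]
vectors = Vectors(data=data, keys=keys, name="demo_model.vectors")
print(vectors.name)   # demo_model.vectors
print(vectors.shape)  # (3, 5)
```

On the command line, the same kind of name can be set via the `--vectors-name` and `--model-name` options added to `spacy init-model` in [PATCH 06/13] and [PATCH 10/13].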