From 4299a7f654cc96f266f140cfd33037a59fc51a15 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Fri, 23 Oct 2020 11:27:54 +0200 Subject: [PATCH 01/16] Setup / install / quickstart updates * Add `cuda110` to setup.cfg and quickstart dropdown * Switch to `pip` for pip-only packages in conda quickstart instructions * Update zh pkuseg install message with version range and conda * Remove `zh` from `extras_require` because the default doesn't require additional packages --- setup.cfg | 4 ++-- spacy/lang/zh/__init__.py | 2 +- website/src/widgets/quickstart-install.js | 10 +++++++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/setup.cfg b/setup.cfg index e42bb9c57..cc48e576b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -87,6 +87,8 @@ cuda101 = cupy-cuda101>=5.0.0b4,<9.0.0 cuda102 = cupy-cuda102>=5.0.0b4,<9.0.0 +cuda110 = + cupy-cuda110>=5.0.0b4,<9.0.0 # Language tokenizers with external dependencies ja = sudachipy>=0.4.9 @@ -95,8 +97,6 @@ ko = natto-py==0.9.0 th = pythainlp>=2.0 -zh = - spacy-pkuseg==0.0.26 [bdist_wheel] universal = false diff --git a/spacy/lang/zh/__init__.py b/spacy/lang/zh/__init__.py index 30560ed0d..9a8a21a63 100644 --- a/spacy/lang/zh/__init__.py +++ b/spacy/lang/zh/__init__.py @@ -17,7 +17,7 @@ from ... import util # fmt: off -_PKUSEG_INSTALL_MSG = "install spacy-pkuseg with `pip install spacy-pkuseg==0.0.26`" +_PKUSEG_INSTALL_MSG = "install spacy-pkuseg with `pip install \"spacy-pkuseg>=0.0.27,<0.1.0\"` or `conda install -c conda-forge \"spacy-pkuseg>=0.0.27,<0.1.0\"`" # fmt: on DEFAULT_CONFIG = """ diff --git a/website/src/widgets/quickstart-install.js b/website/src/widgets/quickstart-install.js index 37ae10da4..4e63ec0bd 100644 --- a/website/src/widgets/quickstart-install.js +++ b/website/src/widgets/quickstart-install.js @@ -7,7 +7,7 @@ import { repo } from '../components/util' const DEFAULT_MODELS = ['en'] const DEFAULT_OPT = 'efficiency' const DEFAULT_HARDWARE = 'cpu' -const DEFAULT_CUDA = 'cuda100' +const DEFAULT_CUDA = 'cuda102' const CUDA = { '8.0': 'cuda80', '9.0': 'cuda90', @@ -16,6 +16,7 @@ const CUDA = { '10.0': 'cuda100', '10.1': 'cuda101', '10.2': 'cuda102', + '11.0': 'cuda110', } const LANG_EXTRAS = ['zh', 'ja'] // only for languages with models const DATA = [ @@ -184,11 +185,14 @@ const QuickstartInstall = ({ id, title }) => { pip install -e '.[{pipExtras}]' )} + + # packages only available via pip + - conda install -c conda-forge spacy-transformers + pip install spacy-transformers - conda install -c conda-forge spacy-lookups-data + pip install spacy-lookups-data {models.map(({ code, models: modelOptions }) => { From 8fe7ede6679dffeb581650ddc4617d0e63d1a532 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Fri, 23 Oct 2020 11:34:43 +0200 Subject: [PATCH 02/16] Add install step to source install quickstart --- website/src/widgets/quickstart-install.js | 1 + 1 file changed, 1 insertion(+) diff --git a/website/src/widgets/quickstart-install.js b/website/src/widgets/quickstart-install.js index 4e63ec0bd..0ec9d2eb7 100644 --- a/website/src/widgets/quickstart-install.js +++ b/website/src/widgets/quickstart-install.js @@ -181,6 +181,7 @@ const QuickstartInstall = ({ id, title }) => { pip install -r requirements.txt python setup.py build_ext --inplace + python setup.py install {(train || hardware == 'gpu') && ( pip install -e '.[{pipExtras}]' )} From c0b76f4c19ea8e5d50ac327874a34155b91d129f Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Fri, 23 Oct 2020 11:36:36 +0200 Subject: [PATCH 03/16] Add install step to "Compile from source" --- website/docs/usage/index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/website/docs/usage/index.md b/website/docs/usage/index.md index ccb59e937..8d40ee61e 100644 --- a/website/docs/usage/index.md +++ b/website/docs/usage/index.md @@ -174,6 +174,7 @@ $ source .env/bin/activate # activate virtual env $ export PYTHONPATH=`pwd` # set Python path to spaCy dir $ pip install -r requirements.txt # install all requirements $ python setup.py build_ext --inplace # compile spaCy +$ python setup.py install # install spaCy ``` Compared to regular install via pip, the From af26886fffb4d7da8c8606e58dad0acb956d48b5 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Fri, 23 Oct 2020 11:38:14 +0200 Subject: [PATCH 04/16] Fix formatting --- website/src/widgets/quickstart-install.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/src/widgets/quickstart-install.js b/website/src/widgets/quickstart-install.js index 0ec9d2eb7..fd0b2d632 100644 --- a/website/src/widgets/quickstart-install.js +++ b/website/src/widgets/quickstart-install.js @@ -187,8 +187,8 @@ const QuickstartInstall = ({ id, title }) => { )} - # packages only available via pip - + # packages only available via pip + pip install spacy-transformers From 253480353c43152a05ab36d7629c269bf58ebf2c Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Fri, 23 Oct 2020 11:39:25 +0200 Subject: [PATCH 05/16] Remove zh from quickstart extras --- website/src/widgets/quickstart-install.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/src/widgets/quickstart-install.js b/website/src/widgets/quickstart-install.js index fd0b2d632..7e3bed136 100644 --- a/website/src/widgets/quickstart-install.js +++ b/website/src/widgets/quickstart-install.js @@ -18,7 +18,7 @@ const CUDA = { '10.2': 'cuda102', '11.0': 'cuda110', } -const LANG_EXTRAS = ['zh', 'ja'] // only for languages with models +const LANG_EXTRAS = ['ja'] // only for languages with models const DATA = [ { id: 'os', From 080066ae74c10d5ef2a61eb123ad608b354a1103 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Mon, 26 Oct 2020 10:37:25 +0100 Subject: [PATCH 06/16] remove TODO note --- spacy/scorer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/scorer.py b/spacy/scorer.py index 273bda898..97f54f4fb 100644 --- a/spacy/scorer.py +++ b/spacy/scorer.py @@ -478,7 +478,7 @@ class Scorer: negative_labels (Iterable[str]): The string values that refer to no annotation (e.g. "NIL") RETURNS (Dict[str, Any]): A dictionary containing the scores. - DOCS (TODO): https://nightly.spacy.io/api/scorer#score_links + DOCS: https://nightly.spacy.io/api/scorer#score_links """ f_per_type = {} for example in examples: From a664994a8143355500335f291ece33e0bc22e35d Mon Sep 17 00:00:00 2001 From: svlandeg Date: Mon, 26 Oct 2020 10:52:47 +0100 Subject: [PATCH 07/16] adding score method to explanation of new component --- website/docs/usage/layers-architectures.md | 52 ++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/website/docs/usage/layers-architectures.md b/website/docs/usage/layers-architectures.md index aa62a77d4..d91fd6bf1 100644 --- a/website/docs/usage/layers-architectures.md +++ b/website/docs/usage/layers-architectures.md @@ -843,6 +843,27 @@ def __call__(self, Doc doc): return doc ``` +There is one more optional method to implement: [`score`](/api/pipe#score) +calculates the performance of your component on a set of examples, and +returns the results as a dictionary: + +```python +### The score method +def score(self, examples: Iterable[Example]) -> Dict[str, Any]: + prf = PRFScore() + for example in examples: + ... + + return { + f"rel_micro_p": prf.precision, + f"rel_micro_r": prf.recall, + f"rel_micro_f": prf.fscore, + } +``` + +This is particularly useful to see the scores on the development corpus +when training the component with [`spacy train`](/api/cli#training). + Once our `TrainablePipe` subclass is fully implemented, we can [register](/usage/processing-pipelines#custom-components-factories) the component with the [`@Language.factory`](/api/language#factory) decorator. This @@ -876,6 +897,37 @@ def make_relation_extractor(nlp, name, model): return RelationExtractor(nlp.vocab, model, name) ``` +You can extend the decorator to include information such as the type of +annotations that are required for this component to run, the type of annotations +it produces, and the scores that can be calculated: + +> #### config.cfg (excerpt) +> +> ```ini +> [training.score_weights] +> rel_micro_p: 0.0 +> rel_micro_r: 0.0 +> rel_micro_f: 1.0 +> ``` + +```python +### Factory annotations +from spacy.language import Language + +@Language.factory( + "relation_extractor", + requires=["doc.ents", "token.ent_iob", "token.ent_type"], + assigns=["doc._.rel"], + default_score_weights={ + "rel_micro_p": None, + "rel_micro_r": None, + "rel_micro_f": None, + }, +) +def make_relation_extractor(nlp, name, model): + return RelationExtractor(nlp.vocab, model, name) +``` + From e95d9caa878a39070ed47af3b390e161d6486f40 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Mon, 26 Oct 2020 11:09:25 +0100 Subject: [PATCH 08/16] small edits --- website/docs/usage/layers-architectures.md | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/website/docs/usage/layers-architectures.md b/website/docs/usage/layers-architectures.md index d91fd6bf1..db0b81112 100644 --- a/website/docs/usage/layers-architectures.md +++ b/website/docs/usage/layers-architectures.md @@ -855,10 +855,10 @@ def score(self, examples: Iterable[Example]) -> Dict[str, Any]: ... return { - f"rel_micro_p": prf.precision, - f"rel_micro_r": prf.recall, - f"rel_micro_f": prf.fscore, - } + "rel_micro_p": prf.precision, + "rel_micro_r": prf.recall, + "rel_micro_f": prf.fscore, + } ``` This is particularly useful to see the scores on the development corpus @@ -886,6 +886,11 @@ assigns it a name and lets you create the component with > [components.relation_extractor.model.get_candidates] > @misc = "rel_cand_generator.v1" > max_length = 20 +> +> [training.score_weights] +> rel_micro_p: 0.0 +> rel_micro_r: 0.0 +> rel_micro_f: 1.0 > ``` ```python @@ -904,14 +909,11 @@ it produces, and the scores that can be calculated: > #### config.cfg (excerpt) > > ```ini -> [training.score_weights] -> rel_micro_p: 0.0 -> rel_micro_r: 0.0 -> rel_micro_f: 1.0 + > ``` ```python -### Factory annotations +### Factory annotations {highlight="5-11"} from spacy.language import Language @Language.factory( From 5878ff6bcdd21847a4f9e0b97464d6dcfdd421c0 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Mon, 26 Oct 2020 11:13:02 +0100 Subject: [PATCH 09/16] cleanup --- website/docs/usage/layers-architectures.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/website/docs/usage/layers-architectures.md b/website/docs/usage/layers-architectures.md index db0b81112..a1b58f41e 100644 --- a/website/docs/usage/layers-architectures.md +++ b/website/docs/usage/layers-architectures.md @@ -906,12 +906,6 @@ You can extend the decorator to include information such as the type of annotations that are required for this component to run, the type of annotations it produces, and the scores that can be calculated: -> #### config.cfg (excerpt) -> -> ```ini - -> ``` - ```python ### Factory annotations {highlight="5-11"} from spacy.language import Language From 77688b0072ebb4580870ba5b964e60a298ed24e2 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Mon, 26 Oct 2020 11:14:34 +0100 Subject: [PATCH 10/16] fix config --- website/docs/usage/layers-architectures.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/docs/usage/layers-architectures.md b/website/docs/usage/layers-architectures.md index a1b58f41e..641db02f5 100644 --- a/website/docs/usage/layers-architectures.md +++ b/website/docs/usage/layers-architectures.md @@ -888,9 +888,9 @@ assigns it a name and lets you create the component with > max_length = 20 > > [training.score_weights] -> rel_micro_p: 0.0 -> rel_micro_r: 0.0 -> rel_micro_f: 1.0 +> rel_micro_p = 0.0 +> rel_micro_r = 0.0 +> rel_micro_f = 1.0 > ``` ```python From 448bfbdc30a29225547562e3f222c08565b50021 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Tue, 10 Nov 2020 09:44:52 +0800 Subject: [PATCH 11/16] Remove conda from nightly install widget [ci skip] --- website/src/widgets/quickstart-install.js | 104 +++++++++++----------- 1 file changed, 50 insertions(+), 54 deletions(-) diff --git a/website/src/widgets/quickstart-install.js b/website/src/widgets/quickstart-install.js index 7e3bed136..df8d971f0 100644 --- a/website/src/widgets/quickstart-install.js +++ b/website/src/widgets/quickstart-install.js @@ -19,54 +19,6 @@ const CUDA = { '11.0': 'cuda110', } const LANG_EXTRAS = ['ja'] // only for languages with models -const DATA = [ - { - id: 'os', - title: 'Operating system', - options: [ - { id: 'mac', title: 'macOS / OSX', checked: true }, - { id: 'windows', title: 'Windows' }, - { id: 'linux', title: 'Linux' }, - ], - }, - { - id: 'package', - title: 'Package manager', - options: [ - { id: 'pip', title: 'pip', checked: true }, - { id: 'conda', title: 'conda' }, - { id: 'source', title: 'from source' }, - ], - }, - { - id: 'hardware', - title: 'Hardware', - options: [ - { id: 'cpu', title: 'CPU', checked: DEFAULT_HARDWARE === 'cpu' }, - { id: 'gpu', title: 'GPU', checked: DEFAULT_HARDWARE == 'gpu' }, - ], - dropdown: Object.keys(CUDA).map(id => ({ id: CUDA[id], title: `CUDA ${id}` })), - defaultValue: DEFAULT_CUDA, - }, - { - id: 'config', - title: 'Configuration', - multiple: true, - options: [ - { - id: 'venv', - title: 'virtual env', - help: 'Use a virtual environment and install spaCy into a user directory', - }, - { - id: 'train', - title: 'train models', - help: - 'Check this if you plan to train your own models with spaCy to install extra dependencies and data resources', - }, - ], - }, -] const QuickstartInstall = ({ id, title }) => { const [train, setTrain] = useState(false) @@ -100,7 +52,56 @@ const QuickstartInstall = ({ id, title }) => { const pkg = nightly ? 'spacy-nightly' : 'spacy' const models = languages.filter(({ models }) => models !== null) const data = [ - ...DATA, + { + id: 'os', + title: 'Operating system', + options: [ + { id: 'mac', title: 'macOS / OSX', checked: true }, + { id: 'windows', title: 'Windows' }, + { id: 'linux', title: 'Linux' }, + ], + }, + { + id: 'package', + title: 'Package manager', + options: [ + { id: 'pip', title: 'pip', checked: true }, + !nightly ? { id: 'conda', title: 'conda' } : null, + { id: 'source', title: 'from source' }, + ].filter(o => o), + }, + { + id: 'hardware', + title: 'Hardware', + options: [ + { id: 'cpu', title: 'CPU', checked: DEFAULT_HARDWARE === 'cpu' }, + { id: 'gpu', title: 'GPU', checked: DEFAULT_HARDWARE == 'gpu' }, + ], + dropdown: Object.keys(CUDA).map(id => ({ + id: CUDA[id], + title: `CUDA ${id}`, + })), + defaultValue: DEFAULT_CUDA, + }, + { + id: 'config', + title: 'Configuration', + multiple: true, + options: [ + { + id: 'venv', + title: 'virtual env', + help: + 'Use a virtual environment and install spaCy into a user directory', + }, + { + id: 'train', + title: 'train models', + help: + 'Check this if you plan to train your own models with spaCy to install extra dependencies and data resources', + }, + ], + }, { id: 'models', title: 'Trained pipelines', @@ -142,11 +143,6 @@ const QuickstartInstall = ({ id, title }) => { setters={setters} showDropdown={showDropdown} > - {nightly && ( - - # 🚨 Nightly releases are currently only available via pip - - )} python -m venv .env source .env/bin/activate From 3ca5c7082df36a1970277c4accc0f02176dc2459 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Tue, 10 Nov 2020 17:27:49 +0800 Subject: [PATCH 12/16] Use pip install . in quickstart [ci skip] --- website/src/widgets/quickstart-install.js | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/website/src/widgets/quickstart-install.js b/website/src/widgets/quickstart-install.js index df8d971f0..6bb14b687 100644 --- a/website/src/widgets/quickstart-install.js +++ b/website/src/widgets/quickstart-install.js @@ -177,11 +177,9 @@ const QuickstartInstall = ({ id, title }) => { pip install -r requirements.txt python setup.py build_ext --inplace - python setup.py install - {(train || hardware == 'gpu') && ( - pip install -e '.[{pipExtras}]' - )} - + + pip install {train || hardware == 'gpu' ? `'.[${pipExtras}]'` : '.'} + # packages only available via pip From a0c899a0fff08e09f7ebabb8e0e50baa4f4b0897 Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Tue, 10 Nov 2020 13:14:47 +0100 Subject: [PATCH 13/16] Fix textcat + transformer architecture (#6371) * add pooling to textcat TransformerListener * maybe_get_dim in case it's null --- spacy/cli/templates/quickstart_training.jinja | 3 +++ spacy/ml/models/textcat.py | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja index 1194438de..37983cb1a 100644 --- a/spacy/cli/templates/quickstart_training.jinja +++ b/spacy/cli/templates/quickstart_training.jinja @@ -143,6 +143,9 @@ nO = null @architectures = "spacy-transformers.TransformerListener.v1" grad_factor = 1.0 +[components.textcat.model.tok2vec.pooling] +@layers = "reduce_mean.v1" + [components.textcat.model.linear_model] @architectures = "spacy.TextCatBOW.v1" exclusive_classes = false diff --git a/spacy/ml/models/textcat.py b/spacy/ml/models/textcat.py index d4aed2839..2ec036810 100644 --- a/spacy/ml/models/textcat.py +++ b/spacy/ml/models/textcat.py @@ -61,14 +61,14 @@ def build_bow_text_classifier( @registry.architectures.register("spacy.TextCatEnsemble.v2") -def build_text_classifier( +def build_text_classifier_v2( tok2vec: Model[List[Doc], List[Floats2d]], linear_model: Model[List[Doc], Floats2d], nO: Optional[int] = None, ) -> Model[List[Doc], Floats2d]: exclusive_classes = not linear_model.attrs["multi_label"] with Model.define_operators({">>": chain, "|": concatenate}): - width = tok2vec.get_dim("nO") + width = tok2vec.maybe_get_dim("nO") cnn_model = ( tok2vec >> list2ragged() @@ -94,7 +94,7 @@ def build_text_classifier( # TODO: move to legacy @registry.architectures.register("spacy.TextCatEnsemble.v1") -def build_text_classifier( +def build_text_classifier_v1( width: int, embed_size: int, pretrained_vectors: Optional[bool], From a7e7d6c6c902055b66b208c299cfe8b578a497d8 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Tue, 10 Nov 2020 13:15:09 +0100 Subject: [PATCH 14/16] Ignore misaligned in Morphologizer.get_loss (#6363) Fix bug where `Morphologizer.get_loss` treated misaligned annotation as `EMPTY_MORPH` rather than ignoring it. Remove unneeded default `EMPTY_MORPH` mappings. --- spacy/pipeline/morphologizer.pyx | 19 ++++++++----------- spacy/tests/pipeline/test_morphologizer.py | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx index a03c7daf0..305f8f5df 100644 --- a/spacy/pipeline/morphologizer.pyx +++ b/spacy/pipeline/morphologizer.pyx @@ -92,9 +92,6 @@ class Morphologizer(Tagger): # 2) labels_pos stores a mapping from morph+POS->POS cfg = {"labels_morph": labels_morph or {}, "labels_pos": labels_pos or {}} self.cfg = dict(sorted(cfg.items())) - # add mappings for empty morph - self.cfg["labels_morph"][Morphology.EMPTY_MORPH] = Morphology.EMPTY_MORPH - self.cfg["labels_pos"][Morphology.EMPTY_MORPH] = POS_IDS[""] @property def labels(self): @@ -201,8 +198,8 @@ class Morphologizer(Tagger): doc_tag_ids = doc_tag_ids.get() for j, tag_id in enumerate(doc_tag_ids): morph = self.labels[tag_id] - doc.c[j].morph = self.vocab.morphology.add(self.cfg["labels_morph"][morph]) - doc.c[j].pos = self.cfg["labels_pos"][morph] + doc.c[j].morph = self.vocab.morphology.add(self.cfg["labels_morph"].get(morph, 0)) + doc.c[j].pos = self.cfg["labels_pos"].get(morph, 0) def get_loss(self, examples, scores): """Find the loss and gradient of loss for the batch of documents and @@ -228,12 +225,12 @@ class Morphologizer(Tagger): # doesn't, so if either is None, treat both as None here so that # truths doesn't end up with an unknown morph+POS combination if pos is None or morph is None: - pos = None - morph = None - label_dict = Morphology.feats_to_dict(morph) - if pos: - label_dict[self.POS_FEAT] = pos - label = self.vocab.strings[self.vocab.morphology.add(label_dict)] + label = None + else: + label_dict = Morphology.feats_to_dict(morph) + if pos: + label_dict[self.POS_FEAT] = pos + label = self.vocab.strings[self.vocab.morphology.add(label_dict)] eg_truths.append(label) truths.append(eg_truths) d_scores, loss = loss_func(scores, truths) diff --git a/spacy/tests/pipeline/test_morphologizer.py b/spacy/tests/pipeline/test_morphologizer.py index 85d1d6c8b..add42e00a 100644 --- a/spacy/tests/pipeline/test_morphologizer.py +++ b/spacy/tests/pipeline/test_morphologizer.py @@ -116,3 +116,23 @@ def test_overfitting_IO(): no_batch_deps = [doc.to_array([MORPH]) for doc in [nlp(text) for text in texts]] assert_equal(batch_deps_1, batch_deps_2) assert_equal(batch_deps_1, no_batch_deps) + + # Test without POS + nlp.remove_pipe("morphologizer") + nlp.add_pipe("morphologizer") + for example in train_examples: + for token in example.reference: + token.pos_ = "" + optimizer = nlp.initialize(get_examples=lambda: train_examples) + for i in range(50): + losses = {} + nlp.update(train_examples, sgd=optimizer, losses=losses) + assert losses["morphologizer"] < 0.00001 + + # Test the trained model + test_text = "I like blue ham" + doc = nlp(test_text) + gold_morphs = ["Feat=N", "Feat=V", "", ""] + gold_pos_tags = ["", "", "", ""] + assert [str(t.morph) for t in doc] == gold_morphs + assert [t.pos_ for t in doc] == gold_pos_tags From 96726ec1f62ade72b7904b2b21b4a893f93d0ca8 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Tue, 17 Nov 2020 14:36:44 +0100 Subject: [PATCH 15/16] Fix DocBin init in training example (#6396) --- website/docs/usage/training.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/usage/training.md b/website/docs/usage/training.md index 274ea5989..58c846e9d 100644 --- a/website/docs/usage/training.md +++ b/website/docs/usage/training.md @@ -969,7 +969,7 @@ import spacy from spacy.tokens import Doc, DocBin nlp = spacy.blank("en") -docbin = DocBin(nlp.vocab) +docbin = DocBin() words = ["Apple", "is", "looking", "at", "buying", "U.K.", "startup", "."] spaces = [True, True, True, True, True, True, True, False] ents = ["B-ORG", "O", "O", "O", "O", "B-GPE", "O", "O"] From 165993d8e57f2bd0ea35f4792f414951dc6c4787 Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Thu, 19 Nov 2020 14:11:38 +0100 Subject: [PATCH 16/16] fix typo in transformer docs (#6404) --- website/docs/api/transformer.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/website/docs/api/transformer.md b/website/docs/api/transformer.md index 5754d2238..e31c8ad2c 100644 --- a/website/docs/api/transformer.md +++ b/website/docs/api/transformer.md @@ -61,11 +61,11 @@ on the transformer architectures and their arguments and hyperparameters. > nlp.add_pipe("transformer", config=DEFAULT_CONFIG) > ``` -| Setting | Description | -| ----------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `max_batch_items` | Maximum size of a padded batch. Defaults to `4096`. ~~int~~ | -| `set_extra_annotations` | Function that takes a batch of `Doc` objects and transformer outputs to set additional annotations on the `Doc`. The `Doc._.transformer_data` attribute is set prior to calling the callback. Defaults to `null_annotation_setter` (no additional annotations). ~~Callable[[List[Doc], FullTransformerBatch], None]~~ | -| `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) wrapping the transformer. Defaults to [TransformerModel](/api/architectures#TransformerModel). ~~Model[List[Doc], FullTransformerBatch]~~ | +| Setting | Description | +| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `max_batch_items` | Maximum size of a padded batch. Defaults to `4096`. ~~int~~ | +| `set_extra_annotations` | Function that takes a batch of `Doc` objects and transformer outputs to set additional annotations on the `Doc`. The `Doc._.trf_data` attribute is set prior to calling the callback. Defaults to `null_annotation_setter` (no additional annotations). ~~Callable[[List[Doc], FullTransformerBatch], None]~~ | +| `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) wrapping the transformer. Defaults to [TransformerModel](/api/architectures#TransformerModel). ~~Model[List[Doc], FullTransformerBatch]~~ | ```python https://github.com/explosion/spacy-transformers/blob/master/spacy_transformers/pipeline_component.py