From cca1e21ad6c68f55791c354b1fc11498c06e7782 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Thu, 31 Mar 2022 14:16:21 +0200 Subject: [PATCH 01/14] Revert "Add click pin to avoid typer issues (#10573)" This reverts commit 9966e08f32cf59963591b589bf816aa114ed39a9. --- requirements.txt | 1 - setup.cfg | 1 - 2 files changed, 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 616e793e9..ca4099be5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,7 +11,6 @@ wasabi>=0.8.1,<1.1.0 srsly>=2.4.1,<3.0.0 catalogue>=2.0.6,<2.1.0 typer>=0.3.0,<0.5.0 -click<8.1.0 pathy>=0.3.5 # Third party dependencies numpy>=1.15.0 diff --git a/setup.cfg b/setup.cfg index 2d446e442..586a044ff 100644 --- a/setup.cfg +++ b/setup.cfg @@ -52,7 +52,6 @@ install_requires = srsly>=2.4.1,<3.0.0 catalogue>=2.0.6,<2.1.0 typer>=0.3.0,<0.5.0 - click<8.1.0 pathy>=0.3.5 # Third-party dependencies tqdm>=4.38.0,<5.0.0 From a8b883fead31c448754842c5cce3f2615ba91a64 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Fri, 2 Dec 2022 09:33:52 +0100 Subject: [PATCH 02/14] Fix spancat for zero suggestions (#11860) * Add test for spancat predict with zero suggestions * Fix spancat for zero suggestions * Undo changes to extract_spans * Use .sum() as in update --- spacy/pipeline/spancat.py | 5 +++- spacy/tests/pipeline/test_spancat.py | 43 ++++++++++++++++++++++------ 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/spacy/pipeline/spancat.py b/spacy/pipeline/spancat.py index 5d0d8f17e..5bf934cb1 100644 --- a/spacy/pipeline/spancat.py +++ b/spacy/pipeline/spancat.py @@ -268,7 +268,10 @@ class SpanCategorizer(TrainablePipe): DOCS: https://spacy.io/api/spancategorizer#predict """ indices = self.suggester(docs, ops=self.model.ops) - scores = self.model.predict((docs, indices)) # type: ignore + if indices.lengths.sum() == 0: + scores = self.model.ops.alloc2f(0, 0) + else: + scores = self.model.predict((docs, indices)) # type: ignore return indices, scores def set_annotations(self, docs: Iterable[Doc], indices_scores) -> None: diff --git a/spacy/tests/pipeline/test_spancat.py b/spacy/tests/pipeline/test_spancat.py index 8060bc621..f8c9456da 100644 --- a/spacy/tests/pipeline/test_spancat.py +++ b/spacy/tests/pipeline/test_spancat.py @@ -372,24 +372,39 @@ def test_overfitting_IO_overlapping(): def test_zero_suggestions(): - # Test with a suggester that returns 0 suggestions + # Test with a suggester that can return 0 suggestions - @registry.misc("test_zero_suggester") - def make_zero_suggester(): - def zero_suggester(docs, *, ops=None): + @registry.misc("test_mixed_zero_suggester") + def make_mixed_zero_suggester(): + def mixed_zero_suggester(docs, *, ops=None): if ops is None: ops = get_current_ops() - return Ragged( - ops.xp.zeros((0, 0), dtype="i"), ops.xp.zeros((len(docs),), dtype="i") - ) + spans = [] + lengths = [] + for doc in docs: + if len(doc) > 0 and len(doc) % 2 == 0: + spans.append((0, 1)) + lengths.append(1) + else: + lengths.append(0) + spans = ops.asarray2i(spans) + lengths_array = ops.asarray1i(lengths) + if len(spans) > 0: + output = Ragged(ops.xp.vstack(spans), lengths_array) + else: + output = Ragged(ops.xp.zeros((0, 0), dtype="i"), lengths_array) + return output - return zero_suggester + return mixed_zero_suggester fix_random_seed(0) nlp = English() spancat = nlp.add_pipe( "spancat", - config={"suggester": {"@misc": "test_zero_suggester"}, "spans_key": SPAN_KEY}, + config={ + "suggester": {"@misc": "test_mixed_zero_suggester"}, + "spans_key": SPAN_KEY, + }, ) train_examples = make_examples(nlp) optimizer = nlp.initialize(get_examples=lambda: train_examples) @@ -397,3 +412,13 @@ def test_zero_suggestions(): assert set(spancat.labels) == {"LOC", "PERSON"} nlp.update(train_examples, sgd=optimizer) + # empty doc + nlp("") + # single doc with zero suggestions + nlp("one") + # single doc with one suggestion + nlp("two two") + # batch with mixed zero/one suggestions + list(nlp.pipe(["one", "two two", "three three three", "", "four four four four"])) + # batch with no suggestions + list(nlp.pipe(["", "one", "three three three"])) From 21204f17c701e32269897d9154c6807cbd9deb0a Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Fri, 25 Nov 2022 13:00:57 +0100 Subject: [PATCH 03/14] Add smart_open requirement, update deprecated options (#11864) * Switch from deprecated `ignore_ext` to `compression` * Add upload/download test for local files --- requirements.txt | 1 + setup.cfg | 3 ++- spacy/cli/_util.py | 2 +- spacy/tests/test_cli.py | 18 +++++++++++++++++- 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index ca4099be5..96523041b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,6 +12,7 @@ srsly>=2.4.1,<3.0.0 catalogue>=2.0.6,<2.1.0 typer>=0.3.0,<0.5.0 pathy>=0.3.5 +smart-open>=5.2.1,<7.0.0 # Third party dependencies numpy>=1.15.0 requests>=2.13.0,<3.0.0 diff --git a/setup.cfg b/setup.cfg index 586a044ff..8a5e054b5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -51,9 +51,10 @@ install_requires = wasabi>=0.8.1,<1.1.0 srsly>=2.4.1,<3.0.0 catalogue>=2.0.6,<2.1.0 + # Third-party dependencies typer>=0.3.0,<0.5.0 pathy>=0.3.5 - # Third-party dependencies + smart-open>=5.2.1,<7.0.0 tqdm>=4.38.0,<5.0.0 numpy>=1.15.0 requests>=2.13.0,<3.0.0 diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py index fb680d888..1b9740d0b 100644 --- a/spacy/cli/_util.py +++ b/spacy/cli/_util.py @@ -358,7 +358,7 @@ def download_file(src: Union[str, "Pathy"], dest: Path, *, force: bool = False) if dest.exists() and not force: return None src = str(src) - with smart_open.open(src, mode="rb", ignore_ext=True) as input_file: + with smart_open.open(src, mode="rb", compression="disable") as input_file: with dest.open(mode="wb") as output_file: output_file.write(input_file.read()) diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py index 9d3f1ee71..e64d273c5 100644 --- a/spacy/tests/test_cli.py +++ b/spacy/tests/test_cli.py @@ -12,6 +12,7 @@ from spacy.cli._util import is_subpath_of, load_project_config from spacy.cli._util import parse_config_overrides, string_to_list from spacy.cli._util import substitute_project_variables from spacy.cli._util import validate_project_commands +from spacy.cli._util import upload_file, download_file from spacy.cli.debug_data import _compile_gold, _get_labels_from_model from spacy.cli.debug_data import _get_labels_from_spancat from spacy.cli.download import get_compatibility, get_version @@ -719,4 +720,19 @@ def test_debug_data_compile_gold(): ref = Doc(nlp.vocab, words=["Token", ".", "New York City"], sent_starts=[True, False, True], ents=["O", "B-ENT", "I-ENT"]) eg = Example(pred, ref) data = _compile_gold([eg], ["ner"], nlp, True) - assert data["boundary_cross_ents"] == 1 \ No newline at end of file + assert data["boundary_cross_ents"] == 1 + + +def test_upload_download_local_file(): + with make_tempdir() as d1, make_tempdir() as d2: + filename = "f.txt" + content = "content" + local_file = d1 / filename + remote_file = d2 / filename + with local_file.open(mode="w") as file_: + file_.write(content) + upload_file(local_file, remote_file) + local_file.unlink() + download_file(remote_file, local_file) + with local_file.open(mode="r") as file_: + assert file_.read() == content From 0de78920333acc9bba0ed3805289ddc9afbc4299 Mon Sep 17 00:00:00 2001 From: Paul O'Leary McCann Date: Wed, 7 Dec 2022 23:52:35 +0900 Subject: [PATCH 04/14] Add in errors used in the beam code that were removed at some point (#11935) I don't think there's any way to use the beam code at the moment, but as long as it's around the errors it refers to should also be present. --- spacy/errors.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/spacy/errors.py b/spacy/errors.py index b45c4f9db..42c60d63e 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -322,6 +322,11 @@ class Errors(metaclass=ErrorsWithCodes): "clear the existing vectors and resize the table.") E074 = ("Error interpreting compiled match pattern: patterns are expected " "to end with the attribute {attr}. Got: {bad_attr}.") + E079 = ("Error computing states in beam: number of predicted beams " + "({pbeams}) does not equal number of gold beams ({gbeams}).") + E080 = ("Duplicate state found in beam: {key}.") + E081 = ("Error getting gradient in beam: number of histories ({n_hist}) " + "does not equal number of losses ({losses}).") E082 = ("Error deprojectivizing parse: number of heads ({n_heads}), " "projective heads ({n_proj_heads}) and labels ({n_labels}) do not " "match.") From 3ac7230abdfdcbd6796508240a3fce656d059c14 Mon Sep 17 00:00:00 2001 From: Paul O'Leary McCann Date: Fri, 2 Dec 2022 18:17:11 +0900 Subject: [PATCH 05/14] Config generation fails for GPU without transformers (#11899) If you don't have spacy-transformers installed, but try to use `init config` with the GPU flag, you'll get an error. The issue is that the `use_transformers` flag in the config is conflated with the GPU flag, and then there's an attempt to access transformers config info that may not exist. There may be a better way to do this, but this stops the error. --- spacy/cli/templates/quickstart_training.jinja | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja index fb79a4f60..33cb26f79 100644 --- a/spacy/cli/templates/quickstart_training.jinja +++ b/spacy/cli/templates/quickstart_training.jinja @@ -1,7 +1,7 @@ {# This is a template for training configs used for the quickstart widget in the docs and the init config command. It encodes various best practices and can help generate the best possible configuration, given a user's requirements. #} -{%- set use_transformer = hardware != "cpu" -%} +{%- set use_transformer = hardware != "cpu" and transformer_data -%} {%- set transformer = transformer_data[optimize] if use_transformer else {} -%} [paths] train = null From 8cfc4c732502fdfc4731ac688c395e22a1ec4d6e Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Mon, 12 Dec 2022 08:45:35 +0100 Subject: [PATCH 06/14] Cast to uint64 for all array-based doc representations (#11933) * Convert all individual values explicitly to uint64 for array-based doc representations * Temporarily test with latest numpy v1.24.0rc * Remove unnecessary conversion from attr_t * Reduce number of individual casts * Convert specifically from int32 to uint64 * Revert "Temporarily test with latest numpy v1.24.0rc" This reverts commit eb0e3c5006515b9a7ff52bae59484c909b8a3f65. * Also use int32 in tests --- spacy/tests/doc/test_array.py | 4 ++-- spacy/tokens/doc.pyx | 1 + spacy/tokens/span.pyx | 4 ++-- spacy/training/example.pyx | 15 ++++++++------- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/spacy/tests/doc/test_array.py b/spacy/tests/doc/test_array.py index c334cc6eb..1f2d7d999 100644 --- a/spacy/tests/doc/test_array.py +++ b/spacy/tests/doc/test_array.py @@ -123,14 +123,14 @@ def test_doc_from_array_heads_in_bounds(en_vocab): # head before start arr = doc.to_array(["HEAD"]) - arr[0] = -1 + arr[0] = numpy.int32(-1).astype(numpy.uint64) doc_from_array = Doc(en_vocab, words=words) with pytest.raises(ValueError): doc_from_array.from_array(["HEAD"], arr) # head after end arr = doc.to_array(["HEAD"]) - arr[0] = 5 + arr[0] = numpy.int32(5).astype(numpy.uint64) doc_from_array = Doc(en_vocab, words=words) with pytest.raises(ValueError): doc_from_array.from_array(["HEAD"], arr) diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index d33764ac9..91a816a73 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -356,6 +356,7 @@ cdef class Doc: for annot in annotations: if annot: if annot is heads or annot is sent_starts or annot is ent_iobs: + annot = numpy.array(annot, dtype=numpy.int32).astype(numpy.uint64) for i in range(len(words)): if attrs.ndim == 1: attrs[i] = annot[i] diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index 970c09d60..4fd98716d 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -305,7 +305,7 @@ cdef class Span: for ancestor in ancestors: ancestor_i = ancestor.i - self.c.start if ancestor_i in range(length): - array[i, head_col] = ancestor_i - i + array[i, head_col] = numpy.int32(ancestor_i - i).astype(numpy.uint64) # if there is no appropriate ancestor, define a new artificial root value = array[i, head_col] @@ -313,7 +313,7 @@ cdef class Span: new_root = old_to_new_root.get(ancestor_i, None) if new_root is not None: # take the same artificial root as a previous token from the same sentence - array[i, head_col] = new_root - i + array[i, head_col] = numpy.int32(new_root - i).astype(numpy.uint64) else: # set this token as the new artificial root array[i, head_col] = 0 diff --git a/spacy/training/example.pyx b/spacy/training/example.pyx index 732203e7b..7e5ff32dd 100644 --- a/spacy/training/example.pyx +++ b/spacy/training/example.pyx @@ -333,26 +333,27 @@ def _annot2array(vocab, tok_annot, doc_annot): if key not in IDS: raise ValueError(Errors.E974.format(obj="token", key=key)) elif key in ["ORTH", "SPACY"]: - pass + continue elif key == "HEAD": attrs.append(key) - values.append([h-i if h is not None else 0 for i, h in enumerate(value)]) + row = [h-i if h is not None else 0 for i, h in enumerate(value)] elif key == "DEP": attrs.append(key) - values.append([vocab.strings.add(h) if h is not None else MISSING_DEP for h in value]) + row = [vocab.strings.add(h) if h is not None else MISSING_DEP for h in value] elif key == "SENT_START": attrs.append(key) - values.append([to_ternary_int(v) for v in value]) + row = [to_ternary_int(v) for v in value] elif key == "MORPH": attrs.append(key) - values.append([vocab.morphology.add(v) for v in value]) + row = [vocab.morphology.add(v) for v in value] else: attrs.append(key) if not all(isinstance(v, str) for v in value): types = set([type(v) for v in value]) raise TypeError(Errors.E969.format(field=key, types=types)) from None - values.append([vocab.strings.add(v) for v in value]) - array = numpy.asarray(values, dtype="uint64") + row = [vocab.strings.add(v) for v in value] + values.append([numpy.array(v, dtype=numpy.int32).astype(numpy.uint64) if v < 0 else v for v in row]) + array = numpy.array(values, dtype=numpy.uint64) return attrs, array.T From e3ef798e032c416bbb299bb30eb2ad25993febd6 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Tue, 25 Oct 2022 14:53:18 +0200 Subject: [PATCH 07/14] Rename test helper method with non-test_ name (#11701) --- spacy/tests/test_models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/tests/test_models.py b/spacy/tests/test_models.py index 2306cabb7..d91ed1201 100644 --- a/spacy/tests/test_models.py +++ b/spacy/tests/test_models.py @@ -23,7 +23,7 @@ def get_textcat_bow_kwargs(): def get_textcat_cnn_kwargs(): - return {"tok2vec": test_tok2vec(), "exclusive_classes": False, "nO": 13} + return {"tok2vec": make_test_tok2vec(), "exclusive_classes": False, "nO": 13} def get_all_params(model): @@ -65,7 +65,7 @@ def get_tok2vec_kwargs(): } -def test_tok2vec(): +def make_test_tok2vec(): return build_Tok2Vec_model(**get_tok2vec_kwargs()) From 1a5352e4238bec22a520a33a6d86a95e924807b9 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Mon, 22 Aug 2022 12:04:30 +0200 Subject: [PATCH 08/14] Clean up warnings in the test suite (#11331) --- spacy/tests/doc/test_doc_api.py | 5 +++-- spacy/tests/lang/ru/test_lemmatizer.py | 3 +++ spacy/tests/lang/uk/test_lemmatizer.py | 4 ++++ spacy/tests/matcher/test_phrase_matcher.py | 9 +++++---- spacy/training/initialize.py | 2 ++ 5 files changed, 17 insertions(+), 6 deletions(-) diff --git a/spacy/tests/doc/test_doc_api.py b/spacy/tests/doc/test_doc_api.py index 858c7cbb6..ed4d8b47b 100644 --- a/spacy/tests/doc/test_doc_api.py +++ b/spacy/tests/doc/test_doc_api.py @@ -2,6 +2,7 @@ import weakref import numpy import pytest +import warnings from thinc.api import NumpyOps, get_current_ops from spacy.attrs import DEP, ENT_IOB, ENT_TYPE, HEAD, IS_ALPHA, MORPH, POS @@ -528,9 +529,9 @@ def test_doc_from_array_sent_starts(en_vocab): # no warning using default attrs attrs = doc._get_array_attrs() arr = doc.to_array(attrs) - with pytest.warns(None) as record: + with warnings.catch_warnings(): + warnings.simplefilter("error") new_doc.from_array(attrs, arr) - assert len(record) == 0 # only SENT_START uses SENT_START attrs = [SENT_START] arr = doc.to_array(attrs) diff --git a/spacy/tests/lang/ru/test_lemmatizer.py b/spacy/tests/lang/ru/test_lemmatizer.py index 3810323bf..9ca7f441b 100644 --- a/spacy/tests/lang/ru/test_lemmatizer.py +++ b/spacy/tests/lang/ru/test_lemmatizer.py @@ -2,6 +2,9 @@ import pytest from spacy.tokens import Doc +pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning") + + def test_ru_doc_lemmatization(ru_lemmatizer): words = ["мама", "мыла", "раму"] pos = ["NOUN", "VERB", "NOUN"] diff --git a/spacy/tests/lang/uk/test_lemmatizer.py b/spacy/tests/lang/uk/test_lemmatizer.py index 4a787b2a6..57dd4198a 100644 --- a/spacy/tests/lang/uk/test_lemmatizer.py +++ b/spacy/tests/lang/uk/test_lemmatizer.py @@ -1,6 +1,10 @@ +import pytest from spacy.tokens import Doc +pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning") + + def test_uk_lemmatizer(uk_lemmatizer): """Check that the default uk lemmatizer runs.""" doc = Doc(uk_lemmatizer.vocab, words=["a", "b", "c"]) diff --git a/spacy/tests/matcher/test_phrase_matcher.py b/spacy/tests/matcher/test_phrase_matcher.py index f893d81f8..769d26cea 100644 --- a/spacy/tests/matcher/test_phrase_matcher.py +++ b/spacy/tests/matcher/test_phrase_matcher.py @@ -1,4 +1,5 @@ import pytest +import warnings import srsly from mock import Mock @@ -314,13 +315,13 @@ def test_phrase_matcher_validation(en_vocab): matcher.add("TEST1", [doc1]) with pytest.warns(UserWarning): matcher.add("TEST2", [doc2]) - with pytest.warns(None) as record: + with warnings.catch_warnings(): + warnings.simplefilter("error") matcher.add("TEST3", [doc3]) - assert not record.list matcher = PhraseMatcher(en_vocab, attr="POS", validate=True) - with pytest.warns(None) as record: + with warnings.catch_warnings(): + warnings.simplefilter("error") matcher.add("TEST4", [doc2]) - assert not record.list def test_attr_validation(en_vocab): diff --git a/spacy/training/initialize.py b/spacy/training/initialize.py index b59288e38..9b77e06e8 100644 --- a/spacy/training/initialize.py +++ b/spacy/training/initialize.py @@ -335,3 +335,5 @@ def ensure_shape(vectors_loc): # store all the results in a list in memory lines2 = open_file(vectors_loc) yield from lines2 + lines2.close() + lines.close() From 571ef56fa9e0682ca9cad6cb31ad2bbd6e7d050e Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Fri, 9 Dec 2022 14:38:04 +0100 Subject: [PATCH 09/14] Modify similarity tests to avoid spurious warnings --- spacy/tests/vocab_vectors/test_similarity.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/spacy/tests/vocab_vectors/test_similarity.py b/spacy/tests/vocab_vectors/test_similarity.py index 47cd1f060..e1f95b28b 100644 --- a/spacy/tests/vocab_vectors/test_similarity.py +++ b/spacy/tests/vocab_vectors/test_similarity.py @@ -7,7 +7,7 @@ from ..util import get_cosine, add_vecs_to_vocab @pytest.fixture def vectors(): - return [("apple", [1, 2, 3]), ("orange", [-1, -2, -3])] + return [("apple", [1, 2, 3]), ("orange", [-1, -2, -5])] @pytest.fixture() @@ -71,19 +71,17 @@ def test_vectors_similarity_DD(vocab, vectors): def test_vectors_similarity_TD(vocab, vectors): [(word1, vec1), (word2, vec2)] = vectors doc = Doc(vocab, words=[word1, word2]) - with pytest.warns(UserWarning): - assert isinstance(doc.similarity(doc[0]), float) - assert isinstance(doc[0].similarity(doc), float) - assert doc.similarity(doc[0]) == doc[0].similarity(doc) + assert isinstance(doc.similarity(doc[0]), float) + assert isinstance(doc[0].similarity(doc), float) + assert doc.similarity(doc[0]) == doc[0].similarity(doc) def test_vectors_similarity_TS(vocab, vectors): [(word1, vec1), (word2, vec2)] = vectors doc = Doc(vocab, words=[word1, word2]) - with pytest.warns(UserWarning): - assert isinstance(doc[:2].similarity(doc[0]), float) - assert isinstance(doc[0].similarity(doc[-2]), float) - assert doc[:2].similarity(doc[0]) == doc[0].similarity(doc[:2]) + assert isinstance(doc[:2].similarity(doc[0]), float) + assert isinstance(doc[0].similarity(doc[-2]), float) + assert doc[:2].similarity(doc[0]) == doc[0].similarity(doc[:2]) def test_vectors_similarity_DS(vocab, vectors): From 41afbb2f89dd3ddade930812e5705c2c3312cc5c Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Thu, 3 Nov 2022 09:29:46 +0100 Subject: [PATCH 10/14] Modernize and simplify CI steps (#11738) * Use `build` instead of `python setup.py sdist` * Remove in-place build with `setup.py` * Remove `gpu` parameter and GPU tests * Keep `architecture` and `num_build_jobs` in azure steps with CI defaults * Fix use of `num_build_jobs` parameters * Remove now-unused `prefix` parameter * Test imports and CLI before installing test requirements * Remove `*.egg-info` directory in addition to source directory for an warning-free `import spacy` --- .github/azure-steps.yml | 71 +++++++++++++++++++---------------------- azure-pipelines.yml | 17 ---------- 2 files changed, 33 insertions(+), 55 deletions(-) diff --git a/.github/azure-steps.yml b/.github/azure-steps.yml index 80c88b0b8..cd69fcc3a 100644 --- a/.github/azure-steps.yml +++ b/.github/azure-steps.yml @@ -1,9 +1,7 @@ parameters: python_version: '' - architecture: '' - prefix: '' - gpu: false - num_build_jobs: 1 + architecture: 'x64' + num_build_jobs: 2 steps: - task: UsePythonVersion@0 @@ -16,16 +14,16 @@ steps: displayName: 'Set variables' - script: | - ${{ parameters.prefix }} python -m pip install -U pip setuptools - ${{ parameters.prefix }} python -m pip install -U -r requirements.txt + python -m pip install -U build pip setuptools + python -m pip install -U -r requirements.txt displayName: "Install dependencies" - script: | - ${{ parameters.prefix }} python setup.py build_ext --inplace -j ${{ parameters.num_build_jobs }} - ${{ parameters.prefix }} python setup.py sdist --formats=gztar - displayName: "Compile and build sdist" + python -m build --sdist + displayName: "Build sdist" - - script: python -m mypy spacy + - script: | + python -m mypy spacy displayName: 'Run mypy' condition: ne(variables['python_version'], '3.10') @@ -34,35 +32,24 @@ steps: contents: "spacy" displayName: "Delete source directory" + - task: DeleteFiles@1 + inputs: + contents: "*.egg-info" + displayName: "Delete egg-info directory" + - script: | - ${{ parameters.prefix }} python -m pip freeze --exclude torch --exclude cupy-cuda110 > installed.txt - ${{ parameters.prefix }} python -m pip uninstall -y -r installed.txt + python -m pip freeze > installed.txt + python -m pip uninstall -y -r installed.txt displayName: "Uninstall all packages" - bash: | - ${{ parameters.prefix }} SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1) - ${{ parameters.prefix }} python -m pip install dist/$SDIST + SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1) + SPACY_NUM_BUILD_JOBS=${{ parameters.num_build_jobs }} python -m pip install dist/$SDIST displayName: "Install from sdist" - script: | - ${{ parameters.prefix }} python -m pip install -U -r requirements.txt - displayName: "Install test requirements" - - - script: | - ${{ parameters.prefix }} python -m pip install -U cupy-cuda110 -f https://github.com/cupy/cupy/releases/v9.0.0 - ${{ parameters.prefix }} python -m pip install "torch==1.7.1+cu110" -f https://download.pytorch.org/whl/torch_stable.html - displayName: "Install GPU requirements" - condition: eq(${{ parameters.gpu }}, true) - - - script: | - ${{ parameters.prefix }} python -m pytest --pyargs spacy - displayName: "Run CPU tests" - condition: eq(${{ parameters.gpu }}, false) - - - script: | - ${{ parameters.prefix }} python -m pytest --pyargs spacy -p spacy.tests.enable_gpu - displayName: "Run GPU tests" - condition: eq(${{ parameters.gpu }}, true) + python -W error -c "import spacy" + displayName: "Test import" - script: | python -m spacy download ca_core_news_sm @@ -105,13 +92,21 @@ steps: displayName: 'Test assemble CLI vectors warning' condition: eq(variables['python_version'], '3.8') + - script: | + python -m pip install -U -r requirements.txt + displayName: "Install test requirements" + + - script: | + python -m pytest --pyargs spacy -W error + displayName: "Run CPU tests" + + - script: | + python -m pip install --pre thinc-apple-ops + python -m pytest --pyargs spacy + displayName: "Run CPU tests with thinc-apple-ops" + condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.10')) + - script: | python .github/validate_universe_json.py website/meta/universe.json displayName: 'Test website/meta/universe.json' condition: eq(variables['python_version'], '3.8') - - - script: | - ${{ parameters.prefix }} python -m pip install thinc-apple-ops - ${{ parameters.prefix }} python -m pytest --pyargs spacy - displayName: "Run CPU tests with thinc-apple-ops" - condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.9')) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 71a793911..99be683b9 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -90,20 +90,3 @@ jobs: - template: .github/azure-steps.yml parameters: python_version: '$(python.version)' - architecture: 'x64' - -# - job: "TestGPU" -# dependsOn: "Validate" -# strategy: -# matrix: -# Python38LinuxX64_GPU: -# python.version: '3.8' -# pool: -# name: "LinuxX64_GPU" -# steps: -# - template: .github/azure-steps.yml -# parameters: -# python_version: '$(python.version)' -# architecture: 'x64' -# gpu: true -# num_build_jobs: 24 From e73755e49f75997975dbe0b7dc814997ad898ad4 Mon Sep 17 00:00:00 2001 From: Paul O'Leary McCann Date: Mon, 5 Dec 2022 17:43:23 +0900 Subject: [PATCH 11/14] Switch ubuntu-latest to ubuntu-20.04 in main tests (#11928) * Switch ubuntu-latest to ubuntu-20.04 in main tests * Only use 20.04 for 3.6 --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 99be683b9..3910bc343 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -39,7 +39,7 @@ jobs: matrix: # We're only running one platform per Python version to speed up builds Python36Linux: - imageName: "ubuntu-latest" + imageName: "ubuntu-20.04" python.version: "3.6" # Python36Windows: # imageName: "windows-latest" @@ -48,7 +48,7 @@ jobs: # imageName: "macos-latest" # python.version: "3.6" # Python37Linux: - # imageName: "ubuntu-latest" + # imageName: "ubuntu-20.04" # python.version: "3.7" Python37Windows: imageName: "windows-latest" From b449d355d54bb766acedf48b578801f0799812ec Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Mon, 12 Dec 2022 10:13:10 +0100 Subject: [PATCH 12/14] CI: Install thinc-apple-ops through extra (#11963) --- .github/azure-steps.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/azure-steps.yml b/.github/azure-steps.yml index cd69fcc3a..0c622bebd 100644 --- a/.github/azure-steps.yml +++ b/.github/azure-steps.yml @@ -101,7 +101,7 @@ steps: displayName: "Run CPU tests" - script: | - python -m pip install --pre thinc-apple-ops + python -m pip install 'spacy[apple]' python -m pytest --pyargs spacy displayName: "Run CPU tests with thinc-apple-ops" condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.10')) From 386a3e69da10d0449826e3dd030a20a41da87c85 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Fri, 9 Dec 2022 13:53:58 +0100 Subject: [PATCH 13/14] CI and precommit hooks: switch to flake8==5.0.4 --- .pre-commit-config.yaml | 2 +- azure-pipelines.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a7a12fd24..2612c9a81 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,7 +5,7 @@ repos: - id: black language_version: python3.7 - repo: https://gitlab.com/pycqa/flake8 - rev: 3.9.2 + rev: 5.0.4 hooks: - id: flake8 args: diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 3910bc343..57f5c8727 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -29,7 +29,7 @@ jobs: inputs: versionSpec: "3.7" - script: | - pip install flake8==3.9.2 + pip install flake8==5.0.4 python -m flake8 spacy --count --select=E901,E999,F821,F822,F823 --show-source --statistics displayName: "flake8" From 427de63f0a19433ce91f1ee6bfbeb8816d705661 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Tue, 13 Dec 2022 13:21:53 +0100 Subject: [PATCH 14/14] Set version to v3.2.5 --- spacy/about.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/about.py b/spacy/about.py index a04f7180c..a81c9ce29 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -1,6 +1,6 @@ # fmt: off __title__ = "spacy" -__version__ = "3.2.4" +__version__ = "3.2.5" __download_url__ = "https://github.com/explosion/spacy-models/releases/download" __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json" __projects__ = "https://github.com/explosion/projects"