Mirror of https://github.com/explosion/spaCy.git (synced 2025-10-30 23:47:31 +03:00)

Commit 9eb813a35d: Merge remote-tracking branch 'upstream/develop' into fix/patterns-init
@@ -7,7 +7,7 @@ requires = [
     "preshed>=3.0.2,<3.1.0",
     "murmurhash>=0.28.0,<1.1.0",
     "thinc>=8.0.0a43,<8.0.0a50",
-    "blis>=0.4.0,<0.5.0",
+    "blis>=0.4.0,<0.8.0",
     "pytokenizations",
     "pathy"
 ]
@@ -2,7 +2,7 @@
 cymem>=2.0.2,<2.1.0
 preshed>=3.0.2,<3.1.0
 thinc>=8.0.0a43,<8.0.0a50
-blis>=0.4.0,<0.5.0
+blis>=0.4.0,<0.8.0
 ml_datasets==0.2.0a0
 murmurhash>=0.28.0,<1.1.0
 wasabi>=0.8.0,<1.1.0
@@ -41,7 +41,7 @@ install_requires =
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
     thinc>=8.0.0a43,<8.0.0a50
-    blis>=0.4.0,<0.5.0
+    blis>=0.4.0,<0.8.0
     wasabi>=0.8.0,<1.1.0
     srsly>=2.3.0,<3.0.0
     catalogue>=2.0.1,<2.1.0
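All three dependency files relax the blis upper bound from <0.5.0 to <0.8.0, so newer blis wheels can satisfy the pin. As a minimal sketch (not part of the commit) of what the widened range admits, using the packaging library that pip itself builds on:

    # Compare the old and new blis specifiers; the version numbers are examples.
    from packaging.specifiers import SpecifierSet

    old_pin = SpecifierSet(">=0.4.0,<0.5.0")
    new_pin = SpecifierSet(">=0.4.0,<0.8.0")
    assert "0.7.4" not in old_pin   # 0.7.x wheels were rejected before
    assert "0.7.4" in new_pin       # ...and are accepted now
    assert "0.8.0" not in new_pin   # the upper bound still excludes 0.8.x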
@@ -1,6 +1,6 @@
 # fmt: off
 __title__ = "spacy-nightly"
-__version__ = "3.0.0a33"
+__version__ = "3.0.0a34"
 __release__ = True
 __download_url__ = "https://github.com/explosion/spacy-models/releases/download"
 __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
@@ -458,10 +458,10 @@ class Errors:
     # TODO: fix numbering after merging develop into master
     E900 = ("Patterns for component '{name}' not initialized. This can be fixed "
             "by calling 'add_patterns' or 'initialize'.")
-    E092 = ("The sentence-per-line IOB/IOB2 file is not formatted correctly. "
+    E902 = ("The sentence-per-line IOB/IOB2 file is not formatted correctly. "
             "Try checking whitespace and delimiters. See "
             "https://nightly.spacy.io/api/cli#convert")
-    E093 = ("The token-per-line NER file is not formatted correctly. Try checking "
+    E903 = ("The token-per-line NER file is not formatted correctly. Try checking "
             "whitespace and delimiters. See https://nightly.spacy.io/api/cli#convert")
     E904 = ("Cannot initialize StaticVectors layer: nO dimension unset. This "
             "dimension refers to the output width, after the linear projection "
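Besides renumbering E092/E093 into the E9xx block, the hunk keeps the new E900 message, which tells users to call 'add_patterns' or 'initialize'. A hedged illustration (not part of the commit) of that advice, using the public v3 entity ruler API:

    # Give the component its patterns before processing text, as E900 suggests.
    import spacy

    nlp = spacy.blank("en")
    ruler = nlp.add_pipe("entity_ruler")
    ruler.add_patterns([{"label": "ORG", "pattern": "spaCy"}])
    doc = nlp("spaCy is a library by Explosion")
    assert [(ent.text, ent.label_) for ent in doc.ents] == [("spaCy", "ORG")]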
@@ -289,13 +289,12 @@ class Lookups:

         DOCS: https://nightly.spacy.io/api/lookups#to_disk
         """
-        if len(self._tables):
-            path = ensure_path(path)
-            if not path.exists():
-                path.mkdir()
-            filepath = path / filename
-            with filepath.open("wb") as file_:
-                file_.write(self.to_bytes())
+        path = ensure_path(path)
+        if not path.exists():
+            path.mkdir()
+        filepath = path / filename
+        with filepath.open("wb") as file_:
+            file_.write(self.to_bytes())

     def from_disk(
         self, path: Union[str, Path], filename: str = "lookups.bin", **kwargs
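Dropping the `if len(self._tables)` guard means Lookups.to_disk now always writes a lookups.bin file, even when no tables were added. A minimal sketch of the resulting behavior (the temporary directory and assertion are illustrative only):

    import tempfile
    from pathlib import Path
    from spacy.lookups import Lookups

    with tempfile.TemporaryDirectory() as tmp_dir:
        lookups = Lookups()           # no tables added
        lookups.to_disk(tmp_dir)      # previously a no-op for empty lookups
        assert (Path(tmp_dir) / "lookups.bin").exists()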
@@ -210,7 +210,7 @@ class Morphologizer(Tagger):

         examples (Iterable[Examples]): The batch of examples.
         scores: Scores representing the model's predictions.
-        RETUTNRS (Tuple[float, float]): The loss and the gradient.
+        RETURNS (Tuple[float, float]): The loss and the gradient.

         DOCS: https://nightly.spacy.io/api/morphologizer#get_loss
         """
@@ -162,7 +162,7 @@ cdef class Pipe:

         examples (Iterable[Examples]): The batch of examples.
         scores: Scores representing the model's predictions.
-        RETUTNRS (Tuple[float, float]): The loss and the gradient.
+        RETURNS (Tuple[float, float]): The loss and the gradient.

         DOCS: https://nightly.spacy.io/api/pipe#get_loss
         """
@@ -104,7 +104,7 @@ class SentenceRecognizer(Tagger):

         examples (Iterable[Examples]): The batch of examples.
         scores: Scores representing the model's predictions.
-        RETUTNRS (Tuple[float, float]): The loss and the gradient.
+        RETURNS (Tuple[float, float]): The loss and the gradient.

         DOCS: https://nightly.spacy.io/api/sentencerecognizer#get_loss
         """
@@ -249,7 +249,7 @@ class Tagger(Pipe):

         examples (Iterable[Examples]): The batch of examples.
         scores: Scores representing the model's predictions.
-        RETUTNRS (Tuple[float, float]): The loss and the gradient.
+        RETURNS (Tuple[float, float]): The loss and the gradient.

         DOCS: https://nightly.spacy.io/api/tagger#get_loss
         """
@@ -281,7 +281,7 @@ class TextCategorizer(Pipe):

         examples (Iterable[Examples]): The batch of examples.
         scores: Scores representing the model's predictions.
-        RETUTNRS (Tuple[float, float]): The loss and the gradient.
+        RETURNS (Tuple[float, float]): The loss and the gradient.

         DOCS: https://nightly.spacy.io/api/textcategorizer#get_loss
         """
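The same RETUTNRS -> RETURNS typo is fixed in five get_loss docstrings. As a hedged sketch of the contract those docstrings describe (stand-in scores, not part of the commit; assumes the released v3 API):

    # get_loss takes the batch of Examples plus model scores and returns
    # (loss, gradient); the scores here are hand-made stand-ins so the model
    # never has to be initialized.
    import numpy
    import spacy
    from spacy.training import Example

    nlp = spacy.blank("en")
    textcat = nlp.add_pipe("textcat")
    textcat.add_label("POSITIVE")
    textcat.add_label("NEGATIVE")
    examples = [
        Example.from_dict(
            nlp.make_doc("great"), {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}
        )
    ]
    scores = numpy.asarray([[0.2, 0.8]], dtype="f")  # one row per example, one column per label
    loss, d_scores = textcat.get_loss(examples, scores)
    assert isinstance(loss, float) and d_scores.shape == scores.shape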
@@ -7,6 +7,15 @@ from spacy import util
 from spacy import prefer_gpu, require_gpu
 from spacy.ml._precomputable_affine import PrecomputableAffine
 from spacy.ml._precomputable_affine import _backprop_precomputable_affine_padding
+from spacy.util import dot_to_object, SimpleFrozenList
+from thinc.api import Config, Optimizer, ConfigValidationError
+from spacy.training.batchers import minibatch_by_words
+from spacy.lang.en import English
+from spacy.lang.nl import Dutch
+from spacy.language import DEFAULT_CONFIG_PATH
+from spacy.schemas import ConfigSchemaTraining
+
+from .util import get_random_doc


 @pytest.fixture
@@ -157,3 +166,128 @@ def test_dot_to_dict(dot_notation, expected):
     result = util.dot_to_dict(dot_notation)
     assert result == expected
     assert util.dict_to_dot(result) == dot_notation
+
+
+@pytest.mark.parametrize(
+    "doc_sizes, expected_batches",
+    [
+        ([400, 400, 199], [3]),
+        ([400, 400, 199, 3], [4]),
+        ([400, 400, 199, 3, 200], [3, 2]),
+        ([400, 400, 199, 3, 1], [5]),
+        ([400, 400, 199, 3, 1, 1500], [5]),  # 1500 will be discarded
+        ([400, 400, 199, 3, 1, 200], [3, 3]),
+        ([400, 400, 199, 3, 1, 999], [3, 3]),
+        ([400, 400, 199, 3, 1, 999, 999], [3, 2, 1, 1]),
+        ([1, 2, 999], [3]),
+        ([1, 2, 999, 1], [4]),
+        ([1, 200, 999, 1], [2, 2]),
+        ([1, 999, 200, 1], [2, 2]),
+    ],
+)
+def test_util_minibatch(doc_sizes, expected_batches):
+    docs = [get_random_doc(doc_size) for doc_size in doc_sizes]
+    tol = 0.2
+    batch_size = 1000
+    batches = list(
+        minibatch_by_words(docs, size=batch_size, tolerance=tol, discard_oversize=True)
+    )
+    assert [len(batch) for batch in batches] == expected_batches
+
+    max_size = batch_size + batch_size * tol
+    for batch in batches:
+        assert sum([len(doc) for doc in batch]) < max_size
+
+
+@pytest.mark.parametrize(
+    "doc_sizes, expected_batches",
+    [
+        ([400, 4000, 199], [1, 2]),
+        ([400, 400, 199, 3000, 200], [1, 4]),
+        ([400, 400, 199, 3, 1, 1500], [1, 5]),
+        ([400, 400, 199, 3000, 2000, 200, 200], [1, 1, 3, 2]),
+        ([1, 2, 9999], [1, 2]),
+        ([2000, 1, 2000, 1, 1, 1, 2000], [1, 1, 1, 4]),
+    ],
+)
+def test_util_minibatch_oversize(doc_sizes, expected_batches):
+    """ Test that oversized documents are returned in their own batch"""
+    docs = [get_random_doc(doc_size) for doc_size in doc_sizes]
+    tol = 0.2
+    batch_size = 1000
+    batches = list(
+        minibatch_by_words(docs, size=batch_size, tolerance=tol, discard_oversize=False)
+    )
+    assert [len(batch) for batch in batches] == expected_batches
+
+
+def test_util_dot_section():
+    cfg_string = """
+    [nlp]
+    lang = "en"
+    pipeline = ["textcat"]
+
+    [components]
+
+    [components.textcat]
+    factory = "textcat"
+
+    [components.textcat.model]
+    @architectures = "spacy.TextCatBOW.v1"
+    exclusive_classes = true
+    ngram_size = 1
+    no_output_layer = false
+    """
+    nlp_config = Config().from_str(cfg_string)
+    en_nlp = util.load_model_from_config(nlp_config, auto_fill=True)
+    default_config = Config().from_disk(DEFAULT_CONFIG_PATH)
+    default_config["nlp"]["lang"] = "nl"
+    nl_nlp = util.load_model_from_config(default_config, auto_fill=True)
+    # Test that creation went OK
+    assert isinstance(en_nlp, English)
+    assert isinstance(nl_nlp, Dutch)
+    assert nl_nlp.pipe_names == []
+    assert en_nlp.pipe_names == ["textcat"]
+    # not exclusive_classes
+    assert en_nlp.get_pipe("textcat").model.attrs["multi_label"] is False
+    # Test that default values got overwritten
+    assert en_nlp.config["nlp"]["pipeline"] == ["textcat"]
+    assert nl_nlp.config["nlp"]["pipeline"] == []  # default value []
+    # Test proper functioning of 'dot_to_object'
+    with pytest.raises(KeyError):
+        dot_to_object(en_nlp.config, "nlp.pipeline.tagger")
+    with pytest.raises(KeyError):
+        dot_to_object(en_nlp.config, "nlp.unknownattribute")
+    T = util.registry.resolve(nl_nlp.config["training"], schema=ConfigSchemaTraining)
+    assert isinstance(dot_to_object({"training": T}, "training.optimizer"), Optimizer)
+
+
+def test_simple_frozen_list():
+    t = SimpleFrozenList(["foo", "bar"])
+    assert t == ["foo", "bar"]
+    assert t.index("bar") == 1  # okay method
+    with pytest.raises(NotImplementedError):
+        t.append("baz")
+    with pytest.raises(NotImplementedError):
+        t.sort()
+    with pytest.raises(NotImplementedError):
+        t.extend(["baz"])
+    with pytest.raises(NotImplementedError):
+        t.pop()
+    t = SimpleFrozenList(["foo", "bar"], error="Error!")
+    with pytest.raises(NotImplementedError):
+        t.append("baz")
+
+
+def test_resolve_dot_names():
+    config = {
+        "training": {"optimizer": {"@optimizers": "Adam.v1"}},
+        "foo": {"bar": "training.optimizer", "baz": "training.xyz"},
+    }
+    result = util.resolve_dot_names(config, ["training.optimizer"])
+    assert isinstance(result[0], Optimizer)
+    with pytest.raises(ConfigValidationError) as e:
+        util.resolve_dot_names(config, ["training.xyz", "training.optimizer"])
+    errors = e.value.errors
+    assert len(errors) == 1
+    assert errors[0]["loc"] == ["training", "xyz"]
@@ -1,137 +0,0 @@
-import pytest
-
-from spacy import util
-from spacy.util import dot_to_object, SimpleFrozenList
-from thinc.api import Config, Optimizer, ConfigValidationError
-from spacy.training.batchers import minibatch_by_words
-from spacy.lang.en import English
-from spacy.lang.nl import Dutch
-from spacy.language import DEFAULT_CONFIG_PATH
-from spacy.schemas import ConfigSchemaTraining
-
-from .util import get_random_doc
-
-
-@pytest.mark.parametrize(
-    "doc_sizes, expected_batches",
-    [
-        ([400, 400, 199], [3]),
-        ([400, 400, 199, 3], [4]),
-        ([400, 400, 199, 3, 200], [3, 2]),
-        ([400, 400, 199, 3, 1], [5]),
-        ([400, 400, 199, 3, 1, 1500], [5]),  # 1500 will be discarded
-        ([400, 400, 199, 3, 1, 200], [3, 3]),
-        ([400, 400, 199, 3, 1, 999], [3, 3]),
-        ([400, 400, 199, 3, 1, 999, 999], [3, 2, 1, 1]),
-        ([1, 2, 999], [3]),
-        ([1, 2, 999, 1], [4]),
-        ([1, 200, 999, 1], [2, 2]),
-        ([1, 999, 200, 1], [2, 2]),
-    ],
-)
-def test_util_minibatch(doc_sizes, expected_batches):
-    docs = [get_random_doc(doc_size) for doc_size in doc_sizes]
-    tol = 0.2
-    batch_size = 1000
-    batches = list(
-        minibatch_by_words(docs, size=batch_size, tolerance=tol, discard_oversize=True)
-    )
-    assert [len(batch) for batch in batches] == expected_batches
-
-    max_size = batch_size + batch_size * tol
-    for batch in batches:
-        assert sum([len(doc) for doc in batch]) < max_size
-
-
-@pytest.mark.parametrize(
-    "doc_sizes, expected_batches",
-    [
-        ([400, 4000, 199], [1, 2]),
-        ([400, 400, 199, 3000, 200], [1, 4]),
-        ([400, 400, 199, 3, 1, 1500], [1, 5]),
-        ([400, 400, 199, 3000, 2000, 200, 200], [1, 1, 3, 2]),
-        ([1, 2, 9999], [1, 2]),
-        ([2000, 1, 2000, 1, 1, 1, 2000], [1, 1, 1, 4]),
-    ],
-)
-def test_util_minibatch_oversize(doc_sizes, expected_batches):
-    """ Test that oversized documents are returned in their own batch"""
-    docs = [get_random_doc(doc_size) for doc_size in doc_sizes]
-    tol = 0.2
-    batch_size = 1000
-    batches = list(
-        minibatch_by_words(docs, size=batch_size, tolerance=tol, discard_oversize=False)
-    )
-    assert [len(batch) for batch in batches] == expected_batches
-
-
-def test_util_dot_section():
-    cfg_string = """
-    [nlp]
-    lang = "en"
-    pipeline = ["textcat"]
-
-    [components]
-
-    [components.textcat]
-    factory = "textcat"
-
-    [components.textcat.model]
-    @architectures = "spacy.TextCatBOW.v1"
-    exclusive_classes = true
-    ngram_size = 1
-    no_output_layer = false
-    """
-    nlp_config = Config().from_str(cfg_string)
-    en_nlp = util.load_model_from_config(nlp_config, auto_fill=True)
-    default_config = Config().from_disk(DEFAULT_CONFIG_PATH)
-    default_config["nlp"]["lang"] = "nl"
-    nl_nlp = util.load_model_from_config(default_config, auto_fill=True)
-    # Test that creation went OK
-    assert isinstance(en_nlp, English)
-    assert isinstance(nl_nlp, Dutch)
-    assert nl_nlp.pipe_names == []
-    assert en_nlp.pipe_names == ["textcat"]
-    # not exclusive_classes
-    assert en_nlp.get_pipe("textcat").model.attrs["multi_label"] is False
-    # Test that default values got overwritten
-    assert en_nlp.config["nlp"]["pipeline"] == ["textcat"]
-    assert nl_nlp.config["nlp"]["pipeline"] == []  # default value []
-    # Test proper functioning of 'dot_to_object'
-    with pytest.raises(KeyError):
-        dot_to_object(en_nlp.config, "nlp.pipeline.tagger")
-    with pytest.raises(KeyError):
-        dot_to_object(en_nlp.config, "nlp.unknownattribute")
-    T = util.registry.resolve(nl_nlp.config["training"], schema=ConfigSchemaTraining)
-    assert isinstance(dot_to_object({"training": T}, "training.optimizer"), Optimizer)
-
-
-def test_simple_frozen_list():
-    t = SimpleFrozenList(["foo", "bar"])
-    assert t == ["foo", "bar"]
-    assert t.index("bar") == 1  # okay method
-    with pytest.raises(NotImplementedError):
-        t.append("baz")
-    with pytest.raises(NotImplementedError):
-        t.sort()
-    with pytest.raises(NotImplementedError):
-        t.extend(["baz"])
-    with pytest.raises(NotImplementedError):
-        t.pop()
-    t = SimpleFrozenList(["foo", "bar"], error="Error!")
-    with pytest.raises(NotImplementedError):
-        t.append("baz")
-
-
-def test_resolve_dot_names():
-    config = {
-        "training": {"optimizer": {"@optimizers": "Adam.v1"}},
-        "foo": {"bar": "training.optimizer", "baz": "training.xyz"},
-    }
-    result = util.resolve_dot_names(config, ["training.optimizer"])
-    assert isinstance(result[0], Optimizer)
-    with pytest.raises(ConfigValidationError) as e:
-        util.resolve_dot_names(config, ["training.xyz", "training.optimizer"])
-    errors = e.value.errors
-    assert len(errors) == 1
-    assert errors[0]["loc"] == ["training", "xyz"]
@@ -5,7 +5,7 @@ import copy
 from functools import partial
 from pydantic import BaseModel, StrictStr

-from ..util import registry, logger
+from ..util import registry
 from ..tokens import Doc
 from .example import Example

@@ -119,9 +119,8 @@ def make_orth_variants(
     orig_token_dict = copy.deepcopy(token_dict)
     ndsv = orth_variants.get("single", [])
     ndpv = orth_variants.get("paired", [])
-    logger.debug(f"Data augmentation: {len(ndsv)} single / {len(ndpv)} paired variants")
-    words = token_dict.get("words", [])
-    tags = token_dict.get("tags", [])
+    words = token_dict.get("ORTH", [])
+    tags = token_dict.get("TAG", [])
     # keep unmodified if words or tags are not defined
     if words and tags:
         if lower:
@@ -154,8 +153,8 @@ def make_orth_variants(
                             if words[word_idx] in pair:
                                 pair_idx = pair.index(words[word_idx])
                     words[word_idx] = punct_choices[punct_idx][pair_idx]
-        token_dict["words"] = words
-        token_dict["tags"] = tags
+        token_dict["ORTH"] = words
+        token_dict["TAG"] = tags
     # modify raw
     if raw is not None:
         variants = []
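The augmenter switches from the lowercase "words"/"tags" keys to the attribute names "ORTH"/"TAG" used by the token annotation dict, so the data it reads and writes back is actually found. A hedged sketch (not part of the commit) of where those keys come from, using Example.to_dict() from the released v3 API:

    # Example.to_dict() stores token-level annotation under attribute names,
    # which is why the old lowercase lookups found nothing to augment.
    import spacy
    from spacy.training import Example

    nlp = spacy.blank("en")
    doc = nlp.make_doc("spaCy rocks")
    example = Example.from_dict(doc, {"words": ["spaCy", "rocks"], "tags": ["NNP", "VBZ"]})
    token_annotation = example.to_dict()["token_annotation"]
    assert token_annotation["ORTH"] == ["spaCy", "rocks"]
    assert token_annotation["TAG"] == ["NNP", "VBZ"]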
@@ -103,7 +103,7 @@ def conll_ner_to_docs(
             lines = [line.strip() for line in conll_sent.split("\n") if line.strip()]
             cols = list(zip(*[line.split() for line in lines]))
             if len(cols) < 2:
-                raise ValueError(Errors.E093)
+                raise ValueError(Errors.E903)
             length = len(cols[0])
             words.extend(cols[0])
             sent_starts.extend([True] + [False] * (length - 1))
@@ -46,7 +46,7 @@ def read_iob(raw_sents, vocab, n_sents):
                 sent_words, sent_iob = zip(*sent_tokens)
                 sent_tags = ["-"] * len(sent_words)
             else:
-                raise ValueError(Errors.E092)
+                raise ValueError(Errors.E902)
             words.extend(sent_words)
             tags.extend(sent_tags)
             iob.extend(sent_iob)
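E902 and E903 are raised by the IOB and CoNLL-NER converters when the input is malformed. For contrast, a hedged sketch (not part of the commit) of a well-formed sentence-per-line IOB input that converts cleanly:

    # One sentence per line, tokens separated by spaces, fields by "|"
    # (word|POS|IOB); malformed lines trigger the E902 message above.
    from spacy.training.converters import iob_to_docs

    iob_line = "I|PRP|O like|VBP|O London|NNP|B-GPE and|CC|O Berlin|NNP|B-GPE .|.|O"
    docs = list(iob_to_docs(iob_line, n_sents=10, no_print=True))
    assert [ent.text for ent in docs[0].ents] == ["London", "Berlin"]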
@@ -445,9 +445,9 @@ cdef class Vocab:
         setters = ["strings", "vectors"]
         if "strings" not in exclude:
             self.strings.to_disk(path / "strings.json")
-        if "vectors" not in "exclude" and self.vectors is not None:
+        if "vectors" not in "exclude":
             self.vectors.to_disk(path)
-        if "lookups" not in "exclude" and self.lookups is not None:
+        if "lookups" not in "exclude":
             self.lookups.to_disk(path)

     def from_disk(self, path, *, exclude=tuple()):
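For context, a minimal round-trip through the serialization path these lines sit on (not part of the commit; the blank pipeline and temporary directory are illustrative):

    # Vocab.to_disk writes strings.json plus the vectors and lookups data
    # that the lines above now serialize unconditionally.
    import tempfile
    import spacy

    nlp = spacy.blank("en")
    with tempfile.TemporaryDirectory() as tmp_dir:
        nlp.vocab.to_disk(tmp_dir)
        nlp.vocab.from_disk(tmp_dir)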