mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-27 10:26:35 +03:00
60 lines
1.3 KiB
Python
60 lines
1.3 KiB
Python
|
from spacy.lang.en import English
|
||
|
from spacy.training import Example
|
||
|
from spacy.util import load_config_from_str
|
||
|
import pickle
|
||
|
|
||
|
|
||
|
# Pipeline config used by the test below: a standalone "tok2vec" component
# plus a "tagger" whose model reads from it through a Tok2VecListener
# (upstream = "*"). Note that a [components.ner] section is defined but "ner"
# is not listed in nlp.pipeline.
CONFIG = """
[nlp]
lang = "en"
pipeline = ["tok2vec", "tagger"]

[components]

[components.tok2vec]
factory = "tok2vec"

[components.tok2vec.model]
@architectures = "spacy.Tok2Vec.v1"

[components.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v1"
width = ${components.tok2vec.model.encode:width}
attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
rows = [5000,2500,2500,2500]
include_static_vectors = false

[components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1"
width = 96
depth = 4
window_size = 1
maxout_pieces = 3

[components.ner]
factory = "ner"

[components.tagger]
factory = "tagger"

[components.tagger.model]
@architectures = "spacy.Tagger.v1"
nO = null

[components.tagger.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode:width}
upstream = "*"
"""
|
||
|
|
||
|
|
||
|
def test_issue6950():
    """Regression test for issue 6950: an initialized pipeline whose tagger
    listens to a shared tok2vec must be picklable (i.e. it holds no lambdas),
    both before and after it has processed a text.
    """
    config = load_config_from_str(CONFIG)
    nlp = English.from_config(config)

    def get_examples():
        # One single-token example is what the pipeline is initialized from.
        doc = nlp.make_doc("hello")
        return [Example.from_dict(doc, {"tags": ["V"]})]

    nlp.initialize(get_examples)
    # Serialize right after initialization ...
    pickle.dumps(nlp)
    # ... and again once the pipeline has actually been run on a text.
    nlp("hello")
    pickle.dumps(nlp)
|