mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
💫 Use Blis for matrix multiplications (#2966)
Our epic matrix multiplication odyssey is drawing to a close...

I've now finally got the Blis linear algebra routines in a self-contained Python package, with wheels for Windows, Linux and OSX. The only missing platform at the moment is Windows Python 2.7. The result is at https://github.com/explosion/cython-blis

Thinc v7.0.0 will make the change to Blis. I've put a Thinc v7.0.0.dev0 up on PyPI so that we can test these changes with the CI, and even get them out to spacy-nightly, before Thinc v7.0.0 is released. This PR also updates the other dependencies to be in line with the current versions master is using. I've also resolved the msgpack deprecation problems, and gotten spaCy and Thinc up to date with the latest Cython.

The point of switching to Blis is to have control of how our matrix multiplications are executed across platforms. When we were using numpy for this, a different library would be used on pip and conda, OSX would use Accelerate, etc. This would open up different bugs and performance problems, especially when multi-threading was introduced.

With the change to Blis, we now strictly single-thread the matrix multiplications. This will make it much easier to use multiprocessing to parallelise the runtime, since we won't have nested parallelism problems to deal with.

* Use blis
* Use -2 arg to Cython
* Update dependencies
* Fix requirements
* Update setup dependencies
* Fix requirement typo
* Fix msgpack errors
* Remove Python27 test from Appveyor, until Blis works there
* Auto-format setup.py
* Fix murmurhash version
This commit is contained in:
parent
3832c8a2c1
commit
2c37e0ccf6
|
@ -5,14 +5,15 @@ environment:
|
||||||
# For Python versions available on Appveyor, see
|
# For Python versions available on Appveyor, see
|
||||||
# http://www.appveyor.com/docs/installed-software#python
|
# http://www.appveyor.com/docs/installed-software#python
|
||||||
|
|
||||||
- PYTHON: "C:\\Python27-x64"
|
#- PYTHON: "C:\\Python27-x64"
|
||||||
#- PYTHON: "C:\\Python34"
|
#- PYTHON: "C:\\Python34"
|
||||||
#- PYTHON: "C:\\Python35"
|
#- PYTHON: "C:\\Python35"
|
||||||
#- DISTUTILS_USE_SDK: "1"
|
#- DISTUTILS_USE_SDK: "1"
|
||||||
#- PYTHON: "C:\\Python34-x64"
|
#- PYTHON: "C:\\Python34-x64"
|
||||||
#- DISTUTILS_USE_SDK: "1"
|
#- DISTUTILS_USE_SDK: "1"
|
||||||
#- PYTHON: "C:\\Python35-x64"
|
- PYTHON: "C:\\Python35-x64"
|
||||||
- PYTHON: "C:\\Python36-x64"
|
- PYTHON: "C:\\Python36-x64"
|
||||||
|
- PYTHON: "C:\\Python37-x64"
|
||||||
|
|
||||||
install:
|
install:
|
||||||
# We need wheel installed to build wheels
|
# We need wheel installed to build wheels
|
||||||
|
|
|
@ -38,7 +38,7 @@ import argparse
|
||||||
HASH_FILE = 'cythonize.json'
|
HASH_FILE = 'cythonize.json'
|
||||||
|
|
||||||
|
|
||||||
def process_pyx(fromfile, tofile):
|
def process_pyx(fromfile, tofile, language_level='-2'):
|
||||||
print('Processing %s' % fromfile)
|
print('Processing %s' % fromfile)
|
||||||
try:
|
try:
|
||||||
from Cython.Compiler.Version import version as cython_version
|
from Cython.Compiler.Version import version as cython_version
|
||||||
|
@ -49,7 +49,7 @@ def process_pyx(fromfile, tofile):
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
flags = ['--fast-fail']
|
flags = ['--fast-fail', language_level]
|
||||||
if tofile.endswith('.cpp'):
|
if tofile.endswith('.cpp'):
|
||||||
flags += ['--cplus']
|
flags += ['--cplus']
|
||||||
|
|
||||||
|
|
|
@ -1,14 +1,15 @@
|
||||||
cython>=0.24,<0.28.0
|
cython>=0.25
|
||||||
numpy>=1.15.0
|
numpy>=1.15.0
|
||||||
cymem>=1.30,<1.32
|
cymem>=2.0.2,<2.1.0
|
||||||
preshed>=1.0.0,<2.0.0
|
preshed>=2.0.1,<2.1.0
|
||||||
thinc>=6.11.2,<6.12.0
|
thinc==7.0.0.dev0
|
||||||
murmurhash>=0.28,<0.29
|
blis>=0.2.2,<0.3.0
|
||||||
|
murmurhash>=0.28.0,<1.1.0
|
||||||
cytoolz>=0.9.0,<0.10.0
|
cytoolz>=0.9.0,<0.10.0
|
||||||
plac<1.0.0,>=0.9.6
|
plac<1.0.0,>=0.9.6
|
||||||
ujson>=1.35
|
ujson>=1.35
|
||||||
dill>=0.2,<0.3
|
dill>=0.2,<0.3
|
||||||
regex==2017.4.5
|
regex==2018.01.10
|
||||||
requests>=2.13.0,<3.0.0
|
requests>=2.13.0,<3.0.0
|
||||||
pytest>=3.6.0,<4.0.0
|
pytest>=3.6.0,<4.0.0
|
||||||
pytest-timeout>=1.3.0,<2.0.0
|
pytest-timeout>=1.3.0,<2.0.0
|
||||||
|
|
12
setup.py
12
setup.py
|
@ -197,13 +197,14 @@ def setup_package():
|
||||||
scripts=["bin/spacy"],
|
scripts=["bin/spacy"],
|
||||||
install_requires=[
|
install_requires=[
|
||||||
"numpy>=1.15.0",
|
"numpy>=1.15.0",
|
||||||
"murmurhash>=0.28,<0.29",
|
"murmurhash>=0.28.0,<1.1.0",
|
||||||
"cymem>=1.30,<1.32",
|
"cymem>=2.0.2,<2.1.0",
|
||||||
"preshed>=1.0.0,<2.0.0",
|
"preshed>=2.0.1,<2.1.0",
|
||||||
"thinc>=6.11.2,<6.12.0",
|
"thinc==7.0.0.dev0",
|
||||||
|
"blis>=0.2.2,<0.3.0",
|
||||||
"plac<1.0.0,>=0.9.6",
|
"plac<1.0.0,>=0.9.6",
|
||||||
"ujson>=1.35",
|
"ujson>=1.35",
|
||||||
"regex==2017.4.5",
|
"regex==2018.01.10",
|
||||||
"dill>=0.2,<0.3",
|
"dill>=0.2,<0.3",
|
||||||
"requests>=2.13.0,<3.0.0",
|
"requests>=2.13.0,<3.0.0",
|
||||||
'pathlib==1.0.1; python_version < "3.4"',
|
'pathlib==1.0.1; python_version < "3.4"',
|
||||||
|
@ -214,6 +215,7 @@ def setup_package():
|
||||||
"cuda80": ["cupy-cuda80>=4.0"],
|
"cuda80": ["cupy-cuda80>=4.0"],
|
||||||
"cuda90": ["cupy-cuda90>=4.0"],
|
"cuda90": ["cupy-cuda90>=4.0"],
|
||||||
"cuda91": ["cupy-cuda91>=4.0"],
|
"cuda91": ["cupy-cuda91>=4.0"],
|
||||||
|
"cuda92": ["cupy-cuda92>=4.0"],
|
||||||
},
|
},
|
||||||
classifiers=[
|
classifiers=[
|
||||||
"Development Status :: 5 - Production/Stable",
|
"Development Status :: 5 - Production/Stable",
|
||||||
|
|
|
@ -234,7 +234,7 @@ class EntityRuler(object):
|
||||||
**kwargs: Other config parameters, mostly for consistency.
|
**kwargs: Other config parameters, mostly for consistency.
|
||||||
RETURNS (EntityRuler): The loaded entity ruler.
|
RETURNS (EntityRuler): The loaded entity ruler.
|
||||||
"""
|
"""
|
||||||
patterns = msgpack.loads(patterns_bytes)
|
patterns = msgpack.loads(patterns_bytes, raw=False)
|
||||||
self.add_patterns(patterns)
|
self.add_patterns(patterns)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
@ -243,7 +243,7 @@ class EntityRuler(object):
|
||||||
|
|
||||||
RETURNS (bytes): The serialized patterns.
|
RETURNS (bytes): The serialized patterns.
|
||||||
"""
|
"""
|
||||||
return msgpack.dumps(self.patterns)
|
return msgpack.dumps(self.patterns, use_bin_type=True)
|
||||||
|
|
||||||
def from_disk(self, path, **kwargs):
|
def from_disk(self, path, **kwargs):
|
||||||
"""Load the entity ruler from a file. Expects a file containing
|
"""Load the entity ruler from a file. Expects a file containing
|
||||||
|
@ -747,7 +747,7 @@ class Tagger(Pipe):
|
||||||
serialize['cfg'] = lambda: ujson.dumps(self.cfg)
|
serialize['cfg'] = lambda: ujson.dumps(self.cfg)
|
||||||
tag_map = OrderedDict(sorted(self.vocab.morphology.tag_map.items()))
|
tag_map = OrderedDict(sorted(self.vocab.morphology.tag_map.items()))
|
||||||
serialize['tag_map'] = lambda: msgpack.dumps(
|
serialize['tag_map'] = lambda: msgpack.dumps(
|
||||||
tag_map, use_bin_type=True, encoding='utf8')
|
tag_map, use_bin_type=True)
|
||||||
return util.to_bytes(serialize, exclude)
|
return util.to_bytes(serialize, exclude)
|
||||||
|
|
||||||
def from_bytes(self, bytes_data, **exclude):
|
def from_bytes(self, bytes_data, **exclude):
|
||||||
|
@ -765,7 +765,7 @@ class Tagger(Pipe):
|
||||||
self.model.from_bytes(b)
|
self.model.from_bytes(b)
|
||||||
|
|
||||||
def load_tag_map(b):
|
def load_tag_map(b):
|
||||||
tag_map = msgpack.loads(b, encoding='utf8')
|
tag_map = msgpack.loads(b, raw=False)
|
||||||
self.vocab.morphology = Morphology(
|
self.vocab.morphology = Morphology(
|
||||||
self.vocab.strings, tag_map=tag_map,
|
self.vocab.strings, tag_map=tag_map,
|
||||||
lemmatizer=self.vocab.morphology.lemmatizer,
|
lemmatizer=self.vocab.morphology.lemmatizer,
|
||||||
|
@ -785,7 +785,7 @@ class Tagger(Pipe):
|
||||||
serialize = OrderedDict((
|
serialize = OrderedDict((
|
||||||
('vocab', lambda p: self.vocab.to_disk(p)),
|
('vocab', lambda p: self.vocab.to_disk(p)),
|
||||||
('tag_map', lambda p: p.open('wb').write(msgpack.dumps(
|
('tag_map', lambda p: p.open('wb').write(msgpack.dumps(
|
||||||
tag_map, use_bin_type=True, encoding='utf8'))),
|
tag_map, use_bin_type=True))),
|
||||||
('model', lambda p: p.open('wb').write(self.model.to_bytes())),
|
('model', lambda p: p.open('wb').write(self.model.to_bytes())),
|
||||||
('cfg', lambda p: p.open('w').write(json_dumps(self.cfg)))
|
('cfg', lambda p: p.open('w').write(json_dumps(self.cfg)))
|
||||||
))
|
))
|
||||||
|
@ -803,7 +803,7 @@ class Tagger(Pipe):
|
||||||
|
|
||||||
def load_tag_map(p):
|
def load_tag_map(p):
|
||||||
with p.open('rb') as file_:
|
with p.open('rb') as file_:
|
||||||
tag_map = msgpack.loads(file_.read(), encoding='utf8')
|
tag_map = msgpack.loads(file_.read(), raw=False)
|
||||||
self.vocab.morphology = Morphology(
|
self.vocab.morphology = Morphology(
|
||||||
self.vocab.strings, tag_map=tag_map,
|
self.vocab.strings, tag_map=tag_map,
|
||||||
lemmatizer=self.vocab.morphology.lemmatizer,
|
lemmatizer=self.vocab.morphology.lemmatizer,
|
||||||
|
|
|
@ -25,8 +25,7 @@ from thinc.misc import LayerNorm
|
||||||
from thinc.neural.ops import CupyOps
|
from thinc.neural.ops import CupyOps
|
||||||
from thinc.neural.util import get_array_module
|
from thinc.neural.util import get_array_module
|
||||||
from thinc.linalg cimport Vec, VecVec
|
from thinc.linalg cimport Vec, VecVec
|
||||||
from thinc cimport openblas
|
cimport blis.cy
|
||||||
|
|
||||||
|
|
||||||
from .._ml import zero_init, PrecomputableAffine, Tok2Vec, flatten
|
from .._ml import zero_init, PrecomputableAffine, Tok2Vec, flatten
|
||||||
from .._ml import link_vectors_to_models, create_default_optimizer
|
from .._ml import link_vectors_to_models, create_default_optimizer
|
||||||
|
@ -107,10 +106,14 @@ cdef void predict_states(ActivationsC* A, StateC** states,
|
||||||
which = Vec.arg_max(&A.unmaxed[index], n.pieces)
|
which = Vec.arg_max(&A.unmaxed[index], n.pieces)
|
||||||
A.hiddens[i*n.hiddens + j] = A.unmaxed[index + which]
|
A.hiddens[i*n.hiddens + j] = A.unmaxed[index + which]
|
||||||
memset(A.scores, 0, n.states * n.classes * sizeof(float))
|
memset(A.scores, 0, n.states * n.classes * sizeof(float))
|
||||||
|
cdef double one = 1.0
|
||||||
# Compute hidden-to-output
|
# Compute hidden-to-output
|
||||||
openblas.simple_gemm(A.scores, n.states, n.classes,
|
blis.cy.gemm(blis.cy.NO_TRANSPOSE, blis.cy.TRANSPOSE,
|
||||||
A.hiddens, n.states, n.hiddens,
|
n.states, n.classes, n.hiddens, one,
|
||||||
W.hidden_weights, n.classes, n.hiddens, 0, 1)
|
<float*>A.hiddens, n.hiddens, 1,
|
||||||
|
<float*>W.hidden_weights, n.hiddens, 1,
|
||||||
|
one,
|
||||||
|
<float*>A.scores, n.classes, 1)
|
||||||
# Add bias
|
# Add bias
|
||||||
for i in range(n.states):
|
for i in range(n.states):
|
||||||
VecVec.add_i(&A.scores[i*n.classes],
|
VecVec.add_i(&A.scores[i*n.classes],
|
||||||
|
@ -132,8 +135,9 @@ cdef void sum_state_features(float* output,
|
||||||
else:
|
else:
|
||||||
idx = token_ids[f] * id_stride + f*O
|
idx = token_ids[f] * id_stride + f*O
|
||||||
feature = &cached[idx]
|
feature = &cached[idx]
|
||||||
openblas.simple_axpy(&output[b*O], O,
|
blis.cy.axpyv(blis.cy.NO_CONJUGATE, O, one,
|
||||||
feature, one)
|
<float*>feature, 1,
|
||||||
|
&output[b*O], 1)
|
||||||
token_ids += F
|
token_ids += F
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -27,7 +27,6 @@ from thinc.misc import LayerNorm
|
||||||
from thinc.neural.ops import CupyOps
|
from thinc.neural.ops import CupyOps
|
||||||
from thinc.neural.util import get_array_module
|
from thinc.neural.util import get_array_module
|
||||||
from thinc.linalg cimport Vec, VecVec
|
from thinc.linalg cimport Vec, VecVec
|
||||||
from thinc cimport openblas
|
|
||||||
|
|
||||||
from ._parser_model cimport resize_activations, predict_states, arg_max_if_valid
|
from ._parser_model cimport resize_activations, predict_states, arg_max_if_valid
|
||||||
from ._parser_model cimport WeightsC, ActivationsC, SizesC, cpu_log_loss
|
from ._parser_model cimport WeightsC, ActivationsC, SizesC, cpu_log_loss
|
||||||
|
|
|
@ -566,7 +566,7 @@ def to_bytes(getters, exclude):
|
||||||
for key, getter in getters.items():
|
for key, getter in getters.items():
|
||||||
if key not in exclude:
|
if key not in exclude:
|
||||||
serialized[key] = getter()
|
serialized[key] = getter()
|
||||||
return msgpack.dumps(serialized, use_bin_type=True, encoding='utf8')
|
return msgpack.dumps(serialized, use_bin_type=True)
|
||||||
|
|
||||||
|
|
||||||
def from_bytes(bytes_data, setters, exclude):
|
def from_bytes(bytes_data, setters, exclude):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user