💫 Use Blis for matrix multiplications (#2966)

Our epic matrix multiplication odyssey is drawing to a close...

I've now finally got the Blis linear algebra routines into a self-contained Python package, with wheels for Windows, Linux and OSX. The only platform still missing is Python 2.7 on Windows. The result is at https://github.com/explosion/cython-blis
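As a quick sanity check that the wheels work, the routines can also be called from Python. A minimal sketch, assuming the `blis.py` wrapper exposes `gemm` the way the cython-blis README describes:

```python
import numpy as np
from blis.py import gemm  # Python-level entry point shipped with the wheels (assumed API)

A = np.random.uniform(size=(64, 128)).astype("float32")
B = np.random.uniform(size=(128, 32)).astype("float32")

C = gemm(A, B)  # single-threaded matrix multiply via BLIS
assert np.allclose(C, A @ B, atol=1e-4)
```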

Thinc v7.0.0 will make the change to Blis. I've put a Thinc v7.0.0.dev0 up on PyPI so that we can test these changes with the CI, and even get them out to spacy-nightly, before Thinc v7.0.0 is released. This PR also brings the other dependencies into line with the versions master is currently using. I've also resolved the msgpack deprecation problems, and gotten spaCy and Thinc up to date with the latest Cython.
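For reference, the msgpack fix just means being explicit where the old defaults are deprecated; a minimal sketch of the pattern used throughout (assuming msgpack-python >= 0.5):

```python
import msgpack

data = {"tag_map": {"NN": {"pos": "NOUN"}}}

# use_bin_type=True distinguishes bytes from str when packing;
# raw=False decodes msgpack strings back to unicode instead of bytes,
# replacing the deprecated encoding='utf8' argument.
packed = msgpack.dumps(data, use_bin_type=True)
assert msgpack.loads(packed, raw=False) == data
```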

The point of switching to Blis is to have control over how our matrix multiplications are executed across platforms. When we relied on numpy for this, the underlying BLAS library differed between pip and conda installs, OSX would use Accelerate, etc. This opened us up to different bugs and performance problems on each platform, especially once multi-threading was introduced.
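A quick way to see which backend a particular numpy build ended up with:

```python
import numpy as np

# Prints the BLAS/LAPACK libraries this numpy build is linked against:
# typically OpenBLAS for pip wheels, MKL for conda, Accelerate on OSX.
np.show_config()
```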

With the change to Blis, we now strictly single-thread the matrix multiplications. This will make it much easier to use multiprocessing to parallelise the runtime, since we won't have nested parallelism problems to deal with.
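A rough sketch of the kind of pattern this enables (the `process_batch` worker is hypothetical; the point is that each process keeps its matrix multiplications on a single thread, so workers don't oversubscribe the cores):

```python
from multiprocessing import Pool

def process_batch(texts):
    # Stand-in for real work, e.g. running a pipeline over the texts.
    # With single-threaded BLIS underneath, each worker stays on one core,
    # so there's no nested parallelism to manage.
    return [len(t) for t in texts]

if __name__ == "__main__":
    batches = [["First text.", "Second text."], ["Third text."]]
    with Pool(processes=2) as pool:
        results = pool.map(process_batch, batches)
    print(results)
```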

* Use blis

* Use -2 arg to Cython

* Update dependencies

* Fix requirements

* Update setup dependencies

* Fix requirement typo

* Fix msgpack errors

* Remove Python27 test from Appveyor, until Blis works there

* Auto-format setup.py

* Fix murmurhash version
Matthew Honnibal 2018-11-27 00:44:04 +01:00 committed by GitHub
parent 3832c8a2c1
commit 2c37e0ccf6
8 changed files with 37 additions and 30 deletions

View File

@@ -5,14 +5,15 @@ environment:
# For Python versions available on Appveyor, see
# http://www.appveyor.com/docs/installed-software#python
- PYTHON: "C:\\Python27-x64"
#- PYTHON: "C:\\Python27-x64"
#- PYTHON: "C:\\Python34"
#- PYTHON: "C:\\Python35"
#- DISTUTILS_USE_SDK: "1"
#- PYTHON: "C:\\Python34-x64"
#- DISTUTILS_USE_SDK: "1"
#- PYTHON: "C:\\Python35-x64"
- PYTHON: "C:\\Python35-x64"
- PYTHON: "C:\\Python36-x64"
- PYTHON: "C:\\Python37-x64"
install:
# We need wheel installed to build wheels

View File

@@ -38,7 +38,7 @@ import argparse
HASH_FILE = 'cythonize.json'
def process_pyx(fromfile, tofile):
def process_pyx(fromfile, tofile, language_level='-2'):
print('Processing %s' % fromfile)
try:
from Cython.Compiler.Version import version as cython_version
@@ -49,7 +49,7 @@ def process_pyx(fromfile, tofile):
except ImportError:
pass
flags = ['--fast-fail']
flags = ['--fast-fail', language_level]
if tofile.endswith('.cpp'):
flags += ['--cplus']
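A standalone sketch of what the new default amounts to (file path is hypothetical): the explicit `-2` pins Cython's language level to Python 2 semantics, so builds don't depend on whatever default the installed Cython picks.

```python
import subprocess

def cythonize_file(fromfile, tofile, language_level="-2"):
    # Mirrors the patched helper above: the language level is passed straight
    # through to the cython CLI alongside the existing --fast-fail flag.
    flags = ["--fast-fail", language_level]
    if tofile.endswith(".cpp"):
        flags.append("--cplus")
    subprocess.check_call(["cython"] + flags + ["-o", tofile, fromfile])

# cythonize_file("spacy/strings.pyx", "spacy/strings.cpp")
```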

View File

@@ -1,14 +1,15 @@
cython>=0.24,<0.28.0
cython>=0.25
numpy>=1.15.0
cymem>=1.30,<1.32
preshed>=1.0.0,<2.0.0
thinc>=6.11.2,<6.12.0
murmurhash>=0.28,<0.29
cymem>=2.0.2,<2.1.0
preshed>=2.0.1,<2.1.0
thinc==7.0.0.dev0
blis>=0.2.2,<0.3.0
murmurhash>=0.28.0,<1.1.0
cytoolz>=0.9.0,<0.10.0
plac<1.0.0,>=0.9.6
ujson>=1.35
dill>=0.2,<0.3
regex==2017.4.5
regex==2018.01.10
requests>=2.13.0,<3.0.0
pytest>=3.6.0,<4.0.0
pytest-timeout>=1.3.0,<2.0.0

View File

@@ -197,13 +197,14 @@ def setup_package():
scripts=["bin/spacy"],
install_requires=[
"numpy>=1.15.0",
"murmurhash>=0.28,<0.29",
"cymem>=1.30,<1.32",
"preshed>=1.0.0,<2.0.0",
"thinc>=6.11.2,<6.12.0",
"murmurhash>=0.28.0,<1.1.0",
"cymem>=2.0.2,<2.1.0",
"preshed>=2.0.1,<2.1.0",
"thinc==7.0.0.dev0",
"blis>=0.2.2,<0.3.0",
"plac<1.0.0,>=0.9.6",
"ujson>=1.35",
"regex==2017.4.5",
"regex==2018.01.10",
"dill>=0.2,<0.3",
"requests>=2.13.0,<3.0.0",
'pathlib==1.0.1; python_version < "3.4"',
@@ -214,6 +215,7 @@ def setup_package():
"cuda80": ["cupy-cuda80>=4.0"],
"cuda90": ["cupy-cuda90>=4.0"],
"cuda91": ["cupy-cuda91>=4.0"],
"cuda92": ["cupy-cuda92>=4.0"],
},
classifiers=[
"Development Status :: 5 - Production/Stable",

View File

@@ -234,7 +234,7 @@ class EntityRuler(object):
**kwargs: Other config paramters, mostly for consistency.
RETURNS (EntityRuler): The loaded entity ruler.
"""
patterns = msgpack.loads(patterns_bytes)
patterns = msgpack.loads(patterns_bytes, raw=False)
self.add_patterns(patterns)
return self
@@ -243,7 +243,7 @@ class EntityRuler(object):
RETURNS (bytes): The serialized patterns.
"""
return msgpack.dumps(self.patterns)
return msgpack.dumps(self.patterns, use_bin_type=True)
def from_disk(self, path, **kwargs):
"""Load the entity ruler from a file. Expects a file containing
@@ -747,7 +747,7 @@ class Tagger(Pipe):
serialize['cfg'] = lambda: ujson.dumps(self.cfg)
tag_map = OrderedDict(sorted(self.vocab.morphology.tag_map.items()))
serialize['tag_map'] = lambda: msgpack.dumps(
tag_map, use_bin_type=True, encoding='utf8')
tag_map, use_bin_type=True)
return util.to_bytes(serialize, exclude)
def from_bytes(self, bytes_data, **exclude):
@@ -765,7 +765,7 @@ class Tagger(Pipe):
self.model.from_bytes(b)
def load_tag_map(b):
tag_map = msgpack.loads(b, encoding='utf8')
tag_map = msgpack.loads(b, raw=False)
self.vocab.morphology = Morphology(
self.vocab.strings, tag_map=tag_map,
lemmatizer=self.vocab.morphology.lemmatizer,
@@ -785,7 +785,7 @@ class Tagger(Pipe):
serialize = OrderedDict((
('vocab', lambda p: self.vocab.to_disk(p)),
('tag_map', lambda p: p.open('wb').write(msgpack.dumps(
tag_map, use_bin_type=True, encoding='utf8'))),
tag_map, use_bin_type=True))),
('model', lambda p: p.open('wb').write(self.model.to_bytes())),
('cfg', lambda p: p.open('w').write(json_dumps(self.cfg)))
))
@@ -803,7 +803,7 @@ class Tagger(Pipe):
def load_tag_map(p):
with p.open('rb') as file_:
tag_map = msgpack.loads(file_.read(), encoding='utf8')
tag_map = msgpack.loads(file_.read(), raw=False)
self.vocab.morphology = Morphology(
self.vocab.strings, tag_map=tag_map,
lemmatizer=self.vocab.morphology.lemmatizer,

View File

@@ -25,8 +25,7 @@ from thinc.misc import LayerNorm
from thinc.neural.ops import CupyOps
from thinc.neural.util import get_array_module
from thinc.linalg cimport Vec, VecVec
from thinc cimport openblas
cimport blis.cy
from .._ml import zero_init, PrecomputableAffine, Tok2Vec, flatten
from .._ml import link_vectors_to_models, create_default_optimizer
@@ -107,10 +106,14 @@ cdef void predict_states(ActivationsC* A, StateC** states,
which = Vec.arg_max(&A.unmaxed[index], n.pieces)
A.hiddens[i*n.hiddens + j] = A.unmaxed[index + which]
memset(A.scores, 0, n.states * n.classes * sizeof(float))
cdef double one = 1.0
# Compute hidden-to-output
openblas.simple_gemm(A.scores, n.states, n.classes,
A.hiddens, n.states, n.hiddens,
W.hidden_weights, n.classes, n.hiddens, 0, 1)
blis.cy.gemm(blis.cy.NO_TRANSPOSE, blis.cy.TRANSPOSE,
n.states, n.classes, n.hiddens, one,
<float*>A.hiddens, n.hiddens, 1,
<float*>W.hidden_weights, n.hiddens, 1,
one,
<float*>A.scores, n.classes, 1)
# Add bias
for i in range(n.states):
VecVec.add_i(&A.scores[i*n.classes],
@@ -132,8 +135,9 @@ cdef void sum_state_features(float* output,
else:
idx = token_ids[f] * id_stride + f*O
feature = &cached[idx]
openblas.simple_axpy(&output[b*O], O,
feature, one)
blis.cy.axpyv(blis.cy.NO_CONJUGATE, O, one,
<float*>feature, 1,
&output[b*O], 1)
token_ids += F
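For readers not fluent in the BLIS typed API, a hedged numpy sketch of what the `gemm` call above computes (dimensions are illustrative):

```python
import numpy as np

n_states, n_classes, n_hiddens = 8, 50, 64
hiddens = np.random.rand(n_states, n_hiddens).astype("float32")          # A.hiddens
hidden_weights = np.random.rand(n_classes, n_hiddens).astype("float32")  # W.hidden_weights
scores = np.zeros((n_states, n_classes), dtype="float32")                # A.scores after memset

# gemm(NO_TRANSPOSE, TRANSPOSE, ...) with alpha = beta = 1.0 is
# C = A @ B.T + C, i.e. the hidden-to-output projection:
scores += hiddens @ hidden_weights.T

# The axpyv call in sum_state_features is likewise just
# output[b*O : (b+1)*O] += 1.0 * feature over O contiguous floats.
```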

View File

@@ -27,7 +27,6 @@ from thinc.misc import LayerNorm
from thinc.neural.ops import CupyOps
from thinc.neural.util import get_array_module
from thinc.linalg cimport Vec, VecVec
from thinc cimport openblas
from ._parser_model cimport resize_activations, predict_states, arg_max_if_valid
from ._parser_model cimport WeightsC, ActivationsC, SizesC, cpu_log_loss

View File

@@ -566,7 +566,7 @@ def to_bytes(getters, exclude):
for key, getter in getters.items():
if key not in exclude:
serialized[key] = getter()
return msgpack.dumps(serialized, use_bin_type=True, encoding='utf8')
return msgpack.dumps(serialized, use_bin_type=True)
def from_bytes(bytes_data, setters, exclude):