From 6d58451a4d4c3e460b0396a90d5883e48b022f67 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 5 Feb 2016 11:50:11 +0100 Subject: [PATCH 1/5] * Fix requirement of thinc --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8964308d4..89834ee05 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ cython cymem>=1.30,<1.31 pathlib preshed>=0.46.1,<0.47.0 -thinc>=4.2.0,<4.3.0 +thinc>=5.0.0,<5.1.0 murmurhash>=0.26,<0.27 text-unidecode numpy From add8f07f6189749c0f17a58bfd20f511f1dc8abe Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 5 Feb 2016 12:19:51 +0100 Subject: [PATCH 2/5] * Conditionally link against openmp, on not-darwin --- setup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.py b/setup.py index f92f8c50b..a177db5fa 100644 --- a/setup.py +++ b/setup.py @@ -79,6 +79,9 @@ if sys.platform.startswith('darwin'): compile_options['other'].append('-mmacosx-version-min=10.8') compile_options['other'].append('-stdlib=libc++') link_options['other'].append('-lc++') +else: + compile_options['other'].append('-fopenmp') + link_options['other'].append('-fopenmp') class build_ext_options: From af58f273b3d4f1640a550ee27a019ae3e7439c90 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 5 Feb 2016 12:20:29 +0100 Subject: [PATCH 3/5] * Fix spacy.language.pipe --- spacy/language.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/spacy/language.py b/spacy/language.py index 58d137e2e..29a844c13 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -271,19 +271,19 @@ class Language(object): def pipe(self, texts, tag=True, parse=True, entity=True, n_threads=2, batch_size=1000): - stream = self.tokenizer.stream(texts, + stream = self.tokenizer.pipe(texts, n_threads=n_threads, batch_size=batch_size) if self.tagger and tag: - stream = self.tagger.stream(stream, + stream = self.tagger.pipe(stream, n_threads=n_threads, batch_size=batch_size) if self.matcher and entity: - stream = self.matcher.stream(stream, + stream = self.matcher.pipe(stream, n_threads=n_threads, batch_size=batch_size) if self.parser and parse: - stream = self.parser.stream(stream, + stream = self.parser.pipe(stream, n_threads=n_threads, batch_size=batch_size) if self.entity and entity: - stream = self.entity.stream(stream, + stream = self.entity.pipe(stream, n_threads=n_threads, batch_size=batch_size) for doc in stream: yield doc From 048dfe35aa87d9eff696105c03811e799984ed99 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 5 Feb 2016 12:20:42 +0100 Subject: [PATCH 4/5] * cimport cython.parallel --- spacy/syntax/parser.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/spacy/syntax/parser.pyx b/spacy/syntax/parser.pyx index 40f010f4a..c62ba6c6f 100644 --- a/spacy/syntax/parser.pyx +++ b/spacy/syntax/parser.pyx @@ -4,6 +4,7 @@ MALT-style dependency parser """ from __future__ import unicode_literals cimport cython +cimport cython.parallel from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF from cpython.exc cimport PyErr_CheckSignals From 249dccbe95523f523502a0b01a134c381952c21d Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 5 Feb 2016 12:47:57 +0100 Subject: [PATCH 5/5] * Fix Language.pipe --- spacy/language.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/language.py b/spacy/language.py index 29a844c13..36a56413a 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -284,7 +284,7 @@ class Language(object): n_threads=n_threads, batch_size=batch_size) if self.entity and entity: stream = self.entity.pipe(stream, - n_threads=n_threads, batch_size=batch_size) + n_threads=1, batch_size=batch_size) for doc in stream: yield doc