From a5159ddcf58e583ffdc0b7cf4b911982a4107596 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sat, 11 May 2019 19:03:51 +0200 Subject: [PATCH 1/6] Set version to v2.1.4.dev1 --- spacy/about.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/about.py b/spacy/about.py index d1906fd76..abfbfb058 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -4,7 +4,7 @@ # fmt: off __title__ = "spacy" -__version__ = "2.1.4.dev0" +__version__ = "2.1.4.dev1" __summary__ = "Industrial-strength Natural Language Processing (NLP) with Python and Cython" __uri__ = "https://spacy.io" __author__ = "Explosion AI" From f6e9394aa57ae2e086ca5966e03ba5789d992e02 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sat, 11 May 2019 19:04:35 +0200 Subject: [PATCH 2/6] Fix push-tag script --- bin/push-tag.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/push-tag.sh b/bin/push-tag.sh index 57133499c..50b50c986 100755 --- a/bin/push-tag.sh +++ b/bin/push-tag.sh @@ -16,4 +16,4 @@ version=${version/\'/} version=${version/\"/} version=${version/\"/} git tag "v$version" -git push origin "v$version" --tags +git push origin "v$version" From 0bf6441863433575aebcbd0b238d27d95830c015 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sat, 11 May 2019 19:15:26 +0200 Subject: [PATCH 3/6] Fix .iob converter (closes #3620) --- spacy/cli/converters/iob2json.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/spacy/cli/converters/iob2json.py b/spacy/cli/converters/iob2json.py index b986ea61e..b6fde31a4 100644 --- a/spacy/cli/converters/iob2json.py +++ b/spacy/cli/converters/iob2json.py @@ -2,23 +2,17 @@ from __future__ import unicode_literals import re +from cytoolz import partition_all from ...gold import iob_to_biluo -from ...util import minibatch def iob2json(input_data, n_sents=10, *args, **kwargs): """ Convert IOB files into JSON format for use with train cli. """ - docs = [] - for group in minibatch(docs, n_sents): - group = list(group) - first = group.pop(0) - to_extend = first["paragraphs"][0]["sentences"] - for sent in group[1:]: - to_extend.extend(sent["paragraphs"][0]["sentences"]) - docs.append(first) + sentences = read_iob(input_data.split("\n")) + docs = merge_sentences(sentences, n_sents) return docs @@ -27,7 +21,6 @@ def read_iob(raw_sents): for line in raw_sents: if not line.strip(): continue - # tokens = [t.split("|") for t in line.split()] tokens = [re.split("[^\w\-]", line.strip())] if len(tokens[0]) == 3: words, pos, iob = zip(*tokens) @@ -49,3 +42,15 @@ def read_iob(raw_sents): paragraphs = [{"sentences": [sent]} for sent in sentences] docs = [{"id": 0, "paragraphs": [para]} for para in paragraphs] return docs + + +def merge_sentences(docs, n_sents): + merged = [] + for group in partition_all(n_sents, docs): + group = list(group) + first = group.pop(0) + to_extend = first["paragraphs"][0]["sentences"] + for sent in group[1:]: + to_extend.extend(sent["paragraphs"][0]["sentences"]) + merged.append(first) + return merged From aea1c93a0503bb61c18a44ceb376a2600d0cace5 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sat, 11 May 2019 21:12:09 +0200 Subject: [PATCH 4/6] Replace cytoolz.partition_all with util.minibatch --- spacy/cli/converters/iob2json.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/cli/converters/iob2json.py b/spacy/cli/converters/iob2json.py index b6fde31a4..e0086afa0 100644 --- a/spacy/cli/converters/iob2json.py +++ b/spacy/cli/converters/iob2json.py @@ -2,9 +2,9 @@ from __future__ import unicode_literals import re -from cytoolz import partition_all from ...gold import iob_to_biluo +from ...util import minibatch def iob2json(input_data, n_sents=10, *args, **kwargs): @@ -46,7 +46,7 @@ def read_iob(raw_sents): def merge_sentences(docs, n_sents): merged = [] - for group in partition_all(n_sents, docs): + for group in minibatch(docs, size=n_sents): group = list(group) first = group.pop(0) to_extend = first["paragraphs"][0]["sentences"] From 3aceeeaaeb0d27d1cfa3f1d5b7671c49ccc9174a Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sat, 11 May 2019 22:57:53 +0200 Subject: [PATCH 5/6] Set version to v2.1.4 --- spacy/about.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/about.py b/spacy/about.py index abfbfb058..5e7093606 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -4,7 +4,7 @@ # fmt: off __title__ = "spacy" -__version__ = "2.1.4.dev1" +__version__ = "2.1.4" __summary__ = "Industrial-strength Natural Language Processing (NLP) with Python and Cython" __uri__ = "https://spacy.io" __author__ = "Explosion AI" From f96af8526a927d2b18f70045d623fe0f3e2ec79d Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sat, 11 May 2019 23:03:56 +0200 Subject: [PATCH 6/6] Merge branch 'spacy.io' [ci skip] --- website/docs/api/top-level.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md index e687cefa8..9d5bdc527 100644 --- a/website/docs/api/top-level.md +++ b/website/docs/api/top-level.md @@ -351,7 +351,7 @@ the two-letter language code. | `name` | unicode | Two-letter language code, e.g. `'en'`. | | `cls` | `Language` | The language class, e.g. `English`. | -### util.lang_class_is_loaded (#util.lang_class_is_loaded tag="function" new="2.1") +### util.lang_class_is_loaded {#util.lang_class_is_loaded tag="function" new="2.1"} Check whether a `Language` class is already loaded. `Language` classes are loaded lazily, to avoid expensive setup code associated with the language data.