From f0e7da64780758df7b2e97137387ebdd1441cf2b Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sat, 13 Oct 2018 23:53:26 +0200 Subject: [PATCH 1/8] Fix formatting and consistency --- website/api/doc.jade | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/api/doc.jade b/website/api/doc.jade index 574865fb3..7d304926c 100644 --- a/website/api/doc.jade +++ b/website/api/doc.jade @@ -581,7 +581,7 @@ p | web applications. +aside-code("Example"). - doc = nlp('Alice ate the pizza.') + doc = nlp(u'Alice ate the pizza.') trees = doc.print_tree() # {'modifiers': [ # {'modifiers': [], 'NE': 'PERSON', 'word': 'Alice', 'arc': 'nsubj', 'POS_coarse': 'PROPN', 'POS_fine': 'NNP', 'lemma': 'Alice'}, @@ -615,8 +615,8 @@ p | document. +aside-code("Example"). - tokens = nlp(u'Mr. Best flew to New York on Saturday morning.') - ents = list(tokens.ents) + doc = nlp(u'Mr. Best flew to New York on Saturday morning.') + ents = list(doc.ents) assert ents[0].label == 346 assert ents[0].label_ == 'PERSON' assert ents[0].text == 'Mr. Best' From 23d5b4ff5b113cfcbe7cce5230a816e6a16ec2f2 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sat, 13 Oct 2018 23:53:33 +0200 Subject: [PATCH 2/8] Update docs for new version [ci skip] --- website/api/cli.jade | 1 + website/api/span.jade | 24 ++++++++++++++++++++++++ website/models/_data.json | 13 ++----------- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/website/api/cli.jade b/website/api/cli.jade index dcc1a0551..28c76c09c 100644 --- a/website/api/cli.jade +++ b/website/api/cli.jade @@ -346,6 +346,7 @@ p +row +cell #[code --verbose] + +tag-new("2.0.13") +cell flag +cell Show more detail message during training. diff --git a/website/api/span.jade b/website/api/span.jade index 681664269..7be1ad4a4 100644 --- a/website/api/span.jade +++ b/website/api/span.jade @@ -349,6 +349,30 @@ p Retokenize the document, such that the span is merged into a single token. +cell #[code Token] +cell The newly merged token. 
++h(2, "ents") Span.ents + +tag property + +tag-model("NER") + +p + | Iterate over the entities in the span. Yields named-entity + | #[code Span] objects, if the entity recognizer has been applied to the + | parent document. + ++aside-code("Example"). + doc = nlp(u'Mr. Best flew to New York on Saturday morning.') + span = doc[0:6] + ents = list(span.ents) + assert ents[0].label == 346 + assert ents[0].label_ == 'PERSON' + assert ents[0].text == 'Mr. Best' + ++table(["Name", "Type", "Description"]) + +row("foot") + +cell yields + +cell #[code Span] + +cell Entities in the span. + + +h(2, "as_doc") Span.as_doc p diff --git a/website/models/_data.json b/website/models/_data.json index 4eda2564a..d1f3862f5 100644 --- a/website/models/_data.json +++ b/website/models/_data.json @@ -27,17 +27,6 @@ } }, - "comparison": { - "title": "Model Comparison", - "teaser": "Compare spaCy's statistical models and their accuracy.", - "tag": "experimental", - "compare_models": true, - "default_models": { - "model1": "en_core_web_sm", - "model2": "en_core_web_lg" - } - }, - "MODELS": { "en": ["en_core_web_sm", "en_core_web_md", "en_core_web_lg", "en_vectors_web_lg"], "de": ["de_core_news_sm"], @@ -122,6 +111,8 @@ "fa": "Persian", "ur": "Urdu", "tt": "Tatar", + "te": "Telugu", + "si": "Sinhala", "ga": "Irish", "bn": "Bengali", "hi": "Hindi", From 30aa7f8b2039fdf627d243700a6a16fae7753409 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sat, 13 Oct 2018 23:55:50 +0200 Subject: [PATCH 3/8] Increment version [ci skip] --- website/_harp.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/_harp.json b/website/_harp.json index f866c554d..8867cf61c 100644 --- a/website/_harp.json +++ b/website/_harp.json @@ -14,7 +14,7 @@ "MODELS_REPO": "explosion/spacy-models", "SPACY_VERSION": "2.0", - "BINDER_VERSION": "2.0.12", + "BINDER_VERSION": "2.0.13", "SOCIAL": { "twitter": "spacy_io", From ac4cadd31d8f0da395c44d416a65003d74292c07 Mon Sep 17 00:00:00 2001 From: Ines 
Montani Date: Sun, 14 Oct 2018 00:04:37 +0200 Subject: [PATCH 4/8] Add info on wheels [ci skip] --- README.rst | 5 +++-- website/usage/_install/_instructions.jade | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index eadf1f018..f90cf933e 100644 --- a/README.rst +++ b/README.rst @@ -113,7 +113,7 @@ the `documentation `_. ==================== === **Operating system** macOS / OS X, Linux, Windows (Cygwin, MinGW, Visual Studio) **Python version** CPython 2.7, 3.4+. Only 64 bit. -**Package managers** `pip`_ (source packages only), `conda`_ (via ``conda-forge``) +**Package managers** `pip`_, `conda`_ (via ``conda-forge``) ==================== === .. _pip: https://pypi.python.org/pypi/spacy @@ -122,7 +122,8 @@ the `documentation `_. pip --- -Using pip, spaCy releases are currently only available as source packages. +Using pip, spaCy releases are available as source packages and binary wheels +(as of ``v2.0.13``). .. code:: bash diff --git a/website/usage/_install/_instructions.jade b/website/usage/_install/_instructions.jade index fb9247bb6..12d175723 100644 --- a/website/usage/_install/_instructions.jade +++ b/website/usage/_install/_instructions.jade @@ -3,7 +3,9 @@ +h(3, "pip") pip +badge("https://img.shields.io/pypi/v/spacy.svg?style=flat-square", "https://pypi.python.org/pypi/spacy") -p Using pip, spaCy releases are currently only available as source packages. +p + | Using pip, spaCy releases are available as source packages and binary + | wheels (as of #[code v2.0.13]). +code(false, "bash"). 
pip install -U spacy From cb075c8e7266163bc581fd509cc430082052d23a Mon Sep 17 00:00:00 2001 From: Keshan Date: Sun, 14 Oct 2018 03:36:40 +0530 Subject: [PATCH 5/8] Adding "This is a sentence" example to Sinhala (#2846) --- spacy/lang/si/examples.py | 1 + 1 file changed, 1 insertion(+) diff --git a/spacy/lang/si/examples.py b/spacy/lang/si/examples.py index 68862d3cf..842dfdd7e 100644 --- a/spacy/lang/si/examples.py +++ b/spacy/lang/si/examples.py @@ -11,6 +11,7 @@ Example sentences to test spaCy and its language models. sentences = [ + "මෙය වාක්‍යයකි.", "ඔබ කවුද?", "ගූගල් සමාගම ඩොලර් මිලියන 500 කට එම ආයතනය මිලදී ගන්නා ලදී.", "කොළඹ ශ්‍රී ලංකාවේ ප්‍රධානතම නගරය යි.", From 8f393b1dcf81930dba5df29be7c63c267edeb01d Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sun, 14 Oct 2018 00:48:04 +0200 Subject: [PATCH 6/8] Add wheels badge --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index f90cf933e..0d38d86f3 100644 --- a/README.rst +++ b/README.rst @@ -32,9 +32,9 @@ integration. It's commercial open-source software, released under the MIT licens :target: https://anaconda.org/conda-forge/spacy :alt: conda Version -.. image:: https://img.shields.io/badge/chat-join%20%E2%86%92-09a3d5.svg?style=flat-square&logo=gitter-white - :target: https://gitter.im/explosion/spaCy - :alt: spaCy on Gitter +.. image:: https://img.shields.io/badge/wheels-%E2%9C%93-green.svg?longCache=true&style=flat-square&logo=python&logoColor=white + :target: https://github.com/explosion/wheelwright/releases + :alt: spaCy wheels .. 
image:: https://img.shields.io/twitter/follow/spacy_io.svg?style=social&label=Follow :target: https://twitter.com/spacy_io From 3decf44dd3a10c2e3e1520d9e0cafbf777c67574 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sun, 14 Oct 2018 00:54:19 +0200 Subject: [PATCH 7/8] Update badge [ci skip] --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 0d38d86f3..c7debd679 100644 --- a/README.rst +++ b/README.rst @@ -32,9 +32,9 @@ integration. It's commercial open-source software, released under the MIT licens :target: https://anaconda.org/conda-forge/spacy :alt: conda Version -.. image:: https://img.shields.io/badge/wheels-%E2%9C%93-green.svg?longCache=true&style=flat-square&logo=python&logoColor=white +.. image:: https://img.shields.io/badge/wheels-%E2%9C%93-4c1.svg?longCache=true&style=flat-square&logo=python&logoColor=white :target: https://github.com/explosion/wheelwright/releases - :alt: spaCy wheels + :alt: Python wheels .. image:: https://img.shields.io/twitter/follow/spacy_io.svg?style=social&label=Follow :target: https://twitter.com/spacy_io From 76c43380e41a40ec9db830da13bebfed353ee2d6 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sun, 14 Oct 2018 01:00:55 +0200 Subject: [PATCH 8/8] Update README.rst [ci skip] --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index c7debd679..5633ef86c 100644 --- a/README.rst +++ b/README.rst @@ -5,7 +5,7 @@ spaCy is a library for advanced Natural Language Processing in Python and Cython It's built on the very latest research, and was designed from day one to be used in real products. spaCy comes with `pre-trained statistical models `_ and word -vectors, and currently supports tokenization for **20+ languages**. It features +vectors, and currently supports tokenization for **30+ languages**. 
It features the **fastest syntactic parser** in the world, convolutional **neural network models** for tagging, parsing and **named entity recognition** and easy **deep learning** integration. It's commercial open-source software, released under the MIT license.