This commit is contained in:
Matthew Honnibal 2018-10-14 16:12:23 +02:00
commit 7de0dcb91f
8 changed files with 42 additions and 22 deletions

View File

@ -5,7 +5,7 @@ spaCy is a library for advanced Natural Language Processing in Python and Cython
It's built on the very latest research, and was designed from day one to be
used in real products. spaCy comes with
`pre-trained statistical models <https://spacy.io/models>`_ and word
vectors, and currently supports tokenization for **20+ languages**. It features
vectors, and currently supports tokenization for **30+ languages**. It features
the **fastest syntactic parser** in the world, convolutional **neural network models**
for tagging, parsing and **named entity recognition** and easy **deep learning**
integration. It's commercial open-source software, released under the MIT license.
@ -32,9 +32,9 @@ integration. It's commercial open-source software, released under the MIT licens
:target: https://anaconda.org/conda-forge/spacy
:alt: conda Version
.. image:: https://img.shields.io/badge/chat-join%20%E2%86%92-09a3d5.svg?style=flat-square&logo=gitter-white
:target: https://gitter.im/explosion/spaCy
:alt: spaCy on Gitter
.. image:: https://img.shields.io/badge/wheels-%E2%9C%93-4c1.svg?longCache=true&style=flat-square&logo=python&logoColor=white
:target: https://github.com/explosion/wheelwright/releases
:alt: Python wheels
.. image:: https://img.shields.io/twitter/follow/spacy_io.svg?style=social&label=Follow
:target: https://twitter.com/spacy_io
@ -113,7 +113,7 @@ the `documentation <https://spacy.io/usage>`_.
==================== ===
**Operating system** macOS / OS X, Linux, Windows (Cygwin, MinGW, Visual Studio)
**Python version** CPython 2.7, 3.4+. Only 64 bit.
**Package managers** `pip`_ (source packages only), `conda`_ (via ``conda-forge``)
**Package managers** `pip`_, `conda`_ (via ``conda-forge``)
==================== ===
.. _pip: https://pypi.python.org/pypi/spacy
@ -122,7 +122,8 @@ the `documentation <https://spacy.io/usage>`_.
pip
---
Using pip, spaCy releases are currently only available as source packages.
Using pip, spaCy releases are available as source packages and binary wheels
(as of ``v2.0.13``).
.. code:: bash

View File

@ -11,6 +11,7 @@ Example sentences to test spaCy and its language models.
sentences = [
"මෙය වාක්‍යයකි.",
"ඔබ කවුද?",
"ගූගල් සමාගම ඩොලර් මිලියන 500 කට එම ආයතනය මිලදී ගන්නා ලදී.",
"කොළඹ ශ්‍රී ලංකාවේ ප්‍රධානතම නගරය යි.",

View File

@ -14,7 +14,7 @@
"MODELS_REPO": "explosion/spacy-models",
"SPACY_VERSION": "2.0",
"BINDER_VERSION": "2.0.12",
"BINDER_VERSION": "2.0.13",
"SOCIAL": {
"twitter": "spacy_io",

View File

@ -346,6 +346,7 @@ p
+row
+cell #[code --verbose]
+tag-new("2.0.13")
+cell flag
+cell Show more detailed messages during training.

View File

@ -581,7 +581,7 @@ p
| web applications.
+aside-code("Example").
doc = nlp('Alice ate the pizza.')
doc = nlp(u'Alice ate the pizza.')
trees = doc.print_tree()
# {'modifiers': [
# {'modifiers': [], 'NE': 'PERSON', 'word': 'Alice', 'arc': 'nsubj', 'POS_coarse': 'PROPN', 'POS_fine': 'NNP', 'lemma': 'Alice'},
@ -615,8 +615,8 @@ p
| document.
+aside-code("Example").
tokens = nlp(u'Mr. Best flew to New York on Saturday morning.')
ents = list(tokens.ents)
doc = nlp(u'Mr. Best flew to New York on Saturday morning.')
ents = list(doc.ents)
assert ents[0].label == 346
assert ents[0].label_ == 'PERSON'
assert ents[0].text == 'Mr. Best'

View File

@ -349,6 +349,30 @@ p Retokenize the document, such that the span is merged into a single token.
+cell #[code Token]
+cell The newly merged token.
+h(2, "ents") Span.ents
+tag property
+tag-model("NER")
p
| Iterate over the entities in the span. Yields named-entity
| #[code Span] objects, if the entity recognizer has been applied to the
| parent document.
+aside-code("Example").
doc = nlp(u'Mr. Best flew to New York on Saturday morning.')
span = doc[0:6]
ents = list(span.ents)
assert ents[0].label == 346
assert ents[0].label_ == 'PERSON'
assert ents[0].text == 'Mr. Best'
+table(["Name", "Type", "Description"])
+row("foot")
+cell yields
+cell #[code Span]
+cell Entities in the span.
+h(2, "as_doc") Span.as_doc
p

View File

@ -27,17 +27,6 @@
}
},
"comparison": {
"title": "Model Comparison",
"teaser": "Compare spaCy's statistical models and their accuracy.",
"tag": "experimental",
"compare_models": true,
"default_models": {
"model1": "en_core_web_sm",
"model2": "en_core_web_lg"
}
},
"MODELS": {
"en": ["en_core_web_sm", "en_core_web_md", "en_core_web_lg", "en_vectors_web_lg"],
"de": ["de_core_news_sm"],
@ -122,6 +111,8 @@
"fa": "Persian",
"ur": "Urdu",
"tt": "Tatar",
"te": "Telugu",
"si": "Sinhala",
"ga": "Irish",
"bn": "Bengali",
"hi": "Hindi",

View File

@ -3,7 +3,9 @@
+h(3, "pip") pip
+badge("https://img.shields.io/pypi/v/spacy.svg?style=flat-square", "https://pypi.python.org/pypi/spacy")
p Using pip, spaCy releases are currently only available as source packages.
p
| Using pip, spaCy releases are available as source packages and binary
| wheels (as of #[code v2.0.13]).
+code(false, "bash").
pip install -U spacy