Merge branch 'develop' of https://github.com/explosion/spaCy into develop

Matthew Honnibal 2017-06-05 19:20:13 +02:00
commit c0d90f52f7
3 changed files with 14 additions and 16 deletions


@@ -4,12 +4,10 @@ spaCy: Industrial-strength NLP
 spaCy is a library for advanced natural language processing in Python and
 Cython. spaCy is built on the very latest research, but it isn't researchware.
 It was designed from day one to be used in real products. spaCy currently supports
-English, German and French, as well as tokenization for Spanish, Italian,
-Portuguese, Dutch, Swedish, Finnish, Norwegian, Hungarian, Bengali, Hebrew,
-Chinese and Japanese. It's commercial open-source software, released under the
-MIT license.
-📊 **Help us improve the library!** `Take the spaCy user survey <https://survey.spacy.io>`_.
+English, German, French and Spanish, as well as tokenization for Italian,
+Portuguese, Dutch, Swedish, Finnish, Norwegian, Danish, Hungarian, Polish,
+Bengali, Hebrew, Chinese and Japanese. It's commercial open-source software,
+released under the MIT license.

 💫 **Version 1.8 out now!** `Read the release notes here. <https://github.com/explosion/spaCy/releases/>`_
@@ -85,7 +83,7 @@ Features
 * GIL-free **multi-threading**
 * Efficient binary serialization
 * Easy **deep learning** integration
-* Statistical models for **English** and **German**
+* Statistical models for **English**, **German**, **French** and **Spanish**
 * State-of-the-art speed
 * Robust, rigorously evaluated accuracy
@@ -197,7 +195,7 @@ To load a model, use ``spacy.load()`` with the model's shortcut link:
 .. code:: python

     import spacy
-    nlp = spacy.load('en_default')
+    nlp = spacy.load('en')
     doc = nlp(u'This is a sentence.')

 If you've installed a model via pip, you can also ``import`` it directly and
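
The context line above trails off into the pip-installed model pattern the README goes on to describe: importing the model package directly and calling its ``load()`` method. A minimal sketch of that pattern, where ``en_core_web_sm`` stands in for whichever model package is actually installed:

    import en_core_web_sm  # assumed model package name; any pip-installed spaCy model works the same way

    nlp = en_core_web_sm.load()
    doc = nlp(u'This is a sentence.')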
@@ -313,7 +311,7 @@ and ``--model`` are optional and enable additional tests:
     # make sure you are using recent pytest version
     python -m pip install -U pytest
-    python -m pytest <spacy-directory> --vectors --models --slow
+    python -m pytest <spacy-directory>

 🛠 Changelog
 ============


@@ -1,15 +1,15 @@
 # coding: utf-8
 from __future__ import unicode_literals

-from ...attrs import HEAD, DEP
-from ...symbols import nsubj, dobj, amod, nmod, conj, cc, root
-from ...syntax.iterators import english_noun_chunks
-from ..util import get_doc
+from ....attrs import HEAD, DEP
+from ....symbols import nsubj, dobj, amod, nmod, conj, cc, root
+from ....lang.en.syntax_iterators import SYNTAX_ITERATORS
+from ...util import get_doc

 import numpy


-def test_doc_noun_chunks_not_nested(en_tokenizer):
+def test_en_noun_chunks_not_nested(en_tokenizer):
     text = "Peter has chronic command and control issues"
     heads = [1, 0, 4, 3, -1, -2, -5]
     deps = ['nsubj', 'ROOT', 'amod', 'nmod', 'cc', 'conj', 'dobj']
@@ -21,7 +21,7 @@ def test_doc_noun_chunks_not_nested(en_tokenizer):
         [HEAD, DEP],
         numpy.asarray([[1, nsubj], [0, root], [4, amod], [3, nmod], [-1, cc],
                        [-2, conj], [-5, dobj]], dtype='uint64'))
-    tokens.noun_chunks_iterator = english_noun_chunks
+    tokens.noun_chunks_iterator = SYNTAX_ITERATORS['noun_chunks']
     word_occurred = {}
     for chunk in tokens.noun_chunks:
         for word in chunk:
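
The switch from ``english_noun_chunks`` to ``SYNTAX_ITERATORS['noun_chunks']`` reflects the move of per-language syntax rules into ``spacy.lang``. A hedged sketch of the same lookup outside the test harness, assuming an English model is linked under the ``en`` shortcut:

    import spacy
    from spacy.lang.en.syntax_iterators import SYNTAX_ITERATORS

    nlp = spacy.load('en')
    doc = nlp(u'Peter has chronic command and control issues')

    # Doc.noun_chunks delegates to the registered iterator; assigning the
    # English rules by hand mirrors what the test above does.
    doc.noun_chunks_iterator = SYNTAX_ITERATORS['noun_chunks']
    for chunk in doc.noun_chunks:
        print(chunk.text)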


@@ -251,7 +251,7 @@ p
         +cell #[code lang.xx.lex_attrs]

     +row
-        +cell #[code syntax.syntax_iterators]
+        +cell #[code syntax.iterators]
         +cell #[code lang.xx.syntax_iterators]

     +row
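
That corrected row maps the v1 module to its v2 home, matching the import change in the test file above. Illustratively, and assuming the v2 package layout:

    # v1 location (what the row previously mis-named as syntax.syntax_iterators):
    #     from spacy.syntax.iterators import english_noun_chunks
    # v2 location, one module per language:
    from spacy.lang.en.syntax_iterators import SYNTAX_ITERATORS

    noun_chunks = SYNTAX_ITERATORS['noun_chunks']  # registry keyed by iterator name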