Fix test_misc merge conflict
Commit b127645afc
@@ -7,6 +7,7 @@ from .deprecated import resolve_load_name
 from .about import __version__
 from . import util


 def load(name, **overrides):
     name = resolve_load_name(name, **overrides)
     return util.load_model(name, **overrides)
@@ -1,19 +1,40 @@
 # coding: utf-8
 from __future__ import unicode_literals

-from ..tokens import Doc
-from ..strings import StringStore
-from ..lemmatizer import Lemmatizer
-from ..attrs import ORTH, TAG, HEAD, DEP
-from .. import util

 from io import StringIO, BytesIO
 from pathlib import Path
 import pytest

+from .util import load_test_model
+from ..tokens import Doc
+from ..strings import StringStore
+from .. import util


 _languages = ['bn', 'da', 'de', 'en', 'es', 'fi', 'fr', 'he', 'hu', 'it', 'nb',
-              'nl', 'pl', 'pt', 'sv']
+              'nl', 'pl', 'pt', 'sv', 'xx']
+
+_models = {'en': ['en_core_web_sm', 'en_core_web_md'],
+           'de': ['de_core_news_md'],
+           'fr': ['fr_depvec_web_lg'],
+           'xx': ['xx_ent_web_md']}
+
+
+# only used for tests that require loading the models
+# in all other cases, use specific instances
+
+@pytest.fixture(params=_models['en'], scope="session")
+def EN(request):
+    return load_test_model(request.param)
+
+
+@pytest.fixture(params=_models['de'], scope="session")
+def DE(request):
+    return load_test_model(request.param)
+
+
+@pytest.fixture(params=_models['fr'], scope="session")
+def FR(request):
+    return load_test_model(request.param)
+
+
 @pytest.fixture(params=_languages)
@@ -91,11 +112,6 @@ def en_entityrecognizer():
     return util.get_lang_class('en').Defaults.create_entity()


-@pytest.fixture
-def lemmatizer():
-    return util.get_lang_class('en').Defaults.create_lemmatizer()


 @pytest.fixture
 def text_file():
     return StringIO()
@@ -105,22 +121,6 @@ def text_file_b():
     return BytesIO()


-# only used for tests that require loading the models
-# in all other cases, use specific instances
-@pytest.fixture(scope="session")
-def EN():
-    return English()
-
-
-@pytest.fixture(scope="session")
-def DE():
-    return German()
-
-@pytest.fixture(scope="session")
-def FR():
-    return French()


 def pytest_addoption(parser):
     parser.addoption("--models", action="store_true",
                      help="include tests that require full models")
@@ -129,8 +129,18 @@ def pytest_addoption(parser):
     parser.addoption("--slow", action="store_true",
                      help="include slow tests")

+    for lang in _languages + ['all']:
+        parser.addoption("--%s" % lang, action="store_true", help="Use %s models" % lang)


 def pytest_runtest_setup(item):
     for opt in ['models', 'vectors', 'slow']:
         if opt in item.keywords and not item.config.getoption("--%s" % opt):
             pytest.skip("need --%s option to run" % opt)

+    # Check if test is marked with models and has arguments set, i.e. specific
+    # language. If so, skip test if flag not set.
+    if item.get_marker('models'):
+        for arg in item.get_marker('models').args:
+            if not item.config.getoption("--%s" % arg) and not item.config.getoption("--all"):
+                pytest.skip()
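For orientation, a small usage sketch of the machinery added above; the test name and sentence are made up for illustration, while the marker, fixtures and command-line flags are the ones defined in this conftest:

import pytest

@pytest.mark.models('en')              # collected only when --models and --en (or --all) are passed
def test_en_example(EN):               # EN is the session-scoped model fixture defined above
    doc = EN(u"A sentence that needs the English model.")
    assert len(doc) > 0

# Example invocations:
#   py.test spacy/tests --models --en    # run model-marked tests for English only
#   py.test spacy/tests --models --all   # run model-marked tests for every language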
spacy/tests/lang/de/test_parser.py (new file, 35 lines)
@@ -0,0 +1,35 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from ...util import get_doc
+
+import pytest
+
+
+def test_de_parser_noun_chunks_standard_de(de_tokenizer):
+    text = "Eine Tasse steht auf dem Tisch."
+    heads = [1, 1, 0, -1, 1, -2, -4]
+    tags = ['ART', 'NN', 'VVFIN', 'APPR', 'ART', 'NN', '$.']
+    deps = ['nk', 'sb', 'ROOT', 'mo', 'nk', 'nk', 'punct']
+
+    tokens = de_tokenizer(text)
+    doc = get_doc(tokens.vocab, [t.text for t in tokens], tags=tags, deps=deps, heads=heads)
+    chunks = list(doc.noun_chunks)
+    assert len(chunks) == 2
+    assert chunks[0].text_with_ws == "Eine Tasse "
+    assert chunks[1].text_with_ws == "dem Tisch "
+
+
+def test_de_extended_chunk(de_tokenizer):
+    text = "Die Sängerin singt mit einer Tasse Kaffee Arien."
+    heads = [1, 1, 0, -1, 1, -2, -1, -5, -6]
+    tags = ['ART', 'NN', 'VVFIN', 'APPR', 'ART', 'NN', 'NN', 'NN', '$.']
+    deps = ['nk', 'sb', 'ROOT', 'mo', 'nk', 'nk', 'nk', 'oa', 'punct']
+
+    tokens = de_tokenizer(text)
+    doc = get_doc(tokens.vocab, [t.text for t in tokens], tags=tags, deps=deps, heads=heads)
+    chunks = list(doc.noun_chunks)
+    assert len(chunks) == 3
+    assert chunks[0].text_with_ws == "Die Sängerin "
+    assert chunks[1].text_with_ws == "einer Tasse Kaffee "
+    assert chunks[2].text_with_ws == "Arien "
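A note on the heads lists used with get_doc here and throughout these tests: they read most naturally as per-token offsets to the syntactic head rather than absolute indices, with 0 marking the root. That reading is an inference from the tests, not something the diff states, but it can be sanity-checked against the first German example:

# Assumed convention: heads[i] is the offset from token i to its head; 0 means root.
words = ["Eine", "Tasse", "steht", "auf", "dem", "Tisch", "."]
heads = [1, 1, 0, -1, 1, -2, -4]
absolute_heads = [i + offset for i, offset in enumerate(heads)]
assert absolute_heads == [1, 2, 2, 2, 5, 3, 2]   # every token ultimately hangs off "steht"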
@@ -1,87 +0,0 @@
[Removed file, 87 lines: the old English contraction tokenizer tests (test_tokenizer_handles_basic_contraction, test_tokenizer_handles_basic_contraction_punct, test_tokenizer_handles_poss_contraction, test_tokenizer_splits_trailing_apos, text_tokenizer_doesnt_split_apos_exc, test_tokenizer_handles_ll_contraction, test_tokenizer_handles_capitalization, test_tokenizer_keeps_title_case, test_tokenizer_excludes_ambiguous, test_tokenizer_splits_defined_punct). The same test bodies are re-added below under test_en_* names.]
@@ -1,19 +1,96 @@
 # coding: utf-8
-"""Test that tokenizer exceptions are handled correctly."""

 from __future__ import unicode_literals

 import pytest


+def test_en_tokenizer_handles_basic_contraction(en_tokenizer):
+    text = "don't giggle"
+    tokens = en_tokenizer(text)
+    assert len(tokens) == 3
+    assert tokens[1].text == "n't"
+    text = "i said don't!"
+    tokens = en_tokenizer(text)
+    assert len(tokens) == 5
+    assert tokens[4].text == "!"
+
+
+@pytest.mark.parametrize('text', ["`ain't", '''"isn't''', "can't!"])
+def test_en_tokenizer_handles_basic_contraction_punct(en_tokenizer, text):
+    tokens = en_tokenizer(text)
+    assert len(tokens) == 3
+
+
+@pytest.mark.parametrize('text_poss,text', [("Robin's", "Robin"), ("Alexis's", "Alexis")])
+def test_en_tokenizer_handles_poss_contraction(en_tokenizer, text_poss, text):
+    tokens = en_tokenizer(text_poss)
+    assert len(tokens) == 2
+    assert tokens[0].text == text
+    assert tokens[1].text == "'s"
+
+
+@pytest.mark.parametrize('text', ["schools'", "Alexis'"])
+def test_en_tokenizer_splits_trailing_apos(en_tokenizer, text):
+    tokens = en_tokenizer(text)
+    assert len(tokens) == 2
+    assert tokens[0].text == text.split("'")[0]
+    assert tokens[1].text == "'"
+
+
+@pytest.mark.parametrize('text', ["'em", "nothin'", "ol'"])
+def text_tokenizer_doesnt_split_apos_exc(en_tokenizer, text):
+    tokens = en_tokenizer(text)
+    assert len(tokens) == 1
+    assert tokens[0].text == text
+
+
+@pytest.mark.parametrize('text', ["we'll", "You'll", "there'll"])
+def test_en_tokenizer_handles_ll_contraction(en_tokenizer, text):
+    tokens = en_tokenizer(text)
+    assert len(tokens) == 2
+    assert tokens[0].text == text.split("'")[0]
+    assert tokens[1].text == "'ll"
+    assert tokens[1].lemma_ == "will"
+
+
+@pytest.mark.parametrize('text_lower,text_title', [("can't", "Can't"), ("ain't", "Ain't")])
+def test_en_tokenizer_handles_capitalization(en_tokenizer, text_lower, text_title):
+    tokens_lower = en_tokenizer(text_lower)
+    tokens_title = en_tokenizer(text_title)
+    assert tokens_title[0].text == tokens_lower[0].text.title()
+    assert tokens_lower[0].text == tokens_title[0].text.lower()
+    assert tokens_lower[1].text == tokens_title[1].text
+
+
+@pytest.mark.parametrize('pron', ["I", "You", "He", "She", "It", "We", "They"])
+@pytest.mark.parametrize('contraction', ["'ll", "'d"])
+def test_en_tokenizer_keeps_title_case(en_tokenizer, pron, contraction):
+    tokens = en_tokenizer(pron + contraction)
+    assert tokens[0].text == pron
+    assert tokens[1].text == contraction
+
+
+@pytest.mark.parametrize('exc', ["Ill", "ill", "Hell", "hell", "Well", "well"])
+def test_en_tokenizer_excludes_ambiguous(en_tokenizer, exc):
+    tokens = en_tokenizer(exc)
+    assert len(tokens) == 1
+
+
+@pytest.mark.parametrize('wo_punct,w_punct', [("We've", "``We've"), ("couldn't", "couldn't)")])
+def test_en_tokenizer_splits_defined_punct(en_tokenizer, wo_punct, w_punct):
+    tokens = en_tokenizer(wo_punct)
+    assert len(tokens) == 2
+    tokens = en_tokenizer(w_punct)
+    assert len(tokens) == 3
+
+
 @pytest.mark.parametrize('text', ["e.g.", "p.m.", "Jan.", "Dec.", "Inc."])
-def test_tokenizer_handles_abbr(en_tokenizer, text):
+def test_en_tokenizer_handles_abbr(en_tokenizer, text):
     tokens = en_tokenizer(text)
     assert len(tokens) == 1


-def test_tokenizer_handles_exc_in_text(en_tokenizer):
+def test_en_tokenizer_handles_exc_in_text(en_tokenizer):
     text = "It's mediocre i.e. bad."
     tokens = en_tokenizer(text)
     assert len(tokens) == 6
@@ -21,7 +98,7 @@ def test_tokenizer_handles_exc_in_text(en_tokenizer):


 @pytest.mark.parametrize('text', ["1am", "12a.m.", "11p.m.", "4pm"])
-def test_tokenizer_handles_times(en_tokenizer, text):
+def test_en_tokenizer_handles_times(en_tokenizer, text):
     tokens = en_tokenizer(text)
     assert len(tokens) == 2
     assert tokens[1].lemma_ in ["a.m.", "p.m."]
@@ -7,7 +7,7 @@ from __future__ import unicode_literals
 import pytest


-def test_simple_punct(en_tokenizer):
+def test_en_simple_punct(en_tokenizer):
     text = "to walk, do foo"
     tokens = en_tokenizer(text)
     assert tokens[0].idx == 0
@@ -17,7 +17,7 @@ def test_simple_punct(en_tokenizer):
     assert tokens[4].idx == 12


-def test_complex_punct(en_tokenizer):
+def test_en_complex_punct(en_tokenizer):
     text = "Tom (D., Ill.)!"
     tokens = en_tokenizer(text)
     assert tokens[0].idx == 0
spacy/tests/lang/en/test_lemmatizer.py (new file, 46 lines)
@@ -0,0 +1,46 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import pytest
+
+
+@pytest.fixture
+def en_lemmatizer(EN):
+    return EN.Defaults.create_lemmatizer()
+
+
+@pytest.mark.models('en')
+@pytest.mark.parametrize('text,lemmas', [("aardwolves", ["aardwolf"]),
+                                         ("aardwolf", ["aardwolf"]),
+                                         ("planets", ["planet"]),
+                                         ("ring", ["ring"]),
+                                         ("axes", ["axis", "axe", "ax"])])
+def test_en_lemmatizer_noun_lemmas(en_lemmatizer, text, lemmas):
+    assert en_lemmatizer.noun(text) == set(lemmas)
+
+
+@pytest.mark.xfail
+@pytest.mark.models('en')
+def test_en_lemmatizer_base_forms(en_lemmatizer):
+    assert en_lemmatizer.noun('dive', {'number': 'sing'}) == set(['dive'])
+    assert en_lemmatizer.noun('dive', {'number': 'plur'}) == set(['diva'])
+
+
+@pytest.mark.models
+def test_en_lemmatizer_base_form_verb(en_lemmatizer):
+    assert en_lemmatizer.verb('saw', {'verbform': 'past'}) == set(['see'])
+
+
+@pytest.mark.models
+def test_en_lemmatizer_punct(en_lemmatizer):
+    assert en_lemmatizer.punct('“') == set(['"'])
+    assert en_lemmatizer.punct('“') == set(['"'])
+
+
+@pytest.mark.models('en')
+def test_en_lemmatizer_lemma_assignment(EN):
+    text = "Bananas in pyjamas are geese."
+    doc = EN.tokenizer(text)
+    assert all(t.lemma_ == '' for t in doc)
+    EN.tagger(doc)
+    assert all(t.lemma_ != '' for t in doc)
@@ -5,8 +5,8 @@ from spacy.attrs import LOWER
 from spacy.matcher import Matcher


-@pytest.mark.models
-def test_simple_types(EN):
+@pytest.mark.models('en')
+def test_en_ner_simple_types(EN):
     tokens = EN(u'Mr. Best flew to New York on Saturday morning.')
     ents = list(tokens.ents)
     assert ents[0].start == 1
@@ -17,8 +17,8 @@ def test_simple_types(EN):
     assert ents[1].label_ == 'GPE'


-@pytest.mark.models
-def test_consistency_bug(EN):
+@pytest.mark.models('en')
+def test_en_ner_consistency_bug(EN):
     '''Test an arbitrary sequence-consistency bug encountered during speed test'''
     tokens = EN(u'Where rap essentially went mainstream, illustrated by seminal Public Enemy, Beastie Boys and L.L. Cool J. tracks.')
     tokens = EN(u'''Charity and other short-term aid have buoyed them so far, and a tax-relief bill working its way through Congress would help. But the September 11 Victim Compensation Fund, enacted by Congress to discourage people from filing lawsuits, will determine the shape of their lives for years to come.\n\n''', entity=False)
@@ -26,8 +26,8 @@ def test_consistency_bug(EN):
     EN.entity(tokens)


-@pytest.mark.models
-def test_unit_end_gazetteer(EN):
+@pytest.mark.models('en')
+def test_en_ner_unit_end_gazetteer(EN):
     '''Test a bug in the interaction between the NER model and the gazetteer'''
     matcher = Matcher(EN.vocab)
     matcher.add('MemberNames', None, [{LOWER: 'cal'}], [{LOWER: 'cal'}, {LOWER: 'henderson'}])
@@ -38,6 +38,3 @@ def test_unit_end_gazetteer(EN):
     doc.ents += tuple(ents)
     EN.entity(doc)
     assert list(doc.ents)[0].text == 'cal'
@@ -1,7 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals

-from ..util import get_doc
+from ...util import get_doc

 import pytest
@@ -45,32 +45,3 @@ def test_parser_noun_chunks_pp_chunks(en_tokenizer):
     assert len(chunks) == 2
     assert chunks[0].text_with_ws == "A phrase "
     assert chunks[1].text_with_ws == "another phrase "
[Removed: the German noun-chunk tests test_parser_noun_chunks_standard_de and test_de_extended_chunk; the same test bodies now live in the new spacy/tests/lang/de/test_parser.py shown above.]
@@ -16,14 +16,14 @@ PUNCT_PAIRED = [('(', ')'), ('[', ']'), ('{', '}'), ('*', '*')]


 @pytest.mark.parametrize('text', ["(", "((", "<"])
-def test_tokenizer_handles_only_punct(en_tokenizer, text):
+def test_en_tokenizer_handles_only_punct(en_tokenizer, text):
     tokens = en_tokenizer(text)
     assert len(tokens) == len(text)


 @pytest.mark.parametrize('punct', PUNCT_OPEN)
 @pytest.mark.parametrize('text', ["Hello"])
-def test_tokenizer_splits_open_punct(en_tokenizer, punct, text):
+def test_en_tokenizer_splits_open_punct(en_tokenizer, punct, text):
     tokens = en_tokenizer(punct + text)
     assert len(tokens) == 2
     assert tokens[0].text == punct
@@ -32,7 +32,7 @@ def test_tokenizer_splits_open_punct(en_tokenizer, punct, text):

 @pytest.mark.parametrize('punct', PUNCT_CLOSE)
 @pytest.mark.parametrize('text', ["Hello"])
-def test_tokenizer_splits_close_punct(en_tokenizer, punct, text):
+def test_en_tokenizer_splits_close_punct(en_tokenizer, punct, text):
     tokens = en_tokenizer(text + punct)
     assert len(tokens) == 2
     assert tokens[0].text == text
@@ -42,7 +42,7 @@ def test_tokenizer_splits_close_punct(en_tokenizer, punct, text):
 @pytest.mark.parametrize('punct', PUNCT_OPEN)
 @pytest.mark.parametrize('punct_add', ["`"])
 @pytest.mark.parametrize('text', ["Hello"])
-def test_tokenizer_splits_two_diff_open_punct(en_tokenizer, punct, punct_add, text):
+def test_en_tokenizer_splits_two_diff_open_punct(en_tokenizer, punct, punct_add, text):
     tokens = en_tokenizer(punct + punct_add + text)
     assert len(tokens) == 3
     assert tokens[0].text == punct
@@ -53,7 +53,7 @@ def test_tokenizer_splits_two_diff_open_punct(en_tokenizer, punct, punct_add, te
 @pytest.mark.parametrize('punct', PUNCT_CLOSE)
 @pytest.mark.parametrize('punct_add', ["'"])
 @pytest.mark.parametrize('text', ["Hello"])
-def test_tokenizer_splits_two_diff_close_punct(en_tokenizer, punct, punct_add, text):
+def test_en_tokenizer_splits_two_diff_close_punct(en_tokenizer, punct, punct_add, text):
     tokens = en_tokenizer(text + punct + punct_add)
     assert len(tokens) == 3
     assert tokens[0].text == text
@@ -63,7 +63,7 @@ def test_tokenizer_splits_two_diff_close_punct(en_tokenizer, punct, punct_add, t

 @pytest.mark.parametrize('punct', PUNCT_OPEN)
 @pytest.mark.parametrize('text', ["Hello"])
-def test_tokenizer_splits_same_open_punct(en_tokenizer, punct, text):
+def test_en_tokenizer_splits_same_open_punct(en_tokenizer, punct, text):
     tokens = en_tokenizer(punct + punct + punct + text)
     assert len(tokens) == 4
     assert tokens[0].text == punct
@@ -72,7 +72,7 @@ def test_tokenizer_splits_same_open_punct(en_tokenizer, punct, text):
 @pytest.mark.parametrize('punct', PUNCT_CLOSE)
 @pytest.mark.parametrize('text', ["Hello"])
-def test_tokenizer_splits_same_close_punct(en_tokenizer, punct, text):
+def test_en_tokenizer_splits_same_close_punct(en_tokenizer, punct, text):
     tokens = en_tokenizer(text + punct + punct + punct)
     assert len(tokens) == 4
     assert tokens[0].text == text
@@ -80,14 +80,14 @@ def test_tokenizer_splits_same_close_punct(en_tokenizer, punct, text):


 @pytest.mark.parametrize('text', ["'The"])
-def test_tokenizer_splits_open_appostrophe(en_tokenizer, text):
+def test_en_tokenizer_splits_open_appostrophe(en_tokenizer, text):
     tokens = en_tokenizer(text)
     assert len(tokens) == 2
     assert tokens[0].text == "'"


 @pytest.mark.parametrize('text', ["Hello''"])
-def test_tokenizer_splits_double_end_quote(en_tokenizer, text):
+def test_en_tokenizer_splits_double_end_quote(en_tokenizer, text):
     tokens = en_tokenizer(text)
     assert len(tokens) == 2
     tokens_punct = en_tokenizer("''")
@@ -96,7 +96,7 @@ def test_tokenizer_splits_double_end_quote(en_tokenizer, text):
 @pytest.mark.parametrize('punct_open,punct_close', PUNCT_PAIRED)
 @pytest.mark.parametrize('text', ["Hello"])
-def test_tokenizer_splits_open_close_punct(en_tokenizer, punct_open,
+def test_en_tokenizer_splits_open_close_punct(en_tokenizer, punct_open,
                                             punct_close, text):
     tokens = en_tokenizer(punct_open + text + punct_close)
     assert len(tokens) == 3
@@ -108,7 +108,7 @@ def test_tokenizer_splits_open_close_punct(en_tokenizer, punct_open,
 @pytest.mark.parametrize('punct_open,punct_close', PUNCT_PAIRED)
 @pytest.mark.parametrize('punct_open2,punct_close2', [("`", "'")])
 @pytest.mark.parametrize('text', ["Hello"])
-def test_tokenizer_two_diff_punct(en_tokenizer, punct_open, punct_close,
+def test_en_tokenizer_two_diff_punct(en_tokenizer, punct_open, punct_close,
                                   punct_open2, punct_close2, text):
     tokens = en_tokenizer(punct_open2 + punct_open + text + punct_close + punct_close2)
     assert len(tokens) == 5
@@ -120,13 +120,13 @@ def test_tokenizer_two_diff_punct(en_tokenizer, punct_open, punct_close,


 @pytest.mark.parametrize('text,punct', [("(can't", "(")])
-def test_tokenizer_splits_pre_punct_regex(text, punct):
+def test_en_tokenizer_splits_pre_punct_regex(text, punct):
     en_search_prefixes = compile_prefix_regex(TOKENIZER_PREFIXES).search
     match = en_search_prefixes(text)
     assert match.group() == punct


-def test_tokenizer_splits_bracket_period(en_tokenizer):
+def test_en_tokenizer_splits_bracket_period(en_tokenizer):
     text = "(And a 6a.m. run through Washington Park)."
     tokens = en_tokenizer(text)
     assert tokens[len(tokens) - 1].text == "."
@@ -1,9 +1,65 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals

+from ....tokens import Doc
+from ...util import get_doc, apply_transition_sequence

 import pytest


+@pytest.mark.parametrize('text', ["A test sentence"])
+@pytest.mark.parametrize('punct', ['.', '!', '?', ''])
+def test_en_sbd_single_punct(en_tokenizer, text, punct):
+    heads = [2, 1, 0, -1] if punct else [2, 1, 0]
+    tokens = en_tokenizer(text + punct)
+    doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads)
+    assert len(doc) == 4 if punct else 3
+    assert len(list(doc.sents)) == 1
+    assert sum(len(sent) for sent in doc.sents) == len(doc)
+
+
+@pytest.mark.xfail
+def test_en_sentence_breaks(en_tokenizer, en_parser):
+    text = "This is a sentence . This is another one ."
+    heads = [1, 0, 1, -2, -3, 1, 0, 1, -2, -3]
+    deps = ['nsubj', 'ROOT', 'det', 'attr', 'punct', 'nsubj', 'ROOT', 'det',
+            'attr', 'punct']
+    transition = ['L-nsubj', 'S', 'L-det', 'R-attr', 'D', 'R-punct', 'B-ROOT',
+                  'L-nsubj', 'S', 'L-attr', 'R-attr', 'D', 'R-punct']
+
+    tokens = en_tokenizer(text)
+    doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads, deps=deps)
+    apply_transition_sequence(en_parser, doc, transition)
+
+    assert len(list(doc.sents)) == 2
+    for token in doc:
+        assert token.dep != 0 or token.is_space
+    assert [token.head.i for token in doc ] == [1, 1, 3, 1, 1, 6, 6, 8, 6, 6]
+
+
+# Currently, there's no way of setting the serializer data for the parser
+# without loading the models, so we can't remove the model dependency here yet.
+
+@pytest.mark.xfail
+@pytest.mark.models('en')
+def test_en_sbd_serialization_projective(EN):
+    """Test that before and after serialization, the sentence boundaries are
+    the same."""
+
+    text = "I bought a couch from IKEA It wasn't very comfortable."
+    transition = ['L-nsubj', 'S', 'L-det', 'R-dobj', 'D', 'R-prep', 'R-pobj',
+                  'B-ROOT', 'L-nsubj', 'R-neg', 'D', 'S', 'L-advmod',
+                  'R-acomp', 'D', 'R-punct']
+
+    doc = EN.tokenizer(text)
+    apply_transition_sequence(EN.parser, doc, transition)
+    doc_serialized = Doc(EN.vocab).from_bytes(doc.to_bytes())
+    assert doc.is_parsed == True
+    assert doc_serialized.is_parsed == True
+    assert doc.to_bytes() == doc_serialized.to_bytes()
+    assert [s.text for s in doc.sents] == [s.text for s in doc_serialized.sents]
+
+
 TEST_CASES = [
     ("Hello World. My name is Jonas.", ["Hello World.", "My name is Jonas."]),
     ("What is your name? My name is Jonas.", ["What is your name?", "My name is Jonas."]),
@@ -59,10 +115,9 @@ TEST_CASES = [
     pytest.mark.xfail(("Hello world.Today is Tuesday.Mr. Smith went to the store and bought 1,000.That is a lot.", ["Hello world.", "Today is Tuesday.", "Mr. Smith went to the store and bought 1,000.", "That is a lot."]))
 ]

-@pytest.mark.slow
-@pytest.mark.models
+@pytest.mark.models('en')
 @pytest.mark.parametrize('text,expected_sents', TEST_CASES)
-def test_parser_sbd_prag(EN, text, expected_sents):
+def test_en_sbd_prag(EN, text, expected_sents):
     """SBD tests from Pragmatic Segmenter"""
     doc = EN(text)
     sents = []
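The transition sequences passed to apply_transition_sequence above are arc-eager parser actions; the following reading of the action names is an assumption based on spaCy's transition system, not something spelled out in this diff:

# Assumed meaning of the action names used in the transition lists above:
#   'S'         SHIFT: push the next buffer token onto the stack
#   'D'         REDUCE: pop the top of the stack
#   'L-nsubj'   LEFT-ARC labelled nsubj (stack top becomes a left dependent)
#   'R-attr'    RIGHT-ARC labelled attr (next token becomes a right dependent)
#   'B-ROOT'    BREAK: force a sentence boundary at this point
transition = ['L-nsubj', 'S', 'L-det', 'R-attr', 'D', 'R-punct', 'B-ROOT',
              'L-nsubj', 'S', 'L-attr', 'R-attr', 'D', 'R-punct']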
spacy/tests/lang/en/test_tagger.py (new file, 59 lines)
@@ -0,0 +1,59 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from ....parts_of_speech import SPACE
+from ...util import get_doc
+
+import six
+import pytest
+
+
+def test_en_tagger_load_morph_exc(en_tokenizer):
+    text = "I like his style."
+    tags = ['PRP', 'VBP', 'PRP$', 'NN', '.']
+    morph_exc = {'VBP': {'like': {'L': 'luck'}}}
+    en_tokenizer.vocab.morphology.load_morph_exceptions(morph_exc)
+    tokens = en_tokenizer(text)
+    doc = get_doc(tokens.vocab, [t.text for t in tokens], tags=tags)
+    assert doc[1].tag_ == 'VBP'
+    assert doc[1].lemma_ == 'luck'
+
+
+@pytest.mark.models('en')
+def test_tag_names(EN):
+    text = "I ate pizzas with anchovies."
+    doc = EN(text, parse=False, tag=True)
+    assert type(doc[2].pos) == int
+    assert isinstance(doc[2].pos_, six.text_type)
+    assert type(doc[2].dep) == int
+    assert isinstance(doc[2].dep_, six.text_type)
+    assert doc[2].tag_ == u'NNS'
+
+
+@pytest.mark.models('en')
+def test_en_tagger_spaces(EN):
+    """Ensure spaces are assigned the POS tag SPACE"""
+    text = "Some\nspaces are\tnecessary."
+    doc = EN(text, tag=True, parse=False)
+    assert doc[0].pos != SPACE
+    assert doc[0].pos_ != 'SPACE'
+    assert doc[1].pos == SPACE
+    assert doc[1].pos_ == 'SPACE'
+    assert doc[1].tag_ == 'SP'
+    assert doc[2].pos != SPACE
+    assert doc[3].pos != SPACE
+    assert doc[4].pos == SPACE
+
+
+@pytest.mark.models('en')
+def test_en_tagger_return_char(EN):
+    """Ensure spaces are assigned the POS tag SPACE"""
+    text = ('hi Aaron,\r\n\r\nHow is your schedule today, I was wondering if '
+            'you had time for a phone\r\ncall this afternoon?\r\n\r\n\r\n')
+    tokens = EN(text)
+    for token in tokens:
+        if token.is_space:
+            assert token.pos == SPACE
+    assert tokens[3].text == '\r\n\r\n'
+    assert tokens[3].is_space
+    assert tokens[3].pos == SPACE
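The morphology-exception test above implies the shape of the exception table; the following annotated copy is an inference from the test's assertions rather than documented API:

# Inferred structure: {fine-grained tag: {surface form: {attribute overrides}}}
morph_exc = {'VBP': {'like': {'L': 'luck'}}}
# 'L' overrides the lemma, which is why the test expects doc[1].lemma_ == 'luck'
# once "like" has been tagged VBP.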
@@ -7,7 +7,7 @@ from __future__ import unicode_literals
 import pytest


-def test_tokenizer_handles_long_text(en_tokenizer):
+def test_en_tokenizer_handles_long_text(en_tokenizer):
     text = """Tributes pour in for late British Labour Party leader

 Tributes poured in from around the world Thursday
@@ -30,7 +30,7 @@ untimely death" of the rapier-tongued Scottish barrister and parliamentarian.
     ("""'Me too!', Mr. P. Delaware cried. """, 11),
     ("They ran about 10km.", 6),
     pytest.mark.xfail(("But then the 6,000-year ice age came...", 10))])
-def test_tokenizer_handles_cnts(en_tokenizer, text, length):
+def test_en_tokenizer_handles_cnts(en_tokenizer, text, length):
     tokens = en_tokenizer(text)
     assert len(tokens) == length
@@ -1,37 +1,33 @@
 # coding: utf-8

 from __future__ import unicode_literals

 import pytest


-@pytest.mark.models
+@pytest.mark.models('fr')
 def test_lemmatizer_verb(FR):
-    text = "Qu'est-ce que tu fais?"
-    tokens = FR(text)
+    tokens = FR("Qu'est-ce que tu fais?")
     assert tokens[0].lemma_ == "que"
     assert tokens[1].lemma_ == "être"
     assert tokens[5].lemma_ == "faire"

-@pytest.mark.models
+@pytest.mark.models('fr')
 @pytest.mark.xfail(reason="sont tagged as AUX")
 def test_lemmatizer_noun_verb_2(FR):
-    text = "Les abaissements de température sont gênants."
-    tokens = FR(text)
+    tokens = FR("Les abaissements de température sont gênants.")
     assert tokens[4].lemma_ == "être"

-@pytest.mark.models
+@pytest.mark.models('fr')
 @pytest.mark.xfail(reason="Costaricienne TAG is PROPN instead of NOUN and spacy don't lemmatize PROPN")
-def test_lemmatizer_noun(FR):
-    text = "il y a des Costaricienne."
-    tokens = FR(text)
+def test_lemmatizer_noun(model):
+    tokens = FR("il y a des Costaricienne.")
     assert tokens[4].lemma_ == "Costaricain"

-@pytest.mark.models
+@pytest.mark.models('fr')
 def test_lemmatizer_noun_2(FR):
-    text = "Les abaissements de température sont gênants."
-    tokens = FR(text)
+    tokens = FR("Les abaissements de température sont gênants.")
     assert tokens[1].lemma_ == "abaissement"
     assert tokens[5].lemma_ == "gênant"
@@ -1,60 +0,0 @@
[Removed file, 60 lines: the old parser SBD tests (test_parser_sbd_single_punct, test_parser_sentence_breaks, test_parser_sbd_serialization_projective). The same test bodies are re-added above as test_en_sbd_single_punct, test_en_sentence_breaks and test_en_sbd_serialization_projective.]
@@ -1,17 +1,11 @@
 import pytest

 from ...pipeline import NeuralDependencyParser
-from ...vocab import Vocab


 @pytest.fixture
-def vocab():
-    return Vocab()
+def parser(en_vocab):
+    parser = NeuralDependencyParser(en_vocab)


-@pytest.fixture
-def parser(vocab):
-    parser = NeuralDependencyParser(vocab)
     parser.add_label('nsubj')
     parser.model, cfg = parser.Model(parser.moves.n_moves)
     parser.cfg.update(cfg)
@@ -19,8 +13,8 @@ def parser(vocab):

 @pytest.fixture
-def blank_parser(vocab):
-    parser = NeuralDependencyParser(vocab)
+def blank_parser(en_vocab):
+    parser = NeuralDependencyParser(en_vocab)
     return parser
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 import pytest


-@pytest.mark.models
+@pytest.mark.models('en')
 @pytest.mark.parametrize('text,i', [("Jane's got a new car", 1),
                                     ("Jane thinks that's a nice car", 3)])
 def test_issue401(EN, text, i):
@@ -6,7 +6,7 @@ from ...matcher import Matcher
 import pytest


-@pytest.mark.models
+@pytest.mark.models('en')
 def test_issue429(EN):
     def merge_phrases(matcher, doc, i, matches):
         if i != len(matches) - 1:
@@ -6,7 +6,7 @@ from ..util import get_doc
 import pytest


-@pytest.mark.models
+@pytest.mark.models('en')
 def test_issue514(EN):
     """Test serializing after adding entity"""
     text = ["This", "is", "a", "sentence", "about", "pasta", "."]
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 import pytest


-@pytest.mark.models
+@pytest.mark.models('en')
 def test_issue54(EN):
     text = "Talks given by women had a slightly higher number of questions asked (3.2$\pm$0.2) than talks given by men (2.6$\pm$0.1)."
     tokens = EN(text)
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 import pytest


-@pytest.mark.models
+@pytest.mark.models('en')
 @pytest.mark.parametrize('text', ["He is the man", "he is the man"])
 def test_issue686(EN, text):
     """Test that pronoun lemmas are assigned correctly."""
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 import pytest


-@pytest.mark.models
+@pytest.mark.models('en')
 def test_issue693(EN):
     """Test that doc.noun_chunks parses the complete sentence."""
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 import pytest


-@pytest.mark.models
+@pytest.mark.models('en')
 def test_issue704(EN):
     """Test that sentence boundaries are detected correctly."""
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 import pytest


-@pytest.mark.models
+@pytest.mark.models('en')
 @pytest.mark.parametrize('text1,text2',
                          [("You're happy", "You are happy"),
                           ("I'm happy", "I am happy"),
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 import pytest


-@pytest.mark.models
+@pytest.mark.models('en')
 @pytest.mark.parametrize('text', ["s..."])
 def test_issue719(EN, text):
     """Test that the token 's' is not lemmatized into empty string."""
@@ -4,7 +4,7 @@ import pytest


 @pytest.mark.xfail
-@pytest.mark.models
+@pytest.mark.models('en')
 def test_issue758(EN):
     '''Test parser transition bug after label added.'''
     from ...matcher import merge_phrase
@@ -5,6 +5,8 @@ import pytest


 # Note: "chromosomes" worked previous the bug fix
+@pytest.mark.models('en')
 @pytest.mark.parametrize('word,lemmas', [("chromosomes", ["chromosome"]), ("endosomes", ["endosome"]), ("colocalizes", ["colocalize", "colocaliz"])])
-def test_issue781(lemmatizer, word, lemmas):
+def test_issue781(EN, word, lemmas):
+    lemmatizer = EN.Defaults.create_lemmatizer()
     assert lemmatizer(word, 'noun', morphology={'number': 'plur'}) == set(lemmas)
@@ -70,8 +70,8 @@ def temp_save_model(model):


-@pytest.mark.models
-def test_issue910(train_data, additional_entity_types):
+@pytest.mark.models('en')
+def test_issue910(EN, train_data, additional_entity_types):
     '''Test that adding entities and resuming training works passably OK.
     There are two issues here:

@@ -79,8 +79,7 @@ def test_issue910(train_data, additional_entity_types):
     2) There's no way to set the learning rate for the weight update, so we
     end up out-of-scale, causing it to learn too fast.
     '''
-    nlp = English()
-    doc = nlp(u"I am looking for a restaurant in Berlin")
+    doc = EN(u"I am looking for a restaurant in Berlin")
     ents_before_train = [(ent.label_, ent.text) for ent in doc.ents]
     # Fine tune the ner model
     for entity_type in additional_entity_types:
@@ -1,18 +1,13 @@
 from __future__ import unicode_literals

 import pytest
-from ... import load as load_spacy
-
-
-@pytest.fixture
-def doc():
-    nlp = load_spacy('en')
-    return nlp('Does flight number three fifty-four require a connecting flight'
-               ' to get to Boston?')


-@pytest.mark.models
-def test_issue955(doc):
+@pytest.mark.models('en')
+def test_issue955(EN, doc):
     '''Test that we don't have any nested noun chunks'''
+    doc = EN('Does flight number three fifty-four require a connecting flight'
+             ' to get to Boston?')
     seen_tokens = set()
     for np in doc.noun_chunks:
         print(np.text, np.root.text, np.root.dep_, np.root.tag_)
@@ -1,49 +0,0 @@
[Removed file, 49 lines: the old tagger/lemmatizer tests (test_tagger_lemmatizer_noun_lemmas, test_tagger_lemmatizer_base_forms, test_tagger_lemmatizer_base_form_verb, test_tagger_lemmatizer_punct, test_tagger_lemmatizer_lemma_assignment), each guarded by "if lemmatizer is None: return None". The same checks are re-added above in the new spacy/tests/lang/en/test_lemmatizer.py.]
@@ -1,17 +0,0 @@
[Removed file, 17 lines: the old test_tagger_load_morph_exc test. The same body is re-added above as test_en_tagger_load_morph_exc in the new spacy/tests/lang/en/test_tagger.py.]
@@ -1,35 +0,0 @@
[Removed file, 35 lines: the old test_tagger_spaces and test_tagger_return_char tests. The same bodies are re-added above as test_en_tagger_spaces and test_en_tagger_return_char in the new spacy/tests/lang/en/test_tagger.py.]
@@ -1,16 +0,0 @@
[Removed file, 16 lines: the old test_tag_names test. The same body is re-added above in the new spacy/tests/lang/en/test_tagger.py.]
@@ -20,6 +20,7 @@ def test_util_ensure_path_succeeds(text):
     assert isinstance(path, Path)


+@pytest.mark.models
 def test_simple_model_roundtrip_bytes():
     model = Maxout(5, 10, pieces=2)
     model.b += 1
@@ -29,6 +30,7 @@ def test_simple_model_roundtrip_bytes():
     assert model.b[0, 0] == 1


+@pytest.mark.models
 def test_multi_model_roundtrip_bytes():
     model = chain(Maxout(5, 10, pieces=2), Maxout(2, 3))
     model._layers[0].b += 1
@@ -41,6 +43,7 @@ def test_multi_model_roundtrip_bytes():
     assert model._layers[1].b[0, 0] == 2


+@pytest.mark.models
 def test_multi_model_load_missing_dims():
     model = chain(Maxout(5, 10, pieces=2), Maxout(2, 3))
     model._layers[0].b += 1
@@ -4,9 +4,20 @@ from __future__ import unicode_literals
 from ..tokens import Doc
 from ..attrs import ORTH, POS, HEAD, DEP

+import pytest
 import numpy

+MODELS = {}
+
+
+def load_test_model(model):
+    if model not in MODELS:
+        module = pytest.importorskip(model)
+        MODELS[model] = module.load()
+    return MODELS[model]
+
+
 def get_doc(vocab, words=[], pos=None, heads=None, deps=None, tags=None, ents=None):
     """Create Doc object from given vocab, words and annotations."""
     pos = pos or [''] * len(words)
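A brief sketch of how this helper is consumed (the fixture shown in the comment is the one from the conftest hunk above; the rest is illustrative): load_test_model imports the installed model package via pytest.importorskip, so a missing model skips the test instead of failing, and the module-level MODELS dict caches each loaded pipeline for the session.

# Illustrative use of load_test_model, mirroring the EN fixture in conftest.py:
#   @pytest.fixture(params=_models['en'], scope="session")
#   def EN(request):
#       return load_test_model(request.param)
#
# Calling it twice with the same name returns the cached pipeline:
nlp_a = load_test_model('en_core_web_sm')   # importorskip: test is skipped if not installed
nlp_b = load_test_model('en_core_web_sm')
assert nlp_a is nlp_b                       # served from the MODELS cache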
@@ -2,9 +2,16 @@

 include ../../_includes/_mixins

++aside("Help us improve the docs")
+    | Did you spot a mistake or come across explanations that
+    | are unclear? We always appreciate improvement
+    | #[+a(gh("spaCy") + "/issues") suggestions] or
+    | #[+a(gh("spaCy") + "/pulls") pull requests]. You can find a "Suggest
+    | edits" link at the bottom of each page that points you to the source.
+
 +h(2, "whats-spacy") What's spaCy?

-+grid
++grid.o-no-block
     +grid-col("half")

     +grid-col("half")
@@ -52,8 +59,8 @@ p
     +row
         +cell #[strong Dependency Parsing]
         +cell
-            | Assigning syntactic dependency labels, i.e. the relations between
-            | individual tokens.
+            | Assigning syntactic dependency labels, describing the relations
+            | between individual tokens, like subject or object.
         +cell #[+procon("pro")]

     +row
@@ -374,6 +381,8 @@ p
     | on GitHub, which we use to tag bugs and feature requests that are easy
     | and self-contained. We also appreciate contributions to the docs – whether
     | it's fixing a typo, improving an example or adding additional explanations.
+    | You'll find a "Suggest edits" link at the bottom of each page that points
+    | you to the source.

 p
     | Another way of getting involved is to help us improve the