"""Usage-example tests: load the English pipeline, tokenize, look up string IDs."""
from __future__ import unicode_literals

import pytest


@pytest.fixture(scope="session")
def nlp():
    """Return an ``English`` pipeline, built once per test session.

    The import is kept inside the fixture so that merely importing this
    module does not import ``spacy.en``.
    """
    from spacy.en import English

    return English()


@pytest.fixture()
def doc(nlp):
    """Return the result of running ``nlp`` on a fixed two-sentence text."""
    return nlp('Hello, world. Here are two sentences.')


@pytest.fixture()
def token(doc):
    """Return the first element of the ``doc`` fixture."""
    return doc[0]


def test_load_resources_and_process_text():
    """Build a fresh ``English`` pipeline and run it on a text.

    This test deliberately does not use the ``nlp`` fixture: it exercises
    construction itself. It passes as long as no exception is raised.
    """
    from spacy.en import English

    nlp = English()
    nlp('Hello, world. Here are two sentences.')


def test_get_tokens_and_sentences(doc):
    """Check that the first sentence starts with the document's first token."""
    token = doc[0]
    # Use the builtin next() rather than the ``.next()`` method: the method
    # form exists only on Python 2 iterators, the builtin works on 2 and 3.
    sentence = next(doc.sents)
    assert token is sentence[0]
    assert sentence.text == 'Hello, world.'


def test_use_integer_ids_for_any_strings(nlp, token):
    """Check that ``nlp.vocab.strings`` maps a string to an ID and back."""
    hello_id = nlp.vocab.strings['Hello']
    hello_str = nlp.vocab.strings[hello_id]
    # NOTE(review): 3404 is a hard-coded ID; presumably it depends on the
    # vocabulary data that is installed -- confirm before upgrading models.
    assert token.orth == hello_id == 3404
    assert token.orth_ == hello_str == 'Hello'