spaCy/spacy/tests/pipeline/test_functions.py
Ines Montani db55577c45
Drop Python 2.7 and 3.5 (#4828)
* Remove unicode declarations

* Remove Python 3.5 and 2.7 from CI

* Don't require pathlib

* Replace compat helpers

* Remove OrderedDict

* Use f-strings

* Set Cython compiler language level

* Fix typo

* Re-add OrderedDict for Table

* Update setup.cfg

* Revert CONTRIBUTING.md

* Revert lookups.md

* Revert top-level.md

* Small adjustments and docs [ci skip]
2019-12-22 01:53:56 +01:00

32 lines
904 B
Python

import pytest
from spacy.pipeline.functions import merge_subtokens
from ..util import get_doc
@pytest.fixture
def doc(en_tokenizer):
# fmt: off
text = "This is a sentence. This is another sentence. And a third."
heads = [1, 0, 1, -2, -3, 1, 0, 1, -2, -3, 1, 1, 1, 0]
deps = ["nsubj", "ROOT", "subtok", "attr", "punct", "nsubj", "ROOT",
"subtok", "attr", "punct", "subtok", "subtok", "subtok", "ROOT"]
# fmt: on
tokens = en_tokenizer(text)
return get_doc(tokens.vocab, words=[t.text for t in tokens], heads=heads, deps=deps)
def test_merge_subtokens(doc):
doc = merge_subtokens(doc)
# get_doc() doesn't set spaces, so the result is "And a third ."
assert [t.text for t in doc] == [
"This",
"is",
"a sentence",
".",
"This",
"is",
"another sentence",
".",
"And a third .",
]