Fix test failures in test_api

This commit is contained in:
Matthew Honnibal 2015-09-29 23:04:20 +10:00
parent 87e6186828
commit bf4d30c5b6

View File

@@ -33,13 +33,12 @@ def test_sentence_spans(nlp):
     assert [s.root.orth_ for s in doc.sents] == ["is", "'s"]

-@pytest.mark.xfail
 def test_entity_spans(nlp):
     # from spacy.en import English
     # nlp = English()
     tokens = nlp('Mr. Best flew to New York on Saturday morning.')
     ents = list(tokens.ents)
-    assert ents[0].label == 112504
+    assert ents[0].label == 28061
     assert ents[0].label_ == 'PERSON'
     assert ents[0].orth_ == 'Best'
     assert ents[0].string == ents[0].string
@@ -57,26 +56,30 @@ def test_noun_chunk_spans(nlp):
     # NP three noun chunks <-- has

-@pytest.mark.xfail
 def test_count_by(nlp):
     # from spacy.en import English, attrs
     # nlp = English()
+    import numpy
     from spacy.en import attrs
     tokens = nlp('apple apple orange banana')
-    assert tokens.count_by(attrs.ORTH) == {12800L: 1,
-                                           11880L: 2,
-                                           7561L: 1}
-    assert tokens.to_array([attrs.ORTH]) == array([[11880],
-                                                   [11880],
-                                                   [7561],
-                                                   [12800]])
+    assert tokens.count_by(attrs.ORTH) == {2529: 2, 4117: 1, 6650: 1}
+    assert repr(tokens.to_array([attrs.ORTH])) == repr(numpy.array([[2529],
+                                                                    [2529],
+                                                                    [4117],
+                                                                    [6650]], dtype=numpy.int32))

-@pytest.mark.xfail
-def test_read_bytes():
-    # TODO: missing imports
-    for byte_string in Doc.read_bytes(open('path/to/data_directory')):
-        doc = Doc(nlp.vocab).from_bytes(byte_string)
+@pytest.mark.models
+def test_read_bytes(nlp):
+    from spacy.tokens.doc import Doc
+    loc = '/tmp/test_serialize.bin'
+    with open(loc, 'wb') as file_:
+        file_.write(nlp(u'This is a document.').to_bytes())
+        file_.write(nlp(u'This is another.').to_bytes())
+    docs = []
+    with open(loc) as file_:
+        for byte_string in Doc.read_bytes(file_):
+            docs.append(Doc(nlp.vocab).from_bytes(byte_string))
+    assert len(docs) == 2

 def test_token_span(doc):
def test_token_span(doc): def test_token_span(doc):
@@ -134,25 +137,22 @@ def test_example_i_like_new_york4(toks, new, york):
     assert new_york.root.orth_ == 'York'

-@pytest.mark.xfail
 def test_example_i_like_new_york5(toks, autumn, dot):
     assert toks[autumn].head.orth_ == 'in'
     assert toks[dot].head.orth_ == 'like'
-    # TODO: TypeError: readonly attribute
     autumn_dot = toks[autumn:]
     assert autumn_dot.root.orth_ == 'Autumn'

-@pytest.mark.xfail
 def test_navigating_the_parse_tree_lefts(doc):
     # TODO: where does the span object come from?
+    span = doc[:2]
     lefts = [span.doc[i] for i in range(0, span.start)
              if span.doc[i].head in span]

-@pytest.mark.xfail
 def test_navigating_the_parse_tree_rights(doc):
-    # TODO: where does the span object come from?
+    span = doc[:2]
     rights = [span.doc[i] for i in range(span.end, len(span.doc))
               if span.doc[i].head in span]