Fix test failures in test_api

This commit is contained in:
Matthew Honnibal 2015-09-29 23:04:20 +10:00
parent 87e6186828
commit bf4d30c5b6

View File

@@ -33,13 +33,12 @@ def test_sentence_spans(nlp):
     assert [s.root.orth_ for s in doc.sents] == ["is", "'s"]

-@pytest.mark.xfail
 def test_entity_spans(nlp):
     # from spacy.en import English
     # nlp = English()
     tokens = nlp('Mr. Best flew to New York on Saturday morning.')
     ents = list(tokens.ents)
-    assert ents[0].label == 112504
+    assert ents[0].label == 28061
     assert ents[0].label_ == 'PERSON'
     assert ents[0].orth_ == 'Best'
     assert ents[0].string == ents[0].string
@@ -57,26 +56,30 @@ def test_noun_chunk_spans(nlp):
     # NP three noun chunks <-- has

-@pytest.mark.xfail
 def test_count_by(nlp):
     # from spacy.en import English, attrs
     # nlp = English()
+    import numpy
     from spacy.en import attrs
     tokens = nlp('apple apple orange banana')
-    assert tokens.count_by(attrs.ORTH) == {12800L: 1,
-                                           11880L: 2,
-                                           7561L: 1}
-    assert tokens.to_array([attrs.ORTH]) == array([[11880],
-                                                   [11880],
-                                                   [7561],
-                                                   [12800]])
+    assert tokens.count_by(attrs.ORTH) == {2529: 2, 4117: 1, 6650: 1}
+    assert repr(tokens.to_array([attrs.ORTH])) == repr(numpy.array([[2529],
+                                                                    [2529],
+                                                                    [4117],
+                                                                    [6650]], dtype=numpy.int32))

-@pytest.mark.xfail
-def test_read_bytes():
-    # TODO: missing imports
-    for byte_string in Doc.read_bytes(open('path/to/data_directory')):
-        doc = Doc(nlp.vocab).from_bytes(byte_string)
+@pytest.mark.models
+def test_read_bytes(nlp):
+    from spacy.tokens.doc import Doc
+    loc = '/tmp/test_serialize.bin'
+    with open(loc, 'wb') as file_:
+        file_.write(nlp(u'This is a document.').to_bytes())
+        file_.write(nlp(u'This is another.').to_bytes())
+    docs = []
+    with open(loc) as file_:
+        for byte_string in Doc.read_bytes(file_):
+            docs.append(Doc(nlp.vocab).from_bytes(byte_string))
+    assert len(docs) == 2

 def test_token_span(doc):
def test_token_span(doc): def test_token_span(doc):
@@ -134,25 +137,22 @@ def test_example_i_like_new_york4(toks, new, york):
     assert new_york.root.orth_ == 'York'

-@pytest.mark.xfail
 def test_example_i_like_new_york5(toks, autumn, dot):
     assert toks[autumn].head.orth_ == 'in'
     assert toks[dot].head.orth_ == 'like'
-    # TODO: TypeError: readonly attribute
     autumn_dot = toks[autumn:]
     assert autumn_dot.root.orth_ == 'Autumn'

-@pytest.mark.xfail
 def test_navigating_the_parse_tree_lefts(doc):
     # TODO: where does the span object come from?
+    span = doc[:2]
     lefts = [span.doc[i] for i in range(0, span.start)
              if span.doc[i].head in span]

-@pytest.mark.xfail
 def test_navigating_the_parse_tree_rights(doc):
-    # TODO: where does the span object come from?
+    span = doc[:2]
     rights = [span.doc[i] for i in range(span.end, len(span.doc))
               if span.doc[i].head in span]