Tidy up and fix issues

This commit is contained in:
Ines Montani 2020-02-18 15:17:03 +01:00
parent de11ea753a
commit 1278161f47
9 changed files with 1476 additions and 1500 deletions

View File

@ -235,7 +235,7 @@ def example_from_conllu_sentence(
subtok_word = ""
in_subtok = False
id_ = int(id_) - 1
head = (int(head) - 1) if head != "0" else id_
head = (int(head) - 1) if head not in ("0", "_") else id_
tag = pos if tag == "_" else tag
morph = morph if morph != "_" else ""
dep = "ROOT" if dep == "root" else dep

View File

@ -541,8 +541,8 @@ class Errors(object):
E997 = ("Tokenizer special cases are not allowed to modify the text. "
"This would map '{chunk}' to '{orth}' given token attributes "
"'{token_attrs}'.")
E998 = ("Can only create GoldParse's from Example's without a Doc, "
"if get_gold_parses() is called with a Vocab object.")
E998 = ("Can only create GoldParse objects from Example objects without a "
"Doc if get_gold_parses() is called with a Vocab object.")
E999 = ("Encountered an unexpected format for the dictionary holding "
"gold annotations: {gold_dict}")

View File

@ -991,11 +991,6 @@ cdef class GoldParse:
self.cats = {} if cats is None else dict(cats)
self.links = {} if links is None else dict(links)
# orig_annot is used as an iterator in `nlp.evaluate` even if self.length == 0,
# so set an empty list to avoid error.
# if self.length > 0, this is modified later.
self.orig_annot = []
# avoid allocating memory if the doc does not contain any tokens
if self.length > 0:
if not words:

View File

@ -1,7 +1,3 @@
# coding: utf8
from __future__ import unicode_literals
"""
Example sentences to test spaCy and its language models.

View File

@ -1,6 +1,3 @@
# coding: utf8
from __future__ import unicode_literals
from ...attrs import LIKE_NUM
_num_words = [

View File

@ -1,8 +1,5 @@
# coding: utf8
from __future__ import unicode_literals
from ...symbols import POS, AUX, PUNCT, SYM, ADJ, CCONJ, NUM, DET, ADV, ADP, X, VERB
from ...symbols import NOUN, PROPN, PART, INTJ, SPACE, PRON
from ...symbols import POS, AUX, ADJ, CCONJ, NUM, ADV, ADP, X, VERB
from ...symbols import NOUN, PART, INTJ, PRON
# Source https://universaldependencies.org/tagset-conversion/sk-snk-uposf.html
# fmt: off

View File

@ -77,7 +77,7 @@ cdef class Parser:
tok2vec = Tok2Vec(width=token_vector_width,
embed_size=embed_size,
conv_depth=conv_depth,
window_size=window_size,
window_size=conv_window,
cnn_maxout_pieces=t2v_pieces,
subword_features=subword_features,
pretrained_vectors=pretrained_vectors,
@ -105,7 +105,7 @@ cdef class Parser:
'bilstm_depth': bilstm_depth,
'self_attn_depth': self_attn_depth,
'conv_depth': conv_depth,
'window_size': window_size,
'window_size': conv_window,
'embed_size': embed_size,
'cnn_maxout_pieces': t2v_pieces
}

View File

@ -1,6 +1,3 @@
# coding: utf8
from __future__ import unicode_literals
from spacy.lang.en import English
from spacy.pipeline import EntityRuler
@ -9,11 +6,12 @@ def test_issue4849():
nlp = English()
ruler = EntityRuler(
nlp, patterns=[
{"label": "PERSON", "pattern": 'joe biden', "id": 'joe-biden'},
{"label": "PERSON", "pattern": 'bernie sanders', "id": 'bernie-sanders'},
nlp,
patterns=[
{"label": "PERSON", "pattern": "joe biden", "id": "joe-biden"},
{"label": "PERSON", "pattern": "bernie sanders", "id": "bernie-sanders"},
],
phrase_matcher_attr="LOWER"
phrase_matcher_attr="LOWER",
)
nlp.add_pipe(ruler)
@ -27,10 +25,10 @@ def test_issue4849():
count_ents = 0
for doc in nlp.pipe([text], n_process=1):
count_ents += len([ent for ent in doc.ents if ent.ent_id > 0])
assert(count_ents == 2)
assert count_ents == 2
# USING 2 PROCESSES
count_ents = 0
for doc in nlp.pipe([text], n_process=2):
count_ents += len([ent for ent in doc.ents if ent.ent_id > 0])
assert (count_ents == 2)
assert count_ents == 2

View File

@ -1,16 +1,9 @@
# coding: utf8
from __future__ import unicode_literals
import pytest
import spacy
from spacy.language import Language
@pytest.fixture
def nlp():
return spacy.blank("en")
def test_evaluate(nlp):
def test_evaluate():
nlp = Language()
docs_golds = [("", {})]
with pytest.raises(ValueError):
nlp.evaluate(docs_golds)