Tidy up and fix issues

2025-06-29 01:13:17 +03:00 · 2020-02-18 15:17:03 +01:00 · 2020-02-18 15:17:03 +01:00 · 1278161f47
commit 1278161f47
parent de11ea753a
9 changed files with 1476 additions and 1500 deletions
--- a/spacy/cli/converters/conllu2json.py
+++ b/spacy/cli/converters/conllu2json.py
@@ -235,7 +235,7 @@ def example_from_conllu_sentence(
            subtok_word = ""
            in_subtok = False
        id_ = int(id_) - 1
-        head = (int(head) - 1) if head != "0" else id_
+        head = (int(head) - 1) if head not in ("0", "_") else id_
        tag = pos if tag == "_" else tag
        morph = morph if morph != "_" else ""
        dep = "ROOT" if dep == "root" else dep
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -541,8 +541,8 @@ class Errors(object):
    E997 = ("Tokenizer special cases are not allowed to modify the text. "
            "This would map '{chunk}' to '{orth}' given token attributes "
            "'{token_attrs}'.")
-    E998 = ("Can only create GoldParse's from Example's without a Doc, "
+    E998 = ("Can only create GoldParse objects from Example objects without a "
-            "if get_gold_parses() is called with a Vocab object.")
+            "Doc if get_gold_parses() is called with a Vocab object.")
    E999 = ("Encountered an unexpected format for the dictionary holding "
            "gold annotations: {gold_dict}")
--- a/spacy/gold.pyx
+++ b/spacy/gold.pyx
@@ -991,11 +991,6 @@ cdef class GoldParse:
        self.cats = {} if cats is None else dict(cats)
        self.links = {} if links is None else dict(links)
        # orig_annot is used as an iterator in `nlp.evalate` even if self.length == 0,
        # so set a empty list to avoid error.
        # if self.lenght > 0, this is modified latter.
        self.orig_annot = []
        # avoid allocating memory if the doc does not contain any tokens
        if self.length > 0:
            if not words:
--- a/spacy/lang/sk/examples.py
+++ b/spacy/lang/sk/examples.py
@@ -1,7 +1,3 @@
 # coding: utf8
 from __future__ import unicode_literals
 """
 Example sentences to test spaCy and its language models.
--- a/spacy/lang/sk/lex_attrs.py
+++ b/spacy/lang/sk/lex_attrs.py
@@ -1,6 +1,3 @@
 # coding: utf8
 from __future__ import unicode_literals
 from ...attrs import LIKE_NUM
 _num_words = [
--- a/spacy/lang/sk/tag_map.py
+++ b/spacy/lang/sk/tag_map.py
@@ -1,8 +1,5 @@
-# coding: utf8
+from ...symbols import POS, AUX, ADJ, CCONJ, NUM, ADV, ADP, X, VERB
-from __future__ import unicode_literals
+from ...symbols import NOUN, PART, INTJ, PRON
 from ...symbols import POS, AUX, PUNCT, SYM, ADJ, CCONJ, NUM, DET, ADV, ADP, X, VERB
 from ...symbols import NOUN, PROPN, PART, INTJ, SPACE, PRON
 # Source https://universaldependencies.org/tagset-conversion/sk-snk-uposf.html
 # fmt: off
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -77,7 +77,7 @@ cdef class Parser:
        tok2vec = Tok2Vec(width=token_vector_width,
                          embed_size=embed_size,
                          conv_depth=conv_depth,
-                          window_size=window_size,
+                          window_size=conv_window,
                          cnn_maxout_pieces=t2v_pieces,
                          subword_features=subword_features,
                          pretrained_vectors=pretrained_vectors,
@@ -105,7 +105,7 @@ cdef class Parser:
            'bilstm_depth': bilstm_depth,
            'self_attn_depth': self_attn_depth,
            'conv_depth': conv_depth,
-            'window_size': window_size,
+            'window_size': conv_window,
            'embed_size': embed_size,
            'cnn_maxout_pieces': t2v_pieces
        }
--- a/spacy/tests/regression/test_issue4849.py
+++ b/spacy/tests/regression/test_issue4849.py
@@ -1,6 +1,3 @@
 # coding: utf8
 from __future__ import unicode_literals
 from spacy.lang.en import English
 from spacy.pipeline import EntityRuler
@@ -9,11 +6,12 @@ def test_issue4849():
    nlp = English()
    ruler = EntityRuler(
-        nlp, patterns=[
+        nlp,
-            {"label": "PERSON", "pattern": 'joe biden', "id": 'joe-biden'},
+        patterns=[
-            {"label": "PERSON", "pattern": 'bernie sanders', "id": 'bernie-sanders'},
+            {"label": "PERSON", "pattern": "joe biden", "id": "joe-biden"},
            {"label": "PERSON", "pattern": "bernie sanders", "id": "bernie-sanders"},
        ],
-        phrase_matcher_attr="LOWER"
+        phrase_matcher_attr="LOWER",
    )
    nlp.add_pipe(ruler)
@@ -27,10 +25,10 @@ def test_issue4849():
    count_ents = 0
    for doc in nlp.pipe([text], n_process=1):
        count_ents += len([ent for ent in doc.ents if ent.ent_id > 0])
-    assert(count_ents == 2)
+    assert count_ents == 2
    # USING 2 PROCESSES
    count_ents = 0
    for doc in nlp.pipe([text], n_process=2):
        count_ents += len([ent for ent in doc.ents if ent.ent_id > 0])
-    assert (count_ents == 2)
+    assert count_ents == 2
--- a/spacy/tests/regression/test_issue4924.py
+++ b/spacy/tests/regression/test_issue4924.py
@@ -1,16 +1,9 @@
 # coding: utf8
 from __future__ import unicode_literals
 import pytest
-
+from spacy.language import Language
 import spacy
-@pytest.fixture
+def test_evaluate():
-def nlp():
+    nlp = Language()
    return spacy.blank("en")
 def test_evaluate(nlp):
    docs_golds = [("", {})]
    with pytest.raises(ValueError):
        nlp.evaluate(docs_golds)