Mirror of https://github.com/explosion/spaCy.git
Tidy up and auto-format [ci skip]
commit 3126dd0904
parent bcbb9f5119
@@ -39,8 +39,8 @@ def resolve_pos(token):
     """
 
     # this is only used for consecutive ascii spaces
-    if token.pos == '空白':
-        return '空白'
+    if token.pos == "空白":
+        return "空白"
 
     # TODO: This is a first take. The rules here are crude approximations.
     # For many of these, full dependencies are needed to properly resolve
@@ -78,7 +78,7 @@ def detailed_tokens(tokenizer, text):
         scount = node.next.rlength - node.next.length
         spaces.append(bool(scount))
         while scount > 1:
-            words.append(ShortUnitWord(' ', ' ', '空白'))
+            words.append(ShortUnitWord(" ", " ", "空白"))
             spaces.append(False)
             scount -= 1
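For readers skimming the diff: the block above is the Japanese tokenizer's handling of runs of ASCII spaces. The first trailing space after a token only sets that token's entry in the spaces list; every further consecutive space is emitted as an explicit 空白 ("whitespace") token, which is what the test_extra_spaces assertions in the next hunk check. A minimal, MeCab-free sketch of that behaviour; split_with_spaces and its toy input are illustrative stand-ins, not part of spaCy.

from collections import namedtuple

# Stand-in for spaCy's ShortUnitWord(surface, lemma, pos); the MeCab node
# traversal is replaced by plain (surface, trailing_space_count) pairs so the
# sketch runs without MeCab installed.
ShortUnitWord = namedtuple("ShortUnitWord", ["surface", "lemma", "pos"])


def split_with_spaces(pieces):
    words, spaces = [], []
    for surface, scount in pieces:
        words.append(ShortUnitWord(surface, surface, "名詞"))  # dummy POS
        spaces.append(bool(scount))   # first trailing space -> spaces flag
        while scount > 1:             # remaining spaces -> explicit 空白 tokens
            words.append(ShortUnitWord(" ", " ", "空白"))
            spaces.append(False)
            scount -= 1
    return words, spaces


# "I" followed by three spaces, as in the test below:
words, spaces = split_with_spaces([("I", 3), ("like", 1), ("cheese.", 0)])
print([w.surface for w in words])  # ['I', ' ', ' ', 'like', 'cheese.']
print(spaces)                      # [True, False, False, True, False]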
@@ -48,8 +48,9 @@ def test_ja_tokenizer_pos(ja_tokenizer, text, expected_pos):
     pos = [token.pos_ for token in ja_tokenizer(text)]
     assert pos == expected_pos
 
 
 def test_extra_spaces(ja_tokenizer):
     # note: three spaces after "I"
     tokens = ja_tokenizer("I   like cheese.")
-    assert tokens[1].orth_ == ' '
-    assert tokens[2].orth_ == ' '
+    assert tokens[1].orth_ == " "
+    assert tokens[2].orth_ == " "
@@ -17,4 +17,6 @@ TEST_CASES = [
 
 @pytest.mark.parametrize("tokens,lemmas", TEST_CASES)
 def test_lt_lemmatizer(lt_lemmatizer, tokens, lemmas):
-    assert lemmas == [lt_lemmatizer.lookup_table.get_string(token, token) for token in tokens]
+    assert lemmas == [
+        lt_lemmatizer.lookup_table.get_string(token, token) for token in tokens
+    ]
@@ -78,7 +78,6 @@ def test_lookups_to_from_disk():
     assert table2.get_string("b") == 2
 
 
-
 # This fails on Python 3.5
 @pytest.mark.xfail
 def test_lookups_to_from_bytes_via_vocab():
@@ -97,6 +96,7 @@ def test_lookups_to_from_bytes_via_vocab():
     assert table.get_string("hello") == "world"
     assert new_vocab.to_bytes() == vocab_bytes
 
 
+# This fails on Python 3.5
 @pytest.mark.xfail
 def test_lookups_to_from_disk_via_vocab():
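The two hunks above touch tests that round-trip lookup tables through bytes and disk. A condensed sketch of the same serialization pattern, assuming the spacy.lookups.Lookups API these tests exercise; the table name and data here are made up, and get_string is simply the accessor the tests above use (its availability depends on the spaCy version being tested).

from spacy.lookups import Lookups

lookups = Lookups()
lookups.add_table("example_table", {"a": 1, "b": 2})  # hypothetical name/data

# Serialize to bytes and restore into a fresh Lookups object.
lookups_bytes = lookups.to_bytes()
new_lookups = Lookups()
new_lookups.from_bytes(lookups_bytes)

table = new_lookups.get_table("example_table")
assert table.get_string("b") == 2  # same accessor as the tests above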
@@ -80,8 +80,8 @@ training corpus and can be defined in the respective language data's
 
 <Accordion title="Universal Part-of-speech Tags" id="pos-universal">
 
-spaCy also maps all language-specific part-of-speech tags to a small, fixed set
-of word type tags following the
+spaCy maps all language-specific part-of-speech tags to a small, fixed set of
+word type tags following the
 [Universal Dependencies scheme](http://universaldependencies.org/u/pos/). The
 universal tags don't code for any morphological features and only cover the word
 type. They're available as the [`Token.pos`](/api/token#attributes) and
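The documentation change above concerns the coarse-grained Token.pos / Token.pos_ attributes, which hold the Universal Dependencies tag, as opposed to the fine-grained, corpus-specific Token.tag_. A minimal usage sketch, assuming a small English pipeline such as en_core_web_sm is installed:

import spacy

nlp = spacy.load("en_core_web_sm")  # assumes this model has been downloaded
doc = nlp("She ate the pizza")

for token in doc:
    # pos_: universal tag (word type only); tag_: fine-grained, corpus-specific tag
    print(token.text, token.pos_, token.tag_)
# e.g. "ate VERB VBD" -- the universal tag carries no morphological features,
# while the fine-grained tag also encodes details like tense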