Tidy up and auto-format

Ines Montani 2020-10-10 19:14:48 +02:00
parent 74972744e5
commit 539b0c10da
9 changed files with 32 additions and 31 deletions


@@ -62,6 +62,7 @@ _ordinal_words = [
 _ordinal_endings = ("inci", "ıncı", "nci", "ncı", "uncu", "üncü")
 def like_num(text):
     if text.startswith(("+", "-", "±", "~")):
         text = text[1:]
@@ -75,11 +76,11 @@ def like_num(text):
     text_lower = text.lower()
-    #Check cardinal number
+    # Check cardinal number
     if text_lower in _num_words:
         return True
-    #Check ordinal number
+    # Check ordinal number
     if text_lower in _ordinal_words:
         return True
     if text_lower.endswith(_ordinal_endings):
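
As a quick sanity check on the reformatted helper, a minimal sketch of how like_num accepts the ordinal forms covered by _ordinal_endings; the sample words are taken from the test file further down in this commit:

from spacy.lang.tr.lex_attrs import like_num

# Suffixed ordinals ("üçüncü", "beşinci") and digit+suffix forms
# ("100üncü", "8inci") should all be recognized via the endings tuple above.
for word in ["üçüncü", "beşinci", "100üncü", "8inci"]:
    assert like_num(word)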


@@ -51,9 +51,8 @@ def noun_chunks(doclike):
         elif word.dep == conj:
             cc_token = word.left_edge
             prev_end = cc_token.i
-            yield cc_token.right_edge.i + 1, extend_right(word), np_label  # Shave off cc tokens from the NP
+            # Shave off cc tokens from the NP
+            yield cc_token.right_edge.i + 1, extend_right(word), np_label
 SYNTAX_ITERATORS = {"noun_chunks": noun_chunks}
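
To see the conj handling end to end, a rough sketch that mirrors the tests below; the words, heads and deps come from test_tr_noun_chunks_conj_three2 further down, while the POS tags are an assumption, since that hunk does not show them:

from spacy.lang.tr import Turkish
from spacy.tokens import Doc

nlp = Turkish()
doc = Doc(
    nlp.vocab,
    words=["ben", "ya", "da", "sen", "ya", "da", "onlar"],
    heads=[0, 3, 1, 0, 6, 4, 3],
    deps=["ROOT", "cc", "fixed", "conj", "cc", "fixed", "conj"],
    pos=["PRON", "CCONJ", "CCONJ", "PRON", "CCONJ", "CCONJ", "PRON"],  # assumed tags
)
# Each conjunct should surface as its own chunk, with the "ya da"
# cc tokens shaved off by the branch shown above.
print([chunk.text for chunk in doc.noun_chunks])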


@@ -1,5 +1,5 @@
 from typing import Optional, Any, Dict, Callable, Iterable, Union, List, Pattern
-from typing import Tuple, Iterator
+from typing import Tuple
 from dataclasses import dataclass
 import random
 import itertools
@@ -1197,7 +1197,9 @@ class Language:
             doc = Doc(self.vocab, words=["x", "y", "z"])
             get_examples = lambda: [Example.from_dict(doc, {})]
         if not hasattr(get_examples, "__call__"):
-            err = Errors.E930.format(method="Language.initialize", obj=type(get_examples))
+            err = Errors.E930.format(
+                method="Language.initialize", obj=type(get_examples)
+            )
             raise TypeError(err)
         # Make sure the config is interpolated so we can resolve subsections
         config = self.config.interpolate()
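
For callers, the guard above means get_examples has to be a callable; a minimal sketch with a blank pipeline, where passing a plain list raises the E930 TypeError:

import spacy

nlp = spacy.blank("en")
try:
    nlp.initialize(get_examples=[])  # a list, not a callable
except TypeError as err:
    print(err)  # E930, naming Language.initialize and the offending type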


@@ -239,10 +239,12 @@ def th_tokenizer():
 def tr_tokenizer():
     return get_lang_class("tr")().tokenizer
 @pytest.fixture(scope="session")
 def tr_vocab():
     return get_lang_class("tr").Defaults.create_vocab()
 @pytest.fixture(scope="session")
 def tt_tokenizer():
     return get_lang_class("tt")().tokenizer
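
For reference, a minimal sketch of how a session-scoped fixture such as tr_tokenizer above is consumed; the test body here is illustrative, not part of the suite:

import pytest
from spacy.util import get_lang_class

@pytest.fixture(scope="session")
def tr_tokenizer():
    # One shared tokenizer instance for the whole test session
    return get_lang_class("tr")().tokenizer

def test_tr_tokenizer_splits_words(tr_tokenizer):
    tokens = tr_tokenizer("bir iki üç")
    assert len(tokens) == 3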


@@ -225,7 +225,7 @@ def test_tr_noun_chunks_acl_nmod(tr_tokenizer):
     assert chunks[0].text_with_ws == "en sevdiğim ses sanatçısı "
-def test_tr_noun_chunks_acl_nmod(tr_tokenizer):
+def test_tr_noun_chunks_acl_nmod2(tr_tokenizer):
     text = "bildiğim bir turizm şirketi"
     heads = [3, 3, 3, 3]
     deps = ["acl", "det", "nmod", "ROOT"]
@@ -326,7 +326,7 @@ def test_tr_noun_chunks_np_recursive_no_nmod(tr_tokenizer):
 def test_tr_noun_chunks_np_recursive_long_two_acls(tr_tokenizer):
     text = "içine Simge'nin bahçesinden toplanmış birkaç çiçeğin konmuş olduğu bir vazo"
     heads = [6, 2, 3, 5, 5, 6, 9, 6, 9, 9]
-    deps = ["obl", "nmod" , "obl", "acl", "det", "nsubj", "acl", "aux", "det", "ROOT"]
+    deps = ["obl", "nmod", "obl", "acl", "det", "nsubj", "acl", "aux", "det", "ROOT"]
     pos = ["ADP", "PROPN", "NOUN", "VERB", "DET", "NOUN", "VERB", "AUX", "DET", "NOUN"]
     tokens = tr_tokenizer(text)
     doc = Doc(
@@ -334,7 +334,10 @@ def test_tr_noun_chunks_np_recursive_long_two_acls(tr_tokenizer):
     )
     chunks = list(doc.noun_chunks)
     assert len(chunks) == 1
-    assert chunks[0].text_with_ws == "içine Simge'nin bahçesinden toplanmış birkaç çiçeğin konmuş olduğu bir vazo "
+    assert (
+        chunks[0].text_with_ws
+        == "içine Simge'nin bahçesinden toplanmış birkaç çiçeğin konmuş olduğu bir vazo "
+    )
 def test_tr_noun_chunks_two_nouns_in_nmod(tr_tokenizer):
@@ -350,7 +353,8 @@ def test_tr_noun_chunks_two_nouns_in_nmod(tr_tokenizer):
     assert len(chunks) == 1
     assert chunks[0].text_with_ws == "kız ve erkek çocuklar "
-def test_tr_noun_chunks_two_nouns_in_nmod(tr_tokenizer):
+def test_tr_noun_chunks_two_nouns_in_nmod2(tr_tokenizer):
     text = "tatlı ve gürbüz çocuklar"
     heads = [3, 2, 0, 3]
     deps = ["amod", "cc", "conj", "ROOT"]
@@ -378,6 +382,7 @@ def test_tr_noun_chunks_conj_simple(tr_tokenizer):
     assert chunks[0].text_with_ws == "ben "
     assert chunks[1].text_with_ws == "Sen "
 def test_tr_noun_chunks_conj_three(tr_tokenizer):
     text = "sen, ben ve ondan"
     heads = [0, 2, 0, 4, 0]
@@ -394,7 +399,7 @@ def test_tr_noun_chunks_conj_three(tr_tokenizer):
     assert chunks[2].text_with_ws == "sen "
-def test_tr_noun_chunks_conj_three(tr_tokenizer):
+def test_tr_noun_chunks_conj_three2(tr_tokenizer):
     text = "ben ya da sen ya da onlar"
     heads = [0, 3, 1, 0, 6, 4, 3]
     deps = ["ROOT", "cc", "fixed", "conj", "cc", "fixed", "conj"]
@@ -499,7 +504,7 @@ def test_tr_noun_chunks_flat_names_and_title(tr_tokenizer):
     assert chunks[0].text_with_ws == "Gazi Mustafa Kemal "
-def test_tr_noun_chunks_flat_names_and_title(tr_tokenizer):
+def test_tr_noun_chunks_flat_names_and_title2(tr_tokenizer):
     text = "Ahmet Vefik Paşa"
     heads = [2, 0, 2]
     deps = ["nmod", "flat", "ROOT"]


@@ -15,8 +15,8 @@ from spacy.lang.tr.lex_attrs import like_num
         "üçüncü",
         "beşinci",
         "100üncü",
-        "8inci"
-    ]
+        "8inci",
+    ],
 )
 def test_tr_lex_attrs_like_number_cardinal_ordinal(word):
     assert like_num(word)
@@ -26,4 +26,3 @@ def test_tr_lex_attrs_like_number_cardinal_ordinal(word):
 def test_tr_lex_attrs_capitals(word):
     assert like_num(word)
     assert like_num(word.upper())


@@ -446,7 +446,7 @@ def test_overfitting_IO():
         return mykb
     # Create the Entity Linker component and add it to the pipeline
-    entity_linker = nlp.add_pipe("entity_linker", last=True,)
+    entity_linker = nlp.add_pipe("entity_linker", last=True)
     entity_linker.set_kb(create_kb)
     assert "Q2146908" in entity_linker.vocab.strings
     assert "Q2146908" in entity_linker.kb.vocab.strings
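
For orientation, a minimal sketch of the set_kb pattern used above, assuming spaCy v3's in-memory KnowledgeBase; the entity ID matches the assertions in the hunk, while the alias, frequency and vector values are illustrative:

import spacy
from spacy.kb import KnowledgeBase

nlp = spacy.blank("en")

def create_kb(vocab):
    # The callback receives the pipeline's vocab and returns a populated KB
    kb = KnowledgeBase(vocab=vocab, entity_vector_length=3)
    kb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
    kb.add_alias(alias="Russ Cochran", entities=["Q2146908"], probabilities=[1.0])
    return kb

entity_linker = nlp.add_pipe("entity_linker", last=True)
entity_linker.set_kb(create_kb)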


@@ -1,10 +1,8 @@
 from typing import List
 import pytest
 from thinc.api import fix_random_seed, Adam, set_dropout_rate
 from numpy.testing import assert_array_equal
 import numpy
 from spacy.ml.models import build_Tok2Vec_model, MultiHashEmbed, MaxoutWindowEncoder
 from spacy.ml.models import build_text_classifier, build_simple_cnn_text_classifier
 from spacy.ml.staticvectors import StaticVectors
@@ -188,12 +186,7 @@ def test_models_update_consistently(seed, dropout, model_func, kwargs, get_X):
     assert_array_equal(get_all_params(model1), get_all_params(model2))
-@pytest.mark.parametrize(
-    "model_func,kwargs",
-    [
-        (StaticVectors, {"nO": 128, "nM": 300}),
-    ]
-)
+@pytest.mark.parametrize("model_func,kwargs", [(StaticVectors, {"nO": 128, "nM": 300})])
 def test_empty_docs(model_func, kwargs):
     nlp = English()
     model = model_func(**kwargs).initialize()
@@ -201,7 +194,7 @@ def test_empty_docs(model_func, kwargs):
     for n_docs in range(3):
         docs = [nlp("") for _ in range(n_docs)]
         # Test predict
-        _ = model.predict(docs)
+        model.predict(docs)
         # Test backprop
         output, backprop = model.begin_update(docs)
-        _ = backprop(output)
+        backprop(output)
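
As a standalone sketch of the contract this test pins down, using the same StaticVectors kwargs as the parametrization above: predict and begin_update are expected to tolerate zero-length batches and zero-length docs:

from spacy.lang.en import English
from spacy.ml.staticvectors import StaticVectors

nlp = English()
model = StaticVectors(nO=128, nM=300).initialize()
for n_docs in range(3):
    docs = [nlp("") for _ in range(n_docs)]
    model.predict(docs)  # forward pass on empty input
    output, backprop = model.begin_update(docs)
    backprop(output)  # feed the output back as a stand-in gradient, as the test does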