From 6e303de717ea2fb556d9c40e489f882136241e36 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Wed, 20 Nov 2019 13:15:24 +0100
Subject: [PATCH] Auto-format

---
 spacy/lang/char_classes.py               | 10 ++-
 spacy/lang/ko/lex_attrs.py               | 82 ++++++++++++------------
 spacy/lang/lb/tokenizer_exceptions.py    |  4 +-
 spacy/lang/zh/__init__.py                |  5 +-
 spacy/scorer.py                          |  8 ++-
 spacy/tests/regression/test_issue4590.py | 17 +++--
 spacy/tests/test_scorer.py               | 20 +++++-
 7 files changed, 89 insertions(+), 57 deletions(-)

diff --git a/spacy/lang/char_classes.py b/spacy/lang/char_classes.py
index b28c43d63..2c8823867 100644
--- a/spacy/lang/char_classes.py
+++ b/spacy/lang/char_classes.py
@@ -206,7 +206,15 @@ _upper = LATIN_UPPER + _russian_upper + _tatar_upper + _greek_upper + _ukrainian
 _lower = LATIN_LOWER + _russian_lower + _tatar_lower + _greek_lower + _ukrainian_lower
 
 _uncased = (
-    _bengali + _hebrew + _persian + _sinhala + _hindi + _kannada + _tamil + _telugu + _hangul
+    _bengali
+    + _hebrew
+    + _persian
+    + _sinhala
+    + _hindi
+    + _kannada
+    + _tamil
+    + _telugu
+    + _hangul
 )
 
 ALPHA = group_chars(LATIN + _russian + _tatar + _greek + _ukrainian + _uncased)
diff --git a/spacy/lang/ko/lex_attrs.py b/spacy/lang/ko/lex_attrs.py
index e84a8c7c4..1904a0ece 100644
--- a/spacy/lang/ko/lex_attrs.py
+++ b/spacy/lang/ko/lex_attrs.py
@@ -5,47 +5,47 @@ from ...attrs import LIKE_NUM
 
 
 _num_words = [
-  "영",
-  "공",
-  # Native Korean number system
-  "하나",
-  "둘",
-  "셋",
-  "넷",
-  "다섯",
-  "여섯",
-  "일곱",
-  "여덟",
-  "아홉",
-  "열",
-  "스물",
-  "서른",
-  "마흔",
-  "쉰",
-  "예순",
-  "일흔",
-  "여든",
-  "아흔",
-  # Sino-Korean number system
-  "일",
-  "이",
-  "삼",
-  "사",
-  "오",
-  "육",
-  "칠",
-  "팔",
-  "구",
-  "십",
-  "백",
-  "천",
-  "만",
-  "십만",
-  "백만",
-  "천만",
-  "일억",
-  "십억",
-  "백억"
+    "영",
+    "공",
+    # Native Korean number system
+    "하나",
+    "둘",
+    "셋",
+    "넷",
+    "다섯",
+    "여섯",
+    "일곱",
+    "여덟",
+    "아홉",
+    "열",
+    "스물",
+    "서른",
+    "마흔",
+    "쉰",
+    "예순",
+    "일흔",
+    "여든",
+    "아흔",
+    # Sino-Korean number system
+    "일",
+    "이",
+    "삼",
+    "사",
+    "오",
+    "육",
+    "칠",
+    "팔",
+    "구",
+    "십",
+    "백",
+    "천",
+    "만",
+    "십만",
+    "백만",
+    "천만",
+    "일억",
+    "십억",
+    "백억",
 ]
diff --git a/spacy/lang/lb/tokenizer_exceptions.py b/spacy/lang/lb/tokenizer_exceptions.py
index 18b58f2b1..d84372aef 100644
--- a/spacy/lang/lb/tokenizer_exceptions.py
+++ b/spacy/lang/lb/tokenizer_exceptions.py
@@ -6,9 +6,7 @@ from ...symbols import ORTH, LEMMA, NORM
 # TODO
 # treat other apostrophes within words as part of the word: [op d'mannst], [fir d'éischt] (= exceptions)
 
-_exc = {
-
-}
+_exc = {}
 
 # translate / delete what is not necessary
 for exc_data in [
diff --git a/spacy/lang/zh/__init__.py b/spacy/lang/zh/__init__.py
index 5bd7b7335..8179b4551 100644
--- a/spacy/lang/zh/__init__.py
+++ b/spacy/lang/zh/__init__.py
@@ -14,6 +14,7 @@ from .tag_map import TAG_MAP
 def try_jieba_import(use_jieba):
     try:
         import jieba
+
         return jieba
     except ImportError:
         if use_jieba:
@@ -34,7 +35,9 @@ class ChineseTokenizer(DummyTokenizer):
     def __call__(self, text):
         # use jieba
         if self.use_jieba:
-            jieba_words = list([x for x in self.jieba_seg.cut(text, cut_all=False) if x])
+            jieba_words = list(
+                [x for x in self.jieba_seg.cut(text, cut_all=False) if x]
+            )
             words = [jieba_words[0]]
             spaces = [False]
             for i in range(1, len(jieba_words)):
diff --git a/spacy/scorer.py b/spacy/scorer.py
index 0b4843f41..7b05b11fd 100644
--- a/spacy/scorer.py
+++ b/spacy/scorer.py
@@ -271,7 +271,9 @@ class Scorer(object):
                         self.labelled_per_dep[token.dep_.lower()] = PRFScore()
                     if token.dep_.lower() not in cand_deps_per_dep:
                         cand_deps_per_dep[token.dep_.lower()] = set()
-                    cand_deps_per_dep[token.dep_.lower()].add((gold_i, gold_head, token.dep_.lower()))
+                    cand_deps_per_dep[token.dep_.lower()].add(
+                        (gold_i, gold_head, token.dep_.lower())
+                    )
         if "-" not in [token[-1] for token in gold.orig_annot]:
             # Find all NER labels in gold and doc
             ent_labels = set([x[0] for x in gold_ents] + [k.label_ for k in doc.ents])
@@ -304,7 +306,9 @@ class Scorer(object):
         self.tags.score_set(cand_tags, gold_tags)
         self.labelled.score_set(cand_deps, gold_deps)
         for dep in self.labelled_per_dep:
-            self.labelled_per_dep[dep].score_set(cand_deps_per_dep.get(dep, set()), gold_deps_per_dep.get(dep, set()))
+            self.labelled_per_dep[dep].score_set(
+                cand_deps_per_dep.get(dep, set()), gold_deps_per_dep.get(dep, set())
+            )
         self.unlabelled.score_set(
             set(item[:2] for item in cand_deps), set(item[:2] for item in gold_deps)
         )
diff --git a/spacy/tests/regression/test_issue4590.py b/spacy/tests/regression/test_issue4590.py
index 6a43dfea9..63e99c552 100644
--- a/spacy/tests/regression/test_issue4590.py
+++ b/spacy/tests/regression/test_issue4590.py
@@ -11,8 +11,14 @@ def test_issue4590(en_vocab):
     """Test that matches param in on_match method are the same as matches run with no on_match method"""
     pattern = [
         {"SPEC": {"NODE_NAME": "jumped"}, "PATTERN": {"ORTH": "jumped"}},
-        {"SPEC": {"NODE_NAME": "fox", "NBOR_RELOP": ">", "NBOR_NAME": "jumped"}, "PATTERN": {"ORTH": "fox"}},
-        {"SPEC": {"NODE_NAME": "quick", "NBOR_RELOP": ".", "NBOR_NAME": "jumped"}, "PATTERN": {"ORTH": "fox"}},
+        {
+            "SPEC": {"NODE_NAME": "fox", "NBOR_RELOP": ">", "NBOR_NAME": "jumped"},
+            "PATTERN": {"ORTH": "fox"},
+        },
+        {
+            "SPEC": {"NODE_NAME": "quick", "NBOR_RELOP": ".", "NBOR_NAME": "jumped"},
+            "PATTERN": {"ORTH": "fox"},
+        },
     ]
 
     on_match = Mock()
@@ -23,12 +29,11 @@
 
     text = "The quick brown fox jumped over the lazy fox"
     heads = [3, 2, 1, 1, 0, -1, 2, 1, -3]
    deps = ["det", "amod", "amod", "nsubj", "prep", "pobj", "det", "amod"]
-    
+
     doc = get_doc(en_vocab, text.split(), heads=heads, deps=deps)
-    
+
     matches = matcher(doc)
-    
+
     on_match_args = on_match.call_args
     assert on_match_args[0][3] == matches
-
diff --git a/spacy/tests/test_scorer.py b/spacy/tests/test_scorer.py
index c59358a6b..2a4ef0f40 100644
--- a/spacy/tests/test_scorer.py
+++ b/spacy/tests/test_scorer.py
@@ -12,8 +12,22 @@ from .util import get_doc
 test_las_apple = [
     [
         "Apple is looking at buying U.K. startup for $ 1 billion",
-        {"heads": [2, 2, 2, 2, 3, 6, 4, 4, 10, 10, 7],
-         "deps": ['nsubj', 'aux', 'ROOT', 'prep', 'pcomp', 'compound', 'dobj', 'prep', 'quantmod', 'compound', 'pobj']},
+        {
+            "heads": [2, 2, 2, 2, 3, 6, 4, 4, 10, 10, 7],
+            "deps": [
+                "nsubj",
+                "aux",
+                "ROOT",
+                "prep",
+                "pcomp",
+                "compound",
+                "dobj",
+                "prep",
+                "quantmod",
+                "compound",
+                "pobj",
+            ],
+        },
     ]
 ]
 
@@ -59,7 +73,7 @@ def test_las_per_type(en_vocab):
         en_vocab,
         words=input_.split(" "),
         heads=([h - i for i, h in enumerate(annot["heads"])]),
-        deps=annot["deps"]
+        deps=annot["deps"],
     )
     gold = GoldParse(doc, heads=annot["heads"], deps=annot["deps"])
     doc[0].dep_ = "compound"