Auto-format

This commit is contained in:
Ines Montani 2019-11-20 13:15:24 +01:00
parent 235fe6fe3b
commit 6e303de717
7 changed files with 89 additions and 57 deletions

View File

@@ -206,7 +206,15 @@ _upper = LATIN_UPPER + _russian_upper + _tatar_upper + _greek_upper + _ukrainian
_lower = LATIN_LOWER + _russian_lower + _tatar_lower + _greek_lower + _ukrainian_lower _lower = LATIN_LOWER + _russian_lower + _tatar_lower + _greek_lower + _ukrainian_lower
_uncased = ( _uncased = (
_bengali + _hebrew + _persian + _sinhala + _hindi + _kannada + _tamil + _telugu + _hangul _bengali
+ _hebrew
+ _persian
+ _sinhala
+ _hindi
+ _kannada
+ _tamil
+ _telugu
+ _hangul
) )
ALPHA = group_chars(LATIN + _russian + _tatar + _greek + _ukrainian + _uncased) ALPHA = group_chars(LATIN + _russian + _tatar + _greek + _ukrainian + _uncased)

View File

@@ -45,7 +45,7 @@ _num_words = [
"천만", "천만",
"일억", "일억",
"십억", "십억",
"백억" "백억",
] ]

View File

@@ -6,9 +6,7 @@ from ...symbols import ORTH, LEMMA, NORM
# TODO # TODO
# treat other apostrophes within words as part of the word: [op d'mannst], [fir d'éischt] (= exceptions) # treat other apostrophes within words as part of the word: [op d'mannst], [fir d'éischt] (= exceptions)
_exc = { _exc = {}
}
# translate / delete what is not necessary # translate / delete what is not necessary
for exc_data in [ for exc_data in [

View File

@@ -14,6 +14,7 @@ from .tag_map import TAG_MAP
def try_jieba_import(use_jieba): def try_jieba_import(use_jieba):
try: try:
import jieba import jieba
return jieba return jieba
except ImportError: except ImportError:
if use_jieba: if use_jieba:
@@ -34,7 +35,9 @@ class ChineseTokenizer(DummyTokenizer):
def __call__(self, text): def __call__(self, text):
# use jieba # use jieba
if self.use_jieba: if self.use_jieba:
jieba_words = list([x for x in self.jieba_seg.cut(text, cut_all=False) if x]) jieba_words = list(
[x for x in self.jieba_seg.cut(text, cut_all=False) if x]
)
words = [jieba_words[0]] words = [jieba_words[0]]
spaces = [False] spaces = [False]
for i in range(1, len(jieba_words)): for i in range(1, len(jieba_words)):

View File

@@ -271,7 +271,9 @@ class Scorer(object):
self.labelled_per_dep[token.dep_.lower()] = PRFScore() self.labelled_per_dep[token.dep_.lower()] = PRFScore()
if token.dep_.lower() not in cand_deps_per_dep: if token.dep_.lower() not in cand_deps_per_dep:
cand_deps_per_dep[token.dep_.lower()] = set() cand_deps_per_dep[token.dep_.lower()] = set()
cand_deps_per_dep[token.dep_.lower()].add((gold_i, gold_head, token.dep_.lower())) cand_deps_per_dep[token.dep_.lower()].add(
(gold_i, gold_head, token.dep_.lower())
)
if "-" not in [token[-1] for token in gold.orig_annot]: if "-" not in [token[-1] for token in gold.orig_annot]:
# Find all NER labels in gold and doc # Find all NER labels in gold and doc
ent_labels = set([x[0] for x in gold_ents] + [k.label_ for k in doc.ents]) ent_labels = set([x[0] for x in gold_ents] + [k.label_ for k in doc.ents])
@@ -304,7 +306,9 @@ class Scorer(object):
self.tags.score_set(cand_tags, gold_tags) self.tags.score_set(cand_tags, gold_tags)
self.labelled.score_set(cand_deps, gold_deps) self.labelled.score_set(cand_deps, gold_deps)
for dep in self.labelled_per_dep: for dep in self.labelled_per_dep:
self.labelled_per_dep[dep].score_set(cand_deps_per_dep.get(dep, set()), gold_deps_per_dep.get(dep, set())) self.labelled_per_dep[dep].score_set(
cand_deps_per_dep.get(dep, set()), gold_deps_per_dep.get(dep, set())
)
self.unlabelled.score_set( self.unlabelled.score_set(
set(item[:2] for item in cand_deps), set(item[:2] for item in gold_deps) set(item[:2] for item in cand_deps), set(item[:2] for item in gold_deps)
) )

View File

@@ -11,8 +11,14 @@ def test_issue4590(en_vocab):
"""Test that matches param in on_match method are the same as matches run with no on_match method""" """Test that matches param in on_match method are the same as matches run with no on_match method"""
pattern = [ pattern = [
{"SPEC": {"NODE_NAME": "jumped"}, "PATTERN": {"ORTH": "jumped"}}, {"SPEC": {"NODE_NAME": "jumped"}, "PATTERN": {"ORTH": "jumped"}},
{"SPEC": {"NODE_NAME": "fox", "NBOR_RELOP": ">", "NBOR_NAME": "jumped"}, "PATTERN": {"ORTH": "fox"}}, {
{"SPEC": {"NODE_NAME": "quick", "NBOR_RELOP": ".", "NBOR_NAME": "jumped"}, "PATTERN": {"ORTH": "fox"}}, "SPEC": {"NODE_NAME": "fox", "NBOR_RELOP": ">", "NBOR_NAME": "jumped"},
"PATTERN": {"ORTH": "fox"},
},
{
"SPEC": {"NODE_NAME": "quick", "NBOR_RELOP": ".", "NBOR_NAME": "jumped"},
"PATTERN": {"ORTH": "fox"},
},
] ]
on_match = Mock() on_match = Mock()
@@ -31,4 +37,3 @@ def test_issue4590(en_vocab):
on_match_args = on_match.call_args on_match_args = on_match.call_args
assert on_match_args[0][3] == matches assert on_match_args[0][3] == matches

View File

@@ -12,8 +12,22 @@ from .util import get_doc
test_las_apple = [ test_las_apple = [
[ [
"Apple is looking at buying U.K. startup for $ 1 billion", "Apple is looking at buying U.K. startup for $ 1 billion",
{"heads": [2, 2, 2, 2, 3, 6, 4, 4, 10, 10, 7], {
"deps": ['nsubj', 'aux', 'ROOT', 'prep', 'pcomp', 'compound', 'dobj', 'prep', 'quantmod', 'compound', 'pobj']}, "heads": [2, 2, 2, 2, 3, 6, 4, 4, 10, 10, 7],
"deps": [
"nsubj",
"aux",
"ROOT",
"prep",
"pcomp",
"compound",
"dobj",
"prep",
"quantmod",
"compound",
"pobj",
],
},
] ]
] ]
@@ -59,7 +73,7 @@ def test_las_per_type(en_vocab):
en_vocab, en_vocab,
words=input_.split(" "), words=input_.split(" "),
heads=([h - i for i, h in enumerate(annot["heads"])]), heads=([h - i for i, h in enumerate(annot["heads"])]),
deps=annot["deps"] deps=annot["deps"],
) )
gold = GoldParse(doc, heads=annot["heads"], deps=annot["deps"]) gold = GoldParse(doc, heads=annot["heads"], deps=annot["deps"])
doc[0].dep_ = "compound" doc[0].dep_ = "compound"