Tidy up and fix issues

This commit is contained in:
Ines Montani 2020-02-18 15:17:03 +01:00
parent de11ea753a
commit 1278161f47
9 changed files with 1476 additions and 1500 deletions

View File

@ -235,7 +235,7 @@ def example_from_conllu_sentence(
subtok_word = ""
in_subtok = False
id_ = int(id_) - 1
head = (int(head) - 1) if head != "0" else id_
head = (int(head) - 1) if head not in ("0", "_") else id_
tag = pos if tag == "_" else tag
morph = morph if morph != "_" else ""
dep = "ROOT" if dep == "root" else dep

View File

@ -541,8 +541,8 @@ class Errors(object):
E997 = ("Tokenizer special cases are not allowed to modify the text. "
"This would map '{chunk}' to '{orth}' given token attributes "
"'{token_attrs}'.")
E998 = ("Can only create GoldParse's from Example's without a Doc, "
"if get_gold_parses() is called with a Vocab object.")
E998 = ("Can only create GoldParse objects from Example objects without a "
"Doc if get_gold_parses() is called with a Vocab object.")
E999 = ("Encountered an unexpected format for the dictionary holding "
"gold annotations: {gold_dict}")

View File

@ -991,11 +991,6 @@ cdef class GoldParse:
self.cats = {} if cats is None else dict(cats)
self.links = {} if links is None else dict(links)
# orig_annot is used as an iterator in `nlp.evaluate` even if self.length == 0,
# so set an empty list to avoid error.
# if self.length > 0, this is modified later.
self.orig_annot = []
# avoid allocating memory if the doc does not contain any tokens
if self.length > 0:
if not words:

View File

@ -1,7 +1,3 @@
# coding: utf8
from __future__ import unicode_literals
"""
Example sentences to test spaCy and its language models.

View File

@ -1,6 +1,3 @@
# coding: utf8
from __future__ import unicode_literals
from ...attrs import LIKE_NUM
_num_words = [

View File

@ -1,8 +1,5 @@
# coding: utf8
from __future__ import unicode_literals
from ...symbols import POS, AUX, PUNCT, SYM, ADJ, CCONJ, NUM, DET, ADV, ADP, X, VERB
from ...symbols import NOUN, PROPN, PART, INTJ, SPACE, PRON
from ...symbols import POS, AUX, ADJ, CCONJ, NUM, ADV, ADP, X, VERB
from ...symbols import NOUN, PART, INTJ, PRON
# Source https://universaldependencies.org/tagset-conversion/sk-snk-uposf.html
# fmt: off

View File

@ -77,7 +77,7 @@ cdef class Parser:
tok2vec = Tok2Vec(width=token_vector_width,
embed_size=embed_size,
conv_depth=conv_depth,
window_size=window_size,
window_size=conv_window,
cnn_maxout_pieces=t2v_pieces,
subword_features=subword_features,
pretrained_vectors=pretrained_vectors,
@ -105,7 +105,7 @@ cdef class Parser:
'bilstm_depth': bilstm_depth,
'self_attn_depth': self_attn_depth,
'conv_depth': conv_depth,
'window_size': window_size,
'window_size': conv_window,
'embed_size': embed_size,
'cnn_maxout_pieces': t2v_pieces
}

View File

@ -1,6 +1,3 @@
# coding: utf8
from __future__ import unicode_literals
from spacy.lang.en import English
from spacy.pipeline import EntityRuler
@ -9,11 +6,12 @@ def test_issue4849():
nlp = English()
ruler = EntityRuler(
nlp, patterns=[
{"label": "PERSON", "pattern": 'joe biden', "id": 'joe-biden'},
{"label": "PERSON", "pattern": 'bernie sanders', "id": 'bernie-sanders'},
nlp,
patterns=[
{"label": "PERSON", "pattern": "joe biden", "id": "joe-biden"},
{"label": "PERSON", "pattern": "bernie sanders", "id": "bernie-sanders"},
],
phrase_matcher_attr="LOWER"
phrase_matcher_attr="LOWER",
)
nlp.add_pipe(ruler)
@ -27,10 +25,10 @@ def test_issue4849():
count_ents = 0
for doc in nlp.pipe([text], n_process=1):
count_ents += len([ent for ent in doc.ents if ent.ent_id > 0])
assert(count_ents == 2)
assert count_ents == 2
# USING 2 PROCESSES
count_ents = 0
for doc in nlp.pipe([text], n_process=2):
count_ents += len([ent for ent in doc.ents if ent.ent_id > 0])
assert (count_ents == 2)
assert count_ents == 2

View File

@ -1,16 +1,9 @@
# coding: utf8
from __future__ import unicode_literals
import pytest
import spacy
from spacy.language import Language
@pytest.fixture
def nlp():
return spacy.blank("en")
def test_evaluate(nlp):
def test_evaluate():
nlp = Language()
docs_golds = [("", {})]
with pytest.raises(ValueError):
nlp.evaluate(docs_golds)