Tidy up and fix issues

This commit is contained in:
Ines Montani 2020-02-18 15:17:03 +01:00
parent de11ea753a
commit 1278161f47
9 changed files with 1476 additions and 1500 deletions

View File

@ -235,7 +235,7 @@ def example_from_conllu_sentence(
subtok_word = "" subtok_word = ""
in_subtok = False in_subtok = False
id_ = int(id_) - 1 id_ = int(id_) - 1
head = (int(head) - 1) if head != "0" else id_ head = (int(head) - 1) if head not in ("0", "_") else id_
tag = pos if tag == "_" else tag tag = pos if tag == "_" else tag
morph = morph if morph != "_" else "" morph = morph if morph != "_" else ""
dep = "ROOT" if dep == "root" else dep dep = "ROOT" if dep == "root" else dep

View File

@ -541,8 +541,8 @@ class Errors(object):
E997 = ("Tokenizer special cases are not allowed to modify the text. " E997 = ("Tokenizer special cases are not allowed to modify the text. "
"This would map '{chunk}' to '{orth}' given token attributes " "This would map '{chunk}' to '{orth}' given token attributes "
"'{token_attrs}'.") "'{token_attrs}'.")
E998 = ("Can only create GoldParse's from Example's without a Doc, " E998 = ("Can only create GoldParse objects from Example objects without a "
"if get_gold_parses() is called with a Vocab object.") "Doc if get_gold_parses() is called with a Vocab object.")
E999 = ("Encountered an unexpected format for the dictionary holding " E999 = ("Encountered an unexpected format for the dictionary holding "
"gold annotations: {gold_dict}") "gold annotations: {gold_dict}")

View File

@ -991,11 +991,6 @@ cdef class GoldParse:
self.cats = {} if cats is None else dict(cats) self.cats = {} if cats is None else dict(cats)
self.links = {} if links is None else dict(links) self.links = {} if links is None else dict(links)
# orig_annot is used as an iterator in `nlp.evaluate` even if self.length == 0,
# so set an empty list to avoid error.
# if self.length > 0, this is modified later.
self.orig_annot = []
# avoid allocating memory if the doc does not contain any tokens # avoid allocating memory if the doc does not contain any tokens
if self.length > 0: if self.length > 0:
if not words: if not words:

View File

@ -1,7 +1,3 @@
# coding: utf8
from __future__ import unicode_literals
""" """
Example sentences to test spaCy and its language models. Example sentences to test spaCy and its language models.

View File

@ -1,6 +1,3 @@
# coding: utf8
from __future__ import unicode_literals
from ...attrs import LIKE_NUM from ...attrs import LIKE_NUM
_num_words = [ _num_words = [

File diff suppressed because it is too large Load Diff

View File

@ -77,7 +77,7 @@ cdef class Parser:
tok2vec = Tok2Vec(width=token_vector_width, tok2vec = Tok2Vec(width=token_vector_width,
embed_size=embed_size, embed_size=embed_size,
conv_depth=conv_depth, conv_depth=conv_depth,
window_size=window_size, window_size=conv_window,
cnn_maxout_pieces=t2v_pieces, cnn_maxout_pieces=t2v_pieces,
subword_features=subword_features, subword_features=subword_features,
pretrained_vectors=pretrained_vectors, pretrained_vectors=pretrained_vectors,
@ -105,7 +105,7 @@ cdef class Parser:
'bilstm_depth': bilstm_depth, 'bilstm_depth': bilstm_depth,
'self_attn_depth': self_attn_depth, 'self_attn_depth': self_attn_depth,
'conv_depth': conv_depth, 'conv_depth': conv_depth,
'window_size': window_size, 'window_size': conv_window,
'embed_size': embed_size, 'embed_size': embed_size,
'cnn_maxout_pieces': t2v_pieces 'cnn_maxout_pieces': t2v_pieces
} }

View File

@ -1,6 +1,3 @@
# coding: utf8
from __future__ import unicode_literals
from spacy.lang.en import English from spacy.lang.en import English
from spacy.pipeline import EntityRuler from spacy.pipeline import EntityRuler
@ -9,11 +6,12 @@ def test_issue4849():
nlp = English() nlp = English()
ruler = EntityRuler( ruler = EntityRuler(
nlp, patterns=[ nlp,
{"label": "PERSON", "pattern": 'joe biden', "id": 'joe-biden'}, patterns=[
{"label": "PERSON", "pattern": 'bernie sanders', "id": 'bernie-sanders'}, {"label": "PERSON", "pattern": "joe biden", "id": "joe-biden"},
{"label": "PERSON", "pattern": "bernie sanders", "id": "bernie-sanders"},
], ],
phrase_matcher_attr="LOWER" phrase_matcher_attr="LOWER",
) )
nlp.add_pipe(ruler) nlp.add_pipe(ruler)
@ -27,10 +25,10 @@ def test_issue4849():
count_ents = 0 count_ents = 0
for doc in nlp.pipe([text], n_process=1): for doc in nlp.pipe([text], n_process=1):
count_ents += len([ent for ent in doc.ents if ent.ent_id > 0]) count_ents += len([ent for ent in doc.ents if ent.ent_id > 0])
assert(count_ents == 2) assert count_ents == 2
# USING 2 PROCESSES # USING 2 PROCESSES
count_ents = 0 count_ents = 0
for doc in nlp.pipe([text], n_process=2): for doc in nlp.pipe([text], n_process=2):
count_ents += len([ent for ent in doc.ents if ent.ent_id > 0]) count_ents += len([ent for ent in doc.ents if ent.ent_id > 0])
assert (count_ents == 2) assert count_ents == 2

View File

@ -1,16 +1,9 @@
# coding: utf8
from __future__ import unicode_literals
import pytest import pytest
from spacy.language import Language
import spacy
@pytest.fixture def test_evaluate():
def nlp(): nlp = Language()
return spacy.blank("en")
def test_evaluate(nlp):
docs_golds = [("", {})] docs_golds = [("", {})]
nlp.evaluate(docs_golds) with pytest.raises(ValueError):
nlp.evaluate(docs_golds)