mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Tidy up and fix issues
This commit is contained in:
parent
de11ea753a
commit
1278161f47
|
@ -235,7 +235,7 @@ def example_from_conllu_sentence(
|
|||
subtok_word = ""
|
||||
in_subtok = False
|
||||
id_ = int(id_) - 1
|
||||
head = (int(head) - 1) if head != "0" else id_
|
||||
head = (int(head) - 1) if head not in ("0", "_") else id_
|
||||
tag = pos if tag == "_" else tag
|
||||
morph = morph if morph != "_" else ""
|
||||
dep = "ROOT" if dep == "root" else dep
|
||||
|
|
|
@ -541,8 +541,8 @@ class Errors(object):
|
|||
E997 = ("Tokenizer special cases are not allowed to modify the text. "
|
||||
"This would map '{chunk}' to '{orth}' given token attributes "
|
||||
"'{token_attrs}'.")
|
||||
E998 = ("Can only create GoldParse's from Example's without a Doc, "
|
||||
"if get_gold_parses() is called with a Vocab object.")
|
||||
E998 = ("Can only create GoldParse objects from Example objects without a "
|
||||
"Doc if get_gold_parses() is called with a Vocab object.")
|
||||
E999 = ("Encountered an unexpected format for the dictionary holding "
|
||||
"gold annotations: {gold_dict}")
|
||||
|
||||
|
|
|
@ -991,11 +991,6 @@ cdef class GoldParse:
|
|||
self.cats = {} if cats is None else dict(cats)
|
||||
self.links = {} if links is None else dict(links)
|
||||
|
||||
# orig_annot is used as an iterator in `nlp.evaluate` even if self.length == 0,
|
||||
# so set an empty list to avoid an error.
|
||||
# if self.length > 0, this is modified later.
|
||||
self.orig_annot = []
|
||||
|
||||
# avoid allocating memory if the doc does not contain any tokens
|
||||
if self.length > 0:
|
||||
if not words:
|
||||
|
|
|
@ -1,7 +1,3 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
"""
|
||||
Example sentences to test spaCy and its language models.
|
||||
|
||||
|
|
|
@ -1,6 +1,3 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from ...attrs import LIKE_NUM
|
||||
|
||||
_num_words = [
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -77,7 +77,7 @@ cdef class Parser:
|
|||
tok2vec = Tok2Vec(width=token_vector_width,
|
||||
embed_size=embed_size,
|
||||
conv_depth=conv_depth,
|
||||
window_size=window_size,
|
||||
window_size=conv_window,
|
||||
cnn_maxout_pieces=t2v_pieces,
|
||||
subword_features=subword_features,
|
||||
pretrained_vectors=pretrained_vectors,
|
||||
|
@ -105,7 +105,7 @@ cdef class Parser:
|
|||
'bilstm_depth': bilstm_depth,
|
||||
'self_attn_depth': self_attn_depth,
|
||||
'conv_depth': conv_depth,
|
||||
'window_size': window_size,
|
||||
'window_size': conv_window,
|
||||
'embed_size': embed_size,
|
||||
'cnn_maxout_pieces': t2v_pieces
|
||||
}
|
||||
|
|
|
@ -1,6 +1,3 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from spacy.lang.en import English
|
||||
from spacy.pipeline import EntityRuler
|
||||
|
||||
|
@ -9,11 +6,12 @@ def test_issue4849():
|
|||
nlp = English()
|
||||
|
||||
ruler = EntityRuler(
|
||||
nlp, patterns=[
|
||||
{"label": "PERSON", "pattern": 'joe biden', "id": 'joe-biden'},
|
||||
{"label": "PERSON", "pattern": 'bernie sanders', "id": 'bernie-sanders'},
|
||||
nlp,
|
||||
patterns=[
|
||||
{"label": "PERSON", "pattern": "joe biden", "id": "joe-biden"},
|
||||
{"label": "PERSON", "pattern": "bernie sanders", "id": "bernie-sanders"},
|
||||
],
|
||||
phrase_matcher_attr="LOWER"
|
||||
phrase_matcher_attr="LOWER",
|
||||
)
|
||||
|
||||
nlp.add_pipe(ruler)
|
||||
|
@ -27,10 +25,10 @@ def test_issue4849():
|
|||
count_ents = 0
|
||||
for doc in nlp.pipe([text], n_process=1):
|
||||
count_ents += len([ent for ent in doc.ents if ent.ent_id > 0])
|
||||
assert(count_ents == 2)
|
||||
assert count_ents == 2
|
||||
|
||||
# USING 2 PROCESSES
|
||||
count_ents = 0
|
||||
for doc in nlp.pipe([text], n_process=2):
|
||||
count_ents += len([ent for ent in doc.ents if ent.ent_id > 0])
|
||||
assert (count_ents == 2)
|
||||
assert count_ents == 2
|
||||
|
|
|
@ -1,16 +1,9 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import pytest
|
||||
|
||||
import spacy
|
||||
from spacy.language import Language
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def nlp():
|
||||
return spacy.blank("en")
|
||||
|
||||
|
||||
def test_evaluate(nlp):
|
||||
def test_evaluate():
|
||||
nlp = Language()
|
||||
docs_golds = [("", {})]
|
||||
nlp.evaluate(docs_golds)
|
||||
with pytest.raises(ValueError):
|
||||
nlp.evaluate(docs_golds)
|
||||
|
|
Loading…
Reference in New Issue
Block a user