mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 09:26:27 +03:00
Tidy up and fix issues
This commit is contained in:
parent
de11ea753a
commit
1278161f47
|
@ -235,7 +235,7 @@ def example_from_conllu_sentence(
|
||||||
subtok_word = ""
|
subtok_word = ""
|
||||||
in_subtok = False
|
in_subtok = False
|
||||||
id_ = int(id_) - 1
|
id_ = int(id_) - 1
|
||||||
head = (int(head) - 1) if head != "0" else id_
|
head = (int(head) - 1) if head not in ("0", "_") else id_
|
||||||
tag = pos if tag == "_" else tag
|
tag = pos if tag == "_" else tag
|
||||||
morph = morph if morph != "_" else ""
|
morph = morph if morph != "_" else ""
|
||||||
dep = "ROOT" if dep == "root" else dep
|
dep = "ROOT" if dep == "root" else dep
|
||||||
|
|
|
@ -541,8 +541,8 @@ class Errors(object):
|
||||||
E997 = ("Tokenizer special cases are not allowed to modify the text. "
|
E997 = ("Tokenizer special cases are not allowed to modify the text. "
|
||||||
"This would map '{chunk}' to '{orth}' given token attributes "
|
"This would map '{chunk}' to '{orth}' given token attributes "
|
||||||
"'{token_attrs}'.")
|
"'{token_attrs}'.")
|
||||||
E998 = ("Can only create GoldParse's from Example's without a Doc, "
|
E998 = ("Can only create GoldParse objects from Example objects without a "
|
||||||
"if get_gold_parses() is called with a Vocab object.")
|
"Doc if get_gold_parses() is called with a Vocab object.")
|
||||||
E999 = ("Encountered an unexpected format for the dictionary holding "
|
E999 = ("Encountered an unexpected format for the dictionary holding "
|
||||||
"gold annotations: {gold_dict}")
|
"gold annotations: {gold_dict}")
|
||||||
|
|
||||||
|
|
|
@ -991,11 +991,6 @@ cdef class GoldParse:
|
||||||
self.cats = {} if cats is None else dict(cats)
|
self.cats = {} if cats is None else dict(cats)
|
||||||
self.links = {} if links is None else dict(links)
|
self.links = {} if links is None else dict(links)
|
||||||
|
|
||||||
# orig_annot is used as an iterator in `nlp.evalate` even if self.length == 0,
|
|
||||||
# so set a empty list to avoid error.
|
|
||||||
# if self.lenght > 0, this is modified latter.
|
|
||||||
self.orig_annot = []
|
|
||||||
|
|
||||||
# avoid allocating memory if the doc does not contain any tokens
|
# avoid allocating memory if the doc does not contain any tokens
|
||||||
if self.length > 0:
|
if self.length > 0:
|
||||||
if not words:
|
if not words:
|
||||||
|
|
|
@ -1,7 +1,3 @@
|
||||||
# coding: utf8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Example sentences to test spaCy and its language models.
|
Example sentences to test spaCy and its language models.
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,3 @@
|
||||||
# coding: utf8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from ...attrs import LIKE_NUM
|
from ...attrs import LIKE_NUM
|
||||||
|
|
||||||
_num_words = [
|
_num_words = [
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -77,7 +77,7 @@ cdef class Parser:
|
||||||
tok2vec = Tok2Vec(width=token_vector_width,
|
tok2vec = Tok2Vec(width=token_vector_width,
|
||||||
embed_size=embed_size,
|
embed_size=embed_size,
|
||||||
conv_depth=conv_depth,
|
conv_depth=conv_depth,
|
||||||
window_size=window_size,
|
window_size=conv_window,
|
||||||
cnn_maxout_pieces=t2v_pieces,
|
cnn_maxout_pieces=t2v_pieces,
|
||||||
subword_features=subword_features,
|
subword_features=subword_features,
|
||||||
pretrained_vectors=pretrained_vectors,
|
pretrained_vectors=pretrained_vectors,
|
||||||
|
@ -105,7 +105,7 @@ cdef class Parser:
|
||||||
'bilstm_depth': bilstm_depth,
|
'bilstm_depth': bilstm_depth,
|
||||||
'self_attn_depth': self_attn_depth,
|
'self_attn_depth': self_attn_depth,
|
||||||
'conv_depth': conv_depth,
|
'conv_depth': conv_depth,
|
||||||
'window_size': window_size,
|
'window_size': conv_window,
|
||||||
'embed_size': embed_size,
|
'embed_size': embed_size,
|
||||||
'cnn_maxout_pieces': t2v_pieces
|
'cnn_maxout_pieces': t2v_pieces
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,3 @@
|
||||||
# coding: utf8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import English
|
||||||
from spacy.pipeline import EntityRuler
|
from spacy.pipeline import EntityRuler
|
||||||
|
|
||||||
|
@ -9,11 +6,12 @@ def test_issue4849():
|
||||||
nlp = English()
|
nlp = English()
|
||||||
|
|
||||||
ruler = EntityRuler(
|
ruler = EntityRuler(
|
||||||
nlp, patterns=[
|
nlp,
|
||||||
{"label": "PERSON", "pattern": 'joe biden', "id": 'joe-biden'},
|
patterns=[
|
||||||
{"label": "PERSON", "pattern": 'bernie sanders', "id": 'bernie-sanders'},
|
{"label": "PERSON", "pattern": "joe biden", "id": "joe-biden"},
|
||||||
|
{"label": "PERSON", "pattern": "bernie sanders", "id": "bernie-sanders"},
|
||||||
],
|
],
|
||||||
phrase_matcher_attr="LOWER"
|
phrase_matcher_attr="LOWER",
|
||||||
)
|
)
|
||||||
|
|
||||||
nlp.add_pipe(ruler)
|
nlp.add_pipe(ruler)
|
||||||
|
@ -27,10 +25,10 @@ def test_issue4849():
|
||||||
count_ents = 0
|
count_ents = 0
|
||||||
for doc in nlp.pipe([text], n_process=1):
|
for doc in nlp.pipe([text], n_process=1):
|
||||||
count_ents += len([ent for ent in doc.ents if ent.ent_id > 0])
|
count_ents += len([ent for ent in doc.ents if ent.ent_id > 0])
|
||||||
assert(count_ents == 2)
|
assert count_ents == 2
|
||||||
|
|
||||||
# USING 2 PROCESSES
|
# USING 2 PROCESSES
|
||||||
count_ents = 0
|
count_ents = 0
|
||||||
for doc in nlp.pipe([text], n_process=2):
|
for doc in nlp.pipe([text], n_process=2):
|
||||||
count_ents += len([ent for ent in doc.ents if ent.ent_id > 0])
|
count_ents += len([ent for ent in doc.ents if ent.ent_id > 0])
|
||||||
assert (count_ents == 2)
|
assert count_ents == 2
|
||||||
|
|
|
@ -1,16 +1,9 @@
|
||||||
# coding: utf8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
from spacy.language import Language
|
||||||
import spacy
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
def test_evaluate():
|
||||||
def nlp():
|
nlp = Language()
|
||||||
return spacy.blank("en")
|
|
||||||
|
|
||||||
|
|
||||||
def test_evaluate(nlp):
|
|
||||||
docs_golds = [("", {})]
|
docs_golds = [("", {})]
|
||||||
nlp.evaluate(docs_golds)
|
with pytest.raises(ValueError):
|
||||||
|
nlp.evaluate(docs_golds)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user