mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-07 15:56:32 +03:00
Merge branch 'master' into spacy.io
This commit is contained in:
commit
0c7937c74d
|
@ -103,6 +103,10 @@ def fill_config(
|
||||||
# config result is a valid config
|
# config result is a valid config
|
||||||
nlp = util.load_model_from_config(nlp.config)
|
nlp = util.load_model_from_config(nlp.config)
|
||||||
filled = nlp.config
|
filled = nlp.config
|
||||||
|
# If we have sourced components in the base config, those will have been
|
||||||
|
# replaced with their actual config after loading, so we have to re-add them
|
||||||
|
sourced = util.get_sourced_components(config)
|
||||||
|
filled["components"].update(sourced)
|
||||||
if pretraining:
|
if pretraining:
|
||||||
validate_config_for_pretrain(filled, msg)
|
validate_config_for_pretrain(filled, msg)
|
||||||
pretrain_config = util.load_config(DEFAULT_CONFIG_PRETRAIN_PATH)
|
pretrain_config = util.load_config(DEFAULT_CONFIG_PRETRAIN_PATH)
|
||||||
|
|
|
@ -278,7 +278,7 @@ cdef cppclass StateC:
|
||||||
return this._stack.size()
|
return this._stack.size()
|
||||||
|
|
||||||
int buffer_length() nogil const:
|
int buffer_length() nogil const:
|
||||||
return this.length - this._b_i
|
return (this.length - this._b_i) + this._rebuffer.size()
|
||||||
|
|
||||||
void push() nogil:
|
void push() nogil:
|
||||||
b0 = this.B(0)
|
b0 = this.B(0)
|
||||||
|
|
|
@ -134,8 +134,6 @@ cdef class TransitionSystem:
|
||||||
|
|
||||||
def is_valid(self, StateClass stcls, move_name):
|
def is_valid(self, StateClass stcls, move_name):
|
||||||
action = self.lookup_transition(move_name)
|
action = self.lookup_transition(move_name)
|
||||||
if action.move == 0:
|
|
||||||
return False
|
|
||||||
return action.is_valid(stcls.c, action.label)
|
return action.is_valid(stcls.c, action.label)
|
||||||
|
|
||||||
cdef int set_valid(self, int* is_valid, const StateC* st) nogil:
|
cdef int set_valid(self, int* is_valid, const StateC* st) nogil:
|
||||||
|
|
40
spacy/tests/regression/test_issue7055.py
Normal file
40
spacy/tests/regression/test_issue7055.py
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
from spacy.cli.init_config import fill_config
|
||||||
|
from spacy.util import load_config
|
||||||
|
from spacy.lang.en import English
|
||||||
|
from thinc.api import Config
|
||||||
|
|
||||||
|
from ..util import make_tempdir
|
||||||
|
|
||||||
|
|
||||||
|
def test_issue7055():
    """Regression test: fill-config must keep sourced components as sourced.

    Components declared with a "source" entry in the base config have to
    still carry that "source" entry in the filled config (rather than being
    rewritten as "factory" components), while a regular factory component
    still gets its defaults (e.g. "model") filled in.
    """
    pipeline_to_source = English.from_config(
        {
            "nlp": {"lang": "en", "pipeline": ["tok2vec", "tagger"]},
            "components": {
                "tok2vec": {"factory": "tok2vec"},
                "tagger": {"factory": "tagger"},
            },
        }
    )
    with make_tempdir() as tmp_dir:
        # The sourced components must point at a pipeline that can be loaded,
        # so write the source pipeline to disk first.
        model_dir = tmp_dir / "test_model"
        pipeline_to_source.to_disk(model_dir)
        model_ref = str(model_dir)

        # Base config: two components sourced from disk, one plain factory.
        sourced_names = ("tok2vec", "tagger")
        components = {name: {"source": model_ref} for name in sourced_names}
        components["ner"] = {"factory": "ner"}
        base_path = tmp_dir / "base.cfg"
        Config(
            {
                "nlp": {"lang": "en", "pipeline": ["tok2vec", "tagger", "ner"]},
                "components": components,
            }
        ).to_disk(base_path)

        output_path = tmp_dir / "config.cfg"
        fill_config(output_path, base_path, silent=True)

        filled_components = load_config(output_path)["components"]
        # Sourced components still reference the source pipeline ...
        for name in sourced_names:
            assert filled_components[name]["source"] == model_ref
        # ... and the factory component has been filled with its defaults.
        ner_cfg = filled_components["ner"]
        assert ner_cfg["factory"] == "ner"
        assert "model" in ner_cfg
|
27
spacy/tests/regression/test_issue7056.py
Normal file
27
spacy/tests/regression/test_issue7056.py
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from spacy.tokens.doc import Doc
|
||||||
|
from spacy.vocab import Vocab
|
||||||
|
from spacy.pipeline._parser_internals.arc_eager import ArcEager
|
||||||
|
|
||||||
|
|
||||||
|
def test_issue7056():
    """Regression test for the Unshift transition.

    Applies a fixed sequence of named transitions to a fresh parser state
    and checks that the state is not reported as end-of-line afterwards,
    i.e. that the Unshift transition works properly and doesn't cause
    sentence segmentation errors.
    """
    vocab = Vocab()
    actions = ArcEager.get_actions(left_labels=["amod"], right_labels=["pobj"])
    ae = ArcEager(vocab.strings, actions)
    doc = Doc(vocab, words="Severe pain , after trauma".split())
    state = ae.init_batch([doc])[0]
    # Transition names are applied strictly in this order.
    move_sequence = ("S", "L-amod", "S", "S", "S", "R-pobj", "D", "D", "D")
    for move_name in move_sequence:
        ae.apply_transition(state, move_name)
    assert not state.eol()
|
|
@ -1,5 +1,33 @@
|
||||||
{
|
{
|
||||||
"resources": [
|
"resources": [
|
||||||
|
{
|
||||||
|
"id": "spacy-dbpedia-spotlight",
|
||||||
|
"title": "DBpedia Spotlight for SpaCy",
|
||||||
|
"slogan": "Use DBpedia Spotlight to link entities inside SpaCy",
|
||||||
|
"description": "This library links SpaCy with [DBpedia Spotlight](https://www.dbpedia-spotlight.org/). You can easily get the DBpedia entities from your documents, using the public web service or by using your own instance of DBpedia Spotlight. The `doc.ents` are populated with the entities and all their details (URI, type, ...).",
|
||||||
|
"github": "MartinoMensio/spacy-dbpedia-spotlight",
|
||||||
|
"pip": "spacy-dbpedia-spotlight",
|
||||||
|
"code_example": [
|
||||||
|
"import spacy_dbpedia_spotlight",
|
||||||
|
"# load your model as usual",
|
||||||
|
"nlp = spacy.load('en_core_web_lg')",
|
||||||
|
"# add the pipeline stage",
|
||||||
|
"nlp.add_pipe('dbpedia_spotlight')",
|
||||||
|
"# get the document",
|
||||||
|
"doc = nlp('The president of USA is calling Boris Johnson to decide what to do about coronavirus')",
|
||||||
|
"# see the entities",
|
||||||
|
"print('Entities', [(ent.text, ent.label_, ent.kb_id_) for ent in doc.ents])",
|
||||||
|
"# inspect the raw data from DBpedia spotlight",
|
||||||
|
"print(doc.ents[0]._.dbpedia_raw_result)"
|
||||||
|
],
|
||||||
|
"category": ["models", "pipeline"],
|
||||||
|
"author": "Martino Mensio",
|
||||||
|
"author_links": {
|
||||||
|
"twitter": "MartinoMensio",
|
||||||
|
"github": "MartinoMensio",
|
||||||
|
"website": "https://martinomensio.github.io"
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"id": "spacy-textblob",
|
"id": "spacy-textblob",
|
||||||
"title": "spaCyTextBlob",
|
"title": "spaCyTextBlob",
|
||||||
|
|
Loading…
Reference in New Issue
Block a user