From 6c0c3d5ddc41b38baf929ce1188d6b2693fba51e Mon Sep 17 00:00:00 2001
From: Martino Mensio <martino.mensio@open.ac.uk>
Date: Fri, 12 Feb 2021 19:06:51 +0100
Subject: [PATCH 1/3] added spacy-dbpedia-spotlight

---
 website/meta/universe.json | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/website/meta/universe.json b/website/meta/universe.json
index d5768d73b..45d146511 100644
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@@ -1,6 +1,34 @@
 {
     "resources": [
-    	{
+        {
+            "id": "spacy-dbpedia-spotlight",
+            "title": "DBpedia Spotlight for spaCy",
+            "slogan": "Use DBpedia Spotlight to link entities inside spaCy",
+            "description": "This library links spaCy with [DBpedia Spotlight](https://www.dbpedia-spotlight.org/). You can easily get the DBpedia entities from your documents, using the public web service or by using your own instance of DBpedia Spotlight. The `doc.ents` are populated with the entities and all their details (URI, type, ...).",
+            "github": "MartinoMensio/spacy-dbpedia-spotlight",
+            "pip": "spacy-dbpedia-spotlight",
+            "code_example": [
+                "import spacy",
+                "import spacy_dbpedia_spotlight",
+                "nlp = spacy.load('en_core_web_lg')  # load your model as usual",
+                "# add the pipeline stage",
+                "nlp.add_pipe('dbpedia_spotlight')",
+                "# get the document",
+                "doc = nlp('The president of USA is calling Boris Johnson to decide what to do about coronavirus')",
+                "# see the entities",
+                "print('Entities', [(ent.text, ent.label_, ent.kb_id_) for ent in doc.ents])",
+                "# inspect the raw data from DBpedia spotlight",
+                "print(doc.ents[0]._.dbpedia_raw_result)"
+            ],
+            "category": ["models", "pipeline"],
+            "author": "Martino Mensio",
+            "author_links": {
+                "twitter": "MartinoMensio",
+                "github": "MartinoMensio",
+                "website": "https://martinomensio.github.io"
+            }
+        },
+        {
             "id": "spacy-textblob",
             "title": "spaCyTextBlob",
             "slogan": "Easy sentiment analysis for spaCy using TextBlob",

From 0fb8d437c09234ce913cb84982acfbdaf7b8c61d Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sun, 14 Feb 2021 13:38:13 +1100
Subject: [PATCH 2/3] Fix sentence fragments bug (#7056, #7035) (#7057)

* Add test for #7035

* Update test for issue 7056

* Fix test

* Fix transitions method used in testing

* Fix state eol detection when tokens are in the rebuffer

* Clean up redundant fix
---
 spacy/pipeline/_parser_internals/_state.pxd   |  2 +-
 .../_parser_internals/transition_system.pyx   |  2 --
 spacy/tests/regression/test_issue7056.py      | 27 +++++++++++++++++++
 3 files changed, 28 insertions(+), 3 deletions(-)
 create mode 100644 spacy/tests/regression/test_issue7056.py

diff --git a/spacy/pipeline/_parser_internals/_state.pxd b/spacy/pipeline/_parser_internals/_state.pxd
index a6bf926f9..161f3ca48 100644
--- a/spacy/pipeline/_parser_internals/_state.pxd
+++ b/spacy/pipeline/_parser_internals/_state.pxd
@@ -278,7 +278,7 @@ cdef cppclass StateC:
         return this._stack.size()
 
     int buffer_length() nogil const:
-        return this.length - this._b_i
+        return (this.length - this._b_i) + this._rebuffer.size()
 
     void push() nogil:
         b0 = this.B(0)
diff --git a/spacy/pipeline/_parser_internals/transition_system.pyx b/spacy/pipeline/_parser_internals/transition_system.pyx
index 9bb4f7f5f..9e6f847eb 100644
--- a/spacy/pipeline/_parser_internals/transition_system.pyx
+++ b/spacy/pipeline/_parser_internals/transition_system.pyx
@@ -134,8 +134,6 @@ cdef class TransitionSystem:
 
     def is_valid(self, StateClass stcls, move_name):
         action = self.lookup_transition(move_name)
-        if action.move == 0:
-            return False
         return action.is_valid(stcls.c, action.label)
 
     cdef int set_valid(self, int* is_valid, const StateC* st) nogil:
diff --git a/spacy/tests/regression/test_issue7056.py b/spacy/tests/regression/test_issue7056.py
new file mode 100644
index 000000000..64a420b84
--- /dev/null
+++ b/spacy/tests/regression/test_issue7056.py
@@ -0,0 +1,27 @@
+import pytest
+
+from spacy.tokens.doc import Doc
+from spacy.vocab import Vocab
+from spacy.pipeline._parser_internals.arc_eager import ArcEager
+
+
+def test_issue7056():
+    """Test that the Unshift transition works properly, and doesn't cause
+    sentence segmentation errors."""
+    vocab = Vocab()
+    ae = ArcEager(  # transition system restricted to the two labels used below
+        vocab.strings,
+        ArcEager.get_actions(left_labels=["amod"], right_labels=["pobj"])
+    )
+    doc = Doc(vocab, words="Severe pain , after trauma".split())  # five tokens
+    state = ae.init_batch([doc])[0]  # batch of one doc; take the first state
+    ae.apply_transition(state, "S")
+    ae.apply_transition(state, "L-amod")  # uses the left label registered above
+    ae.apply_transition(state, "S")
+    ae.apply_transition(state, "S")
+    ae.apply_transition(state, "S")
+    ae.apply_transition(state, "R-pobj")  # uses the right label registered above
+    ae.apply_transition(state, "D")  # NOTE(review): presumably the "D" moves reach the Unshift path named in the docstring - confirm
+    ae.apply_transition(state, "D")
+    ae.apply_transition(state, "D")
+    assert not state.eol()  # regression check for #7056: eol() must be False after these moves

From f4f46b617f2106f51579bae2b71c71867d1cc7eb Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Sun, 14 Feb 2021 14:02:14 +1100
Subject: [PATCH 3/3] Preserve sourced components in fill-config (fixes #7055)
 (#7058)

---
 spacy/cli/init_config.py                 |  4 +++
 spacy/tests/regression/test_issue7055.py | 40 ++++++++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100644 spacy/tests/regression/test_issue7055.py

diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py
index 6bdf393f6..9880c389c 100644
--- a/spacy/cli/init_config.py
+++ b/spacy/cli/init_config.py
@@ -103,6 +103,10 @@ def fill_config(
     # config result is a valid config
     nlp = util.load_model_from_config(nlp.config)
     filled = nlp.config
+    # If we have sourced components in the base config, those will have been
+    # replaced with their actual config after loading, so we have to re-add them
+    sourced = util.get_sourced_components(config)
+    filled["components"].update(sourced)
     if pretraining:
         validate_config_for_pretrain(filled, msg)
         pretrain_config = util.load_config(DEFAULT_CONFIG_PRETRAIN_PATH)
diff --git a/spacy/tests/regression/test_issue7055.py b/spacy/tests/regression/test_issue7055.py
new file mode 100644
index 000000000..c7ddb0a75
--- /dev/null
+++ b/spacy/tests/regression/test_issue7055.py
@@ -0,0 +1,40 @@
+from spacy.cli.init_config import fill_config
+from spacy.util import load_config
+from spacy.lang.en import English
+from thinc.api import Config
+
+from ..util import make_tempdir
+
+
+def test_issue7055():
+    """Test that fill-config doesn't turn sourced components into factories."""
+    source_cfg = {  # pipeline that the base config below sources components from
+        "nlp": {"lang": "en", "pipeline": ["tok2vec", "tagger"]},
+        "components": {
+            "tok2vec": {"factory": "tok2vec"},
+            "tagger": {"factory": "tagger"},
+        },
+    }
+    source_nlp = English.from_config(source_cfg)
+    with make_tempdir() as dir_path:
+        # We need to create a loadable source pipeline
+        source_path = dir_path / "test_model"
+        source_nlp.to_disk(source_path)
+        base_cfg = {  # tok2vec and tagger are sourced from disk; only ner uses a factory
+            "nlp": {"lang": "en", "pipeline": ["tok2vec", "tagger", "ner"]},
+            "components": {
+                "tok2vec": {"source": str(source_path)},
+                "tagger": {"source": str(source_path)},
+                "ner": {"factory": "ner"},
+            },
+        }
+        base_cfg = Config(base_cfg)
+        base_path = dir_path / "base.cfg"
+        base_cfg.to_disk(base_path)  # fill_config is called with paths, so write the base config out first
+        output_path = dir_path / "config.cfg"
+        fill_config(output_path, base_path, silent=True)
+        filled_cfg = load_config(output_path)  # load the file at output_path while the temp dir still exists
+    assert filled_cfg["components"]["tok2vec"]["source"] == str(source_path)  # still a sourced component
+    assert filled_cfg["components"]["tagger"]["source"] == str(source_path)  # still a sourced component
+    assert filled_cfg["components"]["ner"]["factory"] == "ner"
+    assert "model" in filled_cfg["components"]["ner"]  # the factory component must have a "model" entry after filling