Merge branch 'develop' into nightly.spacy.io

2025-12-15 06:04:33 +03:00 · 2020-11-09 12:43:42 +08:00 · 2020-11-09 12:43:42 +08:00 · b1b7caa084
commit b1b7caa084
parent 1075b7ebb7 363ac73c72
6 changed files with 19 additions and 23 deletions
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@@ -30,7 +30,7 @@ default_model_config = """
 pretrained_vectors = null
 width = 96
 depth = 2
-embed_size = 300
+embed_size = 2000
 window_size = 1
 maxout_pieces = 3
 subword_features = true
--- a/spacy/tests/README.md
+++ b/spacy/tests/README.md
@@ -109,12 +109,12 @@ Loading the models is expensive and not necessary if you're not actually testing
 ```python
 def test_doc_token_api_strings(en_vocab):
-    text = "Give it back! He pleaded."
+    words = ["Give", "it", "back", "!", "He", "pleaded", "."]
     pos = ['VERB', 'PRON', 'PART', 'PUNCT', 'PRON', 'VERB', 'PUNCT']
     heads = [0, 0, 0, 0, 5, 5, 5]
     deps = ['ROOT', 'dobj', 'prt', 'punct', 'nsubj', 'ROOT', 'punct']
-    doc = Doc(en_vocab, [t.text for t in tokens], pos=pos, heads=heads, deps=deps)
+    doc = Doc(en_vocab, words=words, pos=pos, heads=heads, deps=deps)
     assert doc[0].text == 'Give'
     assert doc[0].lower_ == 'give'
     assert doc[0].pos_ == 'VERB'
--- a/website/docs/api/architectures.md
+++ b/website/docs/api/architectures.md
@@ -663,7 +663,7 @@ into the "real world". This requires 3 main components:
 > pretrained_vectors = null
 > width = 96
 > depth = 2
-> embed_size = 300
+> embed_size = 2000
 > window_size = 1
 > maxout_pieces = 3
 > subword_features = true
--- a/website/docs/api/language.md
+++ b/website/docs/api/language.md
@@ -787,14 +787,12 @@ doesn't, the pipeline analysis won't catch that.
 ============================= Pipeline Overview =============================
 #   Component       Assigns           Requires         Scores        Retokenizes
-   -------------   ---------------   --------------   ---------   -----------
+-   -------------   ---------------   --------------   -----------   -----------
 0   tagger          token.tag                          tag_acc       False
                                                       pos_acc
                                                       lemma_acc
-1   entity_linker   token.ent_kb_id   doc.ents                     False
+1   entity_linker   token.ent_kb_id   doc.ents         nel_micro_f   False
-                                      doc.sents
+                                      doc.sents        nel_micro_r
-                                      token.ent_iob
+                                      token.ent_iob    nel_micro_p
                                      token.ent_type
--- a/website/docs/usage/linguistic-features.md
+++ b/website/docs/usage/linguistic-features.md
@@ -54,9 +54,9 @@ create a surface form. Here are some examples:
 | I don't watch the news, I read the paper | read    | read  | `VERB` | `VerbForm=Fin`, `Mood=Ind`, `Tense=Pres` |
 | I read the paper yesterday               | read    | read  | `VERB` | `VerbForm=Fin`, `Mood=Ind`, `Tense=Past` |
-Morphological features are stored in the [`MorphAnalysis`](/api/morphanalysis)
+Morphological features are stored in the
-under `Token.morph`, which allows you to access individual morphological
+[`MorphAnalysis`](/api/morphology#morphanalysis) under `Token.morph`, which
-features.
+allows you to access individual morphological features.
 > #### 📝 Things to try
 >
--- a/website/docs/usage/processing-pipelines.md
+++ b/website/docs/usage/processing-pipelines.md
@@ -488,14 +488,12 @@ analysis = nlp.analyze_pipes(pretty=True)
 ============================= Pipeline Overview =============================
 #   Component       Assigns           Requires         Scores        Retokenizes
-   -------------   ---------------   --------------   ---------   -----------
+-   -------------   ---------------   --------------   -----------   -----------
 0   tagger          token.tag                          tag_acc       False
                                                       pos_acc
                                                       lemma_acc
-1   entity_linker   token.ent_kb_id   doc.ents                     False
+1   entity_linker   token.ent_kb_id   doc.ents         nel_micro_f   False
-                                      doc.sents
+                                      doc.sents        nel_micro_r
-                                      token.ent_iob
+                                      token.ent_iob    nel_micro_p
                                      token.ent_type