mirror of
https://github.com/explosion/spaCy.git
synced 2025-03-13 07:55:49 +03:00
Merge branch 'develop' into nightly.spacy.io
This commit is contained in:
commit
b1b7caa084
|
@ -30,7 +30,7 @@ default_model_config = """
|
||||||
pretrained_vectors = null
|
pretrained_vectors = null
|
||||||
width = 96
|
width = 96
|
||||||
depth = 2
|
depth = 2
|
||||||
embed_size = 300
|
embed_size = 2000
|
||||||
window_size = 1
|
window_size = 1
|
||||||
maxout_pieces = 3
|
maxout_pieces = 3
|
||||||
subword_features = true
|
subword_features = true
|
||||||
|
|
|
@ -109,12 +109,12 @@ Loading the models is expensive and not necessary if you're not actually testing
|
||||||
|
|
||||||
```python
|
```python
|
||||||
def test_doc_token_api_strings(en_vocab):
|
def test_doc_token_api_strings(en_vocab):
|
||||||
text = "Give it back! He pleaded."
|
words = ["Give", "it", "back", "!", "He", "pleaded", "."]
|
||||||
pos = ['VERB', 'PRON', 'PART', 'PUNCT', 'PRON', 'VERB', 'PUNCT']
|
pos = ['VERB', 'PRON', 'PART', 'PUNCT', 'PRON', 'VERB', 'PUNCT']
|
||||||
heads = [0, 0, 0, 0, 5, 5, 5]
|
heads = [0, 0, 0, 0, 5, 5, 5]
|
||||||
deps = ['ROOT', 'dobj', 'prt', 'punct', 'nsubj', 'ROOT', 'punct']
|
deps = ['ROOT', 'dobj', 'prt', 'punct', 'nsubj', 'ROOT', 'punct']
|
||||||
|
|
||||||
doc = Doc(en_vocab, [t.text for t in tokens], pos=pos, heads=heads, deps=deps)
|
doc = Doc(en_vocab, words=words, pos=pos, heads=heads, deps=deps)
|
||||||
assert doc[0].text == 'Give'
|
assert doc[0].text == 'Give'
|
||||||
assert doc[0].lower_ == 'give'
|
assert doc[0].lower_ == 'give'
|
||||||
assert doc[0].pos_ == 'VERB'
|
assert doc[0].pos_ == 'VERB'
|
||||||
|
|
|
@ -663,7 +663,7 @@ into the "real world". This requires 3 main components:
|
||||||
> pretrained_vectors = null
|
> pretrained_vectors = null
|
||||||
> width = 96
|
> width = 96
|
||||||
> depth = 2
|
> depth = 2
|
||||||
> embed_size = 300
|
> embed_size = 2000
|
||||||
> window_size = 1
|
> window_size = 1
|
||||||
> maxout_pieces = 3
|
> maxout_pieces = 3
|
||||||
> subword_features = true
|
> subword_features = true
|
||||||
|
|
|
@ -787,14 +787,12 @@ doesn't, the pipeline analysis won't catch that.
|
||||||
============================= Pipeline Overview =============================
|
============================= Pipeline Overview =============================
|
||||||
|
|
||||||
# Component Assigns Requires Scores Retokenizes
|
# Component Assigns Requires Scores Retokenizes
|
||||||
- ------------- --------------- -------------- --------- -----------
|
- ------------- --------------- -------------- ----------- -----------
|
||||||
0 tagger token.tag tag_acc False
|
0 tagger token.tag tag_acc False
|
||||||
pos_acc
|
|
||||||
lemma_acc
|
|
||||||
|
|
||||||
1 entity_linker token.ent_kb_id doc.ents False
|
1 entity_linker token.ent_kb_id doc.ents nel_micro_f False
|
||||||
doc.sents
|
doc.sents nel_micro_r
|
||||||
token.ent_iob
|
token.ent_iob nel_micro_p
|
||||||
token.ent_type
|
token.ent_type
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -54,9 +54,9 @@ create a surface form. Here are some examples:
|
||||||
| I don't watch the news, I read the paper | read | read | `VERB` | `VerbForm=Fin`, `Mood=Ind`, `Tense=Pres` |
|
| I don't watch the news, I read the paper | read | read | `VERB` | `VerbForm=Fin`, `Mood=Ind`, `Tense=Pres` |
|
||||||
| I read the paper yesterday | read | read | `VERB` | `VerbForm=Fin`, `Mood=Ind`, `Tense=Past` |
|
| I read the paper yesterday | read | read | `VERB` | `VerbForm=Fin`, `Mood=Ind`, `Tense=Past` |
|
||||||
|
|
||||||
Morphological features are stored in the [`MorphAnalysis`](/api/morphanalysis)
|
Morphological features are stored in the
|
||||||
under `Token.morph`, which allows you to access individual morphological
|
[`MorphAnalysis`](/api/morphology#morphanalysis) under `Token.morph`, which
|
||||||
features.
|
allows you to access individual morphological features.
|
||||||
|
|
||||||
> #### 📝 Things to try
|
> #### 📝 Things to try
|
||||||
>
|
>
|
||||||
|
|
|
@ -488,14 +488,12 @@ analysis = nlp.analyze_pipes(pretty=True)
|
||||||
============================= Pipeline Overview =============================
|
============================= Pipeline Overview =============================
|
||||||
|
|
||||||
# Component Assigns Requires Scores Retokenizes
|
# Component Assigns Requires Scores Retokenizes
|
||||||
- ------------- --------------- -------------- --------- -----------
|
- ------------- --------------- -------------- ----------- -----------
|
||||||
0 tagger token.tag tag_acc False
|
0 tagger token.tag tag_acc False
|
||||||
pos_acc
|
|
||||||
lemma_acc
|
|
||||||
|
|
||||||
1 entity_linker token.ent_kb_id doc.ents False
|
1 entity_linker token.ent_kb_id doc.ents nel_micro_f False
|
||||||
doc.sents
|
doc.sents nel_micro_r
|
||||||
token.ent_iob
|
token.ent_iob nel_micro_p
|
||||||
token.ent_type
|
token.ent_type
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user