mirror of
https://github.com/explosion/spaCy.git
synced 2025-06-06 06:03:11 +03:00
Merge branch 'develop' into spacy.io
This commit is contained in:
commit
bee1966b88
2
setup.py
2
setup.py
|
@ -244,6 +244,8 @@ def setup_package():
|
||||||
"cuda91": ["cupy-cuda91>=4.0"],
|
"cuda91": ["cupy-cuda91>=4.0"],
|
||||||
"cuda92": ["cupy-cuda92>=4.0"],
|
"cuda92": ["cupy-cuda92>=4.0"],
|
||||||
"cuda100": ["cupy-cuda100>=4.0"],
|
"cuda100": ["cupy-cuda100>=4.0"],
|
||||||
|
# Language tokenizers with external dependencies
|
||||||
|
"ja": ["mecab-python3==0.7"],
|
||||||
},
|
},
|
||||||
python_requires=">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*",
|
python_requires=">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*",
|
||||||
classifiers=[
|
classifiers=[
|
||||||
|
|
|
@ -342,13 +342,8 @@ class Errors(object):
|
||||||
|
|
||||||
@add_codes
|
@add_codes
|
||||||
class TempErrors(object):
|
class TempErrors(object):
|
||||||
T001 = ("Max length currently 10 for phrase matching")
|
|
||||||
T002 = ("Pattern length ({doc_len}) >= phrase_matcher.max_length "
|
|
||||||
"({max_len}). Length can be set on initialization, up to 10.")
|
|
||||||
T003 = ("Resizing pre-trained Tagger models is not currently supported.")
|
T003 = ("Resizing pre-trained Tagger models is not currently supported.")
|
||||||
T004 = ("Currently parser depth is hard-coded to 1. Received: {value}.")
|
T004 = ("Currently parser depth is hard-coded to 1. Received: {value}.")
|
||||||
T005 = ("Currently history size is hard-coded to 0. Received: {value}.")
|
|
||||||
T006 = ("Currently history width is hard-coded to 0. Received: {value}.")
|
|
||||||
T007 = ("Can't yet set {attr} from Span. Vote for this feature on the "
|
T007 = ("Can't yet set {attr} from Span. Vote for this feature on the "
|
||||||
"issue tracker: http://github.com/explosion/spaCy/issues")
|
"issue tracker: http://github.com/explosion/spaCy/issues")
|
||||||
T008 = ("Bad configuration of Tagger. This is probably a bug within "
|
T008 = ("Bad configuration of Tagger. This is probably a bug within "
|
||||||
|
|
|
@ -39,6 +39,33 @@ mkdir models
|
||||||
python -m spacy train es models ancora-json/es_ancora-ud-train.json ancora-json/es_ancora-ud-dev.json
|
python -m spacy train es models ancora-json/es_ancora-ud-train.json ancora-json/es_ancora-ud-dev.json
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### Understanding the training output
|
||||||
|
|
||||||
|
When you train a model using the [`spacy train`](/api/cli#train) command, you'll
|
||||||
|
see a table showing metrics after each pass over the data. Here's what those
|
||||||
|
metrics mean:
|
||||||
|
|
||||||
|
> #### Tokenization metrics
|
||||||
|
>
|
||||||
|
> Note that if the development data has raw text, some of the gold-standard
|
||||||
|
> entities might not align to the predicted tokenization. These tokenization
|
||||||
|
> errors are **excluded from the NER evaluation**. If your tokenization makes it
|
||||||
|
> impossible for the model to predict 50% of your entities, your NER F-score
|
||||||
|
> might still look good.
|
||||||
|
|
||||||
|
| Name | Description |
|
||||||
|
| ---------- | ------------------------------------------------------------------------------------------------- |
|
||||||
|
| `Dep Loss` | Training loss for dependency parser. Should decrease, but usually not to 0. |
|
||||||
|
| `NER Loss` | Training loss for named entity recognizer. Should decrease, but usually not to 0. |
|
||||||
|
| `UAS` | Unlabeled attachment score for parser. The percentage of unlabeled correct arcs. Should increase. |
|
||||||
|
| `NER P.` | NER precision on development data. Should increase. |
|
||||||
|
| `NER R.` | NER recall on development data. Should increase. |
|
||||||
|
| `NER F.` | NER F-score on development data. Should increase. |
|
||||||
|
| `Tag %` | Fine-grained part-of-speech tag accuracy on development data. Should increase. |
|
||||||
|
| `Token %` | Tokenization accuracy on development data. |
|
||||||
|
| `CPU WPS` | Prediction speed on CPU in words per second, if available. Should stay stable. |
|
||||||
|
| `GPU WPS` | Prediction speed on GPU in words per second, if available. Should stay stable. |
|
||||||
|
|
||||||
### Improving accuracy with transfer learning {#transfer-learning new="2.1"}
|
### Improving accuracy with transfer learning {#transfer-learning new="2.1"}
|
||||||
|
|
||||||
In most projects, you'll usually have a small amount of labelled data, and
|
In most projects, you'll usually have a small amount of labelled data, and
|
||||||
|
|
|
@ -42,7 +42,6 @@ export const onRouteUpdate = ({ location }) => {
|
||||||
// Navigate to targeted element
|
// Navigate to targeted element
|
||||||
el.scrollIntoView()
|
el.scrollIntoView()
|
||||||
// Force recomputing :target pseudo class with pushState/popState
|
// Force recomputing :target pseudo class with pushState/popState
|
||||||
window.location.hash = ''
|
|
||||||
window.location.hash = location.hash
|
window.location.hash = location.hash
|
||||||
}
|
}
|
||||||
}, 0)
|
}, 0)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user