mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-11 04:08:09 +03:00
Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
commit
51e1541ddb
|
@ -27,7 +27,7 @@ def info(cmd, model=None, markdown=False):
|
||||||
meta_path = model_path / 'meta.json'
|
meta_path = model_path / 'meta.json'
|
||||||
if not meta_path.is_file():
|
if not meta_path.is_file():
|
||||||
util.prints(meta_path, title="Can't find model meta.json", exits=1)
|
util.prints(meta_path, title="Can't find model meta.json", exits=1)
|
||||||
meta = read_json(meta_path)
|
meta = util.read_json(meta_path)
|
||||||
if model_path.resolve() != model_path:
|
if model_path.resolve() != model_path:
|
||||||
meta['link'] = path2str(model_path)
|
meta['link'] = path2str(model_path)
|
||||||
meta['source'] = path2str(model_path.resolve())
|
meta['source'] = path2str(model_path.resolve())
|
||||||
|
|
|
@ -155,7 +155,7 @@ def get_model_meta(path):
|
||||||
meta = read_json(meta_path)
|
meta = read_json(meta_path)
|
||||||
for setting in ['lang', 'name', 'version']:
|
for setting in ['lang', 'name', 'version']:
|
||||||
if setting not in meta:
|
if setting not in meta:
|
||||||
raise IOError('No %s setting found in model meta.json' % setting)
|
raise ValueError('No %s setting found in model meta.json' % setting)
|
||||||
return meta
|
return meta
|
||||||
|
|
||||||
|
|
||||||
|
@ -417,6 +417,7 @@ def read_json(location):
|
||||||
location (Path): Path to JSON file.
|
location (Path): Path to JSON file.
|
||||||
RETURNS (dict): Loaded JSON content.
|
RETURNS (dict): Loaded JSON content.
|
||||||
"""
|
"""
|
||||||
|
location = ensure_path(location)
|
||||||
with location.open('r', encoding='utf8') as f:
|
with location.open('r', encoding='utf8') as f:
|
||||||
return ujson.load(f)
|
return ujson.load(f)
|
||||||
|
|
||||||
|
|
|
@ -28,8 +28,8 @@
|
||||||
|
|
||||||
- function getSocialImg() {
|
- function getSocialImg() {
|
||||||
- var base = SITE_URL + '/assets/img/social/preview_'
|
- var base = SITE_URL + '/assets/img/social/preview_'
|
||||||
- var image = 'default'
|
- var image = ALPHA ? 'alpha' : 'default'
|
||||||
- if (preview) image = preview
|
- if (preview) image = preview
|
||||||
- else if (SECTION == 'docs') image = 'docs'
|
- else if (SECTION == 'docs' && !ALPHA) image = 'docs'
|
||||||
- return base + image + '.jpg'
|
- return base + image + '.jpg'
|
||||||
- }
|
- }
|
||||||
|
|
BIN
website/assets/img/social/preview_alpha.jpg
Normal file
BIN
website/assets/img/social/preview_alpha.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 374 KiB |
|
@ -104,6 +104,13 @@ p
|
||||||
| recommend using pip with a direct link, instead of relying on spaCy's
|
| recommend using pip with a direct link, instead of relying on spaCy's
|
||||||
| #[+api("cli#download") #[code download]] command.
|
| #[+api("cli#download") #[code download]] command.
|
||||||
|
|
||||||
|
+infobox
|
||||||
|
| You can also add the direct download link to your application's
|
||||||
|
| #[code requirements.txt]. For more details,
|
||||||
|
| see the usage guide on
|
||||||
|
| #[+a("/docs/usage/production-use#models") working with models in production].
|
||||||
|
|
||||||
|
|
||||||
+h(3, "download-manual") Manual download and installation
|
+h(3, "download-manual") Manual download and installation
|
||||||
|
|
||||||
p
|
p
|
||||||
|
@ -118,15 +125,15 @@ p
|
||||||
└── en_core_web_md-1.2.0.tar.gz # downloaded archive
|
└── en_core_web_md-1.2.0.tar.gz # downloaded archive
|
||||||
├── meta.json # model meta data
|
├── meta.json # model meta data
|
||||||
├── setup.py # setup file for pip installation
|
├── setup.py # setup file for pip installation
|
||||||
└── en_core_web_md # model directory
|
└── en_core_web_md # 📦 model package
|
||||||
├── __init__.py # init for pip installation
|
├── __init__.py # init for pip installation
|
||||||
├── meta.json # model meta data
|
├── meta.json # model meta data
|
||||||
└── en_core_web_md-1.2.0 # model data
|
└── en_core_web_md-1.2.0 # model data
|
||||||
|
|
||||||
p
|
p
|
||||||
| You can place the model data directory anywhere on your local file system.
|
| You can place the #[strong model package directory] anywhere on your
|
||||||
| To use it with spaCy, simply assign it a name by creating a
|
| local file system. To use it with spaCy, simply assign it a name by
|
||||||
| #[+a("#usage") shortcut link] for the data directory.
|
| creating a #[+a("#usage") shortcut link] for the data directory.
|
||||||
|
|
||||||
+h(2, "usage") Using models with spaCy
|
+h(2, "usage") Using models with spaCy
|
||||||
|
|
||||||
|
@ -136,9 +143,9 @@ p
|
||||||
|
|
||||||
+code.
|
+code.
|
||||||
import spacy
|
import spacy
|
||||||
nlp = spacy.load('en') # load model with shortcut link "en"
|
nlp = spacy.load('en') # load model with shortcut link "en"
|
||||||
nlp = spacy.load('en_core_web_sm') # load model package "en_core_web_sm"
|
nlp = spacy.load('en_core_web_sm') # load model package "en_core_web_sm"
|
||||||
nlp = spacy.load('/path/to/model') # load model from a directory
|
nlp = spacy.load('/path/to/en_core_web_sm') # load package from a directory
|
||||||
|
|
||||||
doc = nlp(u'This is a sentence.')
|
doc = nlp(u'This is a sentence.')
|
||||||
|
|
||||||
|
@ -219,6 +226,10 @@ p
|
||||||
| immediately, instead of failing somewhere down the line when calling
|
| immediately, instead of failing somewhere down the line when calling
|
||||||
| #[code spacy.load()].
|
| #[code spacy.load()].
|
||||||
|
|
||||||
|
+infobox
|
||||||
|
| For more details, see the usage guide on
|
||||||
|
| #[+a("/docs/usage/production-use#models") working with models in production].
|
||||||
|
|
||||||
+h(2, "own-models") Using your own models
|
+h(2, "own-models") Using your own models
|
||||||
|
|
||||||
p
|
p
|
||||||
|
|
|
@ -76,3 +76,72 @@ p
|
||||||
| attributes to set the part-of-speech tags, syntactic dependencies, named
|
| attributes to set the part-of-speech tags, syntactic dependencies, named
|
||||||
| entities and other attributes. For details, see the respective usage
|
| entities and other attributes. For details, see the respective usage
|
||||||
| pages.
|
| pages.
|
||||||
|
|
||||||
|
+h(2, "models") Working with models
|
||||||
|
|
||||||
|
p
|
||||||
|
| If your application depends on one or more #[+a("/docs/usage/models") models],
|
||||||
|
| you'll usually want to integrate them into your continuous integration
|
||||||
|
| workflow and build process. While spaCy provides a range of useful helpers
|
||||||
|
| for downloading, linking and loading models, the underlying functionality
|
||||||
|
| is entirely based on native Python packages. This allows your application
|
||||||
|
| to handle a model like any other package dependency.
|
||||||
|
|
||||||
|
+h(3, "models-download") Downloading and requiring model dependencies
|
||||||
|
|
||||||
|
p
|
||||||
|
| spaCy's built-in #[+api("cli#download") #[code download]] command
|
||||||
|
| is mostly intended as a convenient, interactive wrapper. It performs
|
||||||
|
| compatibility checks and prints detailed error messages and warnings.
|
||||||
|
| However, if you're downloading models as part of an automated build
|
||||||
|
| process, this only adds an unnecessary layer of complexity. If you know
|
||||||
|
| which models your application needs, you should be specifying them directly.
|
||||||
|
|
||||||
|
p
|
||||||
|
| Because all models are valid Python packages, you can add them to your
|
||||||
|
| application's #[code requirements.txt]. If you're running your own
|
||||||
|
| internal PyPI installation, you can simply upload the models there. pip's
|
||||||
|
| #[+a("https://pip.pypa.io/en/latest/reference/pip_install/#requirements-file-format") requirements file format]
|
||||||
|
| supports both package names to download via a PyPI server, as well as direct
|
||||||
|
| URLs.
|
||||||
|
|
||||||
|
+code("requirements.txt", "text").
|
||||||
|
spacy>=2.0.0,<3.0.0
|
||||||
|
-e #{gh("spacy-models")}/releases/download/en_core_web_sm-2.0.0/en_core_web_sm-2.0.0.tar.gz
|
||||||
|
|
||||||
|
p
|
||||||
|
| All models are versioned and specify their spaCy dependency. This ensures
|
||||||
|
| cross-compatibility and lets you specify exact version requirements for
|
||||||
|
| each model. If you've trained your own model, you can use the
|
||||||
|
| #[+api("cli#package") #[code package]] command to generate the required
|
||||||
|
| meta data and turn it into a loadable package.
|
||||||
|
|
||||||
|
+h(3, "models-loading") Loading and testing models
|
||||||
|
|
||||||
|
p
|
||||||
|
| Downloading models directly via pip won't call spaCy's
|
||||||
|
| #[+api("cli#link") #[code link]] command, which creates
|
||||||
|
| symlinks for model shortcuts. This means that you'll have to run this
|
||||||
|
| command separately, or use the native #[code import] syntax to load the
|
||||||
|
| models:
|
||||||
|
|
||||||
|
+code.
|
||||||
|
import en_core_web_sm
|
||||||
|
nlp = en_core_web_sm.load()
|
||||||
|
|
||||||
|
p
|
||||||
|
| In general, this approach is recommended for larger code bases, as it's
|
||||||
|
| more "native", and doesn't depend on symlinks or rely on spaCy's loader
|
||||||
|
| to resolve string names to model packages. If a model can't be
|
||||||
|
| imported, Python will raise an #[code ImportError] immediately. And if a
|
||||||
|
| model is imported but not used, any linter will catch that.
|
||||||
|
|
||||||
|
p
|
||||||
|
| Similarly, it'll give you more flexibility when writing tests that
|
||||||
|
| require loading models. For example, instead of writing your own
|
||||||
|
| #[code try] and #[code except] logic around spaCy's loader, you can use
|
||||||
|
| #[+a("http://pytest.readthedocs.io/en/latest/") pytest]'s
|
||||||
|
| #[code importorskip()] method to only run a test if a specific model or
|
||||||
|
| model version is installed. Each model package exposes a #[code __version__]
|
||||||
|
| attribute which you can also use to perform your own version compatibility
|
||||||
|
| checks before loading a model.
|
||||||
|
|
|
@ -29,6 +29,7 @@ p
|
||||||
| standards.
|
| standards.
|
||||||
|
|
||||||
+h(2, "getting-started") Getting started
|
+h(2, "getting-started") Getting started
|
||||||
|
+tag-new(2)
|
||||||
|
|
||||||
p
|
p
|
||||||
| The quickest way to visualize #[code Doc] is to use
|
| The quickest way to visualize #[code Doc] is to use
|
||||||
|
|
Loading…
Reference in New Issue
Block a user