Merge branch 'develop' of https://github.com/explosion/spaCy into develop

2025-12-24 02:23:19 +03:00 · 2017-06-04 14:26:29 -05:00 · 2017-06-04 14:26:29 -05:00 · 51e1541ddb
commit 51e1541ddb
parent add9a33782 e4eb33daf7
7 changed files with 93 additions and 11 deletions
--- a/spacy/cli/info.py
+++ b/spacy/cli/info.py
@ -27,7 +27,7 @@ def info(cmd, model=None, markdown=False):
        meta_path = model_path / 'meta.json'
        if not meta_path.is_file():
            util.prints(meta_path, title="Can't find model meta.json", exits=1)
-        meta = read_json(meta_path)
+        meta = util.read_json(meta_path)
        if model_path.resolve() != model_path:
            meta['link'] = path2str(model_path)
            meta['source'] = path2str(model_path.resolve())
--- a/spacy/util.py
+++ b/spacy/util.py
@ -155,7 +155,7 @@ def get_model_meta(path):
    meta = read_json(meta_path)
    for setting in ['lang', 'name', 'version']:
        if setting not in meta:
-            raise IOError('No %s setting found in model meta.json' % setting)
+            raise ValueError('No %s setting found in model meta.json' % setting)
    return meta
@ -417,6 +417,7 @@ def read_json(location):
    location (Path): Path to JSON file.
    RETURNS (dict): Loaded JSON content.
    """
    location = ensure_path(location)
    with location.open('r', encoding='utf8') as f:
        return ujson.load(f)
--- a/website/_includes/_functions.jade
+++ b/website/_includes/_functions.jade
@ -28,8 +28,8 @@
 -   function getSocialImg() {
 -       var base = SITE_URL + '/assets/img/social/preview_'
-       var image = 'default'
+-       var image = ALPHA ? 'alpha' : 'default'
 -       if (preview) image = preview
-       else if (SECTION == 'docs') image = 'docs'
+-       else if (SECTION == 'docs' && !ALPHA) image = 'docs'
 -       return base + image + '.jpg'
 -   }
--- a/website/assets/img/social/preview_alpha.jpg
+++ b/website/assets/img/social/preview_alpha.jpg
--- a/website/docs/usage/models.jade
+++ b/website/docs/usage/models.jade
@ -104,6 +104,13 @@ p
    |  recommend using pip with a direct link, instead of relying on spaCy's
    |  #[+api("cli#download") #[code download]] command.
 +infobox
    |  You can also add the direct download link to your application's
    |  #[code requirements.txt]. For more details,
    |  see the usage guide on
    |  #[+a("/docs/usage/production-use#models") working with models in production].
 +h(3, "download-manual") Manual download and installation
 p
@ -118,15 +125,15 @@ p
    └── en_core_web_md-1.2.0.tar.gz       # downloaded archive
        ├── meta.json                     # model meta data
        ├── setup.py                      # setup file for pip installation
-        └── en_core_web_md                # model directory
+        └── en_core_web_md                # 📦 model package
            ├── __init__.py               # init for pip installation
            ├── meta.json                 # model meta data
            └── en_core_web_md-1.2.0      # model data
 p
-    |  You can place the model data directory anywhere on your local file system.
+    |  You can place the #[strong model package directory] anywhere on your
-    |  To use it with spaCy, simply assign it a name by creating a
+    |  local file system. To use it with spaCy, simply assign it a name by
-    |  #[+a("#usage") shortcut link] for the data directory.
+    |  creating a #[+a("#usage") shortcut link] for the data directory.
 +h(2, "usage") Using models with spaCy
@ -136,9 +143,9 @@ p
 +code.
    import spacy
-    nlp = spacy.load('en')              # load model with shortcut link "en"
+    nlp = spacy.load('en')                       # load model with shortcut link "en"
-    nlp = spacy.load('en_core_web_sm')  # load model package "en_core_web_sm"
+    nlp = spacy.load('en_core_web_sm')           # load model package "en_core_web_sm"
-    nlp = spacy.load('/path/to/model')  # load model from a directory
+    nlp = spacy.load('/path/to/en_core_web_sm')  # load package from a directory
    doc = nlp(u'This is a sentence.')
@ -219,6 +226,10 @@ p
    |  immediately, instead of failing somewhere down the line when calling
    |  #[code spacy.load()].
 +infobox
    |  For more details, see the usage guide on
    |  #[+a("/docs/usage/production-use#models") working with models in production].
 +h(2, "own-models") Using your own models
 p
--- a/website/docs/usage/production-use.jade
+++ b/website/docs/usage/production-use.jade
@ -76,3 +76,72 @@ p
    |  attributes to set the part-of-speech tags, syntactic dependencies, named
    |  entities and other attributes. For details, see the respective usage
    |  pages.
 +h(2, "models") Working with models
 p
    |  If your application depends on one or more #[+a("/docs/usage/models") models],
    |  you'll usually want to integrate them into your continuous integration
    |  workflow and build process. While spaCy provides a range of useful helpers
    |  for downloading, linking and loading models, the underlying functionality
    |  is entirely based on native Python packages. This allows your application
    |  to handle a model like any other package dependency.
 +h(3, "models-download") Downloading and requiring model dependencies
 p
    |  spaCy's built-in #[+api("cli#download") #[code download]] command
    |  is mostly intended as a convenient, interactive wrapper. It performs
    |  compatibility checks and prints detailed error messages and warnings.
    |  However, if you're downloading models as part of an automated build
    |  process, this only adds an unnecessary layer of complexity. If you know
    |  which models your application needs, you should be specifying them directly.
 p
    |  Because all models are valid Python packages, you can add them to your
    |  application's #[code requirements.txt]. If you're running your own
    |  internal PyPI installation, you can simply upload the models there. pip's
    |  #[+a("https://pip.pypa.io/en/latest/reference/pip_install/#requirements-file-format") requirements file format]
    |  supports both package names to download via a PyPI server, as well as direct
    |  URLs.
 +code("requirements.txt", "text").
    spacy&gt;=2.0.0,&lt;3.0.0
    -e #{gh("spacy-models")}/releases/download/en_core_web_sm-2.0.0/en_core_web_sm-2.0.0.tar.gz
 p
    |  All models are versioned and specify their spaCy dependency. This ensures
    |  cross-compatibility and lets you specify exact version requirements for
    |  each model. If you've trained your own model, you can use the
    |  #[+api("cli#package") #[code package]] command to generate the required
    |  meta data and turn it into a loadable package.
 +h(3, "models-loading") Loading and testing models
 p
    |  Downloading models directly via pip won't call spaCy's
    |  #[+api("cli#link") #[code link]] command, which creates
    |  symlinks for model shortcuts. This means that you'll have to run this
    |  command separately, or use the native #[code import] syntax to load the
    |  models:
 +code.
    import en_core_web_sm
    nlp = en_core_web_sm.load()
 p
    |  In general, this approach is recommended for larger code bases, as it's
    |  more "native", and doesn't depend on symlinks or rely on spaCy's loader
    |  to resolve string names to model packages. If a model can't be
    |  imported, Python will raise an #[code ImportError] immediately. And if a
    |  model is imported but not used, any linter will catch that.
 p
    |  Similarly, it'll give you more flexibility when writing tests that
    |  require loading models. For example, instead of writing your own
    |  #[code try] and #[code except] logic around spaCy's loader, you can use
    |  #[+a("http://pytest.readthedocs.io/en/latest/") pytest]'s
    |  #[code importorskip()] method to only run a test if a specific model or
    |  model version is installed. Each model package exposes a #[code __version__]
    |  attribute which you can also use to perform your own version compatibility
    |  checks before loading a model.
--- a/website/docs/usage/visualizers.jade
+++ b/website/docs/usage/visualizers.jade
@ -29,6 +29,7 @@ p
    |  standards.
 +h(2, "getting-started") Getting started
    +tag-new(2)
 p
    |  The quickest way to visualize a #[code Doc] is to use