From 1465c6c221c0f0153bea6c35e688a69c891050d2 Mon Sep 17 00:00:00 2001 From: ines Date: Sat, 13 May 2017 21:23:12 +0200 Subject: [PATCH] Add API docs for util functions --- website/docs/api/_data.json | 5 + website/docs/api/util.jade | 227 ++++++++++++++++++++++++++++++++++++ 2 files changed, 232 insertions(+) create mode 100644 website/docs/api/util.jade diff --git a/website/docs/api/_data.json b/website/docs/api/_data.json index 586057fe2..2e0d80ca1 100644 --- a/website/docs/api/_data.json +++ b/website/docs/api/_data.json @@ -21,6 +21,7 @@ "GoldParse": "goldparse" }, "Other": { + "Utility Functions": "util", "Annotation Specs": "annotation", "Feature Scheme": "features" } @@ -110,6 +111,10 @@ "tag": "class" }, + "util": { + "title": "Utility Functions" + }, + "annotation": { "title": "Annotation Specifications" }, diff --git a/website/docs/api/util.jade b/website/docs/api/util.jade new file mode 100644 index 000000000..3b1f305a9 --- /dev/null +++ b/website/docs/api/util.jade @@ -0,0 +1,227 @@ +//- 💫 DOCS > API > UTIL + +include ../../_includes/_mixins + +p + | spaCy comes with a small collection of utility functions located in + | #[+src(gh("spaCy", "spacy/util.py")) spacy/util.py]. + ++infobox("Important note") + | Because utility functions are mostly intended for + | #[strong internal use within spaCy], their behaviour may change with + | future releases. The functions documented on this page should be safe + | to use and we'll try to ensure backwards compatibility. However, we + | recommend having additional tests in place if your application depends on + | any of spaCy's utilities. + ++h(2, "get_data_path") get_data_path + +tag function + +p + | Get path to the data directory where spaCy looks for models. Defaults to + | #[code spacy/data]. + ++table(["Name", "Type", "Description"]) + +row + +cell #[code require_exists] + +cell bool + +cell Only return path if it exists, otherwise return #[code None]. 
+ + +footrow + +cell return + +cell #[code Path] / #[code None] + +cell Data path or #[code None]. + ++h(2, "set_data_path") set_data_path + +tag function + +p + | Set custom path to the data directory where spaCy looks for models. + ++aside-code("Example"). + util.set_data_path('/custom/path') + util.get_data_path() + # PosixPath('/custom/path') + ++table(["Name", "Type", "Description"]) + +row + +cell #[code path] + +cell unicode or #[code Path] + +cell Path to new data directory. + ++h(2, "load_lang_class") load_lang_class + +tag function + +p + | Import and load a #[code Language] class. Allows lazy-loading + | #[+a("/docs/usage/adding-languages") language data] and importing + | languages using the two-letter language code. + ++aside-code("Example"). + for lang_id in ['en', 'de']: + lang_class = util.load_lang_class(lang_id) + lang = lang_class() + tokenizer = lang.Defaults.create_tokenizer() + ++table(["Name", "Type", "Description"]) + +row + +cell #[code lang] + +cell unicode + +cell Two-letter language code, e.g. #[code 'en']. + + +footrow + +cell return + +cell #[code Language] + +cell Language class. + ++h(2, "resolve_model_path") resolve_model_path + +tag function + +p Resolve a model name or string to a model path. + ++aside-code("Example"). + model_path = util.resolve_model_path('en') + model_path = util.resolve_model_path('/path/to/en') + ++table(["Name", "Type", "Description"]) + +row + +cell #[code name] + +cell unicode + +cell Package name, shortcut link or model path. + + +footrow + +cell return + +cell #[code Path] + +cell Path to model data directory. + ++h(2, "is_package") is_package + +tag function + +p + | Check if string maps to a package installed via pip. Mainly used to + | validate #[+a("/docs/usage/models") model packages]. + ++aside-code("Example"). + util.is_package('en_core_web_sm') # True + util.is_package('xyz') # False + ++table(["Name", "Type", "Description"]) + +row + +cell #[code name] + +cell unicode + +cell Name of package. 
+ + +footrow + +cell return + +cell #[code bool] + +cell #[code True] if installed package, #[code False] if not. + ++h(2, "get_model_package_path") get_model_package_path + +tag function + +p + | Get path to a #[+a("/docs/usage/models") model package] installed via pip. + | Currently imports the package to find it and parse its meta data. + ++aside-code("Example"). + util.get_model_package_path('en_core_web_sm') + # /usr/lib/python3.6/site-packages/en_core_web_sm/en_core_web_sm-1.2.0 + ++table(["Name", "Type", "Description"]) + +row + +cell #[code package_name] + +cell unicode + +cell Name of installed package. + + +footrow + +cell return + +cell #[code Path] + +cell Path to model data directory. + ++h(2, "parse_package_meta") parse_package_meta + +tag function + +p + | Check if a #[code meta.json] exists in a model package and return its + | contents. + ++aside-code("Example"). + if util.is_package('en_core_web_sm'): + path = util.get_model_package_path('en_core_web_sm') + meta = util.parse_package_meta(path, require=True) + # {'name': 'core_web_sm', 'lang': 'en', ...} + ++table(["Name", "Type", "Description"]) + +row + +cell #[code package_path] + +cell #[code Path] + +cell Path to model package directory. + + +row + +cell #[code require] + +cell #[code bool] + +cell If #[code True], raise error if no #[code meta.json] is found. + + +footrow + +cell return + +cell dict / #[code None] + +cell Model meta data or #[code None]. + ++h(2, "update_exc") update_exc + +tag function + +p + | Update, validate and overwrite + | #[+a("/docs/usage/adding-languages#tokenizer-exceptions") tokenizer exceptions]. + | Used to combine global exceptions with custom, language-specific + | exceptions. Will raise an error if key doesn't match #[code ORTH] values. + ++aside-code("Example"). 
+ BASE = {"a.": [{ORTH: "a."}], ":)": [{ORTH: ":)"}]} + NEW = {"a.": [{ORTH: "a.", LEMMA: "all"}]} + exceptions = util.update_exc(BASE, NEW) + # {"a.": [{ORTH: "a.", LEMMA: "all"}], ":)": [{ORTH: ":)"}]} + ++table(["Name", "Type", "Description"]) + +row + +cell #[code base_exceptions] + +cell dict + +cell Base tokenizer exceptions. + + +row + +cell #[code *addition_dicts] + +cell dicts + +cell Exception dictionaries to add to the base exceptions, in order. + + +footrow + +cell return + +cell dict + +cell Combined tokenizer exceptions. + + ++h(2, "prints") prints + +tag function + +p + | Print a formatted, text-wrapped message with an optional title. If a text + | argument is a #[code Path], it's converted to a string. Should only + | be used for interactive components like the #[+a("/docs/usage/cli") CLI]. + ++aside-code("Example"). + data_path = Path('/some/path') + if not data_path.exists(): + util.prints("Can't find the path.", data_path, + title="Error", exits=True) + ++table(["Name", "Type", "Description"]) + +row + +cell #[code *texts] + +cell unicode + +cell Texts to print. Each argument is rendered as a paragraph. + + +row + +cell #[code **kwargs] + +cell - + +cell + | #[code title] is rendered as a coloured headline. #[code exits=True] + | performs system exit after printing.