2017-10-03 15:27:22 +03:00
|
|
|
|
//- 💫 DOCS > API > TOP-LEVEL > UTIL
|
2017-05-13 22:23:12 +03:00
|
|
|
|
|
|
|
|
|
p
|
|
|
|
|
| spaCy comes with a small collection of utility functions located in
|
2017-10-03 15:27:22 +03:00
|
|
|
|
| #[+src(gh("spaCy", "spacy/util.py")) #[code spacy/util.py]].
|
2017-05-13 22:23:12 +03:00
|
|
|
|
| Because utility functions are mostly intended for
|
|
|
|
|
| #[strong internal use within spaCy], their behaviour may change with
|
|
|
|
|
| future releases. The functions documented on this page should be safe
|
|
|
|
|
| to use and we'll try to ensure backwards compatibility. However, we
|
|
|
|
|
| recommend having additional tests in place if your application depends on
|
|
|
|
|
| any of spaCy's utilities.
|
|
|
|
|
|
2017-10-03 15:27:22 +03:00
|
|
|
|
+h(3, "util.get_data_path") util.get_data_path
|
2017-05-13 22:23:12 +03:00
|
|
|
|
+tag function
|
|
|
|
|
|
|
|
|
|
p
|
|
|
|
|
| Get path to the data directory where spaCy looks for models. Defaults to
|
|
|
|
|
| #[code spacy/data].
|
|
|
|
|
|
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code require_exists]
|
|
|
|
|
+cell bool
|
|
|
|
|
+cell Only return path if it exists, otherwise return #[code None].
|
|
|
|
|
|
2017-10-03 15:27:22 +03:00
|
|
|
|
+row("foot")
|
2017-05-19 01:02:34 +03:00
|
|
|
|
+cell returns
|
2017-05-13 22:23:12 +03:00
|
|
|
|
+cell #[code Path] / #[code None]
|
|
|
|
|
+cell Data path or #[code None].
|
|
|
|
|
|
2017-10-03 15:27:22 +03:00
|
|
|
|
+h(3, "util.set_data_path") util.set_data_path
|
2017-05-13 22:23:12 +03:00
|
|
|
|
+tag function
|
|
|
|
|
|
|
|
|
|
p
|
|
|
|
|
| Set custom path to the data directory where spaCy looks for models.
|
|
|
|
|
|
|
|
|
|
+aside-code("Example").
|
|
|
|
|
util.set_data_path('/custom/path')
|
|
|
|
|
util.get_data_path()
|
|
|
|
|
# PosixPath('/custom/path')
|
|
|
|
|
|
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code path]
|
|
|
|
|
+cell unicode or #[code Path]
|
|
|
|
|
+cell Path to new data directory.
|
|
|
|
|
|
2017-10-03 15:27:22 +03:00
|
|
|
|
+h(3, "util.get_lang_class") util.get_lang_class
|
2017-05-13 22:23:12 +03:00
|
|
|
|
+tag function
|
|
|
|
|
|
|
|
|
|
p
|
|
|
|
|
| Import and load a #[code Language] class. Allows lazy-loading
|
2017-10-03 15:27:22 +03:00
|
|
|
|
| #[+a("/usage/adding-languages") language data] and importing
|
2017-05-13 22:23:12 +03:00
|
|
|
|
| languages using the two-letter language code.
|
|
|
|
|
|
|
|
|
|
+aside-code("Example").
|
|
|
|
|
for lang_id in ['en', 'de']:
|
2017-05-14 02:31:10 +03:00
|
|
|
|
lang_class = util.get_lang_class(lang_id)
|
2017-05-13 22:23:12 +03:00
|
|
|
|
lang = lang_class()
|
|
|
|
|
tokenizer = lang.Defaults.create_tokenizer()
|
|
|
|
|
|
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code lang]
|
|
|
|
|
+cell unicode
|
|
|
|
|
+cell Two-letter language code, e.g. #[code 'en'].
|
|
|
|
|
|
2017-10-03 15:27:22 +03:00
|
|
|
|
+row("foot")
|
2017-05-19 01:02:34 +03:00
|
|
|
|
+cell returns
|
2017-05-13 22:23:12 +03:00
|
|
|
|
+cell #[code Language]
|
|
|
|
|
+cell Language class.
|
|
|
|
|
|
2017-10-03 15:27:22 +03:00
|
|
|
|
+h(3, "util.load_model") util.load_model
|
2017-05-13 22:23:12 +03:00
|
|
|
|
+tag function
|
2017-05-26 13:42:36 +03:00
|
|
|
|
+tag-new(2)
|
2017-05-13 22:23:12 +03:00
|
|
|
|
|
2017-05-28 01:22:00 +03:00
|
|
|
|
p
|
|
|
|
|
| Load a model from a shortcut link, package or data path. If called with a
|
|
|
|
|
| shortcut link or package name, spaCy will assume the model is a Python
|
|
|
|
|
| package and import and call its #[code load()] method. If called with a
|
|
|
|
|
| path, spaCy will assume it's a data directory, read the language and
|
|
|
|
|
| pipeline settings from the meta.json and initialise a #[code Language]
|
|
|
|
|
| class. The model data will then be loaded in via
|
|
|
|
|
| #[+api("language#from_disk") #[code Language.from_disk()]].
|
2017-05-13 22:23:12 +03:00
|
|
|
|
|
|
|
|
|
+aside-code("Example").
|
2017-05-28 01:22:00 +03:00
|
|
|
|
nlp = util.load_model('en')
|
2017-05-29 15:10:10 +03:00
|
|
|
|
nlp = util.load_model('en_core_web_sm', disable=['ner'])
|
2017-05-28 01:22:00 +03:00
|
|
|
|
nlp = util.load_model('/path/to/data')
|
2017-05-13 22:23:12 +03:00
|
|
|
|
|
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code name]
|
|
|
|
|
+cell unicode
|
|
|
|
|
+cell Package name, shortcut link or model path.
|
|
|
|
|
|
2017-05-29 15:10:10 +03:00
|
|
|
|
+row
|
|
|
|
|
+cell #[code **overrides]
|
|
|
|
|
+cell -
|
|
|
|
|
+cell Specific overrides, like pipeline components to disable.
|
|
|
|
|
|
2017-10-03 15:27:22 +03:00
|
|
|
|
+row("foot")
|
2017-05-19 01:02:34 +03:00
|
|
|
|
+cell returns
|
2017-05-28 01:22:00 +03:00
|
|
|
|
+cell #[code Language]
|
|
|
|
|
+cell #[code Language] class with the loaded model.
|
2017-06-05 14:18:22 +03:00
|
|
|
|
|
2017-10-03 15:27:22 +03:00
|
|
|
|
+h(3, "util.load_model_from_path") util.load_model_from_path
|
2017-06-05 14:18:22 +03:00
|
|
|
|
+tag function
|
|
|
|
|
+tag-new(2)
|
|
|
|
|
|
|
|
|
|
p
|
|
|
|
|
| Load a model from a data directory path. Creates the
|
|
|
|
|
| #[+api("language") #[code Language]] class and pipeline based on the
|
|
|
|
|
| directory's meta.json and then calls
|
|
|
|
|
| #[+api("language#from_disk") #[code from_disk()]] with the path. This
|
|
|
|
|
| function also makes it easy to test a new model that you haven't packaged
|
|
|
|
|
| yet.
|
|
|
|
|
|
|
|
|
|
+aside-code("Example").
|
|
|
|
|
nlp = load_model_from_path('/path/to/data')
|
|
|
|
|
|
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code model_path]
|
|
|
|
|
+cell unicode
|
|
|
|
|
+cell Path to model data directory.
|
|
|
|
|
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code meta]
|
|
|
|
|
+cell dict
|
|
|
|
|
+cell
|
|
|
|
|
| Model meta data. If #[code False], spaCy will try to load the
|
|
|
|
|
| meta from a meta.json in the same directory.
|
|
|
|
|
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code **overrides]
|
|
|
|
|
+cell -
|
|
|
|
|
+cell Specific overrides, like pipeline components to disable.
|
|
|
|
|
|
2017-10-03 15:27:22 +03:00
|
|
|
|
+row("foot")
|
2017-06-05 14:18:22 +03:00
|
|
|
|
+cell returns
|
|
|
|
|
+cell #[code Language]
|
|
|
|
|
+cell #[code Language] class with the loaded model.
|
2017-05-28 01:22:00 +03:00
|
|
|
|
|
2017-10-03 15:27:22 +03:00
|
|
|
|
+h(3, "util.load_model_from_init_py") util.load_model_from_init_py
|
2017-05-28 01:22:00 +03:00
|
|
|
|
+tag function
|
|
|
|
|
+tag-new(2)
|
|
|
|
|
|
|
|
|
|
p
|
|
|
|
|
| A helper function to use in the #[code load()] method of a model package's
|
2017-10-03 15:27:22 +03:00
|
|
|
|
| #[+src(gh("spacy-dev-resources", "templates/model/en_model_name/__init__.py")) #[code __init__.py]].
|
2017-05-28 01:22:00 +03:00
|
|
|
|
|
|
|
|
|
+aside-code("Example").
|
|
|
|
|
from spacy.util import load_model_from_init_py
|
|
|
|
|
|
2017-05-29 15:10:10 +03:00
|
|
|
|
def load(**overrides):
|
|
|
|
|
return load_model_from_init_py(__file__, **overrides)
|
2017-05-28 01:22:00 +03:00
|
|
|
|
|
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code init_file]
|
|
|
|
|
+cell unicode
|
|
|
|
|
+cell Path to model's __init__.py, i.e. #[code __file__].
|
|
|
|
|
|
2017-05-29 15:10:10 +03:00
|
|
|
|
+row
|
|
|
|
|
+cell #[code **overrides]
|
|
|
|
|
+cell -
|
|
|
|
|
+cell Specific overrides, like pipeline components to disable.
|
|
|
|
|
|
2017-10-03 15:27:22 +03:00
|
|
|
|
+row("foot")
|
2017-05-28 01:22:00 +03:00
|
|
|
|
+cell returns
|
|
|
|
|
+cell #[code Language]
|
|
|
|
|
+cell #[code Language] class with the loaded model.
|
2017-05-13 22:23:12 +03:00
|
|
|
|
|
2017-10-03 15:27:22 +03:00
|
|
|
|
+h(3, "util.get_model_meta") util.get_model_meta
|
2017-05-29 15:10:10 +03:00
|
|
|
|
+tag function
|
|
|
|
|
+tag-new(2)
|
|
|
|
|
|
|
|
|
|
p
|
|
|
|
|
| Get a model's meta.json from a directory path and validate its contents.
|
|
|
|
|
|
|
|
|
|
+aside-code("Example").
|
|
|
|
|
meta = util.get_model_meta('/path/to/model')
|
|
|
|
|
|
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code path]
|
|
|
|
|
+cell unicode or #[code Path]
|
|
|
|
|
+cell Path to model directory.
|
|
|
|
|
|
2017-10-03 15:27:22 +03:00
|
|
|
|
+row("foot")
|
2017-05-29 15:10:10 +03:00
|
|
|
|
+cell returns
|
|
|
|
|
+cell dict
|
|
|
|
|
+cell The model's meta data.
|
|
|
|
|
|
2017-10-03 15:27:22 +03:00
|
|
|
|
+h(3, "util.is_package") util.is_package
|
2017-05-13 22:23:12 +03:00
|
|
|
|
+tag function
|
|
|
|
|
|
|
|
|
|
p
|
|
|
|
|
| Check if string maps to a package installed via pip. Mainly used to
|
2017-10-03 15:27:22 +03:00
|
|
|
|
| validate #[+a("/usage/models") model packages].
|
2017-05-13 22:23:12 +03:00
|
|
|
|
|
|
|
|
|
+aside-code("Example").
|
|
|
|
|
util.is_package('en_core_web_sm') # True
|
|
|
|
|
util.is_package('xyz') # False
|
|
|
|
|
|
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code name]
|
|
|
|
|
+cell unicode
|
|
|
|
|
+cell Name of package.
|
|
|
|
|
|
2017-10-03 15:27:22 +03:00
|
|
|
|
+row("foot")
|
2017-05-19 01:02:34 +03:00
|
|
|
|
+cell returns
|
2017-05-13 22:23:12 +03:00
|
|
|
|
+cell #[code bool]
|
|
|
|
|
+cell #[code True] if installed package, #[code False] if not.
|
|
|
|
|
|
2017-10-03 15:27:22 +03:00
|
|
|
|
+h(3, "util.get_package_path") util.get_package_path
|
2017-05-13 22:23:12 +03:00
|
|
|
|
+tag function
|
2017-05-28 01:22:00 +03:00
|
|
|
|
+tag-new(2)
|
2017-05-13 22:23:12 +03:00
|
|
|
|
|
|
|
|
|
p
|
2017-05-28 01:22:00 +03:00
|
|
|
|
| Get path to an installed package. Mainly used to resolve the location of
|
2017-10-03 15:27:22 +03:00
|
|
|
|
| #[+a("/usage/models") model packages]. Currently imports the package
|
2017-05-28 01:22:00 +03:00
|
|
|
|
| to find its path.
|
2017-05-13 22:23:12 +03:00
|
|
|
|
|
|
|
|
|
+aside-code("Example").
|
2017-05-28 01:22:00 +03:00
|
|
|
|
util.get_package_path('en_core_web_sm')
|
|
|
|
|
# /usr/lib/python3.6/site-packages/en_core_web_sm
|
2017-05-13 22:23:12 +03:00
|
|
|
|
|
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code package_name]
|
|
|
|
|
+cell unicode
|
|
|
|
|
+cell Name of installed package.
|
|
|
|
|
|
2017-10-03 15:27:22 +03:00
|
|
|
|
+row("foot")
|
2017-05-19 01:02:34 +03:00
|
|
|
|
+cell returns
|
2017-05-13 22:23:12 +03:00
|
|
|
|
+cell #[code Path]
|
|
|
|
|
+cell Path to model package directory.
|
|
|
|
|
|
2017-10-03 15:27:22 +03:00
|
|
|
|
+h(3, "util.is_in_jupyter") util.is_in_jupyter
|
2017-05-21 02:12:09 +03:00
|
|
|
|
+tag function
|
2017-05-26 13:42:36 +03:00
|
|
|
|
+tag-new(2)
|
2017-05-21 02:12:09 +03:00
|
|
|
|
|
|
|
|
|
p
|
|
|
|
|
| Check if user is running spaCy from a #[+a("https://jupyter.org") Jupyter]
|
|
|
|
|
| notebook by detecting the IPython kernel. Mainly used for the
|
2017-11-01 23:11:10 +03:00
|
|
|
|
| #[+api("top-level#displacy") #[code displacy]] visualizer.
|
2017-05-21 02:12:09 +03:00
|
|
|
|
|
|
|
|
|
+aside-code("Example").
|
|
|
|
|
html = '<h1>Hello world!</h1>'
|
|
|
|
|
if util.is_in_jupyter():
|
|
|
|
|
from IPython.core.display import display, HTML
|
|
|
|
|
display(HTML(html))
|
|
|
|
|
|
|
|
|
|
+table(["Name", "Type", "Description"])
|
2017-10-03 15:27:22 +03:00
|
|
|
|
+row("foot")
|
2017-05-21 02:12:09 +03:00
|
|
|
|
+cell returns
|
|
|
|
|
+cell bool
|
|
|
|
|
+cell #[code True] if in Jupyter, #[code False] if not.
|
|
|
|
|
|
2017-10-03 15:27:22 +03:00
|
|
|
|
+h(3, "util.update_exc") util.update_exc
|
2017-05-13 22:23:12 +03:00
|
|
|
|
+tag function
|
|
|
|
|
|
|
|
|
|
p
|
|
|
|
|
| Update, validate and overwrite
|
2017-10-03 15:27:22 +03:00
|
|
|
|
| #[+a("/usage/adding-languages#tokenizer-exceptions") tokenizer exceptions].
|
2017-05-13 22:23:12 +03:00
|
|
|
|
| Used to combine global exceptions with custom, language-specific
|
|
|
|
|
| exceptions. Will raise an error if key doesn't match #[code ORTH] values.
|
|
|
|
|
|
|
|
|
|
+aside-code("Example").
|
|
|
|
|
BASE = {"a.": [{ORTH: "a."}], ":)": [{ORTH: ":)"}]}
|
|
|
|
|
NEW = {"a.": [{ORTH: "a.", LEMMA: "all"}]}
|
|
|
|
|
exceptions = util.update_exc(BASE, NEW)
|
|
|
|
|
# {"a.": [{ORTH: "a.", LEMMA: "all"}], ":)": [{ORTH: ":)"}]}
|
|
|
|
|
|
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code base_exceptions]
|
|
|
|
|
+cell dict
|
|
|
|
|
+cell Base tokenizer exceptions.
|
|
|
|
|
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code *addition_dicts]
|
|
|
|
|
+cell dicts
|
|
|
|
|
+cell Exception dictionaries to add to the base exceptions, in order.
|
|
|
|
|
|
2017-10-03 15:27:22 +03:00
|
|
|
|
+row("foot")
|
2017-05-19 01:02:34 +03:00
|
|
|
|
+cell returns
|
2017-05-13 22:23:12 +03:00
|
|
|
|
+cell dict
|
|
|
|
|
+cell Combined tokenizer exceptions.
|
|
|
|
|
|
|
|
|
|
|
2017-10-03 15:27:22 +03:00
|
|
|
|
+h(3, "util.prints") util.prints
|
2017-05-13 22:23:12 +03:00
|
|
|
|
+tag function
|
2017-05-26 13:42:36 +03:00
|
|
|
|
+tag-new(2)
|
2017-05-13 22:23:12 +03:00
|
|
|
|
|
|
|
|
|
p
|
|
|
|
|
| Print a formatted, text-wrapped message with optional title. If a text
|
|
|
|
|
| argument is a #[code Path], it's converted to a string. Should only
|
2017-10-03 15:27:22 +03:00
|
|
|
|
| be used for interactive components like the command-line interface.
|
2017-05-13 22:23:12 +03:00
|
|
|
|
|
|
|
|
|
+aside-code("Example").
|
|
|
|
|
data_path = Path('/some/path')
|
|
|
|
|
if not data_path.exists():
|
|
|
|
|
util.prints("Can't find the path.", data_path,
|
2017-05-22 14:54:52 +03:00
|
|
|
|
title="Error", exits=1)
|
2017-05-13 22:23:12 +03:00
|
|
|
|
|
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code *texts]
|
|
|
|
|
+cell unicode
|
|
|
|
|
+cell Texts to print. Each argument is rendered as a paragraph.
|
|
|
|
|
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code **kwargs]
|
|
|
|
|
+cell -
|
|
|
|
|
+cell
|
2017-05-22 14:54:52 +03:00
|
|
|
|
| #[code title] is rendered as a coloured headline. #[code exits]
|
|
|
|
|
| performs system exit after printing, using the value of the
|
|
|
|
|
| argument as the exit code, e.g. #[code exits=1].
|
2017-11-07 02:22:43 +03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+h(3, "util.minibatch") util.minibatch
|
|
|
|
|
+tag function
|
|
|
|
|
+tag-new(2)
|
|
|
|
|
|
|
|
|
|
p
|
|
|
|
|
| Iterate over batches of items. #[code size] may be an iterator, so that
|
|
|
|
|
| batch-size can vary on each step.
|
|
|
|
|
|
|
|
|
|
+aside-code("Example").
|
|
|
|
|
batches = minibatch(train_data)
|
|
|
|
|
for batch in batches:
|
|
|
|
|
texts, annotations = zip(*batch)
|
|
|
|
|
nlp.update(texts, annotations)
|
|
|
|
|
|
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code items]
|
|
|
|
|
+cell iterable
|
|
|
|
|
+cell The items to batch up.
|
|
|
|
|
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code size]
|
|
|
|
|
+cell int / iterable
|
|
|
|
|
+cell
|
|
|
|
|
| The batch size(s). Use
|
|
|
|
|
| #[+api("top-level#util.compounding") #[code util.compounding]] or
|
|
|
|
|
| #[+api("top-level#util.decaying") #[code util.decaying]]
|
|
|
|
|
| for an infinite series of compounding or decaying values.
|
|
|
|
|
|
|
|
|
|
+row("foot")
|
|
|
|
|
+cell yields
|
|
|
|
|
+cell list
|
|
|
|
|
+cell The batches.
|
|
|
|
|
|
|
|
|
|
+h(3, "util.compounding") util.compounding
|
|
|
|
|
+tag function
|
|
|
|
|
+tag-new(2)
|
|
|
|
|
|
|
|
|
|
p
|
|
|
|
|
| Yield an infinite series of compounding values. Each time the generator
|
|
|
|
|
| is called, a value is produced by multiplying the previous value by the
|
|
|
|
|
| compound rate.
|
|
|
|
|
|
|
|
|
|
+aside-code("Example").
|
|
|
|
|
sizes = compounding(1., 10., 1.5)
|
|
|
|
|
assert next(sizes) == 1.
|
|
|
|
|
assert next(sizes) == 1. * 1.5
|
|
|
|
|
assert next(sizes) == 1.5 * 1.5
|
|
|
|
|
|
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code start]
|
|
|
|
|
+cell int / float
|
|
|
|
|
+cell The first value.
|
|
|
|
|
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code stop]
|
|
|
|
|
+cell int / float
|
|
|
|
|
+cell The maximum value.
|
|
|
|
|
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code compound]
|
|
|
|
|
+cell int / float
|
|
|
|
|
+cell The compounding factor.
|
|
|
|
|
|
|
|
|
|
+row("foot")
|
|
|
|
|
+cell yields
|
|
|
|
|
+cell int / float
|
|
|
|
|
+cell Compounding values.
|
|
|
|
|
|
|
|
|
|
+h(3, "util.decaying") util.decaying
|
|
|
|
|
+tag function
|
|
|
|
|
+tag-new(2)
|
|
|
|
|
|
|
|
|
|
p
|
|
|
|
|
| Yield an infinite series of linearly decaying values.
|
|
|
|
|
|
|
|
|
|
+aside-code("Example").
|
|
|
|
|
sizes = decaying(1., 10., 0.001)
|
|
|
|
|
assert next(sizes) == 1.
|
|
|
|
|
assert next(sizes) == 1. - 0.001
|
|
|
|
|
assert next(sizes) == 0.999 - 0.001
|
|
|
|
|
|
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code start]
|
|
|
|
|
+cell int / float
|
|
|
|
|
+cell The first value.
|
|
|
|
|
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code end]
|
|
|
|
|
+cell int / float
|
|
|
|
|
+cell The maximum value.
|
|
|
|
|
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code decay]
|
|
|
|
|
+cell int / float
|
|
|
|
|
+cell The decaying factor.
|
|
|
|
|
|
|
|
|
|
+row("foot")
|
|
|
|
|
+cell yields
|
|
|
|
|
+cell int / float
|
|
|
|
|
+cell The decaying values.
|
|
|
|
|
|
|
|
|
|
+h(3, "util.itershuffle") util.itershuffle
|
|
|
|
|
+tag function
|
|
|
|
|
+tag-new(2)
|
|
|
|
|
|
|
|
|
|
p
|
|
|
|
|
| Shuffle an iterator. This works by holding #[code bufsize] items back and
|
|
|
|
|
| yielding them sometime later. Obviously, this is not unbiased – but
|
|
|
|
|
| should be good enough for batching. Larger bufsize means less bias.
|
|
|
|
|
|
|
|
|
|
+aside-code("Example").
|
|
|
|
|
values = range(1000)
|
|
|
|
|
shuffled = itershuffle(values)
|
|
|
|
|
|
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code iterable]
|
|
|
|
|
+cell iterable
|
|
|
|
|
+cell Iterator to shuffle.
|
|
|
|
|
|
|
|
|
|
+row
|
|
|
|
|
+cell #[code bufsize]
|
|
|
|
|
+cell int
|
|
|
|
|
+cell Items to hold back.
|
|
|
|
|
|
|
|
|
|
+row("foot")
|
|
|
|
|
+cell yields
|
|
|
|
|
+cell iterable
|
|
|
|
|
+cell The shuffled iterator.
|