From 72aea8f1057d251c88306c11146c2a9c0ca0c3c2 Mon Sep 17 00:00:00 2001 From: Explosion Bot Date: Mon, 30 Oct 2017 10:03:08 +0100 Subject: [PATCH 01/32] Update vectors.add() to allow setting keys to rows --- spacy/tests/doc/test_doc_api.py | 2 +- spacy/tests/doc/test_token_api.py | 4 +-- spacy/vectors.pyx | 46 +++++++++++++++++++------------ 3 files changed, 32 insertions(+), 20 deletions(-) diff --git a/spacy/tests/doc/test_doc_api.py b/spacy/tests/doc/test_doc_api.py index 46c615973..8f881e811 100644 --- a/spacy/tests/doc/test_doc_api.py +++ b/spacy/tests/doc/test_doc_api.py @@ -209,7 +209,7 @@ def test_doc_api_right_edge(en_tokenizer): def test_doc_api_has_vector(): vocab = Vocab() vocab.clear_vectors(2) - vocab.vectors.add('kitten', numpy.asarray([0., 2.], dtype='f')) + vocab.vectors.add('kitten', vector=numpy.asarray([0., 2.], dtype='f')) doc = Doc(vocab, words=['kitten']) assert doc.has_vector diff --git a/spacy/tests/doc/test_token_api.py b/spacy/tests/doc/test_token_api.py index 0ab723f7a..a52be9731 100644 --- a/spacy/tests/doc/test_token_api.py +++ b/spacy/tests/doc/test_token_api.py @@ -73,8 +73,8 @@ def test_doc_token_api_is_properties(en_vocab): def test_doc_token_api_vectors(): vocab = Vocab() vocab.clear_vectors(2) - vocab.vectors.add('apples', numpy.asarray([0., 2.], dtype='f')) - vocab.vectors.add('oranges', numpy.asarray([0., 1.], dtype='f')) + vocab.vectors.add('apples', vector=numpy.asarray([0., 2.], dtype='f')) + vocab.vectors.add('oranges', vector=numpy.asarray([0., 1.], dtype='f')) doc = Doc(vocab, words=['apples', 'oranges', 'oov']) assert doc.has_vector diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx index 155d7b9d2..d6b59401e 100644 --- a/spacy/vectors.pyx +++ b/spacy/vectors.pyx @@ -21,8 +21,10 @@ cdef class Vectors: Vectors data is kept in the vectors.data attribute, which should be an instance of numpy.ndarray (for CPU vectors) or cupy.ndarray (for GPU vectors). 
`vectors.key2row` is a dictionary mapping word hashes to - rows in the vectors.data table. The array `vectors.keys` keeps the keys in - order, such that `keys[vectors.key2row[key]] == key`. + rows in the vectors.data table. + + Multiple keys can be mapped to the same vector, so len(keys) may be greater + (but not smaller) than data.shape[0]. """ cdef public object data cdef readonly StringStore strings @@ -57,7 +59,7 @@ cdef class Vectors: for i, string in enumerate(self.strings): if i >= self.data.shape[0]: break - self.add(self.strings[string], self.data[i]) + self.add(self.strings[string], vector=self.data[i]) def __reduce__(self): return (Vectors, (self.strings, self.data)) @@ -114,27 +116,36 @@ cdef class Vectors: key = self.strings[key] return key in self.key2row - def add(self, key, vector=None): - """Add a key to the table, optionally setting a vector value as well. + def add(self, key, *, vector=None, row=None): + """Add a key to the table. Keys can be mapped to an existing vector + by setting `row`, or a new vector can be added. key (unicode / int): The key to add. - vector (numpy.ndarray): An optional vector to add. + vector (numpy.ndarray / None): A vector to add for the key. + row (int / None): The row-number of a vector to map the key to. 
""" + if row is not None and vector is not None: + raise ValueError("Only one of 'row' and 'vector' may be set") if isinstance(key, basestring_): key = self.strings.add(key) - if key not in self.key2row: - i = self.i - if i >= self.keys.shape[0]: - self.keys.resize((self.keys.shape[0]*2,)) - self.data.resize((self.data.shape[0]*2, self.data.shape[1])) - self.key2row[key] = self.i + if key in self.key2row and vector is not None: + row = self.key2row[key] + elif key in self.key2row and row is not None: + self.key2row[key] = row + elif key not in self.key2row: + if row is not None: + self.key2row[key] = row + else: + self.key2row[key] = self.i + row = self.i + if row >= self.keys.shape[0]: + self.keys.resize((row*2,)) + self.data.resize((row*2, self.data.shape[1])) self.keys[self.i] = key self.i += 1 - else: - i = self.key2row[key] if vector is not None: - self.data[i] = vector - return i + self.data[row] = vector + return row def items(self): """Iterate over `(string key, vector)` pairs, in order. @@ -143,7 +154,8 @@ cdef class Vectors: """ for i, key in enumerate(self.keys): string = self.strings[key] - yield string, self.data[i] + row = self.key2row[key] + yield string, self.data[row] @property def shape(self): From 5ede7cec9b45a6edf873fbb442369b503592237e Mon Sep 17 00:00:00 2001 From: Explosion Bot Date: Mon, 30 Oct 2017 11:49:11 +0100 Subject: [PATCH 02/32] Improve Lexeme.set_attrs method --- spacy/lexeme.pyx | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index 88748af33..a64e394c3 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -13,6 +13,8 @@ from .typedefs cimport attr_t, flags_t from .attrs cimport IS_ALPHA, IS_ASCII, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_SPACE from .attrs cimport IS_TITLE, IS_UPPER, LIKE_URL, LIKE_NUM, LIKE_EMAIL, IS_STOP from .attrs cimport IS_BRACKET, IS_QUOTE, IS_LEFT_PUNCT, IS_RIGHT_PUNCT, IS_OOV +from .attrs cimport PROB +from .attrs import intify_attrs from . 
import about @@ -68,6 +70,17 @@ cdef class Lexeme: def __hash__(self): return self.c.orth + def set_attrs(self, **attrs): + cdef attr_id_t attr + attrs = intify_attrs(attrs) + for attr, value in attrs.items(): + if attr == PROB: + self.c.prob = value + elif isinstance(value, int) or isinstance(value, long): + Lexeme.set_struct_attr(self.c, attr, value) + else: + Lexeme.set_struct_attr(self.c, attr, self.vocab.strings.add(value)) + def set_flag(self, attr_id_t flag_id, bint value): """Change the value of a boolean flag. From 08869c19fd38dd9d46932ddb2bd0443834116eb5 Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 30 Oct 2017 13:15:13 +0100 Subject: [PATCH 03/32] Merge mixins and mixins-base The distinction was never clear anyways and it was progressively getting messier. So all mixins live in one file now. --- website/_includes/_mixins-base.jade | 244 ---------------------------- website/_includes/_mixins.jade | 231 +++++++++++++++++++++++++- 2 files changed, 230 insertions(+), 245 deletions(-) delete mode 100644 website/_includes/_mixins-base.jade diff --git a/website/_includes/_mixins-base.jade b/website/_includes/_mixins-base.jade deleted file mode 100644 index 689d97a88..000000000 --- a/website/_includes/_mixins-base.jade +++ /dev/null @@ -1,244 +0,0 @@ -//- 💫 MIXINS > BASE - -//- Section - id - [string] anchor assigned to section (used for breadcrumb navigation) - -mixin section(id) - section.o-section(id="section-" + id data-section=id) - block - - -//- Aside wrapper - label - [string] aside label - -mixin aside-wrapper(label) - aside.c-aside - .c-aside__content(role="complementary")&attributes(attributes) - if label - h4.u-text-label.u-text-label--dark=label - - block - - -//- SVG from map (uses embedded SVG sprite) - name - [string] SVG symbol id - width - [integer] width in px - height - [integer] height in px (default: same as width) - -mixin svg(name, width, height) - svg(aria-hidden="true" viewBox="0 0 #{width} #{height || width}" width=width 
height=(height || width))&attributes(attributes) - use(xlink:href="#svg_#{name}") - - -//- Icon - name - [string] icon name (will be used as symbol id: #svg_{name}) - width - [integer] icon width (default: 20) - height - [integer] icon height (defaults to width) - -mixin icon(name, width, height) - - var width = width || 20 - - var height = height || width - +svg(name, width, height).o-icon(style="min-width: #{width}px")&attributes(attributes) - - -//- Pro/Con/Neutral icon - icon - [string] "pro", "con" or "neutral" (default: "neutral") - size - [integer] icon size (optional) - -mixin procon(icon, label, show_label, size) - - var colors = { yes: "green", no: "red", neutral: "subtle" } - span.u-nowrap - +icon(icon, size || 20)(class="u-color-#{colors[icon] || 'subtle'}").o-icon--inline&attributes(attributes) - span.u-text-small(class=show_label ? null : "u-hidden")=(label || icon) - -//- Headlines Helper Mixin - level - [integer] 1, 2, 3, 4, or 5 - -mixin headline(level) - if level == 1 - h1.u-heading-1&attributes(attributes) - block - - else if level == 2 - h2.u-heading-2&attributes(attributes) - block - - else if level == 3 - h3.u-heading-3&attributes(attributes) - block - - else if level == 4 - h4.u-heading-4&attributes(attributes) - block - - else if level == 5 - h5.u-heading-5&attributes(attributes) - block - - -//- Permalink rendering - id - [string] permalink ID used for link anchor - -mixin permalink(id) - if id - a.u-permalink(href="##{id}") - block - - else - block - - -//- Quickstart widget - quickstart.js with manual markup, inspired by PyTorch's "Getting started" - groups - [object] option groups, uses global variable QUICKSTART - headline - [string] optional text to be rendered as widget headline - -mixin quickstart(groups, headline, description, hide_results) - .c-quickstart.o-block-small#qs - .c-quickstart__content - if headline - +h(2)=headline - if description - p=description - for group in groups - 
.c-quickstart__group.u-text-small(data-qs-group=group.id) - if group.title - .c-quickstart__legend=group.title - if group.help - | #[+help(group.help)] - .c-quickstart__fields - for option in group.options - input.c-quickstart__input(class="c-quickstart__input--" + (group.input_style ? group.input_style : group.multiple ? "check" : "radio") type=group.multiple ? "checkbox" : "radio" name=group.id id="qs-#{option.id}" value=option.id checked=option.checked) - label.c-quickstart__label.u-text-tiny(for="qs-#{option.id}")!=option.title - if option.meta - | #[span.c-quickstart__label__meta (#{option.meta})] - if option.help - | #[+help(option.help)] - - if hide_results - block - else - pre.c-code-block - code.c-code-block__content.c-quickstart__code(data-qs-results="") - block - - -//- Quickstart code item - data - [object] Rendering conditions (keyed by option group ID, value: option) - style - [string] modifier ID for line style - -mixin qs(data, style) - - args = {} - for value, setting in data - - args['data-qs-' + setting] = value - span.c-quickstart__line(class="c-quickstart__line--#{style || 'bash'}")&attributes(args) - block - - -//- Terminal-style code window - label - [string] title displayed in top bar of terminal window - -mixin terminal(label) - .x-terminal - .x-terminal__icons: span - .u-padding-small.u-text-label.u-text-center=label - - +code.x-terminal__code - block - -//- Chart.js - id - [string] chart ID, will be assigned as #chart_{id} - -mixin chart(id, height) - figure.o-block&attributes(attributes) - canvas(id="chart_#{id}" width="800" height=(height || "400") style="max-width: 100%") - - -//- Gitter chat button and widget - button - [string] text shown on button - label - [string] title of chat window (default: same as button) - -mixin gitter(button, label) - aside.js-gitter.c-chat.is-collapsed(data-title=(label || button)) - - button.js-gitter-button.c-chat__button.u-text-tag - +icon("chat", 16).o-icon--inline - !=button - - -//- Badge - image - 
[string] path to badge image - url - [string] badge link - -mixin badge(image, url) - +a(url).u-padding-small.u-hide-link&attributes(attributes) - img.o-badge(src=image alt=url height="20") - - -//- spaCy logo - -mixin logo() - +svg("spacy", 675, 215).o-logo&attributes(attributes) - - -//- Landing - -mixin landing-header() - header.c-landing - .c-landing__wrapper - .c-landing__content - block - -mixin landing-banner(headline, label) - .c-landing__banner.u-padding.o-block.u-color-light - +grid.c-landing__banner__content.o-no-block - +grid-col("third") - h3.u-heading.u-heading-1 - if label - div - span.u-text-label.u-text-label--light=label - !=headline - - +grid-col("two-thirds").c-landing__banner__text - block - - -mixin landing-logos(title, logos) - .o-content.u-text-center&attributes(attributes) - h3.u-heading.u-text-label.u-color-dark=title - - each row, i in logos - - var is_last = i == logos.length - 1 - +grid("center").o-inline-list.o-no-block(class=is_last ? "o-no-block" : null) - each details, name in row - +a(details[0]).u-padding-medium - +icon(name, details[1], details[2]) - - if is_last - block - - -//- Under construction (temporary) - Marks sections that still need to be completed for the v2.0 release. - -mixin under-construction() - +infobox("Under construction", "🚧") - | This section is still being written and will be updated for the v2.0 - | release. Is there anything that you think should definitely mentioned or - | explained here? Any examples you'd like to see? #[strong Let us know] - | on the #[+a(gh("spacy") + "/issues/1105") v2.0 alpha thread] on GitHub! - - -//- Alpha infobox (temporary) - Added in the templates to notify user that they're visiting the alpha site. - -mixin alpha-info() - +infobox("You are viewing the spaCy v2.0.0 alpha docs", "⚠️") - strong This page is part of the alpha documentation for spaCy v2.0. - | It does not reflect the state of the latest stable release. 
- | Because v2.0 is still under development, the implementation - | may differ from the intended state described here. See the - | #[+a(gh("spaCy") + "/releases/tag/v2.0.0-alpha") release notes] - | for details on how to install and test the new version. To - | read the official docs for spaCy v1.x, - | #[+a("https://spacy.io/docs") go here]. diff --git a/website/_includes/_mixins.jade b/website/_includes/_mixins.jade index 5dace47e0..902328906 100644 --- a/website/_includes/_mixins.jade +++ b/website/_includes/_mixins.jade @@ -1,7 +1,39 @@ //- 💫 INCLUDES > MIXINS include _functions -include _mixins-base + + +//- Section + id - [string] anchor assigned to section (used for breadcrumb navigation) + +mixin section(id) + section.o-section(id="section-" + id data-section=id) + block + + +//- Headlines Helper Mixin + level - [integer] 1, 2, 3, 4, or 5 + +mixin headline(level) + if level == 1 + h1.u-heading-1&attributes(attributes) + block + + else if level == 2 + h2.u-heading-2&attributes(attributes) + block + + else if level == 3 + h3.u-heading-3&attributes(attributes) + block + + else if level == 4 + h4.u-heading-4&attributes(attributes) + block + + else if level == 5 + h5.u-heading-5&attributes(attributes) + block //- Headlines @@ -18,6 +50,18 @@ mixin h(level, id, source) span Source #[+icon("code", 14).o-icon--inline] +//- Permalink rendering + id - [string] permalink ID used for link anchor + +mixin permalink(id) + if id + a.u-permalink(href="##{id}") + block + + else + block + + //- External links url - [string] link href trusted - [boolean] if not set / false, rel="noopener nofollow" is added @@ -63,6 +107,18 @@ mixin help(tooltip, icon_size) +icon("help_o", icon_size || 16).o-icon--inline +//- Aside wrapper + label - [string] aside label + +mixin aside-wrapper(label) + aside.c-aside + .c-aside__content(role="complementary")&attributes(attributes) + if label + h4.u-text-label.u-text-label--dark=label + + block + + //- Aside for text label - [string] aside title 
(optional) @@ -112,6 +168,37 @@ mixin infobox-logos(...logos) | #[+icon(logo[0], logo[1], logo[2]).u-color-dark] +//- SVG from map (uses embedded SVG sprite) + name - [string] SVG symbol id + width - [integer] width in px + height - [integer] height in px (default: same as width) + +mixin svg(name, width, height) + svg(aria-hidden="true" viewBox="0 0 #{width} #{height || width}" width=width height=(height || width))&attributes(attributes) + use(xlink:href="#svg_#{name}") + + +//- Icon + name - [string] icon name (will be used as symbol id: #svg_{name}) + width - [integer] icon width (default: 20) + height - [integer] icon height (defaults to width) + +mixin icon(name, width, height) + - var width = width || 20 + - var height = height || width + +svg(name, width, height).o-icon(style="min-width: #{width}px")&attributes(attributes) + + +//- Pro/Con/Neutral icon + icon - [string] "pro", "con" or "neutral" (default: "neutral") + size - [integer] icon size (optional) + +mixin procon(icon, label, show_label, size) + - var colors = { yes: "green", no: "red", neutral: "subtle" } + span.u-nowrap + +icon(icon, size || 20)(class="u-color-#{colors[icon] || 'subtle'}").o-icon--inline&attributes(attributes) + span.u-text-small(class=show_label ? 
null : "u-hidden")=(label || icon) + //- Link button url - [string] link href @@ -238,6 +325,14 @@ mixin graphic(original) +button(original, false, "secondary", "small") View large graphic +//- Chart.js + id - [string] chart ID, will be assigned as #chart_{id} + +mixin chart(id, height) + figure.o-block&attributes(attributes) + canvas(id="chart_#{id}" width="800" height=(height || "400") style="max-width: 100%") + + //- Labels mixin label() @@ -445,3 +540,137 @@ mixin annotation-row(annots, style) else +cell=cell block + + +//- spaCy logo + +mixin logo() + +svg("spacy", 675, 215).o-logo&attributes(attributes) + + +//- Gitter chat button and widget + button - [string] text shown on button + label - [string] title of chat window (default: same as button) + +mixin gitter(button, label) + aside.js-gitter.c-chat.is-collapsed(data-title=(label || button)) + + button.js-gitter-button.c-chat__button.u-text-tag + +icon("chat", 16).o-icon--inline + !=button + + +//- Badge + image - [string] path to badge image + url - [string] badge link + +mixin badge(image, url) + +a(url).u-padding-small.u-hide-link&attributes(attributes) + img.o-badge(src=image alt=url height="20") + + +//- Quickstart widget + quickstart.js with manual markup, inspired by PyTorch's "Getting started" + groups - [object] option groups, uses global variable QUICKSTART + headline - [string] optional text to be rendered as widget headline + +mixin quickstart(groups, headline, description, hide_results) + .c-quickstart.o-block-small#qs + .c-quickstart__content + if headline + +h(2)=headline + if description + p=description + for group in groups + .c-quickstart__group.u-text-small(data-qs-group=group.id) + if group.title + .c-quickstart__legend=group.title + if group.help + | #[+help(group.help)] + .c-quickstart__fields + for option in group.options + input.c-quickstart__input(class="c-quickstart__input--" + (group.input_style ? group.input_style : group.multiple ? "check" : "radio") type=group.multiple ? 
"checkbox" : "radio" name=group.id id="qs-#{option.id}" value=option.id checked=option.checked) + label.c-quickstart__label.u-text-tiny(for="qs-#{option.id}")!=option.title + if option.meta + | #[span.c-quickstart__label__meta (#{option.meta})] + if option.help + | #[+help(option.help)] + + if hide_results + block + else + pre.c-code-block + code.c-code-block__content.c-quickstart__code(data-qs-results="") + block + + +//- Quickstart code item + data - [object] Rendering conditions (keyed by option group ID, value: option) + style - [string] modifier ID for line style + +mixin qs(data, style) + - args = {} + for value, setting in data + - args['data-qs-' + setting] = value + span.c-quickstart__line(class="c-quickstart__line--#{style || 'bash'}")&attributes(args) + block + + +//- Terminal-style code window + label - [string] title displayed in top bar of terminal window + +mixin terminal(label) + .x-terminal + .x-terminal__icons: span + .u-padding-small.u-text-label.u-text-center=label + + +code.x-terminal__code + block + + +//- Landing + +mixin landing-header() + header.c-landing + .c-landing__wrapper + .c-landing__content + block + +mixin landing-banner(headline, label) + .c-landing__banner.u-padding.o-block.u-color-light + +grid.c-landing__banner__content.o-no-block + +grid-col("third") + h3.u-heading.u-heading-1 + if label + div + span.u-text-label.u-text-label--light=label + !=headline + + +grid-col("two-thirds").c-landing__banner__text + block + + +mixin landing-logos(title, logos) + .o-content.u-text-center&attributes(attributes) + h3.u-heading.u-text-label.u-color-dark=title + + each row, i in logos + - var is_last = i == logos.length - 1 + +grid("center").o-inline-list.o-no-block(class=is_last ? "o-no-block" : null) + each details, name in row + +a(details[0]).u-padding-medium + +icon(name, details[1], details[2]) + + if is_last + block + + +//- Under construction (temporary) + Marks sections that still need to be completed for the v2.0 release. 
+ +mixin under-construction() + +infobox("Under construction", "🚧") + | This section is still being written and will be updated for the v2.0 + | release. Is there anything that you think should definitely mentioned or + | explained here? Any examples you'd like to see? #[strong Let us know] + | on the #[+a(gh("spacy") + "/issues/1105") v2.0 alpha thread] on GitHub! From 25f6331550bae1fb25685ffe3e6a3a525aee2a1a Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 30 Oct 2017 13:15:30 +0100 Subject: [PATCH 04/32] Allow other style arguments on +grid-col --- website/_includes/_mixins.jade | 4 ++-- website/assets/css/_base/_grid.sass | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/website/_includes/_mixins.jade b/website/_includes/_mixins.jade index 902328906..94d84b4fe 100644 --- a/website/_includes/_mixins.jade +++ b/website/_includes/_mixins.jade @@ -448,8 +448,8 @@ mixin grid(...style) width - [string] "quarter", "third", "half", "two-thirds", "three-quarters" see $grid in assets/css/_variables.sass -mixin grid-col(width) - .o-grid__col(class="o-grid__col--#{width}")&attributes(attributes) +mixin grid-col(...style) + .o-grid__col(class=prefixArgs(style, "o-grid__col"))&attributes(attributes) block diff --git a/website/assets/css/_base/_grid.sass b/website/assets/css/_base/_grid.sass index 536c657db..16cf40f71 100644 --- a/website/assets/css/_base/_grid.sass +++ b/website/assets/css/_base/_grid.sass @@ -48,6 +48,9 @@ flex: 0 0 100% flex-flow: column wrap + &.o-grid__col--no-gutter + margin-top: 0 + // Fix overflow issue in old browsers & > * From ae454469789c537e5b9ce710883bee01d311e497 Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 30 Oct 2017 13:15:46 +0100 Subject: [PATCH 05/32] Remove comment --- website/_includes/_page-docs.jade | 3 --- 1 file changed, 3 deletions(-) diff --git a/website/_includes/_page-docs.jade b/website/_includes/_page-docs.jade index 703102487..6295491a6 100644 --- a/website/_includes/_page-docs.jade +++ 
b/website/_includes/_page-docs.jade @@ -25,9 +25,6 @@ main.o-main.o-main--sidebar.o-main--aside +button(gh("spacy", source), false, "secondary", "small").u-nowrap | Source #[+icon("code", 14)] - //-if ALPHA - //- +alpha-info - if IS_MODELS include _page_models else From 74dd0ee2c2418f26a263773ded2171ac2eaf44da Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 30 Oct 2017 13:16:06 +0100 Subject: [PATCH 06/32] Prevent responsive tables from scrolling vertically --- website/assets/css/_components/_tables.sass | 1 + 1 file changed, 1 insertion(+) diff --git a/website/assets/css/_components/_tables.sass b/website/assets/css/_components/_tables.sass index 021b9521a..99ae998ff 100644 --- a/website/assets/css/_components/_tables.sass +++ b/website/assets/css/_components/_tables.sass @@ -51,6 +51,7 @@ @include scroll-shadow-base($color-front) display: inline-block overflow-x: auto + overflow-y: hidden width: auto -webkit-overflow-scrolling: touch From df149455f9b2c8acb371a0fb96acfae982565173 Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 30 Oct 2017 13:16:20 +0100 Subject: [PATCH 07/32] Don't ever wrap navigation bar contents --- website/assets/css/_components/_navigation.sass | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/assets/css/_components/_navigation.sass b/website/assets/css/_components/_navigation.sass index 1543de5fb..2f1cfb6e3 100644 --- a/website/assets/css/_components/_navigation.sass +++ b/website/assets/css/_components/_navigation.sass @@ -8,7 +8,7 @@ align-items: center display: flex justify-content: space-between - flex-flow: row wrap + flex-flow: row nowrap padding: 0 2rem 0 1rem z-index: 30 width: 100% From 5453821a9f93390c3cefbc4d976aad823594ff7c Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 30 Oct 2017 13:53:49 +0100 Subject: [PATCH 08/32] Update NER annotation scheme Add note on training data sources and include coarse-grained Wikipedia scheme --- spacy/glossary.py | 12 +++++- website/api/_annotation/_named-entities.jade | 40
++++++++++++++++++-- website/usage/_install/_changelog.jade | 2 +- 3 files changed, 48 insertions(+), 6 deletions(-) diff --git a/spacy/glossary.py b/spacy/glossary.py index 78e61f8a7..c17cb7467 100644 --- a/spacy/glossary.py +++ b/spacy/glossary.py @@ -300,5 +300,15 @@ GLOSSARY = { 'MONEY': 'Monetary values, including unit', 'QUANTITY': 'Measurements, as of weight or distance', 'ORDINAL': '"first", "second", etc.', - 'CARDINAL': 'Numerals that do not fall under another type' + 'CARDINAL': 'Numerals that do not fall under another type', + + + # Named Entity Recognition + # Wikipedia + # http://www.sciencedirect.com/science/article/pii/S0004370212000276 + # https://pdfs.semanticscholar.org/5744/578cc243d92287f47448870bb426c66cc941.pdf + + 'PER': 'Named person or family.', + 'MISC': ('Miscellaneous entities, e.g. events, nationalities, ' + 'products or works of art'), } diff --git a/website/api/_annotation/_named-entities.jade b/website/api/_annotation/_named-entities.jade index 93e705c72..4cc8a707f 100644 --- a/website/api/_annotation/_named-entities.jade +++ b/website/api/_annotation/_named-entities.jade @@ -1,6 +1,11 @@ //- 💫 DOCS > API > ANNOTATION > NAMED ENTITIES -+table([ "Type", "Description" ]) +p + | Models trained on the + | #[+a("https://catalog.ldc.upenn.edu/ldc2013t19") OntoNotes 5] corpus + | support the following entity types: + ++table(["Type", "Description"]) +row +cell #[code PERSON] +cell People, including fictional. @@ -45,9 +50,6 @@ +cell #[code LANGUAGE] +cell Any named language. -p The following values are also annotated in a style similar to names: - -+table([ "Type", "Description" ]) +row +cell #[code DATE] +cell Absolute or relative dates or periods. @@ -75,3 +77,33 @@ p The following values are also annotated in a style similar to names: +row +cell #[code CARDINAL] +cell Numerals that do not fall under another type. 
+ ++h(4, "ner-wikipedia-scheme") Wikipedia scheme + +p + | Models trained on Wikipedia corpus + | (#[+a("http://www.sciencedirect.com/science/article/pii/S0004370212000276") Nothman et al., 2013]) + | use a less fine-grained NER annotation scheme and recognise the + | following entities: + ++table(["Type", "Description"]) + +row + +cell #[code PER] + +cell Named person or family. + + +row + +cell #[code LOC] + +cell + | Name of politically or geographically defined location (cities, + | provinces, countries, international regions, bodies of water, + | mountains). + + +row + +cell #[code ORG] + +cell Named corporate, governmental, or other organizational entity. + + +row + +cell #[code MISC] + +cell + | Miscellaneous entities, e.g. events, nationalities, products or + | works of art. diff --git a/website/usage/_install/_changelog.jade b/website/usage/_install/_changelog.jade index e966b6695..7b802ce63 100644 --- a/website/usage/_install/_changelog.jade +++ b/website/usage/_install/_changelog.jade @@ -3,7 +3,7 @@ +h(2, "changelog") Changelog +button(gh("spacy") + "/releases", false, "secondary", "small").u-float-right.u-nowrap View releases -div(data-tpl="changelog" data-tpl-key="error") +div(data-tpl="changelog" data-tpl-key="error" style="display: none") +infobox | Unable to load changelog from GitHub. Please see the | #[+a(gh("spacy") + "/releases") releases page] instead. 
From 41d0f1665a8ce59bf949bd4df2c3e42b41f7cf09 Mon Sep 17 00:00:00 2001 From: Explosion Bot Date: Mon, 30 Oct 2017 16:07:50 +0100 Subject: [PATCH 09/32] Fix add_attrs for cluster --- spacy/lexeme.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index a64e394c3..10c934ba4 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -76,6 +76,8 @@ cdef class Lexeme: for attr, value in attrs.items(): if attr == PROB: self.c.prob = value + elif attr == CLUSTER: + self.c.cluster = int(value) elif isinstance(value, int) or isinstance(value, long): Lexeme.set_struct_attr(self.c, attr, value) else: From ab5d5ed8808dd7809e58723cce2c53e6cf0ffea8 Mon Sep 17 00:00:00 2001 From: Explosion Bot Date: Mon, 30 Oct 2017 16:08:09 +0100 Subject: [PATCH 10/32] Fix vectors.add() --- spacy/vectors.pyx | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx index d6b59401e..86c8450ce 100644 --- a/spacy/vectors.pyx +++ b/spacy/vectors.pyx @@ -56,10 +56,11 @@ cdef class Vectors: self.i = 0 self.key2row = {} self.keys = numpy.zeros((self.data.shape[0],), dtype='uint64') - for i, string in enumerate(self.strings): - if i >= self.data.shape[0]: - break - self.add(self.strings[string], vector=self.data[i]) + if data is not None: + for i, string in enumerate(self.strings): + if i >= self.data.shape[0]: + break + self.add(self.strings[string], vector=self.data[i]) def __reduce__(self): return (Vectors, (self.strings, self.data)) @@ -124,25 +125,22 @@ cdef class Vectors: vector (numpy.ndarray / None): A vector to add for the key. row (int / None): The row-number of a vector to map the key to. 
""" - if row is not None and vector is not None: - raise ValueError("Only one of 'row' and 'vector' may be set") if isinstance(key, basestring_): key = self.strings.add(key) - if key in self.key2row and vector is not None: + if key in self.key2row and row is None: row = self.key2row[key] elif key in self.key2row and row is not None: self.key2row[key] = row - elif key not in self.key2row: - if row is not None: - self.key2row[key] = row - else: - self.key2row[key] = self.i - row = self.i - if row >= self.keys.shape[0]: - self.keys.resize((row*2,)) - self.data.resize((row*2, self.data.shape[1])) - self.keys[self.i] = key + elif row is None: + row = self.i self.i += 1 + if row >= self.keys.shape[0]: + self.keys.resize((row*2,)) + self.data.resize((row*2, self.data.shape[1])) + self.keys[self.i] = key + + self.key2row[key] = row + self.keys[row] = key if vector is not None: self.data[row] = vector return row From 7b56b2f04bb14ae02646921010743f5bfd759f48 Mon Sep 17 00:00:00 2001 From: Explosion Bot Date: Mon, 30 Oct 2017 16:08:50 +0100 Subject: [PATCH 11/32] Add Vocab.cfg attr, to hold stuff like oov probs --- spacy/vocab.pxd | 1 + spacy/vocab.pyx | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/spacy/vocab.pxd b/spacy/vocab.pxd index 8005cbf06..b12bccf38 100644 --- a/spacy/vocab.pxd +++ b/spacy/vocab.pxd @@ -32,6 +32,7 @@ cdef class Vocab: cdef readonly int length cdef public object data_dir cdef public object lex_attr_getters + cdef public object cfg cdef const LexemeC* get(self, Pool mem, unicode string) except NULL cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index 8b09d7ee7..937d4b69d 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -27,7 +27,7 @@ cdef class Vocab: C-data that is shared between `Doc` objects. 
""" def __init__(self, lex_attr_getters=None, tag_map=None, lemmatizer=None, - strings=tuple(), **deprecated_kwargs): + strings=tuple(), oov_prob=-20., **deprecated_kwargs): """Create the vocabulary. lex_attr_getters (dict): A dictionary mapping attribute IDs to @@ -43,6 +43,7 @@ cdef class Vocab: tag_map = tag_map if tag_map is not None else {} if lemmatizer in (None, True, False): lemmatizer = Lemmatizer({}, {}, {}) + self.cfg = {'oov_prob': oov_prob} self.mem = Pool() self._by_hash = PreshMap() self._by_orth = PreshMap() From aa64031751db9c358d5dc93ac2f112eaefd71829 Mon Sep 17 00:00:00 2001 From: Explosion Bot Date: Mon, 30 Oct 2017 16:09:04 +0100 Subject: [PATCH 12/32] Fix clear_vectors() method on Vocab --- spacy/vocab.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index 937d4b69d..160f0d5bd 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -239,13 +239,13 @@ cdef class Vocab: def vectors_length(self): return self.vectors.data.shape[1] - def clear_vectors(self, new_dim=None): + def clear_vectors(self, width=None): """Drop the current vector table. Because all vectors must be the same width, you have to call this to change the size of the vectors. """ - if new_dim is None: - new_dim = self.vectors.data.shape[1] - self.vectors = Vectors(self.strings, width=new_dim) + if width is None: + width = self.vectors.data.shape[1] + self.vectors = Vectors(self.strings, width=width) def get_vector(self, orth): """Retrieve a vector for a word in the vocabulary. 
Words can be looked From 0fc1209421eeeaa04d19152d757091880476e8aa Mon Sep 17 00:00:00 2001 From: Explosion Bot Date: Mon, 30 Oct 2017 16:14:50 +0100 Subject: [PATCH 13/32] Wire up new vocab command --- spacy/__main__.py | 3 ++- spacy/cli/__init__.py | 1 + spacy/cli/vocab.py | 49 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 spacy/cli/vocab.py diff --git a/spacy/__main__.py b/spacy/__main__.py index 48460c9e3..770ce5296 100644 --- a/spacy/__main__.py +++ b/spacy/__main__.py @@ -7,7 +7,7 @@ if __name__ == '__main__': import plac import sys from spacy.cli import download, link, info, package, train, convert, model - from spacy.cli import profile, evaluate, validate + from spacy.cli import vocab, profile, evaluate, validate from spacy.util import prints commands = { @@ -19,6 +19,7 @@ if __name__ == '__main__': 'convert': convert, 'package': package, 'model': model, + 'model': vocab, 'profile': profile, 'validate': validate } diff --git a/spacy/cli/__init__.py b/spacy/cli/__init__.py index 2595dcc03..b807480ca 100644 --- a/spacy/cli/__init__.py +++ b/spacy/cli/__init__.py @@ -7,4 +7,5 @@ from .train import train from .evaluate import evaluate from .convert import convert from .model import model +from .vocab import make_vocab as vocab from .validate import validate diff --git a/spacy/cli/vocab.py b/spacy/cli/vocab.py new file mode 100644 index 000000000..4424955d1 --- /dev/null +++ b/spacy/cli/vocab.py @@ -0,0 +1,49 @@ +'''Compile a vocabulary from a lexicon jsonl file and word vectors.''' +# coding: utf8 +from __future__ import unicode_literals + +import plac +import json +import spacy +import numpy +from spacy.util import ensure_path + + +@plac.annotations( + lang=("model language", "positional", None, str), + output_dir=("output directory to store model in", "positional", None, str), + lexemes_loc=("location of JSONL-formatted lexical data", "positional", + None, str), + vectors_loc=("location of 
vectors data, as numpy .npz (optional)", + "positional", None, str), + version=("Model version", "option", "V", str), + meta_path=("Optional path to meta.json. All relevant properties will be " + "overwritten.", "option", "m", Path)) + +def make_vocab(lang, output_dir, lexemes_loc, vectors_loc=None): + out_dir = ensure_path(output_dir) + jsonl_loc = ensure_path(lexemes_loc) + nlp = spacy.blank(lang) + for word in nlp.vocab: + word.rank = 0 + with jsonl_loc.open() as file_: + for line in file_: + if line.strip(): + attrs = json.loads(line) + if 'settings' in attrs: + nlp.vocab.cfg.update(attrs['settings']) + else: + lex = nlp.vocab[attrs['orth']] + lex.set_attrs(**attrs) + assert lex.rank == attrs['id'] + if vectors_loc is not None: + vector_data = numpy.load(open(vectors_loc, 'rb')) + nlp.vocab.clear_vectors(width=vector_data.shape[1]) + added = 0 + for word in nlp.vocab: + if word.rank: + nlp.vocab.vectors.add(word.orth_, row=word.rank, + vector=vector_data[word.rank]) + added += 1 + nlp.to_disk(out_dir) + return nlp From b46bdce8d28a2773284cdab02956b7d494e2c1e1 Mon Sep 17 00:00:00 2001 From: Explosion Bot Date: Mon, 30 Oct 2017 16:18:10 +0100 Subject: [PATCH 14/32] Add missing import --- spacy/cli/vocab.py | 1 + 1 file changed, 1 insertion(+) diff --git a/spacy/cli/vocab.py b/spacy/cli/vocab.py index 4424955d1..3b4d0a0b8 100644 --- a/spacy/cli/vocab.py +++ b/spacy/cli/vocab.py @@ -2,6 +2,7 @@ # coding: utf8 from __future__ import unicode_literals +from pathlib import Path import plac import json import spacy From 05a1dd570ec3a0d3e3493c9f183eb74fe57a053a Mon Sep 17 00:00:00 2001 From: Explosion Bot Date: Mon, 30 Oct 2017 16:19:22 +0100 Subject: [PATCH 15/32] Fix vocab script --- spacy/cli/vocab.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/spacy/cli/vocab.py b/spacy/cli/vocab.py index 3b4d0a0b8..c1bab825c 100644 --- a/spacy/cli/vocab.py +++ b/spacy/cli/vocab.py @@ -18,10 +18,8 @@ from spacy.util import ensure_path 
vectors_loc=("location of vectors data, as numpy .npz (optional)", "positional", None, str), version=("Model version", "option", "V", str), - meta_path=("Optional path to meta.json. All relevant properties will be " - "overwritten.", "option", "m", Path)) - -def make_vocab(lang, output_dir, lexemes_loc, vectors_loc=None): +) +def make_vocab(lang, output_dir, lexemes_loc, vectors_loc=None, version=None): out_dir = ensure_path(output_dir) jsonl_loc = ensure_path(lexemes_loc) nlp = spacy.blank(lang) From d0cf12c8c7873feb0cc5ae441121edd1aad58d3a Mon Sep 17 00:00:00 2001 From: Explosion Bot Date: Mon, 30 Oct 2017 16:22:03 +0100 Subject: [PATCH 16/32] Fix off-by-one error in vectors --- spacy/vectors.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx index 86c8450ce..368b73866 100644 --- a/spacy/vectors.pyx +++ b/spacy/vectors.pyx @@ -137,7 +137,7 @@ cdef class Vectors: if row >= self.keys.shape[0]: self.keys.resize((row*2,)) self.data.resize((row*2, self.data.shape[1])) - self.keys[self.i] = key + self.keys[row] = key self.key2row[key] = row self.keys[row] = key From e026b29ea92c22de3ff11a56d6648ff404138c80 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 30 Oct 2017 17:59:43 +0100 Subject: [PATCH 17/32] Add prune_vectors method to Vocab --- spacy/vocab.pyx | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index 160f0d5bd..ff6c5b844 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -5,6 +5,7 @@ import numpy import dill from collections import OrderedDict +from thinc.neural.util import get_array_module from .lexeme cimport EMPTY_LEXEME from .lexeme cimport Lexeme from .strings cimport hash_string @@ -247,6 +248,44 @@ cdef class Vocab: width = self.vectors.data.shape[1] self.vectors = Vectors(self.strings, width=width) + def prune_vectors(self, nr_row, batch_size=1024): + """Reduce the current vector table to `nr_row` unique 
entries. Words + mapped to the discarded vectors will be remapped to the closest vector + among those remaining. + + For example, suppose the original table had vectors for the words: + ['sat', 'cat', 'feline', 'reclined']. If we prune the vector table to, + two rows, we would discard the vectors for 'feline' and 'reclined'. + These words would then be remapped to the closest remaining vector + -- so "feline" would have the same vector as "cat", and "reclined" + would have the same vector as "sat". + + The similarities are judged by cosine. The original vectors may + be large, so the cosines are calculated in minibatches, to reduce + memory usage. + """ + xp = get_array_module(self.vectors.data) + # Work in batches, to avoid memory problems. + keep = self.vectors.data[:nr_row] + toss = self.vectors.data[nr_row:] + # Normalize the vectors, so cosine similarity is just dot product. + # Note we can't modify the ones we're keeping in-place... + keep = keep / (xp.linalg.norm(keep)+1e-8) + keep = xp.ascontiguousarray(keep.T) + neighbours = xp.zeros((toss.shape[0],), dtype='i') + for i in range(0, toss.shape[0], batch_size): + batch = toss[i : i+batch_size] + batch /= xp.linalg.norm(batch)+1e-8 + neighbours[i:i+batch_size] = xp.dot(batch, keep).argmax(axis=1) + for lex in self: + # If we're losing the vector for this word, map it to the nearest + # vector we're keeping. + if lex.rank >= nr_row: + lex.rank = neighbours[lex.rank-nr_row] + self.vectors.add(lex.orth, row=lex.rank) + # Make copy, to encourage the original table to be garbage collected. + self.vectors.data = xp.ascontiguousarray(self.vectors.data[:nr_row]) + def get_vector(self, orth): """Retrieve a vector for a word in the vocabulary. Words can be looked up by string or int ID. 
If no vectors data is loaded, ValueError is From e98451b5f7dff6a4e91e9a47cd37cdadf24b4e47 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 30 Oct 2017 18:00:10 +0100 Subject: [PATCH 18/32] Add -prune-vectors argument to spacy.cly.train --- spacy/cli/train.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/spacy/cli/train.py b/spacy/cli/train.py index fb96e6c05..34117db22 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -32,6 +32,7 @@ numpy.random.seed(0) n_sents=("number of sentences", "option", "ns", int), use_gpu=("Use GPU", "option", "g", int), vectors=("Model to load vectors from", "option", "v"), + vectors_limit=("Truncate to N vectors (requires -v)", "option", None, int), no_tagger=("Don't train tagger", "flag", "T", bool), no_parser=("Don't train parser", "flag", "P", bool), no_entities=("Don't train NER", "flag", "N", bool), @@ -40,9 +41,9 @@ numpy.random.seed(0) meta_path=("Optional path to meta.json. All relevant properties will be " "overwritten.", "option", "m", Path)) def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0, - use_gpu=-1, vectors=None, no_tagger=False, no_parser=False, - no_entities=False, gold_preproc=False, version="0.0.0", - meta_path=None): + use_gpu=-1, vectors=None, vectors_limit=None, no_tagger=False, + no_parser=False, no_entities=False, gold_preproc=False, + version="0.0.0", meta_path=None): """ Train a model. Expects data in spaCy's JSON format. 
""" @@ -94,6 +95,8 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0, nlp.meta.update(meta) if vectors: util.load_model(vectors, vocab=nlp.vocab) + if vectors_limit is not None: + nlp.vocab.prune_vectors(vectors_limit) for name in pipeline: nlp.add_pipe(nlp.create_pipe(name), name=name) optimizer = nlp.begin_training(lambda: corpus.train_tuples, device=use_gpu) From 98c35d2585c548e6ff2c25a537cfd81c25482283 Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 30 Oct 2017 18:38:41 +0100 Subject: [PATCH 19/32] Fix spacy vocab command --- spacy/__main__.py | 2 +- spacy/cli/vocab.py | 38 ++++++++++++++++++++++---------------- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/spacy/__main__.py b/spacy/__main__.py index 770ce5296..f4b5e6715 100644 --- a/spacy/__main__.py +++ b/spacy/__main__.py @@ -19,7 +19,7 @@ if __name__ == '__main__': 'convert': convert, 'package': package, 'model': model, - 'model': vocab, + 'vocab': vocab, 'profile': profile, 'validate': validate } diff --git a/spacy/cli/vocab.py b/spacy/cli/vocab.py index c1bab825c..d05eff3f0 100644 --- a/spacy/cli/vocab.py +++ b/spacy/cli/vocab.py @@ -1,31 +1,33 @@ -'''Compile a vocabulary from a lexicon jsonl file and word vectors.''' # coding: utf8 from __future__ import unicode_literals -from pathlib import Path import plac import json import spacy import numpy -from spacy.util import ensure_path +from pathlib import Path + +from ..util import prints, ensure_path @plac.annotations( lang=("model language", "positional", None, str), - output_dir=("output directory to store model in", "positional", None, str), + output_dir=("model output directory", "positional", None, Path), lexemes_loc=("location of JSONL-formatted lexical data", "positional", - None, str), - vectors_loc=("location of vectors data, as numpy .npz (optional)", - "positional", None, str), - version=("Model version", "option", "V", str), -) -def make_vocab(lang, output_dir, lexemes_loc, vectors_loc=None, 
version=None): - out_dir = ensure_path(output_dir) - jsonl_loc = ensure_path(lexemes_loc) + None, Path), + vectors_loc=("optional: location of vectors data, as numpy .npz", + "positional", None, str)) +def make_vocab(cmd, lang, output_dir, lexemes_loc, vectors_loc=None): + """Compile a vocabulary from a lexicon jsonl file and word vectors.""" + if not lexemes_loc.exists(): + prints(lexemes_loc, title="Can't find lexical data", exits=1) + vectors_loc = ensure_path(vectors_loc) nlp = spacy.blank(lang) for word in nlp.vocab: word.rank = 0 - with jsonl_loc.open() as file_: + lex_added = 0 + vec_added = 0 + with lexemes_loc.open() as file_: for line in file_: if line.strip(): attrs = json.loads(line) @@ -35,14 +37,18 @@ def make_vocab(lang, output_dir, lexemes_loc, vectors_loc=None, version=None): lex = nlp.vocab[attrs['orth']] lex.set_attrs(**attrs) assert lex.rank == attrs['id'] + lex_added += 1 if vectors_loc is not None: vector_data = numpy.load(open(vectors_loc, 'rb')) nlp.vocab.clear_vectors(width=vector_data.shape[1]) - added = 0 for word in nlp.vocab: if word.rank: nlp.vocab.vectors.add(word.orth_, row=word.rank, vector=vector_data[word.rank]) - added += 1 - nlp.to_disk(out_dir) + vec_added += 1 + if not output_dir.exists(): + output_dir.mkdir() + nlp.to_disk(output_dir) + prints("{} entries, {} vectors".format(lex_added, vec_added), output_dir, + title="Sucessfully compiled vocab and vectors, and saved model") return nlp From ce98fa79341806d5ef87c764350013f2b3722ef9 Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 30 Oct 2017 18:38:55 +0100 Subject: [PATCH 20/32] Fix formatting --- spacy/cli/evaluate.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/spacy/cli/evaluate.py b/spacy/cli/evaluate.py index d4d54d8aa..d7695fd73 100644 --- a/spacy/cli/evaluate.py +++ b/spacy/cli/evaluate.py @@ -17,14 +17,14 @@ numpy.random.seed(0) @plac.annotations( - model=("Model name or path", "positional", None, str), - data_path=("Location of 
JSON-formatted evaluation data", "positional", + model=("model name or path", "positional", None, str), + data_path=("location of JSON-formatted evaluation data", "positional", None, str), - gold_preproc=("Use gold preprocessing", "flag", "G", bool), - gpu_id=("Use GPU", "option", "g", int), - displacy_path=("Directory to output rendered parses as HTML", "option", + gold_preproc=("use gold preprocessing", "flag", "G", bool), + gpu_id=("use GPU", "option", "g", int), + displacy_path=("directory to output rendered parses as HTML", "option", "dp", str), - displacy_limit=("Limit of parses to render as HTML", "option", "dl", int)) + displacy_limit=("limit of parses to render as HTML", "option", "dl", int)) def evaluate(cmd, model, data_path, gpu_id=-1, gold_preproc=False, displacy_path=None, displacy_limit=25): """ From abf8aa05d39688a69afd6c389ab517263982572e Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 30 Oct 2017 18:39:38 +0100 Subject: [PATCH 21/32] Populate --create-meta defaults from file if available If meta.json is found in directory and user chooses to overwrite it, show existing data as defaults. --- spacy/cli/package.py | 40 +++++++++++++++++--------------- website/api/_top-level/_cli.jade | 18 +++++++------- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/spacy/cli/package.py b/spacy/cli/package.py index d1984fe65..3157ba99d 100644 --- a/spacy/cli/package.py +++ b/spacy/cli/package.py @@ -16,10 +16,11 @@ from .. 
import about input_dir=("directory with model data", "positional", None, str), output_dir=("output parent directory", "positional", None, str), meta_path=("path to meta.json", "option", "m", str), - create_meta=("create meta.json, even if one exists in directory", "flag", - "c", bool), - force=("force overwriting of existing folder in output directory", "flag", - "f", bool)) + create_meta=("create meta.json, even if one exists in directory – if " + "existing meta is found, entries are shown as defaults in " + "the command line prompt", "flag", "c", bool), + force=("force overwriting of existing model directory in output directory", + "flag", "f", bool)) def package(cmd, input_dir, output_dir, meta_path=None, create_meta=False, force=False): """ @@ -41,13 +42,13 @@ def package(cmd, input_dir, output_dir, meta_path=None, create_meta=False, template_manifest = get_template('MANIFEST.in') template_init = get_template('xx_model_name/__init__.py') meta_path = meta_path or input_path / 'meta.json' - if not create_meta and meta_path.is_file(): - prints(meta_path, title="Reading meta.json from file") + if meta_path.is_file(): meta = util.read_json(meta_path) - else: - meta = generate_meta(input_dir) + if not create_meta: # only print this if user doesn't want to overwrite + prints(meta_path, title="Loaded meta.json from file") + else: + meta = generate_meta(input_dir, meta) meta = validate_meta(meta, ['lang', 'name', 'version']) - model_name = meta['lang'] + '_' + meta['name'] model_name_v = model_name + '-' + meta['version'] main_path = output_path / model_name_v @@ -82,18 +83,19 @@ def create_file(file_path, contents): file_path.open('w', encoding='utf-8').write(contents) -def generate_meta(model_path): - meta = {} - settings = [('lang', 'Model language', 'en'), - ('name', 'Model name', 'model'), - ('version', 'Model version', '0.0.0'), +def generate_meta(model_path, existing_meta): + meta = existing_meta or {} + settings = [('lang', 'Model language', meta.get('lang', 
'en')), + ('name', 'Model name', meta.get('name', 'model')), + ('version', 'Model version', meta.get('version', '0.0.0')), ('spacy_version', 'Required spaCy version', '>=%s,<3.0.0' % about.__version__), - ('description', 'Model description', False), - ('author', 'Author', False), - ('email', 'Author email', False), - ('url', 'Author website', False), - ('license', 'License', 'CC BY-NC 3.0')] + ('description', 'Model description', + meta.get('description', False)), + ('author', 'Author', meta.get('author', False)), + ('email', 'Author email', meta.get('email', False)), + ('url', 'Author website', meta.get('url', False)), + ('license', 'License', meta.get('license', 'CC BY-SA 3.0'))] nlp = util.load_model_from_path(Path(model_path)) meta['pipeline'] = nlp.pipe_names meta['vectors'] = {'width': nlp.vocab.vectors_length, diff --git a/website/api/_top-level/_cli.jade b/website/api/_top-level/_cli.jade index f19eb43d0..aa13abc12 100644 --- a/website/api/_top-level/_cli.jade +++ b/website/api/_top-level/_cli.jade @@ -453,10 +453,11 @@ p p | Generate a #[+a("/usage/training#models-generating") model Python package] | from an existing model data directory. All data files are copied over. - | If the path to a meta.json is supplied, or a meta.json is found in the - | input directory, this file is used. Otherwise, the data can be entered - | directly from the command line. The required file templates are downloaded - | from #[+src(gh("spacy-dev-resources", "templates/model")) GitHub] to make + | If the path to a #[code meta.json] is supplied, or a #[code meta.json] is + | found in the input directory, this file is used. Otherwise, the data can + | be entered directly from the command line. The required file templates + | are downloaded from + | #[+src(gh("spacy-dev-resources", "templates/model")) GitHub] to make | sure you're always using the latest versions. This means you need to be | connected to the internet to use this command. 
@@ -477,15 +478,16 @@ p +row +cell #[code --meta-path], #[code -m] +cell option - +cell #[+tag-new(2)] Path to meta.json file (optional). + +cell #[+tag-new(2)] Path to #[code meta.json] file (optional). +row +cell #[code --create-meta], #[code -c] +cell flag +cell - | #[+tag-new(2)] Create a meta.json file on the command line, even - | if one already exists in the directory. - + | #[+tag-new(2)] Create a #[code meta.json] file on the command + | line, even if one already exists in the directory. If an + | existing file is found, its entries will be shown as the defaults + | in the command line prompt. +row +cell #[code --force], #[code -f] +cell flag From 8e022942413f65a7b28ea45fa92ba687db76d1f9 Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 30 Oct 2017 18:39:48 +0100 Subject: [PATCH 22/32] Add vectors to Language.meta --- spacy/language.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spacy/language.py b/spacy/language.py index 05546cde4..1ce74b265 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -154,6 +154,8 @@ class Language(object): self._meta.setdefault('email', '') self._meta.setdefault('url', '') self._meta.setdefault('license', '') + self._meta['vectors'] = {'width': self.vocab.vectors_length, + 'entries': len(self.vocab.vectors)} self._meta['pipeline'] = self.pipe_names return self._meta From 559854205506ef896a3effe63206a73012d7d0d0 Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 30 Oct 2017 18:58:55 +0100 Subject: [PATCH 23/32] Add link --- website/api/_top-level/_spacy.jade | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/website/api/_top-level/_spacy.jade b/website/api/_top-level/_spacy.jade index 81612c5e6..c6b342011 100644 --- a/website/api/_top-level/_spacy.jade +++ b/website/api/_top-level/_spacy.jade @@ -85,7 +85,9 @@ p +row +cell #[code name] +cell unicode - +cell ISO code of the language class to load. 
+ +cell + | #[+a("https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes") ISO code] + | of the language class to load. +row +cell #[code disable] From 12343e23fda04f1a607e3dbd67a1bae45275f09e Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 30 Oct 2017 18:59:08 +0100 Subject: [PATCH 24/32] Update CLI docs and document vocab command --- website/api/_top-level/_cli.jade | 96 +++++++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) diff --git a/website/api/_top-level/_cli.jade b/website/api/_top-level/_cli.jade index aa13abc12..ec2c1737a 100644 --- a/website/api/_top-level/_cli.jade +++ b/website/api/_top-level/_cli.jade @@ -34,6 +34,13 @@ p +cell flag +cell Show help message and available arguments. + +row("foot") + +cell creates + +cell directory, symlink + +cell + | The installed model package in your #[code site-packages] + | directory and a shortcut link as a symlink in #[code spacy/data]. + +aside("Downloading best practices") | The #[code download] command is mostly intended as a convenient, | interactive wrapper – it performs compatibility checks and prints @@ -86,6 +93,13 @@ p +cell flag +cell Show help message and available arguments. + +row("foot") + +cell creates + +cell symlink + +cell + | A shortcut link of the given name as a symlink in + | #[code spacy/data]. + +h(3, "info") Info p @@ -113,6 +127,11 @@ p +cell flag +cell Show help message and available arguments. + +row("foot") + +cell prints + +cell #[code stdout] + +cell Information about your spaCy installation. + +h(3, "validate") Validate +tag-new(2) @@ -129,6 +148,12 @@ p +code(false, "bash", "$"). spacy validate ++table(["Argument", "Type", "Description"]) + +row("foot") + +cell prints + +cell #[code stdout] + +cell Details about the compatibility of your installed models. + +h(3, "convert") Convert p @@ -172,6 +197,11 @@ p +cell flag +cell Show help message and available arguments. 
+ +row("foot") + +cell creates + +cell JSON + +cell Data in spaCy's #[+a("/api/annotation#json-input") JSON format]. + p The following converters are available: +table(["ID", "Description"]) @@ -286,6 +316,11 @@ p +cell flag +cell Show help message and available arguments. + +row("foot") + +cell creates + +cell model, pickle + +cell A spaCy model on each epoch, and a final #[code .pickle] file. + +h(4, "train-hyperparams") Environment variables for hyperparameters +tag-new(2) @@ -395,6 +430,47 @@ p +cell Gradient L2 norm constraint. +cell #[code 1.0] ++h(3, "vocab") Vocab + +tag-new(2) + +p + | Compile a vocabulary from a #[+a("#") lexicon JSONL] file and optional + | word vectors. Will save out a valid spaCy model that you can load via + | #[+api("spacy#load") #[code spacy.load]] or package using the + | #[+api("cli#package") #[code package]] command. + ++code(false, "bash", "$"). + spacy vocab [lang] [output_dir] [lexemes_loc] [vectors_loc] + ++table(["Argument", "Type", "Description"]) + +row + +cell #[code lang] + +cell positional + +cell + | Model language + | #[+a("https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes") ISO code], + | e.g. #[code en]. + + +row + +cell #[code output_dir] + +cell positional + +cell Model output directory. Will be created if it doesn't exist. + + +row + +cell #[code lexemes_loc] + +cell positional + +cell Location of lexical data in spaCy's #[+a("#") JSONL format]. + + +row + +cell #[code vectors_loc] + +cell positional + +cell Optional location of vectors data as numpy #[code .npz] file. + + +row("foot") + +cell creates + +cell model + +cell A spaCy model containing the vocab and vectors. + +h(3, "evaluate") Evaluate +tag-new(2) @@ -447,6 +523,11 @@ p +cell flag +cell Use gold preprocessing. + +row("foot") + +cell prints / creates + +cell #[code stdout], HTML + +cell Training results and optional displaCy visualizations. 
+ +h(3, "package") Package @@ -459,11 +540,19 @@ p | are downloaded from | #[+src(gh("spacy-dev-resources", "templates/model")) GitHub] to make | sure you're always using the latest versions. This means you need to be - | connected to the internet to use this command. + | connected to the internet to use this command. After packaging, you + | can run #[code python setup.py sdist] from the newly created directory + | to turn your model into an installable archive file. +code(false, "bash", "$", false, false, true). spacy package [input_dir] [output_dir] [--meta-path] [--create-meta] [--force] ++aside-code("Example", "bash"). + spacy package /input /output + cd /output/en_model-0.0.0 + python setup.py sdist + pip install dist/en_model-0.0.0.tar.gz + +table(["Argument", "Type", "Description"]) +row +cell #[code input_dir] @@ -497,3 +586,8 @@ p +cell #[code --help], #[code -h] +cell flag +cell Show help message and available arguments. + + +row("foot") + +cell creates + +cell directory + +cell A Python package containing the spaCy model. From ec657c1ddcdae63d2cd12a14a5c3536b44841555 Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 30 Oct 2017 19:35:41 +0100 Subject: [PATCH 25/32] Update vocab docs and document Vocab.prune_vectors --- spacy/vocab.pyx | 12 +++++++++- website/api/vocab.jade | 51 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 58 insertions(+), 5 deletions(-) diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index ff6c5b844..23254718f 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -252,7 +252,7 @@ cdef class Vocab: """Reduce the current vector table to `nr_row` unique entries. Words mapped to the discarded vectors will be remapped to the closest vector among those remaining. - + For example, suppose the original table had vectors for the words: ['sat', 'cat', 'feline', 'reclined']. If we prune the vector table to, two rows, we would discard the vectors for 'feline' and 'reclined'. 
@@ -263,6 +263,15 @@ cdef class Vocab: The similarities are judged by cosine. The original vectors may be large, so the cosines are calculated in minibatches, to reduce memory usage. + + nr_row (int): The number of rows to keep in the vector table. + batch_size (int): Batch of vectors for calculating the similarities. + Larger batch sizes might be faster, while temporarily requiring + more memory. + RETURNS (dict): A dictionary keyed by removed words mapped to + `(string, score)` tuples, where `string` is the entry the removed + word was mapped to, and `score` the similarity score between the + two words. """ xp = get_array_module(self.vectors.data) # Work in batches, to avoid memory problems. @@ -285,6 +294,7 @@ cdef class Vocab: self.vectors.add(lex.orth, row=lex.rank) # Make copy, to encourage the original table to be garbage collected. self.vectors.data = xp.ascontiguousarray(self.vectors.data[:nr_row]) + # TODO: return new mapping def get_vector(self, orth): """Retrieve a vector for a word in the vocabulary. Words can be looked diff --git a/website/api/vocab.jade b/website/api/vocab.jade index 6faefc064..54dd4f691 100644 --- a/website/api/vocab.jade +++ b/website/api/vocab.jade @@ -162,7 +162,7 @@ p +cell int +cell The integer ID by which the flag value can be checked. -+h(2, "add_flag") Vocab.clear_vectors ++h(2, "clear_vectors") Vocab.clear_vectors +tag method +tag-new(2) @@ -181,7 +181,50 @@ p | Number of dimensions of the new vectors. If #[code None], size | is not changed. -+h(2, "add_flag") Vocab.get_vector ++h(2, "prune_vectors") Vocab.prune_vectors + +tag method + +tag-new(2) + +p + | Reduce the current vector table to #[code nr_row] unique entries. Words + | mapped to the discarded vectors will be remapped to the closest vector + | among those remaining. For example, suppose the original table had + | vectors for the words: + | #[code.u-break ['sat', 'cat', 'feline', 'reclined']]. 
If we prune the + | vector table to two rows, we would discard the vectors for "feline" + | and "reclined". These words would then be remapped to the closest + | remaining vector – so "feline" would have the same vector as "cat", + | and "reclined" would have the same vector as "sat". The similarities are + | judged by cosine. The original vectors may be large, so the cosines are + | calculated in minibatches, to reduce memory usage. + ++aside-code("Example"). + nlp.vocab.prune_vectors(10000) + assert len(nlp.vocab.vectors) <= 10000 + ++table(["Name", "Type", "Description"]) + +row + +cell #[code nr_row] + +cell int + +cell The number of rows to keep in the vector table. + + +row + +cell #[code batch_size] + +cell int + +cell + | Batch of vectors for calculating the similarities. Larger batch + | sizes might be faster, while temporarily requiring more memory. + + +row("foot") + +cell returns + +cell dict + +cell + | A dictionary keyed by removed words mapped to + | #[code (string, score)] tuples, where #[code string] is the entry + | the removed word was mapped to, and #[code score] the similarity + | score between the two words. + ++h(2, "get_vector") Vocab.get_vector +tag method +tag-new(2) @@ -206,7 +249,7 @@ p | A word vector. Size and shape are determined by the | #[code Vocab.vectors] instance. -+h(2, "add_flag") Vocab.set_vector ++h(2, "set_vector") Vocab.set_vector +tag method +tag-new(2) @@ -228,7 +271,7 @@ p +cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']] +cell The vector to set. 
-+h(2, "add_flag") Vocab.has_vector ++h(2, "has_vector") Vocab.has_vector +tag method +tag-new(2) From 57534253e6f8a4de010341d033e66a65ba49ad99 Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 30 Oct 2017 19:39:26 +0100 Subject: [PATCH 26/32] Move CLI docs to own page --- website/_includes/_functions.jade | 3 --- website/api/_data.json | 11 +++++++++-- website/api/{_top-level/_cli.jade => cli.jade} | 11 ++++++++--- website/api/top-level.jade | 4 ---- 4 files changed, 17 insertions(+), 12 deletions(-) rename website/api/{_top-level/_cli.jade => cli.jade} (98%) diff --git a/website/_includes/_functions.jade b/website/_includes/_functions.jade index eb16d9659..39139cc58 100644 --- a/website/_includes/_functions.jade +++ b/website/_includes/_functions.jade @@ -41,9 +41,6 @@ - var comps = path.split('#'); - return "top-level#" + comps[0] + '.' + comps[1]; - } -- else if (path.startsWith('cli#')) { -- return "top-level#" + path.split('#')[1]; -- } - return path; - } diff --git a/website/api/_data.json b/website/api/_data.json index ba7997690..0be09b782 100644 --- a/website/api/_data.json +++ b/website/api/_data.json @@ -3,8 +3,10 @@ "Overview": { "Architecture": "./", "Annotation Specs": "annotation", + "Command Line": "cli", "Functions": "top-level" }, + "Containers": { "Doc": "doc", "Token": "token", @@ -45,14 +47,19 @@ } }, + "cli": { + "title": "Command Line Interface", + "teaser": "Download, train and package models, and debug spaCy.", + "source": "spacy/cli" + }, + "top-level": { "title": "Top-level Functions", "menu": { "spacy": "spacy", "displacy": "displacy", "Utility Functions": "util", - "Compatibility": "compat", - "Command Line": "cli" + "Compatibility": "compat" } }, diff --git a/website/api/_top-level/_cli.jade b/website/api/cli.jade similarity index 98% rename from website/api/_top-level/_cli.jade rename to website/api/cli.jade index ec2c1737a..cd1cb22fb 100644 --- a/website/api/_top-level/_cli.jade +++ b/website/api/cli.jade @@ -1,4 +1,6 @@ -//- 💫 DOCS > 
API > TOP-LEVEL > COMMAND LINE INTERFACE +//- 💫 DOCS > API > COMMAND LINE INTERFACE + +include ../_includes/_mixins p | As of v1.7.0, spaCy comes with new command line helpers to download and @@ -434,7 +436,8 @@ p +tag-new(2) p - | Compile a vocabulary from a #[+a("#") lexicon JSONL] file and optional + | Compile a vocabulary from a + | #[+a("/api/annotation#vocab-jsonl") lexicon JSONL] file and optional | word vectors. Will save out a valid spaCy model that you can load via | #[+api("spacy#load") #[code spacy.load]] or package using the | #[+api("cli#package") #[code package]] command. @@ -459,7 +462,9 @@ p +row +cell #[code lexemes_loc] +cell positional - +cell Location of lexical data in spaCy's #[+a("#") JSONL format]. + +cell + | Location of lexical data in spaCy's + | #[+a("/api/annotation#vocab-jsonl") JSONL format]. +row +cell #[code vectors_loc] diff --git a/website/api/top-level.jade b/website/api/top-level.jade index 46d2e8750..f16daae23 100644 --- a/website/api/top-level.jade +++ b/website/api/top-level.jade @@ -18,7 +18,3 @@ include ../_includes/_mixins +section("compat") +h(2, "compat", "spacy/compaty.py") Compatibility functions include _top-level/_compat - -+section("cli", "spacy/cli") - +h(2, "cli") Command line - include _top-level/_cli From 18dde7869aff327987e9e318542bc9567c03b3b1 Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 30 Oct 2017 19:40:05 +0100 Subject: [PATCH 27/32] Update training data docs and add vocab JSONL --- examples/training/vocab-data.jsonl | 500 +++++++++++++++++++++++++ website/api/_annotation/_training.jade | 56 +++ website/api/_data.json | 2 +- website/api/annotation.jade | 2 +- 4 files changed, 558 insertions(+), 2 deletions(-) create mode 100644 examples/training/vocab-data.jsonl diff --git a/examples/training/vocab-data.jsonl b/examples/training/vocab-data.jsonl new file mode 100644 index 000000000..4fae8fd65 --- /dev/null +++ b/examples/training/vocab-data.jsonl @@ -0,0 +1,500 @@ +{"lang": "en", "settings": {"oov_prob": 
-20.502029418945312}} +{"orth": ".", "id": 1, "lower": ".", "norm": ".", "shape": ".", "prefix": ".", "suffix": ".", "length": 1, "cluster": "8", "prob": -3.0678977966308594, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": ",", "id": 2, "lower": ",", "norm": ",", "shape": ",", "prefix": ",", "suffix": ",", "length": 1, "cluster": "4", "prob": -3.4549596309661865, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "the", "id": 3, "lower": "the", "norm": "the", "shape": "xxx", "prefix": "t", "suffix": "the", "length": 3, "cluster": "11", "prob": -3.528766632080078, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "I", "id": 4, "lower": "i", "norm": "I", "shape": "X", "prefix": "I", "suffix": "I", "length": 1, "cluster": "346", "prob": -3.791565179824829, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": true, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "to", "id": 5, "lower": "to", "norm": "to", "shape": "xx", "prefix": "t", "suffix": "to", "length": 
2, "cluster": "12", "prob": -3.8560216426849365, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "a", "id": 6, "lower": "a", "norm": "a", "shape": "x", "prefix": "a", "suffix": "a", "length": 1, "cluster": "19", "prob": -3.92978835105896, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "and", "id": 7, "lower": "and", "norm": "and", "shape": "xxx", "prefix": "a", "suffix": "and", "length": 3, "cluster": "20", "prob": -4.113108158111572, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "of", "id": 8, "lower": "of", "norm": "of", "shape": "xx", "prefix": "o", "suffix": "of", "length": 2, "cluster": "28", "prob": -4.27587366104126, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "you", "id": 9, "lower": "you", "norm": "you", "shape": "xxx", "prefix": "y", "suffix": "you", "length": 3, "cluster": "602", "prob": -4.373791217803955, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": 
true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "it", "id": 10, "lower": "it", "norm": "it", "shape": "xx", "prefix": "i", "suffix": "it", "length": 2, "cluster": "474", "prob": -4.388050079345703, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "is", "id": 11, "lower": "is", "norm": "is", "shape": "xx", "prefix": "i", "suffix": "is", "length": 2, "cluster": "762", "prob": -4.457748889923096, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "that", "id": 12, "lower": "that", "norm": "that", "shape": "xxxx", "prefix": "t", "suffix": "hat", "length": 4, "cluster": "84", "prob": -4.464504718780518, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "\n\n", "id": 0, "lower": "\n\n", "norm": "\n\n", "shape": "\n\n", "prefix": "\n", "suffix": "\n\n", "length": 2, "cluster": "0", "prob": -4.606560707092285, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, "is_upper": false, "like_url": 
false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "in", "id": 13, "lower": "in", "norm": "in", "shape": "xx", "prefix": "i", "suffix": "in", "length": 2, "cluster": "60", "prob": -4.619071960449219, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "'s", "id": 14, "lower": "'s", "norm": "'s", "shape": "'x", "prefix": "'", "suffix": "'s", "length": 2, "cluster": "52", "prob": -4.830559253692627, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "n't", "id": 15, "lower": "n't", "norm": "n't", "shape": "x'x", "prefix": "n", "suffix": "n't", "length": 3, "cluster": "74", "prob": -4.859938621520996, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "for", "id": 16, "lower": "for", "norm": "for", "shape": "xxx", "prefix": "f", "suffix": "for", "length": 3, "cluster": "508", "prob": -4.8801093101501465, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, 
"is_left_punct": false, "is_right_punct": false} +{"orth": "\"", "id": 17, "lower": "\"", "norm": "\"", "shape": "\"", "prefix": "\"", "suffix": "\"", "length": 1, "cluster": "0", "prob": -5.02677583694458, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": true, "is_left_punct": true, "is_right_punct": true} +{"orth": "?", "id": 18, "lower": "?", "norm": "?", "shape": "?", "prefix": "?", "suffix": "?", "length": 1, "cluster": "0", "prob": -5.05924654006958, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": " ", "id": 0, "lower": " ", "norm": " ", "shape": " ", "prefix": " ", "suffix": " ", "length": 1, "cluster": "0", "prob": -5.129165172576904, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "have", "id": 19, "lower": "have", "norm": "have", "shape": "xxxx", "prefix": "h", "suffix": "ave", "length": 4, "cluster": "378", "prob": -5.156484603881836, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "on", "id": 20, "lower": "on", "norm": "on", "shape": "xx", 
"prefix": "o", "suffix": "on", "length": 2, "cluster": "2044", "prob": -5.172736167907715, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "*", "id": 21, "lower": "*", "norm": "*", "shape": "*", "prefix": "*", "suffix": "*", "length": 1, "cluster": "5098", "prob": -5.1977410316467285, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": ")", "id": 22, "lower": ")", "norm": ")", "shape": ")", "prefix": ")", "suffix": ")", "length": 1, "cluster": "0", "prob": -5.197994232177734, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": true} +{"orth": "be", "id": 23, "lower": "be", "norm": "be", "shape": "xx", "prefix": "b", "suffix": "be", "length": 2, "cluster": "458", "prob": -5.225094318389893, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "with", "id": 24, "lower": "with", "norm": "with", "shape": "xxxx", "prefix": "w", "suffix": "ith", "length": 4, "cluster": "1020", "prob": -5.243249893188477, "is_alpha": true, 
"is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "do", "id": 25, "lower": "do", "norm": "do", "shape": "xx", "prefix": "d", "suffix": "do", "length": 2, "cluster": "2042", "prob": -5.246996879577637, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "was", "id": 26, "lower": "was", "norm": "was", "shape": "xxx", "prefix": "w", "suffix": "was", "length": 3, "cluster": "250", "prob": -5.252320289611816, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "are", "id": 27, "lower": "are", "norm": "are", "shape": "xxx", "prefix": "a", "suffix": "are", "length": 3, "cluster": "1530", "prob": -5.271068096160889, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "not", "id": 28, "lower": "not", "norm": "not", "shape": "xxx", "prefix": "n", "suffix": "not", "length": 3, "cluster": "1258", "prob": -5.332601070404053, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, 
"is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "but", "id": 29, "lower": "but", "norm": "but", "shape": "xxx", "prefix": "b", "suffix": "but", "length": 3, "cluster": "148", "prob": -5.3419694900512695, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "!", "id": 30, "lower": "!", "norm": "!", "shape": "!", "prefix": "!", "suffix": "!", "length": 1, "cluster": "0", "prob": -5.359641075134277, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "this", "id": 31, "lower": "this", "norm": "this", "shape": "xxxx", "prefix": "t", "suffix": "his", "length": 4, "cluster": "63", "prob": -5.36181640625, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "[", "id": 32, "lower": "[", "norm": "[", "shape": "[", "prefix": "[", "suffix": "[", "length": 1, "cluster": "0", "prob": -5.438112258911133, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, 
"is_oov": false, "is_quote": false, "is_left_punct": true, "is_right_punct": false} +{"orth": "-", "id": 33, "lower": "-", "norm": "-", "shape": "-", "prefix": "-", "suffix": "-", "length": 1, "cluster": "36", "prob": -5.468655109405518, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "my", "id": 34, "lower": "my", "norm": "my", "shape": "xx", "prefix": "m", "suffix": "my", "length": 2, "cluster": "251", "prob": -5.491642951965332, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "they", "id": 35, "lower": "they", "norm": "they", "shape": "xxxx", "prefix": "t", "suffix": "hey", "length": 4, "cluster": "90", "prob": -5.5243682861328125, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "as", "id": 36, "lower": "as", "norm": "as", "shape": "xx", "prefix": "a", "suffix": "as", "length": 2, "cluster": "212", "prob": -5.53448486328125, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "like", "id": 37, 
"lower": "like", "norm": "like", "shape": "xxxx", "prefix": "l", "suffix": "ike", "length": 4, "cluster": "1684", "prob": -5.610429763793945, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "just", "id": 38, "lower": "just", "norm": "just", "shape": "xxxx", "prefix": "j", "suffix": "ust", "length": 4, "cluster": "31978", "prob": -5.630868434906006, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "your", "id": 39, "lower": "your", "norm": "your", "shape": "xxxx", "prefix": "y", "suffix": "our", "length": 4, "cluster": "251", "prob": -5.650108814239502, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "or", "id": 40, "lower": "or", "norm": "or", "shape": "xx", "prefix": "o", "suffix": "or", "length": 2, "cluster": "404", "prob": -5.654984951019287, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "(", "id": 41, "lower": "(", "norm": "(", "shape": "(", "prefix": "(", "suffix": "(", "length": 1, 
"cluster": "0", "prob": -5.75598669052124, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": true, "is_right_punct": false} +{"orth": "at", "id": 42, "lower": "at", "norm": "at", "shape": "xx", "prefix": "a", "suffix": "at", "length": 2, "cluster": "124", "prob": -5.763442516326904, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "if", "id": 43, "lower": "if", "norm": "if", "shape": "xx", "prefix": "i", "suffix": "if", "length": 2, "cluster": "4052", "prob": -5.763589859008789, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "would", "id": 44, "lower": "would", "norm": "would", "shape": "xxxx", "prefix": "w", "suffix": "uld", "length": 5, "cluster": "1978", "prob": -5.772674560546875, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "so", "id": 45, "lower": "so", "norm": "so", "shape": "xx", "prefix": "s", "suffix": "so", "length": 2, "cluster": "2282", "prob": -5.823773384094238, "is_alpha": true, "is_ascii": true, "is_digit": false, 
"is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "can", "id": 46, "lower": "can", "norm": "can", "shape": "xxx", "prefix": "c", "suffix": "can", "length": 3, "cluster": "58", "prob": -5.827763080596924, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "me", "id": 47, "lower": "me", "norm": "me", "shape": "xx", "prefix": "m", "suffix": "me", "length": 2, "cluster": "1898", "prob": -5.846089839935303, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "about", "id": 48, "lower": "about", "norm": "about", "shape": "xxxx", "prefix": "a", "suffix": "out", "length": 5, "cluster": "618", "prob": -5.906808853149414, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "he", "id": 49, "lower": "he", "norm": "he", "shape": "xx", "prefix": "h", "suffix": "he", "length": 2, "cluster": "218", "prob": -5.9319047927856445, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, 
"like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "It", "id": 50, "lower": "it", "norm": "It", "shape": "Xx", "prefix": "I", "suffix": "It", "length": 2, "cluster": "894", "prob": -5.93662691116333, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "all", "id": 51, "lower": "all", "norm": "all", "shape": "xxx", "prefix": "a", "suffix": "all", "length": 3, "cluster": "6122", "prob": -5.936640739440918, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "The", "id": 52, "lower": "the", "norm": "The", "shape": "Xxx", "prefix": "T", "suffix": "The", "length": 3, "cluster": "30", "prob": -5.958707332611084, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "get", "id": 53, "lower": "get", "norm": "get", "shape": "xxx", "prefix": "g", "suffix": "get", "length": 3, "cluster": "2570", "prob": -5.992605686187744, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, 
"is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "one", "id": 54, "lower": "one", "norm": "one", "shape": "xxx", "prefix": "o", "suffix": "one", "length": 3, "cluster": "8170", "prob": -5.996385097503662, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "'m", "id": 55, "lower": "'m", "norm": "'m", "shape": "'x", "prefix": "'", "suffix": "'m", "length": 2, "cluster": "3066", "prob": -5.9999823570251465, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "out", "id": 56, "lower": "out", "norm": "out", "shape": "xxx", "prefix": "o", "suffix": "out", "length": 3, "cluster": "1386", "prob": -6.0027008056640625, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "from", "id": 57, "lower": "from", "norm": "from", "shape": "xxxx", "prefix": "f", "suffix": "rom", "length": 4, "cluster": "380", "prob": -6.010132312774658, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "an", "id": 
58, "lower": "an", "norm": "an", "shape": "xx", "prefix": "a", "suffix": "an", "length": 2, "cluster": "3", "prob": -6.014852046966553, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "what", "id": 59, "lower": "what", "norm": "what", "shape": "xxxx", "prefix": "w", "suffix": "hat", "length": 4, "cluster": "2026", "prob": -6.023346424102783, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "up", "id": 60, "lower": "up", "norm": "up", "shape": "xx", "prefix": "u", "suffix": "up", "length": 2, "cluster": "362", "prob": -6.028695583343506, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "]", "id": 61, "lower": "]", "norm": "]", "shape": "]", "prefix": "]", "suffix": "]", "length": 1, "cluster": "0", "prob": -6.0386552810668945, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": true} +{"orth": "\n", "id": 0, "lower": "\n", "norm": "\n", "shape": "\n", "prefix": "\n", "suffix": "\n", "length": 1, "cluster": "0", 
"prob": -6.0506510734558105, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "people", "id": 62, "lower": "people", "norm": "people", "shape": "xxxx", "prefix": "p", "suffix": "ple", "length": 6, "cluster": "365", "prob": -6.0715765953063965, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "more", "id": 63, "lower": "more", "norm": "more", "shape": "xxxx", "prefix": "m", "suffix": "ore", "length": 4, "cluster": "1514", "prob": -6.081598281860352, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": ":", "id": 64, "lower": ":", "norm": ":", "shape": ":", "prefix": ":", "suffix": ":", "length": 1, "cluster": "228", "prob": -6.128875732421875, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "there", "id": 65, "lower": "there", "norm": "there", "shape": "xxxx", "prefix": "t", "suffix": "ere", "length": 5, "cluster": "986", "prob": -6.135282039642334, "is_alpha": true, "is_ascii": true, "is_digit": 
false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "deleted", "id": 66, "lower": "deleted", "norm": "deleted", "shape": "xxxx", "prefix": "d", "suffix": "ted", "length": 7, "cluster": "1706", "prob": -6.1543049812316895, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "think", "id": 67, "lower": "think", "norm": "think", "shape": "xxxx", "prefix": "t", "suffix": "ink", "length": 5, "cluster": "1674", "prob": -6.180924892425537, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "will", "id": 68, "lower": "will", "norm": "will", "shape": "xxxx", "prefix": "w", "suffix": "ill", "length": 4, "cluster": "442", "prob": -6.199834823608398, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "them", "id": 69, "lower": "them", "norm": "them", "shape": "xxxx", "prefix": "t", "suffix": "hem", "length": 4, "cluster": "5994", "prob": -6.2177276611328125, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, 
"is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "we", "id": 70, "lower": "we", "norm": "we", "shape": "xx", "prefix": "w", "suffix": "we", "length": 2, "cluster": "1626", "prob": -6.230024337768555, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "'re", "id": 71, "lower": "'re", "norm": "'re", "shape": "'xx", "prefix": "'", "suffix": "'re", "length": 3, "cluster": "7162", "prob": -6.255462646484375, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "when", "id": 72, "lower": "when", "norm": "when", "shape": "xxxx", "prefix": "w", "suffix": "hen", "length": 4, "cluster": "16340", "prob": -6.2623114585876465, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "You", "id": 73, "lower": "you", "norm": "You", "shape": "Xxx", "prefix": "Y", "suffix": "You", "length": 3, "cluster": "858", "prob": -6.276494026184082, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": 
false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "...", "id": 74, "lower": "...", "norm": "...", "shape": "...", "prefix": ".", "suffix": "...", "length": 3, "cluster": "966", "prob": -6.278521537780762, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "some", "id": 75, "lower": "some", "norm": "some", "shape": "xxxx", "prefix": "s", "suffix": "ome", "length": 4, "cluster": "239", "prob": -6.318882465362549, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "has", "id": 76, "lower": "has", "norm": "has", "shape": "xxx", "prefix": "h", "suffix": "has", "length": 3, "cluster": "890", "prob": -6.325605392456055, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "because", "id": 77, "lower": "because", "norm": "because", "shape": "xxxx", "prefix": "b", "suffix": "use", "length": 7, "cluster": "980", "prob": -6.349620342254639, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": 
false, "is_right_punct": false} +{"orth": "know", "id": 78, "lower": "know", "norm": "know", "shape": "xxxx", "prefix": "k", "suffix": "now", "length": 4, "cluster": "3722", "prob": -6.368943214416504, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "really", "id": 79, "lower": "really", "norm": "really", "shape": "xxxx", "prefix": "r", "suffix": "lly", "length": 6, "cluster": "7802", "prob": -6.370757102966309, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "by", "id": 80, "lower": "by", "norm": "by", "shape": "xx", "prefix": "b", "suffix": "by", "length": 2, "cluster": "252", "prob": -6.375086784362793, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "time", "id": 81, "lower": "time", "norm": "time", "shape": "xxxx", "prefix": "t", "suffix": "ime", "length": 4, "cluster": "477", "prob": -6.3782219886779785, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "did", "id": 82, "lower": "did", 
"norm": "did", "shape": "xxx", "prefix": "d", "suffix": "did", "length": 3, "cluster": "8186", "prob": -6.389003753662109, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "no", "id": 83, "lower": "no", "norm": "no", "shape": "xx", "prefix": "n", "suffix": "no", "length": 2, "cluster": "4074", "prob": -6.402691841125488, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "had", "id": 84, "lower": "had", "norm": "had", "shape": "xxx", "prefix": "h", "suffix": "had", "length": 3, "cluster": "1914", "prob": -6.45427131652832, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "their", "id": 85, "lower": "their", "norm": "their", "shape": "xxxx", "prefix": "t", "suffix": "eir", "length": 5, "cluster": "187", "prob": -6.461463928222656, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "If", "id": 86, "lower": "if", "norm": "If", "shape": "Xx", "prefix": "I", "suffix": "If", "length": 2, "cluster": "190", 
"prob": -6.469156742095947, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "how", "id": 87, "lower": "how", "norm": "how", "shape": "xxx", "prefix": "h", "suffix": "how", "length": 3, "cluster": "10218", "prob": -6.496722221374512, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "does", "id": 88, "lower": "does", "norm": "does", "shape": "xxxx", "prefix": "d", "suffix": "oes", "length": 4, "cluster": "4090", "prob": -6.500738143920898, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "who", "id": 89, "lower": "who", "norm": "who", "shape": "xxx", "prefix": "w", "suffix": "who", "length": 3, "cluster": "410", "prob": -6.504637241363525, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "than", "id": 90, "lower": "than", "norm": "than", "shape": "xxxx", "prefix": "t", "suffix": "han", "length": 4, "cluster": "106", "prob": -6.512253761291504, "is_alpha": true, "is_ascii": true, "is_digit": false, 
"is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "good", "id": 91, "lower": "good", "norm": "good", "shape": "xxxx", "prefix": "g", "suffix": "ood", "length": 4, "cluster": "551", "prob": -6.518923759460449, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "only", "id": 92, "lower": "only", "norm": "only", "shape": "xxxx", "prefix": "o", "suffix": "nly", "length": 4, "cluster": "15594", "prob": -6.535442352294922, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "his", "id": 93, "lower": "his", "norm": "his", "shape": "xxx", "prefix": "h", "suffix": "his", "length": 3, "cluster": "123", "prob": -6.574275016784668, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "much", "id": 94, "lower": "much", "norm": "much", "shape": "xxxx", "prefix": "m", "suffix": "uch", "length": 4, "cluster": "2794", "prob": -6.584301948547363, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, 
"is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": ";", "id": 95, "lower": ";", "norm": ";", "shape": ";", "prefix": ";", "suffix": ";", "length": 1, "cluster": "36", "prob": -6.586422920227051, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "'ve", "id": 96, "lower": "'ve", "norm": "'ve", "shape": "'xx", "prefix": "'", "suffix": "'ve", "length": 3, "cluster": "1018", "prob": -6.593011379241943, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "could", "id": 97, "lower": "could", "norm": "could", "shape": "xxxx", "prefix": "c", "suffix": "uld", "length": 5, "cluster": "954", "prob": -6.595959186553955, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "then", "id": 98, "lower": "then", "norm": "then", "shape": "xxxx", "prefix": "t", "suffix": "hen", "length": 4, "cluster": "9962", "prob": -6.598200798034668, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": 
false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "other", "id": 99, "lower": "other", "norm": "other", "shape": "xxxx", "prefix": "o", "suffix": "her", "length": 5, "cluster": "47", "prob": -6.6438727378845215, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "make", "id": 100, "lower": "make", "norm": "make", "shape": "xxxx", "prefix": "m", "suffix": "ake", "length": 4, "cluster": "4618", "prob": -6.66980504989624, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "been", "id": 101, "lower": "been", "norm": "been", "shape": "xxxx", "prefix": "b", "suffix": "een", "length": 4, "cluster": "202", "prob": -6.670916557312012, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "were", "id": 102, "lower": "were", "norm": "were", "shape": "xxxx", "prefix": "w", "suffix": "ere", "length": 4, "cluster": "506", "prob": -6.673174858093262, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, 
"is_right_punct": false} +{"orth": "see", "id": 103, "lower": "see", "norm": "see", "shape": "xxx", "prefix": "s", "suffix": "see", "length": 3, "cluster": "1546", "prob": -6.6828837394714355, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "That", "id": 104, "lower": "that", "norm": "That", "shape": "Xxxx", "prefix": "T", "suffix": "hat", "length": 4, "cluster": "1406", "prob": -6.688080310821533, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "i", "id": 105, "lower": "i", "norm": "i", "shape": "x", "prefix": "i", "suffix": "i", "length": 1, "cluster": "966", "prob": -6.6887712478637695, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "any", "id": 106, "lower": "any", "norm": "any", "shape": "xxx", "prefix": "a", "suffix": "any", "length": 3, "cluster": "12266", "prob": -6.689523220062256, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "want", "id": 107, "lower": "want", "norm": "want", 
"shape": "xxxx", "prefix": "w", "suffix": "ant", "length": 4, "cluster": "906", "prob": -6.694204807281494, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "even", "id": 108, "lower": "even", "norm": "even", "shape": "xxxx", "prefix": "e", "suffix": "ven", "length": 4, "cluster": "3306", "prob": -6.702912330627441, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "should", "id": 109, "lower": "should", "norm": "should", "shape": "xxxx", "prefix": "s", "suffix": "uld", "length": 6, "cluster": "698", "prob": -6.733259677886963, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "way", "id": 110, "lower": "way", "norm": "way", "shape": "xxx", "prefix": "w", "suffix": "way", "length": 3, "cluster": "1349", "prob": -6.73627233505249, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "'", "id": 111, "lower": "'", "norm": "'", "shape": "'", "prefix": "'", "suffix": "'", "length": 1, "cluster": "916", 
"prob": -6.73720121383667, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": true, "is_left_punct": true, "is_right_punct": true} +{"orth": "too", "id": 112, "lower": "too", "norm": "too", "shape": "xxx", "prefix": "t", "suffix": "too", "length": 3, "cluster": "6378", "prob": -6.77581787109375, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "go", "id": 113, "lower": "go", "norm": "go", "shape": "xx", "prefix": "g", "suffix": "go", "length": 2, "cluster": "3466", "prob": -6.775965213775635, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "him", "id": 114, "lower": "him", "norm": "him", "shape": "xxx", "prefix": "h", "suffix": "him", "length": 3, "cluster": "1898", "prob": -6.783067226409912, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "This", "id": 115, "lower": "this", "norm": "This", "shape": "Xxxx", "prefix": "T", "suffix": "his", "length": 4, "cluster": "382", "prob": -6.78391695022583, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": 
false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "her", "id": 116, "lower": "her", "norm": "her", "shape": "xxx", "prefix": "h", "suffix": "her", "length": 3, "cluster": "507", "prob": -6.798486709594727, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "going", "id": 117, "lower": "going", "norm": "going", "shape": "xxxx", "prefix": "g", "suffix": "ing", "length": 5, "cluster": "2090", "prob": -6.833367824554443, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "now", "id": 118, "lower": "now", "norm": "now", "shape": "xxx", "prefix": "n", "suffix": "now", "length": 3, "cluster": "1770", "prob": -6.834407329559326, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "being", "id": 119, "lower": "being", "norm": "being", "shape": "xxxx", "prefix": "b", "suffix": "ing", "length": 5, "cluster": "3818", "prob": -6.845808029174805, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": 
false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "still", "id": 120, "lower": "still", "norm": "still", "shape": "xxxx", "prefix": "s", "suffix": "ill", "length": 5, "cluster": "1658", "prob": -6.867525100708008, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "into", "id": 121, "lower": "into", "norm": "into", "shape": "xxxx", "prefix": "i", "suffix": "nto", "length": 4, "cluster": "8188", "prob": -6.87359094619751, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "which", "id": 122, "lower": "which", "norm": "which", "shape": "xxxx", "prefix": "w", "suffix": "ich", "length": 5, "cluster": "154", "prob": -6.877470970153809, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "something", "id": 123, "lower": "something", "norm": "something", "shape": "xxxx", "prefix": "s", "suffix": "ing", "length": 9, "cluster": "14314", "prob": -6.887354850769043, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, 
"like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "she", "id": 124, "lower": "she", "norm": "she", "shape": "xxx", "prefix": "s", "suffix": "she", "length": 3, "cluster": "218", "prob": -6.90155553817749, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "also", "id": 125, "lower": "also", "norm": "also", "shape": "xxxx", "prefix": "a", "suffix": "lso", "length": 4, "cluster": "122", "prob": -6.928974151611328, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "very", "id": 126, "lower": "very", "norm": "very", "shape": "xxxx", "prefix": "v", "suffix": "ery", "length": 4, "cluster": "234", "prob": -6.93242883682251, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "right", "id": 127, "lower": "right", "norm": "right", "shape": "xxxx", "prefix": "r", "suffix": "ght", "length": 5, "cluster": "14122", "prob": -6.933711051940918, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, 
"is_left_punct": false, "is_right_punct": false} +{"orth": "game", "id": 128, "lower": "game", "norm": "game", "shape": "xxxx", "prefix": "g", "suffix": "ame", "length": 4, "cluster": "7973", "prob": -6.940612316131592, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "say", "id": 129, "lower": "say", "norm": "say", "shape": "xxx", "prefix": "s", "suffix": "say", "length": 3, "cluster": "1162", "prob": -6.950479984283447, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "'ll", "id": 130, "lower": "'ll", "norm": "'ll", "shape": "'xx", "prefix": "'", "suffix": "'ll", "length": 3, "cluster": "5114", "prob": -6.958071231842041, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "got", "id": 131, "lower": "got", "norm": "got", "shape": "xxx", "prefix": "g", "suffix": "got", "length": 3, "cluster": "10666", "prob": -6.98855447769165, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "well", "id": 132, 
"lower": "well", "norm": "well", "shape": "xxxx", "prefix": "w", "suffix": "ell", "length": 4, "cluster": "746", "prob": -6.995903968811035, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "need", "id": 133, "lower": "need", "norm": "need", "shape": "xxxx", "prefix": "n", "suffix": "eed", "length": 4, "cluster": "2954", "prob": -7.008103370666504, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "And", "id": 134, "lower": "and", "norm": "And", "shape": "Xxx", "prefix": "A", "suffix": "And", "length": 3, "cluster": "1470", "prob": -7.012199401855469, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "But", "id": 135, "lower": "but", "norm": "But", "shape": "Xxx", "prefix": "B", "suffix": "But", "length": 3, "cluster": "1470", "prob": -7.0142974853515625, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "over", "id": 136, "lower": "over", "norm": "over", "shape": "xxxx", "prefix": "o", "suffix": 
"ver", "length": 4, "cluster": "49148", "prob": -7.027544975280762, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "back", "id": 137, "lower": "back", "norm": "back", "shape": "xxxx", "prefix": "b", "suffix": "ack", "length": 4, "cluster": "7530", "prob": -7.033305644989014, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "same", "id": 138, "lower": "same", "norm": "same", "shape": "xxxx", "prefix": "s", "suffix": "ame", "length": 4, "cluster": "991", "prob": -7.053191661834717, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "thing", "id": 139, "lower": "thing", "norm": "thing", "shape": "xxxx", "prefix": "t", "suffix": "ing", "length": 5, "cluster": "2013", "prob": -7.063167572021484, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "first", "id": 140, "lower": "first", "norm": "first", "shape": "xxxx", "prefix": "f", "suffix": "rst", "length": 5, "cluster": "159", "prob": -7.063716888427734, 
"is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "most", "id": 141, "lower": "most", "norm": "most", "shape": "xxxx", "prefix": "m", "suffix": "ost", "length": 4, "cluster": "175", "prob": -7.0663957595825195, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "here", "id": 142, "lower": "here", "norm": "here", "shape": "xxxx", "prefix": "h", "suffix": "ere", "length": 4, "cluster": "3946", "prob": -7.0680251121521, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "ca", "id": 143, "lower": "ca", "norm": "ca", "shape": "xx", "prefix": "c", "suffix": "ca", "length": 2, "cluster": "0", "prob": -7.071251392364502, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "off", "id": 144, "lower": "off", "norm": "off", "shape": "xxx", "prefix": "o", "suffix": "off", "length": 3, "cluster": "6506", "prob": -7.073742389678955, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": 
false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "'d", "id": 145, "lower": "'d", "norm": "'d", "shape": "'x", "prefix": "'", "suffix": "'d", "length": 2, "cluster": "5114", "prob": -7.075286865234375, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "They", "id": 146, "lower": "they", "norm": "They", "shape": "Xxxx", "prefix": "T", "suffix": "hey", "length": 4, "cluster": "1882", "prob": -7.0789008140563965, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "work", "id": 147, "lower": "work", "norm": "work", "shape": "xxxx", "prefix": "w", "suffix": "ork", "length": 4, "cluster": "1973", "prob": -7.081293106079102, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "use", "id": 148, "lower": "use", "norm": "use", "shape": "xxx", "prefix": "u", "suffix": "use", "length": 3, "cluster": "2741", "prob": -7.083596229553223, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, 
"like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "never", "id": 149, "lower": "never", "norm": "never", "shape": "xxxx", "prefix": "n", "suffix": "ver", "length": 5, "cluster": "15994", "prob": -7.084620475769043, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "better", "id": 150, "lower": "better", "norm": "better", "shape": "xxxx", "prefix": "b", "suffix": "ter", "length": 6, "cluster": "7658", "prob": -7.1072587966918945, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "though", "id": 151, "lower": "though", "norm": "though", "shape": "xxxx", "prefix": "t", "suffix": "ugh", "length": 6, "cluster": "2004", "prob": -7.113335132598877, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "lot", "id": 152, "lower": "lot", "norm": "lot", "shape": "xxx", "prefix": "l", "suffix": "lot", "length": 3, "cluster": "853", "prob": -7.113600254058838, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, 
"is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "pretty", "id": 153, "lower": "pretty", "norm": "pretty", "shape": "xxxx", "prefix": "p", "suffix": "tty", "length": 6, "cluster": "234", "prob": -7.1256103515625, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "where", "id": 154, "lower": "where", "norm": "where", "shape": "xxxx", "prefix": "w", "suffix": "ere", "length": 5, "cluster": "8148", "prob": -7.146170139312744, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "am", "id": 155, "lower": "am", "norm": "am", "shape": "xx", "prefix": "a", "suffix": "am", "length": 2, "cluster": "3066", "prob": -7.149725437164307, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "things", "id": 156, "lower": "things", "norm": "things", "shape": "xxxx", "prefix": "t", "suffix": "ngs", "length": 6, "cluster": "3917", "prob": -7.154941082000732, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, 
"is_right_punct": false} +{"orth": "sure", "id": 157, "lower": "sure", "norm": "sure", "shape": "xxxx", "prefix": "s", "suffix": "ure", "length": 4, "cluster": "490", "prob": -7.157395839691162, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "actually", "id": 158, "lower": "actually", "norm": "actually", "shape": "xxxx", "prefix": "a", "suffix": "lly", "length": 8, "cluster": "7802", "prob": -7.160778045654297, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "He", "id": 159, "lower": "he", "norm": "He", "shape": "Xx", "prefix": "H", "suffix": "He", "length": 2, "cluster": "126", "prob": -7.162238121032715, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "those", "id": 160, "lower": "those", "norm": "those", "shape": "xxxx", "prefix": "t", "suffix": "ose", "length": 5, "cluster": "495", "prob": -7.169255256652832, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "why", "id": 161, "lower": "why", 
"norm": "why", "shape": "xxx", "prefix": "w", "suffix": "why", "length": 3, "cluster": "18410", "prob": -7.178915500640869, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "So", "id": 162, "lower": "so", "norm": "So", "shape": "Xx", "prefix": "S", "suffix": "So", "length": 2, "cluster": "1726", "prob": -7.199381351470947, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "take", "id": 163, "lower": "take", "norm": "take", "shape": "xxxx", "prefix": "t", "suffix": "ake", "length": 4, "cluster": "6666", "prob": -7.209812641143799, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "down", "id": 164, "lower": "down", "norm": "down", "shape": "xxxx", "prefix": "d", "suffix": "own", "length": 4, "cluster": "2410", "prob": -7.223586082458496, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "What", "id": 165, "lower": "what", "norm": "What", "shape": "Xxxx", "prefix": "W", "suffix": "hat", "length": 4, 
"cluster": "702", "prob": -7.226758003234863, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "someone", "id": 166, "lower": "someone", "norm": "someone", "shape": "xxxx", "prefix": "s", "suffix": "one", "length": 7, "cluster": "30698", "prob": -7.249640464782715, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "before", "id": 167, "lower": "before", "norm": "before", "shape": "xxxx", "prefix": "b", "suffix": "ore", "length": 6, "cluster": "1492", "prob": -7.253359794616699, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "said", "id": 168, "lower": "said", "norm": "said", "shape": "xxxx", "prefix": "s", "suffix": "aid", "length": 4, "cluster": "116", "prob": -7.258025169372559, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "after", "id": 169, "lower": "after", "norm": "after", "shape": "xxxx", "prefix": "a", "suffix": "ter", "length": 5, "cluster": "3540", "prob": -7.265651702880859, 
"is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "around", "id": 170, "lower": "around", "norm": "around", "shape": "xxxx", "prefix": "a", "suffix": "und", "length": 6, "cluster": "245756", "prob": -7.313362121582031, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "its", "id": 171, "lower": "its", "norm": "its", "shape": "xxx", "prefix": "i", "suffix": "its", "length": 3, "cluster": "27", "prob": -7.321457862854004, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "2", "id": 172, "lower": "2", "norm": "2", "shape": "d", "prefix": "2", "suffix": "2", "length": 1, "cluster": "818", "prob": -7.324268341064453, "is_alpha": false, "is_ascii": true, "is_digit": true, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "feel", "id": 173, "lower": "feel", "norm": "feel", "shape": "xxxx", "prefix": "f", "suffix": "eel", "length": 4, "cluster": "1674", "prob": -7.342533588409424, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": 
false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "My", "id": 174, "lower": "my", "norm": "My", "shape": "Xx", "prefix": "M", "suffix": "My", "length": 2, "cluster": "94", "prob": -7.345071792602539, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "There", "id": 175, "lower": "there", "norm": "There", "shape": "Xxxxx", "prefix": "T", "suffix": "ere", "length": 5, "cluster": "1918", "prob": -7.347356796264648, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "look", "id": 176, "lower": "look", "norm": "look", "shape": "xxxx", "prefix": "l", "suffix": "ook", "length": 4, "cluster": "2442", "prob": -7.352481365203857, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "these", "id": 177, "lower": "these", "norm": "these", "shape": "xxxx", "prefix": "t", "suffix": "ese", "length": 5, "cluster": "1519", "prob": -7.36269474029541, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": 
false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "years", "id": 178, "lower": "years", "norm": "years", "shape": "xxxx", "prefix": "y", "suffix": "ars", "length": 5, "cluster": "189", "prob": -7.368987560272217, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "love", "id": 179, "lower": "love", "norm": "love", "shape": "xxxx", "prefix": "l", "suffix": "ove", "length": 4, "cluster": "2661", "prob": -7.372685432434082, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "always", "id": 180, "lower": "always", "norm": "always", "shape": "xxxx", "prefix": "a", "suffix": "ays", "length": 6, "cluster": "15994", "prob": -7.37296724319458, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "many", "id": 181, "lower": "many", "norm": "many", "shape": "xxxx", "prefix": "m", "suffix": "any", "length": 4, "cluster": "751", "prob": -7.377613067626953, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, 
"is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": ">", "id": 0, "lower": ">", "norm": ">", "shape": "&xx", "prefix": "&", "suffix": ">", "length": 3, "cluster": "0", "prob": -7.38146448135376, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "A", "id": 182, "lower": "a", "norm": "A", "shape": "X", "prefix": "A", "suffix": "A", "length": 1, "cluster": "222", "prob": -7.38541841506958, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": true, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "point", "id": 183, "lower": "point", "norm": "point", "shape": "xxxx", "prefix": "p", "suffix": "int", "length": 5, "cluster": "389", "prob": -7.386973857879639, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "find", "id": 184, "lower": "find", "norm": "find", "shape": "xxxx", "prefix": "f", "suffix": "ind", "length": 4, "cluster": "5642", "prob": -7.387212753295898, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": 
"probably", "id": 185, "lower": "probably", "norm": "probably", "shape": "xxxx", "prefix": "p", "suffix": "bly", "length": 8, "cluster": "5754", "prob": -7.395048141479492, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "new", "id": 186, "lower": "new", "norm": "new", "shape": "xxx", "prefix": "n", "suffix": "new", "length": 3, "cluster": "199", "prob": -7.398182392120361, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "made", "id": 187, "lower": "made", "norm": "made", "shape": "xxxx", "prefix": "m", "suffix": "ade", "length": 4, "cluster": "120490", "prob": -7.399899005889893, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "day", "id": 188, "lower": "day", "norm": "day", "shape": "xxx", "prefix": "d", "suffix": "day", "length": 3, "cluster": "989", "prob": -7.400947093963623, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "We", "id": 189, "lower": "we", "norm": "We", "shape": "Xx", 
"prefix": "W", "suffix": "We", "length": 2, "cluster": "858", "prob": -7.402578353881836, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "every", "id": 190, "lower": "every", "norm": "every", "shape": "xxxx", "prefix": "e", "suffix": "ery", "length": 5, "cluster": "61418", "prob": -7.414647579193115, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "great", "id": 191, "lower": "great", "norm": "great", "shape": "xxxx", "prefix": "g", "suffix": "eat", "length": 5, "cluster": "1831", "prob": -7.420454502105713, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "our", "id": 192, "lower": "our", "norm": "our", "shape": "xxx", "prefix": "o", "suffix": "our", "length": 3, "cluster": "59", "prob": -7.4210286140441895, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "two", "id": 193, "lower": "two", "norm": "two", "shape": "xxx", "prefix": "t", "suffix": "two", "length": 3, "cluster": "15", "prob": 
-7.433600425720215, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "anything", "id": 194, "lower": "anything", "norm": "anything", "shape": "xxxx", "prefix": "a", "suffix": "ing", "length": 8, "cluster": "14314", "prob": -7.439383506774902, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "while", "id": 195, "lower": "while", "norm": "while", "shape": "xxxx", "prefix": "w", "suffix": "ile", "length": 5, "cluster": "6100", "prob": -7.440170764923096, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "few", "id": 196, "lower": "few", "norm": "few", "shape": "xxx", "prefix": "f", "suffix": "few", "length": 3, "cluster": "79", "prob": -7.440912246704102, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "$", "id": 197, "lower": "$", "norm": "$", "shape": "$", "prefix": "$", "suffix": "$", "length": 1, "cluster": "18", "prob": -7.450106620788574, "is_alpha": false, "is_ascii": true, "is_digit": false, 
"is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "bad", "id": 198, "lower": "bad", "norm": "bad", "shape": "xxx", "prefix": "b", "suffix": "bad", "length": 3, "cluster": "551", "prob": -7.452563762664795, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "No", "id": 199, "lower": "no", "norm": "No", "shape": "Xx", "prefix": "N", "suffix": "No", "length": 2, "cluster": "94", "prob": -7.456389427185059, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "little", "id": 200, "lower": "little", "norm": "little", "shape": "xxxx", "prefix": "l", "suffix": "tle", "length": 6, "cluster": "1959", "prob": -7.480203628540039, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "might", "id": 201, "lower": "might", "norm": "might", "shape": "xxxx", "prefix": "m", "suffix": "ght", "length": 5, "cluster": "186", "prob": -7.490107536315918, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, 
"is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "best", "id": 202, "lower": "best", "norm": "best", "shape": "xxxx", "prefix": "b", "suffix": "est", "length": 4, "cluster": "479", "prob": -7.492556571960449, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "play", "id": 203, "lower": "play", "norm": "play", "shape": "xxxx", "prefix": "p", "suffix": "lay", "length": 4, "cluster": "1717", "prob": -7.50220251083374, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "shit", "id": 204, "lower": "shit", "norm": "shit", "shape": "xxxx", "prefix": "s", "suffix": "hit", "length": 4, "cluster": "0", "prob": -7.522359371185303, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "try", "id": 205, "lower": "try", "norm": "try", "shape": "xxx", "prefix": "t", "suffix": "try", "length": 3, "cluster": "1930", "prob": -7.540920734405518, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, 
"is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "used", "id": 206, "lower": "used", "norm": "used", "shape": "xxxx", "prefix": "u", "suffix": "sed", "length": 4, "cluster": "15402", "prob": -7.542972087860107, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "long", "id": 207, "lower": "long", "norm": "long", "shape": "xxxx", "prefix": "l", "suffix": "ong", "length": 4, "cluster": "935", "prob": -7.544892311096191, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "doing", "id": 208, "lower": "doing", "norm": "doing", "shape": "xxxx", "prefix": "d", "suffix": "ing", "length": 5, "cluster": "15338", "prob": -7.553442478179932, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "getting", "id": 209, "lower": "getting", "norm": "getting", "shape": "xxxx", "prefix": "g", "suffix": "ing", "length": 7, "cluster": "31722", "prob": -7.564762115478516, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, 
"is_left_punct": false, "is_right_punct": false} +{"orth": "post", "id": 210, "lower": "post", "norm": "post", "shape": "xxxx", "prefix": "p", "suffix": "ost", "length": 4, "cluster": "3733", "prob": -7.565684795379639, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "year", "id": 211, "lower": "year", "norm": "year", "shape": "xxxx", "prefix": "y", "suffix": "ear", "length": 4, "cluster": "29", "prob": -7.567681312561035, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "Do", "id": 212, "lower": "do", "norm": "Do", "shape": "Xx", "prefix": "D", "suffix": "Do", "length": 2, "cluster": "702", "prob": -7.570033073425293, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "life", "id": 213, "lower": "life", "norm": "life", "shape": "xxxx", "prefix": "l", "suffix": "ife", "length": 4, "cluster": "1893", "prob": -7.574200630187988, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "through", "id": 214, 
"lower": "through", "norm": "through", "shape": "xxxx", "prefix": "t", "suffix": "ugh", "length": 7, "cluster": "65532", "prob": -7.575429439544678, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "guy", "id": 215, "lower": "guy", "norm": "guy", "shape": "xxx", "prefix": "g", "suffix": "guy", "length": 3, "cluster": "549", "prob": -7.582011699676514, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "enough", "id": 216, "lower": "enough", "norm": "enough", "shape": "xxxx", "prefix": "e", "suffix": "ugh", "length": 6, "cluster": "1834", "prob": -7.586349010467529, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "ever", "id": 217, "lower": "ever", "norm": "ever", "shape": "xxxx", "prefix": "e", "suffix": "ver", "length": 4, "cluster": "14058", "prob": -7.591183662414551, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "In", "id": 218, "lower": "in", "norm": "In", "shape": "Xx", "prefix": "I", 
"suffix": "In", "length": 2, "cluster": "62", "prob": -7.603263854980469, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "give", "id": 219, "lower": "give", "norm": "give", "shape": "xxxx", "prefix": "g", "suffix": "ive", "length": 4, "cluster": "522", "prob": -7.611863136291504, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "mean", "id": 220, "lower": "mean", "norm": "mean", "shape": "xxxx", "prefix": "m", "suffix": "ean", "length": 4, "cluster": "3082", "prob": -7.611870765686035, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "thought", "id": 221, "lower": "thought", "norm": "thought", "shape": "xxxx", "prefix": "t", "suffix": "ght", "length": 7, "cluster": "650", "prob": -7.614910125732422, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "since", "id": 222, "lower": "since", "norm": "since", "shape": "xxxx", "prefix": "s", "suffix": "nce", "length": 5, "cluster": "468", "prob": 
-7.615171909332275, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "|", "id": 223, "lower": "|", "norm": "|", "shape": "|", "prefix": "|", "suffix": "|", "length": 1, "cluster": "0", "prob": -7.6297454833984375, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "different", "id": 224, "lower": "different", "norm": "different", "shape": "xxxx", "prefix": "d", "suffix": "ent", "length": 9, "cluster": "1319", "prob": -7.630640506744385, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "3", "id": 225, "lower": "3", "norm": "3", "shape": "d", "prefix": "3", "suffix": "3", "length": 1, "cluster": "818", "prob": -7.636006832122803, "is_alpha": false, "is_ascii": true, "is_digit": true, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "last", "id": 226, "lower": "last", "norm": "last", "shape": "xxxx", "prefix": "l", "suffix": "ast", "length": 4, "cluster": "127", "prob": -7.636077404022217, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": 
true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "own", "id": 227, "lower": "own", "norm": "own", "shape": "xxx", "prefix": "o", "suffix": "own", "length": 3, "cluster": "217", "prob": -7.636797904968262, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "1", "id": 228, "lower": "1", "norm": "1", "shape": "d", "prefix": "1", "suffix": "1", "length": 1, "cluster": "306", "prob": -7.639832973480225, "is_alpha": false, "is_ascii": true, "is_digit": true, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "us", "id": 229, "lower": "us", "norm": "us", "shape": "xx", "prefix": "u", "suffix": "us", "length": 2, "cluster": "1898", "prob": -7.643693923950195, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "%", "id": 230, "lower": "%", "norm": "%", "shape": "%", "prefix": "%", "suffix": "%", "length": 1, "cluster": "34", "prob": -7.645323753356934, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": 
false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "Not", "id": 231, "lower": "not", "norm": "Not", "shape": "Xxx", "prefix": "N", "suffix": "Not", "length": 3, "cluster": "1982", "prob": -7.65825080871582, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "put", "id": 232, "lower": "put", "norm": "put", "shape": "xxx", "prefix": "p", "suffix": "put", "length": 3, "cluster": "6314", "prob": -7.666473865509033, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "man", "id": 233, "lower": "man", "norm": "man", "shape": "xxx", "prefix": "m", "suffix": "man", "length": 3, "cluster": "549", "prob": -7.668745517730713, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "may", "id": 234, "lower": "may", "norm": "may", "shape": "xxx", "prefix": "m", "suffix": "may", "length": 3, "cluster": "186", "prob": -7.678494930267334, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, 
"is_left_punct": false, "is_right_punct": false} +{"orth": "makes", "id": 235, "lower": "makes", "norm": "makes", "shape": "xxxx", "prefix": "m", "suffix": "kes", "length": 5, "cluster": "426", "prob": -7.684445858001709, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "money", "id": 236, "lower": "money", "norm": "money", "shape": "xxxx", "prefix": "m", "suffix": "ney", "length": 5, "cluster": "357", "prob": -7.693631172180176, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": ":)", "id": 237, "lower": ":)", "norm": ":)", "shape": ":)", "prefix": ":", "suffix": ":)", "length": 2, "cluster": "0", "prob": -7.694086074829102, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "without", "id": 238, "lower": "without", "norm": "without", "shape": "xxxx", "prefix": "w", "suffix": "out", "length": 7, "cluster": "57340", "prob": -7.694504261016846, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "bit", 
"id": 239, "lower": "bit", "norm": "bit", "shape": "xxx", "prefix": "b", "suffix": "bit", "length": 3, "cluster": "853", "prob": -7.721855640411377, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "person", "id": 240, "lower": "person", "norm": "person", "shape": "xxxx", "prefix": "p", "suffix": "son", "length": 6, "cluster": "549", "prob": -7.727076530456543, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "Also", "id": 241, "lower": "also", "norm": "Also", "shape": "Xxxx", "prefix": "A", "suffix": "lso", "length": 4, "cluster": "254", "prob": -7.734253406524658, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "again", "id": 242, "lower": "again", "norm": "again", "shape": "xxxx", "prefix": "a", "suffix": "ain", "length": 5, "cluster": "28522", "prob": -7.7370924949646, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "Just", "id": 243, "lower": "just", "norm": "Just", "shape": "Xxxx", 
"prefix": "J", "suffix": "ust", "length": 4, "cluster": "1982", "prob": -7.743429183959961, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "both", "id": 244, "lower": "both", "norm": "both", "shape": "xxxx", "prefix": "b", "suffix": "oth", "length": 4, "cluster": "1007", "prob": -7.750914573669434, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "help", "id": 245, "lower": "help", "norm": "help", "shape": "xxxx", "prefix": "h", "suffix": "elp", "length": 4, "cluster": "309", "prob": -7.758815288543701, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "trying", "id": 246, "lower": "trying", "norm": "trying", "shape": "xxxx", "prefix": "t", "suffix": "ing", "length": 6, "cluster": "14378", "prob": -7.759474754333496, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "least", "id": 247, "lower": "least", "norm": "least", "shape": "xxxx", "prefix": "l", "suffix": "ast", "length": 5, "cluster": 
"3690", "prob": -7.7660088539123535, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "come", "id": 248, "lower": "come", "norm": "come", "shape": "xxxx", "prefix": "c", "suffix": "ome", "length": 4, "cluster": "7562", "prob": -7.775856971740723, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "keep", "id": 249, "lower": "keep", "norm": "keep", "shape": "xxxx", "prefix": "k", "suffix": "eep", "length": 4, "cluster": "3338", "prob": -7.778285980224609, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "Thanks", "id": 250, "lower": "thanks", "norm": "Thanks", "shape": "Xxxxx", "prefix": "T", "suffix": "nks", "length": 6, "cluster": "510", "prob": -7.781467914581299, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "read", "id": 251, "lower": "read", "norm": "read", "shape": "xxxx", "prefix": "r", "suffix": "ead", "length": 4, "cluster": "6314", "prob": -7.787075042724609, "is_alpha": true, 
"is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "nt", "id": 252, "lower": "nt", "norm": "nt", "shape": "xx", "prefix": "n", "suffix": "nt", "length": 2, "cluster": "3685", "prob": -7.788322925567627, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "part", "id": 253, "lower": "part", "norm": "part", "shape": "xxxx", "prefix": "p", "suffix": "art", "length": 4, "cluster": "725", "prob": -7.791079521179199, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "let", "id": 254, "lower": "let", "norm": "let", "shape": "xxx", "prefix": "l", "suffix": "let", "length": 3, "cluster": "522", "prob": -7.795135974884033, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "hard", "id": 255, "lower": "hard", "norm": "hard", "shape": "xxxx", "prefix": "h", "suffix": "ard", "length": 4, "cluster": "2538", "prob": -7.795384407043457, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": 
false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "another", "id": 256, "lower": "another", "norm": "another", "shape": "xxxx", "prefix": "a", "suffix": "her", "length": 7, "cluster": "28650", "prob": -7.801506519317627, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "end", "id": 257, "lower": "end", "norm": "end", "shape": "xxx", "prefix": "e", "suffix": "end", "length": 3, "cluster": "21", "prob": -7.816553115844727, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "having", "id": 258, "lower": "having", "norm": "having", "shape": "xxxx", "prefix": "h", "suffix": "ing", "length": 6, "cluster": "130026", "prob": -7.818792819976807, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "As", "id": 259, "lower": "as", "norm": "As", "shape": "Xx", "prefix": "A", "suffix": "As", "length": 2, "cluster": "958", "prob": -7.836142539978027, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, 
"like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "games", "id": 260, "lower": "games", "norm": "games", "shape": "xxxx", "prefix": "g", "suffix": "mes", "length": 5, "cluster": "1485", "prob": -7.836157321929932, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "already", "id": 261, "lower": "already", "norm": "already", "shape": "xxxx", "prefix": "a", "suffix": "ady", "length": 7, "cluster": "634", "prob": -7.838688850402832, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "..", "id": 0, "lower": "..", "norm": "..", "shape": "..", "prefix": ".", "suffix": "..", "length": 2, "cluster": "4906", "prob": -7.840396404266357, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "problem", "id": 262, "lower": "problem", "norm": "problem", "shape": "xxxx", "prefix": "p", "suffix": "lem", "length": 7, "cluster": "16069", "prob": -7.841479301452637, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, 
"is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "kind", "id": 263, "lower": "kind", "norm": "kind", "shape": "xxxx", "prefix": "k", "suffix": "ind", "length": 4, "cluster": "213", "prob": -7.844367980957031, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "old", "id": 264, "lower": "old", "norm": "old", "shape": "xxx", "prefix": "o", "suffix": "old", "length": 3, "cluster": "2346", "prob": -7.845602989196777, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "everyone", "id": 265, "lower": "everyone", "norm": "everyone", "shape": "xxxx", "prefix": "e", "suffix": "one", "length": 8, "cluster": "30698", "prob": -7.850788116455078, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "saying", "id": 266, "lower": "saying", "norm": "saying", "shape": "xxxx", "prefix": "s", "suffix": "ing", "length": 6, "cluster": "3732", "prob": -7.854340076446533, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": 
false, "is_right_punct": false} +{"orth": "idea", "id": 267, "lower": "idea", "norm": "idea", "shape": "xxxx", "prefix": "i", "suffix": "dea", "length": 4, "cluster": "709", "prob": -7.855560779571533, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "else", "id": 268, "lower": "else", "norm": "else", "shape": "xxxx", "prefix": "e", "suffix": "lse", "length": 4, "cluster": "2013", "prob": -7.86043643951416, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "reason", "id": 269, "lower": "reason", "norm": "reason", "shape": "xxxx", "prefix": "r", "suffix": "son", "length": 6, "cluster": "113", "prob": -7.867291450500488, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "Well", "id": 270, "lower": "well", "norm": "Well", "shape": "Xxxx", "prefix": "W", "suffix": "ell", "length": 4, "cluster": "1726", "prob": -7.871857643127441, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "less", "id": 271, 
"lower": "less", "norm": "less", "shape": "xxxx", "prefix": "l", "suffix": "ess", "length": 4, "cluster": "5610", "prob": -7.872425079345703, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "world", "id": 272, "lower": "world", "norm": "world", "shape": "xxxx", "prefix": "w", "suffix": "rld", "length": 5, "cluster": "329", "prob": -7.8744120597839355, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "wrong", "id": 273, "lower": "wrong", "norm": "wrong", "shape": "xxxx", "prefix": "w", "suffix": "ong", "length": 5, "cluster": "4586", "prob": -7.876842021942139, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "How", "id": 274, "lower": "how", "norm": "How", "shape": "Xxx", "prefix": "H", "suffix": "How", "length": 3, "cluster": "702", "prob": -7.879385948181152, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "far", "id": 275, "lower": "far", "norm": "far", "shape": "xxx", "prefix": "f", "suffix": 
"far", "length": 3, "cluster": "6890", "prob": -7.8802924156188965, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "big", "id": 276, "lower": "big", "norm": "big", "shape": "xxx", "prefix": "b", "suffix": "big", "length": 3, "cluster": "135", "prob": -7.880735874176025, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "done", "id": 277, "lower": "done", "norm": "done", "shape": "xxxx", "prefix": "d", "suffix": "one", "length": 4, "cluster": "26282", "prob": -7.886453151702881, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "believe", "id": 278, "lower": "believe", "norm": "believe", "shape": "xxxx", "prefix": "b", "suffix": "eve", "length": 7, "cluster": "138", "prob": -7.886724948883057, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "Yeah", "id": 279, "lower": "yeah", "norm": "Yeah", "shape": "Xxxx", "prefix": "Y", "suffix": "eah", "length": 4, "cluster": "1726", "prob": -7.890377044677734, 
"is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "such", "id": 280, "lower": "such", "norm": "such", "shape": "xxxx", "prefix": "s", "suffix": "uch", "length": 4, "cluster": "111", "prob": -7.894707679748535, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "stuff", "id": 281, "lower": "stuff", "norm": "stuff", "shape": "xxxx", "prefix": "s", "suffix": "uff", "length": 5, "cluster": "6853", "prob": -7.898244380950928, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "away", "id": 282, "lower": "away", "norm": "away", "shape": "xxxx", "prefix": "a", "suffix": "way", "length": 4, "cluster": "3434", "prob": -7.9017462730407715, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "nothing", "id": 283, "lower": "nothing", "norm": "nothing", "shape": "xxxx", "prefix": "n", "suffix": "ing", "length": 7, "cluster": "14314", "prob": -7.909971714019775, "is_alpha": true, "is_ascii": true, "is_digit": false, 
"is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "tell", "id": 284, "lower": "tell", "norm": "tell", "shape": "xxxx", "prefix": "t", "suffix": "ell", "length": 4, "cluster": "1546", "prob": -7.910365581512451, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "looking", "id": 285, "lower": "looking", "norm": "looking", "shape": "xxxx", "prefix": "l", "suffix": "ing", "length": 7, "cluster": "1066", "prob": -7.911639213562012, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "start", "id": 286, "lower": "start", "norm": "start", "shape": "xxxx", "prefix": "s", "suffix": "art", "length": 5, "cluster": "3978", "prob": -7.923925876617432, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "using", "id": 287, "lower": "using", "norm": "using", "shape": "xxxx", "prefix": "u", "suffix": "ing", "length": 5, "cluster": "7146", "prob": -7.938363075256348, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, 
"is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "able", "id": 288, "lower": "able", "norm": "able", "shape": "xxxx", "prefix": "a", "suffix": "ble", "length": 4, "cluster": "6186", "prob": -7.939544677734375, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "place", "id": 289, "lower": "place", "norm": "place", "shape": "xxxx", "prefix": "p", "suffix": "ace", "length": 5, "cluster": "6245", "prob": -7.954748153686523, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "high", "id": 290, "lower": "high", "norm": "high", "shape": "xxxx", "prefix": "h", "suffix": "igh", "length": 4, "cluster": "167", "prob": -7.963760852813721, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "until", "id": 291, "lower": "until", "norm": "until", "shape": "xxxx", "prefix": "u", "suffix": "til", "length": 5, "cluster": "2516", "prob": -7.964784622192383, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, 
"like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "either", "id": 292, "lower": "either", "norm": "either", "shape": "xxxx", "prefix": "e", "suffix": "her", "length": 6, "cluster": "30698", "prob": -7.965897560119629, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "seen", "id": 293, "lower": "seen", "norm": "seen", "shape": "xxxx", "prefix": "s", "suffix": "een", "length": 4, "cluster": "26282", "prob": -7.97322416305542, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "times", "id": 294, "lower": "times", "norm": "times", "shape": "xxxx", "prefix": "t", "suffix": "mes", "length": 5, "cluster": "61", "prob": -7.9734907150268555, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "real", "id": 295, "lower": "real", "norm": "real", "shape": "xxxx", "prefix": "r", "suffix": "eal", "length": 4, "cluster": "503", "prob": -7.981620788574219, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, 
"is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "When", "id": 296, "lower": "when", "norm": "When", "shape": "Xxxx", "prefix": "W", "suffix": "hen", "length": 4, "cluster": "190", "prob": -7.982150554656982, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "making", "id": 297, "lower": "making", "norm": "making", "shape": "xxxx", "prefix": "m", "suffix": "ing", "length": 6, "cluster": "7146", "prob": -7.985988616943359, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "seems", "id": 298, "lower": "seems", "norm": "seems", "shape": "xxxx", "prefix": "s", "suffix": "ems", "length": 5, "cluster": "16298", "prob": -7.989145278930664, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "fuck", "id": 299, "lower": "fuck", "norm": "fuck", "shape": "xxxx", "prefix": "f", "suffix": "uck", "length": 4, "cluster": "0", "prob": -7.992913246154785, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, 
"is_right_punct": false} +{"orth": "fucking", "id": 300, "lower": "fucking", "norm": "fucking", "shape": "xxxx", "prefix": "f", "suffix": "ing", "length": 7, "cluster": "0", "prob": -7.993165969848633, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "\n\n\n", "id": 0, "lower": "\n\n\n", "norm": "\n\n\n", "shape": "\n\n\n", "prefix": "\n", "suffix": "\n\n\n", "length": 3, "cluster": "0", "prob": -7.996075630187988, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "next", "id": 301, "lower": "next", "norm": "next", "shape": "xxxx", "prefix": "n", "suffix": "ext", "length": 4, "cluster": "255", "prob": -7.996739864349365, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "anyone", "id": 302, "lower": "anyone", "norm": "anyone", "shape": "xxxx", "prefix": "a", "suffix": "one", "length": 6, "cluster": "30698", "prob": -7.997350215911865, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "#", "id": 303, 
"lower": "#", "norm": "#", "shape": "#", "prefix": "#", "suffix": "#", "length": 1, "cluster": "18", "prob": -8.001263618469238, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "looks", "id": 304, "lower": "looks", "norm": "looks", "shape": "xxxx", "prefix": "l", "suffix": "oks", "length": 5, "cluster": "2442", "prob": -8.001678466796875, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "everything", "id": 305, "lower": "everything", "norm": "everything", "shape": "xxxx", "prefix": "e", "suffix": "ing", "length": 10, "cluster": "14314", "prob": -8.00584602355957, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "Oh", "id": 306, "lower": "oh", "norm": "Oh", "shape": "Xx", "prefix": "O", "suffix": "Oh", "length": 2, "cluster": "1726", "prob": -8.007224082946777, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "nice", "id": 307, "lower": "nice", "norm": "nice", "shape": "xxxx", "prefix": "n", 
"suffix": "ice", "length": 4, "cluster": "551", "prob": -8.009806632995605, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "once", "id": 308, "lower": "once", "norm": "once", "shape": "xxxx", "prefix": "o", "suffix": "nce", "length": 4, "cluster": "22250", "prob": -8.010163307189941, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "show", "id": 309, "lower": "show", "norm": "show", "shape": "xxxx", "prefix": "s", "suffix": "how", "length": 4, "cluster": "7690", "prob": -8.011373519897461, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "maybe", "id": 310, "lower": "maybe", "norm": "maybe", "shape": "xxxx", "prefix": "m", "suffix": "ybe", "length": 5, "cluster": "60650", "prob": -8.020626068115234, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "fact", "id": 311, "lower": "fact", "norm": "fact", "shape": "xxxx", "prefix": "f", "suffix": "act", "length": 4, "cluster": "369", "prob": 
-8.032754898071289, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "wo", "id": 312, "lower": "wo", "norm": "wo", "shape": "xx", "prefix": "w", "suffix": "wo", "length": 2, "cluster": "26", "prob": -8.0400972366333, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "5", "id": 313, "lower": "5", "norm": "5", "shape": "d", "prefix": "5", "suffix": "5", "length": 1, "cluster": "818", "prob": -8.040534019470215, "is_alpha": false, "is_ascii": true, "is_digit": true, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "free", "id": 314, "lower": "free", "norm": "free", "shape": "xxxx", "prefix": "f", "suffix": "ree", "length": 4, "cluster": "6634", "prob": -8.0440092086792, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "understand", "id": 315, "lower": "understand", "norm": "understand", "shape": "xxxx", "prefix": "u", "suffix": "and", "length": 10, "cluster": "3722", "prob": -8.052404403686523, "is_alpha": true, "is_ascii": true, "is_digit": false, 
"is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "team", "id": 316, "lower": "team", "norm": "team", "shape": "xxxx", "prefix": "t", "suffix": "eam", "length": 4, "cluster": "1061", "prob": -8.053070068359375, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "....", "id": 317, "lower": "....", "norm": "....", "shape": "....", "prefix": ".", "suffix": "...", "length": 4, "cluster": "1202", "prob": -8.05477523803711, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "against", "id": 318, "lower": "against", "norm": "against", "shape": "xxxx", "prefix": "a", "suffix": "nst", "length": 7, "cluster": "24572", "prob": -8.064282417297363, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "live", "id": 319, "lower": "live", "norm": "live", "shape": "xxxx", "prefix": "l", "suffix": "ive", "length": 4, "cluster": "1418", "prob": -8.065953254699707, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, 
"is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": " \n\n", "id": 0, "lower": " \n\n", "norm": " \n\n", "shape": " \n\n", "prefix": " ", "suffix": " \n\n", "length": 3, "cluster": "0", "prob": -8.068946838378906, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "Why", "id": 320, "lower": "why", "norm": "Why", "shape": "Xxx", "prefix": "W", "suffix": "Why", "length": 3, "cluster": "702", "prob": -8.06901741027832, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "whole", "id": 321, "lower": "whole", "norm": "whole", "shape": "xxxx", "prefix": "w", "suffix": "ole", "length": 5, "cluster": "71", "prob": -8.070209503173828, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "For", "id": 322, "lower": "for", "norm": "For", "shape": "Xxx", "prefix": "F", "suffix": "For", "length": 3, "cluster": "1342", "prob": -8.072200775146484, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, 
"like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "guys", "id": 323, "lower": "guys", "norm": "guys", "shape": "xxxx", "prefix": "g", "suffix": "uys", "length": 4, "cluster": "365", "prob": -8.075167655944824, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "job", "id": 324, "lower": "job", "norm": "job", "shape": "xxx", "prefix": "j", "suffix": "job", "length": 3, "cluster": "37", "prob": -8.082273483276367, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "etc", "id": 325, "lower": "etc", "norm": "etc", "shape": "xxx", "prefix": "e", "suffix": "etc", "length": 3, "cluster": "26", "prob": -8.087606430053711, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "4", "id": 326, "lower": "4", "norm": "4", "shape": "d", "prefix": "4", "suffix": "4", "length": 1, "cluster": "818", "prob": -8.088510513305664, "is_alpha": false, "is_ascii": true, "is_digit": true, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, 
"is_right_punct": false} +{"orth": "went", "id": 327, "lower": "went", "norm": "went", "shape": "xxxx", "prefix": "w", "suffix": "ent", "length": 4, "cluster": "7338", "prob": -8.091073989868164, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "school", "id": 328, "lower": "school", "norm": "school", "shape": "xxxx", "prefix": "s", "suffix": "ool", "length": 6, "cluster": "1829", "prob": -8.096077919006348, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "guess", "id": 329, "lower": "guess", "norm": "guess", "shape": "xxxx", "prefix": "g", "suffix": "ess", "length": 5, "cluster": "650", "prob": -8.097951889038086, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "friends", "id": 330, "lower": "friends", "norm": "friends", "shape": "xxxx", "prefix": "f", "suffix": "nds", "length": 7, "cluster": "3565", "prob": -8.10158634185791, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "between", "id": 
331, "lower": "between", "norm": "between", "shape": "xxxx", "prefix": "b", "suffix": "een", "length": 7, "cluster": "12284", "prob": -8.106386184692383, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "case", "id": 332, "lower": "case", "norm": "case", "shape": "xxxx", "prefix": "c", "suffix": "ase", "length": 4, "cluster": "3269", "prob": -8.106882095336914, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "She", "id": 333, "lower": "she", "norm": "She", "shape": "Xxx", "prefix": "S", "suffix": "She", "length": 3, "cluster": "126", "prob": -8.119241714477539, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "each", "id": 334, "lower": "each", "norm": "each", "shape": "xxxx", "prefix": "e", "suffix": "ach", "length": 4, "cluster": "32746", "prob": -8.123948097229004, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "fun", "id": 335, "lower": "fun", "norm": "fun", "shape": "xxx", "prefix": "f", 
"suffix": "fun", "length": 3, "cluster": "16229", "prob": -8.124406814575195, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "agree", "id": 336, "lower": "agree", "norm": "agree", "shape": "xxxx", "prefix": "a", "suffix": "ree", "length": 5, "cluster": "394", "prob": -8.12778091430664, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "Is", "id": 337, "lower": "is", "norm": "Is", "shape": "Xx", "prefix": "I", "suffix": "Is", "length": 2, "cluster": "1214", "prob": -8.129456520080566, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "buy", "id": 338, "lower": "buy", "norm": "buy", "shape": "xxx", "prefix": "b", "suffix": "buy", "length": 3, "cluster": "2826", "prob": -8.142950057983398, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "Yes", "id": 339, "lower": "yes", "norm": "Yes", "shape": "Xxx", "prefix": "Y", "suffix": "Yes", "length": 3, "cluster": "1726", "prob": -8.147512435913086, 
"is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "run", "id": 340, "lower": "run", "norm": "run", "shape": "xxx", "prefix": "r", "suffix": "run", "length": 3, "cluster": "437", "prob": -8.156776428222656, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "change", "id": 341, "lower": "change", "norm": "change", "shape": "xxxx", "prefix": "c", "suffix": "nge", "length": 6, "cluster": "2997", "prob": -8.157740592956543, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "found", "id": 342, "lower": "found", "norm": "found", "shape": "xxxx", "prefix": "f", "suffix": "und", "length": 5, "cluster": "13738", "prob": -8.182107925415039, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "question", "id": 343, "lower": "question", "norm": "question", "shape": "xxxx", "prefix": "q", "suffix": "ion", "length": 8, "cluster": "709", "prob": -8.185464859008789, "is_alpha": true, "is_ascii": true, "is_digit": false, 
"is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "top", "id": 344, "lower": "top", "norm": "top", "shape": "xxx", "prefix": "t", "suffix": "top", "length": 3, "cluster": "1479", "prob": -8.191086769104004, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "playing", "id": 345, "lower": "playing", "norm": "playing", "shape": "xxxx", "prefix": "p", "suffix": "ing", "length": 7, "cluster": "11242", "prob": -8.191595077514648, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "name", "id": 346, "lower": "name", "norm": "name", "shape": "xxxx", "prefix": "n", "suffix": "ame", "length": 4, "cluster": "4021", "prob": -8.19616985321045, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "mind", "id": 347, "lower": "mind", "norm": "mind", "shape": "xxxx", "prefix": "m", "suffix": "ind", "length": 4, "cluster": "1893", "prob": -8.197138786315918, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, 
"is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "myself", "id": 348, "lower": "myself", "norm": "myself", "shape": "xxxx", "prefix": "m", "suffix": "elf", "length": 6, "cluster": "8042", "prob": -8.200143814086914, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "gets", "id": 349, "lower": "gets", "norm": "gets", "shape": "xxxx", "prefix": "g", "suffix": "ets", "length": 4, "cluster": "10666", "prob": -8.202808380126953, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "ago", "id": 350, "lower": "ago", "norm": "ago", "shape": "xxx", "prefix": "a", "suffix": "ago", "length": 3, "cluster": "6442", "prob": -8.206598281860352, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "friend", "id": 351, "lower": "friend", "norm": "friend", "shape": "xxxx", "prefix": "f", "suffix": "end", "length": 6, "cluster": "1061", "prob": -8.210515975952148, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, 
"like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "talking", "id": 352, "lower": "talking", "norm": "talking", "shape": "xxxx", "prefix": "t", "suffix": "ing", "length": 7, "cluster": "4586", "prob": -8.22729778289795, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "days", "id": 353, "lower": "days", "norm": "days", "shape": "xxxx", "prefix": "d", "suffix": "ays", "length": 4, "cluster": "317", "prob": -8.227437973022461, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "yet", "id": 354, "lower": "yet", "norm": "yet", "shape": "xxx", "prefix": "y", "suffix": "yet", "length": 3, "cluster": "32490", "prob": -8.229137420654297, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "means", "id": 355, "lower": "means", "norm": "means", "shape": "xxxx", "prefix": "m", "suffix": "ans", "length": 5, "cluster": "31146", "prob": -8.234617233276367, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, 
"is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "hope", "id": 356, "lower": "hope", "norm": "hope", "shape": "xxxx", "prefix": "h", "suffix": "ope", "length": 4, "cluster": "650", "prob": -8.236272811889648, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "almost", "id": 357, "lower": "almost", "norm": "almost", "shape": "xxxx", "prefix": "a", "suffix": "ost", "length": 6, "cluster": "7402", "prob": -8.236738204956055, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "yourself", "id": 358, "lower": "yourself", "norm": "yourself", "shape": "xxxx", "prefix": "y", "suffix": "elf", "length": 8, "cluster": "8042", "prob": -8.2402982711792, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "awesome", "id": 359, "lower": "awesome", "norm": "awesome", "shape": "xxxx", "prefix": "a", "suffix": "ome", "length": 7, "cluster": "871", "prob": -8.247021675109863, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, 
"is_left_punct": false, "is_right_punct": false} +{"orth": "care", "id": 360, "lower": "care", "norm": "care", "shape": "xxxx", "prefix": "c", "suffix": "are", "length": 4, "cluster": "1229", "prob": -8.248679161071777, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "quite", "id": 361, "lower": "quite", "norm": "quite", "shape": "xxxx", "prefix": "q", "suffix": "ite", "length": 5, "cluster": "15338", "prob": -8.254060745239258, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "10", "id": 362, "lower": "10", "norm": "10", "shape": "dd", "prefix": "1", "suffix": "10", "length": 2, "cluster": "1970", "prob": -8.258377075195312, "is_alpha": false, "is_ascii": true, "is_digit": true, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "true", "id": 363, "lower": "true", "norm": "true", "shape": "xxxx", "prefix": "t", "suffix": "rue", "length": 4, "cluster": "4586", "prob": -8.259368896484375, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "remember", "id": 
364, "lower": "remember", "norm": "remember", "shape": "xxxx", "prefix": "r", "suffix": "ber", "length": 8, "cluster": "3722", "prob": -8.259916305541992, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "definitely", "id": 365, "lower": "definitely", "norm": "definitely", "shape": "xxxx", "prefix": "d", "suffix": "ely", "length": 10, "cluster": "7802", "prob": -8.264209747314453, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "call", "id": 366, "lower": "call", "norm": "call", "shape": "xxxx", "prefix": "c", "suffix": "all", "length": 4, "cluster": "3765", "prob": -8.267317771911621, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "pay", "id": 367, "lower": "pay", "norm": "pay", "shape": "xxx", "prefix": "p", "suffix": "pay", "length": 3, "cluster": "7946", "prob": -8.26932144165039, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "stop", "id": 368, "lower": "stop", "norm": "stop", "shape": 
"xxxx", "prefix": "s", "suffix": "top", "length": 4, "cluster": "3338", "prob": -8.272970199584961, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "set", "id": 369, "lower": "set", "norm": "set", "shape": "xxx", "prefix": "s", "suffix": "set", "length": 3, "cluster": "2218", "prob": -8.285635948181152, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "started", "id": 370, "lower": "started", "norm": "started", "shape": "xxxx", "prefix": "s", "suffix": "ted", "length": 7, "cluster": "3242", "prob": -8.286487579345703, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "instead", "id": 371, "lower": "instead", "norm": "instead", "shape": "xxxx", "prefix": "i", "suffix": "ead", "length": 7, "cluster": "2005", "prob": -8.292781829833984, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "story", "id": 372, "lower": "story", "norm": "story", "shape": "xxxx", "prefix": "s", "suffix": "ory", "length": 5, 
"cluster": "6853", "prob": -8.293317794799805, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "level", "id": 373, "lower": "level", "norm": "level", "shape": "xxxx", "prefix": "l", "suffix": "vel", "length": 5, "cluster": "6117", "prob": -8.29642391204834, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "left", "id": 374, "lower": "left", "norm": "left", "shape": "xxxx", "prefix": "l", "suffix": "eft", "length": 4, "cluster": "54954", "prob": -8.296669006347656, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "week", "id": 375, "lower": "week", "norm": "week", "shape": "xxxx", "prefix": "w", "suffix": "eek", "length": 4, "cluster": "157", "prob": -8.300933837890625, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "system", "id": 376, "lower": "system", "norm": "system", "shape": "xxxx", "prefix": "s", "suffix": "tem", "length": 6, "cluster": "4901", "prob": -8.303738594055176, "is_alpha": 
true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "full", "id": 377, "lower": "full", "norm": "full", "shape": "xxxx", "prefix": "f", "suffix": "ull", "length": 4, "cluster": "4071", "prob": -8.303950309753418, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "rather", "id": 378, "lower": "rather", "norm": "rather", "shape": "xxxx", "prefix": "r", "suffix": "her", "length": 6, "cluster": "6698", "prob": -8.312031745910645, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "video", "id": 379, "lower": "video", "norm": "video", "shape": "xxxx", "prefix": "v", "suffix": "deo", "length": 5, "cluster": "1975", "prob": -8.316000938415527, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "home", "id": 380, "lower": "home", "norm": "home", "shape": "xxxx", "prefix": "h", "suffix": "ome", "length": 4, "cluster": "1013", "prob": -8.316133499145508, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, 
"is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "women", "id": 381, "lower": "women", "norm": "women", "shape": "xxxx", "prefix": "w", "suffix": "men", "length": 5, "cluster": "877", "prob": -8.317564964294434, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "usually", "id": 382, "lower": "usually", "norm": "usually", "shape": "xxxx", "prefix": "u", "suffix": "lly", "length": 7, "cluster": "3706", "prob": -8.324220657348633, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "side", "id": 383, "lower": "side", "norm": "side", "shape": "xxxx", "prefix": "s", "suffix": "ide", "length": 4, "cluster": "8037", "prob": -8.327798843383789, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "wanted", "id": 384, "lower": "wanted", "norm": "wanted", "shape": "xxxx", "prefix": "w", "suffix": "ted", "length": 6, "cluster": "30634", "prob": -8.329934120178223, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": 
false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "sense", "id": 385, "lower": "sense", "norm": "sense", "shape": "xxxx", "prefix": "s", "suffix": "nse", "length": 5, "cluster": "613", "prob": -8.338400840759277, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "Your", "id": 386, "lower": "your", "norm": "Your", "shape": "Xxxx", "prefix": "Y", "suffix": "our", "length": 4, "cluster": "94", "prob": -8.347208023071289, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "second", "id": 387, "lower": "second", "norm": "second", "shape": "xxxx", "prefix": "s", "suffix": "ond", "length": 6, "cluster": "31", "prob": -8.351142883300781, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "comment", "id": 388, "lower": "comment", "norm": "comment", "shape": "xxxx", "prefix": "c", "suffix": "ent", "length": 7, "cluster": "757", "prob": -8.35578727722168, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": 
false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "course", "id": 389, "lower": "course", "norm": "course", "shape": "xxxx", "prefix": "c", "suffix": "rse", "length": 6, "cluster": "1009", "prob": -8.35777759552002, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "ask", "id": 390, "lower": "ask", "norm": "ask", "shape": "xxx", "prefix": "a", "suffix": "ask", "length": 3, "cluster": "1546", "prob": -8.35922622680664, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "Or", "id": 391, "lower": "or", "norm": "Or", "shape": "Xx", "prefix": "O", "suffix": "Or", "length": 2, "cluster": "1726", "prob": -8.361105918884277, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "seem", "id": 392, "lower": "seem", "norm": "seem", "shape": "xxxx", "prefix": "s", "suffix": "eem", "length": 4, "cluster": "906", "prob": -8.363061904907227, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, 
"is_left_punct": false, "is_right_punct": false} +{"orth": "Maybe", "id": 393, "lower": "maybe", "norm": "Maybe", "shape": "Xxxxx", "prefix": "M", "suffix": "ybe", "length": 5, "cluster": "190", "prob": -8.364654541015625, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "must", "id": 394, "lower": "must", "norm": "must", "shape": "xxxx", "prefix": "m", "suffix": "ust", "length": 4, "cluster": "698", "prob": -8.365957260131836, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "Then", "id": 395, "lower": "then", "norm": "Then", "shape": "Xxxx", "prefix": "T", "suffix": "hen", "length": 4, "cluster": "1726", "prob": -8.369159698486328, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "small", "id": 396, "lower": "small", "norm": "small", "shape": "xxxx", "prefix": "s", "suffix": "all", "length": 5, "cluster": "391", "prob": -8.371565818786621, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "car", 
"id": 397, "lower": "car", "norm": "car", "shape": "xxx", "prefix": "c", "suffix": "car", "length": 3, "cluster": "1145", "prob": -8.374984741210938, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "hate", "id": 398, "lower": "hate", "norm": "hate", "shape": "xxxx", "prefix": "h", "suffix": "ate", "length": 4, "cluster": "906", "prob": -8.380099296569824, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "came", "id": 399, "lower": "came", "norm": "came", "shape": "xxxx", "prefix": "c", "suffix": "ame", "length": 4, "cluster": "15530", "prob": -8.382718086242676, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "watch", "id": 400, "lower": "watch", "norm": "watch", "shape": "xxxx", "prefix": "w", "suffix": "tch", "length": 5, "cluster": "3765", "prob": -8.386272430419922, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "experience", "id": 401, "lower": "experience", "norm": "experience", "shape": 
"xxxx", "prefix": "e", "suffix": "nce", "length": 10, "cluster": "2917", "prob": -8.387101173400879, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "cool", "id": 402, "lower": "cool", "norm": "cool", "shape": "xxxx", "prefix": "c", "suffix": "ool", "length": 4, "cluster": "565", "prob": -8.393746376037598, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "matter", "id": 403, "lower": "matter", "norm": "matter", "shape": "xxxx", "prefix": "m", "suffix": "ter", "length": 6, "cluster": "4805", "prob": -8.395515441894531, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "others", "id": 404, "lower": "others", "norm": "others", "shape": "xxxx", "prefix": "o", "suffix": "ers", "length": 6, "cluster": "1901", "prob": -8.396527290344238, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "completely", "id": 405, "lower": "completely", "norm": "completely", "shape": "xxxx", "prefix": "c", "suffix": "ely", 
"length": 10, "cluster": "12010", "prob": -8.40324592590332, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "All", "id": 406, "lower": "all", "norm": "All", "shape": "Xxx", "prefix": "A", "suffix": "All", "length": 3, "cluster": "1214", "prob": -8.403707504272461, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "called", "id": 407, "lower": "called", "norm": "called", "shape": "xxxx", "prefix": "c", "suffix": "led", "length": 6, "cluster": "11946", "prob": -8.404229164123535, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "under", "id": 408, "lower": "under", "norm": "under", "shape": "xxxx", "prefix": "u", "suffix": "der", "length": 5, "cluster": "32764", "prob": -8.406200408935547, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "yes", "id": 409, "lower": "yes", "norm": "yes", "shape": "xxx", "prefix": "y", "suffix": "yes", "length": 3, "cluster": "15146", "prob": -8.41097354888916, 
"is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "Now", "id": 410, "lower": "now", "norm": "Now", "shape": "Xxx", "prefix": "N", "suffix": "Now", "length": 3, "cluster": "1726", "prob": -8.417712211608887, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "Please", "id": 411, "lower": "please", "norm": "Please", "shape": "Xxxxx", "prefix": "P", "suffix": "ase", "length": 6, "cluster": "3582", "prob": -8.41897964477539, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "worth", "id": 412, "lower": "worth", "norm": "worth", "shape": "xxxx", "prefix": "w", "suffix": "rth", "length": 5, "cluster": "981", "prob": -8.423324584960938, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "says", "id": 413, "lower": "says", "norm": "says", "shape": "xxxx", "prefix": "s", "suffix": "ays", "length": 4, "cluster": "244", "prob": -8.426565170288086, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": 
true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "comes", "id": 414, "lower": "comes", "norm": "comes", "shape": "xxxx", "prefix": "c", "suffix": "mes", "length": 5, "cluster": "15530", "prob": -8.428640365600586, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "fine", "id": 415, "lower": "fine", "norm": "fine", "shape": "xxxx", "prefix": "f", "suffix": "ine", "length": 4, "cluster": "8057", "prob": -8.428781509399414, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "Thank", "id": 416, "lower": "thank", "norm": "Thank", "shape": "Xxxxx", "prefix": "T", "suffix": "ank", "length": 5, "cluster": "190", "prob": -8.434432983398438, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": " \n", "id": 0, "lower": " \n", "norm": " \n", "shape": " \n", "prefix": " ", "suffix": " \n", "length": 2, "cluster": "0", "prob": -8.435208320617676, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, 
"is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "works", "id": 417, "lower": "works", "norm": "works", "shape": "xxxx", "prefix": "w", "suffix": "rks", "length": 5, "cluster": "77", "prob": -8.436944961547852, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "exactly", "id": 418, "lower": "exactly", "norm": "exactly", "shape": "xxxx", "prefix": "e", "suffix": "tly", "length": 7, "cluster": "15338", "prob": -8.43747615814209, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "heard", "id": 419, "lower": "heard", "norm": "heard", "shape": "xxxx", "prefix": "h", "suffix": "ard", "length": 5, "cluster": "26282", "prob": -8.4396333694458, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "possible", "id": 420, "lower": "possible", "norm": "possible", "shape": "xxxx", "prefix": "p", "suffix": "ble", "length": 8, "cluster": "2535", "prob": -8.44277572631836, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, 
"like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "thinking", "id": 421, "lower": "thinking", "norm": "thinking", "shape": "xxxx", "prefix": "t", "suffix": "ing", "length": 8, "cluster": "4586", "prob": -8.442947387695312, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "hours", "id": 422, "lower": "hours", "norm": "hours", "shape": "xxxx", "prefix": "h", "suffix": "urs", "length": 5, "cluster": "957", "prob": -8.445417404174805, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "working", "id": 423, "lower": "working", "norm": "working", "shape": "xxxx", "prefix": "w", "suffix": "ing", "length": 7, "cluster": "27626", "prob": -8.44786262512207, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "took", "id": 424, "lower": "took", "norm": "took", "shape": "xxxx", "prefix": "t", "suffix": "ook", "length": 4, "cluster": "27050", "prob": -8.452874183654785, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, 
"is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "thanks", "id": 425, "lower": "thanks", "norm": "thanks", "shape": "xxxx", "prefix": "t", "suffix": "nks", "length": 6, "cluster": "554", "prob": -8.457283973693848, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "head", "id": 426, "lower": "head", "norm": "head", "shape": "xxxx", "prefix": "h", "suffix": "ead", "length": 4, "cluster": "1813", "prob": -8.458500862121582, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "power", "id": 427, "lower": "power", "norm": "power", "shape": "xxxx", "prefix": "p", "suffix": "wer", "length": 5, "cluster": "11621", "prob": -8.460216522216797, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "happen", "id": 428, "lower": "happen", "norm": "happen", "shape": "xxxx", "prefix": "h", "suffix": "pen", "length": 6, "cluster": "3466", "prob": -8.465093612670898, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, 
"is_left_punct": false, "is_right_punct": false} +{"orth": "goes", "id": 429, "lower": "goes", "norm": "goes", "shape": "xxxx", "prefix": "g", "suffix": "oes", "length": 4, "cluster": "7338", "prob": -8.465673446655273, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "Good", "id": 430, "lower": "good", "norm": "Good", "shape": "Xxxx", "prefix": "G", "suffix": "ood", "length": 4, "cluster": "614", "prob": -8.468016624450684, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "saw", "id": 431, "lower": "saw", "norm": "saw", "shape": "xxx", "prefix": "s", "suffix": "saw", "length": 3, "cluster": "6570", "prob": -8.472514152526855, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "please", "id": 432, "lower": "please", "norm": "please", "shape": "xxxx", "prefix": "p", "suffix": "ase", "length": 6, "cluster": "309", "prob": -8.473013877868652, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "couple", 
"id": 433, "lower": "couple", "norm": "couple", "shape": "xxxx", "prefix": "c", "suffix": "ple", "length": 6, "cluster": "853", "prob": -8.47309398651123, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "hit", "id": 434, "lower": "hit", "norm": "hit", "shape": "xxx", "prefix": "h", "suffix": "hit", "length": 3, "cluster": "682", "prob": -8.473491668701172, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "likely", "id": 435, "lower": "likely", "norm": "likely", "shape": "xxxx", "prefix": "l", "suffix": "ely", "length": 6, "cluster": "42", "prob": -8.47359561920166, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "ones", "id": 436, "lower": "ones", "norm": "ones", "shape": "xxxx", "prefix": "o", "suffix": "nes", "length": 4, "cluster": "15821", "prob": -8.474469184875488, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "often", "id": 437, "lower": "often", "norm": "often", "shape": "xxxx", 
"prefix": "o", "suffix": "ten", "length": 5, "cluster": "3706", "prob": -8.476237297058105, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "talk", "id": 438, "lower": "talk", "norm": "talk", "shape": "xxxx", "prefix": "t", "suffix": "alk", "length": 4, "cluster": "394", "prob": -8.479889869689941, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "issue", "id": 439, "lower": "issue", "norm": "issue", "shape": "xxxx", "prefix": "i", "suffix": "sue", "length": 5, "cluster": "3525", "prob": -8.48391342163086, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "easy", "id": 440, "lower": "easy", "norm": "easy", "shape": "xxxx", "prefix": "e", "suffix": "asy", "length": 4, "cluster": "2538", "prob": -8.489182472229004, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "One", "id": 441, "lower": "one", "norm": "One", "shape": "Xxx", "prefix": "O", "suffix": "One", "length": 3, "cluster": "350", "prob": 
-8.494391441345215, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "needs", "id": 442, "lower": "needs", "norm": "needs", "shape": "xxxx", "prefix": "n", "suffix": "eds", "length": 5, "cluster": "14250", "prob": -8.49528694152832, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "add", "id": 443, "lower": "add", "norm": "add", "shape": "xxx", "prefix": "a", "suffix": "add", "length": 3, "cluster": "3594", "prob": -8.496837615966797, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "support", "id": 444, "lower": "support", "norm": "support", "shape": "xxxx", "prefix": "s", "suffix": "ort", "length": 7, "cluster": "7861", "prob": -8.503355026245117, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "face", "id": 445, "lower": "face", "norm": "face", "shape": "xxxx", "prefix": "f", "suffix": "ace", "length": 4, "cluster": "1685", "prob": -8.504852294921875, "is_alpha": true, "is_ascii": true, 
"is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "hand", "id": 446, "lower": "hand", "norm": "hand", "shape": "xxxx", "prefix": "h", "suffix": "and", "length": 4, "cluster": "8037", "prob": -8.504961967468262, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "half", "id": 447, "lower": "half", "norm": "half", "shape": "xxxx", "prefix": "h", "suffix": "alf", "length": 4, "cluster": "469", "prob": -8.508658409118652, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "check", "id": 448, "lower": "check", "norm": "check", "shape": "xxxx", "prefix": "c", "suffix": "eck", "length": 5, "cluster": "2485", "prob": -8.512067794799805, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "night", "id": 449, "lower": "night", "norm": "night", "shape": "xxxx", "prefix": "n", "suffix": "ght", "length": 5, "cluster": "93", "prob": -8.517072677612305, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": 
false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "months", "id": 450, "lower": "months", "norm": "months", "shape": "xxxx", "prefix": "m", "suffix": "ths", "length": 6, "cluster": "445", "prob": -8.517988204956055, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "kids", "id": 451, "lower": "kids", "norm": "kids", "shape": "xxxx", "prefix": "k", "suffix": "ids", "length": 4, "cluster": "877", "prob": -8.520237922668457, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "players", "id": 452, "lower": "players", "norm": "players", "shape": "xxxx", "prefix": "p", "suffix": "ers", "length": 7, "cluster": "3565", "prob": -8.520515441894531, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "line", "id": 453, "lower": "line", "norm": "line", "shape": "xxxx", "prefix": "l", "suffix": "ine", "length": 4, "cluster": "3941", "prob": -8.522600173950195, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": 
false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "told", "id": 454, "lower": "told", "norm": "told", "shape": "xxxx", "prefix": "t", "suffix": "old", "length": 4, "cluster": "20138", "prob": -8.52303409576416, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "example", "id": 455, "lower": "example", "norm": "example", "shape": "xxxx", "prefix": "e", "suffix": "ple", "length": 7, "cluster": "497", "prob": -8.523116111755371, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "played", "id": 456, "lower": "played", "norm": "played", "shape": "xxxx", "prefix": "p", "suffix": "yed", "length": 6, "cluster": "32426", "prob": -8.528886795043945, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "reddit", "id": 457, "lower": "reddit", "norm": "reddit", "shape": "xxxx", "prefix": "r", "suffix": "dit", "length": 6, "cluster": "0", "prob": -8.52908992767334, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, 
"is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "based", "id": 458, "lower": "based", "norm": "based", "shape": "xxxx", "prefix": "b", "suffix": "sed", "length": 5, "cluster": "1578", "prob": -8.53032112121582, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "tried", "id": 459, "lower": "tried", "norm": "tried", "shape": "xxxx", "prefix": "t", "suffix": "ied", "length": 5, "cluster": "28586", "prob": -8.532145500183105, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "sounds", "id": 460, "lower": "sounds", "norm": "sounds", "shape": "xxxx", "prefix": "s", "suffix": "nds", "length": 6, "cluster": "2442", "prob": -8.53985595703125, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "link", "id": 461, "lower": "link", "norm": "link", "shape": "xxxx", "prefix": "l", "suffix": "ink", "length": 4, "cluster": "5829", "prob": -8.540618896484375, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, 
"is_left_punct": false, "is_right_punct": false} +{"orth": "girl", "id": 462, "lower": "girl", "norm": "girl", "shape": "xxxx", "prefix": "g", "suffix": "irl", "length": 4, "cluster": "549", "prob": -8.542597770690918, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "open", "id": 463, "lower": "open", "norm": "open", "shape": "xxxx", "prefix": "o", "suffix": "pen", "length": 4, "cluster": "1589", "prob": -8.553583145141602, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "To", "id": 464, "lower": "to", "norm": "To", "shape": "Xx", "prefix": "T", "suffix": "To", "length": 2, "cluster": "3582", "prob": -8.557126998901367, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "taking", "id": 465, "lower": "taking", "norm": "taking", "shape": "xxxx", "prefix": "t", "suffix": "ing", "length": 6, "cluster": "31722", "prob": -8.55748462677002, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "happened", 
"id": 466, "lower": "happened", "norm": "happened", "shape": "xxxx", "prefix": "h", "suffix": "ned", "length": 8, "cluster": "5290", "prob": -8.559469223022461, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "during", "id": 467, "lower": "during", "norm": "during", "shape": "xxxx", "prefix": "d", "suffix": "ing", "length": 6, "cluster": "262140", "prob": -8.559581756591797, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "deal", "id": 468, "lower": "deal", "norm": "deal", "shape": "xxxx", "prefix": "d", "suffix": "eal", "length": 4, "cluster": "5829", "prob": -8.560197830200195, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "single", "id": 469, "lower": "single", "norm": "single", "shape": "xxxx", "prefix": "s", "suffix": "gle", "length": 6, "cluster": "71", "prob": -8.571329116821289, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "family", "id": 470, "lower": "family", "norm": "family", 
"shape": "xxxx", "prefix": "f", "suffix": "ily", "length": 6, "cluster": "1061", "prob": -8.571907043457031, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "close", "id": 471, "lower": "close", "norm": "close", "shape": "xxxx", "prefix": "c", "suffix": "ose", "length": 5, "cluster": "53", "prob": -8.581155776977539, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "happy", "id": 472, "lower": "happy", "norm": "happy", "shape": "xxxx", "prefix": "h", "suffix": "ppy", "length": 5, "cluster": "4586", "prob": -8.581560134887695, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "move", "id": 473, "lower": "move", "norm": "move", "shape": "xxxx", "prefix": "m", "suffix": "ove", "length": 4, "cluster": "7093", "prob": -8.582797050476074, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "number", "id": 474, "lower": "number", "norm": "number", "shape": "xxxx", "prefix": "n", "suffix": "ber", "length": 6, 
"cluster": "341", "prob": -8.584420204162598, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "US", "id": 475, "lower": "us", "norm": "US", "shape": "XX", "prefix": "U", "suffix": "US", "length": 2, "cluster": "1642", "prob": -8.585862159729004, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": true, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "water", "id": 476, "lower": "water", "norm": "water", "shape": "xxxx", "prefix": "w", "suffix": "ter", "length": 5, "cluster": "3705", "prob": -8.589462280273438, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "men", "id": 477, "lower": "men", "norm": "men", "shape": "xxx", "prefix": "m", "suffix": "men", "length": 3, "cluster": "877", "prob": -8.59007453918457, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "yeah", "id": 478, "lower": "yeah", "norm": "yeah", "shape": "xxxx", "prefix": "y", "suffix": "eah", "length": 4, "cluster": "26", "prob": -8.593489646911621, "is_alpha": true, "is_ascii": true, 
"is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "later", "id": 479, "lower": "later", "norm": "later", "shape": "xxxx", "prefix": "l", "suffix": "ter", "length": 5, "cluster": "5866", "prob": -8.603795051574707, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "whatever", "id": 480, "lower": "whatever", "norm": "whatever", "shape": "xxxx", "prefix": "w", "suffix": "ver", "length": 8, "cluster": "2026", "prob": -8.610091209411621, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "government", "id": 481, "lower": "government", "norm": "government", "shape": "xxxx", "prefix": "g", "suffix": "ent", "length": 10, "cluster": "297", "prob": -8.610445022583008, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "6", "id": 482, "lower": "6", "norm": "6", "shape": "d", "prefix": "6", "suffix": "6", "length": 1, "cluster": "50", "prob": -8.611133575439453, "is_alpha": false, "is_ascii": true, "is_digit": true, "is_lower": false, "is_punct": 
false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "house", "id": 483, "lower": "house", "norm": "house", "shape": "xxxx", "prefix": "h", "suffix": "use", "length": 5, "cluster": "37", "prob": -8.613367080688477, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "similar", "id": 484, "lower": "similar", "norm": "similar", "shape": "xxxx", "prefix": "s", "suffix": "lar", "length": 7, "cluster": "295", "prob": -8.613471031188965, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "wait", "id": 485, "lower": "wait", "norm": "wait", "shape": "xxxx", "prefix": "w", "suffix": "ait", "length": 4, "cluster": "3765", "prob": -8.613734245300293, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "questions", "id": 486, "lower": "questions", "norm": "questions", "shape": "xxxx", "prefix": "q", "suffix": "ons", "length": 9, "cluster": "1165", "prob": -8.613752365112305, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, 
"is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "sex", "id": 487, "lower": "sex", "norm": "sex", "shape": "xxx", "prefix": "s", "suffix": "sex", "length": 3, "cluster": "633", "prob": -8.613862991333008, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "especially", "id": 488, "lower": "especially", "norm": "especially", "shape": "xxxx", "prefix": "e", "suffix": "lly", "length": 10, "cluster": "27882", "prob": -8.616527557373047, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "lol", "id": 489, "lower": "lol", "norm": "lol", "shape": "xxx", "prefix": "l", "suffix": "lol", "length": 3, "cluster": "0", "prob": -8.621257781982422, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "Because", "id": 490, "lower": "because", "norm": "Because", "shape": "Xxxxx", "prefix": "B", "suffix": "use", "length": 7, "cluster": "1214", "prob": -8.623008728027344, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": 
false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} +{"orth": "God", "id": 491, "lower": "god", "norm": "God", "shape": "Xxx", "prefix": "G", "suffix": "God", "length": 3, "cluster": "422", "prob": -8.62376594543457, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} diff --git a/website/api/_annotation/_training.jade b/website/api/_annotation/_training.jade index d05bfa825..9a5e96628 100644 --- a/website/api/_annotation/_training.jade +++ b/website/api/_annotation/_training.jade @@ -1,5 +1,7 @@ //- 💫 DOCS > API > ANNOTATION > TRAINING ++h(3, "json-input") JSON input format for training + p | spaCy takes training data in JSON format. The built-in | #[+api("cli#convert") #[code convert]] command helps you convert the @@ -46,3 +48,57 @@ p | Treebank: +github("spacy", "examples/training/training-data.json", false, false, "json") + ++h(3, "vocab-jsonl") Lexical data for vocabulary + +tag-new(2) + +p + | To populate a model's vocabulary, you can use the + | #[+api("cli#vocab") #[code spacy vocab]] command and load in a + | #[+a("https://jsonlines.readthedocs.io/en/latest/") newline-delimited JSON] + | (JSONL) file containing one lexical entry per line. The first line + | defines the language and vocabulary settings. All other lines are + | expected to be JSON objects describing an individual lexeme. The lexical + | attributes will then be set as attributes on spaCy's + | #[+api("lexeme#attributes") #[code Lexeme]] object. The #[code vocab] + | command outputs a ready-to-use spaCy model with a #[code Vocab] + | containing the lexical data. + ++code("First line"). 
+ {"lang": "en", "settings": {"oov_prob": -20.502029418945312}} + ++code("Entry structure"). + { + "orth": string, + "id": int, + "lower": string, + "norm": string, + "shape": string, + "prefix": string, + "suffix": string, + "length": int, + "cluster": string, + "prob": float, + "is_alpha": bool, + "is_ascii": bool, + "is_digit": bool, + "is_lower": bool, + "is_punct": bool, + "is_space": bool, + "is_title": bool, + "is_upper": bool, + "like_url": bool, + "like_num": bool, + "like_email": bool, + "is_stop": bool, + "is_oov": bool, + "is_quote": bool, + "is_left_punct": bool, + "is_right_punct": bool + } + +p + | Here's an example of the 500 most frequent lexemes in the English + | training data: + ++github("spacy", "examples/training/vocab-data.json", false, false, "json") diff --git a/website/api/_data.json b/website/api/_data.json index 0be09b782..886404c99 100644 --- a/website/api/_data.json +++ b/website/api/_data.json @@ -220,7 +220,7 @@ "Lemmatization": "lemmatization", "Dependencies": "dependency-parsing", "Named Entities": "named-entities", - "Training Data": "training" + "Models & Training": "training" } } } diff --git a/website/api/annotation.jade b/website/api/annotation.jade index c65cd3983..16598371d 100644 --- a/website/api/annotation.jade +++ b/website/api/annotation.jade @@ -99,6 +99,6 @@ p This document describes the target annotations spaCy is trained to predict. 
include _annotation/_biluo +section("training") - +h(2, "json-input") JSON input format for training + +h(2, "training") Models and training data include _annotation/_training From f02b0af821ab7f82dbc4cf42e4f2ed0d273d230a Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 30 Oct 2017 19:44:35 +0100 Subject: [PATCH 28/32] Fix path and use smaller example size 500 was too larger and caused laggy rendering --- examples/training/vocab-data.jsonl | 399 ------------------------- website/api/_annotation/_training.jade | 4 +- 2 files changed, 2 insertions(+), 401 deletions(-) diff --git a/examples/training/vocab-data.jsonl b/examples/training/vocab-data.jsonl index 4fae8fd65..3fdf5eede 100644 --- a/examples/training/vocab-data.jsonl +++ b/examples/training/vocab-data.jsonl @@ -99,402 +99,3 @@ {"orth": ";", "id": 95, "lower": ";", "norm": ";", "shape": ";", "prefix": ";", "suffix": ";", "length": 1, "cluster": "36", "prob": -6.586422920227051, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} {"orth": "'ve", "id": 96, "lower": "'ve", "norm": "'ve", "shape": "'xx", "prefix": "'", "suffix": "'ve", "length": 3, "cluster": "1018", "prob": -6.593011379241943, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} {"orth": "could", "id": 97, "lower": "could", "norm": "could", "shape": "xxxx", "prefix": "c", "suffix": "uld", "length": 5, "cluster": "954", "prob": -6.595959186553955, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, 
"is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "then", "id": 98, "lower": "then", "norm": "then", "shape": "xxxx", "prefix": "t", "suffix": "hen", "length": 4, "cluster": "9962", "prob": -6.598200798034668, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "other", "id": 99, "lower": "other", "norm": "other", "shape": "xxxx", "prefix": "o", "suffix": "her", "length": 5, "cluster": "47", "prob": -6.6438727378845215, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "make", "id": 100, "lower": "make", "norm": "make", "shape": "xxxx", "prefix": "m", "suffix": "ake", "length": 4, "cluster": "4618", "prob": -6.66980504989624, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "been", "id": 101, "lower": "been", "norm": "been", "shape": "xxxx", "prefix": "b", "suffix": "een", "length": 4, "cluster": "202", "prob": -6.670916557312012, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": 
false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "were", "id": 102, "lower": "were", "norm": "were", "shape": "xxxx", "prefix": "w", "suffix": "ere", "length": 4, "cluster": "506", "prob": -6.673174858093262, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "see", "id": 103, "lower": "see", "norm": "see", "shape": "xxx", "prefix": "s", "suffix": "see", "length": 3, "cluster": "1546", "prob": -6.6828837394714355, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "That", "id": 104, "lower": "that", "norm": "That", "shape": "Xxxx", "prefix": "T", "suffix": "hat", "length": 4, "cluster": "1406", "prob": -6.688080310821533, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "i", "id": 105, "lower": "i", "norm": "i", "shape": "x", "prefix": "i", "suffix": "i", "length": 1, "cluster": "966", "prob": -6.6887712478637695, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, 
"is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "any", "id": 106, "lower": "any", "norm": "any", "shape": "xxx", "prefix": "a", "suffix": "any", "length": 3, "cluster": "12266", "prob": -6.689523220062256, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "want", "id": 107, "lower": "want", "norm": "want", "shape": "xxxx", "prefix": "w", "suffix": "ant", "length": 4, "cluster": "906", "prob": -6.694204807281494, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "even", "id": 108, "lower": "even", "norm": "even", "shape": "xxxx", "prefix": "e", "suffix": "ven", "length": 4, "cluster": "3306", "prob": -6.702912330627441, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "should", "id": 109, "lower": "should", "norm": "should", "shape": "xxxx", "prefix": "s", "suffix": "uld", "length": 6, "cluster": "698", "prob": -6.733259677886963, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} 
-{"orth": "way", "id": 110, "lower": "way", "norm": "way", "shape": "xxx", "prefix": "w", "suffix": "way", "length": 3, "cluster": "1349", "prob": -6.73627233505249, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "'", "id": 111, "lower": "'", "norm": "'", "shape": "'", "prefix": "'", "suffix": "'", "length": 1, "cluster": "916", "prob": -6.73720121383667, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": true, "is_left_punct": true, "is_right_punct": true} -{"orth": "too", "id": 112, "lower": "too", "norm": "too", "shape": "xxx", "prefix": "t", "suffix": "too", "length": 3, "cluster": "6378", "prob": -6.77581787109375, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "go", "id": 113, "lower": "go", "norm": "go", "shape": "xx", "prefix": "g", "suffix": "go", "length": 2, "cluster": "3466", "prob": -6.775965213775635, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "him", "id": 114, "lower": "him", "norm": "him", "shape": "xxx", "prefix": "h", "suffix": "him", 
"length": 3, "cluster": "1898", "prob": -6.783067226409912, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "This", "id": 115, "lower": "this", "norm": "This", "shape": "Xxxx", "prefix": "T", "suffix": "his", "length": 4, "cluster": "382", "prob": -6.78391695022583, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "her", "id": 116, "lower": "her", "norm": "her", "shape": "xxx", "prefix": "h", "suffix": "her", "length": 3, "cluster": "507", "prob": -6.798486709594727, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "going", "id": 117, "lower": "going", "norm": "going", "shape": "xxxx", "prefix": "g", "suffix": "ing", "length": 5, "cluster": "2090", "prob": -6.833367824554443, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "now", "id": 118, "lower": "now", "norm": "now", "shape": "xxx", "prefix": "n", "suffix": "now", "length": 3, "cluster": "1770", "prob": -6.834407329559326, "is_alpha": true, 
"is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "being", "id": 119, "lower": "being", "norm": "being", "shape": "xxxx", "prefix": "b", "suffix": "ing", "length": 5, "cluster": "3818", "prob": -6.845808029174805, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "still", "id": 120, "lower": "still", "norm": "still", "shape": "xxxx", "prefix": "s", "suffix": "ill", "length": 5, "cluster": "1658", "prob": -6.867525100708008, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "into", "id": 121, "lower": "into", "norm": "into", "shape": "xxxx", "prefix": "i", "suffix": "nto", "length": 4, "cluster": "8188", "prob": -6.87359094619751, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "which", "id": 122, "lower": "which", "norm": "which", "shape": "xxxx", "prefix": "w", "suffix": "ich", "length": 5, "cluster": "154", "prob": -6.877470970153809, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, 
"is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "something", "id": 123, "lower": "something", "norm": "something", "shape": "xxxx", "prefix": "s", "suffix": "ing", "length": 9, "cluster": "14314", "prob": -6.887354850769043, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "she", "id": 124, "lower": "she", "norm": "she", "shape": "xxx", "prefix": "s", "suffix": "she", "length": 3, "cluster": "218", "prob": -6.90155553817749, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "also", "id": 125, "lower": "also", "norm": "also", "shape": "xxxx", "prefix": "a", "suffix": "lso", "length": 4, "cluster": "122", "prob": -6.928974151611328, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "very", "id": 126, "lower": "very", "norm": "very", "shape": "xxxx", "prefix": "v", "suffix": "ery", "length": 4, "cluster": "234", "prob": -6.93242883682251, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, 
"is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "right", "id": 127, "lower": "right", "norm": "right", "shape": "xxxx", "prefix": "r", "suffix": "ght", "length": 5, "cluster": "14122", "prob": -6.933711051940918, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "game", "id": 128, "lower": "game", "norm": "game", "shape": "xxxx", "prefix": "g", "suffix": "ame", "length": 4, "cluster": "7973", "prob": -6.940612316131592, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "say", "id": 129, "lower": "say", "norm": "say", "shape": "xxx", "prefix": "s", "suffix": "say", "length": 3, "cluster": "1162", "prob": -6.950479984283447, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "'ll", "id": 130, "lower": "'ll", "norm": "'ll", "shape": "'xx", "prefix": "'", "suffix": "'ll", "length": 3, "cluster": "5114", "prob": -6.958071231842041, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": 
false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "got", "id": 131, "lower": "got", "norm": "got", "shape": "xxx", "prefix": "g", "suffix": "got", "length": 3, "cluster": "10666", "prob": -6.98855447769165, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "well", "id": 132, "lower": "well", "norm": "well", "shape": "xxxx", "prefix": "w", "suffix": "ell", "length": 4, "cluster": "746", "prob": -6.995903968811035, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "need", "id": 133, "lower": "need", "norm": "need", "shape": "xxxx", "prefix": "n", "suffix": "eed", "length": 4, "cluster": "2954", "prob": -7.008103370666504, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "And", "id": 134, "lower": "and", "norm": "And", "shape": "Xxx", "prefix": "A", "suffix": "And", "length": 3, "cluster": "1470", "prob": -7.012199401855469, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": 
false, "is_right_punct": false} -{"orth": "But", "id": 135, "lower": "but", "norm": "But", "shape": "Xxx", "prefix": "B", "suffix": "But", "length": 3, "cluster": "1470", "prob": -7.0142974853515625, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "over", "id": 136, "lower": "over", "norm": "over", "shape": "xxxx", "prefix": "o", "suffix": "ver", "length": 4, "cluster": "49148", "prob": -7.027544975280762, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "back", "id": 137, "lower": "back", "norm": "back", "shape": "xxxx", "prefix": "b", "suffix": "ack", "length": 4, "cluster": "7530", "prob": -7.033305644989014, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "same", "id": 138, "lower": "same", "norm": "same", "shape": "xxxx", "prefix": "s", "suffix": "ame", "length": 4, "cluster": "991", "prob": -7.053191661834717, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "thing", "id": 139, "lower": 
"thing", "norm": "thing", "shape": "xxxx", "prefix": "t", "suffix": "ing", "length": 5, "cluster": "2013", "prob": -7.063167572021484, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "first", "id": 140, "lower": "first", "norm": "first", "shape": "xxxx", "prefix": "f", "suffix": "rst", "length": 5, "cluster": "159", "prob": -7.063716888427734, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "most", "id": 141, "lower": "most", "norm": "most", "shape": "xxxx", "prefix": "m", "suffix": "ost", "length": 4, "cluster": "175", "prob": -7.0663957595825195, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "here", "id": 142, "lower": "here", "norm": "here", "shape": "xxxx", "prefix": "h", "suffix": "ere", "length": 4, "cluster": "3946", "prob": -7.0680251121521, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "ca", "id": 143, "lower": "ca", "norm": "ca", "shape": "xx", "prefix": "c", "suffix": "ca", 
"length": 2, "cluster": "0", "prob": -7.071251392364502, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "off", "id": 144, "lower": "off", "norm": "off", "shape": "xxx", "prefix": "o", "suffix": "off", "length": 3, "cluster": "6506", "prob": -7.073742389678955, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "'d", "id": 145, "lower": "'d", "norm": "'d", "shape": "'x", "prefix": "'", "suffix": "'d", "length": 2, "cluster": "5114", "prob": -7.075286865234375, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "They", "id": 146, "lower": "they", "norm": "They", "shape": "Xxxx", "prefix": "T", "suffix": "hey", "length": 4, "cluster": "1882", "prob": -7.0789008140563965, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "work", "id": 147, "lower": "work", "norm": "work", "shape": "xxxx", "prefix": "w", "suffix": "ork", "length": 4, "cluster": "1973", "prob": -7.081293106079102, "is_alpha": true, 
"is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "use", "id": 148, "lower": "use", "norm": "use", "shape": "xxx", "prefix": "u", "suffix": "use", "length": 3, "cluster": "2741", "prob": -7.083596229553223, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "never", "id": 149, "lower": "never", "norm": "never", "shape": "xxxx", "prefix": "n", "suffix": "ver", "length": 5, "cluster": "15994", "prob": -7.084620475769043, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "better", "id": 150, "lower": "better", "norm": "better", "shape": "xxxx", "prefix": "b", "suffix": "ter", "length": 6, "cluster": "7658", "prob": -7.1072587966918945, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "though", "id": 151, "lower": "though", "norm": "though", "shape": "xxxx", "prefix": "t", "suffix": "ugh", "length": 6, "cluster": "2004", "prob": -7.113335132598877, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, 
"is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "lot", "id": 152, "lower": "lot", "norm": "lot", "shape": "xxx", "prefix": "l", "suffix": "lot", "length": 3, "cluster": "853", "prob": -7.113600254058838, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "pretty", "id": 153, "lower": "pretty", "norm": "pretty", "shape": "xxxx", "prefix": "p", "suffix": "tty", "length": 6, "cluster": "234", "prob": -7.1256103515625, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "where", "id": 154, "lower": "where", "norm": "where", "shape": "xxxx", "prefix": "w", "suffix": "ere", "length": 5, "cluster": "8148", "prob": -7.146170139312744, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "am", "id": 155, "lower": "am", "norm": "am", "shape": "xx", "prefix": "a", "suffix": "am", "length": 2, "cluster": "3066", "prob": -7.149725437164307, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, 
"like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "things", "id": 156, "lower": "things", "norm": "things", "shape": "xxxx", "prefix": "t", "suffix": "ngs", "length": 6, "cluster": "3917", "prob": -7.154941082000732, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "sure", "id": 157, "lower": "sure", "norm": "sure", "shape": "xxxx", "prefix": "s", "suffix": "ure", "length": 4, "cluster": "490", "prob": -7.157395839691162, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "actually", "id": 158, "lower": "actually", "norm": "actually", "shape": "xxxx", "prefix": "a", "suffix": "lly", "length": 8, "cluster": "7802", "prob": -7.160778045654297, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "He", "id": 159, "lower": "he", "norm": "He", "shape": "Xx", "prefix": "H", "suffix": "He", "length": 2, "cluster": "126", "prob": -7.162238121032715, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, 
"is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "those", "id": 160, "lower": "those", "norm": "those", "shape": "xxxx", "prefix": "t", "suffix": "ose", "length": 5, "cluster": "495", "prob": -7.169255256652832, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "why", "id": 161, "lower": "why", "norm": "why", "shape": "xxx", "prefix": "w", "suffix": "why", "length": 3, "cluster": "18410", "prob": -7.178915500640869, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "So", "id": 162, "lower": "so", "norm": "So", "shape": "Xx", "prefix": "S", "suffix": "So", "length": 2, "cluster": "1726", "prob": -7.199381351470947, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "take", "id": 163, "lower": "take", "norm": "take", "shape": "xxxx", "prefix": "t", "suffix": "ake", "length": 4, "cluster": "6666", "prob": -7.209812641143799, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, 
"is_right_punct": false} -{"orth": "down", "id": 164, "lower": "down", "norm": "down", "shape": "xxxx", "prefix": "d", "suffix": "own", "length": 4, "cluster": "2410", "prob": -7.223586082458496, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "What", "id": 165, "lower": "what", "norm": "What", "shape": "Xxxx", "prefix": "W", "suffix": "hat", "length": 4, "cluster": "702", "prob": -7.226758003234863, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "someone", "id": 166, "lower": "someone", "norm": "someone", "shape": "xxxx", "prefix": "s", "suffix": "one", "length": 7, "cluster": "30698", "prob": -7.249640464782715, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "before", "id": 167, "lower": "before", "norm": "before", "shape": "xxxx", "prefix": "b", "suffix": "ore", "length": 6, "cluster": "1492", "prob": -7.253359794616699, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "said", "id": 168, 
"lower": "said", "norm": "said", "shape": "xxxx", "prefix": "s", "suffix": "aid", "length": 4, "cluster": "116", "prob": -7.258025169372559, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "after", "id": 169, "lower": "after", "norm": "after", "shape": "xxxx", "prefix": "a", "suffix": "ter", "length": 5, "cluster": "3540", "prob": -7.265651702880859, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "around", "id": 170, "lower": "around", "norm": "around", "shape": "xxxx", "prefix": "a", "suffix": "und", "length": 6, "cluster": "245756", "prob": -7.313362121582031, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "its", "id": 171, "lower": "its", "norm": "its", "shape": "xxx", "prefix": "i", "suffix": "its", "length": 3, "cluster": "27", "prob": -7.321457862854004, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "2", "id": 172, "lower": "2", "norm": "2", "shape": "d", "prefix": "2", "suffix": "2", 
"length": 1, "cluster": "818", "prob": -7.324268341064453, "is_alpha": false, "is_ascii": true, "is_digit": true, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "feel", "id": 173, "lower": "feel", "norm": "feel", "shape": "xxxx", "prefix": "f", "suffix": "eel", "length": 4, "cluster": "1674", "prob": -7.342533588409424, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "My", "id": 174, "lower": "my", "norm": "My", "shape": "Xx", "prefix": "M", "suffix": "My", "length": 2, "cluster": "94", "prob": -7.345071792602539, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "There", "id": 175, "lower": "there", "norm": "There", "shape": "Xxxxx", "prefix": "T", "suffix": "ere", "length": 5, "cluster": "1918", "prob": -7.347356796264648, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "look", "id": 176, "lower": "look", "norm": "look", "shape": "xxxx", "prefix": "l", "suffix": "ook", "length": 4, "cluster": "2442", "prob": -7.352481365203857, "is_alpha": true, 
"is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "these", "id": 177, "lower": "these", "norm": "these", "shape": "xxxx", "prefix": "t", "suffix": "ese", "length": 5, "cluster": "1519", "prob": -7.36269474029541, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "years", "id": 178, "lower": "years", "norm": "years", "shape": "xxxx", "prefix": "y", "suffix": "ars", "length": 5, "cluster": "189", "prob": -7.368987560272217, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "love", "id": 179, "lower": "love", "norm": "love", "shape": "xxxx", "prefix": "l", "suffix": "ove", "length": 4, "cluster": "2661", "prob": -7.372685432434082, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "always", "id": 180, "lower": "always", "norm": "always", "shape": "xxxx", "prefix": "a", "suffix": "ays", "length": 6, "cluster": "15994", "prob": -7.37296724319458, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, 
"is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "many", "id": 181, "lower": "many", "norm": "many", "shape": "xxxx", "prefix": "m", "suffix": "any", "length": 4, "cluster": "751", "prob": -7.377613067626953, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": ">", "id": 0, "lower": ">", "norm": ">", "shape": "&xx", "prefix": "&", "suffix": ">", "length": 3, "cluster": "0", "prob": -7.38146448135376, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "A", "id": 182, "lower": "a", "norm": "A", "shape": "X", "prefix": "A", "suffix": "A", "length": 1, "cluster": "222", "prob": -7.38541841506958, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": true, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "point", "id": 183, "lower": "point", "norm": "point", "shape": "xxxx", "prefix": "p", "suffix": "int", "length": 5, "cluster": "389", "prob": -7.386973857879639, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, 
"like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "find", "id": 184, "lower": "find", "norm": "find", "shape": "xxxx", "prefix": "f", "suffix": "ind", "length": 4, "cluster": "5642", "prob": -7.387212753295898, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "probably", "id": 185, "lower": "probably", "norm": "probably", "shape": "xxxx", "prefix": "p", "suffix": "bly", "length": 8, "cluster": "5754", "prob": -7.395048141479492, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "new", "id": 186, "lower": "new", "norm": "new", "shape": "xxx", "prefix": "n", "suffix": "new", "length": 3, "cluster": "199", "prob": -7.398182392120361, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "made", "id": 187, "lower": "made", "norm": "made", "shape": "xxxx", "prefix": "m", "suffix": "ade", "length": 4, "cluster": "120490", "prob": -7.399899005889893, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, 
"is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "day", "id": 188, "lower": "day", "norm": "day", "shape": "xxx", "prefix": "d", "suffix": "day", "length": 3, "cluster": "989", "prob": -7.400947093963623, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "We", "id": 189, "lower": "we", "norm": "We", "shape": "Xx", "prefix": "W", "suffix": "We", "length": 2, "cluster": "858", "prob": -7.402578353881836, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "every", "id": 190, "lower": "every", "norm": "every", "shape": "xxxx", "prefix": "e", "suffix": "ery", "length": 5, "cluster": "61418", "prob": -7.414647579193115, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "great", "id": 191, "lower": "great", "norm": "great", "shape": "xxxx", "prefix": "g", "suffix": "eat", "length": 5, "cluster": "1831", "prob": -7.420454502105713, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": 
false} -{"orth": "our", "id": 192, "lower": "our", "norm": "our", "shape": "xxx", "prefix": "o", "suffix": "our", "length": 3, "cluster": "59", "prob": -7.4210286140441895, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "two", "id": 193, "lower": "two", "norm": "two", "shape": "xxx", "prefix": "t", "suffix": "two", "length": 3, "cluster": "15", "prob": -7.433600425720215, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "anything", "id": 194, "lower": "anything", "norm": "anything", "shape": "xxxx", "prefix": "a", "suffix": "ing", "length": 8, "cluster": "14314", "prob": -7.439383506774902, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "while", "id": 195, "lower": "while", "norm": "while", "shape": "xxxx", "prefix": "w", "suffix": "ile", "length": 5, "cluster": "6100", "prob": -7.440170764923096, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "few", "id": 196, "lower": "few", "norm": "few", 
"shape": "xxx", "prefix": "f", "suffix": "few", "length": 3, "cluster": "79", "prob": -7.440912246704102, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "$", "id": 197, "lower": "$", "norm": "$", "shape": "$", "prefix": "$", "suffix": "$", "length": 1, "cluster": "18", "prob": -7.450106620788574, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "bad", "id": 198, "lower": "bad", "norm": "bad", "shape": "xxx", "prefix": "b", "suffix": "bad", "length": 3, "cluster": "551", "prob": -7.452563762664795, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "No", "id": 199, "lower": "no", "norm": "No", "shape": "Xx", "prefix": "N", "suffix": "No", "length": 2, "cluster": "94", "prob": -7.456389427185059, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "little", "id": 200, "lower": "little", "norm": "little", "shape": "xxxx", "prefix": "l", "suffix": "tle", "length": 6, "cluster": "1959", "prob": 
-7.480203628540039, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "might", "id": 201, "lower": "might", "norm": "might", "shape": "xxxx", "prefix": "m", "suffix": "ght", "length": 5, "cluster": "186", "prob": -7.490107536315918, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "best", "id": 202, "lower": "best", "norm": "best", "shape": "xxxx", "prefix": "b", "suffix": "est", "length": 4, "cluster": "479", "prob": -7.492556571960449, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "play", "id": 203, "lower": "play", "norm": "play", "shape": "xxxx", "prefix": "p", "suffix": "lay", "length": 4, "cluster": "1717", "prob": -7.50220251083374, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "shit", "id": 204, "lower": "shit", "norm": "shit", "shape": "xxxx", "prefix": "s", "suffix": "hit", "length": 4, "cluster": "0", "prob": -7.522359371185303, "is_alpha": true, "is_ascii": true, "is_digit": false, 
"is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "try", "id": 205, "lower": "try", "norm": "try", "shape": "xxx", "prefix": "t", "suffix": "try", "length": 3, "cluster": "1930", "prob": -7.540920734405518, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "used", "id": 206, "lower": "used", "norm": "used", "shape": "xxxx", "prefix": "u", "suffix": "sed", "length": 4, "cluster": "15402", "prob": -7.542972087860107, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "long", "id": 207, "lower": "long", "norm": "long", "shape": "xxxx", "prefix": "l", "suffix": "ong", "length": 4, "cluster": "935", "prob": -7.544892311096191, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "doing", "id": 208, "lower": "doing", "norm": "doing", "shape": "xxxx", "prefix": "d", "suffix": "ing", "length": 5, "cluster": "15338", "prob": -7.553442478179932, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": 
false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "getting", "id": 209, "lower": "getting", "norm": "getting", "shape": "xxxx", "prefix": "g", "suffix": "ing", "length": 7, "cluster": "31722", "prob": -7.564762115478516, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "post", "id": 210, "lower": "post", "norm": "post", "shape": "xxxx", "prefix": "p", "suffix": "ost", "length": 4, "cluster": "3733", "prob": -7.565684795379639, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "year", "id": 211, "lower": "year", "norm": "year", "shape": "xxxx", "prefix": "y", "suffix": "ear", "length": 4, "cluster": "29", "prob": -7.567681312561035, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "Do", "id": 212, "lower": "do", "norm": "Do", "shape": "Xx", "prefix": "D", "suffix": "Do", "length": 2, "cluster": "702", "prob": -7.570033073425293, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, 
"like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "life", "id": 213, "lower": "life", "norm": "life", "shape": "xxxx", "prefix": "l", "suffix": "ife", "length": 4, "cluster": "1893", "prob": -7.574200630187988, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "through", "id": 214, "lower": "through", "norm": "through", "shape": "xxxx", "prefix": "t", "suffix": "ugh", "length": 7, "cluster": "65532", "prob": -7.575429439544678, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "guy", "id": 215, "lower": "guy", "norm": "guy", "shape": "xxx", "prefix": "g", "suffix": "guy", "length": 3, "cluster": "549", "prob": -7.582011699676514, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "enough", "id": 216, "lower": "enough", "norm": "enough", "shape": "xxxx", "prefix": "e", "suffix": "ugh", "length": 6, "cluster": "1834", "prob": -7.586349010467529, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, 
"is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "ever", "id": 217, "lower": "ever", "norm": "ever", "shape": "xxxx", "prefix": "e", "suffix": "ver", "length": 4, "cluster": "14058", "prob": -7.591183662414551, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "In", "id": 218, "lower": "in", "norm": "In", "shape": "Xx", "prefix": "I", "suffix": "In", "length": 2, "cluster": "62", "prob": -7.603263854980469, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "give", "id": 219, "lower": "give", "norm": "give", "shape": "xxxx", "prefix": "g", "suffix": "ive", "length": 4, "cluster": "522", "prob": -7.611863136291504, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "mean", "id": 220, "lower": "mean", "norm": "mean", "shape": "xxxx", "prefix": "m", "suffix": "ean", "length": 4, "cluster": "3082", "prob": -7.611870765686035, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": 
"thought", "id": 221, "lower": "thought", "norm": "thought", "shape": "xxxx", "prefix": "t", "suffix": "ght", "length": 7, "cluster": "650", "prob": -7.614910125732422, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "since", "id": 222, "lower": "since", "norm": "since", "shape": "xxxx", "prefix": "s", "suffix": "nce", "length": 5, "cluster": "468", "prob": -7.615171909332275, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "|", "id": 223, "lower": "|", "norm": "|", "shape": "|", "prefix": "|", "suffix": "|", "length": 1, "cluster": "0", "prob": -7.6297454833984375, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "different", "id": 224, "lower": "different", "norm": "different", "shape": "xxxx", "prefix": "d", "suffix": "ent", "length": 9, "cluster": "1319", "prob": -7.630640506744385, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "3", "id": 225, "lower": "3", "norm": "3", "shape": "d", 
"prefix": "3", "suffix": "3", "length": 1, "cluster": "818", "prob": -7.636006832122803, "is_alpha": false, "is_ascii": true, "is_digit": true, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "last", "id": 226, "lower": "last", "norm": "last", "shape": "xxxx", "prefix": "l", "suffix": "ast", "length": 4, "cluster": "127", "prob": -7.636077404022217, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "own", "id": 227, "lower": "own", "norm": "own", "shape": "xxx", "prefix": "o", "suffix": "own", "length": 3, "cluster": "217", "prob": -7.636797904968262, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "1", "id": 228, "lower": "1", "norm": "1", "shape": "d", "prefix": "1", "suffix": "1", "length": 1, "cluster": "306", "prob": -7.639832973480225, "is_alpha": false, "is_ascii": true, "is_digit": true, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "us", "id": 229, "lower": "us", "norm": "us", "shape": "xx", "prefix": "u", "suffix": "us", "length": 2, "cluster": "1898", "prob": -7.643693923950195, "is_alpha": 
true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "%", "id": 230, "lower": "%", "norm": "%", "shape": "%", "prefix": "%", "suffix": "%", "length": 1, "cluster": "34", "prob": -7.645323753356934, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "Not", "id": 231, "lower": "not", "norm": "Not", "shape": "Xxx", "prefix": "N", "suffix": "Not", "length": 3, "cluster": "1982", "prob": -7.65825080871582, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "put", "id": 232, "lower": "put", "norm": "put", "shape": "xxx", "prefix": "p", "suffix": "put", "length": 3, "cluster": "6314", "prob": -7.666473865509033, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "man", "id": 233, "lower": "man", "norm": "man", "shape": "xxx", "prefix": "m", "suffix": "man", "length": 3, "cluster": "549", "prob": -7.668745517730713, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, 
"is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "may", "id": 234, "lower": "may", "norm": "may", "shape": "xxx", "prefix": "m", "suffix": "may", "length": 3, "cluster": "186", "prob": -7.678494930267334, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "makes", "id": 235, "lower": "makes", "norm": "makes", "shape": "xxxx", "prefix": "m", "suffix": "kes", "length": 5, "cluster": "426", "prob": -7.684445858001709, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "money", "id": 236, "lower": "money", "norm": "money", "shape": "xxxx", "prefix": "m", "suffix": "ney", "length": 5, "cluster": "357", "prob": -7.693631172180176, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": ":)", "id": 237, "lower": ":)", "norm": ":)", "shape": ":)", "prefix": ":", "suffix": ":)", "length": 2, "cluster": "0", "prob": -7.694086074829102, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, 
"like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "without", "id": 238, "lower": "without", "norm": "without", "shape": "xxxx", "prefix": "w", "suffix": "out", "length": 7, "cluster": "57340", "prob": -7.694504261016846, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "bit", "id": 239, "lower": "bit", "norm": "bit", "shape": "xxx", "prefix": "b", "suffix": "bit", "length": 3, "cluster": "853", "prob": -7.721855640411377, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "person", "id": 240, "lower": "person", "norm": "person", "shape": "xxxx", "prefix": "p", "suffix": "son", "length": 6, "cluster": "549", "prob": -7.727076530456543, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "Also", "id": 241, "lower": "also", "norm": "Also", "shape": "Xxxx", "prefix": "A", "suffix": "lso", "length": 4, "cluster": "254", "prob": -7.734253406524658, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, 
"is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "again", "id": 242, "lower": "again", "norm": "again", "shape": "xxxx", "prefix": "a", "suffix": "ain", "length": 5, "cluster": "28522", "prob": -7.7370924949646, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "Just", "id": 243, "lower": "just", "norm": "Just", "shape": "Xxxx", "prefix": "J", "suffix": "ust", "length": 4, "cluster": "1982", "prob": -7.743429183959961, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "both", "id": 244, "lower": "both", "norm": "both", "shape": "xxxx", "prefix": "b", "suffix": "oth", "length": 4, "cluster": "1007", "prob": -7.750914573669434, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "help", "id": 245, "lower": "help", "norm": "help", "shape": "xxxx", "prefix": "h", "suffix": "elp", "length": 4, "cluster": "309", "prob": -7.758815288543701, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} 
-{"orth": "trying", "id": 246, "lower": "trying", "norm": "trying", "shape": "xxxx", "prefix": "t", "suffix": "ing", "length": 6, "cluster": "14378", "prob": -7.759474754333496, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "least", "id": 247, "lower": "least", "norm": "least", "shape": "xxxx", "prefix": "l", "suffix": "ast", "length": 5, "cluster": "3690", "prob": -7.7660088539123535, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "come", "id": 248, "lower": "come", "norm": "come", "shape": "xxxx", "prefix": "c", "suffix": "ome", "length": 4, "cluster": "7562", "prob": -7.775856971740723, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "keep", "id": 249, "lower": "keep", "norm": "keep", "shape": "xxxx", "prefix": "k", "suffix": "eep", "length": 4, "cluster": "3338", "prob": -7.778285980224609, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "Thanks", "id": 250, "lower": "thanks", "norm": 
"Thanks", "shape": "Xxxxx", "prefix": "T", "suffix": "nks", "length": 6, "cluster": "510", "prob": -7.781467914581299, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "read", "id": 251, "lower": "read", "norm": "read", "shape": "xxxx", "prefix": "r", "suffix": "ead", "length": 4, "cluster": "6314", "prob": -7.787075042724609, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "nt", "id": 252, "lower": "nt", "norm": "nt", "shape": "xx", "prefix": "n", "suffix": "nt", "length": 2, "cluster": "3685", "prob": -7.788322925567627, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "part", "id": 253, "lower": "part", "norm": "part", "shape": "xxxx", "prefix": "p", "suffix": "art", "length": 4, "cluster": "725", "prob": -7.791079521179199, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "let", "id": 254, "lower": "let", "norm": "let", "shape": "xxx", "prefix": "l", "suffix": "let", "length": 3, "cluster": 
"522", "prob": -7.795135974884033, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "hard", "id": 255, "lower": "hard", "norm": "hard", "shape": "xxxx", "prefix": "h", "suffix": "ard", "length": 4, "cluster": "2538", "prob": -7.795384407043457, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "another", "id": 256, "lower": "another", "norm": "another", "shape": "xxxx", "prefix": "a", "suffix": "her", "length": 7, "cluster": "28650", "prob": -7.801506519317627, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "end", "id": 257, "lower": "end", "norm": "end", "shape": "xxx", "prefix": "e", "suffix": "end", "length": 3, "cluster": "21", "prob": -7.816553115844727, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "having", "id": 258, "lower": "having", "norm": "having", "shape": "xxxx", "prefix": "h", "suffix": "ing", "length": 6, "cluster": "130026", "prob": -7.818792819976807, "is_alpha": true, 
"is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "As", "id": 259, "lower": "as", "norm": "As", "shape": "Xx", "prefix": "A", "suffix": "As", "length": 2, "cluster": "958", "prob": -7.836142539978027, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "games", "id": 260, "lower": "games", "norm": "games", "shape": "xxxx", "prefix": "g", "suffix": "mes", "length": 5, "cluster": "1485", "prob": -7.836157321929932, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "already", "id": 261, "lower": "already", "norm": "already", "shape": "xxxx", "prefix": "a", "suffix": "ady", "length": 7, "cluster": "634", "prob": -7.838688850402832, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "..", "id": 0, "lower": "..", "norm": "..", "shape": "..", "prefix": ".", "suffix": "..", "length": 2, "cluster": "4906", "prob": -7.840396404266357, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, 
"is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "problem", "id": 262, "lower": "problem", "norm": "problem", "shape": "xxxx", "prefix": "p", "suffix": "lem", "length": 7, "cluster": "16069", "prob": -7.841479301452637, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "kind", "id": 263, "lower": "kind", "norm": "kind", "shape": "xxxx", "prefix": "k", "suffix": "ind", "length": 4, "cluster": "213", "prob": -7.844367980957031, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "old", "id": 264, "lower": "old", "norm": "old", "shape": "xxx", "prefix": "o", "suffix": "old", "length": 3, "cluster": "2346", "prob": -7.845602989196777, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "everyone", "id": 265, "lower": "everyone", "norm": "everyone", "shape": "xxxx", "prefix": "e", "suffix": "one", "length": 8, "cluster": "30698", "prob": -7.850788116455078, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": 
false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "saying", "id": 266, "lower": "saying", "norm": "saying", "shape": "xxxx", "prefix": "s", "suffix": "ing", "length": 6, "cluster": "3732", "prob": -7.854340076446533, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "idea", "id": 267, "lower": "idea", "norm": "idea", "shape": "xxxx", "prefix": "i", "suffix": "dea", "length": 4, "cluster": "709", "prob": -7.855560779571533, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "else", "id": 268, "lower": "else", "norm": "else", "shape": "xxxx", "prefix": "e", "suffix": "lse", "length": 4, "cluster": "2013", "prob": -7.86043643951416, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "reason", "id": 269, "lower": "reason", "norm": "reason", "shape": "xxxx", "prefix": "r", "suffix": "son", "length": 6, "cluster": "113", "prob": -7.867291450500488, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": 
false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "Well", "id": 270, "lower": "well", "norm": "Well", "shape": "Xxxx", "prefix": "W", "suffix": "ell", "length": 4, "cluster": "1726", "prob": -7.871857643127441, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "less", "id": 271, "lower": "less", "norm": "less", "shape": "xxxx", "prefix": "l", "suffix": "ess", "length": 4, "cluster": "5610", "prob": -7.872425079345703, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "world", "id": 272, "lower": "world", "norm": "world", "shape": "xxxx", "prefix": "w", "suffix": "rld", "length": 5, "cluster": "329", "prob": -7.8744120597839355, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "wrong", "id": 273, "lower": "wrong", "norm": "wrong", "shape": "xxxx", "prefix": "w", "suffix": "ong", "length": 5, "cluster": "4586", "prob": -7.876842021942139, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, 
"is_left_punct": false, "is_right_punct": false} -{"orth": "How", "id": 274, "lower": "how", "norm": "How", "shape": "Xxx", "prefix": "H", "suffix": "How", "length": 3, "cluster": "702", "prob": -7.879385948181152, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "far", "id": 275, "lower": "far", "norm": "far", "shape": "xxx", "prefix": "f", "suffix": "far", "length": 3, "cluster": "6890", "prob": -7.8802924156188965, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "big", "id": 276, "lower": "big", "norm": "big", "shape": "xxx", "prefix": "b", "suffix": "big", "length": 3, "cluster": "135", "prob": -7.880735874176025, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "done", "id": 277, "lower": "done", "norm": "done", "shape": "xxxx", "prefix": "d", "suffix": "one", "length": 4, "cluster": "26282", "prob": -7.886453151702881, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "believe", "id": 278, 
"lower": "believe", "norm": "believe", "shape": "xxxx", "prefix": "b", "suffix": "eve", "length": 7, "cluster": "138", "prob": -7.886724948883057, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "Yeah", "id": 279, "lower": "yeah", "norm": "Yeah", "shape": "Xxxx", "prefix": "Y", "suffix": "eah", "length": 4, "cluster": "1726", "prob": -7.890377044677734, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "such", "id": 280, "lower": "such", "norm": "such", "shape": "xxxx", "prefix": "s", "suffix": "uch", "length": 4, "cluster": "111", "prob": -7.894707679748535, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "stuff", "id": 281, "lower": "stuff", "norm": "stuff", "shape": "xxxx", "prefix": "s", "suffix": "uff", "length": 5, "cluster": "6853", "prob": -7.898244380950928, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "away", "id": 282, "lower": "away", "norm": "away", "shape": "xxxx", "prefix": "a", 
"suffix": "way", "length": 4, "cluster": "3434", "prob": -7.9017462730407715, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "nothing", "id": 283, "lower": "nothing", "norm": "nothing", "shape": "xxxx", "prefix": "n", "suffix": "ing", "length": 7, "cluster": "14314", "prob": -7.909971714019775, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "tell", "id": 284, "lower": "tell", "norm": "tell", "shape": "xxxx", "prefix": "t", "suffix": "ell", "length": 4, "cluster": "1546", "prob": -7.910365581512451, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "looking", "id": 285, "lower": "looking", "norm": "looking", "shape": "xxxx", "prefix": "l", "suffix": "ing", "length": 7, "cluster": "1066", "prob": -7.911639213562012, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "start", "id": 286, "lower": "start", "norm": "start", "shape": "xxxx", "prefix": "s", "suffix": "art", "length": 5, "cluster": "3978", 
"prob": -7.923925876617432, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "using", "id": 287, "lower": "using", "norm": "using", "shape": "xxxx", "prefix": "u", "suffix": "ing", "length": 5, "cluster": "7146", "prob": -7.938363075256348, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "able", "id": 288, "lower": "able", "norm": "able", "shape": "xxxx", "prefix": "a", "suffix": "ble", "length": 4, "cluster": "6186", "prob": -7.939544677734375, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "place", "id": 289, "lower": "place", "norm": "place", "shape": "xxxx", "prefix": "p", "suffix": "ace", "length": 5, "cluster": "6245", "prob": -7.954748153686523, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "high", "id": 290, "lower": "high", "norm": "high", "shape": "xxxx", "prefix": "h", "suffix": "igh", "length": 4, "cluster": "167", "prob": -7.963760852813721, "is_alpha": true, "is_ascii": true, 
"is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "until", "id": 291, "lower": "until", "norm": "until", "shape": "xxxx", "prefix": "u", "suffix": "til", "length": 5, "cluster": "2516", "prob": -7.964784622192383, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "either", "id": 292, "lower": "either", "norm": "either", "shape": "xxxx", "prefix": "e", "suffix": "her", "length": 6, "cluster": "30698", "prob": -7.965897560119629, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "seen", "id": 293, "lower": "seen", "norm": "seen", "shape": "xxxx", "prefix": "s", "suffix": "een", "length": 4, "cluster": "26282", "prob": -7.97322416305542, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "times", "id": 294, "lower": "times", "norm": "times", "shape": "xxxx", "prefix": "t", "suffix": "mes", "length": 5, "cluster": "61", "prob": -7.9734907150268555, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, 
"is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "real", "id": 295, "lower": "real", "norm": "real", "shape": "xxxx", "prefix": "r", "suffix": "eal", "length": 4, "cluster": "503", "prob": -7.981620788574219, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "When", "id": 296, "lower": "when", "norm": "When", "shape": "Xxxx", "prefix": "W", "suffix": "hen", "length": 4, "cluster": "190", "prob": -7.982150554656982, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "making", "id": 297, "lower": "making", "norm": "making", "shape": "xxxx", "prefix": "m", "suffix": "ing", "length": 6, "cluster": "7146", "prob": -7.985988616943359, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "seems", "id": 298, "lower": "seems", "norm": "seems", "shape": "xxxx", "prefix": "s", "suffix": "ems", "length": 5, "cluster": "16298", "prob": -7.989145278930664, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, 
"like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "fuck", "id": 299, "lower": "fuck", "norm": "fuck", "shape": "xxxx", "prefix": "f", "suffix": "uck", "length": 4, "cluster": "0", "prob": -7.992913246154785, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "fucking", "id": 300, "lower": "fucking", "norm": "fucking", "shape": "xxxx", "prefix": "f", "suffix": "ing", "length": 7, "cluster": "0", "prob": -7.993165969848633, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "\n\n\n", "id": 0, "lower": "\n\n\n", "norm": "\n\n\n", "shape": "\n\n\n", "prefix": "\n", "suffix": "\n\n\n", "length": 3, "cluster": "0", "prob": -7.996075630187988, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "next", "id": 301, "lower": "next", "norm": "next", "shape": "xxxx", "prefix": "n", "suffix": "ext", "length": 4, "cluster": "255", "prob": -7.996739864349365, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, 
"is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "anyone", "id": 302, "lower": "anyone", "norm": "anyone", "shape": "xxxx", "prefix": "a", "suffix": "one", "length": 6, "cluster": "30698", "prob": -7.997350215911865, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "#", "id": 303, "lower": "#", "norm": "#", "shape": "#", "prefix": "#", "suffix": "#", "length": 1, "cluster": "18", "prob": -8.001263618469238, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "looks", "id": 304, "lower": "looks", "norm": "looks", "shape": "xxxx", "prefix": "l", "suffix": "oks", "length": 5, "cluster": "2442", "prob": -8.001678466796875, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "everything", "id": 305, "lower": "everything", "norm": "everything", "shape": "xxxx", "prefix": "e", "suffix": "ing", "length": 10, "cluster": "14314", "prob": -8.00584602355957, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, 
"is_left_punct": false, "is_right_punct": false} -{"orth": "Oh", "id": 306, "lower": "oh", "norm": "Oh", "shape": "Xx", "prefix": "O", "suffix": "Oh", "length": 2, "cluster": "1726", "prob": -8.007224082946777, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "nice", "id": 307, "lower": "nice", "norm": "nice", "shape": "xxxx", "prefix": "n", "suffix": "ice", "length": 4, "cluster": "551", "prob": -8.009806632995605, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "once", "id": 308, "lower": "once", "norm": "once", "shape": "xxxx", "prefix": "o", "suffix": "nce", "length": 4, "cluster": "22250", "prob": -8.010163307189941, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "show", "id": 309, "lower": "show", "norm": "show", "shape": "xxxx", "prefix": "s", "suffix": "how", "length": 4, "cluster": "7690", "prob": -8.011373519897461, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "maybe", "id": 310, 
"lower": "maybe", "norm": "maybe", "shape": "xxxx", "prefix": "m", "suffix": "ybe", "length": 5, "cluster": "60650", "prob": -8.020626068115234, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "fact", "id": 311, "lower": "fact", "norm": "fact", "shape": "xxxx", "prefix": "f", "suffix": "act", "length": 4, "cluster": "369", "prob": -8.032754898071289, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "wo", "id": 312, "lower": "wo", "norm": "wo", "shape": "xx", "prefix": "w", "suffix": "wo", "length": 2, "cluster": "26", "prob": -8.0400972366333, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "5", "id": 313, "lower": "5", "norm": "5", "shape": "d", "prefix": "5", "suffix": "5", "length": 1, "cluster": "818", "prob": -8.040534019470215, "is_alpha": false, "is_ascii": true, "is_digit": true, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "free", "id": 314, "lower": "free", "norm": "free", "shape": "xxxx", "prefix": "f", "suffix": "ree", "length": 4, 
"cluster": "6634", "prob": -8.0440092086792, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "understand", "id": 315, "lower": "understand", "norm": "understand", "shape": "xxxx", "prefix": "u", "suffix": "and", "length": 10, "cluster": "3722", "prob": -8.052404403686523, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "team", "id": 316, "lower": "team", "norm": "team", "shape": "xxxx", "prefix": "t", "suffix": "eam", "length": 4, "cluster": "1061", "prob": -8.053070068359375, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "....", "id": 317, "lower": "....", "norm": "....", "shape": "....", "prefix": ".", "suffix": "...", "length": 4, "cluster": "1202", "prob": -8.05477523803711, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "against", "id": 318, "lower": "against", "norm": "against", "shape": "xxxx", "prefix": "a", "suffix": "nst", "length": 7, "cluster": "24572", "prob": 
-8.064282417297363, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "live", "id": 319, "lower": "live", "norm": "live", "shape": "xxxx", "prefix": "l", "suffix": "ive", "length": 4, "cluster": "1418", "prob": -8.065953254699707, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": " \n\n", "id": 0, "lower": " \n\n", "norm": " \n\n", "shape": " \n\n", "prefix": " ", "suffix": " \n\n", "length": 3, "cluster": "0", "prob": -8.068946838378906, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "Why", "id": 320, "lower": "why", "norm": "Why", "shape": "Xxx", "prefix": "W", "suffix": "Why", "length": 3, "cluster": "702", "prob": -8.06901741027832, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "whole", "id": 321, "lower": "whole", "norm": "whole", "shape": "xxxx", "prefix": "w", "suffix": "ole", "length": 5, "cluster": "71", "prob": -8.070209503173828, "is_alpha": true, "is_ascii": true, "is_digit": false, 
"is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "For", "id": 322, "lower": "for", "norm": "For", "shape": "Xxx", "prefix": "F", "suffix": "For", "length": 3, "cluster": "1342", "prob": -8.072200775146484, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "guys", "id": 323, "lower": "guys", "norm": "guys", "shape": "xxxx", "prefix": "g", "suffix": "uys", "length": 4, "cluster": "365", "prob": -8.075167655944824, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "job", "id": 324, "lower": "job", "norm": "job", "shape": "xxx", "prefix": "j", "suffix": "job", "length": 3, "cluster": "37", "prob": -8.082273483276367, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "etc", "id": 325, "lower": "etc", "norm": "etc", "shape": "xxx", "prefix": "e", "suffix": "etc", "length": 3, "cluster": "26", "prob": -8.087606430053711, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": 
false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "4", "id": 326, "lower": "4", "norm": "4", "shape": "d", "prefix": "4", "suffix": "4", "length": 1, "cluster": "818", "prob": -8.088510513305664, "is_alpha": false, "is_ascii": true, "is_digit": true, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "went", "id": 327, "lower": "went", "norm": "went", "shape": "xxxx", "prefix": "w", "suffix": "ent", "length": 4, "cluster": "7338", "prob": -8.091073989868164, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "school", "id": 328, "lower": "school", "norm": "school", "shape": "xxxx", "prefix": "s", "suffix": "ool", "length": 6, "cluster": "1829", "prob": -8.096077919006348, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "guess", "id": 329, "lower": "guess", "norm": "guess", "shape": "xxxx", "prefix": "g", "suffix": "ess", "length": 5, "cluster": "650", "prob": -8.097951889038086, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": 
false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "friends", "id": 330, "lower": "friends", "norm": "friends", "shape": "xxxx", "prefix": "f", "suffix": "nds", "length": 7, "cluster": "3565", "prob": -8.10158634185791, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "between", "id": 331, "lower": "between", "norm": "between", "shape": "xxxx", "prefix": "b", "suffix": "een", "length": 7, "cluster": "12284", "prob": -8.106386184692383, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "case", "id": 332, "lower": "case", "norm": "case", "shape": "xxxx", "prefix": "c", "suffix": "ase", "length": 4, "cluster": "3269", "prob": -8.106882095336914, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "She", "id": 333, "lower": "she", "norm": "She", "shape": "Xxx", "prefix": "S", "suffix": "She", "length": 3, "cluster": "126", "prob": -8.119241714477539, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, 
"is_left_punct": false, "is_right_punct": false} -{"orth": "each", "id": 334, "lower": "each", "norm": "each", "shape": "xxxx", "prefix": "e", "suffix": "ach", "length": 4, "cluster": "32746", "prob": -8.123948097229004, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "fun", "id": 335, "lower": "fun", "norm": "fun", "shape": "xxx", "prefix": "f", "suffix": "fun", "length": 3, "cluster": "16229", "prob": -8.124406814575195, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "agree", "id": 336, "lower": "agree", "norm": "agree", "shape": "xxxx", "prefix": "a", "suffix": "ree", "length": 5, "cluster": "394", "prob": -8.12778091430664, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "Is", "id": 337, "lower": "is", "norm": "Is", "shape": "Xx", "prefix": "I", "suffix": "Is", "length": 2, "cluster": "1214", "prob": -8.129456520080566, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "buy", "id": 338, 
"lower": "buy", "norm": "buy", "shape": "xxx", "prefix": "b", "suffix": "buy", "length": 3, "cluster": "2826", "prob": -8.142950057983398, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "Yes", "id": 339, "lower": "yes", "norm": "Yes", "shape": "Xxx", "prefix": "Y", "suffix": "Yes", "length": 3, "cluster": "1726", "prob": -8.147512435913086, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "run", "id": 340, "lower": "run", "norm": "run", "shape": "xxx", "prefix": "r", "suffix": "run", "length": 3, "cluster": "437", "prob": -8.156776428222656, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "change", "id": 341, "lower": "change", "norm": "change", "shape": "xxxx", "prefix": "c", "suffix": "nge", "length": 6, "cluster": "2997", "prob": -8.157740592956543, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "found", "id": 342, "lower": "found", "norm": "found", "shape": "xxxx", "prefix": "f", "suffix": 
"und", "length": 5, "cluster": "13738", "prob": -8.182107925415039, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "question", "id": 343, "lower": "question", "norm": "question", "shape": "xxxx", "prefix": "q", "suffix": "ion", "length": 8, "cluster": "709", "prob": -8.185464859008789, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "top", "id": 344, "lower": "top", "norm": "top", "shape": "xxx", "prefix": "t", "suffix": "top", "length": 3, "cluster": "1479", "prob": -8.191086769104004, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "playing", "id": 345, "lower": "playing", "norm": "playing", "shape": "xxxx", "prefix": "p", "suffix": "ing", "length": 7, "cluster": "11242", "prob": -8.191595077514648, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "name", "id": 346, "lower": "name", "norm": "name", "shape": "xxxx", "prefix": "n", "suffix": "ame", "length": 4, "cluster": "4021", "prob": 
-8.19616985321045, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "mind", "id": 347, "lower": "mind", "norm": "mind", "shape": "xxxx", "prefix": "m", "suffix": "ind", "length": 4, "cluster": "1893", "prob": -8.197138786315918, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "myself", "id": 348, "lower": "myself", "norm": "myself", "shape": "xxxx", "prefix": "m", "suffix": "elf", "length": 6, "cluster": "8042", "prob": -8.200143814086914, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "gets", "id": 349, "lower": "gets", "norm": "gets", "shape": "xxxx", "prefix": "g", "suffix": "ets", "length": 4, "cluster": "10666", "prob": -8.202808380126953, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "ago", "id": 350, "lower": "ago", "norm": "ago", "shape": "xxx", "prefix": "a", "suffix": "ago", "length": 3, "cluster": "6442", "prob": -8.206598281860352, "is_alpha": true, "is_ascii": true, "is_digit": 
false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "friend", "id": 351, "lower": "friend", "norm": "friend", "shape": "xxxx", "prefix": "f", "suffix": "end", "length": 6, "cluster": "1061", "prob": -8.210515975952148, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "talking", "id": 352, "lower": "talking", "norm": "talking", "shape": "xxxx", "prefix": "t", "suffix": "ing", "length": 7, "cluster": "4586", "prob": -8.22729778289795, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "days", "id": 353, "lower": "days", "norm": "days", "shape": "xxxx", "prefix": "d", "suffix": "ays", "length": 4, "cluster": "317", "prob": -8.227437973022461, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "yet", "id": 354, "lower": "yet", "norm": "yet", "shape": "xxx", "prefix": "y", "suffix": "yet", "length": 3, "cluster": "32490", "prob": -8.229137420654297, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": 
false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "means", "id": 355, "lower": "means", "norm": "means", "shape": "xxxx", "prefix": "m", "suffix": "ans", "length": 5, "cluster": "31146", "prob": -8.234617233276367, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "hope", "id": 356, "lower": "hope", "norm": "hope", "shape": "xxxx", "prefix": "h", "suffix": "ope", "length": 4, "cluster": "650", "prob": -8.236272811889648, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "almost", "id": 357, "lower": "almost", "norm": "almost", "shape": "xxxx", "prefix": "a", "suffix": "ost", "length": 6, "cluster": "7402", "prob": -8.236738204956055, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "yourself", "id": 358, "lower": "yourself", "norm": "yourself", "shape": "xxxx", "prefix": "y", "suffix": "elf", "length": 8, "cluster": "8042", "prob": -8.2402982711792, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, 
"like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "awesome", "id": 359, "lower": "awesome", "norm": "awesome", "shape": "xxxx", "prefix": "a", "suffix": "ome", "length": 7, "cluster": "871", "prob": -8.247021675109863, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "care", "id": 360, "lower": "care", "norm": "care", "shape": "xxxx", "prefix": "c", "suffix": "are", "length": 4, "cluster": "1229", "prob": -8.248679161071777, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "quite", "id": 361, "lower": "quite", "norm": "quite", "shape": "xxxx", "prefix": "q", "suffix": "ite", "length": 5, "cluster": "15338", "prob": -8.254060745239258, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "10", "id": 362, "lower": "10", "norm": "10", "shape": "dd", "prefix": "1", "suffix": "10", "length": 2, "cluster": "1970", "prob": -8.258377075195312, "is_alpha": false, "is_ascii": true, "is_digit": true, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, 
"is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "true", "id": 363, "lower": "true", "norm": "true", "shape": "xxxx", "prefix": "t", "suffix": "rue", "length": 4, "cluster": "4586", "prob": -8.259368896484375, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "remember", "id": 364, "lower": "remember", "norm": "remember", "shape": "xxxx", "prefix": "r", "suffix": "ber", "length": 8, "cluster": "3722", "prob": -8.259916305541992, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "definitely", "id": 365, "lower": "definitely", "norm": "definitely", "shape": "xxxx", "prefix": "d", "suffix": "ely", "length": 10, "cluster": "7802", "prob": -8.264209747314453, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "call", "id": 366, "lower": "call", "norm": "call", "shape": "xxxx", "prefix": "c", "suffix": "all", "length": 4, "cluster": "3765", "prob": -8.267317771911621, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, 
"is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "pay", "id": 367, "lower": "pay", "norm": "pay", "shape": "xxx", "prefix": "p", "suffix": "pay", "length": 3, "cluster": "7946", "prob": -8.26932144165039, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "stop", "id": 368, "lower": "stop", "norm": "stop", "shape": "xxxx", "prefix": "s", "suffix": "top", "length": 4, "cluster": "3338", "prob": -8.272970199584961, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "set", "id": 369, "lower": "set", "norm": "set", "shape": "xxx", "prefix": "s", "suffix": "set", "length": 3, "cluster": "2218", "prob": -8.285635948181152, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "started", "id": 370, "lower": "started", "norm": "started", "shape": "xxxx", "prefix": "s", "suffix": "ted", "length": 7, "cluster": "3242", "prob": -8.286487579345703, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} 
-{"orth": "instead", "id": 371, "lower": "instead", "norm": "instead", "shape": "xxxx", "prefix": "i", "suffix": "ead", "length": 7, "cluster": "2005", "prob": -8.292781829833984, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "story", "id": 372, "lower": "story", "norm": "story", "shape": "xxxx", "prefix": "s", "suffix": "ory", "length": 5, "cluster": "6853", "prob": -8.293317794799805, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "level", "id": 373, "lower": "level", "norm": "level", "shape": "xxxx", "prefix": "l", "suffix": "vel", "length": 5, "cluster": "6117", "prob": -8.29642391204834, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "left", "id": 374, "lower": "left", "norm": "left", "shape": "xxxx", "prefix": "l", "suffix": "eft", "length": 4, "cluster": "54954", "prob": -8.296669006347656, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "week", "id": 375, "lower": "week", "norm": 
"week", "shape": "xxxx", "prefix": "w", "suffix": "eek", "length": 4, "cluster": "157", "prob": -8.300933837890625, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "system", "id": 376, "lower": "system", "norm": "system", "shape": "xxxx", "prefix": "s", "suffix": "tem", "length": 6, "cluster": "4901", "prob": -8.303738594055176, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "full", "id": 377, "lower": "full", "norm": "full", "shape": "xxxx", "prefix": "f", "suffix": "ull", "length": 4, "cluster": "4071", "prob": -8.303950309753418, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "rather", "id": 378, "lower": "rather", "norm": "rather", "shape": "xxxx", "prefix": "r", "suffix": "her", "length": 6, "cluster": "6698", "prob": -8.312031745910645, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "video", "id": 379, "lower": "video", "norm": "video", "shape": "xxxx", "prefix": "v", "suffix": "deo", 
"length": 5, "cluster": "1975", "prob": -8.316000938415527, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "home", "id": 380, "lower": "home", "norm": "home", "shape": "xxxx", "prefix": "h", "suffix": "ome", "length": 4, "cluster": "1013", "prob": -8.316133499145508, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "women", "id": 381, "lower": "women", "norm": "women", "shape": "xxxx", "prefix": "w", "suffix": "men", "length": 5, "cluster": "877", "prob": -8.317564964294434, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "usually", "id": 382, "lower": "usually", "norm": "usually", "shape": "xxxx", "prefix": "u", "suffix": "lly", "length": 7, "cluster": "3706", "prob": -8.324220657348633, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "side", "id": 383, "lower": "side", "norm": "side", "shape": "xxxx", "prefix": "s", "suffix": "ide", "length": 4, "cluster": "8037", "prob": -8.327798843383789, 
"is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "wanted", "id": 384, "lower": "wanted", "norm": "wanted", "shape": "xxxx", "prefix": "w", "suffix": "ted", "length": 6, "cluster": "30634", "prob": -8.329934120178223, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "sense", "id": 385, "lower": "sense", "norm": "sense", "shape": "xxxx", "prefix": "s", "suffix": "nse", "length": 5, "cluster": "613", "prob": -8.338400840759277, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "Your", "id": 386, "lower": "your", "norm": "Your", "shape": "Xxxx", "prefix": "Y", "suffix": "our", "length": 4, "cluster": "94", "prob": -8.347208023071289, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "second", "id": 387, "lower": "second", "norm": "second", "shape": "xxxx", "prefix": "s", "suffix": "ond", "length": 6, "cluster": "31", "prob": -8.351142883300781, "is_alpha": true, "is_ascii": true, "is_digit": false, 
"is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "comment", "id": 388, "lower": "comment", "norm": "comment", "shape": "xxxx", "prefix": "c", "suffix": "ent", "length": 7, "cluster": "757", "prob": -8.35578727722168, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "course", "id": 389, "lower": "course", "norm": "course", "shape": "xxxx", "prefix": "c", "suffix": "rse", "length": 6, "cluster": "1009", "prob": -8.35777759552002, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "ask", "id": 390, "lower": "ask", "norm": "ask", "shape": "xxx", "prefix": "a", "suffix": "ask", "length": 3, "cluster": "1546", "prob": -8.35922622680664, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "Or", "id": 391, "lower": "or", "norm": "Or", "shape": "Xx", "prefix": "O", "suffix": "Or", "length": 2, "cluster": "1726", "prob": -8.361105918884277, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": 
true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "seem", "id": 392, "lower": "seem", "norm": "seem", "shape": "xxxx", "prefix": "s", "suffix": "eem", "length": 4, "cluster": "906", "prob": -8.363061904907227, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "Maybe", "id": 393, "lower": "maybe", "norm": "Maybe", "shape": "Xxxxx", "prefix": "M", "suffix": "ybe", "length": 5, "cluster": "190", "prob": -8.364654541015625, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "must", "id": 394, "lower": "must", "norm": "must", "shape": "xxxx", "prefix": "m", "suffix": "ust", "length": 4, "cluster": "698", "prob": -8.365957260131836, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "Then", "id": 395, "lower": "then", "norm": "Then", "shape": "Xxxx", "prefix": "T", "suffix": "hen", "length": 4, "cluster": "1726", "prob": -8.369159698486328, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, 
"like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "small", "id": 396, "lower": "small", "norm": "small", "shape": "xxxx", "prefix": "s", "suffix": "all", "length": 5, "cluster": "391", "prob": -8.371565818786621, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "car", "id": 397, "lower": "car", "norm": "car", "shape": "xxx", "prefix": "c", "suffix": "car", "length": 3, "cluster": "1145", "prob": -8.374984741210938, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "hate", "id": 398, "lower": "hate", "norm": "hate", "shape": "xxxx", "prefix": "h", "suffix": "ate", "length": 4, "cluster": "906", "prob": -8.380099296569824, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "came", "id": 399, "lower": "came", "norm": "came", "shape": "xxxx", "prefix": "c", "suffix": "ame", "length": 4, "cluster": "15530", "prob": -8.382718086242676, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": 
false, "is_left_punct": false, "is_right_punct": false} -{"orth": "watch", "id": 400, "lower": "watch", "norm": "watch", "shape": "xxxx", "prefix": "w", "suffix": "tch", "length": 5, "cluster": "3765", "prob": -8.386272430419922, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "experience", "id": 401, "lower": "experience", "norm": "experience", "shape": "xxxx", "prefix": "e", "suffix": "nce", "length": 10, "cluster": "2917", "prob": -8.387101173400879, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "cool", "id": 402, "lower": "cool", "norm": "cool", "shape": "xxxx", "prefix": "c", "suffix": "ool", "length": 4, "cluster": "565", "prob": -8.393746376037598, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "matter", "id": 403, "lower": "matter", "norm": "matter", "shape": "xxxx", "prefix": "m", "suffix": "ter", "length": 6, "cluster": "4805", "prob": -8.395515441894531, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, 
"is_right_punct": false} -{"orth": "others", "id": 404, "lower": "others", "norm": "others", "shape": "xxxx", "prefix": "o", "suffix": "ers", "length": 6, "cluster": "1901", "prob": -8.396527290344238, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "completely", "id": 405, "lower": "completely", "norm": "completely", "shape": "xxxx", "prefix": "c", "suffix": "ely", "length": 10, "cluster": "12010", "prob": -8.40324592590332, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "All", "id": 406, "lower": "all", "norm": "All", "shape": "Xxx", "prefix": "A", "suffix": "All", "length": 3, "cluster": "1214", "prob": -8.403707504272461, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "called", "id": 407, "lower": "called", "norm": "called", "shape": "xxxx", "prefix": "c", "suffix": "led", "length": 6, "cluster": "11946", "prob": -8.404229164123535, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "under", 
"id": 408, "lower": "under", "norm": "under", "shape": "xxxx", "prefix": "u", "suffix": "der", "length": 5, "cluster": "32764", "prob": -8.406200408935547, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "yes", "id": 409, "lower": "yes", "norm": "yes", "shape": "xxx", "prefix": "y", "suffix": "yes", "length": 3, "cluster": "15146", "prob": -8.41097354888916, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "Now", "id": 410, "lower": "now", "norm": "Now", "shape": "Xxx", "prefix": "N", "suffix": "Now", "length": 3, "cluster": "1726", "prob": -8.417712211608887, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "Please", "id": 411, "lower": "please", "norm": "Please", "shape": "Xxxxx", "prefix": "P", "suffix": "ase", "length": 6, "cluster": "3582", "prob": -8.41897964477539, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "worth", "id": 412, "lower": "worth", "norm": "worth", "shape": "xxxx", 
"prefix": "w", "suffix": "rth", "length": 5, "cluster": "981", "prob": -8.423324584960938, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "says", "id": 413, "lower": "says", "norm": "says", "shape": "xxxx", "prefix": "s", "suffix": "ays", "length": 4, "cluster": "244", "prob": -8.426565170288086, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "comes", "id": 414, "lower": "comes", "norm": "comes", "shape": "xxxx", "prefix": "c", "suffix": "mes", "length": 5, "cluster": "15530", "prob": -8.428640365600586, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "fine", "id": 415, "lower": "fine", "norm": "fine", "shape": "xxxx", "prefix": "f", "suffix": "ine", "length": 4, "cluster": "8057", "prob": -8.428781509399414, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "Thank", "id": 416, "lower": "thank", "norm": "Thank", "shape": "Xxxxx", "prefix": "T", "suffix": "ank", "length": 5, "cluster": "190", 
"prob": -8.434432983398438, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": " \n", "id": 0, "lower": " \n", "norm": " \n", "shape": " \n", "prefix": " ", "suffix": " \n", "length": 2, "cluster": "0", "prob": -8.435208320617676, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "works", "id": 417, "lower": "works", "norm": "works", "shape": "xxxx", "prefix": "w", "suffix": "rks", "length": 5, "cluster": "77", "prob": -8.436944961547852, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "exactly", "id": 418, "lower": "exactly", "norm": "exactly", "shape": "xxxx", "prefix": "e", "suffix": "tly", "length": 7, "cluster": "15338", "prob": -8.43747615814209, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "heard", "id": 419, "lower": "heard", "norm": "heard", "shape": "xxxx", "prefix": "h", "suffix": "ard", "length": 5, "cluster": "26282", "prob": -8.4396333694458, "is_alpha": true, "is_ascii": true, 
"is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "possible", "id": 420, "lower": "possible", "norm": "possible", "shape": "xxxx", "prefix": "p", "suffix": "ble", "length": 8, "cluster": "2535", "prob": -8.44277572631836, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "thinking", "id": 421, "lower": "thinking", "norm": "thinking", "shape": "xxxx", "prefix": "t", "suffix": "ing", "length": 8, "cluster": "4586", "prob": -8.442947387695312, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "hours", "id": 422, "lower": "hours", "norm": "hours", "shape": "xxxx", "prefix": "h", "suffix": "urs", "length": 5, "cluster": "957", "prob": -8.445417404174805, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "working", "id": 423, "lower": "working", "norm": "working", "shape": "xxxx", "prefix": "w", "suffix": "ing", "length": 7, "cluster": "27626", "prob": -8.44786262512207, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, 
"is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "took", "id": 424, "lower": "took", "norm": "took", "shape": "xxxx", "prefix": "t", "suffix": "ook", "length": 4, "cluster": "27050", "prob": -8.452874183654785, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "thanks", "id": 425, "lower": "thanks", "norm": "thanks", "shape": "xxxx", "prefix": "t", "suffix": "nks", "length": 6, "cluster": "554", "prob": -8.457283973693848, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "head", "id": 426, "lower": "head", "norm": "head", "shape": "xxxx", "prefix": "h", "suffix": "ead", "length": 4, "cluster": "1813", "prob": -8.458500862121582, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "power", "id": 427, "lower": "power", "norm": "power", "shape": "xxxx", "prefix": "p", "suffix": "wer", "length": 5, "cluster": "11621", "prob": -8.460216522216797, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, 
"is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "happen", "id": 428, "lower": "happen", "norm": "happen", "shape": "xxxx", "prefix": "h", "suffix": "pen", "length": 6, "cluster": "3466", "prob": -8.465093612670898, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "goes", "id": 429, "lower": "goes", "norm": "goes", "shape": "xxxx", "prefix": "g", "suffix": "oes", "length": 4, "cluster": "7338", "prob": -8.465673446655273, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "Good", "id": 430, "lower": "good", "norm": "Good", "shape": "Xxxx", "prefix": "G", "suffix": "ood", "length": 4, "cluster": "614", "prob": -8.468016624450684, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "saw", "id": 431, "lower": "saw", "norm": "saw", "shape": "xxx", "prefix": "s", "suffix": "saw", "length": 3, "cluster": "6570", "prob": -8.472514152526855, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": 
false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "please", "id": 432, "lower": "please", "norm": "please", "shape": "xxxx", "prefix": "p", "suffix": "ase", "length": 6, "cluster": "309", "prob": -8.473013877868652, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "couple", "id": 433, "lower": "couple", "norm": "couple", "shape": "xxxx", "prefix": "c", "suffix": "ple", "length": 6, "cluster": "853", "prob": -8.47309398651123, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "hit", "id": 434, "lower": "hit", "norm": "hit", "shape": "xxx", "prefix": "h", "suffix": "hit", "length": 3, "cluster": "682", "prob": -8.473491668701172, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "likely", "id": 435, "lower": "likely", "norm": "likely", "shape": "xxxx", "prefix": "l", "suffix": "ely", "length": 6, "cluster": "42", "prob": -8.47359561920166, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, 
"is_left_punct": false, "is_right_punct": false} -{"orth": "ones", "id": 436, "lower": "ones", "norm": "ones", "shape": "xxxx", "prefix": "o", "suffix": "nes", "length": 4, "cluster": "15821", "prob": -8.474469184875488, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "often", "id": 437, "lower": "often", "norm": "often", "shape": "xxxx", "prefix": "o", "suffix": "ten", "length": 5, "cluster": "3706", "prob": -8.476237297058105, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "talk", "id": 438, "lower": "talk", "norm": "talk", "shape": "xxxx", "prefix": "t", "suffix": "alk", "length": 4, "cluster": "394", "prob": -8.479889869689941, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "issue", "id": 439, "lower": "issue", "norm": "issue", "shape": "xxxx", "prefix": "i", "suffix": "sue", "length": 5, "cluster": "3525", "prob": -8.48391342163086, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "easy", 
"id": 440, "lower": "easy", "norm": "easy", "shape": "xxxx", "prefix": "e", "suffix": "asy", "length": 4, "cluster": "2538", "prob": -8.489182472229004, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "One", "id": 441, "lower": "one", "norm": "One", "shape": "Xxx", "prefix": "O", "suffix": "One", "length": 3, "cluster": "350", "prob": -8.494391441345215, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "needs", "id": 442, "lower": "needs", "norm": "needs", "shape": "xxxx", "prefix": "n", "suffix": "eds", "length": 5, "cluster": "14250", "prob": -8.49528694152832, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "add", "id": 443, "lower": "add", "norm": "add", "shape": "xxx", "prefix": "a", "suffix": "add", "length": 3, "cluster": "3594", "prob": -8.496837615966797, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "support", "id": 444, "lower": "support", "norm": "support", "shape": "xxxx", 
"prefix": "s", "suffix": "ort", "length": 7, "cluster": "7861", "prob": -8.503355026245117, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "face", "id": 445, "lower": "face", "norm": "face", "shape": "xxxx", "prefix": "f", "suffix": "ace", "length": 4, "cluster": "1685", "prob": -8.504852294921875, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "hand", "id": 446, "lower": "hand", "norm": "hand", "shape": "xxxx", "prefix": "h", "suffix": "and", "length": 4, "cluster": "8037", "prob": -8.504961967468262, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "half", "id": 447, "lower": "half", "norm": "half", "shape": "xxxx", "prefix": "h", "suffix": "alf", "length": 4, "cluster": "469", "prob": -8.508658409118652, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "check", "id": 448, "lower": "check", "norm": "check", "shape": "xxxx", "prefix": "c", "suffix": "eck", "length": 5, "cluster": "2485", 
"prob": -8.512067794799805, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "night", "id": 449, "lower": "night", "norm": "night", "shape": "xxxx", "prefix": "n", "suffix": "ght", "length": 5, "cluster": "93", "prob": -8.517072677612305, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "months", "id": 450, "lower": "months", "norm": "months", "shape": "xxxx", "prefix": "m", "suffix": "ths", "length": 6, "cluster": "445", "prob": -8.517988204956055, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "kids", "id": 451, "lower": "kids", "norm": "kids", "shape": "xxxx", "prefix": "k", "suffix": "ids", "length": 4, "cluster": "877", "prob": -8.520237922668457, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "players", "id": 452, "lower": "players", "norm": "players", "shape": "xxxx", "prefix": "p", "suffix": "ers", "length": 7, "cluster": "3565", "prob": -8.520515441894531, "is_alpha": true, "is_ascii": 
true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "line", "id": 453, "lower": "line", "norm": "line", "shape": "xxxx", "prefix": "l", "suffix": "ine", "length": 4, "cluster": "3941", "prob": -8.522600173950195, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "told", "id": 454, "lower": "told", "norm": "told", "shape": "xxxx", "prefix": "t", "suffix": "old", "length": 4, "cluster": "20138", "prob": -8.52303409576416, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "example", "id": 455, "lower": "example", "norm": "example", "shape": "xxxx", "prefix": "e", "suffix": "ple", "length": 7, "cluster": "497", "prob": -8.523116111755371, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "played", "id": 456, "lower": "played", "norm": "played", "shape": "xxxx", "prefix": "p", "suffix": "yed", "length": 6, "cluster": "32426", "prob": -8.528886795043945, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": 
false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "reddit", "id": 457, "lower": "reddit", "norm": "reddit", "shape": "xxxx", "prefix": "r", "suffix": "dit", "length": 6, "cluster": "0", "prob": -8.52908992767334, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "based", "id": 458, "lower": "based", "norm": "based", "shape": "xxxx", "prefix": "b", "suffix": "sed", "length": 5, "cluster": "1578", "prob": -8.53032112121582, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "tried", "id": 459, "lower": "tried", "norm": "tried", "shape": "xxxx", "prefix": "t", "suffix": "ied", "length": 5, "cluster": "28586", "prob": -8.532145500183105, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "sounds", "id": 460, "lower": "sounds", "norm": "sounds", "shape": "xxxx", "prefix": "s", "suffix": "nds", "length": 6, "cluster": "2442", "prob": -8.53985595703125, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": 
false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "link", "id": 461, "lower": "link", "norm": "link", "shape": "xxxx", "prefix": "l", "suffix": "ink", "length": 4, "cluster": "5829", "prob": -8.540618896484375, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "girl", "id": 462, "lower": "girl", "norm": "girl", "shape": "xxxx", "prefix": "g", "suffix": "irl", "length": 4, "cluster": "549", "prob": -8.542597770690918, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "open", "id": 463, "lower": "open", "norm": "open", "shape": "xxxx", "prefix": "o", "suffix": "pen", "length": 4, "cluster": "1589", "prob": -8.553583145141602, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "To", "id": 464, "lower": "to", "norm": "To", "shape": "Xx", "prefix": "T", "suffix": "To", "length": 2, "cluster": "3582", "prob": -8.557126998901367, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": 
false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "taking", "id": 465, "lower": "taking", "norm": "taking", "shape": "xxxx", "prefix": "t", "suffix": "ing", "length": 6, "cluster": "31722", "prob": -8.55748462677002, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "happened", "id": 466, "lower": "happened", "norm": "happened", "shape": "xxxx", "prefix": "h", "suffix": "ned", "length": 8, "cluster": "5290", "prob": -8.559469223022461, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "during", "id": 467, "lower": "during", "norm": "during", "shape": "xxxx", "prefix": "d", "suffix": "ing", "length": 6, "cluster": "262140", "prob": -8.559581756591797, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "deal", "id": 468, "lower": "deal", "norm": "deal", "shape": "xxxx", "prefix": "d", "suffix": "eal", "length": 4, "cluster": "5829", "prob": -8.560197830200195, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, 
"is_left_punct": false, "is_right_punct": false} -{"orth": "single", "id": 469, "lower": "single", "norm": "single", "shape": "xxxx", "prefix": "s", "suffix": "gle", "length": 6, "cluster": "71", "prob": -8.571329116821289, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "family", "id": 470, "lower": "family", "norm": "family", "shape": "xxxx", "prefix": "f", "suffix": "ily", "length": 6, "cluster": "1061", "prob": -8.571907043457031, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "close", "id": 471, "lower": "close", "norm": "close", "shape": "xxxx", "prefix": "c", "suffix": "ose", "length": 5, "cluster": "53", "prob": -8.581155776977539, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "happy", "id": 472, "lower": "happy", "norm": "happy", "shape": "xxxx", "prefix": "h", "suffix": "ppy", "length": 5, "cluster": "4586", "prob": -8.581560134887695, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} 
-{"orth": "move", "id": 473, "lower": "move", "norm": "move", "shape": "xxxx", "prefix": "m", "suffix": "ove", "length": 4, "cluster": "7093", "prob": -8.582797050476074, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "number", "id": 474, "lower": "number", "norm": "number", "shape": "xxxx", "prefix": "n", "suffix": "ber", "length": 6, "cluster": "341", "prob": -8.584420204162598, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "US", "id": 475, "lower": "us", "norm": "US", "shape": "XX", "prefix": "U", "suffix": "US", "length": 2, "cluster": "1642", "prob": -8.585862159729004, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": true, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "water", "id": 476, "lower": "water", "norm": "water", "shape": "xxxx", "prefix": "w", "suffix": "ter", "length": 5, "cluster": "3705", "prob": -8.589462280273438, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "men", "id": 477, "lower": "men", "norm": "men", "shape": 
"xxx", "prefix": "m", "suffix": "men", "length": 3, "cluster": "877", "prob": -8.59007453918457, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "yeah", "id": 478, "lower": "yeah", "norm": "yeah", "shape": "xxxx", "prefix": "y", "suffix": "eah", "length": 4, "cluster": "26", "prob": -8.593489646911621, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "later", "id": 479, "lower": "later", "norm": "later", "shape": "xxxx", "prefix": "l", "suffix": "ter", "length": 5, "cluster": "5866", "prob": -8.603795051574707, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "whatever", "id": 480, "lower": "whatever", "norm": "whatever", "shape": "xxxx", "prefix": "w", "suffix": "ver", "length": 8, "cluster": "2026", "prob": -8.610091209411621, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "government", "id": 481, "lower": "government", "norm": "government", "shape": "xxxx", "prefix": "g", "suffix": "ent", 
"length": 10, "cluster": "297", "prob": -8.610445022583008, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "6", "id": 482, "lower": "6", "norm": "6", "shape": "d", "prefix": "6", "suffix": "6", "length": 1, "cluster": "50", "prob": -8.611133575439453, "is_alpha": false, "is_ascii": true, "is_digit": true, "is_lower": false, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "house", "id": 483, "lower": "house", "norm": "house", "shape": "xxxx", "prefix": "h", "suffix": "use", "length": 5, "cluster": "37", "prob": -8.613367080688477, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "similar", "id": 484, "lower": "similar", "norm": "similar", "shape": "xxxx", "prefix": "s", "suffix": "lar", "length": 7, "cluster": "295", "prob": -8.613471031188965, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "wait", "id": 485, "lower": "wait", "norm": "wait", "shape": "xxxx", "prefix": "w", "suffix": "ait", "length": 4, "cluster": "3765", "prob": -8.613734245300293, "is_alpha": true, 
"is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "questions", "id": 486, "lower": "questions", "norm": "questions", "shape": "xxxx", "prefix": "q", "suffix": "ons", "length": 9, "cluster": "1165", "prob": -8.613752365112305, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "sex", "id": 487, "lower": "sex", "norm": "sex", "shape": "xxx", "prefix": "s", "suffix": "sex", "length": 3, "cluster": "633", "prob": -8.613862991333008, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "especially", "id": 488, "lower": "especially", "norm": "especially", "shape": "xxxx", "prefix": "e", "suffix": "lly", "length": 10, "cluster": "27882", "prob": -8.616527557373047, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "lol", "id": 489, "lower": "lol", "norm": "lol", "shape": "xxx", "prefix": "l", "suffix": "lol", "length": 3, "cluster": "0", "prob": -8.621257781982422, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": 
true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "Because", "id": 490, "lower": "because", "norm": "Because", "shape": "Xxxxx", "prefix": "B", "suffix": "use", "length": 7, "cluster": "1214", "prob": -8.623008728027344, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "God", "id": 491, "lower": "god", "norm": "God", "shape": "Xxx", "prefix": "G", "suffix": "God", "length": 3, "cluster": "422", "prob": -8.62376594543457, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} diff --git a/website/api/_annotation/_training.jade b/website/api/_annotation/_training.jade index 9a5e96628..4e37ee2b1 100644 --- a/website/api/_annotation/_training.jade +++ b/website/api/_annotation/_training.jade @@ -98,7 +98,7 @@ p } p - | Here's an example of the 500 most frequent lexemes in the English + | Here's an example of the 100 most frequent lexemes in the English | training data: -+github("spacy", "examples/training/vocab-data.json", false, false, "json") ++github("spacy", "examples/training/vocab-data.jsonl", false, false, "json") From 4112a991ec012b175a1a97add51ce04d09351886 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 30 Oct 2017 19:44:40 +0100 Subject: [PATCH 29/32] Fix vector pruning --- spacy/vectors.pyx | 26 ++++++++++++++------------ 
spacy/vocab.pyx | 17 +++++++++++++---- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx index 368b73866..552a6bcf3 100644 --- a/spacy/vectors.pyx +++ b/spacy/vectors.pyx @@ -30,7 +30,8 @@ cdef class Vectors: cdef readonly StringStore strings cdef public object key2row cdef public object keys - cdef public int i + cdef public int _i_key + cdef public int _i_vec def __init__(self, strings, width=0, data=None): """Create a new vector store. To keep the vector table empty, pass @@ -53,7 +54,8 @@ cdef class Vectors: self.data = numpy.asarray(data, dtype='f') else: self.data = numpy.zeros((len(self.strings), width), dtype='f') - self.i = 0 + self._i_key = 0 + self._i_vec = 0 self.key2row = {} self.keys = numpy.zeros((self.data.shape[0],), dtype='uint64') if data is not None: @@ -105,7 +107,7 @@ cdef class Vectors: RETURNS (int): The number of vectors in the data. """ - return self.i + return self._i_vec def __contains__(self, key): """Check whether a key has a vector entry in the table. 
@@ -127,20 +129,20 @@ cdef class Vectors: """ if isinstance(key, basestring_): key = self.strings.add(key) - if key in self.key2row and row is None: + if row is None and key in self.key2row: row = self.key2row[key] - elif key in self.key2row and row is not None: - self.key2row[key] = row elif row is None: - row = self.i - self.i += 1 - if row >= self.keys.shape[0]: - self.keys.resize((row*2,)) + row = self._i_vec + self._i_vec += 1 + if row >= self.data.shape[0]: self.data.resize((row*2, self.data.shape[1])) - self.keys[row] = key + if key not in self.key2row: + if self._i_key >= self.keys.shape[0]: + self.keys.resize((self._i_key*2,)) + self.keys[self._i_key] = key + self._i_key += 1 self.key2row[key] = row - self.keys[row] = key if vector is not None: self.data[row] = vector return row diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index ff6c5b844..ecf1ad9d9 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -248,7 +248,7 @@ cdef class Vocab: width = self.vectors.data.shape[1] self.vectors = Vectors(self.strings, width=width) - def prune_vectors(self, nr_row, batch_size=1024): + def prune_vectors(self, nr_row, batch_size=8): """Reduce the current vector table to `nr_row` unique entries. Words mapped to the discarded vectors will be remapped to the closest vector among those remaining. @@ -267,22 +267,31 @@ cdef class Vocab: xp = get_array_module(self.vectors.data) # Work in batches, to avoid memory problems. keep = self.vectors.data[:nr_row] + keep_keys = [key for key, row in self.vectors.key2row.items() if row < nr_row] toss = self.vectors.data[nr_row:] # Normalize the vectors, so cosine similarity is just dot product. # Note we can't modify the ones we're keeping in-place... 
- keep = keep / (xp.linalg.norm(keep)+1e-8) + keep = keep / (xp.linalg.norm(keep, axis=1, keepdims=True)+1e-8) keep = xp.ascontiguousarray(keep.T) neighbours = xp.zeros((toss.shape[0],), dtype='i') + scores = xp.zeros((toss.shape[0],), dtype='f') for i in range(0, toss.shape[0], batch_size): batch = toss[i : i+batch_size] - batch /= xp.linalg.norm(batch)+1e-8 - neighbours[i:i+batch_size] = xp.dot(batch, keep).argmax(axis=1) + batch /= xp.linalg.norm(batch, axis=1, keepdims=True)+1e-8 + sims = xp.dot(batch, keep) + matches = sims.argmax(axis=1) + neighbours[i:i+batch_size] = matches + scores[i:i+batch_size] = sims.max(axis=1) for lex in self: # If we're losing the vector for this word, map it to the nearest # vector we're keeping. if lex.rank >= nr_row: lex.rank = neighbours[lex.rank-nr_row] self.vectors.add(lex.orth, row=lex.rank) + for key in self.vectors.keys: + row = self.vectors.key2row[key] + if row >= nr_row: + self.vectors.key2row[key] = neighbours[row-nr_row] # Make copy, to encourage the original table to be garbage collected. 
self.vectors.data = xp.ascontiguousarray(self.vectors.data[:nr_row]) From 33af6ac69ad73b7e9245a8fa0cd6862bf569d73b Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 30 Oct 2017 19:46:45 +0100 Subject: [PATCH 30/32] Use even smaller examle size 100 was still too much, so try 20 instead --- examples/training/vocab-data.jsonl | 80 -------------------------- website/api/_annotation/_training.jade | 2 +- 2 files changed, 1 insertion(+), 81 deletions(-) diff --git a/examples/training/vocab-data.jsonl b/examples/training/vocab-data.jsonl index 3fdf5eede..2f129dd30 100644 --- a/examples/training/vocab-data.jsonl +++ b/examples/training/vocab-data.jsonl @@ -19,83 +19,3 @@ {"orth": "\"", "id": 17, "lower": "\"", "norm": "\"", "shape": "\"", "prefix": "\"", "suffix": "\"", "length": 1, "cluster": "0", "prob": -5.02677583694458, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": true, "is_left_punct": true, "is_right_punct": true} {"orth": "?", "id": 18, "lower": "?", "norm": "?", "shape": "?", "prefix": "?", "suffix": "?", "length": 1, "cluster": "0", "prob": -5.05924654006958, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} {"orth": " ", "id": 0, "lower": " ", "norm": " ", "shape": " ", "prefix": " ", "suffix": " ", "length": 1, "cluster": "0", "prob": -5.129165172576904, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, 
"is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "have", "id": 19, "lower": "have", "norm": "have", "shape": "xxxx", "prefix": "h", "suffix": "ave", "length": 4, "cluster": "378", "prob": -5.156484603881836, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "on", "id": 20, "lower": "on", "norm": "on", "shape": "xx", "prefix": "o", "suffix": "on", "length": 2, "cluster": "2044", "prob": -5.172736167907715, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "*", "id": 21, "lower": "*", "norm": "*", "shape": "*", "prefix": "*", "suffix": "*", "length": 1, "cluster": "5098", "prob": -5.1977410316467285, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": ")", "id": 22, "lower": ")", "norm": ")", "shape": ")", "prefix": ")", "suffix": ")", "length": 1, "cluster": "0", "prob": -5.197994232177734, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": true} -{"orth": "be", "id": 23, "lower": "be", 
"norm": "be", "shape": "xx", "prefix": "b", "suffix": "be", "length": 2, "cluster": "458", "prob": -5.225094318389893, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "with", "id": 24, "lower": "with", "norm": "with", "shape": "xxxx", "prefix": "w", "suffix": "ith", "length": 4, "cluster": "1020", "prob": -5.243249893188477, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "do", "id": 25, "lower": "do", "norm": "do", "shape": "xx", "prefix": "d", "suffix": "do", "length": 2, "cluster": "2042", "prob": -5.246996879577637, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "was", "id": 26, "lower": "was", "norm": "was", "shape": "xxx", "prefix": "w", "suffix": "was", "length": 3, "cluster": "250", "prob": -5.252320289611816, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "are", "id": 27, "lower": "are", "norm": "are", "shape": "xxx", "prefix": "a", "suffix": "are", "length": 3, "cluster": "1530", 
"prob": -5.271068096160889, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "not", "id": 28, "lower": "not", "norm": "not", "shape": "xxx", "prefix": "n", "suffix": "not", "length": 3, "cluster": "1258", "prob": -5.332601070404053, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "but", "id": 29, "lower": "but", "norm": "but", "shape": "xxx", "prefix": "b", "suffix": "but", "length": 3, "cluster": "148", "prob": -5.3419694900512695, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "!", "id": 30, "lower": "!", "norm": "!", "shape": "!", "prefix": "!", "suffix": "!", "length": 1, "cluster": "0", "prob": -5.359641075134277, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "this", "id": 31, "lower": "this", "norm": "this", "shape": "xxxx", "prefix": "t", "suffix": "his", "length": 4, "cluster": "63", "prob": -5.36181640625, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, 
"is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "[", "id": 32, "lower": "[", "norm": "[", "shape": "[", "prefix": "[", "suffix": "[", "length": 1, "cluster": "0", "prob": -5.438112258911133, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": true, "is_right_punct": false} -{"orth": "-", "id": 33, "lower": "-", "norm": "-", "shape": "-", "prefix": "-", "suffix": "-", "length": 1, "cluster": "36", "prob": -5.468655109405518, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "my", "id": 34, "lower": "my", "norm": "my", "shape": "xx", "prefix": "m", "suffix": "my", "length": 2, "cluster": "251", "prob": -5.491642951965332, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "they", "id": 35, "lower": "they", "norm": "they", "shape": "xxxx", "prefix": "t", "suffix": "hey", "length": 4, "cluster": "90", "prob": -5.5243682861328125, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, 
"like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "as", "id": 36, "lower": "as", "norm": "as", "shape": "xx", "prefix": "a", "suffix": "as", "length": 2, "cluster": "212", "prob": -5.53448486328125, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "like", "id": 37, "lower": "like", "norm": "like", "shape": "xxxx", "prefix": "l", "suffix": "ike", "length": 4, "cluster": "1684", "prob": -5.610429763793945, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "just", "id": 38, "lower": "just", "norm": "just", "shape": "xxxx", "prefix": "j", "suffix": "ust", "length": 4, "cluster": "31978", "prob": -5.630868434906006, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "your", "id": 39, "lower": "your", "norm": "your", "shape": "xxxx", "prefix": "y", "suffix": "our", "length": 4, "cluster": "251", "prob": -5.650108814239502, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, 
"is_left_punct": false, "is_right_punct": false} -{"orth": "or", "id": 40, "lower": "or", "norm": "or", "shape": "xx", "prefix": "o", "suffix": "or", "length": 2, "cluster": "404", "prob": -5.654984951019287, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "(", "id": 41, "lower": "(", "norm": "(", "shape": "(", "prefix": "(", "suffix": "(", "length": 1, "cluster": "0", "prob": -5.75598669052124, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": true, "is_right_punct": false} -{"orth": "at", "id": 42, "lower": "at", "norm": "at", "shape": "xx", "prefix": "a", "suffix": "at", "length": 2, "cluster": "124", "prob": -5.763442516326904, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "if", "id": 43, "lower": "if", "norm": "if", "shape": "xx", "prefix": "i", "suffix": "if", "length": 2, "cluster": "4052", "prob": -5.763589859008789, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "would", "id": 44, "lower": "would", "norm": "would", "shape": 
"xxxx", "prefix": "w", "suffix": "uld", "length": 5, "cluster": "1978", "prob": -5.772674560546875, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "so", "id": 45, "lower": "so", "norm": "so", "shape": "xx", "prefix": "s", "suffix": "so", "length": 2, "cluster": "2282", "prob": -5.823773384094238, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "can", "id": 46, "lower": "can", "norm": "can", "shape": "xxx", "prefix": "c", "suffix": "can", "length": 3, "cluster": "58", "prob": -5.827763080596924, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "me", "id": 47, "lower": "me", "norm": "me", "shape": "xx", "prefix": "m", "suffix": "me", "length": 2, "cluster": "1898", "prob": -5.846089839935303, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "about", "id": 48, "lower": "about", "norm": "about", "shape": "xxxx", "prefix": "a", "suffix": "out", "length": 5, "cluster": "618", "prob": 
-5.906808853149414, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "he", "id": 49, "lower": "he", "norm": "he", "shape": "xx", "prefix": "h", "suffix": "he", "length": 2, "cluster": "218", "prob": -5.9319047927856445, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "It", "id": 50, "lower": "it", "norm": "It", "shape": "Xx", "prefix": "I", "suffix": "It", "length": 2, "cluster": "894", "prob": -5.93662691116333, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "all", "id": 51, "lower": "all", "norm": "all", "shape": "xxx", "prefix": "a", "suffix": "all", "length": 3, "cluster": "6122", "prob": -5.936640739440918, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "The", "id": 52, "lower": "the", "norm": "The", "shape": "Xxx", "prefix": "T", "suffix": "The", "length": 3, "cluster": "30", "prob": -5.958707332611084, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": 
false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "get", "id": 53, "lower": "get", "norm": "get", "shape": "xxx", "prefix": "g", "suffix": "get", "length": 3, "cluster": "2570", "prob": -5.992605686187744, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "one", "id": 54, "lower": "one", "norm": "one", "shape": "xxx", "prefix": "o", "suffix": "one", "length": 3, "cluster": "8170", "prob": -5.996385097503662, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": true, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "'m", "id": 55, "lower": "'m", "norm": "'m", "shape": "'x", "prefix": "'", "suffix": "'m", "length": 2, "cluster": "3066", "prob": -5.9999823570251465, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "out", "id": 56, "lower": "out", "norm": "out", "shape": "xxx", "prefix": "o", "suffix": "out", "length": 3, "cluster": "1386", "prob": -6.0027008056640625, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": 
false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "from", "id": 57, "lower": "from", "norm": "from", "shape": "xxxx", "prefix": "f", "suffix": "rom", "length": 4, "cluster": "380", "prob": -6.010132312774658, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "an", "id": 58, "lower": "an", "norm": "an", "shape": "xx", "prefix": "a", "suffix": "an", "length": 2, "cluster": "3", "prob": -6.014852046966553, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "what", "id": 59, "lower": "what", "norm": "what", "shape": "xxxx", "prefix": "w", "suffix": "hat", "length": 4, "cluster": "2026", "prob": -6.023346424102783, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "up", "id": 60, "lower": "up", "norm": "up", "shape": "xx", "prefix": "u", "suffix": "up", "length": 2, "cluster": "362", "prob": -6.028695583343506, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": 
false, "is_right_punct": false} -{"orth": "]", "id": 61, "lower": "]", "norm": "]", "shape": "]", "prefix": "]", "suffix": "]", "length": 1, "cluster": "0", "prob": -6.0386552810668945, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": true} -{"orth": "\n", "id": 0, "lower": "\n", "norm": "\n", "shape": "\n", "prefix": "\n", "suffix": "\n", "length": 1, "cluster": "0", "prob": -6.0506510734558105, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": true, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "people", "id": 62, "lower": "people", "norm": "people", "shape": "xxxx", "prefix": "p", "suffix": "ple", "length": 6, "cluster": "365", "prob": -6.0715765953063965, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "more", "id": 63, "lower": "more", "norm": "more", "shape": "xxxx", "prefix": "m", "suffix": "ore", "length": 4, "cluster": "1514", "prob": -6.081598281860352, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": ":", "id": 64, "lower": ":", "norm": ":", "shape": 
":", "prefix": ":", "suffix": ":", "length": 1, "cluster": "228", "prob": -6.128875732421875, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "there", "id": 65, "lower": "there", "norm": "there", "shape": "xxxx", "prefix": "t", "suffix": "ere", "length": 5, "cluster": "986", "prob": -6.135282039642334, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "deleted", "id": 66, "lower": "deleted", "norm": "deleted", "shape": "xxxx", "prefix": "d", "suffix": "ted", "length": 7, "cluster": "1706", "prob": -6.1543049812316895, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "think", "id": 67, "lower": "think", "norm": "think", "shape": "xxxx", "prefix": "t", "suffix": "ink", "length": 5, "cluster": "1674", "prob": -6.180924892425537, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "will", "id": 68, "lower": "will", "norm": "will", "shape": "xxxx", "prefix": "w", "suffix": "ill", "length": 4, "cluster": 
"442", "prob": -6.199834823608398, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "them", "id": 69, "lower": "them", "norm": "them", "shape": "xxxx", "prefix": "t", "suffix": "hem", "length": 4, "cluster": "5994", "prob": -6.2177276611328125, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "we", "id": 70, "lower": "we", "norm": "we", "shape": "xx", "prefix": "w", "suffix": "we", "length": 2, "cluster": "1626", "prob": -6.230024337768555, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "'re", "id": 71, "lower": "'re", "norm": "'re", "shape": "'xx", "prefix": "'", "suffix": "'re", "length": 3, "cluster": "7162", "prob": -6.255462646484375, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "when", "id": 72, "lower": "when", "norm": "when", "shape": "xxxx", "prefix": "w", "suffix": "hen", "length": 4, "cluster": "16340", "prob": -6.2623114585876465, "is_alpha": true, "is_ascii": true, "is_digit": 
false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "You", "id": 73, "lower": "you", "norm": "You", "shape": "Xxx", "prefix": "Y", "suffix": "You", "length": 3, "cluster": "858", "prob": -6.276494026184082, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "...", "id": 74, "lower": "...", "norm": "...", "shape": "...", "prefix": ".", "suffix": "...", "length": 3, "cluster": "966", "prob": -6.278521537780762, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "some", "id": 75, "lower": "some", "norm": "some", "shape": "xxxx", "prefix": "s", "suffix": "ome", "length": 4, "cluster": "239", "prob": -6.318882465362549, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "has", "id": 76, "lower": "has", "norm": "has", "shape": "xxx", "prefix": "h", "suffix": "has", "length": 3, "cluster": "890", "prob": -6.325605392456055, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, 
"is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "because", "id": 77, "lower": "because", "norm": "because", "shape": "xxxx", "prefix": "b", "suffix": "use", "length": 7, "cluster": "980", "prob": -6.349620342254639, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "know", "id": 78, "lower": "know", "norm": "know", "shape": "xxxx", "prefix": "k", "suffix": "now", "length": 4, "cluster": "3722", "prob": -6.368943214416504, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "really", "id": 79, "lower": "really", "norm": "really", "shape": "xxxx", "prefix": "r", "suffix": "lly", "length": 6, "cluster": "7802", "prob": -6.370757102966309, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "by", "id": 80, "lower": "by", "norm": "by", "shape": "xx", "prefix": "b", "suffix": "by", "length": 2, "cluster": "252", "prob": -6.375086784362793, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": 
false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "time", "id": 81, "lower": "time", "norm": "time", "shape": "xxxx", "prefix": "t", "suffix": "ime", "length": 4, "cluster": "477", "prob": -6.3782219886779785, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "did", "id": 82, "lower": "did", "norm": "did", "shape": "xxx", "prefix": "d", "suffix": "did", "length": 3, "cluster": "8186", "prob": -6.389003753662109, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "no", "id": 83, "lower": "no", "norm": "no", "shape": "xx", "prefix": "n", "suffix": "no", "length": 2, "cluster": "4074", "prob": -6.402691841125488, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "had", "id": 84, "lower": "had", "norm": "had", "shape": "xxx", "prefix": "h", "suffix": "had", "length": 3, "cluster": "1914", "prob": -6.45427131652832, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, 
"is_right_punct": false} -{"orth": "their", "id": 85, "lower": "their", "norm": "their", "shape": "xxxx", "prefix": "t", "suffix": "eir", "length": 5, "cluster": "187", "prob": -6.461463928222656, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "If", "id": 86, "lower": "if", "norm": "If", "shape": "Xx", "prefix": "I", "suffix": "If", "length": 2, "cluster": "190", "prob": -6.469156742095947, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": false, "is_space": false, "is_title": true, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "how", "id": 87, "lower": "how", "norm": "how", "shape": "xxx", "prefix": "h", "suffix": "how", "length": 3, "cluster": "10218", "prob": -6.496722221374512, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "does", "id": 88, "lower": "does", "norm": "does", "shape": "xxxx", "prefix": "d", "suffix": "oes", "length": 4, "cluster": "4090", "prob": -6.500738143920898, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "who", "id": 89, "lower": "who", "norm": "who", 
"shape": "xxx", "prefix": "w", "suffix": "who", "length": 3, "cluster": "410", "prob": -6.504637241363525, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "than", "id": 90, "lower": "than", "norm": "than", "shape": "xxxx", "prefix": "t", "suffix": "han", "length": 4, "cluster": "106", "prob": -6.512253761291504, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "good", "id": 91, "lower": "good", "norm": "good", "shape": "xxxx", "prefix": "g", "suffix": "ood", "length": 4, "cluster": "551", "prob": -6.518923759460449, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "only", "id": 92, "lower": "only", "norm": "only", "shape": "xxxx", "prefix": "o", "suffix": "nly", "length": 4, "cluster": "15594", "prob": -6.535442352294922, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "his", "id": 93, "lower": "his", "norm": "his", "shape": "xxx", "prefix": "h", "suffix": "his", "length": 3, "cluster": "123", 
"prob": -6.574275016784668, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "much", "id": 94, "lower": "much", "norm": "much", "shape": "xxxx", "prefix": "m", "suffix": "uch", "length": 4, "cluster": "2794", "prob": -6.584301948547363, "is_alpha": true, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": ";", "id": 95, "lower": ";", "norm": ";", "shape": ";", "prefix": ";", "suffix": ";", "length": 1, "cluster": "36", "prob": -6.586422920227051, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": false, "is_punct": true, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "'ve", "id": 96, "lower": "'ve", "norm": "'ve", "shape": "'xx", "prefix": "'", "suffix": "'ve", "length": 3, "cluster": "1018", "prob": -6.593011379241943, "is_alpha": false, "is_ascii": true, "is_digit": false, "is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} -{"orth": "could", "id": 97, "lower": "could", "norm": "could", "shape": "xxxx", "prefix": "c", "suffix": "uld", "length": 5, "cluster": "954", "prob": -6.595959186553955, "is_alpha": true, "is_ascii": true, "is_digit": false, 
"is_lower": true, "is_punct": false, "is_space": false, "is_title": false, "is_upper": false, "like_url": false, "like_num": false, "like_email": false, "is_stop": false, "is_oov": false, "is_quote": false, "is_left_punct": false, "is_right_punct": false} diff --git a/website/api/_annotation/_training.jade b/website/api/_annotation/_training.jade index 4e37ee2b1..9bd59cdae 100644 --- a/website/api/_annotation/_training.jade +++ b/website/api/_annotation/_training.jade @@ -98,7 +98,7 @@ p } p - | Here's an example of the 100 most frequent lexemes in the English + | Here's an example of the 20 most frequent lexemes in the English | training data: +github("spacy", "examples/training/vocab-data.jsonl", false, false, "json") From 8ad4f3f6e506a7c93f2e0dc821e235262bd5cda5 Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 30 Oct 2017 19:48:35 +0100 Subject: [PATCH 31/32] Take out JSON format include in tagger/parser --- website/usage/_training/_tagger-parser.jade | 4 ---- 1 file changed, 4 deletions(-) diff --git a/website/usage/_training/_tagger-parser.jade b/website/usage/_training/_tagger-parser.jade index f2fa4bab5..646f9ecb0 100644 --- a/website/usage/_training/_tagger-parser.jade +++ b/website/usage/_training/_tagger-parser.jade @@ -190,7 +190,3 @@ p +item | #[strong Test] the model to make sure the parser works as expected. 
- -+h(3, "training-json") JSON format for training - -include ../../api/_annotation/_training From 5af6c8b746c26f92e4859c01b65296edda2f9e7f Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 30 Oct 2017 20:28:00 +0100 Subject: [PATCH 32/32] Update training docs --- website/_includes/_svg.jade | 3 +++ website/usage/_training/_basics.jade | 23 +++++++++++++++++------ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/website/_includes/_svg.jade b/website/_includes/_svg.jade index 0f7266c0a..54e0667a3 100644 --- a/website/_includes/_svg.jade +++ b/website/_includes/_svg.jade @@ -62,6 +62,9 @@ svg(style="position: absolute; visibility: hidden; width: 0; height: 0;" width=" symbol#svg_explosion(viewBox="0 0 500 500") path(fill="currentColor" d="M111.7 74.9L91.2 93.1l9.1 10.2 17.8-15.8 7.4 8.4-17.8 15.8 10.1 11.4 20.6-18.2 7.7 8.7-30.4 26.9-41.9-47.3 30.3-26.9 7.6 8.6zM190.8 59.6L219 84.3l-14.4 4.5-20.4-18.2-6.4 26.6-14.4 4.5 8.9-36.4-26.9-24.1 14.3-4.5L179 54.2l5.7-25.2 14.3-4.5-8.2 35.1zM250.1 21.2l27.1 3.4c6.1.8 10.8 3.1 14 7.2 3.2 4.1 4.5 9.2 3.7 15.5-.8 6.3-3.2 11-7.4 14.1-4.1 3.1-9.2 4.3-15.3 3.5L258 63.2l-2.8 22.3-13-1.6 7.9-62.7zm11.5 13l-2.2 17.5 12.6 1.6c5.1.6 9.1-2 9.8-7.6.7-5.6-2.5-9.2-7.6-9.9l-12.6-1.6zM329.1 95.4l23.8 13.8-5.8 10L312 98.8l31.8-54.6 11.3 6.6-26 44.6zM440.5 145c-1.3 8.4-5.9 15.4-13.9 21.1s-16.2 7.7-24.6 6.1c-8.4-1.6-15.3-6.3-20.8-14.1-5.5-7.9-7.6-16-6.4-24.4 1.3-8.5 6-15.5 14-21.1 8-5.6 16.2-7.7 24.5-6 8.4 1.6 15.4 6.3 20.9 14.2 5.5 7.6 7.6 15.7 6.3 24.2zM412 119c-5.1-.8-10.3.6-15.6 4.4-5.2 3.7-8.4 8.1-9.4 13.2-1 5.2.2 10.1 3.5 14.8 3.4 4.8 7.5 7.5 12.7 8.2 5.2.8 10.4-.7 15.6-4.4 5.3-3.7 8.4-8.1 9.4-13.2 1.1-5.1-.1-9.9-3.4-14.7-3.4-4.8-7.6-7.6-12.8-8.3zM471.5 237.9c-2.8 4.8-7.1 7.6-13 8.7l-2.6-13.1c5.3-.9 8.1-5 7.2-11-.9-5.8-4.3-8.8-8.9-8.2-2.3.3-3.7 1.4-4.5 3.3-.7 1.9-1.4 5.2-1.7 10.1-.8 7.5-2.2 13.1-4.3 16.9-2.1 3.9-5.7 6.2-10.9 7-6.3.9-11.3-.5-15.2-4.4-3.9-3.8-6.3-9-7.3-15.7-1.1-7.4-.2-13.7 2.6-18.8 2.8-5.1 7.4-8.2 13.7-9.2l2.6 
13c-5.6 1.1-8.7 6.6-7.7 13.4 1 6.6 3.9 9.5 8.6 8.8 4.4-.7 5.7-4.5 6.7-14.1.3-3.5.7-6.2 1.1-8.4.4-2.2 1.2-4.4 2.2-6.8 2.1-4.7 6-7.2 11.8-8.1 5.4-.8 10.3.4 14.5 3.7 4.2 3.3 6.9 8.5 8 15.6.9 6.9-.1 12.6-2.9 17.3zM408.6 293.5l2.4-12.9 62 11.7-2.4 12.9-62-11.7zM419.6 396.9c-8.3 2-16.5.3-24.8-5-8.2-5.3-13.2-12.1-14.9-20.5-1.6-8.4.1-16.6 5.3-24.6 5.2-8.1 11.9-13.1 20.2-15.1 8.4-1.9 16.6-.3 24.9 5 8.2 5.3 13.2 12.1 14.8 20.5 1.7 8.4 0 16.6-5.2 24.7-5.2 8-12 13-20.3 15zm13.4-36.3c-1.2-5.1-4.5-9.3-9.9-12.8s-10.6-4.7-15.8-3.7-9.3 4-12.4 8.9-4.1 9.8-2.8 14.8c1.2 5.1 4.5 9.3 9.9 12.8 5.5 3.5 10.7 4.8 15.8 3.7 5.1-.9 9.2-3.8 12.3-8.7s4.1-9.9 2.9-15zM303.6 416.5l9.6-5.4 43.3 20.4-19.2-34 11.4-6.4 31 55-9.6 5.4-43.4-20.5 19.2 34.1-11.3 6.4-31-55zM238.2 468.8c-49 0-96.9-17.4-134.8-49-38.3-32-64-76.7-72.5-125.9-2-11.9-3.1-24-3.1-35.9 0-36.5 9.6-72.6 27.9-104.4 2.1-3.6 6.7-4.9 10.3-2.8 3.6 2.1 4.9 6.7 2.8 10.3-16.9 29.5-25.9 63.1-25.9 96.9 0 11.1 1 22.3 2.9 33.4 7.9 45.7 31.8 87.2 67.3 116.9 35.2 29.3 79.6 45.5 125.1 45.5 11.1 0 22.3-1 33.4-2.9 4.1-.7 8 2 8.7 6.1.7 4.1-2 8-6.1 8.7-11.9 2-24 3.1-36 3.1z") + symbol#svg_prodigy(viewBox="0 0 538.5 157.6") + path(fill="currentColor" d="M70.6 48.6c7 7.3 10.5 17.1 10.5 29.2S77.7 99.7 70.6 107c-6.9 7.3-15.9 11.1-27 11.1-9.4 0-16.8-2.7-21.7-8.2v44.8H0V39h20.7v8.1c4.8-6.4 12.4-9.6 22.9-9.6 11.1 0 20.1 3.7 27 11.1zM21.9 76v3.6c0 12.1 7.3 19.8 18.3 19.8 11.2 0 18.7-7.9 18.7-21.6s-7.5-21.6-18.7-21.6c-11 0-18.3 7.7-18.3 19.8zM133.8 59.4c-12.6 0-20.5 7-20.5 17.8v39.3h-22V39h21.1v8.8c4-6.4 11.2-9.6 21.3-9.6v21.2zM209.5 107.1c-7.6 7.3-17.5 11.1-29.5 11.1s-21.9-3.8-29.7-11.1c-7.6-7.5-11.5-17.2-11.5-29.2 0-12.1 3.9-21.9 11.5-29.2 7.8-7.3 17.7-11.1 29.7-11.1s21.9 3.8 29.5 11.1c7.8 7.3 11.7 17.1 11.7 29.2 0 11.9-3.9 21.7-11.7 29.2zM180 56.2c-5.7 0-10.3 1.9-13.8 5.8-3.5 3.8-5.2 9-5.2 15.7 0 6.7 1.8 12 5.2 15.7 3.4 3.8 8.1 5.7 13.8 5.7s10.3-1.9 13.8-5.7 5.2-9 5.2-15.7c0-6.8-1.8-12-5.2-15.7-3.5-3.8-8.1-5.8-13.8-5.8zM313 116.5h-20.5v-7.9c-4.4 5.5-12.7 
9.6-23.1 9.6-10.9 0-19.9-3.8-27-11.1C235.5 99.7 232 90 232 77.8s3.5-21.9 10.3-29.2c7-7.3 16-11.1 27-11.1 9.7 0 17.1 2.7 21.9 8.2V0H313v116.5zm-58.8-38.7c0 13.6 7.5 21.4 18.7 21.4 10.9 0 18.3-7.3 18.3-19.8V76c0-12.2-7.3-19.8-18.3-19.8-11.2 0-18.7 8-18.7 21.6zM354.1 13.6c0 3.6-1.3 6.8-3.9 9.3-5 4.9-13.6 4.9-18.6 0-8.4-7.5-1.6-23.1 9.3-22.5 7.4 0 13.2 5.9 13.2 13.2zm-2.2 102.9H330V39h21.9v77.5zM425.1 47.1V39h20.5v80.4c0 11.2-3.6 20.1-10.6 26.8-7 6.7-16.6 10-28.5 10-23.4 0-36.9-11.4-39.9-29.8l21.7-.8c1 7.6 7.6 12 17.4 12 11.2 0 18.1-5.8 18.1-16.6v-11.1c-5.1 5.5-12.5 8.2-21.9 8.2-10.9 0-19.9-3.8-27-11.1-6.9-7.3-10.3-17.1-10.3-29.2s3.5-21.9 10.3-29.2c7-7.3 16-11.1 27-11.1 10.7 0 18.4 3.1 23.2 9.6zm-38.3 30.7c0 13.6 7.5 21.6 18.7 21.6 11 0 18.3-7.6 18.3-19.8V76c0-12.2-7.3-19.8-18.3-19.8-11.2 0-18.7 8-18.7 21.6zM488.8 154.8H465l19.8-45.1L454.5 39h24.1l17.8 46.2L514.2 39h24.3l-49.7 115.8z") + //- Machine learning & NLP libraries diff --git a/website/usage/_training/_basics.jade b/website/usage/_training/_basics.jade index 77df3c433..d20648416 100644 --- a/website/usage/_training/_basics.jade +++ b/website/usage/_training/_basics.jade @@ -76,6 +76,16 @@ p ("Google rebrands its business apps", [(0, 6, "ORG")]), ("look what i found on google! 😂", [(21, 27, "PRODUCT")])] ++infobox("Tip: Try the Prodigy annotation tool") + +infobox-logos(["prodigy", 100, 29, "https://prodi.gy"]) + | If you need to label a lot of data, check out + | #[+a("https://prodi.gy", true) Prodigy], a new, active learning-powered + | annotation tool we've developed. Prodigy is fast and extensible, and + | comes with a modern #[strong web application] that helps you collect + | training data faster. It integrates seamlessly with spaCy, pre-selects + | the #[strong most relevant examples] for annotation, and lets you + | train and evaluate ready-to-use spaCy models. + +h(3, "annotations") Training with annotations p @@ -180,9 +190,10 @@ p +cell #[code optimizer] +cell Callable to update the model's weights. 
-+infobox - | For the #[strong full example and more details], see the usage guide on - | #[+a("/usage/training#ner") training the named entity recognizer], - | or the runnable - | #[+src(gh("spaCy", "examples/training/train_ner.py")) training script] - | on GitHub. +p + | Instead of writing your own training loop, you can also use the + | built-in #[+api("cli#train") #[code train]] command, which expects data + | in spaCy's #[+a("/api/annotation#json-input") JSON format]. On each epoch, + | a model will be saved out to the directory. After training, you can + | use the #[+api("cli#package") #[code package]] command to generate an + | installable Python package from your model.