mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-27 10:26:35 +03:00
Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
commit
624644adfe
|
@ -67,7 +67,7 @@ class BaseDefaults(object):
|
||||||
infix_finditer=infix_finditer,
|
infix_finditer=infix_finditer,
|
||||||
token_match=token_match)
|
token_match=token_match)
|
||||||
|
|
||||||
pipe_names = ['tensorizer', 'tagger', 'parser', 'ner']
|
pipe_names = ['tagger', 'parser', 'ner']
|
||||||
token_match = TOKEN_MATCH
|
token_match = TOKEN_MATCH
|
||||||
prefixes = tuple(TOKENIZER_PREFIXES)
|
prefixes = tuple(TOKENIZER_PREFIXES)
|
||||||
suffixes = tuple(TOKENIZER_SUFFIXES)
|
suffixes = tuple(TOKENIZER_SUFFIXES)
|
||||||
|
|
|
@ -624,7 +624,7 @@ mixin qs(data, style)
|
||||||
//- Terminal-style code window
|
//- Terminal-style code window
|
||||||
label - [string] title displayed in top bar of terminal window
|
label - [string] title displayed in top bar of terminal window
|
||||||
|
|
||||||
mixin terminal(label)
|
mixin terminal(label, button_text, button_url)
|
||||||
.x-terminal
|
.x-terminal
|
||||||
.x-terminal__icons: span
|
.x-terminal__icons: span
|
||||||
.u-padding-small.u-text-label.u-text-center=label
|
.u-padding-small.u-text-label.u-text-center=label
|
||||||
|
@ -632,6 +632,9 @@ mixin terminal(label)
|
||||||
+code.x-terminal__code
|
+code.x-terminal__code
|
||||||
block
|
block
|
||||||
|
|
||||||
|
if button_text && button_url
|
||||||
|
+button(button_url, true, "primary", "small").x-terminal__button=button_text
|
||||||
|
|
||||||
|
|
||||||
//- Landing
|
//- Landing
|
||||||
|
|
||||||
|
|
|
@ -17,7 +17,6 @@
|
||||||
"Pipeline": {
|
"Pipeline": {
|
||||||
"Language": "language",
|
"Language": "language",
|
||||||
"Pipe": "pipe",
|
"Pipe": "pipe",
|
||||||
"Tensorizer": "tensorizer",
|
|
||||||
"Tagger": "tagger",
|
"Tagger": "tagger",
|
||||||
"DependencyParser": "dependencyparser",
|
"DependencyParser": "dependencyparser",
|
||||||
"EntityRecognizer": "entityrecognizer",
|
"EntityRecognizer": "entityrecognizer",
|
||||||
|
@ -180,14 +179,6 @@
|
||||||
"source": "spacy/pipeline.pyx"
|
"source": "spacy/pipeline.pyx"
|
||||||
},
|
},
|
||||||
|
|
||||||
"tensorizer": {
|
|
||||||
"title": "Tensorizer",
|
|
||||||
"teaser": "Add a tensor with position-sensitive meaning representations to a document.",
|
|
||||||
"tag": "class",
|
|
||||||
"tag_new": 2,
|
|
||||||
"source": "spacy/pipeline.pyx"
|
|
||||||
},
|
|
||||||
|
|
||||||
"goldparse": {
|
"goldparse": {
|
||||||
"title": "GoldParse",
|
"title": "GoldParse",
|
||||||
"tag": "class",
|
"tag": "class",
|
||||||
|
|
|
@ -1,6 +0,0 @@
|
||||||
//- 💫 DOCS > API > TENSORIZER
|
|
||||||
|
|
||||||
include ../_includes/_mixins
|
|
||||||
|
|
||||||
//- This class inherits from Pipe, so this page uses the template in pipe.jade.
|
|
||||||
!=partial("pipe", { subclass: "Tensorizer", pipeline_id: "tensorizer" })
|
|
|
@ -7,14 +7,13 @@ p
|
||||||
| labels. You can change the model architecture rather easily, but by
|
| labels. You can change the model architecture rather easily, but by
|
||||||
| default, the #[code TextCategorizer] class uses a convolutional
|
| default, the #[code TextCategorizer] class uses a convolutional
|
||||||
| neural network to assign position-sensitive vectors to each word in the
|
| neural network to assign position-sensitive vectors to each word in the
|
||||||
| document. This step is similar to the #[+api("tensorizer") #[code Tensorizer]]
|
| document. The #[code TextCategorizer] uses its own CNN model, to
|
||||||
| component, but the #[code TextCategorizer] uses its own CNN model, to
|
|
||||||
| avoid sharing weights with the other pipeline components. The document
|
| avoid sharing weights with the other pipeline components. The document
|
||||||
| tensor is then
|
| tensor is then summarized by concatenating max and mean pooling, and a
|
||||||
| summarized by concatenating max and mean pooling, and a multilayer
|
| multilayer perceptron is used to predict an output vector of length
|
||||||
| perceptron is used to predict an output vector of length #[code nr_class],
|
| #[code nr_class], before a logistic activation is applied elementwise.
|
||||||
| before a logistic activation is applied elementwise. The value of each
|
| The value of each output neuron is the probability that some class is
|
||||||
| output neuron is the probability that some class is present.
|
| present.
|
||||||
|
|
||||||
//- This class inherits from Pipe, so this page uses the template in pipe.jade.
|
//- This class inherits from Pipe, so this page uses the template in pipe.jade.
|
||||||
!=partial("pipe", { subclass: "TextCategorizer", short: "textcat", pipeline_id: "textcat" })
|
!=partial("pipe", { subclass: "TextCategorizer", short: "textcat", pipeline_id: "textcat" })
|
||||||
|
|
|
@ -34,7 +34,7 @@
|
||||||
|
|
||||||
.c-code-block__content
|
.c-code-block__content
|
||||||
display: block
|
display: block
|
||||||
font: normal normal 1.1rem/#{2} $font-code
|
font: normal normal 1.1rem/#{1.9} $font-code
|
||||||
padding: 1em 2em
|
padding: 1em 2em
|
||||||
|
|
||||||
&[data-prompt]:before,
|
&[data-prompt]:before,
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
padding: $border-radius
|
padding: $border-radius
|
||||||
border-radius: 1em
|
border-radius: 1em
|
||||||
width: 100%
|
width: 100%
|
||||||
|
position: relative
|
||||||
|
|
||||||
.x-terminal__icons
|
.x-terminal__icons
|
||||||
position: absolute
|
position: absolute
|
||||||
|
@ -39,3 +40,13 @@
|
||||||
width: 100%
|
width: 100%
|
||||||
max-width: 100%
|
max-width: 100%
|
||||||
white-space: pre-wrap
|
white-space: pre-wrap
|
||||||
|
|
||||||
|
|
||||||
|
.x-terminal__button.x-terminal__button
|
||||||
|
@include position(absolute, bottom, right, 2.65rem, 2.6rem)
|
||||||
|
background: $color-dark
|
||||||
|
border-color: $color-dark
|
||||||
|
|
||||||
|
&:hover
|
||||||
|
background: darken($color-dark, 5)
|
||||||
|
border-color: darken($color-dark, 5)
|
||||||
|
|
|
@ -18,13 +18,13 @@
|
||||||
<path fill="#f8cecc" stroke="#b85450" stroke-width="2" stroke-miterlimit="10" d="M176 58h103.3L296 88l-16.8 30H176l16.8-30z"/>
|
<path fill="#f8cecc" stroke="#b85450" stroke-width="2" stroke-miterlimit="10" d="M176 58h103.3L296 88l-16.8 30H176l16.8-30z"/>
|
||||||
<text class="svg__pipeline__text-small" dy="0.75em" dx="-0.25em" width="58" height="14" transform="translate(206.5 80.5)">tokenizer</text>
|
<text class="svg__pipeline__text-small" dy="0.75em" dx="-0.25em" width="58" height="14" transform="translate(206.5 80.5)">tokenizer</text>
|
||||||
<path fill="#ffe6cc" stroke="#d79b00" stroke-width="2" stroke-miterlimit="10" d="M314 58h103.3L434 88l-16.8 30H314l16.8-30z"/>
|
<path fill="#ffe6cc" stroke="#d79b00" stroke-width="2" stroke-miterlimit="10" d="M314 58h103.3L434 88l-16.8 30H314l16.8-30z"/>
|
||||||
<text class="svg__pipeline__text-small" dy="0.75em" dx="-0.25em" width="62" height="14" transform="translate(342.5 80.5)">tensorizer</text>
|
<text class="svg__pipeline__text-small" dy="0.75em" dx="8" width="62" height="14" transform="translate(342.5 80.5)">tagger</text>
|
||||||
<path fill="none" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M296.5 88.2h24.7"/>
|
<path fill="none" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M296.5 88.2h24.7"/>
|
||||||
<path fill="#999" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M327.2 88.2l-8 4 2-4-2-4z"/>
|
<path fill="#999" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M327.2 88.2l-8 4 2-4-2-4z"/>
|
||||||
<path fill="#ffe6cc" stroke="#d79b00" stroke-width="2" stroke-miterlimit="10" d="M416 58h103.3L536 88l-16.8 30H416l16.8-30z"/>
|
<path fill="#ffe6cc" stroke="#d79b00" stroke-width="2" stroke-miterlimit="10" d="M416 58h103.3L536 88l-16.8 30H416l16.8-30z"/>
|
||||||
<text class="svg__pipeline__text-small" dy="0.75em" dx="-0.25em" width="40" height="14" transform="translate(455.5 80.5)">tagger</text>
|
<text class="svg__pipeline__text-small" dy="0.75em" dx="-0.25em" width="40" height="14" transform="translate(455.5 80.5)">parser</text>
|
||||||
<path fill="#ffe6cc" stroke="#d79b00" stroke-width="2" stroke-miterlimit="10" d="M519 58h103.3L639 88l-16.8 30H519l16.8-30z"/>
|
<path fill="#ffe6cc" stroke="#d79b00" stroke-width="2" stroke-miterlimit="10" d="M519 58h103.3L639 88l-16.8 30H519l16.8-30z"/>
|
||||||
<text class="svg__pipeline__text-small" dy="0.75em" dx="-0.25em" width="40" height="14" transform="translate(558.5 80.5)">parser</text>
|
<text class="svg__pipeline__text-small" dy="0.75em" dx="8" width="40" height="14" transform="translate(558.5 80.5)">ner</text>
|
||||||
<path fill="#ffe6cc" stroke="#d79b00" stroke-width="2" stroke-miterlimit="10" d="M622 58h103.3L742 88l-16.8 30H622l16.8-30z"/>
|
<path fill="#ffe6cc" stroke="#d79b00" stroke-width="2" stroke-miterlimit="10" d="M622 58h103.3L742 88l-16.8 30H622l16.8-30z"/>
|
||||||
<text class="svg__pipeline__text-small" dy="0.75em" dx="-0.25em" width="20" height="14" transform="translate(671.5 80.5)">ner</text>
|
<text class="svg__pipeline__text-small" dy="0.75em" dx="8" width="20" height="14" transform="translate(671.5 80.5)">...</text>
|
||||||
</svg>
|
</svg>
|
||||||
|
|
Before Width: | Height: | Size: 3.1 KiB After Width: | Height: | Size: 3.1 KiB |
|
@ -54,7 +54,7 @@ include _includes/_mixins
|
||||||
.o-content
|
.o-content
|
||||||
+grid
|
+grid
|
||||||
+grid-col("two-thirds")
|
+grid-col("two-thirds")
|
||||||
+terminal("lightning_tour.py").
|
+terminal("lightning_tour.py", "More examples", "/usage/spacy-101#lightning-tour").
|
||||||
# Install: pip install spacy && spacy download en
|
# Install: pip install spacy && spacy download en
|
||||||
import spacy
|
import spacy
|
||||||
|
|
||||||
|
@ -65,16 +65,18 @@ include _includes/_mixins
|
||||||
text = open('war_and_peace.txt').read()
|
text = open('war_and_peace.txt').read()
|
||||||
doc = nlp(text)
|
doc = nlp(text)
|
||||||
|
|
||||||
# Hook in your own deep learning models
|
# Find named entities, phrases and concepts
|
||||||
similarity_model = load_my_neural_network()
|
for entity in doc.ents:
|
||||||
def install_similarity(doc):
|
print(entity.text, entity.label_)
|
||||||
doc.user_hooks['similarity'] = similarity_model
|
|
||||||
nlp.pipeline.append(install_similarity)
|
|
||||||
|
|
||||||
|
# Determine semantic similarities
|
||||||
doc1 = nlp(u'the fries were gross')
|
doc1 = nlp(u'the fries were gross')
|
||||||
doc2 = nlp(u'worst fries ever')
|
doc2 = nlp(u'worst fries ever')
|
||||||
doc1.similarity(doc2)
|
doc1.similarity(doc2)
|
||||||
|
|
||||||
|
# Hook in your own deep learning models
|
||||||
|
nlp.add_pipe(load_my_model(), before='parser')
|
||||||
|
|
||||||
+grid-col("third")
|
+grid-col("third")
|
||||||
+h(2) Features
|
+h(2) Features
|
||||||
+list
|
+list
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
p
|
p
|
||||||
| spaCy makes it very easy to create your own pipelines consisting of
|
| spaCy makes it very easy to create your own pipelines consisting of
|
||||||
| reusable components – this includes spaCy's default tensorizer, tagger,
|
| reusable components – this includes spaCy's default tagger,
|
||||||
| parser and entity recognizer, but also your own custom processing
|
| parser and entity recognizer, but also your own custom processing
|
||||||
| functions. A pipeline component can be added to an already existing
|
| functions. A pipeline component can be added to an already existing
|
||||||
| #[code nlp] object, specified when initialising a #[code Language] class,
|
| #[code nlp] object, specified when initialising a #[code Language] class,
|
||||||
|
@ -49,9 +49,9 @@ p
|
||||||
nlp = spacy.load('en')
|
nlp = spacy.load('en')
|
||||||
|
|
||||||
p
|
p
|
||||||
| ... the model tells spaCy to use the language #[code "en"] and the pipeline
|
| ... the model tells spaCy to use the language #[code "en"] and the
|
||||||
| #[code.u-break ["tensorizer", "tagger", "parser", "ner"]]. spaCy will
|
| pipeline #[code.u-break ["tagger", "parser", "ner"]]. spaCy will then
|
||||||
| then initialise #[code spacy.lang.en.English], and create each pipeline
|
| initialise #[code spacy.lang.en.English], and create each pipeline
|
||||||
| component and add it to the processing pipeline. It'll then load in the
|
| component and add it to the processing pipeline. It'll then load in the
|
||||||
| model's data from its data directory and return the modified
|
| model's data from its data directory and return the modified
|
||||||
| #[code Language] class for you to use as the #[code nlp] object.
|
| #[code Language] class for you to use as the #[code nlp] object.
|
||||||
|
@ -72,7 +72,7 @@ p
|
||||||
|
|
||||||
+code("spacy.load under the hood").
|
+code("spacy.load under the hood").
|
||||||
lang = 'en'
|
lang = 'en'
|
||||||
pipeline = ['tensorizer', 'tagger', 'parser', 'ner']
|
pipeline = ['tagger', 'parser', 'ner']
|
||||||
data_path = 'path/to/en_core_web_sm/en_core_web_sm-2.0.0'
|
data_path = 'path/to/en_core_web_sm/en_core_web_sm-2.0.0'
|
||||||
|
|
||||||
cls = spacy.util.get_lang_class(lang) # 1. get Language instance, e.g. English()
|
cls = spacy.util.get_lang_class(lang) # 1. get Language instance, e.g. English()
|
||||||
|
@ -120,7 +120,7 @@ p
|
||||||
|
|
||||||
+code.
|
+code.
|
||||||
nlp = spacy.load('en', disable=['parser', 'tagger'])
|
nlp = spacy.load('en', disable=['parser', 'tagger'])
|
||||||
nlp = English().from_disk('/model', disable=['tensorizer', 'ner'])
|
nlp = English().from_disk('/model', disable=['ner'])
|
||||||
|
|
||||||
p
|
p
|
||||||
| You can also use the #[+api("language#remove_pipe") #[code remove_pipe]]
|
| You can also use the #[+api("language#remove_pipe") #[code remove_pipe]]
|
||||||
|
|
|
@ -60,12 +60,6 @@ p
|
||||||
+cell #[+api("pipe") #[code Pipe]]
|
+cell #[+api("pipe") #[code Pipe]]
|
||||||
+cell Base class for processing pipeline components.
|
+cell Base class for processing pipeline components.
|
||||||
|
|
||||||
+row
|
|
||||||
+cell #[+api("tensorizer") #[code Tensorizer]]
|
|
||||||
+cell
|
|
||||||
| Add tensors with position-sensitive meaning representations to
|
|
||||||
| #[code Doc] objects.
|
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[+api("tagger") #[code Tagger]]
|
+cell #[+api("tagger") #[code Tagger]]
|
||||||
+cell Annotate part-of-speech tags on #[code Doc] objects.
|
+cell Annotate part-of-speech tags on #[code Doc] objects.
|
||||||
|
|
|
@ -2,8 +2,7 @@
|
||||||
|
|
||||||
p
|
p
|
||||||
| The following examples and code snippets give you an overview of spaCy's
|
| The following examples and code snippets give you an overview of spaCy's
|
||||||
| functionality and its usage. If you're new to spaCy, make sure to check
|
| functionality and its usage.
|
||||||
| out the #[+a("/usage/spacy-101") spaCy 101 guide].
|
|
||||||
|
|
||||||
+h(3, "lightning-tour-models") Install models and process text
|
+h(3, "lightning-tour-models") Install models and process text
|
||||||
|
|
||||||
|
|
|
@ -5,10 +5,9 @@ p
|
||||||
| produce a #[code Doc] object. The #[code Doc] is then processed in several
|
| produce a #[code Doc] object. The #[code Doc] is then processed in several
|
||||||
| different steps – this is also referred to as the
|
| different steps – this is also referred to as the
|
||||||
| #[strong processing pipeline]. The pipeline used by the
|
| #[strong processing pipeline]. The pipeline used by the
|
||||||
| #[+a("/models") default models] consists of a
|
| #[+a("/models") default models] consists of a tagger, a parser and an
|
||||||
| tensorizer, a tagger, a parser and an entity recognizer. Each pipeline
|
| entity recognizer. Each pipeline component returns the processed
|
||||||
| component returns the processed #[code Doc], which is then passed on to
|
| #[code Doc], which is then passed on to the next component.
|
||||||
| the next component.
|
|
||||||
|
|
||||||
+graphic("/assets/img/pipeline.svg")
|
+graphic("/assets/img/pipeline.svg")
|
||||||
include ../../assets/img/pipeline.svg
|
include ../../assets/img/pipeline.svg
|
||||||
|
@ -21,43 +20,45 @@ p
|
||||||
|
|
||||||
+table(["Name", "Component", "Creates", "Description"])
|
+table(["Name", "Component", "Creates", "Description"])
|
||||||
+row
|
+row
|
||||||
+cell tokenizer
|
+cell #[strong tokenizer]
|
||||||
+cell #[+api("tokenizer") #[code Tokenizer]]
|
+cell #[+api("tokenizer") #[code Tokenizer]]
|
||||||
+cell #[code Doc]
|
+cell #[code Doc]
|
||||||
+cell Segment text into tokens.
|
+cell Segment text into tokens.
|
||||||
|
|
||||||
+row("divider")
|
+row("divider")
|
||||||
+cell tensorizer
|
+cell #[strong tagger]
|
||||||
+cell #[+api("tensorizer") Tensorizer]
|
|
||||||
+cell #[code Doc.tensor]
|
|
||||||
+cell Create feature representation tensor for #[code Doc].
|
|
||||||
|
|
||||||
+row
|
|
||||||
+cell tagger
|
|
||||||
+cell #[+api("tagger") #[code Tagger]]
|
+cell #[+api("tagger") #[code Tagger]]
|
||||||
+cell #[code Doc[i].tag]
|
+cell #[code Doc[i].tag]
|
||||||
+cell Assign part-of-speech tags.
|
+cell Assign part-of-speech tags.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell parser
|
+cell #[strong parser]
|
||||||
+cell #[+api("dependencyparser") #[code DependencyParser]]
|
+cell #[+api("dependencyparser") #[code DependencyParser]]
|
||||||
+cell
|
+cell
|
||||||
| #[code Doc[i].head], #[code Doc[i].dep], #[code Doc.sents],
|
| #[code Doc[i].head],
|
||||||
|
| #[code Doc[i].dep],
|
||||||
|
| #[code Doc.sents],
|
||||||
| #[code Doc.noun_chunks]
|
| #[code Doc.noun_chunks]
|
||||||
+cell Assign dependency labels.
|
+cell Assign dependency labels.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell ner
|
+cell #[strong ner]
|
||||||
+cell #[+api("entityrecognizer") #[code EntityRecognizer]]
|
+cell #[+api("entityrecognizer") #[code EntityRecognizer]]
|
||||||
+cell #[code Doc.ents], #[code Doc[i].ent_iob], #[code Doc[i].ent_type]
|
+cell #[code Doc.ents], #[code Doc[i].ent_iob], #[code Doc[i].ent_type]
|
||||||
+cell Detect and label named entities.
|
+cell Detect and label named entities.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell textcat
|
+cell #[strong textcat]
|
||||||
+cell #[+api("textcategorizer") #[code TextCategorizer]]
|
+cell #[+api("textcategorizer") #[code TextCategorizer]]
|
||||||
+cell #[code Doc.cats]
|
+cell #[code Doc.cats]
|
||||||
+cell Assign document labels.
|
+cell Assign document labels.
|
||||||
|
|
||||||
|
+row("divider")
|
||||||
|
+cell #[strong ...]
|
||||||
|
+cell #[+a("/usage/processing-pipelines#custom-components") custom components]
|
||||||
|
+cell #[code Doc._.xxx], #[code Token._.xxx], #[code Span._.xxx]
|
||||||
|
+cell Assign custom attributes, methods or properties.
|
||||||
|
|
||||||
p
|
p
|
||||||
| The processing pipeline always #[strong depends on the statistical model]
|
| The processing pipeline always #[strong depends on the statistical model]
|
||||||
| and its capabilities. For example, a pipeline can only include an entity
|
| and its capabilities. For example, a pipeline can only include an entity
|
||||||
|
@ -66,17 +67,16 @@ p
|
||||||
| in its meta data, as a simple list containing the component names:
|
| in its meta data, as a simple list containing the component names:
|
||||||
|
|
||||||
+code(false, "json").
|
+code(false, "json").
|
||||||
"pipeline": ["tensorizer", "tagger", "parser", "ner"]
|
"pipeline": ["tagger", "parser", "ner"]
|
||||||
|
|
||||||
p
|
p
|
||||||
| Although you can mix and match pipeline components, their
|
| Although you can mix and match pipeline components, their
|
||||||
| #[strong order and combination] is usually important. Some components may
|
| #[strong order and combination] is usually important. Some components may
|
||||||
| require certain modifications on the #[code Doc] to process it. For
|
| require certain modifications on the #[code Doc] to process it. As the
|
||||||
| example, the default pipeline first applies the tensorizer, which
|
| processing pipeline is applied, spaCy encodes the document's internal
|
||||||
| pre-processes the doc and encodes its internal
|
|
||||||
| #[strong meaning representations] as an array of floats, also called a
|
| #[strong meaning representations] as an array of floats, also called a
|
||||||
| #[strong tensor]. This includes the tokens and their context, which is
|
| #[strong tensor]. This includes the tokens and their context, which is
|
||||||
| required for the next component, the tagger, to make predictions of the
|
| required for the first component, the tagger, to make predictions of the
|
||||||
| part-of-speech tags. Because spaCy's models are neural network models,
|
| part-of-speech tags. Because spaCy's models are neural network models,
|
||||||
| they only "speak" tensors and expect the input #[code Doc] to have
|
| they only "speak" tensors and expect the input #[code Doc] to have
|
||||||
| a #[code tensor].
|
| a #[code tensor].
|
||||||
|
|
|
@ -20,9 +20,8 @@ p
|
||||||
| Aside from spaCy's built-in word vectors, which were trained on a lot of
|
| Aside from spaCy's built-in word vectors, which were trained on a lot of
|
||||||
| text with a wide vocabulary, the parsing, tagging and NER models also
|
| text with a wide vocabulary, the parsing, tagging and NER models also
|
||||||
| rely on vector representations of the #[strong meanings of words in context].
|
| rely on vector representations of the #[strong meanings of words in context].
|
||||||
| As the first component of the
|
| As the #[+a("/usage/processing-pipelines") processing pipeline] is
|
||||||
| #[+a("/usage/processing-pipelines") processing pipeline], the
|
| applied, spaCy encodes a document's internal meaning representations as an
|
||||||
| tensorizer encodes a document's internal meaning representations as an
|
|
||||||
| array of floats, also called a tensor. This allows spaCy to make a
|
| array of floats, also called a tensor. This allows spaCy to make a
|
||||||
| reasonable guess at a word's meaning, based on its surrounding words.
|
| reasonable guess at a word's meaning, based on its surrounding words.
|
||||||
| Even if a word hasn't been seen before, spaCy will know #[em something]
|
| Even if a word hasn't been seen before, spaCy will know #[em something]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user