mirror of https://github.com/explosion/spaCy.git, synced 2025-01-13 10:46:29 +03:00

Update docs [ci skip]

This commit is contained in:
parent 7c52def5da
commit e50dc2c1c9
@@ -643,7 +643,7 @@ Debug a Thinc [`Model`](https://thinc.ai/docs/api-model) by running it on a
 sample text and checking how it updates its internal weights and parameters.

 ```cli
-$ python -m spacy debug model [config_path] [component] [--layers] [-DIM] [-PAR] [-GRAD] [-ATTR] [-P0] [-P1] [-P2] [P3] [--gpu-id]
+$ python -m spacy debug model [config_path] [component] [--layers] [--dimensions] [--parameters] [--gradients] [--attributes] [--print-step0] [--print-step1] [--print-step2] [--print-step3] [--gpu-id]
 ```

 <Accordion title="Example outputs" spaced>
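The renamed long-form flags replace the short forms one-for-one. As a purely illustrative invocation (the config path, component name and layer IDs below are placeholders, not taken from the diff):

```cli
$ python -m spacy debug model ./config.cfg tagger --layers 0,1 --dimensions --print-step0
```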
@@ -232,7 +232,9 @@ transformers as subnetworks directly, you can also use them via the

 The `Transformer` component sets the
 [`Doc._.trf_data`](/api/transformer#custom_attributes) extension attribute,
-which lets you access the transformers outputs at runtime.
+which lets you access the transformer's outputs at runtime. The trained
+transformer-based [pipelines](/models) provided by spaCy end in `_trf`, e.g.
+[`en_core_web_trf`](/models/en#en_core_web_trf).

 ```cli
 $ python -m spacy download en_core_web_trf
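To illustrate the extension attribute mentioned above, here is a minimal sketch, assuming the pipeline has been downloaded and the `TransformerData` container exposed by `spacy-transformers`:

```python
import spacy

# Assumes `python -m spacy download en_core_web_trf` has been run first.
nlp = spacy.load("en_core_web_trf")
doc = nlp("Apple shares rose on the news.")

# TransformerData holds the wordpiece tokens, the model's output tensors
# and the alignment between wordpieces and spaCy tokens.
trf_data = doc._.trf_data
print(len(trf_data.tensors))  # one array per transformer output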
@@ -1656,9 +1656,10 @@ because it only requires annotated sentence boundaries rather than full
 dependency parses. spaCy's [trained pipelines](/models) include both a parser
 and a trained sentence segmenter, which is
 [disabled](/usage/processing-pipelines#disabling) by default. If you only need
-sentence boundaries and no parser, you can use the `enable` and `disable`
-arguments on [`spacy.load`](/api/top-level#spacy.load) to enable the senter and
-disable the parser.
+sentence boundaries and no parser, you can use the `exclude` or `disable`
+argument on [`spacy.load`](/api/top-level#spacy.load) to load the pipeline
+without the parser and then enable the sentence recognizer explicitly with
+[`nlp.enable_pipe`](/api/language#enable_pipe).

 > #### senter vs. parser
 >
@@ -1670,7 +1671,8 @@ disable the parser.
 ### {executable="true"}
 import spacy

-nlp = spacy.load("en_core_web_sm", enable=["senter"], disable=["parser"])
+nlp = spacy.load("en_core_web_sm", exclude=["parser"])
+nlp.enable_pipe("senter")
 doc = nlp("This is a sentence. This is another sentence.")
 for sent in doc.sents:
     print(sent.text)
@@ -1734,7 +1736,7 @@ nlp = spacy.load("en_core_web_sm")
 doc = nlp(text)
 print("Before:", [sent.text for sent in doc.sents])

-@Language.component("set_custom_coundaries")
+@Language.component("set_custom_boundaries")
 def set_custom_boundaries(doc):
     for token in doc[:-1]:
         if token.text == "...":
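The hunk cuts off inside the loop. For context, the surrounding docs example continues roughly along these lines — a sketch reconstructed from the visible code, not part of this diff:

```python
import spacy
from spacy.language import Language

@Language.component("set_custom_boundaries")
def set_custom_boundaries(doc):
    for token in doc[:-1]:
        if token.text == "...":
            # Treat the token after an ellipsis as the start of a new sentence.
            doc[token.i + 1].is_sent_start = True
    return doc

nlp = spacy.load("en_core_web_sm")
# Run before the parser so the parser respects the preset boundaries.
nlp.add_pipe("set_custom_boundaries", before="parser")
doc = nlp("this is a sentence... hello... and another sentence.")
print("After:", [sent.text for sent in doc.sents])
```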
@@ -1159,7 +1159,8 @@ class DebugComponent:
         self.logger.info(f"Pipeline: {nlp.pipe_names}")

     def __call__(self, doc: Doc) -> Doc:
-        self.logger.debug(f"Doc: {len(doc)} tokens, is_tagged: {doc.is_tagged}")
+        is_tagged = doc.has_annotation("TAG")
+        self.logger.debug(f"Doc: {len(doc)} tokens, is tagged: {is_tagged}")
         return doc

 nlp = spacy.load("en_core_web_sm")
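The updated line uses `Doc.has_annotation`, which replaces the removed `doc.is_tagged` flag in v3. A quick standalone check of how it behaves, assuming `en_core_web_sm` is installed:

```python
import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("This is a sentence.")

# has_annotation takes an attribute name instead of per-flag properties
# like the removed doc.is_tagged / doc.is_parsed.
print(doc.has_annotation("TAG"))  # True: the tagger has run
print(doc.has_annotation("DEP"))  # True: the parser has run

blank_doc = spacy.blank("en")("This is a sentence.")
print(blank_doc.has_annotation("TAG"))  # False: a blank pipeline only tokenizes
```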
@@ -838,7 +838,7 @@ nlp = spacy.load("en_core_web_sm")
 matcher = Matcher(nlp.vocab)

 # Add pattern for valid hashtag, i.e. '#' plus any ASCII token
-matcher.add("HASHTAG", None, [{"ORTH": "#"}, {"IS_ASCII": True}])
+matcher.add("HASHTAG", [[{"ORTH": "#"}, {"IS_ASCII": True}]])

 # Register token extension
 Token.set_extension("is_hashtag", default=False)
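In v3, `Matcher.add` takes a list of pattern lists as its second argument, and an optional callback moves to the `on_match` keyword argument instead of the old positional `None`. A self-contained sketch of the updated call — the callback and example text are illustrative, not from the diff:

```python
import spacy
from spacy.matcher import Matcher

nlp = spacy.blank("en")
matcher = Matcher(nlp.vocab)

def label_hashtag(matcher, doc, i, matches):
    # Optional callback, now passed as a keyword argument.
    match_id, start, end = matches[i]
    print("Matched:", doc[start:end].text)

# Note the nested list: one entry per pattern, each a list of token dicts.
matcher.add("HASHTAG", [[{"ORTH": "#"}, {"IS_ASCII": True}]], on_match=label_hashtag)

doc = nlp("Trending: #spaCy")
matches = matcher(doc)
```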
@@ -285,6 +285,7 @@ add to your pipeline and customize for your use case:
 | [`Lemmatizer`](/api/lemmatizer) | Standalone component for rule-based and lookup lemmatization. |
 | [`AttributeRuler`](/api/attributeruler) | Component for setting token attributes using match patterns. |
 | [`Transformer`](/api/transformer) | Component for using [transformer models](/usage/embeddings-transformers) in your pipeline, accessing outputs and aligning tokens. Provided via [`spacy-transformers`](https://github.com/explosion/spacy-transformers). |
+| [`TrainablePipe`](/api/pipe) | Base class for trainable pipeline components. |

 <Infobox title="Details & Documentation" emoji="📖" list>
@@ -396,8 +397,8 @@ type-check model definitions.
 For data validation, spaCy v3.0 adopts
 [`pydantic`](https://github.com/samuelcolvin/pydantic). It also powers the data
 validation of Thinc's [config system](https://thinc.ai/docs/usage-config), which
-lets you register **custom functions with typed arguments**, reference them
-in your config and see validation errors if the argument values don't match.
+lets you register **custom functions with typed arguments**, reference them in
+your config and see validation errors if the argument values don't match.

 <Infobox title="Details & Documentation" emoji="📖" list>
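As a sketch of what that registration looks like in practice — the function name and argument values are illustrative, modeled on Thinc's documented registry usage:

```python
from thinc.api import Config, registry

@registry.optimizers("my_cool_optimizer.v1")
def make_my_optimizer(learn_rate: float, gamma: float):
    # Stand-in body; a real implementation would build and return an optimizer.
    return {"learn_rate": learn_rate, "gamma": gamma}

CONFIG = """
[optimizer]
@optimizers = "my_cool_optimizer.v1"
learn_rate = 0.001
gamma = 1e-8
"""

# Resolving validates the config against the type annotations: passing
# e.g. learn_rate = "fast" would raise a validation error, not fail silently.
resolved = registry.resolve(Config().from_str(CONFIG))
print(resolved["optimizer"])
```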