mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
Update docs [ci skip]
This commit is contained in:
parent
7c52def5da
commit
e50dc2c1c9
|
@ -643,7 +643,7 @@ Debug a Thinc [`Model`](https://thinc.ai/docs/api-model) by running it on a
|
|||
sample text and checking how it updates its internal weights and parameters.
|
||||
|
||||
```cli
|
||||
$ python -m spacy debug model [config_path] [component] [--layers] [-DIM] [-PAR] [-GRAD] [-ATTR] [-P0] [-P1] [-P2] [P3] [--gpu-id]
|
||||
$ python -m spacy debug model [config_path] [component] [--layers] [--dimensions] [--parameters] [--gradients] [--attributes] [--print-step0] [--print-step1] [--print-step2] [--print-step3] [--gpu-id]
|
||||
```
|
||||
|
||||
<Accordion title="Example outputs" spaced>
|
||||
|
|
|
@ -232,7 +232,9 @@ transformers as subnetworks directly, you can also use them via the
|
|||
|
||||
The `Transformer` component sets the
|
||||
[`Doc._.trf_data`](/api/transformer#custom_attributes) extension attribute,
|
||||
which lets you access the transformers outputs at runtime.
|
||||
which lets you access the transformer's outputs at runtime. The trained
|
||||
transformer-based [pipelines](/models) provided by spaCy end in `_trf`, e.g.
|
||||
[`en_core_web_trf`](/models/en#en_core_web_trf).
|
||||
|
||||
```cli
|
||||
$ python -m spacy download en_core_web_trf
|
||||
|
|
|
@ -1656,9 +1656,10 @@ because it only requires annotated sentence boundaries rather than full
|
|||
dependency parses. spaCy's [trained pipelines](/models) include both a parser
|
||||
and a trained sentence segmenter, which is
|
||||
[disabled](/usage/processing-pipelines#disabling) by default. If you only need
|
||||
sentence boundaries and no parser, you can use the `enable` and `disable`
|
||||
arguments on [`spacy.load`](/api/top-level#spacy.load) to enable the senter and
|
||||
disable the parser.
|
||||
sentence boundaries and no parser, you can use the `exclude` or `disable`
|
||||
argument on [`spacy.load`](/api/top-level#spacy.load) to load the pipeline
|
||||
without the parser and then enable the sentence recognizer explicitly with
|
||||
[`nlp.enable_pipe`](/api/language#enable_pipe).
|
||||
|
||||
> #### senter vs. parser
|
||||
>
|
||||
|
@ -1670,7 +1671,8 @@ disable the parser.
|
|||
### {executable="true"}
|
||||
import spacy
|
||||
|
||||
nlp = spacy.load("en_core_web_sm", enable=["senter"], disable=["parser"])
|
||||
nlp = spacy.load("en_core_web_sm", exclude=["parser"])
|
||||
nlp.enable_pipe("senter")
|
||||
doc = nlp("This is a sentence. This is another sentence.")
|
||||
for sent in doc.sents:
|
||||
print(sent.text)
|
||||
|
@ -1734,7 +1736,7 @@ nlp = spacy.load("en_core_web_sm")
|
|||
doc = nlp(text)
|
||||
print("Before:", [sent.text for sent in doc.sents])
|
||||
|
||||
@Language.component("set_custom_coundaries")
|
||||
@Language.component("set_custom_boundaries")
|
||||
def set_custom_boundaries(doc):
|
||||
for token in doc[:-1]:
|
||||
if token.text == "...":
|
||||
|
|
|
@ -1159,7 +1159,8 @@ class DebugComponent:
|
|||
self.logger.info(f"Pipeline: {nlp.pipe_names}")
|
||||
|
||||
def __call__(self, doc: Doc) -> Doc:
|
||||
self.logger.debug(f"Doc: {len(doc)} tokens, is_tagged: {doc.is_tagged}")
|
||||
is_tagged = doc.has_annotation("TAG")
|
||||
self.logger.debug(f"Doc: {len(doc)} tokens, is tagged: {is_tagged}")
|
||||
return doc
|
||||
|
||||
nlp = spacy.load("en_core_web_sm")
|
||||
|
|
|
@ -838,7 +838,7 @@ nlp = spacy.load("en_core_web_sm")
|
|||
matcher = Matcher(nlp.vocab)
|
||||
|
||||
# Add pattern for valid hashtag, i.e. '#' plus any ASCII token
|
||||
matcher.add("HASHTAG", None, [{"ORTH": "#"}, {"IS_ASCII": True}])
|
||||
matcher.add("HASHTAG", [[{"ORTH": "#"}, {"IS_ASCII": True}]])
|
||||
|
||||
# Register token extension
|
||||
Token.set_extension("is_hashtag", default=False)
|
||||
|
|
|
@ -285,6 +285,7 @@ add to your pipeline and customize for your use case:
|
|||
| [`Lemmatizer`](/api/lemmatizer) | Standalone component for rule-based and lookup lemmatization. |
|
||||
| [`AttributeRuler`](/api/attributeruler) | Component for setting token attributes using match patterns. |
|
||||
| [`Transformer`](/api/transformer) | Component for using [transformer models](/usage/embeddings-transformers) in your pipeline, accessing outputs and aligning tokens. Provided via [`spacy-transformers`](https://github.com/explosion/spacy-transformers). |
|
||||
| [`TrainablePipe`](/api/pipe) | Base class for trainable pipeline components. |
|
||||
|
||||
<Infobox title="Details & Documentation" emoji="📖" list>
|
||||
|
||||
|
@ -396,8 +397,8 @@ type-check model definitions.
|
|||
For data validation, spaCy v3.0 adopts
|
||||
[`pydantic`](https://github.com/samuelcolvin/pydantic). It also powers the data
|
||||
validation of Thinc's [config system](https://thinc.ai/docs/usage-config), which
|
||||
lets you register **custom functions with typed arguments**, reference them
|
||||
in your config and see validation errors if the argument values don't match.
|
||||
lets you register **custom functions with typed arguments**, reference them in
|
||||
your config and see validation errors if the argument values don't match.
|
||||
|
||||
<Infobox title="Details & Documentation" emoji="📖" list>
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user