Add TextCategorizer API docs stub

2025-10-19 10:14:24 +03:00 · 2017-07-22 17:56:33 +02:00 · 2017-07-22 17:56:33 +02:00 · f085b88f9d
commit f085b88f9d
parent ab1a4e8b3c
2 changed files with 28 additions and 0 deletions
--- a/website/docs/api/_data.json
+++ b/website/docs/api/_data.json
@ -21,6 +21,7 @@
            "Tagger": "tagger",
            "DependencyParser": "dependencyparser",
            "EntityRecognizer": "entityrecognizer",
            "TextCategorizer": "textcategorizer",
            "Matcher": "matcher",
            "Lexeme": "lexeme",
            "Vocab": "vocab",
@ -130,6 +131,12 @@
        "source": "spacy/pipeline.pyx"
    },
    "textcategorizer": {
        "title": "TextCategorizer",
        "tag": "class",
        "source": "spacy/pipeline.pyx"
    },
    "dependencyparser": {
        "title": "DependencyParser",
        "tag": "class",
--- a/website/docs/api/textcategorizer.jade
+++ b/website/docs/api/textcategorizer.jade
@ -0,0 +1,21 @@
 //- 💫 DOCS > API > TEXTCATEGORIZER
 include ../../_includes/_mixins
 p
    |  Add text categorization models to spaCy pipelines. The model supports
    |  classification with multiple, non-mutually exclusive labels.
 p
    |  You can change the model architecture rather easily, but by default, the
    |  #[code TextCategorizer] class uses a convolutional neural network to
    |  assign position-sensitive vectors to each word in the document. This step
    |  is similar to the #[+api("tensorizer") #[code Tensorizer]] component, but the
    |  #[code TextCategorizer] uses its own CNN model so that it does not share
    |  weights with the other pipeline components. The document tensor is then
    |  summarized by concatenating the outputs of max and mean pooling, and a
    |  multilayer perceptron is used to predict an output vector of length
    |  #[code nr_class], before a logistic activation is applied elementwise.
    |  The value of each output neuron is the probability that the
    |  corresponding class is present.
 +under-construction