spaCy/website/usage/linguistic-features.jade

//- 💫 DOCS > USAGE > LINGUISTIC FEATURES

include ../_includes/_mixins

p
    |  Processing raw text intelligently is difficult: most words are rare, and
    |  it's common for words that look completely different to mean almost the
    |  same thing. The same words in a different order can mean something
    |  completely different. Even splitting text into useful word-like units can
    |  be difficult in many languages. While it's possible to solve some
    |  problems starting from only the raw characters, it's usually better to
    |  use linguistic knowledge to add useful information. That's exactly what
    |  spaCy is designed to do: you put in raw text, and get back a
    |  #[+api("doc") #[code Doc]] object, that comes with a variety of
    |  annotations.

+section("pos-tagging")
    +h(2, "pos-tagging") Part-of-speech tagging
        +tag-model("tagger", "dependency parse")
    include _linguistic-features/_pos-tagging

+section("dependency-parse")
    +h(2, "dependency-parse") Dependency parsing
        +tag-model("dependency parse")
    include _linguistic-features/_dependency-parse

+section("named-entities")
    +h(2, "named-entities") Named Entities
        +tag-model("named entities")
    include _linguistic-features/_named-entities

+section("tokenization")
    +h(2, "tokenization") Tokenization
    include _linguistic-features/_tokenization

+section("sbd")
    +h(2, "sbd") Sentence Segmentation
    include _linguistic-features/_sentence-segmentation

+section("rule-based-matching")
    +h(2, "rule-based-matching") Rule-based matching
    include _linguistic-features/_rule-based-matching