From 040553ca5920c366259b4ffc6b31547ecf7a254c Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 5 Jun 2017 13:33:01 +0200 Subject: [PATCH] Update architecture and features table --- .../docs/usage/_spacy-101/_architecture.jade | 45 ++++++++++++++++++- website/docs/usage/spacy-101.jade | 41 +++-------------- 2 files changed, 51 insertions(+), 35 deletions(-) diff --git a/website/docs/usage/_spacy-101/_architecture.jade b/website/docs/usage/_spacy-101/_architecture.jade index 4905171e7..c5a85f0b0 100644 --- a/website/docs/usage/_spacy-101/_architecture.jade +++ b/website/docs/usage/_spacy-101/_architecture.jade @@ -70,14 +70,57 @@ p +cell Map strings to and from hash values. +row - +row +cell #[+api("tokenizer") #[code Tokenizer]] +cell | Segment text, and create #[code Doc] objects with the discovered | segment boundaries. + +row + +cell #[code Lemmatizer] + +cell + | Determine the base forms of words. + +row +cell #[+api("matcher") #[code Matcher]] +cell | Match sequences of tokens, based on pattern rules, similar to | regular expressions. + + ++h(3, "architecture-pipeline") Pipeline components + ++table(["Name", "Description"]) + +row + +cell #[+api("tagger") #[code Tagger]] + +cell Annotate part-of-speech tags on #[code Doc] objects. + + +row + +cell #[+api("dependencyparser") #[code DependencyParser]] + +cell Annotate syntactic dependencies on #[code Doc] objects. + + +row + +cell #[+api("entityrecognizer") #[code EntityRecognizer]] + +cell + | Annotate named entities, e.g. persons or products, on #[code Doc] + | objects. + ++h(3, "architecture-other") Other classes + ++table(["Name", "Description"]) + +row + +cell #[+api("vectors") #[code Vectors]] + +cell Container class for vector data keyed by string. + + +row + +cell #[+api("binder") #[code Binder]] + +cell Container class for serializing collections of #[code Doc] objects. + + +row + +cell #[+api("goldparse") #[code GoldParse]] + +cell Collection for training annotations. + + +row + +cell #[+api("goldcorpus") #[code GoldCorpus]] + +cell + | An annotated corpus, using the JSON file format. Manages + | annotations for tagging, dependency parsing and NER. diff --git a/website/docs/usage/spacy-101.jade b/website/docs/usage/spacy-101.jade index 5b7908651..4c7a8b09d 100644 --- a/website/docs/usage/spacy-101.jade +++ b/website/docs/usage/spacy-101.jade @@ -110,6 +110,13 @@ p | between individual tokens, like subject or object. +cell #[+procon("pro")] + +row + +cell #[strong Lemmatization] + +cell + | Assigning the base forms of words. For example, the lemma of + | "was" is "be", and the lemma of "rats" is "rat". + +cell #[+procon("pro")] + +row +cell #[strong Sentence Boundary Detection] (SBD) +cell Finding and segmenting individual sentences. @@ -274,40 +281,6 @@ include _spacy-101/_language-data include _spacy-101/_architecture.jade -+h(3, "architecture-pipeline") Pipeline components - -+table(["Name", "Description"]) - +row - +cell #[+api("tagger") #[code Tagger]] - +cell Annotate part-of-speech tags on #[code Doc] objects. - - +row - +cell #[+api("dependencyparser") #[code DependencyParser]] - +cell Annotate syntactic dependencies on #[code Doc] objects. - - +row - +cell #[+api("entityrecognizer") #[code EntityRecognizer]] - +cell - | Annotate named entities, e.g. persons or products, on #[code Doc] - | objects. - -+h(3, "architecture-other") Other classes - -+table(["Name", "Description"]) - +row - +cell #[+api("binder") #[code Binder]] - +cell Container class for serializing collections of #[code Doc] objects. - - +row - +cell #[+api("goldparse") #[code GoldParse]] - +cell Collection for training annotations. - - +row - +cell #[+api("goldcorpus") #[code GoldCorpus]] - +cell - | An annotated corpus, using the JSON file format. Manages - | annotations for tagging, dependency parsing and NER. - +h(2, "community") Community & FAQ p