Update architecture and features table

2025-08-09 06:34:54 +03:00 · 2017-06-05 13:33:01 +02:00 · 2017-06-05 13:33:01 +02:00 · 040553ca59
commit 040553ca59
parent e204788c30
2 changed files with 51 additions and 35 deletions
--- a/website/docs/usage/_spacy-101/_architecture.jade
+++ b/website/docs/usage/_spacy-101/_architecture.jade
@@ -70,14 +70,57 @@ p
        +cell Map strings to and from hash values.

    +row
-        +row
        +cell #[+api("tokenizer") #[code Tokenizer]]
        +cell
            |  Segment text, and create #[code Doc] objects with the discovered
            |  segment boundaries.

+    +row
+        +cell #[code Lemmatizer]
+        +cell
+            |  Determine the base forms of words.
+
    +row
        +cell #[+api("matcher") #[code Matcher]]
        +cell
            |  Match sequences of tokens, based on pattern rules, similar to
            |  regular expressions.
+
+
++h(3, "architecture-pipeline") Pipeline components
+
++table(["Name", "Description"])
+    +row
+        +cell #[+api("tagger") #[code Tagger]]
+        +cell Annotate part-of-speech tags on #[code Doc] objects.
+
+    +row
+        +cell #[+api("dependencyparser") #[code DependencyParser]]
+        +cell Annotate syntactic dependencies on #[code Doc] objects.
+
+    +row
+        +cell #[+api("entityrecognizer") #[code EntityRecognizer]]
+        +cell
+            |  Annotate named entities, e.g. persons or products, on #[code Doc]
+            |  objects.
+
++h(3, "architecture-other") Other classes
+
++table(["Name", "Description"])
+    +row
+        +cell #[+api("vectors") #[code Vectors]]
+        +cell Container class for vector data keyed by string.
+
+    +row
+        +cell #[+api("binder") #[code Binder]]
+        +cell Container class for serializing collections of #[code Doc] objects.
+
+    +row
+        +cell #[+api("goldparse") #[code GoldParse]]
+        +cell Collection for training annotations.
+
+    +row
+        +cell #[+api("goldcorpus") #[code GoldCorpus]]
+        +cell
+            |  An annotated corpus, using the JSON file format. Manages
+            |  annotations for tagging, dependency parsing and NER.
--- a/website/docs/usage/spacy-101.jade
+++ b/website/docs/usage/spacy-101.jade
@@ -110,6 +110,13 @@ p
            |  between individual tokens, like subject or object.
        +cell #[+procon("pro")]

+    +row
+        +cell #[strong Lemmatization]
+        +cell
+            |  Assigning the base forms of words. For example, the lemma of
+            |  "was" is "be", and the lemma of "rats" is "rat".
+        +cell #[+procon("pro")]
+
    +row
        +cell #[strong Sentence Boundary Detection] (SBD)
        +cell Finding and segmenting individual sentences.
@@ -274,40 +281,6 @@ include _spacy-101/_language-data

 include _spacy-101/_architecture.jade

-+h(3, "architecture-pipeline") Pipeline components
-
-+table(["Name", "Description"])
-    +row
-        +cell #[+api("tagger") #[code Tagger]]
-        +cell Annotate part-of-speech tags on #[code Doc] objects.
-
-    +row
-        +cell #[+api("dependencyparser") #[code DependencyParser]]
-        +cell Annotate syntactic dependencies on #[code Doc] objects.
-
-    +row
-        +cell #[+api("entityrecognizer") #[code EntityRecognizer]]
-        +cell
-            |  Annotate named entities, e.g. persons or products, on #[code Doc]
-            |  objects.
-
-+h(3, "architecture-other") Other classes
-
-+table(["Name", "Description"])
-    +row
-        +cell #[+api("binder") #[code Binder]]
-        +cell Container class for serializing collections of #[code Doc] objects.
-
-    +row
-        +cell #[+api("goldparse") #[code GoldParse]]
-        +cell Collection for training annotations.
-
-    +row
-        +cell #[+api("goldcorpus") #[code GoldCorpus]]
-        +cell
-            |  An annotated corpus, using the JSON file format. Manages
-            |  annotations for tagging, dependency parsing and NER.
-
 +h(2, "community") Community & FAQ

 p