mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
Update architecture and features table
This commit is contained in:
parent
e204788c30
commit
040553ca59
|
@ -70,14 +70,57 @@ p
|
|||
+cell Map strings to and from hash values.
|
||||
|
||||
+row
|
||||
+row
|
||||
+cell #[+api("tokenizer") #[code Tokenizer]]
|
||||
+cell
|
||||
| Segment text, and create #[code Doc] objects with the discovered
|
||||
| segment boundaries.
|
||||
|
||||
+row
|
||||
+cell #[code Lemmatizer]
|
||||
+cell
|
||||
| Determine the base forms of words.
|
||||
|
||||
+row
|
||||
+cell #[+api("matcher") #[code Matcher]]
|
||||
+cell
|
||||
| Match sequences of tokens, based on pattern rules, similar to
|
||||
| regular expressions.
|
||||
|
||||
|
||||
+h(3, "architecture-pipeline") Pipeline components
|
||||
|
||||
+table(["Name", "Description"])
|
||||
+row
|
||||
+cell #[+api("tagger") #[code Tagger]]
|
||||
+cell Annotate part-of-speech tags on #[code Doc] objects.
|
||||
|
||||
+row
|
||||
+cell #[+api("dependencyparser") #[code DependencyParser]]
|
||||
+cell Annotate syntactic dependencies on #[code Doc] objects.
|
||||
|
||||
+row
|
||||
+cell #[+api("entityrecognizer") #[code EntityRecognizer]]
|
||||
+cell
|
||||
| Annotate named entities, e.g. persons or products, on #[code Doc]
|
||||
| objects.
|
||||
|
||||
+h(3, "architecture-other") Other classes
|
||||
|
||||
+table(["Name", "Description"])
|
||||
+row
|
||||
+cell #[+api("vectors") #[code Vectors]]
|
||||
+cell Container class for vector data keyed by string.
|
||||
|
||||
+row
|
||||
+cell #[+api("binder") #[code Binder]]
|
||||
+cell Container class for serializing collections of #[code Doc] objects.
|
||||
|
||||
+row
|
||||
+cell #[+api("goldparse") #[code GoldParse]]
|
||||
+cell Collection for training annotations.
|
||||
|
||||
+row
|
||||
+cell #[+api("goldcorpus") #[code GoldCorpus]]
|
||||
+cell
|
||||
| An annotated corpus, using the JSON file format. Manages
|
||||
| annotations for tagging, dependency parsing and NER.
|
||||
|
|
|
@ -110,6 +110,13 @@ p
|
|||
| between individual tokens, like subject or object.
|
||||
+cell #[+procon("pro")]
|
||||
|
||||
+row
|
||||
+cell #[strong Lemmatization]
|
||||
+cell
|
||||
| Assigning the base forms of words. For example, the lemma of
|
||||
| "was" is "be", and the lemma of "rats" is "rat".
|
||||
+cell #[+procon("pro")]
|
||||
|
||||
+row
|
||||
+cell #[strong Sentence Boundary Detection] (SBD)
|
||||
+cell Finding and segmenting individual sentences.
|
||||
|
@ -274,40 +281,6 @@ include _spacy-101/_language-data
|
|||
|
||||
include _spacy-101/_architecture.jade
|
||||
|
||||
+h(3, "architecture-pipeline") Pipeline components
|
||||
|
||||
+table(["Name", "Description"])
|
||||
+row
|
||||
+cell #[+api("tagger") #[code Tagger]]
|
||||
+cell Annotate part-of-speech tags on #[code Doc] objects.
|
||||
|
||||
+row
|
||||
+cell #[+api("dependencyparser") #[code DependencyParser]]
|
||||
+cell Annotate syntactic dependencies on #[code Doc] objects.
|
||||
|
||||
+row
|
||||
+cell #[+api("entityrecognizer") #[code EntityRecognizer]]
|
||||
+cell
|
||||
| Annotate named entities, e.g. persons or products, on #[code Doc]
|
||||
| objects.
|
||||
|
||||
+h(3, "architecture-other") Other classes
|
||||
|
||||
+table(["Name", "Description"])
|
||||
+row
|
||||
+cell #[+api("binder") #[code Binder]]
|
||||
+cell Container class for serializing collections of #[code Doc] objects.
|
||||
|
||||
+row
|
||||
+cell #[+api("goldparse") #[code GoldParse]]
|
||||
+cell Collection for training annotations.
|
||||
|
||||
+row
|
||||
+cell #[+api("goldcorpus") #[code GoldCorpus]]
|
||||
+cell
|
||||
| An annotated corpus, using the JSON file format. Manages
|
||||
| annotations for tagging, dependency parsing and NER.
|
||||
|
||||
+h(2, "community") Community & FAQ
|
||||
|
||||
p
|
||||
|
|
Loading…
Reference in New Issue
Block a user