mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-28 02:46:35 +03:00
commit
a9465271a7
|
@ -60,7 +60,7 @@ GLOSSARY = {
|
|||
'JJR': 'adjective, comparative',
|
||||
'JJS': 'adjective, superlative',
|
||||
'LS': 'list item marker',
|
||||
'MD': 'verb, modal auxillary',
|
||||
'MD': 'verb, modal auxiliary',
|
||||
'NIL': 'missing tag',
|
||||
'NN': 'noun, singular or mass',
|
||||
'NNP': 'noun, proper singular',
|
||||
|
@ -91,7 +91,7 @@ GLOSSARY = {
|
|||
'NFP': 'superfluous punctuation',
|
||||
'GW': 'additional word in multi-word expression',
|
||||
'XX': 'unknown',
|
||||
'BES': 'auxillary "be"',
|
||||
'BES': 'auxiliary "be"',
|
||||
'HVS': 'forms of "have"',
|
||||
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@ p
|
|||
+pos-row("$", "SYM", "SymType=currency", "symbol, currency")
|
||||
+pos-row("ADD", "X", "", "email")
|
||||
+pos-row("AFX", "ADJ", "Hyph=yes", "affix")
|
||||
+pos-row("BES", "VERB", "", 'auxillary "be"')
|
||||
+pos-row("BES", "VERB", "", 'auxiliary "be"')
|
||||
+pos-row("CC", "CONJ", "ConjType=coor", "conjunction, coordinating")
|
||||
+pos-row("CD", "NUM", "NumType=card", "cardinal number")
|
||||
+pos-row("DT", "DET", "determiner")
|
||||
|
@ -35,7 +35,7 @@ p
|
|||
+pos-row("JJR", "ADJ", "Degree=comp", "adjective, comparative")
|
||||
+pos-row("JJS", "ADJ", "Degree=sup", "adjective, superlative")
|
||||
+pos-row("LS", "PUNCT", "NumType=ord", "list item marker")
|
||||
+pos-row("MD", "VERB", "VerbType=mod", "verb, modal auxillary")
|
||||
+pos-row("MD", "VERB", "VerbType=mod", "verb, modal auxiliary")
|
||||
+pos-row("NFP", "PUNCT", "", "superfluous punctuation")
|
||||
+pos-row("NIL", "", "", "missing tag")
|
||||
+pos-row("NN", "NOUN", "Number=sing", "noun, singular or mass")
|
||||
|
|
|
@ -18,7 +18,7 @@ p
|
|||
| consisting of the words to be processed.
|
||||
|
||||
p
|
||||
| Each state consists of the words on the stack (if any), which consistute
|
||||
| Each state consists of the words on the stack (if any), which constitute
|
||||
| the current entity being constructed. We also have the current word, and
|
||||
| the two subsequent words. Finally, we also have the entities previously
|
||||
| built.
|
||||
|
|
|
@ -222,7 +222,7 @@ p The sentence span that this span is a part of.
|
|||
|
||||
p
|
||||
| The token within the span that's highest in the parse tree. If there's a
|
||||
| tie, the earlist is prefered.
|
||||
| tie, the earliest is preferred.
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
|
|
|
@ -28,7 +28,7 @@ p
|
|||
| #[a(href="#word-vectors") word vectors].
|
||||
|
||||
+item
|
||||
| #[strong Set up] a #[a(href="#model-directory") model direcory] and #[strong train] the #[a(href="#train-tagger-parser") tagger and parser].
|
||||
| #[strong Set up] a #[a(href="#model-directory") model directory] and #[strong train] the #[a(href="#train-tagger-parser") tagger and parser].
|
||||
|
||||
p
|
||||
| For some languages, you may also want to develop a solution for
|
||||
|
@ -303,7 +303,7 @@ p
|
|||
p
|
||||
| Because languages can vary in quite arbitrary ways, spaCy avoids
|
||||
| organising the language data into an explicit inheritance hierarchy.
|
||||
| Instead, reuseable functions and data are collected as atomic pieces in
|
||||
| Instead, reusable functions and data are collected as atomic pieces in
|
||||
| the #[code spacy.language_data] package.
|
||||
|
||||
+aside-code("Example").
|
||||
|
|
|
@ -21,7 +21,7 @@ p
|
|||
+h(2, "special-cases") Adding special case tokenization rules
|
||||
|
||||
p
|
||||
| Most domains have at least some idiosyncracies that require custom
|
||||
| Most domains have at least some idiosyncrasies that require custom
|
||||
| tokenization rules. Here's how to add a special case rule to an existing
|
||||
| #[+api("tokenizer") #[code Tokenizer]] instance:
|
||||
|
||||
|
|
|
@ -87,7 +87,7 @@ p
|
|||
| The other way to install spaCy is to clone its
|
||||
| #[+a(gh("spaCy")) GitHub repository] and build it from source. That is
|
||||
| the common way if you want to make changes to the code base. You'll need to
|
||||
| make sure that you have a development enviroment consisting of a Python
|
||||
| make sure that you have a development environment consisting of a Python
|
||||
| distribution including header files, a compiler,
|
||||
| #[+a("https://pip.pypa.io/en/latest/installing/") pip],
|
||||
| #[+a("https://virtualenv.pypa.io/") virtualenv] and
|
||||
|
|
|
@ -205,7 +205,7 @@ p
|
|||
| is mostly intended as a convenient, interactive wrapper. It performs
|
||||
| compatibility checks and prints detailed error messages and warnings.
|
||||
| However, if you're downloading models as part of an automated build
|
||||
| process, this only adds an unecessary layer of complexity. If you know
|
||||
| process, this only adds an unnecessary layer of complexity. If you know
|
||||
| which models your application needs, you should be specifying them directly.
|
||||
|
||||
+aside("Prevent re-downloading models")
|
||||
|
|
|
@ -50,7 +50,7 @@ p
|
|||
+cell #[code VerbForm=Fin], #[code Mood=Ind], #[code Tense=Pres]
|
||||
|
||||
+row
|
||||
+cell I read the paper yesteday
|
||||
+cell I read the paper yesterday
|
||||
+cell read
|
||||
+cell read
|
||||
+cell verb
|
||||
|
|
|
@ -58,7 +58,7 @@ p This command will create a model package directory that should look like this:
|
|||
|
||||
p
|
||||
| You can also find templates for all files in our
|
||||
| #[+a(gh("spacy-dev-resouces", "templates/model")) spaCy dev resources].
|
||||
| #[+a(gh("spacy-dev-resources", "templates/model")) spaCy dev resources].
|
||||
| If you're creating the package manually, keep in mind that the directories
|
||||
| need to be named according to the naming conventions of
|
||||
| #[code [language]_[name]] and #[code [language]_[name]-[version]]. The
|
||||
|
|
Loading…
Reference in New Issue
Block a user