mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Port over docs changes [ci skip]
This commit is contained in:
parent
74cc6bb06a
commit
53680642af
|
@ -208,7 +208,7 @@ p
|
|||
+row
|
||||
+cell #[code word_spacing]
|
||||
+cell int
|
||||
+cell Horizontal spacing between words and arcs in px.
|
||||
+cell Vertical spacing between words and arcs in px.
|
||||
+cell #[code 45]
|
||||
|
||||
+row
|
||||
|
|
|
@ -674,7 +674,7 @@ p
|
|||
| token vectors.
|
||||
|
||||
+aside-code("Example").
|
||||
apples = nlp(u'I like apples')
|
||||
doc = nlp(u'I like apples')
|
||||
assert doc.vector.dtype == 'float32'
|
||||
assert doc.vector.shape == (300,)
|
||||
|
||||
|
|
|
@ -12,11 +12,24 @@ p Create a #[code GoldCorpus].
|
|||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code train_path]
|
||||
+cell unicode or #[code Path]
|
||||
+cell File or directory of training data.
|
||||
+cell #[code train]
|
||||
+cell unicode or #[code Path] or iterable
|
||||
+cell
|
||||
| Training data, as a path (file or directory) or iterable. If an
|
||||
| iterable, each item should be a #[code (text, paragraphs)]
|
||||
| tuple, where each paragraph is a tuple
|
||||
| #[code.u-break (sentences, brackets)], and each sentence is a
|
||||
| tuple #[code.u-break (ids, words, tags, heads, ner)]. See the
|
||||
| implementation of
|
||||
| #[+src(gh("spacy", "spacy/gold.pyx")) #[code gold.read_json_file]]
|
||||
| for further details.
|
||||
|
||||
+row
|
||||
+cell #[code dev_path]
|
||||
+cell unicode or #[code Path]
|
||||
+cell File or directory of development data.
|
||||
+cell #[code dev]
|
||||
+cell unicode or #[code Path] or iterable
|
||||
+cell Development data, as a path (file or directory) or iterable.
|
||||
|
||||
+row("foot")
|
||||
+cell returns
|
||||
+cell #[code GoldCorpus]
|
||||
+cell The newly constructed object.
|
||||
|
|
|
@ -325,6 +325,12 @@ p The L2 norm of the lexeme's vector representation.
|
|||
+cell bool
|
||||
+cell Is the lexeme a quotation mark?
|
||||
|
||||
+row
|
||||
+cell #[code is_currency]
|
||||
+tag-new("2.0.8")
|
||||
+cell bool
|
||||
+cell Is the lexeme a currency symbol?
|
||||
|
||||
+row
|
||||
+cell #[code like_url]
|
||||
+cell bool
|
||||
|
|
|
@ -111,6 +111,25 @@ p Match a stream of documents, yielding them in turn.
|
|||
| parallel, if the #[code Matcher] implementation supports
|
||||
| multi-threading.
|
||||
|
||||
+row
|
||||
+cell #[code return_matches]
|
||||
+tag-new(2.1)
|
||||
+cell bool
|
||||
+cell
|
||||
| Yield the match lists along with the docs, making results
|
||||
| #[code (doc, matches)] tuples.
|
||||
|
||||
+row
|
||||
+cell #[code as_tuples]
|
||||
+tag-new(2.1)
|
||||
+cell bool
|
||||
+cell
|
||||
| Interpret the input stream as #[code (doc, context)] tuples, and
|
||||
| yield #[code (result, context)] tuples out. If both
|
||||
| #[code return_matches] and #[code as_tuples] are #[code True],
|
||||
| the output will be a sequence of
|
||||
| #[code ((doc, matches), context)] tuples.
|
||||
|
||||
+row("foot")
|
||||
+cell yields
|
||||
+cell #[code Doc]
|
||||
|
|
|
@ -209,7 +209,7 @@ p
|
|||
|
||||
+row
|
||||
+cell #[code drop]
|
||||
+cell int
|
||||
+cell float
|
||||
+cell The dropout rate.
|
||||
|
||||
+row
|
||||
|
|
|
@ -740,6 +740,12 @@ p The L2 norm of the token's vector representation.
|
|||
+cell bool
|
||||
+cell Is the token a quotation mark?
|
||||
|
||||
+row
|
||||
+cell #[code is_currency]
|
||||
+tag-new("2.0.8")
|
||||
+cell bool
|
||||
+cell Is the token a currency symbol?
|
||||
|
||||
+row
|
||||
+cell #[code like_url]
|
||||
+cell bool
|
||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 378 KiB |
|
@ -76,13 +76,15 @@
|
|||
},
|
||||
|
||||
"MODEL_LICENSES": {
|
||||
"CC BY-SA": "https://creativecommons.org/licenses/by-sa/3.0/",
|
||||
"CC BY-SA 3.0": "https://creativecommons.org/licenses/by-sa/3.0/",
|
||||
"CC BY-SA 4.0": "https://creativecommons.org/licenses/by-sa/4.0/",
|
||||
"CC BY-NC": "https://creativecommons.org/licenses/by-nc/3.0/",
|
||||
"CC BY-NC 3.0": "https://creativecommons.org/licenses/by-nc/3.0/",
|
||||
"GPL": "https://www.gnu.org/licenses/gpl.html",
|
||||
"LGPL": "https://www.gnu.org/licenses/lgpl.html"
|
||||
"CC BY 4.0": "https://creativecommons.org/licenses/by/4.0/",
|
||||
"CC BY-SA": "https://creativecommons.org/licenses/by-sa/3.0/",
|
||||
"CC BY-SA 3.0": "https://creativecommons.org/licenses/by-sa/3.0/",
|
||||
"CC BY-SA 4.0": "https://creativecommons.org/licenses/by-sa/4.0/",
|
||||
"CC BY-NC": "https://creativecommons.org/licenses/by-nc/3.0/",
|
||||
"CC BY-NC 3.0": "https://creativecommons.org/licenses/by-nc/3.0/",
|
||||
"CC-BY-NC-SA 3.0": "https://creativecommons.org/licenses/by-nc-sa/3.0/",
|
||||
"GPL": "https://www.gnu.org/licenses/gpl.html",
|
||||
"LGPL": "https://www.gnu.org/licenses/lgpl.html"
|
||||
},
|
||||
|
||||
"MODEL_BENCHMARKS": {
|
||||
|
|
|
@ -40,7 +40,7 @@ p
|
|||
+item
|
||||
| Make the #[strong model data] available to the #[code Language] class
|
||||
| by calling #[+api("language#from_disk") #[code from_disk]] with the
|
||||
| path to the model data ditectory.
|
||||
| path to the model data directory.
|
||||
|
||||
p
|
||||
| So when you call this...
|
||||
|
@ -53,7 +53,7 @@ p
|
|||
| pipeline #[code.u-break ["tagger", "parser", "ner"]]. spaCy will then
|
||||
| initialise #[code spacy.lang.en.English], and create each pipeline
|
||||
| component and add it to the processing pipeline. It'll then load in the
|
||||
| model's data from its data ditectory and return the modified
|
||||
| model's data from its data directory and return the modified
|
||||
| #[code Language] class for you to use as the #[code nlp] object.
|
||||
|
||||
p
|
||||
|
|
|
@ -37,7 +37,7 @@ p
|
|||
+cell.u-text-label.u-color-theme=label
|
||||
for cell in cells
|
||||
+cell.u-text-center
|
||||
- var result = cell > 0.5 ? ["yes", "similar"] : cell != 1 ? ["no", "dissimilar"] : ["neutral", "identical"]
|
||||
- var result = cell < 0.5 ? ["no", "dissimilar"] : cell != 1 ? ["yes", "similar"] : ["neutral", "identical"]
|
||||
| #[code=cell.toFixed(2)] #[+procon(...result)]
|
||||
|
||||
p
|
||||
|
|
|
@ -163,7 +163,7 @@ p
|
|||
nlp = English().from_disk('/path/to/nlp')
|
||||
|
||||
p
|
||||
| spay's serialization API has been made consistent across classes and
|
||||
| spaCy's serialization API has been made consistent across classes and
|
||||
| objects. All container classes, i.e. #[code Language], #[code Doc],
|
||||
| #[code Vocab] and #[code StringStore] now have a #[code to_bytes()],
|
||||
| #[code from_bytes()], #[code to_disk()] and #[code from_disk()] method
|
||||
|
|
|
@ -68,7 +68,7 @@ p
|
|||
+item #[strong spaCy is not research software].
|
||||
| It's built on the latest research, but it's designed to get
|
||||
| things done. This leads to fairly different design decisions than
|
||||
| #[+a("https://github./nltk/nltk") NLTK]
|
||||
| #[+a("https://github.com/nltk/nltk") NLTK]
|
||||
| or #[+a("https://stanfordnlp.github.io/CoreNLP/") CoreNLP], which were
|
||||
| created as platforms for teaching and research. The main difference
|
||||
| is that spaCy is integrated and opinionated. spaCy tries to avoid asking
|
||||
|
|
Loading…
Reference in New Issue
Block a user