mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 09:26:27 +03:00
Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
commit
a93276bb78
|
@ -41,7 +41,7 @@ mixin src(url)
|
||||||
path - [string] path to API docs page relative to /docs/api/
|
path - [string] path to API docs page relative to /docs/api/
|
||||||
|
|
||||||
mixin api(path)
|
mixin api(path)
|
||||||
+a("/docs/api/" + path, true)(target="_self").u-no-border.u-inline-block
|
+a("/docs/api/" + path, true)(target="_self").u-no-border.u-inline-block.u-nowrap
|
||||||
block
|
block
|
||||||
|
|
||||||
| #[+icon("book", 18).o-icon--inline.u-color-theme]
|
| #[+icon("book", 18).o-icon--inline.u-color-theme]
|
||||||
|
|
|
@ -26,7 +26,7 @@ $font-code: 'Source Code Pro', Consolas, 'Andale Mono', Menlo, Monaco, Courier,
|
||||||
|
|
||||||
// Colors
|
// Colors
|
||||||
|
|
||||||
$colors: ( blue: #09a3d5, red: #d9515d )
|
$colors: ( blue: #09a3d5, red: #d9515d, green: #08c35e )
|
||||||
|
|
||||||
$color-back: #fff !default
|
$color-back: #fff !default
|
||||||
$color-front: #1a1e23 !default
|
$color-front: #1a1e23 !default
|
||||||
|
|
4
website/assets/css/style_green.sass
Normal file
4
website/assets/css/style_green.sass
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
//- 💫 STYLESHEET (GREEN)
|
||||||
|
|
||||||
|
$theme: green
|
||||||
|
@import style
|
BIN
website/assets/img/pattern_green.jpg
Normal file
BIN
website/assets/img/pattern_green.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 216 KiB |
|
@ -2,8 +2,13 @@
|
||||||
"sidebar": {
|
"sidebar": {
|
||||||
"Introduction": {
|
"Introduction": {
|
||||||
"Facts & Figures": "./",
|
"Facts & Figures": "./",
|
||||||
"Languages": "language-models",
|
"Languages": "language-models"
|
||||||
"Philosophy": "philosophy"
|
},
|
||||||
|
"Top-level": {
|
||||||
|
"spacy": "spacy",
|
||||||
|
"displacy": "displacy",
|
||||||
|
"Utility Functions": "util",
|
||||||
|
"Command line": "cli"
|
||||||
},
|
},
|
||||||
"Classes": {
|
"Classes": {
|
||||||
"Doc": "doc",
|
"Doc": "doc",
|
||||||
|
@ -21,9 +26,6 @@
|
||||||
"GoldParse": "goldparse"
|
"GoldParse": "goldparse"
|
||||||
},
|
},
|
||||||
"Other": {
|
"Other": {
|
||||||
"Command line": "cli",
|
|
||||||
"displaCy": "displacy",
|
|
||||||
"Utility Functions": "util",
|
|
||||||
"Annotation Specs": "annotation",
|
"Annotation Specs": "annotation",
|
||||||
"Feature Scheme": "features"
|
"Feature Scheme": "features"
|
||||||
}
|
}
|
||||||
|
@ -43,6 +45,26 @@
|
||||||
"title": "Philosophy"
|
"title": "Philosophy"
|
||||||
},
|
},
|
||||||
|
|
||||||
|
"spacy": {
|
||||||
|
"title": "spaCy top-level functions",
|
||||||
|
"next": "displacy"
|
||||||
|
},
|
||||||
|
|
||||||
|
"displacy": {
|
||||||
|
"title": "displaCy",
|
||||||
|
"tag": "module",
|
||||||
|
"next": "util"
|
||||||
|
},
|
||||||
|
|
||||||
|
"util": {
|
||||||
|
"title": "Utility Functions",
|
||||||
|
"next": "cli"
|
||||||
|
},
|
||||||
|
|
||||||
|
"cli": {
|
||||||
|
"title": "Command Line Interface"
|
||||||
|
},
|
||||||
|
|
||||||
"language": {
|
"language": {
|
||||||
"title": "Language",
|
"title": "Language",
|
||||||
"tag": "class"
|
"tag": "class"
|
||||||
|
@ -113,20 +135,6 @@
|
||||||
"tag": "class"
|
"tag": "class"
|
||||||
},
|
},
|
||||||
|
|
||||||
"cli": {
|
|
||||||
"title": "Command Line Interface",
|
|
||||||
"next": "displacy"
|
|
||||||
},
|
|
||||||
|
|
||||||
"displacy": {
|
|
||||||
"title": "displaCy",
|
|
||||||
"tag": "module"
|
|
||||||
},
|
|
||||||
|
|
||||||
"util": {
|
|
||||||
"title": "Utility Functions"
|
|
||||||
},
|
|
||||||
|
|
||||||
"annotation": {
|
"annotation": {
|
||||||
"title": "Annotation Specifications"
|
"title": "Annotation Specifications"
|
||||||
},
|
},
|
||||||
|
|
|
@ -92,7 +92,7 @@ p
|
||||||
+row
|
+row
|
||||||
+cell #[code model]
|
+cell #[code model]
|
||||||
+cell positional
|
+cell positional
|
||||||
+cell Shortcut link of model (optional).
|
+cell A model, i.e. shortcut link, package name or path (optional).
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code --markdown], #[code -md]
|
+cell #[code --markdown], #[code -md]
|
||||||
|
|
|
@ -8,7 +8,7 @@ p
|
||||||
| #[+a("/docs/usage/visualizers") visualizing spaCy].
|
| #[+a("/docs/usage/visualizers") visualizing spaCy].
|
||||||
|
|
||||||
|
|
||||||
+h(2, "serve") serve
|
+h(2, "serve") displacy.serve
|
||||||
+tag method
|
+tag method
|
||||||
|
|
||||||
p
|
p
|
||||||
|
@ -60,7 +60,7 @@ p
|
||||||
+cell Port to serve visualization.
|
+cell Port to serve visualization.
|
||||||
+cell #[code 5000]
|
+cell #[code 5000]
|
||||||
|
|
||||||
+h(2, "render") render
|
+h(2, "render") displacy.render
|
||||||
+tag method
|
+tag method
|
||||||
|
|
||||||
p Render a dependency parse tree or named entity visualization.
|
p Render a dependency parse tree or named entity visualization.
|
||||||
|
|
|
@ -63,9 +63,8 @@ p
|
||||||
| is preserved.
|
| is preserved.
|
||||||
|
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
tokens = nlp('An example sentence. Another example sentence.')
|
doc = nlp(u'An example sentence. Another sentence.')
|
||||||
tokens[0].text, tokens[0].head.tag_
|
assert (doc[0].text, doc[0].head.tag_) == ('An', 'NN')
|
||||||
# ('An', 'NN')
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
+table(["Name", "Type", "Description"])
|
||||||
+row
|
+row
|
||||||
|
|
|
@ -129,8 +129,8 @@ p
|
||||||
print('Matched!', matches)
|
print('Matched!', matches)
|
||||||
|
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
matcher.add('HelloWorld', [{LOWER: "hello"}, {LOWER: "world"}], on_match=on_match)
|
matcher.add('HelloWorld', on_match, [{LOWER: "hello"}, {LOWER: "world"}])
|
||||||
matcher.add('GoogleMaps', [{ORTH: "Google"}, {ORTH: "Maps"}], on_match=on_match)
|
matcher.add('GoogleMaps', on_match, [{ORTH: "Google"}, {ORTH: "Maps"}])
|
||||||
|
|
||||||
doc = nlp(u'HELLO WORLD on Google Maps.')
|
doc = nlp(u'HELLO WORLD on Google Maps.')
|
||||||
matches = matcher(doc)
|
matches = matcher(doc)
|
||||||
|
@ -141,16 +141,16 @@ p
|
||||||
+cell unicode
|
+cell unicode
|
||||||
+cell An ID for the thing you're matching.
|
+cell An ID for the thing you're matching.
|
||||||
|
|
||||||
+row
|
|
||||||
+cell #[code *patterns]
|
|
||||||
+cell list
|
|
||||||
+cell
|
|
||||||
| Match pattern. A pattern consists of a list of dicts, where each
|
|
||||||
| dict describes a token.
|
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code on_match]
|
+cell #[code on_match]
|
||||||
+cell function
|
+cell function
|
||||||
+cell
|
+cell
|
||||||
| Callback function to act on matches. Takes the arguments
|
| Callback function to act on matches. Takes the arguments
|
||||||
| #[code matcher], #[code doc], #[code i] and #[code matches].
|
| #[code matcher], #[code doc], #[code i] and #[code matches].
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code *patterns]
|
||||||
|
+cell list
|
||||||
|
+cell
|
||||||
|
| Match pattern. A pattern consists of a list of dicts, where each
|
||||||
|
| dict describes a token.
|
||||||
|
|
93
website/docs/api/spacy.jade
Normal file
93
website/docs/api/spacy.jade
Normal file
|
@ -0,0 +1,93 @@
|
||||||
|
//- 💫 DOCS > API > SPACY
|
||||||
|
|
||||||
|
include ../../_includes/_mixins
|
||||||
|
|
||||||
|
+h(2, "load") spacy.load
|
||||||
|
+tag function
|
||||||
|
|
||||||
|
p
|
||||||
|
| Load a model via its #[+a("/docs/usage/models#usage") shortcut link],
|
||||||
|
| the name of an installed
|
||||||
|
| #[+a("/docs/usage/saving-loading#generating") model package], a unicode
|
||||||
|
| path or a #[code Path]-like object. spaCy will try resolving the load
|
||||||
|
| argument in this order. The #[code Language] class to initialise will be
|
||||||
|
| determined based on the model's settings.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
nlp = spacy.load('en') # shortcut link
|
||||||
|
nlp = spacy.load('en_core_web_sm') # package
|
||||||
|
nlp = spacy.load('/path/to/en') # unicode path
|
||||||
|
nlp = spacy.load(Path('/path/to/en')) # pathlib Path
|
||||||
|
|
||||||
|
+infobox("⚠️ Deprecation note")
|
||||||
|
| As of spaCy 2.0, the #[code path] keyword argument is deprecated. spaCy
|
||||||
|
| will also raise an error if no model could be loaded and never just
|
||||||
|
| return an empty #[code Language] object. If you need a blank language,
|
||||||
|
| you need to import it explicitly: #[code from spacy.lang.en import English].
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+row
|
||||||
|
+cell #[code name]
|
||||||
|
+cell unicode or #[code Path]
|
||||||
|
+cell Model to load, i.e. shortcut link, package name or path.
|
||||||
|
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell #[code Language]
|
||||||
|
+cell A #[code Language] object with the loaded model.
|
||||||
|
|
||||||
|
+h(2, "info") spacy.info
|
||||||
|
+tag function
|
||||||
|
|
||||||
|
p
|
||||||
|
| The same as the #[+api("cli#info") #[code info] command]. Pretty-print
|
||||||
|
| information about your installation, models and local setup from within
|
||||||
|
| spaCy. To get the model meta data as a dictionary instead, you can
|
||||||
|
| use the #[code meta] attribute on your #[code nlp] object with a
|
||||||
|
| loaded model, e.g. #[code nlp.meta].
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
spacy.info()
|
||||||
|
spacy.info('en')
|
||||||
|
spacy.info('de', markdown=True)
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+row
|
||||||
|
+cell #[code model]
|
||||||
|
+cell unicode
|
||||||
|
+cell A model, i.e. shortcut link, package name or path (optional).
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[code markdown]
|
||||||
|
+cell bool
|
||||||
|
+cell Print information as Markdown.
|
||||||
|
|
||||||
|
|
||||||
|
+h(2, "explain") spacy.explain
|
||||||
|
+tag function
|
||||||
|
|
||||||
|
p
|
||||||
|
| Get a description for a given POS tag, dependency label or entity type.
|
||||||
|
| For a list of available terms, see
|
||||||
|
| #[+src(gh("spacy", "spacy/glossary.py")) glossary.py].
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
spacy.explain('NORP')
|
||||||
|
# Nationalities or religious or political groups
|
||||||
|
|
||||||
|
doc = nlp(u'Hello world')
|
||||||
|
for word in doc:
|
||||||
|
print(word.text, word.tag_, spacy.explain(word.tag_))
|
||||||
|
# Hello UH interjection
|
||||||
|
# world NN noun, singular or mass
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+row
|
||||||
|
+cell #[code term]
|
||||||
|
+cell unicode
|
||||||
|
+cell Term to explain.
|
||||||
|
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell unicode
|
||||||
|
+cell The explanation, or #[code None] if not found in the glossary.
|
|
@ -14,7 +14,7 @@ p
|
||||||
| recommend having additional tests in place if your application depends on
|
| recommend having additional tests in place if your application depends on
|
||||||
| any of spaCy's utilities.
|
| any of spaCy's utilities.
|
||||||
|
|
||||||
+h(2, "get_data_path") get_data_path
|
+h(2, "get_data_path") util.get_data_path
|
||||||
+tag function
|
+tag function
|
||||||
|
|
||||||
p
|
p
|
||||||
|
@ -32,7 +32,7 @@ p
|
||||||
+cell #[code Path] / #[code None]
|
+cell #[code Path] / #[code None]
|
||||||
+cell Data path or #[code None].
|
+cell Data path or #[code None].
|
||||||
|
|
||||||
+h(2, "set_data_path") set_data_path
|
+h(2, "set_data_path") util.set_data_path
|
||||||
+tag function
|
+tag function
|
||||||
|
|
||||||
p
|
p
|
||||||
|
@ -49,7 +49,7 @@ p
|
||||||
+cell unicode or #[code Path]
|
+cell unicode or #[code Path]
|
||||||
+cell Path to new data directory.
|
+cell Path to new data directory.
|
||||||
|
|
||||||
+h(2, "get_lang_class") get_lang_class
|
+h(2, "get_lang_class") util.get_lang_class
|
||||||
+tag function
|
+tag function
|
||||||
|
|
||||||
p
|
p
|
||||||
|
@ -74,7 +74,7 @@ p
|
||||||
+cell #[code Language]
|
+cell #[code Language]
|
||||||
+cell Language class.
|
+cell Language class.
|
||||||
|
|
||||||
+h(2, "resolve_model_path") resolve_model_path
|
+h(2, "resolve_model_path") util.resolve_model_path
|
||||||
+tag function
|
+tag function
|
||||||
|
|
||||||
p Resolve a model name or string to a model path.
|
p Resolve a model name or string to a model path.
|
||||||
|
@ -94,7 +94,7 @@ p Resolve a model name or string to a model path.
|
||||||
+cell #[code Path]
|
+cell #[code Path]
|
||||||
+cell Path to model data directory.
|
+cell Path to model data directory.
|
||||||
|
|
||||||
+h(2, "is_package") is_package
|
+h(2, "is_package") util.is_package
|
||||||
+tag function
|
+tag function
|
||||||
|
|
||||||
p
|
p
|
||||||
|
@ -116,7 +116,7 @@ p
|
||||||
+cell #[code bool]
|
+cell #[code bool]
|
||||||
+cell #[code True] if installed package, #[code False] if not.
|
+cell #[code True] if installed package, #[code False] if not.
|
||||||
|
|
||||||
+h(2, "get_model_package_path") get_model_package_path
|
+h(2, "get_model_package_path") util.get_model_package_path
|
||||||
+tag function
|
+tag function
|
||||||
|
|
||||||
p
|
p
|
||||||
|
@ -138,7 +138,7 @@ p
|
||||||
+cell #[code Path]
|
+cell #[code Path]
|
||||||
+cell Path to model data directory.
|
+cell Path to model data directory.
|
||||||
|
|
||||||
+h(2, "parse_package_meta") parse_package_meta
|
+h(2, "parse_package_meta") util.parse_package_meta
|
||||||
+tag function
|
+tag function
|
||||||
|
|
||||||
p
|
p
|
||||||
|
@ -167,7 +167,7 @@ p
|
||||||
+cell dict / #[code None]
|
+cell dict / #[code None]
|
||||||
+cell Model meta data or #[code None].
|
+cell Model meta data or #[code None].
|
||||||
|
|
||||||
+h(2, "update_exc") update_exc
|
+h(2, "update_exc") util.update_exc
|
||||||
+tag function
|
+tag function
|
||||||
|
|
||||||
p
|
p
|
||||||
|
@ -199,7 +199,7 @@ p
|
||||||
+cell Combined tokenizer exceptions.
|
+cell Combined tokenizer exceptions.
|
||||||
|
|
||||||
|
|
||||||
+h(2, "prints") prints
|
+h(2, "prints") util.prints
|
||||||
+tag function
|
+tag function
|
||||||
|
|
||||||
p
|
p
|
||||||
|
|
|
@ -124,7 +124,7 @@ p
|
||||||
+cell #[code Lexeme]
|
+cell #[code Lexeme]
|
||||||
+cell The lexeme indicated by the given ID.
|
+cell The lexeme indicated by the given ID.
|
||||||
|
|
||||||
+h(2, "iter") Span.__iter__
|
+h(2, "iter") Vocab.__iter__
|
||||||
+tag method
|
+tag method
|
||||||
|
|
||||||
p Iterate over the lexemes in the vocabulary.
|
p Iterate over the lexemes in the vocabulary.
|
||||||
|
|
|
@ -36,7 +36,9 @@ p
|
||||||
| First, we initialise the #[code Matcher] with a vocab. The matcher must
|
| First, we initialise the #[code Matcher] with a vocab. The matcher must
|
||||||
| always share the same vocab with the documents it will operate on. We
|
| always share the same vocab with the documents it will operate on. We
|
||||||
| can now call #[+api("matcher#add") #[code matcher.add()]] with an ID and
|
| can now call #[+api("matcher#add") #[code matcher.add()]] with an ID and
|
||||||
| our custom pattern:
|
| our custom pattern. The second argument lets you pass in an optional
|
||||||
|
| callback function to invoke on a successful match. For now, we set it
|
||||||
|
| to #[code None].
|
||||||
|
|
||||||
+code.
|
+code.
|
||||||
import spacy
|
import spacy
|
||||||
|
@ -45,7 +47,9 @@ p
|
||||||
|
|
||||||
nlp = spacy.load('en')
|
nlp = spacy.load('en')
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
matcher.add('HelloWorld', [{LOWER: 'hello'}, {IS_PUNCT: True}, {LOWER: 'world'}])
|
# add match ID "HelloWorld" with no callback and one pattern
|
||||||
|
matcher.add('HelloWorld', None,
|
||||||
|
[{LOWER: 'hello'}, {IS_PUNCT: True}, {LOWER: 'world'}])
|
||||||
|
|
||||||
doc = nlp(u'Hello, world! Hello world!')
|
doc = nlp(u'Hello, world! Hello world!')
|
||||||
matches = matcher(doc)
|
matches = matcher(doc)
|
||||||
|
@ -58,8 +62,9 @@ p
|
||||||
| without punctuation between "hello" and "world":
|
| without punctuation between "hello" and "world":
|
||||||
|
|
||||||
+code.
|
+code.
|
||||||
matcher.add('HelloWorld', [{LOWER: 'hello'}, {IS_PUNCT: True}, {LOWER: 'world'}],
|
matcher.add('HelloWorld', None,
|
||||||
[{LOWER: 'hello'}, {LOWER: 'world'}])
|
[{LOWER: 'hello'}, {IS_PUNCT: True}, {LOWER: 'world'}],
|
||||||
|
[{LOWER: 'hello'}, {LOWER: 'world'}])
|
||||||
|
|
||||||
p
|
p
|
||||||
| By default, the matcher will only return the matches and
|
| By default, the matcher will only return the matches and
|
||||||
|
@ -92,9 +97,9 @@ p
|
||||||
nlp = spacy.load('en')
|
nlp = spacy.load('en')
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
|
|
||||||
matcher.add('GoogleIO', [{ORTH: 'Google'}, {UPPER: 'I'}, {ORTH: '/'}, {UPPER: 'O'}],
|
matcher.add('GoogleIO', add_event_ent,
|
||||||
[{ORTH: 'Google'}, {UPPER: 'I'}, {ORTH: '/'}, {UPPER: 'O'}, {IS_DIGIT: True}],
|
[{ORTH: 'Google'}, {UPPER: 'I'}, {ORTH: '/'}, {UPPER: 'O'}],
|
||||||
on_match=add_event_ent)
|
[{ORTH: 'Google'}, {UPPER: 'I'}, {ORTH: '/'}, {UPPER: 'O'}, {IS_DIGIT: True}])
|
||||||
|
|
||||||
# Get the ID of the 'EVENT' entity type. This is required to set an entity.
|
# Get the ID of the 'EVENT' entity type. This is required to set an entity.
|
||||||
EVENT = nlp.vocab.strings['EVENT']
|
EVENT = nlp.vocab.strings['EVENT']
|
||||||
|
@ -114,9 +119,9 @@ p
|
||||||
| function #[code merge_and_flag]:
|
| function #[code merge_and_flag]:
|
||||||
|
|
||||||
+code.
|
+code.
|
||||||
matcher.add('BAD_HTML', [{ORTH: '<'}, {LOWER: 'br'}, {ORTH: '>'}],
|
matcher.add('BAD_HTML', merge_and_flag,
|
||||||
[{ORTH: '<'}, {LOWER: 'br/'}, {ORTH: '>'}]
|
[{ORTH: '<'}, {LOWER: 'br'}, {ORTH: '>'}],
|
||||||
on_match=merge_and_flag)
|
[{ORTH: '<'}, {LOWER: 'br/'}, {ORTH: '>'}])
|
||||||
|
|
||||||
# Add a new custom flag to the vocab, which is always False by default.
|
# Add a new custom flag to the vocab, which is always False by default.
|
||||||
# BAD_HTML_FLAG will be the flag ID, which we can use to set it to True on the span.
|
# BAD_HTML_FLAG will be the flag ID, which we can use to set it to True on the span.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user