Merge remote-tracking branch 'origin/develop' into feature/streaming-data-memory-growth

This commit is contained in:
Matthew Honnibal 2017-10-16 20:49:48 +02:00
commit 4018486d31
3 changed files with 25 additions and 23 deletions

View File

@ -494,7 +494,7 @@ def from_disk(path, readers, exclude):
path = ensure_path(path) path = ensure_path(path)
for key, reader in readers.items(): for key, reader in readers.items():
if key not in exclude: if key not in exclude:
reader(path / key) reader(path2str(path / key))
return path return path
@ -584,4 +584,3 @@ def use_gpu(gpu_id):
Model.ops = CupyOps() Model.ops = CupyOps()
Model.Ops = CupyOps Model.Ops = CupyOps
return device return device

View File

@ -80,7 +80,6 @@
"da": "Danish", "da": "Danish",
"hu": "Hungarian", "hu": "Hungarian",
"pl": "Polish", "pl": "Polish",
"ru": "Russian",
"he": "Hebrew", "he": "Hebrew",
"bn": "Bengali", "bn": "Bengali",
"id": "Indonesian", "id": "Indonesian",

View File

@ -20,8 +20,8 @@ p
doc_de = nlp_de(u'Ich bin ein Berliner.') doc_de = nlp_de(u'Ich bin ein Berliner.')
+infobox +infobox
| #[strong API:] #[+api("spacy#load") #[code spacy.load()]] | #[+label-inline API:] #[+api("spacy#load") #[code spacy.load()]]
| #[strong Usage:] #[+a("/usage/models") Models], | #[+label-inline Usage:] #[+a("/usage/models") Models],
| #[+a("/usage/spacy-101") spaCy 101] | #[+a("/usage/spacy-101") spaCy 101]
+h(3, "lightning-tour-tokens-sentences") Get tokens, noun chunks & sentences +h(3, "lightning-tour-tokens-sentences") Get tokens, noun chunks & sentences
@ -42,8 +42,8 @@ p
assert sentences[1].text == u'Peach is the superior emoji.' assert sentences[1].text == u'Peach is the superior emoji.'
+infobox +infobox
| #[strong API:] #[+api("doc") #[code Doc]], #[+api("token") #[code Token]] | #[+label-inline API:] #[+api("doc") #[code Doc]], #[+api("token") #[code Token]]
| #[strong Usage:] #[+a("/usage/spacy-101") spaCy 101] | #[+label-inline Usage:] #[+a("/usage/spacy-101") spaCy 101]
+h(3, "lightning-tour-pos-tags") Get part-of-speech tags and flags +h(3, "lightning-tour-pos-tags") Get part-of-speech tags and flags
+tag-model("tagger") +tag-model("tagger")
@ -63,8 +63,8 @@ p
assert billion.like_email == False assert billion.like_email == False
+infobox +infobox
| #[strong API:] #[+api("token") #[code Token]] | #[+label-inline API:] #[+api("token") #[code Token]]
| #[strong Usage:] #[+a("/usage/linguistic-features#pos-tagging") Part-of-speech tagging] | #[+label-inline Usage:] #[+a("/usage/linguistic-features#pos-tagging") Part-of-speech tagging]
+h(3, "lightning-tour-hashes") Use hash values for any string +h(3, "lightning-tour-hashes") Use hash values for any string
@ -83,8 +83,8 @@ p
unicorn_text = doc.vocab.strings[unicorn_hash] # '🦄 ' unicorn_text = doc.vocab.strings[unicorn_hash] # '🦄 '
+infobox +infobox
| #[strong API:] #[+api("stringstore") #[code stringstore]] | #[+label-inline API:] #[+api("stringstore") #[code stringstore]]
| #[strong Usage:] #[+a("/usage/spacy-101#vocab") Vocab, hashes and lexemes 101] | #[+label-inline Usage:] #[+a("/usage/spacy-101#vocab") Vocab, hashes and lexemes 101]
+h(3, "lightning-tour-entities") Recognise and update named entities +h(3, "lightning-tour-entities") Recognise and update named entities
+tag-model("NER") +tag-model("NER")
@ -101,7 +101,7 @@ p
assert ents == [(0, 7, u'ORG')] assert ents == [(0, 7, u'ORG')]
+infobox +infobox
| #[strong Usage:] #[+a("/usage/linguistic-features#named-entities") Named entity recognition] | #[+label-inline Usage:] #[+a("/usage/linguistic-features#named-entities") Named entity recognition]
+h(3, "lightning-tour-displacy") Visualize a dependency parse and named entities in your browser +h(3, "lightning-tour-displacy") Visualize a dependency parse and named entities in your browser
+tag-model("dependency parse", "NER") +tag-model("dependency parse", "NER")
@ -153,8 +153,8 @@ p
displacy.serve(doc_ent, style='ent') displacy.serve(doc_ent, style='ent')
+infobox +infobox
| #[strong API:] #[+api("displacy") #[code displacy]] | #[+label-inline API:] #[+api("displacy") #[code displacy]]
| #[strong Usage:] #[+a("/usage/visualizers") Visualizers] | #[+label-inline Usage:] #[+a("/usage/visualizers") Visualizers]
+h(3, "lightning-tour-word-vectors") Get word vectors and similarity +h(3, "lightning-tour-word-vectors") Get word vectors and similarity
+tag-model("word vectors") +tag-model("word vectors")
@ -168,8 +168,12 @@ p
assert apple.similarity(banana) > pasta.similarity(hippo) assert apple.similarity(banana) > pasta.similarity(hippo)
assert apple.has_vector, banana.has_vector, pasta.has_vector, hippo.has_vector assert apple.has_vector, banana.has_vector, pasta.has_vector, hippo.has_vector
p
| For the best results, you should run this example using the
| #[+a("/models/en#en_vectors_web_lg") #[code en_vectors_web_lg]] model.
+infobox +infobox
| #[strong Usage:] #[+a("/usage/vectors-similarity") Word vectors and similarity] | #[+label-inline Usage:] #[+a("/usage/vectors-similarity") Word vectors and similarity]
+h(3, "lightning-tour-serialization") Simple and efficient serialization +h(3, "lightning-tour-serialization") Simple and efficient serialization
@ -186,9 +190,9 @@ p
new_doc = Doc(Vocab()).from_disk('/moby_dick.bin') new_doc = Doc(Vocab()).from_disk('/moby_dick.bin')
+infobox +infobox
| #[strong API:] #[+api("language") #[code Language]], | #[+label-inline API:] #[+api("language") #[code Language]],
| #[+api("doc") #[code Doc]] | #[+api("doc") #[code Doc]]
| #[strong Usage:] #[+a("/usage/models#saving-loading") Saving and loading models] | #[+label-inline Usage:] #[+a("/usage/models#saving-loading") Saving and loading models]
+h(3, "lightning-tour-rule-matcher") Match text with token rules +h(3, "lightning-tour-rule-matcher") Match text with token rules
@ -209,8 +213,8 @@ p
matches = nlp(LOTS_OF TEXT) matches = nlp(LOTS_OF TEXT)
+infobox +infobox
| #[strong API:] #[+api("matcher") #[code Matcher]] | #[+label-inline API:] #[+api("matcher") #[code Matcher]]
| #[strong Usage:] #[+a("/usage/linguistic-features#rule-based-matching") Rule-based matching] | #[+label-inline Usage:] #[+a("/usage/linguistic-features#rule-based-matching") Rule-based matching]
+h(3, "lightning-tour-multi-threaded") Multi-threaded generator +h(3, "lightning-tour-multi-threaded") Multi-threaded generator
@ -224,8 +228,8 @@ p
break break
+infobox +infobox
| #[strong API:] #[+api("doc") #[code Doc]] | #[+label-inline API:] #[+api("doc") #[code Doc]]
| #[strong Usage:] #[+a("/usage/processing-pipelines#multithreading") Processing pipelines] | #[+label-inline Usage:] #[+a("/usage/processing-pipelines#multithreading") Processing pipelines]
+h(3, "lightning-tour-dependencies") Get syntactic dependencies +h(3, "lightning-tour-dependencies") Get syntactic dependencies
+tag-model("dependency parse") +tag-model("dependency parse")
@ -240,8 +244,8 @@ p
return dep_labels return dep_labels
+infobox +infobox
| #[strong API:] #[+api("token") #[code Token]] | #[+label-inline API:] #[+api("token") #[code Token]]
| #[strong Usage:] #[+a("/usage/linguistic-features#dependency-parse") Using the dependency parse] | #[+label-inline Usage:] #[+a("/usage/linguistic-features#dependency-parse") Using the dependency parse]
+h(3, "lightning-tour-numpy-arrays") Export to numpy arrays +h(3, "lightning-tour-numpy-arrays") Export to numpy arrays