mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-25 00:34:20 +03:00
Merge remote-tracking branch 'origin/develop' into feature/streaming-data-memory-growth
This commit is contained in:
commit
4018486d31
|
@ -494,7 +494,7 @@ def from_disk(path, readers, exclude):
|
||||||
path = ensure_path(path)
|
path = ensure_path(path)
|
||||||
for key, reader in readers.items():
|
for key, reader in readers.items():
|
||||||
if key not in exclude:
|
if key not in exclude:
|
||||||
reader(path / key)
|
reader(path2str(path / key))
|
||||||
return path
|
return path
|
||||||
|
|
||||||
|
|
||||||
|
@ -584,4 +584,3 @@ def use_gpu(gpu_id):
|
||||||
Model.ops = CupyOps()
|
Model.ops = CupyOps()
|
||||||
Model.Ops = CupyOps
|
Model.Ops = CupyOps
|
||||||
return device
|
return device
|
||||||
|
|
||||||
|
|
|
@ -80,7 +80,6 @@
|
||||||
"da": "Danish",
|
"da": "Danish",
|
||||||
"hu": "Hungarian",
|
"hu": "Hungarian",
|
||||||
"pl": "Polish",
|
"pl": "Polish",
|
||||||
"ru": "Russian",
|
|
||||||
"he": "Hebrew",
|
"he": "Hebrew",
|
||||||
"bn": "Bengali",
|
"bn": "Bengali",
|
||||||
"id": "Indonesian",
|
"id": "Indonesian",
|
||||||
|
|
|
@ -20,8 +20,8 @@ p
|
||||||
doc_de = nlp_de(u'Ich bin ein Berliner.')
|
doc_de = nlp_de(u'Ich bin ein Berliner.')
|
||||||
|
|
||||||
+infobox
|
+infobox
|
||||||
| #[strong API:] #[+api("spacy#load") #[code spacy.load()]]
|
| #[+label-inline API:] #[+api("spacy#load") #[code spacy.load()]]
|
||||||
| #[strong Usage:] #[+a("/usage/models") Models],
|
| #[+label-inline Usage:] #[+a("/usage/models") Models],
|
||||||
| #[+a("/usage/spacy-101") spaCy 101]
|
| #[+a("/usage/spacy-101") spaCy 101]
|
||||||
|
|
||||||
+h(3, "lightning-tour-tokens-sentences") Get tokens, noun chunks & sentences
|
+h(3, "lightning-tour-tokens-sentences") Get tokens, noun chunks & sentences
|
||||||
|
@ -42,8 +42,8 @@ p
|
||||||
assert sentences[1].text == u'Peach is the superior emoji.'
|
assert sentences[1].text == u'Peach is the superior emoji.'
|
||||||
|
|
||||||
+infobox
|
+infobox
|
||||||
| #[strong API:] #[+api("doc") #[code Doc]], #[+api("token") #[code Token]]
|
| #[+label-inline API:] #[+api("doc") #[code Doc]], #[+api("token") #[code Token]]
|
||||||
| #[strong Usage:] #[+a("/usage/spacy-101") spaCy 101]
|
| #[+label-inline Usage:] #[+a("/usage/spacy-101") spaCy 101]
|
||||||
|
|
||||||
+h(3, "lightning-tour-pos-tags") Get part-of-speech tags and flags
|
+h(3, "lightning-tour-pos-tags") Get part-of-speech tags and flags
|
||||||
+tag-model("tagger")
|
+tag-model("tagger")
|
||||||
|
@ -63,8 +63,8 @@ p
|
||||||
assert billion.like_email == False
|
assert billion.like_email == False
|
||||||
|
|
||||||
+infobox
|
+infobox
|
||||||
| #[strong API:] #[+api("token") #[code Token]]
|
| #[+label-inline API:] #[+api("token") #[code Token]]
|
||||||
| #[strong Usage:] #[+a("/usage/linguistic-features#pos-tagging") Part-of-speech tagging]
|
| #[+label-inline Usage:] #[+a("/usage/linguistic-features#pos-tagging") Part-of-speech tagging]
|
||||||
|
|
||||||
+h(3, "lightning-tour-hashes") Use hash values for any string
|
+h(3, "lightning-tour-hashes") Use hash values for any string
|
||||||
|
|
||||||
|
@ -83,8 +83,8 @@ p
|
||||||
unicorn_text = doc.vocab.strings[unicorn_hash] # '🦄 '
|
unicorn_text = doc.vocab.strings[unicorn_hash] # '🦄 '
|
||||||
|
|
||||||
+infobox
|
+infobox
|
||||||
| #[strong API:] #[+api("stringstore") #[code StringStore]]
|
| #[+label-inline API:] #[+api("stringstore") #[code StringStore]]
|
||||||
| #[strong Usage:] #[+a("/usage/spacy-101#vocab") Vocab, hashes and lexemes 101]
|
| #[+label-inline Usage:] #[+a("/usage/spacy-101#vocab") Vocab, hashes and lexemes 101]
|
||||||
|
|
||||||
+h(3, "lightning-tour-entities") Recognise and update named entities
|
+h(3, "lightning-tour-entities") Recognise and update named entities
|
||||||
+tag-model("NER")
|
+tag-model("NER")
|
||||||
|
@ -101,7 +101,7 @@ p
|
||||||
assert ents == [(0, 7, u'ORG')]
|
assert ents == [(0, 7, u'ORG')]
|
||||||
|
|
||||||
+infobox
|
+infobox
|
||||||
| #[strong Usage:] #[+a("/usage/linguistic-features#named-entities") Named entity recognition]
|
| #[+label-inline Usage:] #[+a("/usage/linguistic-features#named-entities") Named entity recognition]
|
||||||
|
|
||||||
+h(3, "lightning-tour-displacy") Visualize a dependency parse and named entities in your browser
|
+h(3, "lightning-tour-displacy") Visualize a dependency parse and named entities in your browser
|
||||||
+tag-model("dependency parse", "NER")
|
+tag-model("dependency parse", "NER")
|
||||||
|
@ -153,8 +153,8 @@ p
|
||||||
displacy.serve(doc_ent, style='ent')
|
displacy.serve(doc_ent, style='ent')
|
||||||
|
|
||||||
+infobox
|
+infobox
|
||||||
| #[strong API:] #[+api("displacy") #[code displacy]]
|
| #[+label-inline API:] #[+api("displacy") #[code displacy]]
|
||||||
| #[strong Usage:] #[+a("/usage/visualizers") Visualizers]
|
| #[+label-inline Usage:] #[+a("/usage/visualizers") Visualizers]
|
||||||
|
|
||||||
+h(3, "lightning-tour-word-vectors") Get word vectors and similarity
|
+h(3, "lightning-tour-word-vectors") Get word vectors and similarity
|
||||||
+tag-model("word vectors")
|
+tag-model("word vectors")
|
||||||
|
@ -168,8 +168,12 @@ p
|
||||||
assert apple.similarity(banana) > pasta.similarity(hippo)
|
assert apple.similarity(banana) > pasta.similarity(hippo)
|
||||||
assert apple.has_vector, banana.has_vector, pasta.has_vector, hippo.has_vector
|
assert apple.has_vector, banana.has_vector, pasta.has_vector, hippo.has_vector
|
||||||
|
|
||||||
|
p
|
||||||
|
| For the best results, you should run this example using the
|
||||||
|
| #[+a("/models/en#en_vectors_web_lg") #[code en_vectors_web_lg]] model.
|
||||||
|
|
||||||
+infobox
|
+infobox
|
||||||
| #[strong Usage:] #[+a("/usage/vectors-similarity") Word vectors and similarity]
|
| #[+label-inline Usage:] #[+a("/usage/vectors-similarity") Word vectors and similarity]
|
||||||
|
|
||||||
+h(3, "lightning-tour-serialization") Simple and efficient serialization
|
+h(3, "lightning-tour-serialization") Simple and efficient serialization
|
||||||
|
|
||||||
|
@ -186,9 +190,9 @@ p
|
||||||
new_doc = Doc(Vocab()).from_disk('/moby_dick.bin')
|
new_doc = Doc(Vocab()).from_disk('/moby_dick.bin')
|
||||||
|
|
||||||
+infobox
|
+infobox
|
||||||
| #[strong API:] #[+api("language") #[code Language]],
|
| #[+label-inline API:] #[+api("language") #[code Language]],
|
||||||
| #[+api("doc") #[code Doc]]
|
| #[+api("doc") #[code Doc]]
|
||||||
| #[strong Usage:] #[+a("/usage/models#saving-loading") Saving and loading models]
|
| #[+label-inline Usage:] #[+a("/usage/models#saving-loading") Saving and loading models]
|
||||||
|
|
||||||
+h(3, "lightning-tour-rule-matcher") Match text with token rules
|
+h(3, "lightning-tour-rule-matcher") Match text with token rules
|
||||||
|
|
||||||
|
@ -209,8 +213,8 @@ p
|
||||||
matches = nlp(LOTS_OF TEXT)
|
matches = nlp(LOTS_OF TEXT)
|
||||||
|
|
||||||
+infobox
|
+infobox
|
||||||
| #[strong API:] #[+api("matcher") #[code Matcher]]
|
| #[+label-inline API:] #[+api("matcher") #[code Matcher]]
|
||||||
| #[strong Usage:] #[+a("/usage/linguistic-features#rule-based-matching") Rule-based matching]
|
| #[+label-inline Usage:] #[+a("/usage/linguistic-features#rule-based-matching") Rule-based matching]
|
||||||
|
|
||||||
+h(3, "lightning-tour-multi-threaded") Multi-threaded generator
|
+h(3, "lightning-tour-multi-threaded") Multi-threaded generator
|
||||||
|
|
||||||
|
@ -224,8 +228,8 @@ p
|
||||||
break
|
break
|
||||||
|
|
||||||
+infobox
|
+infobox
|
||||||
| #[strong API:] #[+api("doc") #[code Doc]]
|
| #[+label-inline API:] #[+api("doc") #[code Doc]]
|
||||||
| #[strong Usage:] #[+a("/usage/processing-pipelines#multithreading") Processing pipelines]
|
| #[+label-inline Usage:] #[+a("/usage/processing-pipelines#multithreading") Processing pipelines]
|
||||||
|
|
||||||
+h(3, "lightning-tour-dependencies") Get syntactic dependencies
|
+h(3, "lightning-tour-dependencies") Get syntactic dependencies
|
||||||
+tag-model("dependency parse")
|
+tag-model("dependency parse")
|
||||||
|
@ -240,8 +244,8 @@ p
|
||||||
return dep_labels
|
return dep_labels
|
||||||
|
|
||||||
+infobox
|
+infobox
|
||||||
| #[strong API:] #[+api("token") #[code Token]]
|
| #[+label-inline API:] #[+api("token") #[code Token]]
|
||||||
| #[strong Usage:] #[+a("/usage/linguistic-features#dependency-parse") Using the dependency parse]
|
| #[+label-inline Usage:] #[+a("/usage/linguistic-features#dependency-parse") Using the dependency parse]
|
||||||
|
|
||||||
+h(3, "lightning-tour-numpy-arrays") Export to numpy arrays
|
+h(3, "lightning-tour-numpy-arrays") Export to numpy arrays
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user