Mirror of https://github.com/explosion/spaCy.git

commit 4018486d31
Merge remote-tracking branch 'origin/develop' into feature/streaming-data-memory-growth
@@ -494,7 +494,7 @@ def from_disk(path, readers, exclude):
     path = ensure_path(path)
     for key, reader in readers.items():
         if key not in exclude:
-            reader(path / key)
+            reader(path2str(path / key))
     return path
 
 
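This hunk converts the pathlib path to a plain string before handing it to each reader. For context, here is a minimal, self-contained sketch of the same pattern; the readers and the model directory are hypothetical, and `path2str` here stands in for the compatibility shim, which on Python 3 boils down to `str()`:

    from pathlib import Path

    def path2str(path):
        # Python 3 equivalent of the compat shim: Path -> str.
        return str(path)

    def from_disk(path, readers, exclude):
        # Call each reader with a string path, so readers backed by
        # C code or Python 2 APIs never see a pathlib.Path object.
        path = Path(path)
        for key, reader in readers.items():
            if key not in exclude:
                reader(path2str(path / key))
        return path

    # Hypothetical usage: 'vocab' is loaded, 'tokenizer' is skipped.
    readers = {
        'vocab': lambda p: print('reading', p),
        'tokenizer': lambda p: print('reading', p),
    }
    from_disk('/tmp/model', readers, exclude=['tokenizer'])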
@@ -584,4 +584,3 @@ def use_gpu(gpu_id):
     Model.ops = CupyOps()
     Model.Ops = CupyOps
     return device
-
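`use_gpu` swaps thinc's backend ops onto the GPU. A hedged sketch of how it is typically called, assuming the spaCy 2.x behaviour of returning None when CuPy is not importable; the model name is an example:

    import spacy

    # Requires CuPy and a CUDA device to actually move ops to the GPU;
    # degrades gracefully on CPU-only machines.
    device = spacy.util.use_gpu(0)
    if device is None:
        print('CuPy not available; staying on CPU')

    nlp = spacy.load('en_core_web_sm')  # assumes this model is installed
    doc = nlp(u'GPU or not, the pipeline runs the same way.')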
@@ -80,7 +80,6 @@
     "da": "Danish",
     "hu": "Hungarian",
     "pl": "Polish",
     "ru": "Russian",
     "he": "Hebrew",
     "bn": "Bengali",
     "id": "Indonesian",
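The table above maps ISO codes to language display names. Any code in spaCy's language registry can be turned into a tokenizer-only pipeline; a small sketch using the spaCy 2.x `spacy.blank` API (note that some languages pull in extra dependencies, e.g. Russian needs pymorphy2):

    import spacy

    LANGUAGES = {
        'da': 'Danish', 'hu': 'Hungarian', 'pl': 'Polish', 'ru': 'Russian',
        'he': 'Hebrew', 'bn': 'Bengali', 'id': 'Indonesian',
    }
    for code, name in LANGUAGES.items():
        # spacy.blank(code) builds a pipeline with just that language's
        # tokenizer rules -- no statistical models required.
        nlp = spacy.blank(code)
        print(code, name, type(nlp).__name__)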
@@ -20,8 +20,8 @@ p
     doc_de = nlp_de(u'Ich bin ein Berliner.')
 
 +infobox
-    | #[strong API:] #[+api("spacy#load") #[code spacy.load()]]
-    | #[strong Usage:] #[+a("/usage/models") Models],
+    | #[+label-inline API:] #[+api("spacy#load") #[code spacy.load()]]
+    | #[+label-inline Usage:] #[+a("/usage/models") Models],
     | #[+a("/usage/spacy-101") spaCy 101]
 
 +h(3, "lightning-tour-tokens-sentences") Get tokens, noun chunks & sentences
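The docs section this hunk edits demonstrates `spacy.load`. A quick illustration, assuming English and German models are installed and linked under these shortcut names (e.g. via `python -m spacy download de`):

    import spacy

    nlp = spacy.load('en')
    doc = nlp(u'Hello, world. Here are two sentences.')

    nlp_de = spacy.load('de')
    doc_de = nlp_de(u'Ich bin ein Berliner.')
    print(len(list(doc.sents)), doc_de[0].text)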
@@ -42,8 +42,8 @@ p
     assert sentences[1].text == u'Peach is the superior emoji.'
 
 +infobox
-    | #[strong API:] #[+api("doc") #[code Doc]], #[+api("token") #[code Token]]
-    | #[strong Usage:] #[+a("/usage/spacy-101") spaCy 101]
+    | #[+label-inline API:] #[+api("doc") #[code Doc]], #[+api("token") #[code Token]]
+    | #[+label-inline Usage:] #[+a("/usage/spacy-101") spaCy 101]
 
 +h(3, "lightning-tour-pos-tags") Get part-of-speech tags and flags
 +tag-model("tagger")
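For reference, a minimal version of the tokens/sentences example this section documents; the text matches the assert shown in the hunk, the model name is an assumption:

    import spacy

    nlp = spacy.load('en_core_web_sm')  # any English model with a parser
    doc = nlp(u'Peach emoji is where it has always been. '
              u'Peach is the superior emoji.')
    sentences = list(doc.sents)                       # sentence spans
    assert sentences[1].text == u'Peach is the superior emoji.'
    print([chunk.text for chunk in doc.noun_chunks])  # noun chunks
    print([token.text for token in doc])              # tokens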
@@ -63,8 +63,8 @@ p
     assert billion.like_email == False
 
 +infobox
-    | #[strong API:] #[+api("token") #[code Token]]
-    | #[strong Usage:] #[+a("/usage/linguistic-features#pos-tagging") Part-of-speech tagging]
+    | #[+label-inline API:] #[+api("token") #[code Token]]
+    | #[+label-inline Usage:] #[+a("/usage/linguistic-features#pos-tagging") Part-of-speech tagging]
 
 +h(3, "lightning-tour-hashes") Use hash values for any string
 
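A short sketch of the POS tags and orthographic flags this section covers; the model name and example sentence are assumptions, the `like_email` flag mirrors the assert in the hunk:

    import spacy

    nlp = spacy.load('en_core_web_sm')  # assumes a model with a tagger
    doc = nlp(u'Apple is looking at buying U.K. startup for $1 billion')
    apple, billion = doc[0], doc[-1]
    print(apple.pos_, apple.tag_)        # coarse- and fine-grained POS tags
    assert billion.like_email == False   # lexical flag, set by rule
    print(billion.like_num)              # flags also cover number-like tokens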
@@ -83,8 +83,8 @@ p
     unicorn_text = doc.vocab.strings[unicorn_hash]  # '🦄'
 
 +infobox
-    | #[strong API:] #[+api("stringstore") #[code stringstore]]
-    | #[strong Usage:] #[+a("/usage/spacy-101#vocab") Vocab, hashes and lexemes 101]
+    | #[+label-inline API:] #[+api("stringstore") #[code stringstore]]
+    | #[+label-inline Usage:] #[+a("/usage/spacy-101#vocab") Vocab, hashes and lexemes 101]
 
 +h(3, "lightning-tour-entities") Recognise and update named entities
 +tag-model("NER")
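The StringStore maps strings to 64-bit hashes and back; a self-contained sketch of the round trip this section documents (text and model name are illustrative):

    import spacy

    nlp = spacy.load('en_core_web_sm')
    doc = nlp(u'I love coffee')
    coffee_hash = nlp.vocab.strings[u'coffee']    # string -> uint64 hash
    coffee_text = doc.vocab.strings[coffee_hash]  # hash -> original string
    assert coffee_text == u'coffee'
    print(coffee_hash)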
@@ -101,7 +101,7 @@ p
     assert ents == [(0, 7, u'ORG')]
 
 +infobox
-    | #[strong Usage:] #[+a("/usage/linguistic-features#named-entities") Named entity recognition]
+    | #[+label-inline Usage:] #[+a("/usage/linguistic-features#named-entities") Named entity recognition]
 
 +h(3, "lightning-tour-displacy") Visualize a dependency parse and named entities in your browser
 +tag-model("dependency parse", "NER")
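Reading `doc.ents` as (start_char, end_char, label) tuples, as the assert in the hunk does; the text and the expected output are illustrative:

    import spacy

    nlp = spacy.load('en_core_web_sm')  # assumes a model with NER
    doc = nlp(u'Netflix is raising prices again')
    ents = [(ent.start_char, ent.end_char, ent.label_) for ent in doc.ents]
    print(ents)  # e.g. [(0, 7, u'ORG')] if 'Netflix' is tagged as an org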
@@ -153,8 +153,8 @@ p
     displacy.serve(doc_ent, style='ent')
 
 +infobox
-    | #[strong API:] #[+api("displacy") #[code displacy]]
-    | #[strong Usage:] #[+a("/usage/visualizers") Visualizers]
+    | #[+label-inline API:] #[+api("displacy") #[code displacy]]
+    | #[+label-inline Usage:] #[+a("/usage/visualizers") Visualizers]
 
 +h(3, "lightning-tour-word-vectors") Get word vectors and similarity
 +tag-model("word vectors")
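`displacy.serve` spins up a local web server and renders the visualisation in the browser; a minimal sketch with an assumed text and model:

    import spacy
    from spacy import displacy

    nlp = spacy.load('en_core_web_sm')
    doc_ent = nlp(u'Google bought a London startup in 2017')
    # style='ent' highlights named entities; style='dep' draws the parse.
    displacy.serve(doc_ent, style='ent')  # serves on http://localhost:5000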
@@ -168,8 +168,12 @@ p
     assert apple.similarity(banana) > pasta.similarity(hippo)
     assert apple.has_vector, banana.has_vector, pasta.has_vector, hippo.has_vector
 
+p
+    | For the best results, you should run this example using the
+    | #[+a("/models/en#en_vectors_web_lg") #[code en_vectors_web_lg]] model.
+
 +infobox
-    | #[strong Usage:] #[+a("/usage/vectors-similarity") Word vectors and similarity]
+    | #[+label-inline Usage:] #[+a("/usage/vectors-similarity") Word vectors and similarity]
 
 +h(3, "lightning-tour-serialization") Simple and efficient serialization
 
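A runnable version of the similarity example; the model name is an assumption, and the `has_vector` checks are joined with `and` here, since the comma-separated assert shown in the hunk would not express what it appears to:

    import spacy

    # Needs a model that ships word vectors; the note added in the hunk
    # recommends en_vectors_web_lg, and en_core_web_md also works.
    nlp = spacy.load('en_core_web_md')
    apple, banana, pasta, hippo = nlp(u'apple banana pasta hippo')
    assert apple.has_vector and banana.has_vector \
        and pasta.has_vector and hippo.has_vector
    print(apple.similarity(banana))  # cosine similarity, higher = closer
    print(pasta.similarity(hippo))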
@@ -186,9 +190,9 @@ p
     new_doc = Doc(Vocab()).from_disk('/moby_dick.bin')
 
 +infobox
-    | #[strong API:] #[+api("language") #[code Language]],
+    | #[+label-inline API:] #[+api("language") #[code Language]],
     | #[+api("doc") #[code Doc]]
-    | #[strong Usage:] #[+a("/usage/models#saving-loading") Saving and loading models]
+    | #[+label-inline Usage:] #[+a("/usage/models#saving-loading") Saving and loading models]
 
 +h(3, "lightning-tour-rule-matcher") Match text with token rules
 
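The round trip this section shows: write a Doc to disk, then rehydrate it into a fresh Doc. A sketch using a writable temp path in place of the original '/moby_dick.bin':

    import spacy
    from spacy.tokens import Doc
    from spacy.vocab import Vocab

    nlp = spacy.load('en_core_web_sm')
    doc = nlp(u'Call me Ishmael.')
    doc.to_disk('/tmp/moby_dick.bin')                  # binary serialization
    new_doc = Doc(Vocab()).from_disk('/tmp/moby_dick.bin')
    print(new_doc.text)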
@@ -209,8 +213,8 @@ p
     matches = nlp(LOTS_OF_TEXT)
 
 +infobox
-    | #[strong API:] #[+api("matcher") #[code Matcher]]
-    | #[strong Usage:] #[+a("/usage/linguistic-features#rule-based-matching") Rule-based matching]
+    | #[+label-inline API:] #[+api("matcher") #[code Matcher]]
+    | #[+label-inline Usage:] #[+a("/usage/linguistic-features#rule-based-matching") Rule-based matching]
 
 +h(3, "lightning-tour-multi-threaded") Multi-threaded generator
 
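A self-contained Matcher example in the spirit of this section; the pattern and text are illustrative, and LOTS_OF_TEXT in the hunk is a placeholder:

    import spacy
    from spacy.matcher import Matcher

    nlp = spacy.load('en_core_web_sm')
    matcher = Matcher(nlp.vocab)
    # Token pattern: 'hello', optional punctuation, 'world' (spaCy 2.x API,
    # where the second argument to add() is an optional on_match callback).
    pattern = [{'LOWER': 'hello'}, {'IS_PUNCT': True, 'OP': '?'},
               {'LOWER': 'world'}]
    matcher.add('HelloWorld', None, pattern)
    doc = nlp(u'Hello, world! Hello world!')
    for match_id, start, end in matcher(doc):
        print(doc[start:end].text)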
@@ -224,8 +228,8 @@ p
         break
 
 +infobox
-    | #[strong API:] #[+api("doc") #[code Doc]]
-    | #[strong Usage:] #[+a("/usage/processing-pipelines#multithreading") Processing pipelines]
+    | #[+label-inline API:] #[+api("doc") #[code Doc]]
+    | #[+label-inline Usage:] #[+a("/usage/processing-pipelines#multithreading") Processing pipelines]
 
 +h(3, "lightning-tour-dependencies") Get syntactic dependencies
 +tag-model("dependency parse")
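`nlp.pipe` is the streaming entry point this section documents; a sketch with illustrative texts, using the spaCy 2.x `n_threads` hint (later deprecated) for releasing the GIL in Cython components:

    import spacy

    nlp = spacy.load('en_core_web_sm')
    texts = [u'One document.', u'Another document.', u'And one more.']
    # Streams Doc objects instead of building a list; batch_size and
    # n_threads tune throughput.
    for doc in nlp.pipe(texts, batch_size=2, n_threads=2):
        print(doc[0].text)
        break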
@@ -240,8 +244,8 @@ p
         return dep_labels
 
 +infobox
-    | #[strong API:] #[+api("token") #[code Token]]
-    | #[strong Usage:] #[+a("/usage/linguistic-features#dependency-parse") Using the dependency parse]
+    | #[+label-inline API:] #[+api("token") #[code Token]]
+    | #[+label-inline Usage:] #[+a("/usage/linguistic-features#dependency-parse") Using the dependency parse]
 
 +h(3, "lightning-tour-numpy-arrays") Export to numpy arrays
 
 
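The `return dep_labels` context line belongs to a helper that climbs the parse tree; a complete version under assumed names, with an illustrative sentence:

    import spacy

    nlp = spacy.load('en_core_web_sm')  # assumes a model with a parser

    def dependency_labels_to_root(token):
        # Walk up the tree from token to the sentence root,
        # collecting the dependency label of each arc on the way.
        dep_labels = []
        while token.head is not token:
            dep_labels.append(token.dep_)
            token = token.head
        return dep_labels

    doc = nlp(u'Autonomous cars shift insurance liability toward manufacturers')
    print(dependency_labels_to_root(doc[0]))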