Merge branch 'master' into spacy.io

2025-09-19 02:22:43 +03:00 · 2019-09-06 10:31:42 +02:00 · 2019-09-06 10:31:42 +02:00 · 61eaaac98d
commit 61eaaac98d
parent 26f92826f0 6b012cebff
3 changed files with 17 additions and 10 deletions
--- a/spacy/lang/ko/__init__.py
+++ b/spacy/lang/ko/__init__.py
@@ -58,7 +58,8 @@ def check_spaces(text, tokens):
            yield prev_end != idx
        prev_end = idx + len(token)
        start = prev_end
-    yield False
+    if start > 0:
+        yield False
 class KoreanTokenizer(DummyTokenizer):
--- a/spacy/tests/lang/ko/test_tokenizer.py
+++ b/spacy/tests/lang/ko/test_tokenizer.py
@@ -45,3 +45,8 @@ def test_ko_tokenizer_full_tags(ko_tokenizer, text, expected_tags):
 def test_ko_tokenizer_pos(ko_tokenizer, text, expected_pos):
    pos = [token.pos_ for token in ko_tokenizer(text)]
    assert pos == expected_pos.split()
+def test_ko_empty_doc(ko_tokenizer):
+    tokens = ko_tokenizer("")
+    assert len(tokens) == 0
--- a/website/docs/usage/linguistic-features.md
+++ b/website/docs/usage/linguistic-features.md
@@ -26,6 +26,14 @@ import PosDeps101 from 'usage/101/\_pos-deps.md'
 <PosDeps101 />
+<Infobox title="📖 Part-of-speech tag scheme">
+For a list of the fine-grained and coarse-grained part-of-speech tags assigned
+by spaCy's models across different languages, see the
+[POS tag scheme documentation](/api/annotation#pos-tagging).
+</Infobox>
 ### Rule-based morphology {#rule-based-morphology}
 Inflectional morphology is the process by which a root form of a word is
@@ -61,14 +69,7 @@ of the two. The system works as follows:
   morphological information, without consulting the context of the token. The
   lemmatizer also accepts list-based exception files, acquired from
   [WordNet](https://wordnet.princeton.edu/).
-
+   
-<Infobox title="📖 Part-of-speech tag scheme">
-For a list of the fine-grained and coarse-grained part-of-speech tags assigned
-by spaCy's models across different languages, see the
-[POS tag scheme documentation](/api/annotation#pos-tagging).
-</Infobox>
 ## Dependency Parsing {#dependency-parse model="parser"}
@@ -289,7 +290,7 @@ for token in doc:
 For a list of the syntactic dependency labels assigned by spaCy's models across
 different languages, see the
-[dependency label scheme documentation](/api/annotation#pos-tagging).
+[dependency label scheme documentation](/api/annotation#dependency-parsing).
 </Infobox>