diff --git a/spacy/lang/ko/__init__.py b/spacy/lang/ko/__init__.py
index 6dc6456e5..c8cd9c3fd 100644
--- a/spacy/lang/ko/__init__.py
+++ b/spacy/lang/ko/__init__.py
@@ -58,7 +58,8 @@ def check_spaces(text, tokens):
yield prev_end != idx
prev_end = idx + len(token)
start = prev_end
- yield False
+ if start > 0:
+ yield False
class KoreanTokenizer(DummyTokenizer):
diff --git a/spacy/tests/lang/ko/test_tokenizer.py b/spacy/tests/lang/ko/test_tokenizer.py
index 531a41d0b..b8fe7959c 100644
--- a/spacy/tests/lang/ko/test_tokenizer.py
+++ b/spacy/tests/lang/ko/test_tokenizer.py
@@ -45,3 +45,8 @@ def test_ko_tokenizer_full_tags(ko_tokenizer, text, expected_tags):
def test_ko_tokenizer_pos(ko_tokenizer, text, expected_pos):
pos = [token.pos_ for token in ko_tokenizer(text)]
assert pos == expected_pos.split()
+
+
+def test_ko_empty_doc(ko_tokenizer):
+ tokens = ko_tokenizer("")  # tokenize the empty string
+ assert len(tokens) == 0  # empty input must produce a result with no tokens
diff --git a/website/docs/usage/linguistic-features.md b/website/docs/usage/linguistic-features.md
index 5c1e56157..66ad816f5 100644
--- a/website/docs/usage/linguistic-features.md
+++ b/website/docs/usage/linguistic-features.md
@@ -26,6 +26,14 @@ import PosDeps101 from 'usage/101/\_pos-deps.md'
+
+
+For a list of the fine-grained and coarse-grained part-of-speech tags assigned
+by spaCy's models across different languages, see the
+[POS tag scheme documentation](/api/annotation#pos-tagging).
+
+
+
### Rule-based morphology {#rule-based-morphology}
Inflectional morphology is the process by which a root form of a word is
@@ -61,14 +69,7 @@ of the two. The system works as follows:
morphological information, without consulting the context of the token. The
lemmatizer also accepts list-based exception files, acquired from
[WordNet](https://wordnet.princeton.edu/).
-
-
-
-For a list of the fine-grained and coarse-grained part-of-speech tags assigned
-by spaCy's models across different languages, see the
-[POS tag scheme documentation](/api/annotation#pos-tagging).
-
-
+
## Dependency Parsing {#dependency-parse model="parser"}
@@ -289,7 +290,7 @@ for token in doc:
For a list of the syntactic dependency labels assigned by spaCy's models across
different languages, see the
-[dependency label scheme documentation](/api/annotation#pos-tagging).
+[dependency label scheme documentation](/api/annotation#dependency-parsing).