diff --git a/spacy/lang/ja/__init__.py b/spacy/lang/ja/__init__.py
index 294c6b38d..39e0445c2 100644
--- a/spacy/lang/ja/__init__.py
+++ b/spacy/lang/ja/__init__.py
@@ -139,6 +139,13 @@ def get_words_lemmas_tags_spaces(dtokens, text, gap_tag=("空白", "")):
     text_tags = []
     text_spaces = []
     text_pos = 0
+    # handle empty and whitespace-only texts
+    if not words:
+        return text_words, text_lemmas, text_tags, text_spaces
+    if all(word.isspace() for word in words):
+        # whitespace-only input: represent the whole text as a single gap token
+        assert text.isspace()
+        return [text], [text], [gap_tag], [False]
     # normalize words to remove all whitespace tokens
     norm_words, norm_dtokens = zip(*[(word, dtokens) for word, dtokens in zip(words, dtokens) if not word.isspace()])
     # align words with text
diff --git a/spacy/tests/lang/ja/test_tokenizer.py b/spacy/tests/lang/ja/test_tokenizer.py
index 82c43fe4c..30cba42b1 100644
--- a/spacy/tests/lang/ja/test_tokenizer.py
+++ b/spacy/tests/lang/ja/test_tokenizer.py
@@ -93,3 +93,12 @@ def test_ja_tokenizer_split_modes(ja_tokenizer, text, len_a, len_b, len_c):
     assert len(nlp_a(text)) == len_a
     assert len(nlp_b(text)) == len_b
     assert len(nlp_c(text)) == len_c
+
+
+def test_ja_tokenizer_emptyish_texts(ja_tokenizer):
+    doc = ja_tokenizer("")
+    assert len(doc) == 0
+    doc = ja_tokenizer(" ")
+    assert len(doc) == 1
+    doc = ja_tokenizer("\n\n\n \t\t \n\n\n")
+    assert len(doc) == 1