diff --git a/spacy/lang/ja/__init__.py b/spacy/lang/ja/__init__.py
index 3a9c58fca..04cc013a4 100644
--- a/spacy/lang/ja/__init__.py
+++ b/spacy/lang/ja/__init__.py
@@ -33,8 +33,7 @@ class Japanese(Language):
     Defaults = JapaneseDefaults
 
     def make_doc(self, text):
-        words = self.tokenizer(text)
-        return Doc(self.vocab, words=words, spaces=[False]*len(words))
+        return self.tokenizer(text)
 
 
 __all__ = ['Japanese']
diff --git a/spacy/lang/ja/examples.py b/spacy/lang/ja/examples.py
new file mode 100644
index 000000000..623609205
--- /dev/null
+++ b/spacy/lang/ja/examples.py
@@ -0,0 +1,18 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+
+"""
+Example sentences to test spaCy and its language models.
+
+>>> from spacy.lang.ja.examples import sentences
+>>> docs = nlp.pipe(sentences)
+"""
+
+
+sentences = [
+    'アップルがイギリスの新興企業を10億ドルで購入を検討',
+    '自動運転車の損害賠償責任、自動車メーカーに一定の負担を求める',
+    '歩道を走る自動配達ロボ、サンフランシスコ市が走行禁止を検討',
+    'ロンドンはイギリスの大都市です。'
+]
diff --git a/spacy/lang/zh/examples.py b/spacy/lang/zh/examples.py
new file mode 100644
index 000000000..5e8a36119
--- /dev/null
+++ b/spacy/lang/zh/examples.py
@@ -0,0 +1,18 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+
+"""
+Example sentences to test spaCy and its language models.
+
+>>> from spacy.lang.zh.examples import sentences
+>>> docs = nlp.pipe(sentences)
+"""
+
+
+sentences = [
+    "蘋果公司正考量用一億元買下英國的新創公司",
+    "自駕車將保險責任歸屬轉移至製造商",
+    "舊金山考慮禁止送貨機器人在人行道上行駛",
+    "倫敦是英國的大城市"
+]