diff --git a/spacy/ja/__init__.py b/spacy/ja/__init__.py
index da2ec8af2..c55f67e47 100644
--- a/spacy/ja/__init__.py
+++ b/spacy/ja/__init__.py
@@ -1,14 +1,10 @@
 # encoding: utf8
 from __future__ import unicode_literals, print_function
 
-from os import path
-
 from ..language import Language
 from ..attrs import LANG
 from ..tokens import Doc
 
-from .language_data import *
-
 
 class Japanese(Language):
     lang = 'ja'
@@ -22,4 +18,5 @@ class Japanese(Language):
         words = [x.surface for x in Tokenizer().tokenize(text)]
         return Doc(self.vocab, words=words, spaces=[False]*len(words))
 
-EXPORT = Japanese
\ No newline at end of file
+
+__all__ = ['Japanese']
diff --git a/spacy/ja/stop_words.py b/spacy/ja/stop_words.py
deleted file mode 100644
index 45bb7a4d8..000000000
--- a/spacy/ja/stop_words.py
+++ /dev/null
@@ -1,9 +0,0 @@
-# encoding: utf8
-from __future__ import unicode_literals
-
-
-# stop words as whitespace-separated list
-STOP_WORDS = set("""
-。
-、
-""".split())
diff --git a/spacy/ja/tag_map.py b/spacy/ja/tag_map.py
deleted file mode 100644
index f5b6b5040..000000000
--- a/spacy/ja/tag_map.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# encoding: utf8
-from __future__ import unicode_literals
-
-from ..symbols import *
-
-
-TAG_MAP = {
-    "ADV": {POS: ADV},
-    "NOUN": {POS: NOUN},
-    "ADP": {POS: ADP},
-    "PRON": {POS: PRON},
-    "SCONJ": {POS: SCONJ},
-    "PROPN": {POS: PROPN},
-    "DET": {POS: DET},
-    "SYM": {POS: SYM},
-    "INTJ": {POS: INTJ},
-    "PUNCT": {POS: PUNCT},
-    "NUM": {POS: NUM},
-    "AUX": {POS: AUX},
-    "X": {POS: X},
-    "CONJ": {POS: CONJ},
-    "ADJ": {POS: ADJ},
-    "VERB": {POS: VERB}
-}
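
For context, a minimal sketch (not part of the patch) of the Janome-based tokenization step that the retained Japanese.make_doc relies on, assuming the Janome library is installed; the example sentence is illustrative only.

# Mirrors the tokenization line kept in spacy/ja/__init__.py:
# surface forms from Janome become the Doc's words, with no trailing spaces.
from janome.tokenizer import Tokenizer

text = 'すもももももももものうち'
words = [x.surface for x in Tokenizer().tokenize(text)]
print(words)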