From e9af79a80382639e5fa0a61687c655fe500c110e Mon Sep 17 00:00:00 2001 From: Jim Geovedi Date: Sun, 30 Jul 2017 21:23:01 +0700 Subject: [PATCH] added u-\d+ rules (sports team) --- spacy/lang/id/tokenizer_exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/lang/id/tokenizer_exceptions.py b/spacy/lang/id/tokenizer_exceptions.py index 5d5a6fac5..57135b4d5 100644 --- a/spacy/lang/id/tokenizer_exceptions.py +++ b/spacy/lang/id/tokenizer_exceptions.py @@ -36,7 +36,7 @@ amir an antar anti ar as ash asy at ath az bekas ber best bi co di double dual duo e eco eks el era ex full hi high i in inter intra ke kontra korona kuartal lintas m macro makro me mem meng micro mid mikro mini multi neo nge no non on pan pasca pe pem poli poly post pra pre pro re se self serba seri -sub super t trans ultra un x""".split() +sub super t trans u ultra un x \#""".split() _hyphen_infix = """ber-an berke-an de-isasi di-kan di-kannya di-nya ke-an ke-annya me-kan me-kannya men-kan men-kannya meng-kannya pe-an pen-an