mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-27 10:26:35 +03:00
2e31921d0a
* Add base classes for more languages * Add test for language class initialization Make sure language can be initialized – otherwise, it's difficult to catch serious errors in the test suite, because languages are lazy-loaded
46 lines
303 B
Python
46 lines
303 B
Python
# coding: utf8
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
# Source: https://github.com/stopwords-iso/stopwords-et
|
|
|
|
# Estonian stop words (from the stopwords-iso/stopwords-et list).
# The words are kept one per line in a single string literal; calling
# ``split()`` with no arguments breaks on any run of whitespace, so the
# leading/trailing newlines of the literal never produce empty entries.
# The literal must contain nothing but the words themselves: any stray
# token (e.g. a "|" separator) would silently become a stop word.
STOP_WORDS = set(
    """
aga
ei
et
ja
jah
kas
kui
kõik
ma
me
mida
midagi
mind
minu
mis
mu
mul
mulle
nad
nii
oled
olen
oli
oma
on
pole
sa
seda
see
selle
siin
siis
ta
te
ära
""".split()
)
|