mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-14 21:57:15 +03:00
c09d2fa25b
* luganda language extension * __init__.py changes * New enhancements * Lexical attribute changed * punctuaction and sentence additions * Remove comment header * Fix typos, reformat * reformated version * Add tokenizer test * Remove contractions from stop words * Format * Add Luganda to website Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
20 lines
1.3 KiB
Python
20 lines
1.3 KiB
Python
STOP_WORDS = set(
|
|
"""
|
|
abadde abalala abamu abangi abava ajja ali alina ani anti ateekeddwa atewamu
|
|
atya awamu aweebwa ayinza ba baali babadde babalina bajja
|
|
bajjanewankubade bali balina bandi bangi bano bateekeddwa baweebwa bayina bebombi beera bibye
|
|
bimu bingi bino bo bokka bonna buli bulijjo bulungi bwabwe bwaffe bwayo bwe bwonna bya byabwe
|
|
byaffe byebimu byonna ddaa ddala ddi e ebimu ebiri ebweruobulungi ebyo edda ejja ekirala ekyo
|
|
endala engeri ennyo era erimu erina ffe ffenna ga gujja gumu gunno guno gwa gwe kaseera kati
|
|
kennyini ki kiki kikino kikye kikyo kino kirungi kki ku kubangabyombi kubangaolwokuba kudda
|
|
kuva kuwa kwegamba kyaffe kye kyekimuoyo kyekyo kyonna leero liryo lwa lwaki lyabwezaabwe
|
|
lyaffe lyange mbadde mingi mpozzi mu mulinaoyina munda mwegyabwe nolwekyo nabadde nabo nandiyagadde
|
|
nandiye nanti naye ne nedda neera nga nnyingi nnyini nnyinza nnyo nti nyinza nze oba ojja okudda
|
|
okugenda okuggyako okutuusa okuva okuwa oli olina oluvannyuma olwekyobuva omuli ono osobola otya
|
|
oyina oyo seetaaga si sinakindi singa talina tayina tebaali tebaalina tebayina terina tetulina
|
|
tetuteekeddwa tewali teyalina teyayina tolina tu tuyina tulina tuyina twafuna twetaaga wa wabula
|
|
wabweru wadde waggulunnina wakati waliwobangi waliyo wandi wange wano wansi weebwa yabadde yaffe
|
|
ye yenna yennyini yina yonna ziba zijja zonna
|
|
""".split()
|
|
)
|