2018-05-15 01:27:19 +03:00
|
|
|
|
# coding: utf8
|
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
💫 Tidy up and auto-format .py files (#2983)
<!--- Provide a general summary of your changes in the title. -->
## Description
- [x] Use [`black`](https://github.com/ambv/black) to auto-format all `.py` files.
- [x] Update flake8 config to exclude very large files (lemmatization tables etc.)
- [x] Update code to be compatible with flake8 rules
- [x] Fix various small bugs, inconsistencies and messy stuff in the language data
- [x] Update docs to explain new code style (`black`, `flake8`, when to use `# fmt: off` and `# fmt: on` and what `# noqa` means)
Once #2932 is merged, which auto-formats and tidies up the CLI, we'll be able to run `flake8 spacy` and actually get meaningful results.
At the moment, the code style and linting isn't applied automatically, but I'm hoping that the new [GitHub Actions](https://github.com/features/actions) will let us auto-format pull requests and post comments with relevant linting information.
### Types of change
enhancement, code style
## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.
2018-11-30 19:03:03 +03:00
|
|
|
|
# Arabic stop words, one entry per line inside the literal below.
# The literal is split on whitespace and collected into a set, so an entry
# that is listed more than once (a few are) simply collapses to one member.
# Only word entries belong inside the string: every whitespace-separated
# token in it becomes a stop word.
STOP_WORDS = set(
    """
من
نحو
لعل
بما
بين
وبين
ايضا
وبينما
تحت
مثلا
لدي
عنه
مع
هي
وهذا
واذا
هذان
انه
بينما
أمسى
وسوف
ولم
لذلك
إلى
منه
منها
كما
ظل
هنا
به
كذلك
اما
هما
بعد
بينهم
التي
أبو
اذا
بدلا
لها
أمام
يلي
حين
ضد
الذي
قد
صار
إذا
مابرح
قبل
كل
وليست
الذين
لهذا
وثي
انهم
باللتي
مافتئ
ولا
بهذه
بحيث
كيف
وله
علي
بات
لاسيما
حتى
وقد
و
أما
فيها
بهذا
لذا
حيث
لقد
إن
فإن
اول
ليت
فاللتي
ولقد
لسوف
هذه
ولماذا
معه
الحالي
بإن
حول
في
عليه
مايزال
ولعل
أنه
أضحى
اي
ستكون
لن
أن
ضمن
وعلى
امسى
الي
ذات
ولايزال
ذلك
فقد
هم
أي
عند
ابن
أو
فهو
فانه
سوف
ما
آل
كلا
عنها
وكذلك
ليست
لم
وأن
ماذا
لو
وهل
اللتي
ولذا
يمكن
فيه
الا
عليها
وبينهم
يوم
وبما
لما
فكان
اضحى
اصبح
لهم
بها
او
الذى
الى
إلي
قال
والتي
لازال
أصبح
ولهذا
مثل
وكانت
لكنه
بذلك
هذا
لماذا
قالت
فقط
لكن
مما
وكل
وان
وأبو
ومن
كان
مازال
هل
بينهن
هو
وما
على
وهو
لأن
واللتي
والذي
دون
عن
وايضا
هناك
بلا
جدا
ثم
منذ
اللذين
لايزال
بعض
مساء
تكون
فلا
بيننا
لا
ولكن
إذ
وأثناء
ليس
ومع
فيهم
ولسوف
بل
تلك
أحد
وهي
وكان
ومنها
وفي
ماانفك
اليوم
وماذا
هؤلاء
وليس
له
أثناء
بد
اليه
كأن
اليها
بتلك
يكون
ولما
هن
والى
كانت
وقبل
ان
لدى
إذما
إذن
أف
أقل
أكثر
ألا
إلا
اللاتي
اللائي
اللتان
اللتيا
اللتين
اللذان
اللواتي
إليك
إليكم
إليكما
إليكن
أم
أما
إما
إنا
أنا
أنت
أنتم
أنتما
أنتن
إنما
إنه
أنى
أنى
آه
آها
أولاء
أولئك
أوه
آي
أيها
إي
أين
أين
أينما
إيه
بخ
بس
بك
بكم
بكم
بكما
بكن
بلى
بماذا
بمن
بنا
بهم
بهما
بهن
بي
بيد
تلكم
تلكما
ته
تي
تين
تينك
ثمة
حاشا
حبذا
حيثما
خلا
ذا
ذاك
ذان
ذانك
ذلكم
ذلكما
ذلكن
ذه
ذو
ذوا
ذواتا
ذواتي
ذي
ذين
ذينك
ريث
سوى
شتان
عدا
عسى
عل
عليك
عما
غير
فإذا
فمن
فيم
فيما
كأنما
كأي
كأين
كذا
كلاهما
كلتا
كلما
كليكما
كليهما
كم
كم
كي
كيت
كيفما
لست
لستم
لستما
لستن
لسن
لسنا
لك
لكم
لكما
لكنما
لكي
لكيلا
لنا
لهما
لهن
لولا
لوما
لي
لئن
ليسا
ليستا
ليسوا
متى
مذ
ممن
مه
مهما
نحن
نعم
ها
هاتان
هاته
هاتي
هاتين
هاك
هاهنا
هذي
هذين
هكذا
هلا
هنالك
هيا
هيت
هيهات
والذين
وإذ
وإذا
وإن
ولو
يا
""".split()
)
|