diff --git a/.github/contributors/fucking-signup.md b/.github/contributors/fucking-signup.md new file mode 100644 index 000000000..b34e23e00 --- /dev/null +++ b/.github/contributors/fucking-signup.md @@ -0,0 +1,106 @@ +# spaCy contributor agreement + +This spaCy Contributor Agreement (**"SCA"**) is based on the +[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf). +The SCA applies to any contribution that you make to any product or project +managed by us (the **"project"**), and sets out the intellectual property rights +you grant to us in the contributed materials. The term **"us"** shall mean +[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term +**"you"** shall mean the person or entity identified below. + +If you agree to be bound by these terms, fill in the information requested +below and include the filled-in version with your first pull request, under the +folder [`.github/contributors/`](/.github/contributors/). The name of the file +should be your GitHub username, with the extension `.md`. For example, the user +example_user would create the file `.github/contributors/example_user.md`. + +Read this agreement carefully before signing. These terms and conditions +constitute a binding legal agreement. + +## Contributor Agreement + +1. The term "contribution" or "contributed materials" means any source code, +object code, patch, tool, sample, graphic, specification, manual, +documentation, or any other material posted or submitted by you to the project. + +2. With respect to any worldwide copyrights, or copyright applications and +registrations, in your contribution: + + * you hereby assign to us joint ownership, and to the extent that such + assignment is or becomes invalid, ineffective or unenforceable, you hereby + grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge, + royalty-free, unrestricted license to exercise all rights under those + copyrights. 
This includes, at our option, the right to sublicense these same + rights to third parties through multiple levels of sublicensees or other + licensing arrangements; + + * you agree that each of us can do all things in relation to your + contribution as if each of us were the sole owners, and if one of us makes + a derivative work of your contribution, the one who makes the derivative + work (or has it made) will be the sole owner of that derivative work; + + * you agree that you will not assert any moral rights in your contribution + against us, our licensees or transferees; + + * you agree that we may register a copyright in your contribution and + exercise all ownership rights associated with it; and + + * you agree that neither of us has any duty to consult with, obtain the + consent of, pay or render an accounting to the other for any use or + distribution of your contribution. + +3. With respect to any patents you own, or that you can license without payment +to any third party, you hereby grant to us a perpetual, irrevocable, +non-exclusive, worldwide, no-charge, royalty-free license to: + + * make, have made, use, sell, offer to sell, import, and otherwise transfer + your contribution in whole or in part, alone or in combination with or + included in any product, work or materials arising out of the project to + which your contribution was submitted, and + + * at our option, to sublicense these same rights to third parties through + multiple levels of sublicensees or other licensing arrangements. + +4. Except as set out above, you keep all right, title, and interest in your +contribution. The rights that you grant to us under these terms are effective +on the date you first submitted a contribution to us, even if your submission +took place before the date you sign these terms. + +5. 
You covenant, represent, warrant and agree that: + + * Each contribution that you submit is and shall be an original work of + authorship and you can legally grant the rights set out in this SCA; + + * to the best of your knowledge, each contribution will not violate any + third party's copyrights, trademarks, patents, or other intellectual + property rights; and + + * each contribution shall be in compliance with U.S. export control laws and + other applicable export and import laws. You agree to notify us if you + become aware of any circumstance which would make any of the foregoing + representations inaccurate in any respect. We may publicly disclose your + participation in the project, including the fact that you have signed the SCA. + +6. This SCA is governed by the laws of the State of California and applicable +U.S. Federal law. Any choice of law rules will not apply. + +7. Please place an “x” on one of the applicable statements below. Please do NOT +mark both statements: + + * [x] I am signing on behalf of myself as an individual and no other person + or entity, including my employer, has or will have rights with respect to my + contributions. + + * [ ] I am signing on behalf of my employer or a legal entity and I have the + actual authority to contractually bind that entity. 
+ +## Contributor Details + +| Field | Entry | +|------------------------------- | -------------------- | +| Name | Kit | +| Company name (if applicable) | - | +| Title or role (if applicable) | - | +| Date | 2018/01/08 | +| GitHub username | fucking-signup | +| Website (optional) | - | diff --git a/spacy/lang/da/lex_attrs.py b/spacy/lang/da/lex_attrs.py index 8152ad259..ec60c5132 100644 --- a/spacy/lang/da/lex_attrs.py +++ b/spacy/lang/da/lex_attrs.py @@ -41,9 +41,9 @@ def like_num(text): num, denom = text.split('/') if num.isdigit() and denom.isdigit(): return True - if text in _num_words: + if text.lower() in _num_words: return True - if text in _ordinal_words: + if text.lower() in _ordinal_words: return True return False diff --git a/spacy/lang/en/lex_attrs.py b/spacy/lang/en/lex_attrs.py index 471f61977..5af136449 100644 --- a/spacy/lang/en/lex_attrs.py +++ b/spacy/lang/en/lex_attrs.py @@ -20,7 +20,7 @@ def like_num(text): num, denom = text.split('/') if num.isdigit() and denom.isdigit(): return True - if text in _num_words: + if text.lower() in _num_words: return True return False diff --git a/spacy/lang/fr/lex_attrs.py b/spacy/lang/fr/lex_attrs.py index 41c509dff..5cf7170b9 100644 --- a/spacy/lang/fr/lex_attrs.py +++ b/spacy/lang/fr/lex_attrs.py @@ -31,7 +31,9 @@ def like_num(text): num, denom = text.split('/') if num.isdigit() and denom.isdigit(): return True - if text in _num_words: + if text.lower() in _num_words: + return True + if text.lower() in _ordinal_words: return True return False diff --git a/spacy/lang/id/lex_attrs.py b/spacy/lang/id/lex_attrs.py index fb6a31f99..235cee438 100644 --- a/spacy/lang/id/lex_attrs.py +++ b/spacy/lang/id/lex_attrs.py @@ -27,7 +27,7 @@ def like_num(text): num, denom = text.split('/') if num.isdigit() and denom.isdigit(): return True - if text in _num_words: + if text.lower() in _num_words: return True if text.count('-') == 1: _, num = text.split('-') diff --git a/spacy/lang/nl/lex_attrs.py b/spacy/lang/nl/lex_attrs.py 
index 08b1df3be..ba391e0b2 100644 --- a/spacy/lang/nl/lex_attrs.py +++ b/spacy/lang/nl/lex_attrs.py @@ -30,7 +30,9 @@ def like_num(text): num, denom = text.split('/') if num.isdigit() and denom.isdigit(): return True - if text in _num_words: + if text.lower() in _num_words: + return True + if text.lower() in _ordinal_words: return True return False diff --git a/spacy/lang/pt/lex_attrs.py b/spacy/lang/pt/lex_attrs.py index fa80cde73..4f1b066a3 100644 --- a/spacy/lang/pt/lex_attrs.py +++ b/spacy/lang/pt/lex_attrs.py @@ -11,13 +11,13 @@ _num_words = ['zero', 'um', 'dois', 'três', 'quatro', 'cinco', 'seis', 'sete', 'oitenta', 'noventa', 'cem', 'mil', 'milhão', 'bilião', 'trilião', 'quadrilião'] -_ord_words = ['primeiro', 'segundo', 'terceiro', 'quarto', 'quinto', 'sexto', - 'sétimo', 'oitavo', 'nono', 'décimo', 'vigésimo', 'trigésimo', - 'quadragésimo', 'quinquagésimo', 'sexagésimo', 'septuagésimo', - 'octogésimo', 'nonagésimo', 'centésimo', 'ducentésimo', - 'trecentésimo', 'quadringentésimo', 'quingentésimo', 'sexcentésimo', - 'septingentésimo', 'octingentésimo', 'nongentésimo', 'milésimo', - 'milionésimo', 'bilionésimo'] +_ordinal_words = ['primeiro', 'segundo', 'terceiro', 'quarto', 'quinto', 'sexto', + 'sétimo', 'oitavo', 'nono', 'décimo', 'vigésimo', 'trigésimo', + 'quadragésimo', 'quinquagésimo', 'sexagésimo', 'septuagésimo', + 'octogésimo', 'nonagésimo', 'centésimo', 'ducentésimo', + 'trecentésimo', 'quadringentésimo', 'quingentésimo', 'sexcentésimo', + 'septingentésimo', 'octingentésimo', 'nongentésimo', 'milésimo', + 'milionésimo', 'bilionésimo'] def like_num(text): @@ -28,7 +28,9 @@ def like_num(text): num, denom = text.split('/') if num.isdigit() and denom.isdigit(): return True - if text in _num_words: + if text.lower() in _num_words: + return True + if text.lower() in _ordinal_words: return True return False diff --git a/spacy/lang/ru/lex_attrs.py b/spacy/lang/ru/lex_attrs.py index e44525743..27ae76326 100644 --- a/spacy/lang/ru/lex_attrs.py +++ 
b/spacy/lang/ru/lex_attrs.py @@ -25,7 +25,7 @@ def like_num(text): num, denom = text.split('/') if num.isdigit() and denom.isdigit(): return True - if text in _num_words: + if text.lower() in _num_words: return True return False diff --git a/spacy/tests/regression/test_issue1769.py b/spacy/tests/regression/test_issue1769.py new file mode 100644 index 000000000..5ce228189 --- /dev/null +++ b/spacy/tests/regression/test_issue1769.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from ...lang.da.lex_attrs import like_num as da_like_num +# from ...lang.en.lex_attrs import like_num as en_like_num +# from ...lang.fr.lex_attrs import like_num as fr_like_num +# from ...lang.id.lex_attrs import like_num as id_like_num +# from ...lang.nl.lex_attrs import like_num as nl_like_num +# from ...lang.pt.lex_attrs import like_num as pt_like_num +# from ...lang.ru.lex_attrs import like_num as ru_like_num + +import pytest + + +@pytest.mark.parametrize('num_words', ['elleve', 'ELLEVE']) +@pytest.mark.parametrize('ordinal_words', ['første', 'FØRSTE']) +def test_da_lex_attrs(num_words, ordinal_words): + assert da_like_num(num_words) == True + assert da_like_num(ordinal_words) == True + + +# @pytest.mark.parametrize('num_words', ['eleven', 'ELEVEN']) +# def test_en_lex_attrs(num_words): +# assert en_like_num(num_words) == True +# +# +# @pytest.mark.parametrize('num_words', ['onze', 'ONZE']) +# @pytest.mark.parametrize('ordinal_words', ['onzième', 'ONZIÈME']) +# def test_fr_lex_attrs(num_words, ordinal_words): +# assert fr_like_num(num_words) == True +# assert fr_like_num(ordinal_words) == True +# +# +# @pytest.mark.parametrize('num_words', ['sebelas', 'SEBELAS']) +# def test_id_lex_attrs(num_words): +# assert id_like_num(num_words) == True +# +# +# @pytest.mark.parametrize('num_words', ['elf', 'ELF']) +# @pytest.mark.parametrize('ordinal_words', ['elfde', 'ELFDE']) +# def test_nl_lex_attrs(num_words, ordinal_words): +# assert nl_like_num(num_words) == True 
+# assert nl_like_num(ordinal_words) == True +# +# +# @pytest.mark.parametrize('num_words', ['onze', 'ONZE']) +# @pytest.mark.parametrize('ordinal_words', ['quadragésimo', 'QUADRAGÉSIMO']) +# def test_pt_lex_attrs(num_words, ordinal_words): +# assert pt_like_num(num_words) == True +# assert pt_like_num(ordinal_words) == True +# +# +# @pytest.mark.parametrize('num_words', ['одиннадцать', 'ОДИННАДЦАТЬ']) +# def test_ru_lex_attrs(num_words): +# assert ru_like_num(num_words) == True diff --git a/website/usage/_adding-languages/_language-data.jade b/website/usage/_adding-languages/_language-data.jade index 1b0ed241a..eedf343e6 100644 --- a/website/usage/_adding-languages/_language-data.jade +++ b/website/usage/_adding-languages/_language-data.jade @@ -394,7 +394,7 @@ p num, denom = text.split('/') if num.isdigit() and denom.isdigit(): return True - if text in _num_words: + if text.lower() in _num_words: return True return False