From 98c8e70dc27010902638e38a56b058cc15942a2c Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sun, 6 Nov 2016 13:46:11 +0100 Subject: [PATCH 01/23] Update installation docs --- website/docs/usage/index.jade | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/website/docs/usage/index.jade b/website/docs/usage/index.jade index a5a28ccda..a4bb29d06 100644 --- a/website/docs/usage/index.jade +++ b/website/docs/usage/index.jade @@ -69,6 +69,11 @@ p | including the so-called "Command Line Tools". macOS and OS X ship with | Python and git preinstalled. +p + | To compile spaCy with multi-threading support on macOS / OS X, + | #[+a("https://github.com/explosion/spaCy/issues/267") see here]. + + +h(3, "source-windows") Windows p From d5668cf0d2e98344e7fd0288e362e35e26c26a88 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sun, 6 Nov 2016 13:46:20 +0100 Subject: [PATCH 02/23] Add spacy-api-docker to showcase --- website/docs/usage/_data.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/website/docs/usage/_data.json b/website/docs/usage/_data.json index c67119796..200e22e9a 100644 --- a/website/docs/usage/_data.json +++ b/website/docs/usage/_data.json @@ -84,6 +84,11 @@ "author": "Wah Loon Keng", "description": "Expose spaCy NLP text parsing to Node.js (and other languages) via Socket.IO." }, + "spacy-api-docker": { + "url": "https://github.com/jgontrum/spacy-api-docker", + "author": "Johannes Gontrum", + "description": "spaCy accessed by a REST API, wrapped in a Docker container." 
+ }, "textacy": { "url": "https://github.com/chartbeat-labs/textacy", "author": " Burton DeWilde (Chartbeat)", From 4352371b3644520cb337afcfb2137174312d402b Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Mon, 7 Nov 2016 02:13:45 +0100 Subject: [PATCH 03/23] =?UTF-8?q?=F0=9F=94=B4=20Fix=20bug=20that=20would?= =?UTF-8?q?=20prevent=20rendering=20of=20robots.txt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- website/_data.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/_data.json b/website/_data.json index cf203caf9..525c70d80 100644 --- a/website/_data.json +++ b/website/_data.json @@ -25,7 +25,7 @@ ] }, - "robots": { + "robots.txt": { "layout": false }, From f91bf4d59c1320b66d4931992fc5a4f3674b7d03 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Mon, 7 Nov 2016 02:14:26 +0100 Subject: [PATCH 04/23] Add right margin to tags to make them usable inline --- website/assets/css/_base/_utilities.sass | 1 + 1 file changed, 1 insertion(+) diff --git a/website/assets/css/_base/_utilities.sass b/website/assets/css/_base/_utilities.sass index dfe514e80..eb1af1f13 100644 --- a/website/assets/css/_base/_utilities.sass +++ b/website/assets/css/_base/_utilities.sass @@ -39,6 +39,7 @@ &.u-text-tag--spaced margin-left: 0.75em + margin-right: 0.5em //- Headings From 3502654551079fb734492d77e03df2b6d1499f4a Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Mon, 7 Nov 2016 02:14:43 +0100 Subject: [PATCH 05/23] Add option for "latest news" on landing page --- website/index.jade | 3 +++ 1 file changed, 3 insertions(+) diff --git a/website/index.jade b/website/index.jade index 024e7dfb9..5a126c8b3 100644 --- a/website/index.jade +++ b/website/index.jade @@ -59,6 +59,9 @@ include _includes/_mixins strong.u-text-label.u-color-subtle #[+icon("code", 18)] Latest release: | v#{SPACY_VERSION} + if LATEST_NEWS + +a(LATEST_NEWS.url) #[+tag.o-icon New!] 
#{LATEST_NEWS.title} + .o-content +grid +grid-col("two-thirds") From da52bcf0801847790677b91f4753de66b7c6d651 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Mon, 7 Nov 2016 02:17:48 +0100 Subject: [PATCH 06/23] Patch version --- website/_harp.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/_harp.json b/website/_harp.json index a1bc2755e..07a7e93c0 100644 --- a/website/_harp.json +++ b/website/_harp.json @@ -47,7 +47,7 @@ } }, - "V_CSS": "1.8", + "V_CSS": "1.9", "V_JS": "1.0", "DEFAULT_SYNTAX" : "python", "ANALYTICS": "UA-58931649-1", From 418c084f12433ab8573a371bb88ec560ec7c7fa6 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Mon, 7 Nov 2016 02:18:36 +0100 Subject: [PATCH 07/23] Replace "" with false to prevent rending of empty attributes --- website/_includes/_footer.jade | 2 +- website/_includes/_mixins.jade | 6 +++--- website/_includes/_navigation.jade | 4 ++-- website/_includes/_sidebar.jade | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/website/_includes/_footer.jade b/website/_includes/_footer.jade index 8f11c2c80..e49ba309f 100644 --- a/website/_includes/_footer.jade +++ b/website/_includes/_footer.jade @@ -11,7 +11,7 @@ footer.o-footer.u-text.u-border-dotted each url, item in group li - +a(url)(target=url.includes("http") ? "_blank" : "")=item + +a(url)(target=url.includes("http") ? "_blank" : false)=item if SECTION != "docs" +grid-col("quarter") diff --git a/website/_includes/_mixins.jade b/website/_includes/_mixins.jade index 416eb3966..030e9a776 100644 --- a/website/_includes/_mixins.jade +++ b/website/_includes/_mixins.jade @@ -20,7 +20,7 @@ mixin h(level, id) info: https://mathiasbynens.github.io/rel-noopener/ mixin a(url, trusted) - a(href=url target="_blank" rel=(!trusted) ? "noopener nofollow" : "")&attributes(attributes) + a(href=url target="_blank" rel=!trusted ? 
"noopener nofollow" : false)&attributes(attributes) block //- Source link (with added icon for "code") @@ -72,7 +72,7 @@ mixin aside-code(label, language) see assets/css/_components/_buttons.sass mixin button(url, trusted, ...style) - a.c-button.u-text-label(href=url class=prefixArgs(style, "c-button") role="button" target="_blank" rel=(!trusted) ? "noopener nofollow" : "")&attributes(attributes) + a.c-button.u-text-label(href=url class=prefixArgs(style, "c-button") role="button" target="_blank" rel=!trusted ? "noopener nofollow" : false)&attributes(attributes) block @@ -133,7 +133,7 @@ mixin tag() mixin list(type, start) if type - ol.c-list.o-block.u-text(class="c-list--#{type}" style=(start === 0 || start) ? "counter-reset: li #{(start - 1)}" : "")&attributes(attributes) + ol.c-list.o-block.u-text(class="c-list--#{type}" style=(start === 0 || start) ? "counter-reset: li #{(start - 1)}" : false)&attributes(attributes) block else diff --git a/website/_includes/_navigation.jade b/website/_includes/_navigation.jade index e2b18aa4b..881a5db56 100644 --- a/website/_includes/_navigation.jade +++ b/website/_includes/_navigation.jade @@ -2,7 +2,7 @@ include _mixins -nav.c-nav.u-text.js-nav(class=landing ? "c-nav--theme" : "") +nav.c-nav.u-text.js-nav(class=landing ? "c-nav--theme" : false) a(href='/') #[+logo] if SUBSECTION != "index" @@ -11,7 +11,7 @@ nav.c-nav.u-text.js-nav(class=landing ? "c-nav--theme" : "") ul.c-nav__menu each url, item in NAVIGATION li.c-nav__menu__item - a(href=url target=url.includes("http") ? "_blank" : "")=item + a(href=url target=url.includes("http") ? 
"_blank" : false)=item li.c-nav__menu__item +a(gh("spaCy"))(aria-label="GitHub").u-hidden-xs #[+icon("github", 20)] diff --git a/website/_includes/_sidebar.jade b/website/_includes/_sidebar.jade index fb695e369..a0d4d4cd3 100644 --- a/website/_includes/_sidebar.jade +++ b/website/_includes/_sidebar.jade @@ -9,5 +9,5 @@ menu.c-sidebar.js-sidebar.u-text li.u-text-label.u-color-subtle=menu each url, item in items - li(class=(CURRENT == url || (CURRENT == "index" && url == "./")) ? "is-active" : "") - +a(url)(target=url.includes("http") ? "_blank" : "")=item + li(class=(CURRENT == url || (CURRENT == "index" && url == "./")) ? "is-active" : false) + +a(url)(target=url.includes("http") ? "_blank" : false)=item From bf3c1c7a48c00be7c7a926af2e0697ba1d1e90ec Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Mon, 7 Nov 2016 21:32:03 +0100 Subject: [PATCH 08/23] Add link to dependency parse workflow --- website/docs/usage/dependency-parse.jade | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/website/docs/usage/dependency-parse.jade b/website/docs/usage/dependency-parse.jade index 28ab62d77..92594ee04 100644 --- a/website/docs/usage/dependency-parse.jade +++ b/website/docs/usage/dependency-parse.jade @@ -28,9 +28,10 @@ p p | The best way to understand spaCy's dependency parser is interactively, - | through the displacy visualizer. If you want to know how to write rules - | that hook into some type of syntactic construction, just plug the - | sentence into the visualizer and see how spaCy annotates it. + | through the #[+a(DEMOS_URL + "/displacy") displaCy visualizer]. If you + | want to know how to write rules that hook into some type of syntactic + | construction, just plug the sentence into the visualizer and see how + | spaCy annotates it. 
+h(2, "navigating") Navigating the parse tree From 0a90b141f4bb1554bc94f3bb0663f4b43d4bcb7c Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Mon, 7 Nov 2016 21:50:40 +0100 Subject: [PATCH 09/23] Trust link --- website/docs/usage/dependency-parse.jade | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/docs/usage/dependency-parse.jade b/website/docs/usage/dependency-parse.jade index 92594ee04..597e73c71 100644 --- a/website/docs/usage/dependency-parse.jade +++ b/website/docs/usage/dependency-parse.jade @@ -28,8 +28,8 @@ p p | The best way to understand spaCy's dependency parser is interactively, - | through the #[+a(DEMOS_URL + "/displacy") displaCy visualizer]. If you - | want to know how to write rules that hook into some type of syntactic + | through the #[+a(DEMOS_URL + "/displacy", true) displaCy visualizer]. If + | you want to know how to write rules that hook into some type of syntactic | construction, just plug the sentence into the visualizer and see how | spaCy annotates it. 
From 8abc2084ff465a31b3f698c1899c0c180518ce9f Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Mon, 7 Nov 2016 22:48:35 +0100 Subject: [PATCH 10/23] Add user survey results as latest news --- website/_harp.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/website/_harp.json b/website/_harp.json index 07a7e93c0..c9cd5f02b 100644 --- a/website/_harp.json +++ b/website/_harp.json @@ -11,7 +11,12 @@ "COMPANY": "Explosion AI", "COMPANY_URL": "https://explosion.ai", "DEMOS_URL": "https://demos.explosion.ai", + "SPACY_VERSION": "1.2", + "LATEST_NEWS": { + "url": "https://explosion.ai/blog/spacy-user-survey", + "title": "The results of the spaCy user survey" + }, "SOCIAL": { "twitter": "spacy_io", From 660c3268c1c8bc7f1f8a390f28fbf06f9745cede Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Tue, 8 Nov 2016 00:04:46 +0100 Subject: [PATCH 11/23] Mention regression tests in "Issues & bug reports" --- CONTRIBUTING.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 739a5a659..2e665fd76 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -20,6 +20,10 @@ If you're looking for help with your code, consider posting a question on [Stack When opening an issue, use a descriptive title and include your environment (operating system, Python version, spaCy version). Our [issue template](https://github.com/explosion/spaCy/issues/new) helps you remember the most important details to include. +If you've discovered a bug, you can also submit a [regression test](#fixing-bugs) straight away. When you're opening an issue to report the bug, simply refer to your pull request in the issue body. 
+ +### Issue labels + We use the following system to tag our issues: | Issue label | Description | From 86c056ba649f876982c1a35218326360cccbfba8 Mon Sep 17 00:00:00 2001 From: Dmitry Sadovnychyi Date: Wed, 9 Nov 2016 00:10:32 +0800 Subject: [PATCH 12/23] Add basic test for PhraseMatcher #613 --- spacy/tests/test_matcher.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/spacy/tests/test_matcher.py b/spacy/tests/test_matcher.py index 9f04a7a0d..903328981 100644 --- a/spacy/tests/test_matcher.py +++ b/spacy/tests/test_matcher.py @@ -87,6 +87,13 @@ def test_match_zero_plus(matcher): assert len(matcher(doc)) == 1 +def test_phrase_matcher(): + vocab = Vocab(lex_attr_getters=English.Defaults.lex_attr_getters) + matcher = PhraseMatcher(vocab, [Doc(vocab, words='Google Now'.split())]) + doc = Doc(vocab, words=['I', 'like', 'Google', 'Now', 'best']) + assert len(matcher(doc)) == 1 + + #@pytest.mark.models #def test_match_preserved(EN): # patterns = { From 9488222e799357337e6623be6ecea7abc45ea91b Mon Sep 17 00:00:00 2001 From: Dmitry Sadovnychyi Date: Wed, 9 Nov 2016 00:14:26 +0800 Subject: [PATCH 13/23] Fix PhraseMatcher to work with updated Matcher #613 --- spacy/matcher.pyx | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/spacy/matcher.pyx b/spacy/matcher.pyx index 7c694431e..72a4a97d6 100644 --- a/spacy/matcher.pyx +++ b/spacy/matcher.pyx @@ -142,7 +142,7 @@ def _convert_strings(token_specs, string_store): tokens = [] op = ONE for spec in token_specs: - token = [] + token = [] ops = (ONE,) for attr, value in spec.items(): if isinstance(attr, basestring) and attr.upper() == 'OP': @@ -173,7 +173,7 @@ cdef class Matcher: cdef public object _entities cdef public object _callbacks cdef public object _acceptors - + @classmethod def load(cls, path, vocab): '''Load the matcher and patterns from a file path. 
@@ -218,7 +218,7 @@ cdef class Matcher: def __reduce__(self): return (self.__class__, (self.vocab, self._patterns), None, None) - + property n_patterns: def __get__(self): return self.patterns.size() @@ -492,14 +492,14 @@ cdef class PhraseMatcher: abstract_patterns = [] for length in range(1, max_length): abstract_patterns.append([{tag: True} for tag in get_bilou(length)]) - self.matcher.add('Candidate', 'MWE', {}, abstract_patterns) + self.matcher.add('Candidate', 'MWE', {}, abstract_patterns, acceptor=self.accept_match) def add(self, Doc tokens): cdef int length = tokens.length assert length < self.max_length tags = get_bilou(length) assert len(tags) == length, length - + cdef int i for i in range(self.max_length): self._phrase_key[i] = 0 @@ -512,7 +512,7 @@ cdef class PhraseMatcher: def __call__(self, Doc doc): matches = [] - for label, start, end in self.matcher(doc, acceptor=self.accept_match): + for ent_id, label, start, end in self.matcher(doc): cand = doc[start : end] start = cand[0].idx end = cand[-1].idx + len(cand[-1]) @@ -526,7 +526,7 @@ cdef class PhraseMatcher: self(doc) yield doc - def accept_match(self, Doc doc, int label, int start, int end): + def accept_match(self, Doc doc, int ent_id, int label, int start, int end): assert (end - start) < self.max_length cdef int i, j for i in range(self.max_length): @@ -535,6 +535,6 @@ cdef class PhraseMatcher: self._phrase_key[i] = doc.c[j].lex.orth cdef hash_t key = hash64(self._phrase_key, self.max_length * sizeof(attr_t), 0) if self.phrase_ids.get(key): - return True + return (ent_id, label, start, end) else: return False From bb33c30f772ab6b6b28e2a193039e6c2cb68198d Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Tue, 8 Nov 2016 17:36:41 +0100 Subject: [PATCH 14/23] Update CONTRIBUTORS.md --- CONTRIBUTORS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 975de5c45..184d86686 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -6,6 +6,7 @@ This is a list of 
everyone who has made significant contributions to spaCy, in a * Andreas Grivas, [@andreasgrv](https://github.com/andreasgrv) * Chris DuBois, [@chrisdubois](https://github.com/chrisdubois) * Christoph Schwienheer, [@chssch](https://github.com/chssch) +* Dmytro Sadovnychyi, [@sadovnychyi](https://github.com/sadovnychyi) * Henning Peters, [@henningpeters](https://github.com/henningpeters) * Ines Montani, [@ines](https://github.com/ines) * J Nicolas Schrading, [@NSchrading](https://github.com/NSchrading) From b38cfd0ef93cf7eb8a863b028ae2ce3ca91b0906 Mon Sep 17 00:00:00 2001 From: tiago Date: Wed, 9 Nov 2016 14:58:19 +0000 Subject: [PATCH 15/23] now span.merge returns token like it says on documentation --- spacy/tokens/span.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index c55de3c3e..e645c1a6f 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -99,7 +99,7 @@ cdef class Span: token (Token): The newly merged token. """ - self.doc.merge(self.start_char, self.end_char, *args, **attributes) + return self.doc.merge(self.start_char, self.end_char, *args, **attributes) def similarity(self, other): '''Make a semantic similarity estimate. The default estimate is cosine From 3005f76652bcc70ee17341d1ef575e934c19ea44 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Wed, 9 Nov 2016 16:23:55 +0100 Subject: [PATCH 16/23] Create CONTRIBUTOR_AGREEMENT.md --- .github/CONTRIBUTOR_AGREEMENT.md | 107 +++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 .github/CONTRIBUTOR_AGREEMENT.md diff --git a/.github/CONTRIBUTOR_AGREEMENT.md b/.github/CONTRIBUTOR_AGREEMENT.md new file mode 100644 index 000000000..1ab3af38e --- /dev/null +++ b/.github/CONTRIBUTOR_AGREEMENT.md @@ -0,0 +1,107 @@ +# spaCy contributor agreement + +This spaCy Contributor Agreement (**"SCA"**) is based on the +[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf). 
+The SCA applies to any contribution that you make to any product or project +managed by us (the **"project"**), and sets out the intellectual property rights +you grant to us in the contributed materials. The term **"us"** shall mean +[ExplosionAI UG (haftungsbeschränkt)](https://explosion.ai/legal). The term +**"you"** shall mean the person or entity identified below. + +If you agree to be bound by these terms, fill in the information requested +below and include the filled-in version with your first pull request, under the +folder [`/.github/contributors/`](/.github/contributors/). The name of the file +should be your GitHub username, with the extension `.md`. For example, the user +example_user would create the file `/.github/contributors/example_user.md`. + +Read this agreement carefully before signing. These terms and conditions +constitute a binding legal agreement. + +## Contributor Agreement + +1. The term "contribution" or "contributed materials" means any source code, +object code, patch, tool, sample, graphic, specification, manual, +documentation, or any other material posted or submitted by you to the project. + +2. With respect to any worldwide copyrights, or copyright applications and +registrations, in your contribution: + + * you hereby assign to us joint ownership, and to the extent that such + assignment is or becomes invalid, ineffective or unenforceable, you hereby + grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge, + royalty-free, unrestricted license to exercise all rights under those + copyrights. 
This includes, at our option, the right to sublicense these same + rights to third parties through multiple levels of sublicensees or other + licensing arrangements; + + * you agree that each of us can do all things in relation to your + contribution as if each of us were the sole owners, and if one of us makes + a derivative work of your contribution, the one who makes the derivative + work (or has it made) will be the sole owner of that derivative work; + + * you agree that you will not assert any moral rights in your contribution + against us, our licensees or transferees; + + * you agree that we may register a copyright in your contribution and + exercise all ownership rights associated with it; and + + * you agree that neither of us has any duty to consult with, obtain the + consent of, pay or render an accounting to the other for any use or + distribution of your contribution. + +3. With respect to any patents you own, or that you can license without payment +to any third party, you hereby grant to us a perpetual, irrevocable, +non-exclusive, worldwide, no-charge, royalty-free license to: + + * make, have made, use, sell, offer to sell, import, and otherwise transfer + your contribution in whole or in part, alone or in combination with or + included in any product, work or materials arising out of the project to + which your contribution was submitted, and + + * at our option, to sublicense these same rights to third parties through + multiple levels of sublicensees or other licensing arrangements. + +4. Except as set out above, you keep all right, title, and interest in your +contribution. The rights that you grant to us under these terms are effective +on the date you first submitted a contribution to us, even if your submission +took place before the date you sign these terms. + +5. 
You covenant, represent, warrant and agree that: + + * Each contribution that you submit is and shall be an original work of + authorship and you can legally grant the rights set out in this SCA; + + * to the best of your knowledge, each contribution will not violate any + third party's copyrights, trademarks, patents, or other intellectual + property rights; and + + * each contribution shall be in compliance with U.S. export control laws and + other applicable export and import laws. You agree to notify us if you + become aware of any circumstance which would make any of the foregoing + representations inaccurate in any respect. ExplosionAI UG + (haftungsbeschränkt) may publicly disclose your participation in the + project, including the fact that you have signed the SCA. + +6. This SCA is governed by the laws of the State of California and applicable +U.S. Federal law. Any choice of law rules will not apply. + +7. Please place an “x” on one of the applicable statements below. Please do NOT +mark both statements: + + * [ ] I am signing on behalf of myself as an individual and no other person + or entity, including my employer, has or will have rights with respect to my + contributions. + + * [ ] I am signing on behalf of my employer or a legal entity and I have the + actual authority to contractually bind that entity. 
+ +## Contributor Details + +| Field | Entry | +|------------------------------- | -------------------- | +| Name | | +| Company's name (if applicable) | | +| Title or Role (if applicable) | | +| Date | | +| GitHub username | | +| Website (optional) | | From 8239878dce4b7ff50cd606d0bfd6e38911d8c126 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Wed, 9 Nov 2016 16:30:32 +0100 Subject: [PATCH 17/23] Update CONTRIBUTOR_AGREEMENT.md --- .github/CONTRIBUTOR_AGREEMENT.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/CONTRIBUTOR_AGREEMENT.md b/.github/CONTRIBUTOR_AGREEMENT.md index 1ab3af38e..db0e7b0f2 100644 --- a/.github/CONTRIBUTOR_AGREEMENT.md +++ b/.github/CONTRIBUTOR_AGREEMENT.md @@ -78,9 +78,8 @@ took place before the date you sign these terms. * each contribution shall be in compliance with U.S. export control laws and other applicable export and import laws. You agree to notify us if you become aware of any circumstance which would make any of the foregoing - representations inaccurate in any respect. ExplosionAI UG - (haftungsbeschränkt) may publicly disclose your participation in the - project, including the fact that you have signed the SCA. + representations inaccurate in any respect. We may publicly disclose your + participation in the project, including the fact that you have signed the SCA. 6. This SCA is governed by the laws of the State of California and applicable U.S. Federal law. Any choice of law rules will not apply. 
From 592d244484fd9e93ba814dce4f087831ade117b3 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Wed, 9 Nov 2016 16:41:58 +0100 Subject: [PATCH 18/23] Re-add existing contributor agreements --- .github/contributors/NSchrading.md | 98 +++++++++++++++++++++++++++++ .github/contributors/chrisdubois.md | 98 +++++++++++++++++++++++++++++ .github/contributors/suchow.md | 98 +++++++++++++++++++++++++++++ .github/contributors/vsolovyov.md | 98 +++++++++++++++++++++++++++++ 4 files changed, 392 insertions(+) create mode 100644 .github/contributors/NSchrading.md create mode 100644 .github/contributors/chrisdubois.md create mode 100644 .github/contributors/suchow.md create mode 100644 .github/contributors/vsolovyov.md diff --git a/.github/contributors/NSchrading.md b/.github/contributors/NSchrading.md new file mode 100644 index 000000000..304a03a86 --- /dev/null +++ b/.github/contributors/NSchrading.md @@ -0,0 +1,98 @@ +# Syllogism contributor agreement + +This Syllogism Contributor Agreement (**"SCA"**) is based on the +[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf). +The SCA applies to any contribution that you make to any product or project +managed by us (the **"project"**), and sets out the intellectual property rights +you grant to us in the contributed materials. The term **"us"** shall mean +Syllogism Co. The term **"you"** shall mean the person or entity identified +below. + +## Contributor Agreement + +1. The term "contribution" or "contributed materials" means any source code, +object code, patch, tool, sample, graphic, specification, manual, +documentation, or any other material posted or submitted by you to the project. + +2. 
With respect to any worldwide copyrights, or copyright applications and +registrations, in your contribution: + + * you hereby assign to us joint ownership, and to the extent that such + assignment is or becomes invalid, ineffective or unenforceable, you hereby + grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge, + royalty-free, unrestricted license to exercise all rights under those + copyrights. This includes, at our option, the right to sublicense these same + rights to third parties through multiple levels of sublicensees or other + licensing arrangements; + + * you agree that each of us can do all things in relation to your + contribution as if each of us were the sole owners, and if one of us makes + a derivative work of your contribution, the one who makes the derivative + work (or has it made will be the sole owner of that derivative work; + + * you agree that you will not assert any moral rights in your contribution + against us, our licensees or transferees; + + * you agree that we may register a copyright in your contribution and + exercise all ownership rights associated with it; and + + * you agree that neither of us has any duty to consult with, obtain the + consent of, pay or render an accounting to the other for any use or + distribution of your contribution. + +3. With respect to any patents you own, or that you can license without payment +to any third party, you hereby grant to us a perpetual, irrevocable, +non-exclusive, worldwide, no-charge, royalty-free license to: + + * make, have made, use, sell, offer to sell, import, and otherwise transfer + your contribution in whole or in part, alone or in combination with or + included in any product, work or materials arising out of the project to + which your contribution was submitted, and + + * at our option, to sublicense these same rights to third parties through + multiple levels of sublicensees or other licensing arrangements. + +4. 
Except as set out above, you keep all right, title, and interest in your +contribution. The rights that you grant to us under these terms are effective +on the date you first submitted a contribution to us, even if your submission +took place before the date you sign these terms. + +5. You covenant, represent, warrant and agree that: + + * Each contribution that you submit is and shall be an original work of + authorship and you can legally grant the rights set out in this SCA; + + * to the best of your knowledge, each contribution will not violate any + third party's copyrights, trademarks, patents, or other intellectual + property rights; and + + * each contribution shall be in compliance with U.S. export control laws and + other applicable export and import laws. You agree to notify us if you + become aware of any circumstance which would make any of the foregoing + representations inaccurate in any respect. Syllogism Co. may publicly + disclose your participation in the project, including the fact that you have + signed the SCA. + +6. This SCA is governed by the laws of the State of California and applicable +U.S. Federal law. Any choice of law rules will not apply. + +7. Please place an “x” on one of the applicable statement below. Please do NOT +mark both statements: + + * [x] I am signing on behalf of myself as an individual and no other person + or entity, including my employer, has or will have rights with respect my + contributions. + + * [ ] I am signing on behalf of my employer or a legal entity and I have the + actual authority to contractually bind that entity. 
+ +## Contributor Details + +| Field | Entry | +|------------------------------- | -------------------- | +| Name | J Nicolas Schrading | +| Company's name (if applicable) | | +| Title or Role (if applicable) | | +| Date | 2015-08-24 | +| GitHub username | NSchrading | +| Website (optional) | nicschrading.com | diff --git a/.github/contributors/chrisdubois.md b/.github/contributors/chrisdubois.md new file mode 100644 index 000000000..beac1c502 --- /dev/null +++ b/.github/contributors/chrisdubois.md @@ -0,0 +1,98 @@ +# Syllogism contributor agreement + +This Syllogism Contributor Agreement (**"SCA"**) is based on the +[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf). +The SCA applies to any contribution that you make to any product or project +managed by us (the **"project"**), and sets out the intellectual property rights +you grant to us in the contributed materials. The term **"us"** shall mean +Syllogism Co. The term **"you"** shall mean the person or entity identified +below. + +## Contributor Agreement + +1. The term "contribution" or "contributed materials" means any source code, +object code, patch, tool, sample, graphic, specification, manual, +documentation, or any other material posted or submitted by you to the project. + +2. With respect to any worldwide copyrights, or copyright applications and +registrations, in your contribution: + + * you hereby assign to us joint ownership, and to the extent that such + assignment is or becomes invalid, ineffective or unenforceable, you hereby + grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge, + royalty-free, unrestricted license to exercise all rights under those + copyrights. 
This includes, at our option, the right to sublicense these same + rights to third parties through multiple levels of sublicensees or other + licensing arrangements; + + * you agree that each of us can do all things in relation to your + contribution as if each of us were the sole owners, and if one of us makes + a derivative work of your contribution, the one who makes the derivative + work (or has it made will be the sole owner of that derivative work; + + * you agree that you will not assert any moral rights in your contribution + against us, our licensees or transferees; + + * you agree that we may register a copyright in your contribution and + exercise all ownership rights associated with it; and + + * you agree that neither of us has any duty to consult with, obtain the + consent of, pay or render an accounting to the other for any use or + distribution of your contribution. + +3. With respect to any patents you own, or that you can license without payment +to any third party, you hereby grant to us a perpetual, irrevocable, +non-exclusive, worldwide, no-charge, royalty-free license to: + + * make, have made, use, sell, offer to sell, import, and otherwise transfer + your contribution in whole or in part, alone or in combination with or + included in any product, work or materials arising out of the project to + which your contribution was submitted, and + + * at our option, to sublicense these same rights to third parties through + multiple levels of sublicensees or other licensing arrangements. + +4. Except as set out above, you keep all right, title, and interest in your +contribution. The rights that you grant to us under these terms are effective +on the date you first submitted a contribution to us, even if your submission +took place before the date you sign these terms. + +5. 
You covenant, represent, warrant and agree that: + + * Each contribution that you submit is and shall be an original work of + authorship and you can legally grant the rights set out in this SCA; + + * to the best of your knowledge, each contribution will not violate any + third party's copyrights, trademarks, patents, or other intellectual + property rights; and + + * each contribution shall be in compliance with U.S. export control laws and + other applicable export and import laws. You agree to notify us if you + become aware of any circumstance which would make any of the foregoing + representations inaccurate in any respect. Syllogism Co. may publicly + disclose your participation in the project, including the fact that you have + signed the SCA. + +6. This SCA is governed by the laws of the State of California and applicable +U.S. Federal law. Any choice of law rules will not apply. + +7. Please place an “x” on one of the applicable statement below. Please do NOT +mark both statements: + + * [x] I am signing on behalf of myself as an individual and no other person + or entity, including my employer, has or will have rights with respect my + contributions. + + * [ ] I am signing on behalf of my employer or a legal entity and I have the + actual authority to contractually bind that entity. + +## Contributor Details + +| Field | Entry | +|------------------------------- | -------------------- | +| Name | Chris DuBois | +| Company's name (if applicable) | | +| Title or Role (if applicable) | | +| Date | 2015.10.07 | +| GitHub username | chrisdubois | +| Website (optional) | | diff --git a/.github/contributors/suchow.md b/.github/contributors/suchow.md new file mode 100644 index 000000000..b1d5ed000 --- /dev/null +++ b/.github/contributors/suchow.md @@ -0,0 +1,98 @@ +# Syllogism contributor agreement + +This Syllogism Contributor Agreement (**"SCA"**) is based on the +[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf). 
+The SCA applies to any contribution that you make to any product or project +managed by us (the **"project"**), and sets out the intellectual property rights +you grant to us in the contributed materials. The term **"us"** shall mean +Syllogism Co. The term **"you"** shall mean the person or entity identified +below. + +## Contributor Agreement + +1. The term "contribution" or "contributed materials" means any source code, +object code, patch, tool, sample, graphic, specification, manual, +documentation, or any other material posted or submitted by you to the project. + +2. With respect to any worldwide copyrights, or copyright applications and +registrations, in your contribution: + + * you hereby assign to us joint ownership, and to the extent that such + assignment is or becomes invalid, ineffective or unenforceable, you hereby + grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge, + royalty-free, unrestricted license to exercise all rights under those + copyrights. This includes, at our option, the right to sublicense these same + rights to third parties through multiple levels of sublicensees or other + licensing arrangements; + + * you agree that each of us can do all things in relation to your + contribution as if each of us were the sole owners, and if one of us makes + a derivative work of your contribution, the one who makes the derivative + work (or has it made will be the sole owner of that derivative work; + + * you agree that you will not assert any moral rights in your contribution + against us, our licensees or transferees; + + * you agree that we may register a copyright in your contribution and + exercise all ownership rights associated with it; and + + * you agree that neither of us has any duty to consult with, obtain the + consent of, pay or render an accounting to the other for any use or + distribution of your contribution. + +3. 
With respect to any patents you own, or that you can license without payment +to any third party, you hereby grant to us a perpetual, irrevocable, +non-exclusive, worldwide, no-charge, royalty-free license to: + + * make, have made, use, sell, offer to sell, import, and otherwise transfer + your contribution in whole or in part, alone or in combination with or + included in any product, work or materials arising out of the project to + which your contribution was submitted, and + + * at our option, to sublicense these same rights to third parties through + multiple levels of sublicensees or other licensing arrangements. + +4. Except as set out above, you keep all right, title, and interest in your +contribution. The rights that you grant to us under these terms are effective +on the date you first submitted a contribution to us, even if your submission +took place before the date you sign these terms. + +5. You covenant, represent, warrant and agree that: + + * Each contribution that you submit is and shall be an original work of + authorship and you can legally grant the rights set out in this SCA; + + * to the best of your knowledge, each contribution will not violate any + third party's copyrights, trademarks, patents, or other intellectual + property rights; and + + * each contribution shall be in compliance with U.S. export control laws and + other applicable export and import laws. You agree to notify us if you + become aware of any circumstance which would make any of the foregoing + representations inaccurate in any respect. Syllogism Co. may publicly + disclose your participation in the project, including the fact that you have + signed the SCA. + +6. This SCA is governed by the laws of the State of California and applicable +U.S. Federal law. Any choice of law rules will not apply. + +7. Please place an “x” on one of the applicable statement below. 
Please do NOT +mark both statements: + + * [x] I am signing on behalf of myself as an individual and no other person + or entity, including my employer, has or will have rights with respect my + contributions. + + * [ ] I am signing on behalf of my employer or a legal entity and I have the + actual authority to contractually bind that entity. + +## Contributor Details + +| Field | Entry | +|------------------------------- | -------------------- | +| Name | Jordan Suchow | +| Company's name (if applicable) | | +| Title or Role (if applicable) | | +| Date | 2015-04-19 | +| GitHub username | suchow | +| Website (optional) | http://suchow.io | diff --git a/.github/contributors/vsolovyov.md b/.github/contributors/vsolovyov.md new file mode 100644 index 000000000..1ef72f895 --- /dev/null +++ b/.github/contributors/vsolovyov.md @@ -0,0 +1,98 @@ +# Syllogism contributor agreement + +This Syllogism Contributor Agreement (**"SCA"**) is based on the +[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf). +The SCA applies to any contribution that you make to any product or project +managed by us (the **"project"**), and sets out the intellectual property rights +you grant to us in the contributed materials. The term **"us"** shall mean +Syllogism Co. The term **"you"** shall mean the person or entity identified +below. + +## Contributor Agreement + +1. The term "contribution" or "contributed materials" means any source code, +object code, patch, tool, sample, graphic, specification, manual, +documentation, or any other material posted or submitted by you to the project. + +2. 
With respect to any worldwide copyrights, or copyright applications and +registrations, in your contribution: + + * you hereby assign to us joint ownership, and to the extent that such + assignment is or becomes invalid, ineffective or unenforceable, you hereby + grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge, + royalty-free, unrestricted license to exercise all rights under those + copyrights. This includes, at our option, the right to sublicense these same + rights to third parties through multiple levels of sublicensees or other + licensing arrangements; + + * you agree that each of us can do all things in relation to your + contribution as if each of us were the sole owners, and if one of us makes + a derivative work of your contribution, the one who makes the derivative + work (or has it made will be the sole owner of that derivative work; + + * you agree that you will not assert any moral rights in your contribution + against us, our licensees or transferees; + + * you agree that we may register a copyright in your contribution and + exercise all ownership rights associated with it; and + + * you agree that neither of us has any duty to consult with, obtain the + consent of, pay or render an accounting to the other for any use or + distribution of your contribution. + +3. With respect to any patents you own, or that you can license without payment +to any third party, you hereby grant to us a perpetual, irrevocable, +non-exclusive, worldwide, no-charge, royalty-free license to: + + * make, have made, use, sell, offer to sell, import, and otherwise transfer + your contribution in whole or in part, alone or in combination with or + included in any product, work or materials arising out of the project to + which your contribution was submitted, and + + * at our option, to sublicense these same rights to third parties through + multiple levels of sublicensees or other licensing arrangements. + +4. 
Except as set out above, you keep all right, title, and interest in your +contribution. The rights that you grant to us under these terms are effective +on the date you first submitted a contribution to us, even if your submission +took place before the date you sign these terms. + +5. You covenant, represent, warrant and agree that: + + * Each contribution that you submit is and shall be an original work of + authorship and you can legally grant the rights set out in this SCA; + + * to the best of your knowledge, each contribution will not violate any + third party's copyrights, trademarks, patents, or other intellectual + property rights; and + + * each contribution shall be in compliance with U.S. export control laws and + other applicable export and import laws. You agree to notify us if you + become aware of any circumstance which would make any of the foregoing + representations inaccurate in any respect. Syllogism Co. may publicly + disclose your participation in the project, including the fact that you have + signed the SCA. + +6. This SCA is governed by the laws of the State of California and applicable +U.S. Federal law. Any choice of law rules will not apply. + +7. Please place an “x” on one of the applicable statement below. Please do NOT +mark both statements: + + * [x] I am signing on behalf of myself as an individual and no other person + or entity, including my employer, has or will have rights with respect my + contributions. + + * [ ] I am signing on behalf of my employer or a legal entity and I have the + actual authority to contractually bind that entity. 
+ +## Contributor Details + +| Field | Entry | +|------------------------------- | -------------------- | +| Name | Vsevolod Solovyov | +| Company's name (if applicable) | | +| Title or Role (if applicable) | | +| Date | 2015-08-24 | +| GitHub username | vsolovyov | +| Website (optional) | | From 8cd361e319ddfcfe296a5ca11abacae6b2886da6 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Wed, 9 Nov 2016 17:20:35 +0100 Subject: [PATCH 19/23] Fix paths --- .github/CONTRIBUTOR_AGREEMENT.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/CONTRIBUTOR_AGREEMENT.md b/.github/CONTRIBUTOR_AGREEMENT.md index db0e7b0f2..ffe679a87 100644 --- a/.github/CONTRIBUTOR_AGREEMENT.md +++ b/.github/CONTRIBUTOR_AGREEMENT.md @@ -10,9 +10,9 @@ you grant to us in the contributed materials. The term **"us"** shall mean If you agree to be bound by these terms, fill in the information requested below and include the filled-in version with your first pull request, under the -folder [`/.github/contributors/`](/.github/contributors/). The name of the file +folder [`.github/contributors/`](/.github/contributors/). The name of the file should be your GitHub username, with the extension `.md`. For example, the user -example_user would create the file `/.github/contributors/example_user.md`. +example_user would create the file `.github/contributors/example_user.md`. Read this agreement carefully before signing. These terms and conditions constitute a binding legal agreement. From 2c1d9e407c7f8a10117d0c9a5630689a312f014e Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Wed, 9 Nov 2016 17:27:47 +0100 Subject: [PATCH 20/23] Add contributor agreement section --- CONTRIBUTING.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2e665fd76..a20e19e40 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -53,6 +53,11 @@ Coming soon. Coming soon. 
+### Contributor agreement + +If you've made a substantial contribution to spaCy, you should fill in the [spaCy contributor agreement](.github/CONTRIBUTOR_AGREEMENT.md) to ensure that your contribution can be used across the project. If you agree to be bound by the terms of the agreement, fill in the [template](.github/CONTRIBUTOR_AGREEMENT.md) and include it with your pull request, or submit it separately to [`.github/contributors/`](/.github/contributors). The name of the file should be your GitHub username, with the extension `.md`. For example, the user +example_user would create the file `.github/contributors/example_user.md`. + ### Fixing bugs From 1515434eaaa0df397df4cf38e9786ca28eba5524 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Wed, 9 Nov 2016 17:29:20 +0100 Subject: [PATCH 21/23] Fix wording --- .github/CONTRIBUTOR_AGREEMENT.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/CONTRIBUTOR_AGREEMENT.md b/.github/CONTRIBUTOR_AGREEMENT.md index ffe679a87..668b9dba2 100644 --- a/.github/CONTRIBUTOR_AGREEMENT.md +++ b/.github/CONTRIBUTOR_AGREEMENT.md @@ -99,8 +99,8 @@ mark both statements: | Field | Entry | |------------------------------- | -------------------- | | Name | | -| Company's name (if applicable) | | -| Title or Role (if applicable) | | +| Company name (if applicable) | | +| Title or role (if applicable) | | | Date | | | GitHub username | | | Website (optional) | | From 2a3e342c1f8a81fcaa8d0b795e3f958806af6e2b Mon Sep 17 00:00:00 2001 From: tiago Date: Wed, 9 Nov 2016 18:57:50 +0000 Subject: [PATCH 22/23] Added a test case to cover the span.merge returning values --- spacy/tests/spans/test_merge.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/spacy/tests/spans/test_merge.py b/spacy/tests/spans/test_merge.py index 9b792d80b..f96d532cd 100644 --- a/spacy/tests/spans/test_merge.py +++ b/spacy/tests/spans/test_merge.py @@ -52,6 +52,13 @@ def test_np_merges(EN): merged = tokens.merge(start, end, label,
lemma, label) assert merged != None, (start, end, label, lemma) + + tokens = EN(u'One test with entities like New York City so the ents list is not void') + + for span in tokens.ents: + merged = span.merge() + assert merged != None, (span.start, span.end, span.label_, span.lemma_) + @pytest.mark.models def test_entity_merge(EN): tokens = EN(u'Stewart Lee is a stand up comedian who lives in England and loves Joe Pasquale.\n') From cf8ca1338c5b1156369688d905808f6914ffe33d Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Wed, 9 Nov 2016 20:06:35 +0100 Subject: [PATCH 23/23] Update CONTRIBUTORS.md --- CONTRIBUTORS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 184d86686..63ddf26e6 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -17,6 +17,7 @@ This is a list of everyone who has made significant contributions to spaCy, in a * Maxim Samsonov, [@maxirmx](https://github.com/maxirmx) * Oleg Zd, [@olegzd](https://github.com/olegzd) * Sam Bozek, [@sambozek](https://github.com/sambozek) +* Tiago Rodrigues, [@TiagoMRodrigues](https://github.com/TiagoMRodrigues) * Vsevolod Solovyov, [@vsolovyov](https://github.com/vsolovyov) * Wah Loon Keng, [@kengz](https://github.com/kengz) * Wolfgang Seeker, [@wbwseeker](https://github.com/wbwseeker)