From f8322a69e77719c185b3a97efcab322fd1207636 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Fri, 21 Oct 2016 00:58:24 +0200 Subject: [PATCH 01/11] Rename "English" section to "Language" --- .../{_api-english.jade => _api-language.jade} | 214 ++++++++---------- website/docs/_data.json | 2 +- website/docs/index.jade | 2 +- 3 files changed, 101 insertions(+), 117 deletions(-) rename website/docs/{_api-english.jade => _api-language.jade} (51%) diff --git a/website/docs/_api-english.jade b/website/docs/_api-language.jade similarity index 51% rename from website/docs/_api-english.jade rename to website/docs/_api-language.jade index 2a951a8a4..fae3916b2 100644 --- a/website/docs/_api-english.jade +++ b/website/docs/_api-language.jade @@ -1,150 +1,134 @@ //- ---------------------------------- -//- 💫 DOCS > API > ENGLISH +//- 💫 DOCS > API > LANGUAGE //- ---------------------------------- -+section("english") - +h(2, "english", "https://github.com/" + SOCIAL.github + "/spaCy/blob/master/spacy/language.py") - | #[+tag class] English(Language) ++section("language") + +h(2, "language", "https://github.com/" + SOCIAL.github + "/spaCy/blob/master/spacy/language.py") + | #[+tag class] Language p. - The English analysis pipeline. Usually you"ll load this once per process, - and pass the instance around your program. + A pipeline that transforms text strings into annotated spaCy Doc objects. Usually you'll load the Language pipeline once and pass the instance around your program. +code("python", "Overview"). 
class Language: - lang = None - def __init__(self, data_dir=None, tokenizer=None, tagger=None, parser=None, entity=None, matcher=None): - return self + Defaults = BaseDefaults - def __call__(self, text, tag=True, parse=True, entity=True): - return Doc() + def __init__(self, path=True, **overrides): + self.vocab = Vocab() + self.tokenizer = Tokenizer() + self.tagger = Tagger() + self.parser = DependencyParser() + self.entity = EntityRecognizer() + self.make_doc = lambda text: Doc() + self.pipeline = [self.tagger, self.parser, self.entity] - def pipe(self, texts_iterator, batch_size=1000, n_threads=2): - yield Doc() + def __call__(self, text, **toggle): + doc = self.make_doc(text) + for process in self.pipeline: + if toggle.get(process.name, True): + process(doc) + return doc - def end_training(self, data_dir=None): + def pipe(self, texts_iterator, batch_size=1000, n_threads=2, **toggle): + docs = (self.make_doc(text) for text in texts_iterator) + for process in self.pipeline: + if toggle.get(process.name, True): + docs = process.pipe(docs, batch_size=batch_size, n_threads=n_threads) + for doc in docs: + yield doc + + def end_training(self, path=None): return None - class English(Language): - lang = "en" + class English(Language): + class Defaults(BaseDefaults): + pass - class German(Language): - lang = "de" + class German(Language): + class Defaults(BaseDefaults): + pass +section("english-init") +h(3, "english-init") - | #[+tag method] English.__init__ + | #[+tag method] Language.__init__ p - | Load the pipeline. Each component can be passed - | as an argument, or left as #[code None], in which case it will be loaded - | from a classmethod, named e.g. #[code default_vocab()]. + | Load the pipeline. You can disable components by passing None as a value, + | e.g. pass parser=None, vectors=None to save memory if you're not using + | those components. You can also pass an object as the value. 
+ | Pass a function create_pipeline to use a custom pipeline --- see + | the custom pipeline tutorial. +aside("Efficiency"). Loading takes 10-20 seconds, and the instance consumes 2 to 3 gigabytes of memory. Intended use is for one instance to be created for each language per process, but you can create more - if you"re doing something unusual. You may wish to make the + if you're doing something unusual. You may wish to make the instance a global variable or "singleton". +table(["Example", "Description"]) +row - +cell #[code.lang-python nlp = English()] - +cell Load everything, from default package + +cell #[code nlp = English()] + +cell Load everything, from default path. +row - +cell #[code.lang-python nlp = English(data_dir='my_data')] - +cell Load everything, from specified dir + +cell #[code nlp = English(path='my_data')] + +cell Load everything, from specified path +row - +cell #[code.lang-python nlp = English(parser=False)] - +cell Load everything except the parser. + +cell #[code nlp = English(path=path_obj)] + +cell Load everything, from an object that follows the #[code pathlib.Path] protocol. +row - +cell #[code.lang-python nlp = English(parser=False, tagger=False)] - +cell Load everything except the parser and tagger. + +cell #[code nlp = English(parser=False, vectors=False)] + +cell Load everything except the parser and the word vectors. +row - +cell #[code.lang-python nlp = English(parser=MyParser())] - +cell Supply your own parser + +cell #[code nlp = English(parser=my_parser)] + +cell Load everything, and use a custom parser. + + +row + +cell #[code nlp = English(create_pipeline=my_pipeline)] + +cell Load everything, and use a custom pipeline. +code("python", "Definition"). 
- def __init__(self, data_dir=None, tokenizer=None, tagger=None, parser=None, entity=None, matcher=None): - return self + def __init__(self, path=True, **overrides): + D = self.Defaults + self.vocab = Vocab(path=path, parent=self, **D.vocab) \ + if 'vocab' not in overrides \ + else overrides['vocab'] + self.tokenizer = Tokenizer(self.vocab, path=path, **D.tokenizer) \ + if 'tokenizer' not in overrides \ + else overrides['tokenizer'] + self.tagger = Tagger(self.vocab, path=path, **D.tagger) \ + if 'tagger' not in overrides \ + else overrides['tagger'] + self.parser = DependencyParser(self.vocab, path=path, **D.parser) \ + if 'parser' not in overrides \ + else overrides['parser'] + self.entity = EntityRecognizer(self.vocab, path=path, **D.entity) \ + if 'entity' not in overrides \ + else overrides['entity'] + self.matcher = Matcher(self.vocab, path=path, **D.matcher) \ + if 'matcher' not in overrides \ + else overrides['matcher'] - +table(["Arg", "Type", "Description"]) - +row - +cell data_dir - +cell str - +cell. - The data directory. If None, value is obtained via the - #[code default_data_dir()] method. + if 'make_doc' in overrides: + self.make_doc = overrides['make_doc'] + elif 'create_make_doc' in overrides: + self.make_doc = overrides['create_make_doc'](self) + else: + self.make_doc = lambda text: self.tokenizer(text) + if 'pipeline' in overrides: + self.pipeline = overrides['pipeline'] + elif 'create_pipeline' in overrides: + self.pipeline = overrides['create_pipeline'](self) + else: + self.pipeline = [self.tagger, self.parser, self.matcher, self.entity] - +row - +cell vocab - +cell #[code Vocab] - +cell. - The vocab object, which should be an instance of class - #[code spacy.vocab.Vocab]. If #[code None], the object is - obtained from the #[code default_vocab()] class method. The - vocab object manages all of the language specific rules and - definitions, maintains the cache of lexical types, and manages - the word vectors. 
Because the vocab owns this important data, - most objects hold a reference to the vocab. - - +row - +cell tokenizer - +cell #[code Tokenizer] - +cell. - The tokenizer, which should be a callable that accepts a - unicode string, and returns a #[code Doc] object. If set to - #[code None], the default tokenizer is constructed from the - #[code default_tokenizer()] method. - - +row - +cell tagger - +cell #[code Tagger] - +cell. - The part-of-speech tagger, which should be a callable that - accepts a #[code Doc] object, and sets the part-of-speech - tags in-place. If set to None, the default tagger is constructed - from the #[code default_tagger()] method. - - +row - +cell parser - +cell #[code Parser] - +cell. - The dependency parser, which should be a callable that accepts - a #[code Doc] object, and sets the sentence boundaries, - syntactic heads and dependency labels in-place. - If set to #[code None], the default parser is - constructed from the #[code default_parser()] method. To disable - the parser and prevent it from being loaded, pass #[code parser=False]. - - +row - +cell entity - +cell #[code Parser] - +cell. - The named entity recognizer, which should be a callable that - accepts a #[code Doc] object, and sets the named entity annotations - in-place. If set to None, the default entity recognizer is - constructed from the #[code default_entity()] method. To disable - the entity recognizer and prevent it from being loaded, pass - #[code entity=False]. - - +row - +cell matcher - +cell #[code Matcher] - +cell. - The pattern matcher, which should be a callable that accepts - a #[code Doc] object, and sets named entity annotations in-place - using token-based rules. If set - to None, the default matcher is constructed from the - #[code default_matcher()] method. 
- - +section("english-call") - +h(3, "english-call") - | #[+tag method] English.__call__ + +section("language-call") + +h(3, "language-call") + | #[+tag method] Language.__call__ p | The main entry point to spaCy. Takes raw unicode text, and returns @@ -152,30 +136,30 @@ | and #[code Span] objects. +aside("Efficiency"). - spaCy"s algorithms are all linear-time, so you can supply + spaCy's algorithms are all linear-time, so you can supply documents of arbitrary length, e.g. whole novels. +table(["Example", "Description"], "code") +row - +cell #[code.lang-python doc = nlp(u'Some text.')] + +cell #[code doc = nlp(u'Some text.')] +cell Apply the full pipeline. +row - +cell #[code.lang-python doc = nlp(u'Some text.', parse=False)] + +cell #[code doc = nlp(u'Some text.', parse=False)] +cell Applies tagger and entity, not parser +row - +cell #[code.lang-python doc = nlp(u'Some text.', entity=False)] + +cell #[code doc = nlp(u'Some text.', entity=False)] +cell Applies tagger and parser, not entity. +row - +cell #[code.lang-python doc = nlp(u'Some text.', tag=False)] + +cell #[code doc = nlp(u'Some text.', tag=False)] +cell Does not apply tagger, entity or parser +row - +cell #[code.lang-python doc = nlp(u'')] + +cell #[code doc = nlp(u'')] +cell Zero-length tokens, not an error +row - +cell #[code.lang-python doc = nlp(b'Some text')] + +cell #[code doc = nlp(b'Some text')] +cell Error: need unicode +row - +cell #[code.lang-python doc = nlp(b'Some text'.decode('utf8'))] + +cell #[code doc = nlp(b'Some text'.decode('utf8'))] +cell Decode bytes into unicode first. +code("python", "Definition"). 
diff --git a/website/docs/_data.json b/website/docs/_data.json index cde95e48b..37cafbc20 100644 --- a/website/docs/_data.json +++ b/website/docs/_data.json @@ -8,7 +8,7 @@ ["Usage Examples", "#examples", "examples"] ], "API": [ - ["English", "#english", "english"], + ["Language", "#language", "language"], ["Doc", "#doc", "doc"], ["Token", "#token", "token"], ["Span", "#span", "span"], diff --git a/website/docs/index.jade b/website/docs/index.jade index 043021193..9d745777e 100644 --- a/website/docs/index.jade +++ b/website/docs/index.jade @@ -13,7 +13,7 @@ include _quickstart-examples +h(2, "api") API -include _api-english +include _api-language include _api-doc include _api-token include _api-span From 2251abb2367638b7bcce833cb2774f8790c0df85 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Fri, 21 Oct 2016 00:58:38 +0200 Subject: [PATCH 02/11] Update training tutorial --- website/docs/tutorials/training.jade | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/docs/tutorials/training.jade b/website/docs/tutorials/training.jade index 7e26da2ed..2d23853b2 100644 --- a/website/docs/tutorials/training.jade +++ b/website/docs/tutorials/training.jade @@ -27,12 +27,13 @@ p #[+a("https://github.com/" + SOCIAL.github + "/spaCy/examples/training/train_t from spacy.vocab import Vocab from spacy.pipeline import EntityRecognizer from spacy.tokens import Doc + from spacy.gold import GoldParse vocab = Vocab() entity = EntityRecognizer(vocab, entity_types=['PERSON', 'LOC']) doc = Doc(vocab, words=['Who', 'is', 'Shaka', 'Khan', '?']) - entity.update(doc, ['O', 'O', 'B-PERSON', 'L-PERSON', 'O']) + entity.update(doc, GoldParse(doc, entities=['O', 'O', 'B-PERSON', 'L-PERSON', 'O'])) entity.model.end_training() @@ -49,8 +50,7 @@ p #[+a("https://github.com/" + SOCIAL.github + "/spaCy/examples/training/train_n parser = DependencyParser(vocab, labels=['nsubj', 'compound', 'dobj', 'punct']) doc = Doc(vocab, words=['Who', 'is', 'Shaka', 'Khan', '?']) - 
parser.update(doc, [(1, 'nsubj'), (1, 'ROOT'), (3, 'compound'), (1, 'dobj'), - (1, 'punct')]) + parser.update(doc, GoldParse(doc, heads=[1, 1, 3, 1, 1,], deps=['nsubj', 'ROOT', 'compound', 'dobj', 'punct'])) parser.model.end_training() From 0373d237277b2aaeae38f6f94d22f06ec12ef0da Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Fri, 21 Oct 2016 00:58:49 +0200 Subject: [PATCH 03/11] Update code in rule-based matcher tutorial --- website/docs/tutorials/rule-based-matcher.jade | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/tutorials/rule-based-matcher.jade b/website/docs/tutorials/rule-based-matcher.jade index f0f83d0cd..8c8949631 100644 --- a/website/docs/tutorials/rule-based-matcher.jade +++ b/website/docs/tutorials/rule-based-matcher.jade @@ -4,7 +4,7 @@ p.u-text-large spaCy features a rule-matching engine that operates over tokens. +code("python", "Matcher Example"). from spacy.matcher import Matcher - from spacy.attributes import * + from spacy.attrs import * import spacy nlp = spacy.load('en', parser=False, entity=False) From 593f7eb4137ca6fa830aa3ab4407f22f5a08ec68 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Fri, 21 Oct 2016 00:59:07 +0200 Subject: [PATCH 04/11] Update installation docs --- website/docs/_quickstart-install.jade | 93 ++++++--------------------- 1 file changed, 19 insertions(+), 74 deletions(-) diff --git a/website/docs/_quickstart-install.jade b/website/docs/_quickstart-install.jade index 2e746e6cd..f5931ff40 100644 --- a/website/docs/_quickstart-install.jade +++ b/website/docs/_quickstart-install.jade @@ -12,78 +12,35 @@ p. spaCy is compatible with 64-bit CPython 2.6+/3.3+ and runs on Unix/Linux, - OS X and Windows. Source and binary packages are available via - #[+a("https://pypi.python.org/pypi/spacy") pip] and - #[+a("https://anaconda.org/spacy/spacy") conda]. If there are - no binary packages for your platform available please make sure that you have - a working build enviroment set up. 
See - notes on #[a(href="/docs#install-source-ubuntu") Ubuntu], + OS X and Windows. The latest spaCy releases are currently only available as source packages over #[+a("https://pypi.python.org/pypi/spacy") pip]. Installation requires a working build environment. See notes on #[a(href="/docs#install-source-ubuntu") Ubuntu], #[a(href="/docs#install-source-osx") OS X] and #[a(href="/docs#install-source-windows") Windows] for details. - +code("bash", "conda"). - conda config --add channels spacy # only needed once - conda install spacy - - p. - When using pip it is generally recommended to install packages in a - #[+a("https://virtualenv.readthedocs.org/en/latest/") virtualenv] - to avoid modifying system state: - - +code("bash", "pip"). - # make sure you are using a recent pip/virtualenv version - python -m pip install -U pip virtualenv - - virtualenv .env - source .env/bin/activate - - pip install spacy - - p. - Python packaging is awkward at the best of times, and it's particularly - tricky with C extensions, built via Cython, requiring large data files. - So, please report issues as you encounter them. - - +section("install-model") - +h(3, "install-model") - | Install model - - p. - After installation you need to download a language model. - Currently only models for English and German, named #[code en] and #[code de], are available. Please get in touch with us if you need support for a particular language. - - +code("bash"). - sputnik --name spacy --repository-url http://index.spacy.io install en==1.1.0 - - p. - Then check whether the model was successfully installed: - - +code("bash"). - python -c "import spacy; spacy.load('en'); print('OK')" - - p. - The download command fetches and installs about 500 MB of data which it installs - within the #[code spacy] package directory. - - +section("install-upgrade") - +h(3, "install-upgrade") - | Upgrading spaCy - - p. - To upgrade spaCy to the latest release: - - +code("bash", "conda"). 
- conda update spacy - +code("bash", "pip"). pip install -U spacy p. - Sometimes new releases require a new language model. Then you will have to upgrade to - a new model, too. You can also force re-downloading and installing a new language model: + After installation you need to download a language model. Models for English (#[code en]) and German (#[code de]) are available. +code("bash"). + # English: + # - Install tagger, parser, NER and GloVe vectors: + python -m spacy.en.download all + # - OR install English tagger, parser and NER + python -m spacy.en.download parser + # - OR install English GloVe vectors + python -m spacy.en.download glove + # German: + # - Install German tagger, parser, NER and word vectors + python -m spacy.de.download all + # Upgrade/overwrite existing data python -m spacy.en.download --force + # Check whether the model was successfully installed + python -c "import spacy; spacy.load('en'); print('OK')" + + p. + The download command fetches and installs about 1 GB of data which it installs + within the #[code spacy] package directory. +section("install-source") +h(3, "install-source") @@ -144,18 +101,6 @@ used to compile your Python interpreter. For official distributions these are VS 2008 (Python 2.7), VS 2010 (Python 3.4) and VS 2015 (Python 3.5). - +section("install-obsolete-python") - +h(3, "install-obsolete-python") - | Workaround for obsolete system Python - - p. - If you're stuck using a system with an old version of Python, and you - don't have root access, we've prepared a bootstrap script to help you - compile a local Python install. Run: - - +code("bash"). 
- curl https://raw.githubusercontent.com/spacy-io/gist/master/bootstrap_python_env.sh | bash && source .env/bin/activate - +section("run-tests") +h(3, "run-tests") | Run tests From cd5e47050806d3cde9fe76e1b34cff8115366f52 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Fri, 21 Oct 2016 00:59:27 +0200 Subject: [PATCH 05/11] Add section object with larger margin --- website/_includes/_mixins/_base.jade | 2 +- website/assets/css/_base/_objects.sass | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/website/_includes/_mixins/_base.jade b/website/_includes/_mixins/_base.jade index 4ab081daa..4d8d7c571 100644 --- a/website/_includes/_mixins/_base.jade +++ b/website/_includes/_mixins/_base.jade @@ -14,7 +14,7 @@ mixin a(url, trusted) block - section content (block and inline elements) mixin section(id) - section.o-block(id=(id) ? 'section-' + id : '')&attributes(attributes) + section.o-section(id=(id) ? 'section-' + id : '')&attributes(attributes) block diff --git a/website/assets/css/_base/_objects.sass b/website/assets/css/_base/_objects.sass index 914a372c7..87f265f40 100644 --- a/website/assets/css/_base/_objects.sass +++ b/website/assets/css/_base/_objects.sass @@ -45,6 +45,9 @@ .o-block-small margin-bottom: 2rem +.o-section + margin-bottom: 12.5rem + .o-responsive overflow: auto width: 100% From 4c3682a3622e79672fa7f5bd9c9140a4b7466e70 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Fri, 21 Oct 2016 00:59:39 +0200 Subject: [PATCH 06/11] Update version number and stargazers --- website/_harp.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/_harp.json b/website/_harp.json index 1ceb54023..753351b46 100644 --- a/website/_harp.json +++ b/website/_harp.json @@ -22,8 +22,8 @@ "DEFAULT_SYNTAX" : "python", "ANALYTICS": "UA-58931649-1", - "SPACY_VERSION": "0.101.0", - "SPACY_STARS": "2300", + "SPACY_VERSION": "1.0", + "SPACY_STARS": "2500", "GITHUB": { "user": "explosion", "repo": "spacy" } } } From 
ace0b2de928a1e0a1523d7ca1bbacd70c7697560 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Fri, 21 Oct 2016 01:00:16 +0200 Subject: [PATCH 07/11] Remove contributors folder (temporary, until copy is fixed) --- contributors/NSchrading.md | 95 ------------------------------------ contributors/chrisdubois.md | 95 ------------------------------------ contributors/cla.md | 13 ----- contributors/cla_template.md | 95 ------------------------------------ contributors/suchow.md | 95 ------------------------------------ contributors/vsolovyov.md | 95 ------------------------------------ 6 files changed, 488 deletions(-) delete mode 100644 contributors/NSchrading.md delete mode 100644 contributors/chrisdubois.md delete mode 100644 contributors/cla.md delete mode 100644 contributors/cla_template.md delete mode 100644 contributors/suchow.md delete mode 100644 contributors/vsolovyov.md diff --git a/contributors/NSchrading.md b/contributors/NSchrading.md deleted file mode 100644 index cf2ece8e6..000000000 --- a/contributors/NSchrading.md +++ /dev/null @@ -1,95 +0,0 @@ -Syllogism Contributor Agreement -=============================== - -This Syllogism Contributor Agreement (“SCA”) is based on the Oracle Contributor -Agreement. The SCA applies to any contribution that you make to any product or -project managed by us (the “project”), and sets out the intellectual property -rights you grant to us in the contributed materials. The term “us” shall mean -Syllogism Co. The term "you" shall mean the person or entity identified below. -If you agree to be bound by these terms, fill in the information requested below -and include the filled-in version with your first pull-request, under the file -contrbutors/. The name of the file should be your GitHub username, with the -extension .md. For example, the user example_user would create the file -spaCy/contributors/example_user.md . - -Read this agreement carefully before signing. 
These terms and conditions -constitute a binding legal agreement. - -1. The term 'contribution' or ‘contributed materials’ means any source code, -object code, patch, tool, sample, graphic, specification, manual, documentation, -or any other material posted or submitted by you to the project. - -2. With respect to any worldwide copyrights, or copyright applications and registrations, -in your contribution: - * you hereby assign to us joint ownership, and to the extent that such assignment - is or becomes invalid, ineffective or unenforceable, you hereby grant to us a perpetual, - irrevocable, non-exclusive, worldwide, no-charge, royalty-free, unrestricted license - to exercise all rights under those copyrights. This includes, at our option, the - right to sublicense these same rights to third parties through multiple levels of - sublicensees or other licensing arrangements; - - * you agree that each of us can do all things in relation to your contribution - as if each of us were the sole owners, and if one of us makes a derivative work - of your contribution, the one who makes the derivative work (or has it made) will - be the sole owner of that derivative work; - - * you agree that you will not assert any moral rights in your contribution against - us, our licensees or transferees; - - * you agree that we may register a copyright in your contribution and exercise - all ownership rights associated with it; and - - * you agree that neither of us has any duty to consult with, obtain the consent - of, pay or render an accounting to the other for any use or distribution of your - contribution. - -3. 
With respect to any patents you own, or that you can license without payment -to any third party, you hereby grant to us a perpetual, irrevocable, non-exclusive, -worldwide, no-charge, royalty-free license to: - - * make, have made, use, sell, offer to sell, import, and otherwise transfer your - contribution in whole or in part, alone or in combination with - or included in any product, work or materials arising out of the project to - which your contribution was submitted, and - - * at our option, to sublicense these same rights to third parties through multiple - levels of sublicensees or other licensing arrangements. - -4. Except as set out above, you keep all right, title, and interest in your -contribution. The rights that you grant to us under these terms are effective on -the date you first submitted a contribution to us, even if your submission took -place before the date you sign these terms. - -5. You covenant, represent, warrant and agree that: - - * Each contribution that you submit is and shall be an original work of authorship - and you can legally grant the rights set out in this SCA; - - * to the best of your knowledge, each contribution will not violate any third - party's copyrights, trademarks, patents, or other intellectual property rights; and - - * each contribution shall be in compliance with U.S. export control laws and other - applicable export and import laws. You agree to notify us if you become aware of - any circumstance which would make any of the foregoing representations inaccurate - in any respect. Syllogism Co. may publicly disclose your participation in the project, - including the fact that you have signed the SCA. - -6. This SCA is governed by the laws of the State of California and applicable U.S. - Federal law. Any choice of law rules will not apply. - -7. Please place an “x” on one of the applicable statement below. 
Please do NOT -mark both statements: - -_x__ I am signing on behalf of myself as an individual and no other person or entity, including my employer, has or will have rights with respect my contributions. - -____ I am signing on behalf of my employer or a legal entity and I have the actual authority to contractually bind that entity. - -| Field | Entry | -|------------------------------- | -------------------- | -| Name | J Nicolas Schrading | -| Company's name (if applicable) | | -| Title or Role (if applicable) | | -| Date | 2015-08-24 | -| GitHub username | NSchrading | -| Website (optional) | nicschrading.com | - diff --git a/contributors/chrisdubois.md b/contributors/chrisdubois.md deleted file mode 100644 index 2ffa8869d..000000000 --- a/contributors/chrisdubois.md +++ /dev/null @@ -1,95 +0,0 @@ -Syllogism Contributor Agreement -=============================== - -This Syllogism Contributor Agreement (“SCA”) is based on the Oracle Contributor -Agreement. The SCA applies to any contribution that you make to any product or -project managed by us (the “project”), and sets out the intellectual property -rights you grant to us in the contributed materials. The term “us” shall mean -Syllogism Co. The term "you" shall mean the person or entity identified below. -If you agree to be bound by these terms, fill in the information requested below -and include the filled-in version with your first pull-request, under the file -contrbutors/. The name of the file should be your GitHub username, with the -extension .md. For example, the user example_user would create the file -spaCy/contributors/example_user.md . - -Read this agreement carefully before signing. These terms and conditions -constitute a binding legal agreement. - -1. The term 'contribution' or ‘contributed materials’ means any source code, -object code, patch, tool, sample, graphic, specification, manual, documentation, -or any other material posted or submitted by you to the project. - -2. 
With respect to any worldwide copyrights, or copyright applications and registrations, -in your contribution: - * you hereby assign to us joint ownership, and to the extent that such assignment - is or becomes invalid, ineffective or unenforceable, you hereby grant to us a perpetual, - irrevocable, non-exclusive, worldwide, no-charge, royalty-free, unrestricted license - to exercise all rights under those copyrights. This includes, at our option, the - right to sublicense these same rights to third parties through multiple levels of - sublicensees or other licensing arrangements; - - * you agree that each of us can do all things in relation to your contribution - as if each of us were the sole owners, and if one of us makes a derivative work - of your contribution, the one who makes the derivative work (or has it made) will - be the sole owner of that derivative work; - - * you agree that you will not assert any moral rights in your contribution against - us, our licensees or transferees; - - * you agree that we may register a copyright in your contribution and exercise - all ownership rights associated with it; and - - * you agree that neither of us has any duty to consult with, obtain the consent - of, pay or render an accounting to the other for any use or distribution of your - contribution. - -3. With respect to any patents you own, or that you can license without payment -to any third party, you hereby grant to us a perpetual, irrevocable, non-exclusive, -worldwide, no-charge, royalty-free license to: - - * make, have made, use, sell, offer to sell, import, and otherwise transfer your - contribution in whole or in part, alone or in combination with - or included in any product, work or materials arising out of the project to - which your contribution was submitted, and - - * at our option, to sublicense these same rights to third parties through multiple - levels of sublicensees or other licensing arrangements. - -4. 
Except as set out above, you keep all right, title, and interest in your -contribution. The rights that you grant to us under these terms are effective on -the date you first submitted a contribution to us, even if your submission took -place before the date you sign these terms. - -5. You covenant, represent, warrant and agree that: - - * Each contribution that you submit is and shall be an original work of authorship - and you can legally grant the rights set out in this SCA; - - * to the best of your knowledge, each contribution will not violate any third - party's copyrights, trademarks, patents, or other intellectual property rights; and - - * each contribution shall be in compliance with U.S. export control laws and other - applicable export and import laws. You agree to notify us if you become aware of - any circumstance which would make any of the foregoing representations inaccurate - in any respect. Syllogism Co. may publicly disclose your participation in the project, - including the fact that you have signed the SCA. - -6. This SCA is governed by the laws of the State of California and applicable U.S. - Federal law. Any choice of law rules will not apply. - -7. Please place an “x” on one of the applicable statement below. Please do NOT -mark both statements: - -x I am signing on behalf of myself as an individual and no other person or entity, including my employer, has or will have rights with respect my contributions. - -____ I am signing on behalf of my employer or a legal entity and I have the actual authority to contractually bind that entity. 
- -| Field | Entry | -|------------------------------- | -------------------- | -| Name | Chris DuBois | -| Company's name (if applicable) | | -| Title or Role (if applicable) | | -| Date | 2015.10.07 | -| GitHub username | chrisdubois | -| Website (optional) | | - diff --git a/contributors/cla.md b/contributors/cla.md deleted file mode 100644 index 27b522dc8..000000000 --- a/contributors/cla.md +++ /dev/null @@ -1,13 +0,0 @@ -Signing the Contributors License Agreement -========================================== - -SpaCy is a commercial open-source project, owned by Syllogism Co. We require that contributors to SpaCy sign our Contributors License Agreement, which is based on the Oracle Contributor Agreement. - -The CLA must be signed on your first pull request. To do this, simply fill in the file cla_template.md, and include the filed in form in your first pull request. - - $ git clone https://github.com/honnibal/spaCy - $ cp spaCy/contributors/cla_template.md spaCy/contributors/.md - .md> - $ git add -A spaCy/contributors/.md - -Now finish your pull request, and you're done. diff --git a/contributors/cla_template.md b/contributors/cla_template.md deleted file mode 100644 index fca6771de..000000000 --- a/contributors/cla_template.md +++ /dev/null @@ -1,95 +0,0 @@ -Syllogism Contributor Agreement -=============================== - -This Syllogism Contributor Agreement (“SCA”) is based on the Oracle Contributor -Agreement. The SCA applies to any contribution that you make to any product or -project managed by us (the “project”), and sets out the intellectual property -rights you grant to us in the contributed materials. The term “us” shall mean -Syllogism Co. The term "you" shall mean the person or entity identified below. -If you agree to be bound by these terms, fill in the information requested below -and include the filled-in version with your first pull-request, under the file -contrbutors/. 
The name of the file should be your GitHub username, with the -extension .md. For example, the user example_user would create the file -spaCy/contributors/example_user.md . - -Read this agreement carefully before signing. These terms and conditions -constitute a binding legal agreement. - -1. The term 'contribution' or ‘contributed materials’ means any source code, -object code, patch, tool, sample, graphic, specification, manual, documentation, -or any other material posted or submitted by you to the project. - -2. With respect to any worldwide copyrights, or copyright applications and registrations, -in your contribution: - * you hereby assign to us joint ownership, and to the extent that such assignment - is or becomes invalid, ineffective or unenforceable, you hereby grant to us a perpetual, - irrevocable, non-exclusive, worldwide, no-charge, royalty-free, unrestricted license - to exercise all rights under those copyrights. This includes, at our option, the - right to sublicense these same rights to third parties through multiple levels of - sublicensees or other licensing arrangements; - - * you agree that each of us can do all things in relation to your contribution - as if each of us were the sole owners, and if one of us makes a derivative work - of your contribution, the one who makes the derivative work (or has it made) will - be the sole owner of that derivative work; - - * you agree that you will not assert any moral rights in your contribution against - us, our licensees or transferees; - - * you agree that we may register a copyright in your contribution and exercise - all ownership rights associated with it; and - - * you agree that neither of us has any duty to consult with, obtain the consent - of, pay or render an accounting to the other for any use or distribution of your - contribution. - -3. 
With respect to any patents you own, or that you can license without payment -to any third party, you hereby grant to us a perpetual, irrevocable, non-exclusive, -worldwide, no-charge, royalty-free license to: - - * make, have made, use, sell, offer to sell, import, and otherwise transfer your - contribution in whole or in part, alone or in combination with - or included in any product, work or materials arising out of the project to - which your contribution was submitted, and - - * at our option, to sublicense these same rights to third parties through multiple - levels of sublicensees or other licensing arrangements. - -4. Except as set out above, you keep all right, title, and interest in your -contribution. The rights that you grant to us under these terms are effective on -the date you first submitted a contribution to us, even if your submission took -place before the date you sign these terms. - -5. You covenant, represent, warrant and agree that: - - * Each contribution that you submit is and shall be an original work of authorship - and you can legally grant the rights set out in this SCA; - - * to the best of your knowledge, each contribution will not violate any third - party's copyrights, trademarks, patents, or other intellectual property rights; and - - * each contribution shall be in compliance with U.S. export control laws and other - applicable export and import laws. You agree to notify us if you become aware of - any circumstance which would make any of the foregoing representations inaccurate - in any respect. Syllogism Co. may publicly disclose your participation in the project, - including the fact that you have signed the SCA. - -6. This SCA is governed by the laws of the State of California and applicable U.S. - Federal law. Any choice of law rules will not apply. - -7. Please place an “x” on one of the applicable statement below. 
Please do NOT -mark both statements: - -____ I am signing on behalf of myself as an individual and no other person or entity, including my employer, has or will have rights with respect my contributions. - -____ I am signing on behalf of my employer or a legal entity and I have the actual authority to contractually bind that entity. - -| Field | Entry | -|------------------------------- | -------------------- | -| Name | | -| Company's name (if applicable) | | -| Title or Role (if applicable) | | -| Date | | -| GitHub username | | -| Website (optional) | | - diff --git a/contributors/suchow.md b/contributors/suchow.md deleted file mode 100644 index 099e78c2c..000000000 --- a/contributors/suchow.md +++ /dev/null @@ -1,95 +0,0 @@ -Syllogism Contributor Agreement -=============================== - -This Syllogism Contributor Agreement (“SCA”) is based on the Oracle Contributor -Agreement. The SCA applies to any contribution that you make to any product or -project managed by us (the “project”), and sets out the intellectual property -rights you grant to us in the contributed materials. The term “us” shall mean -Syllogism Co. The term "you" shall mean the person or entity identified below. -If you agree to be bound by these terms, fill in the information requested below -and include the filled-in version with your first pull-request, under the file -contrbutors/. The name of the file should be your GitHub username, with the -extension .md. For example, the user example_user would create the file -spaCy/contributors/example_user.md . - -Read this agreement carefully before signing. These terms and conditions -constitute a binding legal agreement. - -1. The term 'contribution' or ‘contributed materials’ means any source code, -object code, patch, tool, sample, graphic, specification, manual, documentation, -or any other material posted or submitted by you to the project. - -2. 
With respect to any worldwide copyrights, or copyright applications and registrations, -in your contribution: - * you hereby assign to us joint ownership, and to the extent that such assignment - is or becomes invalid, ineffective or unenforceable, you hereby grant to us a perpetual, - irrevocable, non-exclusive, worldwide, no-charge, royalty-free, unrestricted license - to exercise all rights under those copyrights. This includes, at our option, the - right to sublicense these same rights to third parties through multiple levels of - sublicensees or other licensing arrangements; - - * you agree that each of us can do all things in relation to your contribution - as if each of us were the sole owners, and if one of us makes a derivative work - of your contribution, the one who makes the derivative work (or has it made) will - be the sole owner of that derivative work; - - * you agree that you will not assert any moral rights in your contribution against - us, our licensees or transferees; - - * you agree that we may register a copyright in your contribution and exercise - all ownership rights associated with it; and - - * you agree that neither of us has any duty to consult with, obtain the consent - of, pay or render an accounting to the other for any use or distribution of your - contribution. - -3. With respect to any patents you own, or that you can license without payment -to any third party, you hereby grant to us a perpetual, irrevocable, non-exclusive, -worldwide, no-charge, royalty-free license to: - - * make, have made, use, sell, offer to sell, import, and otherwise transfer your - contribution in whole or in part, alone or in combination with - or included in any product, work or materials arising out of the project to - which your contribution was submitted, and - - * at our option, to sublicense these same rights to third parties through multiple - levels of sublicensees or other licensing arrangements. - -4. 
Except as set out above, you keep all right, title, and interest in your -contribution. The rights that you grant to us under these terms are effective on -the date you first submitted a contribution to us, even if your submission took -place before the date you sign these terms. - -5. You covenant, represent, warrant and agree that: - - * Each contribution that you submit is and shall be an original work of authorship - and you can legally grant the rights set out in this SCA; - - * to the best of your knowledge, each contribution will not violate any third - party's copyrights, trademarks, patents, or other intellectual property rights; and - - * each contribution shall be in compliance with U.S. export control laws and other - applicable export and import laws. You agree to notify us if you become aware of - any circumstance which would make any of the foregoing representations inaccurate - in any respect. Syllogism Co. may publicly disclose your participation in the project, - including the fact that you have signed the SCA. - -6. This SCA is governed by the laws of the State of California and applicable U.S. - Federal law. Any choice of law rules will not apply. - -7. Please place an “x” on one of the applicable statement below. Please do NOT -mark both statements: - -x___ I am signing on behalf of myself as an individual and no other person or entity, including my employer, has or will have rights with respect my contributions. - -____ I am signing on behalf of my employer or a legal entity and I have the actual authority to contractually bind that entity. 
- -| Field | Entry | -|------------------------------- | -------------------- | -| Name | Jordan Suchow | -| Company's name (if applicable) | | -| Title or Role (if applicable) | | -| Date | 2015-04-19 | -| GitHub username | suchow | -| Website (optional) | http://suchow.io | - diff --git a/contributors/vsolovyov.md b/contributors/vsolovyov.md deleted file mode 100644 index 7f3a4bebe..000000000 --- a/contributors/vsolovyov.md +++ /dev/null @@ -1,95 +0,0 @@ -Syllogism Contributor Agreement -=============================== - -This Syllogism Contributor Agreement (“SCA”) is based on the Oracle Contributor -Agreement. The SCA applies to any contribution that you make to any product or -project managed by us (the “project”), and sets out the intellectual property -rights you grant to us in the contributed materials. The term “us” shall mean -Syllogism Co. The term "you" shall mean the person or entity identified below. -If you agree to be bound by these terms, fill in the information requested below -and include the filled-in version with your first pull-request, under the file -contrbutors/. The name of the file should be your GitHub username, with the -extension .md. For example, the user example_user would create the file -spaCy/contributors/example_user.md . - -Read this agreement carefully before signing. These terms and conditions -constitute a binding legal agreement. - -1. The term 'contribution' or ‘contributed materials’ means any source code, -object code, patch, tool, sample, graphic, specification, manual, documentation, -or any other material posted or submitted by you to the project. - -2. 
With respect to any worldwide copyrights, or copyright applications and registrations, -in your contribution: - * you hereby assign to us joint ownership, and to the extent that such assignment - is or becomes invalid, ineffective or unenforceable, you hereby grant to us a perpetual, - irrevocable, non-exclusive, worldwide, no-charge, royalty-free, unrestricted license - to exercise all rights under those copyrights. This includes, at our option, the - right to sublicense these same rights to third parties through multiple levels of - sublicensees or other licensing arrangements; - - * you agree that each of us can do all things in relation to your contribution - as if each of us were the sole owners, and if one of us makes a derivative work - of your contribution, the one who makes the derivative work (or has it made) will - be the sole owner of that derivative work; - - * you agree that you will not assert any moral rights in your contribution against - us, our licensees or transferees; - - * you agree that we may register a copyright in your contribution and exercise - all ownership rights associated with it; and - - * you agree that neither of us has any duty to consult with, obtain the consent - of, pay or render an accounting to the other for any use or distribution of your - contribution. - -3. With respect to any patents you own, or that you can license without payment -to any third party, you hereby grant to us a perpetual, irrevocable, non-exclusive, -worldwide, no-charge, royalty-free license to: - - * make, have made, use, sell, offer to sell, import, and otherwise transfer your - contribution in whole or in part, alone or in combination with - or included in any product, work or materials arising out of the project to - which your contribution was submitted, and - - * at our option, to sublicense these same rights to third parties through multiple - levels of sublicensees or other licensing arrangements. - -4. 
Except as set out above, you keep all right, title, and interest in your -contribution. The rights that you grant to us under these terms are effective on -the date you first submitted a contribution to us, even if your submission took -place before the date you sign these terms. - -5. You covenant, represent, warrant and agree that: - - * Each contribution that you submit is and shall be an original work of authorship - and you can legally grant the rights set out in this SCA; - - * to the best of your knowledge, each contribution will not violate any third - party's copyrights, trademarks, patents, or other intellectual property rights; and - - * each contribution shall be in compliance with U.S. export control laws and other - applicable export and import laws. You agree to notify us if you become aware of - any circumstance which would make any of the foregoing representations inaccurate - in any respect. Syllogism Co. may publicly disclose your participation in the project, - including the fact that you have signed the SCA. - -6. This SCA is governed by the laws of the State of California and applicable U.S. - Federal law. Any choice of law rules will not apply. - -7. Please place an “x” on one of the applicable statement below. Please do NOT -mark both statements: - -_x__ I am signing on behalf of myself as an individual and no other person or entity, including my employer, has or will have rights with respect my contributions. - -____ I am signing on behalf of my employer or a legal entity and I have the actual authority to contractually bind that entity. 
- -| Field | Entry | -|------------------------------- | -------------------- | -| Name | Vsevolod Solovyov | -| Company's name (if applicable) | | -| Title or Role (if applicable) | | -| Date | 2015-08-24 | -| GitHub username | vsolovyov | -| Website (optional) | | - From d1c0b1feb7fd1f30549d150adc019d17b99fd1bf Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Fri, 21 Oct 2016 01:00:31 +0200 Subject: [PATCH 08/11] Add v1.0 release announcement to landing page --- website/index.jade | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/website/index.jade b/website/index.jade index 35487dab3..6a8e98e6a 100644 --- a/website/index.jade +++ b/website/index.jade @@ -28,6 +28,15 @@ main.o-main +a("https://www.reddit.com/r/" + SOCIAL.reddit) #[+icon("reddit")] #[strong User Group] on Reddit +grid.u-border-bottom + +grid-col("half").u-padding + +label Release update + +h(2) + +a("https://github.com/" + SOCIAL.github + "/spaCy/releases") spaCy v1.0 out now! + + p.u-text-medium I'm excited — and more than a little nervous! — to finally make the #[+a("https://github.com/" + SOCIAL.github + "/spaCy/releases") 1.0 release of spaCy]. By far my favourite part of the release is the new support for custom pipelines. Default support for GloVe vectors is also nice. The trickiest change was a significant rewrite of the Matcher class, to support entity IDs and attributes. I've added #[a(href="/docs/#tutorials") tutorials] for the new features, and some training examples.#[br]#[br] + + +button("https://explosion.ai/blog/spacy-deep-learning-keras", true, "primary") Read the blog post + +grid-col("half").u-padding +label Are you using spaCy? +h(2) @@ -42,14 +51,6 @@ main.o-main #[+button("https://survey.spacy.io", true, "primary") Take the survey] - +grid-col("half").u-padding - +label The blog posts have moved - +h(2) Check out the new blog - - p.u-text-medium We've updated the site to make it more focussed on the library itself. 
This will help us stay organised when we expand the tutorials section — by far the clearest message we've gotten from the survey so far. The blog posts have been moved to the new site for our consulting services, #[+a("https://explosion.ai", true) Explosion AI]. We've also updated our demos, and have open-sourced the services behind them. There are lots more releases to come. #[br]#[br] - - +button("https://explosion.ai/blog", true, "primary") Go to the new blogs - +grid +grid-col("half").u-padding +h(2) Built for Production From e619aba8df55aa736c13ef86557bf16a1b5f63a6 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Fri, 21 Oct 2016 01:07:16 +0200 Subject: [PATCH 09/11] Move WordNet license to correct place --- lang_data/en/LICENSE | 20 ++++++++++++++++++++ wordnet_license.txt | 16 ---------------- 2 files changed, 20 insertions(+), 16 deletions(-) create mode 100644 lang_data/en/LICENSE delete mode 100644 wordnet_license.txt diff --git a/lang_data/en/LICENSE b/lang_data/en/LICENSE new file mode 100644 index 000000000..4f49c2dff --- /dev/null +++ b/lang_data/en/LICENSE @@ -0,0 +1,20 @@ +WordNet Release 3.0 This software and database is being provided to you, the +LICENSEE, by Princeton University under the following license. By obtaining, +using and/or copying this software and database, you agree that you have read, +understood, and will comply with these terms and conditions.: Permission to +use, copy, modify and distribute this software and database and its +documentation for any purpose and without fee or royalty is hereby granted, +provided that you agree to comply with the following copyright notice and +statements, including the disclaimer, and that the same appear on ALL copies of +the software, database and documentation, including modifications that you make for internal use or for distribution. WordNet 3.0 Copyright 2006 by Princeton +University. All rights reserved. 
THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" +AND PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON UNIVERSITY MAKES NO +REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY +PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE, DATABASE OR +DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS +OR OTHER RIGHTS. The name of Princeton University or Princeton may not be used +in advertising or publicity pertaining to distribution of the software and/or +database. Title to copyright in this software, database and any associated +documentation shall at all times remain with Princeton University and LICENSEE +agrees to preserve same. diff --git a/wordnet_license.txt b/wordnet_license.txt deleted file mode 100644 index 3237f9ce4..000000000 --- a/wordnet_license.txt +++ /dev/null @@ -1,16 +0,0 @@ -spaCy uses data from Princeton's WordNet project, which is free for commercial use. - -The data is installed alongside spaCy, in spacy/en/data/wordnet. - -WordNet is licensed as follows. - -Commercial Use - -WordNet® is unencumbered, and may be used in commercial applications in accordance with the following license agreement. An attorney representing the commercial interest should review this WordNet license with respect to the intended use. - -WordNet License - -This license is available as the file LICENSE in any downloaded version of WordNet. -WordNet 3.0 license: (Download) - -WordNet Release 3.0 This software and database is being provided to you, the LICENSEE, by Princeton University under the following license. 
By obtaining, using and/or copying this software and database, you agree that you have read, understood, and will comply with these terms and conditions.: Permission to use, copy, modify and distribute this software and database and its documentation for any purpose and without fee or royalty is hereby granted, provided that you agree to comply with the following copyright notice and statements, including the disclaimer, and that the same appear on ALL copies of the software, database and documentation, including modifications that you make for internal use or for distribution. WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved. THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS. The name of Princeton University or Princeton may not be used in advertising or publicity pertaining to distribution of the software and/or database. Title to copyright in this software, database and any associated documentation shall at all times remain with Princeton University and LICENSEE agrees to preserve same. 
From 296897ad4223c1ec02890dd3c0b4c04b0ab2add5 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Fri, 21 Oct 2016 01:10:33 +0200 Subject: [PATCH 10/11] Move example sputnik package.json to examples directory --- package.json | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 package.json diff --git a/package.json b/package.json deleted file mode 100644 index 6add32a19..000000000 --- a/package.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "name": "en", - "version": "1.1.0", - "description": "english test model", - "license": "public domain", - "include": [ - ["deps", "*"], - ["ner", "*"], - ["pos", "*"], - ["tokenizer", "*"], - ["vocab", "*"], - ["wordnet", "*"] - ] -} From 9132546c2191bd094dfb15269803c31a6b2dc652 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 21 Oct 2016 20:53:50 +1100 Subject: [PATCH 11/11] Update README.rst --- README.rst | 41 ++++++----------------------------------- 1 file changed, 6 insertions(+), 35 deletions(-) diff --git a/README.rst b/README.rst index 4c4a2e96b..389b4ea14 100644 --- a/README.rst +++ b/README.rst @@ -78,20 +78,11 @@ Install spaCy ============= spaCy is compatible with 64-bit CPython 2.6+/3.3+ and runs on Unix/Linux, OS X -and Windows. Source and binary packages are available via -`pip `_ and `conda `_. -If there are no binary packages for your platform available please make sure that +and Windows. Source packages are available via +`pip `_. Please make sure that you have a working build enviroment set up. See notes on Ubuntu, OS X and Windows for details. -conda ------ - -.. code:: bash - - conda config --add channels spacy # only needed once - conda install spacy - pip --- @@ -100,12 +91,6 @@ avoid modifying system state: .. 
code:: bash - # make sure you are using a recent pip/virtualenv version - python -m pip install -U pip virtualenv - - virtualenv .env - source .env/bin/activate - pip install spacy Python packaging is awkward at the best of times, and it's particularly tricky with @@ -120,17 +105,10 @@ English and German, named ``en`` and ``de``, are available. .. code:: bash - python -m spacy.en.download - python -m spacy.de.download - sputnik --name spacy en_glove_cc_300_1m_vectors # For better word vectors + python -m spacy.en.download all + python -m spacy.de.download all -Then check whether the model was successfully installed: - -.. code:: bash - - python -c "import spacy; spacy.load('en'); print('OK')" - -The download command fetches and installs about 500 MB of data which it installs +The download command fetches about 1 GB of data which it installs within the ``spacy`` package directory. Upgrading spaCy @@ -138,13 +116,6 @@ Upgrading spaCy To upgrade spaCy to the latest release: -conda ------ - -.. code:: bash - - conda update spacy - pip --- @@ -183,7 +154,7 @@ system. See notes on Ubuntu, OS X and Windows for details. pip install -r requirements.txt pip install -e . -Compared to regular install via pip and conda `requirements.txt `_ +Compared to regular install via pip `requirements.txt `_ additionally installs developer dependencies such as cython. Ubuntu