From f16bd3b853211aaa2853058d0c923c52ff671719 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 23 Oct 2015 10:28:05 +1100 Subject: [PATCH 01/15] * Fix FileExistsError for Python2 --- spacy/en/download.py | 58 +++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/spacy/en/download.py b/spacy/en/download.py index cfceb6590..b95288422 100644 --- a/spacy/en/download.py +++ b/spacy/en/download.py @@ -1,12 +1,17 @@ from __future__ import print_function -from os import path import sys import os import tarfile import shutil -import uget import plac +from . import uget + +try: + FileExistsError +except NameError: + FileExistsError = Exception + # TODO: Read this from the same source as the setup VERSION = '0.9.5' @@ -14,45 +19,44 @@ AWS_STORE = 'https://s3-us-west-1.amazonaws.com/media.spacynlp.com' ALL_DATA_DIR_URL = '%s/en_data_all-%s.tgz' % (AWS_STORE, VERSION) -DEST_DIR = path.join(path.dirname(path.abspath(__file__)), 'data') +DEST_DIR = os.path.dirname(os.path.abspath(__file__)) -def download_file(url, dest_dir): - return uget.download(url, dest_dir, console=sys.stdout) +def download_file(url, download_path): + return uget.download(url, download_path, console=sys.stdout) -def install_data(url, dest_dir): - filename = download_file(url, dest_dir) - t = tarfile.open(filename) - t.extractall(dest_dir) +def install_data(url, extract_path, download_path): + try: + os.makedirs(extract_path) + except FileExistsError: + pass - -def install_parser_model(url, dest_dir): - filename = download_file(url, dest_dir) - t = tarfile.open(filename, mode=":gz") - t.extractall(dest_dir) - - -def install_dep_vectors(url, dest_dir): - download_file(url, dest_dir) + tmp = download_file(url, download_path) + assert tmp == download_path + t = tarfile.open(download_path) + t.extractall(extract_path) @plac.annotations( force=("Force overwrite", "flag", "f", bool), ) def main(data_size='all', force=False): - if data_size == 'all': - 
data_url = ALL_DATA_DIR_URL - elif data_size == 'small': - data_url = SM_DATA_DIR_URL + filename = ALL_DATA_DIR_URL.rsplit('/', 1)[1] + download_path = os.path.join(DEST_DIR, filename) + data_path = os.path.join(DEST_DIR, 'data') - if force and path.exists(DEST_DIR): - shutil.rmtree(DEST_DIR) + if force and os.path.exists(download_path): + os.unlink(download_path) - if not os.path.exists(DEST_DIR): - os.makedirs(DEST_DIR) + if force and os.path.exists(data_path): + shutil.rmtree(data_path) - install_data(data_url, DEST_DIR) + if os.path.exists(data_path): + print('data already installed at %s, overwrite with --force' % DEST_DIR) + sys.exit(1) + + install_data(ALL_DATA_DIR_URL, DEST_DIR, download_path) if __name__ == '__main__': From dac8fe7bdbdc9bb13730a9da5ced9fac78bb9262 Mon Sep 17 00:00:00 2001 From: Chris DuBois Date: Fri, 23 Oct 2015 22:18:47 -0700 Subject: [PATCH 02/15] Add __reduce__ to Tokenizer so that English pickles. - Add tests to test_pickle and test_tokenizer that save to tempfiles. 
--- spacy/tokenizer.pxd | 1 + spacy/tokenizer.pyx | 10 ++++++++++ tests/test_pickle.py | 18 +++++++++++++++--- tests/tokenizer/test_tokenizer.py | 15 ++++++++++++++- 4 files changed, 40 insertions(+), 4 deletions(-) diff --git a/spacy/tokenizer.pxd b/spacy/tokenizer.pxd index 9d60d2a6e..c07e87bbc 100644 --- a/spacy/tokenizer.pxd +++ b/spacy/tokenizer.pxd @@ -19,6 +19,7 @@ cdef class Tokenizer: cdef object _prefix_re cdef object _suffix_re cdef object _infix_re + cdef object _rules cpdef Doc tokens_from_list(self, list strings) diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx index ef9c26c01..f0d664c09 100644 --- a/spacy/tokenizer.pyx +++ b/spacy/tokenizer.pyx @@ -29,6 +29,16 @@ cdef class Tokenizer: self._infix_re = infix_re self.vocab = vocab self._load_special_tokenization(rules) + self._rules = rules + + def __reduce__(self): + args = (self.vocab, + self._rules, + self._prefix_re, + self._suffix_re, + self._infix_re) + + return (self.__class__, args, None, None) @classmethod def from_dir(cls, Vocab vocab, data_dir): diff --git a/tests/test_pickle.py b/tests/test_pickle.py index a3d54c627..540e54486 100644 --- a/tests/test_pickle.py +++ b/tests/test_pickle.py @@ -1,8 +1,9 @@ -import pytest -import io import cloudpickle +import io +import os import pickle - +import pytest +import tempfile @pytest.mark.models def test_pickle_english(EN): @@ -12,4 +13,15 @@ def test_pickle_english(EN): file_.seek(0) loaded = pickle.load(file_) + assert loaded is not None +@pytest.mark.models +def test_cloudpickle_to_file(EN): + f = tempfile.NamedTemporaryFile(delete=False) + p = cloudpickle.CloudPickler(f) + p.dump(EN) + f.close() + loaded_en = cloudpickle.load(open(f.name)) + os.unlink(f.name) + doc = loaded_en(unicode('test parse')) + assert len(doc) == 2 diff --git a/tests/tokenizer/test_tokenizer.py b/tests/tokenizer/test_tokenizer.py index abf09dd03..be93b9953 100644 --- a/tests/tokenizer/test_tokenizer.py +++ b/tests/tokenizer/test_tokenizer.py @@ -2,6 +2,19 @@ from 
__future__ import unicode_literals import pytest +import io +import pickle +import cloudpickle +import tempfile + + +@pytest.mark.models +def test_pickle(en_tokenizer): + file_ = io.BytesIO() + cloudpickle.dump(en_tokenizer, file_) + file_.seek(0) + loaded = pickle.load(file_) + assert loaded is not None def test_no_word(en_tokenizer): @@ -108,7 +121,7 @@ def test_cnts5(en_tokenizer): # text = """Today is Tuesday.Mr.""" # tokens = en_tokenizer(text) # assert len(tokens) == 5 -# assert [w.orth_ for w in tokens] == ['Today', 'is', 'Tuesday', '.', 'Mr.'] +# assert [w.orth_ for w in tokens] == ['Today', 'is', 'Tuesday', '.', 'Mr.'] def test_cnts6(en_tokenizer): From 63af820609257f2d86c4db0300f414939daff280 Mon Sep 17 00:00:00 2001 From: Henning Peters Date: Sat, 24 Oct 2015 10:28:37 +0200 Subject: [PATCH 03/15] add travis ci build status --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8eb39ba01..b5801392b 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ - - +[![Travis CI status](https://travis-ci.org/honnibal/spaCy.svg?branch=master)](https://travis-ci.org/honnibal/spaCy) +![Appveyor status](https://ci.appveyor.com/api/projects/status/aoe3dtkep36rdaqf?svg=true) spaCy: Industrial-strength NLP ============================== From c27327d92b81cb72550215fe681545e30d5e6998 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 25 Oct 2015 23:15:51 +1100 Subject: [PATCH 04/15] * Rework the docs compilation function --- fabfile.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/fabfile.py b/fabfile.py index 61a2dcdc3..f4fa28169 100644 --- a/fabfile.py +++ b/fabfile.py @@ -1,9 +1,12 @@ +from __future__ import print_function + from fabric.api import local, lcd, env, settings, prefix from os.path import exists as file_exists from fabtools.python import virtualenv from os import path import os import shutil +from pathlib import Path PWD = path.dirname(__file__) 
@@ -56,10 +59,25 @@ def prebuild(build_dir='/tmp/build_spacy'): local('fab test') local('python setup.py sdist') + def docs(): - with virtualenv(VENV_DIR): - with lcd(path.join(path.dirname(__file__), 'docs')): - local('make html') + def jade(source_name, out_dir): + pwd = path.join(path.dirname(__file__), 'website') + jade_loc = path.join(pwd, 'src', 'jade', source_name) + out_loc = path.join(pwd, 'site', out_dir) + local('jade -P %s --out %s' % (jade_loc, out_loc)) + + jade('home/index.jade', '') + jade('docs/index.jade', 'docs/') + jade('blog/index.jade', 'blog/') + jade('tutorials/index.jade', 'tutorials/') + + for post_dir in (Path(__file__).parent / 'website' / 'src' / 'jade' / 'blog').iterdir(): + if post_dir.is_dir() \ + and (post_dir / 'index.jade').exists() \ + and (post_dir / 'meta.jade').exists(): + jade(str(post_dir / 'index.jade'), path.join('blogs', post_dir.parts[-1])) + def publish(version): with virtualenv(VENV_DIR): @@ -68,7 +86,7 @@ def publish(version): local('git push origin %s' % version) local('python setup.py sdist') local('python setup.py register') - local('twine upload dist/%s.tar.gz' % version) + local('twine upload dist/spacy-%s.tar.gz' % version) def env(lang="python2.7"): From e1810c82ad5d10211e937efd36a29d58cebd19e1 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 25 Oct 2015 23:27:37 +1100 Subject: [PATCH 05/15] * Fix docs function in fabfile --- fabfile.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fabfile.py b/fabfile.py index f4fa28169..6080ff1da 100644 --- a/fabfile.py +++ b/fabfile.py @@ -67,6 +67,9 @@ def docs(): out_loc = path.join(pwd, 'site', out_dir) local('jade -P %s --out %s' % (jade_loc, out_loc)) + with virtualenv(VENV_DIR): + local('./website/create_code_samples tests/website/ website/src/code/')t + jade('home/index.jade', '') jade('docs/index.jade', 'docs/') jade('blog/index.jade', 'blog/') From 781ea3e0b97f39fb5770134f7bf763140f09c0b0 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 25 
Oct 2015 23:27:55 +1100 Subject: [PATCH 06/15] * Add missing import for website/ceate_code_samples --- website/create_code_samples | 1 + 1 file changed, 1 insertion(+) diff --git a/website/create_code_samples b/website/create_code_samples index 659a3d71c..2b9938edb 100755 --- a/website/create_code_samples +++ b/website/create_code_samples @@ -4,6 +4,7 @@ from __future__ import unicode_literals import os import ast import io +import re import plac From a3dae9d9ac179166efb2e7d8f51d9159e4c9d4d9 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 25 Oct 2015 23:36:27 +1100 Subject: [PATCH 07/15] * Switch website compilation to fab docs command instead of Makefile --- website/Makefile | 52 ------------------------------------------------ 1 file changed, 52 deletions(-) delete mode 100644 website/Makefile diff --git a/website/Makefile b/website/Makefile deleted file mode 100644 index 940a8182c..000000000 --- a/website/Makefile +++ /dev/null @@ -1,52 +0,0 @@ -all: src/code site - -src/code: - mkdir -p src/code/ - ./create_code_samples ../tests/website/ src/code/ - -site: site/index.html site/blog/ site/docs/ site/license/ site/blog/introducing-spacy/ site/blog/parsing-english-in-python/ site/blog/part-of-speech-POS-tagger-in-python/ site/tutorials/twitter-filter/ site/tutorials/syntax-search/ site/tutorials/mark-adverbs/ site/blog/writing-c-in-cython/ site/blog/how-spacy-works/ - -site/index.html: src/jade/header.jade src/jade/*.jade - jade -P src/jade/home/index.jade --out site/ - -site/docs/: src/jade/docs/*.jade src/jade/header.jade - jade -P src/jade/docs/index.jade --out $@ - -site/blog/: src/jade/blog/*.jade site/blog/*/ site/tutorials/*/ src/jade/header.jade - jade -P src/jade/blog/index.jade --out $@ - -site/tutorials/: src/jade/tutorials/*.jade site/tutorials/*/ src/jade/header.jade - jade -P src/jade/tutorials/index.jade --out $@ - -site/blog/parsing-english-in-python/: src/jade/blog/parsing-english-in-python/*.jade src/jade/header.jade - jade -P $< --out 
$@ - - -site/blog/writing-c-in-cython/: src/jade/blog/writing-c-in-cython/*.jade src/jade/header.jade - jade -P $< --out $@ - - -site/blog/part-of-speech-POS-tagger-in-python/: src/jade/blog/part-of-speech-POS-tagger-in-python/*.jade src/jade/header.jade - jade -P $< --out $@ - -site/blog/introducing-spacy/: src/jade/blog/introducing-spacy/*.jade src/jade/header.jade - jade -P $< --out $@ - -site/blog/displacy/: src/jade/blog/displacy/*.jade src/jade/header.jade - jade -P $< --out $@ - -site/blog/eli5-computers-learn-reading: src/jade/blog/eli5-computers-learn-reading/*.jade src/jade/header.jade - jade -P $< --out $@ - -site/tutorials/mark-adverbs/: src/jade/tutorials/mark-adverbs/*.jade src/jade/header.jade - jade -P $< --out $@ - -site/blog/how-spacy-works/: src/jade/blog/how-spacy-works/*.jade src/jade/header.jade - jade -P $< --out $@ - -site/tutorials/syntax-search/: src/jade/tutorials/syntax-search/*.jade src/jade/header.jade - jade -P $< --out $@ - -site/tutorials/twitter-filter/: src/jade/tutorials/twitter-filter/*.jade src/jade/header.jade - jade -P $< --out $@ - From 4e16f9e435003cbb413176b8b9fae9f00561b672 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 26 Oct 2015 00:07:12 +1100 Subject: [PATCH 08/15] * Move tests underneath spacy/ --- {tests => spacy/tests}/conftest.py | 0 {tests => spacy/tests}/de/test_de.py | 0 {tests => spacy/tests}/matcher/test_matcher_bugfixes.py | 0 {tests => spacy/tests}/morphology/test_morphology_pickle.py | 0 {tests => spacy/tests}/munge/test_align.py | 0 {tests => spacy/tests}/munge/test_bad_periods.py | 0 {tests => spacy/tests}/munge/test_detokenize.py | 0 {tests => spacy/tests}/munge/test_lev_align.py | 0 {tests => spacy/tests}/munge/test_onto_ner.py | 0 {tests => spacy/tests}/parser/test_base_nps.py | 0 {tests => spacy/tests}/parser/test_conjuncts.py | 0 {tests => spacy/tests}/parser/test_initial_actions_parse.py | 0 {tests => spacy/tests}/parser/test_ner.py | 0 {tests => spacy/tests}/parser/test_parse.py | 0 
{tests => spacy/tests}/parser/test_parse_navigate.py | 0 {tests => spacy/tests}/parser/test_parser_pickle.py | 0 {tests => spacy/tests}/parser/test_sbd.py | 0 {tests => spacy/tests}/parser/test_space_attachment.py | 0 {tests => spacy/tests}/parser/test_subtree.py | 0 {tests => spacy/tests}/prag_sbd.py | 0 {tests => spacy/tests}/serialize/test_codecs.py | 0 {tests => spacy/tests}/serialize/test_huffman.py | 0 {tests => spacy/tests}/serialize/test_io.py | 0 {tests => spacy/tests}/serialize/test_packer.py | 0 {tests => spacy/tests}/spans/conftest.py | 0 {tests => spacy/tests}/spans/test_merge.py | 0 {tests => spacy/tests}/spans/test_span.py | 0 {tests => spacy/tests}/spans/test_times.py | 0 {tests => spacy/tests}/sun.tokens | 0 {tests => spacy/tests}/sun.txt | 0 {tests => spacy/tests}/tagger/test_add_lemmas.py | 0 {tests => spacy/tests}/tagger/test_lemmatizer.py | 0 {tests => spacy/tests}/tagger/test_morph_exceptions.py | 0 {tests => spacy/tests}/tagger/test_spaces.py | 0 {tests => spacy/tests}/tagger/test_tag_names.py | 0 {tests => spacy/tests}/test_basic_create.py | 0 {tests => spacy/tests}/test_basic_load.py | 0 {tests => spacy/tests}/test_docs.py | 0 {tests => spacy/tests}/test_matcher.py | 0 {tests => spacy/tests}/test_pickle.py | 0 {tests => spacy/tests}/tokenizer.sed | 0 {tests => spacy/tests}/tokenizer/conftest.py | 0 {tests => spacy/tests}/tokenizer/test_contractions.py | 0 {tests => spacy/tests}/tokenizer/test_emoticons.py | 0 {tests => spacy/tests}/tokenizer/test_indices.py | 0 {tests => spacy/tests}/tokenizer/test_infix.py | 0 {tests => spacy/tests}/tokenizer/test_only_punct.py | 0 {tests => spacy/tests}/tokenizer/test_post_punct.py | 0 {tests => spacy/tests}/tokenizer/test_pre_punct.py | 0 {tests => spacy/tests}/tokenizer/test_special_affix.py | 0 {tests => spacy/tests}/tokenizer/test_string_loading.py | 0 {tests => spacy/tests}/tokenizer/test_surround_punct.py | 0 {tests => spacy/tests}/tokenizer/test_tokenizer.py | 0 {tests => 
spacy/tests}/tokenizer/test_tokens_from_list.py | 0 {tests => spacy/tests}/tokenizer/test_whitespace.py | 0 {tests => spacy/tests}/tokenizer/test_wiki_sun.py | 0 {tests => spacy/tests}/tokens/test_array.py | 0 {tests => spacy/tests}/tokens/test_token.py | 0 {tests => spacy/tests}/tokens/test_token_api.py | 0 {tests => spacy/tests}/tokens/test_token_references.py | 0 {tests => spacy/tests}/tokens/test_tokens_api.py | 0 {tests => spacy/tests}/tokens/test_vec.py | 0 {tests => spacy/tests}/vectors/test_vectors.py | 0 {tests => spacy/tests}/vocab/conftest.py | 0 {tests => spacy/tests}/vocab/test_asciify.py | 0 {tests => spacy/tests}/vocab/test_flag_features.py | 0 {tests => spacy/tests}/vocab/test_intern.py | 0 {tests => spacy/tests}/vocab/test_is_punct.py | 0 {tests => spacy/tests}/vocab/test_lexeme_flags.py | 0 {tests => spacy/tests}/vocab/test_number.py | 0 {tests => spacy/tests}/vocab/test_shape.py | 0 {tests => spacy/tests}/vocab/test_urlish.py | 0 {tests => spacy/tests}/vocab/test_vocab.py | 0 {tests => spacy/tests}/website/conftest.py | 0 {tests => spacy/tests}/website/test_api.py | 0 {tests => spacy/tests}/website/test_home.py | 0 76 files changed, 0 insertions(+), 0 deletions(-) rename {tests => spacy/tests}/conftest.py (100%) rename {tests => spacy/tests}/de/test_de.py (100%) rename {tests => spacy/tests}/matcher/test_matcher_bugfixes.py (100%) rename {tests => spacy/tests}/morphology/test_morphology_pickle.py (100%) rename {tests => spacy/tests}/munge/test_align.py (100%) rename {tests => spacy/tests}/munge/test_bad_periods.py (100%) rename {tests => spacy/tests}/munge/test_detokenize.py (100%) rename {tests => spacy/tests}/munge/test_lev_align.py (100%) rename {tests => spacy/tests}/munge/test_onto_ner.py (100%) rename {tests => spacy/tests}/parser/test_base_nps.py (100%) rename {tests => spacy/tests}/parser/test_conjuncts.py (100%) rename {tests => spacy/tests}/parser/test_initial_actions_parse.py (100%) rename {tests => spacy/tests}/parser/test_ner.py 
(100%) rename {tests => spacy/tests}/parser/test_parse.py (100%) rename {tests => spacy/tests}/parser/test_parse_navigate.py (100%) rename {tests => spacy/tests}/parser/test_parser_pickle.py (100%) rename {tests => spacy/tests}/parser/test_sbd.py (100%) rename {tests => spacy/tests}/parser/test_space_attachment.py (100%) rename {tests => spacy/tests}/parser/test_subtree.py (100%) rename {tests => spacy/tests}/prag_sbd.py (100%) rename {tests => spacy/tests}/serialize/test_codecs.py (100%) rename {tests => spacy/tests}/serialize/test_huffman.py (100%) rename {tests => spacy/tests}/serialize/test_io.py (100%) rename {tests => spacy/tests}/serialize/test_packer.py (100%) rename {tests => spacy/tests}/spans/conftest.py (100%) rename {tests => spacy/tests}/spans/test_merge.py (100%) rename {tests => spacy/tests}/spans/test_span.py (100%) rename {tests => spacy/tests}/spans/test_times.py (100%) rename {tests => spacy/tests}/sun.tokens (100%) rename {tests => spacy/tests}/sun.txt (100%) rename {tests => spacy/tests}/tagger/test_add_lemmas.py (100%) rename {tests => spacy/tests}/tagger/test_lemmatizer.py (100%) rename {tests => spacy/tests}/tagger/test_morph_exceptions.py (100%) rename {tests => spacy/tests}/tagger/test_spaces.py (100%) rename {tests => spacy/tests}/tagger/test_tag_names.py (100%) rename {tests => spacy/tests}/test_basic_create.py (100%) rename {tests => spacy/tests}/test_basic_load.py (100%) rename {tests => spacy/tests}/test_docs.py (100%) rename {tests => spacy/tests}/test_matcher.py (100%) rename {tests => spacy/tests}/test_pickle.py (100%) rename {tests => spacy/tests}/tokenizer.sed (100%) rename {tests => spacy/tests}/tokenizer/conftest.py (100%) rename {tests => spacy/tests}/tokenizer/test_contractions.py (100%) rename {tests => spacy/tests}/tokenizer/test_emoticons.py (100%) rename {tests => spacy/tests}/tokenizer/test_indices.py (100%) rename {tests => spacy/tests}/tokenizer/test_infix.py (100%) rename {tests => 
spacy/tests}/tokenizer/test_only_punct.py (100%) rename {tests => spacy/tests}/tokenizer/test_post_punct.py (100%) rename {tests => spacy/tests}/tokenizer/test_pre_punct.py (100%) rename {tests => spacy/tests}/tokenizer/test_special_affix.py (100%) rename {tests => spacy/tests}/tokenizer/test_string_loading.py (100%) rename {tests => spacy/tests}/tokenizer/test_surround_punct.py (100%) rename {tests => spacy/tests}/tokenizer/test_tokenizer.py (100%) rename {tests => spacy/tests}/tokenizer/test_tokens_from_list.py (100%) rename {tests => spacy/tests}/tokenizer/test_whitespace.py (100%) rename {tests => spacy/tests}/tokenizer/test_wiki_sun.py (100%) rename {tests => spacy/tests}/tokens/test_array.py (100%) rename {tests => spacy/tests}/tokens/test_token.py (100%) rename {tests => spacy/tests}/tokens/test_token_api.py (100%) rename {tests => spacy/tests}/tokens/test_token_references.py (100%) rename {tests => spacy/tests}/tokens/test_tokens_api.py (100%) rename {tests => spacy/tests}/tokens/test_vec.py (100%) rename {tests => spacy/tests}/vectors/test_vectors.py (100%) rename {tests => spacy/tests}/vocab/conftest.py (100%) rename {tests => spacy/tests}/vocab/test_asciify.py (100%) rename {tests => spacy/tests}/vocab/test_flag_features.py (100%) rename {tests => spacy/tests}/vocab/test_intern.py (100%) rename {tests => spacy/tests}/vocab/test_is_punct.py (100%) rename {tests => spacy/tests}/vocab/test_lexeme_flags.py (100%) rename {tests => spacy/tests}/vocab/test_number.py (100%) rename {tests => spacy/tests}/vocab/test_shape.py (100%) rename {tests => spacy/tests}/vocab/test_urlish.py (100%) rename {tests => spacy/tests}/vocab/test_vocab.py (100%) rename {tests => spacy/tests}/website/conftest.py (100%) rename {tests => spacy/tests}/website/test_api.py (100%) rename {tests => spacy/tests}/website/test_home.py (100%) diff --git a/tests/conftest.py b/spacy/tests/conftest.py similarity index 100% rename from tests/conftest.py rename to spacy/tests/conftest.py diff --git 
a/tests/de/test_de.py b/spacy/tests/de/test_de.py similarity index 100% rename from tests/de/test_de.py rename to spacy/tests/de/test_de.py diff --git a/tests/matcher/test_matcher_bugfixes.py b/spacy/tests/matcher/test_matcher_bugfixes.py similarity index 100% rename from tests/matcher/test_matcher_bugfixes.py rename to spacy/tests/matcher/test_matcher_bugfixes.py diff --git a/tests/morphology/test_morphology_pickle.py b/spacy/tests/morphology/test_morphology_pickle.py similarity index 100% rename from tests/morphology/test_morphology_pickle.py rename to spacy/tests/morphology/test_morphology_pickle.py diff --git a/tests/munge/test_align.py b/spacy/tests/munge/test_align.py similarity index 100% rename from tests/munge/test_align.py rename to spacy/tests/munge/test_align.py diff --git a/tests/munge/test_bad_periods.py b/spacy/tests/munge/test_bad_periods.py similarity index 100% rename from tests/munge/test_bad_periods.py rename to spacy/tests/munge/test_bad_periods.py diff --git a/tests/munge/test_detokenize.py b/spacy/tests/munge/test_detokenize.py similarity index 100% rename from tests/munge/test_detokenize.py rename to spacy/tests/munge/test_detokenize.py diff --git a/tests/munge/test_lev_align.py b/spacy/tests/munge/test_lev_align.py similarity index 100% rename from tests/munge/test_lev_align.py rename to spacy/tests/munge/test_lev_align.py diff --git a/tests/munge/test_onto_ner.py b/spacy/tests/munge/test_onto_ner.py similarity index 100% rename from tests/munge/test_onto_ner.py rename to spacy/tests/munge/test_onto_ner.py diff --git a/tests/parser/test_base_nps.py b/spacy/tests/parser/test_base_nps.py similarity index 100% rename from tests/parser/test_base_nps.py rename to spacy/tests/parser/test_base_nps.py diff --git a/tests/parser/test_conjuncts.py b/spacy/tests/parser/test_conjuncts.py similarity index 100% rename from tests/parser/test_conjuncts.py rename to spacy/tests/parser/test_conjuncts.py diff --git a/tests/parser/test_initial_actions_parse.py 
b/spacy/tests/parser/test_initial_actions_parse.py similarity index 100% rename from tests/parser/test_initial_actions_parse.py rename to spacy/tests/parser/test_initial_actions_parse.py diff --git a/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py similarity index 100% rename from tests/parser/test_ner.py rename to spacy/tests/parser/test_ner.py diff --git a/tests/parser/test_parse.py b/spacy/tests/parser/test_parse.py similarity index 100% rename from tests/parser/test_parse.py rename to spacy/tests/parser/test_parse.py diff --git a/tests/parser/test_parse_navigate.py b/spacy/tests/parser/test_parse_navigate.py similarity index 100% rename from tests/parser/test_parse_navigate.py rename to spacy/tests/parser/test_parse_navigate.py diff --git a/tests/parser/test_parser_pickle.py b/spacy/tests/parser/test_parser_pickle.py similarity index 100% rename from tests/parser/test_parser_pickle.py rename to spacy/tests/parser/test_parser_pickle.py diff --git a/tests/parser/test_sbd.py b/spacy/tests/parser/test_sbd.py similarity index 100% rename from tests/parser/test_sbd.py rename to spacy/tests/parser/test_sbd.py diff --git a/tests/parser/test_space_attachment.py b/spacy/tests/parser/test_space_attachment.py similarity index 100% rename from tests/parser/test_space_attachment.py rename to spacy/tests/parser/test_space_attachment.py diff --git a/tests/parser/test_subtree.py b/spacy/tests/parser/test_subtree.py similarity index 100% rename from tests/parser/test_subtree.py rename to spacy/tests/parser/test_subtree.py diff --git a/tests/prag_sbd.py b/spacy/tests/prag_sbd.py similarity index 100% rename from tests/prag_sbd.py rename to spacy/tests/prag_sbd.py diff --git a/tests/serialize/test_codecs.py b/spacy/tests/serialize/test_codecs.py similarity index 100% rename from tests/serialize/test_codecs.py rename to spacy/tests/serialize/test_codecs.py diff --git a/tests/serialize/test_huffman.py b/spacy/tests/serialize/test_huffman.py similarity index 100% rename 
from tests/serialize/test_huffman.py rename to spacy/tests/serialize/test_huffman.py diff --git a/tests/serialize/test_io.py b/spacy/tests/serialize/test_io.py similarity index 100% rename from tests/serialize/test_io.py rename to spacy/tests/serialize/test_io.py diff --git a/tests/serialize/test_packer.py b/spacy/tests/serialize/test_packer.py similarity index 100% rename from tests/serialize/test_packer.py rename to spacy/tests/serialize/test_packer.py diff --git a/tests/spans/conftest.py b/spacy/tests/spans/conftest.py similarity index 100% rename from tests/spans/conftest.py rename to spacy/tests/spans/conftest.py diff --git a/tests/spans/test_merge.py b/spacy/tests/spans/test_merge.py similarity index 100% rename from tests/spans/test_merge.py rename to spacy/tests/spans/test_merge.py diff --git a/tests/spans/test_span.py b/spacy/tests/spans/test_span.py similarity index 100% rename from tests/spans/test_span.py rename to spacy/tests/spans/test_span.py diff --git a/tests/spans/test_times.py b/spacy/tests/spans/test_times.py similarity index 100% rename from tests/spans/test_times.py rename to spacy/tests/spans/test_times.py diff --git a/tests/sun.tokens b/spacy/tests/sun.tokens similarity index 100% rename from tests/sun.tokens rename to spacy/tests/sun.tokens diff --git a/tests/sun.txt b/spacy/tests/sun.txt similarity index 100% rename from tests/sun.txt rename to spacy/tests/sun.txt diff --git a/tests/tagger/test_add_lemmas.py b/spacy/tests/tagger/test_add_lemmas.py similarity index 100% rename from tests/tagger/test_add_lemmas.py rename to spacy/tests/tagger/test_add_lemmas.py diff --git a/tests/tagger/test_lemmatizer.py b/spacy/tests/tagger/test_lemmatizer.py similarity index 100% rename from tests/tagger/test_lemmatizer.py rename to spacy/tests/tagger/test_lemmatizer.py diff --git a/tests/tagger/test_morph_exceptions.py b/spacy/tests/tagger/test_morph_exceptions.py similarity index 100% rename from tests/tagger/test_morph_exceptions.py rename to 
spacy/tests/tagger/test_morph_exceptions.py diff --git a/tests/tagger/test_spaces.py b/spacy/tests/tagger/test_spaces.py similarity index 100% rename from tests/tagger/test_spaces.py rename to spacy/tests/tagger/test_spaces.py diff --git a/tests/tagger/test_tag_names.py b/spacy/tests/tagger/test_tag_names.py similarity index 100% rename from tests/tagger/test_tag_names.py rename to spacy/tests/tagger/test_tag_names.py diff --git a/tests/test_basic_create.py b/spacy/tests/test_basic_create.py similarity index 100% rename from tests/test_basic_create.py rename to spacy/tests/test_basic_create.py diff --git a/tests/test_basic_load.py b/spacy/tests/test_basic_load.py similarity index 100% rename from tests/test_basic_load.py rename to spacy/tests/test_basic_load.py diff --git a/tests/test_docs.py b/spacy/tests/test_docs.py similarity index 100% rename from tests/test_docs.py rename to spacy/tests/test_docs.py diff --git a/tests/test_matcher.py b/spacy/tests/test_matcher.py similarity index 100% rename from tests/test_matcher.py rename to spacy/tests/test_matcher.py diff --git a/tests/test_pickle.py b/spacy/tests/test_pickle.py similarity index 100% rename from tests/test_pickle.py rename to spacy/tests/test_pickle.py diff --git a/tests/tokenizer.sed b/spacy/tests/tokenizer.sed similarity index 100% rename from tests/tokenizer.sed rename to spacy/tests/tokenizer.sed diff --git a/tests/tokenizer/conftest.py b/spacy/tests/tokenizer/conftest.py similarity index 100% rename from tests/tokenizer/conftest.py rename to spacy/tests/tokenizer/conftest.py diff --git a/tests/tokenizer/test_contractions.py b/spacy/tests/tokenizer/test_contractions.py similarity index 100% rename from tests/tokenizer/test_contractions.py rename to spacy/tests/tokenizer/test_contractions.py diff --git a/tests/tokenizer/test_emoticons.py b/spacy/tests/tokenizer/test_emoticons.py similarity index 100% rename from tests/tokenizer/test_emoticons.py rename to spacy/tests/tokenizer/test_emoticons.py diff 
--git a/tests/tokenizer/test_indices.py b/spacy/tests/tokenizer/test_indices.py similarity index 100% rename from tests/tokenizer/test_indices.py rename to spacy/tests/tokenizer/test_indices.py diff --git a/tests/tokenizer/test_infix.py b/spacy/tests/tokenizer/test_infix.py similarity index 100% rename from tests/tokenizer/test_infix.py rename to spacy/tests/tokenizer/test_infix.py diff --git a/tests/tokenizer/test_only_punct.py b/spacy/tests/tokenizer/test_only_punct.py similarity index 100% rename from tests/tokenizer/test_only_punct.py rename to spacy/tests/tokenizer/test_only_punct.py diff --git a/tests/tokenizer/test_post_punct.py b/spacy/tests/tokenizer/test_post_punct.py similarity index 100% rename from tests/tokenizer/test_post_punct.py rename to spacy/tests/tokenizer/test_post_punct.py diff --git a/tests/tokenizer/test_pre_punct.py b/spacy/tests/tokenizer/test_pre_punct.py similarity index 100% rename from tests/tokenizer/test_pre_punct.py rename to spacy/tests/tokenizer/test_pre_punct.py diff --git a/tests/tokenizer/test_special_affix.py b/spacy/tests/tokenizer/test_special_affix.py similarity index 100% rename from tests/tokenizer/test_special_affix.py rename to spacy/tests/tokenizer/test_special_affix.py diff --git a/tests/tokenizer/test_string_loading.py b/spacy/tests/tokenizer/test_string_loading.py similarity index 100% rename from tests/tokenizer/test_string_loading.py rename to spacy/tests/tokenizer/test_string_loading.py diff --git a/tests/tokenizer/test_surround_punct.py b/spacy/tests/tokenizer/test_surround_punct.py similarity index 100% rename from tests/tokenizer/test_surround_punct.py rename to spacy/tests/tokenizer/test_surround_punct.py diff --git a/tests/tokenizer/test_tokenizer.py b/spacy/tests/tokenizer/test_tokenizer.py similarity index 100% rename from tests/tokenizer/test_tokenizer.py rename to spacy/tests/tokenizer/test_tokenizer.py diff --git a/tests/tokenizer/test_tokens_from_list.py 
b/spacy/tests/tokenizer/test_tokens_from_list.py similarity index 100% rename from tests/tokenizer/test_tokens_from_list.py rename to spacy/tests/tokenizer/test_tokens_from_list.py diff --git a/tests/tokenizer/test_whitespace.py b/spacy/tests/tokenizer/test_whitespace.py similarity index 100% rename from tests/tokenizer/test_whitespace.py rename to spacy/tests/tokenizer/test_whitespace.py diff --git a/tests/tokenizer/test_wiki_sun.py b/spacy/tests/tokenizer/test_wiki_sun.py similarity index 100% rename from tests/tokenizer/test_wiki_sun.py rename to spacy/tests/tokenizer/test_wiki_sun.py diff --git a/tests/tokens/test_array.py b/spacy/tests/tokens/test_array.py similarity index 100% rename from tests/tokens/test_array.py rename to spacy/tests/tokens/test_array.py diff --git a/tests/tokens/test_token.py b/spacy/tests/tokens/test_token.py similarity index 100% rename from tests/tokens/test_token.py rename to spacy/tests/tokens/test_token.py diff --git a/tests/tokens/test_token_api.py b/spacy/tests/tokens/test_token_api.py similarity index 100% rename from tests/tokens/test_token_api.py rename to spacy/tests/tokens/test_token_api.py diff --git a/tests/tokens/test_token_references.py b/spacy/tests/tokens/test_token_references.py similarity index 100% rename from tests/tokens/test_token_references.py rename to spacy/tests/tokens/test_token_references.py diff --git a/tests/tokens/test_tokens_api.py b/spacy/tests/tokens/test_tokens_api.py similarity index 100% rename from tests/tokens/test_tokens_api.py rename to spacy/tests/tokens/test_tokens_api.py diff --git a/tests/tokens/test_vec.py b/spacy/tests/tokens/test_vec.py similarity index 100% rename from tests/tokens/test_vec.py rename to spacy/tests/tokens/test_vec.py diff --git a/tests/vectors/test_vectors.py b/spacy/tests/vectors/test_vectors.py similarity index 100% rename from tests/vectors/test_vectors.py rename to spacy/tests/vectors/test_vectors.py diff --git a/tests/vocab/conftest.py 
b/spacy/tests/vocab/conftest.py similarity index 100% rename from tests/vocab/conftest.py rename to spacy/tests/vocab/conftest.py diff --git a/tests/vocab/test_asciify.py b/spacy/tests/vocab/test_asciify.py similarity index 100% rename from tests/vocab/test_asciify.py rename to spacy/tests/vocab/test_asciify.py diff --git a/tests/vocab/test_flag_features.py b/spacy/tests/vocab/test_flag_features.py similarity index 100% rename from tests/vocab/test_flag_features.py rename to spacy/tests/vocab/test_flag_features.py diff --git a/tests/vocab/test_intern.py b/spacy/tests/vocab/test_intern.py similarity index 100% rename from tests/vocab/test_intern.py rename to spacy/tests/vocab/test_intern.py diff --git a/tests/vocab/test_is_punct.py b/spacy/tests/vocab/test_is_punct.py similarity index 100% rename from tests/vocab/test_is_punct.py rename to spacy/tests/vocab/test_is_punct.py diff --git a/tests/vocab/test_lexeme_flags.py b/spacy/tests/vocab/test_lexeme_flags.py similarity index 100% rename from tests/vocab/test_lexeme_flags.py rename to spacy/tests/vocab/test_lexeme_flags.py diff --git a/tests/vocab/test_number.py b/spacy/tests/vocab/test_number.py similarity index 100% rename from tests/vocab/test_number.py rename to spacy/tests/vocab/test_number.py diff --git a/tests/vocab/test_shape.py b/spacy/tests/vocab/test_shape.py similarity index 100% rename from tests/vocab/test_shape.py rename to spacy/tests/vocab/test_shape.py diff --git a/tests/vocab/test_urlish.py b/spacy/tests/vocab/test_urlish.py similarity index 100% rename from tests/vocab/test_urlish.py rename to spacy/tests/vocab/test_urlish.py diff --git a/tests/vocab/test_vocab.py b/spacy/tests/vocab/test_vocab.py similarity index 100% rename from tests/vocab/test_vocab.py rename to spacy/tests/vocab/test_vocab.py diff --git a/tests/website/conftest.py b/spacy/tests/website/conftest.py similarity index 100% rename from tests/website/conftest.py rename to spacy/tests/website/conftest.py diff --git 
a/tests/website/test_api.py b/spacy/tests/website/test_api.py similarity index 100% rename from tests/website/test_api.py rename to spacy/tests/website/test_api.py diff --git a/tests/website/test_home.py b/spacy/tests/website/test_home.py similarity index 100% rename from tests/website/test_home.py rename to spacy/tests/website/test_home.py From c0e25bb5e2c60b22e9524c1226bba16216701691 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 26 Oct 2015 00:07:47 +1100 Subject: [PATCH 09/15] * Fix syntax error in fabfile --- fabfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fabfile.py b/fabfile.py index 6080ff1da..8def13386 100644 --- a/fabfile.py +++ b/fabfile.py @@ -68,7 +68,7 @@ def docs(): local('jade -P %s --out %s' % (jade_loc, out_loc)) with virtualenv(VENV_DIR): - local('./website/create_code_samples tests/website/ website/src/code/')t + local('./website/create_code_samples tests/website/ website/src/code/') jade('home/index.jade', '') jade('docs/index.jade', 'docs/') From a78dff48bb793760c6d3f5aabad7862460c12c1d Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 26 Oct 2015 00:08:12 +1100 Subject: [PATCH 10/15] * Update comparisons page, with minor tweaks --- website/src/jade/home/_comparisons.jade | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/src/jade/home/_comparisons.jade b/website/src/jade/home/_comparisons.jade index 0017a0ec8..ca0bd3077 100644 --- a/website/src/jade/home/_comparisons.jade +++ b/website/src/jade/home/_comparisons.jade @@ -35,7 +35,7 @@ mixin comparison(name) +columns("System", "Language", "Accuracy", "Speed") tbody - +row("spaCy v0.93", "Cython", "91.8", "13,000 (est.)") + +row("spaCy v0.97", "Cython", "91.8", "13,000 (est.)") +row("ClearNLP", "Java", "91.7", "10,271") +row("CoreNLP", "Java", "89.6", "8,602") +row("MATE", "Java", "92.5", "550") @@ -80,7 +80,7 @@ mixin comparison(name) li.con English only li.pro Python .col - h5 CoreNLP features: + h5 CoreNLP ul 
li.pro More accurate NER li.pro Coreference resolution @@ -103,7 +103,7 @@ mixin comparison(name) li.pro Python .col - h5 ClearNLP: + h5 ClearNLP ul li.pro Semantic Role Labelling li.pro Model for biology/life-science From 9dc94b353d4af64486329ea82fdda97d98a9a046 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 26 Oct 2015 00:08:32 +1100 Subject: [PATCH 11/15] * Add release notes for v0.97 --- website/src/jade/home/_installation.jade | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/website/src/jade/home/_installation.jade b/website/src/jade/home/_installation.jade index c0e0b1445..b6a84b53d 100644 --- a/website/src/jade/home/_installation.jade +++ b/website/src/jade/home/_installation.jade @@ -10,7 +10,7 @@ mixin Option(name, open) pre.language-bash code $ pip install --upgrade spacy - $ python -m spacy.en.download all + $ python -m spacy.en.download --force all p Most updates ship a new model, so you will usually have to redownload the data. @@ -93,6 +93,17 @@ mixin Option(name, open) h4 What's New? +details + summary + h4 2015-10-24 v0.97: Reduce load time, bug fixes + + ul + li Load the StringStore from a JSON list, instead of a text file. Accept a file-like object in the API instead of a path, for better flexibility. + li Load from file, rather than path, in StringStore + li Fix bugs in download.py + li Require #[code --force] to overwrite the data directory in download.py + li Fix bugs in #[code Matcher] and #[code doc.merge()] + details summary h4 2015-09-21 v0.93: Bug fixes to word vectors. Rename .repvec to .vector. Rename .string attribute. From a2d7e481e0131dfe5171a9a024c5ea6fc13f1744 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 26 Oct 2015 00:09:02 +1100 Subject: [PATCH 12/15] * Fix version number on install page.
--- website/src/jade/home/index.jade | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/src/jade/home/index.jade b/website/src/jade/home/index.jade index a77dd323c..cbf5d9255 100644 --- a/website/src/jade/home/index.jade +++ b/website/src/jade/home/index.jade @@ -29,10 +29,10 @@ include ../header.jade li: a.button(href="#example-use") Examples li: a.button(href="#install") | Install - v0.94 + v0.97 article.page.landing-page +Section("Comparisons and Benchmarks", "comparisons", "./_comparisons.jade") +Section("Online Demo", "online-demo", "./_online_demo.jade") +Section("Usage by Example", "example-use", "./_usage_examples.jade") - +Section("Install v0.94", "install", "./_installation.jade") + +Section("Install v0.97", "install", "./_installation.jade") From b0ba534d4a4bc5e49d109c8a2caf973f956dbca9 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 26 Oct 2015 00:16:37 +1100 Subject: [PATCH 13/15] * Fix license descriptor in setup.py --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 48e72ff99..eb8454171 100644 --- a/setup.py +++ b/setup.py @@ -128,7 +128,7 @@ def cython_setup(mod_names, language, includes): author_email='honnibal@gmail.com', version=VERSION, url="http://honnibal.github.io/spaCy/", - package_data={"spacy": ["*.pxd"], + package_data={"spacy": ["*.pxd", "tests/*.py", "tests/*/*.py"], "spacy.tokens": ["*.pxd"], "spacy.serialize": ["*.pxd"], "spacy.en": ["*.pxd", "data/pos/*", @@ -139,7 +139,7 @@ def cython_setup(mod_names, language, includes): "spacy.syntax": ["*.pxd"]}, ext_modules=exts, cmdclass={'build_ext': build_ext_cython_subclass}, - license="Dual: Commercial or AGPL", + license="MIT", ) From 09e0b15629a7dd98c45a24e045dcaf518b184782 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 26 Oct 2015 00:30:33 +1100 Subject: [PATCH 14/15] * Package tests, for distribution in PyPI --- setup.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1
deletion(-) diff --git a/setup.py b/setup.py index eb8454171..87cbbc141 100644 --- a/setup.py +++ b/setup.py @@ -147,7 +147,19 @@ def run_setup(exts): setup( name='spacy', packages=['spacy', 'spacy.tokens', 'spacy.en', 'spacy.serialize', - 'spacy.syntax', 'spacy.munge'], + 'spacy.syntax', 'spacy.munge', + 'spacy.tests', + 'spacy.tests.matcher', + 'spacy.tests.morphology', + 'spacy.tests.munge', + 'spacy.tests.parser', + 'spacy.tests.serialize', + 'spacy.tests.spans', + 'spacy.tests.tagger', + 'spacy.tests.tokenizer', + 'spacy.tests.tokens', + 'spacy.tests.vectors', + 'spacy.tests.vocab'], description="Industrial-strength NLP", author='Matthew Honnibal', author_email='honnibal@gmail.com', From 3b1e8b70072256340671595691800c7dabc02b56 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 26 Oct 2015 00:31:04 +1100 Subject: [PATCH 15/15] * Update travis.yml for new tests path --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f21301db1..fc2441e3e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,4 +24,4 @@ install: # run tests script: - - "py.test tests/ -x" + - "py.test spacy/ -x"