mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 18:06:29 +03:00
Merge remote-tracking branch 'refs/remotes/honnibal/master'
Conflicts: appveyor
This commit is contained in:
commit
a728374806
|
@ -18,11 +18,11 @@ environment:
|
||||||
|
|
||||||
# The lastest Python 3.4.
|
# The lastest Python 3.4.
|
||||||
- PYTHON: "C:\\Python34-x64"
|
- PYTHON: "C:\\Python34-x64"
|
||||||
PYTHON_VERSION: "3.4.x" # currently 3.4.3
|
PYTHON_VERSION: "3.4.3" # currently 3.4.3
|
||||||
PYTHON_ARCH: "64"
|
PYTHON_ARCH: "64"
|
||||||
|
|
||||||
- PYTHON: "C:\\Python34-x32"
|
- PYTHON: "C:\\Python34-x32"
|
||||||
PYTHON_VERSION: "3.4.x" # currently 3.4.3
|
PYTHON_VERSION: "3.4.3" # currently 3.4.3
|
||||||
PYTHON_ARCH: "32"
|
PYTHON_ARCH: "32"
|
||||||
|
|
||||||
|
|
||||||
|
|
3
.gitmodules
vendored
3
.gitmodules
vendored
|
@ -1,3 +0,0 @@
|
||||||
[submodule "appveyor"]
|
|
||||||
path = appveyor
|
|
||||||
url = https://github.com/maxirmx/spaCy-appveyor-toolkit
|
|
|
@ -24,4 +24,4 @@ install:
|
||||||
|
|
||||||
# run tests
|
# run tests
|
||||||
script:
|
script:
|
||||||
- "py.test tests/ -x"
|
- "py.test spacy/ -x"
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
[![Travis CI status](https://travis-ci.org/honnibal/spaCy.svg?branch=master)](https://travis-ci.org/honnibal/spaCy)
|
||||||
<img src="https://ci.appveyor.com/api/projects/status/aoe3dtkep36rdaqf?svg=true" />
|
![Appveyor status](https://ci.appveyor.com/api/projects/status/aoe3dtkep36rdaqf?svg=true)
|
||||||
|
|
||||||
spaCy: Industrial-strength NLP
|
spaCy: Industrial-strength NLP
|
||||||
==============================
|
==============================
|
||||||
|
|
25
fabfile.py
vendored
25
fabfile.py
vendored
|
@ -1,9 +1,12 @@
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
from fabric.api import local, lcd, env, settings, prefix
|
from fabric.api import local, lcd, env, settings, prefix
|
||||||
from os.path import exists as file_exists
|
from os.path import exists as file_exists
|
||||||
from fabtools.python import virtualenv
|
from fabtools.python import virtualenv
|
||||||
from os import path
|
from os import path
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
PWD = path.dirname(__file__)
|
PWD = path.dirname(__file__)
|
||||||
|
@ -56,10 +59,28 @@ def prebuild(build_dir='/tmp/build_spacy'):
|
||||||
local('fab test')
|
local('fab test')
|
||||||
local('python setup.py sdist')
|
local('python setup.py sdist')
|
||||||
|
|
||||||
|
|
||||||
def docs():
|
def docs():
|
||||||
|
def jade(source_name, out_dir):
|
||||||
|
pwd = path.join(path.dirname(__file__), 'website')
|
||||||
|
jade_loc = path.join(pwd, 'src', 'jade', source_name)
|
||||||
|
out_loc = path.join(pwd, 'site', out_dir)
|
||||||
|
local('jade -P %s --out %s' % (jade_loc, out_loc))
|
||||||
|
|
||||||
with virtualenv(VENV_DIR):
|
with virtualenv(VENV_DIR):
|
||||||
with lcd(path.join(path.dirname(__file__), 'docs')):
|
local('./website/create_code_samples tests/website/ website/src/code/')
|
||||||
local('make html')
|
|
||||||
|
jade('home/index.jade', '')
|
||||||
|
jade('docs/index.jade', 'docs/')
|
||||||
|
jade('blog/index.jade', 'blog/')
|
||||||
|
jade('tutorials/index.jade', 'tutorials/')
|
||||||
|
|
||||||
|
for post_dir in (Path(__file__).parent / 'website' / 'src' / 'jade' / 'blog').iterdir():
|
||||||
|
if post_dir.is_dir() \
|
||||||
|
and (post_dir / 'index.jade').exists() \
|
||||||
|
and (post_dir / 'meta.jade').exists():
|
||||||
|
jade(str(post_dir / 'index.jade'), path.join('blogs', post_dir.parts[-1]))
|
||||||
|
|
||||||
|
|
||||||
def publish(version):
|
def publish(version):
|
||||||
with virtualenv(VENV_DIR):
|
with virtualenv(VENV_DIR):
|
||||||
|
|
18
setup.py
18
setup.py
|
@ -128,7 +128,7 @@ def cython_setup(mod_names, language, includes):
|
||||||
author_email='honnibal@gmail.com',
|
author_email='honnibal@gmail.com',
|
||||||
version=VERSION,
|
version=VERSION,
|
||||||
url="http://honnibal.github.io/spaCy/",
|
url="http://honnibal.github.io/spaCy/",
|
||||||
package_data={"spacy": ["*.pxd"],
|
package_data={"spacy": ["*.pxd", "tests/*.py", "tests/*/*.py"],
|
||||||
"spacy.tokens": ["*.pxd"],
|
"spacy.tokens": ["*.pxd"],
|
||||||
"spacy.serialize": ["*.pxd"],
|
"spacy.serialize": ["*.pxd"],
|
||||||
"spacy.en": ["*.pxd", "data/pos/*",
|
"spacy.en": ["*.pxd", "data/pos/*",
|
||||||
|
@ -139,7 +139,7 @@ def cython_setup(mod_names, language, includes):
|
||||||
"spacy.syntax": ["*.pxd"]},
|
"spacy.syntax": ["*.pxd"]},
|
||||||
ext_modules=exts,
|
ext_modules=exts,
|
||||||
cmdclass={'build_ext': build_ext_cython_subclass},
|
cmdclass={'build_ext': build_ext_cython_subclass},
|
||||||
license="Dual: Commercial or AGPL",
|
license="MIT",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -147,7 +147,19 @@ def run_setup(exts):
|
||||||
setup(
|
setup(
|
||||||
name='spacy',
|
name='spacy',
|
||||||
packages=['spacy', 'spacy.tokens', 'spacy.en', 'spacy.serialize',
|
packages=['spacy', 'spacy.tokens', 'spacy.en', 'spacy.serialize',
|
||||||
'spacy.syntax', 'spacy.munge'],
|
'spacy.syntax', 'spacy.munge',
|
||||||
|
'spacy.tests',
|
||||||
|
'spacy.tests.matcher',
|
||||||
|
'spacy.tests.morphology',
|
||||||
|
'spacy.tests.munge',
|
||||||
|
'spacy.tests.parser',
|
||||||
|
'spacy.tests.serialize',
|
||||||
|
'spacy.tests.spans',
|
||||||
|
'spacy.tests.tagger',
|
||||||
|
'spacy.tests.tokenizer',
|
||||||
|
'spacy.tests.tokens',
|
||||||
|
'spacy.tests.vectors',
|
||||||
|
'spacy.tests.vocab'],
|
||||||
description="Industrial-strength NLP",
|
description="Industrial-strength NLP",
|
||||||
author='Matthew Honnibal',
|
author='Matthew Honnibal',
|
||||||
author_email='honnibal@gmail.com',
|
author_email='honnibal@gmail.com',
|
||||||
|
|
27
spacy/tests/test_pickle.py
Normal file
27
spacy/tests/test_pickle.py
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
import cloudpickle
|
||||||
|
import io
|
||||||
|
import os
|
||||||
|
import pickle
|
||||||
|
import pytest
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
@pytest.mark.models
|
||||||
|
def test_pickle_english(EN):
|
||||||
|
file_ = io.BytesIO()
|
||||||
|
cloudpickle.dump(EN, file_)
|
||||||
|
|
||||||
|
file_.seek(0)
|
||||||
|
|
||||||
|
loaded = pickle.load(file_)
|
||||||
|
assert loaded is not None
|
||||||
|
|
||||||
|
@pytest.mark.models
|
||||||
|
def test_cloudpickle_to_file(EN):
|
||||||
|
f = tempfile.NamedTemporaryFile(delete=False)
|
||||||
|
p = cloudpickle.CloudPickler(f)
|
||||||
|
p.dump(EN)
|
||||||
|
f.close()
|
||||||
|
loaded_en = cloudpickle.load(open(f.name))
|
||||||
|
os.unlink(f.name)
|
||||||
|
doc = loaded_en(unicode('test parse'))
|
||||||
|
assert len(doc) == 2
|
|
@ -2,6 +2,19 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
import io
|
||||||
|
import pickle
|
||||||
|
import cloudpickle
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.models
|
||||||
|
def test_pickle(en_tokenizer):
|
||||||
|
file_ = io.BytesIO()
|
||||||
|
cloudpickle.dump(en_tokenizer, file_)
|
||||||
|
file_.seek(0)
|
||||||
|
loaded = pickle.load(file_)
|
||||||
|
assert loaded is not None
|
||||||
|
|
||||||
|
|
||||||
def test_no_word(en_tokenizer):
|
def test_no_word(en_tokenizer):
|
|
@ -19,6 +19,7 @@ cdef class Tokenizer:
|
||||||
cdef object _prefix_re
|
cdef object _prefix_re
|
||||||
cdef object _suffix_re
|
cdef object _suffix_re
|
||||||
cdef object _infix_re
|
cdef object _infix_re
|
||||||
|
cdef object _rules
|
||||||
|
|
||||||
cpdef Doc tokens_from_list(self, list strings)
|
cpdef Doc tokens_from_list(self, list strings)
|
||||||
|
|
||||||
|
|
|
@ -29,6 +29,16 @@ cdef class Tokenizer:
|
||||||
self._infix_re = infix_re
|
self._infix_re = infix_re
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
self._load_special_tokenization(rules)
|
self._load_special_tokenization(rules)
|
||||||
|
self._rules = rules
|
||||||
|
|
||||||
|
def __reduce__(self):
|
||||||
|
args = (self.vocab,
|
||||||
|
self._rules,
|
||||||
|
self._prefix_re,
|
||||||
|
self._suffix_re,
|
||||||
|
self._infix_re)
|
||||||
|
|
||||||
|
return (self.__class__, args, None, None)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_dir(cls, Vocab vocab, data_dir):
|
def from_dir(cls, Vocab vocab, data_dir):
|
||||||
|
|
|
@ -1,15 +0,0 @@
|
||||||
import pytest
|
|
||||||
import io
|
|
||||||
import cloudpickle
|
|
||||||
import pickle
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.models
|
|
||||||
def test_pickle_english(EN):
|
|
||||||
file_ = io.BytesIO()
|
|
||||||
cloudpickle.dump(EN, file_)
|
|
||||||
|
|
||||||
file_.seek(0)
|
|
||||||
|
|
||||||
loaded = pickle.load(file_)
|
|
||||||
|
|
|
@ -1,52 +0,0 @@
|
||||||
all: src/code site
|
|
||||||
|
|
||||||
src/code:
|
|
||||||
mkdir -p src/code/
|
|
||||||
./create_code_samples ../tests/website/ src/code/
|
|
||||||
|
|
||||||
site: site/index.html site/blog/ site/docs/ site/license/ site/blog/introducing-spacy/ site/blog/parsing-english-in-python/ site/blog/part-of-speech-POS-tagger-in-python/ site/tutorials/twitter-filter/ site/tutorials/syntax-search/ site/tutorials/mark-adverbs/ site/blog/writing-c-in-cython/ site/blog/how-spacy-works/
|
|
||||||
|
|
||||||
site/index.html: src/jade/header.jade src/jade/*.jade
|
|
||||||
jade -P src/jade/home/index.jade --out site/
|
|
||||||
|
|
||||||
site/docs/: src/jade/docs/*.jade src/jade/header.jade
|
|
||||||
jade -P src/jade/docs/index.jade --out $@
|
|
||||||
|
|
||||||
site/blog/: src/jade/blog/*.jade site/blog/*/ site/tutorials/*/ src/jade/header.jade
|
|
||||||
jade -P src/jade/blog/index.jade --out $@
|
|
||||||
|
|
||||||
site/tutorials/: src/jade/tutorials/*.jade site/tutorials/*/ src/jade/header.jade
|
|
||||||
jade -P src/jade/tutorials/index.jade --out $@
|
|
||||||
|
|
||||||
site/blog/parsing-english-in-python/: src/jade/blog/parsing-english-in-python/*.jade src/jade/header.jade
|
|
||||||
jade -P $< --out $@
|
|
||||||
|
|
||||||
|
|
||||||
site/blog/writing-c-in-cython/: src/jade/blog/writing-c-in-cython/*.jade src/jade/header.jade
|
|
||||||
jade -P $< --out $@
|
|
||||||
|
|
||||||
|
|
||||||
site/blog/part-of-speech-POS-tagger-in-python/: src/jade/blog/part-of-speech-POS-tagger-in-python/*.jade src/jade/header.jade
|
|
||||||
jade -P $< --out $@
|
|
||||||
|
|
||||||
site/blog/introducing-spacy/: src/jade/blog/introducing-spacy/*.jade src/jade/header.jade
|
|
||||||
jade -P $< --out $@
|
|
||||||
|
|
||||||
site/blog/displacy/: src/jade/blog/displacy/*.jade src/jade/header.jade
|
|
||||||
jade -P $< --out $@
|
|
||||||
|
|
||||||
site/blog/eli5-computers-learn-reading: src/jade/blog/eli5-computers-learn-reading/*.jade src/jade/header.jade
|
|
||||||
jade -P $< --out $@
|
|
||||||
|
|
||||||
site/tutorials/mark-adverbs/: src/jade/tutorials/mark-adverbs/*.jade src/jade/header.jade
|
|
||||||
jade -P $< --out $@
|
|
||||||
|
|
||||||
site/blog/how-spacy-works/: src/jade/blog/how-spacy-works/*.jade src/jade/header.jade
|
|
||||||
jade -P $< --out $@
|
|
||||||
|
|
||||||
site/tutorials/syntax-search/: src/jade/tutorials/syntax-search/*.jade src/jade/header.jade
|
|
||||||
jade -P $< --out $@
|
|
||||||
|
|
||||||
site/tutorials/twitter-filter/: src/jade/tutorials/twitter-filter/*.jade src/jade/header.jade
|
|
||||||
jade -P $< --out $@
|
|
||||||
|
|
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||||
import os
|
import os
|
||||||
import ast
|
import ast
|
||||||
import io
|
import io
|
||||||
|
import re
|
||||||
|
|
||||||
import plac
|
import plac
|
||||||
|
|
||||||
|
|
|
@ -35,7 +35,7 @@ mixin comparison(name)
|
||||||
+columns("System", "Language", "Accuracy", "Speed")
|
+columns("System", "Language", "Accuracy", "Speed")
|
||||||
|
|
||||||
tbody
|
tbody
|
||||||
+row("spaCy v0.93", "Cython", "91.8", "13,000 (est.)")
|
+row("spaCy v0.97", "Cython", "91.8", "13,000 (est.)")
|
||||||
+row("ClearNLP", "Java", "91.7", "10,271")
|
+row("ClearNLP", "Java", "91.7", "10,271")
|
||||||
+row("CoreNLP", "Java", "89.6", "8,602")
|
+row("CoreNLP", "Java", "89.6", "8,602")
|
||||||
+row("MATE", "Java", "92.5", "550")
|
+row("MATE", "Java", "92.5", "550")
|
||||||
|
@ -80,7 +80,7 @@ mixin comparison(name)
|
||||||
li.con English only
|
li.con English only
|
||||||
li.pro Python
|
li.pro Python
|
||||||
.col
|
.col
|
||||||
h5 CoreNLP features:
|
h5 CoreNLP
|
||||||
ul
|
ul
|
||||||
li.pro More accurate NER
|
li.pro More accurate NER
|
||||||
li.pro Coreference resolution
|
li.pro Coreference resolution
|
||||||
|
@ -103,7 +103,7 @@ mixin comparison(name)
|
||||||
li.pro Python
|
li.pro Python
|
||||||
|
|
||||||
.col
|
.col
|
||||||
h5 ClearNLP:
|
h5 ClearNLP
|
||||||
ul
|
ul
|
||||||
li.pro Semantic Role Labelling
|
li.pro Semantic Role Labelling
|
||||||
li.pro Model for biology/life-science
|
li.pro Model for biology/life-science
|
||||||
|
|
|
@ -10,7 +10,7 @@ mixin Option(name, open)
|
||||||
pre.language-bash
|
pre.language-bash
|
||||||
code
|
code
|
||||||
$ pip install --upgrade spacy
|
$ pip install --upgrade spacy
|
||||||
$ python -m spacy.en.download all
|
$ python -m spacy.en.download --force all
|
||||||
p Most updates ship a new model, so you will usually have to redownload the data.
|
p Most updates ship a new model, so you will usually have to redownload the data.
|
||||||
|
|
||||||
|
|
||||||
|
@ -93,6 +93,17 @@ mixin Option(name, open)
|
||||||
h4 What's New?
|
h4 What's New?
|
||||||
|
|
||||||
|
|
||||||
|
details
|
||||||
|
summary
|
||||||
|
h4 2015-10-24 v0.97: Reduce load time, bug fixes
|
||||||
|
|
||||||
|
ul
|
||||||
|
li Load the StringStore from a json list, instead of a text file. Accept a file-like object in the API instead of a path, for better flexibility.
|
||||||
|
li * Load from file, rather than path, in StringStore
|
||||||
|
li Fix bugs in download.py
|
||||||
|
li Require #[code --force] to over-write the data directory in download.py
|
||||||
|
li Fix bugs in #[code Matcher] and #[code doc.merge()]
|
||||||
|
|
||||||
details
|
details
|
||||||
summary
|
summary
|
||||||
h4 2015-09-21 v0.93: Bug fixes to word vectors. Rename .repvec to .vector. Rename .string attribute.
|
h4 2015-09-21 v0.93: Bug fixes to word vectors. Rename .repvec to .vector. Rename .string attribute.
|
||||||
|
|
|
@ -29,10 +29,10 @@ include ../header.jade
|
||||||
li: a.button(href="#example-use") Examples
|
li: a.button(href="#example-use") Examples
|
||||||
li: a.button(href="#install")
|
li: a.button(href="#install")
|
||||||
| Install
|
| Install
|
||||||
<span class="button-caption">v0.94</span>
|
<span class="button-caption">v0.97</span>
|
||||||
|
|
||||||
article.page.landing-page
|
article.page.landing-page
|
||||||
+Section("Comparisons and Benchmarks", "comparisons", "./_comparisons.jade")
|
+Section("Comparisons and Benchmarks", "comparisons", "./_comparisons.jade")
|
||||||
+Section("Online Demo", "online-demo", "./_online_demo.jade")
|
+Section("Online Demo", "online-demo", "./_online_demo.jade")
|
||||||
+Section("Usage by Example", "example-use", "./_usage_examples.jade")
|
+Section("Usage by Example", "example-use", "./_usage_examples.jade")
|
||||||
+Section("Install v0.94", "install", "./_installation.jade")
|
+Section("Install v0.97", "install", "./_installation.jade")
|
||||||
|
|
Loading…
Reference in New Issue
Block a user