Merge branch 'master' into develop

This commit is contained in:
ines 2017-03-26 20:00:13 +02:00
commit bf2cc370fe
7 changed files with 25 additions and 11 deletions

View File

@ -4,6 +4,7 @@ This is a list of everyone who has made significant contributions to spaCy, in a
* Adam Bittlingmayer, [@bittlingmayer](https://github.com/bittlingmayer) * Adam Bittlingmayer, [@bittlingmayer](https://github.com/bittlingmayer)
* Andreas Grivas, [@andreasgrv](https://github.com/andreasgrv) * Andreas Grivas, [@andreasgrv](https://github.com/andreasgrv)
* Andrew Poliakov, [@pavlin99th](https://github.com/pavlin99th)
* Aniruddha Adhikary, [@aniruddha-adhikary](https://github.com/aniruddha-adhikary) * Aniruddha Adhikary, [@aniruddha-adhikary](https://github.com/aniruddha-adhikary)
* Bhargav Srinivasa, [@bhargavvader](https://github.com/bhargavvader) * Bhargav Srinivasa, [@bhargavvader](https://github.com/bhargavvader)
* Chris DuBois, [@chrisdubois](https://github.com/chrisdubois) * Chris DuBois, [@chrisdubois](https://github.com/chrisdubois)
@ -11,12 +12,16 @@ This is a list of everyone who has made significant contributions to spaCy, in a
* Dafne van Kuppevelt, [@dafnevk](https://github.com/dafnevk) * Dafne van Kuppevelt, [@dafnevk](https://github.com/dafnevk)
* Daniel Rapp, [@rappdw](https://github.com/rappdw) * Daniel Rapp, [@rappdw](https://github.com/rappdw)
* Dmytro Sadovnychyi, [@sadovnychyi](https://github.com/sadovnychyi) * Dmytro Sadovnychyi, [@sadovnychyi](https://github.com/sadovnychyi)
* Eric Zhao, [@ericzhao28](https://github.com/ericzhao28)
* Greg Baker, [@solresol](https://github.com/solresol)
* György Orosz, [@oroszgy](https://github.com/oroszgy) * György Orosz, [@oroszgy](https://github.com/oroszgy)
* Henning Peters, [@henningpeters](https://github.com/henningpeters) * Henning Peters, [@henningpeters](https://github.com/henningpeters)
* Iddo Berger, [@iddoberger](https://github.com/iddoberger)
* Ines Montani, [@ines](https://github.com/ines) * Ines Montani, [@ines](https://github.com/ines)
* J Nicolas Schrading, [@NSchrading](https://github.com/NSchrading) * J Nicolas Schrading, [@NSchrading](https://github.com/NSchrading)
* Janneke van der Zwaan, [@jvdzwaan](https://github.com/jvdzwaan) * Janneke van der Zwaan, [@jvdzwaan](https://github.com/jvdzwaan)
* Jordan Suchow, [@suchow](https://github.com/suchow) * Jordan Suchow, [@suchow](https://github.com/suchow)
* Juan Miguel Cejuela, [@juanmirocks](https://github.com/juanmirocks)
* Kendrick Tan, [@kendricktan](https://github.com/kendricktan) * Kendrick Tan, [@kendricktan](https://github.com/kendricktan)
* Kyle P. Johnson, [@kylepjohnson](https://github.com/kylepjohnson) * Kyle P. Johnson, [@kylepjohnson](https://github.com/kylepjohnson)
* Liling Tan, [@alvations](https://github.com/alvations) * Liling Tan, [@alvations](https://github.com/alvations)
@ -29,6 +34,7 @@ This is a list of everyone who has made significant contributions to spaCy, in a
* Pokey Rule, [@pokey](https://github.com/pokey) * Pokey Rule, [@pokey](https://github.com/pokey)
* Raphaël Bournhonesque, [@raphael0202](https://github.com/raphael0202) * Raphaël Bournhonesque, [@raphael0202](https://github.com/raphael0202)
* Rob van Nieuwpoort, [@RvanNieuwpoort](https://github.com/RvanNieuwpoort) * Rob van Nieuwpoort, [@RvanNieuwpoort](https://github.com/RvanNieuwpoort)
* Roman Inflianskas, [@rominf](https://github.com/rominf)
* Sam Bozek, [@sambozek](https://github.com/sambozek) * Sam Bozek, [@sambozek](https://github.com/sambozek)
* Sasho Savkov, [@savkov](https://github.com/savkov) * Sasho Savkov, [@savkov](https://github.com/savkov)
* Shuvanon Razik, [@shuvanon](https://github.com/shuvanon) * Shuvanon Razik, [@shuvanon](https://github.com/shuvanon)

View File

@ -5,8 +5,8 @@ spaCy is a library for advanced natural language processing in Python and
Cython. spaCy is built on the very latest research, but it isn't researchware. Cython. spaCy is built on the very latest research, but it isn't researchware.
It was designed from day one to be used in real products. spaCy currently supports It was designed from day one to be used in real products. spaCy currently supports
English and German, as well as tokenization for Chinese, Spanish, Italian, French, English and German, as well as tokenization for Chinese, Spanish, Italian, French,
Portuguese, Dutch, Swedish, Finnish, Hungarian and Bengali. It's commercial open-source Portuguese, Dutch, Swedish, Finnish, Hungarian, Bengali and Hebrew. It's commercial
software, released under the MIT license. open-source software, released under the MIT license.
💫 **Version 1.7 out now!** `Read the release notes here. <https://github.com/explosion/spaCy/releases/>`_ 💫 **Version 1.7 out now!** `Read the release notes here. <https://github.com/explosion/spaCy/releases/>`_
@ -316,6 +316,7 @@ and ``--model`` are optional and enable additional tests:
=========== ============== =========== =========== ============== ===========
Version Date Description Version Date Description
=========== ============== =========== =========== ============== ===========
`v1.7.3`_ ``2017-03-26`` Alpha support for Hebrew, new CLI commands and bug fixes
`v1.7.2`_ ``2017-03-20`` Small fixes to beam parser and model linking `v1.7.2`_ ``2017-03-20`` Small fixes to beam parser and model linking
`v1.7.1`_ ``2017-03-19`` Fix data download for system installation `v1.7.1`_ ``2017-03-19`` Fix data download for system installation
`v1.7.0`_ ``2017-03-18`` New 50 MB model, CLI, better downloads and lots of bug fixes `v1.7.0`_ ``2017-03-18`` New 50 MB model, CLI, better downloads and lots of bug fixes
@ -344,6 +345,7 @@ Version Date Description
`v0.93`_ ``2015-09-22`` Bug fixes to word vectors `v0.93`_ ``2015-09-22`` Bug fixes to word vectors
=========== ============== =========== =========== ============== ===========
.. _v1.7.3: https://github.com/explosion/spaCy/releases/tag/v1.7.3
.. _v1.7.2: https://github.com/explosion/spaCy/releases/tag/v1.7.2 .. _v1.7.2: https://github.com/explosion/spaCy/releases/tag/v1.7.2
.. _v1.7.1: https://github.com/explosion/spaCy/releases/tag/v1.7.1 .. _v1.7.1: https://github.com/explosion/spaCy/releases/tag/v1.7.1
.. _v1.7.0: https://github.com/explosion/spaCy/releases/tag/v1.7.0 .. _v1.7.0: https://github.com/explosion/spaCy/releases/tag/v1.7.0

View File

@ -14,7 +14,7 @@ from spacy.cli import train as cli_train
class CLI(object): class CLI(object):
"""Command-line interface for spaCy""" """Command-line interface for spaCy"""
commands = ('download', 'link', 'info', 'package', 'train', 'train_config') commands = ('download', 'link', 'info', 'package', 'train')
@plac.annotations( @plac.annotations(
model=("model to download (shortcut or model name)", "positional", None, str), model=("model to download (shortcut or model name)", "positional", None, str),

View File

@ -3,7 +3,7 @@
# https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py # https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py
__title__ = 'spacy' __title__ = 'spacy'
__version__ = '1.7.2' __version__ = '1.7.3'
__summary__ = 'Industrial-strength Natural Language Processing (NLP) with Python and Cython' __summary__ = 'Industrial-strength Natural Language Processing (NLP) with Python and Cython'
__uri__ = 'https://spacy.io' __uri__ = 'https://spacy.io'
__author__ = 'Matthew Honnibal' __author__ = 'Matthew Honnibal'

View File

@ -93,7 +93,7 @@ def evaluate(Language, gold_tuples, output_path):
def check_dirs(output_path, train_path, dev_path): def check_dirs(output_path, train_path, dev_path):
if not output_path.exists(): if not output_path.exists():
util.sys_exit(output_path.as_posix(), title="Output directory not found") util.sys_exit(output_path.as_posix(), title="Output directory not found")
if not train_path.exists() or not train_path.is_file(): if not train_path.exists():
util.sys_exit(train_path.as_posix(), title="Training data not found") util.sys_exit(train_path.as_posix(), title="Training data not found")
if dev_path and not dev_path.exists(): if dev_path and not dev_path.exists():
util.sys_exit(dev_path.as_posix(), title="Development data not found") util.sys_exit(dev_path.as_posix(), title="Development data not found")

View File

@ -278,7 +278,8 @@ class Language(object):
path = pathlib.Path(path) path = pathlib.Path(path)
if path is True: if path is True:
path = util.get_data_path() / self.lang path = util.get_data_path() / self.lang
if not path.exists() and 'path' not in overrides:
path = None
self.meta = overrides.get('meta', {}) self.meta = overrides.get('meta', {})
self.path = path self.path = path

View File

@ -52,7 +52,7 @@ from ._parse_features cimport fill_context
from .stateclass cimport StateClass from .stateclass cimport StateClass
from ._state cimport StateC from ._state cimport StateC
USE_FTRL = False USE_FTRL = True
DEBUG = False DEBUG = False
def set_debug(val): def set_debug(val):
global DEBUG global DEBUG
@ -82,14 +82,19 @@ cdef class ParserModel(AveragedPerceptron):
def update(self, Example eg, itn=0): def update(self, Example eg, itn=0):
'''Does regression on negative cost. Sort of cute?''' '''Does regression on negative cost. Sort of cute?'''
self.time += 1 self.time += 1
best = arg_max_if_gold(eg.c.scores, eg.c.costs, eg.c.nr_class) cdef int best = arg_max_if_gold(eg.c.scores, eg.c.costs, eg.c.nr_class)
guess = eg.guess cdef int guess = eg.guess
if guess == best or best == -1: if guess == best or best == -1:
return 0.0 return 0.0
cdef FeatureC feat
cdef int clas
cdef weight_t gradient
if USE_FTRL: if USE_FTRL:
for feat in eg.c.features[:eg.c.nr_feat]: for feat in eg.c.features[:eg.c.nr_feat]:
self.update_weight_ftrl(feat.key, guess, feat.value * eg.c.costs[guess]) for clas in range(eg.c.nr_class):
self.update_weight_ftrl(feat.key, best, -feat.value * eg.c.costs[guess]) if eg.c.is_valid[clas] and eg.c.scores[clas] >= eg.c.scores[best]:
gradient = eg.c.scores[clas] + eg.c.costs[clas]
self.update_weight_ftrl(feat.key, clas, feat.value * gradient)
else: else:
for feat in eg.c.features[:eg.c.nr_feat]: for feat in eg.c.features[:eg.c.nr_feat]:
self.update_weight(feat.key, guess, feat.value * eg.c.costs[guess]) self.update_weight(feat.key, guess, feat.value * eg.c.costs[guess])