mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Merge branch 'master' into develop
This commit is contained in:
commit
bf2cc370fe
|
@ -4,6 +4,7 @@ This is a list of everyone who has made significant contributions to spaCy, in a
|
||||||
|
|
||||||
* Adam Bittlingmayer, [@bittlingmayer](https://github.com/bittlingmayer)
|
* Adam Bittlingmayer, [@bittlingmayer](https://github.com/bittlingmayer)
|
||||||
* Andreas Grivas, [@andreasgrv](https://github.com/andreasgrv)
|
* Andreas Grivas, [@andreasgrv](https://github.com/andreasgrv)
|
||||||
|
* Andrew Poliakov, [@pavlin99th](https://github.com/pavlin99th)
|
||||||
* Aniruddha Adhikary [@aniruddha-adhikary](https://github.com/aniruddha-adhikary)
|
* Aniruddha Adhikary [@aniruddha-adhikary](https://github.com/aniruddha-adhikary)
|
||||||
* Bhargav Srinivasa, [@bhargavvader](https://github.com/bhargavvader)
|
* Bhargav Srinivasa, [@bhargavvader](https://github.com/bhargavvader)
|
||||||
* Chris DuBois, [@chrisdubois](https://github.com/chrisdubois)
|
* Chris DuBois, [@chrisdubois](https://github.com/chrisdubois)
|
||||||
|
@ -11,12 +12,16 @@ This is a list of everyone who has made significant contributions to spaCy, in a
|
||||||
* Dafne van Kuppevelt, [@dafnevk](https://github.com/dafnevk)
|
* Dafne van Kuppevelt, [@dafnevk](https://github.com/dafnevk)
|
||||||
* Daniel Rapp, [@rappdw](https://github.com/rappdw)
|
* Daniel Rapp, [@rappdw](https://github.com/rappdw)
|
||||||
* Dmytro Sadovnychyi, [@sadovnychyi](https://github.com/sadovnychyi)
|
* Dmytro Sadovnychyi, [@sadovnychyi](https://github.com/sadovnychyi)
|
||||||
|
* Eric Zhao, [@ericzhao28](https://github.com/ericzhao28)
|
||||||
|
* Greg Baker, [@solresol](https://github.com/solresol)
|
||||||
* György Orosz, [@oroszgy](https://github.com/oroszgy)
|
* György Orosz, [@oroszgy](https://github.com/oroszgy)
|
||||||
* Henning Peters, [@henningpeters](https://github.com/henningpeters)
|
* Henning Peters, [@henningpeters](https://github.com/henningpeters)
|
||||||
|
* Iddo Berger, [@iddoberger](https://github.com/iddoberger)
|
||||||
* Ines Montani, [@ines](https://github.com/ines)
|
* Ines Montani, [@ines](https://github.com/ines)
|
||||||
* J Nicolas Schrading, [@NSchrading](https://github.com/NSchrading)
|
* J Nicolas Schrading, [@NSchrading](https://github.com/NSchrading)
|
||||||
* Janneke van der Zwaan, [@jvdzwaan](https://github.com/jvdzwaan)
|
* Janneke van der Zwaan, [@jvdzwaan](https://github.com/jvdzwaan)
|
||||||
* Jordan Suchow, [@suchow](https://github.com/suchow)
|
* Jordan Suchow, [@suchow](https://github.com/suchow)
|
||||||
|
* Juan Miguel Cejuela, [@juanmirocks](https://github.com/juanmirocks)
|
||||||
* Kendrick Tan, [@kendricktan](https://github.com/kendricktan)
|
* Kendrick Tan, [@kendricktan](https://github.com/kendricktan)
|
||||||
* Kyle P. Johnson, [@kylepjohnson](https://github.com/kylepjohnson)
|
* Kyle P. Johnson, [@kylepjohnson](https://github.com/kylepjohnson)
|
||||||
* Liling Tan, [@alvations](https://github.com/alvations)
|
* Liling Tan, [@alvations](https://github.com/alvations)
|
||||||
|
@ -29,6 +34,7 @@ This is a list of everyone who has made significant contributions to spaCy, in a
|
||||||
* Pokey Rule, [@pokey](https://github.com/pokey)
|
* Pokey Rule, [@pokey](https://github.com/pokey)
|
||||||
* Raphaël Bournhonesque, [@raphael0202](https://github.com/raphael0202)
|
* Raphaël Bournhonesque, [@raphael0202](https://github.com/raphael0202)
|
||||||
* Rob van Nieuwpoort, [@RvanNieuwpoort](https://github.com/RvanNieuwpoort)
|
* Rob van Nieuwpoort, [@RvanNieuwpoort](https://github.com/RvanNieuwpoort)
|
||||||
|
* Roman Inflianskas, [@rominf](https://github.com/rominf)
|
||||||
* Sam Bozek, [@sambozek](https://github.com/sambozek)
|
* Sam Bozek, [@sambozek](https://github.com/sambozek)
|
||||||
* Sasho Savkov, [@savkov](https://github.com/savkov)
|
* Sasho Savkov, [@savkov](https://github.com/savkov)
|
||||||
* Shuvanon Razik, [@shuvanon](https://github.com/shuvanon)
|
* Shuvanon Razik, [@shuvanon](https://github.com/shuvanon)
|
||||||
|
|
|
@ -5,8 +5,8 @@ spaCy is a library for advanced natural language processing in Python and
|
||||||
Cython. spaCy is built on the very latest research, but it isn't researchware.
|
Cython. spaCy is built on the very latest research, but it isn't researchware.
|
||||||
It was designed from day one to be used in real products. spaCy currently supports
|
It was designed from day one to be used in real products. spaCy currently supports
|
||||||
English and German, as well as tokenization for Chinese, Spanish, Italian, French,
|
English and German, as well as tokenization for Chinese, Spanish, Italian, French,
|
||||||
Portuguese, Dutch, Swedish, Finnish, Hungarian and Bengali. It's commercial open-source
|
Portuguese, Dutch, Swedish, Finnish, Hungarian, Bengali and Hebrew. It's commercial
|
||||||
software, released under the MIT license.
|
open-source software, released under the MIT license.
|
||||||
|
|
||||||
💫 **Version 1.7 out now!** `Read the release notes here. <https://github.com/explosion/spaCy/releases/>`_
|
💫 **Version 1.7 out now!** `Read the release notes here. <https://github.com/explosion/spaCy/releases/>`_
|
||||||
|
|
||||||
|
@ -316,6 +316,7 @@ and ``--model`` are optional and enable additional tests:
|
||||||
=========== ============== ===========
|
=========== ============== ===========
|
||||||
Version Date Description
|
Version Date Description
|
||||||
=========== ============== ===========
|
=========== ============== ===========
|
||||||
|
`v1.7.3`_ ``2017-03-26`` Alpha support for Hebrew, new CLI commands and bug fixes
|
||||||
`v1.7.2`_ ``2017-03-20`` Small fixes to beam parser and model linking
|
`v1.7.2`_ ``2017-03-20`` Small fixes to beam parser and model linking
|
||||||
`v1.7.1`_ ``2017-03-19`` Fix data download for system installation
|
`v1.7.1`_ ``2017-03-19`` Fix data download for system installation
|
||||||
`v1.7.0`_ ``2017-03-18`` New 50 MB model, CLI, better downloads and lots of bug fixes
|
`v1.7.0`_ ``2017-03-18`` New 50 MB model, CLI, better downloads and lots of bug fixes
|
||||||
|
@ -344,6 +345,7 @@ Version Date Description
|
||||||
`v0.93`_ ``2015-09-22`` Bug fixes to word vectors
|
`v0.93`_ ``2015-09-22`` Bug fixes to word vectors
|
||||||
=========== ============== ===========
|
=========== ============== ===========
|
||||||
|
|
||||||
|
.. _v1.7.3: https://github.com/explosion/spaCy/releases/tag/v1.7.3
|
||||||
.. _v1.7.2: https://github.com/explosion/spaCy/releases/tag/v1.7.2
|
.. _v1.7.2: https://github.com/explosion/spaCy/releases/tag/v1.7.2
|
||||||
.. _v1.7.1: https://github.com/explosion/spaCy/releases/tag/v1.7.1
|
.. _v1.7.1: https://github.com/explosion/spaCy/releases/tag/v1.7.1
|
||||||
.. _v1.7.0: https://github.com/explosion/spaCy/releases/tag/v1.7.0
|
.. _v1.7.0: https://github.com/explosion/spaCy/releases/tag/v1.7.0
|
||||||
|
|
|
@ -14,7 +14,7 @@ from spacy.cli import train as cli_train
|
||||||
class CLI(object):
|
class CLI(object):
|
||||||
"""Command-line interface for spaCy"""
|
"""Command-line interface for spaCy"""
|
||||||
|
|
||||||
commands = ('download', 'link', 'info', 'package', 'train', 'train_config')
|
commands = ('download', 'link', 'info', 'package', 'train')
|
||||||
|
|
||||||
@plac.annotations(
|
@plac.annotations(
|
||||||
model=("model to download (shortcut or model name)", "positional", None, str),
|
model=("model to download (shortcut or model name)", "positional", None, str),
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
# https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py
|
# https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py
|
||||||
|
|
||||||
__title__ = 'spacy'
|
__title__ = 'spacy'
|
||||||
__version__ = '1.7.2'
|
__version__ = '1.7.3'
|
||||||
__summary__ = 'Industrial-strength Natural Language Processing (NLP) with Python and Cython'
|
__summary__ = 'Industrial-strength Natural Language Processing (NLP) with Python and Cython'
|
||||||
__uri__ = 'https://spacy.io'
|
__uri__ = 'https://spacy.io'
|
||||||
__author__ = 'Matthew Honnibal'
|
__author__ = 'Matthew Honnibal'
|
||||||
|
|
|
@ -93,7 +93,7 @@ def evaluate(Language, gold_tuples, output_path):
|
||||||
def check_dirs(output_path, train_path, dev_path):
|
def check_dirs(output_path, train_path, dev_path):
|
||||||
if not output_path.exists():
|
if not output_path.exists():
|
||||||
util.sys_exit(output_path.as_posix(), title="Output directory not found")
|
util.sys_exit(output_path.as_posix(), title="Output directory not found")
|
||||||
if not train_path.exists() or not train_path.is_file():
|
if not train_path.exists():
|
||||||
util.sys_exit(train_path.as_posix(), title="Training data not found")
|
util.sys_exit(train_path.as_posix(), title="Training data not found")
|
||||||
if dev_path and not dev_path.exists():
|
if dev_path and not dev_path.exists():
|
||||||
util.sys_exit(dev_path.as_posix(), title="Development data not found")
|
util.sys_exit(dev_path.as_posix(), title="Development data not found")
|
||||||
|
|
|
@ -278,7 +278,8 @@ class Language(object):
|
||||||
path = pathlib.Path(path)
|
path = pathlib.Path(path)
|
||||||
if path is True:
|
if path is True:
|
||||||
path = util.get_data_path() / self.lang
|
path = util.get_data_path() / self.lang
|
||||||
|
if not path.exists() and 'path' not in overrides:
|
||||||
|
path = None
|
||||||
self.meta = overrides.get('meta', {})
|
self.meta = overrides.get('meta', {})
|
||||||
self.path = path
|
self.path = path
|
||||||
|
|
||||||
|
|
|
@ -52,7 +52,7 @@ from ._parse_features cimport fill_context
|
||||||
from .stateclass cimport StateClass
|
from .stateclass cimport StateClass
|
||||||
from ._state cimport StateC
|
from ._state cimport StateC
|
||||||
|
|
||||||
USE_FTRL = False
|
USE_FTRL = True
|
||||||
DEBUG = False
|
DEBUG = False
|
||||||
def set_debug(val):
|
def set_debug(val):
|
||||||
global DEBUG
|
global DEBUG
|
||||||
|
@ -82,14 +82,19 @@ cdef class ParserModel(AveragedPerceptron):
|
||||||
def update(self, Example eg, itn=0):
|
def update(self, Example eg, itn=0):
|
||||||
'''Does regression on negative cost. Sort of cute?'''
|
'''Does regression on negative cost. Sort of cute?'''
|
||||||
self.time += 1
|
self.time += 1
|
||||||
best = arg_max_if_gold(eg.c.scores, eg.c.costs, eg.c.nr_class)
|
cdef int best = arg_max_if_gold(eg.c.scores, eg.c.costs, eg.c.nr_class)
|
||||||
guess = eg.guess
|
cdef int guess = eg.guess
|
||||||
if guess == best or best == -1:
|
if guess == best or best == -1:
|
||||||
return 0.0
|
return 0.0
|
||||||
|
cdef FeatureC feat
|
||||||
|
cdef int clas
|
||||||
|
cdef weight_t gradient
|
||||||
if USE_FTRL:
|
if USE_FTRL:
|
||||||
for feat in eg.c.features[:eg.c.nr_feat]:
|
for feat in eg.c.features[:eg.c.nr_feat]:
|
||||||
self.update_weight_ftrl(feat.key, guess, feat.value * eg.c.costs[guess])
|
for clas in range(eg.c.nr_class):
|
||||||
self.update_weight_ftrl(feat.key, best, -feat.value * eg.c.costs[guess])
|
if eg.c.is_valid[clas] and eg.c.scores[clas] >= eg.c.scores[best]:
|
||||||
|
gradient = eg.c.scores[clas] + eg.c.costs[clas]
|
||||||
|
self.update_weight_ftrl(feat.key, clas, feat.value * gradient)
|
||||||
else:
|
else:
|
||||||
for feat in eg.c.features[:eg.c.nr_feat]:
|
for feat in eg.c.features[:eg.c.nr_feat]:
|
||||||
self.update_weight(feat.key, guess, feat.value * eg.c.costs[guess])
|
self.update_weight(feat.key, guess, feat.value * eg.c.costs[guess])
|
||||||
|
|
Loading…
Reference in New Issue
Block a user