mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-10 16:22:29 +03:00
Merge remote-tracking branch 'upstream/develop' into indonesian
This commit is contained in:
commit
4705ae19ba
|
@ -61,6 +61,14 @@ elif is_python3:
|
||||||
json_dumps = lambda data: ujson.dumps(data, indent=2)
|
json_dumps = lambda data: ujson.dumps(data, indent=2)
|
||||||
path2str = lambda path: str(path)
|
path2str = lambda path: str(path)
|
||||||
|
|
||||||
|
|
||||||
|
def b_to_str(b_str):
|
||||||
|
if is_python2:
|
||||||
|
return b_str
|
||||||
|
# important: if no encoding is set, string becomes "b'...'"
|
||||||
|
return str(b_str, encoding='utf8')
|
||||||
|
|
||||||
|
|
||||||
def getattr_(obj, name, *default):
|
def getattr_(obj, name, *default):
|
||||||
if is_python3 and isinstance(name, bytes):
|
if is_python3 and isinstance(name, bytes):
|
||||||
name = name.decode('utf8')
|
name = name.decode('utf8')
|
||||||
|
|
|
@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
from .render import DependencyRenderer, EntityRenderer
|
from .render import DependencyRenderer, EntityRenderer
|
||||||
from ..tokens import Doc
|
from ..tokens import Doc
|
||||||
|
from ..compat import b_to_str
|
||||||
from ..util import prints, is_in_jupyter
|
from ..util import prints, is_in_jupyter
|
||||||
|
|
||||||
|
|
||||||
|
@ -65,7 +66,9 @@ def serve(docs, style='dep', page=True, minify=False, options={}, manual=False,
|
||||||
|
|
||||||
|
|
||||||
def app(environ, start_response):
|
def app(environ, start_response):
|
||||||
start_response('200 OK', [('Content-type', 'text/html; charset=utf-8')])
|
# headers and status need to be bytes in Python 2, see #1227
|
||||||
|
headers = [(b_to_str(b'Content-type'), b_to_str(b'text/html; charset=utf-8'))]
|
||||||
|
start_response(b_to_str(b'200 OK'), headers)
|
||||||
res = _html['parsed'].encode(encoding='utf-8')
|
res = _html['parsed'].encode(encoding='utf-8')
|
||||||
return [res]
|
return [res]
|
||||||
|
|
||||||
|
|
|
@ -292,6 +292,11 @@ class Language(object):
|
||||||
>>> for docs, golds in epoch:
|
>>> for docs, golds in epoch:
|
||||||
>>> state = nlp.update(docs, golds, sgd=optimizer)
|
>>> state = nlp.update(docs, golds, sgd=optimizer)
|
||||||
"""
|
"""
|
||||||
|
if len(docs) != len(golds):
|
||||||
|
raise IndexError("Update expects same number of docs and golds "
|
||||||
|
"Got: %d, %d" % (len(docs), len(golds)))
|
||||||
|
if len(docs) == 0:
|
||||||
|
return
|
||||||
tok2vec = self.pipeline[0]
|
tok2vec = self.pipeline[0]
|
||||||
feats = tok2vec.doc2feats(docs)
|
feats = tok2vec.doc2feats(docs)
|
||||||
grads = {}
|
grads = {}
|
||||||
|
|
|
@ -362,7 +362,7 @@ cdef class ArcEager(TransitionSystem):
|
||||||
if not self.has_gold(gold):
|
if not self.has_gold(gold):
|
||||||
return None
|
return None
|
||||||
for i in range(gold.length):
|
for i in range(gold.length):
|
||||||
if gold.heads[i] is None: # Missing values
|
if gold.heads[i] is None or gold.labels[i] is None: # Missing values
|
||||||
gold.c.heads[i] = i
|
gold.c.heads[i] = i
|
||||||
gold.c.has_dep[i] = False
|
gold.c.has_dep[i] = False
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user