mirror of
https://github.com/explosion/spaCy.git
synced 2025-03-13 07:55:49 +03:00
Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
commit
6a961928b2
|
@ -837,6 +837,9 @@ cdef class Parser:
|
||||||
def begin_training(self, get_gold_tuples, pipeline=None, sgd=None, **cfg):
|
def begin_training(self, get_gold_tuples, pipeline=None, sgd=None, **cfg):
|
||||||
if 'model' in cfg:
|
if 'model' in cfg:
|
||||||
self.model = cfg['model']
|
self.model = cfg['model']
|
||||||
|
if not hasattr(get_gold_tuples, '__call__'):
|
||||||
|
gold_tuples = get_gold_tuples
|
||||||
|
get_gold_tuples = lambda: gold_tuples
|
||||||
cfg.setdefault('min_action_freq', 30)
|
cfg.setdefault('min_action_freq', 30)
|
||||||
actions = self.moves.get_actions(gold_parses=get_gold_tuples(),
|
actions = self.moves.get_actions(gold_parses=get_gold_tuples(),
|
||||||
min_freq=cfg.get('min_action_freq', 30))
|
min_freq=cfg.get('min_action_freq', 30))
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from ...attrs import IS_ALPHA, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_TITLE, IS_STOP
|
from ...attrs import IS_ALPHA, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_TITLE, IS_STOP
|
||||||
|
from ...symbols import NOUN, VERB
|
||||||
from ..util import get_doc
|
from ..util import get_doc
|
||||||
from ...vocab import Vocab
|
from ...vocab import Vocab
|
||||||
from ...tokens import Doc
|
from ...tokens import Doc
|
||||||
|
@ -162,3 +163,10 @@ def test_is_sent_start(en_tokenizer):
|
||||||
assert doc[5].is_sent_start is True
|
assert doc[5].is_sent_start is True
|
||||||
doc.is_parsed = True
|
doc.is_parsed = True
|
||||||
assert len(list(doc.sents)) == 2
|
assert len(list(doc.sents)) == 2
|
||||||
|
|
||||||
|
def test_set_pos():
|
||||||
|
doc = Doc(Vocab(), words=['hello', 'world'])
|
||||||
|
doc[0].pos_ = 'NOUN'
|
||||||
|
assert doc[0].pos_ == 'NOUN'
|
||||||
|
doc[1].pos = VERB
|
||||||
|
assert doc[1].pos_ == 'VERB'
|
||||||
|
|
|
@ -281,6 +281,8 @@ cdef class Token:
|
||||||
"""RETURNS (uint64): ID of coarse-grained part-of-speech tag."""
|
"""RETURNS (uint64): ID of coarse-grained part-of-speech tag."""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.c.pos
|
return self.c.pos
|
||||||
|
def __set__(self, pos):
|
||||||
|
self.c.pos = pos
|
||||||
|
|
||||||
property tag:
|
property tag:
|
||||||
"""RETURNS (uint64): ID of fine-grained part-of-speech tag."""
|
"""RETURNS (uint64): ID of fine-grained part-of-speech tag."""
|
||||||
|
@ -752,6 +754,8 @@ cdef class Token:
|
||||||
"""RETURNS (unicode): Coarse-grained part-of-speech tag."""
|
"""RETURNS (unicode): Coarse-grained part-of-speech tag."""
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return parts_of_speech.NAMES[self.c.pos]
|
return parts_of_speech.NAMES[self.c.pos]
|
||||||
|
def __set__(self, pos_name):
|
||||||
|
self.c.pos = parts_of_speech.IDS[pos_name]
|
||||||
|
|
||||||
property tag_:
|
property tag_:
|
||||||
"""RETURNS (unicode): Fine-grained part-of-speech tag."""
|
"""RETURNS (unicode): Fine-grained part-of-speech tag."""
|
||||||
|
|
Loading…
Reference in New Issue
Block a user