Merge branch 'develop' of https://github.com/explosion/spaCy into develop

2025-11-08 20:07:51 +03:00 · 2018-03-27 21:01:48 +00:00 · 2018-03-27 21:01:48 +00:00 · 6a961928b2
commit 6a961928b2
parent b7136cb094 de9fd091ac
3 changed files with 15 additions and 0 deletions
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@ -837,6 +837,9 @@ cdef class Parser:
    def begin_training(self, get_gold_tuples, pipeline=None, sgd=None, **cfg):
        if 'model' in cfg:
            self.model = cfg['model']
        if not hasattr(get_gold_tuples, '__call__'):
            gold_tuples = get_gold_tuples
            get_gold_tuples = lambda: gold_tuples
        cfg.setdefault('min_action_freq', 30)
        actions = self.moves.get_actions(gold_parses=get_gold_tuples(),
                                         min_freq=cfg.get('min_action_freq', 30))
--- a/spacy/tests/doc/test_token_api.py
+++ b/spacy/tests/doc/test_token_api.py
@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 from ...attrs import IS_ALPHA, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_TITLE, IS_STOP
 from ...symbols import NOUN, VERB
 from ..util import get_doc
 from ...vocab import Vocab
 from ...tokens import Doc
@ -162,3 +163,10 @@ def test_is_sent_start(en_tokenizer):
    assert doc[5].is_sent_start is True
    doc.is_parsed = True
    assert len(list(doc.sents)) == 2
 def test_set_pos():
    """Check that a token's coarse-grained POS can be assigned both by
    string name (`pos_`) and by symbol ID (`pos`), and that the string
    view reflects either assignment."""
    words = ['hello', 'world']
    doc = Doc(Vocab(), words=words)

    # Assign through the string attribute and read it back.
    doc[0].pos_ = 'NOUN'
    first_name = doc[0].pos_
    assert first_name == 'NOUN'

    # Assign through the integer attribute; the string view must follow.
    doc[1].pos = VERB
    second_name = doc[1].pos_
    assert second_name == 'VERB'
--- a/spacy/tokens/token.pyx
+++ b/spacy/tokens/token.pyx
@ -281,6 +281,8 @@ cdef class Token:
        """RETURNS (uint64): ID of coarse-grained part-of-speech tag."""
        def __get__(self):
            # Expose the value currently held in `self.c.pos` unchanged.
            return self.c.pos
        def __set__(self, pos):
            # Write `pos` straight into `self.c.pos`; no validation or
            # conversion is done at this level.
            self.c.pos = pos
    property tag:
        """RETURNS (uint64): ID of fine-grained part-of-speech tag."""
@ -752,6 +754,8 @@ cdef class Token:
        """RETURNS (unicode): Coarse-grained part-of-speech tag."""
        def __get__(self):
            # Map the ID held in `self.c.pos` to its name through
            # `parts_of_speech.NAMES`.
            return parts_of_speech.NAMES[self.c.pos]
        def __set__(self, pos_name):
            # Map the name to its ID through `parts_of_speech.IDS` and store
            # it in `self.c.pos`. The lookup is not guarded here, so a name
            # missing from `IDS` surfaces to the caller as the lookup error
            # (presumably a KeyError -- confirm `IDS` is a plain dict).
            self.c.pos = parts_of_speech.IDS[pos_name]
    property tag_:
        """RETURNS (unicode): Fine-grained part-of-speech tag."""