Fix #2014: token.pos_ not writeable

This commit is contained in:
Matthew Honnibal 2018-03-27 21:21:11 +02:00
parent 18da89e04c
commit de9fd091ac
2 changed files with 12 additions and 0 deletions

View File

@ -2,6 +2,7 @@
from __future__ import unicode_literals
from ...attrs import IS_ALPHA, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_TITLE, IS_STOP
from ...symbols import NOUN, VERB
from ..util import get_doc
from ...vocab import Vocab
from ...tokens import Doc
@ -162,3 +163,10 @@ def test_is_sent_start(en_tokenizer):
assert doc[5].is_sent_start is True
doc.is_parsed = True
assert len(list(doc.sents)) == 2
def test_set_pos():
    """Regression test for #2014: the coarse-grained POS attribute must be
    writable both as a string (`token.pos_`) and as an integer ID
    (`token.pos`), with either form readable back via `.pos_`."""
    tokens = Doc(Vocab(), words=['hello', 'world'])
    first, second = tokens[0], tokens[1]
    # Assign via the string attribute on one token, via the ID on the other.
    first.pos_ = 'NOUN'
    second.pos = VERB
    assert first.pos_ == 'NOUN'
    assert second.pos_ == 'VERB'

View File

@ -281,6 +281,8 @@ cdef class Token:
"""RETURNS (uint64): ID of coarse-grained part-of-speech tag."""
def __get__(self):
return self.c.pos
def __set__(self, pos):
self.c.pos = pos
property tag:
"""RETURNS (uint64): ID of fine-grained part-of-speech tag."""
@ -752,6 +754,8 @@ cdef class Token:
"""RETURNS (unicode): Coarse-grained part-of-speech tag."""
def __get__(self):
return parts_of_speech.NAMES[self.c.pos]
def __set__(self, pos_name):
self.c.pos = parts_of_speech.IDS[pos_name]
property tag_:
"""RETURNS (unicode): Fine-grained part-of-speech tag."""