Allow entity types with dashes (resolves #1967)

This commit is contained in:
ines 2018-03-28 20:51:26 +02:00
parent 9615ed5ed7
commit 3eb67bbe4b
2 changed files with 16 additions and 3 deletions

View File

@ -84,9 +84,7 @@ cdef class BiluoPushDown(TransitionSystem):
for (ids, words, tags, heads, labels, biluo), _ in sents:
for i, ner_tag in enumerate(biluo):
if ner_tag != 'O' and ner_tag != '-':
if ner_tag.count('-') != 1:
raise ValueError(ner_tag)
_, label = ner_tag.split('-')
_, label = ner_tag.split('-', 1)
if label not in seen_entities:
seen_entities.add(label)
for move_str in ('B', 'I', 'L', 'U'):

View File

@ -0,0 +1,15 @@
# coding: utf8
from __future__ import unicode_literals
import pytest
from ...pipeline import EntityRecognizer
from ...vocab import Vocab
@pytest.mark.parametrize('label', ['U-JOB-NAME'])
def test_issue1967(label):
    """Regression test for issue #1967: entity types containing dashes.

    Builds a one-token gold parse whose BILUO tag has a dash inside the
    entity type (e.g. 'U-JOB-NAME') and passes it to the recognizer's
    transition system. The test has no explicit assertion; it passes as
    long as `get_actions` completes without raising.
    """
    recognizer = EntityRecognizer(Vocab())
    # Six parallel per-token lists; the last one carries the BILUO tag
    # under test. Presumably unpacked as (ids, words, tags, heads,
    # labels, biluo) by the transition system -- confirm against callee.
    annotations = ([0], ['word'], ['tag'], [0], ['dep'], [label])
    sentence = (annotations, None)
    document = (None, [sentence])
    recognizer.moves.get_actions(gold_parses=[document])