# Source: patch e07fe92b27f0492d2e8c25eb0cb50f9f08bc59a1
# Author: Matthew Honnibal, Sat, 15 Oct 2016 22:09:52 +0200
# Subject: [PATCH] Draft a refactored init for the GoldParse class
# File: spacy/gold.pyx (1 file changed, 20 insertions)
#
# The method below is added to ``cdef class GoldParse`` directly ahead of
# ``__init__(self, tokens, annot_tuples, make_projective=False)``.
@classmethod
def new_init(cls, doc, annot_tuples=None, words=None, tags=None, heads=None,
             deps=None, entities=None):
    """Build an instance of ``cls`` from per-token keyword annotations.

    Every annotation left as ``None`` is replaced by a default with one
    entry per token of ``doc``.

    Arguments:
        doc: The tokens being annotated. It is iterated, measured with
            ``len()``, and each token's ``.text`` is read when ``words``
            is not supplied.
        annot_tuples: Accepted in the signature but not read by this method.
        words: Token texts. Defaults to ``token.text`` for each token.
        tags: Per-token tags. Defaults to ``None`` for each token.
        heads: Per-token heads. Defaults to ``None`` for each token.
        deps: Per-token dependency labels. Defaults to ``None`` for each
            token.
        entities: Per-token entity tags. ``None`` becomes ``None`` for each
            token; an empty sequence becomes ``'O'`` for each token; if the
            first element is not a string, the value is assumed to hold
            character-offset entities and is converted with
            ``biluo_tags_from_offsets(doc, entities)``.

    Returns:
        ``cls(doc, [(ids, words, tags, heads, deps, entities)])``, where
        ``ids`` is ``range(len(doc))``.
    """
    def one_per_token(value):
        # One copy of `value` for every token yielded by `doc`.
        return [value for _ in doc]

    words = [token.text for token in doc] if words is None else words
    tags = one_per_token(None) if tags is None else tags
    heads = one_per_token(None) if heads is None else heads
    deps = one_per_token(None) if deps is None else deps

    if entities is None:
        entities = one_per_token(None)
    elif len(entities) == 0:
        entities = one_per_token('O')
    elif not isinstance(entities[0], basestring):
        # Assume we have entities specified by character offset.
        entities = biluo_tags_from_offsets(doc, entities)

    token_ids = range(len(doc))
    annotations = (token_ids, words, tags, heads, deps, entities)
    return cls(doc, [annotations])