diff --git a/spacy/tests/doc/test_array.py b/spacy/tests/doc/test_array.py index c334cc6eb..7b179d662 100644 --- a/spacy/tests/doc/test_array.py +++ b/spacy/tests/doc/test_array.py @@ -123,14 +123,14 @@ def test_doc_from_array_heads_in_bounds(en_vocab): # head before start arr = doc.to_array(["HEAD"]) - arr[0] = -1 + arr[0] = numpy.array(-1).astype(numpy.uint64) doc_from_array = Doc(en_vocab, words=words) with pytest.raises(ValueError): doc_from_array.from_array(["HEAD"], arr) # head after end arr = doc.to_array(["HEAD"]) - arr[0] = 5 + arr[0] = numpy.array(5).astype(numpy.uint64) doc_from_array = Doc(en_vocab, words=words) with pytest.raises(ValueError): doc_from_array.from_array(["HEAD"], arr) diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index f2621292c..aedb088de 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -361,9 +361,9 @@ cdef class Doc: if annot is heads or annot is sent_starts or annot is ent_iobs: for i in range(len(words)): if attrs.ndim == 1: - attrs[i] = annot[i] + attrs[i] = numpy.array(annot[i]).astype(numpy.uint64) else: - attrs[i, j] = annot[i] + attrs[i, j] = numpy.array(annot[i]).astype(numpy.uint64) elif annot is morphs: for i in range(len(words)): morph_key = vocab.morphology.add(morphs[i]) @@ -981,7 +981,7 @@ cdef class Doc: for i in range(self.length): token = &self.c[i] for j in range(nr_attr): - c_output[i*nr_attr + j] = get_token_attr(token, c_attr_ids[j]) + c_output[i*nr_attr + j] = numpy.array(get_token_attr(token, c_attr_ids[j])).astype(numpy.uint64) # Handle 1d case return output if len(attr_ids) >= 2 else output.reshape((self.length,)) @@ -1559,7 +1559,7 @@ cdef class Doc: for j, (attr, annot) in enumerate(token_annotations.items()): if attr is HEAD: for i in range(len(words)): - array[i, j] = annot[i] + array[i, j] = numpy.array(annot[i]).astype(numpy.uint64) elif attr is MORPH: for i in range(len(words)): array[i, j] = self.vocab.morphology.add(annot[i]) diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index c3495f497..1b3759128 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -299,7 +299,7 @@ cdef class Span: for ancestor in ancestors: ancestor_i = ancestor.i - self.c.start if ancestor_i in range(length): - array[i, head_col] = ancestor_i - i + array[i, head_col] = numpy.array(ancestor_i - i).astype(numpy.uint64) # if there is no appropriate ancestor, define a new artificial root value = array[i, head_col] @@ -307,7 +307,7 @@ cdef class Span: new_root = old_to_new_root.get(ancestor_i, None) if new_root is not None: # take the same artificial root as a previous token from the same sentence - array[i, head_col] = new_root - i + array[i, head_col] = numpy.array(new_root - i).astype(numpy.uint64) else: # set this token as the new artificial root array[i, head_col] = 0 diff --git a/spacy/training/example.pyx b/spacy/training/example.pyx index dfd337b9e..7be363cef 100644 --- a/spacy/training/example.pyx +++ b/spacy/training/example.pyx @@ -443,26 +443,27 @@ def _annot2array(vocab, tok_annot, doc_annot): if key not in IDS: raise ValueError(Errors.E974.format(obj="token", key=key)) elif key in ["ORTH", "SPACY"]: - pass + continue elif key == "HEAD": attrs.append(key) - values.append([h-i if h is not None else 0 for i, h in enumerate(value)]) + row = [h-i if h is not None else 0 for i, h in enumerate(value)] elif key == "DEP": attrs.append(key) - values.append([vocab.strings.add(h) if h is not None else MISSING_DEP for h in value]) + row = [vocab.strings.add(h) if h is not None else MISSING_DEP for h in value] elif key == "SENT_START": attrs.append(key) - values.append([to_ternary_int(v) for v in value]) + row = [to_ternary_int(v) for v in value] elif key == "MORPH": attrs.append(key) - values.append([vocab.morphology.add(v) for v in value]) + row = [vocab.morphology.add(v) for v in value] else: attrs.append(key) if not all(isinstance(v, str) for v in value): types = set([type(v) for v in value]) raise TypeError(Errors.E969.format(field=key, types=types)) from None - values.append([vocab.strings.add(v) for v in value]) - array = numpy.asarray(values, dtype="uint64") + row = [vocab.strings.add(v) for v in value] + values.append([numpy.array(v).astype(numpy.uint64) for v in row]) + array = numpy.array(values, dtype=numpy.uint64) return attrs, array.T