Mirror of https://github.com/explosion/spaCy.git

Commit 7c47e38c12: Merge branch 'develop' of https://github.com/explosion/spaCy into develop
spacy/lang/da/examples.py (new file, 18 lines)

@@ -0,0 +1,18 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+
+"""
+Example sentences to test spaCy and its language models.
+
+>>> from spacy.lang.da.examples import sentences
+>>> docs = nlp.pipe(sentences)
+"""
+
+
+sentences = [
+    "Apple overvejer at købe et britisk startup for 1 milliard dollar",
+    "Selvkørende biler flytter forsikringsansvaret over på producenterne",
+    "San Francisco overvejer at forbyde leverandørrobotter på fortov",
+    "London er en stor by i Storbritannien"
+]
spacy/lang/de/examples.py (new file, 22 lines)

@@ -0,0 +1,22 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+
+"""
+Example sentences to test spaCy and its language models.
+
+>>> from spacy.lang.de.examples import sentences
+>>> docs = nlp.pipe(sentences)
+"""
+
+
+sentences = [
+    "Die ganze Stadt ist ein Startup: Shenzhen ist das Silicon Valley für Hardware-Firmen",
+    "Wie deutsche Startups die Technologie vorantreiben wollen: Künstliche Intelligenz",
+    "Trend zum Urlaub in Deutschland beschert Gastwirten mehr Umsatz",
+    "Bundesanwaltschaft erhebt Anklage gegen mutmaßlichen Schweizer Spion",
+    "San Francisco erwägt Verbot von Lieferrobotern",
+    "Autonome Fahrzeuge verlagern Haftpflicht auf Hersteller",
+    "Wo bist du?",
+    "Was ist die Hauptstadt von Deutschland?"
+]
spacy/lang/en/examples.py (new file, 22 lines)

@@ -0,0 +1,22 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+
+"""
+Example sentences to test spaCy and its language models.
+
+>>> from spacy.lang.en.examples import sentences
+>>> docs = nlp.pipe(sentences)
+"""
+
+
+sentences = [
+    "Apple is looking at buying U.K. startup for $1 billion",
+    "Autonomous cars shift insurance liability toward manufacturers",
+    "San Francisco considers banning sidewalk delivery robots",
+    "London is a big city in the United Kingdom.",
+    "Where are you?",
+    "Who is the president of France?",
+    "What is the capital of the United States?",
+    "When was Barack Obama born?"
+]
spacy/lang/es/examples.py (new file, 22 lines)

@@ -0,0 +1,22 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+
+"""
+Example sentences to test spaCy and its language models.
+
+>>> from spacy.lang.es.examples import sentences
+>>> docs = nlp.pipe(sentences)
+"""
+
+
+sentences = [
+    "Apple está buscando comprar una startup del Reino Unido por mil millones de dólares",
+    "Los coches autónomos delegan la responsabilidad del seguro en sus fabricantes",
+    "San Francisco analiza prohibir los robots delivery",
+    "Londres es una gran ciudad del Reino Unido",
+    "El gato come pescado",
+    "Veo al hombre con el telescopio",
+    "La araña come moscas",
+    "El pingüino incuba en su nido"
+]
spacy/lang/fr/examples.py (new file, 26 lines)

@@ -0,0 +1,26 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+
+"""
+Example sentences to test spaCy and its language models.
+
+>>> from spacy.lang.fr.examples import sentences
+>>> docs = nlp.pipe(sentences)
+"""
+
+
+sentences = [
+    "Apple cherche à acheter une startup anglaise pour 1 milliard de dollars",
+    "Les voitures autonomes voient leur assurances décalées vers les constructeurs",
+    "San Francisco envisage d'interdire les robots coursiers",
+    "Londres est une grande ville du Royaume-Uni",
+    "L’Italie choisit ArcelorMittal pour reprendre la plus grande aciérie d’Europe",
+    "Apple lance HomePod parce qu'il se sent menacé par l'Echo d'Amazon",
+    "La France ne devrait pas manquer d'électricité cet été, même en cas de canicule",
+    "Nouvelles attaques de Trump contre le maire de Londres",
+    "Où es-tu ?",
+    "Qui est le président de la France ?",
+    "Où est la capitale des Etats-Unis ?",
+    "Quand est né Barack Obama ?"
+]
spacy/lang/he/examples.py (new file, 28 lines)

@@ -0,0 +1,28 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+
+"""
+Example sentences to test spaCy and its language models.
+
+>>> from spacy.lang.he.examples import sentences
+>>> docs = nlp.pipe(sentences)
+"""
+
+
+sentences = [
+    'סין מקימה קרן של 440 מיליון דולר להשקעה בהייטק בישראל',
+    'רה"מ הודיע כי יחרים טקס בחסותו',
+    'הכנסת צפויה לאשר איכון אוטומטי של שיחות למוקד 100',
+    'תוכנית לאומית תהפוך את ישראל למעצמה דיגיטלית',
+    'סע לשלום, המפתחות בפנים.',
+    'מלצר, פעמיים טורקי!',
+    'ואהבת לרעך כמוך.',
+    'היום נעשה משהו בלתי נשכח.',
+    'איפה הילד?',
+    'מיהו נשיא צרפת?',
+    'מהי בירת ארצות הברית?',
+    "איך קוראים בעברית לצ'ופצ'יק של הקומקום?",
+    'מה הייתה הדקה?',
+    'מי אומר שלום ראשון, זה שעולה או זה שיורד?'
+]
spacy/lang/it/examples.py (new file, 18 lines)

@@ -0,0 +1,18 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+
+"""
+Example sentences to test spaCy and its language models.
+
+>>> from spacy.lang.it.examples import sentences
+>>> docs = nlp.pipe(sentences)
+"""
+
+
+sentences = [
+    "Apple vuole comprare una startup del Regno Unito per un miliardo di dollari",
+    "Le automobili a guida autonoma spostano la responsabilità assicurativa verso i produttori",
+    "San Francisco prevede di bandire i robot di consegna porta a porta",
+    "Londra è una grande città del Regno Unito."
+]
spacy/lang/nb/examples.py (new file, 18 lines)

@@ -0,0 +1,18 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+
+"""
+Example sentences to test spaCy and its language models.
+
+>>> from spacy.lang.nb.examples import sentences
+>>> docs = nlp.pipe(sentences)
+"""
+
+
+sentences = [
+    "Apple vurderer å kjøpe britisk oppstartfirma for en milliard dollar",
+    "Selvkjørende biler flytter forsikringsansvaret over på produsentene",
+    "San Francisco vurderer å forby robotbud på fortauene",
+    "London er en stor by i Storbritannia."
+]
spacy/lang/pl/examples.py (new file, 20 lines)

@@ -0,0 +1,20 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+
+"""
+Example sentences to test spaCy and its language models.
+
+>>> from spacy.lang.pl.examples import sentences
+>>> docs = nlp.pipe(sentences)
+"""
+
+
+sentences = [
+    "Poczuł przyjemną woń mocnej kawy.",
+    "Istnieje wiele dróg oddziaływania substancji psychoaktywnej na układ nerwowy.",
+    "Powitał mnie biało-czarny kot, płosząc siedzące na płocie trzy dorodne dudki.",
+    "Nowy abonament pod lupą Komisji Europejskiej",
+    "Czy w ciągu ostatnich 48 godzin spożyłeś leki zawierające paracetamol?",
+    "Kto ma ochotę zapoznać się z innymi niż w książkach przygodami Muminków i ich przyjaciół, temu polecam komiks Tove Jansson „Muminki i morze”."
+]
spacy/lang/pt/examples.py (new file, 18 lines)

@@ -0,0 +1,18 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+
+"""
+Example sentences to test spaCy and its language models.
+
+>>> from spacy.lang.pt.examples import sentences
+>>> docs = nlp.pipe(sentences)
+"""
+
+
+sentences = [
+    "Apple está querendo comprar uma startup do Reino Unido por 100 milhões de dólares",
+    "Carros autônomos empurram a responsabilidade do seguro para os fabricantes.",
+    "São Francisco considera banir os robôs de entrega que andam pelas calçadas",
+    "Londres é a maior cidade do Reino Unido"
+]
spacy/lang/sv/examples.py (new file, 18 lines)

@@ -0,0 +1,18 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+
+"""
+Example sentences to test spaCy and its language models.
+
+>>> from spacy.lang.sv.examples import sentences
+>>> docs = nlp.pipe(sentences)
+"""
+
+
+sentences = [
+    "Apple överväger att köpa brittisk startup för 1 miljard dollar.",
+    "Självkörande bilar förskjuter försäkringsansvar mot tillverkare.",
+    "San Francisco överväger förbud mot leveransrobotar på trottoarer.",
+    "London är en storstad i Storbritannien."
+]
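All eleven example modules follow the same pattern: a plain list of sentences that can be imported for quick smoke tests. A minimal sketch of the intended usage, assuming a blank pipeline for the language in question (English stands in for any of them):

    from spacy.lang.en import English
    from spacy.lang.en.examples import sentences

    # A blank pipeline is enough to tokenize the example sentences.
    nlp = English()
    for doc in nlp.pipe(sentences):
        print(len(doc), doc.text)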
spacy/language.py

@@ -430,11 +430,16 @@ class Language(object):
         except StopIteration:
             pass
 
-    def pipe(self, texts, tuples=False, n_threads=2, batch_size=1000, disable=[]):
+    def pipe(self, texts, as_tuples=False, n_threads=2, batch_size=1000,
+             disable=[]):
         """Process texts as a stream, and yield `Doc` objects in order. Supports
         GIL-free multi-threading.
 
         texts (iterator): A sequence of texts to process.
+        as_tuples (bool):
+            If set to True, inputs should be a sequence of
+            (text, context) tuples. Output will then be a sequence of
+            (doc, context) tuples. Defaults to False.
         n_threads (int): The number of worker threads to use. If -1, OpenMP will
             decide how many to use at run time. Default is 2.
         batch_size (int): The number of texts to buffer.
 
@@ -446,7 +451,7 @@ class Language(object):
         >>> for doc in nlp.pipe(texts, batch_size=50, n_threads=4):
         >>>     assert doc.is_parsed
         """
-        if tuples:
+        if as_tuples:
             text_context1, text_context2 = itertools.tee(texts)
             texts = (tc[0] for tc in text_context1)
             contexts = (tc[1] for tc in text_context2)
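The rename from tuples to as_tuples doesn't change behaviour. A minimal sketch of the flag in use, assuming a loaded pipeline bound to nlp:

    # Each input carries an opaque context object through the pipeline.
    data = [(u'This is a text', {'id': 1}),
            (u'And another one', {'id': 2})]
    for doc, context in nlp.pipe(data, as_tuples=True):
        print(context['id'], doc.text)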
spacy/tests/parser/test_nn_beam.py

@@ -63,7 +63,7 @@ def vector_size():
 
 @pytest.fixture
 def beam(moves, states, golds, beam_width):
-    return ParserBeam(moves, states, golds, width=beam_width)
+    return ParserBeam(moves, states, golds, width=beam_width, density=0.0)
 
 @pytest.fixture
 def scores(moves, batch_size, beam_width):
spacy/tests/serialize/test_serialize_tagger.py

@@ -11,8 +11,8 @@ import pytest
 def taggers(en_vocab):
     tagger1 = Tagger(en_vocab)
     tagger2 = Tagger(en_vocab)
-    tagger1.model = tagger1.Model(None, None)
-    tagger2.model = tagger2.Model(None, None)
+    tagger1.model = tagger1.Model(8, 8)
+    tagger2.model = tagger1.model
     return (tagger1, tagger2)
 
 
@@ -20,7 +20,6 @@ def test_serialize_tagger_roundtrip_bytes(en_vocab, taggers):
     tagger1, tagger2 = taggers
     tagger1_b = tagger1.to_bytes()
     tagger2_b = tagger2.to_bytes()
-    assert tagger1_b == tagger2_b
     tagger1 = tagger1.from_bytes(tagger1_b)
     assert tagger1.to_bytes() == tagger1_b
     new_tagger1 = Tagger(en_vocab).from_bytes(tagger1_b)
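With both taggers sharing one concretely sized model, their serialized forms stay comparable. A minimal sketch of the bytes roundtrip the test exercises, assuming an en_vocab fixture as above:

    tagger = Tagger(en_vocab)
    tagger.model = tagger.Model(8, 8)
    tagger_b = tagger.to_bytes()
    # Loading the bytes back must reproduce the same serialized form.
    tagger = tagger.from_bytes(tagger_b)
    assert tagger.to_bytes() == tagger_b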
spacy/tokens/doc.pyx

@@ -238,6 +238,27 @@ cdef class Doc:
     def doc(self):
         return self
 
+    def char_span(self, int start_idx, int end_idx, attr_t label=0, vector=None):
+        """Create a `Span` object from the slice `doc.text[start : end]`.
+
+        doc (Doc): The parent document.
+        start (int): The index of the first character of the span.
+        end (int): The index of the first character after the span.
+        label (uint64): A label to attach to the Span, e.g. for named entities.
+        vector (ndarray[ndim=1, dtype='float32']): A meaning representation of the span.
+        RETURNS (Span): The newly constructed object.
+        """
+        cdef int start = token_by_start(self.c, self.length, start_idx)
+        if start == -1:
+            return None
+        cdef int end = token_by_end(self.c, self.length, end_idx)
+        if end == -1:
+            return None
+        # Currently we have the token index, we want the range-end index
+        end += 1
+        cdef Span span = Span(self, start, end, label=label, vector=vector)
+        return span
+
     def similarity(self, other):
         """Make a semantic similarity estimate. The default estimate is cosine
         similarity using an average of word vectors.
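Note that char_span returns None rather than raising when the character offsets don't line up with token boundaries. A minimal sketch, assuming a pipeline bound to nlp:

    doc = nlp(u'I like New York')
    span = doc.char_span(7, 15, label=doc.vocab.strings['GPE'])
    assert span.text == 'New York'
    # Offsets that cut into a token produce no span at all.
    assert doc.char_span(8, 15) is None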
spacy/tokens/span.pxd

@@ -15,5 +15,5 @@ cdef class Span:
     cdef public _vector
     cdef public _vector_norm
 
-
     cpdef int _recalculate_indices(self) except -1
+    cpdef np.ndarray to_array(self, object features)
spacy/tokens/span.pyx

@@ -7,7 +7,7 @@ import numpy
 import numpy.linalg
 from libc.math cimport sqrt
 
-from .doc cimport token_by_start, token_by_end
+from .doc cimport token_by_start, token_by_end, get_token_attr
 from ..structs cimport TokenC, LexemeC
 from ..typedefs cimport flags_t, attr_t, hash_t
 from ..attrs cimport attr_id_t
@@ -135,6 +135,28 @@ cdef class Span:
             return 0.0
         return numpy.dot(self.vector, other.vector) / (self.vector_norm * other.vector_norm)
 
+    cpdef np.ndarray to_array(self, object py_attr_ids):
+        """Given a list of M attribute IDs, export the tokens to a numpy
+        `ndarray` of shape `(N, M)`, where `N` is the length of the span.
+        The values will be 32-bit integers.
+
+        attr_ids (list[int]): A list of attribute ID ints.
+        RETURNS (numpy.ndarray[long, ndim=2]): A feature matrix, with one row
+            per word, and one column per attribute indicated in the input
+            `attr_ids`.
+        """
+        cdef int i, j
+        cdef attr_id_t feature
+        cdef np.ndarray[attr_t, ndim=2] output
+        # Make an array from the attributes --- otherwise our inner loop is Python
+        # dict iteration.
+        cdef np.ndarray[attr_t, ndim=1] attr_ids = numpy.asarray(py_attr_ids, dtype=numpy.uint64)
+        output = numpy.ndarray(shape=(self.end - self.start, len(attr_ids)), dtype=numpy.uint64)
+        for i in range(self.start, self.end):
+            for j, feature in enumerate(attr_ids):
+                output[i - self.start, j] = get_token_attr(&self.doc.c[i], feature)
+        return output
+
     cpdef int _recalculate_indices(self) except -1:
         if self.end > self.doc.length \
                 or self.doc.c[self.start].idx != self.start_char \
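Span.to_array mirrors Doc.to_array, but exports one row per token of the span rather than of the whole document. A minimal sketch, assuming a pipeline bound to nlp:

    from spacy.attrs import LOWER, POS
    doc = nlp(u'I like New York in Autumn.')
    span = doc[2:4]
    np_array = span.to_array([LOWER, POS])
    # One row per token in the span, one column per requested attribute.
    assert np_array.shape == (len(span), 2)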
spacy/vectors.pyx

@@ -20,7 +20,7 @@ cdef class Vectors:
     '''Store, save and load word vectors.'''
     cdef public object data
     cdef readonly StringStore strings
-    cdef public object index
+    cdef public object key2row
 
     def __init__(self, strings, data_or_width):
         self.strings = StringStore()
 
@@ -30,9 +30,9 @@ cdef class Vectors:
         else:
             data = data_or_width
         self.data = data
-        self.index = {}
+        self.key2row = {}
         for i, string in enumerate(strings):
-            self.index[self.strings.add(string)] = i
+            self.key2row[self.strings.add(string)] = i
 
     def __reduce__(self):
         return (Vectors, (self.strings, self.data))
 
@@ -40,7 +40,7 @@ cdef class Vectors:
     def __getitem__(self, key):
         if isinstance(key, basestring):
             key = self.strings[key]
-        i = self.index[key]
+        i = self.key2row[key]
         if i is None:
             raise KeyError(key)
         else:
 
@@ -49,7 +49,7 @@ cdef class Vectors:
     def __setitem__(self, key, vector):
         if isinstance(key, basestring):
             key = self.strings.add(key)
-        i = self.index[key]
+        i = self.key2row[key]
         self.data[i] = vector
 
     def __iter__(self):
 
@@ -71,7 +71,7 @@ cdef class Vectors:
 
     def to_disk(self, path, **exclude):
         def serialize_vectors(p):
-            write_vectors_to_bin_loc(self.strings, self.key2i, self.data, str(p))
+            write_vectors_to_bin_loc(self.strings, self.key2row, self.data, str(p))
 
         serializers = OrderedDict((
             ('vec.bin', serialize_vectors),
 
@@ -80,12 +80,13 @@ cdef class Vectors:
 
     def from_disk(self, path, **exclude):
         def deserialize_vectors(p):
-            self.key2i, self.vectors = load_vectors_from_bin_loc(self.strings, str(p))
+            values = load_vectors_from_bin_loc(self.strings, str(p))
+            self.key2row, self.data = values
 
         serializers = OrderedDict((
-            ('vec.bin', deserialize_vectors)
+            ('vec.bin', deserialize_vectors),
         ))
-        return util.to_disk(serializers, exclude)
+        return util.from_disk(path, serializers, exclude)
 
     def to_bytes(self, **exclude):
         def serialize_weights():
 
@@ -93,9 +94,9 @@ cdef class Vectors:
                 return self.data.to_bytes()
             else:
                 return msgpack.dumps(self.data)
-
+        b = msgpack.dumps(self.key2row)
         serializers = OrderedDict((
-            ('key2row', lambda: msgpack.dumps(self.key2i)),
+            ('key2row', lambda: msgpack.dumps(self.key2row)),
             ('strings', lambda: self.strings.to_bytes()),
             ('vectors', serialize_weights)
         ))
 
@@ -109,7 +110,7 @@ cdef class Vectors:
             self.data = msgpack.loads(b)
 
         deserializers = OrderedDict((
-            ('key2row', lambda b: self.key2i.update(msgpack.loads(b))),
+            ('key2row', lambda b: self.key2row.update(msgpack.loads(b))),
             ('strings', lambda b: self.strings.from_bytes(b)),
             ('vectors', deserialize_weights)
         ))
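The rename from index and key2i to key2row spells out what the mapping stores: the row of the data table that holds each key's vector. A minimal sketch of the lookup path, assuming a Vectors instance built with the constructor above:

    key = vectors.strings[u'apple']  # 64-bit hash key for the string
    row = vectors.key2row[key]       # row index into the data table
    vector = vectors.data[row]       # the stored float vector
    # __getitem__ performs the same lookup in one step:
    assert (vectors[u'apple'] == vector).all()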
website/assets/css/_base/_utilities.sass

@@ -112,6 +112,10 @@
 .u-nowrap
     white-space: nowrap
 
+.u-break.u-break
+    word-wrap: break-word
+    white-space: initial
+
 .u-no-border
     border: none
website/docs/api/doc.jade

@@ -140,6 +140,44 @@ p Get the number of tokens in the document.
         +cell int
         +cell The number of tokens in the document.
 
++h(2, "char_span") Doc.char_span
+    +tag method
+    +tag-new(2)
+
+p Create a #[code Span] object from the slice #[code doc.text[start : end]].
+
++aside-code("Example").
+    doc = nlp(u'I like New York')
+    label = doc.vocab.strings['GPE']
+    span = doc.char_span(7, 15, label=label)
+    assert span.text == 'New York'
+
++table(["Name", "Type", "Description"])
+    +row
+        +cell #[code start]
+        +cell int
+        +cell The index of the first character of the span.
+
+    +row
+        +cell #[code end]
+        +cell int
+        +cell The index of the first character after the span.
+
+    +row
+        +cell #[code label]
+        +cell uint64
+        +cell A label to attach to the Span, e.g. for named entities.
+
+    +row
+        +cell #[code vector]
+        +cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']]
+        +cell A meaning representation of the span.
+
+    +footrow
+        +cell returns
+        +cell #[code Span]
+        +cell The newly constructed object.
+
 +h(2, "similarity") Doc.similarity
     +tag method
     +tag-model("vectors")
 
@@ -211,12 +249,12 @@ p
 +table(["Name", "Type", "Description"])
     +row
         +cell #[code attr_ids]
-        +cell ints
+        +cell list
         +cell A list of attribute ID ints.
 
     +footrow
         +cell returns
-        +cell #[code numpy.ndarray[ndim=2, dtype='int32']]
+        +cell #[code.u-break numpy.ndarray[ndim=2, dtype='int32']]
         +cell
             | The exported attributes as a 2D numpy array, with one row per
             | token and one column per attribute.
 
@@ -245,7 +283,7 @@ p
 
     +row
         +cell #[code array]
-        +cell #[code numpy.ndarray[ndim=2, dtype='int32']]
+        +cell #[code.u-break numpy.ndarray[ndim=2, dtype='int32']]
         +cell The attribute values to load.
 
     +footrow
 
@@ -509,7 +547,7 @@ p
 +table(["Name", "Type", "Description"])
     +footrow
         +cell returns
-        +cell #[code numpy.ndarray[ndim=1, dtype='float32']]
+        +cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']]
         +cell A 1D numpy array representing the document's semantics.
 
 +h(2, "vector_norm") Doc.vector_norm
website/docs/api/language.jade

@@ -111,6 +111,14 @@ p
         +cell -
         +cell A sequence of unicode objects.
 
+    +row
+        +cell #[code as_tuples]
+        +cell bool
+        +cell
+            | If set to #[code True], inputs should be a sequence of
+            | #[code (text, context)] tuples. Output will then be a sequence of
+            | #[code (doc, context)] tuples. Defaults to #[code False].
+
     +row
         +cell #[code n_threads]
         +cell int
website/docs/api/lexeme.jade

@@ -129,7 +129,7 @@ p A real-valued meaning representation.
 +table(["Name", "Type", "Description"])
     +footrow
         +cell returns
-        +cell #[code numpy.ndarray[ndim=1, dtype='float32']]
+        +cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']]
         +cell A 1D numpy array representing the lexeme's semantics.
 
 +h(2, "vector_norm") Lexeme.vector_norm
website/docs/api/span.jade

@@ -37,7 +37,7 @@ p Create a Span object from the slice #[code doc[start : end]].
 
     +row
         +cell #[code vector]
-        +cell #[code numpy.ndarray[ndim=1, dtype='float32']]
+        +cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']]
         +cell A meaning representation of the span.
 
     +footrow
 
@@ -145,11 +145,47 @@ p
     +cell float
     +cell A scalar similarity score. Higher is more similar.
 
++h(2, "to_array") Span.to_array
+    +tag method
+    +tag-new(2)
+
+p
+    | Given a list of #[code M] attribute IDs, export the tokens to a numpy
+    | #[code ndarray] of shape #[code (N, M)], where #[code N] is the length
+    | of the span. The values will be 32-bit integers.
+
++aside-code("Example").
+    from spacy.attrs import LOWER, POS, ENT_TYPE, IS_ALPHA
+    doc = nlp(u'I like New York in Autumn.')
+    span = doc[2:3]
+    # All strings mapped to integers, for easy export to numpy
+    np_array = span.to_array([LOWER, POS, ENT_TYPE, IS_ALPHA])
+
++table(["Name", "Type", "Description"])
+    +row
+        +cell #[code attr_ids]
+        +cell list
+        +cell A list of attribute ID ints.
+
+    +footrow
+        +cell returns
+        +cell #[code.u-break numpy.ndarray[long, ndim=2]]
+        +cell
+            | A feature matrix, with one row per word, and one column per
+            | attribute indicated in the input #[code attr_ids].
+
 +h(2, "merge") Span.merge
     +tag method
 
 p Retokenize the document, such that the span is merged into a single token.
 
++aside-code("Example").
+    doc = nlp(u'I like New York in Autumn.')
+    span = doc[2:4]
+    span.merge()
+    assert len(doc) == 6
+    assert doc[2].text == 'New York'
+
 +table(["Name", "Type", "Description"])
     +row
         +cell #[code **attributes]
 
@@ -270,7 +306,7 @@ p
 +table(["Name", "Type", "Description"])
     +footrow
         +cell returns
-        +cell #[code numpy.ndarray[ndim=1, dtype='float32']]
+        +cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']]
         +cell A 1D numpy array representing the span's semantics.
 
 +h(2, "vector_norm") Span.vector_norm
website/docs/api/token.jade

@@ -250,7 +250,7 @@ p A real-valued meaning representation.
 +table(["Name", "Type", "Description"])
     +footrow
         +cell returns
-        +cell #[code numpy.ndarray[ndim=1, dtype='float32']]
+        +cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']]
         +cell A 1D numpy array representing the token's semantics.
 
 +h(2, "vector_norm") Token.vector_norm