Merge branch 'develop' of https://github.com/explosion/spaCy into develop

2025-09-18 10:02:40 +03:00 · 2019-03-11 15:59:28 +01:00 · 2019-03-11 15:59:28 +01:00 · e2b9b523ce
commit e2b9b523ce
parent db79a704bf 47e9c274ef
18 changed files with 614 additions and 561 deletions
--- a/spacy/lang/fa/__init__.py
+++ b/spacy/lang/fa/__init__.py
@@ -27,6 +27,7 @@ class PersianDefaults(Language.Defaults):
    stop_words = STOP_WORDS
    tag_map = TAG_MAP
    suffixes = TOKENIZER_SUFFIXES
+    writing_system = {"direction": "rtl", "has_case": False, "has_letters": True}


 class Persian(Language):
--- a/spacy/lang/he/__init__.py
+++ b/spacy/lang/he/__init__.py
@@ -14,7 +14,7 @@ class HebrewDefaults(Language.Defaults):
    lex_attr_getters[LANG] = lambda text: "he"
    tokenizer_exceptions = update_exc(BASE_EXCEPTIONS)
    stop_words = STOP_WORDS
-
+    writing_system = {"direction": "rtl", "has_case": False, "has_letters": True}

 class Hebrew(Language):
    lang = "he"
--- a/spacy/lang/ja/__init__.py
+++ b/spacy/lang/ja/__init__.py
@@ -94,6 +94,7 @@ class JapaneseDefaults(Language.Defaults):
    lex_attr_getters[LANG] = lambda _text: "ja"
    stop_words = STOP_WORDS
    tag_map = TAG_MAP
+    writing_system = {"direction": "ltr", "has_case": False, "has_letters": False}

    @classmethod
    def create_tokenizer(cls, nlp=None):
--- a/spacy/lang/zh/__init__.py
+++ b/spacy/lang/zh/__init__.py
@@ -14,7 +14,7 @@ class ChineseDefaults(Language.Defaults):
    use_jieba = True
    tokenizer_exceptions = BASE_EXCEPTIONS
    stop_words = STOP_WORDS
-
+    writing_system = {"direction": "ltr", "has_case": False, "has_letters": False}

 class Chinese(Language):
    lang = "zh"
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -94,6 +94,7 @@ class BaseDefaults(object):
    morph_rules = {}
    lex_attr_getters = LEX_ATTRS
    syntax_iterators = {}
+    writing_system = {"direction": "ltr", "has_case": True, "has_letters": True}


 class Language(object):
--- a/spacy/lexeme.pyx
+++ b/spacy/lexeme.pyx
@@ -161,17 +161,17 @@ cdef class Lexeme:
        Lexeme.c_from_bytes(self.c, lex_data)
        self.orth = self.c.orth

-    property has_vector:
+    @property
+    def has_vector(self):
        """RETURNS (bool): Whether a word vector is associated with the object.
        """
-        def __get__(self):
-            return self.vocab.has_vector(self.c.orth)
+        return self.vocab.has_vector(self.c.orth)

-    property vector_norm:
+    @property
+    def vector_norm(self):
        """RETURNS (float): The L2 norm of the vector representation."""
-        def __get__(self):
-            vector = self.vector
-            return numpy.sqrt((vector**2).sum())
+        vector = self.vector
+        return numpy.sqrt((vector**2).sum())

    property vector:
        """A real-valued meaning representation.
@@ -209,17 +209,17 @@ cdef class Lexeme:
        def __set__(self, float sentiment):
            self.c.sentiment = sentiment

-    property orth_:
+    @property
+    def orth_(self):
        """RETURNS (unicode): The original verbatim text of the lexeme
            (identical to `Lexeme.text`). Exists mostly for consistency with
            the other attributes."""
-        def __get__(self):
-            return self.vocab.strings[self.c.orth]
+        return self.vocab.strings[self.c.orth]

-    property text:
+    @property
+    def text(self):
        """RETURNS (unicode): The original verbatim text of the lexeme."""
-        def __get__(self):
-            return self.orth_
+        return self.orth_

    property lower:
        """RETURNS (unicode): Lowercase form of the lexeme."""
--- a/spacy/syntax/arc_eager.pyx
+++ b/spacy/syntax/arc_eager.pyx
@@ -369,9 +369,9 @@ cdef class ArcEager(TransitionSystem):
        actions[LEFT].setdefault('dep', 0)
        return actions

-    property action_types:
-        def __get__(self):
-            return (SHIFT, REDUCE, LEFT, RIGHT, BREAK)
+    @property
+    def action_types(self):
+        return (SHIFT, REDUCE, LEFT, RIGHT, BREAK)

    def get_cost(self, StateClass state, GoldParse gold, action):
        cdef Transition t = self.lookup_transition(action)
@@ -384,7 +384,7 @@ cdef class ArcEager(TransitionSystem):
        cdef Transition t = self.lookup_transition(action)
        t.do(state.c, t.label)
        return state
- 
+
    def is_gold_parse(self, StateClass state, GoldParse gold):
        predicted = set()
        truth = set()
--- a/spacy/syntax/ner.pyx
+++ b/spacy/syntax/ner.pyx
@@ -80,9 +80,9 @@ cdef class BiluoPushDown(TransitionSystem):
                            actions[action][label] += 1
        return actions

-    property action_types:
-        def __get__(self):
-            return (BEGIN, IN, LAST, UNIT, OUT)
+    @property
+    def action_types(self):
+        return (BEGIN, IN, LAST, UNIT, OUT)

    def move_name(self, int move, attr_t label):
        if move == OUT:
--- a/spacy/tests/test_displacy.py
+++ b/spacy/tests/test_displacy.py
@@ -0,0 +1,68 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import pytest
+from spacy import displacy
+from spacy.tokens import Span
+
+from .util import get_doc
+
+
+def test_displacy_parse_ents(en_vocab):
+    """Test that named entities on a Doc are converted into displaCy's format."""
+    doc = get_doc(en_vocab, words=["But", "Google", "is", "starting", "from", "behind"])
+    doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"])]
+    ents = displacy.parse_ents(doc)
+    assert isinstance(ents, dict)
+    assert ents["text"] == "But Google is starting from behind "
+    assert ents["ents"] == [{"start": 4, "end": 10, "label": "ORG"}]
+
+
+def test_displacy_parse_deps(en_vocab):
+    """Test that deps and tags on a Doc are converted into displaCy's format."""
+    words = ["This", "is", "a", "sentence"]
+    heads = [1, 0, 1, -2]
+    pos = ["DET", "VERB", "DET", "NOUN"]
+    tags = ["DT", "VBZ", "DT", "NN"]
+    deps = ["nsubj", "ROOT", "det", "attr"]
+    doc = get_doc(en_vocab, words=words, heads=heads, pos=pos, tags=tags, deps=deps)
+    deps = displacy.parse_deps(doc)
+    assert isinstance(deps, dict)
+    assert deps["words"] == [
+        {"text": "This", "tag": "DET"},
+        {"text": "is", "tag": "VERB"},
+        {"text": "a", "tag": "DET"},
+        {"text": "sentence", "tag": "NOUN"},
+    ]
+    assert deps["arcs"] == [
+        {"start": 0, "end": 1, "label": "nsubj", "dir": "left"},
+        {"start": 2, "end": 3, "label": "det", "dir": "left"},
+        {"start": 1, "end": 3, "label": "attr", "dir": "right"},
+    ]
+
+
+def test_displacy_spans(en_vocab):
+    """Test that displaCy can render Spans."""
+    doc = get_doc(en_vocab, words=["But", "Google", "is", "starting", "from", "behind"])
+    doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"])]
+    html = displacy.render(doc[1:4], style="ent")
+    assert html.startswith("<div")
+
+
+def test_displacy_render_wrapper(en_vocab):
+    """Test that displaCy accepts custom rendering wrapper."""
+
+    def wrapper(html):
+        return "TEST" + html + "TEST"
+
+    displacy.set_render_wrapper(wrapper)
+    doc = get_doc(en_vocab, words=["But", "Google", "is", "starting", "from", "behind"])
+    doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"])]
+    html = displacy.render(doc, style="ent")
+    assert html.startswith("TEST<div")
+    assert html.endswith("/div>TEST")
+
+
+def test_displacy_raises_for_wrong_type(en_vocab):
+    with pytest.raises(ValueError):
+        displacy.render("hello world")
--- a/spacy/tests/test_misc.py
+++ b/spacy/tests/test_misc.py
@@ -4,13 +4,9 @@ from __future__ import unicode_literals
 import pytest
 from pathlib import Path
 from spacy import util
-from spacy import displacy
 from spacy import prefer_gpu, require_gpu
-from spacy.tokens import Span
 from spacy._ml import PrecomputableAffine

-from .util import get_doc
-

@pytest.mark.parametrize("text", ["hello/world", "hello world"])
 def test_util_ensure_path_succeeds(text):
@@ -31,66 +27,6 @@ def test_util_get_package_path(package):
    assert isinstance(path, Path)


-def test_displacy_parse_ents(en_vocab):
-    """Test that named entities on a Doc are converted into displaCy's format."""
-    doc = get_doc(en_vocab, words=["But", "Google", "is", "starting", "from", "behind"])
-    doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"])]
-    ents = displacy.parse_ents(doc)
-    assert isinstance(ents, dict)
-    assert ents["text"] == "But Google is starting from behind "
-    assert ents["ents"] == [{"start": 4, "end": 10, "label": "ORG"}]
-
-
-def test_displacy_parse_deps(en_vocab):
-    """Test that deps and tags on a Doc are converted into displaCy's format."""
-    words = ["This", "is", "a", "sentence"]
-    heads = [1, 0, 1, -2]
-    pos = ["DET", "VERB", "DET", "NOUN"]
-    tags = ["DT", "VBZ", "DT", "NN"]
-    deps = ["nsubj", "ROOT", "det", "attr"]
-    doc = get_doc(en_vocab, words=words, heads=heads, pos=pos, tags=tags, deps=deps)
-    deps = displacy.parse_deps(doc)
-    assert isinstance(deps, dict)
-    assert deps["words"] == [
-        {"text": "This", "tag": "DET"},
-        {"text": "is", "tag": "VERB"},
-        {"text": "a", "tag": "DET"},
-        {"text": "sentence", "tag": "NOUN"},
-    ]
-    assert deps["arcs"] == [
-        {"start": 0, "end": 1, "label": "nsubj", "dir": "left"},
-        {"start": 2, "end": 3, "label": "det", "dir": "left"},
-        {"start": 1, "end": 3, "label": "attr", "dir": "right"},
-    ]
-
-
-def test_displacy_spans(en_vocab):
-    """Test that displaCy can render Spans."""
-    doc = get_doc(en_vocab, words=["But", "Google", "is", "starting", "from", "behind"])
-    doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"])]
-    html = displacy.render(doc[1:4], style="ent")
-    assert html.startswith("<div")
-
-
-def test_displacy_render_wrapper(en_vocab):
-    """Test that displaCy accepts custom rendering wrapper."""
-
-    def wrapper(html):
-        return "TEST" + html + "TEST"
-
-    displacy.set_render_wrapper(wrapper)
-    doc = get_doc(en_vocab, words=["But", "Google", "is", "starting", "from", "behind"])
-    doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"])]
-    html = displacy.render(doc, style="ent")
-    assert html.startswith("TEST<div")
-    assert html.endswith("/div>TEST")
-
-
-def test_displacy_raises_for_wrong_type(en_vocab):
-    with pytest.raises(ValueError):
-        displacy.render("hello world")
-
-
 def test_PrecomputableAffine(nO=4, nI=5, nF=3, nP=2):
    model = PrecomputableAffine(nO=nO, nI=nI, nF=nF, nP=nP)
    assert model.W.shape == (nF, nO, nP, nI)
--- a/spacy/tests/vocab_vectors/test_vocab_api.py
+++ b/spacy/tests/vocab_vectors/test_vocab_api.py
@@ -45,3 +45,8 @@ def test_vocab_api_contains(en_vocab, text):
    _ = en_vocab[text]  # noqa: F841
    assert text in en_vocab
    assert "LKsdjvlsakdvlaksdvlkasjdvljasdlkfvm" not in en_vocab
+
+
+def test_vocab_writing_system(en_vocab):
+    assert en_vocab.writing_system["direction"] == "ltr"
+    assert en_vocab.writing_system["has_case"] is True
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -384,7 +384,8 @@ cdef class Doc:
        xp = get_array_module(vector)
        return xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm)

-    property has_vector:
+    @property
+    def has_vector(self):
        """A boolean value indicating whether a word vector is associated with
        the object.

@@ -392,15 +393,14 @@ cdef class Doc:

        DOCS: https://spacy.io/api/doc#has_vector
        """
-        def __get__(self):
-            if "has_vector" in self.user_hooks:
-                return self.user_hooks["has_vector"](self)
-            elif self.vocab.vectors.data.size:
-                return True
-            elif self.tensor.size:
-                return True
-            else:
-                return False
+        if "has_vector" in self.user_hooks:
+            return self.user_hooks["has_vector"](self)
+        elif self.vocab.vectors.data.size:
+            return True
+        elif self.tensor.size:
+            return True
+        else:
+            return False

    property vector:
        """A real-valued meaning representation. Defaults to an average of the
@@ -453,22 +453,22 @@ cdef class Doc:
        def __set__(self, value):
            self._vector_norm = value

-    property text:
+    @property
+    def text(self):
        """A unicode representation of the document text.

        RETURNS (unicode): The original verbatim text of the document.
        """
-        def __get__(self):
-            return "".join(t.text_with_ws for t in self)
+        return "".join(t.text_with_ws for t in self)

-    property text_with_ws:
+    @property
+    def text_with_ws(self):
        """An alias of `Doc.text`, provided for duck-type compatibility with
        `Span` and `Token`.

        RETURNS (unicode): The original verbatim text of the document.
        """
-        def __get__(self):
-            return self.text
+        return self.text

    property ents:
        """The named entities in the document. Returns a tuple of named entity
@@ -545,7 +545,8 @@ cdef class Doc:
                    # Set start as B
                    self.c[start].ent_iob = 3

-    property noun_chunks:
+    @property
+    def noun_chunks(self):
        """Iterate over the base noun phrases in the document. Yields base
        noun-phrase #[code Span] objects, if the document has been
        syntactically parsed. A base noun phrase, or "NP chunk", is a noun
@@ -557,22 +558,22 @@ cdef class Doc:

        DOCS: https://spacy.io/api/doc#noun_chunks
        """
-        def __get__(self):
-            if not self.is_parsed:
-                raise ValueError(Errors.E029)
-            # Accumulate the result before beginning to iterate over it. This
-            # prevents the tokenisation from being changed out from under us
-            # during the iteration. The tricky thing here is that Span accepts
-            # its tokenisation changing, so it's okay once we have the Span
-            # objects. See Issue #375.
-            spans = []
-            if self.noun_chunks_iterator is not None:
-                for start, end, label in self.noun_chunks_iterator(self):
-                    spans.append(Span(self, start, end, label=label))
-            for span in spans:
-                yield span
+        if not self.is_parsed:
+            raise ValueError(Errors.E029)
+        # Accumulate the result before beginning to iterate over it. This
+        # prevents the tokenisation from being changed out from under us
+        # during the iteration. The tricky thing here is that Span accepts
+        # its tokenisation changing, so it's okay once we have the Span
+        # objects. See Issue #375.
+        spans = []
+        if self.noun_chunks_iterator is not None:
+            for start, end, label in self.noun_chunks_iterator(self):
+                spans.append(Span(self, start, end, label=label))
+        for span in spans:
+            yield span

-    property sents:
+    @property
+    def sents(self):
        """Iterate over the sentences in the document. Yields sentence `Span`
        objects. Sentence spans have no label. To improve accuracy on informal
        texts, spaCy calculates sentence boundaries from the syntactic
@@ -583,19 +584,18 @@ cdef class Doc:

        DOCS: https://spacy.io/api/doc#sents
        """
-        def __get__(self):
-            if not self.is_sentenced:
-                raise ValueError(Errors.E030)
-            if "sents" in self.user_hooks:
-                yield from self.user_hooks["sents"](self)
-            else:
-                start = 0
-                for i in range(1, self.length):
-                    if self.c[i].sent_start == 1:
-                        yield Span(self, start, i)
-                        start = i
-                if start != self.length:
-                    yield Span(self, start, self.length)
+        if not self.is_sentenced:
+            raise ValueError(Errors.E030)
+        if "sents" in self.user_hooks:
+            yield from self.user_hooks["sents"](self)
+        else:
+            start = 0
+            for i in range(1, self.length):
+                if self.c[i].sent_start == 1:
+                    yield Span(self, start, i)
+                    start = i
+            if start != self.length:
+                yield Span(self, start, self.length)

    @property
    def lang(self):
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -322,46 +322,47 @@ cdef class Span:
            self.start = start
            self.end = end + 1

-    property vocab:
+    @property
+    def vocab(self):
        """RETURNS (Vocab): The Span's Doc's vocab."""
-        def __get__(self):
-            return self.doc.vocab
+        return self.doc.vocab

-    property sent:
+    @property
+    def sent(self):
        """RETURNS (Span): The sentence span that the span is a part of."""
-        def __get__(self):
-            if "sent" in self.doc.user_span_hooks:
-                return self.doc.user_span_hooks["sent"](self)
-            # This should raise if not parsed / no custom sentence boundaries
-            self.doc.sents
-            # If doc is parsed we can use the deps to find the sentence
-            # otherwise we use the `sent_start` token attribute
-            cdef int n = 0
-            cdef int i
-            if self.doc.is_parsed:
-                root = &self.doc.c[self.start]
-                while root.head != 0:
-                    root += root.head
-                    n += 1
-                    if n >= self.doc.length:
-                        raise RuntimeError(Errors.E038)
-                return self.doc[root.l_edge:root.r_edge + 1]
-            elif self.doc.is_sentenced:
-                # Find start of the sentence
-                start = self.start
-                while self.doc.c[start].sent_start != 1 and start > 0:
-                    start += -1
-                # Find end of the sentence
-                end = self.end
-                n = 0
-                while end < self.doc.length and self.doc.c[end].sent_start != 1:
-                    end += 1
-                    n += 1
-                    if n >= self.doc.length:
-                        break
-                return self.doc[start:end]
+        if "sent" in self.doc.user_span_hooks:
+            return self.doc.user_span_hooks["sent"](self)
+        # Meant to raise if not parsed / no custom sentence boundaries, but NOTE(review): Doc.sents is a generator, so this bare access does not run that check
+        self.doc.sents
+        # If doc is parsed we can use the deps to find the sentence
+        # otherwise we use the `sent_start` token attribute
+        cdef int n = 0
+        cdef int i
+        if self.doc.is_parsed:
+            root = &self.doc.c[self.start]
+            while root.head != 0:
+                root += root.head
+                n += 1
+                if n >= self.doc.length:
+                    raise RuntimeError(Errors.E038)
+            return self.doc[root.l_edge:root.r_edge + 1]
+        elif self.doc.is_sentenced:
+            # Find start of the sentence
+            start = self.start
+            while self.doc.c[start].sent_start != 1 and start > 0:
+                start += -1
+            # Find end of the sentence
+            end = self.end
+            n = 0
+            while end < self.doc.length and self.doc.c[end].sent_start != 1:
+                end += 1
+                n += 1
+                if n >= self.doc.length:
+                    break
+            return self.doc[start:end]

-    property ents:
+    @property
+    def ents(self):
        """The named entities in the span. Returns a tuple of named entity
        `Span` objects, if the entity recognizer has been applied.

@@ -369,14 +370,14 @@ cdef class Span:

        DOCS: https://spacy.io/api/span#ents
        """
-        def __get__(self):
-            ents = []
-            for ent in self.doc.ents:
-                if ent.start >= self.start and ent.end <= self.end:
-                    ents.append(ent)
-            return ents
+        ents = []
+        for ent in self.doc.ents:
+            if ent.start >= self.start and ent.end <= self.end:
+                ents.append(ent)
+        return ents

-    property has_vector:
+    @property
+    def has_vector(self):
        """A boolean value indicating whether a word vector is associated with
        the object.

@@ -384,17 +385,17 @@ cdef class Span:

        DOCS: https://spacy.io/api/span#has_vector
        """
-        def __get__(self):
-            if "has_vector" in self.doc.user_span_hooks:
-                return self.doc.user_span_hooks["has_vector"](self)
-            elif self.vocab.vectors.data.size > 0:
-                return any(token.has_vector for token in self)
-            elif self.doc.tensor.size > 0:
-                return True
-            else:
-                return False
+        if "has_vector" in self.doc.user_span_hooks:
+            return self.doc.user_span_hooks["has_vector"](self)
+        elif self.vocab.vectors.data.size > 0:
+            return any(token.has_vector for token in self)
+        elif self.doc.tensor.size > 0:
+            return True
+        else:
+            return False

-    property vector:
+    @property
+    def vector(self):
        """A real-valued meaning representation. Defaults to an average of the
        token vectors.

@@ -403,61 +404,61 @@ cdef class Span:

        DOCS: https://spacy.io/api/span#vector
        """
-        def __get__(self):
-            if "vector" in self.doc.user_span_hooks:
-                return self.doc.user_span_hooks["vector"](self)
-            if self._vector is None:
-                self._vector = sum(t.vector for t in self) / len(self)
-            return self._vector
+        if "vector" in self.doc.user_span_hooks:
+            return self.doc.user_span_hooks["vector"](self)
+        if self._vector is None:
+            self._vector = sum(t.vector for t in self) / len(self)
+        return self._vector

-    property vector_norm:
+    @property
+    def vector_norm(self):
        """The L2 norm of the span's vector representation.

        RETURNS (float): The L2 norm of the vector representation.

        DOCS: https://spacy.io/api/span#vector_norm
        """
-        def __get__(self):
-            if "vector_norm" in self.doc.user_span_hooks:
-                return self.doc.user_span_hooks["vector"](self)
-            cdef float value
-            cdef double norm = 0
-            if self._vector_norm is None:
-                norm = 0
-                for value in self.vector:
-                    norm += value * value
-                self._vector_norm = sqrt(norm) if norm != 0 else 0
-            return self._vector_norm
+        if "vector_norm" in self.doc.user_span_hooks:
+            return self.doc.user_span_hooks["vector_norm"](self)
+        cdef float value
+        cdef double norm = 0
+        if self._vector_norm is None:
+            norm = 0
+            for value in self.vector:
+                norm += value * value
+            self._vector_norm = sqrt(norm) if norm != 0 else 0
+        return self._vector_norm

-    property sentiment:
+    @property
+    def sentiment(self):
        """RETURNS (float): A scalar value indicating the positivity or
            negativity of the span.
        """
-        def __get__(self):
-            if "sentiment" in self.doc.user_span_hooks:
-                return self.doc.user_span_hooks["sentiment"](self)
-            else:
-                return sum([token.sentiment for token in self]) / len(self)
+        if "sentiment" in self.doc.user_span_hooks:
+            return self.doc.user_span_hooks["sentiment"](self)
+        else:
+            return sum([token.sentiment for token in self]) / len(self)

-    property text:
+    @property
+    def text(self):
        """RETURNS (unicode): The original verbatim text of the span."""
-        def __get__(self):
-            text = self.text_with_ws
-            if self[-1].whitespace_:
-                text = text[:-1]
-            return text
+        text = self.text_with_ws
+        if self[-1].whitespace_:
+            text = text[:-1]
+        return text

-    property text_with_ws:
+    @property
+    def text_with_ws(self):
        """The text content of the span with a trailing whitespace character if
        the last token has one.

        RETURNS (unicode): The text content of the span (with trailing
            whitespace).
        """
-        def __get__(self):
-            return "".join([t.text_with_ws for t in self])
+        return "".join([t.text_with_ws for t in self])

-    property noun_chunks:
+    @property
+    def noun_chunks(self):
        """Yields base noun-phrase `Span` objects, if the document has been
        syntactically parsed. A base noun phrase, or "NP chunk", is a noun
        phrase that does not permit other NPs to be nested within it – so no
@@ -468,23 +469,23 @@ cdef class Span:

        DOCS: https://spacy.io/api/span#noun_chunks
        """
-        def __get__(self):
-            if not self.doc.is_parsed:
-                raise ValueError(Errors.E029)
-            # Accumulate the result before beginning to iterate over it. This
-            # prevents the tokenisation from being changed out from under us
-            # during the iteration. The tricky thing here is that Span accepts
-            # its tokenisation changing, so it's okay once we have the Span
-            # objects. See Issue #375
-            spans = []
-            cdef attr_t label
-            if self.doc.noun_chunks_iterator is not None:
-                for start, end, label in self.doc.noun_chunks_iterator(self):
-                    spans.append(Span(self.doc, start, end, label=label))
-            for span in spans:
-                yield span
+        if not self.doc.is_parsed:
+            raise ValueError(Errors.E029)
+        # Accumulate the result before beginning to iterate over it. This
+        # prevents the tokenisation from being changed out from under us
+        # during the iteration. The tricky thing here is that Span accepts
+        # its tokenisation changing, so it's okay once we have the Span
+        # objects. See Issue #375
+        spans = []
+        cdef attr_t label
+        if self.doc.noun_chunks_iterator is not None:
+            for start, end, label in self.doc.noun_chunks_iterator(self):
+                spans.append(Span(self.doc, start, end, label=label))
+        for span in spans:
+            yield span

-    property root:
+    @property
+    def root(self):
        """The token with the shortest path to the root of the
        sentence (or the root itself). If multiple tokens are equally
        high in the tree, the first token is taken.
@@ -493,41 +494,41 @@ cdef class Span:

        DOCS: https://spacy.io/api/span#root
        """
-        def __get__(self):
-            self._recalculate_indices()
-            if "root" in self.doc.user_span_hooks:
-                return self.doc.user_span_hooks["root"](self)
-            # This should probably be called 'head', and the other one called
-            # 'gov'. But we went with 'head' elsehwhere, and now we're stuck =/
-            cdef int i
-            # First, we scan through the Span, and check whether there's a word
-            # with head==0, i.e. a sentence root. If so, we can return it. The
-            # longer the span, the more likely it contains a sentence root, and
-            # in this case we return in linear time.
-            for i in range(self.start, self.end):
-                if self.doc.c[i].head == 0:
-                    return self.doc[i]
-            # If we don't have a sentence root, we do something that's not so
-            # algorithmically clever, but I think should be quite fast,
-            # especially for short spans.
-            # For each word, we count the path length, and arg min this measure.
-            # We could use better tree logic to save steps here...But I
-            # think this should be okay.
-            cdef int current_best = self.doc.length
-            cdef int root = -1
-            for i in range(self.start, self.end):
-                if self.start <= (i+self.doc.c[i].head) < self.end:
-                    continue
-                words_to_root = _count_words_to_root(&self.doc.c[i], self.doc.length)
-                if words_to_root < current_best:
-                    current_best = words_to_root
-                    root = i
-            if root == -1:
-                return self.doc[self.start]
-            else:
-                return self.doc[root]
+        self._recalculate_indices()
+        if "root" in self.doc.user_span_hooks:
+            return self.doc.user_span_hooks["root"](self)
+        # This should probably be called 'head', and the other one called
+        # 'gov'. But we went with 'head' elsewhere, and now we're stuck =/
+        cdef int i
+        # First, we scan through the Span, and check whether there's a word
+        # with head==0, i.e. a sentence root. If so, we can return it. The
+        # longer the span, the more likely it contains a sentence root, and
+        # in this case we return in linear time.
+        for i in range(self.start, self.end):
+            if self.doc.c[i].head == 0:
+                return self.doc[i]
+        # If we don't have a sentence root, we do something that's not so
+        # algorithmically clever, but I think should be quite fast,
+        # especially for short spans.
+        # For each word, we count the path length, and arg min this measure.
+        # We could use better tree logic to save steps here...But I
+        # think this should be okay.
+        cdef int current_best = self.doc.length
+        cdef int root = -1
+        for i in range(self.start, self.end):
+            if self.start <= (i+self.doc.c[i].head) < self.end:
+                continue
+            words_to_root = _count_words_to_root(&self.doc.c[i], self.doc.length)
+            if words_to_root < current_best:
+                current_best = words_to_root
+                root = i
+        if root == -1:
+            return self.doc[self.start]
+        else:
+            return self.doc[root]

-    property lefts:
+    @property
+    def lefts(self):
        """Tokens that are to the left of the span, whose head is within the
        `Span`.

@@ -535,13 +536,13 @@ cdef class Span:

        DOCS: https://spacy.io/api/span#lefts
        """
-        def __get__(self):
-            for token in reversed(self):  # Reverse, so we get tokens in order
-                for left in token.lefts:
-                    if left.i < self.start:
-                        yield left
+        for token in reversed(self):  # Reverse, so we get tokens in order
+            for left in token.lefts:
+                if left.i < self.start:
+                    yield left

-    property rights:
+    @property
+    def rights(self):
        """Tokens that are to the right of the Span, whose head is within the
        `Span`.

@@ -549,13 +550,13 @@ cdef class Span:

        DOCS: https://spacy.io/api/span#rights
        """
-        def __get__(self):
-            for token in self:
-                for right in token.rights:
-                    if right.i >= self.end:
-                        yield right
+        for token in self:
+            for right in token.rights:
+                if right.i >= self.end:
+                    yield right

-    property n_lefts:
+    @property
+    def n_lefts(self):
        """The number of tokens that are to the left of the span, whose
        heads are within the span.

@@ -564,10 +565,10 @@ cdef class Span:

        DOCS: https://spacy.io/api/span#n_lefts
        """
-        def __get__(self):
-            return len(list(self.lefts))
+        return len(list(self.lefts))

-    property n_rights:
+    @property
+    def n_rights(self):
        """The number of tokens that are to the right of the span, whose
        heads are within the span.

@ -576,22 +577,21 @@ cdef class Span:

        DOCS: https://spacy.io/api/span#n_rights
        """
-        def __get__(self):
-            return len(list(self.rights))
+        return len(list(self.rights))

-    property subtree:
+    @property
+    def subtree(self):
        """Tokens within the span and tokens which descend from them.

        YIELDS (Token): A token within the span, or a descendant from it.

        DOCS: https://spacy.io/api/span#subtree
        """
-        def __get__(self):
-            for word in self.lefts:
-                yield from word.subtree
-            yield from self
-            for word in self.rights:
-                yield from word.subtree
+        for word in self.lefts:
+            yield from word.subtree
+        yield from self
+        for word in self.rights:
+            yield from word.subtree

    property ent_id:
        """RETURNS (uint64): The entity ID."""
@ -609,33 +609,33 @@ cdef class Span:
        def __set__(self, hash_t key):
            raise NotImplementedError(TempErrors.T007.format(attr="ent_id_"))

-    property orth_:
+    @property
+    def orth_(self):
        """Verbatim text content (identical to `Span.text`). Exists mostly for
        consistency with other attributes.

        RETURNS (unicode): The span's text."""
-        def __get__(self):
-            return self.text
+        return self.text

-    property lemma_:
+    @property
+    def lemma_(self):
        """RETURNS (unicode): The span's lemma."""
-        def __get__(self):
-            return " ".join([t.lemma_ for t in self]).strip()
+        return " ".join([t.lemma_ for t in self]).strip()

-    property upper_:
+    @property
+    def upper_(self):
        """Deprecated. Use `Span.text.upper()` instead."""
-        def __get__(self):
-            return "".join([t.text_with_ws.upper() for t in self]).strip()
+        return "".join([t.text_with_ws.upper() for t in self]).strip()

-    property lower_:
+    @property
+    def lower_(self):
        """Deprecated. Use `Span.text.lower()` instead."""
-        def __get__(self):
-            return "".join([t.text_with_ws.lower() for t in self]).strip()
+        return "".join([t.text_with_ws.lower() for t in self]).strip()

-    property string:
+    @property
+    def string(self):
        """Deprecated: Use `Span.text_with_ws` instead."""
-        def __get__(self):
-            return "".join([t.text_with_ws for t in self])
+        return "".join([t.text_with_ws for t in self])

    property label_:
        """RETURNS (unicode): The span's label."""
--- a/spacy/tokens/token.pyx
+++ b/spacy/tokens/token.pyx
@ -218,111 +218,111 @@ cdef class Token:
        xp = get_array_module(vector)
        return (xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm))

-    property lex_id:
+    @property
+    def lex_id(self):
        """RETURNS (int): Sequential ID of the token's lexical type."""
-        def __get__(self):
-            return self.c.lex.id
+        return self.c.lex.id

-    property rank:
+    @property
+    def rank(self):
        """RETURNS (int): Sequential ID of the token's lexical type, used to
        index into tables, e.g. for word vectors."""
-        def __get__(self):
-            return self.c.lex.id
+        return self.c.lex.id

-    property string:
+    @property
+    def string(self):
        """Deprecated: Use Token.text_with_ws instead."""
-        def __get__(self):
-            return self.text_with_ws
+        return self.text_with_ws

-    property text:
+    @property
+    def text(self):
        """RETURNS (unicode): The original verbatim text of the token."""
-        def __get__(self):
-            return self.orth_
+        return self.orth_

-    property text_with_ws:
+    @property
+    def text_with_ws(self):
        """RETURNS (unicode): The text content of the token (with trailing
            whitespace).
        """
-        def __get__(self):
-            cdef unicode orth = self.vocab.strings[self.c.lex.orth]
-            if self.c.spacy:
-                return orth + " "
-            else:
-                return orth
+        cdef unicode orth = self.vocab.strings[self.c.lex.orth]
+        if self.c.spacy:
+            return orth + " "
+        else:
+            return orth

-    property prob:
+    @property
+    def prob(self):
        """RETURNS (float): Smoothed log probability estimate of token type."""
-        def __get__(self):
-            return self.c.lex.prob
+        return self.c.lex.prob

-    property sentiment:
+    @property
+    def sentiment(self):
        """RETURNS (float): A scalar value indicating the positivity or
            negativity of the token."""
-        def __get__(self):
-            if "sentiment" in self.doc.user_token_hooks:
-                return self.doc.user_token_hooks["sentiment"](self)
-            return self.c.lex.sentiment
+        if "sentiment" in self.doc.user_token_hooks:
+            return self.doc.user_token_hooks["sentiment"](self)
+        return self.c.lex.sentiment

-    property lang:
+    @property
+    def lang(self):
        """RETURNS (uint64): ID of the language of the parent document's
            vocabulary.
        """
-        def __get__(self):
-            return self.c.lex.lang
+        return self.c.lex.lang

-    property idx:
+    @property
+    def idx(self):
        """RETURNS (int): The character offset of the token within the parent
            document.
        """
-        def __get__(self):
-            return self.c.idx
+        return self.c.idx

-    property cluster:
+    @property
+    def cluster(self):
        """RETURNS (int): Brown cluster ID."""
-        def __get__(self):
-            return self.c.lex.cluster
+        return self.c.lex.cluster

-    property orth:
+    @property
+    def orth(self):
        """RETURNS (uint64): ID of the verbatim text content."""
-        def __get__(self):
-            return self.c.lex.orth
+        return self.c.lex.orth

-    property lower:
+    @property
+    def lower(self):
        """RETURNS (uint64): ID of the lowercase token text."""
-        def __get__(self):
-            return self.c.lex.lower
+        return self.c.lex.lower

-    property norm:
+    @property
+    def norm(self):
        """RETURNS (uint64): ID of the token's norm, i.e. a normalised form of
            the token text. Usually set in the language's tokenizer exceptions
            or norm exceptions.
        """
-        def __get__(self):
-            if self.c.norm == 0:
-                return self.c.lex.norm
-            else:
-                return self.c.norm
+        if self.c.norm == 0:
+            return self.c.lex.norm
+        else:
+            return self.c.norm

-    property shape:
+    @property
+    def shape(self):
        """RETURNS (uint64): ID of the token's shape, a transform of the
            token's string, to show orthographic features (e.g. "Xxxx", "dd").
        """
-        def __get__(self):
-            return self.c.lex.shape
+        return self.c.lex.shape

-    property prefix:
+    @property
+    def prefix(self):
        """RETURNS (uint64): ID of a length-N substring from the start of the
            token. Defaults to `N=1`.
        """
-        def __get__(self):
-            return self.c.lex.prefix
+        return self.c.lex.prefix

-    property suffix:
+    @property
+    def suffix(self):
        """RETURNS (uint64): ID of a length-N substring from the end of the
            token. Defaults to `N=3`.
        """
-        def __get__(self):
-            return self.c.lex.suffix
+        return self.c.lex.suffix

    property lemma:
        """RETURNS (uint64): ID of the base form of the word, with no
@ -362,7 +362,8 @@ cdef class Token:
        def __set__(self, attr_t label):
            self.c.dep = label

-    property has_vector:
+    @property
+    def has_vector(self):
        """A boolean value indicating whether a word vector is associated with
        the object.

@ -370,14 +371,14 @@ cdef class Token:

        DOCS: https://spacy.io/api/token#has_vector
        """
-        def __get__(self):
-            if 'has_vector' in self.doc.user_token_hooks:
-                return self.doc.user_token_hooks["has_vector"](self)
-            if self.vocab.vectors.size == 0 and self.doc.tensor.size != 0:
-                return True
-            return self.vocab.has_vector(self.c.lex.orth)
+        if "has_vector" in self.doc.user_token_hooks:
+            return self.doc.user_token_hooks["has_vector"](self)
+        if self.vocab.vectors.size == 0 and self.doc.tensor.size != 0:
+            return True
+        return self.vocab.has_vector(self.c.lex.orth)

-    property vector:
+    @property
+    def vector(self):
        """A real-valued meaning representation.

        RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
@ -385,28 +386,28 @@ cdef class Token:

        DOCS: https://spacy.io/api/token#vector
        """
-        def __get__(self):
-            if 'vector' in self.doc.user_token_hooks:
-                return self.doc.user_token_hooks["vector"](self)
-            if self.vocab.vectors.size == 0 and self.doc.tensor.size != 0:
-                return self.doc.tensor[self.i]
-            else:
-                return self.vocab.get_vector(self.c.lex.orth)
+        if "vector" in self.doc.user_token_hooks:
+            return self.doc.user_token_hooks["vector"](self)
+        if self.vocab.vectors.size == 0 and self.doc.tensor.size != 0:
+            return self.doc.tensor[self.i]
+        else:
+            return self.vocab.get_vector(self.c.lex.orth)

-    property vector_norm:
+    @property
+    def vector_norm(self):
        """The L2 norm of the token's vector representation.

        RETURNS (float): The L2 norm of the vector representation.

        DOCS: https://spacy.io/api/token#vector_norm
        """
-        def __get__(self):
-            if 'vector_norm' in self.doc.user_token_hooks:
-                return self.doc.user_token_hooks["vector_norm"](self)
-            vector = self.vector
-            return numpy.sqrt((vector ** 2).sum())
+        if "vector_norm" in self.doc.user_token_hooks:
+            return self.doc.user_token_hooks["vector_norm"](self)
+        vector = self.vector
+        return numpy.sqrt((vector ** 2).sum())

-    property n_lefts:
+    @property
+    def n_lefts(self):
        """The number of leftward immediate children of the word, in the
        syntactic dependency parse.

@ -415,10 +416,10 @@ cdef class Token:

        DOCS: https://spacy.io/api/token#n_lefts
        """
-        def __get__(self):
-            return self.c.l_kids
+        return self.c.l_kids

-    property n_rights:
+    @property
+    def n_rights(self):
        """The number of rightward immediate children of the word, in the
        syntactic dependency parse.

@ -427,15 +428,14 @@ cdef class Token:

        DOCS: https://spacy.io/api/token#n_rights
        """
-        def __get__(self):
-            return self.c.r_kids
+        return self.c.r_kids

-    property sent:
+    @property
+    def sent(self):
        """RETURNS (Span): The sentence span that the token is a part of."""
-        def __get__(self):
-            if 'sent' in self.doc.user_token_hooks:
-                return self.doc.user_token_hooks["sent"](self)
-            return self.doc[self.i : self.i+1].sent
+        if 'sent' in self.doc.user_token_hooks:
+            return self.doc.user_token_hooks["sent"](self)
+        return self.doc[self.i : self.i+1].sent

    property sent_start:
        def __get__(self):
@ -479,7 +479,8 @@ cdef class Token:
            else:
                raise ValueError(Errors.E044.format(value=value))

-    property lefts:
+    @property
+    def lefts(self):
        """The leftward immediate children of the word, in the syntactic
        dependency parse.

@ -487,19 +488,19 @@ cdef class Token:

        DOCS: https://spacy.io/api/token#lefts
        """
-        def __get__(self):
-            cdef int nr_iter = 0
-            cdef const TokenC* ptr = self.c - (self.i - self.c.l_edge)
-            while ptr < self.c:
-                if ptr + ptr.head == self.c:
-                    yield self.doc[ptr - (self.c - self.i)]
-                ptr += 1
-                nr_iter += 1
-                # This is ugly, but it's a way to guard out infinite loops
-                if nr_iter >= 10000000:
-                    raise RuntimeError(Errors.E045.format(attr="token.lefts"))
+        cdef int nr_iter = 0
+        cdef const TokenC* ptr = self.c - (self.i - self.c.l_edge)
+        while ptr < self.c:
+            if ptr + ptr.head == self.c:
+                yield self.doc[ptr - (self.c - self.i)]
+            ptr += 1
+            nr_iter += 1
+            # This is ugly, but it's a way to guard out infinite loops
+            if nr_iter >= 10000000:
+                raise RuntimeError(Errors.E045.format(attr="token.lefts"))

-    property rights:
+    @property
+    def rights(self):
        """The rightward immediate children of the word, in the syntactic
        dependency parse.

@ -507,33 +508,33 @@ cdef class Token:

        DOCS: https://spacy.io/api/token#rights
        """
-        def __get__(self):
-            cdef const TokenC* ptr = self.c + (self.c.r_edge - self.i)
-            tokens = []
-            cdef int nr_iter = 0
-            while ptr > self.c:
-                if ptr + ptr.head == self.c:
-                    tokens.append(self.doc[ptr - (self.c - self.i)])
-                ptr -= 1
-                nr_iter += 1
-                if nr_iter >= 10000000:
-                    raise RuntimeError(Errors.E045.format(attr="token.rights"))
-            tokens.reverse()
-            for t in tokens:
-                yield t
+        cdef const TokenC* ptr = self.c + (self.c.r_edge - self.i)
+        tokens = []
+        cdef int nr_iter = 0
+        while ptr > self.c:
+            if ptr + ptr.head == self.c:
+                tokens.append(self.doc[ptr - (self.c - self.i)])
+            ptr -= 1
+            nr_iter += 1
+            if nr_iter >= 10000000:
+                raise RuntimeError(Errors.E045.format(attr="token.rights"))
+        tokens.reverse()
+        for t in tokens:
+            yield t

-    property children:
+    @property
+    def children(self):
        """A sequence of the token's immediate syntactic children.

        YIELDS (Token): A child token such that `child.head==self`.

        DOCS: https://spacy.io/api/token#children
        """
-        def __get__(self):
-            yield from self.lefts
-            yield from self.rights
+        yield from self.lefts
+        yield from self.rights

-    property subtree:
+    @property
+    def subtree(self):
        """A sequence containing the token and all the token's syntactic
        descendants.

@ -542,30 +543,30 @@ cdef class Token:

        DOCS: https://spacy.io/api/token#subtree
        """
-        def __get__(self):
-            for word in self.lefts:
-                yield from word.subtree
-            yield self
-            for word in self.rights:
-                yield from word.subtree
+        for word in self.lefts:
+            yield from word.subtree
+        yield self
+        for word in self.rights:
+            yield from word.subtree

-    property left_edge:
+    @property
+    def left_edge(self):
        """The leftmost token of this token's syntactic descendents.

        RETURNS (Token): The first token such that `self.is_ancestor(token)`.
        """
-        def __get__(self):
-            return self.doc[self.c.l_edge]
+        return self.doc[self.c.l_edge]

-    property right_edge:
+    @property
+    def right_edge(self):
        """The rightmost token of this token's syntactic descendents.

        RETURNS (Token): The last token such that `self.is_ancestor(token)`.
        """
-        def __get__(self):
-            return self.doc[self.c.r_edge]
+        return self.doc[self.c.r_edge]

-    property ancestors:
+    @property
+    def ancestors(self):
        """A sequence of this token's syntactic ancestors.

        YIELDS (Token): A sequence of ancestor tokens such that
@ -573,15 +574,14 @@ cdef class Token:

        DOCS: https://spacy.io/api/token#ancestors
        """
-        def __get__(self):
-            cdef const TokenC* head_ptr = self.c
-            # Guard against infinite loop, no token can have
-            # more ancestors than tokens in the tree.
-            cdef int i = 0
-            while head_ptr.head != 0 and i < self.doc.length:
-                head_ptr += head_ptr.head
-                yield self.doc[head_ptr - (self.c - self.i)]
-                i += 1
+        cdef const TokenC* head_ptr = self.c
+        # Guard against infinite loop, no token can have
+        # more ancestors than tokens in the tree.
+        cdef int i = 0
+        while head_ptr.head != 0 and i < self.doc.length:
+            head_ptr += head_ptr.head
+            yield self.doc[head_ptr - (self.c - self.i)]
+            i += 1

    def is_ancestor(self, descendant):
        """Check whether this token is a parent, grandparent, etc. of another
@ -685,23 +685,23 @@ cdef class Token:
            # Set new head
            self.c.head = rel_newhead_i

-    property conjuncts:
+    @property
+    def conjuncts(self):
        """A sequence of coordinated tokens, including the token itself.

        YIELDS (Token): A coordinated token.

        DOCS: https://spacy.io/api/token#conjuncts
        """
-        def __get__(self):
-            cdef Token word
-            if "conjuncts" in self.doc.user_token_hooks:
-                yield from self.doc.user_token_hooks["conjuncts"](self)
-            else:
-                if self.dep != conj:
-                    for word in self.rights:
-                        if word.dep == conj:
-                            yield word
-                            yield from word.conjuncts
+        cdef Token word
+        if "conjuncts" in self.doc.user_token_hooks:
+            yield from self.doc.user_token_hooks["conjuncts"](self)
+        else:
+            if self.dep != conj:
+                for word in self.rights:
+                    if word.dep == conj:
+                        yield word
+                        yield from word.conjuncts

    property ent_type:
        """RETURNS (uint64): Named entity type."""
@ -711,15 +711,6 @@ cdef class Token:
        def __set__(self, ent_type):
            self.c.ent_type = ent_type

-    property ent_iob:
-        """IOB code of named entity tag. `1="I", 2="O", 3="B"`. 0 means no tag
-        is assigned.
-
-        RETURNS (uint64): IOB code of named entity tag.
-        """
-        def __get__(self):
-            return self.c.ent_iob
-
    property ent_type_:
        """RETURNS (unicode): Named entity type."""
        def __get__(self):
@ -728,16 +719,25 @@ cdef class Token:
        def __set__(self, ent_type):
            self.c.ent_type = self.vocab.strings.add(ent_type)

-    property ent_iob_:
+    @property
+    def ent_iob(self):
+        """IOB code of named entity tag. `1="I", 2="O", 3="B"`. 0 means no tag
+        is assigned.
+
+        RETURNS (uint64): IOB code of named entity tag.
+        """
+        return self.c.ent_iob
+
+    @property
+    def ent_iob_(self):
        """IOB code of named entity tag. "B" means the token begins an entity,
        "I" means it is inside an entity, "O" means it is outside an entity,
        and "" means no entity tag is set.

        RETURNS (unicode): IOB code of named entity tag.
        """
-        def __get__(self):
-            iob_strings = ("", "I", "O", "B")
-            return iob_strings[self.c.ent_iob]
+        iob_strings = ("", "I", "O", "B")
+        return iob_strings[self.c.ent_iob]

    property ent_id:
        """RETURNS (uint64): ID of the entity the token is an instance of,
@ -759,26 +759,25 @@ cdef class Token:
        def __set__(self, name):
            self.c.ent_id = self.vocab.strings.add(name)

-    property whitespace_:
-        """RETURNS (unicode): The trailing whitespace character, if present.
-        """
-        def __get__(self):
-            return " " if self.c.spacy else ""
+    @property
+    def whitespace_(self):
+        """RETURNS (unicode): The trailing whitespace character, if present."""
+        return " " if self.c.spacy else ""

-    property orth_:
+    @property
+    def orth_(self):
        """RETURNS (unicode): Verbatim text content (identical to
            `Token.text`). Exists mostly for consistency with the other
            attributes.
        """
-        def __get__(self):
-            return self.vocab.strings[self.c.lex.orth]
+        return self.vocab.strings[self.c.lex.orth]

-    property lower_:
+    @property
+    def lower_(self):
        """RETURNS (unicode): The lowercase token text. Equivalent to
            `Token.text.lower()`.
        """
-        def __get__(self):
-            return self.vocab.strings[self.c.lex.lower]
+        return self.vocab.strings[self.c.lex.lower]

    property norm_:
        """RETURNS (unicode): The token's norm, i.e. a normalised form of the
@ -791,33 +790,33 @@ cdef class Token:
        def __set__(self, unicode norm_):
            self.c.norm = self.vocab.strings.add(norm_)

-    property shape_:
+    @property
+    def shape_(self):
        """RETURNS (unicode): Transform of the token's string, to show
            orthographic features. For example, "Xxxx" or "dd".
        """
-        def __get__(self):
-            return self.vocab.strings[self.c.lex.shape]
+        return self.vocab.strings[self.c.lex.shape]

-    property prefix_:
+    @property
+    def prefix_(self):
        """RETURNS (unicode): A length-N substring from the start of the token.
            Defaults to `N=1`.
        """
-        def __get__(self):
-            return self.vocab.strings[self.c.lex.prefix]
+        return self.vocab.strings[self.c.lex.prefix]

-    property suffix_:
+    @property
+    def suffix_(self):
        """RETURNS (unicode): A length-N substring from the end of the token.
            Defaults to `N=3`.
        """
-        def __get__(self):
-            return self.vocab.strings[self.c.lex.suffix]
+        return self.vocab.strings[self.c.lex.suffix]

-    property lang_:
+    @property
+    def lang_(self):
        """RETURNS (unicode): Language of the parent document's vocabulary,
            e.g. 'en'.
        """
-        def __get__(self):
-            return self.vocab.strings[self.c.lex.lang]
+        return self.vocab.strings[self.c.lex.lang]

    property lemma_:
        """RETURNS (unicode): The token lemma, i.e. the base form of the word,
@ -856,110 +855,110 @@ cdef class Token:
        def __set__(self, unicode label):
            self.c.dep = self.vocab.strings.add(label)

-    property is_oov:
+    @property
+    def is_oov(self):
        """RETURNS (bool): Whether the token is out-of-vocabulary."""
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c.lex, IS_OOV)
+        return Lexeme.c_check_flag(self.c.lex, IS_OOV)

-    property is_stop:
+    @property
+    def is_stop(self):
        """RETURNS (bool): Whether the token is a stop word, i.e. part of a
            "stop list" defined by the language data.
        """
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c.lex, IS_STOP)
+        return Lexeme.c_check_flag(self.c.lex, IS_STOP)

-    property is_alpha:
+    @property
+    def is_alpha(self):
        """RETURNS (bool): Whether the token consists of alpha characters.
            Equivalent to `token.text.isalpha()`.
        """
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c.lex, IS_ALPHA)
+        return Lexeme.c_check_flag(self.c.lex, IS_ALPHA)

-    property is_ascii:
+    @property
+    def is_ascii(self):
        """RETURNS (bool): Whether the token consists of ASCII characters.
            Equivalent to `all(ord(c) < 128 for c in token.text)`.
        """
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c.lex, IS_ASCII)
+        return Lexeme.c_check_flag(self.c.lex, IS_ASCII)

-    property is_digit:
+    @property
+    def is_digit(self):
        """RETURNS (bool): Whether the token consists of digits. Equivalent to
            `token.text.isdigit()`.
        """
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c.lex, IS_DIGIT)
+        return Lexeme.c_check_flag(self.c.lex, IS_DIGIT)

-    property is_lower:
+    @property
+    def is_lower(self):
        """RETURNS (bool): Whether the token is in lowercase. Equivalent to
            `token.text.islower()`.
        """
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c.lex, IS_LOWER)
+        return Lexeme.c_check_flag(self.c.lex, IS_LOWER)

-    property is_upper:
+    @property
+    def is_upper(self):
        """RETURNS (bool): Whether the token is in uppercase. Equivalent to
            `token.text.isupper()`
        """
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c.lex, IS_UPPER)
+        return Lexeme.c_check_flag(self.c.lex, IS_UPPER)

-    property is_title:
+    @property
+    def is_title(self):
        """RETURNS (bool): Whether the token is in titlecase. Equivalent to
            `token.text.istitle()`.
        """
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c.lex, IS_TITLE)
+        return Lexeme.c_check_flag(self.c.lex, IS_TITLE)

-    property is_punct:
+    @property
+    def is_punct(self):
        """RETURNS (bool): Whether the token is punctuation."""
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c.lex, IS_PUNCT)
+        return Lexeme.c_check_flag(self.c.lex, IS_PUNCT)

-    property is_space:
+    @property
+    def is_space(self):
        """RETURNS (bool): Whether the token consists of whitespace characters.
            Equivalent to `token.text.isspace()`.
        """
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c.lex, IS_SPACE)
+        return Lexeme.c_check_flag(self.c.lex, IS_SPACE)

-    property is_bracket:
+    @property
+    def is_bracket(self):
        """RETURNS (bool): Whether the token is a bracket."""
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c.lex, IS_BRACKET)
+        return Lexeme.c_check_flag(self.c.lex, IS_BRACKET)

-    property is_quote:
+    @property
+    def is_quote(self):
        """RETURNS (bool): Whether the token is a quotation mark."""
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c.lex, IS_QUOTE)
+        return Lexeme.c_check_flag(self.c.lex, IS_QUOTE)

-    property is_left_punct:
+    @property
+    def is_left_punct(self):
        """RETURNS (bool): Whether the token is a left punctuation mark."""
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c.lex, IS_LEFT_PUNCT)
+        return Lexeme.c_check_flag(self.c.lex, IS_LEFT_PUNCT)

-    property is_right_punct:
+    @property
+    def is_right_punct(self):
        """RETURNS (bool): Whether the token is a right punctuation mark."""
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c.lex, IS_RIGHT_PUNCT)
+        return Lexeme.c_check_flag(self.c.lex, IS_RIGHT_PUNCT)

-    property is_currency:
+    @property
+    def is_currency(self):
        """RETURNS (bool): Whether the token is a currency symbol."""
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c.lex, IS_CURRENCY)
+        return Lexeme.c_check_flag(self.c.lex, IS_CURRENCY)

-    property like_url:
+    @property
+    def like_url(self):
        """RETURNS (bool): Whether the token resembles a URL."""
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c.lex, LIKE_URL)
+        return Lexeme.c_check_flag(self.c.lex, LIKE_URL)

-    property like_num:
+    @property
+    def like_num(self):
        """RETURNS (bool): Whether the token resembles a number, e.g. "10.9",
            "10", "ten", etc.
        """
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c.lex, LIKE_NUM)
+        return Lexeme.c_check_flag(self.c.lex, LIKE_NUM)

-    property like_email:
+    @property
+    def like_email(self):
        """RETURNS (bool): Whether the token resembles an email address."""
-        def __get__(self):
-            return Lexeme.c_check_flag(self.c.lex, LIKE_EMAIL)
+        return Lexeme.c_check_flag(self.c.lex, LIKE_EMAIL)
--- a/spacy/util.py
+++ b/spacy/util.py
@ -38,6 +38,18 @@ def set_env_log(value):
    _PRINT_ENV = value


+def lang_class_is_loaded(lang):
+    """Check whether a Language class is already loaded. Language classes are
+    loaded lazily, to avoid expensive setup code associated with the language
+    data.
+
+    lang (unicode): Two-letter language code, e.g. 'en'.
+    RETURNS (bool): Whether a Language class has been loaded.
+    """
+    global LANGUAGES
+    return lang in LANGUAGES
+ 
+
 def get_lang_class(lang):
    """Import and load a Language class.

--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@ -60,12 +60,23 @@ cdef class Vocab:
        self.morphology = Morphology(self.strings, tag_map, lemmatizer)
        self.vectors = Vectors()

-    property lang:
+    @property
+    def lang(self):
+        langfunc = None
+        if self.lex_attr_getters:
+            langfunc = self.lex_attr_getters.get(LANG, None)
+        return langfunc("_") if langfunc else ""
+
+    property writing_system:
+        """A dict with information about the language's writing system. To get
+        the data, we use the vocab.lang property to fetch the Language class.
+        If the Language class is not loaded, an empty dict is returned.
+        """
        def __get__(self):
-            langfunc = None
-            if self.lex_attr_getters:
-                langfunc = self.lex_attr_getters.get(LANG, None)
-            return langfunc("_") if langfunc else ""
+            if not util.lang_class_is_loaded(self.lang):
+                return {}
+            lang_class = util.get_lang_class(self.lang)
+            return dict(lang_class.Defaults.writing_system)

    def __len__(self):
        """The current number of lexemes stored.
--- a/website/docs/api/top-level.md
+++ b/website/docs/api/top-level.md
@ -351,6 +351,24 @@ the two-letter language code.
 | `name` | unicode    | Two-letter language code, e.g. `'en'`. |
 | `cls`  | `Language` | The language class, e.g. `English`.    |

+### util.lang_class_is_loaded {#util.lang_class_is_loaded tag="function" new="2.1"}
+
+Check whether a `Language` class is already loaded. `Language` classes are
+loaded lazily, to avoid expensive setup code associated with the language data.
+
+> #### Example
+>
+> ```python
+> lang_cls = util.get_lang_class("en")
+> assert util.lang_class_is_loaded("en") is True
+> assert util.lang_class_is_loaded("de") is False
+> ```
+
+| Name        | Type    | Description                            |
+| ----------- | ------- | -------------------------------------- |
+| `lang`      | unicode | Two-letter language code, e.g. `'en'`. |
+| **RETURNS** | bool    | Whether the class has been loaded.     |
+
 ### util.load_model {#util.load_model tag="function" new="2"}

 Load a model from a shortcut link, package or data path. If called with a
--- a/website/docs/api/vocab.md
+++ b/website/docs/api/vocab.md
@ -288,11 +288,12 @@ Load state from a binary string.
 > assert type(PERSON) == int
 > ```

-| Name                                 | Type          | Description                                   |
-| ------------------------------------ | ------------- | --------------------------------------------- |
-| `strings`                            | `StringStore` | A table managing the string-to-int mapping.   |
-| `vectors` <Tag variant="new">2</Tag> | `Vectors`     | A table associating word IDs to word vectors. |
-| `vectors_length`                     | int           | Number of dimensions for each word vector.    |
+| Name                                          | Type          | Description                                                  |
+| --------------------------------------------- | ------------- | ------------------------------------------------------------ |
+| `strings`                                     | `StringStore` | A table managing the string-to-int mapping.                  |
+| `vectors` <Tag variant="new">2</Tag>          | `Vectors`     | A table associating word IDs to word vectors.                |
+| `vectors_length`                              | int           | Number of dimensions for each word vector.                   |
+| `writing_system` <Tag variant="new">2.1</Tag> | dict          | A dict with information about the language's writing system. |

 ## Serialization fields {#serialization-fields}