Mirror of https://github.com/explosion/spaCy.git (synced 2025-11-04 09:57:26 +03:00)

Commit: d96e72f656 ("Tidy up rest")
Parent: a8e10f94e4

@@ -8,11 +8,9 @@ from thinc.t2t import ExtractWindow, ParametricAttention
 from thinc.t2v import Pooling, sum_pool
 from thinc.misc import Residual
 from thinc.misc import LayerNorm as LN
-
 from thinc.api import add, layerize, chain, clone, concatenate, with_flatten
 from thinc.api import FeatureExtracter, with_getitem, flatten_add_lengths
 from thinc.api import uniqued, wrap, noop
-
 from thinc.linear.linear import LinearModel
 from thinc.neural.ops import NumpyOps, CupyOps
 from thinc.neural.util import get_array_module
@@ -101,17 +101,12 @@ def intify_attrs(stringy_attrs, strings_map=None, _do_deprecated=False):
     """
     Normalize a dictionary of attributes, converting them to ints.
 
-    Arguments:
-        stringy_attrs (dict):
-            Dictionary keyed by attribute string names. Values can be ints or strings.
-
-        strings_map (StringStore):
-            Defaults to None. If provided, encodes string values into ints.
-
-    Returns:
-        inty_attrs (dict):
-            Attributes dictionary with keys and optionally values converted to
-            ints.
+    stringy_attrs (dict): Dictionary keyed by attribute string names. Values
+        can be ints or strings.
+    strings_map (StringStore): Defaults to None. If provided, encodes string
+        values into ints.
+    RETURNS (dict): Attributes dictionary with keys and optionally values
+        converted to ints.
     """
     inty_attrs = {}
     if _do_deprecated:
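
Note: the rewritten docstring above follows the repo's newer one-line-per-argument convention. As a rough illustration of the documented behaviour (a sketch based on the docstring, not code from this diff; assumes the spaCy 2.x layout, where intify_attrs lives in spacy.attrs):

    # Illustrative sketch only. Keys like 'LOWER' become integer attribute
    # IDs; when strings_map is given, string values are encoded to ints too.
    from spacy.attrs import intify_attrs
    from spacy.strings import StringStore

    strings = StringStore()
    print(intify_attrs({'LOWER': 'dog'}, strings_map=strings))
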
@@ -2,7 +2,6 @@
 # coding: utf8
 from __future__ import unicode_literals, print_function
 
-import io
 import re
 import ujson
 import random
@@ -10,9 +9,8 @@ import cytoolz
 import itertools
 
 from .syntax import nonproj
-from .util import ensure_path
-from . import util
 from .tokens import Doc
+from . import util
 
 
 def tags_to_entities(tags):
@@ -310,7 +308,7 @@ def _corrupt(c, noise_level):
 
 
 def read_json_file(loc, docs_filter=None, limit=None):
-    loc = ensure_path(loc)
+    loc = util.ensure_path(loc)
     if loc.is_dir():
         for filename in loc.iterdir():
             yield from read_json_file(loc / filename, limit=limit)
@@ -1,22 +1,22 @@
 # coding: utf8
 from __future__ import absolute_import, unicode_literals
-from contextlib import contextmanager
-import copy
 
-from thinc.neural import Model
-from thinc.neural.optimizers import Adam
 import random
 import ujson
-from collections import OrderedDict
 import itertools
 import weakref
 import functools
+from collections import OrderedDict
+from contextlib import contextmanager
+from copy import copy
+from thinc.neural import Model
+from thinc.neural.optimizers import Adam
 
 from .tokenizer import Tokenizer
 from .vocab import Vocab
 from .lemmatizer import Lemmatizer
-from .pipeline import DependencyParser, Tensorizer, Tagger
-from .pipeline import EntityRecognizer, SimilarityHook, TextCategorizer
+from .pipeline import DependencyParser, Tensorizer, Tagger, EntityRecognizer
+from .pipeline import SimilarityHook, TextCategorizer
 from .compat import json_dumps, izip
 from .scorer import Scorer
 from ._ml import link_vectors_to_models
@@ -649,7 +649,7 @@ class Language(object):
         serializers = OrderedDict((
             ('vocab', lambda: self.vocab.to_bytes()),
             ('tokenizer', lambda: self.tokenizer.to_bytes(vocab=False)),
-            ('meta', lambda: ujson.dumps(self.meta))
+            ('meta', lambda: json_dumps(self.meta))
         ))
         for i, (name, proc) in enumerate(self.pipeline):
             if name in disable:
@@ -689,7 +689,7 @@ class DisabledPipes(list):
         # Important! Not deep copy -- we just want the container (but we also
         # want to support people providing arbitrarily typed nlp.pipeline
         # objects.)
-        self.original_pipeline = copy.copy(nlp.pipeline)
+        self.original_pipeline = copy(nlp.pipeline)
         list.__init__(self)
         self.extend(nlp.remove_pipe(name) for name in names)
 
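
Note: the copy.copy -> copy change follows from the new `from copy import copy` import; it is still a shallow copy, which is the point of the comment above. DisabledPipes is what backs Language.disable_pipes, so restoring the pipeline relies on that copied container. A hedged usage sketch (assumes a spaCy 2.x install with the en_core_web_sm model; not code from this diff):

    import spacy

    nlp = spacy.load('en_core_web_sm')
    # DisabledPipes works as a context manager: components are removed on
    # entry and the shallow-copied original pipeline is restored on exit.
    with nlp.disable_pipes('tagger', 'parser'):
        doc = nlp(u'Only the remaining components run here.')
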
@@ -4,12 +4,6 @@
 from __future__ import unicode_literals
 
 import ujson
-
-from .typedefs cimport attr_t
-from .typedefs cimport hash_t
-from .attrs cimport attr_id_t
-from .structs cimport TokenC
-
 from cymem.cymem cimport Pool
 from preshed.maps cimport PreshMap
 from libcpp.vector cimport vector
@@ -17,14 +11,15 @@ from libcpp.pair cimport pair
 from murmurhash.mrmr cimport hash64
 from libc.stdint cimport int32_t
 
-from .attrs cimport ID, NULL_ATTR, ENT_TYPE
-from . import attrs
-from .tokens.doc cimport get_token_attr
-from .tokens.doc cimport Doc
+from .typedefs cimport attr_t
+from .typedefs cimport hash_t
+from .structs cimport TokenC
+from .tokens.doc cimport Doc, get_token_attr
 from .vocab cimport Vocab
 
+from .attrs import IDS
+from .attrs cimport attr_id_t, ID, NULL_ATTR
 from .attrs import FLAG61 as U_ENT
-
 from .attrs import FLAG60 as B2_ENT
 from .attrs import FLAG59 as B3_ENT
 from .attrs import FLAG58 as B4_ENT
@@ -34,7 +29,6 @@ from .attrs import FLAG55 as B7_ENT
 from .attrs import FLAG54 as B8_ENT
 from .attrs import FLAG53 as B9_ENT
 from .attrs import FLAG52 as B10_ENT
-
 from .attrs import FLAG51 as I3_ENT
 from .attrs import FLAG50 as I4_ENT
 from .attrs import FLAG49 as I5_ENT
@@ -43,7 +37,6 @@ from .attrs import FLAG47 as I7_ENT
 from .attrs import FLAG46 as I8_ENT
 from .attrs import FLAG45 as I9_ENT
 from .attrs import FLAG44 as I10_ENT
-
 from .attrs import FLAG43 as L2_ENT
 from .attrs import FLAG42 as L3_ENT
 from .attrs import FLAG41 as L4_ENT
@@ -168,10 +161,10 @@ def _convert_strings(token_specs, string_store):
                 if value in operators:
                     ops = operators[value]
                 else:
-                    raise KeyError(
-                        "Unknown operator '%s'. Options: %s" % (value, ', '.join(operators.keys())))
+                    msg = "Unknown operator '%s'. Options: %s"
+                    raise KeyError(msg % (value, ', '.join(operators.keys())))
             if isinstance(attr, basestring):
-                attr = attrs.IDS.get(attr.upper())
+                attr = IDS.get(attr.upper())
             if isinstance(value, basestring):
                 value = string_store.add(value)
             if isinstance(value, bool):
@@ -186,7 +179,7 @@ def _convert_strings(token_specs, string_store):
 def merge_phrase(matcher, doc, i, matches):
     """Callback to merge a phrase on match."""
     ent_id, label, start, end = matches[i]
-    span = doc[start : end]
+    span = doc[start:end]
     span.merge(ent_type=label, ent_id=ent_id)
 
 
@@ -233,13 +226,13 @@ cdef class Matcher:
         return self._normalize_key(key) in self._patterns
 
     def add(self, key, on_match, *patterns):
-        """Add a match-rule to the matcher. A match-rule consists of: an ID key,
-        an on_match callback, and one or more patterns.
+        """Add a match-rule to the matcher. A match-rule consists of: an ID
+        key, an on_match callback, and one or more patterns.
 
         If the key exists, the patterns are appended to the previous ones, and
-        the previous on_match callback is replaced. The `on_match` callback will
-        receive the arguments `(matcher, doc, i, matches)`. You can also set
-        `on_match` to `None` to not perform any actions.
+        the previous on_match callback is replaced. The `on_match` callback
+        will receive the arguments `(matcher, doc, i, matches)`. You can also
+        set `on_match` to `None` to not perform any actions.
 
         A pattern consists of one or more `token_specs`, where a `token_spec`
         is a dictionary mapping attribute IDs to values, and optionally a
@@ -253,8 +246,8 @@ cdef class Matcher:
         The + and * operators are usually interpretted "greedily", i.e. longer
         matches are returned where possible. However, if you specify two '+'
         and '*' patterns in a row and their matches overlap, the first
-        operator will behave non-greedily. This quirk in the semantics
-        makes the matcher more efficient, by avoiding the need for back-tracking.
+        operator will behave non-greedily. This quirk in the semantics makes
+        the matcher more efficient, by avoiding the need for back-tracking.
 
         key (unicode): The match ID.
         on_match (callable): Callback executed on match.
@@ -268,7 +261,6 @@ cdef class Matcher:
         key = self._normalize_key(key)
         self._patterns.setdefault(key, [])
         self._callbacks[key] = on_match
-
         for pattern in patterns:
             specs = _convert_strings(pattern, self.vocab.strings)
             self.patterns.push_back(init_pattern(self.mem, key, specs))
@@ -315,9 +307,9 @@ cdef class Matcher:
         """Match a stream of documents, yielding them in turn.
 
         docs (iterable): A stream of documents.
-        batch_size (int): The number of documents to accumulate into a working set.
+        batch_size (int): Number of documents to accumulate into a working set.
         n_threads (int): The number of threads with which to work on the buffer
-            in parallel, if the `Matcher` implementation supports multi-threading.
+            in parallel, if the implementation supports multi-threading.
         YIELDS (Doc): Documents, in order.
         """
         for doc in docs:
@@ -325,7 +317,7 @@ cdef class Matcher:
             yield doc
 
     def __call__(self, Doc doc):
-        """Find all token sequences matching the supplied patterns on the `Doc`.
+        """Find all token sequences matching the supplied pattern.
 
         doc (Doc): The document to match over.
         RETURNS (list): A list of `(key, start, end)` tuples,
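
Note: the docstrings above describe the whole Matcher workflow: register one or more patterns under a key, optionally with an on_match callback taking (matcher, doc, i, matches), then call the matcher on a Doc to get (key, start, end) tuples. A hedged sketch of that flow (model name, pattern and text invented; assumes spaCy 2.x):

    import spacy
    from spacy.matcher import Matcher

    nlp = spacy.load('en_core_web_sm')
    matcher = Matcher(nlp.vocab)
    # One pattern = a list of token-spec dicts; on_match=None means no action.
    matcher.add('NEW_YORK', None, [{'LOWER': 'new'}, {'LOWER': 'york'}])
    doc = nlp(u'I moved to New York last year.')
    for key, start, end in matcher(doc):
        print(nlp.vocab.strings[key], doc[start:end].text)

merge_phrase from the earlier hunk has exactly the callback signature described here, so it can be passed as on_match to merge matched spans in place.
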
@@ -342,8 +334,8 @@ cdef class Matcher:
         for token_i in range(doc.length):
             token = &doc.c[token_i]
             q = 0
-            # Go over the open matches, extending or finalizing if able. Otherwise,
-            # we over-write them (q doesn't advance)
+            # Go over the open matches, extending or finalizing if able.
+            # Otherwise, we over-write them (q doesn't advance)
             for state in partials:
                 action = get_action(state.second, token)
                 if action == PANIC:
@@ -356,8 +348,8 @@ cdef class Matcher:
 
                 if action == REPEAT:
                     # Leave the state in the queue, and advance to next slot
-                    # (i.e. we don't overwrite -- we want to greedily match more
-                    # pattern.
+                    # (i.e. we don't overwrite -- we want to greedily match
+                    # more pattern.
                     q += 1
                 elif action == REJECT:
                     pass
@@ -366,8 +358,8 @@ cdef class Matcher:
                     partials[q].second += 1
                     q += 1
                 elif action in (ACCEPT, ACCEPT_PREV):
-                    # TODO: What to do about patterns starting with ZERO? Need to
-                    # adjust the start position.
+                    # TODO: What to do about patterns starting with ZERO? Need
+                    # to adjust the start position.
                     start = state.first
                     end = token_i+1 if action == ACCEPT else token_i
                     ent_id = state.second[1].attrs[0].value
@@ -388,8 +380,8 @@ cdef class Matcher:
                     state.second = pattern
                     partials.push_back(state)
                 elif action == ADVANCE:
-                    # TODO: What to do about patterns starting with ZERO? Need to
-                    # adjust the start position.
+                    # TODO: What to do about patterns starting with ZERO? Need
+                    # to adjust the start position.
                     state.first = token_i
                     state.second = pattern + 1
                     partials.push_back(state)
@@ -413,7 +405,6 @@ cdef class Matcher:
             on_match = self._callbacks.get(ent_id)
             if on_match is not None:
                 on_match(self, doc, i, matches)
-        # TODO: only return (match_id, start, end)
         return matches
 
     def _normalize_key(self, key):
@@ -441,7 +432,8 @@ def get_bilou(length):
     elif length == 8:
         return [B8_ENT, I8_ENT, I8_ENT, I8_ENT, I8_ENT, I8_ENT, I8_ENT, L8_ENT]
     elif length == 9:
-        return [B9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, L9_ENT]
+        return [B9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT,
+                L9_ENT]
     elif length == 10:
         return [B10_ENT, I10_ENT, I10_ENT, I10_ENT, I10_ENT, I10_ENT, I10_ENT,
                 I10_ENT, I10_ENT, L10_ENT]
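
Note: get_bilou maps a phrase length to per-position flag attributes: one B*_ENT begin flag, I*_ENT inside flags and a closing L*_ENT last flag, with U_ENT covering the unit-length case. Purely as an illustration of the scheme (plain Python, not code from the diff):

    # Sketch of the BILOU layout that get_bilou encodes via flag attributes.
    def bilou_scheme(length):
        if length == 1:
            return ['U']  # unit-length entity
        return ['B'] + ['I'] * (length - 2) + ['L']

    print(bilou_scheme(4))  # ['B', 'I', 'I', 'L']
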
@@ -454,10 +446,8 @@ cdef class PhraseMatcher:
     cdef Vocab vocab
     cdef Matcher matcher
     cdef PreshMap phrase_ids
-
     cdef int max_length
     cdef attr_t* _phrase_key
-
     cdef public object _callbacks
     cdef public object _patterns
 
@@ -470,7 +460,8 @@ cdef class PhraseMatcher:
         self.phrase_ids = PreshMap()
         abstract_patterns = []
         for length in range(1, max_length):
-            abstract_patterns.append([{tag: True} for tag in get_bilou(length)])
+            abstract_patterns.append([{tag: True}
+                                      for tag in get_bilou(length)])
         self.matcher.add('Candidate', None, *abstract_patterns)
         self._callbacks = {}
 
@@ -496,8 +487,8 @@ cdef class PhraseMatcher:
         return (self.__class__, (self.vocab,), None, None)
 
     def add(self, key, on_match, *docs):
-        """Add a match-rule to the matcher. A match-rule consists of: an ID key,
-        an on_match callback, and one or more patterns.
+        """Add a match-rule to the matcher. A match-rule consists of: an ID
+        key, an on_match callback, and one or more patterns.
 
         key (unicode): The match ID.
         on_match (callable): Callback executed on match.
@@ -513,7 +504,6 @@ cdef class PhraseMatcher:
                 raise ValueError(msg % (len(doc), self.max_length))
         cdef hash_t ent_id = self.matcher._normalize_key(key)
         self._callbacks[ent_id] = on_match
-
         cdef int length
         cdef int i
         cdef hash_t phrase_hash
@@ -553,9 +543,9 @@ cdef class PhraseMatcher:
         """Match a stream of documents, yielding them in turn.
 
         docs (iterable): A stream of documents.
-        batch_size (int): The number of documents to accumulate into a working set.
+        batch_size (int): Number of documents to accumulate into a working set.
         n_threads (int): The number of threads with which to work on the buffer
-            in parallel, if the `Matcher` implementation supports multi-threading.
+            in parallel, if the implementation supports multi-threading.
         YIELDS (Doc): Documents, in order.
         """
         for doc in stream:
@@ -569,7 +559,8 @@ cdef class PhraseMatcher:
             self._phrase_key[i] = 0
         for i, j in enumerate(range(start, end)):
             self._phrase_key[i] = doc.c[j].lex.orth
-        cdef hash_t key = hash64(self._phrase_key, self.max_length * sizeof(attr_t), 0)
+        cdef hash_t key = hash64(self._phrase_key,
+                                 self.max_length * sizeof(attr_t), 0)
         ent_id = <hash_t>self.phrase_ids.get(key)
         if ent_id == 0:
             return None
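
Note: unlike Matcher, PhraseMatcher.add takes pre-tokenized Doc objects; each phrase is keyed by the hash64 value computed in the hunk above. A hedged usage sketch (model name and phrases invented; assumes spaCy 2.x):

    import spacy
    from spacy.matcher import PhraseMatcher

    nlp = spacy.load('en_core_web_sm')
    matcher = PhraseMatcher(nlp.vocab)
    # Patterns are Doc objects, one per phrase.
    matcher.add('GPE', None, nlp(u'New York'), nlp(u'San Francisco'))
    doc = nlp(u'She moved from San Francisco to New York.')
    for key, start, end in matcher(doc):
        print(nlp.vocab.strings[key], doc[start:end].text)
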
@@ -4,17 +4,15 @@ from __future__ import unicode_literals
 
 from libc.string cimport memset
 
-from .parts_of_speech cimport ADJ, VERB, NOUN, PUNCT, SPACE
 from .attrs cimport POS, IS_SPACE
+from .attrs import LEMMA, intify_attrs
+from .parts_of_speech cimport SPACE
 from .parts_of_speech import IDS as POS_IDS
 from .lexeme cimport Lexeme
-from .attrs import LEMMA, intify_attrs
 
 
 def _normalize_props(props):
-    """
-    Transform deprecated string keys to correct names.
-    """
+    """Transform deprecated string keys to correct names."""
     out = {}
     for key, value in props.items():
         if key == POS:
@@ -77,7 +75,8 @@ cdef class Morphology:
     cdef int assign_untagged(self, TokenC* token) except -1:
         """Set morphological attributes on a token without a POS tag. Uses
         the lemmatizer's lookup() method, which looks up the string in the
-        table provided by the language data as lemma_lookup (if available)."""
+        table provided by the language data as lemma_lookup (if available).
+        """
         if token.lemma == 0:
             orth_str = self.strings[token.lex.orth]
             lemma = self.lemmatizer.lookup(orth_str)
@@ -95,11 +94,10 @@ cdef class Morphology:
     cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1:
         if tag_id > self.n_tags:
             raise ValueError("Unknown tag ID: %s" % tag_id)
-        # TODO: It's pretty arbitrary to put this logic here. I guess the justification
-        # is that this is where the specific word and the tag interact. Still,
-        # we should have a better way to enforce this rule, or figure out why
-        # the statistical model fails.
-        # Related to Issue #220
+        # TODO: It's pretty arbitrary to put this logic here. I guess the
+        # justification is that this is where the specific word and the tag
+        # interact. Still, we should have a better way to enforce this rule, or
+        # figure out why the statistical model fails. Related to Issue #220
         if Lexeme.c_check_flag(token.lex, IS_SPACE):
             tag_id = self.reverse_index[self.strings.add('_SP')]
         rich_tag = self.rich_tags[tag_id]
@@ -123,12 +121,11 @@ cdef class Morphology:
         else:
             flags[0] &= ~(one << flag_id)
 
-    def add_special_case(self, unicode tag_str, unicode orth_str, attrs, force=False):
-        """
-        Add a special-case rule to the morphological analyser. Tokens whose
+    def add_special_case(self, unicode tag_str, unicode orth_str, attrs,
+                         force=False):
+        """Add a special-case rule to the morphological analyser. Tokens whose
         tag and orth match the rule will receive the specified properties.
 
-        Arguments:
         tag (unicode): The part-of-speech tag to key the exception.
         orth (unicode): The word-form to key the exception.
         """
@@ -144,10 +141,9 @@ cdef class Morphology:
         elif force:
             memset(cached, 0, sizeof(cached[0]))
         else:
-            msg = ("Conflicting morphology exception for (%s, %s). Use force=True "
-                   "to overwrite.")
-            msg = msg % (tag_str, orth_str)
-            raise ValueError(msg)
+            raise ValueError(
+                "Conflicting morphology exception for (%s, %s). Use "
+                "force=True to overwrite." % (tag_str, orth_str))
 
         cached.tag = rich_tag
         # TODO: Refactor this to take arbitrary attributes.
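
Note: based only on the signature and docstring above, a hedged sketch of installing a morphology exception; the tag, orth and attrs values are invented, and the sketch assumes the spaCy 2.x layout where the Morphology object is reachable as vocab.morphology:

    import spacy

    nlp = spacy.load('en_core_web_sm')
    # force=True overwrites an existing exception rather than raising the
    # ValueError shown in the hunk above.
    nlp.vocab.morphology.add_special_case(
        u'NNS', u'wolves', {'LEMMA': u'wolf'}, force=True)
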
@@ -85,7 +85,6 @@ class Scorer(object):
 
     def score(self, tokens, gold, verbose=False, punct_labels=('p', 'punct')):
         assert len(tokens) == len(gold)
-
         gold_deps = set()
         gold_tags = set()
         gold_ents = set(tags_to_entities([annot[-1]
@@ -4,19 +4,15 @@ from __future__ import unicode_literals, absolute_import
 
 cimport cython
 from libc.string cimport memcpy
-from libc.stdint cimport uint64_t, uint32_t
-from murmurhash.mrmr cimport hash64, hash32
-from preshed.maps cimport map_iter, key_t
 from libc.stdint cimport uint32_t
+from murmurhash.mrmr cimport hash64, hash32
 import ujson
-import dill
 
 from .symbols import IDS as SYMBOLS_BY_STR
 from .symbols import NAMES as SYMBOLS_BY_INT
-
 from .typedefs cimport hash_t
-from . import util
 from .compat import json_dumps
+from . import util
 
 
 cpdef hash_t hash_string(unicode string) except 0:
@@ -195,7 +191,7 @@ cdef class StringStore:
         """Save the current state to a directory.
 
         path (unicode or Path): A path to a directory, which will be created if
-            it doesn't exist. Paths may be either strings or `Path`-like objects.
+            it doesn't exist. Paths may be either strings or Path-like objects.
         """
         path = util.ensure_path(path)
         strings = list(self)
@@ -225,7 +221,7 @@ cdef class StringStore:
         **exclude: Named attributes to prevent from being serialized.
         RETURNS (bytes): The serialized form of the `StringStore` object.
         """
-        return ujson.dumps(list(self))
+        return json_dumps(list(self))
 
     def from_bytes(self, bytes_data, **exclude):
         """Load state from a binary string.
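
Note: to_bytes now routes through the json_dumps wrapper from .compat instead of calling ujson directly, matching the Language.to_bytes change earlier in this commit; the payload is still a JSON list of strings. A hedged round-trip sketch (assumes the spaCy 2.x StringStore API):

    from spacy.strings import StringStore

    store = StringStore([u'apple', u'orange'])
    data = store.to_bytes()                # JSON list of the store's strings
    store2 = StringStore().from_bytes(data)
    assert u'apple' in store2
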
@@ -1,8 +1,8 @@
 # coding: utf8
 #cython: optimize.unpack_method_calls=False
-
 from __future__ import unicode_literals
 
+
 IDS = {
     "": NIL,
     "IS_ALPHA": IS_ALPHA,
@@ -464,9 +464,11 @@ IDS = {
     "LAW": LAW
 }
 
+
 def sort_nums(x):
     return x[1]
 
+
 NAMES = [it[0] for it in sorted(IDS.items(), key=sort_nums)]
 # Unfortunate hack here, to work around problem with long cpdef enum
 # (which is generating an enormous amount of C++ in Cython 0.24+)
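
Note: NAMES inverts the IDS table by sorting entries on their integer value, so NAMES[i] is the name of symbol i as long as the enum values are dense from zero. The same idiom in plain Python (a toy sketch, not spaCy code):

    IDS = {'NIL': 0, 'IS_ALPHA': 1, 'IS_ASCII': 2}
    NAMES = [name for name, _id in sorted(IDS.items(), key=lambda it: it[1])]
    assert NAMES[IDS['IS_ASCII']] == 'IS_ASCII'
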
@@ -8,12 +8,11 @@ from cython.operator cimport preincrement as preinc
 from cymem.cymem cimport Pool
 from preshed.maps cimport PreshMap
 import regex as re
-
-from .strings cimport hash_string
-from . import util
 cimport cython
 
 from .tokens.doc cimport Doc
+from .strings cimport hash_string
+from . import util
 
 
 cdef class Tokenizer:
@@ -74,9 +73,8 @@ cdef class Tokenizer:
         RETURNS (Doc): A container for linguistic annotations.
         """
         if len(string) >= (2 ** 30):
-            raise ValueError(
-                "String is too long: %d characters. Max is 2**30." % len(string)
-            )
+            msg = "String is too long: %d characters. Max is 2**30."
+            raise ValueError(msg % len(string))
         cdef int length = len(string)
         cdef Doc doc = Doc(self.vocab)
         if length == 0:
@@ -122,8 +120,8 @@ cdef class Tokenizer:
         """Tokenize a stream of texts.
 
         texts: A sequence of unicode texts.
-        batch_size (int): The number of texts to accumulate in an internal buffer.
-        n_threads (int): The number of threads to use, if the implementation
+        batch_size (int): Number of texts to accumulate in an internal buffer.
+        n_threads (int): Number of threads to use, if the implementation
             supports multi-threading. The default tokenizer is single-threaded.
         YIELDS (Doc): A sequence of Doc objects, in order.
         """
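
Note: a hedged sketch of the pipe API documented above (texts and model name invented; assumes spaCy 2.x, where the tokenizer hangs off the Language object):

    import spacy

    nlp = spacy.load('en_core_web_sm')
    texts = [u'First text.', u'Second text.']
    # Yields Doc objects in order; the default tokenizer is single-threaded,
    # so n_threads is effectively advisory here.
    for doc in nlp.tokenizer.pipe(texts, batch_size=1000):
        print([t.text for t in doc])
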
@@ -232,8 +230,8 @@ cdef class Tokenizer:
                 if not matches:
                     tokens.push_back(self.vocab.get(tokens.mem, string), False)
                 else:
-                    # let's say we have dyn-o-mite-dave
-                    # the regex finds the start and end positions of the hyphens
+                    # let's say we have dyn-o-mite-dave - the regex finds the
+                    # start and end positions of the hyphens
                     start = 0
                     for match in matches:
                         infix_start = match.start()
@@ -293,8 +291,8 @@ cdef class Tokenizer:
         return list(self.infix_finditer(string))
 
     def find_prefix(self, unicode string):
-        """Find the length of a prefix that should be segmented from the string,
-        or None if no prefix rules match.
+        """Find the length of a prefix that should be segmented from the
+        string, or None if no prefix rules match.
 
         string (unicode): The string to segment.
         RETURNS (int): The length of the prefix if present, otherwise `None`.
@@ -305,8 +303,8 @@ cdef class Tokenizer:
         return (match.end() - match.start()) if match is not None else 0
 
     def find_suffix(self, unicode string):
-        """Find the length of a suffix that should be segmented from the string,
-        or None if no suffix rules match.
+        """Find the length of a suffix that should be segmented from the
+        string, or None if no suffix rules match.
 
         string (unicode): The string to segment.
         Returns (int): The length of the suffix if present, otherwise `None`.
@@ -326,8 +324,8 @@ cdef class Tokenizer:
 
         string (unicode): The string to specially tokenize.
         token_attrs (iterable): A sequence of dicts, where each dict describes
-            a token and its attributes. The `ORTH` fields of the attributes must
-            exactly match the string when they are concatenated.
+            a token and its attributes. The `ORTH` fields of the attributes
+            must exactly match the string when they are concatenated.
         """
         substrings = list(substrings)
         cached = <_Cached*>self.mem.alloc(1, sizeof(_Cached))
@@ -343,7 +341,7 @@ cdef class Tokenizer:
         """Save the current state to a directory.
 
         path (unicode or Path): A path to a directory, which will be created if
-            it doesn't exist. Paths may be either strings or `Path`-like objects.
+            it doesn't exist. Paths may be either strings or Path-like objects.
         """
         with path.open('wb') as file_:
             file_.write(self.to_bytes(**exclude))
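
Note: the contract in the add_special_case docstring above (the ORTH fields must concatenate back to the original string) is what makes contraction rules safe. A hedged sketch (assumes spaCy 2.x; the rule itself is illustrative):

    import spacy
    from spacy.attrs import ORTH, LEMMA

    nlp = spacy.load('en_core_web_sm')
    # 'gim' + 'me' concatenates exactly to 'gimme', as the docstring requires.
    nlp.tokenizer.add_special_case(u'gimme', [{ORTH: u'gim', LEMMA: u'give'},
                                              {ORTH: u'me'}])
    print([t.text for t in nlp(u'gimme that')])  # ['gim', 'me', 'that']
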
@@ -476,7 +476,7 @@ cdef class Span:
         """
         # TODO: implement
         def __get__(self):
-            raise NotImplementedError()
+            raise NotImplementedError
 
     property n_rights:
         """RETURNS (int): The number of rightward immediate children of the
@@ -484,7 +484,7 @@ cdef class Span:
         """
         # TODO: implement
         def __get__(self):
-            raise NotImplementedError()
+            raise NotImplementedError
 
     property subtree:
         """Tokens that descend from tokens in the span, but fall outside it.
@@ -1 +0,0 @@
-
@@ -17,8 +17,8 @@ from .compat import copy_reg, basestring_
 from .lemmatizer import Lemmatizer
 from .attrs import intify_attrs
 from .vectors import Vectors
-from . import util
 from ._ml import link_vectors_to_models
+from . import util
 
 
 cdef class Vocab: