Tidy up rest

ines 2017-10-27 21:07:59 +02:00
parent a8e10f94e4
commit d96e72f656
14 changed files with 233 additions and 261 deletions


@@ -8,11 +8,9 @@ from thinc.t2t import ExtractWindow, ParametricAttention
from thinc.t2v import Pooling, sum_pool
from thinc.misc import Residual
from thinc.misc import LayerNorm as LN
from thinc.api import add, layerize, chain, clone, concatenate, with_flatten
from thinc.api import FeatureExtracter, with_getitem, flatten_add_lengths
from thinc.api import uniqued, wrap, noop
from thinc.linear.linear import LinearModel
from thinc.neural.ops import NumpyOps, CupyOps
from thinc.neural.util import get_array_module


@@ -101,17 +101,12 @@ def intify_attrs(stringy_attrs, strings_map=None, _do_deprecated=False):
"""
Normalize a dictionary of attributes, converting them to ints.
Arguments:
stringy_attrs (dict):
Dictionary keyed by attribute string names. Values can be ints or strings.
strings_map (StringStore):
Defaults to None. If provided, encodes string values into ints.
Returns:
inty_attrs (dict):
Attributes dictionary with keys and optionally values converted to
ints.
stringy_attrs (dict): Dictionary keyed by attribute string names. Values
can be ints or strings.
strings_map (StringStore): Defaults to None. If provided, encodes string
values into ints.
RETURNS (dict): Attributes dictionary with keys and optionally values
converted to ints.
"""
inty_attrs = {}
if _do_deprecated:

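A minimal usage sketch of intify_attrs as described by the reflowed docstring
above; the attribute values and the throwaway Vocab are illustrative, not part
of the commit:

    # Hypothetical usage of intify_attrs (illustrative values).
    from spacy.attrs import intify_attrs
    from spacy.vocab import Vocab

    vocab = Vocab()
    stringy = {'ORTH': 'running', 'LEMMA': 'run'}
    # Keys become integer attribute IDs; because strings_map is given, the
    # string values are also encoded to integer IDs via the StringStore.
    inty = intify_attrs(stringy, strings_map=vocab.strings)
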

@@ -2,7 +2,6 @@
# coding: utf8
from __future__ import unicode_literals, print_function
import io
import re
import ujson
import random
@@ -10,9 +9,8 @@ import cytoolz
import itertools
from .syntax import nonproj
from .util import ensure_path
from . import util
from .tokens import Doc
from . import util
def tags_to_entities(tags):
@@ -310,7 +308,7 @@ def _corrupt(c, noise_level):
def read_json_file(loc, docs_filter=None, limit=None):
loc = ensure_path(loc)
loc = util.ensure_path(loc)
if loc.is_dir():
for filename in loc.iterdir():
yield from read_json_file(loc / filename, limit=limit)

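The hunk above is mainly about calling the fully qualified util.ensure_path; a
short sketch of what that helper does for read_json_file, with an illustrative
path that is not taken from the commit:

    # util.ensure_path normalizes str arguments to pathlib.Path objects and
    # passes Path objects through, so loc.is_dir() / loc.iterdir() work either way.
    from spacy import util

    loc = util.ensure_path('corpus/train.json')   # str in, Path out
    loc = util.ensure_path(loc)                   # Path in, same Path out
    if loc.is_dir():
        files = sorted(loc.iterdir())
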

@@ -1,22 +1,22 @@
# coding: utf8
from __future__ import absolute_import, unicode_literals
from contextlib import contextmanager
import copy
from thinc.neural import Model
from thinc.neural.optimizers import Adam
import random
import ujson
from collections import OrderedDict
import itertools
import weakref
import functools
from collections import OrderedDict
from contextlib import contextmanager
from copy import copy
from thinc.neural import Model
from thinc.neural.optimizers import Adam
from .tokenizer import Tokenizer
from .vocab import Vocab
from .lemmatizer import Lemmatizer
from .pipeline import DependencyParser, Tensorizer, Tagger
from .pipeline import EntityRecognizer, SimilarityHook, TextCategorizer
from .pipeline import DependencyParser, Tensorizer, Tagger, EntityRecognizer
from .pipeline import SimilarityHook, TextCategorizer
from .compat import json_dumps, izip
from .scorer import Scorer
from ._ml import link_vectors_to_models
@@ -649,7 +649,7 @@ class Language(object):
serializers = OrderedDict((
('vocab', lambda: self.vocab.to_bytes()),
('tokenizer', lambda: self.tokenizer.to_bytes(vocab=False)),
('meta', lambda: ujson.dumps(self.meta))
('meta', lambda: json_dumps(self.meta))
))
for i, (name, proc) in enumerate(self.pipeline):
if name in disable:
@@ -689,7 +689,7 @@ class DisabledPipes(list):
# Important! Not deep copy -- we just want the container (but we also
# want to support people providing arbitrarily typed nlp.pipeline
# objects.)
self.original_pipeline = copy.copy(nlp.pipeline)
self.original_pipeline = copy(nlp.pipeline)
list.__init__(self)
self.extend(nlp.remove_pipe(name) for name in names)

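For readers tracking the DisabledPipes change (plain copy() of the pipeline
list instead of copy.copy()), a brief sketch of how the container is normally
obtained and restored, assuming the spaCy v2 nlp.disable_pipes() API; the model
name is illustrative:

    # Sketch only: DisabledPipes keeps a shallow copy of nlp.pipeline and puts
    # the removed components back when the block exits.
    import spacy

    nlp = spacy.load('en_core_web_sm')            # illustrative model name
    with nlp.disable_pipes('tagger', 'parser'):
        doc = nlp(u"Only the remaining pipeline components run here.")
    # nlp.pipeline is restored at this point.

    # Serialization of the meta now goes through compat.json_dumps:
    data = nlp.to_bytes()
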

@@ -4,12 +4,6 @@
from __future__ import unicode_literals
import ujson
from .typedefs cimport attr_t
from .typedefs cimport hash_t
from .attrs cimport attr_id_t
from .structs cimport TokenC
from cymem.cymem cimport Pool
from preshed.maps cimport PreshMap
from libcpp.vector cimport vector
@@ -17,14 +11,15 @@ from libcpp.pair cimport pair
from murmurhash.mrmr cimport hash64
from libc.stdint cimport int32_t
from .attrs cimport ID, NULL_ATTR, ENT_TYPE
from . import attrs
from .tokens.doc cimport get_token_attr
from .tokens.doc cimport Doc
from .typedefs cimport attr_t
from .typedefs cimport hash_t
from .structs cimport TokenC
from .tokens.doc cimport Doc, get_token_attr
from .vocab cimport Vocab
from .attrs import IDS
from .attrs cimport attr_id_t, ID, NULL_ATTR
from .attrs import FLAG61 as U_ENT
from .attrs import FLAG60 as B2_ENT
from .attrs import FLAG59 as B3_ENT
from .attrs import FLAG58 as B4_ENT
@@ -34,7 +29,6 @@ from .attrs import FLAG55 as B7_ENT
from .attrs import FLAG54 as B8_ENT
from .attrs import FLAG53 as B9_ENT
from .attrs import FLAG52 as B10_ENT
from .attrs import FLAG51 as I3_ENT
from .attrs import FLAG50 as I4_ENT
from .attrs import FLAG49 as I5_ENT
@@ -43,7 +37,6 @@ from .attrs import FLAG47 as I7_ENT
from .attrs import FLAG46 as I8_ENT
from .attrs import FLAG45 as I9_ENT
from .attrs import FLAG44 as I10_ENT
from .attrs import FLAG43 as L2_ENT
from .attrs import FLAG42 as L3_ENT
from .attrs import FLAG41 as L4_ENT
@@ -168,10 +161,10 @@ def _convert_strings(token_specs, string_store):
if value in operators:
ops = operators[value]
else:
raise KeyError(
"Unknown operator '%s'. Options: %s" % (value, ', '.join(operators.keys())))
msg = "Unknown operator '%s'. Options: %s"
raise KeyError(msg % (value, ', '.join(operators.keys())))
if isinstance(attr, basestring):
attr = attrs.IDS.get(attr.upper())
attr = IDS.get(attr.upper())
if isinstance(value, basestring):
value = string_store.add(value)
if isinstance(value, bool):
@@ -186,7 +179,7 @@ def _convert_strings(token_specs, string_store):
def merge_phrase(matcher, doc, i, matches):
"""Callback to merge a phrase on match."""
ent_id, label, start, end = matches[i]
span = doc[start : end]
span = doc[start:end]
span.merge(ent_type=label, ent_id=ent_id)
@@ -233,13 +226,13 @@ cdef class Matcher:
return self._normalize_key(key) in self._patterns
def add(self, key, on_match, *patterns):
"""Add a match-rule to the matcher. A match-rule consists of: an ID key,
an on_match callback, and one or more patterns.
"""Add a match-rule to the matcher. A match-rule consists of: an ID
key, an on_match callback, and one or more patterns.
If the key exists, the patterns are appended to the previous ones, and
the previous on_match callback is replaced. The `on_match` callback will
receive the arguments `(matcher, doc, i, matches)`. You can also set
`on_match` to `None` to not perform any actions.
the previous on_match callback is replaced. The `on_match` callback
will receive the arguments `(matcher, doc, i, matches)`. You can also
set `on_match` to `None` to not perform any actions.
A pattern consists of one or more `token_specs`, where a `token_spec`
is a dictionary mapping attribute IDs to values, and optionally a
@@ -253,8 +246,8 @@ cdef class Matcher:
The + and * operators are usually interpretted "greedily", i.e. longer
matches are returned where possible. However, if you specify two '+'
and '*' patterns in a row and their matches overlap, the first
operator will behave non-greedily. This quirk in the semantics
makes the matcher more efficient, by avoiding the need for back-tracking.
operator will behave non-greedily. This quirk in the semantics makes
the matcher more efficient, by avoiding the need for back-tracking.
key (unicode): The match ID.
on_match (callable): Callback executed on match.
@@ -268,7 +261,6 @@ cdef class Matcher:
key = self._normalize_key(key)
self._patterns.setdefault(key, [])
self._callbacks[key] = on_match
for pattern in patterns:
specs = _convert_strings(pattern, self.vocab.strings)
self.patterns.push_back(init_pattern(self.mem, key, specs))
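
The add() docstring above is easier to follow with a concrete rule; the pattern,
match key and callback below are illustrative and assume the blank English
pipeline from spacy.lang.en:

    # Illustrative Matcher rule with an on_match callback and an optional token.
    from spacy.lang.en import English
    from spacy.matcher import Matcher

    nlp = English()
    matcher = Matcher(nlp.vocab)

    def on_match(matcher, doc, i, matches):
        # Receives (matcher, doc, i, matches), as described in the docstring.
        key, start, end = matches[i]
        print('matched:', doc[start:end].text)

    # "hello", optionally followed by punctuation, then "world".
    pattern = [{'LOWER': 'hello'}, {'IS_PUNCT': True, 'OP': '?'}, {'LOWER': 'world'}]
    matcher.add('HelloWorld', on_match, pattern)

    doc = nlp(u'Hello, world! Hello world!')
    matches = matcher(doc)   # list of (key, start, end) tuples
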
@@ -315,9 +307,9 @@ cdef class Matcher:
"""Match a stream of documents, yielding them in turn.
docs (iterable): A stream of documents.
batch_size (int): The number of documents to accumulate into a working set.
batch_size (int): Number of documents to accumulate into a working set.
n_threads (int): The number of threads with which to work on the buffer
in parallel, if the `Matcher` implementation supports multi-threading.
in parallel, if the implementation supports multi-threading.
YIELDS (Doc): Documents, in order.
"""
for doc in docs:
@@ -325,7 +317,7 @@ cdef class Matcher:
yield doc
def __call__(self, Doc doc):
"""Find all token sequences matching the supplied patterns on the `Doc`.
"""Find all token sequences matching the supplied pattern.
doc (Doc): The document to match over.
RETURNS (list): A list of `(key, start, end)` tuples,
@@ -342,8 +334,8 @@ cdef class Matcher:
for token_i in range(doc.length):
token = &doc.c[token_i]
q = 0
# Go over the open matches, extending or finalizing if able. Otherwise,
# we over-write them (q doesn't advance)
# Go over the open matches, extending or finalizing if able.
# Otherwise, we over-write them (q doesn't advance)
for state in partials:
action = get_action(state.second, token)
if action == PANIC:
@@ -356,8 +348,8 @@ cdef class Matcher:
if action == REPEAT:
# Leave the state in the queue, and advance to next slot
# (i.e. we don't overwrite -- we want to greedily match more
# pattern.
# (i.e. we don't overwrite -- we want to greedily match
# more pattern.
q += 1
elif action == REJECT:
pass
@@ -366,8 +358,8 @@ cdef class Matcher:
partials[q].second += 1
q += 1
elif action in (ACCEPT, ACCEPT_PREV):
# TODO: What to do about patterns starting with ZERO? Need to
# adjust the start position.
# TODO: What to do about patterns starting with ZERO? Need
# to adjust the start position.
start = state.first
end = token_i+1 if action == ACCEPT else token_i
ent_id = state.second[1].attrs[0].value
@@ -388,8 +380,8 @@ cdef class Matcher:
state.second = pattern
partials.push_back(state)
elif action == ADVANCE:
# TODO: What to do about patterns starting with ZERO? Need to
# adjust the start position.
# TODO: What to do about patterns starting with ZERO? Need
# to adjust the start position.
state.first = token_i
state.second = pattern + 1
partials.push_back(state)
@@ -413,7 +405,6 @@ cdef class Matcher:
on_match = self._callbacks.get(ent_id)
if on_match is not None:
on_match(self, doc, i, matches)
# TODO: only return (match_id, start, end)
return matches
def _normalize_key(self, key):
@@ -441,7 +432,8 @@ def get_bilou(length):
elif length == 8:
return [B8_ENT, I8_ENT, I8_ENT, I8_ENT, I8_ENT, I8_ENT, I8_ENT, L8_ENT]
elif length == 9:
return [B9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, L9_ENT]
return [B9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT, I9_ENT,
L9_ENT]
elif length == 10:
return [B10_ENT, I10_ENT, I10_ENT, I10_ENT, I10_ENT, I10_ENT, I10_ENT,
I10_ENT, I10_ENT, L10_ENT]
@@ -454,10 +446,8 @@ cdef class PhraseMatcher:
cdef Vocab vocab
cdef Matcher matcher
cdef PreshMap phrase_ids
cdef int max_length
cdef attr_t* _phrase_key
cdef public object _callbacks
cdef public object _patterns
@@ -470,7 +460,8 @@ cdef class PhraseMatcher:
self.phrase_ids = PreshMap()
abstract_patterns = []
for length in range(1, max_length):
abstract_patterns.append([{tag: True} for tag in get_bilou(length)])
abstract_patterns.append([{tag: True}
for tag in get_bilou(length)])
self.matcher.add('Candidate', None, *abstract_patterns)
self._callbacks = {}
@@ -496,8 +487,8 @@ cdef class PhraseMatcher:
return (self.__class__, (self.vocab,), None, None)
def add(self, key, on_match, *docs):
"""Add a match-rule to the matcher. A match-rule consists of: an ID key,
an on_match callback, and one or more patterns.
"""Add a match-rule to the matcher. A match-rule consists of: an ID
key, an on_match callback, and one or more patterns.
key (unicode): The match ID.
on_match (callable): Callback executed on match.
@@ -513,7 +504,6 @@ cdef class PhraseMatcher:
raise ValueError(msg % (len(doc), self.max_length))
cdef hash_t ent_id = self.matcher._normalize_key(key)
self._callbacks[ent_id] = on_match
cdef int length
cdef int i
cdef hash_t phrase_hash
@@ -553,9 +543,9 @@ cdef class PhraseMatcher:
"""Match a stream of documents, yielding them in turn.
docs (iterable): A stream of documents.
batch_size (int): The number of documents to accumulate into a working set.
batch_size (int): Number of documents to accumulate into a working set.
n_threads (int): The number of threads with which to work on the buffer
in parallel, if the `Matcher` implementation supports multi-threading.
in parallel, if the implementation supports multi-threading.
YIELDS (Doc): Documents, in order.
"""
for doc in stream:
@@ -569,7 +559,8 @@ cdef class PhraseMatcher:
self._phrase_key[i] = 0
for i, j in enumerate(range(start, end)):
self._phrase_key[i] = doc.c[j].lex.orth
cdef hash_t key = hash64(self._phrase_key, self.max_length * sizeof(attr_t), 0)
cdef hash_t key = hash64(self._phrase_key,
self.max_length * sizeof(attr_t), 0)
ent_id = <hash_t>self.phrase_ids.get(key)
if ent_id == 0:
return None

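A matching sketch for PhraseMatcher, again with illustrative terminology and the
blank English pipeline; on_match is left as None, as the add() docstring allows:

    # Illustrative PhraseMatcher usage: patterns are Doc objects, and matching
    # is done on the ORTH values of their tokens.
    from spacy.lang.en import English
    from spacy.matcher import PhraseMatcher

    nlp = English()
    matcher = PhraseMatcher(nlp.vocab)
    terms = [u'machine learning', u'natural language processing']
    matcher.add('TECH_TERMS', None, *[nlp(term) for term in terms])

    doc = nlp(u'I like natural language processing.')
    matches = matcher(doc)   # (key, start, end) tuples, as with Matcher
    for matched_doc in matcher.pipe([doc], batch_size=50):
        pass                 # pipe() yields the documents back in order
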

@@ -4,17 +4,15 @@ from __future__ import unicode_literals
from libc.string cimport memset
from .parts_of_speech cimport ADJ, VERB, NOUN, PUNCT, SPACE
from .attrs cimport POS, IS_SPACE
from .attrs import LEMMA, intify_attrs
from .parts_of_speech cimport SPACE
from .parts_of_speech import IDS as POS_IDS
from .lexeme cimport Lexeme
from .attrs import LEMMA, intify_attrs
def _normalize_props(props):
"""
Transform deprecated string keys to correct names.
"""
"""Transform deprecated string keys to correct names."""
out = {}
for key, value in props.items():
if key == POS:
@@ -77,7 +75,8 @@ cdef class Morphology:
cdef int assign_untagged(self, TokenC* token) except -1:
"""Set morphological attributes on a token without a POS tag. Uses
the lemmatizer's lookup() method, which looks up the string in the
table provided by the language data as lemma_lookup (if available)."""
table provided by the language data as lemma_lookup (if available).
"""
if token.lemma == 0:
orth_str = self.strings[token.lex.orth]
lemma = self.lemmatizer.lookup(orth_str)
@@ -95,11 +94,10 @@ cdef class Morphology:
cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1:
if tag_id > self.n_tags:
raise ValueError("Unknown tag ID: %s" % tag_id)
# TODO: It's pretty arbitrary to put this logic here. I guess the justification
# is that this is where the specific word and the tag interact. Still,
# we should have a better way to enforce this rule, or figure out why
# the statistical model fails.
# Related to Issue #220
# TODO: It's pretty arbitrary to put this logic here. I guess the
# justification is that this is where the specific word and the tag
# interact. Still, we should have a better way to enforce this rule, or
# figure out why the statistical model fails. Related to Issue #220
if Lexeme.c_check_flag(token.lex, IS_SPACE):
tag_id = self.reverse_index[self.strings.add('_SP')]
rich_tag = self.rich_tags[tag_id]
@@ -123,12 +121,11 @@ cdef class Morphology:
else:
flags[0] &= ~(one << flag_id)
def add_special_case(self, unicode tag_str, unicode orth_str, attrs, force=False):
"""
Add a special-case rule to the morphological analyser. Tokens whose
def add_special_case(self, unicode tag_str, unicode orth_str, attrs,
force=False):
"""Add a special-case rule to the morphological analyser. Tokens whose
tag and orth match the rule will receive the specified properties.
Arguments:
tag (unicode): The part-of-speech tag to key the exception.
orth (unicode): The word-form to key the exception.
"""
@@ -144,10 +141,9 @@ cdef class Morphology:
elif force:
memset(cached, 0, sizeof(cached[0]))
else:
msg = ("Conflicting morphology exception for (%s, %s). Use force=True "
"to overwrite.")
msg = msg % (tag_str, orth_str)
raise ValueError(msg)
raise ValueError(
"Conflicting morphology exception for (%s, %s). Use "
"force=True to overwrite." % (tag_str, orth_str))
cached.tag = rich_tag
# TODO: Refactor this to take arbitrary attributes.

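A hypothetical sketch of the special-case rule described by the docstring and
the reworded ValueError above; this is internal API, and the tag, orth and
attribute values are made up for illustration:

    # Hypothetical only: register a morphology exception keyed by (tag, orth).
    from spacy.attrs import LEMMA
    from spacy.lang.en import English

    nlp = English()
    morphology = nlp.vocab.morphology
    morphology.add_special_case('VBZ', 'has', {LEMMA: 'have'})
    # Adding a conflicting rule for the same (tag, orth) pair raises the
    # ValueError above unless force=True is passed to overwrite it.
    morphology.add_special_case('VBZ', 'has', {LEMMA: 'have'}, force=True)
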

@@ -85,7 +85,6 @@ class Scorer(object):
def score(self, tokens, gold, verbose=False, punct_labels=('p', 'punct')):
assert len(tokens) == len(gold)
gold_deps = set()
gold_tags = set()
gold_ents = set(tags_to_entities([annot[-1]


@@ -4,19 +4,15 @@ from __future__ import unicode_literals, absolute_import
cimport cython
from libc.string cimport memcpy
from libc.stdint cimport uint64_t, uint32_t
from murmurhash.mrmr cimport hash64, hash32
from preshed.maps cimport map_iter, key_t
from libc.stdint cimport uint32_t
from murmurhash.mrmr cimport hash64, hash32
import ujson
import dill
from .symbols import IDS as SYMBOLS_BY_STR
from .symbols import NAMES as SYMBOLS_BY_INT
from .typedefs cimport hash_t
from . import util
from .compat import json_dumps
from . import util
cpdef hash_t hash_string(unicode string) except 0:
@@ -195,7 +191,7 @@ cdef class StringStore:
"""Save the current state to a directory.
path (unicode or Path): A path to a directory, which will be created if
it doesn't exist. Paths may be either strings or `Path`-like objects.
it doesn't exist. Paths may be either strings or Path-like objects.
"""
path = util.ensure_path(path)
strings = list(self)
@@ -225,7 +221,7 @@ cdef class StringStore:
**exclude: Named attributes to prevent from being serialized.
RETURNS (bytes): The serialized form of the `StringStore` object.
"""
return ujson.dumps(list(self))
return json_dumps(list(self))
def from_bytes(self, bytes_data, **exclude):
"""Load state from a binary string.

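A short roundtrip sketch for the serialization touched here (to_bytes() now
goes through compat.json_dumps); the strings are illustrative:

    # Illustrative StringStore serialization roundtrip.
    from spacy.strings import StringStore

    stringstore = StringStore([u'apple', u'orange'])
    data = stringstore.to_bytes()
    restored = StringStore().from_bytes(data)
    assert u'apple' in restored
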

@@ -1,8 +1,8 @@
# coding: utf8
#cython: optimize.unpack_method_calls=False
from __future__ import unicode_literals
IDS = {
"": NIL,
"IS_ALPHA": IS_ALPHA,
@@ -464,9 +464,11 @@ IDS = {
"LAW": LAW
}
def sort_nums(x):
return x[1]
NAMES = [it[0] for it in sorted(IDS.items(), key=sort_nums)]
# Unfortunate hack here, to work around problem with long cpdef enum
# (which is generating an enormous amount of C++ in Cython 0.24+)


@@ -8,12 +8,11 @@ from cython.operator cimport preincrement as preinc
from cymem.cymem cimport Pool
from preshed.maps cimport PreshMap
import regex as re
from .strings cimport hash_string
from . import util
cimport cython
from .tokens.doc cimport Doc
from .strings cimport hash_string
from . import util
cdef class Tokenizer:
@@ -74,9 +73,8 @@ cdef class Tokenizer:
RETURNS (Doc): A container for linguistic annotations.
"""
if len(string) >= (2 ** 30):
raise ValueError(
"String is too long: %d characters. Max is 2**30." % len(string)
)
msg = "String is too long: %d characters. Max is 2**30."
raise ValueError(msg % len(string))
cdef int length = len(string)
cdef Doc doc = Doc(self.vocab)
if length == 0:
@@ -122,8 +120,8 @@ cdef class Tokenizer:
"""Tokenize a stream of texts.
texts: A sequence of unicode texts.
batch_size (int): The number of texts to accumulate in an internal buffer.
n_threads (int): The number of threads to use, if the implementation
batch_size (int): Number of texts to accumulate in an internal buffer.
n_threads (int): Number of threads to use, if the implementation
supports multi-threading. The default tokenizer is single-threaded.
YIELDS (Doc): A sequence of Doc objects, in order.
"""
@@ -232,8 +230,8 @@ cdef class Tokenizer:
if not matches:
tokens.push_back(self.vocab.get(tokens.mem, string), False)
else:
# let's say we have dyn-o-mite-dave
# the regex finds the start and end positions of the hyphens
# let's say we have dyn-o-mite-dave - the regex finds the
# start and end positions of the hyphens
start = 0
for match in matches:
infix_start = match.start()
@@ -293,8 +291,8 @@ cdef class Tokenizer:
return list(self.infix_finditer(string))
def find_prefix(self, unicode string):
"""Find the length of a prefix that should be segmented from the string,
or None if no prefix rules match.
"""Find the length of a prefix that should be segmented from the
string, or None if no prefix rules match.
string (unicode): The string to segment.
RETURNS (int): The length of the prefix if present, otherwise `None`.
@@ -305,8 +303,8 @@ cdef class Tokenizer:
return (match.end() - match.start()) if match is not None else 0
def find_suffix(self, unicode string):
"""Find the length of a suffix that should be segmented from the string,
or None if no suffix rules match.
"""Find the length of a suffix that should be segmented from the
string, or None if no suffix rules match.
string (unicode): The string to segment.
Returns (int): The length of the suffix if present, otherwise `None`.
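
To make the reflowed prefix/suffix docstrings concrete, a small sketch using the
blank English tokenizer; the example strings are illustrative:

    # Illustrative: lengths of the segments the prefix/suffix rules would split off.
    from spacy.lang.en import English

    nlp = English()
    tokenizer = nlp.tokenizer
    n_prefix = tokenizer.find_prefix(u'"Hello')   # length of the leading quote
    n_suffix = tokenizer.find_suffix(u'Hello!')   # length of the trailing "!"
    # Both return 0/None when no rule matches, per the docstrings above.
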
@@ -326,8 +324,8 @@ cdef class Tokenizer:
string (unicode): The string to specially tokenize.
token_attrs (iterable): A sequence of dicts, where each dict describes
a token and its attributes. The `ORTH` fields of the attributes must
exactly match the string when they are concatenated.
a token and its attributes. The `ORTH` fields of the attributes
must exactly match the string when they are concatenated.
"""
substrings = list(substrings)
cached = <_Cached*>self.mem.alloc(1, sizeof(_Cached))
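
And a usage sketch for add_special_case, whose docstring above stresses that the
ORTH values must concatenate back exactly to the special-cased string; the
contraction is just an example:

    # Illustrative special case: 'do' + "n't" concatenates exactly to "don't".
    from spacy.attrs import ORTH, LEMMA
    from spacy.lang.en import English

    nlp = English()
    nlp.tokenizer.add_special_case(u"don't",
                                   [{ORTH: u"do"}, {ORTH: u"n't", LEMMA: u"not"}])
    doc = nlp(u"I don't know.")
    print([t.text for t in doc])   # ['I', 'do', "n't", 'know', '.']
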
@@ -343,7 +341,7 @@ cdef class Tokenizer:
"""Save the current state to a directory.
path (unicode or Path): A path to a directory, which will be created if
it doesn't exist. Paths may be either strings or `Path`-like objects.
it doesn't exist. Paths may be either strings or Path-like objects.
"""
with path.open('wb') as file_:
file_.write(self.to_bytes(**exclude))


@@ -476,7 +476,7 @@ cdef class Span:
"""
# TODO: implement
def __get__(self):
raise NotImplementedError()
raise NotImplementedError
property n_rights:
"""RETURNS (int): The number of rightward immediate children of the
@@ -484,7 +484,7 @@ cdef class Span:
"""
# TODO: implement
def __get__(self):
raise NotImplementedError()
raise NotImplementedError
property subtree:
"""Tokens that descend from tokens in the span, but fall outside it.


@@ -1 +0,0 @@


@@ -17,8 +17,8 @@ from .compat import copy_reg, basestring_
from .lemmatizer import Lemmatizer
from .attrs import intify_attrs
from .vectors import Vectors
from . import util
from ._ml import link_vectors_to_models
from . import util
cdef class Vocab: