Tidy up and fix formatting and imports

ines 2017-04-15 13:05:15 +02:00
parent fefe6684cd
commit 0739ae7b76
15 changed files with 251 additions and 230 deletions

View File

@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 import six
 import sys
-import json
+import ujson
 try:
     import cPickle as pickle
@@ -28,14 +28,14 @@ if is_python2:
     unicode_ = unicode
     basestring_ = basestring
     input_ = raw_input
-    json_dumps = lambda data: json.dumps(data, indent=2).decode('utf8')
+    json_dumps = lambda data: ujson.dumps(data, indent=2).decode('utf8')
 elif is_python3:
     bytes_ = bytes
     unicode_ = str
     basestring_ = str
     input_ = input
-    json_dumps = lambda data: json.dumps(data, indent=2)
+    json_dumps = lambda data: ujson.dumps(data, indent=2)
 def symlink_to(orig, dest):
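
The json → ujson switch above leaves the Python 2/3 shim intact: on Python 2, ujson.dumps returns a byte string that has to be decoded, while on Python 3 it already returns str. A minimal sketch of the pattern, with the version check written out (the real is_python2 flag is defined earlier in this file; the check shown here is an assumption):

    import sys
    import ujson

    is_python2 = sys.version_info[0] == 2  # assumption: mirrors the flag used above

    if is_python2:
        # ujson.dumps returns bytes on Python 2, so decode to unicode
        json_dumps = lambda data: ujson.dumps(data, indent=2).decode('utf8')
    else:
        # on Python 3, ujson.dumps already returns str
        json_dumps = lambda data: ujson.dumps(data, indent=2)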

View File

@@ -1,3 +1,6 @@
+# coding: utf8
+from __future__ import unicode_literals
+
 from pathlib import Path
 from . import about

View File

@@ -7,17 +7,17 @@ out of "context") is in features/extractor.pyx
 The atomic feature names are listed in a big enum, so that the feature tuples
 can refer to them.
 """
-from libc.string cimport memset
+# coding: utf-8
+from __future__ import unicode_literals
+
+from libc.string cimport memset
 from itertools import combinations
+from cymem.cymem cimport Pool
 from ..structs cimport TokenC
 from .stateclass cimport StateClass
 from ._state cimport StateC
-from cymem.cymem cimport Pool
 cdef inline void fill_token(atom_t* context, const TokenC* token) nogil:
     if token is NULL:

View File

@@ -1,29 +1,26 @@
 # cython: profile=True
 # cython: cdivision=True
 # cython: infer_types=True
+# coding: utf-8
 from __future__ import unicode_literals
 from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF
 import ctypes
-import os
-from ..structs cimport TokenC
-from .stateclass cimport StateClass
-from ._state cimport StateC, is_space_token
-from .nonproj import PseudoProjectivity
-from .nonproj import is_nonproj_tree
+from libc.stdint cimport uint32_t
+from libc.string cimport memcpy
+from cymem.cymem cimport Pool
 from .transition_system cimport do_func_t, get_cost_func_t
 from .transition_system cimport move_cost_func_t, label_cost_func_t
 from ..gold cimport GoldParse
 from ..gold cimport GoldParseC
 from ..attrs cimport TAG, HEAD, DEP, ENT_IOB, ENT_TYPE, IS_SPACE
 from ..lexeme cimport Lexeme
-from libc.stdint cimport uint32_t
-from libc.string cimport memcpy
-from cymem.cymem cimport Pool
+from ..structs cimport TokenC
+from .stateclass cimport StateClass
+from ._state cimport StateC, is_space_token
+from .nonproj import PseudoProjectivity
+from .nonproj import is_nonproj_tree
 DEF NON_MONOTONIC = True

View File

@@ -1,50 +1,34 @@
+"""
+MALT-style dependency parser
+"""
 # cython: profile=True
 # cython: experimental_cpp_class_def=True
 # cython: cdivision=True
 # cython: infer_types=True
-"""
-MALT-style dependency parser
-"""
-from __future__ import unicode_literals
+# coding: utf-8
+from __future__ import unicode_literals, print_function
 cimport cython
 from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF
 from libc.stdint cimport uint32_t, uint64_t
 from libc.string cimport memset, memcpy
 from libc.stdlib cimport rand
 from libc.math cimport log, exp, isnan, isinf
-import random
-import os.path
-from os import path
-import shutil
-import json
-import math
 from cymem.cymem cimport Pool, Address
 from murmurhash.mrmr cimport real_hash64 as hash64
 from thinc.typedefs cimport weight_t, class_t, feat_t, atom_t, hash_t
-from util import Config
 from thinc.linear.features cimport ConjunctionExtracter
 from thinc.structs cimport FeatureC, ExampleC
-from thinc.extra.search cimport Beam
-from thinc.extra.search cimport MaxViolation
+from thinc.extra.search cimport Beam, MaxViolation
 from thinc.extra.eg cimport Example
 from thinc.extra.mb cimport Minibatch
 from ..structs cimport TokenC
 from ..tokens.doc cimport Doc
 from ..strings cimport StringStore
 from .transition_system cimport TransitionSystem, Transition
 from ..gold cimport GoldParse
 from . import _parse_features
 from ._parse_features cimport CONTEXT_SIZE
 from ._parse_features cimport fill_context
@@ -266,4 +250,3 @@ def is_gold(StateClass state, GoldParse gold, StringStore strings):
         id_, word, tag, head, dep, ner = gold.orig_annot[gold.cand_to_gold[i]]
         truth.add((id_, head, dep))
     return truth == predicted

View File

@@ -1,9 +1,14 @@
-from spacy.parts_of_speech cimport NOUN, PROPN, PRON
+# coding: utf-8
+from __future__ import unicode_literals
+
+from ..parts_of_speech cimport NOUN, PROPN, PRON
 def english_noun_chunks(obj):
-    '''Detect base noun phrases from a dependency parse.
-    Works on both Doc and Span.'''
+    """
+    Detect base noun phrases from a dependency parse.
+    Works on both Doc and Span.
+    """
     labels = ['nsubj', 'dobj', 'nsubjpass', 'pcomp', 'pobj',
               'attr', 'ROOT', 'root']
     doc = obj.doc  # Ensure works on both Doc and Span.
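
english_noun_chunks is the chunker behind the noun_chunks iterator on Doc and Span (it is wired up through the CHUNKERS table imported in doc.pyx further down). A usage sketch, assuming an installed English model:

    import spacy

    nlp = spacy.load('en')  # assumption: an English model is installed
    doc = nlp(u'The quick brown fox jumps over the lazy dog.')
    for chunk in doc.noun_chunks:
        # each chunk is a Span whose root carries one of the labels
        # whitelisted above ('nsubj', 'dobj', 'pobj', ...)
        print(chunk.text, chunk.root.dep_)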

View File

@@ -1,17 +1,16 @@
+# coding: utf-8
 from __future__ import unicode_literals
-from .transition_system cimport Transition
-from .transition_system cimport do_func_t
-from ..structs cimport TokenC, Entity
 from thinc.typedefs cimport weight_t
-from ..gold cimport GoldParseC
-from ..gold cimport GoldParse
-from ..attrs cimport ENT_TYPE, ENT_IOB
 from .stateclass cimport StateClass
 from ._state cimport StateC
+from .transition_system cimport Transition
+from .transition_system cimport do_func_t
+from ..structs cimport TokenC, Entity
+from ..gold cimport GoldParseC
+from ..gold cimport GoldParse
+from ..attrs cimport ENT_TYPE, ENT_IOB
 cdef enum:

View File

@@ -1,8 +1,9 @@
+# coding: utf-8
 from __future__ import unicode_literals
 from copy import copy
 from ..tokens.doc cimport Doc
-from spacy.attrs import DEP, HEAD
+from ..attrs import DEP, HEAD
 def ancestors(tokenid, heads):
@@ -201,5 +202,3 @@ class PseudoProjectivity:
                 filtered_sents.append(((ids,words,tags,heads,filtered_labels,iob), ctnts))
             filtered.append((raw_text, filtered_sents))
         return filtered

View File

@@ -1,56 +1,44 @@
-# cython: infer_types=True
 """
 MALT-style dependency parser
 """
+# coding: utf-8
+# cython: infer_types=True
 from __future__ import unicode_literals
+from collections import Counter
+import ujson
 cimport cython
 cimport cython.parallel
 from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF
 from cpython.exc cimport PyErr_CheckSignals
 from libc.stdint cimport uint32_t, uint64_t
 from libc.string cimport memset, memcpy
 from libc.stdlib cimport malloc, calloc, free
-import os.path
-from collections import Counter
-from os import path
-import shutil
-import json
-import sys
-from .nonproj import PseudoProjectivity
+from cymem.cymem cimport Pool, Address
+from murmurhash.mrmr cimport hash64
 from thinc.typedefs cimport weight_t, class_t, feat_t, atom_t, hash_t
 from thinc.linear.avgtron cimport AveragedPerceptron
 from thinc.linalg cimport VecVec
-from thinc.structs cimport SparseArrayC
-from cymem.cymem cimport Pool, Address
-from murmurhash.mrmr cimport hash64
+from thinc.structs cimport SparseArrayC, FeatureC, ExampleC
+from thinc.extra.eg cimport Example
 from preshed.maps cimport MapStruct
 from preshed.maps cimport map_get
-from thinc.structs cimport FeatureC
-from thinc.structs cimport ExampleC
-from thinc.extra.eg cimport Example
-from util import Config
-from ..structs cimport TokenC
-from ..tokens.doc cimport Doc
-from ..strings cimport StringStore
-from .transition_system import OracleError
-from .transition_system cimport TransitionSystem, Transition
-from ..gold cimport GoldParse
 from . import _parse_features
 from ._parse_features cimport CONTEXT_SIZE
 from ._parse_features cimport fill_context
 from .stateclass cimport StateClass
 from ._state cimport StateC
+from .nonproj import PseudoProjectivity
+from .transition_system import OracleError
+from .transition_system cimport TransitionSystem, Transition
+from ..structs cimport TokenC
+from ..tokens.doc cimport Doc
+from ..strings cimport StringStore
+from ..gold cimport GoldParse
 USE_FTRL = False
 DEBUG = False
@@ -80,7 +68,9 @@ cdef class ParserModel(AveragedPerceptron):
         return nr_feat
     def update(self, Example eg, itn=0):
-        '''Does regression on negative cost. Sort of cute?'''
+        """
+        Does regression on negative cost. Sort of cute?
+        """
         self.time += 1
         cdef int best = arg_max_if_gold(eg.c.scores, eg.c.costs, eg.c.nr_class)
         cdef int guess = eg.guess
@@ -132,10 +122,13 @@ cdef class ParserModel(AveragedPerceptron):
 cdef class Parser:
-    """Base class of the DependencyParser and EntityRecognizer."""
+    """
+    Base class of the DependencyParser and EntityRecognizer.
+    """
     @classmethod
     def load(cls, path, Vocab vocab, TransitionSystem=None, require=False, **cfg):
-        """Load the statistical model from the supplied path.
+        """
+        Load the statistical model from the supplied path.
         Arguments:
             path (Path):
@@ -148,7 +141,7 @@ cdef class Parser:
             The newly constructed object.
         """
         with (path / 'config.json').open() as file_:
-            cfg = json.load(file_)
+            cfg = ujson.load(file_)
         # TODO: remove this shim when we don't have to support older data
         if 'labels' in cfg and 'actions' not in cfg:
             cfg['actions'] = cfg.pop('labels')
@@ -168,7 +161,8 @@
         return self
     def __init__(self, Vocab vocab, TransitionSystem=None, ParserModel model=None, **cfg):
-        """Create a Parser.
+        """
+        Create a Parser.
         Arguments:
             vocab (Vocab):
@@ -198,7 +192,8 @@ cdef class Parser:
         return (Parser, (self.vocab, self.moves, self.model), None, None)
     def __call__(self, Doc tokens):
-        """Apply the entity recognizer, setting the annotations onto the Doc object.
+        """
+        Apply the entity recognizer, setting the annotations onto the Doc object.
         Arguments:
             doc (Doc): The document to be processed.
@@ -215,7 +210,8 @@
         self.moves.finalize_doc(tokens)
     def pipe(self, stream, int batch_size=1000, int n_threads=2):
-        """Process a stream of documents.
+        """
+        Process a stream of documents.
         Arguments:
             stream: The sequence of documents to process.
@@ -303,7 +299,8 @@
         return 0
     def update(self, Doc tokens, GoldParse gold, itn=0):
-        """Update the statistical model.
+        """
+        Update the statistical model.
         Arguments:
             doc (Doc):
@@ -342,7 +339,8 @@
         return loss
     def step_through(self, Doc doc, GoldParse gold=None):
-        """Set up a stepwise state, to introspect and control the transition sequence.
+        """
+        Set up a stepwise state, to introspect and control the transition sequence.
         Arguments:
             doc (Doc): The document to step through.
@@ -426,7 +424,9 @@ cdef class StepwiseState:
     @property
     def costs(self):
-        '''Find the action-costs for the current state'''
+        """
+        Find the action-costs for the current state.
+        """
        self.parser.moves.set_costs(self.eg.c.is_valid, self.eg.c.costs,
                                    self.stcls, self.gold)
        costs = {}
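
The stepwise API touched in the last two hunks can be driven roughly like this; a sketch only, using just the names visible in this diff (step_through and the costs property):

    import spacy

    nlp = spacy.load('en')                       # assumption: English model installed
    doc = nlp.tokenizer(u'This is a sentence.')  # tokenize only, leave it unparsed
    state = nlp.parser.step_through(doc)         # returns a StepwiseState
    # With a GoldParse supplied as the second argument, state.costs maps
    # each valid transition to its cost for the current state.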

View File

@@ -1,5 +1,9 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
 from libc.string cimport memcpy, memset
 from libc.stdint cimport uint32_t
+
 from ..vocab cimport EMPTY_LEXEME
 from ..structs cimport Entity
 from ..lexeme cimport Lexeme

View File

@@ -1,4 +1,8 @@
 # cython: infer_types=True
+# coding: utf-8
+from __future__ import unicode_literals
+
+from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF
 from cymem.cymem cimport Pool
 from thinc.typedefs cimport weight_t
 from collections import defaultdict
@@ -6,7 +10,6 @@ from collections import defaultdict
 from ..structs cimport TokenC
 from .stateclass cimport StateClass
 from ..attrs cimport TAG, HEAD, DEP, ENT_TYPE, ENT_IOB
-from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF
 cdef weight_t MIN_SCORE = -90000

View File

@@ -1,18 +0,0 @@
-from os import path
-import json
-
-class Config(object):
-    def __init__(self, **kwargs):
-        for key, value in kwargs.items():
-            setattr(self, key, value)
-
-    def get(self, attr, default=None):
-        return self.__dict__.get(attr, default)
-
-    @classmethod
-    def write(cls, model_dir, name, **kwargs):
-        open(path.join(model_dir, '%s.json' % name), 'w').write(json.dumps(kwargs))
-
-    @classmethod
-    def read(cls, model_dir, name):
-        return cls(**json.load(open(path.join(model_dir, '%s.json' % name))))
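
The deleted Config helper was a thin wrapper around a JSON file on disk; after this commit, callers read config.json directly with ujson, as the Parser.load hunk above shows. A sketch of the replacement pattern (model_path is a hypothetical pathlib.Path):

    import ujson

    with (model_path / 'config.json').open() as file_:
        cfg = ujson.load(file_)
    # back-compat shim from Parser.load: older models stored 'labels'
    if 'labels' in cfg and 'actions' not in cfg:
        cfg['actions'] = cfg.pop('labels')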

View File

@@ -1,15 +1,18 @@
+# coding: utf8
+from __future__ import unicode_literals
+
 cimport cython
+cimport numpy as np
+import numpy
+import numpy.linalg
+import struct
 from libc.string cimport memcpy, memset
 from libc.stdint cimport uint32_t
 from libc.math cimport sqrt
-import numpy
-import numpy.linalg
-import struct
-cimport numpy as np
-import six
-import warnings
+from .span cimport Span
+from .token cimport Token
 from ..lexeme cimport Lexeme
 from ..lexeme cimport EMPTY_LEXEME
 from ..typedefs cimport attr_t, flags_t
@@ -19,11 +22,10 @@ from ..attrs cimport POS, LEMMA, TAG, DEP, HEAD, SPACY, ENT_IOB, ENT_TYPE
 from ..parts_of_speech cimport CCONJ, PUNCT, NOUN
 from ..parts_of_speech cimport univ_pos_t
 from ..lexeme cimport Lexeme
-from .span cimport Span
-from .token cimport Token
 from ..serialize.bits cimport BitArray
 from ..util import normalize_slice
 from ..syntax.iterators import CHUNKERS
+from ..compat import is_config
 DEF PADDING = 5
@@ -76,7 +78,7 @@ cdef class Doc:
     """
     def __init__(self, Vocab vocab, words=None, spaces=None, orths_and_spaces=None):
-        '''
+        """
         Create a Doc object.
         Aside: Implementation
@@ -97,7 +99,7 @@
             A list of boolean values, of the same length as words. True
             means that the word is followed by a space, False means it is not.
             If None, defaults to [True]*len(words)
-        '''
+        """
         self.vocab = vocab
         size = 20
         self.mem = Pool()
@@ -158,7 +160,7 @@
         self.is_parsed = True
     def __getitem__(self, object i):
-        '''
+        """
        doc[i]
        Get the Token object at position i, where i is an integer.
        Negative indexing is supported, and follows the usual Python
@@ -172,7 +174,7 @@
        are not supported, as `Span` objects must be contiguous (cannot have gaps).
        You can use negative indices and open-ended ranges, which have their
        normal Python semantics.
-        '''
+        """
        if isinstance(i, slice):
            start, stop = normalize_slice(len(self), i.start, i.stop, i.step)
            return Span(self, start, stop, label=0)
@@ -186,7 +188,7 @@
            return Token.cinit(self.vocab, &self.c[i], i, self)
    def __iter__(self):
-        '''
+        """
        for token in doc
        Iterate over `Token` objects, from which the annotations can
        be easily accessed. This is the main way of accessing Token
@@ -194,7 +196,7 @@
        Python. If faster-than-Python speeds are required, you can
        instead access the annotations as a numpy array, or access the
        underlying C data directly from Cython.
-        '''
+        """
        cdef int i
        for i in range(self.length):
            if self._py_tokens[i] is not None:
@@ -203,10 +205,10 @@
            yield Token.cinit(self.vocab, &self.c[i], i, self)
    def __len__(self):
-        '''
+        """
        len(doc)
        The number of tokens in the document.
-        '''
+        """
        return self.length
    def __unicode__(self):
@@ -216,7 +218,7 @@
        return u''.join([t.text_with_ws for t in self]).encode('utf-8')
    def __str__(self):
-        if six.PY3:
+        if is_config(python3=True):
            return self.__unicode__()
        return self.__bytes__()
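
is_config(python3=True) replaces the direct six.PY3 test, so every environment check goes through spacy.compat. The helper itself is not shown in this diff; presumably it looks something like this sketch:

    # sketch only; the names mirror how is_config is called in this diff
    import sys

    is_python2 = sys.version_info[0] == 2
    is_python3 = sys.version_info[0] == 3

    def is_config(python2=None, python3=None):
        # each keyword is either None (don't care) or must match the runtime
        return (python2 in (None, is_python2) and
                python3 in (None, is_python3))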
@@ -228,7 +230,8 @@
        return self
    def similarity(self, other):
-        '''Make a semantic similarity estimate. The default estimate is cosine
+        """
+        Make a semantic similarity estimate. The default estimate is cosine
        similarity using an average of word vectors.
        Arguments:
@@ -237,7 +240,7 @@
        Return:
            score (float): A scalar similarity score. Higher is more similar.
-        '''
+        """
        if 'similarity' in self.user_hooks:
            return self.user_hooks['similarity'](self, other)
        if self.vector_norm == 0 or other.vector_norm == 0:
@@ -245,9 +248,9 @@
        return numpy.dot(self.vector, other.vector) / (self.vector_norm * other.vector_norm)
    property has_vector:
-        '''
+        """
        A boolean value indicating whether a word vector is associated with the object.
-        '''
+        """
        def __get__(self):
            if 'has_vector' in self.user_hooks:
                return self.user_hooks['has_vector'](self)
@@ -255,11 +258,11 @@
            return any(token.has_vector for token in self)
    property vector:
-        '''
+        """
        A real-valued meaning representation. Defaults to an average of the token vectors.
        Type: numpy.ndarray[ndim=1, dtype='float32']
-        '''
+        """
        def __get__(self):
            if 'vector' in self.user_hooks:
                return self.user_hooks['vector'](self)
@@ -294,17 +297,21 @@
            return self.text
    property text:
-        '''A unicode representation of the document text.'''
+        """
+        A unicode representation of the document text.
+        """
        def __get__(self):
            return u''.join(t.text_with_ws for t in self)
    property text_with_ws:
-        '''An alias of Doc.text, provided for duck-type compatibility with Span and Token.'''
+        """
+        An alias of Doc.text, provided for duck-type compatibility with Span and Token.
+        """
        def __get__(self):
            return self.text
    property ents:
-        '''
+        """
        Yields named-entity `Span` objects, if the entity recognizer
        has been applied to the document. Iterate over the span to get
        individual Token objects, or access the label:
@@ -318,7 +325,7 @@
            assert ents[0].label_ == 'PERSON'
            assert ents[0].orth_ == 'Best'
            assert ents[0].text == 'Mr. Best'
-        '''
+        """
        def __get__(self):
            cdef int i
            cdef const TokenC* token
@@ -382,13 +389,13 @@
            self.c[start].ent_iob = 3
    property noun_chunks:
-        '''
+        """
        Yields base noun-phrase #[code Span] objects, if the document
        has been syntactically parsed. A base noun phrase, or
        'NP chunk', is a noun phrase that does not permit other NPs to
        be nested within it so no NP-level coordination, no prepositional
-        phrases, and no relative clauses. For example:
-        '''
+        phrases, and no relative clauses.
+        """
        def __get__(self):
            if not self.is_parsed:
                raise ValueError(
@@ -496,7 +503,8 @@
        return output
    def count_by(self, attr_id_t attr_id, exclude=None, PreshCounter counts=None):
-        """Produce a dict of {attribute (int): count (ints)} frequencies, keyed
+        """
+        Produce a dict of {attribute (int): count (ints)} frequencies, keyed
        by the values of the given attribute ID.
        Example:
@@ -563,8 +571,9 @@
            self.c[i] = parsed[i]
    def from_array(self, attrs, array):
-        '''Write to a `Doc` object, from an `(M, N)` array of attributes.
-        '''
+        """
+        Write to a `Doc` object, from an `(M, N)` array of attributes.
+        """
        cdef int i, col
        cdef attr_id_t attr_id
        cdef TokenC* tokens = self.c
@@ -603,19 +612,23 @@
        return self
    def to_bytes(self):
-        '''Serialize, producing a byte string.'''
+        """
+        Serialize, producing a byte string.
+        """
        byte_string = self.vocab.serializer.pack(self)
        cdef uint32_t length = len(byte_string)
        return struct.pack('I', length) + byte_string
    def from_bytes(self, data):
-        '''Deserialize, loading from bytes.'''
+        """
+        Deserialize, loading from bytes.
+        """
        self.vocab.serializer.unpack_into(data[4:], self)
        return self
    @staticmethod
    def read_bytes(file_):
-        '''
+        """
        A static method, used to read serialized #[code Doc] objects from
        a file. For example:
@@ -630,7 +643,7 @@
            for byte_string in Doc.read_bytes(file_):
                docs.append(Doc(nlp.vocab).from_bytes(byte_string))
        assert len(docs) == 2
-        '''
+        """
        keep_reading = True
        while keep_reading:
            try:
@@ -644,7 +657,8 @@
            yield n_bytes_str + data
    def merge(self, int start_idx, int end_idx, *args, **attributes):
-        """Retokenize the document, such that the span at doc.text[start_idx : end_idx]
+        """
+        Retokenize the document, such that the span at doc.text[start_idx : end_idx]
        is merged into a single token. If start_idx and end_idx do not mark start
        and end token boundaries, the document remains unchanged.
@@ -658,7 +672,6 @@
        token (Token):
            The newly merged token, or None if the start and end indices did
            not fall at token boundaries.
-
        """
        cdef unicode tag, lemma, ent_type
        if len(args) == 3:
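
A usage sketch for Doc.merge as documented above: the character offsets must line up with token boundaries, otherwise None comes back (the offsets below are illustrative):

    import spacy

    nlp = spacy.load('en')  # assumption: an English model is installed
    doc = nlp(u'I like New York in Autumn.')
    # merge the span covering doc.text[7:15], i.e. 'New York', into one token
    token = doc.merge(7, 15, u'NNP', u'New York', u'GPE')  # tag, lemma, ent_type
    if token is not None:
        assert token.text == u'New York'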

View File

@@ -1,26 +1,31 @@
+# coding: utf8
 from __future__ import unicode_literals
 from collections import defaultdict
+cimport numpy as np
 import numpy
 import numpy.linalg
-cimport numpy as np
 from libc.math cimport sqrt
-import six
+from .doc cimport token_by_start, token_by_end
 from ..structs cimport TokenC, LexemeC
 from ..typedefs cimport flags_t, attr_t, hash_t
 from ..attrs cimport attr_id_t
 from ..parts_of_speech cimport univ_pos_t
 from ..util import normalize_slice
-from .doc cimport token_by_start, token_by_end
 from ..attrs cimport IS_PUNCT, IS_SPACE
 from ..lexeme cimport Lexeme
+from ..compat import is_config
 cdef class Span:
-    """A slice from a Doc object."""
+    """
+    A slice from a Doc object.
+    """
    def __cinit__(self, Doc doc, int start, int end, int label=0, vector=None,
                  vector_norm=None):
-        '''Create a Span object from the slice doc[start : end]
+        """
+        Create a Span object from the slice doc[start : end]
        Arguments:
            doc (Doc): The parent document.
@@ -30,7 +35,7 @@ cdef class Span:
            vector (ndarray[ndim=1, dtype='float32']): A meaning representation of the span.
        Returns:
            Span The newly constructed object.
-        '''
+        """
        if not (0 <= start <= end <= len(doc)):
            raise IndexError
@@ -68,7 +73,7 @@
        return self.end - self.start
    def __repr__(self):
-        if six.PY3:
+        if is_config(python3=True):
            return self.text
        return self.text.encode('utf-8')
@@ -89,7 +94,8 @@
            yield self.doc[i]
    def merge(self, *args, **attributes):
-        """Retokenize the document, such that the span is merged into a single token.
+        """
+        Retokenize the document, such that the span is merged into a single token.
        Arguments:
            **attributes:
@@ -102,7 +108,8 @@
        return self.doc.merge(self.start_char, self.end_char, *args, **attributes)
    def similarity(self, other):
-        '''Make a semantic similarity estimate. The default estimate is cosine
+        """
+        Make a semantic similarity estimate. The default estimate is cosine
        similarity using an average of word vectors.
        Arguments:
@@ -111,7 +118,7 @@
        Return:
            score (float): A scalar similarity score. Higher is more similar.
-        '''
+        """
        if 'similarity' in self.doc.user_span_hooks:
            self.doc.user_span_hooks['similarity'](self, other)
        if self.vector_norm == 0.0 or other.vector_norm == 0.0:
@@ -133,11 +140,12 @@
        self.end = end + 1
    property sent:
-        '''The sentence span that this span is a part of.
+        """
+        The sentence span that this span is a part of.
        Returns:
            Span The sentence this is part of.
-        '''
+        """
        def __get__(self):
            if 'sent' in self.doc.user_span_hooks:
                return self.doc.user_span_hooks['sent'](self)
@@ -198,13 +206,13 @@
        return u''.join([t.text_with_ws for t in self])
    property noun_chunks:
-        '''
+        """
        Yields base noun-phrase #[code Span] objects, if the document
        has been syntactically parsed. A base noun phrase, or
        'NP chunk', is a noun phrase that does not permit other NPs to
        be nested within it so no NP-level coordination, no prepositional
-        phrases, and no relative clauses. For example:
-        '''
+        phrases, and no relative clauses.
+        """
        def __get__(self):
            if not self.doc.is_parsed:
                raise ValueError(
@@ -223,17 +231,16 @@
            yield span
    property root:
-        """The token within the span that's highest in the parse tree. If there's a tie, the earlist is prefered.
+        """
+        The token within the span that's highest in the parse tree. If there's a
+        tie, the earlist is prefered.
        Returns:
            Token: The root token.
-            i.e. has the
-        shortest path to the root of the sentence (or is the root itself).
-        If multiple words are equally high in the tree, the first word is taken.
-        For example:
+        i.e. has the shortest path to the root of the sentence (or is the root
+        itself). If multiple words are equally high in the tree, the first word
+        is taken. For example:
        >>> toks = nlp(u'I like New York in Autumn.')
@@ -303,7 +310,8 @@
        return self.doc[root]
    property lefts:
-        """Tokens that are to the left of the span, whose head is within the Span.
+        """
+        Tokens that are to the left of the span, whose head is within the Span.
        Yields: Token A left-child of a token of the span.
        """
@@ -314,7 +322,8 @@
            yield left
    property rights:
-        """Tokens that are to the right of the Span, whose head is within the Span.
+        """
+        Tokens that are to the right of the Span, whose head is within the Span.
        Yields: Token A right-child of a token of the span.
        """
@@ -325,7 +334,8 @@
            yield right
    property subtree:
-        """Tokens that descend from tokens in the span, but fall outside it.
+        """
+        Tokens that descend from tokens in the span, but fall outside it.
        Yields: Token A descendant of a token within the span.
        """
@@ -337,7 +347,9 @@
            yield from word.subtree
    property ent_id:
-        '''An (integer) entity ID. Usually assigned by patterns in the Matcher.'''
+        """
+        An (integer) entity ID. Usually assigned by patterns in the Matcher.
+        """
        def __get__(self):
            return self.root.ent_id
@@ -345,9 +357,11 @@
            # TODO
            raise NotImplementedError(
                "Can't yet set ent_id from Span. Vote for this feature on the issue "
-                "tracker: http://github.com/spacy-io/spaCy")
+                "tracker: http://github.com/explosion/spaCy/issues")
    property ent_id_:
-        '''A (string) entity ID. Usually assigned by patterns in the Matcher.'''
+        """
+        A (string) entity ID. Usually assigned by patterns in the Matcher.
+        """
        def __get__(self):
            return self.root.ent_id_
@@ -355,7 +369,7 @@
            # TODO
            raise NotImplementedError(
                "Can't yet set ent_id_ from Span. Vote for this feature on the issue "
-                "tracker: http://github.com/spacy-io/spaCy")
+                "tracker: http://github.com/explosion/spaCy/issues")
    property orth_:
        def __get__(self):
@@ -397,5 +411,5 @@ cdef int _count_words_to_root(const TokenC* token, int sent_length) except -1:
        raise RuntimeError(
            "Array bounds exceeded while searching for root word. This likely "
            "means the parse tree is in an invalid state. Please report this "
-            "issue here: http://github.com/honnibal/spaCy/")
+            "issue here: http://github.com/explosion/spaCy/issues")
    return n
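
Putting the Span accessors touched above together; illustrative only, assuming an English model with a parser and word vectors:

    import spacy

    nlp = spacy.load('en')
    doc = nlp(u'I like New York in Autumn.')
    span = doc[2:4]                   # the Span 'New York'
    assert span.root.text == 'York'   # highest token in the parse tree
    print(span.sent.text)             # the enclosing sentence
    print(span.similarity(doc[4:6]))  # cosine over averaged word vectors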

View File

@@ -1,5 +1,5 @@
+# coding: utf8
 # cython: infer_types=True
-# coding: utf8
 from __future__ import unicode_literals
 from libc.string cimport memcpy
@@ -8,20 +8,15 @@ from cpython.mem cimport PyMem_Malloc, PyMem_Free
 from cython.view cimport array as cvarray
 cimport numpy as np
 np.import_array()
 import numpy
-import six
 from ..typedefs cimport hash_t
 from ..lexeme cimport Lexeme
 from .. import parts_of_speech
-from ..attrs cimport LEMMA
 from ..attrs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
 from ..attrs cimport POS, LEMMA, TAG, DEP
 from ..parts_of_speech cimport CCONJ, PUNCT
 from ..attrs cimport IS_ALPHA, IS_ASCII, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_SPACE
 from ..attrs cimport IS_BRACKET
 from ..attrs cimport IS_QUOTE
@@ -29,12 +24,13 @@ from ..attrs cimport IS_LEFT_PUNCT
 from ..attrs cimport IS_RIGHT_PUNCT
 from ..attrs cimport IS_TITLE, IS_UPPER, LIKE_URL, LIKE_NUM, LIKE_EMAIL, IS_STOP
 from ..attrs cimport IS_OOV
 from ..lexeme cimport Lexeme
+from ..compat import is_config
 cdef class Token:
-    """An individual token --- i.e. a word, punctuation symbol, whitespace, etc.
+    """
+    An individual token --- i.e. a word, punctuation symbol, whitespace, etc.
     """
    def __cinit__(self, Vocab vocab, Doc doc, int offset):
        self.vocab = vocab
@@ -46,7 +42,9 @@ cdef class Token:
        return hash((self.doc, self.i))
    def __len__(self):
-        '''Number of unicode characters in token.text'''
+        """
+        Number of unicode characters in token.text.
+        """
        return self.c.lex.length
    def __unicode__(self):
@@ -56,7 +54,7 @@
        return self.text.encode('utf8')
    def __str__(self):
-        if six.PY3:
+        if is_config(python3=True):
            return self.__unicode__()
        return self.__bytes__()
@@ -83,27 +81,30 @@
        raise ValueError(op)
    cpdef bint check_flag(self, attr_id_t flag_id) except -1:
-        '''Check the value of a boolean flag.
+        """
+        Check the value of a boolean flag.
        Arguments:
            flag_id (int): The ID of the flag attribute.
        Returns:
            is_set (bool): Whether the flag is set.
-        '''
+        """
        return Lexeme.c_check_flag(self.c.lex, flag_id)
    def nbor(self, int i=1):
-        '''Get a neighboring token.
+        """
+        Get a neighboring token.
        Arguments:
            i (int): The relative position of the token to get. Defaults to 1.
        Returns:
            neighbor (Token): The token at position self.doc[self.i+i]
-        '''
+        """
        return self.doc[self.i+i]
    def similarity(self, other):
-        '''Compute a semantic similarity estimate. Defaults to cosine over vectors.
+        """
+        Compute a semantic similarity estimate. Defaults to cosine over vectors.
        Arguments:
            other:
@@ -111,7 +112,7 @@
                Token and Lexeme objects.
        Returns:
            score (float): A scalar similarity score. Higher is more similar.
-        '''
+        """
        if 'similarity' in self.doc.user_token_hooks:
            return self.doc.user_token_hooks['similarity'](self)
        if self.vector_norm == 0 or other.vector_norm == 0:
@@ -209,9 +210,9 @@
        self.c.dep = label
    property has_vector:
-        '''
+        """
        A boolean value indicating whether a word vector is associated with the object.
-        '''
+        """
        def __get__(self):
            if 'has_vector' in self.doc.user_token_hooks:
                return self.doc.user_token_hooks['has_vector'](self)
@@ -223,11 +224,11 @@
            return False
    property vector:
-        '''
+        """
        A real-valued meaning representation.
        Type: numpy.ndarray[ndim=1, dtype='float32']
-        '''
+        """
        def __get__(self):
            if 'vector' in self.doc.user_token_hooks:
                return self.doc.user_token_hooks['vector'](self)
@@ -245,6 +246,7 @@
    property repvec:
        def __get__(self):
            raise AttributeError("repvec was renamed to vector in v0.100")
+
    property has_repvec:
        def __get__(self):
            raise AttributeError("has_repvec was renamed to has_vector in v0.100")
@@ -265,7 +267,8 @@
    property lefts:
        def __get__(self):
-            """The leftward immediate children of the word, in the syntactic
+            """
+            The leftward immediate children of the word, in the syntactic
            dependency parse.
            """
            cdef int nr_iter = 0
@@ -282,8 +285,10 @@
    property rights:
        def __get__(self):
-            """The rightward immediate children of the word, in the syntactic
-            dependency parse."""
+            """
+            The rightward immediate children of the word, in the syntactic
+            dependency parse.
+            """
            cdef const TokenC* ptr = self.c + (self.c.r_edge - self.i)
            tokens = []
            cdef int nr_iter = 0
@@ -300,19 +305,21 @@
            yield t
    property children:
-        '''A sequence of the token's immediate syntactic children.
+        """
+        A sequence of the token's immediate syntactic children.
        Yields: Token A child token such that child.head==self
-        '''
+        """
        def __get__(self):
            yield from self.lefts
            yield from self.rights
    property subtree:
-        '''A sequence of all the token's syntactic descendents.
+        """
+        A sequence of all the token's syntactic descendents.
        Yields: Token A descendent token such that self.is_ancestor(descendent)
-        '''
+        """
        def __get__(self):
            for word in self.lefts:
                yield from word.subtree
@@ -321,26 +328,29 @@
                yield from word.subtree
    property left_edge:
-        '''The leftmost token of this token's syntactic descendents.
+        """
+        The leftmost token of this token's syntactic descendents.
        Returns: Token The first token such that self.is_ancestor(token)
-        '''
+        """
        def __get__(self):
            return self.doc[self.c.l_edge]
    property right_edge:
-        '''The rightmost token of this token's syntactic descendents.
+        """
+        The rightmost token of this token's syntactic descendents.
        Returns: Token The last token such that self.is_ancestor(token)
-        '''
+        """
        def __get__(self):
            return self.doc[self.c.r_edge]
    property ancestors:
-        '''A sequence of this token's syntactic ancestors.
+        """
+        A sequence of this token's syntactic ancestors.
        Yields: Token A sequence of ancestor tokens such that ancestor.is_ancestor(self)
-        '''
+        """
        def __get__(self):
            cdef const TokenC* head_ptr = self.c
            # guard against infinite loop, no token can have
@@ -356,25 +366,29 @@
        return self.is_ancestor(descendant)
    def is_ancestor(self, descendant):
-        '''Check whether this token is a parent, grandparent, etc. of another
+        """
+        Check whether this token is a parent, grandparent, etc. of another
        in the dependency tree.
        Arguments:
            descendant (Token): Another token.
        Returns:
            is_ancestor (bool): Whether this token is the ancestor of the descendant.
-        '''
+        """
        if self.doc is not descendant.doc:
            return False
        return any( ancestor.i == self.i for ancestor in descendant.ancestors )
    property head:
-        '''The syntactic parent, or "governor", of this token.
+        """
+        The syntactic parent, or "governor", of this token.
        Returns: Token
-        '''
+        """
        def __get__(self):
-            """The token predicted by the parser to be the head of the current token."""
+            """
+            The token predicted by the parser to be the head of the current token.
+            """
            return self.doc[self.i + self.c.head]
        def __set__(self, Token new_head):
            # this function sets the head of self to new_head
@@ -467,10 +481,11 @@
        self.c.head = rel_newhead_i
    property conjuncts:
-        '''A sequence of coordinated tokens, including the token itself.
+        """
+        A sequence of coordinated tokens, including the token itself.
        Yields: Token A coordinated token
-        '''
+        """
        def __get__(self):
            """Get a list of conjoined words."""
            cdef Token word
@@ -501,7 +516,9 @@
        return iob_strings[self.c.ent_iob]
    property ent_id:
-        '''An (integer) entity ID. Usually assigned by patterns in the Matcher.'''
+        """
+        An (integer) entity ID. Usually assigned by patterns in the Matcher.
+        """
        def __get__(self):
            return self.c.ent_id
@@ -509,7 +526,9 @@
            self.c.ent_id = key
    property ent_id_:
-        '''A (string) entity ID. Usually assigned by patterns in the Matcher.'''
+        """
+        A (string) entity ID. Usually assigned by patterns in the Matcher.
+        """
        def __get__(self):
            return self.vocab.strings[self.c.ent_id]
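
And a matching sketch for the Token tree-navigation properties documented above (assumes a parsed doc):

    import spacy

    nlp = spacy.load('en')  # assumption: an English model is installed
    doc = nlp(u'I like New York in Autumn.')
    like = doc[1]
    assert like.head.i == like.i                      # the root's head is itself
    print([w.text for w in like.children])            # immediate syntactic children
    print(like.left_edge.text, like.right_edge.text)  # bounds of its subtree
    assert like.is_ancestor(doc[2])                   # 'like' dominates 'New'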