spaCy/spacy/spacy.cpp

3558 lines
138 KiB
C++
Raw Normal View History

/* Generated by Cython 0.20.1 on Mon Jul 7 04:19:15 2014 */
#define PY_SSIZE_T_CLEAN
#ifndef CYTHON_USE_PYLONG_INTERNALS
#ifdef PYLONG_BITS_IN_DIGIT
#define CYTHON_USE_PYLONG_INTERNALS 0
#else
#include "pyconfig.h"
#ifdef PYLONG_BITS_IN_DIGIT
#define CYTHON_USE_PYLONG_INTERNALS 1
#else
#define CYTHON_USE_PYLONG_INTERNALS 0
#endif
#endif
#endif
#include "Python.h"
#ifndef Py_PYTHON_H
#error Python headers needed to compile C extensions, please install development version of Python.
#elif PY_VERSION_HEX < 0x02040000
#error Cython requires Python 2.4+.
#else
#define CYTHON_ABI "0_20_1"
#include <stddef.h> /* For offsetof */
#ifndef offsetof
#define offsetof(type, member) ( (size_t) & ((type*)0) -> member )
#endif
#if !defined(WIN32) && !defined(MS_WINDOWS)
#ifndef __stdcall
#define __stdcall
#endif
#ifndef __cdecl
#define __cdecl
#endif
#ifndef __fastcall
#define __fastcall
#endif
#endif
#ifndef DL_IMPORT
#define DL_IMPORT(t) t
#endif
#ifndef DL_EXPORT
#define DL_EXPORT(t) t
#endif
#ifndef PY_LONG_LONG
#define PY_LONG_LONG LONG_LONG
#endif
#ifndef Py_HUGE_VAL
#define Py_HUGE_VAL HUGE_VAL
#endif
#ifdef PYPY_VERSION
#define CYTHON_COMPILING_IN_PYPY 1
#define CYTHON_COMPILING_IN_CPYTHON 0
#else
#define CYTHON_COMPILING_IN_PYPY 0
#define CYTHON_COMPILING_IN_CPYTHON 1
#endif
#if CYTHON_COMPILING_IN_PYPY
#define Py_OptimizeFlag 0
#endif
#if PY_VERSION_HEX < 0x02050000
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#define PY_FORMAT_SIZE_T ""
#define CYTHON_FORMAT_SSIZE_T ""
#define PyInt_FromSsize_t(z) PyInt_FromLong(z)
#define PyInt_AsSsize_t(o) __Pyx_PyInt_As_int(o)
#define PyNumber_Index(o) ((PyNumber_Check(o) && !PyFloat_Check(o)) ? PyNumber_Int(o) : \
(PyErr_Format(PyExc_TypeError, \
"expected index value, got %.200s", Py_TYPE(o)->tp_name), \
(PyObject*)0))
#define __Pyx_PyIndex_Check(o) (PyNumber_Check(o) && !PyFloat_Check(o) && \
!PyComplex_Check(o))
#define PyIndex_Check __Pyx_PyIndex_Check
#define PyErr_WarnEx(category, message, stacklevel) PyErr_Warn(category, message)
#define __PYX_BUILD_PY_SSIZE_T "i"
#else
#define __PYX_BUILD_PY_SSIZE_T "n"
#define CYTHON_FORMAT_SSIZE_T "z"
#define __Pyx_PyIndex_Check PyIndex_Check
#endif
#if PY_VERSION_HEX < 0x02060000
#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)
#define PyVarObject_HEAD_INIT(type, size) \
PyObject_HEAD_INIT(type) size,
#define PyType_Modified(t)
typedef struct {
void *buf;
PyObject *obj;
Py_ssize_t len;
Py_ssize_t itemsize;
int readonly;
int ndim;
char *format;
Py_ssize_t *shape;
Py_ssize_t *strides;
Py_ssize_t *suboffsets;
void *internal;
} Py_buffer;
#define PyBUF_SIMPLE 0
#define PyBUF_WRITABLE 0x0001
#define PyBUF_FORMAT 0x0004
#define PyBUF_ND 0x0008
#define PyBUF_STRIDES (0x0010 | PyBUF_ND)
#define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES)
#define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES)
#define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES)
#define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES)
#define PyBUF_RECORDS (PyBUF_STRIDES | PyBUF_FORMAT | PyBUF_WRITABLE)
#define PyBUF_FULL (PyBUF_INDIRECT | PyBUF_FORMAT | PyBUF_WRITABLE)
typedef int (*getbufferproc)(PyObject *, Py_buffer *, int);
typedef void (*releasebufferproc)(PyObject *, Py_buffer *);
#endif
#if PY_MAJOR_VERSION < 3
#define __Pyx_BUILTIN_MODULE_NAME "__builtin__"
#define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \
PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
#define __Pyx_DefaultClassType PyClass_Type
#else
#define __Pyx_BUILTIN_MODULE_NAME "builtins"
#define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \
PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
#define __Pyx_DefaultClassType PyType_Type
#endif
#if PY_VERSION_HEX < 0x02060000
#define PyUnicode_FromString(s) PyUnicode_Decode(s, strlen(s), "UTF-8", "strict")
#endif
#if PY_MAJOR_VERSION >= 3
#define Py_TPFLAGS_CHECKTYPES 0
#define Py_TPFLAGS_HAVE_INDEX 0
#endif
#if (PY_VERSION_HEX < 0x02060000) || (PY_MAJOR_VERSION >= 3)
#define Py_TPFLAGS_HAVE_NEWBUFFER 0
#endif
#if PY_VERSION_HEX < 0x02060000
#define Py_TPFLAGS_HAVE_VERSION_TAG 0
#endif
#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TPFLAGS_IS_ABSTRACT)
#define Py_TPFLAGS_IS_ABSTRACT 0
#endif
#if PY_VERSION_HEX < 0x030400a1 && !defined(Py_TPFLAGS_HAVE_FINALIZE)
#define Py_TPFLAGS_HAVE_FINALIZE 0
#endif
#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND)
#define CYTHON_PEP393_ENABLED 1
#define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ? \
0 : _PyUnicode_Ready((PyObject *)(op)))
#define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u)
#define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
#define __Pyx_PyUnicode_KIND(u) PyUnicode_KIND(u)
#define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u)
#define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i)
#else
#define CYTHON_PEP393_ENABLED 0
#define __Pyx_PyUnicode_READY(op) (0)
#define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u)
#define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i]))
#define __Pyx_PyUnicode_KIND(u) (sizeof(Py_UNICODE))
#define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u))
#define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i]))
#endif
#if CYTHON_COMPILING_IN_PYPY
#define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b)
#define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b)
#else
#define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b)
#define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ? \
PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b))
#endif
#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b))
#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b))
#if PY_MAJOR_VERSION >= 3
#define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b)
#else
#define __Pyx_PyString_Format(a, b) PyString_Format(a, b)
#endif
#if PY_MAJOR_VERSION >= 3
#define PyBaseString_Type PyUnicode_Type
#define PyStringObject PyUnicodeObject
#define PyString_Type PyUnicode_Type
#define PyString_Check PyUnicode_Check
#define PyString_CheckExact PyUnicode_CheckExact
#endif
#if PY_VERSION_HEX < 0x02060000
#define PyBytesObject PyStringObject
#define PyBytes_Type PyString_Type
#define PyBytes_Check PyString_Check
#define PyBytes_CheckExact PyString_CheckExact
#define PyBytes_FromString PyString_FromString
#define PyBytes_FromStringAndSize PyString_FromStringAndSize
#define PyBytes_FromFormat PyString_FromFormat
#define PyBytes_DecodeEscape PyString_DecodeEscape
#define PyBytes_AsString PyString_AsString
#define PyBytes_AsStringAndSize PyString_AsStringAndSize
#define PyBytes_Size PyString_Size
#define PyBytes_AS_STRING PyString_AS_STRING
#define PyBytes_GET_SIZE PyString_GET_SIZE
#define PyBytes_Repr PyString_Repr
#define PyBytes_Concat PyString_Concat
#define PyBytes_ConcatAndDel PyString_ConcatAndDel
#endif
#if PY_MAJOR_VERSION >= 3
#define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj)
#define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj)
#else
#define __Pyx_PyBaseString_Check(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj) || \
PyString_Check(obj) || PyUnicode_Check(obj))
#define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj))
#endif
#if PY_VERSION_HEX < 0x02060000
#define PySet_Check(obj) PyObject_TypeCheck(obj, &PySet_Type)
#define PyFrozenSet_Check(obj) PyObject_TypeCheck(obj, &PyFrozenSet_Type)
#endif
#ifndef PySet_CheckExact
#define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type)
#endif
#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type)
#if PY_MAJOR_VERSION >= 3
#define PyIntObject PyLongObject
#define PyInt_Type PyLong_Type
#define PyInt_Check(op) PyLong_Check(op)
#define PyInt_CheckExact(op) PyLong_CheckExact(op)
#define PyInt_FromString PyLong_FromString
#define PyInt_FromUnicode PyLong_FromUnicode
#define PyInt_FromLong PyLong_FromLong
#define PyInt_FromSize_t PyLong_FromSize_t
#define PyInt_FromSsize_t PyLong_FromSsize_t
#define PyInt_AsLong PyLong_AsLong
#define PyInt_AS_LONG PyLong_AS_LONG
#define PyInt_AsSsize_t PyLong_AsSsize_t
#define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask
#define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask
#define PyNumber_Int PyNumber_Long
#endif
#if PY_MAJOR_VERSION >= 3
#define PyBoolObject PyLongObject
#endif
#if PY_VERSION_HEX < 0x030200A4
typedef long Py_hash_t;
#define __Pyx_PyInt_FromHash_t PyInt_FromLong
#define __Pyx_PyInt_AsHash_t PyInt_AsLong
#else
#define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t
#define __Pyx_PyInt_AsHash_t PyInt_AsSsize_t
#endif
#if (PY_MAJOR_VERSION < 3) || (PY_VERSION_HEX >= 0x03010300)
#define __Pyx_PySequence_GetSlice(obj, a, b) PySequence_GetSlice(obj, a, b)
#define __Pyx_PySequence_SetSlice(obj, a, b, value) PySequence_SetSlice(obj, a, b, value)
#define __Pyx_PySequence_DelSlice(obj, a, b) PySequence_DelSlice(obj, a, b)
#else
#define __Pyx_PySequence_GetSlice(obj, a, b) (unlikely(!(obj)) ? \
(PyErr_SetString(PyExc_SystemError, "null argument to internal routine"), (PyObject*)0) : \
(likely((obj)->ob_type->tp_as_mapping) ? (PySequence_GetSlice(obj, a, b)) : \
(PyErr_Format(PyExc_TypeError, "'%.200s' object is unsliceable", (obj)->ob_type->tp_name), (PyObject*)0)))
#define __Pyx_PySequence_SetSlice(obj, a, b, value) (unlikely(!(obj)) ? \
(PyErr_SetString(PyExc_SystemError, "null argument to internal routine"), -1) : \
(likely((obj)->ob_type->tp_as_mapping) ? (PySequence_SetSlice(obj, a, b, value)) : \
(PyErr_Format(PyExc_TypeError, "'%.200s' object doesn't support slice assignment", (obj)->ob_type->tp_name), -1)))
#define __Pyx_PySequence_DelSlice(obj, a, b) (unlikely(!(obj)) ? \
(PyErr_SetString(PyExc_SystemError, "null argument to internal routine"), -1) : \
(likely((obj)->ob_type->tp_as_mapping) ? (PySequence_DelSlice(obj, a, b)) : \
(PyErr_Format(PyExc_TypeError, "'%.200s' object doesn't support slice deletion", (obj)->ob_type->tp_name), -1)))
#endif
#if PY_MAJOR_VERSION >= 3
#define PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : PyInstanceMethod_New(func))
#endif
#if PY_VERSION_HEX < 0x02050000
#define __Pyx_GetAttrString(o,n) PyObject_GetAttrString((o),((char *)(n)))
#define __Pyx_SetAttrString(o,n,a) PyObject_SetAttrString((o),((char *)(n)),(a))
#define __Pyx_DelAttrString(o,n) PyObject_DelAttrString((o),((char *)(n)))
#else
#define __Pyx_GetAttrString(o,n) PyObject_GetAttrString((o),(n))
#define __Pyx_SetAttrString(o,n,a) PyObject_SetAttrString((o),(n),(a))
#define __Pyx_DelAttrString(o,n) PyObject_DelAttrString((o),(n))
#endif
#if PY_VERSION_HEX < 0x02050000
#define __Pyx_NAMESTR(n) ((char *)(n))
#define __Pyx_DOCSTR(n) ((char *)(n))
#else
#define __Pyx_NAMESTR(n) (n)
#define __Pyx_DOCSTR(n) (n)
#endif
#ifndef CYTHON_INLINE
#if defined(__GNUC__)
#define CYTHON_INLINE __inline__
#elif defined(_MSC_VER)
#define CYTHON_INLINE __inline
#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define CYTHON_INLINE inline
#else
#define CYTHON_INLINE
#endif
#endif
#ifndef CYTHON_RESTRICT
#if defined(__GNUC__)
#define CYTHON_RESTRICT __restrict__
#elif defined(_MSC_VER) && _MSC_VER >= 1400
#define CYTHON_RESTRICT __restrict
#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define CYTHON_RESTRICT restrict
#else
#define CYTHON_RESTRICT
#endif
#endif
#ifdef NAN
#define __PYX_NAN() ((float) NAN)
#else
static CYTHON_INLINE float __PYX_NAN() {
/* Initialize NaN. The sign is irrelevant, an exponent with all bits 1 and
a nonzero mantissa means NaN. If the first bit in the mantissa is 1, it is
a quiet NaN. */
float value;
memset(&value, 0xFF, sizeof(value));
return value;
}
#endif
#if PY_MAJOR_VERSION >= 3
#define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y)
#define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y)
#else
#define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y)
#define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y)
#endif
#ifndef __PYX_EXTERN_C
#ifdef __cplusplus
#define __PYX_EXTERN_C extern "C"
#else
#define __PYX_EXTERN_C extern
#endif
#endif
#if defined(WIN32) || defined(MS_WINDOWS)
#define _USE_MATH_DEFINES
#endif
#include <math.h>
#define __PYX_HAVE__spacy__spacy
#define __PYX_HAVE_API__spacy__spacy
#include <vector>
#include "ios"
#include "new"
#include "stdexcept"
#include "typeinfo"
#include "stdint.h"
#include <utility>
#include "sparsehash/dense_hash_map"
#include "../include/MurmurHash3.h"
#include "../include/MurmurHash2.h"
#ifdef _OPENMP
#include <omp.h>
#endif /* _OPENMP */
#ifdef PYREX_WITHOUT_ASSERTIONS
#define CYTHON_WITHOUT_ASSERTIONS
#endif
#ifndef CYTHON_UNUSED
# if defined(__GNUC__)
# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
# define CYTHON_UNUSED __attribute__ ((__unused__))
# else
# define CYTHON_UNUSED
# endif
# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
# define CYTHON_UNUSED __attribute__ ((__unused__))
# else
# define CYTHON_UNUSED
# endif
#endif
typedef struct {PyObject **p; char *s; const Py_ssize_t n; const char* encoding;
const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; /*proto*/
#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0
#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0
#define __PYX_DEFAULT_STRING_ENCODING ""
#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString
#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
#define __Pyx_fits_Py_ssize_t(v, type, is_signed) ( \
(sizeof(type) < sizeof(Py_ssize_t)) || \
(sizeof(type) > sizeof(Py_ssize_t) && \
likely(v < (type)PY_SSIZE_T_MAX || \
v == (type)PY_SSIZE_T_MAX) && \
(!is_signed || likely(v > (type)PY_SSIZE_T_MIN || \
v == (type)PY_SSIZE_T_MIN))) || \
(sizeof(type) == sizeof(Py_ssize_t) && \
(is_signed || likely(v < (type)PY_SSIZE_T_MAX || \
v == (type)PY_SSIZE_T_MAX))) )
static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject*);
static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length);
#define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s))
#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l)
#define __Pyx_PyBytes_FromString PyBytes_FromString
#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(char*);
#if PY_MAJOR_VERSION < 3
#define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString
#define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
#else
#define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString
#define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize
#endif
#define __Pyx_PyObject_AsSString(s) ((signed char*) __Pyx_PyObject_AsString(s))
#define __Pyx_PyObject_AsUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s))
#define __Pyx_PyObject_FromUString(s) __Pyx_PyObject_FromString((char*)s)
#define __Pyx_PyBytes_FromUString(s) __Pyx_PyBytes_FromString((char*)s)
#define __Pyx_PyByteArray_FromUString(s) __Pyx_PyByteArray_FromString((char*)s)
#define __Pyx_PyStr_FromUString(s) __Pyx_PyStr_FromString((char*)s)
#define __Pyx_PyUnicode_FromUString(s) __Pyx_PyUnicode_FromString((char*)s)
#if PY_MAJOR_VERSION < 3
static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u)
{
const Py_UNICODE *u_end = u;
while (*u_end++) ;
return u_end - u - 1;
}
#else
#define __Pyx_Py_UNICODE_strlen Py_UNICODE_strlen
#endif
#define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u))
#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode
#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode
#define __Pyx_Owned_Py_None(b) (Py_INCREF(Py_None), Py_None)
#define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False))
static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*);
static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x);
static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*);
static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t);
#if CYTHON_COMPILING_IN_CPYTHON
#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x))
#else
#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x)
#endif
#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x))
#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
static int __Pyx_sys_getdefaultencoding_not_ascii;
static int __Pyx_init_sys_getdefaultencoding_params(void) {
PyObject* sys = NULL;
PyObject* default_encoding = NULL;
PyObject* ascii_chars_u = NULL;
PyObject* ascii_chars_b = NULL;
sys = PyImport_ImportModule("sys");
if (sys == NULL) goto bad;
default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL);
if (default_encoding == NULL) goto bad;
if (strcmp(PyBytes_AsString(default_encoding), "ascii") == 0) {
__Pyx_sys_getdefaultencoding_not_ascii = 0;
} else {
const char* default_encoding_c = PyBytes_AS_STRING(default_encoding);
char ascii_chars[128];
int c;
for (c = 0; c < 128; c++) {
ascii_chars[c] = c;
}
__Pyx_sys_getdefaultencoding_not_ascii = 1;
ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL);
if (ascii_chars_u == NULL) goto bad;
ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL);
if (ascii_chars_b == NULL || strncmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) {
PyErr_Format(
PyExc_ValueError,
"This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.",
default_encoding_c);
goto bad;
}
}
Py_XDECREF(sys);
Py_XDECREF(default_encoding);
Py_XDECREF(ascii_chars_u);
Py_XDECREF(ascii_chars_b);
return 0;
bad:
Py_XDECREF(sys);
Py_XDECREF(default_encoding);
Py_XDECREF(ascii_chars_u);
Py_XDECREF(ascii_chars_b);
return -1;
}
#endif
#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3
#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL)
#else
#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL)
#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
static char* __PYX_DEFAULT_STRING_ENCODING;
static int __Pyx_init_sys_getdefaultencoding_params(void) {
PyObject* sys = NULL;
PyObject* default_encoding = NULL;
char* default_encoding_c;
sys = PyImport_ImportModule("sys");
if (sys == NULL) goto bad;
default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL);
if (default_encoding == NULL) goto bad;
default_encoding_c = PyBytes_AS_STRING(default_encoding);
__PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c));
strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c);
Py_DECREF(sys);
Py_DECREF(default_encoding);
return 0;
bad:
Py_XDECREF(sys);
Py_XDECREF(default_encoding);
return -1;
}
#endif
#endif
#ifdef __GNUC__
/* Test for GCC > 2.95 */
#if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else /* __GNUC__ > 2 ... */
#define likely(x) (x)
#define unlikely(x) (x)
#endif /* __GNUC__ > 2 ... */
#else /* __GNUC__ */
#define likely(x) (x)
#define unlikely(x) (x)
#endif /* __GNUC__ */
static PyObject *__pyx_m;
static PyObject *__pyx_d;
static PyObject *__pyx_b;
static PyObject *__pyx_empty_tuple;
static PyObject *__pyx_empty_bytes;
static int __pyx_lineno;
static int __pyx_clineno = 0;
static const char * __pyx_cfilenm= __FILE__;
static const char *__pyx_filename;
static const char *__pyx_f[] = {
"spacy.pyx",
"stringsource",
};
/* "spacy/lexeme.pxd":4
*
* # Put these above import to avoid circular import problem
* ctypedef int ClusterID # <<<<<<<<<<<<<<
* ctypedef uint64_t StringHash
* ctypedef size_t Lexeme_addr
*/
typedef int __pyx_t_5spacy_6lexeme_ClusterID;
/* "spacy/lexeme.pxd":5
* # Put these above import to avoid circular import problem
* ctypedef int ClusterID
* ctypedef uint64_t StringHash # <<<<<<<<<<<<<<
* ctypedef size_t Lexeme_addr
*
*/
typedef uint64_t __pyx_t_5spacy_6lexeme_StringHash;
/* "spacy/lexeme.pxd":6
* ctypedef int ClusterID
* ctypedef uint64_t StringHash
* ctypedef size_t Lexeme_addr # <<<<<<<<<<<<<<
*
* from spacy.spacy cimport Vocab
*/
typedef size_t __pyx_t_5spacy_6lexeme_Lexeme_addr;
/* "spacy/spacy.pxd":7
*
* # Circular import problems here
* ctypedef size_t Lexeme_addr # <<<<<<<<<<<<<<
* ctypedef uint64_t StringHash
* ctypedef dense_hash_map[StringHash, Lexeme_addr] Vocab
*/
typedef size_t __pyx_t_5spacy_5spacy_Lexeme_addr;
/* "spacy/spacy.pxd":8
* # Circular import problems here
* ctypedef size_t Lexeme_addr
* ctypedef uint64_t StringHash # <<<<<<<<<<<<<<
* ctypedef dense_hash_map[StringHash, Lexeme_addr] Vocab
* ctypedef int (*Splitter)(unicode word, size_t length)
*/
typedef uint64_t __pyx_t_5spacy_5spacy_StringHash;
/*--- Type declarations ---*/
struct __pyx_t_5spacy_6lexeme_Lexeme;
/* "spacy/lexeme.pxd":36
* # over the Lexeme, via:
* # for field in range(LexAttr.n): get_attr(Lexeme*, field)
* cdef enum HashFields: # <<<<<<<<<<<<<<
* sic
* lex
*/
enum __pyx_t_5spacy_6lexeme_HashFields {
__pyx_e_5spacy_6lexeme_sic,
__pyx_e_5spacy_6lexeme_lex,
__pyx_e_5spacy_6lexeme_normed,
__pyx_e_5spacy_6lexeme_cluster,
__pyx_e_5spacy_6lexeme_n
};
/* "spacy/lexeme.pxd":11
* from spacy.spacy cimport Splitter
*
* cdef struct Lexeme: # <<<<<<<<<<<<<<
* StringHash sic # Hash of the original string
* StringHash lex # Hash of the word, with punctuation and clitics split off
*/
struct __pyx_t_5spacy_6lexeme_Lexeme {
__pyx_t_5spacy_6lexeme_StringHash sic;
__pyx_t_5spacy_6lexeme_StringHash lex;
__pyx_t_5spacy_6lexeme_StringHash normed;
__pyx_t_5spacy_6lexeme_StringHash last3;
Py_UNICODE first;
double prob;
__pyx_t_5spacy_6lexeme_ClusterID cluster;
int oft_upper;
int oft_title;
struct __pyx_t_5spacy_6lexeme_Lexeme *tail;
};
/* "spacy/spacy.pxd":9
* ctypedef size_t Lexeme_addr
* ctypedef uint64_t StringHash
* ctypedef dense_hash_map[StringHash, Lexeme_addr] Vocab # <<<<<<<<<<<<<<
* ctypedef int (*Splitter)(unicode word, size_t length)
*
*/
typedef google::dense_hash_map<__pyx_t_5spacy_5spacy_StringHash,__pyx_t_5spacy_5spacy_Lexeme_addr> __pyx_t_5spacy_5spacy_Vocab;
/* "spacy/spacy.pxd":10
* ctypedef uint64_t StringHash
* ctypedef dense_hash_map[StringHash, Lexeme_addr] Vocab
* ctypedef int (*Splitter)(unicode word, size_t length) # <<<<<<<<<<<<<<
*
*
*/
typedef int (*__pyx_t_5spacy_5spacy_Splitter)(PyObject *, size_t);
#ifndef CYTHON_REFNANNY
#define CYTHON_REFNANNY 0
#endif
#if CYTHON_REFNANNY
typedef struct {
void (*INCREF)(void*, PyObject*, int);
void (*DECREF)(void*, PyObject*, int);
void (*GOTREF)(void*, PyObject*, int);
void (*GIVEREF)(void*, PyObject*, int);
void* (*SetupContext)(const char*, int, const char*);
void (*FinishContext)(void**);
} __Pyx_RefNannyAPIStruct;
static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL;
static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); /*proto*/
#define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL;
#ifdef WITH_THREAD
#define __Pyx_RefNannySetupContext(name, acquire_gil) \
if (acquire_gil) { \
PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure(); \
__pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__); \
PyGILState_Release(__pyx_gilstate_save); \
} else { \
__pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__); \
}
#else
#define __Pyx_RefNannySetupContext(name, acquire_gil) \
__pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__)
#endif
#define __Pyx_RefNannyFinishContext() \
__Pyx_RefNanny->FinishContext(&__pyx_refnanny)
#define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
#define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
#define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
#define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
#define __Pyx_XINCREF(r) do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0)
#define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0)
#define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0)
#define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0)
#else
#define __Pyx_RefNannyDeclarations
#define __Pyx_RefNannySetupContext(name, acquire_gil)
#define __Pyx_RefNannyFinishContext()
#define __Pyx_INCREF(r) Py_INCREF(r)
#define __Pyx_DECREF(r) Py_DECREF(r)
#define __Pyx_GOTREF(r)
#define __Pyx_GIVEREF(r)
#define __Pyx_XINCREF(r) Py_XINCREF(r)
#define __Pyx_XDECREF(r) Py_XDECREF(r)
#define __Pyx_XGOTREF(r)
#define __Pyx_XGIVEREF(r)
#endif /* CYTHON_REFNANNY */
#define __Pyx_XDECREF_SET(r, v) do { \
PyObject *tmp = (PyObject *) r; \
r = v; __Pyx_XDECREF(tmp); \
} while (0)
#define __Pyx_DECREF_SET(r, v) do { \
PyObject *tmp = (PyObject *) r; \
r = v; __Pyx_DECREF(tmp); \
} while (0)
#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0)
#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0)
#if CYTHON_COMPILING_IN_CPYTHON
static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) {
PyTypeObject* tp = Py_TYPE(obj);
if (likely(tp->tp_getattro))
return tp->tp_getattro(obj, attr_name);
#if PY_MAJOR_VERSION < 3
if (likely(tp->tp_getattr))
return tp->tp_getattr(obj, PyString_AS_STRING(attr_name));
#endif
return PyObject_GetAttr(obj, attr_name);
}
#else
#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n)
#endif
static PyObject *__Pyx_GetBuiltinName(PyObject *name); /*proto*/
static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected);
static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index);
static CYTHON_INLINE int __Pyx_IterFinish(void); /*proto*/
static int __Pyx_IternextUnpackEndCheck(PyObject *retval, Py_ssize_t expected); /*proto*/
#include <string.h>
static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals); /*proto*/
static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals); /*proto*/
#if PY_MAJOR_VERSION >= 3
static PyObject *__Pyx_PyDict_GetItem(PyObject *d, PyObject* key) {
PyObject *value;
value = PyDict_GetItemWithError(d, key);
if (unlikely(!value)) {
if (!PyErr_Occurred()) {
PyObject* args = PyTuple_Pack(1, key);
if (likely(args))
PyErr_SetObject(PyExc_KeyError, args);
Py_XDECREF(args);
}
return NULL;
}
Py_INCREF(value);
return value;
}
#else
#define __Pyx_PyDict_GetItem(d, key) PyObject_GetItem(d, key)
#endif
#if CYTHON_COMPILING_IN_CPYTHON
static CYTHON_INLINE int __Pyx_ListComp_Append(PyObject* list, PyObject* x) {
PyListObject* L = (PyListObject*) list;
Py_ssize_t len = Py_SIZE(list);
if (likely(L->allocated > len)) {
Py_INCREF(x);
PyList_SET_ITEM(list, len, x);
Py_SIZE(list) = len+1;
return 0;
}
return PyList_Append(list, x);
}
#else
#define __Pyx_ListComp_Append(L,x) PyList_Append(L,x)
#endif
static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name); /*proto*/
static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); /*proto*/
static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *);
#ifndef __Pyx_CppExn2PyErr
#include <new>
#include <typeinfo>
#include <stdexcept>
#include <ios>
static void __Pyx_CppExn2PyErr() {
try {
if (PyErr_Occurred())
; // let the latest Python exn pass through and ignore the current one
else
throw;
} catch (const std::bad_alloc& exn) {
PyErr_SetString(PyExc_MemoryError, exn.what());
} catch (const std::bad_cast& exn) {
PyErr_SetString(PyExc_TypeError, exn.what());
} catch (const std::domain_error& exn) {
PyErr_SetString(PyExc_ValueError, exn.what());
} catch (const std::invalid_argument& exn) {
PyErr_SetString(PyExc_ValueError, exn.what());
} catch (const std::ios_base::failure& exn) {
PyErr_SetString(PyExc_IOError, exn.what());
} catch (const std::out_of_range& exn) {
PyErr_SetString(PyExc_IndexError, exn.what());
} catch (const std::overflow_error& exn) {
PyErr_SetString(PyExc_OverflowError, exn.what());
} catch (const std::range_error& exn) {
PyErr_SetString(PyExc_ArithmeticError, exn.what());
} catch (const std::underflow_error& exn) {
PyErr_SetString(PyExc_ArithmeticError, exn.what());
} catch (const std::exception& exn) {
PyErr_SetString(PyExc_RuntimeError, exn.what());
}
catch (...)
{
PyErr_SetString(PyExc_RuntimeError, "Unknown exception");
}
}
#endif
static CYTHON_INLINE PyObject* __Pyx_PyInt_From_uint64_t(uint64_t value);
static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value);
static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *);
static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *);
static int __Pyx_check_binary_version(void);
static int __Pyx_ExportFunction(const char *name, void (*f)(void), const char *sig); /*proto*/
#if !defined(__Pyx_PyIdentifier_FromString)
#if PY_MAJOR_VERSION < 3
#define __Pyx_PyIdentifier_FromString(s) PyString_FromString(s)
#else
#define __Pyx_PyIdentifier_FromString(s) PyUnicode_FromString(s)
#endif
#endif
static PyObject *__Pyx_ImportModule(const char *name); /*proto*/
static int __Pyx_ImportVoidPtr(PyObject *module, const char *name, void **p, const char *sig); /*proto*/
static int __Pyx_ImportFunction(PyObject *module, const char *funcname, void (**f)(void), const char *sig); /*proto*/
typedef struct {
int code_line;
PyCodeObject* code_object;
} __Pyx_CodeObjectCacheEntry;
struct __Pyx_CodeObjectCache {
int count;
int max_count;
__Pyx_CodeObjectCacheEntry* entries;
};
static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL};
static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line);
static PyCodeObject *__pyx_find_code_object(int code_line);
static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object);
static void __Pyx_AddTraceback(const char *funcname, int c_line,
int py_line, const char *filename); /*proto*/
static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/
/* Module declarations from 'libcpp.vector' */
/* Module declarations from 'libc.stdint' */
/* Module declarations from 'libcpp.utility' */
/* Module declarations from 'ext.sparsehash' */
/* Module declarations from 'spacy.lexeme' */
static struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_vp_5spacy_6lexeme_BLANK_WORD = 0;
#define __pyx_v_5spacy_6lexeme_BLANK_WORD (*__pyx_vp_5spacy_6lexeme_BLANK_WORD)
static struct __pyx_t_5spacy_6lexeme_Lexeme *(*__pyx_f_5spacy_6lexeme_init_lexeme)(__pyx_t_5spacy_5spacy_Vocab, PyObject *, __pyx_t_5spacy_5spacy_Splitter, PyObject *, __pyx_t_5spacy_6lexeme_StringHash, int, size_t); /*proto*/
/* Module declarations from 'ext.murmurhash' */
/* Module declarations from 'spacy.string_tools' */
static int (*__pyx_f_5spacy_12string_tools_is_whitespace)(Py_UNICODE); /*proto*/
/* Module declarations from 'spacy.spacy' */
static __pyx_t_5spacy_5spacy_Lexeme_addr __pyx_f_5spacy_5spacy_lookup(__pyx_t_5spacy_5spacy_Vocab &, PyObject *, __pyx_t_5spacy_5spacy_Splitter, int, PyObject *); /*proto*/
static __pyx_t_5spacy_5spacy_StringHash __pyx_f_5spacy_5spacy_hash_string(PyObject *, size_t); /*proto*/
static std::vector<size_t> __pyx_f_5spacy_5spacy_expand_chunk(size_t, int __pyx_skip_dispatch); /*proto*/
static struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_f_5spacy_5spacy__add(__pyx_t_5spacy_5spacy_Vocab &, PyObject *, __pyx_t_5spacy_5spacy_Splitter, __pyx_t_5spacy_5spacy_StringHash, PyObject *, int, size_t); /*proto*/
static PyObject *__pyx_convert_vector_to_py_size_t(const std::vector<size_t> &); /*proto*/
#define __Pyx_MODULE_NAME "spacy.spacy"
int __pyx_module_is_main_spacy__spacy = 0;
/* Implementation of 'spacy.spacy' */
static PyObject *__pyx_builtin_enumerate;
static PyObject *__pyx_builtin_range;
static PyObject *__pyx_pf_5spacy_5spacy_expand_chunk(CYTHON_UNUSED PyObject *__pyx_self, size_t __pyx_v_addr); /* proto */
static char __pyx_k_[] = "";
static char __pyx_k_main[] = "__main__";
static char __pyx_k_test[] = "__test__";
static char __pyx_k_util[] = "util";
static char __pyx_k_range[] = "range";
static char __pyx_k_s_d_s[] = "%s:@:%d:@:%s";
static char __pyx_k_import[] = "__import__";
static char __pyx_k_enumerate[] = "enumerate";
static PyObject *__pyx_n_s_;
static PyObject *__pyx_kp_u_;
static PyObject *__pyx_n_s_enumerate;
static PyObject *__pyx_n_s_import;
static PyObject *__pyx_n_s_main;
static PyObject *__pyx_n_s_range;
static PyObject *__pyx_kp_u_s_d_s;
static PyObject *__pyx_n_s_test;
static PyObject *__pyx_n_s_util;
static PyObject *__pyx_int_0;
static PyObject *__pyx_int_1;
/* "spacy/spacy.pyx":14
*
*
* cdef load_tokenization(Vocab& vocab, dict bacov, token_rules): # <<<<<<<<<<<<<<
* cdef Lexeme* word
* cdef StringHash hashed
*/
static PyObject *__pyx_f_5spacy_5spacy_load_tokenization(__pyx_t_5spacy_5spacy_Vocab &__pyx_v_vocab, PyObject *__pyx_v_bacov, PyObject *__pyx_v_token_rules) {
struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_v_word;
__pyx_t_5spacy_5spacy_StringHash __pyx_v_hashed;
PyObject *__pyx_v_chunk = NULL;
PyObject *__pyx_v_lex = NULL;
PyObject *__pyx_v_tokens = NULL;
PyObject *__pyx_v_i = NULL;
PyObject *__pyx_v_token_string = NULL;
Py_ssize_t __pyx_v_length;
PyObject *__pyx_r = NULL;
__Pyx_RefNannyDeclarations
PyObject *__pyx_t_1 = NULL;
Py_ssize_t __pyx_t_2;
PyObject *(*__pyx_t_3)(PyObject *);
PyObject *__pyx_t_4 = NULL;
PyObject *__pyx_t_5 = NULL;
PyObject *__pyx_t_6 = NULL;
PyObject *__pyx_t_7 = NULL;
PyObject *__pyx_t_8 = NULL;
PyObject *(*__pyx_t_9)(PyObject *);
Py_ssize_t __pyx_t_10;
__pyx_t_5spacy_5spacy_StringHash __pyx_t_11;
Py_ssize_t __pyx_t_12;
struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_t_13;
PyObject *(*__pyx_t_14)(PyObject *);
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("load_tokenization", 0);
/* "spacy/spacy.pyx":17
* cdef Lexeme* word
* cdef StringHash hashed
* for chunk, lex, tokens in token_rules: # <<<<<<<<<<<<<<
* hashed = hash_string(chunk, len(chunk))
* assert vocab[hashed] == 0
*/
if (PyList_CheckExact(__pyx_v_token_rules) || PyTuple_CheckExact(__pyx_v_token_rules)) {
__pyx_t_1 = __pyx_v_token_rules; __Pyx_INCREF(__pyx_t_1); __pyx_t_2 = 0;
__pyx_t_3 = NULL;
} else {
__pyx_t_2 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_v_token_rules); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_1);
__pyx_t_3 = Py_TYPE(__pyx_t_1)->tp_iternext;
}
for (;;) {
if (!__pyx_t_3 && PyList_CheckExact(__pyx_t_1)) {
if (__pyx_t_2 >= PyList_GET_SIZE(__pyx_t_1)) break;
#if CYTHON_COMPILING_IN_CPYTHON
__pyx_t_4 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
#else
__pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
#endif
} else if (!__pyx_t_3 && PyTuple_CheckExact(__pyx_t_1)) {
if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_1)) break;
#if CYTHON_COMPILING_IN_CPYTHON
__pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
#else
__pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
#endif
} else {
__pyx_t_4 = __pyx_t_3(__pyx_t_1);
if (unlikely(!__pyx_t_4)) {
PyObject* exc_type = PyErr_Occurred();
if (exc_type) {
if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear();
else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
break;
}
__Pyx_GOTREF(__pyx_t_4);
}
if ((likely(PyTuple_CheckExact(__pyx_t_4))) || (PyList_CheckExact(__pyx_t_4))) {
PyObject* sequence = __pyx_t_4;
#if CYTHON_COMPILING_IN_CPYTHON
Py_ssize_t size = Py_SIZE(sequence);
#else
Py_ssize_t size = PySequence_Size(sequence);
#endif
if (unlikely(size != 3)) {
if (size > 3) __Pyx_RaiseTooManyValuesError(3);
else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
#if CYTHON_COMPILING_IN_CPYTHON
if (likely(PyTuple_CheckExact(sequence))) {
__pyx_t_5 = PyTuple_GET_ITEM(sequence, 0);
__pyx_t_6 = PyTuple_GET_ITEM(sequence, 1);
__pyx_t_7 = PyTuple_GET_ITEM(sequence, 2);
} else {
__pyx_t_5 = PyList_GET_ITEM(sequence, 0);
__pyx_t_6 = PyList_GET_ITEM(sequence, 1);
__pyx_t_7 = PyList_GET_ITEM(sequence, 2);
}
__Pyx_INCREF(__pyx_t_5);
__Pyx_INCREF(__pyx_t_6);
__Pyx_INCREF(__pyx_t_7);
#else
__pyx_t_5 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_5);
__pyx_t_6 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_6);
__pyx_t_7 = PySequence_ITEM(sequence, 2); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_7);
#endif
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
} else {
Py_ssize_t index = -1;
__pyx_t_8 = PyObject_GetIter(__pyx_t_4); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_8);
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
__pyx_t_9 = Py_TYPE(__pyx_t_8)->tp_iternext;
index = 0; __pyx_t_5 = __pyx_t_9(__pyx_t_8); if (unlikely(!__pyx_t_5)) goto __pyx_L5_unpacking_failed;
__Pyx_GOTREF(__pyx_t_5);
index = 1; __pyx_t_6 = __pyx_t_9(__pyx_t_8); if (unlikely(!__pyx_t_6)) goto __pyx_L5_unpacking_failed;
__Pyx_GOTREF(__pyx_t_6);
index = 2; __pyx_t_7 = __pyx_t_9(__pyx_t_8); if (unlikely(!__pyx_t_7)) goto __pyx_L5_unpacking_failed;
__Pyx_GOTREF(__pyx_t_7);
if (__Pyx_IternextUnpackEndCheck(__pyx_t_9(__pyx_t_8), 3) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_t_9 = NULL;
__Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
goto __pyx_L6_unpacking_done;
__pyx_L5_unpacking_failed:;
__Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
__pyx_t_9 = NULL;
if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_L6_unpacking_done:;
}
__Pyx_XDECREF_SET(__pyx_v_chunk, __pyx_t_5);
__pyx_t_5 = 0;
__Pyx_XDECREF_SET(__pyx_v_lex, __pyx_t_6);
__pyx_t_6 = 0;
__Pyx_XDECREF_SET(__pyx_v_tokens, __pyx_t_7);
__pyx_t_7 = 0;
/* "spacy/spacy.pyx":18
* cdef StringHash hashed
* for chunk, lex, tokens in token_rules:
* hashed = hash_string(chunk, len(chunk)) # <<<<<<<<<<<<<<
* assert vocab[hashed] == 0
* word = _add(vocab, bacov, <Splitter>NULL, hashed, lex, len(lex), len(lex))
*/
if (!(likely(PyUnicode_CheckExact(__pyx_v_chunk))||((__pyx_v_chunk) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "unicode", Py_TYPE(__pyx_v_chunk)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_t_10 = PyObject_Length(__pyx_v_chunk); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_t_11 = __pyx_f_5spacy_5spacy_hash_string(((PyObject*)__pyx_v_chunk), __pyx_t_10); if (unlikely(__pyx_t_11 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_v_hashed = __pyx_t_11;
/* "spacy/spacy.pyx":19
* for chunk, lex, tokens in token_rules:
* hashed = hash_string(chunk, len(chunk))
* assert vocab[hashed] == 0 # <<<<<<<<<<<<<<
* word = _add(vocab, bacov, <Splitter>NULL, hashed, lex, len(lex), len(lex))
* for i, lex in enumerate(tokens):
*/
#ifndef CYTHON_WITHOUT_ASSERTIONS
if (unlikely(!Py_OptimizeFlag)) {
if (unlikely(!(((__pyx_v_vocab[__pyx_v_hashed]) == 0) != 0))) {
PyErr_SetNone(PyExc_AssertionError);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 19; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
}
#endif
/* "spacy/spacy.pyx":20
* hashed = hash_string(chunk, len(chunk))
* assert vocab[hashed] == 0
* word = _add(vocab, bacov, <Splitter>NULL, hashed, lex, len(lex), len(lex)) # <<<<<<<<<<<<<<
* for i, lex in enumerate(tokens):
* token_string = '%s:@:%d:@:%s' % (chunk, i, lex)
*/
if (!(likely(PyUnicode_CheckExact(__pyx_v_lex))||((__pyx_v_lex) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "unicode", Py_TYPE(__pyx_v_lex)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_t_10 = PyObject_Length(__pyx_v_lex); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_t_12 = PyObject_Length(__pyx_v_lex); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_t_13 = __pyx_f_5spacy_5spacy__add(__pyx_v_vocab, __pyx_v_bacov, ((__pyx_t_5spacy_5spacy_Splitter)NULL), __pyx_v_hashed, ((PyObject*)__pyx_v_lex), __pyx_t_10, __pyx_t_12); if (unlikely(__pyx_t_13 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_v_word = __pyx_t_13;
/* "spacy/spacy.pyx":21
* assert vocab[hashed] == 0
* word = _add(vocab, bacov, <Splitter>NULL, hashed, lex, len(lex), len(lex))
* for i, lex in enumerate(tokens): # <<<<<<<<<<<<<<
* token_string = '%s:@:%d:@:%s' % (chunk, i, lex)
* length = len(token_string)
*/
__Pyx_INCREF(__pyx_int_0);
__pyx_t_4 = __pyx_int_0;
if (PyList_CheckExact(__pyx_v_tokens) || PyTuple_CheckExact(__pyx_v_tokens)) {
__pyx_t_7 = __pyx_v_tokens; __Pyx_INCREF(__pyx_t_7); __pyx_t_12 = 0;
__pyx_t_14 = NULL;
} else {
__pyx_t_12 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_v_tokens); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_7);
__pyx_t_14 = Py_TYPE(__pyx_t_7)->tp_iternext;
}
for (;;) {
if (!__pyx_t_14 && PyList_CheckExact(__pyx_t_7)) {
if (__pyx_t_12 >= PyList_GET_SIZE(__pyx_t_7)) break;
#if CYTHON_COMPILING_IN_CPYTHON
__pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_12); __Pyx_INCREF(__pyx_t_6); __pyx_t_12++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
#else
__pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_12); __pyx_t_12++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
#endif
} else if (!__pyx_t_14 && PyTuple_CheckExact(__pyx_t_7)) {
if (__pyx_t_12 >= PyTuple_GET_SIZE(__pyx_t_7)) break;
#if CYTHON_COMPILING_IN_CPYTHON
__pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_12); __Pyx_INCREF(__pyx_t_6); __pyx_t_12++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
#else
__pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_12); __pyx_t_12++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
#endif
} else {
__pyx_t_6 = __pyx_t_14(__pyx_t_7);
if (unlikely(!__pyx_t_6)) {
PyObject* exc_type = PyErr_Occurred();
if (exc_type) {
if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear();
else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
break;
}
__Pyx_GOTREF(__pyx_t_6);
}
__Pyx_DECREF_SET(__pyx_v_lex, __pyx_t_6);
__pyx_t_6 = 0;
__Pyx_INCREF(__pyx_t_4);
__Pyx_XDECREF_SET(__pyx_v_i, __pyx_t_4);
__pyx_t_6 = PyNumber_Add(__pyx_t_4, __pyx_int_1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_6);
__Pyx_DECREF(__pyx_t_4);
__pyx_t_4 = __pyx_t_6;
__pyx_t_6 = 0;
/* "spacy/spacy.pyx":22
* word = _add(vocab, bacov, <Splitter>NULL, hashed, lex, len(lex), len(lex))
* for i, lex in enumerate(tokens):
* token_string = '%s:@:%d:@:%s' % (chunk, i, lex) # <<<<<<<<<<<<<<
* length = len(token_string)
* hashed = hash_string(token_string, length)
*/
__pyx_t_6 = PyTuple_New(3); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 22; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_6);
__Pyx_INCREF(__pyx_v_chunk);
PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_v_chunk);
__Pyx_GIVEREF(__pyx_v_chunk);
__Pyx_INCREF(__pyx_v_i);
PyTuple_SET_ITEM(__pyx_t_6, 1, __pyx_v_i);
__Pyx_GIVEREF(__pyx_v_i);
__Pyx_INCREF(__pyx_v_lex);
PyTuple_SET_ITEM(__pyx_t_6, 2, __pyx_v_lex);
__Pyx_GIVEREF(__pyx_v_lex);
__pyx_t_5 = PyUnicode_Format(__pyx_kp_u_s_d_s, __pyx_t_6); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 22; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_5);
__Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
__Pyx_XDECREF_SET(__pyx_v_token_string, ((PyObject*)__pyx_t_5));
__pyx_t_5 = 0;
/* "spacy/spacy.pyx":23
* for i, lex in enumerate(tokens):
* token_string = '%s:@:%d:@:%s' % (chunk, i, lex)
* length = len(token_string) # <<<<<<<<<<<<<<
* hashed = hash_string(token_string, length)
* word.tail = _add(vocab, bacov, <Splitter>NULL, hashed, lex, 0, len(lex))
*/
__pyx_t_10 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_token_string); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 23; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_v_length = __pyx_t_10;
/* "spacy/spacy.pyx":24
* token_string = '%s:@:%d:@:%s' % (chunk, i, lex)
* length = len(token_string)
* hashed = hash_string(token_string, length) # <<<<<<<<<<<<<<
* word.tail = _add(vocab, bacov, <Splitter>NULL, hashed, lex, 0, len(lex))
* word = word.tail
*/
__pyx_t_11 = __pyx_f_5spacy_5spacy_hash_string(__pyx_v_token_string, __pyx_v_length); if (unlikely(__pyx_t_11 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_v_hashed = __pyx_t_11;
/* "spacy/spacy.pyx":25
* length = len(token_string)
* hashed = hash_string(token_string, length)
* word.tail = _add(vocab, bacov, <Splitter>NULL, hashed, lex, 0, len(lex)) # <<<<<<<<<<<<<<
* word = word.tail
*
*/
if (!(likely(PyUnicode_CheckExact(__pyx_v_lex))||((__pyx_v_lex) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "unicode", Py_TYPE(__pyx_v_lex)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_t_10 = PyObject_Length(__pyx_v_lex); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_t_13 = __pyx_f_5spacy_5spacy__add(__pyx_v_vocab, __pyx_v_bacov, ((__pyx_t_5spacy_5spacy_Splitter)NULL), __pyx_v_hashed, ((PyObject*)__pyx_v_lex), 0, __pyx_t_10); if (unlikely(__pyx_t_13 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_v_word->tail = __pyx_t_13;
/* "spacy/spacy.pyx":26
* hashed = hash_string(token_string, length)
* word.tail = _add(vocab, bacov, <Splitter>NULL, hashed, lex, 0, len(lex))
* word = word.tail # <<<<<<<<<<<<<<
*
*
*/
__pyx_t_13 = __pyx_v_word->tail;
__pyx_v_word = __pyx_t_13;
}
__Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
}
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
/* "spacy/spacy.pyx":14
*
*
* cdef load_tokenization(Vocab& vocab, dict bacov, token_rules): # <<<<<<<<<<<<<<
* cdef Lexeme* word
* cdef StringHash hashed
*/
/* function exit code */
__pyx_r = Py_None; __Pyx_INCREF(Py_None);
goto __pyx_L0;
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_XDECREF(__pyx_t_4);
__Pyx_XDECREF(__pyx_t_5);
__Pyx_XDECREF(__pyx_t_6);
__Pyx_XDECREF(__pyx_t_7);
__Pyx_XDECREF(__pyx_t_8);
__Pyx_AddTraceback("spacy.spacy.load_tokenization", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = 0;
__pyx_L0:;
__Pyx_XDECREF(__pyx_v_chunk);
__Pyx_XDECREF(__pyx_v_lex);
__Pyx_XDECREF(__pyx_v_tokens);
__Pyx_XDECREF(__pyx_v_i);
__Pyx_XDECREF(__pyx_v_token_string);
__Pyx_XGIVEREF(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* "spacy/spacy.pyx":29
*
*
* cdef vector[Lexeme_addr] tokenize(Vocab& vocab, dict bacov, Splitter splitter, # <<<<<<<<<<<<<<
* unicode string) except *:
* cdef size_t length = len(string)
*/
static std::vector<__pyx_t_5spacy_5spacy_Lexeme_addr> __pyx_f_5spacy_5spacy_tokenize(__pyx_t_5spacy_5spacy_Vocab &__pyx_v_vocab, PyObject *__pyx_v_bacov, __pyx_t_5spacy_5spacy_Splitter __pyx_v_splitter, PyObject *__pyx_v_string) {
size_t __pyx_v_length;
Py_UNICODE *__pyx_v_characters;
size_t __pyx_v_i;
Py_UNICODE __pyx_v_c;
std::vector<__pyx_t_5spacy_5spacy_Lexeme_addr> __pyx_v_tokens;
PyObject *__pyx_v_current = 0;
struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_v_token;
std::vector<__pyx_t_5spacy_5spacy_Lexeme_addr> __pyx_r;
__Pyx_RefNannyDeclarations
Py_ssize_t __pyx_t_1;
Py_UNICODE *__pyx_t_2;
std::vector<__pyx_t_5spacy_5spacy_Lexeme_addr> __pyx_t_3;
size_t __pyx_t_4;
size_t __pyx_t_5;
int __pyx_t_6;
__pyx_t_5spacy_5spacy_Lexeme_addr __pyx_t_7;
struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_t_8;
PyObject *__pyx_t_9 = NULL;
PyObject *__pyx_t_10 = NULL;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("tokenize", 0);
/* "spacy/spacy.pyx":31
* cdef vector[Lexeme_addr] tokenize(Vocab& vocab, dict bacov, Splitter splitter,
* unicode string) except *:
* cdef size_t length = len(string) # <<<<<<<<<<<<<<
* cdef Py_UNICODE* characters = <Py_UNICODE*>string
*
*/
if (unlikely(__pyx_v_string == Py_None)) {
PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()");
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 31; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_t_1 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_string); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 31; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_v_length = __pyx_t_1;
/* "spacy/spacy.pyx":32
* unicode string) except *:
* cdef size_t length = len(string)
* cdef Py_UNICODE* characters = <Py_UNICODE*>string # <<<<<<<<<<<<<<
*
* cdef size_t i
*/
__pyx_t_2 = __Pyx_PyUnicode_AsUnicode(__pyx_v_string); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_v_characters = ((Py_UNICODE *)__pyx_t_2);
/* "spacy/spacy.pyx":37
* cdef Py_UNICODE c
*
* cdef vector[Lexeme_addr] tokens = vector[Lexeme_addr]() # <<<<<<<<<<<<<<
* cdef unicode current = u''
* cdef Lexeme* token
*/
try {
__pyx_t_3 = std::vector<__pyx_t_5spacy_5spacy_Lexeme_addr>();
} catch(...) {
__Pyx_CppExn2PyErr();
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_v_tokens = __pyx_t_3;
/* "spacy/spacy.pyx":38
*
* cdef vector[Lexeme_addr] tokens = vector[Lexeme_addr]()
* cdef unicode current = u'' # <<<<<<<<<<<<<<
* cdef Lexeme* token
* for i in range(length):
*/
__Pyx_INCREF(__pyx_kp_u_);
__pyx_v_current = __pyx_kp_u_;
/* "spacy/spacy.pyx":40
* cdef unicode current = u''
* cdef Lexeme* token
* for i in range(length): # <<<<<<<<<<<<<<
* c = characters[i]
* if is_whitespace(c):
*/
__pyx_t_4 = __pyx_v_length;
for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) {
__pyx_v_i = __pyx_t_5;
/* "spacy/spacy.pyx":41
* cdef Lexeme* token
* for i in range(length):
* c = characters[i] # <<<<<<<<<<<<<<
* if is_whitespace(c):
* if current:
*/
__pyx_v_c = (__pyx_v_characters[__pyx_v_i]);
/* "spacy/spacy.pyx":42
* for i in range(length):
* c = characters[i]
* if is_whitespace(c): # <<<<<<<<<<<<<<
* if current:
* token = <Lexeme*>lookup(vocab, bacov, splitter, -1, current)
*/
__pyx_t_6 = (__pyx_f_5spacy_12string_tools_is_whitespace(__pyx_v_c) != 0);
if (__pyx_t_6) {
/* "spacy/spacy.pyx":43
* c = characters[i]
* if is_whitespace(c):
* if current: # <<<<<<<<<<<<<<
* token = <Lexeme*>lookup(vocab, bacov, splitter, -1, current)
* while token != NULL:
*/
__pyx_t_6 = (__pyx_v_current != Py_None) && (PyUnicode_GET_SIZE(__pyx_v_current) != 0);
if (__pyx_t_6) {
/* "spacy/spacy.pyx":44
* if is_whitespace(c):
* if current:
* token = <Lexeme*>lookup(vocab, bacov, splitter, -1, current) # <<<<<<<<<<<<<<
* while token != NULL:
* tokens.push_back(<Lexeme_addr>token)
*/
__pyx_t_7 = __pyx_f_5spacy_5spacy_lookup(__pyx_v_vocab, __pyx_v_bacov, __pyx_v_splitter, -1, __pyx_v_current); if (unlikely(__pyx_t_7 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_v_token = ((struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_t_7);
/* "spacy/spacy.pyx":45
* if current:
* token = <Lexeme*>lookup(vocab, bacov, splitter, -1, current)
* while token != NULL: # <<<<<<<<<<<<<<
* tokens.push_back(<Lexeme_addr>token)
* token = token.tail
*/
while (1) {
__pyx_t_6 = ((__pyx_v_token != NULL) != 0);
if (!__pyx_t_6) break;
/* "spacy/spacy.pyx":46
* token = <Lexeme*>lookup(vocab, bacov, splitter, -1, current)
* while token != NULL:
* tokens.push_back(<Lexeme_addr>token) # <<<<<<<<<<<<<<
* token = token.tail
* current = u''
*/
__pyx_v_tokens.push_back(((__pyx_t_5spacy_5spacy_Lexeme_addr)__pyx_v_token));
/* "spacy/spacy.pyx":47
* while token != NULL:
* tokens.push_back(<Lexeme_addr>token)
* token = token.tail # <<<<<<<<<<<<<<
* current = u''
* else:
*/
__pyx_t_8 = __pyx_v_token->tail;
__pyx_v_token = __pyx_t_8;
}
goto __pyx_L6;
}
__pyx_L6:;
/* "spacy/spacy.pyx":48
* tokens.push_back(<Lexeme_addr>token)
* token = token.tail
* current = u'' # <<<<<<<<<<<<<<
* else:
* current += c
*/
__Pyx_INCREF(__pyx_kp_u_);
__Pyx_DECREF_SET(__pyx_v_current, __pyx_kp_u_);
goto __pyx_L5;
}
/*else*/ {
/* "spacy/spacy.pyx":50
* current = u''
* else:
* current += c # <<<<<<<<<<<<<<
* if current:
* token = <Lexeme*>lookup(vocab, bacov, splitter, -1, current)
*/
__pyx_t_9 = PyUnicode_FromOrdinal(__pyx_v_c); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_9);
__pyx_t_10 = __Pyx_PyUnicode_ConcatSafe(__pyx_v_current, __pyx_t_9); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_10);
__Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
__Pyx_DECREF_SET(__pyx_v_current, ((PyObject*)__pyx_t_10));
__pyx_t_10 = 0;
}
__pyx_L5:;
}
/* "spacy/spacy.pyx":51
* else:
* current += c
* if current: # <<<<<<<<<<<<<<
* token = <Lexeme*>lookup(vocab, bacov, splitter, -1, current)
* while token != NULL:
*/
__pyx_t_6 = (__pyx_v_current != Py_None) && (PyUnicode_GET_SIZE(__pyx_v_current) != 0);
if (__pyx_t_6) {
/* "spacy/spacy.pyx":52
* current += c
* if current:
* token = <Lexeme*>lookup(vocab, bacov, splitter, -1, current) # <<<<<<<<<<<<<<
* while token != NULL:
* tokens.push_back(<Lexeme_addr>token)
*/
__pyx_t_7 = __pyx_f_5spacy_5spacy_lookup(__pyx_v_vocab, __pyx_v_bacov, __pyx_v_splitter, -1, __pyx_v_current); if (unlikely(__pyx_t_7 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_v_token = ((struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_t_7);
/* "spacy/spacy.pyx":53
* if current:
* token = <Lexeme*>lookup(vocab, bacov, splitter, -1, current)
* while token != NULL: # <<<<<<<<<<<<<<
* tokens.push_back(<Lexeme_addr>token)
* token = token.tail
*/
while (1) {
__pyx_t_6 = ((__pyx_v_token != NULL) != 0);
if (!__pyx_t_6) break;
/* "spacy/spacy.pyx":54
* token = <Lexeme*>lookup(vocab, bacov, splitter, -1, current)
* while token != NULL:
* tokens.push_back(<Lexeme_addr>token) # <<<<<<<<<<<<<<
* token = token.tail
* return tokens
*/
__pyx_v_tokens.push_back(((__pyx_t_5spacy_5spacy_Lexeme_addr)__pyx_v_token));
/* "spacy/spacy.pyx":55
* while token != NULL:
* tokens.push_back(<Lexeme_addr>token)
* token = token.tail # <<<<<<<<<<<<<<
* return tokens
*
*/
__pyx_t_8 = __pyx_v_token->tail;
__pyx_v_token = __pyx_t_8;
}
goto __pyx_L9;
}
__pyx_L9:;
/* "spacy/spacy.pyx":56
* tokens.push_back(<Lexeme_addr>token)
* token = token.tail
* return tokens # <<<<<<<<<<<<<<
*
*
*/
__pyx_r = __pyx_v_tokens;
goto __pyx_L0;
/* "spacy/spacy.pyx":29
*
*
* cdef vector[Lexeme_addr] tokenize(Vocab& vocab, dict bacov, Splitter splitter, # <<<<<<<<<<<<<<
* unicode string) except *:
* cdef size_t length = len(string)
*/
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_9);
__Pyx_XDECREF(__pyx_t_10);
__Pyx_AddTraceback("spacy.spacy.tokenize", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_L0:;
__Pyx_XDECREF(__pyx_v_current);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* "spacy/spacy.pyx":59
*
*
* cdef Lexeme_addr lookup(Vocab& vocab, dict bacov, Splitter find_split, int start, # <<<<<<<<<<<<<<
* unicode string) except 0:
* '''Fetch a Lexeme representing a word string. If the word has not been seen,
*/
static __pyx_t_5spacy_5spacy_Lexeme_addr __pyx_f_5spacy_5spacy_lookup(__pyx_t_5spacy_5spacy_Vocab &__pyx_v_vocab, PyObject *__pyx_v_bacov, __pyx_t_5spacy_5spacy_Splitter __pyx_v_find_split, int __pyx_v_start, PyObject *__pyx_v_string) {
size_t __pyx_v_length;
__pyx_t_5spacy_5spacy_StringHash __pyx_v_hashed;
struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_v_word_ptr;
__pyx_t_5spacy_5spacy_Lexeme_addr __pyx_r;
__Pyx_RefNannyDeclarations
int __pyx_t_1;
int __pyx_t_2;
Py_ssize_t __pyx_t_3;
__pyx_t_5spacy_5spacy_StringHash __pyx_t_4;
int __pyx_t_5;
struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_t_6;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("lookup", 0);
/* "spacy/spacy.pyx":67
* To specify the boundaries of the word if it has not been seen, use lookup_chunk.
* '''
* if string == '': # <<<<<<<<<<<<<<
* return <Lexeme_addr>&BLANK_WORD
* cdef size_t length = len(string)
*/
__pyx_t_1 = (__Pyx_PyUnicode_Equals(__pyx_v_string, __pyx_kp_u_, Py_EQ)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_t_2 = (__pyx_t_1 != 0);
if (__pyx_t_2) {
/* "spacy/spacy.pyx":68
* '''
* if string == '':
* return <Lexeme_addr>&BLANK_WORD # <<<<<<<<<<<<<<
* cdef size_t length = len(string)
* cdef StringHash hashed = hash_string(string, length)
*/
__pyx_r = ((__pyx_t_5spacy_5spacy_Lexeme_addr)(&__pyx_v_5spacy_6lexeme_BLANK_WORD));
goto __pyx_L0;
}
/* "spacy/spacy.pyx":69
* if string == '':
* return <Lexeme_addr>&BLANK_WORD
* cdef size_t length = len(string) # <<<<<<<<<<<<<<
* cdef StringHash hashed = hash_string(string, length)
* cdef Lexeme* word_ptr = <Lexeme*>vocab[hashed]
*/
if (unlikely(__pyx_v_string == Py_None)) {
PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()");
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_t_3 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_string); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_v_length = __pyx_t_3;
/* "spacy/spacy.pyx":70
* return <Lexeme_addr>&BLANK_WORD
* cdef size_t length = len(string)
* cdef StringHash hashed = hash_string(string, length) # <<<<<<<<<<<<<<
* cdef Lexeme* word_ptr = <Lexeme*>vocab[hashed]
* if word_ptr == NULL:
*/
__pyx_t_4 = __pyx_f_5spacy_5spacy_hash_string(__pyx_v_string, __pyx_v_length); if (unlikely(__pyx_t_4 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_v_hashed = __pyx_t_4;
/* "spacy/spacy.pyx":71
* cdef size_t length = len(string)
* cdef StringHash hashed = hash_string(string, length)
* cdef Lexeme* word_ptr = <Lexeme*>vocab[hashed] # <<<<<<<<<<<<<<
* if word_ptr == NULL:
* start = find_split(string, length) if start == -1 else start
*/
__pyx_v_word_ptr = ((struct __pyx_t_5spacy_6lexeme_Lexeme *)(__pyx_v_vocab[__pyx_v_hashed]));
/* "spacy/spacy.pyx":72
* cdef StringHash hashed = hash_string(string, length)
* cdef Lexeme* word_ptr = <Lexeme*>vocab[hashed]
* if word_ptr == NULL: # <<<<<<<<<<<<<<
* start = find_split(string, length) if start == -1 else start
* word_ptr = _add(vocab, bacov, find_split, hashed, string, start, length)
*/
__pyx_t_2 = ((__pyx_v_word_ptr == NULL) != 0);
if (__pyx_t_2) {
/* "spacy/spacy.pyx":73
* cdef Lexeme* word_ptr = <Lexeme*>vocab[hashed]
* if word_ptr == NULL:
* start = find_split(string, length) if start == -1 else start # <<<<<<<<<<<<<<
* word_ptr = _add(vocab, bacov, find_split, hashed, string, start, length)
* return <Lexeme_addr>word_ptr
*/
if (((__pyx_v_start == -1) != 0)) {
__pyx_t_5 = __pyx_v_find_split(__pyx_v_string, __pyx_v_length);
} else {
__pyx_t_5 = __pyx_v_start;
}
__pyx_v_start = __pyx_t_5;
/* "spacy/spacy.pyx":74
* if word_ptr == NULL:
* start = find_split(string, length) if start == -1 else start
* word_ptr = _add(vocab, bacov, find_split, hashed, string, start, length) # <<<<<<<<<<<<<<
* return <Lexeme_addr>word_ptr
*
*/
__pyx_t_6 = __pyx_f_5spacy_5spacy__add(__pyx_v_vocab, __pyx_v_bacov, __pyx_v_find_split, __pyx_v_hashed, __pyx_v_string, __pyx_v_start, __pyx_v_length); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_v_word_ptr = __pyx_t_6;
goto __pyx_L4;
}
__pyx_L4:;
/* "spacy/spacy.pyx":75
* start = find_split(string, length) if start == -1 else start
* word_ptr = _add(vocab, bacov, find_split, hashed, string, start, length)
* return <Lexeme_addr>word_ptr # <<<<<<<<<<<<<<
*
*
*/
__pyx_r = ((__pyx_t_5spacy_5spacy_Lexeme_addr)__pyx_v_word_ptr);
goto __pyx_L0;
/* "spacy/spacy.pyx":59
*
*
* cdef Lexeme_addr lookup(Vocab& vocab, dict bacov, Splitter find_split, int start, # <<<<<<<<<<<<<<
* unicode string) except 0:
* '''Fetch a Lexeme representing a word string. If the word has not been seen,
*/
/* function exit code */
__pyx_L1_error:;
__Pyx_AddTraceback("spacy.spacy.lookup", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = 0;
__pyx_L0:;
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* "spacy/spacy.pyx":78
*
*
* cpdef vector[size_t] expand_chunk(size_t addr) except *: # <<<<<<<<<<<<<<
* cdef vector[size_t] tokens = vector[size_t]()
* word = <Lexeme*>addr
*/
static PyObject *__pyx_pw_5spacy_5spacy_1expand_chunk(PyObject *__pyx_self, PyObject *__pyx_arg_addr); /*proto*/
static std::vector<size_t> __pyx_f_5spacy_5spacy_expand_chunk(size_t __pyx_v_addr, CYTHON_UNUSED int __pyx_skip_dispatch) {
std::vector<size_t> __pyx_v_tokens;
struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_v_word;
std::vector<size_t> __pyx_r;
__Pyx_RefNannyDeclarations
std::vector<size_t> __pyx_t_1;
int __pyx_t_2;
struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_t_3;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("expand_chunk", 0);
/* "spacy/spacy.pyx":79
*
* cpdef vector[size_t] expand_chunk(size_t addr) except *:
* cdef vector[size_t] tokens = vector[size_t]() # <<<<<<<<<<<<<<
* word = <Lexeme*>addr
* while word is not NULL:
*/
try {
__pyx_t_1 = std::vector<size_t>();
} catch(...) {
__Pyx_CppExn2PyErr();
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 79; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_v_tokens = __pyx_t_1;
/* "spacy/spacy.pyx":80
* cpdef vector[size_t] expand_chunk(size_t addr) except *:
* cdef vector[size_t] tokens = vector[size_t]()
* word = <Lexeme*>addr # <<<<<<<<<<<<<<
* while word is not NULL:
* tokens.push_back(<size_t>word)
*/
__pyx_v_word = ((struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_v_addr);
/* "spacy/spacy.pyx":81
* cdef vector[size_t] tokens = vector[size_t]()
* word = <Lexeme*>addr
* while word is not NULL: # <<<<<<<<<<<<<<
* tokens.push_back(<size_t>word)
* word = word.tail
*/
while (1) {
__pyx_t_2 = ((__pyx_v_word != NULL) != 0);
if (!__pyx_t_2) break;
/* "spacy/spacy.pyx":82
* word = <Lexeme*>addr
* while word is not NULL:
* tokens.push_back(<size_t>word) # <<<<<<<<<<<<<<
* word = word.tail
* return tokens
*/
__pyx_v_tokens.push_back(((size_t)__pyx_v_word));
/* "spacy/spacy.pyx":83
* while word is not NULL:
* tokens.push_back(<size_t>word)
* word = word.tail # <<<<<<<<<<<<<<
* return tokens
*
*/
__pyx_t_3 = __pyx_v_word->tail;
__pyx_v_word = __pyx_t_3;
}
/* "spacy/spacy.pyx":84
* tokens.push_back(<size_t>word)
* word = word.tail
* return tokens # <<<<<<<<<<<<<<
*
*
*/
__pyx_r = __pyx_v_tokens;
goto __pyx_L0;
/* "spacy/spacy.pyx":78
*
*
* cpdef vector[size_t] expand_chunk(size_t addr) except *: # <<<<<<<<<<<<<<
* cdef vector[size_t] tokens = vector[size_t]()
* word = <Lexeme*>addr
*/
/* function exit code */
__pyx_L1_error:;
__Pyx_AddTraceback("spacy.spacy.expand_chunk", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_L0:;
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* Python wrapper */
static PyObject *__pyx_pw_5spacy_5spacy_1expand_chunk(PyObject *__pyx_self, PyObject *__pyx_arg_addr); /*proto*/
static PyObject *__pyx_pw_5spacy_5spacy_1expand_chunk(PyObject *__pyx_self, PyObject *__pyx_arg_addr) {
size_t __pyx_v_addr;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("expand_chunk (wrapper)", 0);
assert(__pyx_arg_addr); {
__pyx_v_addr = __Pyx_PyInt_As_size_t(__pyx_arg_addr); if (unlikely((__pyx_v_addr == (size_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
}
goto __pyx_L4_argument_unpacking_done;
__pyx_L3_error:;
__Pyx_AddTraceback("spacy.spacy.expand_chunk", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_RefNannyFinishContext();
return NULL;
__pyx_L4_argument_unpacking_done:;
__pyx_r = __pyx_pf_5spacy_5spacy_expand_chunk(__pyx_self, ((size_t)__pyx_v_addr));
/* function exit code */
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
static PyObject *__pyx_pf_5spacy_5spacy_expand_chunk(CYTHON_UNUSED PyObject *__pyx_self, size_t __pyx_v_addr) {
PyObject *__pyx_r = NULL;
__Pyx_RefNannyDeclarations
std::vector<size_t> __pyx_t_1;
PyObject *__pyx_t_2 = NULL;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("expand_chunk", 0);
__Pyx_XDECREF(__pyx_r);
__pyx_t_1 = __pyx_f_5spacy_5spacy_expand_chunk(__pyx_v_addr, 0); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_t_2 = __pyx_convert_vector_to_py_size_t(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_2);
__pyx_r = __pyx_t_2;
__pyx_t_2 = 0;
goto __pyx_L0;
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_2);
__Pyx_AddTraceback("spacy.spacy.expand_chunk", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = NULL;
__pyx_L0:;
__Pyx_XGIVEREF(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* "spacy/spacy.pyx":87
*
*
* cdef StringHash hash_string(unicode s, size_t length) except 0: # <<<<<<<<<<<<<<
* '''Hash unicode with MurmurHash64A'''
* assert length
*/
static __pyx_t_5spacy_5spacy_StringHash __pyx_f_5spacy_5spacy_hash_string(PyObject *__pyx_v_s, size_t __pyx_v_length) {
__pyx_t_5spacy_5spacy_StringHash __pyx_r;
__Pyx_RefNannyDeclarations
Py_UNICODE *__pyx_t_1;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("hash_string", 0);
/* "spacy/spacy.pyx":89
* cdef StringHash hash_string(unicode s, size_t length) except 0:
* '''Hash unicode with MurmurHash64A'''
* assert length # <<<<<<<<<<<<<<
* return MurmurHash64A(<Py_UNICODE*>s, length * sizeof(Py_UNICODE), 0)
*
*/
#ifndef CYTHON_WITHOUT_ASSERTIONS
if (unlikely(!Py_OptimizeFlag)) {
if (unlikely(!(__pyx_v_length != 0))) {
PyErr_SetNone(PyExc_AssertionError);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 89; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
}
#endif
/* "spacy/spacy.pyx":90
* '''Hash unicode with MurmurHash64A'''
* assert length
* return MurmurHash64A(<Py_UNICODE*>s, length * sizeof(Py_UNICODE), 0) # <<<<<<<<<<<<<<
*
*
*/
__pyx_t_1 = __Pyx_PyUnicode_AsUnicode(__pyx_v_s); if (unlikely((!__pyx_t_1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_r = MurmurHash64A(((Py_UNICODE *)__pyx_t_1), (__pyx_v_length * (sizeof(Py_UNICODE))), 0);
goto __pyx_L0;
/* "spacy/spacy.pyx":87
*
*
* cdef StringHash hash_string(unicode s, size_t length) except 0: # <<<<<<<<<<<<<<
* '''Hash unicode with MurmurHash64A'''
* assert length
*/
/* function exit code */
__pyx_L1_error:;
__Pyx_AddTraceback("spacy.spacy.hash_string", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = 0;
__pyx_L0:;
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* "spacy/spacy.pyx":93
*
*
* cdef unicode unhash(dict bacov, StringHash hash_value): # <<<<<<<<<<<<<<
* '''Fetch a string from the reverse index, given its hash value.'''
* return bacov[hash_value]
*/
static PyObject *__pyx_f_5spacy_5spacy_unhash(PyObject *__pyx_v_bacov, __pyx_t_5spacy_5spacy_StringHash __pyx_v_hash_value) {
PyObject *__pyx_r = NULL;
__Pyx_RefNannyDeclarations
PyObject *__pyx_t_1 = NULL;
PyObject *__pyx_t_2 = NULL;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("unhash", 0);
/* "spacy/spacy.pyx":95
* cdef unicode unhash(dict bacov, StringHash hash_value):
* '''Fetch a string from the reverse index, given its hash value.'''
* return bacov[hash_value] # <<<<<<<<<<<<<<
*
*
*/
__Pyx_XDECREF(__pyx_r);
if (unlikely(__pyx_v_bacov == Py_None)) {
PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_t_1 = __Pyx_PyInt_From_uint64_t(__pyx_v_hash_value); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_1);
__pyx_t_2 = __Pyx_PyDict_GetItem(__pyx_v_bacov, __pyx_t_1); if (unlikely(__pyx_t_2 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
__Pyx_GOTREF(__pyx_t_2);
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
if (!(likely(PyUnicode_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "unicode", Py_TYPE(__pyx_t_2)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_r = ((PyObject*)__pyx_t_2);
__pyx_t_2 = 0;
goto __pyx_L0;
/* "spacy/spacy.pyx":93
*
*
* cdef unicode unhash(dict bacov, StringHash hash_value): # <<<<<<<<<<<<<<
* '''Fetch a string from the reverse index, given its hash value.'''
* return bacov[hash_value]
*/
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_XDECREF(__pyx_t_2);
__Pyx_AddTraceback("spacy.spacy.unhash", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = 0;
__pyx_L0:;
__Pyx_XGIVEREF(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* "spacy/spacy.pyx":98
*
*
* cdef Lexeme* _add(Vocab& vocab, dict bacov, Splitter find_split, StringHash hashed, # <<<<<<<<<<<<<<
* unicode string, int split, size_t length) except NULL:
* assert string
*/
static struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_f_5spacy_5spacy__add(__pyx_t_5spacy_5spacy_Vocab &__pyx_v_vocab, PyObject *__pyx_v_bacov, __pyx_t_5spacy_5spacy_Splitter __pyx_v_find_split, __pyx_t_5spacy_5spacy_StringHash __pyx_v_hashed, PyObject *__pyx_v_string, int __pyx_v_split, size_t __pyx_v_length) {
struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_v_word;
struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_r;
__Pyx_RefNannyDeclarations
int __pyx_t_1;
struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_t_2;
PyObject *__pyx_t_3 = NULL;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("_add", 0);
/* "spacy/spacy.pyx":100
* cdef Lexeme* _add(Vocab& vocab, dict bacov, Splitter find_split, StringHash hashed,
* unicode string, int split, size_t length) except NULL:
* assert string # <<<<<<<<<<<<<<
* assert split <= length
* word = init_lexeme(vocab, bacov, find_split, string, hashed, split, length)
*/
#ifndef CYTHON_WITHOUT_ASSERTIONS
if (unlikely(!Py_OptimizeFlag)) {
__pyx_t_1 = (__pyx_v_string != Py_None) && (PyUnicode_GET_SIZE(__pyx_v_string) != 0);
if (unlikely(!__pyx_t_1)) {
PyErr_SetNone(PyExc_AssertionError);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
}
#endif
/* "spacy/spacy.pyx":101
* unicode string, int split, size_t length) except NULL:
* assert string
* assert split <= length # <<<<<<<<<<<<<<
* word = init_lexeme(vocab, bacov, find_split, string, hashed, split, length)
* vocab[hashed] = <Lexeme_addr>word
*/
#ifndef CYTHON_WITHOUT_ASSERTIONS
if (unlikely(!Py_OptimizeFlag)) {
if (unlikely(!((__pyx_v_split <= __pyx_v_length) != 0))) {
PyErr_SetNone(PyExc_AssertionError);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 101; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
}
#endif
/* "spacy/spacy.pyx":102
* assert string
* assert split <= length
* word = init_lexeme(vocab, bacov, find_split, string, hashed, split, length) # <<<<<<<<<<<<<<
* vocab[hashed] = <Lexeme_addr>word
* bacov[hashed] = string
*/
__pyx_t_2 = __pyx_f_5spacy_6lexeme_init_lexeme(__pyx_v_vocab, __pyx_v_bacov, __pyx_v_find_split, __pyx_v_string, __pyx_v_hashed, __pyx_v_split, __pyx_v_length); if (unlikely(__pyx_t_2 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_v_word = __pyx_t_2;
/* "spacy/spacy.pyx":103
* assert split <= length
* word = init_lexeme(vocab, bacov, find_split, string, hashed, split, length)
* vocab[hashed] = <Lexeme_addr>word # <<<<<<<<<<<<<<
* bacov[hashed] = string
* return word
*/
(__pyx_v_vocab[__pyx_v_hashed]) = ((__pyx_t_5spacy_5spacy_Lexeme_addr)__pyx_v_word);
/* "spacy/spacy.pyx":104
* word = init_lexeme(vocab, bacov, find_split, string, hashed, split, length)
* vocab[hashed] = <Lexeme_addr>word
* bacov[hashed] = string # <<<<<<<<<<<<<<
* return word
*/
if (unlikely(__pyx_v_bacov == Py_None)) {
PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_t_3 = __Pyx_PyInt_From_uint64_t(__pyx_v_hashed); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_3);
if (unlikely(PyDict_SetItem(__pyx_v_bacov, __pyx_t_3, __pyx_v_string) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
/* "spacy/spacy.pyx":105
* vocab[hashed] = <Lexeme_addr>word
* bacov[hashed] = string
* return word # <<<<<<<<<<<<<<
*/
__pyx_r = __pyx_v_word;
goto __pyx_L0;
/* "spacy/spacy.pyx":98
*
*
* cdef Lexeme* _add(Vocab& vocab, dict bacov, Splitter find_split, StringHash hashed, # <<<<<<<<<<<<<<
* unicode string, int split, size_t length) except NULL:
* assert string
*/
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_3);
__Pyx_AddTraceback("spacy.spacy._add", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = NULL;
__pyx_L0:;
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* "vector.to_py":63
*
* @cname("__pyx_convert_vector_to_py_size_t")
* cdef object __pyx_convert_vector_to_py_size_t(vector[X]& v): # <<<<<<<<<<<<<<
* return [X_to_py(v[i]) for i in range(v.size())]
*
*/
static PyObject *__pyx_convert_vector_to_py_size_t(const std::vector<size_t> &__pyx_v_v) {
size_t __pyx_v_i;
PyObject *__pyx_r = NULL;
__Pyx_RefNannyDeclarations
PyObject *__pyx_t_1 = NULL;
size_t __pyx_t_2;
size_t __pyx_t_3;
PyObject *__pyx_t_4 = NULL;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("__pyx_convert_vector_to_py_size_t", 0);
/* "vector.to_py":64
* @cname("__pyx_convert_vector_to_py_size_t")
* cdef object __pyx_convert_vector_to_py_size_t(vector[X]& v):
* return [X_to_py(v[i]) for i in range(v.size())] # <<<<<<<<<<<<<<
*
*
*/
__Pyx_XDECREF(__pyx_r);
__pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_1);
__pyx_t_2 = __pyx_v_v.size();
for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) {
__pyx_v_i = __pyx_t_3;
__pyx_t_4 = __Pyx_PyInt_FromSize_t((__pyx_v_v[__pyx_v_i])); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_4);
if (unlikely(__Pyx_ListComp_Append(__pyx_t_1, (PyObject*)__pyx_t_4))) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
}
__pyx_r = __pyx_t_1;
__pyx_t_1 = 0;
goto __pyx_L0;
/* "vector.to_py":63
*
* @cname("__pyx_convert_vector_to_py_size_t")
* cdef object __pyx_convert_vector_to_py_size_t(vector[X]& v): # <<<<<<<<<<<<<<
* return [X_to_py(v[i]) for i in range(v.size())]
*
*/
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_XDECREF(__pyx_t_4);
__Pyx_AddTraceback("vector.to_py.__pyx_convert_vector_to_py_size_t", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = 0;
__pyx_L0:;
__Pyx_XGIVEREF(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
static PyMethodDef __pyx_methods[] = {
{__Pyx_NAMESTR("expand_chunk"), (PyCFunction)__pyx_pw_5spacy_5spacy_1expand_chunk, METH_O, __Pyx_DOCSTR(0)},
{0, 0, 0, 0}
};
#if PY_MAJOR_VERSION >= 3
static struct PyModuleDef __pyx_moduledef = {
#if PY_VERSION_HEX < 0x03020000
{ PyObject_HEAD_INIT(NULL) NULL, 0, NULL },
#else
PyModuleDef_HEAD_INIT,
#endif
__Pyx_NAMESTR("spacy"),
0, /* m_doc */
-1, /* m_size */
__pyx_methods /* m_methods */,
NULL, /* m_reload */
NULL, /* m_traverse */
NULL, /* m_clear */
NULL /* m_free */
};
#endif
static __Pyx_StringTabEntry __pyx_string_tab[] = {
{&__pyx_n_s_, __pyx_k_, sizeof(__pyx_k_), 0, 0, 1, 1},
{&__pyx_kp_u_, __pyx_k_, sizeof(__pyx_k_), 0, 1, 0, 0},
{&__pyx_n_s_enumerate, __pyx_k_enumerate, sizeof(__pyx_k_enumerate), 0, 0, 1, 1},
{&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1},
{&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1},
{&__pyx_n_s_range, __pyx_k_range, sizeof(__pyx_k_range), 0, 0, 1, 1},
{&__pyx_kp_u_s_d_s, __pyx_k_s_d_s, sizeof(__pyx_k_s_d_s), 0, 1, 0, 0},
{&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1},
{&__pyx_n_s_util, __pyx_k_util, sizeof(__pyx_k_util), 0, 0, 1, 1},
{0, 0, 0, 0, 0, 0, 0}
};
static int __Pyx_InitCachedBuiltins(void) {
__pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
return 0;
__pyx_L1_error:;
return -1;
}
static int __Pyx_InitCachedConstants(void) {
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0);
__Pyx_RefNannyFinishContext();
return 0;
}
static int __Pyx_InitGlobals(void) {
if (__Pyx_InitStrings(__pyx_string_tab) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
__pyx_int_0 = PyInt_FromLong(0); if (unlikely(!__pyx_int_0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_int_1 = PyInt_FromLong(1); if (unlikely(!__pyx_int_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
return 0;
__pyx_L1_error:;
return -1;
}
#if PY_MAJOR_VERSION < 3
PyMODINIT_FUNC initspacy(void); /*proto*/
PyMODINIT_FUNC initspacy(void)
#else
PyMODINIT_FUNC PyInit_spacy(void); /*proto*/
PyMODINIT_FUNC PyInit_spacy(void)
#endif
{
PyObject *__pyx_t_1 = NULL;
PyObject *__pyx_t_2 = NULL;
PyObject *__pyx_t_3 = NULL;
PyObject *__pyx_t_4 = NULL;
PyObject *__pyx_t_5 = NULL;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannyDeclarations
#if CYTHON_REFNANNY
__Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny");
if (!__Pyx_RefNanny) {
PyErr_Clear();
__Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny");
if (!__Pyx_RefNanny)
Py_FatalError("failed to import 'refnanny' module");
}
#endif
__Pyx_RefNannySetupContext("PyMODINIT_FUNC PyInit_spacy(void)", 0);
if ( __Pyx_check_binary_version() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
#ifdef __Pyx_CyFunction_USED
if (__Pyx_CyFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
#endif
#ifdef __Pyx_FusedFunction_USED
if (__pyx_FusedFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
#endif
#ifdef __Pyx_Generator_USED
if (__pyx_Generator_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
#endif
/*--- Library function declarations ---*/
/*--- Threads initialization code ---*/
#if defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS
#ifdef WITH_THREAD /* Python build with threading support? */
PyEval_InitThreads();
#endif
#endif
/*--- Module creation code ---*/
#if PY_MAJOR_VERSION < 3
__pyx_m = Py_InitModule4(__Pyx_NAMESTR("spacy"), __pyx_methods, 0, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m);
#else
__pyx_m = PyModule_Create(&__pyx_moduledef);
#endif
if (unlikely(!__pyx_m)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
Py_INCREF(__pyx_d);
__pyx_b = PyImport_AddModule(__Pyx_NAMESTR(__Pyx_BUILTIN_MODULE_NAME)); if (unlikely(!__pyx_b)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
#if CYTHON_COMPILING_IN_PYPY
Py_INCREF(__pyx_b);
#endif
if (__Pyx_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
/*--- Initialize various global constants etc. ---*/
if (unlikely(__Pyx_InitGlobals() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
#if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT)
if (__Pyx_init_sys_getdefaultencoding_params() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
#endif
if (__pyx_module_is_main_spacy__spacy) {
if (__Pyx_SetAttrString(__pyx_m, "__name__", __pyx_n_s_main) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
}
#if PY_MAJOR_VERSION >= 3
{
PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
if (!PyDict_GetItemString(modules, "spacy.spacy")) {
if (unlikely(PyDict_SetItemString(modules, "spacy.spacy", __pyx_m) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
}
#endif
/*--- Builtin init code ---*/
if (unlikely(__Pyx_InitCachedBuiltins() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
/*--- Constants init code ---*/
if (unlikely(__Pyx_InitCachedConstants() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
/*--- Global init code ---*/
/*--- Variable export code ---*/
/*--- Function export code ---*/
if (__Pyx_ExportFunction("load_tokenization", (void (*)(void))__pyx_f_5spacy_5spacy_load_tokenization, "PyObject *(__pyx_t_5spacy_5spacy_Vocab &, PyObject *, PyObject *)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
if (__Pyx_ExportFunction("tokenize", (void (*)(void))__pyx_f_5spacy_5spacy_tokenize, "std::vector<__pyx_t_5spacy_5spacy_Lexeme_addr> (__pyx_t_5spacy_5spacy_Vocab &, PyObject *, __pyx_t_5spacy_5spacy_Splitter, PyObject *)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
if (__Pyx_ExportFunction("lookup", (void (*)(void))__pyx_f_5spacy_5spacy_lookup, "__pyx_t_5spacy_5spacy_Lexeme_addr (__pyx_t_5spacy_5spacy_Vocab &, PyObject *, __pyx_t_5spacy_5spacy_Splitter, int, PyObject *)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
if (__Pyx_ExportFunction("hash_string", (void (*)(void))__pyx_f_5spacy_5spacy_hash_string, "__pyx_t_5spacy_5spacy_StringHash (PyObject *, size_t)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
if (__Pyx_ExportFunction("unhash", (void (*)(void))__pyx_f_5spacy_5spacy_unhash, "PyObject *(PyObject *, __pyx_t_5spacy_5spacy_StringHash)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
if (__Pyx_ExportFunction("expand_chunk", (void (*)(void))__pyx_f_5spacy_5spacy_expand_chunk, "std::vector<size_t> (size_t, int __pyx_skip_dispatch)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
/*--- Type init code ---*/
/*--- Type import code ---*/
/*--- Variable import code ---*/
__pyx_t_1 = __Pyx_ImportModule("spacy.lexeme"); if (!__pyx_t_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
if (__Pyx_ImportVoidPtr(__pyx_t_1, "BLANK_WORD", (void **)&__pyx_vp_5spacy_6lexeme_BLANK_WORD, "struct __pyx_t_5spacy_6lexeme_Lexeme") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
Py_DECREF(__pyx_t_1); __pyx_t_1 = 0;
/*--- Function import code ---*/
__pyx_t_2 = __Pyx_ImportModule("spacy.lexeme"); if (!__pyx_t_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
if (__Pyx_ImportFunction(__pyx_t_2, "init_lexeme", (void (**)(void))&__pyx_f_5spacy_6lexeme_init_lexeme, "struct __pyx_t_5spacy_6lexeme_Lexeme *(__pyx_t_5spacy_5spacy_Vocab, PyObject *, __pyx_t_5spacy_5spacy_Splitter, PyObject *, __pyx_t_5spacy_6lexeme_StringHash, int, size_t)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
Py_DECREF(__pyx_t_2); __pyx_t_2 = 0;
__pyx_t_3 = __Pyx_ImportModule("spacy.string_tools"); if (!__pyx_t_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
if (__Pyx_ImportFunction(__pyx_t_3, "is_whitespace", (void (**)(void))&__pyx_f_5spacy_12string_tools_is_whitespace, "int (Py_UNICODE)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
Py_DECREF(__pyx_t_3); __pyx_t_3 = 0;
/*--- Execution code ---*/
/* "spacy/spacy.pyx":11
* from spacy.string_tools cimport is_whitespace
*
* from . import util # <<<<<<<<<<<<<<
*
*
*/
__pyx_t_4 = PyList_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_4);
__Pyx_INCREF(__pyx_n_s_util);
PyList_SET_ITEM(__pyx_t_4, 0, __pyx_n_s_util);
__Pyx_GIVEREF(__pyx_n_s_util);
__pyx_t_5 = __Pyx_Import(__pyx_n_s_, __pyx_t_4, 1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_5);
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
__pyx_t_4 = __Pyx_ImportFrom(__pyx_t_5, __pyx_n_s_util); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_4);
if (PyDict_SetItem(__pyx_d, __pyx_n_s_util, __pyx_t_4) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
__Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
/* "spacy/spacy.pyx":1
* from __future__ import unicode_literals # <<<<<<<<<<<<<<
*
* from ext.murmurhash cimport MurmurHash64A
*/
__pyx_t_5 = PyDict_New(); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_5);
if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_5) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
/* "vector.to_py":63
*
* @cname("__pyx_convert_vector_to_py_size_t")
* cdef object __pyx_convert_vector_to_py_size_t(vector[X]& v): # <<<<<<<<<<<<<<
* return [X_to_py(v[i]) for i in range(v.size())]
*
*/
goto __pyx_L0;
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_XDECREF(__pyx_t_2);
__Pyx_XDECREF(__pyx_t_3);
__Pyx_XDECREF(__pyx_t_4);
__Pyx_XDECREF(__pyx_t_5);
if (__pyx_m) {
__Pyx_AddTraceback("init spacy.spacy", __pyx_clineno, __pyx_lineno, __pyx_filename);
Py_DECREF(__pyx_m); __pyx_m = 0;
} else if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ImportError, "init spacy.spacy");
}
__pyx_L0:;
__Pyx_RefNannyFinishContext();
#if PY_MAJOR_VERSION < 3
return;
#else
return __pyx_m;
#endif
}
/* Runtime support code */
#if CYTHON_REFNANNY
static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) {
PyObject *m = NULL, *p = NULL;
void *r = NULL;
m = PyImport_ImportModule((char *)modname);
if (!m) goto end;
p = PyObject_GetAttrString(m, (char *)"RefNannyAPI");
if (!p) goto end;
r = PyLong_AsVoidPtr(p);
end:
Py_XDECREF(p);
Py_XDECREF(m);
return (__Pyx_RefNannyAPIStruct *)r;
}
#endif /* CYTHON_REFNANNY */
static PyObject *__Pyx_GetBuiltinName(PyObject *name) {
PyObject* result = __Pyx_PyObject_GetAttrStr(__pyx_b, name);
if (unlikely(!result)) {
PyErr_Format(PyExc_NameError,
#if PY_MAJOR_VERSION >= 3
"name '%U' is not defined", name);
#else
"name '%.200s' is not defined", PyString_AS_STRING(name));
#endif
}
return result;
}
static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected) {
PyErr_Format(PyExc_ValueError,
"too many values to unpack (expected %" CYTHON_FORMAT_SSIZE_T "d)", expected);
}
static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index) {
PyErr_Format(PyExc_ValueError,
"need more than %" CYTHON_FORMAT_SSIZE_T "d value%.1s to unpack",
index, (index == 1) ? "" : "s");
}
static CYTHON_INLINE int __Pyx_IterFinish(void) {
#if CYTHON_COMPILING_IN_CPYTHON
PyThreadState *tstate = PyThreadState_GET();
PyObject* exc_type = tstate->curexc_type;
if (unlikely(exc_type)) {
if (likely(exc_type == PyExc_StopIteration) || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration)) {
PyObject *exc_value, *exc_tb;
exc_value = tstate->curexc_value;
exc_tb = tstate->curexc_traceback;
tstate->curexc_type = 0;
tstate->curexc_value = 0;
tstate->curexc_traceback = 0;
Py_DECREF(exc_type);
Py_XDECREF(exc_value);
Py_XDECREF(exc_tb);
return 0;
} else {
return -1;
}
}
return 0;
#else
if (unlikely(PyErr_Occurred())) {
if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) {
PyErr_Clear();
return 0;
} else {
return -1;
}
}
return 0;
#endif
}
static int __Pyx_IternextUnpackEndCheck(PyObject *retval, Py_ssize_t expected) {
if (unlikely(retval)) {
Py_DECREF(retval);
__Pyx_RaiseTooManyValuesError(expected);
return -1;
} else {
return __Pyx_IterFinish();
}
return 0;
}
static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) {
#if CYTHON_COMPILING_IN_PYPY
return PyObject_RichCompareBool(s1, s2, equals);
#else
if (s1 == s2) {
return (equals == Py_EQ);
} else if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) {
const char *ps1, *ps2;
Py_ssize_t length = PyBytes_GET_SIZE(s1);
if (length != PyBytes_GET_SIZE(s2))
return (equals == Py_NE);
ps1 = PyBytes_AS_STRING(s1);
ps2 = PyBytes_AS_STRING(s2);
if (ps1[0] != ps2[0]) {
return (equals == Py_NE);
} else if (length == 1) {
return (equals == Py_EQ);
} else {
int result = memcmp(ps1, ps2, (size_t)length);
return (equals == Py_EQ) ? (result == 0) : (result != 0);
}
} else if ((s1 == Py_None) & PyBytes_CheckExact(s2)) {
return (equals == Py_NE);
} else if ((s2 == Py_None) & PyBytes_CheckExact(s1)) {
return (equals == Py_NE);
} else {
int result;
PyObject* py_result = PyObject_RichCompare(s1, s2, equals);
if (!py_result)
return -1;
result = __Pyx_PyObject_IsTrue(py_result);
Py_DECREF(py_result);
return result;
}
#endif
}
static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) {
#if CYTHON_COMPILING_IN_PYPY
return PyObject_RichCompareBool(s1, s2, equals);
#else
#if PY_MAJOR_VERSION < 3
PyObject* owned_ref = NULL;
#endif
int s1_is_unicode, s2_is_unicode;
if (s1 == s2) {
goto return_eq;
}
s1_is_unicode = PyUnicode_CheckExact(s1);
s2_is_unicode = PyUnicode_CheckExact(s2);
#if PY_MAJOR_VERSION < 3
if ((s1_is_unicode & (!s2_is_unicode)) && PyString_CheckExact(s2)) {
owned_ref = PyUnicode_FromObject(s2);
if (unlikely(!owned_ref))
return -1;
s2 = owned_ref;
s2_is_unicode = 1;
} else if ((s2_is_unicode & (!s1_is_unicode)) && PyString_CheckExact(s1)) {
owned_ref = PyUnicode_FromObject(s1);
if (unlikely(!owned_ref))
return -1;
s1 = owned_ref;
s1_is_unicode = 1;
} else if (((!s2_is_unicode) & (!s1_is_unicode))) {
return __Pyx_PyBytes_Equals(s1, s2, equals);
}
#endif
if (s1_is_unicode & s2_is_unicode) {
Py_ssize_t length;
int kind;
void *data1, *data2;
#if CYTHON_PEP393_ENABLED
if (unlikely(PyUnicode_READY(s1) < 0) || unlikely(PyUnicode_READY(s2) < 0))
return -1;
#endif
length = __Pyx_PyUnicode_GET_LENGTH(s1);
if (length != __Pyx_PyUnicode_GET_LENGTH(s2)) {
goto return_ne;
}
kind = __Pyx_PyUnicode_KIND(s1);
if (kind != __Pyx_PyUnicode_KIND(s2)) {
goto return_ne;
}
data1 = __Pyx_PyUnicode_DATA(s1);
data2 = __Pyx_PyUnicode_DATA(s2);
if (__Pyx_PyUnicode_READ(kind, data1, 0) != __Pyx_PyUnicode_READ(kind, data2, 0)) {
goto return_ne;
} else if (length == 1) {
goto return_eq;
} else {
int result = memcmp(data1, data2, length * kind);
#if PY_MAJOR_VERSION < 3
Py_XDECREF(owned_ref);
#endif
return (equals == Py_EQ) ? (result == 0) : (result != 0);
}
} else if ((s1 == Py_None) & s2_is_unicode) {
goto return_ne;
} else if ((s2 == Py_None) & s1_is_unicode) {
goto return_ne;
} else {
int result;
PyObject* py_result = PyObject_RichCompare(s1, s2, equals);
if (!py_result)
return -1;
result = __Pyx_PyObject_IsTrue(py_result);
Py_DECREF(py_result);
return result;
}
return_eq:
#if PY_MAJOR_VERSION < 3
Py_XDECREF(owned_ref);
#endif
return (equals == Py_EQ);
return_ne:
#if PY_MAJOR_VERSION < 3
Py_XDECREF(owned_ref);
#endif
return (equals == Py_NE);
#endif
}
static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) {
PyObject* value = __Pyx_PyObject_GetAttrStr(module, name);
if (unlikely(!value) && PyErr_ExceptionMatches(PyExc_AttributeError)) {
PyErr_Format(PyExc_ImportError,
#if PY_MAJOR_VERSION < 3
"cannot import name %.230s", PyString_AS_STRING(name));
#else
"cannot import name %S", name);
#endif
}
return value;
}
static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) {
PyObject *empty_list = 0;
PyObject *module = 0;
PyObject *global_dict = 0;
PyObject *empty_dict = 0;
PyObject *list;
#if PY_VERSION_HEX < 0x03030000
PyObject *py_import;
py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import);
if (!py_import)
goto bad;
#endif
if (from_list)
list = from_list;
else {
empty_list = PyList_New(0);
if (!empty_list)
goto bad;
list = empty_list;
}
global_dict = PyModule_GetDict(__pyx_m);
if (!global_dict)
goto bad;
empty_dict = PyDict_New();
if (!empty_dict)
goto bad;
#if PY_VERSION_HEX >= 0x02050000
{
#if PY_MAJOR_VERSION >= 3
if (level == -1) {
if (strchr(__Pyx_MODULE_NAME, '.')) {
#if PY_VERSION_HEX < 0x03030000
PyObject *py_level = PyInt_FromLong(1);
if (!py_level)
goto bad;
module = PyObject_CallFunctionObjArgs(py_import,
name, global_dict, empty_dict, list, py_level, NULL);
Py_DECREF(py_level);
#else
module = PyImport_ImportModuleLevelObject(
name, global_dict, empty_dict, list, 1);
#endif
if (!module) {
if (!PyErr_ExceptionMatches(PyExc_ImportError))
goto bad;
PyErr_Clear();
}
}
level = 0; /* try absolute import on failure */
}
#endif
if (!module) {
#if PY_VERSION_HEX < 0x03030000
PyObject *py_level = PyInt_FromLong(level);
if (!py_level)
goto bad;
module = PyObject_CallFunctionObjArgs(py_import,
name, global_dict, empty_dict, list, py_level, NULL);
Py_DECREF(py_level);
#else
module = PyImport_ImportModuleLevelObject(
name, global_dict, empty_dict, list, level);
#endif
}
}
#else
if (level>0) {
PyErr_SetString(PyExc_RuntimeError, "Relative import is not supported for Python <=2.4.");
goto bad;
}
module = PyObject_CallFunctionObjArgs(py_import,
name, global_dict, empty_dict, list, NULL);
#endif
bad:
#if PY_VERSION_HEX < 0x03030000
Py_XDECREF(py_import);
#endif
Py_XDECREF(empty_list);
Py_XDECREF(empty_dict);
return module;
}
#define __PYX_VERIFY_RETURN_INT(target_type, func_type, func) \
{ \
func_type value = func(x); \
if (sizeof(target_type) < sizeof(func_type)) { \
if (unlikely(value != (func_type) (target_type) value)) { \
func_type zero = 0; \
PyErr_SetString(PyExc_OverflowError, \
(is_unsigned && unlikely(value < zero)) ? \
"can't convert negative value to " #target_type : \
"value too large to convert to " #target_type); \
return (target_type) -1; \
} \
} \
return (target_type) value; \
}
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
#if CYTHON_USE_PYLONG_INTERNALS
#include "longintrepr.h"
#endif
#endif
static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *x) {
const size_t neg_one = (size_t) -1, const_zero = 0;
const int is_unsigned = neg_one > const_zero;
#if PY_MAJOR_VERSION < 3
if (likely(PyInt_Check(x))) {
if (sizeof(size_t) < sizeof(long)) {
__PYX_VERIFY_RETURN_INT(size_t, long, PyInt_AS_LONG)
} else {
long val = PyInt_AS_LONG(x);
if (is_unsigned && unlikely(val < 0)) {
PyErr_SetString(PyExc_OverflowError,
"can't convert negative value to size_t");
return (size_t) -1;
}
return (size_t) val;
}
} else
#endif
if (likely(PyLong_Check(x))) {
if (is_unsigned) {
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
#if CYTHON_USE_PYLONG_INTERNALS
if (sizeof(digit) <= sizeof(size_t)) {
switch (Py_SIZE(x)) {
case 0: return 0;
case 1: return (size_t) ((PyLongObject*)x)->ob_digit[0];
}
}
#endif
#endif
if (unlikely(Py_SIZE(x) < 0)) {
PyErr_SetString(PyExc_OverflowError,
"can't convert negative value to size_t");
return (size_t) -1;
}
if (sizeof(size_t) <= sizeof(unsigned long)) {
__PYX_VERIFY_RETURN_INT(size_t, unsigned long, PyLong_AsUnsignedLong)
} else if (sizeof(size_t) <= sizeof(unsigned long long)) {
__PYX_VERIFY_RETURN_INT(size_t, unsigned long long, PyLong_AsUnsignedLongLong)
}
} else {
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
#if CYTHON_USE_PYLONG_INTERNALS
if (sizeof(digit) <= sizeof(size_t)) {
switch (Py_SIZE(x)) {
case 0: return 0;
case 1: return +(size_t) ((PyLongObject*)x)->ob_digit[0];
case -1: return -(size_t) ((PyLongObject*)x)->ob_digit[0];
}
}
#endif
#endif
if (sizeof(size_t) <= sizeof(long)) {
__PYX_VERIFY_RETURN_INT(size_t, long, PyLong_AsLong)
} else if (sizeof(size_t) <= sizeof(long long)) {
__PYX_VERIFY_RETURN_INT(size_t, long long, PyLong_AsLongLong)
}
}
{
#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray)
PyErr_SetString(PyExc_RuntimeError,
"_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers");
#else
size_t val;
PyObject *v = __Pyx_PyNumber_Int(x);
#if PY_MAJOR_VERSION < 3
if (likely(v) && !PyLong_Check(v)) {
PyObject *tmp = v;
v = PyNumber_Long(tmp);
Py_DECREF(tmp);
}
#endif
if (likely(v)) {
int one = 1; int is_little = (int)*(unsigned char *)&one;
unsigned char *bytes = (unsigned char *)&val;
int ret = _PyLong_AsByteArray((PyLongObject *)v,
bytes, sizeof(val),
is_little, !is_unsigned);
Py_DECREF(v);
if (likely(!ret))
return val;
}
#endif
return (size_t) -1;
}
} else {
size_t val;
PyObject *tmp = __Pyx_PyNumber_Int(x);
if (!tmp) return (size_t) -1;
val = __Pyx_PyInt_As_size_t(tmp);
Py_DECREF(tmp);
return val;
}
}
static CYTHON_INLINE PyObject* __Pyx_PyInt_From_uint64_t(uint64_t value) {
const uint64_t neg_one = (uint64_t) -1, const_zero = 0;
const int is_unsigned = neg_one > const_zero;
if (is_unsigned) {
if (sizeof(uint64_t) < sizeof(long)) {
return PyInt_FromLong((long) value);
} else if (sizeof(uint64_t) <= sizeof(unsigned long)) {
return PyLong_FromUnsignedLong((unsigned long) value);
} else if (sizeof(uint64_t) <= sizeof(unsigned long long)) {
return PyLong_FromUnsignedLongLong((unsigned long long) value);
}
} else {
if (sizeof(uint64_t) <= sizeof(long)) {
return PyInt_FromLong((long) value);
} else if (sizeof(uint64_t) <= sizeof(long long)) {
return PyLong_FromLongLong((long long) value);
}
}
{
int one = 1; int little = (int)*(unsigned char *)&one;
unsigned char *bytes = (unsigned char *)&value;
return _PyLong_FromByteArray(bytes, sizeof(uint64_t),
little, !is_unsigned);
}
}
static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) {
const long neg_one = (long) -1, const_zero = 0;
const int is_unsigned = neg_one > const_zero;
if (is_unsigned) {
if (sizeof(long) < sizeof(long)) {
return PyInt_FromLong((long) value);
} else if (sizeof(long) <= sizeof(unsigned long)) {
return PyLong_FromUnsignedLong((unsigned long) value);
} else if (sizeof(long) <= sizeof(unsigned long long)) {
return PyLong_FromUnsignedLongLong((unsigned long long) value);
}
} else {
if (sizeof(long) <= sizeof(long)) {
return PyInt_FromLong((long) value);
} else if (sizeof(long) <= sizeof(long long)) {
return PyLong_FromLongLong((long long) value);
}
}
{
int one = 1; int little = (int)*(unsigned char *)&one;
unsigned char *bytes = (unsigned char *)&value;
return _PyLong_FromByteArray(bytes, sizeof(long),
little, !is_unsigned);
}
}
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
#if CYTHON_USE_PYLONG_INTERNALS
#include "longintrepr.h"
#endif
#endif
static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) {
const long neg_one = (long) -1, const_zero = 0;
const int is_unsigned = neg_one > const_zero;
#if PY_MAJOR_VERSION < 3
if (likely(PyInt_Check(x))) {
if (sizeof(long) < sizeof(long)) {
__PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG)
} else {
long val = PyInt_AS_LONG(x);
if (is_unsigned && unlikely(val < 0)) {
PyErr_SetString(PyExc_OverflowError,
"can't convert negative value to long");
return (long) -1;
}
return (long) val;
}
} else
#endif
if (likely(PyLong_Check(x))) {
if (is_unsigned) {
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
#if CYTHON_USE_PYLONG_INTERNALS
if (sizeof(digit) <= sizeof(long)) {
switch (Py_SIZE(x)) {
case 0: return 0;
case 1: return (long) ((PyLongObject*)x)->ob_digit[0];
}
}
#endif
#endif
if (unlikely(Py_SIZE(x) < 0)) {
PyErr_SetString(PyExc_OverflowError,
"can't convert negative value to long");
return (long) -1;
}
if (sizeof(long) <= sizeof(unsigned long)) {
__PYX_VERIFY_RETURN_INT(long, unsigned long, PyLong_AsUnsignedLong)
} else if (sizeof(long) <= sizeof(unsigned long long)) {
__PYX_VERIFY_RETURN_INT(long, unsigned long long, PyLong_AsUnsignedLongLong)
}
} else {
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
#if CYTHON_USE_PYLONG_INTERNALS
if (sizeof(digit) <= sizeof(long)) {
switch (Py_SIZE(x)) {
case 0: return 0;
case 1: return +(long) ((PyLongObject*)x)->ob_digit[0];
case -1: return -(long) ((PyLongObject*)x)->ob_digit[0];
}
}
#endif
#endif
if (sizeof(long) <= sizeof(long)) {
__PYX_VERIFY_RETURN_INT(long, long, PyLong_AsLong)
} else if (sizeof(long) <= sizeof(long long)) {
__PYX_VERIFY_RETURN_INT(long, long long, PyLong_AsLongLong)
}
}
{
#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray)
PyErr_SetString(PyExc_RuntimeError,
"_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers");
#else
long val;
PyObject *v = __Pyx_PyNumber_Int(x);
#if PY_MAJOR_VERSION < 3
if (likely(v) && !PyLong_Check(v)) {
PyObject *tmp = v;
v = PyNumber_Long(tmp);
Py_DECREF(tmp);
}
#endif
if (likely(v)) {
int one = 1; int is_little = (int)*(unsigned char *)&one;
unsigned char *bytes = (unsigned char *)&val;
int ret = _PyLong_AsByteArray((PyLongObject *)v,
bytes, sizeof(val),
is_little, !is_unsigned);
Py_DECREF(v);
if (likely(!ret))
return val;
}
#endif
return (long) -1;
}
} else {
long val;
PyObject *tmp = __Pyx_PyNumber_Int(x);
if (!tmp) return (long) -1;
val = __Pyx_PyInt_As_long(tmp);
Py_DECREF(tmp);
return val;
}
}
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
#if CYTHON_USE_PYLONG_INTERNALS
#include "longintrepr.h"
#endif
#endif
static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) {
const int neg_one = (int) -1, const_zero = 0;
const int is_unsigned = neg_one > const_zero;
#if PY_MAJOR_VERSION < 3
if (likely(PyInt_Check(x))) {
if (sizeof(int) < sizeof(long)) {
__PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG)
} else {
long val = PyInt_AS_LONG(x);
if (is_unsigned && unlikely(val < 0)) {
PyErr_SetString(PyExc_OverflowError,
"can't convert negative value to int");
return (int) -1;
}
return (int) val;
}
} else
#endif
if (likely(PyLong_Check(x))) {
if (is_unsigned) {
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
#if CYTHON_USE_PYLONG_INTERNALS
if (sizeof(digit) <= sizeof(int)) {
switch (Py_SIZE(x)) {
case 0: return 0;
case 1: return (int) ((PyLongObject*)x)->ob_digit[0];
}
}
#endif
#endif
if (unlikely(Py_SIZE(x) < 0)) {
PyErr_SetString(PyExc_OverflowError,
"can't convert negative value to int");
return (int) -1;
}
if (sizeof(int) <= sizeof(unsigned long)) {
__PYX_VERIFY_RETURN_INT(int, unsigned long, PyLong_AsUnsignedLong)
} else if (sizeof(int) <= sizeof(unsigned long long)) {
__PYX_VERIFY_RETURN_INT(int, unsigned long long, PyLong_AsUnsignedLongLong)
}
} else {
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
#if CYTHON_USE_PYLONG_INTERNALS
if (sizeof(digit) <= sizeof(int)) {
switch (Py_SIZE(x)) {
case 0: return 0;
case 1: return +(int) ((PyLongObject*)x)->ob_digit[0];
case -1: return -(int) ((PyLongObject*)x)->ob_digit[0];
}
}
#endif
#endif
if (sizeof(int) <= sizeof(long)) {
__PYX_VERIFY_RETURN_INT(int, long, PyLong_AsLong)
} else if (sizeof(int) <= sizeof(long long)) {
__PYX_VERIFY_RETURN_INT(int, long long, PyLong_AsLongLong)
}
}
{
#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray)
PyErr_SetString(PyExc_RuntimeError,
"_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers");
#else
int val;
PyObject *v = __Pyx_PyNumber_Int(x);
#if PY_MAJOR_VERSION < 3
if (likely(v) && !PyLong_Check(v)) {
PyObject *tmp = v;
v = PyNumber_Long(tmp);
Py_DECREF(tmp);
}
#endif
if (likely(v)) {
int one = 1; int is_little = (int)*(unsigned char *)&one;
unsigned char *bytes = (unsigned char *)&val;
int ret = _PyLong_AsByteArray((PyLongObject *)v,
bytes, sizeof(val),
is_little, !is_unsigned);
Py_DECREF(v);
if (likely(!ret))
return val;
}
#endif
return (int) -1;
}
} else {
int val;
PyObject *tmp = __Pyx_PyNumber_Int(x);
if (!tmp) return (int) -1;
val = __Pyx_PyInt_As_int(tmp);
Py_DECREF(tmp);
return val;
}
}
static int __Pyx_check_binary_version(void) {
char ctversion[4], rtversion[4];
PyOS_snprintf(ctversion, 4, "%d.%d", PY_MAJOR_VERSION, PY_MINOR_VERSION);
PyOS_snprintf(rtversion, 4, "%s", Py_GetVersion());
if (ctversion[0] != rtversion[0] || ctversion[2] != rtversion[2]) {
char message[200];
PyOS_snprintf(message, sizeof(message),
"compiletime version %s of module '%.100s' "
"does not match runtime version %s",
ctversion, __Pyx_MODULE_NAME, rtversion);
#if PY_VERSION_HEX < 0x02050000
return PyErr_Warn(NULL, message);
#else
return PyErr_WarnEx(NULL, message, 1);
#endif
}
return 0;
}
static int __Pyx_ExportFunction(const char *name, void (*f)(void), const char *sig) {
PyObject *d = 0;
PyObject *cobj = 0;
union {
void (*fp)(void);
void *p;
} tmp;
d = PyObject_GetAttrString(__pyx_m, (char *)"__pyx_capi__");
if (!d) {
PyErr_Clear();
d = PyDict_New();
if (!d)
goto bad;
Py_INCREF(d);
if (PyModule_AddObject(__pyx_m, (char *)"__pyx_capi__", d) < 0)
goto bad;
}
tmp.fp = f;
#if PY_VERSION_HEX >= 0x02070000 && !(PY_MAJOR_VERSION==3&&PY_MINOR_VERSION==0)
cobj = PyCapsule_New(tmp.p, sig, 0);
#else
cobj = PyCObject_FromVoidPtrAndDesc(tmp.p, (void *)sig, 0);
#endif
if (!cobj)
goto bad;
if (PyDict_SetItemString(d, name, cobj) < 0)
goto bad;
Py_DECREF(cobj);
Py_DECREF(d);
return 0;
bad:
Py_XDECREF(cobj);
Py_XDECREF(d);
return -1;
}
#ifndef __PYX_HAVE_RT_ImportModule
#define __PYX_HAVE_RT_ImportModule
static PyObject *__Pyx_ImportModule(const char *name) {
PyObject *py_name = 0;
PyObject *py_module = 0;
py_name = __Pyx_PyIdentifier_FromString(name);
if (!py_name)
goto bad;
py_module = PyImport_Import(py_name);
Py_DECREF(py_name);
return py_module;
bad:
Py_XDECREF(py_name);
return 0;
}
#endif
#ifndef __PYX_HAVE_RT_ImportVoidPtr
#define __PYX_HAVE_RT_ImportVoidPtr
static int __Pyx_ImportVoidPtr(PyObject *module, const char *name, void **p, const char *sig) {
PyObject *d = 0;
PyObject *cobj = 0;
d = PyObject_GetAttrString(module, (char *)"__pyx_capi__");
if (!d)
goto bad;
cobj = PyDict_GetItemString(d, name);
if (!cobj) {
PyErr_Format(PyExc_ImportError,
"%.200s does not export expected C variable %.200s",
PyModule_GetName(module), name);
goto bad;
}
#if PY_VERSION_HEX >= 0x02070000 && !(PY_MAJOR_VERSION==3 && PY_MINOR_VERSION==0)
if (!PyCapsule_IsValid(cobj, sig)) {
PyErr_Format(PyExc_TypeError,
"C variable %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
PyModule_GetName(module), name, sig, PyCapsule_GetName(cobj));
goto bad;
}
*p = PyCapsule_GetPointer(cobj, sig);
#else
{const char *desc, *s1, *s2;
desc = (const char *)PyCObject_GetDesc(cobj);
if (!desc)
goto bad;
s1 = desc; s2 = sig;
while (*s1 != '\0' && *s1 == *s2) { s1++; s2++; }
if (*s1 != *s2) {
PyErr_Format(PyExc_TypeError,
"C variable %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
PyModule_GetName(module), name, sig, desc);
goto bad;
}
*p = PyCObject_AsVoidPtr(cobj);}
#endif
if (!(*p))
goto bad;
Py_DECREF(d);
return 0;
bad:
Py_XDECREF(d);
return -1;
}
#endif
#ifndef __PYX_HAVE_RT_ImportFunction
#define __PYX_HAVE_RT_ImportFunction
static int __Pyx_ImportFunction(PyObject *module, const char *funcname, void (**f)(void), const char *sig) {
PyObject *d = 0;
PyObject *cobj = 0;
union {
void (*fp)(void);
void *p;
} tmp;
d = PyObject_GetAttrString(module, (char *)"__pyx_capi__");
if (!d)
goto bad;
cobj = PyDict_GetItemString(d, funcname);
if (!cobj) {
PyErr_Format(PyExc_ImportError,
"%.200s does not export expected C function %.200s",
PyModule_GetName(module), funcname);
goto bad;
}
#if PY_VERSION_HEX >= 0x02070000 && !(PY_MAJOR_VERSION==3 && PY_MINOR_VERSION==0)
if (!PyCapsule_IsValid(cobj, sig)) {
PyErr_Format(PyExc_TypeError,
"C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
PyModule_GetName(module), funcname, sig, PyCapsule_GetName(cobj));
goto bad;
}
tmp.p = PyCapsule_GetPointer(cobj, sig);
#else
{const char *desc, *s1, *s2;
desc = (const char *)PyCObject_GetDesc(cobj);
if (!desc)
goto bad;
s1 = desc; s2 = sig;
while (*s1 != '\0' && *s1 == *s2) { s1++; s2++; }
if (*s1 != *s2) {
PyErr_Format(PyExc_TypeError,
"C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
PyModule_GetName(module), funcname, sig, desc);
goto bad;
}
tmp.p = PyCObject_AsVoidPtr(cobj);}
#endif
*f = tmp.fp;
if (!(*f))
goto bad;
Py_DECREF(d);
return 0;
bad:
Py_XDECREF(d);
return -1;
}
#endif
static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) {
int start = 0, mid = 0, end = count - 1;
if (end >= 0 && code_line > entries[end].code_line) {
return count;
}
while (start < end) {
mid = (start + end) / 2;
if (code_line < entries[mid].code_line) {
end = mid;
} else if (code_line > entries[mid].code_line) {
start = mid + 1;
} else {
return mid;
}
}
if (code_line <= entries[mid].code_line) {
return mid;
} else {
return mid + 1;
}
}
static PyCodeObject *__pyx_find_code_object(int code_line) {
PyCodeObject* code_object;
int pos;
if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) {
return NULL;
}
pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) {
return NULL;
}
code_object = __pyx_code_cache.entries[pos].code_object;
Py_INCREF(code_object);
return code_object;
}
static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) {
int pos, i;
__Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries;
if (unlikely(!code_line)) {
return;
}
if (unlikely(!entries)) {
entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry));
if (likely(entries)) {
__pyx_code_cache.entries = entries;
__pyx_code_cache.max_count = 64;
__pyx_code_cache.count = 1;
entries[0].code_line = code_line;
entries[0].code_object = code_object;
Py_INCREF(code_object);
}
return;
}
pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) {
PyCodeObject* tmp = entries[pos].code_object;
entries[pos].code_object = code_object;
Py_DECREF(tmp);
return;
}
if (__pyx_code_cache.count == __pyx_code_cache.max_count) {
int new_max = __pyx_code_cache.max_count + 64;
entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc(
__pyx_code_cache.entries, new_max*sizeof(__Pyx_CodeObjectCacheEntry));
if (unlikely(!entries)) {
return;
}
__pyx_code_cache.entries = entries;
__pyx_code_cache.max_count = new_max;
}
for (i=__pyx_code_cache.count; i>pos; i--) {
entries[i] = entries[i-1];
}
entries[pos].code_line = code_line;
entries[pos].code_object = code_object;
__pyx_code_cache.count++;
Py_INCREF(code_object);
}
#include "compile.h"
#include "frameobject.h"
#include "traceback.h"
static PyCodeObject* __Pyx_CreateCodeObjectForTraceback(
const char *funcname, int c_line,
int py_line, const char *filename) {
PyCodeObject *py_code = 0;
PyObject *py_srcfile = 0;
PyObject *py_funcname = 0;
#if PY_MAJOR_VERSION < 3
py_srcfile = PyString_FromString(filename);
#else
py_srcfile = PyUnicode_FromString(filename);
#endif
if (!py_srcfile) goto bad;
if (c_line) {
#if PY_MAJOR_VERSION < 3
py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line);
#else
py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line);
#endif
}
else {
#if PY_MAJOR_VERSION < 3
py_funcname = PyString_FromString(funcname);
#else
py_funcname = PyUnicode_FromString(funcname);
#endif
}
if (!py_funcname) goto bad;
py_code = __Pyx_PyCode_New(
0, /*int argcount,*/
0, /*int kwonlyargcount,*/
0, /*int nlocals,*/
0, /*int stacksize,*/
0, /*int flags,*/
__pyx_empty_bytes, /*PyObject *code,*/
__pyx_empty_tuple, /*PyObject *consts,*/
__pyx_empty_tuple, /*PyObject *names,*/
__pyx_empty_tuple, /*PyObject *varnames,*/
__pyx_empty_tuple, /*PyObject *freevars,*/
__pyx_empty_tuple, /*PyObject *cellvars,*/
py_srcfile, /*PyObject *filename,*/
py_funcname, /*PyObject *name,*/
py_line, /*int firstlineno,*/
__pyx_empty_bytes /*PyObject *lnotab*/
);
Py_DECREF(py_srcfile);
Py_DECREF(py_funcname);
return py_code;
bad:
Py_XDECREF(py_srcfile);
Py_XDECREF(py_funcname);
return NULL;
}
static void __Pyx_AddTraceback(const char *funcname, int c_line,
int py_line, const char *filename) {
PyCodeObject *py_code = 0;
PyObject *py_globals = 0;
PyFrameObject *py_frame = 0;
py_code = __pyx_find_code_object(c_line ? c_line : py_line);
if (!py_code) {
py_code = __Pyx_CreateCodeObjectForTraceback(
funcname, c_line, py_line, filename);
if (!py_code) goto bad;
__pyx_insert_code_object(c_line ? c_line : py_line, py_code);
}
py_globals = PyModule_GetDict(__pyx_m);
if (!py_globals) goto bad;
py_frame = PyFrame_New(
PyThreadState_GET(), /*PyThreadState *tstate,*/
py_code, /*PyCodeObject *code,*/
py_globals, /*PyObject *globals,*/
0 /*PyObject *locals*/
);
if (!py_frame) goto bad;
py_frame->f_lineno = py_line;
PyTraceBack_Here(py_frame);
bad:
Py_XDECREF(py_code);
Py_XDECREF(py_frame);
}
static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {
while (t->p) {
#if PY_MAJOR_VERSION < 3
if (t->is_unicode) {
*t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);
} else if (t->intern) {
*t->p = PyString_InternFromString(t->s);
} else {
*t->p = PyString_FromStringAndSize(t->s, t->n - 1);
}
#else /* Python 3+ has unicode identifiers */
if (t->is_unicode | t->is_str) {
if (t->intern) {
*t->p = PyUnicode_InternFromString(t->s);
} else if (t->encoding) {
*t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL);
} else {
*t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);
}
} else {
*t->p = PyBytes_FromStringAndSize(t->s, t->n - 1);
}
#endif
if (!*t->p)
return -1;
++t;
}
return 0;
}
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(char* c_str) {
return __Pyx_PyUnicode_FromStringAndSize(c_str, strlen(c_str));
}
static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) {
Py_ssize_t ignore;
return __Pyx_PyObject_AsStringAndSize(o, &ignore);
}
static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) {
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
if (
#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
__Pyx_sys_getdefaultencoding_not_ascii &&
#endif
PyUnicode_Check(o)) {
#if PY_VERSION_HEX < 0x03030000
char* defenc_c;
PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL);
if (!defenc) return NULL;
defenc_c = PyBytes_AS_STRING(defenc);
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
{
char* end = defenc_c + PyBytes_GET_SIZE(defenc);
char* c;
for (c = defenc_c; c < end; c++) {
if ((unsigned char) (*c) >= 128) {
PyUnicode_AsASCIIString(o);
return NULL;
}
}
}
#endif /*__PYX_DEFAULT_STRING_ENCODING_IS_ASCII*/
*length = PyBytes_GET_SIZE(defenc);
return defenc_c;
#else /* PY_VERSION_HEX < 0x03030000 */
if (PyUnicode_READY(o) == -1) return NULL;
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
if (PyUnicode_IS_ASCII(o)) {
*length = PyUnicode_GET_DATA_SIZE(o);
return PyUnicode_AsUTF8(o);
} else {
PyUnicode_AsASCIIString(o);
return NULL;
}
#else /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */
return PyUnicode_AsUTF8AndSize(o, length);
#endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */
#endif /* PY_VERSION_HEX < 0x03030000 */
} else
#endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT */
#if !CYTHON_COMPILING_IN_PYPY
#if PY_VERSION_HEX >= 0x02060000
if (PyByteArray_Check(o)) {
*length = PyByteArray_GET_SIZE(o);
return PyByteArray_AS_STRING(o);
} else
#endif
#endif
{
char* result;
int r = PyBytes_AsStringAndSize(o, &result, length);
if (unlikely(r < 0)) {
return NULL;
} else {
return result;
}
}
}
static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {
int is_true = x == Py_True;
if (is_true | (x == Py_False) | (x == Py_None)) return is_true;
else return PyObject_IsTrue(x);
}
static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) {
PyNumberMethods *m;
const char *name = NULL;
PyObject *res = NULL;
#if PY_MAJOR_VERSION < 3
if (PyInt_Check(x) || PyLong_Check(x))
#else
if (PyLong_Check(x))
#endif
return Py_INCREF(x), x;
m = Py_TYPE(x)->tp_as_number;
#if PY_MAJOR_VERSION < 3
if (m && m->nb_int) {
name = "int";
res = PyNumber_Int(x);
}
else if (m && m->nb_long) {
name = "long";
res = PyNumber_Long(x);
}
#else
if (m && m->nb_int) {
name = "int";
res = PyNumber_Long(x);
}
#endif
if (res) {
#if PY_MAJOR_VERSION < 3
if (!PyInt_Check(res) && !PyLong_Check(res)) {
#else
if (!PyLong_Check(res)) {
#endif
PyErr_Format(PyExc_TypeError,
"__%.4s__ returned non-%.4s (type %.200s)",
name, name, Py_TYPE(res)->tp_name);
Py_DECREF(res);
return NULL;
}
}
else if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_TypeError,
"an integer is required");
}
return res;
}
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
#if CYTHON_USE_PYLONG_INTERNALS
#include "longintrepr.h"
#endif
#endif
static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
Py_ssize_t ival;
PyObject *x;
#if PY_MAJOR_VERSION < 3
if (likely(PyInt_CheckExact(b)))
return PyInt_AS_LONG(b);
#endif
if (likely(PyLong_CheckExact(b))) {
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
#if CYTHON_USE_PYLONG_INTERNALS
switch (Py_SIZE(b)) {
case -1: return -(sdigit)((PyLongObject*)b)->ob_digit[0];
case 0: return 0;
case 1: return ((PyLongObject*)b)->ob_digit[0];
}
#endif
#endif
#if PY_VERSION_HEX < 0x02060000
return PyInt_AsSsize_t(b);
#else
return PyLong_AsSsize_t(b);
#endif
}
x = PyNumber_Index(b);
if (!x) return -1;
ival = PyInt_AsSsize_t(x);
Py_DECREF(x);
return ival;
}
static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {
#if PY_VERSION_HEX < 0x02050000
if (ival <= LONG_MAX)
return PyInt_FromLong((long)ival);
else {
unsigned char *bytes = (unsigned char *) &ival;
int one = 1; int little = (int)*(unsigned char*)&one;
return _PyLong_FromByteArray(bytes, sizeof(size_t), little, 0);
}
#else
return PyInt_FromSize_t(ival);
#endif
}
#endif /* Py_PYTHON_H */