/* Generated by Cython 0.20.1 on Sat Jul 5 20:44:26 2014 */ #define PY_SSIZE_T_CLEAN #ifndef CYTHON_USE_PYLONG_INTERNALS #ifdef PYLONG_BITS_IN_DIGIT #define CYTHON_USE_PYLONG_INTERNALS 0 #else #include "pyconfig.h" #ifdef PYLONG_BITS_IN_DIGIT #define CYTHON_USE_PYLONG_INTERNALS 1 #else #define CYTHON_USE_PYLONG_INTERNALS 0 #endif #endif #endif #include "Python.h" #ifndef Py_PYTHON_H #error Python headers needed to compile C extensions, please install development version of Python. #elif PY_VERSION_HEX < 0x02040000 #error Cython requires Python 2.4+. #else #define CYTHON_ABI "0_20_1" #include /* For offsetof */ #ifndef offsetof #define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) #endif #if !defined(WIN32) && !defined(MS_WINDOWS) #ifndef __stdcall #define __stdcall #endif #ifndef __cdecl #define __cdecl #endif #ifndef __fastcall #define __fastcall #endif #endif #ifndef DL_IMPORT #define DL_IMPORT(t) t #endif #ifndef DL_EXPORT #define DL_EXPORT(t) t #endif #ifndef PY_LONG_LONG #define PY_LONG_LONG LONG_LONG #endif #ifndef Py_HUGE_VAL #define Py_HUGE_VAL HUGE_VAL #endif #ifdef PYPY_VERSION #define CYTHON_COMPILING_IN_PYPY 1 #define CYTHON_COMPILING_IN_CPYTHON 0 #else #define CYTHON_COMPILING_IN_PYPY 0 #define CYTHON_COMPILING_IN_CPYTHON 1 #endif #if CYTHON_COMPILING_IN_PYPY #define Py_OptimizeFlag 0 #endif #if PY_VERSION_HEX < 0x02050000 typedef int Py_ssize_t; #define PY_SSIZE_T_MAX INT_MAX #define PY_SSIZE_T_MIN INT_MIN #define PY_FORMAT_SIZE_T "" #define CYTHON_FORMAT_SSIZE_T "" #define PyInt_FromSsize_t(z) PyInt_FromLong(z) #define PyInt_AsSsize_t(o) __Pyx_PyInt_As_int(o) #define PyNumber_Index(o) ((PyNumber_Check(o) && !PyFloat_Check(o)) ? PyNumber_Int(o) : \ (PyErr_Format(PyExc_TypeError, \ "expected index value, got %.200s", Py_TYPE(o)->tp_name), \ (PyObject*)0)) #define __Pyx_PyIndex_Check(o) (PyNumber_Check(o) && !PyFloat_Check(o) && \ !PyComplex_Check(o)) #define PyIndex_Check __Pyx_PyIndex_Check #define PyErr_WarnEx(category, message, stacklevel) PyErr_Warn(category, message) #define __PYX_BUILD_PY_SSIZE_T "i" #else #define __PYX_BUILD_PY_SSIZE_T "n" #define CYTHON_FORMAT_SSIZE_T "z" #define __Pyx_PyIndex_Check PyIndex_Check #endif #if PY_VERSION_HEX < 0x02060000 #define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt) #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size) #define PyVarObject_HEAD_INIT(type, size) \ PyObject_HEAD_INIT(type) size, #define PyType_Modified(t) typedef struct { void *buf; PyObject *obj; Py_ssize_t len; Py_ssize_t itemsize; int readonly; int ndim; char *format; Py_ssize_t *shape; Py_ssize_t *strides; Py_ssize_t *suboffsets; void *internal; } Py_buffer; #define PyBUF_SIMPLE 0 #define PyBUF_WRITABLE 0x0001 #define PyBUF_FORMAT 0x0004 #define PyBUF_ND 0x0008 #define PyBUF_STRIDES (0x0010 | PyBUF_ND) #define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES) #define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES) #define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES) #define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES) #define PyBUF_RECORDS (PyBUF_STRIDES | PyBUF_FORMAT | PyBUF_WRITABLE) #define PyBUF_FULL (PyBUF_INDIRECT | PyBUF_FORMAT | PyBUF_WRITABLE) typedef int (*getbufferproc)(PyObject *, Py_buffer *, int); typedef void (*releasebufferproc)(PyObject *, Py_buffer *); #endif #if PY_MAJOR_VERSION < 3 #define __Pyx_BUILTIN_MODULE_NAME "__builtin__" #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \ PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) #define __Pyx_DefaultClassType PyClass_Type #else #define __Pyx_BUILTIN_MODULE_NAME "builtins" #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \ PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) #define __Pyx_DefaultClassType PyType_Type #endif #if PY_VERSION_HEX < 0x02060000 #define PyUnicode_FromString(s) PyUnicode_Decode(s, strlen(s), "UTF-8", "strict") #endif #if PY_MAJOR_VERSION >= 3 #define Py_TPFLAGS_CHECKTYPES 0 #define Py_TPFLAGS_HAVE_INDEX 0 #endif #if (PY_VERSION_HEX < 0x02060000) || (PY_MAJOR_VERSION >= 3) #define Py_TPFLAGS_HAVE_NEWBUFFER 0 #endif #if PY_VERSION_HEX < 0x02060000 #define Py_TPFLAGS_HAVE_VERSION_TAG 0 #endif #if PY_VERSION_HEX < 0x02060000 && !defined(Py_TPFLAGS_IS_ABSTRACT) #define Py_TPFLAGS_IS_ABSTRACT 0 #endif #if PY_VERSION_HEX < 0x030400a1 && !defined(Py_TPFLAGS_HAVE_FINALIZE) #define Py_TPFLAGS_HAVE_FINALIZE 0 #endif #if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) #define CYTHON_PEP393_ENABLED 1 #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ? \ 0 : _PyUnicode_Ready((PyObject *)(op))) #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) #define __Pyx_PyUnicode_KIND(u) PyUnicode_KIND(u) #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u) #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) #else #define CYTHON_PEP393_ENABLED 0 #define __Pyx_PyUnicode_READY(op) (0) #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u) #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i])) #define __Pyx_PyUnicode_KIND(u) (sizeof(Py_UNICODE)) #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u)) #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i])) #endif #if CYTHON_COMPILING_IN_PYPY #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) #else #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ? \ PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) #endif #define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b)) #define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b)) #if PY_MAJOR_VERSION >= 3 #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b) #else #define __Pyx_PyString_Format(a, b) PyString_Format(a, b) #endif #if PY_MAJOR_VERSION >= 3 #define PyBaseString_Type PyUnicode_Type #define PyStringObject PyUnicodeObject #define PyString_Type PyUnicode_Type #define PyString_Check PyUnicode_Check #define PyString_CheckExact PyUnicode_CheckExact #endif #if PY_VERSION_HEX < 0x02060000 #define PyBytesObject PyStringObject #define PyBytes_Type PyString_Type #define PyBytes_Check PyString_Check #define PyBytes_CheckExact PyString_CheckExact #define PyBytes_FromString PyString_FromString #define PyBytes_FromStringAndSize PyString_FromStringAndSize #define PyBytes_FromFormat PyString_FromFormat #define PyBytes_DecodeEscape PyString_DecodeEscape #define PyBytes_AsString PyString_AsString #define PyBytes_AsStringAndSize PyString_AsStringAndSize #define PyBytes_Size PyString_Size #define PyBytes_AS_STRING PyString_AS_STRING #define PyBytes_GET_SIZE PyString_GET_SIZE #define PyBytes_Repr PyString_Repr #define PyBytes_Concat PyString_Concat #define PyBytes_ConcatAndDel PyString_ConcatAndDel #endif #if PY_MAJOR_VERSION >= 3 #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj) #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj) #else #define __Pyx_PyBaseString_Check(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj) || \ PyString_Check(obj) || PyUnicode_Check(obj)) #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj)) #endif #if PY_VERSION_HEX < 0x02060000 #define PySet_Check(obj) PyObject_TypeCheck(obj, &PySet_Type) #define PyFrozenSet_Check(obj) PyObject_TypeCheck(obj, &PyFrozenSet_Type) #endif #ifndef PySet_CheckExact #define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type) #endif #define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type) #if PY_MAJOR_VERSION >= 3 #define PyIntObject PyLongObject #define PyInt_Type PyLong_Type #define PyInt_Check(op) PyLong_Check(op) #define PyInt_CheckExact(op) PyLong_CheckExact(op) #define PyInt_FromString PyLong_FromString #define PyInt_FromUnicode PyLong_FromUnicode #define PyInt_FromLong PyLong_FromLong #define PyInt_FromSize_t PyLong_FromSize_t #define PyInt_FromSsize_t PyLong_FromSsize_t #define PyInt_AsLong PyLong_AsLong #define PyInt_AS_LONG PyLong_AS_LONG #define PyInt_AsSsize_t PyLong_AsSsize_t #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask #define PyNumber_Int PyNumber_Long #endif #if PY_MAJOR_VERSION >= 3 #define PyBoolObject PyLongObject #endif #if PY_VERSION_HEX < 0x030200A4 typedef long Py_hash_t; #define __Pyx_PyInt_FromHash_t PyInt_FromLong #define __Pyx_PyInt_AsHash_t PyInt_AsLong #else #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t #define __Pyx_PyInt_AsHash_t PyInt_AsSsize_t #endif #if (PY_MAJOR_VERSION < 3) || (PY_VERSION_HEX >= 0x03010300) #define __Pyx_PySequence_GetSlice(obj, a, b) PySequence_GetSlice(obj, a, b) #define __Pyx_PySequence_SetSlice(obj, a, b, value) PySequence_SetSlice(obj, a, b, value) #define __Pyx_PySequence_DelSlice(obj, a, b) PySequence_DelSlice(obj, a, b) #else #define __Pyx_PySequence_GetSlice(obj, a, b) (unlikely(!(obj)) ? \ (PyErr_SetString(PyExc_SystemError, "null argument to internal routine"), (PyObject*)0) : \ (likely((obj)->ob_type->tp_as_mapping) ? (PySequence_GetSlice(obj, a, b)) : \ (PyErr_Format(PyExc_TypeError, "'%.200s' object is unsliceable", (obj)->ob_type->tp_name), (PyObject*)0))) #define __Pyx_PySequence_SetSlice(obj, a, b, value) (unlikely(!(obj)) ? \ (PyErr_SetString(PyExc_SystemError, "null argument to internal routine"), -1) : \ (likely((obj)->ob_type->tp_as_mapping) ? (PySequence_SetSlice(obj, a, b, value)) : \ (PyErr_Format(PyExc_TypeError, "'%.200s' object doesn't support slice assignment", (obj)->ob_type->tp_name), -1))) #define __Pyx_PySequence_DelSlice(obj, a, b) (unlikely(!(obj)) ? \ (PyErr_SetString(PyExc_SystemError, "null argument to internal routine"), -1) : \ (likely((obj)->ob_type->tp_as_mapping) ? (PySequence_DelSlice(obj, a, b)) : \ (PyErr_Format(PyExc_TypeError, "'%.200s' object doesn't support slice deletion", (obj)->ob_type->tp_name), -1))) #endif #if PY_MAJOR_VERSION >= 3 #define PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : PyInstanceMethod_New(func)) #endif #if PY_VERSION_HEX < 0x02050000 #define __Pyx_GetAttrString(o,n) PyObject_GetAttrString((o),((char *)(n))) #define __Pyx_SetAttrString(o,n,a) PyObject_SetAttrString((o),((char *)(n)),(a)) #define __Pyx_DelAttrString(o,n) PyObject_DelAttrString((o),((char *)(n))) #else #define __Pyx_GetAttrString(o,n) PyObject_GetAttrString((o),(n)) #define __Pyx_SetAttrString(o,n,a) PyObject_SetAttrString((o),(n),(a)) #define __Pyx_DelAttrString(o,n) PyObject_DelAttrString((o),(n)) #endif #if PY_VERSION_HEX < 0x02050000 #define __Pyx_NAMESTR(n) ((char *)(n)) #define __Pyx_DOCSTR(n) ((char *)(n)) #else #define __Pyx_NAMESTR(n) (n) #define __Pyx_DOCSTR(n) (n) #endif #ifndef CYTHON_INLINE #if defined(__GNUC__) #define CYTHON_INLINE __inline__ #elif defined(_MSC_VER) #define CYTHON_INLINE __inline #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L #define CYTHON_INLINE inline #else #define CYTHON_INLINE #endif #endif #ifndef CYTHON_RESTRICT #if defined(__GNUC__) #define CYTHON_RESTRICT __restrict__ #elif defined(_MSC_VER) && _MSC_VER >= 1400 #define CYTHON_RESTRICT __restrict #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L #define CYTHON_RESTRICT restrict #else #define CYTHON_RESTRICT #endif #endif #ifdef NAN #define __PYX_NAN() ((float) NAN) #else static CYTHON_INLINE float __PYX_NAN() { /* Initialize NaN. The sign is irrelevant, an exponent with all bits 1 and a nonzero mantissa means NaN. If the first bit in the mantissa is 1, it is a quiet NaN. */ float value; memset(&value, 0xFF, sizeof(value)); return value; } #endif #if PY_MAJOR_VERSION >= 3 #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y) #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y) #else #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y) #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y) #endif #ifndef __PYX_EXTERN_C #ifdef __cplusplus #define __PYX_EXTERN_C extern "C" #else #define __PYX_EXTERN_C extern #endif #endif #if defined(WIN32) || defined(MS_WINDOWS) #define _USE_MATH_DEFINES #endif #include #define __PYX_HAVE__spacy__en #define __PYX_HAVE_API__spacy__en #include #include "ios" #include "new" #include "stdexcept" #include "typeinfo" #include #include "stdint.h" #include "sparsehash/dense_hash_map" #include "string.h" #include "stdlib.h" #include "../include/MurmurHash3.h" #include "../include/MurmurHash2.h" #ifdef _OPENMP #include #endif /* _OPENMP */ #ifdef PYREX_WITHOUT_ASSERTIONS #define CYTHON_WITHOUT_ASSERTIONS #endif #ifndef CYTHON_UNUSED # if defined(__GNUC__) # if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) # define CYTHON_UNUSED __attribute__ ((__unused__)) # else # define CYTHON_UNUSED # endif # elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER)) # define CYTHON_UNUSED __attribute__ ((__unused__)) # else # define CYTHON_UNUSED # endif #endif typedef struct {PyObject **p; char *s; const Py_ssize_t n; const char* encoding; const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; /*proto*/ #define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0 #define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0 #define __PYX_DEFAULT_STRING_ENCODING "" #define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString #define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize #define __Pyx_fits_Py_ssize_t(v, type, is_signed) ( \ (sizeof(type) < sizeof(Py_ssize_t)) || \ (sizeof(type) > sizeof(Py_ssize_t) && \ likely(v < (type)PY_SSIZE_T_MAX || \ v == (type)PY_SSIZE_T_MAX) && \ (!is_signed || likely(v > (type)PY_SSIZE_T_MIN || \ v == (type)PY_SSIZE_T_MIN))) || \ (sizeof(type) == sizeof(Py_ssize_t) && \ (is_signed || likely(v < (type)PY_SSIZE_T_MAX || \ v == (type)PY_SSIZE_T_MAX))) ) static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject*); static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); #define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s)) #define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l) #define __Pyx_PyBytes_FromString PyBytes_FromString #define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(char*); #if PY_MAJOR_VERSION < 3 #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize #else #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize #endif #define __Pyx_PyObject_AsSString(s) ((signed char*) __Pyx_PyObject_AsString(s)) #define __Pyx_PyObject_AsUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s)) #define __Pyx_PyObject_FromUString(s) __Pyx_PyObject_FromString((char*)s) #define __Pyx_PyBytes_FromUString(s) __Pyx_PyBytes_FromString((char*)s) #define __Pyx_PyByteArray_FromUString(s) __Pyx_PyByteArray_FromString((char*)s) #define __Pyx_PyStr_FromUString(s) __Pyx_PyStr_FromString((char*)s) #define __Pyx_PyUnicode_FromUString(s) __Pyx_PyUnicode_FromString((char*)s) #if PY_MAJOR_VERSION < 3 static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) { const Py_UNICODE *u_end = u; while (*u_end++) ; return u_end - u - 1; } #else #define __Pyx_Py_UNICODE_strlen Py_UNICODE_strlen #endif #define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u)) #define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode #define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode #define __Pyx_Owned_Py_None(b) (Py_INCREF(Py_None), Py_None) #define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False)) static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x); static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); #if CYTHON_COMPILING_IN_CPYTHON #define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x)) #else #define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x) #endif #define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x)) #if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII static int __Pyx_sys_getdefaultencoding_not_ascii; static int __Pyx_init_sys_getdefaultencoding_params(void) { PyObject* sys = NULL; PyObject* default_encoding = NULL; PyObject* ascii_chars_u = NULL; PyObject* ascii_chars_b = NULL; sys = PyImport_ImportModule("sys"); if (sys == NULL) goto bad; default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL); if (default_encoding == NULL) goto bad; if (strcmp(PyBytes_AsString(default_encoding), "ascii") == 0) { __Pyx_sys_getdefaultencoding_not_ascii = 0; } else { const char* default_encoding_c = PyBytes_AS_STRING(default_encoding); char ascii_chars[128]; int c; for (c = 0; c < 128; c++) { ascii_chars[c] = c; } __Pyx_sys_getdefaultencoding_not_ascii = 1; ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL); if (ascii_chars_u == NULL) goto bad; ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL); if (ascii_chars_b == NULL || strncmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) { PyErr_Format( PyExc_ValueError, "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.", default_encoding_c); goto bad; } } Py_XDECREF(sys); Py_XDECREF(default_encoding); Py_XDECREF(ascii_chars_u); Py_XDECREF(ascii_chars_b); return 0; bad: Py_XDECREF(sys); Py_XDECREF(default_encoding); Py_XDECREF(ascii_chars_u); Py_XDECREF(ascii_chars_b); return -1; } #endif #if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3 #define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL) #else #define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL) #if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT static char* __PYX_DEFAULT_STRING_ENCODING; static int __Pyx_init_sys_getdefaultencoding_params(void) { PyObject* sys = NULL; PyObject* default_encoding = NULL; char* default_encoding_c; sys = PyImport_ImportModule("sys"); if (sys == NULL) goto bad; default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL); if (default_encoding == NULL) goto bad; default_encoding_c = PyBytes_AS_STRING(default_encoding); __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c)); strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c); Py_DECREF(sys); Py_DECREF(default_encoding); return 0; bad: Py_XDECREF(sys); Py_XDECREF(default_encoding); return -1; } #endif #endif #ifdef __GNUC__ /* Test for GCC > 2.95 */ #if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)) #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) #else /* __GNUC__ > 2 ... */ #define likely(x) (x) #define unlikely(x) (x) #endif /* __GNUC__ > 2 ... */ #else /* __GNUC__ */ #define likely(x) (x) #define unlikely(x) (x) #endif /* __GNUC__ */ static PyObject *__pyx_m; static PyObject *__pyx_d; static PyObject *__pyx_b; static PyObject *__pyx_empty_tuple; static PyObject *__pyx_empty_bytes; static int __pyx_lineno; static int __pyx_clineno = 0; static const char * __pyx_cfilenm= __FILE__; static const char *__pyx_filename; static const char *__pyx_f[] = { "en.pyx", }; /* "spacy/lexeme.pxd":4 * * * ctypedef int ClusterID # <<<<<<<<<<<<<< * ctypedef uint64_t StringHash * */ typedef int __pyx_t_5spacy_6lexeme_ClusterID; /* "spacy/lexeme.pxd":5 * * ctypedef int ClusterID * ctypedef uint64_t StringHash # <<<<<<<<<<<<<< * * */ typedef uint64_t __pyx_t_5spacy_6lexeme_StringHash; /* "spacy/en.pxd":7 * * ctypedef Py_UNICODE* string_ptr * ctypedef size_t Lexeme_addr # For python interop # <<<<<<<<<<<<<< * ctypedef Lexeme* Lexeme_ptr * */ typedef size_t __pyx_t_5spacy_2en_Lexeme_addr; /*--- Type declarations ---*/ struct __pyx_t_5spacy_6lexeme_Lexeme; /* "spacy/lexeme.pxd":27 * # over the Lexeme, via: * # for field in range(LexAttr.n): get_attr(Lexeme*, field) * cdef enum HashFields: # <<<<<<<<<<<<<< * sic * lex */ enum __pyx_t_5spacy_6lexeme_HashFields { __pyx_e_5spacy_6lexeme_sic, __pyx_e_5spacy_6lexeme_lex, __pyx_e_5spacy_6lexeme_normed, __pyx_e_5spacy_6lexeme_cluster, __pyx_e_5spacy_6lexeme_n }; /* "spacy/lexeme.pxd":8 * * * cdef struct Lexeme: # <<<<<<<<<<<<<< * StringHash sic # Hash of the original string * StringHash lex # Hash of the word, with punctuation and clitics split off */ struct __pyx_t_5spacy_6lexeme_Lexeme { __pyx_t_5spacy_6lexeme_StringHash sic; __pyx_t_5spacy_6lexeme_StringHash lex; __pyx_t_5spacy_6lexeme_StringHash normed; __pyx_t_5spacy_6lexeme_StringHash last3; Py_UNICODE first; double prob; __pyx_t_5spacy_6lexeme_ClusterID cluster; int oft_upper; int oft_title; struct __pyx_t_5spacy_6lexeme_Lexeme *tail; }; /* "spacy/en.pxd":6 * * * ctypedef Py_UNICODE* string_ptr # <<<<<<<<<<<<<< * ctypedef size_t Lexeme_addr # For python interop * ctypedef Lexeme* Lexeme_ptr */ typedef Py_UNICODE *__pyx_t_5spacy_2en_string_ptr; /* "spacy/en.pxd":8 * ctypedef Py_UNICODE* string_ptr * ctypedef size_t Lexeme_addr # For python interop * ctypedef Lexeme* Lexeme_ptr # <<<<<<<<<<<<<< * * */ typedef struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_t_5spacy_2en_Lexeme_ptr; #ifndef CYTHON_REFNANNY #define CYTHON_REFNANNY 0 #endif #if CYTHON_REFNANNY typedef struct { void (*INCREF)(void*, PyObject*, int); void (*DECREF)(void*, PyObject*, int); void (*GOTREF)(void*, PyObject*, int); void (*GIVEREF)(void*, PyObject*, int); void* (*SetupContext)(const char*, int, const char*); void (*FinishContext)(void**); } __Pyx_RefNannyAPIStruct; static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL; static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); /*proto*/ #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL; #ifdef WITH_THREAD #define __Pyx_RefNannySetupContext(name, acquire_gil) \ if (acquire_gil) { \ PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure(); \ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__); \ PyGILState_Release(__pyx_gilstate_save); \ } else { \ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__); \ } #else #define __Pyx_RefNannySetupContext(name, acquire_gil) \ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__) #endif #define __Pyx_RefNannyFinishContext() \ __Pyx_RefNanny->FinishContext(&__pyx_refnanny) #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__) #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__) #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__) #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__) #define __Pyx_XINCREF(r) do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0) #define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0) #define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0) #define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0) #else #define __Pyx_RefNannyDeclarations #define __Pyx_RefNannySetupContext(name, acquire_gil) #define __Pyx_RefNannyFinishContext() #define __Pyx_INCREF(r) Py_INCREF(r) #define __Pyx_DECREF(r) Py_DECREF(r) #define __Pyx_GOTREF(r) #define __Pyx_GIVEREF(r) #define __Pyx_XINCREF(r) Py_XINCREF(r) #define __Pyx_XDECREF(r) Py_XDECREF(r) #define __Pyx_XGOTREF(r) #define __Pyx_XGIVEREF(r) #endif /* CYTHON_REFNANNY */ #define __Pyx_XDECREF_SET(r, v) do { \ PyObject *tmp = (PyObject *) r; \ r = v; __Pyx_XDECREF(tmp); \ } while (0) #define __Pyx_DECREF_SET(r, v) do { \ PyObject *tmp = (PyObject *) r; \ r = v; __Pyx_DECREF(tmp); \ } while (0) #define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) #define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) { PyTypeObject* tp = Py_TYPE(obj); if (likely(tp->tp_getattro)) return tp->tp_getattro(obj, attr_name); #if PY_MAJOR_VERSION < 3 if (likely(tp->tp_getattr)) return tp->tp_getattr(obj, PyString_AS_STRING(attr_name)); #endif return PyObject_GetAttr(obj, attr_name); } #else #define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n) #endif static PyObject *__Pyx_GetBuiltinName(PyObject *name); /*proto*/ #include static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals); /*proto*/ static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals); /*proto*/ static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, const char *name, int exact); /*proto*/ static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); /*proto*/ static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); /*proto*/ static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[], \ PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args, \ const char* function_name); /*proto*/ static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name); /*proto*/ #define __Pyx_GetItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \ (__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \ __Pyx_GetItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound, boundscheck) : \ (is_list ? (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL) : \ __Pyx_GetItemInt_Generic(o, to_py_func(i)))) #define __Pyx_GetItemInt_List(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \ (__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \ __Pyx_GetItemInt_List_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) : \ (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL)) static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, int wraparound, int boundscheck); #define __Pyx_GetItemInt_Tuple(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \ (__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \ __Pyx_GetItemInt_Tuple_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) : \ (PyErr_SetString(PyExc_IndexError, "tuple index out of range"), (PyObject*)NULL)) static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, int wraparound, int boundscheck); static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j); static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, int is_list, int wraparound, int boundscheck); #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); /*proto*/ #else #define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw) #endif static CYTHON_INLINE void __Pyx_ErrRestore(PyObject *type, PyObject *value, PyObject *tb); /*proto*/ static CYTHON_INLINE void __Pyx_ErrFetch(PyObject **type, PyObject **value, PyObject **tb); /*proto*/ static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); /*proto*/ #define __Pyx_GetItemInt_Unicode(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \ (__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \ __Pyx_GetItemInt_Unicode_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) : \ (PyErr_SetString(PyExc_IndexError, "string index out of range"), (Py_UCS4)-1)) static CYTHON_INLINE Py_UCS4 __Pyx_GetItemInt_Unicode_Fast(PyObject* ustring, Py_ssize_t i, int wraparound, int boundscheck); static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring( PyObject* text, Py_ssize_t start, Py_ssize_t stop); #define __Pyx_SetItemInt(o, i, v, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \ (__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \ __Pyx_SetItemInt_Fast(o, (Py_ssize_t)i, v, is_list, wraparound, boundscheck) : \ (is_list ? (PyErr_SetString(PyExc_IndexError, "list assignment index out of range"), -1) : \ __Pyx_SetItemInt_Generic(o, to_py_func(i), v))) static CYTHON_INLINE int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v); static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v, int is_list, int wraparound, int boundscheck); static void __Pyx_WriteUnraisable(const char *name, int clineno, int lineno, const char *filename, int full_traceback); /*proto*/ static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); static CYTHON_INLINE uint64_t __Pyx_PyInt_As_uint64_t(PyObject *); static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *); static CYTHON_INLINE PyObject* __Pyx_PyInt_From_uint64_t(uint64_t value); static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *); static int __Pyx_check_binary_version(void); #if CYTHON_COMPILING_IN_CPYTHON #define __Pyx_PyObject_DelAttrStr(o,n) __Pyx_PyObject_SetAttrStr(o,n,NULL) static CYTHON_INLINE int __Pyx_PyObject_SetAttrStr(PyObject* obj, PyObject* attr_name, PyObject* value) { PyTypeObject* tp = Py_TYPE(obj); if (likely(tp->tp_setattro)) return tp->tp_setattro(obj, attr_name, value); #if PY_MAJOR_VERSION < 3 if (likely(tp->tp_setattr)) return tp->tp_setattr(obj, PyString_AS_STRING(attr_name), value); #endif return PyObject_SetAttr(obj, attr_name, value); } #else #define __Pyx_PyObject_DelAttrStr(o,n) PyObject_DelAttr(o,n) #define __Pyx_PyObject_SetAttrStr(o,n,v) PyObject_SetAttr(o,n,v) #endif static int __Pyx_ExportVoidPtr(PyObject *name, void *p, const char *sig); /*proto*/ static int __Pyx_ExportFunction(const char *name, void (*f)(void), const char *sig); /*proto*/ typedef struct { int code_line; PyCodeObject* code_object; } __Pyx_CodeObjectCacheEntry; struct __Pyx_CodeObjectCache { int count; int max_count; __Pyx_CodeObjectCacheEntry* entries; }; static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL}; static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line); static PyCodeObject *__pyx_find_code_object(int code_line); static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object); static void __Pyx_AddTraceback(const char *funcname, int c_line, int py_line, const char *filename); /*proto*/ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/ /* Module declarations from 'libcpp.utility' */ /* Module declarations from 'libcpp.vector' */ /* Module declarations from 'libc.stdint' */ /* Module declarations from 'ext.sparsehash' */ /* Module declarations from 'spacy.lexeme' */ /* Module declarations from 'libc.string' */ /* Module declarations from 'libc.stdlib' */ /* Module declarations from 'ext.murmurhash' */ /* Module declarations from 'spacy.en' */ static google::dense_hash_map<__pyx_t_5spacy_6lexeme_StringHash,__pyx_t_5spacy_2en_Lexeme_ptr> __pyx_v_5spacy_2en_LEXEMES; static struct __pyx_t_5spacy_6lexeme_Lexeme __pyx_v_5spacy_2en_BLANK_WORD; static __pyx_t_5spacy_2en_Lexeme_addr __pyx_f_5spacy_2en_lookup(PyObject *, int __pyx_skip_dispatch); /*proto*/ static __pyx_t_5spacy_2en_Lexeme_addr __pyx_f_5spacy_2en_lookup_chunk(PyObject *, int, int, int __pyx_skip_dispatch); /*proto*/ static __pyx_t_5spacy_6lexeme_StringHash __pyx_f_5spacy_2en_hash_string(PyObject *, size_t); /*proto*/ static PyObject *__pyx_f_5spacy_2en_unhash(__pyx_t_5spacy_6lexeme_StringHash, int __pyx_skip_dispatch); /*proto*/ static PyObject *__pyx_f_5spacy_2en_normalize_word_string(PyObject *); /*proto*/ static PyObject *__pyx_f_5spacy_2en__substr(PyObject *, int, int, size_t, int __pyx_skip_dispatch); /*proto*/ static struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_f_5spacy_2en__add(__pyx_t_5spacy_6lexeme_StringHash, PyObject *, int, size_t); /*proto*/ static struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_f_5spacy_2en__init_lexeme(PyObject *, __pyx_t_5spacy_6lexeme_StringHash, int, size_t); /*proto*/ static size_t __pyx_f_5spacy_2en__find_split(PyObject *, size_t); /*proto*/ #define __Pyx_MODULE_NAME "spacy.en" int __pyx_module_is_main_spacy__en = 0; /* Implementation of 'spacy.en' */ static PyObject *__pyx_builtin_ValueError; static PyObject *__pyx_pf_5spacy_2en_lookup(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string); /* proto */ static PyObject *__pyx_pf_5spacy_2en_2lookup_chunk(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string, int __pyx_v_start, int __pyx_v_end); /* proto */ static PyObject *__pyx_pf_5spacy_2en_4unhash(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_StringHash __pyx_v_hash_value); /* proto */ static PyObject *__pyx_pf_5spacy_2en_6_substr(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string, int __pyx_v_start, int __pyx_v_end, size_t __pyx_v_length); /* proto */ static char __pyx_k_[] = ""; static char __pyx_k_end[] = "end"; static char __pyx_k_lex[] = "lex"; static char __pyx_k_sic[] = "sic"; static char __pyx_k_YEAR[] = "!YEAR"; static char __pyx_k_main[] = "__main__"; static char __pyx_k_prob[] = "prob"; static char __pyx_k_tail[] = "tail"; static char __pyx_k_test[] = "__test__"; static char __pyx_k_first[] = "first"; static char __pyx_k_last3[] = "last3"; static char __pyx_k_lower[] = "lower"; static char __pyx_k_start[] = "start"; static char __pyx_k_DIGITS[] = "!DIGITS"; static char __pyx_k_length[] = "length"; static char __pyx_k_normed[] = "normed"; static char __pyx_k_string[] = "string"; static char __pyx_k_LEXEMES[] = "LEXEMES"; static char __pyx_k_STRINGS[] = "STRINGS"; static char __pyx_k_cluster[] = "cluster"; static char __pyx_k_isdigit[] = "isdigit"; static char __pyx_k_pyx_capi[] = "__pyx_capi__"; static char __pyx_k_oft_title[] = "oft_title"; static char __pyx_k_oft_upper[] = "oft_upper"; static char __pyx_k_ValueError[] = "ValueError"; static char __pyx_k_Serve_pointers_to_Lexeme_structs[] = "Serve pointers to Lexeme structs, given strings. Maintain a reverse index,\nso that strings can be retrieved from hashes. Use 64-bit hash values and\nboldly assume no collisions.\n"; static PyObject *__pyx_kp_u_; static PyObject *__pyx_kp_u_DIGITS; static PyObject *__pyx_n_s_LEXEMES; static PyObject *__pyx_n_s_STRINGS; static PyObject *__pyx_n_s_ValueError; static PyObject *__pyx_kp_u_YEAR; static PyObject *__pyx_n_s_cluster; static PyObject *__pyx_n_s_end; static PyObject *__pyx_n_s_first; static PyObject *__pyx_n_s_isdigit; static PyObject *__pyx_n_s_last3; static PyObject *__pyx_n_s_length; static PyObject *__pyx_n_s_lex; static PyObject *__pyx_n_s_lower; static PyObject *__pyx_n_s_main; static PyObject *__pyx_n_s_normed; static PyObject *__pyx_n_s_oft_title; static PyObject *__pyx_n_s_oft_upper; static PyObject *__pyx_n_s_prob; static PyObject *__pyx_n_s_pyx_capi; static PyObject *__pyx_n_s_sic; static PyObject *__pyx_n_s_start; static PyObject *__pyx_n_s_string; static PyObject *__pyx_n_s_tail; static PyObject *__pyx_n_s_test; /* "spacy/en.pyx":23 * * * cpdef Lexeme_addr lookup(unicode string) except 0: # <<<<<<<<<<<<<< * '''.. function:: enumerate(sequence[, start=0]) * Fetch a Lexeme representing a word string. If the word has not been seen, */ static PyObject *__pyx_pw_5spacy_2en_1lookup(PyObject *__pyx_self, PyObject *__pyx_v_string); /*proto*/ static __pyx_t_5spacy_2en_Lexeme_addr __pyx_f_5spacy_2en_lookup(PyObject *__pyx_v_string, CYTHON_UNUSED int __pyx_skip_dispatch) { size_t __pyx_v_length; __pyx_t_5spacy_6lexeme_StringHash __pyx_v_hashed; struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_v_word_ptr; __pyx_t_5spacy_2en_Lexeme_addr __pyx_r; __Pyx_RefNannyDeclarations int __pyx_t_1; int __pyx_t_2; Py_ssize_t __pyx_t_3; __pyx_t_5spacy_6lexeme_StringHash __pyx_t_4; struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_t_5; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("lookup", 0); /* "spacy/en.pyx":31 * To specify the boundaries of the word if it has not been seen, use lookup_chunk. * ''' * if string == '': # <<<<<<<<<<<<<< * return &BLANK_WORD * cdef size_t length = len(string) */ __pyx_t_1 = (__Pyx_PyUnicode_Equals(__pyx_v_string, __pyx_kp_u_, Py_EQ)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 31; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { /* "spacy/en.pyx":32 * ''' * if string == '': * return &BLANK_WORD # <<<<<<<<<<<<<< * cdef size_t length = len(string) * cdef StringHash hashed = hash_string(string, length) */ __pyx_r = ((__pyx_t_5spacy_2en_Lexeme_addr)(&__pyx_v_5spacy_2en_BLANK_WORD)); goto __pyx_L0; } /* "spacy/en.pyx":33 * if string == '': * return &BLANK_WORD * cdef size_t length = len(string) # <<<<<<<<<<<<<< * cdef StringHash hashed = hash_string(string, length) * cdef Lexeme* word_ptr = LEXEMES[hashed] */ if (unlikely(__pyx_v_string == Py_None)) { PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_3 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_string); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_length = __pyx_t_3; /* "spacy/en.pyx":34 * return &BLANK_WORD * cdef size_t length = len(string) * cdef StringHash hashed = hash_string(string, length) # <<<<<<<<<<<<<< * cdef Lexeme* word_ptr = LEXEMES[hashed] * cdef size_t n */ __pyx_t_4 = __pyx_f_5spacy_2en_hash_string(__pyx_v_string, __pyx_v_length); if (unlikely(__pyx_t_4 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_hashed = __pyx_t_4; /* "spacy/en.pyx":35 * cdef size_t length = len(string) * cdef StringHash hashed = hash_string(string, length) * cdef Lexeme* word_ptr = LEXEMES[hashed] # <<<<<<<<<<<<<< * cdef size_t n * if word_ptr == NULL: */ __pyx_v_word_ptr = (__pyx_v_5spacy_2en_LEXEMES[__pyx_v_hashed]); /* "spacy/en.pyx":37 * cdef Lexeme* word_ptr = LEXEMES[hashed] * cdef size_t n * if word_ptr == NULL: # <<<<<<<<<<<<<< * word_ptr = _add(hashed, string, _find_split(string, length), length) * return word_ptr */ __pyx_t_2 = ((__pyx_v_word_ptr == NULL) != 0); if (__pyx_t_2) { /* "spacy/en.pyx":38 * cdef size_t n * if word_ptr == NULL: * word_ptr = _add(hashed, string, _find_split(string, length), length) # <<<<<<<<<<<<<< * return word_ptr * */ __pyx_t_5 = __pyx_f_5spacy_2en__add(__pyx_v_hashed, __pyx_v_string, __pyx_f_5spacy_2en__find_split(__pyx_v_string, __pyx_v_length), __pyx_v_length); if (unlikely(__pyx_t_5 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_word_ptr = __pyx_t_5; goto __pyx_L4; } __pyx_L4:; /* "spacy/en.pyx":39 * if word_ptr == NULL: * word_ptr = _add(hashed, string, _find_split(string, length), length) * return word_ptr # <<<<<<<<<<<<<< * * */ __pyx_r = ((__pyx_t_5spacy_2en_Lexeme_addr)__pyx_v_word_ptr); goto __pyx_L0; /* "spacy/en.pyx":23 * * * cpdef Lexeme_addr lookup(unicode string) except 0: # <<<<<<<<<<<<<< * '''.. function:: enumerate(sequence[, start=0]) * Fetch a Lexeme representing a word string. If the word has not been seen, */ /* function exit code */ __pyx_L1_error:; __Pyx_AddTraceback("spacy.en.lookup", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = 0; __pyx_L0:; __Pyx_RefNannyFinishContext(); return __pyx_r; } /* Python wrapper */ static PyObject *__pyx_pw_5spacy_2en_1lookup(PyObject *__pyx_self, PyObject *__pyx_v_string); /*proto*/ static char __pyx_doc_5spacy_2en_lookup[] = ".. function:: enumerate(sequence[, start=0])\n Fetch a Lexeme representing a word string. If the word has not been seen,\n construct one, splitting off any attached punctuation or clitics. A\n reference to BLANK_WORD is returned for the empty string.\n \n To specify the boundaries of the word if it has not been seen, use lookup_chunk.\n "; static PyObject *__pyx_pw_5spacy_2en_1lookup(PyObject *__pyx_self, PyObject *__pyx_v_string) { CYTHON_UNUSED int __pyx_lineno = 0; CYTHON_UNUSED const char *__pyx_filename = NULL; CYTHON_UNUSED int __pyx_clineno = 0; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("lookup (wrapper)", 0); if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_string), (&PyUnicode_Type), 1, "string", 1))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 23; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_r = __pyx_pf_5spacy_2en_lookup(__pyx_self, ((PyObject*)__pyx_v_string)); /* function exit code */ goto __pyx_L0; __pyx_L1_error:; __pyx_r = NULL; __pyx_L0:; __Pyx_RefNannyFinishContext(); return __pyx_r; } static PyObject *__pyx_pf_5spacy_2en_lookup(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string) { PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations __pyx_t_5spacy_2en_Lexeme_addr __pyx_t_1; PyObject *__pyx_t_2 = NULL; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("lookup", 0); __Pyx_XDECREF(__pyx_r); __pyx_t_1 = __pyx_f_5spacy_2en_lookup(__pyx_v_string, 0); if (unlikely(__pyx_t_1 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 23; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_2 = __Pyx_PyInt_FromSize_t(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 23; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __pyx_r = __pyx_t_2; __pyx_t_2 = 0; goto __pyx_L0; /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_2); __Pyx_AddTraceback("spacy.en.lookup", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* "spacy/en.pyx":42 * * * cpdef Lexeme_addr lookup_chunk(unicode string, int start, int end) except 0: # <<<<<<<<<<<<<< * '''Fetch a Lexeme representing a word string. If the word has not been seen, * construct one, given the specified start and end indices. A negative index */ static PyObject *__pyx_pw_5spacy_2en_3lookup_chunk(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ static __pyx_t_5spacy_2en_Lexeme_addr __pyx_f_5spacy_2en_lookup_chunk(PyObject *__pyx_v_string, int __pyx_v_start, CYTHON_UNUSED int __pyx_v_end, CYTHON_UNUSED int __pyx_skip_dispatch) { size_t __pyx_v_length; __pyx_t_5spacy_6lexeme_StringHash __pyx_v_hashed; struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_v_chunk_ptr; __pyx_t_5spacy_2en_Lexeme_addr __pyx_r; __Pyx_RefNannyDeclarations int __pyx_t_1; int __pyx_t_2; Py_ssize_t __pyx_t_3; __pyx_t_5spacy_6lexeme_StringHash __pyx_t_4; struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_t_5; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("lookup_chunk", 0); /* "spacy/en.pyx":50 * A reference to BLANK_WORD is returned for the empty string. * ''' * if string == '': # <<<<<<<<<<<<<< * return &BLANK_WORD * cdef size_t length = len(string) */ __pyx_t_1 = (__Pyx_PyUnicode_Equals(__pyx_v_string, __pyx_kp_u_, Py_EQ)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { /* "spacy/en.pyx":51 * ''' * if string == '': * return &BLANK_WORD # <<<<<<<<<<<<<< * cdef size_t length = len(string) * cdef StringHash hashed = hash_string(string, length) */ __pyx_r = ((__pyx_t_5spacy_2en_Lexeme_addr)(&__pyx_v_5spacy_2en_BLANK_WORD)); goto __pyx_L0; } /* "spacy/en.pyx":52 * if string == '': * return &BLANK_WORD * cdef size_t length = len(string) # <<<<<<<<<<<<<< * cdef StringHash hashed = hash_string(string, length) * cdef Lexeme* chunk_ptr = LEXEMES[hashed] */ if (unlikely(__pyx_v_string == Py_None)) { PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_3 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_string); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_length = __pyx_t_3; /* "spacy/en.pyx":53 * return &BLANK_WORD * cdef size_t length = len(string) * cdef StringHash hashed = hash_string(string, length) # <<<<<<<<<<<<<< * cdef Lexeme* chunk_ptr = LEXEMES[hashed] * if chunk_ptr == NULL: */ __pyx_t_4 = __pyx_f_5spacy_2en_hash_string(__pyx_v_string, __pyx_v_length); if (unlikely(__pyx_t_4 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_hashed = __pyx_t_4; /* "spacy/en.pyx":54 * cdef size_t length = len(string) * cdef StringHash hashed = hash_string(string, length) * cdef Lexeme* chunk_ptr = LEXEMES[hashed] # <<<<<<<<<<<<<< * if chunk_ptr == NULL: * chunk_ptr = _add(hashed, string, start, length) */ __pyx_v_chunk_ptr = (__pyx_v_5spacy_2en_LEXEMES[__pyx_v_hashed]); /* "spacy/en.pyx":55 * cdef StringHash hashed = hash_string(string, length) * cdef Lexeme* chunk_ptr = LEXEMES[hashed] * if chunk_ptr == NULL: # <<<<<<<<<<<<<< * chunk_ptr = _add(hashed, string, start, length) * return chunk_ptr */ __pyx_t_2 = ((__pyx_v_chunk_ptr == NULL) != 0); if (__pyx_t_2) { /* "spacy/en.pyx":56 * cdef Lexeme* chunk_ptr = LEXEMES[hashed] * if chunk_ptr == NULL: * chunk_ptr = _add(hashed, string, start, length) # <<<<<<<<<<<<<< * return chunk_ptr * */ __pyx_t_5 = __pyx_f_5spacy_2en__add(__pyx_v_hashed, __pyx_v_string, __pyx_v_start, __pyx_v_length); if (unlikely(__pyx_t_5 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_chunk_ptr = __pyx_t_5; goto __pyx_L4; } __pyx_L4:; /* "spacy/en.pyx":57 * if chunk_ptr == NULL: * chunk_ptr = _add(hashed, string, start, length) * return chunk_ptr # <<<<<<<<<<<<<< * * */ __pyx_r = ((__pyx_t_5spacy_2en_Lexeme_addr)__pyx_v_chunk_ptr); goto __pyx_L0; /* "spacy/en.pyx":42 * * * cpdef Lexeme_addr lookup_chunk(unicode string, int start, int end) except 0: # <<<<<<<<<<<<<< * '''Fetch a Lexeme representing a word string. If the word has not been seen, * construct one, given the specified start and end indices. A negative index */ /* function exit code */ __pyx_L1_error:; __Pyx_AddTraceback("spacy.en.lookup_chunk", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = 0; __pyx_L0:; __Pyx_RefNannyFinishContext(); return __pyx_r; } /* Python wrapper */ static PyObject *__pyx_pw_5spacy_2en_3lookup_chunk(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ static char __pyx_doc_5spacy_2en_2lookup_chunk[] = "Fetch a Lexeme representing a word string. If the word has not been seen,\n construct one, given the specified start and end indices. A negative index\n significes 0 for start, and the string length for end --- i.e. the string\n will not be sliced if start == -1 and end == -1.\n \n A reference to BLANK_WORD is returned for the empty string.\n "; static PyObject *__pyx_pw_5spacy_2en_3lookup_chunk(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_string = 0; int __pyx_v_start; int __pyx_v_end; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("lookup_chunk (wrapper)", 0); { static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_string,&__pyx_n_s_start,&__pyx_n_s_end,0}; PyObject* values[3] = {0,0,0}; if (unlikely(__pyx_kwds)) { Py_ssize_t kw_args; const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); switch (pos_args) { case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); case 0: break; default: goto __pyx_L5_argtuple_error; } kw_args = PyDict_Size(__pyx_kwds); switch (pos_args) { case 0: if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_string)) != 0)) kw_args--; else goto __pyx_L5_argtuple_error; case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_start)) != 0)) kw_args--; else { __Pyx_RaiseArgtupleInvalid("lookup_chunk", 1, 3, 3, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_end)) != 0)) kw_args--; else { __Pyx_RaiseArgtupleInvalid("lookup_chunk", 1, 3, 3, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "lookup_chunk") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 3) { goto __pyx_L5_argtuple_error; } else { values[0] = PyTuple_GET_ITEM(__pyx_args, 0); values[1] = PyTuple_GET_ITEM(__pyx_args, 1); values[2] = PyTuple_GET_ITEM(__pyx_args, 2); } __pyx_v_string = ((PyObject*)values[0]); __pyx_v_start = __Pyx_PyInt_As_int(values[1]); if (unlikely((__pyx_v_start == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_v_end = __Pyx_PyInt_As_int(values[2]); if (unlikely((__pyx_v_end == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; __Pyx_RaiseArgtupleInvalid("lookup_chunk", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("spacy.en.lookup_chunk", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_string), (&PyUnicode_Type), 1, "string", 1))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_r = __pyx_pf_5spacy_2en_2lookup_chunk(__pyx_self, __pyx_v_string, __pyx_v_start, __pyx_v_end); /* function exit code */ goto __pyx_L0; __pyx_L1_error:; __pyx_r = NULL; __pyx_L0:; __Pyx_RefNannyFinishContext(); return __pyx_r; } static PyObject *__pyx_pf_5spacy_2en_2lookup_chunk(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string, int __pyx_v_start, int __pyx_v_end) { PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations __pyx_t_5spacy_2en_Lexeme_addr __pyx_t_1; PyObject *__pyx_t_2 = NULL; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("lookup_chunk", 0); __Pyx_XDECREF(__pyx_r); __pyx_t_1 = __pyx_f_5spacy_2en_lookup_chunk(__pyx_v_string, __pyx_v_start, __pyx_v_end, 0); if (unlikely(__pyx_t_1 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_2 = __Pyx_PyInt_FromSize_t(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __pyx_r = __pyx_t_2; __pyx_t_2 = 0; goto __pyx_L0; /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_2); __Pyx_AddTraceback("spacy.en.lookup_chunk", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* "spacy/en.pyx":60 * * * cdef StringHash hash_string(unicode s, size_t length) except 0: # <<<<<<<<<<<<<< * '''Hash unicode with MurmurHash64A''' * assert length */ static __pyx_t_5spacy_6lexeme_StringHash __pyx_f_5spacy_2en_hash_string(PyObject *__pyx_v_s, size_t __pyx_v_length) { __pyx_t_5spacy_6lexeme_StringHash __pyx_r; __Pyx_RefNannyDeclarations __pyx_t_5spacy_2en_string_ptr __pyx_t_1; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("hash_string", 0); /* "spacy/en.pyx":62 * cdef StringHash hash_string(unicode s, size_t length) except 0: * '''Hash unicode with MurmurHash64A''' * assert length # <<<<<<<<<<<<<< * return MurmurHash64A(s, length * sizeof(Py_UNICODE), 0) * */ #ifndef CYTHON_WITHOUT_ASSERTIONS if (unlikely(!Py_OptimizeFlag)) { if (unlikely(!(__pyx_v_length != 0))) { PyErr_SetNone(PyExc_AssertionError); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } } #endif /* "spacy/en.pyx":63 * '''Hash unicode with MurmurHash64A''' * assert length * return MurmurHash64A(s, length * sizeof(Py_UNICODE), 0) # <<<<<<<<<<<<<< * * */ __pyx_t_1 = __Pyx_PyUnicode_AsUnicode(__pyx_v_s); if (unlikely((!__pyx_t_1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_r = MurmurHash64A(((__pyx_t_5spacy_2en_string_ptr)__pyx_t_1), (__pyx_v_length * (sizeof(Py_UNICODE))), 0); goto __pyx_L0; /* "spacy/en.pyx":60 * * * cdef StringHash hash_string(unicode s, size_t length) except 0: # <<<<<<<<<<<<<< * '''Hash unicode with MurmurHash64A''' * assert length */ /* function exit code */ __pyx_L1_error:; __Pyx_AddTraceback("spacy.en.hash_string", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = 0; __pyx_L0:; __Pyx_RefNannyFinishContext(); return __pyx_r; } /* "spacy/en.pyx":66 * * * cpdef unicode unhash(StringHash hash_value): # <<<<<<<<<<<<<< * '''Fetch a string from the reverse index, given its hash value.''' * cdef string_ptr string = STRINGS[hash_value] */ static PyObject *__pyx_pw_5spacy_2en_5unhash(PyObject *__pyx_self, PyObject *__pyx_arg_hash_value); /*proto*/ static PyObject *__pyx_f_5spacy_2en_unhash(__pyx_t_5spacy_6lexeme_StringHash __pyx_v_hash_value, CYTHON_UNUSED int __pyx_skip_dispatch) { __pyx_t_5spacy_2en_string_ptr __pyx_v_string; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; PyObject *__pyx_t_2 = NULL; __pyx_t_5spacy_2en_string_ptr __pyx_t_3; int __pyx_t_4; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("unhash", 0); /* "spacy/en.pyx":68 * cpdef unicode unhash(StringHash hash_value): * '''Fetch a string from the reverse index, given its hash value.''' * cdef string_ptr string = STRINGS[hash_value] # <<<<<<<<<<<<<< * if string == NULL: * raise ValueError(hash_value) */ __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_STRINGS); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_t_2 = __Pyx_GetItemInt(__pyx_t_1, __pyx_v_hash_value, __pyx_t_5spacy_6lexeme_StringHash, 0, __Pyx_PyInt_From_uint64_t, 0, 0, 1); if (unlikely(__pyx_t_2 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_3 = __Pyx_PyUnicode_AsUnicode(__pyx_t_2); if (unlikely((!__pyx_t_3) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_v_string = __pyx_t_3; /* "spacy/en.pyx":69 * '''Fetch a string from the reverse index, given its hash value.''' * cdef string_ptr string = STRINGS[hash_value] * if string == NULL: # <<<<<<<<<<<<<< * raise ValueError(hash_value) * */ __pyx_t_4 = ((__pyx_v_string == NULL) != 0); if (__pyx_t_4) { /* "spacy/en.pyx":70 * cdef string_ptr string = STRINGS[hash_value] * if string == NULL: * raise ValueError(hash_value) # <<<<<<<<<<<<<< * * return string */ __pyx_t_2 = __Pyx_PyInt_From_uint64_t(__pyx_v_hash_value); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_2); __Pyx_GIVEREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_t_2 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_t_1, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_Raise(__pyx_t_2, 0, 0, 0); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; {__pyx_filename = __pyx_f[0]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } /* "spacy/en.pyx":72 * raise ValueError(hash_value) * * return string # <<<<<<<<<<<<<< * * */ __Pyx_XDECREF(__pyx_r); __pyx_t_2 = __Pyx_PyUnicode_FromUnicode(__pyx_v_string); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); if (!(likely(PyUnicode_CheckExact(__pyx_t_2))||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "unicode", Py_TYPE(__pyx_t_2)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 72; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_r = ((PyObject*)__pyx_t_2); __pyx_t_2 = 0; goto __pyx_L0; /* "spacy/en.pyx":66 * * * cpdef unicode unhash(StringHash hash_value): # <<<<<<<<<<<<<< * '''Fetch a string from the reverse index, given its hash value.''' * cdef string_ptr string = STRINGS[hash_value] */ /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); __Pyx_XDECREF(__pyx_t_2); __Pyx_AddTraceback("spacy.en.unhash", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = 0; __pyx_L0:; __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* Python wrapper */ static PyObject *__pyx_pw_5spacy_2en_5unhash(PyObject *__pyx_self, PyObject *__pyx_arg_hash_value); /*proto*/ static char __pyx_doc_5spacy_2en_4unhash[] = "Fetch a string from the reverse index, given its hash value."; static PyObject *__pyx_pw_5spacy_2en_5unhash(PyObject *__pyx_self, PyObject *__pyx_arg_hash_value) { __pyx_t_5spacy_6lexeme_StringHash __pyx_v_hash_value; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("unhash (wrapper)", 0); assert(__pyx_arg_hash_value); { __pyx_v_hash_value = __Pyx_PyInt_As_uint64_t(__pyx_arg_hash_value); if (unlikely((__pyx_v_hash_value == (uint64_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } goto __pyx_L4_argument_unpacking_done; __pyx_L3_error:; __Pyx_AddTraceback("spacy.en.unhash", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; __pyx_r = __pyx_pf_5spacy_2en_4unhash(__pyx_self, ((__pyx_t_5spacy_6lexeme_StringHash)__pyx_v_hash_value)); /* function exit code */ __Pyx_RefNannyFinishContext(); return __pyx_r; } static PyObject *__pyx_pf_5spacy_2en_4unhash(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_StringHash __pyx_v_hash_value) { PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("unhash", 0); __Pyx_XDECREF(__pyx_r); __pyx_t_1 = __pyx_f_5spacy_2en_unhash(__pyx_v_hash_value, 0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0; /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); __Pyx_AddTraceback("spacy.en.unhash", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* "spacy/en.pyx":75 * * * cdef unicode normalize_word_string(unicode word): # <<<<<<<<<<<<<< * '''Return a normalized version of the word, mapping: * - 4 digit strings into !YEAR */ static PyObject *__pyx_f_5spacy_2en_normalize_word_string(PyObject *__pyx_v_word) { PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; PyObject *__pyx_t_2 = NULL; int __pyx_t_3; Py_ssize_t __pyx_t_4; int __pyx_t_5; int __pyx_t_6; Py_UCS4 __pyx_t_7; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("normalize_word_string", 0); /* "spacy/en.pyx":82 * ''' * cdef unicode s * if word.isdigit() and len(word) == 4: # <<<<<<<<<<<<<< * return '!YEAR' * elif word[0].isdigit(): */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_isdigit); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 82; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_t_2 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_empty_tuple, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 82; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_3 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 82; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; if (__pyx_t_3) { if (unlikely(__pyx_v_word == Py_None)) { PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 82; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_4 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_word); if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 82; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_5 = (__pyx_t_4 == 4); __pyx_t_6 = __pyx_t_5; } else { __pyx_t_6 = __pyx_t_3; } if (__pyx_t_6) { /* "spacy/en.pyx":83 * cdef unicode s * if word.isdigit() and len(word) == 4: * return '!YEAR' # <<<<<<<<<<<<<< * elif word[0].isdigit(): * return '!DIGITS' */ __Pyx_XDECREF(__pyx_r); __Pyx_INCREF(__pyx_kp_u_YEAR); __pyx_r = __pyx_kp_u_YEAR; goto __pyx_L0; } /* "spacy/en.pyx":84 * if word.isdigit() and len(word) == 4: * return '!YEAR' * elif word[0].isdigit(): # <<<<<<<<<<<<<< * return '!DIGITS' * else: */ __pyx_t_7 = __Pyx_GetItemInt_Unicode(__pyx_v_word, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(__pyx_t_7 == (Py_UCS4)-1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 84; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __pyx_t_6 = Py_UNICODE_ISDIGIT(__pyx_t_7); if ((__pyx_t_6 != 0)) { /* "spacy/en.pyx":85 * return '!YEAR' * elif word[0].isdigit(): * return '!DIGITS' # <<<<<<<<<<<<<< * else: * return word.lower() */ __Pyx_XDECREF(__pyx_r); __Pyx_INCREF(__pyx_kp_u_DIGITS); __pyx_r = __pyx_kp_u_DIGITS; goto __pyx_L0; } /*else*/ { /* "spacy/en.pyx":87 * return '!DIGITS' * else: * return word.lower() # <<<<<<<<<<<<<< * * */ __Pyx_XDECREF(__pyx_r); __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_lower); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 87; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_empty_tuple, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 87; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; if (!(likely(PyUnicode_CheckExact(__pyx_t_1))||((__pyx_t_1) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "unicode", Py_TYPE(__pyx_t_1)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 87; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_r = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L0; } /* "spacy/en.pyx":75 * * * cdef unicode normalize_word_string(unicode word): # <<<<<<<<<<<<<< * '''Return a normalized version of the word, mapping: * - 4 digit strings into !YEAR */ /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); __Pyx_XDECREF(__pyx_t_2); __Pyx_AddTraceback("spacy.en.normalize_word_string", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = 0; __pyx_L0:; __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* "spacy/en.pyx":90 * * * cpdef unicode _substr(unicode string, int start, int end, size_t length): # <<<<<<<<<<<<<< * if end >= length: * end = -1 */ static PyObject *__pyx_pw_5spacy_2en_7_substr(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ static PyObject *__pyx_f_5spacy_2en__substr(PyObject *__pyx_v_string, int __pyx_v_start, int __pyx_v_end, size_t __pyx_v_length, CYTHON_UNUSED int __pyx_skip_dispatch) { PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations int __pyx_t_1; int __pyx_t_2; int __pyx_t_3; PyObject *__pyx_t_4 = NULL; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("_substr", 0); /* "spacy/en.pyx":91 * * cpdef unicode _substr(unicode string, int start, int end, size_t length): * if end >= length: # <<<<<<<<<<<<<< * end = -1 * if start >= length: */ __pyx_t_1 = ((__pyx_v_end >= __pyx_v_length) != 0); if (__pyx_t_1) { /* "spacy/en.pyx":92 * cpdef unicode _substr(unicode string, int start, int end, size_t length): * if end >= length: * end = -1 # <<<<<<<<<<<<<< * if start >= length: * start = 0 */ __pyx_v_end = -1; goto __pyx_L3; } __pyx_L3:; /* "spacy/en.pyx":93 * if end >= length: * end = -1 * if start >= length: # <<<<<<<<<<<<<< * start = 0 * if start <= 0 and end < 0: */ __pyx_t_1 = ((__pyx_v_start >= __pyx_v_length) != 0); if (__pyx_t_1) { /* "spacy/en.pyx":94 * end = -1 * if start >= length: * start = 0 # <<<<<<<<<<<<<< * if start <= 0 and end < 0: * return string */ __pyx_v_start = 0; goto __pyx_L4; } __pyx_L4:; /* "spacy/en.pyx":95 * if start >= length: * start = 0 * if start <= 0 and end < 0: # <<<<<<<<<<<<<< * return string * elif start < 0: */ __pyx_t_1 = ((__pyx_v_start <= 0) != 0); if (__pyx_t_1) { __pyx_t_2 = ((__pyx_v_end < 0) != 0); __pyx_t_3 = __pyx_t_2; } else { __pyx_t_3 = __pyx_t_1; } if (__pyx_t_3) { /* "spacy/en.pyx":96 * start = 0 * if start <= 0 and end < 0: * return string # <<<<<<<<<<<<<< * elif start < 0: * start = 0 */ __Pyx_XDECREF(__pyx_r); __Pyx_INCREF(__pyx_v_string); __pyx_r = __pyx_v_string; goto __pyx_L0; } /* "spacy/en.pyx":97 * if start <= 0 and end < 0: * return string * elif start < 0: # <<<<<<<<<<<<<< * start = 0 * elif end < 0: */ __pyx_t_3 = ((__pyx_v_start < 0) != 0); if (__pyx_t_3) { /* "spacy/en.pyx":98 * return string * elif start < 0: * start = 0 # <<<<<<<<<<<<<< * elif end < 0: * end = length */ __pyx_v_start = 0; goto __pyx_L5; } /* "spacy/en.pyx":99 * elif start < 0: * start = 0 * elif end < 0: # <<<<<<<<<<<<<< * end = length * return string[start:end] */ __pyx_t_3 = ((__pyx_v_end < 0) != 0); if (__pyx_t_3) { /* "spacy/en.pyx":100 * start = 0 * elif end < 0: * end = length # <<<<<<<<<<<<<< * return string[start:end] * */ __pyx_v_end = __pyx_v_length; goto __pyx_L5; } __pyx_L5:; /* "spacy/en.pyx":101 * elif end < 0: * end = length * return string[start:end] # <<<<<<<<<<<<<< * * */ __Pyx_XDECREF(__pyx_r); if (unlikely(__pyx_v_string == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 101; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_4 = __Pyx_PyUnicode_Substring(__pyx_v_string, __pyx_v_start, __pyx_v_end); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 101; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __pyx_r = ((PyObject*)__pyx_t_4); __pyx_t_4 = 0; goto __pyx_L0; /* "spacy/en.pyx":90 * * * cpdef unicode _substr(unicode string, int start, int end, size_t length): # <<<<<<<<<<<<<< * if end >= length: * end = -1 */ /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_4); __Pyx_AddTraceback("spacy.en._substr", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = 0; __pyx_L0:; __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* Python wrapper */ static PyObject *__pyx_pw_5spacy_2en_7_substr(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ static PyObject *__pyx_pw_5spacy_2en_7_substr(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_string = 0; int __pyx_v_start; int __pyx_v_end; size_t __pyx_v_length; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("_substr (wrapper)", 0); { static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_string,&__pyx_n_s_start,&__pyx_n_s_end,&__pyx_n_s_length,0}; PyObject* values[4] = {0,0,0,0}; if (unlikely(__pyx_kwds)) { Py_ssize_t kw_args; const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); switch (pos_args) { case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3); case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); case 0: break; default: goto __pyx_L5_argtuple_error; } kw_args = PyDict_Size(__pyx_kwds); switch (pos_args) { case 0: if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_string)) != 0)) kw_args--; else goto __pyx_L5_argtuple_error; case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_start)) != 0)) kw_args--; else { __Pyx_RaiseArgtupleInvalid("_substr", 1, 4, 4, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_end)) != 0)) kw_args--; else { __Pyx_RaiseArgtupleInvalid("_substr", 1, 4, 4, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_length)) != 0)) kw_args--; else { __Pyx_RaiseArgtupleInvalid("_substr", 1, 4, 4, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "_substr") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 4) { goto __pyx_L5_argtuple_error; } else { values[0] = PyTuple_GET_ITEM(__pyx_args, 0); values[1] = PyTuple_GET_ITEM(__pyx_args, 1); values[2] = PyTuple_GET_ITEM(__pyx_args, 2); values[3] = PyTuple_GET_ITEM(__pyx_args, 3); } __pyx_v_string = ((PyObject*)values[0]); __pyx_v_start = __Pyx_PyInt_As_int(values[1]); if (unlikely((__pyx_v_start == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_v_end = __Pyx_PyInt_As_int(values[2]); if (unlikely((__pyx_v_end == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_v_length = __Pyx_PyInt_As_size_t(values[3]); if (unlikely((__pyx_v_length == (size_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; __Pyx_RaiseArgtupleInvalid("_substr", 1, 4, 4, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("spacy.en._substr", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_string), (&PyUnicode_Type), 1, "string", 1))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_r = __pyx_pf_5spacy_2en_6_substr(__pyx_self, __pyx_v_string, __pyx_v_start, __pyx_v_end, __pyx_v_length); /* function exit code */ goto __pyx_L0; __pyx_L1_error:; __pyx_r = NULL; __pyx_L0:; __Pyx_RefNannyFinishContext(); return __pyx_r; } static PyObject *__pyx_pf_5spacy_2en_6_substr(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string, int __pyx_v_start, int __pyx_v_end, size_t __pyx_v_length) { PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("_substr", 0); __Pyx_XDECREF(__pyx_r); __pyx_t_1 = __pyx_f_5spacy_2en__substr(__pyx_v_string, __pyx_v_start, __pyx_v_end, __pyx_v_length, 0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0; /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); __Pyx_AddTraceback("spacy.en._substr", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* "spacy/en.pyx":104 * * * cdef Lexeme* _add(StringHash hashed, unicode string, int split, size_t length) except NULL: # <<<<<<<<<<<<<< * assert string * assert split <= length */ static struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_f_5spacy_2en__add(__pyx_t_5spacy_6lexeme_StringHash __pyx_v_hashed, PyObject *__pyx_v_string, int __pyx_v_split, size_t __pyx_v_length) { struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_v_word; struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_r; __Pyx_RefNannyDeclarations int __pyx_t_1; struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_t_2; PyObject *__pyx_t_3 = NULL; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("_add", 0); /* "spacy/en.pyx":105 * * cdef Lexeme* _add(StringHash hashed, unicode string, int split, size_t length) except NULL: * assert string # <<<<<<<<<<<<<< * assert split <= length * word = _init_lexeme(string, hashed, split, length) */ #ifndef CYTHON_WITHOUT_ASSERTIONS if (unlikely(!Py_OptimizeFlag)) { __pyx_t_1 = (__pyx_v_string != Py_None) && (PyUnicode_GET_SIZE(__pyx_v_string) != 0); if (unlikely(!__pyx_t_1)) { PyErr_SetNone(PyExc_AssertionError); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } } #endif /* "spacy/en.pyx":106 * cdef Lexeme* _add(StringHash hashed, unicode string, int split, size_t length) except NULL: * assert string * assert split <= length # <<<<<<<<<<<<<< * word = _init_lexeme(string, hashed, split, length) * LEXEMES[hashed] = word */ #ifndef CYTHON_WITHOUT_ASSERTIONS if (unlikely(!Py_OptimizeFlag)) { if (unlikely(!((__pyx_v_split <= __pyx_v_length) != 0))) { PyErr_SetNone(PyExc_AssertionError); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 106; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } } #endif /* "spacy/en.pyx":107 * assert string * assert split <= length * word = _init_lexeme(string, hashed, split, length) # <<<<<<<<<<<<<< * LEXEMES[hashed] = word * STRINGS[hashed] = string */ __pyx_t_2 = __pyx_f_5spacy_2en__init_lexeme(__pyx_v_string, __pyx_v_hashed, __pyx_v_split, __pyx_v_length); if (unlikely(__pyx_t_2 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 107; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_word = __pyx_t_2; /* "spacy/en.pyx":108 * assert split <= length * word = _init_lexeme(string, hashed, split, length) * LEXEMES[hashed] = word # <<<<<<<<<<<<<< * STRINGS[hashed] = string * return word */ (__pyx_v_5spacy_2en_LEXEMES[__pyx_v_hashed]) = __pyx_v_word; /* "spacy/en.pyx":109 * word = _init_lexeme(string, hashed, split, length) * LEXEMES[hashed] = word * STRINGS[hashed] = string # <<<<<<<<<<<<<< * return word * */ __pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_STRINGS); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 109; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); if (unlikely(__Pyx_SetItemInt(__pyx_t_3, __pyx_v_hashed, __pyx_v_string, __pyx_t_5spacy_6lexeme_StringHash, 0, __Pyx_PyInt_From_uint64_t, 0, 0, 1) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 109; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "spacy/en.pyx":110 * LEXEMES[hashed] = word * STRINGS[hashed] = string * return word # <<<<<<<<<<<<<< * * */ __pyx_r = __pyx_v_word; goto __pyx_L0; /* "spacy/en.pyx":104 * * * cdef Lexeme* _add(StringHash hashed, unicode string, int split, size_t length) except NULL: # <<<<<<<<<<<<<< * assert string * assert split <= length */ /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_3); __Pyx_AddTraceback("spacy.en._add", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_RefNannyFinishContext(); return __pyx_r; } /* "spacy/en.pyx":113 * * * cdef Lexeme* _init_lexeme(unicode string, StringHash hashed, # <<<<<<<<<<<<<< * int split, size_t length) except NULL: * assert split <= length */ static struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_f_5spacy_2en__init_lexeme(PyObject *__pyx_v_string, __pyx_t_5spacy_6lexeme_StringHash __pyx_v_hashed, int __pyx_v_split, size_t __pyx_v_length) { struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_v_word; PyObject *__pyx_v_tail_string = 0; PyObject *__pyx_v_lex = 0; PyObject *__pyx_v_normed = 0; PyObject *__pyx_v_last3 = 0; struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_r; __Pyx_RefNannyDeclarations long __pyx_t_1; int __pyx_t_2; Py_UCS4 __pyx_t_3; int __pyx_t_4; int __pyx_t_5; PyObject *__pyx_t_6 = NULL; Py_ssize_t __pyx_t_7; __pyx_t_5spacy_6lexeme_StringHash __pyx_t_8; __pyx_t_5spacy_2en_Lexeme_addr __pyx_t_9; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("_init_lexeme", 0); /* "spacy/en.pyx":115 * cdef Lexeme* _init_lexeme(unicode string, StringHash hashed, * int split, size_t length) except NULL: * assert split <= length # <<<<<<<<<<<<<< * cdef Lexeme* word = calloc(1, sizeof(Lexeme)) * */ #ifndef CYTHON_WITHOUT_ASSERTIONS if (unlikely(!Py_OptimizeFlag)) { if (unlikely(!((__pyx_v_split <= __pyx_v_length) != 0))) { PyErr_SetNone(PyExc_AssertionError); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } } #endif /* "spacy/en.pyx":116 * int split, size_t length) except NULL: * assert split <= length * cdef Lexeme* word = calloc(1, sizeof(Lexeme)) # <<<<<<<<<<<<<< * * word.first = (string[0] if string else 0) */ __pyx_v_word = ((struct __pyx_t_5spacy_6lexeme_Lexeme *)calloc(1, (sizeof(struct __pyx_t_5spacy_6lexeme_Lexeme)))); /* "spacy/en.pyx":118 * cdef Lexeme* word = calloc(1, sizeof(Lexeme)) * * word.first = (string[0] if string else 0) # <<<<<<<<<<<<<< * word.sic = hashed * */ __pyx_t_2 = (__pyx_v_string != Py_None) && (PyUnicode_GET_SIZE(__pyx_v_string) != 0); if (__pyx_t_2) { __pyx_t_3 = __Pyx_GetItemInt_Unicode(__pyx_v_string, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(__pyx_t_3 == (Py_UCS4)-1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 118; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __pyx_t_1 = __pyx_t_3; } else { __pyx_t_1 = 0; } __pyx_v_word->first = ((Py_UNICODE)__pyx_t_1); /* "spacy/en.pyx":119 * * word.first = (string[0] if string else 0) * word.sic = hashed # <<<<<<<<<<<<<< * * cdef unicode tail_string */ __pyx_v_word->sic = __pyx_v_hashed; /* "spacy/en.pyx":123 * cdef unicode tail_string * cdef unicode lex * if split != 0 and split < length: # <<<<<<<<<<<<<< * lex = _substr(string, 0, split, length) * tail_string = _substr(string, split, length, length) */ __pyx_t_2 = ((__pyx_v_split != 0) != 0); if (__pyx_t_2) { __pyx_t_4 = ((__pyx_v_split < __pyx_v_length) != 0); __pyx_t_5 = __pyx_t_4; } else { __pyx_t_5 = __pyx_t_2; } if (__pyx_t_5) { /* "spacy/en.pyx":124 * cdef unicode lex * if split != 0 and split < length: * lex = _substr(string, 0, split, length) # <<<<<<<<<<<<<< * tail_string = _substr(string, split, length, length) * else: */ __pyx_t_6 = __pyx_f_5spacy_2en__substr(__pyx_v_string, 0, __pyx_v_split, __pyx_v_length, 0); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 124; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __pyx_v_lex = ((PyObject*)__pyx_t_6); __pyx_t_6 = 0; /* "spacy/en.pyx":125 * if split != 0 and split < length: * lex = _substr(string, 0, split, length) * tail_string = _substr(string, split, length, length) # <<<<<<<<<<<<<< * else: * lex = string */ __pyx_t_6 = __pyx_f_5spacy_2en__substr(__pyx_v_string, __pyx_v_split, __pyx_v_length, __pyx_v_length, 0); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 125; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __pyx_v_tail_string = ((PyObject*)__pyx_t_6); __pyx_t_6 = 0; goto __pyx_L3; } /*else*/ { /* "spacy/en.pyx":127 * tail_string = _substr(string, split, length, length) * else: * lex = string # <<<<<<<<<<<<<< * tail_string = '' * assert lex */ __Pyx_INCREF(__pyx_v_string); __pyx_v_lex = __pyx_v_string; /* "spacy/en.pyx":128 * else: * lex = string * tail_string = '' # <<<<<<<<<<<<<< * assert lex * cdef unicode normed = normalize_word_string(lex) */ __Pyx_INCREF(__pyx_kp_u_); __pyx_v_tail_string = __pyx_kp_u_; } __pyx_L3:; /* "spacy/en.pyx":129 * lex = string * tail_string = '' * assert lex # <<<<<<<<<<<<<< * cdef unicode normed = normalize_word_string(lex) * cdef unicode last3 = _substr(string, length - 3, length, length) */ #ifndef CYTHON_WITHOUT_ASSERTIONS if (unlikely(!Py_OptimizeFlag)) { __pyx_t_5 = (__pyx_v_lex != Py_None) && (PyUnicode_GET_SIZE(__pyx_v_lex) != 0); if (unlikely(!__pyx_t_5)) { PyErr_SetNone(PyExc_AssertionError); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 129; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } } #endif /* "spacy/en.pyx":130 * tail_string = '' * assert lex * cdef unicode normed = normalize_word_string(lex) # <<<<<<<<<<<<<< * cdef unicode last3 = _substr(string, length - 3, length, length) * */ __pyx_t_6 = __pyx_f_5spacy_2en_normalize_word_string(__pyx_v_lex); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __pyx_v_normed = ((PyObject*)__pyx_t_6); __pyx_t_6 = 0; /* "spacy/en.pyx":131 * assert lex * cdef unicode normed = normalize_word_string(lex) * cdef unicode last3 = _substr(string, length - 3, length, length) # <<<<<<<<<<<<<< * * assert normed */ __pyx_t_6 = __pyx_f_5spacy_2en__substr(__pyx_v_string, (__pyx_v_length - 3), __pyx_v_length, __pyx_v_length, 0); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __pyx_v_last3 = ((PyObject*)__pyx_t_6); __pyx_t_6 = 0; /* "spacy/en.pyx":133 * cdef unicode last3 = _substr(string, length - 3, length, length) * * assert normed # <<<<<<<<<<<<<< * assert len(normed) * */ #ifndef CYTHON_WITHOUT_ASSERTIONS if (unlikely(!Py_OptimizeFlag)) { __pyx_t_5 = (__pyx_v_normed != Py_None) && (PyUnicode_GET_SIZE(__pyx_v_normed) != 0); if (unlikely(!__pyx_t_5)) { PyErr_SetNone(PyExc_AssertionError); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } } #endif /* "spacy/en.pyx":134 * * assert normed * assert len(normed) # <<<<<<<<<<<<<< * * word.lex = hash_string(lex, len(lex)) */ #ifndef CYTHON_WITHOUT_ASSERTIONS if (unlikely(!Py_OptimizeFlag)) { if (unlikely(__pyx_v_normed == Py_None)) { PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 134; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_7 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_normed); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 134; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (unlikely(!(__pyx_t_7 != 0))) { PyErr_SetNone(PyExc_AssertionError); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 134; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } } #endif /* "spacy/en.pyx":136 * assert len(normed) * * word.lex = hash_string(lex, len(lex)) # <<<<<<<<<<<<<< * word.normed = hash_string(normed, len(normed)) * word.last3 = hash_string(last3, len(last3)) */ if (unlikely(__pyx_v_lex == Py_None)) { PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 136; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_7 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_lex); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 136; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_8 = __pyx_f_5spacy_2en_hash_string(__pyx_v_lex, __pyx_t_7); if (unlikely(__pyx_t_8 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 136; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_word->lex = __pyx_t_8; /* "spacy/en.pyx":137 * * word.lex = hash_string(lex, len(lex)) * word.normed = hash_string(normed, len(normed)) # <<<<<<<<<<<<<< * word.last3 = hash_string(last3, len(last3)) * */ if (unlikely(__pyx_v_normed == Py_None)) { PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 137; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_7 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_normed); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 137; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_8 = __pyx_f_5spacy_2en_hash_string(__pyx_v_normed, __pyx_t_7); if (unlikely(__pyx_t_8 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 137; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_word->normed = __pyx_t_8; /* "spacy/en.pyx":138 * word.lex = hash_string(lex, len(lex)) * word.normed = hash_string(normed, len(normed)) * word.last3 = hash_string(last3, len(last3)) # <<<<<<<<<<<<<< * * STRINGS[word.lex] = lex */ if (unlikely(__pyx_v_last3 == Py_None)) { PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_7 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_last3); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_8 = __pyx_f_5spacy_2en_hash_string(__pyx_v_last3, __pyx_t_7); if (unlikely(__pyx_t_8 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_word->last3 = __pyx_t_8; /* "spacy/en.pyx":140 * word.last3 = hash_string(last3, len(last3)) * * STRINGS[word.lex] = lex # <<<<<<<<<<<<<< * STRINGS[word.normed] = normed * STRINGS[word.last3] = last3 */ __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_STRINGS); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); if (unlikely(__Pyx_SetItemInt(__pyx_t_6, __pyx_v_word->lex, __pyx_v_lex, __pyx_t_5spacy_6lexeme_StringHash, 0, __Pyx_PyInt_From_uint64_t, 0, 0, 1) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; /* "spacy/en.pyx":141 * * STRINGS[word.lex] = lex * STRINGS[word.normed] = normed # <<<<<<<<<<<<<< * STRINGS[word.last3] = last3 * */ __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_STRINGS); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); if (unlikely(__Pyx_SetItemInt(__pyx_t_6, __pyx_v_word->normed, __pyx_v_normed, __pyx_t_5spacy_6lexeme_StringHash, 0, __Pyx_PyInt_From_uint64_t, 0, 0, 1) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; /* "spacy/en.pyx":142 * STRINGS[word.lex] = lex * STRINGS[word.normed] = normed * STRINGS[word.last3] = last3 # <<<<<<<<<<<<<< * * # These are loaded later */ __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_STRINGS); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); if (unlikely(__Pyx_SetItemInt(__pyx_t_6, __pyx_v_word->last3, __pyx_v_last3, __pyx_t_5spacy_6lexeme_StringHash, 0, __Pyx_PyInt_From_uint64_t, 0, 0, 1) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; /* "spacy/en.pyx":145 * * # These are loaded later * word.prob = 0 # <<<<<<<<<<<<<< * word.cluster = 0 * word.oft_upper = False */ __pyx_v_word->prob = 0.0; /* "spacy/en.pyx":146 * # These are loaded later * word.prob = 0 * word.cluster = 0 # <<<<<<<<<<<<<< * word.oft_upper = False * word.oft_title = False */ __pyx_v_word->cluster = 0; /* "spacy/en.pyx":147 * word.prob = 0 * word.cluster = 0 * word.oft_upper = False # <<<<<<<<<<<<<< * word.oft_title = False * */ __pyx_v_word->oft_upper = 0; /* "spacy/en.pyx":148 * word.cluster = 0 * word.oft_upper = False * word.oft_title = False # <<<<<<<<<<<<<< * * # Now recurse, and deal with the tail */ __pyx_v_word->oft_title = 0; /* "spacy/en.pyx":151 * * # Now recurse, and deal with the tail * if tail_string: # <<<<<<<<<<<<<< * word.tail = lookup(tail_string) * return word */ __pyx_t_5 = (__pyx_v_tail_string != Py_None) && (PyUnicode_GET_SIZE(__pyx_v_tail_string) != 0); if (__pyx_t_5) { /* "spacy/en.pyx":152 * # Now recurse, and deal with the tail * if tail_string: * word.tail = lookup(tail_string) # <<<<<<<<<<<<<< * return word * */ __pyx_t_9 = __pyx_f_5spacy_2en_lookup(__pyx_v_tail_string, 0); if (unlikely(__pyx_t_9 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 152; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_word->tail = ((struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_t_9); goto __pyx_L4; } __pyx_L4:; /* "spacy/en.pyx":153 * if tail_string: * word.tail = lookup(tail_string) * return word # <<<<<<<<<<<<<< * * */ __pyx_r = __pyx_v_word; goto __pyx_L0; /* "spacy/en.pyx":113 * * * cdef Lexeme* _init_lexeme(unicode string, StringHash hashed, # <<<<<<<<<<<<<< * int split, size_t length) except NULL: * assert split <= length */ /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_6); __Pyx_AddTraceback("spacy.en._init_lexeme", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XDECREF(__pyx_v_tail_string); __Pyx_XDECREF(__pyx_v_lex); __Pyx_XDECREF(__pyx_v_normed); __Pyx_XDECREF(__pyx_v_last3); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* "spacy/en.pyx":156 * * * cdef size_t _find_split(unicode word, size_t length): # <<<<<<<<<<<<<< * cdef size_t i = 0 * if word[0].isalnum(): */ static size_t __pyx_f_5spacy_2en__find_split(PyObject *__pyx_v_word, size_t __pyx_v_length) { size_t __pyx_v_i; size_t __pyx_r; __Pyx_RefNannyDeclarations Py_UCS4 __pyx_t_1; int __pyx_t_2; int __pyx_t_3; int __pyx_t_4; size_t __pyx_t_5; Py_UCS4 __pyx_t_6; int __pyx_t_7; int __pyx_t_8; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("_find_split", 0); /* "spacy/en.pyx":157 * * cdef size_t _find_split(unicode word, size_t length): * cdef size_t i = 0 # <<<<<<<<<<<<<< * if word[0].isalnum(): * while i < length and word[i].isalnum(): */ __pyx_v_i = 0; /* "spacy/en.pyx":158 * cdef size_t _find_split(unicode word, size_t length): * cdef size_t i = 0 * if word[0].isalnum(): # <<<<<<<<<<<<<< * while i < length and word[i].isalnum(): * i += 1 */ __pyx_t_1 = __Pyx_GetItemInt_Unicode(__pyx_v_word, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(__pyx_t_1 == (Py_UCS4)-1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __pyx_t_2 = Py_UNICODE_ISALNUM(__pyx_t_1); if ((__pyx_t_2 != 0)) { /* "spacy/en.pyx":159 * cdef size_t i = 0 * if word[0].isalnum(): * while i < length and word[i].isalnum(): # <<<<<<<<<<<<<< * i += 1 * else: */ while (1) { __pyx_t_2 = (__pyx_v_i < __pyx_v_length); if (__pyx_t_2) { __pyx_t_1 = __Pyx_GetItemInt_Unicode(__pyx_v_word, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(__pyx_t_1 == (Py_UCS4)-1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __pyx_t_3 = Py_UNICODE_ISALNUM(__pyx_t_1); __pyx_t_4 = (__pyx_t_3 != 0); } else { __pyx_t_4 = __pyx_t_2; } if (!__pyx_t_4) break; /* "spacy/en.pyx":160 * if word[0].isalnum(): * while i < length and word[i].isalnum(): * i += 1 # <<<<<<<<<<<<<< * else: * # Split off a punctuation character, or a sequence of the same punctuation character */ __pyx_v_i = (__pyx_v_i + 1); } goto __pyx_L3; } /*else*/ { /* "spacy/en.pyx":163 * else: * # Split off a punctuation character, or a sequence of the same punctuation character * while i < length and not word[i].isalnum() and (i == 0 or word[i-1] == word[i]): # <<<<<<<<<<<<<< * i += 1 * return i */ while (1) { __pyx_t_4 = ((__pyx_v_i < __pyx_v_length) != 0); if (__pyx_t_4) { __pyx_t_1 = __Pyx_GetItemInt_Unicode(__pyx_v_word, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(__pyx_t_1 == (Py_UCS4)-1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 163; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __pyx_t_2 = Py_UNICODE_ISALNUM(__pyx_t_1); __pyx_t_3 = ((!(__pyx_t_2 != 0)) != 0); if (__pyx_t_3) { __pyx_t_2 = ((__pyx_v_i == 0) != 0); if (!__pyx_t_2) { __pyx_t_5 = (__pyx_v_i - 1); __pyx_t_1 = __Pyx_GetItemInt_Unicode(__pyx_v_word, __pyx_t_5, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(__pyx_t_1 == (Py_UCS4)-1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 163; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __pyx_t_6 = __Pyx_GetItemInt_Unicode(__pyx_v_word, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(__pyx_t_6 == (Py_UCS4)-1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 163; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __pyx_t_7 = ((__pyx_t_1 == __pyx_t_6) != 0); __pyx_t_8 = __pyx_t_7; } else { __pyx_t_8 = __pyx_t_2; } __pyx_t_2 = __pyx_t_8; } else { __pyx_t_2 = __pyx_t_3; } __pyx_t_3 = __pyx_t_2; } else { __pyx_t_3 = __pyx_t_4; } if (!__pyx_t_3) break; /* "spacy/en.pyx":164 * # Split off a punctuation character, or a sequence of the same punctuation character * while i < length and not word[i].isalnum() and (i == 0 or word[i-1] == word[i]): * i += 1 # <<<<<<<<<<<<<< * return i */ __pyx_v_i = (__pyx_v_i + 1); } } __pyx_L3:; /* "spacy/en.pyx":165 * while i < length and not word[i].isalnum() and (i == 0 or word[i-1] == word[i]): * i += 1 * return i # <<<<<<<<<<<<<< */ __pyx_r = __pyx_v_i; goto __pyx_L0; /* "spacy/en.pyx":156 * * * cdef size_t _find_split(unicode word, size_t length): # <<<<<<<<<<<<<< * cdef size_t i = 0 * if word[0].isalnum(): */ /* function exit code */ __pyx_L1_error:; __Pyx_WriteUnraisable("spacy.en._find_split", __pyx_clineno, __pyx_lineno, __pyx_filename, 0); __pyx_r = 0; __pyx_L0:; __Pyx_RefNannyFinishContext(); return __pyx_r; } static PyMethodDef __pyx_methods[] = { {__Pyx_NAMESTR("lookup"), (PyCFunction)__pyx_pw_5spacy_2en_1lookup, METH_O, __Pyx_DOCSTR(__pyx_doc_5spacy_2en_lookup)}, {__Pyx_NAMESTR("lookup_chunk"), (PyCFunction)__pyx_pw_5spacy_2en_3lookup_chunk, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_5spacy_2en_2lookup_chunk)}, {__Pyx_NAMESTR("unhash"), (PyCFunction)__pyx_pw_5spacy_2en_5unhash, METH_O, __Pyx_DOCSTR(__pyx_doc_5spacy_2en_4unhash)}, {__Pyx_NAMESTR("_substr"), (PyCFunction)__pyx_pw_5spacy_2en_7_substr, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}, {0, 0, 0, 0} }; #if PY_MAJOR_VERSION >= 3 static struct PyModuleDef __pyx_moduledef = { #if PY_VERSION_HEX < 0x03020000 { PyObject_HEAD_INIT(NULL) NULL, 0, NULL }, #else PyModuleDef_HEAD_INIT, #endif __Pyx_NAMESTR("en"), __Pyx_DOCSTR(__pyx_k_Serve_pointers_to_Lexeme_structs), /* m_doc */ -1, /* m_size */ __pyx_methods /* m_methods */, NULL, /* m_reload */ NULL, /* m_traverse */ NULL, /* m_clear */ NULL /* m_free */ }; #endif static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_kp_u_, __pyx_k_, sizeof(__pyx_k_), 0, 1, 0, 0}, {&__pyx_kp_u_DIGITS, __pyx_k_DIGITS, sizeof(__pyx_k_DIGITS), 0, 1, 0, 0}, {&__pyx_n_s_LEXEMES, __pyx_k_LEXEMES, sizeof(__pyx_k_LEXEMES), 0, 0, 1, 1}, {&__pyx_n_s_STRINGS, __pyx_k_STRINGS, sizeof(__pyx_k_STRINGS), 0, 0, 1, 1}, {&__pyx_n_s_ValueError, __pyx_k_ValueError, sizeof(__pyx_k_ValueError), 0, 0, 1, 1}, {&__pyx_kp_u_YEAR, __pyx_k_YEAR, sizeof(__pyx_k_YEAR), 0, 1, 0, 0}, {&__pyx_n_s_cluster, __pyx_k_cluster, sizeof(__pyx_k_cluster), 0, 0, 1, 1}, {&__pyx_n_s_end, __pyx_k_end, sizeof(__pyx_k_end), 0, 0, 1, 1}, {&__pyx_n_s_first, __pyx_k_first, sizeof(__pyx_k_first), 0, 0, 1, 1}, {&__pyx_n_s_isdigit, __pyx_k_isdigit, sizeof(__pyx_k_isdigit), 0, 0, 1, 1}, {&__pyx_n_s_last3, __pyx_k_last3, sizeof(__pyx_k_last3), 0, 0, 1, 1}, {&__pyx_n_s_length, __pyx_k_length, sizeof(__pyx_k_length), 0, 0, 1, 1}, {&__pyx_n_s_lex, __pyx_k_lex, sizeof(__pyx_k_lex), 0, 0, 1, 1}, {&__pyx_n_s_lower, __pyx_k_lower, sizeof(__pyx_k_lower), 0, 0, 1, 1}, {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, {&__pyx_n_s_normed, __pyx_k_normed, sizeof(__pyx_k_normed), 0, 0, 1, 1}, {&__pyx_n_s_oft_title, __pyx_k_oft_title, sizeof(__pyx_k_oft_title), 0, 0, 1, 1}, {&__pyx_n_s_oft_upper, __pyx_k_oft_upper, sizeof(__pyx_k_oft_upper), 0, 0, 1, 1}, {&__pyx_n_s_prob, __pyx_k_prob, sizeof(__pyx_k_prob), 0, 0, 1, 1}, {&__pyx_n_s_pyx_capi, __pyx_k_pyx_capi, sizeof(__pyx_k_pyx_capi), 0, 0, 1, 1}, {&__pyx_n_s_sic, __pyx_k_sic, sizeof(__pyx_k_sic), 0, 0, 1, 1}, {&__pyx_n_s_start, __pyx_k_start, sizeof(__pyx_k_start), 0, 0, 1, 1}, {&__pyx_n_s_string, __pyx_k_string, sizeof(__pyx_k_string), 0, 0, 1, 1}, {&__pyx_n_s_tail, __pyx_k_tail, sizeof(__pyx_k_tail), 0, 0, 1, 1}, {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, {0, 0, 0, 0, 0, 0, 0} }; static int __Pyx_InitCachedBuiltins(void) { __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L1_error;} return 0; __pyx_L1_error:; return -1; } static int __Pyx_InitCachedConstants(void) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); __Pyx_RefNannyFinishContext(); return 0; } static int __Pyx_InitGlobals(void) { if (__Pyx_InitStrings(__pyx_string_tab) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; return 0; __pyx_L1_error:; return -1; } #if PY_MAJOR_VERSION < 3 PyMODINIT_FUNC initen(void); /*proto*/ PyMODINIT_FUNC initen(void) #else PyMODINIT_FUNC PyInit_en(void); /*proto*/ PyMODINIT_FUNC PyInit_en(void) #endif { PyObject *__pyx_t_1 = NULL; struct __pyx_t_5spacy_6lexeme_Lexeme __pyx_t_2; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannyDeclarations #if CYTHON_REFNANNY __Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny"); if (!__Pyx_RefNanny) { PyErr_Clear(); __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny"); if (!__Pyx_RefNanny) Py_FatalError("failed to import 'refnanny' module"); } #endif __Pyx_RefNannySetupContext("PyMODINIT_FUNC PyInit_en(void)", 0); if ( __Pyx_check_binary_version() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #ifdef __Pyx_CyFunction_USED if (__Pyx_CyFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif #ifdef __Pyx_FusedFunction_USED if (__pyx_FusedFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif #ifdef __Pyx_Generator_USED if (__pyx_Generator_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif /*--- Library function declarations ---*/ /*--- Threads initialization code ---*/ #if defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS #ifdef WITH_THREAD /* Python build with threading support? */ PyEval_InitThreads(); #endif #endif /*--- Module creation code ---*/ #if PY_MAJOR_VERSION < 3 __pyx_m = Py_InitModule4(__Pyx_NAMESTR("en"), __pyx_methods, __Pyx_DOCSTR(__pyx_k_Serve_pointers_to_Lexeme_structs), 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); #else __pyx_m = PyModule_Create(&__pyx_moduledef); #endif if (unlikely(!__pyx_m)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} Py_INCREF(__pyx_d); __pyx_b = PyImport_AddModule(__Pyx_NAMESTR(__Pyx_BUILTIN_MODULE_NAME)); if (unlikely(!__pyx_b)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #if CYTHON_COMPILING_IN_PYPY Py_INCREF(__pyx_b); #endif if (__Pyx_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; /*--- Initialize various global constants etc. ---*/ if (unlikely(__Pyx_InitGlobals() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) if (__Pyx_init_sys_getdefaultencoding_params() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif if (__pyx_module_is_main_spacy__en) { if (__Pyx_SetAttrString(__pyx_m, "__name__", __pyx_n_s_main) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; } #if PY_MAJOR_VERSION >= 3 { PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (!PyDict_GetItemString(modules, "spacy.en")) { if (unlikely(PyDict_SetItemString(modules, "spacy.en", __pyx_m) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } } #endif /*--- Builtin init code ---*/ if (unlikely(__Pyx_InitCachedBuiltins() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /*--- Constants init code ---*/ if (unlikely(__Pyx_InitCachedConstants() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /*--- Global init code ---*/ /*--- Variable export code ---*/ if (__Pyx_ExportVoidPtr(__pyx_n_s_LEXEMES, (void *)&__pyx_v_5spacy_2en_LEXEMES, "google::dense_hash_map<__pyx_t_5spacy_6lexeme_StringHash,__pyx_t_5spacy_2en_Lexeme_ptr>") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /*--- Function export code ---*/ if (__Pyx_ExportFunction("lookup", (void (*)(void))__pyx_f_5spacy_2en_lookup, "__pyx_t_5spacy_2en_Lexeme_addr (PyObject *, int __pyx_skip_dispatch)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__Pyx_ExportFunction("lookup_chunk", (void (*)(void))__pyx_f_5spacy_2en_lookup_chunk, "__pyx_t_5spacy_2en_Lexeme_addr (PyObject *, int, int, int __pyx_skip_dispatch)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__Pyx_ExportFunction("hash_string", (void (*)(void))__pyx_f_5spacy_2en_hash_string, "__pyx_t_5spacy_6lexeme_StringHash (PyObject *, size_t)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__Pyx_ExportFunction("unhash", (void (*)(void))__pyx_f_5spacy_2en_unhash, "PyObject *(__pyx_t_5spacy_6lexeme_StringHash, int __pyx_skip_dispatch)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /*--- Type init code ---*/ /*--- Type import code ---*/ /*--- Variable import code ---*/ /*--- Function import code ---*/ /*--- Execution code ---*/ /* "spacy/en.pyx":15 * * * STRINGS = {} # <<<<<<<<<<<<<< * LEXEMES = dense_hash_map[StringHash, Lexeme_ptr]() * LEXEMES.set_empty_key(0) */ __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); if (PyDict_SetItem(__pyx_d, __pyx_n_s_STRINGS, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "spacy/en.pyx":16 * * STRINGS = {} * LEXEMES = dense_hash_map[StringHash, Lexeme_ptr]() # <<<<<<<<<<<<<< * LEXEMES.set_empty_key(0) * */ __pyx_v_5spacy_2en_LEXEMES = google::dense_hash_map<__pyx_t_5spacy_6lexeme_StringHash,__pyx_t_5spacy_2en_Lexeme_ptr>(); /* "spacy/en.pyx":17 * STRINGS = {} * LEXEMES = dense_hash_map[StringHash, Lexeme_ptr]() * LEXEMES.set_empty_key(0) # <<<<<<<<<<<<<< * * */ __pyx_v_5spacy_2en_LEXEMES.set_empty_key(0); /* "spacy/en.pyx":20 * * * cdef Lexeme BLANK_WORD = Lexeme(0, 0, 0, 0, 0, 0.0, 0, False, False, NULL) # <<<<<<<<<<<<<< * * */ __pyx_t_2.sic = 0; __pyx_t_2.lex = 0; __pyx_t_2.normed = 0; __pyx_t_2.last3 = 0; __pyx_t_2.first = 0; __pyx_t_2.prob = 0.0; __pyx_t_2.cluster = 0; __pyx_t_2.oft_upper = 0; __pyx_t_2.oft_title = 0; __pyx_t_2.tail = NULL; __pyx_v_5spacy_2en_BLANK_WORD = __pyx_t_2; /* "spacy/en.pyx":1 * '''Serve pointers to Lexeme structs, given strings. Maintain a reverse index, # <<<<<<<<<<<<<< * so that strings can be retrieved from hashes. Use 64-bit hash values and * boldly assume no collisions. */ __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L0; __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); if (__pyx_m) { __Pyx_AddTraceback("init spacy.en", __pyx_clineno, __pyx_lineno, __pyx_filename); Py_DECREF(__pyx_m); __pyx_m = 0; } else if (!PyErr_Occurred()) { PyErr_SetString(PyExc_ImportError, "init spacy.en"); } __pyx_L0:; __Pyx_RefNannyFinishContext(); #if PY_MAJOR_VERSION < 3 return; #else return __pyx_m; #endif } /* Runtime support code */ #if CYTHON_REFNANNY static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) { PyObject *m = NULL, *p = NULL; void *r = NULL; m = PyImport_ImportModule((char *)modname); if (!m) goto end; p = PyObject_GetAttrString(m, (char *)"RefNannyAPI"); if (!p) goto end; r = PyLong_AsVoidPtr(p); end: Py_XDECREF(p); Py_XDECREF(m); return (__Pyx_RefNannyAPIStruct *)r; } #endif /* CYTHON_REFNANNY */ static PyObject *__Pyx_GetBuiltinName(PyObject *name) { PyObject* result = __Pyx_PyObject_GetAttrStr(__pyx_b, name); if (unlikely(!result)) { PyErr_Format(PyExc_NameError, #if PY_MAJOR_VERSION >= 3 "name '%U' is not defined", name); #else "name '%.200s' is not defined", PyString_AS_STRING(name)); #endif } return result; } static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) { #if CYTHON_COMPILING_IN_PYPY return PyObject_RichCompareBool(s1, s2, equals); #else if (s1 == s2) { return (equals == Py_EQ); } else if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) { const char *ps1, *ps2; Py_ssize_t length = PyBytes_GET_SIZE(s1); if (length != PyBytes_GET_SIZE(s2)) return (equals == Py_NE); ps1 = PyBytes_AS_STRING(s1); ps2 = PyBytes_AS_STRING(s2); if (ps1[0] != ps2[0]) { return (equals == Py_NE); } else if (length == 1) { return (equals == Py_EQ); } else { int result = memcmp(ps1, ps2, (size_t)length); return (equals == Py_EQ) ? (result == 0) : (result != 0); } } else if ((s1 == Py_None) & PyBytes_CheckExact(s2)) { return (equals == Py_NE); } else if ((s2 == Py_None) & PyBytes_CheckExact(s1)) { return (equals == Py_NE); } else { int result; PyObject* py_result = PyObject_RichCompare(s1, s2, equals); if (!py_result) return -1; result = __Pyx_PyObject_IsTrue(py_result); Py_DECREF(py_result); return result; } #endif } static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) { #if CYTHON_COMPILING_IN_PYPY return PyObject_RichCompareBool(s1, s2, equals); #else #if PY_MAJOR_VERSION < 3 PyObject* owned_ref = NULL; #endif int s1_is_unicode, s2_is_unicode; if (s1 == s2) { goto return_eq; } s1_is_unicode = PyUnicode_CheckExact(s1); s2_is_unicode = PyUnicode_CheckExact(s2); #if PY_MAJOR_VERSION < 3 if ((s1_is_unicode & (!s2_is_unicode)) && PyString_CheckExact(s2)) { owned_ref = PyUnicode_FromObject(s2); if (unlikely(!owned_ref)) return -1; s2 = owned_ref; s2_is_unicode = 1; } else if ((s2_is_unicode & (!s1_is_unicode)) && PyString_CheckExact(s1)) { owned_ref = PyUnicode_FromObject(s1); if (unlikely(!owned_ref)) return -1; s1 = owned_ref; s1_is_unicode = 1; } else if (((!s2_is_unicode) & (!s1_is_unicode))) { return __Pyx_PyBytes_Equals(s1, s2, equals); } #endif if (s1_is_unicode & s2_is_unicode) { Py_ssize_t length; int kind; void *data1, *data2; #if CYTHON_PEP393_ENABLED if (unlikely(PyUnicode_READY(s1) < 0) || unlikely(PyUnicode_READY(s2) < 0)) return -1; #endif length = __Pyx_PyUnicode_GET_LENGTH(s1); if (length != __Pyx_PyUnicode_GET_LENGTH(s2)) { goto return_ne; } kind = __Pyx_PyUnicode_KIND(s1); if (kind != __Pyx_PyUnicode_KIND(s2)) { goto return_ne; } data1 = __Pyx_PyUnicode_DATA(s1); data2 = __Pyx_PyUnicode_DATA(s2); if (__Pyx_PyUnicode_READ(kind, data1, 0) != __Pyx_PyUnicode_READ(kind, data2, 0)) { goto return_ne; } else if (length == 1) { goto return_eq; } else { int result = memcmp(data1, data2, length * kind); #if PY_MAJOR_VERSION < 3 Py_XDECREF(owned_ref); #endif return (equals == Py_EQ) ? (result == 0) : (result != 0); } } else if ((s1 == Py_None) & s2_is_unicode) { goto return_ne; } else if ((s2 == Py_None) & s1_is_unicode) { goto return_ne; } else { int result; PyObject* py_result = PyObject_RichCompare(s1, s2, equals); if (!py_result) return -1; result = __Pyx_PyObject_IsTrue(py_result); Py_DECREF(py_result); return result; } return_eq: #if PY_MAJOR_VERSION < 3 Py_XDECREF(owned_ref); #endif return (equals == Py_EQ); return_ne: #if PY_MAJOR_VERSION < 3 Py_XDECREF(owned_ref); #endif return (equals == Py_NE); #endif } static void __Pyx_RaiseArgumentTypeInvalid(const char* name, PyObject *obj, PyTypeObject *type) { PyErr_Format(PyExc_TypeError, "Argument '%.200s' has incorrect type (expected %.200s, got %.200s)", name, type->tp_name, Py_TYPE(obj)->tp_name); } static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, const char *name, int exact) { if (unlikely(!type)) { PyErr_SetString(PyExc_SystemError, "Missing type object"); return 0; } if (none_allowed && obj == Py_None) return 1; else if (exact) { if (likely(Py_TYPE(obj) == type)) return 1; #if PY_MAJOR_VERSION == 2 else if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1; #endif } else { if (likely(PyObject_TypeCheck(obj, type))) return 1; } __Pyx_RaiseArgumentTypeInvalid(name, obj, type); return 0; } static void __Pyx_RaiseArgtupleInvalid( const char* func_name, int exact, Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found) { Py_ssize_t num_expected; const char *more_or_less; if (num_found < num_min) { num_expected = num_min; more_or_less = "at least"; } else { num_expected = num_max; more_or_less = "at most"; } if (exact) { more_or_less = "exactly"; } PyErr_Format(PyExc_TypeError, "%.200s() takes %.8s %" CYTHON_FORMAT_SSIZE_T "d positional argument%.1s (%" CYTHON_FORMAT_SSIZE_T "d given)", func_name, more_or_less, num_expected, (num_expected == 1) ? "" : "s", num_found); } static void __Pyx_RaiseDoubleKeywordsError( const char* func_name, PyObject* kw_name) { PyErr_Format(PyExc_TypeError, #if PY_MAJOR_VERSION >= 3 "%s() got multiple values for keyword argument '%U'", func_name, kw_name); #else "%s() got multiple values for keyword argument '%s'", func_name, PyString_AsString(kw_name)); #endif } static int __Pyx_ParseOptionalKeywords( PyObject *kwds, PyObject **argnames[], PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args, const char* function_name) { PyObject *key = 0, *value = 0; Py_ssize_t pos = 0; PyObject*** name; PyObject*** first_kw_arg = argnames + num_pos_args; while (PyDict_Next(kwds, &pos, &key, &value)) { name = first_kw_arg; while (*name && (**name != key)) name++; if (*name) { values[name-argnames] = value; continue; } name = first_kw_arg; #if PY_MAJOR_VERSION < 3 if (likely(PyString_CheckExact(key)) || likely(PyString_Check(key))) { while (*name) { if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key)) && _PyString_Eq(**name, key)) { values[name-argnames] = value; break; } name++; } if (*name) continue; else { PyObject*** argname = argnames; while (argname != first_kw_arg) { if ((**argname == key) || ( (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key)) && _PyString_Eq(**argname, key))) { goto arg_passed_twice; } argname++; } } } else #endif if (likely(PyUnicode_Check(key))) { while (*name) { int cmp = (**name == key) ? 0 : #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 : #endif PyUnicode_Compare(**name, key); if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; if (cmp == 0) { values[name-argnames] = value; break; } name++; } if (*name) continue; else { PyObject*** argname = argnames; while (argname != first_kw_arg) { int cmp = (**argname == key) ? 0 : #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 : #endif PyUnicode_Compare(**argname, key); if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; if (cmp == 0) goto arg_passed_twice; argname++; } } } else goto invalid_keyword_type; if (kwds2) { if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad; } else { goto invalid_keyword; } } return 0; arg_passed_twice: __Pyx_RaiseDoubleKeywordsError(function_name, key); goto bad; invalid_keyword_type: PyErr_Format(PyExc_TypeError, "%.200s() keywords must be strings", function_name); goto bad; invalid_keyword: PyErr_Format(PyExc_TypeError, #if PY_MAJOR_VERSION < 3 "%.200s() got an unexpected keyword argument '%.200s'", function_name, PyString_AsString(key)); #else "%s() got an unexpected keyword argument '%U'", function_name, key); #endif bad: return -1; } static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name) { PyObject *result; #if CYTHON_COMPILING_IN_CPYTHON result = PyDict_GetItem(__pyx_d, name); if (result) { Py_INCREF(result); } else { #else result = PyObject_GetItem(__pyx_d, name); if (!result) { PyErr_Clear(); #endif result = __Pyx_GetBuiltinName(name); } return result; } static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j) { PyObject *r; if (!j) return NULL; r = PyObject_GetItem(o, j); Py_DECREF(j); return r; } static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, int wraparound, int boundscheck) { #if CYTHON_COMPILING_IN_CPYTHON if (wraparound & unlikely(i < 0)) i += PyList_GET_SIZE(o); if ((!boundscheck) || likely((0 <= i) & (i < PyList_GET_SIZE(o)))) { PyObject *r = PyList_GET_ITEM(o, i); Py_INCREF(r); return r; } return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); #else return PySequence_GetItem(o, i); #endif } static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, int wraparound, int boundscheck) { #if CYTHON_COMPILING_IN_CPYTHON if (wraparound & unlikely(i < 0)) i += PyTuple_GET_SIZE(o); if ((!boundscheck) || likely((0 <= i) & (i < PyTuple_GET_SIZE(o)))) { PyObject *r = PyTuple_GET_ITEM(o, i); Py_INCREF(r); return r; } return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); #else return PySequence_GetItem(o, i); #endif } static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, int is_list, int wraparound, int boundscheck) { #if CYTHON_COMPILING_IN_CPYTHON if (is_list || PyList_CheckExact(o)) { Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyList_GET_SIZE(o); if ((!boundscheck) || (likely((n >= 0) & (n < PyList_GET_SIZE(o))))) { PyObject *r = PyList_GET_ITEM(o, n); Py_INCREF(r); return r; } } else if (PyTuple_CheckExact(o)) { Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyTuple_GET_SIZE(o); if ((!boundscheck) || likely((n >= 0) & (n < PyTuple_GET_SIZE(o)))) { PyObject *r = PyTuple_GET_ITEM(o, n); Py_INCREF(r); return r; } } else { PySequenceMethods *m = Py_TYPE(o)->tp_as_sequence; if (likely(m && m->sq_item)) { if (wraparound && unlikely(i < 0) && likely(m->sq_length)) { Py_ssize_t l = m->sq_length(o); if (likely(l >= 0)) { i += l; } else { if (PyErr_ExceptionMatches(PyExc_OverflowError)) PyErr_Clear(); else return NULL; } } return m->sq_item(o, i); } } #else if (is_list || PySequence_Check(o)) { return PySequence_GetItem(o, i); } #endif return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); } #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) { PyObject *result; ternaryfunc call = func->ob_type->tp_call; if (unlikely(!call)) return PyObject_Call(func, arg, kw); #if PY_VERSION_HEX >= 0x02060000 if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) return NULL; #endif result = (*call)(func, arg, kw); #if PY_VERSION_HEX >= 0x02060000 Py_LeaveRecursiveCall(); #endif if (unlikely(!result) && unlikely(!PyErr_Occurred())) { PyErr_SetString( PyExc_SystemError, "NULL result without error in PyObject_Call"); } return result; } #endif static CYTHON_INLINE void __Pyx_ErrRestore(PyObject *type, PyObject *value, PyObject *tb) { #if CYTHON_COMPILING_IN_CPYTHON PyObject *tmp_type, *tmp_value, *tmp_tb; PyThreadState *tstate = PyThreadState_GET(); tmp_type = tstate->curexc_type; tmp_value = tstate->curexc_value; tmp_tb = tstate->curexc_traceback; tstate->curexc_type = type; tstate->curexc_value = value; tstate->curexc_traceback = tb; Py_XDECREF(tmp_type); Py_XDECREF(tmp_value); Py_XDECREF(tmp_tb); #else PyErr_Restore(type, value, tb); #endif } static CYTHON_INLINE void __Pyx_ErrFetch(PyObject **type, PyObject **value, PyObject **tb) { #if CYTHON_COMPILING_IN_CPYTHON PyThreadState *tstate = PyThreadState_GET(); *type = tstate->curexc_type; *value = tstate->curexc_value; *tb = tstate->curexc_traceback; tstate->curexc_type = 0; tstate->curexc_value = 0; tstate->curexc_traceback = 0; #else PyErr_Fetch(type, value, tb); #endif } #if PY_MAJOR_VERSION < 3 static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, CYTHON_UNUSED PyObject *cause) { Py_XINCREF(type); if (!value || value == Py_None) value = NULL; else Py_INCREF(value); if (!tb || tb == Py_None) tb = NULL; else { Py_INCREF(tb); if (!PyTraceBack_Check(tb)) { PyErr_SetString(PyExc_TypeError, "raise: arg 3 must be a traceback or None"); goto raise_error; } } #if PY_VERSION_HEX < 0x02050000 if (PyClass_Check(type)) { #else if (PyType_Check(type)) { #endif #if CYTHON_COMPILING_IN_PYPY if (!value) { Py_INCREF(Py_None); value = Py_None; } #endif PyErr_NormalizeException(&type, &value, &tb); } else { if (value) { PyErr_SetString(PyExc_TypeError, "instance exception may not have a separate value"); goto raise_error; } value = type; #if PY_VERSION_HEX < 0x02050000 if (PyInstance_Check(type)) { type = (PyObject*) ((PyInstanceObject*)type)->in_class; Py_INCREF(type); } else { type = 0; PyErr_SetString(PyExc_TypeError, "raise: exception must be an old-style class or instance"); goto raise_error; } #else type = (PyObject*) Py_TYPE(type); Py_INCREF(type); if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) { PyErr_SetString(PyExc_TypeError, "raise: exception class must be a subclass of BaseException"); goto raise_error; } #endif } __Pyx_ErrRestore(type, value, tb); return; raise_error: Py_XDECREF(value); Py_XDECREF(type); Py_XDECREF(tb); return; } #else /* Python 3+ */ static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) { PyObject* owned_instance = NULL; if (tb == Py_None) { tb = 0; } else if (tb && !PyTraceBack_Check(tb)) { PyErr_SetString(PyExc_TypeError, "raise: arg 3 must be a traceback or None"); goto bad; } if (value == Py_None) value = 0; if (PyExceptionInstance_Check(type)) { if (value) { PyErr_SetString(PyExc_TypeError, "instance exception may not have a separate value"); goto bad; } value = type; type = (PyObject*) Py_TYPE(value); } else if (PyExceptionClass_Check(type)) { PyObject *instance_class = NULL; if (value && PyExceptionInstance_Check(value)) { instance_class = (PyObject*) Py_TYPE(value); if (instance_class != type) { if (PyObject_IsSubclass(instance_class, type)) { type = instance_class; } else { instance_class = NULL; } } } if (!instance_class) { PyObject *args; if (!value) args = PyTuple_New(0); else if (PyTuple_Check(value)) { Py_INCREF(value); args = value; } else args = PyTuple_Pack(1, value); if (!args) goto bad; owned_instance = PyObject_Call(type, args, NULL); Py_DECREF(args); if (!owned_instance) goto bad; value = owned_instance; if (!PyExceptionInstance_Check(value)) { PyErr_Format(PyExc_TypeError, "calling %R should have returned an instance of " "BaseException, not %R", type, Py_TYPE(value)); goto bad; } } } else { PyErr_SetString(PyExc_TypeError, "raise: exception class must be a subclass of BaseException"); goto bad; } #if PY_VERSION_HEX >= 0x03030000 if (cause) { #else if (cause && cause != Py_None) { #endif PyObject *fixed_cause; if (cause == Py_None) { fixed_cause = NULL; } else if (PyExceptionClass_Check(cause)) { fixed_cause = PyObject_CallObject(cause, NULL); if (fixed_cause == NULL) goto bad; } else if (PyExceptionInstance_Check(cause)) { fixed_cause = cause; Py_INCREF(fixed_cause); } else { PyErr_SetString(PyExc_TypeError, "exception causes must derive from " "BaseException"); goto bad; } PyException_SetCause(value, fixed_cause); } PyErr_SetObject(type, value); if (tb) { PyThreadState *tstate = PyThreadState_GET(); PyObject* tmp_tb = tstate->curexc_traceback; if (tb != tmp_tb) { Py_INCREF(tb); tstate->curexc_traceback = tb; Py_XDECREF(tmp_tb); } } bad: Py_XDECREF(owned_instance); return; } #endif static CYTHON_INLINE Py_UCS4 __Pyx_GetItemInt_Unicode_Fast(PyObject* ustring, Py_ssize_t i, int wraparound, int boundscheck) { Py_ssize_t length; #if CYTHON_PEP393_ENABLED if (unlikely(__Pyx_PyUnicode_READY(ustring) < 0)) return (Py_UCS4)-1; #endif if (wraparound | boundscheck) { length = __Pyx_PyUnicode_GET_LENGTH(ustring); if (wraparound & unlikely(i < 0)) i += length; if ((!boundscheck) || likely((0 <= i) & (i < length))) { return __Pyx_PyUnicode_READ_CHAR(ustring, i); } else { PyErr_SetString(PyExc_IndexError, "string index out of range"); return (Py_UCS4)-1; } } else { return __Pyx_PyUnicode_READ_CHAR(ustring, i); } } static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring( PyObject* text, Py_ssize_t start, Py_ssize_t stop) { Py_ssize_t length; #if CYTHON_PEP393_ENABLED if (unlikely(PyUnicode_READY(text) == -1)) return NULL; length = PyUnicode_GET_LENGTH(text); #else length = PyUnicode_GET_SIZE(text); #endif if (start < 0) { start += length; if (start < 0) start = 0; } if (stop < 0) stop += length; else if (stop > length) stop = length; length = stop - start; if (length <= 0) return PyUnicode_FromUnicode(NULL, 0); #if CYTHON_PEP393_ENABLED return PyUnicode_FromKindAndData(PyUnicode_KIND(text), PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text), stop-start); #else return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text)+start, stop-start); #endif } static CYTHON_INLINE int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v) { int r; if (!j) return -1; r = PyObject_SetItem(o, j, v); Py_DECREF(j); return r; } static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v, int is_list, int wraparound, int boundscheck) { #if CYTHON_COMPILING_IN_CPYTHON if (is_list || PyList_CheckExact(o)) { Py_ssize_t n = (!wraparound) ? i : ((likely(i >= 0)) ? i : i + PyList_GET_SIZE(o)); if ((!boundscheck) || likely((n >= 0) & (n < PyList_GET_SIZE(o)))) { PyObject* old = PyList_GET_ITEM(o, n); Py_INCREF(v); PyList_SET_ITEM(o, n, v); Py_DECREF(old); return 1; } } else { PySequenceMethods *m = Py_TYPE(o)->tp_as_sequence; if (likely(m && m->sq_ass_item)) { if (wraparound && unlikely(i < 0) && likely(m->sq_length)) { Py_ssize_t l = m->sq_length(o); if (likely(l >= 0)) { i += l; } else { if (PyErr_ExceptionMatches(PyExc_OverflowError)) PyErr_Clear(); else return -1; } } return m->sq_ass_item(o, i, v); } } #else #if CYTHON_COMPILING_IN_PYPY if (is_list || (PySequence_Check(o) && !PyDict_Check(o))) { #else if (is_list || PySequence_Check(o)) { #endif return PySequence_SetItem(o, i, v); } #endif return __Pyx_SetItemInt_Generic(o, PyInt_FromSsize_t(i), v); } static void __Pyx_WriteUnraisable(const char *name, CYTHON_UNUSED int clineno, CYTHON_UNUSED int lineno, CYTHON_UNUSED const char *filename, int full_traceback) { PyObject *old_exc, *old_val, *old_tb; PyObject *ctx; __Pyx_ErrFetch(&old_exc, &old_val, &old_tb); if (full_traceback) { Py_XINCREF(old_exc); Py_XINCREF(old_val); Py_XINCREF(old_tb); __Pyx_ErrRestore(old_exc, old_val, old_tb); PyErr_PrintEx(1); } #if PY_MAJOR_VERSION < 3 ctx = PyString_FromString(name); #else ctx = PyUnicode_FromString(name); #endif __Pyx_ErrRestore(old_exc, old_val, old_tb); if (!ctx) { PyErr_WriteUnraisable(Py_None); } else { PyErr_WriteUnraisable(ctx); Py_DECREF(ctx); } } #define __PYX_VERIFY_RETURN_INT(target_type, func_type, func) \ { \ func_type value = func(x); \ if (sizeof(target_type) < sizeof(func_type)) { \ if (unlikely(value != (func_type) (target_type) value)) { \ func_type zero = 0; \ PyErr_SetString(PyExc_OverflowError, \ (is_unsigned && unlikely(value < zero)) ? \ "can't convert negative value to " #target_type : \ "value too large to convert to " #target_type); \ return (target_type) -1; \ } \ } \ return (target_type) value; \ } #if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 #if CYTHON_USE_PYLONG_INTERNALS #include "longintrepr.h" #endif #endif static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { const int neg_one = (int) -1, const_zero = 0; const int is_unsigned = neg_one > const_zero; #if PY_MAJOR_VERSION < 3 if (likely(PyInt_Check(x))) { if (sizeof(int) < sizeof(long)) { __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG) } else { long val = PyInt_AS_LONG(x); if (is_unsigned && unlikely(val < 0)) { PyErr_SetString(PyExc_OverflowError, "can't convert negative value to int"); return (int) -1; } return (int) val; } } else #endif if (likely(PyLong_Check(x))) { if (is_unsigned) { #if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 #if CYTHON_USE_PYLONG_INTERNALS if (sizeof(digit) <= sizeof(int)) { switch (Py_SIZE(x)) { case 0: return 0; case 1: return (int) ((PyLongObject*)x)->ob_digit[0]; } } #endif #endif if (unlikely(Py_SIZE(x) < 0)) { PyErr_SetString(PyExc_OverflowError, "can't convert negative value to int"); return (int) -1; } if (sizeof(int) <= sizeof(unsigned long)) { __PYX_VERIFY_RETURN_INT(int, unsigned long, PyLong_AsUnsignedLong) } else if (sizeof(int) <= sizeof(unsigned long long)) { __PYX_VERIFY_RETURN_INT(int, unsigned long long, PyLong_AsUnsignedLongLong) } } else { #if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 #if CYTHON_USE_PYLONG_INTERNALS if (sizeof(digit) <= sizeof(int)) { switch (Py_SIZE(x)) { case 0: return 0; case 1: return +(int) ((PyLongObject*)x)->ob_digit[0]; case -1: return -(int) ((PyLongObject*)x)->ob_digit[0]; } } #endif #endif if (sizeof(int) <= sizeof(long)) { __PYX_VERIFY_RETURN_INT(int, long, PyLong_AsLong) } else if (sizeof(int) <= sizeof(long long)) { __PYX_VERIFY_RETURN_INT(int, long long, PyLong_AsLongLong) } } { #if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) PyErr_SetString(PyExc_RuntimeError, "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); #else int val; PyObject *v = __Pyx_PyNumber_Int(x); #if PY_MAJOR_VERSION < 3 if (likely(v) && !PyLong_Check(v)) { PyObject *tmp = v; v = PyNumber_Long(tmp); Py_DECREF(tmp); } #endif if (likely(v)) { int one = 1; int is_little = (int)*(unsigned char *)&one; unsigned char *bytes = (unsigned char *)&val; int ret = _PyLong_AsByteArray((PyLongObject *)v, bytes, sizeof(val), is_little, !is_unsigned); Py_DECREF(v); if (likely(!ret)) return val; } #endif return (int) -1; } } else { int val; PyObject *tmp = __Pyx_PyNumber_Int(x); if (!tmp) return (int) -1; val = __Pyx_PyInt_As_int(tmp); Py_DECREF(tmp); return val; } } #if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 #if CYTHON_USE_PYLONG_INTERNALS #include "longintrepr.h" #endif #endif static CYTHON_INLINE uint64_t __Pyx_PyInt_As_uint64_t(PyObject *x) { const uint64_t neg_one = (uint64_t) -1, const_zero = 0; const int is_unsigned = neg_one > const_zero; #if PY_MAJOR_VERSION < 3 if (likely(PyInt_Check(x))) { if (sizeof(uint64_t) < sizeof(long)) { __PYX_VERIFY_RETURN_INT(uint64_t, long, PyInt_AS_LONG) } else { long val = PyInt_AS_LONG(x); if (is_unsigned && unlikely(val < 0)) { PyErr_SetString(PyExc_OverflowError, "can't convert negative value to uint64_t"); return (uint64_t) -1; } return (uint64_t) val; } } else #endif if (likely(PyLong_Check(x))) { if (is_unsigned) { #if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 #if CYTHON_USE_PYLONG_INTERNALS if (sizeof(digit) <= sizeof(uint64_t)) { switch (Py_SIZE(x)) { case 0: return 0; case 1: return (uint64_t) ((PyLongObject*)x)->ob_digit[0]; } } #endif #endif if (unlikely(Py_SIZE(x) < 0)) { PyErr_SetString(PyExc_OverflowError, "can't convert negative value to uint64_t"); return (uint64_t) -1; } if (sizeof(uint64_t) <= sizeof(unsigned long)) { __PYX_VERIFY_RETURN_INT(uint64_t, unsigned long, PyLong_AsUnsignedLong) } else if (sizeof(uint64_t) <= sizeof(unsigned long long)) { __PYX_VERIFY_RETURN_INT(uint64_t, unsigned long long, PyLong_AsUnsignedLongLong) } } else { #if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 #if CYTHON_USE_PYLONG_INTERNALS if (sizeof(digit) <= sizeof(uint64_t)) { switch (Py_SIZE(x)) { case 0: return 0; case 1: return +(uint64_t) ((PyLongObject*)x)->ob_digit[0]; case -1: return -(uint64_t) ((PyLongObject*)x)->ob_digit[0]; } } #endif #endif if (sizeof(uint64_t) <= sizeof(long)) { __PYX_VERIFY_RETURN_INT(uint64_t, long, PyLong_AsLong) } else if (sizeof(uint64_t) <= sizeof(long long)) { __PYX_VERIFY_RETURN_INT(uint64_t, long long, PyLong_AsLongLong) } } { #if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) PyErr_SetString(PyExc_RuntimeError, "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); #else uint64_t val; PyObject *v = __Pyx_PyNumber_Int(x); #if PY_MAJOR_VERSION < 3 if (likely(v) && !PyLong_Check(v)) { PyObject *tmp = v; v = PyNumber_Long(tmp); Py_DECREF(tmp); } #endif if (likely(v)) { int one = 1; int is_little = (int)*(unsigned char *)&one; unsigned char *bytes = (unsigned char *)&val; int ret = _PyLong_AsByteArray((PyLongObject *)v, bytes, sizeof(val), is_little, !is_unsigned); Py_DECREF(v); if (likely(!ret)) return val; } #endif return (uint64_t) -1; } } else { uint64_t val; PyObject *tmp = __Pyx_PyNumber_Int(x); if (!tmp) return (uint64_t) -1; val = __Pyx_PyInt_As_uint64_t(tmp); Py_DECREF(tmp); return val; } } #if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 #if CYTHON_USE_PYLONG_INTERNALS #include "longintrepr.h" #endif #endif static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *x) { const size_t neg_one = (size_t) -1, const_zero = 0; const int is_unsigned = neg_one > const_zero; #if PY_MAJOR_VERSION < 3 if (likely(PyInt_Check(x))) { if (sizeof(size_t) < sizeof(long)) { __PYX_VERIFY_RETURN_INT(size_t, long, PyInt_AS_LONG) } else { long val = PyInt_AS_LONG(x); if (is_unsigned && unlikely(val < 0)) { PyErr_SetString(PyExc_OverflowError, "can't convert negative value to size_t"); return (size_t) -1; } return (size_t) val; } } else #endif if (likely(PyLong_Check(x))) { if (is_unsigned) { #if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 #if CYTHON_USE_PYLONG_INTERNALS if (sizeof(digit) <= sizeof(size_t)) { switch (Py_SIZE(x)) { case 0: return 0; case 1: return (size_t) ((PyLongObject*)x)->ob_digit[0]; } } #endif #endif if (unlikely(Py_SIZE(x) < 0)) { PyErr_SetString(PyExc_OverflowError, "can't convert negative value to size_t"); return (size_t) -1; } if (sizeof(size_t) <= sizeof(unsigned long)) { __PYX_VERIFY_RETURN_INT(size_t, unsigned long, PyLong_AsUnsignedLong) } else if (sizeof(size_t) <= sizeof(unsigned long long)) { __PYX_VERIFY_RETURN_INT(size_t, unsigned long long, PyLong_AsUnsignedLongLong) } } else { #if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 #if CYTHON_USE_PYLONG_INTERNALS if (sizeof(digit) <= sizeof(size_t)) { switch (Py_SIZE(x)) { case 0: return 0; case 1: return +(size_t) ((PyLongObject*)x)->ob_digit[0]; case -1: return -(size_t) ((PyLongObject*)x)->ob_digit[0]; } } #endif #endif if (sizeof(size_t) <= sizeof(long)) { __PYX_VERIFY_RETURN_INT(size_t, long, PyLong_AsLong) } else if (sizeof(size_t) <= sizeof(long long)) { __PYX_VERIFY_RETURN_INT(size_t, long long, PyLong_AsLongLong) } } { #if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) PyErr_SetString(PyExc_RuntimeError, "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); #else size_t val; PyObject *v = __Pyx_PyNumber_Int(x); #if PY_MAJOR_VERSION < 3 if (likely(v) && !PyLong_Check(v)) { PyObject *tmp = v; v = PyNumber_Long(tmp); Py_DECREF(tmp); } #endif if (likely(v)) { int one = 1; int is_little = (int)*(unsigned char *)&one; unsigned char *bytes = (unsigned char *)&val; int ret = _PyLong_AsByteArray((PyLongObject *)v, bytes, sizeof(val), is_little, !is_unsigned); Py_DECREF(v); if (likely(!ret)) return val; } #endif return (size_t) -1; } } else { size_t val; PyObject *tmp = __Pyx_PyNumber_Int(x); if (!tmp) return (size_t) -1; val = __Pyx_PyInt_As_size_t(tmp); Py_DECREF(tmp); return val; } } static CYTHON_INLINE PyObject* __Pyx_PyInt_From_uint64_t(uint64_t value) { const uint64_t neg_one = (uint64_t) -1, const_zero = 0; const int is_unsigned = neg_one > const_zero; if (is_unsigned) { if (sizeof(uint64_t) < sizeof(long)) { return PyInt_FromLong((long) value); } else if (sizeof(uint64_t) <= sizeof(unsigned long)) { return PyLong_FromUnsignedLong((unsigned long) value); } else if (sizeof(uint64_t) <= sizeof(unsigned long long)) { return PyLong_FromUnsignedLongLong((unsigned long long) value); } } else { if (sizeof(uint64_t) <= sizeof(long)) { return PyInt_FromLong((long) value); } else if (sizeof(uint64_t) <= sizeof(long long)) { return PyLong_FromLongLong((long long) value); } } { int one = 1; int little = (int)*(unsigned char *)&one; unsigned char *bytes = (unsigned char *)&value; return _PyLong_FromByteArray(bytes, sizeof(uint64_t), little, !is_unsigned); } } static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { const long neg_one = (long) -1, const_zero = 0; const int is_unsigned = neg_one > const_zero; if (is_unsigned) { if (sizeof(long) < sizeof(long)) { return PyInt_FromLong((long) value); } else if (sizeof(long) <= sizeof(unsigned long)) { return PyLong_FromUnsignedLong((unsigned long) value); } else if (sizeof(long) <= sizeof(unsigned long long)) { return PyLong_FromUnsignedLongLong((unsigned long long) value); } } else { if (sizeof(long) <= sizeof(long)) { return PyInt_FromLong((long) value); } else if (sizeof(long) <= sizeof(long long)) { return PyLong_FromLongLong((long long) value); } } { int one = 1; int little = (int)*(unsigned char *)&one; unsigned char *bytes = (unsigned char *)&value; return _PyLong_FromByteArray(bytes, sizeof(long), little, !is_unsigned); } } #if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 #if CYTHON_USE_PYLONG_INTERNALS #include "longintrepr.h" #endif #endif static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { const long neg_one = (long) -1, const_zero = 0; const int is_unsigned = neg_one > const_zero; #if PY_MAJOR_VERSION < 3 if (likely(PyInt_Check(x))) { if (sizeof(long) < sizeof(long)) { __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG) } else { long val = PyInt_AS_LONG(x); if (is_unsigned && unlikely(val < 0)) { PyErr_SetString(PyExc_OverflowError, "can't convert negative value to long"); return (long) -1; } return (long) val; } } else #endif if (likely(PyLong_Check(x))) { if (is_unsigned) { #if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 #if CYTHON_USE_PYLONG_INTERNALS if (sizeof(digit) <= sizeof(long)) { switch (Py_SIZE(x)) { case 0: return 0; case 1: return (long) ((PyLongObject*)x)->ob_digit[0]; } } #endif #endif if (unlikely(Py_SIZE(x) < 0)) { PyErr_SetString(PyExc_OverflowError, "can't convert negative value to long"); return (long) -1; } if (sizeof(long) <= sizeof(unsigned long)) { __PYX_VERIFY_RETURN_INT(long, unsigned long, PyLong_AsUnsignedLong) } else if (sizeof(long) <= sizeof(unsigned long long)) { __PYX_VERIFY_RETURN_INT(long, unsigned long long, PyLong_AsUnsignedLongLong) } } else { #if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 #if CYTHON_USE_PYLONG_INTERNALS if (sizeof(digit) <= sizeof(long)) { switch (Py_SIZE(x)) { case 0: return 0; case 1: return +(long) ((PyLongObject*)x)->ob_digit[0]; case -1: return -(long) ((PyLongObject*)x)->ob_digit[0]; } } #endif #endif if (sizeof(long) <= sizeof(long)) { __PYX_VERIFY_RETURN_INT(long, long, PyLong_AsLong) } else if (sizeof(long) <= sizeof(long long)) { __PYX_VERIFY_RETURN_INT(long, long long, PyLong_AsLongLong) } } { #if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) PyErr_SetString(PyExc_RuntimeError, "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); #else long val; PyObject *v = __Pyx_PyNumber_Int(x); #if PY_MAJOR_VERSION < 3 if (likely(v) && !PyLong_Check(v)) { PyObject *tmp = v; v = PyNumber_Long(tmp); Py_DECREF(tmp); } #endif if (likely(v)) { int one = 1; int is_little = (int)*(unsigned char *)&one; unsigned char *bytes = (unsigned char *)&val; int ret = _PyLong_AsByteArray((PyLongObject *)v, bytes, sizeof(val), is_little, !is_unsigned); Py_DECREF(v); if (likely(!ret)) return val; } #endif return (long) -1; } } else { long val; PyObject *tmp = __Pyx_PyNumber_Int(x); if (!tmp) return (long) -1; val = __Pyx_PyInt_As_long(tmp); Py_DECREF(tmp); return val; } } static int __Pyx_check_binary_version(void) { char ctversion[4], rtversion[4]; PyOS_snprintf(ctversion, 4, "%d.%d", PY_MAJOR_VERSION, PY_MINOR_VERSION); PyOS_snprintf(rtversion, 4, "%s", Py_GetVersion()); if (ctversion[0] != rtversion[0] || ctversion[2] != rtversion[2]) { char message[200]; PyOS_snprintf(message, sizeof(message), "compiletime version %s of module '%.100s' " "does not match runtime version %s", ctversion, __Pyx_MODULE_NAME, rtversion); #if PY_VERSION_HEX < 0x02050000 return PyErr_Warn(NULL, message); #else return PyErr_WarnEx(NULL, message, 1); #endif } return 0; } static int __Pyx_ExportVoidPtr(PyObject *name, void *p, const char *sig) { PyObject *d; PyObject *cobj = 0; d = PyDict_GetItem(__pyx_d, __pyx_n_s_pyx_capi); Py_XINCREF(d); if (!d) { d = PyDict_New(); if (!d) goto bad; if (__Pyx_PyObject_SetAttrStr(__pyx_m, __pyx_n_s_pyx_capi, d) < 0) goto bad; } #if PY_VERSION_HEX >= 0x02070000 && !(PY_MAJOR_VERSION==3 && PY_MINOR_VERSION==0) cobj = PyCapsule_New(p, sig, 0); #else cobj = PyCObject_FromVoidPtrAndDesc(p, (void *)sig, 0); #endif if (!cobj) goto bad; if (PyDict_SetItem(d, name, cobj) < 0) goto bad; Py_DECREF(cobj); Py_DECREF(d); return 0; bad: Py_XDECREF(cobj); Py_XDECREF(d); return -1; } static int __Pyx_ExportFunction(const char *name, void (*f)(void), const char *sig) { PyObject *d = 0; PyObject *cobj = 0; union { void (*fp)(void); void *p; } tmp; d = PyObject_GetAttrString(__pyx_m, (char *)"__pyx_capi__"); if (!d) { PyErr_Clear(); d = PyDict_New(); if (!d) goto bad; Py_INCREF(d); if (PyModule_AddObject(__pyx_m, (char *)"__pyx_capi__", d) < 0) goto bad; } tmp.fp = f; #if PY_VERSION_HEX >= 0x02070000 && !(PY_MAJOR_VERSION==3&&PY_MINOR_VERSION==0) cobj = PyCapsule_New(tmp.p, sig, 0); #else cobj = PyCObject_FromVoidPtrAndDesc(tmp.p, (void *)sig, 0); #endif if (!cobj) goto bad; if (PyDict_SetItemString(d, name, cobj) < 0) goto bad; Py_DECREF(cobj); Py_DECREF(d); return 0; bad: Py_XDECREF(cobj); Py_XDECREF(d); return -1; } static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { int start = 0, mid = 0, end = count - 1; if (end >= 0 && code_line > entries[end].code_line) { return count; } while (start < end) { mid = (start + end) / 2; if (code_line < entries[mid].code_line) { end = mid; } else if (code_line > entries[mid].code_line) { start = mid + 1; } else { return mid; } } if (code_line <= entries[mid].code_line) { return mid; } else { return mid + 1; } } static PyCodeObject *__pyx_find_code_object(int code_line) { PyCodeObject* code_object; int pos; if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) { return NULL; } pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) { return NULL; } code_object = __pyx_code_cache.entries[pos].code_object; Py_INCREF(code_object); return code_object; } static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) { int pos, i; __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries; if (unlikely(!code_line)) { return; } if (unlikely(!entries)) { entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry)); if (likely(entries)) { __pyx_code_cache.entries = entries; __pyx_code_cache.max_count = 64; __pyx_code_cache.count = 1; entries[0].code_line = code_line; entries[0].code_object = code_object; Py_INCREF(code_object); } return; } pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) { PyCodeObject* tmp = entries[pos].code_object; entries[pos].code_object = code_object; Py_DECREF(tmp); return; } if (__pyx_code_cache.count == __pyx_code_cache.max_count) { int new_max = __pyx_code_cache.max_count + 64; entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc( __pyx_code_cache.entries, new_max*sizeof(__Pyx_CodeObjectCacheEntry)); if (unlikely(!entries)) { return; } __pyx_code_cache.entries = entries; __pyx_code_cache.max_count = new_max; } for (i=__pyx_code_cache.count; i>pos; i--) { entries[i] = entries[i-1]; } entries[pos].code_line = code_line; entries[pos].code_object = code_object; __pyx_code_cache.count++; Py_INCREF(code_object); } #include "compile.h" #include "frameobject.h" #include "traceback.h" static PyCodeObject* __Pyx_CreateCodeObjectForTraceback( const char *funcname, int c_line, int py_line, const char *filename) { PyCodeObject *py_code = 0; PyObject *py_srcfile = 0; PyObject *py_funcname = 0; #if PY_MAJOR_VERSION < 3 py_srcfile = PyString_FromString(filename); #else py_srcfile = PyUnicode_FromString(filename); #endif if (!py_srcfile) goto bad; if (c_line) { #if PY_MAJOR_VERSION < 3 py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); #else py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); #endif } else { #if PY_MAJOR_VERSION < 3 py_funcname = PyString_FromString(funcname); #else py_funcname = PyUnicode_FromString(funcname); #endif } if (!py_funcname) goto bad; py_code = __Pyx_PyCode_New( 0, /*int argcount,*/ 0, /*int kwonlyargcount,*/ 0, /*int nlocals,*/ 0, /*int stacksize,*/ 0, /*int flags,*/ __pyx_empty_bytes, /*PyObject *code,*/ __pyx_empty_tuple, /*PyObject *consts,*/ __pyx_empty_tuple, /*PyObject *names,*/ __pyx_empty_tuple, /*PyObject *varnames,*/ __pyx_empty_tuple, /*PyObject *freevars,*/ __pyx_empty_tuple, /*PyObject *cellvars,*/ py_srcfile, /*PyObject *filename,*/ py_funcname, /*PyObject *name,*/ py_line, /*int firstlineno,*/ __pyx_empty_bytes /*PyObject *lnotab*/ ); Py_DECREF(py_srcfile); Py_DECREF(py_funcname); return py_code; bad: Py_XDECREF(py_srcfile); Py_XDECREF(py_funcname); return NULL; } static void __Pyx_AddTraceback(const char *funcname, int c_line, int py_line, const char *filename) { PyCodeObject *py_code = 0; PyObject *py_globals = 0; PyFrameObject *py_frame = 0; py_code = __pyx_find_code_object(c_line ? c_line : py_line); if (!py_code) { py_code = __Pyx_CreateCodeObjectForTraceback( funcname, c_line, py_line, filename); if (!py_code) goto bad; __pyx_insert_code_object(c_line ? c_line : py_line, py_code); } py_globals = PyModule_GetDict(__pyx_m); if (!py_globals) goto bad; py_frame = PyFrame_New( PyThreadState_GET(), /*PyThreadState *tstate,*/ py_code, /*PyCodeObject *code,*/ py_globals, /*PyObject *globals,*/ 0 /*PyObject *locals*/ ); if (!py_frame) goto bad; py_frame->f_lineno = py_line; PyTraceBack_Here(py_frame); bad: Py_XDECREF(py_code); Py_XDECREF(py_frame); } static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { while (t->p) { #if PY_MAJOR_VERSION < 3 if (t->is_unicode) { *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); } else if (t->intern) { *t->p = PyString_InternFromString(t->s); } else { *t->p = PyString_FromStringAndSize(t->s, t->n - 1); } #else /* Python 3+ has unicode identifiers */ if (t->is_unicode | t->is_str) { if (t->intern) { *t->p = PyUnicode_InternFromString(t->s); } else if (t->encoding) { *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL); } else { *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1); } } else { *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1); } #endif if (!*t->p) return -1; ++t; } return 0; } static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(char* c_str) { return __Pyx_PyUnicode_FromStringAndSize(c_str, strlen(c_str)); } static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) { Py_ssize_t ignore; return __Pyx_PyObject_AsStringAndSize(o, &ignore); } static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { #if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT if ( #if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII __Pyx_sys_getdefaultencoding_not_ascii && #endif PyUnicode_Check(o)) { #if PY_VERSION_HEX < 0x03030000 char* defenc_c; PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); if (!defenc) return NULL; defenc_c = PyBytes_AS_STRING(defenc); #if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII { char* end = defenc_c + PyBytes_GET_SIZE(defenc); char* c; for (c = defenc_c; c < end; c++) { if ((unsigned char) (*c) >= 128) { PyUnicode_AsASCIIString(o); return NULL; } } } #endif /*__PYX_DEFAULT_STRING_ENCODING_IS_ASCII*/ *length = PyBytes_GET_SIZE(defenc); return defenc_c; #else /* PY_VERSION_HEX < 0x03030000 */ if (PyUnicode_READY(o) == -1) return NULL; #if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII if (PyUnicode_IS_ASCII(o)) { *length = PyUnicode_GET_DATA_SIZE(o); return PyUnicode_AsUTF8(o); } else { PyUnicode_AsASCIIString(o); return NULL; } #else /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */ return PyUnicode_AsUTF8AndSize(o, length); #endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */ #endif /* PY_VERSION_HEX < 0x03030000 */ } else #endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT */ #if !CYTHON_COMPILING_IN_PYPY #if PY_VERSION_HEX >= 0x02060000 if (PyByteArray_Check(o)) { *length = PyByteArray_GET_SIZE(o); return PyByteArray_AS_STRING(o); } else #endif #endif { char* result; int r = PyBytes_AsStringAndSize(o, &result, length); if (unlikely(r < 0)) { return NULL; } else { return result; } } } static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { int is_true = x == Py_True; if (is_true | (x == Py_False) | (x == Py_None)) return is_true; else return PyObject_IsTrue(x); } static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) { PyNumberMethods *m; const char *name = NULL; PyObject *res = NULL; #if PY_MAJOR_VERSION < 3 if (PyInt_Check(x) || PyLong_Check(x)) #else if (PyLong_Check(x)) #endif return Py_INCREF(x), x; m = Py_TYPE(x)->tp_as_number; #if PY_MAJOR_VERSION < 3 if (m && m->nb_int) { name = "int"; res = PyNumber_Int(x); } else if (m && m->nb_long) { name = "long"; res = PyNumber_Long(x); } #else if (m && m->nb_int) { name = "int"; res = PyNumber_Long(x); } #endif if (res) { #if PY_MAJOR_VERSION < 3 if (!PyInt_Check(res) && !PyLong_Check(res)) { #else if (!PyLong_Check(res)) { #endif PyErr_Format(PyExc_TypeError, "__%.4s__ returned non-%.4s (type %.200s)", name, name, Py_TYPE(res)->tp_name); Py_DECREF(res); return NULL; } } else if (!PyErr_Occurred()) { PyErr_SetString(PyExc_TypeError, "an integer is required"); } return res; } #if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 #if CYTHON_USE_PYLONG_INTERNALS #include "longintrepr.h" #endif #endif static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { Py_ssize_t ival; PyObject *x; #if PY_MAJOR_VERSION < 3 if (likely(PyInt_CheckExact(b))) return PyInt_AS_LONG(b); #endif if (likely(PyLong_CheckExact(b))) { #if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 #if CYTHON_USE_PYLONG_INTERNALS switch (Py_SIZE(b)) { case -1: return -(sdigit)((PyLongObject*)b)->ob_digit[0]; case 0: return 0; case 1: return ((PyLongObject*)b)->ob_digit[0]; } #endif #endif #if PY_VERSION_HEX < 0x02060000 return PyInt_AsSsize_t(b); #else return PyLong_AsSsize_t(b); #endif } x = PyNumber_Index(b); if (!x) return -1; ival = PyInt_AsSsize_t(x); Py_DECREF(x); return ival; } static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) { #if PY_VERSION_HEX < 0x02050000 if (ival <= LONG_MAX) return PyInt_FromLong((long)ival); else { unsigned char *bytes = (unsigned char *) &ival; int one = 1; int little = (int)*(unsigned char*)&one; return _PyLong_FromByteArray(bytes, sizeof(size_t), little, 0); } #else return PyInt_FromSize_t(ival); #endif } #endif /* Py_PYTHON_H */