/*
 * Mirror of https://github.com/explosion/spaCy.git
 * Synced 2025-01-26 09:14:32 +03:00
 * 5281 lines, 204 KiB, C++
 */
/* Generated by Cython 0.20.1 on Mon Jul 7 00:02:26 2014 */
|
|
|
|
#define PY_SSIZE_T_CLEAN
|
|
#ifndef CYTHON_USE_PYLONG_INTERNALS
|
|
#ifdef PYLONG_BITS_IN_DIGIT
|
|
#define CYTHON_USE_PYLONG_INTERNALS 0
|
|
#else
|
|
#include "pyconfig.h"
|
|
#ifdef PYLONG_BITS_IN_DIGIT
|
|
#define CYTHON_USE_PYLONG_INTERNALS 1
|
|
#else
|
|
#define CYTHON_USE_PYLONG_INTERNALS 0
|
|
#endif
|
|
#endif
|
|
#endif
|
|
#include "Python.h"
|
|
#ifndef Py_PYTHON_H
|
|
#error Python headers needed to compile C extensions, please install development version of Python.
|
|
#elif PY_VERSION_HEX < 0x02040000
|
|
#error Cython requires Python 2.4+.
|
|
#else
|
|
#define CYTHON_ABI "0_20_1"
|
|
#include <stddef.h> /* For offsetof */
|
|
#ifndef offsetof
|
|
#define offsetof(type, member) ( (size_t) & ((type*)0) -> member )
|
|
#endif
|
|
#if !defined(WIN32) && !defined(MS_WINDOWS)
|
|
#ifndef __stdcall
|
|
#define __stdcall
|
|
#endif
|
|
#ifndef __cdecl
|
|
#define __cdecl
|
|
#endif
|
|
#ifndef __fastcall
|
|
#define __fastcall
|
|
#endif
|
|
#endif
|
|
#ifndef DL_IMPORT
|
|
#define DL_IMPORT(t) t
|
|
#endif
|
|
#ifndef DL_EXPORT
|
|
#define DL_EXPORT(t) t
|
|
#endif
|
|
#ifndef PY_LONG_LONG
|
|
#define PY_LONG_LONG LONG_LONG
|
|
#endif
|
|
#ifndef Py_HUGE_VAL
|
|
#define Py_HUGE_VAL HUGE_VAL
|
|
#endif
|
|
#ifdef PYPY_VERSION
|
|
#define CYTHON_COMPILING_IN_PYPY 1
|
|
#define CYTHON_COMPILING_IN_CPYTHON 0
|
|
#else
|
|
#define CYTHON_COMPILING_IN_PYPY 0
|
|
#define CYTHON_COMPILING_IN_CPYTHON 1
|
|
#endif
|
|
#if CYTHON_COMPILING_IN_PYPY
|
|
#define Py_OptimizeFlag 0
|
|
#endif
|
|
#if PY_VERSION_HEX < 0x02050000
|
|
typedef int Py_ssize_t;
|
|
#define PY_SSIZE_T_MAX INT_MAX
|
|
#define PY_SSIZE_T_MIN INT_MIN
|
|
#define PY_FORMAT_SIZE_T ""
|
|
#define CYTHON_FORMAT_SSIZE_T ""
|
|
#define PyInt_FromSsize_t(z) PyInt_FromLong(z)
|
|
#define PyInt_AsSsize_t(o) __Pyx_PyInt_As_int(o)
|
|
#define PyNumber_Index(o) ((PyNumber_Check(o) && !PyFloat_Check(o)) ? PyNumber_Int(o) : \
|
|
(PyErr_Format(PyExc_TypeError, \
|
|
"expected index value, got %.200s", Py_TYPE(o)->tp_name), \
|
|
(PyObject*)0))
|
|
#define __Pyx_PyIndex_Check(o) (PyNumber_Check(o) && !PyFloat_Check(o) && \
|
|
!PyComplex_Check(o))
|
|
#define PyIndex_Check __Pyx_PyIndex_Check
|
|
#define PyErr_WarnEx(category, message, stacklevel) PyErr_Warn(category, message)
|
|
#define __PYX_BUILD_PY_SSIZE_T "i"
|
|
#else
|
|
#define __PYX_BUILD_PY_SSIZE_T "n"
|
|
#define CYTHON_FORMAT_SSIZE_T "z"
|
|
#define __Pyx_PyIndex_Check PyIndex_Check
|
|
#endif
|
|
#if PY_VERSION_HEX < 0x02060000
|
|
#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
|
|
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
|
|
#define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)
|
|
#define PyVarObject_HEAD_INIT(type, size) \
|
|
PyObject_HEAD_INIT(type) size,
|
|
#define PyType_Modified(t)
|
|
typedef struct {
|
|
void *buf;
|
|
PyObject *obj;
|
|
Py_ssize_t len;
|
|
Py_ssize_t itemsize;
|
|
int readonly;
|
|
int ndim;
|
|
char *format;
|
|
Py_ssize_t *shape;
|
|
Py_ssize_t *strides;
|
|
Py_ssize_t *suboffsets;
|
|
void *internal;
|
|
} Py_buffer;
|
|
#define PyBUF_SIMPLE 0
|
|
#define PyBUF_WRITABLE 0x0001
|
|
#define PyBUF_FORMAT 0x0004
|
|
#define PyBUF_ND 0x0008
|
|
#define PyBUF_STRIDES (0x0010 | PyBUF_ND)
|
|
#define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES)
|
|
#define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES)
|
|
#define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES)
|
|
#define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES)
|
|
#define PyBUF_RECORDS (PyBUF_STRIDES | PyBUF_FORMAT | PyBUF_WRITABLE)
|
|
#define PyBUF_FULL (PyBUF_INDIRECT | PyBUF_FORMAT | PyBUF_WRITABLE)
|
|
typedef int (*getbufferproc)(PyObject *, Py_buffer *, int);
|
|
typedef void (*releasebufferproc)(PyObject *, Py_buffer *);
|
|
#endif
|
|
#if PY_MAJOR_VERSION < 3
|
|
#define __Pyx_BUILTIN_MODULE_NAME "__builtin__"
|
|
#define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \
|
|
PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
|
|
#define __Pyx_DefaultClassType PyClass_Type
|
|
#else
|
|
#define __Pyx_BUILTIN_MODULE_NAME "builtins"
|
|
#define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \
|
|
PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
|
|
#define __Pyx_DefaultClassType PyType_Type
|
|
#endif
|
|
#if PY_VERSION_HEX < 0x02060000
|
|
#define PyUnicode_FromString(s) PyUnicode_Decode(s, strlen(s), "UTF-8", "strict")
|
|
#endif
|
|
#if PY_MAJOR_VERSION >= 3
|
|
#define Py_TPFLAGS_CHECKTYPES 0
|
|
#define Py_TPFLAGS_HAVE_INDEX 0
|
|
#endif
|
|
#if (PY_VERSION_HEX < 0x02060000) || (PY_MAJOR_VERSION >= 3)
|
|
#define Py_TPFLAGS_HAVE_NEWBUFFER 0
|
|
#endif
|
|
#if PY_VERSION_HEX < 0x02060000
|
|
#define Py_TPFLAGS_HAVE_VERSION_TAG 0
|
|
#endif
|
|
#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TPFLAGS_IS_ABSTRACT)
|
|
#define Py_TPFLAGS_IS_ABSTRACT 0
|
|
#endif
|
|
#if PY_VERSION_HEX < 0x030400a1 && !defined(Py_TPFLAGS_HAVE_FINALIZE)
|
|
#define Py_TPFLAGS_HAVE_FINALIZE 0
|
|
#endif
|
|
#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND)
|
|
#define CYTHON_PEP393_ENABLED 1
|
|
#define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ? \
|
|
0 : _PyUnicode_Ready((PyObject *)(op)))
|
|
#define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u)
|
|
#define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
|
|
#define __Pyx_PyUnicode_KIND(u) PyUnicode_KIND(u)
|
|
#define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u)
|
|
#define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i)
|
|
#else
|
|
#define CYTHON_PEP393_ENABLED 0
|
|
#define __Pyx_PyUnicode_READY(op) (0)
|
|
#define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u)
|
|
#define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i]))
|
|
#define __Pyx_PyUnicode_KIND(u) (sizeof(Py_UNICODE))
|
|
#define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u))
|
|
#define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i]))
|
|
#endif
|
|
#if CYTHON_COMPILING_IN_PYPY
|
|
#define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b)
|
|
#define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b)
|
|
#else
|
|
#define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b)
|
|
#define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ? \
|
|
PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b))
|
|
#endif
|
|
#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b))
|
|
#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b))
|
|
#if PY_MAJOR_VERSION >= 3
|
|
#define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b)
|
|
#else
|
|
#define __Pyx_PyString_Format(a, b) PyString_Format(a, b)
|
|
#endif
|
|
#if PY_MAJOR_VERSION >= 3
|
|
#define PyBaseString_Type PyUnicode_Type
|
|
#define PyStringObject PyUnicodeObject
|
|
#define PyString_Type PyUnicode_Type
|
|
#define PyString_Check PyUnicode_Check
|
|
#define PyString_CheckExact PyUnicode_CheckExact
|
|
#endif
|
|
#if PY_VERSION_HEX < 0x02060000
|
|
#define PyBytesObject PyStringObject
|
|
#define PyBytes_Type PyString_Type
|
|
#define PyBytes_Check PyString_Check
|
|
#define PyBytes_CheckExact PyString_CheckExact
|
|
#define PyBytes_FromString PyString_FromString
|
|
#define PyBytes_FromStringAndSize PyString_FromStringAndSize
|
|
#define PyBytes_FromFormat PyString_FromFormat
|
|
#define PyBytes_DecodeEscape PyString_DecodeEscape
|
|
#define PyBytes_AsString PyString_AsString
|
|
#define PyBytes_AsStringAndSize PyString_AsStringAndSize
|
|
#define PyBytes_Size PyString_Size
|
|
#define PyBytes_AS_STRING PyString_AS_STRING
|
|
#define PyBytes_GET_SIZE PyString_GET_SIZE
|
|
#define PyBytes_Repr PyString_Repr
|
|
#define PyBytes_Concat PyString_Concat
|
|
#define PyBytes_ConcatAndDel PyString_ConcatAndDel
|
|
#endif
|
|
#if PY_MAJOR_VERSION >= 3
|
|
#define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj)
|
|
#define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj)
|
|
#else
|
|
#define __Pyx_PyBaseString_Check(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj) || \
|
|
PyString_Check(obj) || PyUnicode_Check(obj))
|
|
#define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj))
|
|
#endif
|
|
#if PY_VERSION_HEX < 0x02060000
|
|
#define PySet_Check(obj) PyObject_TypeCheck(obj, &PySet_Type)
|
|
#define PyFrozenSet_Check(obj) PyObject_TypeCheck(obj, &PyFrozenSet_Type)
|
|
#endif
|
|
#ifndef PySet_CheckExact
|
|
#define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type)
|
|
#endif
|
|
#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type)
|
|
#if PY_MAJOR_VERSION >= 3
|
|
#define PyIntObject PyLongObject
|
|
#define PyInt_Type PyLong_Type
|
|
#define PyInt_Check(op) PyLong_Check(op)
|
|
#define PyInt_CheckExact(op) PyLong_CheckExact(op)
|
|
#define PyInt_FromString PyLong_FromString
|
|
#define PyInt_FromUnicode PyLong_FromUnicode
|
|
#define PyInt_FromLong PyLong_FromLong
|
|
#define PyInt_FromSize_t PyLong_FromSize_t
|
|
#define PyInt_FromSsize_t PyLong_FromSsize_t
|
|
#define PyInt_AsLong PyLong_AsLong
|
|
#define PyInt_AS_LONG PyLong_AS_LONG
|
|
#define PyInt_AsSsize_t PyLong_AsSsize_t
|
|
#define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask
|
|
#define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask
|
|
#define PyNumber_Int PyNumber_Long
|
|
#endif
|
|
#if PY_MAJOR_VERSION >= 3
|
|
#define PyBoolObject PyLongObject
|
|
#endif
|
|
#if PY_VERSION_HEX < 0x030200A4
|
|
typedef long Py_hash_t;
|
|
#define __Pyx_PyInt_FromHash_t PyInt_FromLong
|
|
#define __Pyx_PyInt_AsHash_t PyInt_AsLong
|
|
#else
|
|
#define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t
|
|
#define __Pyx_PyInt_AsHash_t PyInt_AsSsize_t
|
|
#endif
|
|
#if (PY_MAJOR_VERSION < 3) || (PY_VERSION_HEX >= 0x03010300)
|
|
#define __Pyx_PySequence_GetSlice(obj, a, b) PySequence_GetSlice(obj, a, b)
|
|
#define __Pyx_PySequence_SetSlice(obj, a, b, value) PySequence_SetSlice(obj, a, b, value)
|
|
#define __Pyx_PySequence_DelSlice(obj, a, b) PySequence_DelSlice(obj, a, b)
|
|
#else
|
|
#define __Pyx_PySequence_GetSlice(obj, a, b) (unlikely(!(obj)) ? \
|
|
(PyErr_SetString(PyExc_SystemError, "null argument to internal routine"), (PyObject*)0) : \
|
|
(likely((obj)->ob_type->tp_as_mapping) ? (PySequence_GetSlice(obj, a, b)) : \
|
|
(PyErr_Format(PyExc_TypeError, "'%.200s' object is unsliceable", (obj)->ob_type->tp_name), (PyObject*)0)))
|
|
#define __Pyx_PySequence_SetSlice(obj, a, b, value) (unlikely(!(obj)) ? \
|
|
(PyErr_SetString(PyExc_SystemError, "null argument to internal routine"), -1) : \
|
|
(likely((obj)->ob_type->tp_as_mapping) ? (PySequence_SetSlice(obj, a, b, value)) : \
|
|
(PyErr_Format(PyExc_TypeError, "'%.200s' object doesn't support slice assignment", (obj)->ob_type->tp_name), -1)))
|
|
#define __Pyx_PySequence_DelSlice(obj, a, b) (unlikely(!(obj)) ? \
|
|
(PyErr_SetString(PyExc_SystemError, "null argument to internal routine"), -1) : \
|
|
(likely((obj)->ob_type->tp_as_mapping) ? (PySequence_DelSlice(obj, a, b)) : \
|
|
(PyErr_Format(PyExc_TypeError, "'%.200s' object doesn't support slice deletion", (obj)->ob_type->tp_name), -1)))
|
|
#endif
|
|
#if PY_MAJOR_VERSION >= 3
|
|
#define PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : PyInstanceMethod_New(func))
|
|
#endif
|
|
#if PY_VERSION_HEX < 0x02050000
|
|
#define __Pyx_GetAttrString(o,n) PyObject_GetAttrString((o),((char *)(n)))
|
|
#define __Pyx_SetAttrString(o,n,a) PyObject_SetAttrString((o),((char *)(n)),(a))
|
|
#define __Pyx_DelAttrString(o,n) PyObject_DelAttrString((o),((char *)(n)))
|
|
#else
|
|
#define __Pyx_GetAttrString(o,n) PyObject_GetAttrString((o),(n))
|
|
#define __Pyx_SetAttrString(o,n,a) PyObject_SetAttrString((o),(n),(a))
|
|
#define __Pyx_DelAttrString(o,n) PyObject_DelAttrString((o),(n))
|
|
#endif
|
|
#if PY_VERSION_HEX < 0x02050000
|
|
#define __Pyx_NAMESTR(n) ((char *)(n))
|
|
#define __Pyx_DOCSTR(n) ((char *)(n))
|
|
#else
|
|
#define __Pyx_NAMESTR(n) (n)
|
|
#define __Pyx_DOCSTR(n) (n)
|
|
#endif
|
|
#ifndef CYTHON_INLINE
|
|
#if defined(__GNUC__)
|
|
#define CYTHON_INLINE __inline__
|
|
#elif defined(_MSC_VER)
|
|
#define CYTHON_INLINE __inline
|
|
#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
|
|
#define CYTHON_INLINE inline
|
|
#else
|
|
#define CYTHON_INLINE
|
|
#endif
|
|
#endif
|
|
#ifndef CYTHON_RESTRICT
|
|
#if defined(__GNUC__)
|
|
#define CYTHON_RESTRICT __restrict__
|
|
#elif defined(_MSC_VER) && _MSC_VER >= 1400
|
|
#define CYTHON_RESTRICT __restrict
|
|
#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
|
|
#define CYTHON_RESTRICT restrict
|
|
#else
|
|
#define CYTHON_RESTRICT
|
|
#endif
|
|
#endif
|
|
#ifdef NAN
|
|
#define __PYX_NAN() ((float) NAN)
|
|
#else
|
|
/* Fallback for builds where the NAN macro is not defined (see the enclosing
 * #ifdef NAN / #else): produce a float NaN by hand.
 *
 * Every byte of the float is set to 0xFF. The sign bit is irrelevant; an
 * exponent with all bits set together with a non-zero mantissa denotes NaN,
 * and a leading 1 bit in the mantissa makes it a quiet NaN. */
static CYTHON_INLINE float __PYX_NAN() {
    float value;
    memset(&value, 0xFF, sizeof(value));
    return value;
}
|
|
#endif
|
|
|
|
|
|
#if PY_MAJOR_VERSION >= 3
|
|
#define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y)
|
|
#define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y)
|
|
#else
|
|
#define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y)
|
|
#define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y)
|
|
#endif
|
|
|
|
#ifndef __PYX_EXTERN_C
|
|
#ifdef __cplusplus
|
|
#define __PYX_EXTERN_C extern "C"
|
|
#else
|
|
#define __PYX_EXTERN_C extern
|
|
#endif
|
|
#endif
|
|
|
|
#if defined(WIN32) || defined(MS_WINDOWS)
|
|
#define _USE_MATH_DEFINES
|
|
#endif
|
|
#include <math.h>
|
|
#define __PYX_HAVE__spacy__en
|
|
#define __PYX_HAVE_API__spacy__en
|
|
#include <utility>
|
|
#include "ios"
|
|
#include "new"
|
|
#include "stdexcept"
|
|
#include "typeinfo"
|
|
#include <vector>
|
|
#include "stdint.h"
|
|
#include "sparsehash/dense_hash_map"
|
|
#include "string.h"
|
|
#include "stdlib.h"
|
|
#include "../include/MurmurHash3.h"
|
|
#include "../include/MurmurHash2.h"
|
|
#ifdef _OPENMP
|
|
#include <omp.h>
|
|
#endif /* _OPENMP */
|
|
|
|
#ifdef PYREX_WITHOUT_ASSERTIONS
|
|
#define CYTHON_WITHOUT_ASSERTIONS
|
|
#endif
|
|
|
|
#ifndef CYTHON_UNUSED
|
|
# if defined(__GNUC__)
|
|
# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
|
|
# define CYTHON_UNUSED __attribute__ ((__unused__))
|
|
# else
|
|
# define CYTHON_UNUSED
|
|
# endif
|
|
# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
|
|
# define CYTHON_UNUSED __attribute__ ((__unused__))
|
|
# else
|
|
# define CYTHON_UNUSED
|
|
# endif
|
|
#endif
|
|
/* One row of the module's string-constant table.
 * NOTE(review): the code that fills and consumes this table is outside the
 * visible part of the file; the field notes below are inferred from the field
 * names and types and should be confirmed against that code. */
typedef struct {
    PyObject **p;            /* presumably: slot that receives the created object */
    char *s;                 /* presumably: raw character data of the constant */
    const Py_ssize_t n;      /* presumably: length associated with `s` */
    const char* encoding;    /* presumably: encoding name, if any */
    const char is_unicode;   /* flag */
    const char is_str;       /* flag */
    const char intern;       /* flag */
} __Pyx_StringTabEntry; /*proto*/
|
|
|
|
#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0
|
|
#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0
|
|
#define __PYX_DEFAULT_STRING_ENCODING ""
|
|
#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString
|
|
#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
|
|
#define __Pyx_fits_Py_ssize_t(v, type, is_signed) ( \
|
|
(sizeof(type) < sizeof(Py_ssize_t)) || \
|
|
(sizeof(type) > sizeof(Py_ssize_t) && \
|
|
likely(v < (type)PY_SSIZE_T_MAX || \
|
|
v == (type)PY_SSIZE_T_MAX) && \
|
|
(!is_signed || likely(v > (type)PY_SSIZE_T_MIN || \
|
|
v == (type)PY_SSIZE_T_MIN))) || \
|
|
(sizeof(type) == sizeof(Py_ssize_t) && \
|
|
(is_signed || likely(v < (type)PY_SSIZE_T_MAX || \
|
|
v == (type)PY_SSIZE_T_MAX))) )
|
|
static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject*);
|
|
static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length);
|
|
#define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s))
|
|
#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l)
|
|
#define __Pyx_PyBytes_FromString PyBytes_FromString
|
|
#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize
|
|
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(char*);
|
|
#if PY_MAJOR_VERSION < 3
|
|
#define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString
|
|
#define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
|
|
#else
|
|
#define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString
|
|
#define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize
|
|
#endif
|
|
#define __Pyx_PyObject_AsSString(s) ((signed char*) __Pyx_PyObject_AsString(s))
|
|
#define __Pyx_PyObject_AsUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s))
|
|
#define __Pyx_PyObject_FromUString(s) __Pyx_PyObject_FromString((char*)s)
|
|
#define __Pyx_PyBytes_FromUString(s) __Pyx_PyBytes_FromString((char*)s)
|
|
#define __Pyx_PyByteArray_FromUString(s) __Pyx_PyByteArray_FromString((char*)s)
|
|
#define __Pyx_PyStr_FromUString(s) __Pyx_PyStr_FromString((char*)s)
|
|
#define __Pyx_PyUnicode_FromUString(s) __Pyx_PyUnicode_FromString((char*)s)
|
|
#if PY_MAJOR_VERSION < 3
|
|
/* Length of a zero-terminated Py_UNICODE string, not counting the terminator.
 * Only compiled for PY_MAJOR_VERSION < 3; otherwise the name is mapped to
 * Py_UNICODE_strlen (see the surrounding #if / #else).
 *
 * `u` must point to a terminated string; the scan has no other bound. */
static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u)
{
    const Py_UNICODE *u_end = u;
    /* The post-increment leaves u_end one element past the terminator ... */
    while (*u_end++) ;
    /* ... hence the extra -1 when converting the distance to a length. */
    return u_end - u - 1;
}
|
|
#else
|
|
#define __Pyx_Py_UNICODE_strlen Py_UNICODE_strlen
|
|
#endif
|
|
#define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u))
|
|
#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode
|
|
#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode
|
|
#define __Pyx_Owned_Py_None(b) (Py_INCREF(Py_None), Py_None)
|
|
#define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False))
|
|
static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*);
|
|
static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x);
|
|
static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*);
|
|
static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t);
|
|
#if CYTHON_COMPILING_IN_CPYTHON
|
|
#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x))
|
|
#else
|
|
#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x)
|
|
#endif
|
|
#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x))
|
|
#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
|
|
static int __Pyx_sys_getdefaultencoding_not_ascii;
|
|
static int __Pyx_init_sys_getdefaultencoding_params(void) {
|
|
PyObject* sys = NULL;
|
|
PyObject* default_encoding = NULL;
|
|
PyObject* ascii_chars_u = NULL;
|
|
PyObject* ascii_chars_b = NULL;
|
|
sys = PyImport_ImportModule("sys");
|
|
if (sys == NULL) goto bad;
|
|
default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL);
|
|
if (default_encoding == NULL) goto bad;
|
|
if (strcmp(PyBytes_AsString(default_encoding), "ascii") == 0) {
|
|
__Pyx_sys_getdefaultencoding_not_ascii = 0;
|
|
} else {
|
|
const char* default_encoding_c = PyBytes_AS_STRING(default_encoding);
|
|
char ascii_chars[128];
|
|
int c;
|
|
for (c = 0; c < 128; c++) {
|
|
ascii_chars[c] = c;
|
|
}
|
|
__Pyx_sys_getdefaultencoding_not_ascii = 1;
|
|
ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL);
|
|
if (ascii_chars_u == NULL) goto bad;
|
|
ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL);
|
|
if (ascii_chars_b == NULL || strncmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) {
|
|
PyErr_Format(
|
|
PyExc_ValueError,
|
|
"This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.",
|
|
default_encoding_c);
|
|
goto bad;
|
|
}
|
|
}
|
|
Py_XDECREF(sys);
|
|
Py_XDECREF(default_encoding);
|
|
Py_XDECREF(ascii_chars_u);
|
|
Py_XDECREF(ascii_chars_b);
|
|
return 0;
|
|
bad:
|
|
Py_XDECREF(sys);
|
|
Py_XDECREF(default_encoding);
|
|
Py_XDECREF(ascii_chars_u);
|
|
Py_XDECREF(ascii_chars_b);
|
|
return -1;
|
|
}
|
|
#endif
|
|
#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3
|
|
#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL)
|
|
#else
|
|
#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL)
|
|
#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
|
|
static char* __PYX_DEFAULT_STRING_ENCODING;
|
|
static int __Pyx_init_sys_getdefaultencoding_params(void) {
|
|
PyObject* sys = NULL;
|
|
PyObject* default_encoding = NULL;
|
|
char* default_encoding_c;
|
|
sys = PyImport_ImportModule("sys");
|
|
if (sys == NULL) goto bad;
|
|
default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL);
|
|
if (default_encoding == NULL) goto bad;
|
|
default_encoding_c = PyBytes_AS_STRING(default_encoding);
|
|
__PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c));
|
|
strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c);
|
|
Py_DECREF(sys);
|
|
Py_DECREF(default_encoding);
|
|
return 0;
|
|
bad:
|
|
Py_XDECREF(sys);
|
|
Py_XDECREF(default_encoding);
|
|
return -1;
|
|
}
|
|
#endif
|
|
#endif
|
|
|
|
|
|
#ifdef __GNUC__
|
|
/* Test for GCC > 2.95 */
|
|
#if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))
|
|
#define likely(x) __builtin_expect(!!(x), 1)
|
|
#define unlikely(x) __builtin_expect(!!(x), 0)
|
|
#else /* __GNUC__ > 2 ... */
|
|
#define likely(x) (x)
|
|
#define unlikely(x) (x)
|
|
#endif /* __GNUC__ > 2 ... */
|
|
#else /* __GNUC__ */
|
|
#define likely(x) (x)
|
|
#define unlikely(x) (x)
|
|
#endif /* __GNUC__ */
|
|
|
|
static PyObject *__pyx_m;
|
|
static PyObject *__pyx_d;
|
|
static PyObject *__pyx_b;
|
|
static PyObject *__pyx_empty_tuple;
|
|
static PyObject *__pyx_empty_bytes;
|
|
static int __pyx_lineno;
|
|
static int __pyx_clineno = 0;
|
|
static const char * __pyx_cfilenm= __FILE__;
|
|
static const char *__pyx_filename;
|
|
|
|
|
|
static const char *__pyx_f[] = {
|
|
"en.pyx",
|
|
};
|
|
|
|
/* "spacy/lexeme.pxd":4
|
|
*
|
|
*
|
|
* ctypedef int ClusterID # <<<<<<<<<<<<<<
|
|
* ctypedef uint64_t StringHash
|
|
*
|
|
*/
|
|
typedef int __pyx_t_5spacy_6lexeme_ClusterID;
|
|
|
|
/* "spacy/lexeme.pxd":5
|
|
*
|
|
* ctypedef int ClusterID
|
|
* ctypedef uint64_t StringHash # <<<<<<<<<<<<<<
|
|
*
|
|
*
|
|
*/
|
|
typedef uint64_t __pyx_t_5spacy_6lexeme_StringHash;
|
|
|
|
/* "spacy/en.pxd":7
|
|
*
|
|
* ctypedef Py_UNICODE* string_ptr
|
|
* ctypedef size_t Lexeme_addr # For python interop # <<<<<<<<<<<<<<
|
|
* ctypedef Lexeme* Lexeme_ptr
|
|
*
|
|
*/
|
|
typedef size_t __pyx_t_5spacy_2en_Lexeme_addr;
|
|
|
|
/*--- Type declarations ---*/
|
|
struct __pyx_t_5spacy_6lexeme_Lexeme;
|
|
|
|
/* "spacy/lexeme.pxd":27
|
|
* # over the Lexeme, via:
|
|
* # for field in range(LexAttr.n): get_attr(Lexeme*, field)
|
|
* cdef enum HashFields: # <<<<<<<<<<<<<<
|
|
* sic
|
|
* lex
|
|
*/
|
|
/* C counterpart of `cdef enum HashFields` from spacy/lexeme.pxd.
 * The enumerators carry no explicit values, so they number upward from 0 and
 * the trailing `n` enumerator equals the count of the entries before it. */
enum __pyx_t_5spacy_6lexeme_HashFields {
    __pyx_e_5spacy_6lexeme_sic,
    __pyx_e_5spacy_6lexeme_lex,
    __pyx_e_5spacy_6lexeme_normed,
    __pyx_e_5spacy_6lexeme_cluster,
    __pyx_e_5spacy_6lexeme_n
};
|
|
|
|
/* "spacy/lexeme.pxd":8
|
|
*
|
|
*
|
|
* cdef struct Lexeme: # <<<<<<<<<<<<<<
|
|
* StringHash sic # Hash of the original string
|
|
* StringHash lex # Hash of the word, with punctuation and clitics split off
|
|
*/
|
|
struct __pyx_t_5spacy_6lexeme_Lexeme {
|
|
__pyx_t_5spacy_6lexeme_StringHash sic;
|
|
__pyx_t_5spacy_6lexeme_StringHash lex;
|
|
__pyx_t_5spacy_6lexeme_StringHash normed;
|
|
__pyx_t_5spacy_6lexeme_StringHash last3;
|
|
Py_UNICODE first;
|
|
double prob;
|
|
__pyx_t_5spacy_6lexeme_ClusterID cluster;
|
|
int oft_upper;
|
|
int oft_title;
|
|
struct __pyx_t_5spacy_6lexeme_Lexeme *tail;
|
|
};
|
|
|
|
/* "spacy/en.pxd":6
|
|
*
|
|
*
|
|
* ctypedef Py_UNICODE* string_ptr # <<<<<<<<<<<<<<
|
|
* ctypedef size_t Lexeme_addr # For python interop
|
|
* ctypedef Lexeme* Lexeme_ptr
|
|
*/
|
|
typedef Py_UNICODE *__pyx_t_5spacy_2en_string_ptr;
|
|
|
|
/* "spacy/en.pxd":8
|
|
* ctypedef Py_UNICODE* string_ptr
|
|
* ctypedef size_t Lexeme_addr # For python interop
|
|
* ctypedef Lexeme* Lexeme_ptr # <<<<<<<<<<<<<<
|
|
*
|
|
*
|
|
*/
|
|
typedef struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_t_5spacy_2en_Lexeme_ptr;
|
|
#ifndef CYTHON_REFNANNY
|
|
#define CYTHON_REFNANNY 0
|
|
#endif
|
|
#if CYTHON_REFNANNY
|
|
typedef struct {
|
|
void (*INCREF)(void*, PyObject*, int);
|
|
void (*DECREF)(void*, PyObject*, int);
|
|
void (*GOTREF)(void*, PyObject*, int);
|
|
void (*GIVEREF)(void*, PyObject*, int);
|
|
void* (*SetupContext)(const char*, int, const char*);
|
|
void (*FinishContext)(void**);
|
|
} __Pyx_RefNannyAPIStruct;
|
|
static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL;
|
|
static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); /*proto*/
|
|
#define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL;
|
|
#ifdef WITH_THREAD
|
|
#define __Pyx_RefNannySetupContext(name, acquire_gil) \
|
|
if (acquire_gil) { \
|
|
PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure(); \
|
|
__pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__); \
|
|
PyGILState_Release(__pyx_gilstate_save); \
|
|
} else { \
|
|
__pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__); \
|
|
}
|
|
#else
|
|
#define __Pyx_RefNannySetupContext(name, acquire_gil) \
|
|
__pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__)
|
|
#endif
|
|
#define __Pyx_RefNannyFinishContext() \
|
|
__Pyx_RefNanny->FinishContext(&__pyx_refnanny)
|
|
#define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
|
|
#define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
|
|
#define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
|
|
#define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
|
|
#define __Pyx_XINCREF(r) do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0)
|
|
#define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0)
|
|
#define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0)
|
|
#define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0)
|
|
#else
|
|
#define __Pyx_RefNannyDeclarations
|
|
#define __Pyx_RefNannySetupContext(name, acquire_gil)
|
|
#define __Pyx_RefNannyFinishContext()
|
|
#define __Pyx_INCREF(r) Py_INCREF(r)
|
|
#define __Pyx_DECREF(r) Py_DECREF(r)
|
|
#define __Pyx_GOTREF(r)
|
|
#define __Pyx_GIVEREF(r)
|
|
#define __Pyx_XINCREF(r) Py_XINCREF(r)
|
|
#define __Pyx_XDECREF(r) Py_XDECREF(r)
|
|
#define __Pyx_XGOTREF(r)
|
|
#define __Pyx_XGIVEREF(r)
|
|
#endif /* CYTHON_REFNANNY */
|
|
#define __Pyx_XDECREF_SET(r, v) do { \
|
|
PyObject *tmp = (PyObject *) r; \
|
|
r = v; __Pyx_XDECREF(tmp); \
|
|
} while (0)
|
|
#define __Pyx_DECREF_SET(r, v) do { \
|
|
PyObject *tmp = (PyObject *) r; \
|
|
r = v; __Pyx_DECREF(tmp); \
|
|
} while (0)
|
|
#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0)
|
|
#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0)
|
|
|
|
#if CYTHON_COMPILING_IN_CPYTHON
|
|
/* Attribute lookup that goes through the type's slots directly (this variant
 * is only compiled when CYTHON_COMPILING_IN_CPYTHON is set).
 *
 * Lookup order:
 *   1. tp_getattro, called with the name object;
 *   2. on Python 2 only, tp_getattr, called with the name's character data
 *      obtained via PyString_AS_STRING;
 *   3. PyObject_GetAttr as the generic fallback.
 *
 * Returns whatever the chosen call returns. */
static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) {
    PyTypeObject* tp = Py_TYPE(obj);
    if (likely(tp->tp_getattro))
        return tp->tp_getattro(obj, attr_name);
#if PY_MAJOR_VERSION < 3
    if (likely(tp->tp_getattr))
        return tp->tp_getattr(obj, PyString_AS_STRING(attr_name));
#endif
    return PyObject_GetAttr(obj, attr_name);
}
|
|
#else
|
|
#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n)
|
|
#endif
|
|
|
|
static PyObject *__Pyx_GetBuiltinName(PyObject *name); /*proto*/
|
|
|
|
static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected);
|
|
|
|
static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index);
|
|
|
|
static CYTHON_INLINE int __Pyx_IterFinish(void); /*proto*/
|
|
|
|
static int __Pyx_IternextUnpackEndCheck(PyObject *retval, Py_ssize_t expected); /*proto*/
|
|
|
|
#include <string.h>
|
|
|
|
static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals); /*proto*/
|
|
|
|
static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals); /*proto*/
|
|
|
|
static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed,
|
|
const char *name, int exact); /*proto*/
|
|
|
|
static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact,
|
|
Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); /*proto*/
|
|
|
|
static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); /*proto*/
|
|
|
|
static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[], \
|
|
PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args, \
|
|
const char* function_name); /*proto*/
|
|
|
|
static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name); /*proto*/
|
|
|
|
#define __Pyx_GetItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \
|
|
(__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \
|
|
__Pyx_GetItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound, boundscheck) : \
|
|
(is_list ? (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL) : \
|
|
__Pyx_GetItemInt_Generic(o, to_py_func(i))))
|
|
#define __Pyx_GetItemInt_List(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \
|
|
(__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \
|
|
__Pyx_GetItemInt_List_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) : \
|
|
(PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL))
|
|
static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i,
|
|
int wraparound, int boundscheck);
|
|
#define __Pyx_GetItemInt_Tuple(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \
|
|
(__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \
|
|
__Pyx_GetItemInt_Tuple_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) : \
|
|
(PyErr_SetString(PyExc_IndexError, "tuple index out of range"), (PyObject*)NULL))
|
|
static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i,
|
|
int wraparound, int boundscheck);
|
|
static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j);
|
|
static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i,
|
|
int is_list, int wraparound, int boundscheck);
|
|
|
|
#if CYTHON_COMPILING_IN_CPYTHON
|
|
static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); /*proto*/
|
|
#else
|
|
#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw)
|
|
#endif
|
|
|
|
static CYTHON_INLINE void __Pyx_ErrRestore(PyObject *type, PyObject *value, PyObject *tb); /*proto*/
|
|
static CYTHON_INLINE void __Pyx_ErrFetch(PyObject **type, PyObject **value, PyObject **tb); /*proto*/
|
|
|
|
static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); /*proto*/
|
|
|
|
#define __Pyx_GetItemInt_Unicode(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \
|
|
(__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \
|
|
__Pyx_GetItemInt_Unicode_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) : \
|
|
(PyErr_SetString(PyExc_IndexError, "string index out of range"), (Py_UCS4)-1))
|
|
static CYTHON_INLINE Py_UCS4 __Pyx_GetItemInt_Unicode_Fast(PyObject* ustring, Py_ssize_t i,
|
|
int wraparound, int boundscheck);
|
|
|
|
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring(
|
|
PyObject* text, Py_ssize_t start, Py_ssize_t stop);
|
|
|
|
#define __Pyx_SetItemInt(o, i, v, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \
|
|
(__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \
|
|
__Pyx_SetItemInt_Fast(o, (Py_ssize_t)i, v, is_list, wraparound, boundscheck) : \
|
|
(is_list ? (PyErr_SetString(PyExc_IndexError, "list assignment index out of range"), -1) : \
|
|
__Pyx_SetItemInt_Generic(o, to_py_func(i), v)))
|
|
static CYTHON_INLINE int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v);
|
|
static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v,
|
|
int is_list, int wraparound, int boundscheck);
|
|
|
|
/*
 * Tail-match `s` against `substr` over [start, end) in the given direction.
 *
 * `substr` is either a single object handed straight to PyUnicode_Tailmatch,
 * or a tuple of candidates that are tried in order.  For a tuple, the first
 * non-zero result from PyUnicode_Tailmatch (a hit, or its error value) is
 * returned immediately; 0 is returned when every candidate yields 0.
 * Outside CPython, -1 is also returned if a candidate cannot be fetched.
 */
static int __Pyx_PyUnicode_Tailmatch(PyObject* s, PyObject* substr,
                                     Py_ssize_t start, Py_ssize_t end, int direction) {
    Py_ssize_t idx, n_candidates;
    int outcome;

    /* Common case: a single pattern, no iteration needed. */
    if (!unlikely(PyTuple_Check(substr))) {
        return PyUnicode_Tailmatch(s, substr, start, end, direction);
    }

    n_candidates = PyTuple_GET_SIZE(substr);
    for (idx = 0; idx < n_candidates; idx++) {
#if CYTHON_COMPILING_IN_CPYTHON
        /* Borrowed reference: no refcount traffic needed. */
        outcome = PyUnicode_Tailmatch(s, PyTuple_GET_ITEM(substr, idx),
                                      start, end, direction);
#else
        /* Owned reference: released as soon as the match has been evaluated. */
        PyObject* candidate = PySequence_GetItem(substr, idx);
        if (unlikely(!candidate)) return -1;
        outcome = PyUnicode_Tailmatch(s, candidate, start, end, direction);
        Py_DECREF(candidate);
#endif
        if (outcome != 0) {
            return outcome;
        }
    }
    return 0;
}
|
|
|
|
static void __Pyx_WriteUnraisable(const char *name, int clineno,
|
|
int lineno, const char *filename,
|
|
int full_traceback); /*proto*/
|
|
|
|
static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name); /*proto*/
|
|
|
|
static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); /*proto*/
|
|
|
|
static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *);
|
|
|
|
static CYTHON_INLINE uint64_t __Pyx_PyInt_As_uint64_t(PyObject *);
|
|
|
|
static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *);
|
|
|
|
static CYTHON_INLINE PyObject* __Pyx_PyInt_From_uint64_t(uint64_t value);
|
|
|
|
static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value);
|
|
|
|
static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *);
|
|
|
|
static int __Pyx_check_binary_version(void);
|
|
|
|
#if CYTHON_COMPILING_IN_CPYTHON
|
|
#define __Pyx_PyObject_DelAttrStr(o,n) __Pyx_PyObject_SetAttrStr(o,n,NULL)
|
|
/*
 * Set attribute `attr_name` on `obj` to `value` by calling the type's slot
 * directly when one is present.
 *
 * Preference order: tp_setattro; then (Python 2 builds only) tp_setattr with
 * the name converted via PyString_AS_STRING; otherwise PyObject_SetAttr.
 * The callee's int status is returned unchanged.
 * __Pyx_PyObject_DelAttrStr reuses this function with value == NULL.
 */
static CYTHON_INLINE int __Pyx_PyObject_SetAttrStr(PyObject* obj, PyObject* attr_name, PyObject* value) {
    PyTypeObject* const type = Py_TYPE(obj);
    int status;

    if (likely(type->tp_setattro)) {
        status = type->tp_setattro(obj, attr_name, value);
    }
#if PY_MAJOR_VERSION < 3
    else if (likely(type->tp_setattr)) {
        status = type->tp_setattr(obj, PyString_AS_STRING(attr_name), value);
    }
#endif
    else {
        status = PyObject_SetAttr(obj, attr_name, value);
    }
    return status;
}
|
|
#else
|
|
#define __Pyx_PyObject_DelAttrStr(o,n) PyObject_DelAttr(o,n)
|
|
#define __Pyx_PyObject_SetAttrStr(o,n,v) PyObject_SetAttr(o,n,v)
|
|
#endif
|
|
|
|
static int __Pyx_ExportVoidPtr(PyObject *name, void *p, const char *sig); /*proto*/
|
|
|
|
static int __Pyx_ExportFunction(const char *name, void (*f)(void), const char *sig); /*proto*/
|
|
|
|
/* One cache slot: a PyCodeObject paired with the integer line it is keyed by. */
typedef struct {
|
|
/* Integer key for this slot; the lookup/insert helpers declared below take a matching `code_line` argument. */
int code_line;
|
|
/* The code object stored under `code_line`. */
PyCodeObject* code_object;
|
|
} __Pyx_CodeObjectCacheEntry;
|
|
/* Container for the cached code objects. */
struct __Pyx_CodeObjectCache {
|
|
/* NOTE(review): presumably the number of slots of `entries` currently in use -- confirm against __pyx_insert_code_object. */
int count;
|
|
/* NOTE(review): presumably the allocated capacity of `entries` -- confirm against __pyx_insert_code_object. */
int max_count;
|
|
/* Array of cache slots; NULL in the initial state below. */
__Pyx_CodeObjectCacheEntry* entries;
|
|
};
|
|
/* The single file-local cache instance; starts with zero counts and no entries array. */
static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL};
|
|
static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line);
|
|
static PyCodeObject *__pyx_find_code_object(int code_line);
|
|
static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object);
|
|
|
|
static void __Pyx_AddTraceback(const char *funcname, int c_line,
|
|
int py_line, const char *filename); /*proto*/
|
|
|
|
static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/
|
|
|
|
|
|
/* Module declarations from 'libcpp.utility' */
|
|
|
|
/* Module declarations from 'libcpp.vector' */
|
|
|
|
/* Module declarations from 'libc.stdint' */
|
|
|
|
/* Module declarations from 'ext.sparsehash' */
|
|
|
|
/* Module declarations from 'spacy.lexeme' */
|
|
|
|
/* Module declarations from 'libc.string' */
|
|
|
|
/* Module declarations from 'libc.stdlib' */
|
|
|
|
/* Module declarations from 'ext.murmurhash' */
|
|
|
|
/* Module declarations from 'spacy.en' */
|
|
static google::dense_hash_map<__pyx_t_5spacy_6lexeme_StringHash,__pyx_t_5spacy_2en_Lexeme_ptr> __pyx_v_5spacy_2en_LEXEMES;
|
|
static struct __pyx_t_5spacy_6lexeme_Lexeme __pyx_v_5spacy_2en_BLANK_WORD;
|
|
static __pyx_t_5spacy_2en_Lexeme_addr __pyx_f_5spacy_2en_lookup(PyObject *, int __pyx_skip_dispatch); /*proto*/
|
|
static __pyx_t_5spacy_2en_Lexeme_addr __pyx_f_5spacy_2en_lookup_chunk(PyObject *, int, int, int __pyx_skip_dispatch); /*proto*/
|
|
static __pyx_t_5spacy_6lexeme_StringHash __pyx_f_5spacy_2en_hash_string(PyObject *, size_t); /*proto*/
|
|
static PyObject *__pyx_f_5spacy_2en_unhash(__pyx_t_5spacy_6lexeme_StringHash, int __pyx_skip_dispatch); /*proto*/
|
|
static PyObject *__pyx_f_5spacy_2en_normalize_word_string(PyObject *); /*proto*/
|
|
static PyObject *__pyx_f_5spacy_2en__substr(PyObject *, int, int, size_t, int __pyx_skip_dispatch); /*proto*/
|
|
static struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_f_5spacy_2en__add(__pyx_t_5spacy_6lexeme_StringHash, PyObject *, int, size_t); /*proto*/
|
|
static struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_f_5spacy_2en__init_lexeme(PyObject *, __pyx_t_5spacy_6lexeme_StringHash, int, size_t); /*proto*/
|
|
static size_t __pyx_f_5spacy_2en__find_split(PyObject *, size_t); /*proto*/
|
|
static int __pyx_f_5spacy_2en_is_punct(PyObject *, size_t, size_t); /*proto*/
|
|
#define __Pyx_MODULE_NAME "spacy.en"
|
|
int __pyx_module_is_main_spacy__en = 0;
|
|
|
|
/* Implementation of 'spacy.en' */
|
|
static PyObject *__pyx_builtin_enumerate;
|
|
static PyObject *__pyx_builtin_ValueError;
|
|
static PyObject *__pyx_pf_5spacy_2en_load_tokenization(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_token_rules); /* proto */
|
|
static PyObject *__pyx_pf_5spacy_2en_2lookup(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string); /* proto */
|
|
static PyObject *__pyx_pf_5spacy_2en_4lookup_chunk(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string, int __pyx_v_start, int __pyx_v_end); /* proto */
|
|
static PyObject *__pyx_pf_5spacy_2en_6unhash(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_StringHash __pyx_v_hash_value); /* proto */
|
|
static PyObject *__pyx_pf_5spacy_2en_8_substr(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string, int __pyx_v_start, int __pyx_v_end, size_t __pyx_v_length); /* proto */
|
|
static char __pyx_k_[] = "";
|
|
static char __pyx_k_i[] = "i";
|
|
static char __pyx_k_s[] = "'s";
|
|
static char __pyx_k_en[] = "en";
|
|
static char __pyx_k_end[] = "end";
|
|
static char __pyx_k_lex[] = "lex";
|
|
static char __pyx_k_sic[] = "sic";
|
|
static char __pyx_k_YEAR[] = "!YEAR";
|
|
static char __pyx_k_main[] = "__main__";
|
|
static char __pyx_k_prob[] = "prob";
|
|
static char __pyx_k_tail[] = "tail";
|
|
static char __pyx_k_test[] = "__test__";
|
|
static char __pyx_k_util[] = "util";
|
|
static char __pyx_k_word[] = "word";
|
|
static char __pyx_k_chunk[] = "chunk";
|
|
static char __pyx_k_first[] = "first";
|
|
static char __pyx_k_last3[] = "last3";
|
|
static char __pyx_k_lower[] = "lower";
|
|
static char __pyx_k_s_d_s[] = "%s:@:%d:@:%s";
|
|
static char __pyx_k_start[] = "start";
|
|
static char __pyx_k_DIGITS[] = "!DIGITS";
|
|
static char __pyx_k_hashed[] = "hashed";
|
|
static char __pyx_k_import[] = "__import__";
|
|
static char __pyx_k_length[] = "length";
|
|
static char __pyx_k_normed[] = "normed";
|
|
static char __pyx_k_string[] = "string";
|
|
static char __pyx_k_tokens[] = "tokens";
|
|
static char __pyx_k_LEXEMES[] = "LEXEMES";
|
|
static char __pyx_k_STRINGS[] = "STRINGS";
|
|
static char __pyx_k_cluster[] = "cluster";
|
|
static char __pyx_k_isdigit[] = "isdigit";
|
|
static char __pyx_k_pyx_capi[] = "__pyx_capi__";
|
|
static char __pyx_k_spacy_en[] = "spacy.en";
|
|
static char __pyx_k_enumerate[] = "enumerate";
|
|
static char __pyx_k_oft_title[] = "oft_title";
|
|
static char __pyx_k_oft_upper[] = "oft_upper";
|
|
static char __pyx_k_ValueError[] = "ValueError";
|
|
static char __pyx_k_token_rules[] = "token_rules";
|
|
static char __pyx_k_token_string[] = "token_string";
|
|
static char __pyx_k_load_tokenization[] = "load_tokenization";
|
|
static char __pyx_k_read_tokenization[] = "read_tokenization";
|
|
static char __pyx_k_Users_matt_repos_spaCy_spacy_en[] = "/Users/matt/repos/spaCy/spacy/en.pyx";
|
|
static char __pyx_k_Serve_pointers_to_Lexeme_structs[] = "Serve pointers to Lexeme structs, given strings. Maintain a reverse index,\nso that strings can be retrieved from hashes. Use 64-bit hash values and\nboldly assume no collisions.\n";
|
|
static PyObject *__pyx_n_s_;
|
|
static PyObject *__pyx_kp_u_;
|
|
static PyObject *__pyx_kp_u_DIGITS;
|
|
static PyObject *__pyx_n_s_LEXEMES;
|
|
static PyObject *__pyx_n_s_STRINGS;
|
|
static PyObject *__pyx_kp_s_Users_matt_repos_spaCy_spacy_en;
|
|
static PyObject *__pyx_n_s_ValueError;
|
|
static PyObject *__pyx_kp_u_YEAR;
|
|
static PyObject *__pyx_n_s_chunk;
|
|
static PyObject *__pyx_n_s_cluster;
|
|
static PyObject *__pyx_n_u_en;
|
|
static PyObject *__pyx_n_s_end;
|
|
static PyObject *__pyx_n_s_enumerate;
|
|
static PyObject *__pyx_n_s_first;
|
|
static PyObject *__pyx_n_s_hashed;
|
|
static PyObject *__pyx_n_s_i;
|
|
static PyObject *__pyx_n_s_import;
|
|
static PyObject *__pyx_n_s_isdigit;
|
|
static PyObject *__pyx_n_s_last3;
|
|
static PyObject *__pyx_n_s_length;
|
|
static PyObject *__pyx_n_s_lex;
|
|
static PyObject *__pyx_n_s_load_tokenization;
|
|
static PyObject *__pyx_n_s_lower;
|
|
static PyObject *__pyx_n_s_main;
|
|
static PyObject *__pyx_n_s_normed;
|
|
static PyObject *__pyx_n_s_oft_title;
|
|
static PyObject *__pyx_n_s_oft_upper;
|
|
static PyObject *__pyx_n_s_prob;
|
|
static PyObject *__pyx_n_s_pyx_capi;
|
|
static PyObject *__pyx_n_s_read_tokenization;
|
|
static PyObject *__pyx_kp_u_s;
|
|
static PyObject *__pyx_kp_u_s_d_s;
|
|
static PyObject *__pyx_n_s_sic;
|
|
static PyObject *__pyx_n_s_spacy_en;
|
|
static PyObject *__pyx_n_s_start;
|
|
static PyObject *__pyx_n_s_string;
|
|
static PyObject *__pyx_n_s_tail;
|
|
static PyObject *__pyx_n_s_test;
|
|
static PyObject *__pyx_n_s_token_rules;
|
|
static PyObject *__pyx_n_s_token_string;
|
|
static PyObject *__pyx_n_s_tokens;
|
|
static PyObject *__pyx_n_s_util;
|
|
static PyObject *__pyx_n_s_word;
|
|
static PyObject *__pyx_int_0;
|
|
static PyObject *__pyx_int_1;
|
|
static PyObject *__pyx_tuple__2;
|
|
static PyObject *__pyx_tuple__4;
|
|
static PyObject *__pyx_codeobj__3;
|
|
|
|
/* "spacy/en.pyx":24
|
|
*
|
|
*
|
|
* def load_tokenization(token_rules): # <<<<<<<<<<<<<<
|
|
* cdef Lexeme* word
|
|
* cdef StringHash hashed
|
|
*/
|
|
|
|
/* Python wrapper */
|
|
static PyObject *__pyx_pw_5spacy_2en_1load_tokenization(PyObject *__pyx_self, PyObject *__pyx_v_token_rules); /*proto*/
|
|
static PyMethodDef __pyx_mdef_5spacy_2en_1load_tokenization = {__Pyx_NAMESTR("load_tokenization"), (PyCFunction)__pyx_pw_5spacy_2en_1load_tokenization, METH_O, __Pyx_DOCSTR(0)};
|
|
/*
 * Python-callable entry point for spacy.en.load_tokenization (registered in
 * __pyx_mdef_5spacy_2en_1load_tokenization with METH_O).  It performs no
 * argument conversion: the single argument is forwarded untouched to the
 * implementation function and that function's result is returned as-is.
 */
static PyObject *__pyx_pw_5spacy_2en_1load_tokenization(PyObject *__pyx_self, PyObject *__pyx_v_token_rules) {
    PyObject *result = NULL;
    __Pyx_RefNannyDeclarations
    __Pyx_RefNannySetupContext("load_tokenization (wrapper)", 0);

    result = __pyx_pf_5spacy_2en_load_tokenization(__pyx_self, __pyx_v_token_rules);

    __Pyx_RefNannyFinishContext();
    return result;
}
|
|
|
|
/*
 * Error bookkeeping used throughout load_tokenization: remember the source
 * file, the .pyx line and the line of this generated file, then jump to the
 * shared cleanup label.
 */
#define __PYX_LOAD_TOK_FAIL(pyx_line) \
    do { \
        __pyx_filename = __pyx_f[0]; \
        __pyx_lineno = (pyx_line); \
        __pyx_clineno = __LINE__; \
        goto __pyx_L1_error; \
    } while (0)

/*
 * Implementation of `def load_tokenization(token_rules)` (spacy/en.pyx:24).
 *
 * Iterates `token_rules`, unpacking every item into (chunk, lex, tokens).
 * For each item it
 *   - hashes `chunk` and (unless assertions are disabled) asserts that
 *     LEXEMES holds NULL for that hash,
 *   - calls _add(hashed, lex, len(lex), len(lex)) to obtain the head Lexeme,
 *   - then, for every (i, lex) in enumerate(tokens), formats
 *     '%s:@:%d:@:%s' % (chunk, i, lex), hashes it, calls
 *     _add(hashed, lex, 0, len(lex)) and links the result through `->tail`.
 *
 * Returns a new reference to None, or NULL with a Python exception set and a
 * traceback entry for "spacy.en.load_tokenization" added.
 */
static PyObject *__pyx_pf_5spacy_2en_load_tokenization(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_token_rules) {
    /* Variables declared or bound in the .pyx body. */
    struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_v_word;
    __pyx_t_5spacy_6lexeme_StringHash __pyx_v_hashed;
    PyObject *__pyx_v_chunk = NULL;
    PyObject *__pyx_v_lex = NULL;
    PyObject *__pyx_v_tokens = NULL;
    PyObject *__pyx_v_i = NULL;
    PyObject *__pyx_v_token_string = NULL;
    Py_ssize_t __pyx_v_length;
    PyObject *__pyx_r = NULL;
    __Pyx_RefNannyDeclarations
    /* Temporaries; the PyObject* ones are released at __pyx_L1_error. */
    PyObject *__pyx_t_1 = NULL;                 /* outer sequence / iterator        */
    Py_ssize_t __pyx_t_2;                       /* outer index (list/tuple path)    */
    PyObject *(*__pyx_t_3)(PyObject *);         /* outer tp_iternext, or NULL       */
    PyObject *__pyx_t_4 = NULL;                 /* current rule, later enumerate counter */
    PyObject *__pyx_t_5 = NULL;
    PyObject *__pyx_t_6 = NULL;
    PyObject *__pyx_t_7 = NULL;                 /* unpacked item, later inner sequence / iterator */
    PyObject *__pyx_t_8 = NULL;                 /* iterator used for generic unpacking */
    PyObject *(*__pyx_t_9)(PyObject *);
    Py_ssize_t __pyx_t_10;
    __pyx_t_5spacy_6lexeme_StringHash __pyx_t_11;
    Py_ssize_t __pyx_t_12;                      /* len(lex), later inner index      */
    struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_t_13;
    PyObject *(*__pyx_t_14)(PyObject *);        /* inner tp_iternext, or NULL       */
    int __pyx_lineno = 0;
    const char *__pyx_filename = NULL;
    int __pyx_clineno = 0;
    __Pyx_RefNannySetupContext("load_tokenization", 0);

    /* en.pyx:27 -- for chunk, lex, tokens in token_rules: */
    if (PyList_CheckExact(__pyx_v_token_rules) || PyTuple_CheckExact(__pyx_v_token_rules)) {
        /* Exact list/tuple: walk it by index. */
        __pyx_t_1 = __pyx_v_token_rules;
        __Pyx_INCREF(__pyx_t_1);
        __pyx_t_2 = 0;
        __pyx_t_3 = NULL;
    } else {
        /* Anything else: go through the iterator protocol. */
        __pyx_t_2 = -1;
        __pyx_t_1 = PyObject_GetIter(__pyx_v_token_rules);
        if (unlikely(!__pyx_t_1)) __PYX_LOAD_TOK_FAIL(27);
        __Pyx_GOTREF(__pyx_t_1);
        __pyx_t_3 = Py_TYPE(__pyx_t_1)->tp_iternext;
    }
    for (;;) {
        /* Fetch the next rule into __pyx_t_4 (owned reference). */
        if (!__pyx_t_3 && PyList_CheckExact(__pyx_t_1)) {
            if (__pyx_t_2 >= PyList_GET_SIZE(__pyx_t_1)) break;
#if CYTHON_COMPILING_IN_CPYTHON
            __pyx_t_4 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_2);
            __Pyx_INCREF(__pyx_t_4);
            __pyx_t_2++;
#else
            __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2);
            __pyx_t_2++;
            if (unlikely(!__pyx_t_4)) __PYX_LOAD_TOK_FAIL(27);
#endif
        } else if (!__pyx_t_3 && PyTuple_CheckExact(__pyx_t_1)) {
            if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_1)) break;
#if CYTHON_COMPILING_IN_CPYTHON
            __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_2);
            __Pyx_INCREF(__pyx_t_4);
            __pyx_t_2++;
#else
            __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2);
            __pyx_t_2++;
            if (unlikely(!__pyx_t_4)) __PYX_LOAD_TOK_FAIL(27);
#endif
        } else {
            __pyx_t_4 = __pyx_t_3(__pyx_t_1);
            if (unlikely(!__pyx_t_4)) {
                /* NULL means exhaustion unless a non-StopIteration error is pending. */
                PyObject* pending = PyErr_Occurred();
                if (pending) {
                    if (likely(pending == PyExc_StopIteration || PyErr_GivenExceptionMatches(pending, PyExc_StopIteration))) PyErr_Clear();
                    else __PYX_LOAD_TOK_FAIL(27);
                }
                break;
            }
            __Pyx_GOTREF(__pyx_t_4);
        }

        /* Unpack the rule into exactly three values: __pyx_t_5/6/7. */
        if ((likely(PyTuple_CheckExact(__pyx_t_4))) || (PyList_CheckExact(__pyx_t_4))) {
            PyObject* row = __pyx_t_4;
#if CYTHON_COMPILING_IN_CPYTHON
            Py_ssize_t row_len = Py_SIZE(row);
#else
            Py_ssize_t row_len = PySequence_Size(row);
#endif
            if (unlikely(row_len != 3)) {
                if (row_len > 3) __Pyx_RaiseTooManyValuesError(3);
                else if (row_len >= 0) __Pyx_RaiseNeedMoreValuesError(row_len);
                __PYX_LOAD_TOK_FAIL(27);
            }
#if CYTHON_COMPILING_IN_CPYTHON
            if (likely(PyTuple_CheckExact(row))) {
                __pyx_t_5 = PyTuple_GET_ITEM(row, 0);
                __pyx_t_6 = PyTuple_GET_ITEM(row, 1);
                __pyx_t_7 = PyTuple_GET_ITEM(row, 2);
            } else {
                __pyx_t_5 = PyList_GET_ITEM(row, 0);
                __pyx_t_6 = PyList_GET_ITEM(row, 1);
                __pyx_t_7 = PyList_GET_ITEM(row, 2);
            }
            /* The GET_ITEM results are borrowed; take ownership. */
            __Pyx_INCREF(__pyx_t_5);
            __Pyx_INCREF(__pyx_t_6);
            __Pyx_INCREF(__pyx_t_7);
#else
            __pyx_t_5 = PySequence_ITEM(row, 0);
            if (unlikely(!__pyx_t_5)) __PYX_LOAD_TOK_FAIL(27);
            __Pyx_GOTREF(__pyx_t_5);
            __pyx_t_6 = PySequence_ITEM(row, 1);
            if (unlikely(!__pyx_t_6)) __PYX_LOAD_TOK_FAIL(27);
            __Pyx_GOTREF(__pyx_t_6);
            __pyx_t_7 = PySequence_ITEM(row, 2);
            if (unlikely(!__pyx_t_7)) __PYX_LOAD_TOK_FAIL(27);
            __Pyx_GOTREF(__pyx_t_7);
#endif
            __Pyx_DECREF(__pyx_t_4);
            __pyx_t_4 = 0;
        } else {
            /* Generic iterable: pull three items, then require exhaustion. */
            Py_ssize_t unpacked = -1;
            __pyx_t_8 = PyObject_GetIter(__pyx_t_4);
            if (unlikely(!__pyx_t_8)) __PYX_LOAD_TOK_FAIL(27);
            __Pyx_GOTREF(__pyx_t_8);
            __Pyx_DECREF(__pyx_t_4);
            __pyx_t_4 = 0;
            __pyx_t_9 = Py_TYPE(__pyx_t_8)->tp_iternext;

            unpacked = 0;
            __pyx_t_5 = __pyx_t_9(__pyx_t_8);
            if (unlikely(!__pyx_t_5)) goto __pyx_L5_unpacking_failed;
            __Pyx_GOTREF(__pyx_t_5);

            unpacked = 1;
            __pyx_t_6 = __pyx_t_9(__pyx_t_8);
            if (unlikely(!__pyx_t_6)) goto __pyx_L5_unpacking_failed;
            __Pyx_GOTREF(__pyx_t_6);

            unpacked = 2;
            __pyx_t_7 = __pyx_t_9(__pyx_t_8);
            if (unlikely(!__pyx_t_7)) goto __pyx_L5_unpacking_failed;
            __Pyx_GOTREF(__pyx_t_7);

            if (__Pyx_IternextUnpackEndCheck(__pyx_t_9(__pyx_t_8), 3) < 0) __PYX_LOAD_TOK_FAIL(27);
            __pyx_t_9 = NULL;
            __Pyx_DECREF(__pyx_t_8);
            __pyx_t_8 = 0;
            goto __pyx_L6_unpacking_done;

            __pyx_L5_unpacking_failed:;
            __Pyx_DECREF(__pyx_t_8);
            __pyx_t_8 = 0;
            __pyx_t_9 = NULL;
            if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(unpacked);
            __PYX_LOAD_TOK_FAIL(27);

            __pyx_L6_unpacking_done:;
        }
        /* Bind the loop targets, dropping the previous iteration's values. */
        __Pyx_XDECREF_SET(__pyx_v_chunk, __pyx_t_5);
        __pyx_t_5 = 0;
        __Pyx_XDECREF_SET(__pyx_v_lex, __pyx_t_6);
        __pyx_t_6 = 0;
        __Pyx_XDECREF_SET(__pyx_v_tokens, __pyx_t_7);
        __pyx_t_7 = 0;

        /* en.pyx:28 -- hashed = hash_string(chunk, len(chunk)) */
        if (!(likely(PyUnicode_CheckExact(__pyx_v_chunk))||((__pyx_v_chunk) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "unicode", Py_TYPE(__pyx_v_chunk)->tp_name), 0))) __PYX_LOAD_TOK_FAIL(28);
        __pyx_t_10 = PyObject_Length(__pyx_v_chunk);
        if (unlikely(__pyx_t_10 == -1)) __PYX_LOAD_TOK_FAIL(28);
        __pyx_t_11 = __pyx_f_5spacy_2en_hash_string(((PyObject*)__pyx_v_chunk), __pyx_t_10);
        if (unlikely(__pyx_t_11 == 0)) __PYX_LOAD_TOK_FAIL(28);
        __pyx_v_hashed = __pyx_t_11;

        /* en.pyx:29 -- assert LEXEMES[hashed] == NULL */
#ifndef CYTHON_WITHOUT_ASSERTIONS
        if (unlikely(!Py_OptimizeFlag) &&
            unlikely(!((__pyx_v_5spacy_2en_LEXEMES[__pyx_v_hashed]) == NULL))) {
            PyErr_SetNone(PyExc_AssertionError);
            __PYX_LOAD_TOK_FAIL(29);
        }
#endif

        /* en.pyx:30 -- word = _add(hashed, lex, len(lex), len(lex)) */
        if (!(likely(PyUnicode_CheckExact(__pyx_v_lex))||((__pyx_v_lex) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "unicode", Py_TYPE(__pyx_v_lex)->tp_name), 0))) __PYX_LOAD_TOK_FAIL(30);
        __pyx_t_10 = PyObject_Length(__pyx_v_lex);
        if (unlikely(__pyx_t_10 == -1)) __PYX_LOAD_TOK_FAIL(30);
        __pyx_t_12 = PyObject_Length(__pyx_v_lex);
        if (unlikely(__pyx_t_12 == -1)) __PYX_LOAD_TOK_FAIL(30);
        __pyx_t_13 = __pyx_f_5spacy_2en__add(__pyx_v_hashed, ((PyObject*)__pyx_v_lex), __pyx_t_10, __pyx_t_12);
        if (unlikely(__pyx_t_13 == NULL)) __PYX_LOAD_TOK_FAIL(30);
        __pyx_v_word = __pyx_t_13;

        /* en.pyx:31 -- for i, lex in enumerate(tokens):
         * __pyx_t_4 carries the running counter as a Python int, starting at 0. */
        __Pyx_INCREF(__pyx_int_0);
        __pyx_t_4 = __pyx_int_0;
        if (PyList_CheckExact(__pyx_v_tokens) || PyTuple_CheckExact(__pyx_v_tokens)) {
            __pyx_t_7 = __pyx_v_tokens;
            __Pyx_INCREF(__pyx_t_7);
            __pyx_t_12 = 0;
            __pyx_t_14 = NULL;
        } else {
            __pyx_t_12 = -1;
            __pyx_t_7 = PyObject_GetIter(__pyx_v_tokens);
            if (unlikely(!__pyx_t_7)) __PYX_LOAD_TOK_FAIL(31);
            __Pyx_GOTREF(__pyx_t_7);
            __pyx_t_14 = Py_TYPE(__pyx_t_7)->tp_iternext;
        }
        for (;;) {
            /* Fetch the next token into __pyx_t_6 (owned reference). */
            if (!__pyx_t_14 && PyList_CheckExact(__pyx_t_7)) {
                if (__pyx_t_12 >= PyList_GET_SIZE(__pyx_t_7)) break;
#if CYTHON_COMPILING_IN_CPYTHON
                __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_12);
                __Pyx_INCREF(__pyx_t_6);
                __pyx_t_12++;
#else
                __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_12);
                __pyx_t_12++;
                if (unlikely(!__pyx_t_6)) __PYX_LOAD_TOK_FAIL(31);
#endif
            } else if (!__pyx_t_14 && PyTuple_CheckExact(__pyx_t_7)) {
                if (__pyx_t_12 >= PyTuple_GET_SIZE(__pyx_t_7)) break;
#if CYTHON_COMPILING_IN_CPYTHON
                __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_12);
                __Pyx_INCREF(__pyx_t_6);
                __pyx_t_12++;
#else
                __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_12);
                __pyx_t_12++;
                if (unlikely(!__pyx_t_6)) __PYX_LOAD_TOK_FAIL(31);
#endif
            } else {
                __pyx_t_6 = __pyx_t_14(__pyx_t_7);
                if (unlikely(!__pyx_t_6)) {
                    PyObject* pending = PyErr_Occurred();
                    if (pending) {
                        if (likely(pending == PyExc_StopIteration || PyErr_GivenExceptionMatches(pending, PyExc_StopIteration))) PyErr_Clear();
                        else __PYX_LOAD_TOK_FAIL(31);
                    }
                    break;
                }
                __Pyx_GOTREF(__pyx_t_6);
            }
            /* lex <- token; i <- counter; counter <- counter + 1 */
            __Pyx_DECREF_SET(__pyx_v_lex, __pyx_t_6);
            __pyx_t_6 = 0;
            __Pyx_INCREF(__pyx_t_4);
            __Pyx_XDECREF_SET(__pyx_v_i, __pyx_t_4);
            __pyx_t_6 = PyNumber_Add(__pyx_t_4, __pyx_int_1);
            if (unlikely(!__pyx_t_6)) __PYX_LOAD_TOK_FAIL(31);
            __Pyx_GOTREF(__pyx_t_6);
            __Pyx_DECREF(__pyx_t_4);
            __pyx_t_4 = __pyx_t_6;
            __pyx_t_6 = 0;

            /* en.pyx:32 -- token_string = '%s:@:%d:@:%s' % (chunk, i, lex) */
            __pyx_t_6 = PyTuple_New(3);
            if (unlikely(!__pyx_t_6)) __PYX_LOAD_TOK_FAIL(32);
            __Pyx_GOTREF(__pyx_t_6);
            __Pyx_INCREF(__pyx_v_chunk);
            PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_v_chunk);
            __Pyx_GIVEREF(__pyx_v_chunk);
            __Pyx_INCREF(__pyx_v_i);
            PyTuple_SET_ITEM(__pyx_t_6, 1, __pyx_v_i);
            __Pyx_GIVEREF(__pyx_v_i);
            __Pyx_INCREF(__pyx_v_lex);
            PyTuple_SET_ITEM(__pyx_t_6, 2, __pyx_v_lex);
            __Pyx_GIVEREF(__pyx_v_lex);
            __pyx_t_5 = PyUnicode_Format(__pyx_kp_u_s_d_s, __pyx_t_6);
            if (unlikely(!__pyx_t_5)) __PYX_LOAD_TOK_FAIL(32);
            __Pyx_GOTREF(__pyx_t_5);
            __Pyx_DECREF(__pyx_t_6);
            __pyx_t_6 = 0;
            __Pyx_XDECREF_SET(__pyx_v_token_string, ((PyObject*)__pyx_t_5));
            __pyx_t_5 = 0;

            /* en.pyx:33 -- length = len(token_string) */
            __pyx_t_10 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_token_string);
            if (unlikely(__pyx_t_10 == -1)) __PYX_LOAD_TOK_FAIL(33);
            __pyx_v_length = __pyx_t_10;

            /* en.pyx:34 -- hashed = hash_string(token_string, length) */
            __pyx_t_11 = __pyx_f_5spacy_2en_hash_string(__pyx_v_token_string, __pyx_v_length);
            if (unlikely(__pyx_t_11 == 0)) __PYX_LOAD_TOK_FAIL(34);
            __pyx_v_hashed = __pyx_t_11;

            /* en.pyx:35 -- word.tail = _add(hashed, lex, 0, len(lex)) */
            if (!(likely(PyUnicode_CheckExact(__pyx_v_lex))||((__pyx_v_lex) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "unicode", Py_TYPE(__pyx_v_lex)->tp_name), 0))) __PYX_LOAD_TOK_FAIL(35);
            __pyx_t_10 = PyObject_Length(__pyx_v_lex);
            if (unlikely(__pyx_t_10 == -1)) __PYX_LOAD_TOK_FAIL(35);
            __pyx_t_13 = __pyx_f_5spacy_2en__add(__pyx_v_hashed, ((PyObject*)__pyx_v_lex), 0, __pyx_t_10);
            if (unlikely(__pyx_t_13 == NULL)) __PYX_LOAD_TOK_FAIL(35);
            __pyx_v_word->tail = __pyx_t_13;

            /* en.pyx:36 -- word = word.tail */
            __pyx_t_13 = __pyx_v_word->tail;
            __pyx_v_word = __pyx_t_13;
        }
        /* Inner loop finished: release its sequence/iterator and the counter. */
        __Pyx_DECREF(__pyx_t_7);
        __pyx_t_7 = 0;
        __Pyx_DECREF(__pyx_t_4);
        __pyx_t_4 = 0;
    }
    __Pyx_DECREF(__pyx_t_1);
    __pyx_t_1 = 0;

    /* Normal exit: the function has no explicit return, so it yields None. */
    __pyx_r = Py_None;
    __Pyx_INCREF(Py_None);
    goto __pyx_L0;

    /* Error exit: drop whatever temporaries are still held, record the frame. */
    __pyx_L1_error:;
    __Pyx_XDECREF(__pyx_t_1);
    __Pyx_XDECREF(__pyx_t_4);
    __Pyx_XDECREF(__pyx_t_5);
    __Pyx_XDECREF(__pyx_t_6);
    __Pyx_XDECREF(__pyx_t_7);
    __Pyx_XDECREF(__pyx_t_8);
    __Pyx_AddTraceback("spacy.en.load_tokenization", __pyx_clineno, __pyx_lineno, __pyx_filename);
    __pyx_r = NULL;

    /* Common exit: release the Python-level locals. */
    __pyx_L0:;
    __Pyx_XDECREF(__pyx_v_chunk);
    __Pyx_XDECREF(__pyx_v_lex);
    __Pyx_XDECREF(__pyx_v_tokens);
    __Pyx_XDECREF(__pyx_v_i);
    __Pyx_XDECREF(__pyx_v_token_string);
    __Pyx_XGIVEREF(__pyx_r);
    __Pyx_RefNannyFinishContext();
    return __pyx_r;
}
#undef __PYX_LOAD_TOK_FAIL
|
|
|
|
/* "spacy/en.pyx":41
|
|
* load_tokenization(util.read_tokenization('en'))
|
|
*
|
|
* cpdef Lexeme_addr lookup(unicode string) except 0: # <<<<<<<<<<<<<<
|
|
* '''.. function:: enumerate(sequence[, start=0])
|
|
* Fetch a Lexeme representing a word string. If the word has not been seen,
|
|
*/
|
|
|
|
static PyObject *__pyx_pw_5spacy_2en_3lookup(PyObject *__pyx_self, PyObject *__pyx_v_string); /*proto*/
|
|
/*
 * C-level body of `cpdef Lexeme_addr lookup(unicode string) except 0`
 * (spacy/en.pyx:41).
 *
 * - The empty string yields the address of the module-level BLANK_WORD.
 * - Otherwise the string is hashed and LEXEMES is indexed with the hash; when
 *   the stored pointer is NULL a Lexeme is created through
 *   _add(hashed, string, _find_split(string, length), length).
 * - The Lexeme pointer is returned cast to Lexeme_addr.
 *
 * On failure a traceback entry for "spacy.en.lookup" is added and 0 is
 * returned (the declared `except 0` sentinel).
 */
static __pyx_t_5spacy_2en_Lexeme_addr __pyx_f_5spacy_2en_lookup(PyObject *__pyx_v_string, CYTHON_UNUSED int __pyx_skip_dispatch) {
    size_t length;
    __pyx_t_5spacy_6lexeme_StringHash hashed;
    struct __pyx_t_5spacy_6lexeme_Lexeme *word_ptr;
    __pyx_t_5spacy_2en_Lexeme_addr __pyx_r;
    __Pyx_RefNannyDeclarations
    int is_blank;
    Py_ssize_t py_length;
    int __pyx_lineno = 0;
    const char *__pyx_filename = NULL;
    int __pyx_clineno = 0;
    __Pyx_RefNannySetupContext("lookup", 0);

    /* en.pyx:49-50 -- if string == '': return <Lexeme_addr>&BLANK_WORD */
    is_blank = __Pyx_PyUnicode_Equals(__pyx_v_string, __pyx_kp_u_, Py_EQ);
    if (unlikely(is_blank < 0)) {
        __pyx_filename = __pyx_f[0]; __pyx_lineno = 49; __pyx_clineno = __LINE__;
        goto __pyx_L1_error;
    }
    if (is_blank != 0) {
        __pyx_r = ((__pyx_t_5spacy_2en_Lexeme_addr)(&__pyx_v_5spacy_2en_BLANK_WORD));
        goto __pyx_L0;
    }

    /* en.pyx:51 -- cdef size_t length = len(string) */
    if (unlikely(__pyx_v_string == Py_None)) {
        PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()");
        __pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__;
        goto __pyx_L1_error;
    }
    py_length = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_string);
    if (unlikely(py_length == -1)) {
        __pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__;
        goto __pyx_L1_error;
    }
    length = py_length;

    /* en.pyx:52 -- cdef StringHash hashed = hash_string(string, length)
     * A zero hash is the callee's error sentinel. */
    hashed = __pyx_f_5spacy_2en_hash_string(__pyx_v_string, length);
    if (unlikely(hashed == 0)) {
        __pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__;
        goto __pyx_L1_error;
    }

    /* en.pyx:53 -- cdef Lexeme* word_ptr = LEXEMES[hashed] */
    word_ptr = (__pyx_v_5spacy_2en_LEXEMES[hashed]);

    /* en.pyx:55-56 -- unseen word: build and register a new Lexeme */
    if (word_ptr == NULL) {
        size_t split = __pyx_f_5spacy_2en__find_split(__pyx_v_string, length);
        word_ptr = __pyx_f_5spacy_2en__add(hashed, __pyx_v_string, split, length);
        if (unlikely(word_ptr == NULL)) {
            __pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__;
            goto __pyx_L1_error;
        }
    }

    /* en.pyx:57 -- return <Lexeme_addr>word_ptr */
    __pyx_r = ((__pyx_t_5spacy_2en_Lexeme_addr)word_ptr);
    goto __pyx_L0;

    /* function exit code */
    __pyx_L1_error:;
    __Pyx_AddTraceback("spacy.en.lookup", __pyx_clineno, __pyx_lineno, __pyx_filename);
    __pyx_r = 0;
    __pyx_L0:;
    __Pyx_RefNannyFinishContext();
    return __pyx_r;
}
|
|
|
|
/* Python wrapper */
|
|
static PyObject *__pyx_pw_5spacy_2en_3lookup(PyObject *__pyx_self, PyObject *__pyx_v_string); /*proto*/

/* Docstring attached to the Python-level lookup() function.
 * NOTE(review): the first line (".. function:: enumerate(...)") does not
 * describe lookup(); it looks like leftover text in the spacy/en.pyx
 * docstring and should be corrected there, not here -- confirm. */
static char __pyx_doc_5spacy_2en_2lookup[] = ".. function:: enumerate(sequence[, start=0])\n Fetch a Lexeme representing a word string. If the word has not been seen,\n construct one, splitting off any attached punctuation or clitics. A\n reference to BLANK_WORD is returned for the empty string.\n \n To specify the boundaries of the word if it has not been seen, use lookup_chunk.\n ";

/*
 * Python wrapper for lookup(string).
 *
 * Runs __Pyx_ArgTypeTest on the single argument against PyUnicode_Type
 * (argument name "string") and, when that succeeds, forwards to
 * __pyx_pf_5spacy_2en_2lookup.  Returns NULL when the type test fails,
 * otherwise whatever the implementation function returns.
 */
static PyObject *__pyx_pw_5spacy_2en_3lookup(PyObject *__pyx_self, PyObject *__pyx_v_string) {
  CYTHON_UNUSED int __pyx_lineno = 0;
  CYTHON_UNUSED const char *__pyx_filename = NULL;
  CYTHON_UNUSED int __pyx_clineno = 0;
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("lookup (wrapper)", 0);

  if (likely(__Pyx_ArgTypeTest(__pyx_v_string, &PyUnicode_Type, 1, "string", 1))) {
    __pyx_r = __pyx_pf_5spacy_2en_2lookup(__pyx_self, __pyx_v_string);
  } else {
    /* Record the failing position (en.pyx line 41); no traceback entry is
     * added by this wrapper. */
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__;
    __pyx_r = NULL;
  }

  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
|
|
|
|
/*
 * Python-level body of lookup(string).
 *
 * Calls the C-level lookup and boxes the resulting Lexeme_addr with
 * __Pyx_PyInt_FromSize_t.  A result of 0 from the C-level function is the
 * error sentinel of the "except 0" declaration in en.pyx; in that case, or
 * when boxing fails, a traceback entry for "spacy.en.lookup" is added and
 * NULL is returned.
 */
static PyObject *__pyx_pf_5spacy_2en_2lookup(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string) {
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  __pyx_t_5spacy_2en_Lexeme_addr __pyx_addr;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_RefNannySetupContext("lookup", 0);

  __pyx_addr = __pyx_f_5spacy_2en_lookup(__pyx_v_string, 0);
  if (unlikely(__pyx_addr == 0)) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }

  /* Box the address as a Python integer; this is the return value. */
  __pyx_r = __Pyx_PyInt_FromSize_t(__pyx_addr);
  if (unlikely(!__pyx_r)) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  __Pyx_GOTREF(__pyx_r);
  goto __pyx_L0;

  /* function exit code */
  __pyx_L1_error:;
  __Pyx_AddTraceback("spacy.en.lookup", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
|
|
|
|
/* "spacy/en.pyx":60
|
|
*
|
|
*
|
|
* cpdef Lexeme_addr lookup_chunk(unicode string, int start, int end) except 0: # <<<<<<<<<<<<<<
|
|
* '''Fetch a Lexeme representing a word string. If the word has not been seen,
|
|
* construct one, given the specified start and end indices. A negative index
|
|
*/
|
|
|
|
static PyObject *__pyx_pw_5spacy_2en_5lookup_chunk(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
|
|
/*
 * C-level implementation of (spacy/en.pyx:60)
 *     cpdef Lexeme_addr lookup_chunk(unicode string, int start, int end) except 0
 *
 * Steps, mirroring en.pyx lines 68-75:
 *   - if string == '' return the address of BLANK_WORD;
 *   - hash the whole string and read LEXEMES[hashed];
 *   - if that entry is NULL, create it with _add(hashed, string, start, length);
 *   - return the entry's address cast to Lexeme_addr.
 *
 * `end` and `__pyx_skip_dispatch` are not used by this body (both are
 * marked CYTHON_UNUSED).  Returns 0 after recording a traceback entry for
 * "spacy.en.lookup_chunk" when any step fails.
 */
static __pyx_t_5spacy_2en_Lexeme_addr __pyx_f_5spacy_2en_lookup_chunk(PyObject *__pyx_v_string, int __pyx_v_start, CYTHON_UNUSED int __pyx_v_end, CYTHON_UNUSED int __pyx_skip_dispatch) {
  size_t __pyx_v_length;
  __pyx_t_5spacy_6lexeme_StringHash __pyx_v_hashed;
  struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_v_chunk_ptr;
  __pyx_t_5spacy_2en_Lexeme_addr __pyx_r;
  __Pyx_RefNannyDeclarations
  int __pyx_is_empty;
  Py_ssize_t __pyx_n_chars;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_RefNannySetupContext("lookup_chunk", 0);

  /* en.pyx:68-69 -- if string == '': return <Lexeme_addr>&BLANK_WORD */
  __pyx_is_empty = __Pyx_PyUnicode_Equals(__pyx_v_string, __pyx_kp_u_, Py_EQ);
  if (unlikely(__pyx_is_empty < 0)) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 68; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  if (__pyx_is_empty != 0) {
    __pyx_r = ((__pyx_t_5spacy_2en_Lexeme_addr)(&__pyx_v_5spacy_2en_BLANK_WORD));
    goto __pyx_L0;
  }

  /* en.pyx:70 -- cdef size_t length = len(string) */
  if (unlikely(__pyx_v_string == Py_None)) {
    PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()");
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 70; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  __pyx_n_chars = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_string);
  if (unlikely(__pyx_n_chars == -1)) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 70; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  __pyx_v_length = __pyx_n_chars;

  /* en.pyx:71 -- cdef StringHash hashed = hash_string(string, length)
   * A hash of 0 is hash_string's error sentinel ("except 0"). */
  __pyx_v_hashed = __pyx_f_5spacy_2en_hash_string(__pyx_v_string, __pyx_v_length);
  if (unlikely(__pyx_v_hashed == 0)) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 71; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }

  /* en.pyx:72-74 -- fetch the cached entry, creating it when missing */
  __pyx_v_chunk_ptr = (__pyx_v_5spacy_2en_LEXEMES[__pyx_v_hashed]);
  if (__pyx_v_chunk_ptr == NULL) {
    __pyx_v_chunk_ptr = __pyx_f_5spacy_2en__add(__pyx_v_hashed, __pyx_v_string, __pyx_v_start, __pyx_v_length);
    if (unlikely(__pyx_v_chunk_ptr == NULL)) {
      __pyx_filename = __pyx_f[0]; __pyx_lineno = 74; __pyx_clineno = __LINE__;
      goto __pyx_L1_error;
    }
  }

  /* en.pyx:75 -- return <Lexeme_addr>chunk_ptr */
  __pyx_r = ((__pyx_t_5spacy_2en_Lexeme_addr)__pyx_v_chunk_ptr);
  goto __pyx_L0;

  /* function exit code */
  __pyx_L1_error:;
  __Pyx_AddTraceback("spacy.en.lookup_chunk", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = 0;
  __pyx_L0:;
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
|
|
|
|
/* Python wrapper */
|
|
static PyObject *__pyx_pw_5spacy_2en_5lookup_chunk(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/

/* Docstring attached to the Python-level lookup_chunk() function. */
static char __pyx_doc_5spacy_2en_4lookup_chunk[] = "Fetch a Lexeme representing a word string. If the word has not been seen,\n construct one, given the specified start and end indices. A negative index\n significes 0 for start, and the string length for end --- i.e. the string\n will not be sliced if start == -1 and end == -1.\n \n A reference to BLANK_WORD is returned for the empty string.\n ";

/*
 * Python wrapper for lookup_chunk(string, start, end).
 *
 * Unpacks exactly three arguments, given positionally and/or by keyword,
 * converts `start` and `end` to C int, runs __Pyx_ArgTypeTest on `string`
 * against PyUnicode_Type, then forwards to __pyx_pf_5spacy_2en_4lookup_chunk.
 * Returns NULL when unpacking, conversion or the type test fails;
 * unpacking/conversion failures also add a traceback entry for
 * "spacy.en.lookup_chunk".
 */
static PyObject *__pyx_pw_5spacy_2en_5lookup_chunk(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  PyObject *__pyx_v_string = 0;
  int __pyx_v_start;
  int __pyx_v_end;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("lookup_chunk (wrapper)", 0);
  {
    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_string,&__pyx_n_s_start,&__pyx_n_s_end,0};
    PyObject* values[3] = {0,0,0};

    if (likely(!__pyx_kwds)) {
      /* Positional-only call: all three arguments must be present. */
      if (PyTuple_GET_SIZE(__pyx_args) != 3) goto __pyx_L5_argtuple_error;
      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
      values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
    } else {
      Py_ssize_t kw_args;
      Py_ssize_t i;
      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);

      /* Take whatever was passed positionally (at most three). */
      if (pos_args < 0 || pos_args > 3) goto __pyx_L5_argtuple_error;
      for (i = 0; i < pos_args; i++) {
        values[i] = PyTuple_GET_ITEM(__pyx_args, i);
      }

      /* Every slot not filled positionally must come from the keywords. */
      kw_args = PyDict_Size(__pyx_kwds);
      if (pos_args < 1) {
        values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_string);
        if (unlikely(values[0] == 0)) goto __pyx_L5_argtuple_error;
        kw_args--;
      }
      if (pos_args < 2) {
        values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_start);
        if (unlikely(values[1] == 0)) {
          __Pyx_RaiseArgtupleInvalid("lookup_chunk", 1, 3, 3, 1);
          __pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__;
          goto __pyx_L3_error;
        }
        kw_args--;
      }
      if (pos_args < 3) {
        values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_end);
        if (unlikely(values[2] == 0)) {
          __Pyx_RaiseArgtupleInvalid("lookup_chunk", 1, 3, 3, 2);
          __pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__;
          goto __pyx_L3_error;
        }
        kw_args--;
      }

      /* Keywords still unaccounted for are handed to the generic parser. */
      if (unlikely(kw_args > 0)) {
        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "lookup_chunk") < 0)) {
          __pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__;
          goto __pyx_L3_error;
        }
      }
    }

    __pyx_v_string = ((PyObject*)values[0]);
    /* -1 is only a conversion failure when a Python error is also set. */
    __pyx_v_start = __Pyx_PyInt_As_int(values[1]);
    if (unlikely((__pyx_v_start == (int)-1) && PyErr_Occurred())) {
      __pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__;
      goto __pyx_L3_error;
    }
    __pyx_v_end = __Pyx_PyInt_As_int(values[2]);
    if (unlikely((__pyx_v_end == (int)-1) && PyErr_Occurred())) {
      __pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__;
      goto __pyx_L3_error;
    }
  }

  /* Arguments unpacked: type-check `string` and dispatch. */
  if (unlikely(!__Pyx_ArgTypeTest(__pyx_v_string, &PyUnicode_Type, 1, "string", 1))) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__;
    __pyx_r = NULL;
  } else {
    __pyx_r = __pyx_pf_5spacy_2en_4lookup_chunk(__pyx_self, __pyx_v_string, __pyx_v_start, __pyx_v_end);
  }
  __Pyx_RefNannyFinishContext();
  return __pyx_r;

  /* argument-unpacking failure exits */
  __pyx_L5_argtuple_error:;
  __Pyx_RaiseArgtupleInvalid("lookup_chunk", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args));
  __pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__;
  __pyx_L3_error:;
  __Pyx_AddTraceback("spacy.en.lookup_chunk", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
}
|
|
|
|
/*
 * Python-level body of lookup_chunk(string, start, end).
 *
 * Calls the C-level lookup_chunk and boxes the resulting Lexeme_addr with
 * __Pyx_PyInt_FromSize_t.  A result of 0 from the C-level function is its
 * "except 0" error sentinel; in that case, or when boxing fails, a
 * traceback entry for "spacy.en.lookup_chunk" is added and NULL is returned.
 */
static PyObject *__pyx_pf_5spacy_2en_4lookup_chunk(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string, int __pyx_v_start, int __pyx_v_end) {
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  __pyx_t_5spacy_2en_Lexeme_addr __pyx_addr;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_RefNannySetupContext("lookup_chunk", 0);

  __pyx_addr = __pyx_f_5spacy_2en_lookup_chunk(__pyx_v_string, __pyx_v_start, __pyx_v_end, 0);
  if (unlikely(__pyx_addr == 0)) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }

  /* Box the address as a Python integer; this is the return value. */
  __pyx_r = __Pyx_PyInt_FromSize_t(__pyx_addr);
  if (unlikely(!__pyx_r)) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  __Pyx_GOTREF(__pyx_r);
  goto __pyx_L0;

  /* function exit code */
  __pyx_L1_error:;
  __Pyx_AddTraceback("spacy.en.lookup_chunk", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
|
|
|
|
/* "spacy/en.pyx":78
|
|
*
|
|
*
|
|
* cdef StringHash hash_string(unicode s, size_t length) except 0: # <<<<<<<<<<<<<<
|
|
* '''Hash unicode with MurmurHash64A'''
|
|
* assert length
|
|
*/
|
|
|
|
/*
 * C-level implementation of (spacy/en.pyx:78)
 *     cdef StringHash hash_string(unicode s, size_t length) except 0
 *
 * Feeds the Py_UNICODE buffer obtained from `s` to MurmurHash64A, hashing
 * length * sizeof(Py_UNICODE) bytes with seed 0, and returns the result.
 *
 * Unless assertions are compiled out (CYTHON_WITHOUT_ASSERTIONS) or
 * Py_OptimizeFlag is set, a zero `length` raises AssertionError.
 * Returns 0 after recording a traceback entry for "spacy.en.hash_string"
 * on failure.
 */
static __pyx_t_5spacy_6lexeme_StringHash __pyx_f_5spacy_2en_hash_string(PyObject *__pyx_v_s, size_t __pyx_v_length) {
  __pyx_t_5spacy_6lexeme_StringHash __pyx_r;
  __Pyx_RefNannyDeclarations
  __pyx_t_5spacy_2en_string_ptr __pyx_buffer;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_RefNannySetupContext("hash_string", 0);

  /* en.pyx:80 -- assert length */
  #ifndef CYTHON_WITHOUT_ASSERTIONS
  if (unlikely(!Py_OptimizeFlag) && unlikely(__pyx_v_length == 0)) {
    PyErr_SetNone(PyExc_AssertionError);
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 80; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  #endif

  /* en.pyx:81 -- return MurmurHash64A(<string_ptr>s, length * sizeof(Py_UNICODE), 0) */
  __pyx_buffer = __Pyx_PyUnicode_AsUnicode(__pyx_v_s);
  if (unlikely((!__pyx_buffer) && PyErr_Occurred())) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 81; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  __pyx_r = MurmurHash64A(((__pyx_t_5spacy_2en_string_ptr)__pyx_buffer), (__pyx_v_length * (sizeof(Py_UNICODE))), 0);
  goto __pyx_L0;

  /* function exit code */
  __pyx_L1_error:;
  __Pyx_AddTraceback("spacy.en.hash_string", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = 0;
  __pyx_L0:;
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
|
|
|
|
/* "spacy/en.pyx":84
|
|
*
|
|
*
|
|
* cpdef unicode unhash(StringHash hash_value): # <<<<<<<<<<<<<<
|
|
* '''Fetch a string from the reverse index, given its hash value.'''
|
|
* cdef string_ptr string = STRINGS[hash_value]
|
|
*/
|
|
|
|
static PyObject *__pyx_pw_5spacy_2en_7unhash(PyObject *__pyx_self, PyObject *__pyx_arg_hash_value); /*proto*/
|
|
/*
 * C-level implementation of (spacy/en.pyx:84)
 *     cpdef unicode unhash(StringHash hash_value)
 *
 * Looks up the module global STRINGS, indexes it with `hash_value`,
 * obtains a Py_UNICODE pointer from the resulting object and builds the
 * returned unicode object from that pointer.  Raises ValueError(hash_value)
 * when the pointer is NULL.
 *
 * Lifetime fix: the pointer is presumably a view into the buffer owned by
 * the object that STRINGS[hash_value] returned (PyUnicode_AsUnicode
 * semantics -- confirm against the __Pyx_PyUnicode_AsUnicode definition).
 * The reference to that object is therefore kept in
 * __pyx_v_string_owner until function exit, instead of being released
 * before the pointer is read, so the pointer cannot dangle if the
 * container does not itself keep the object alive.
 *
 * Returns a new reference, or NULL (with a traceback entry for
 * "spacy.en.unhash") on error.  `__pyx_skip_dispatch` is unused.
 */
static PyObject *__pyx_f_5spacy_2en_unhash(__pyx_t_5spacy_6lexeme_StringHash __pyx_v_hash_value, CYTHON_UNUSED int __pyx_skip_dispatch) {
  __pyx_t_5spacy_2en_string_ptr __pyx_v_string;
  PyObject *__pyx_v_string_owner = NULL;  /* keeps __pyx_v_string's buffer alive */
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  PyObject *__pyx_t_1 = NULL;
  PyObject *__pyx_t_2 = NULL;
  __pyx_t_5spacy_2en_string_ptr __pyx_t_3;
  int __pyx_t_4;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_RefNannySetupContext("unhash", 0);

  /* en.pyx:86 -- cdef string_ptr string = STRINGS[hash_value] */
  __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_STRINGS);
  if (unlikely(!__pyx_t_1)) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 86; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = __Pyx_GetItemInt(__pyx_t_1, __pyx_v_hash_value, __pyx_t_5spacy_6lexeme_StringHash, 0, __Pyx_PyInt_From_uint64_t, 0, 0, 1);
  if (unlikely(__pyx_t_2 == NULL)) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 86; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_3 = __Pyx_PyUnicode_AsUnicode(__pyx_t_2);
  if (unlikely((!__pyx_t_3) && PyErr_Occurred())) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 86; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  /* Hand the reference over to the owner slot rather than dropping it:
   * it is released at __pyx_L0 once the pointer is no longer needed. */
  __pyx_v_string_owner = __pyx_t_2;
  __pyx_t_2 = 0;
  __pyx_v_string = __pyx_t_3;

  /* en.pyx:87-88 -- if string == NULL: raise ValueError(hash_value) */
  __pyx_t_4 = ((__pyx_v_string == NULL) != 0);
  if (__pyx_t_4) {
    __pyx_t_2 = __Pyx_PyInt_From_uint64_t(__pyx_v_hash_value);
    if (unlikely(!__pyx_t_2)) {
      __pyx_filename = __pyx_f[0]; __pyx_lineno = 88; __pyx_clineno = __LINE__;
      goto __pyx_L1_error;
    }
    __Pyx_GOTREF(__pyx_t_2);
    __pyx_t_1 = PyTuple_New(1);
    if (unlikely(!__pyx_t_1)) {
      __pyx_filename = __pyx_f[0]; __pyx_lineno = 88; __pyx_clineno = __LINE__;
      goto __pyx_L1_error;
    }
    __Pyx_GOTREF(__pyx_t_1);
    /* The tuple takes over the reference to the boxed hash value. */
    PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_2);
    __Pyx_GIVEREF(__pyx_t_2);
    __pyx_t_2 = 0;
    __pyx_t_2 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_t_1, NULL);
    if (unlikely(!__pyx_t_2)) {
      __pyx_filename = __pyx_f[0]; __pyx_lineno = 88; __pyx_clineno = __LINE__;
      goto __pyx_L1_error;
    }
    __Pyx_GOTREF(__pyx_t_2);
    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
    __Pyx_Raise(__pyx_t_2, 0, 0, 0);
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 88; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }

  /* en.pyx:90 -- return string */
  __Pyx_XDECREF(__pyx_r);
  __pyx_t_2 = __Pyx_PyUnicode_FromUnicode(__pyx_v_string);
  if (unlikely(!__pyx_t_2)) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  __Pyx_GOTREF(__pyx_t_2);
  if (!(likely(PyUnicode_CheckExact(__pyx_t_2))||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "unicode", Py_TYPE(__pyx_t_2)->tp_name), 0))) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  __pyx_r = ((PyObject*)__pyx_t_2);
  __pyx_t_2 = 0;
  goto __pyx_L0;

  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_AddTraceback("spacy.en.unhash", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = 0;
  __pyx_L0:;
  /* Both the success and the error path release the owner here. */
  __Pyx_XDECREF(__pyx_v_string_owner);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
|
|
|
|
/* Python wrapper */
|
|
static PyObject *__pyx_pw_5spacy_2en_7unhash(PyObject *__pyx_self, PyObject *__pyx_arg_hash_value); /*proto*/

/* Docstring attached to the Python-level unhash() function. */
static char __pyx_doc_5spacy_2en_6unhash[] = "Fetch a string from the reverse index, given its hash value.";

/*
 * Python wrapper for unhash(hash_value).
 *
 * Converts the single argument to a StringHash via __Pyx_PyInt_As_uint64_t
 * and forwards it to __pyx_pf_5spacy_2en_6unhash.  When the conversion
 * fails, adds a traceback entry for "spacy.en.unhash" and returns NULL.
 */
static PyObject *__pyx_pw_5spacy_2en_7unhash(PyObject *__pyx_self, PyObject *__pyx_arg_hash_value) {
  __pyx_t_5spacy_6lexeme_StringHash __pyx_v_hash_value;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("unhash (wrapper)", 0);
  assert(__pyx_arg_hash_value);

  __pyx_v_hash_value = __Pyx_PyInt_As_uint64_t(__pyx_arg_hash_value);
  /* (uint64_t)-1 is only a conversion failure when a Python error is set. */
  if (unlikely((__pyx_v_hash_value == (uint64_t)-1) && PyErr_Occurred())) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 84; __pyx_clineno = __LINE__;
    __Pyx_AddTraceback("spacy.en.unhash", __pyx_clineno, __pyx_lineno, __pyx_filename);
    __pyx_r = NULL;
  } else {
    __pyx_r = __pyx_pf_5spacy_2en_6unhash(__pyx_self, ((__pyx_t_5spacy_6lexeme_StringHash)__pyx_v_hash_value));
  }

  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
|
|
|
|
/*
 * Python-level body of unhash(hash_value).
 *
 * Delegates to the C-level unhash and returns its result unchanged.  When
 * that call returns NULL, a traceback entry for "spacy.en.unhash" is added
 * and NULL is returned.
 */
static PyObject *__pyx_pf_5spacy_2en_6unhash(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_StringHash __pyx_v_hash_value) {
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_RefNannySetupContext("unhash", 0);

  __pyx_r = __pyx_f_5spacy_2en_unhash(__pyx_v_hash_value, 0);
  if (likely(__pyx_r)) {
    __Pyx_GOTREF(__pyx_r);
  } else {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 84; __pyx_clineno = __LINE__;
    __Pyx_AddTraceback("spacy.en.unhash", __pyx_clineno, __pyx_lineno, __pyx_filename);
    __pyx_r = NULL;
  }

  /* function exit code */
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
|
|
|
|
/* "spacy/en.pyx":93
|
|
*
|
|
*
|
|
* cdef unicode normalize_word_string(unicode word): # <<<<<<<<<<<<<<
|
|
* '''Return a normalized version of the word, mapping:
|
|
* - 4 digit strings into !YEAR
|
|
*/
|
|
|
|
/*
 * C-level implementation of (spacy/en.pyx:93)
 *     cdef unicode normalize_word_string(unicode word)
 *
 * Mirrors en.pyx lines 100-105:
 *   - word.isdigit() and len(word) == 4   -> the '!YEAR' constant
 *   - word[0].isdigit()                   -> the '!DIGITS' constant
 *   - otherwise                           -> word.lower()
 *
 * Returns a new reference, or NULL (with a traceback entry for
 * "spacy.en.normalize_word_string") when any Python call fails.
 */
static PyObject *__pyx_f_5spacy_2en_normalize_word_string(PyObject *__pyx_v_word) {
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  PyObject *__pyx_method = NULL;   /* bound method being called */
  PyObject *__pyx_result = NULL;   /* result of that call */
  int __pyx_all_digits;
  int __pyx_is_year = 0;
  Py_ssize_t __pyx_n_chars;
  Py_UCS4 __pyx_first;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_RefNannySetupContext("normalize_word_string", 0);

  /* en.pyx:100 -- if word.isdigit() and len(word) == 4: */
  __pyx_method = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_isdigit);
  if (unlikely(!__pyx_method)) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 100; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  __Pyx_GOTREF(__pyx_method);
  __pyx_result = __Pyx_PyObject_Call(__pyx_method, __pyx_empty_tuple, NULL);
  if (unlikely(!__pyx_result)) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 100; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  __Pyx_GOTREF(__pyx_result);
  __Pyx_DECREF(__pyx_method); __pyx_method = 0;
  __pyx_all_digits = __Pyx_PyObject_IsTrue(__pyx_result);
  if (unlikely(__pyx_all_digits < 0)) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 100; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  __Pyx_DECREF(__pyx_result); __pyx_result = 0;

  /* The length is only inspected when isdigit() was true (short-circuit). */
  if (__pyx_all_digits) {
    if (unlikely(__pyx_v_word == Py_None)) {
      PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()");
      __pyx_filename = __pyx_f[0]; __pyx_lineno = 100; __pyx_clineno = __LINE__;
      goto __pyx_L1_error;
    }
    __pyx_n_chars = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_word);
    if (unlikely(__pyx_n_chars == -1)) {
      __pyx_filename = __pyx_f[0]; __pyx_lineno = 100; __pyx_clineno = __LINE__;
      goto __pyx_L1_error;
    }
    __pyx_is_year = (__pyx_n_chars == 4);
  }

  /* en.pyx:101 -- return '!YEAR' */
  if (__pyx_is_year) {
    __Pyx_INCREF(__pyx_kp_u_YEAR);
    __pyx_r = __pyx_kp_u_YEAR;
    goto __pyx_L0;
  }

  /* en.pyx:102-103 -- elif word[0].isdigit(): return '!DIGITS' */
  __pyx_first = __Pyx_GetItemInt_Unicode(__pyx_v_word, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 1);
  if (unlikely(__pyx_first == (Py_UCS4)-1)) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 102; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  if (Py_UNICODE_ISDIGIT(__pyx_first) != 0) {
    __Pyx_INCREF(__pyx_kp_u_DIGITS);
    __pyx_r = __pyx_kp_u_DIGITS;
    goto __pyx_L0;
  }

  /* en.pyx:105 -- else: return word.lower() */
  __pyx_method = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_lower);
  if (unlikely(!__pyx_method)) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  __Pyx_GOTREF(__pyx_method);
  __pyx_result = __Pyx_PyObject_Call(__pyx_method, __pyx_empty_tuple, NULL);
  if (unlikely(!__pyx_result)) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  __Pyx_GOTREF(__pyx_result);
  __Pyx_DECREF(__pyx_method); __pyx_method = 0;
  /* The declared return type is unicode: accept an exact unicode or None. */
  if (!(likely(PyUnicode_CheckExact(__pyx_result))||((__pyx_result) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "unicode", Py_TYPE(__pyx_result)->tp_name), 0))) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 105; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  __pyx_r = ((PyObject*)__pyx_result);
  __pyx_result = 0;
  goto __pyx_L0;

  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_method);
  __Pyx_XDECREF(__pyx_result);
  __Pyx_AddTraceback("spacy.en.normalize_word_string", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = 0;
  __pyx_L0:;
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
|
|
|
|
/* "spacy/en.pyx":108
|
|
*
|
|
*
|
|
* cpdef unicode _substr(unicode string, int start, int end, size_t length): # <<<<<<<<<<<<<<
|
|
* if end >= length:
|
|
* end = -1
|
|
*/
|
|
|
|
static PyObject *__pyx_pw_5spacy_2en_9_substr(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
|
|
/*
 * C-level implementation of (spacy/en.pyx:108)
 *     cpdef unicode _substr(unicode string, int start, int end, size_t length)
 *
 * Normalises `start`/`end` against `length` (en.pyx lines 109-118) and
 * returns string[start:end]; when the normalised range is "no start, no
 * end" the original string object is returned with a new reference instead
 * of a slice.
 *
 * The comparisons of the int indices with the size_t `length` are left
 * exactly as generated: the int operand is converted to the unsigned type,
 * so a negative index compares as a very large value and takes the
 * "index >= length" branch.
 *
 * Returns a new reference, or NULL (with a traceback entry for
 * "spacy.en._substr") on error.  `__pyx_skip_dispatch` is unused.
 */
static PyObject *__pyx_f_5spacy_2en__substr(PyObject *__pyx_v_string, int __pyx_v_start, int __pyx_v_end, size_t __pyx_v_length, CYTHON_UNUSED int __pyx_skip_dispatch) {
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_RefNannySetupContext("_substr", 0);

  /* en.pyx:109-110 -- if end >= length: end = -1 */
  if (__pyx_v_end >= __pyx_v_length) {
    __pyx_v_end = -1;
  }

  /* en.pyx:111-112 -- if start >= length: start = 0 */
  if (__pyx_v_start >= __pyx_v_length) {
    __pyx_v_start = 0;
  }

  /* en.pyx:113-114 -- if start <= 0 and end < 0: return string */
  if ((__pyx_v_start <= 0) && (__pyx_v_end < 0)) {
    __Pyx_INCREF(__pyx_v_string);
    __pyx_r = __pyx_v_string;
    goto __pyx_L0;
  }

  /* en.pyx:115-118 -- elif start < 0: start = 0 / elif end < 0: end = length */
  if (__pyx_v_start < 0) {
    __pyx_v_start = 0;
  } else if (__pyx_v_end < 0) {
    __pyx_v_end = __pyx_v_length;
  }

  /* en.pyx:119 -- return string[start:end] */
  if (unlikely(__pyx_v_string == Py_None)) {
    PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 119; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  __pyx_r = __Pyx_PyUnicode_Substring(__pyx_v_string, __pyx_v_start, __pyx_v_end);
  if (unlikely(!__pyx_r)) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 119; __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  __Pyx_GOTREF(__pyx_r);
  goto __pyx_L0;

  /* function exit code */
  __pyx_L1_error:;
  __Pyx_AddTraceback("spacy.en._substr", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = 0;
  __pyx_L0:;
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
|
|
|
|
/* Python wrapper */
|
|
static PyObject *__pyx_pw_5spacy_2en_9_substr(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/

/* Python-callable wrapper for spacy.en._substr(string, start, end, length).
 *
 * Collects exactly four arguments, given positionally and/or by keyword,
 * converts `start`/`end` to C int and `length` to size_t, checks that
 * `string` passes the unicode argument type test, and forwards to
 * __pyx_pf_5spacy_2en_8_substr.
 *
 * Returns a new reference on success, or NULL with a Python exception set.
 */
static PyObject *__pyx_pw_5spacy_2en_9_substr(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  /* Keyword names in positional order; the trailing 0 terminates the table. */
  static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_string, &__pyx_n_s_start, &__pyx_n_s_end, &__pyx_n_s_length, 0};
  PyObject *values[4] = {0, 0, 0, 0};
  const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
  Py_ssize_t kw_args;
  Py_ssize_t slot;
  PyObject *__pyx_v_string = 0;
  int __pyx_v_start;
  int __pyx_v_end;
  size_t __pyx_v_length;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("_substr (wrapper)", 0);

  if (unlikely(__pyx_kwds)) {
    if (pos_args < 0 || pos_args > 4) goto __pyx_bad_argcount;

    /* Positional arguments fill the leading slots (borrowed references). */
    for (slot = 0; slot < pos_args; slot++) {
      values[slot] = PyTuple_GET_ITEM(__pyx_args, slot);
    }

    /* Every remaining slot must be supplied by keyword. */
    kw_args = PyDict_Size(__pyx_kwds);
    for (slot = pos_args; slot < 4; slot++) {
      values[slot] = PyDict_GetItem(__pyx_kwds, *__pyx_pyargnames[slot]);
      if (likely(values[slot] != 0)) {
        kw_args--;
        continue;
      }
      /* Slot 0 can only be missing when no positionals were passed. */
      if (slot == 0) goto __pyx_bad_argcount;
      __Pyx_RaiseArgtupleInvalid("_substr", 1, 4, 4, slot);
      __pyx_filename = __pyx_f[0]; __pyx_lineno = 108; __pyx_clineno = __LINE__;
      goto __pyx_unpack_error;
    }

    /* Keywords left over after the four names are handed to the generic parser. */
    if (unlikely(kw_args > 0)) {
      if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "_substr") < 0)) {
        __pyx_filename = __pyx_f[0]; __pyx_lineno = 108; __pyx_clineno = __LINE__;
        goto __pyx_unpack_error;
      }
    }
  } else if (pos_args != 4) {
    goto __pyx_bad_argcount;
  } else {
    for (slot = 0; slot < 4; slot++) {
      values[slot] = PyTuple_GET_ITEM(__pyx_args, slot);
    }
  }

  /* Convert the collected objects to the C-level argument types. */
  __pyx_v_string = ((PyObject*)values[0]);
  __pyx_v_start = __Pyx_PyInt_As_int(values[1]);
  if (unlikely((__pyx_v_start == (int)-1) && PyErr_Occurred())) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 108; __pyx_clineno = __LINE__;
    goto __pyx_unpack_error;
  }
  __pyx_v_end = __Pyx_PyInt_As_int(values[2]);
  if (unlikely((__pyx_v_end == (int)-1) && PyErr_Occurred())) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 108; __pyx_clineno = __LINE__;
    goto __pyx_unpack_error;
  }
  __pyx_v_length = __Pyx_PyInt_As_size_t(values[3]);
  if (unlikely((__pyx_v_length == (size_t)-1) && PyErr_Occurred())) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 108; __pyx_clineno = __LINE__;
    goto __pyx_unpack_error;
  }

  /* A failed type test returns NULL without adding a traceback entry. */
  if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_string), (&PyUnicode_Type), 1, "string", 1))) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 108; __pyx_clineno = __LINE__;
    __pyx_r = NULL;
  } else {
    __pyx_r = __pyx_pf_5spacy_2en_8_substr(__pyx_self, __pyx_v_string, __pyx_v_start, __pyx_v_end, __pyx_v_length);
  }
  __Pyx_RefNannyFinishContext();
  return __pyx_r;

  /* Wrong number of arguments. */
  __pyx_bad_argcount:;
  __Pyx_RaiseArgtupleInvalid("_substr", 1, 4, 4, pos_args);
  __pyx_filename = __pyx_f[0]; __pyx_lineno = 108; __pyx_clineno = __LINE__;

  /* Any failure while unpacking or converting arguments. */
  __pyx_unpack_error:;
  __Pyx_AddTraceback("spacy.en._substr", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
}
|
|
|
|
/* Python-level implementation of _substr: forwards the already-converted
 * arguments to the C-level __pyx_f_5spacy_2en__substr (last argument 0) and
 * hands its result back to the wrapper.
 *
 * Returns a new reference, or NULL with an exception set and a traceback
 * entry for "spacy.en._substr" added.
 */
static PyObject *__pyx_pf_5spacy_2en_8_substr(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_string, int __pyx_v_start, int __pyx_v_end, size_t __pyx_v_length) {
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_RefNannySetupContext("_substr", 0);

  __pyx_r = __pyx_f_5spacy_2en__substr(__pyx_v_string, __pyx_v_start, __pyx_v_end, __pyx_v_length, 0);
  if (unlikely(!__pyx_r)) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 108; __pyx_clineno = __LINE__;
    __Pyx_AddTraceback("spacy.en._substr", __pyx_clineno, __pyx_lineno, __pyx_filename);
  } else {
    __Pyx_GOTREF(__pyx_r);
  }

  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
|
|
|
|
/* "spacy/en.pyx":122
|
|
*
|
|
*
|
|
* cdef Lexeme* _add(StringHash hashed, unicode string, int split, size_t length) except NULL: # <<<<<<<<<<<<<<
|
|
* assert string
|
|
* assert split <= length
|
|
*/
|
|
|
|
/* C implementation of spacy/en.pyx:122
 *   cdef Lexeme* _add(StringHash hashed, unicode string, int split, size_t length) except NULL
 *
 * Builds a Lexeme for `string` via _init_lexeme, stores the pointer in the
 * module-level LEXEMES container under `hashed`, and records `string` in the
 * STRINGS module global under the same key.
 *
 * Unless assertions are compiled out or Py_OptimizeFlag is set, raises
 * AssertionError when `string` is None/empty or when `split > length`.
 *
 * Returns the new Lexeme pointer, or NULL with a Python exception set.
 */
static struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_f_5spacy_2en__add(__pyx_t_5spacy_6lexeme_StringHash __pyx_v_hashed, PyObject *__pyx_v_string, int __pyx_v_split, size_t __pyx_v_length) {
  struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_v_word;
  struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_r;
  __Pyx_RefNannyDeclarations
  PyObject *__pyx_strings = NULL;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_RefNannySetupContext("_add", 0);

  #ifndef CYTHON_WITHOUT_ASSERTIONS
  if (unlikely(!Py_OptimizeFlag)) {
    /* en.pyx:123  assert string */
    if (unlikely((__pyx_v_string == Py_None) || (PyUnicode_GET_SIZE(__pyx_v_string) == 0))) {
      PyErr_SetNone(PyExc_AssertionError);
      __pyx_lineno = 123; __pyx_clineno = __LINE__;
      goto __pyx_fail;
    }
    /* en.pyx:124  assert split <= length */
    if (unlikely(__pyx_v_split > __pyx_v_length)) {
      PyErr_SetNone(PyExc_AssertionError);
      __pyx_lineno = 124; __pyx_clineno = __LINE__;
      goto __pyx_fail;
    }
  }
  #endif

  /* en.pyx:125  word = _init_lexeme(string, hashed, split, length) */
  __pyx_v_word = __pyx_f_5spacy_2en__init_lexeme(__pyx_v_string, __pyx_v_hashed, __pyx_v_split, __pyx_v_length);
  if (unlikely(__pyx_v_word == NULL)) {
    __pyx_lineno = 125; __pyx_clineno = __LINE__;
    goto __pyx_fail;
  }

  /* en.pyx:126  LEXEMES[hashed] = word */
  (__pyx_v_5spacy_2en_LEXEMES[__pyx_v_hashed]) = __pyx_v_word;

  /* en.pyx:127  STRINGS[hashed] = string */
  __pyx_strings = __Pyx_GetModuleGlobalName(__pyx_n_s_STRINGS);
  if (unlikely(!__pyx_strings)) {
    __pyx_lineno = 127; __pyx_clineno = __LINE__;
    goto __pyx_fail;
  }
  __Pyx_GOTREF(__pyx_strings);
  if (unlikely(__Pyx_SetItemInt(__pyx_strings, __pyx_v_hashed, __pyx_v_string, __pyx_t_5spacy_6lexeme_StringHash, 0, __Pyx_PyInt_From_uint64_t, 0, 0, 1) < 0)) {
    __pyx_lineno = 127; __pyx_clineno = __LINE__;
    goto __pyx_fail;
  }
  __Pyx_DECREF(__pyx_strings); __pyx_strings = 0;

  /* en.pyx:128  return word */
  __pyx_r = __pyx_v_word;
  goto __pyx_done;

  /* Error exit: every failure site originates in the same source file. */
  __pyx_fail:;
  __pyx_filename = __pyx_f[0];
  __Pyx_XDECREF(__pyx_strings);
  __Pyx_AddTraceback("spacy.en._add", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;

  __pyx_done:;
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
|
|
|
|
/* "spacy/en.pyx":131
|
|
*
|
|
*
|
|
* cdef Lexeme* _init_lexeme(unicode string, StringHash hashed, # <<<<<<<<<<<<<<
|
|
* int split, size_t length) except NULL:
|
|
* assert split <= length
|
|
*/
|
|
|
|
/* C implementation of spacy/en.pyx:131
 *   cdef Lexeme* _init_lexeme(unicode string, StringHash hashed,
 *                             int split, size_t length) except NULL
 *
 * Allocates a zero-filled Lexeme and populates it:
 *   - first   : first code point of `string` (0 if `string` is None/empty)
 *   - sic     : `hashed`
 *   - lex     : hash of string[:split] when 0 < split < length, otherwise of
 *               the whole string
 *   - normed  : hash of normalize_word_string(lex)
 *   - last3   : hash of _substr(string, length - 3, length, length)
 *   - prob, cluster, oft_upper, oft_title : zero ("loaded later" per en.pyx)
 *   - tail    : lookup() of the text after `split`, when that text is non-empty
 * The lex/normed/last3 strings are also stored in the STRINGS module global
 * under their hashes.
 *
 * Returns the new Lexeme, or NULL with a Python exception set.
 *
 * Review fixes relative to the generated original:
 *   1. The calloc() result is checked; on failure MemoryError is raised
 *      instead of dereferencing NULL.
 *   2. The partially initialised Lexeme is freed on the error exit; it was
 *      previously leaked on every failure after allocation.
 * NOTE(review): this file is Cython output, so the same changes should be made
 * in spacy/en.pyx or they will be lost on regeneration.
 */
static struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_f_5spacy_2en__init_lexeme(PyObject *__pyx_v_string, __pyx_t_5spacy_6lexeme_StringHash __pyx_v_hashed, int __pyx_v_split, size_t __pyx_v_length) {
  /* Starts NULL so the error exit can free() it unconditionally. */
  struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_v_word = NULL;
  PyObject *__pyx_v_tail_string = 0;
  PyObject *__pyx_v_lex = 0;
  PyObject *__pyx_v_normed = 0;
  PyObject *__pyx_v_last3 = 0;
  struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_r;
  __Pyx_RefNannyDeclarations
  long __pyx_t_1;
  int __pyx_t_2;
  Py_UCS4 __pyx_t_3;
  int __pyx_t_5;
  PyObject *__pyx_t_6 = NULL;
  Py_ssize_t __pyx_t_7;
  __pyx_t_5spacy_6lexeme_StringHash __pyx_t_8;
  __pyx_t_5spacy_2en_Lexeme_addr __pyx_t_9;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_RefNannySetupContext("_init_lexeme", 0);

  /* en.pyx:133  assert split <= length */
  #ifndef CYTHON_WITHOUT_ASSERTIONS
  if (unlikely(!Py_OptimizeFlag)) {
    if (unlikely(!((__pyx_v_split <= __pyx_v_length) != 0))) {
      PyErr_SetNone(PyExc_AssertionError);
      {__pyx_filename = __pyx_f[0]; __pyx_lineno = 133; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    }
  }
  #endif

  /* en.pyx:134  cdef Lexeme* word = <Lexeme*>calloc(1, sizeof(Lexeme)) */
  __pyx_v_word = ((struct __pyx_t_5spacy_6lexeme_Lexeme *)calloc(1, (sizeof(struct __pyx_t_5spacy_6lexeme_Lexeme))));
  if (unlikely(__pyx_v_word == NULL)) {
    /* Allocation failed: report MemoryError rather than writing through NULL. */
    PyErr_NoMemory();
    {__pyx_filename = __pyx_f[0]; __pyx_lineno = 134; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  }

  /* en.pyx:136  word.first = <Py_UNICODE>(string[0] if string else 0) */
  __pyx_t_2 = (__pyx_v_string != Py_None) && (PyUnicode_GET_SIZE(__pyx_v_string) != 0);
  if (__pyx_t_2) {
    __pyx_t_3 = __Pyx_GetItemInt_Unicode(__pyx_v_string, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(__pyx_t_3 == (Py_UCS4)-1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 136; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
    __pyx_t_1 = __pyx_t_3;
  } else {
    __pyx_t_1 = 0;
  }
  __pyx_v_word->first = ((Py_UNICODE)__pyx_t_1);

  /* en.pyx:137  word.sic = hashed */
  __pyx_v_word->sic = __pyx_v_hashed;

  /* en.pyx:141  if split != 0 and split < length: */
  if ((__pyx_v_split != 0) && (__pyx_v_split < __pyx_v_length)) {
    /* en.pyx:142  lex = _substr(string, 0, split, length) */
    __pyx_t_6 = __pyx_f_5spacy_2en__substr(__pyx_v_string, 0, __pyx_v_split, __pyx_v_length, 0); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_6);
    __pyx_v_lex = ((PyObject*)__pyx_t_6);
    __pyx_t_6 = 0;

    /* en.pyx:143  tail_string = _substr(string, split, length, length) */
    __pyx_t_6 = __pyx_f_5spacy_2en__substr(__pyx_v_string, __pyx_v_split, __pyx_v_length, __pyx_v_length, 0); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 143; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_6);
    __pyx_v_tail_string = ((PyObject*)__pyx_t_6);
    __pyx_t_6 = 0;
  } else {
    /* en.pyx:145  lex = string */
    __Pyx_INCREF(__pyx_v_string);
    __pyx_v_lex = __pyx_v_string;

    /* en.pyx:146  tail_string = '' */
    __Pyx_INCREF(__pyx_kp_u_);
    __pyx_v_tail_string = __pyx_kp_u_;
  }

  /* en.pyx:147  assert lex */
  #ifndef CYTHON_WITHOUT_ASSERTIONS
  if (unlikely(!Py_OptimizeFlag)) {
    __pyx_t_5 = (__pyx_v_lex != Py_None) && (PyUnicode_GET_SIZE(__pyx_v_lex) != 0);
    if (unlikely(!__pyx_t_5)) {
      PyErr_SetNone(PyExc_AssertionError);
      {__pyx_filename = __pyx_f[0]; __pyx_lineno = 147; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    }
  }
  #endif

  /* en.pyx:148  cdef unicode normed = normalize_word_string(lex) */
  __pyx_t_6 = __pyx_f_5spacy_2en_normalize_word_string(__pyx_v_lex); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 148; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_6);
  __pyx_v_normed = ((PyObject*)__pyx_t_6);
  __pyx_t_6 = 0;

  /* en.pyx:149  cdef unicode last3 = _substr(string, length - 3, length, length)
   * NOTE(review): (length - 3) is evaluated in size_t, so for length < 3 it
   * wraps before being narrowed to the int `start` parameter; the result then
   * depends on how _substr treats out-of-range starts - confirm. */
  __pyx_t_6 = __pyx_f_5spacy_2en__substr(__pyx_v_string, (__pyx_v_length - 3), __pyx_v_length, __pyx_v_length, 0); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 149; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_6);
  __pyx_v_last3 = ((PyObject*)__pyx_t_6);
  __pyx_t_6 = 0;

  /* en.pyx:151  assert normed */
  #ifndef CYTHON_WITHOUT_ASSERTIONS
  if (unlikely(!Py_OptimizeFlag)) {
    __pyx_t_5 = (__pyx_v_normed != Py_None) && (PyUnicode_GET_SIZE(__pyx_v_normed) != 0);
    if (unlikely(!__pyx_t_5)) {
      PyErr_SetNone(PyExc_AssertionError);
      {__pyx_filename = __pyx_f[0]; __pyx_lineno = 151; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    }
  }
  #endif

  /* en.pyx:152  assert len(normed) */
  #ifndef CYTHON_WITHOUT_ASSERTIONS
  if (unlikely(!Py_OptimizeFlag)) {
    if (unlikely(__pyx_v_normed == Py_None)) {
      PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()");
      {__pyx_filename = __pyx_f[0]; __pyx_lineno = 152; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    }
    __pyx_t_7 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_normed); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 152; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    if (unlikely(!(__pyx_t_7 != 0))) {
      PyErr_SetNone(PyExc_AssertionError);
      {__pyx_filename = __pyx_f[0]; __pyx_lineno = 152; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    }
  }
  #endif

  /* en.pyx:154  word.lex = hash_string(lex, len(lex))
   * A hash_string result of 0 is treated as its error return. */
  if (unlikely(__pyx_v_lex == Py_None)) {
    PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()");
    {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  }
  __pyx_t_7 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_lex); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_t_8 = __pyx_f_5spacy_2en_hash_string(__pyx_v_lex, __pyx_t_7); if (unlikely(__pyx_t_8 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 154; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_v_word->lex = __pyx_t_8;

  /* en.pyx:155  word.normed = hash_string(normed, len(normed)) */
  if (unlikely(__pyx_v_normed == Py_None)) {
    PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()");
    {__pyx_filename = __pyx_f[0]; __pyx_lineno = 155; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  }
  __pyx_t_7 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_normed); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 155; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_t_8 = __pyx_f_5spacy_2en_hash_string(__pyx_v_normed, __pyx_t_7); if (unlikely(__pyx_t_8 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 155; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_v_word->normed = __pyx_t_8;

  /* en.pyx:156  word.last3 = hash_string(last3, len(last3)) */
  if (unlikely(__pyx_v_last3 == Py_None)) {
    PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()");
    {__pyx_filename = __pyx_f[0]; __pyx_lineno = 156; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  }
  __pyx_t_7 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_last3); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 156; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_t_8 = __pyx_f_5spacy_2en_hash_string(__pyx_v_last3, __pyx_t_7); if (unlikely(__pyx_t_8 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 156; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_v_word->last3 = __pyx_t_8;

  /* en.pyx:158  STRINGS[word.lex] = lex */
  __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_STRINGS); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_6);
  if (unlikely(__Pyx_SetItemInt(__pyx_t_6, __pyx_v_word->lex, __pyx_v_lex, __pyx_t_5spacy_6lexeme_StringHash, 0, __Pyx_PyInt_From_uint64_t, 0, 0, 1) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;

  /* en.pyx:159  STRINGS[word.normed] = normed */
  __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_STRINGS); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_6);
  if (unlikely(__Pyx_SetItemInt(__pyx_t_6, __pyx_v_word->normed, __pyx_v_normed, __pyx_t_5spacy_6lexeme_StringHash, 0, __Pyx_PyInt_From_uint64_t, 0, 0, 1) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;

  /* en.pyx:160  STRINGS[word.last3] = last3 */
  __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_STRINGS); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 160; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_6);
  if (unlikely(__Pyx_SetItemInt(__pyx_t_6, __pyx_v_word->last3, __pyx_v_last3, __pyx_t_5spacy_6lexeme_StringHash, 0, __Pyx_PyInt_From_uint64_t, 0, 0, 1) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 160; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;

  /* en.pyx:163-166  "These are loaded later": explicit zero defaults. */
  __pyx_v_word->prob = 0.0;
  __pyx_v_word->cluster = 0;
  __pyx_v_word->oft_upper = 0;
  __pyx_v_word->oft_title = 0;

  /* en.pyx:169-170  if tail_string: word.tail = <Lexeme*>lookup(tail_string)
   * A lookup() result of 0 is treated as its error return. */
  __pyx_t_5 = (__pyx_v_tail_string != Py_None) && (PyUnicode_GET_SIZE(__pyx_v_tail_string) != 0);
  if (__pyx_t_5) {
    __pyx_t_9 = __pyx_f_5spacy_2en_lookup(__pyx_v_tail_string, 0); if (unlikely(__pyx_t_9 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __pyx_v_word->tail = ((struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_t_9);
  }

  /* en.pyx:171  return word */
  __pyx_r = __pyx_v_word;
  goto __pyx_L0;

  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_6);
  __Pyx_AddTraceback("spacy.en._init_lexeme", __pyx_clineno, __pyx_lineno, __pyx_filename);
  /* The struct has not been published anywhere yet, so release it here;
   * free(NULL) is a no-op when the failure preceded allocation. */
  free(__pyx_v_word);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XDECREF(__pyx_v_tail_string);
  __Pyx_XDECREF(__pyx_v_lex);
  __Pyx_XDECREF(__pyx_v_normed);
  __Pyx_XDECREF(__pyx_v_last3);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
|
|
|
|
/* "spacy/en.pyx":174
|
|
*
|
|
*
|
|
* cdef size_t _find_split(unicode word, size_t length): # <<<<<<<<<<<<<<
|
|
* cdef int i = 0
|
|
* # Contractions
|
|
*/
|
|
|
|
/* C implementation of spacy/en.pyx:174
 *   cdef size_t _find_split(unicode word, size_t length)
 *
 * Chooses the index at which `word` should be split:
 *   - word ends with "'s"              -> length - 2
 *   - word[0] is not alphanumeric      -> 1
 *   - word[length-1] is not alphanumeric -> index reached by walking left from
 *     length - 1 while the preceding character is also non-alphanumeric
 *     (never below 1)
 *   - otherwise                        -> 0
 *
 * The function has no error return value: a Python exception raised here is
 * reported through __Pyx_WriteUnraisable and 0 is returned.
 *
 * Review fix: an empty `word` used to reach is_punct(word, 0, ...), whose
 * bounds-checked index raised an IndexError that was only printed as an
 * unraisable error before 0 was returned. Empty input now returns 0 directly.
 * NOTE(review): this file is Cython output, so mirror the guard in
 * spacy/en.pyx.
 */
static size_t __pyx_f_5spacy_2en__find_split(PyObject *__pyx_v_word, size_t __pyx_v_length) {
  int __pyx_v_i;
  size_t __pyx_r;
  __Pyx_RefNannyDeclarations
  int __pyx_t_1;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_RefNannySetupContext("_find_split", 0);

  /* en.pyx:175  cdef int i = 0 */
  __pyx_v_i = 0;

  /* en.pyx:177-178  Contractions: if word.endswith("'s"): return length - 2 */
  if (unlikely(__pyx_v_word == Py_None)) {
    PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "endswith");
    {__pyx_filename = __pyx_f[0]; __pyx_lineno = 177; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  }
  __pyx_t_1 = __Pyx_PyUnicode_Tailmatch(__pyx_v_word, __pyx_kp_u_s, 0, PY_SSIZE_T_MAX, 1); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 177; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  if ((__pyx_t_1 != 0)) {
    __pyx_r = (__pyx_v_length - 2);
    goto __pyx_L0;
  }

  /* Nothing to inspect in an empty string: skip the character probes below,
   * which would index out of range. The result (0) matches what the probes
   * produced after their error was swallowed. */
  if (__Pyx_PyUnicode_GET_LENGTH(__pyx_v_word) == 0) {
    __pyx_r = 0;
    goto __pyx_L0;
  }

  /* en.pyx:180-181  Leading punctuation: if is_punct(word, 0, length): return 1 */
  if (__pyx_f_5spacy_2en_is_punct(__pyx_v_word, 0, __pyx_v_length) != 0) {
    __pyx_r = 1;
    goto __pyx_L0;
  }

  /* en.pyx:182  elif length >= 1 and is_punct(word, length - 1, length): */
  if ((__pyx_v_length >= 1) && (__pyx_f_5spacy_2en_is_punct(__pyx_v_word, (__pyx_v_length - 1), __pyx_v_length) != 0)) {
    /* en.pyx:184-186  Split off all trailing punctuation characters. */
    __pyx_v_i = (int)(__pyx_v_length - 1);
    while ((__pyx_v_i >= 2) && (__pyx_f_5spacy_2en_is_punct(__pyx_v_word, (__pyx_v_i - 1), __pyx_v_length) != 0)) {
      __pyx_v_i = (__pyx_v_i - 1);
    }
  }

  /* en.pyx:187  return i */
  __pyx_r = __pyx_v_i;
  goto __pyx_L0;

  /* function exit code */
  __pyx_L1_error:;
  __Pyx_WriteUnraisable("spacy.en._find_split", __pyx_clineno, __pyx_lineno, __pyx_filename, 0);
  __pyx_r = 0;
  __pyx_L0:;
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
|
|
|
|
/* "spacy/en.pyx":190
|
|
*
|
|
*
|
|
* cdef bint is_punct(unicode word, size_t i, size_t length): # <<<<<<<<<<<<<<
|
|
* return not word[i].isalnum()
|
|
*/
|
|
|
|
/* C implementation of spacy/en.pyx:190
 *   cdef bint is_punct(unicode word, size_t i, size_t length):
 *       return not word[i].isalnum()
 *
 * Returns 1 when the character at index `i` of `word` is not alphanumeric and
 * 0 when it is. `length` is accepted but not used.
 *
 * The function has no error return value: if the indexed read fails, the
 * exception is reported through __Pyx_WriteUnraisable and 0 is returned.
 */
static int __pyx_f_5spacy_2en_is_punct(PyObject *__pyx_v_word, size_t __pyx_v_i, CYTHON_UNUSED size_t __pyx_v_length) {
  int __pyx_r;
  __Pyx_RefNannyDeclarations
  Py_UCS4 __pyx_ch;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_RefNannySetupContext("is_punct", 0);

  /* Bounds-checked read of word[i]; (Py_UCS4)-1 signals failure. */
  __pyx_ch = __Pyx_GetItemInt_Unicode(__pyx_v_word, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1);
  if (unlikely(__pyx_ch == (Py_UCS4)-1)) {
    __pyx_filename = __pyx_f[0]; __pyx_lineno = 191; __pyx_clineno = __LINE__;
    __Pyx_WriteUnraisable("spacy.en.is_punct", __pyx_clineno, __pyx_lineno, __pyx_filename, 0);
    __pyx_r = 0;
  } else if (Py_UNICODE_ISALNUM(__pyx_ch) != 0) {
    __pyx_r = 0;
  } else {
    __pyx_r = 1;
  }

  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
|
|
|
|
/* Method table for the module: one entry per Python-visible function,
 * giving its name, C wrapper, calling convention and docstring. */
static PyMethodDef __pyx_methods[] = {
  { /* lookup: takes a single object argument */
    __Pyx_NAMESTR("lookup"),
    (PyCFunction)__pyx_pw_5spacy_2en_3lookup,
    METH_O,
    __Pyx_DOCSTR(__pyx_doc_5spacy_2en_2lookup)
  },
  { /* lookup_chunk: positional and keyword arguments */
    __Pyx_NAMESTR("lookup_chunk"),
    (PyCFunction)__pyx_pw_5spacy_2en_5lookup_chunk,
    METH_VARARGS|METH_KEYWORDS,
    __Pyx_DOCSTR(__pyx_doc_5spacy_2en_4lookup_chunk)
  },
  { /* unhash: takes a single object argument */
    __Pyx_NAMESTR("unhash"),
    (PyCFunction)__pyx_pw_5spacy_2en_7unhash,
    METH_O,
    __Pyx_DOCSTR(__pyx_doc_5spacy_2en_6unhash)
  },
  { /* _substr: positional and keyword arguments, no docstring */
    __Pyx_NAMESTR("_substr"),
    (PyCFunction)__pyx_pw_5spacy_2en_9_substr,
    METH_VARARGS|METH_KEYWORDS,
    __Pyx_DOCSTR(0)
  },
  {0, 0, 0, 0} /* sentinel terminating the table */
};
|
|
|
|
#if PY_MAJOR_VERSION >= 3
/* Module definition for "en", compiled only for Python 3.
 * The header initialiser is spelled out by hand for interpreters older than
 * 3.2 and uses PyModuleDef_HEAD_INIT otherwise. */
static struct PyModuleDef __pyx_moduledef = {
  #if PY_VERSION_HEX < 0x03020000
    { PyObject_HEAD_INIT(NULL) NULL, 0, NULL },
  #else
    PyModuleDef_HEAD_INIT,
  #endif
    __Pyx_NAMESTR("en"),                                      /* m_name */
    __Pyx_DOCSTR(__pyx_k_Serve_pointers_to_Lexeme_structs),   /* m_doc */
    -1,                                                       /* m_size */
    __pyx_methods,                                            /* m_methods */
    NULL,                                                     /* m_reload */
    NULL,                                                     /* m_traverse */
    NULL,                                                     /* m_clear */
    NULL                                                      /* m_free */
};
#endif
|
|
|
|
/* String constants of the module, passed to __Pyx_InitStrings() from
 * __Pyx_InitGlobals().  The first three fields of a row are the address of
 * the cached Python object, the C constant, and the constant's size.
 * NOTE(review): the four trailing values look like
 * {encoding, is_unicode, is_str, intern} -- confirm against the
 * __Pyx_StringTabEntry typedef, which is declared outside this section.
 * The all-zero row is the end marker. */
static __Pyx_StringTabEntry __pyx_string_tab[] = {
    { &__pyx_n_s_, __pyx_k_, sizeof(__pyx_k_),
      0, 0, 1, 1 },
    { &__pyx_kp_u_, __pyx_k_, sizeof(__pyx_k_),
      0, 1, 0, 0 },
    { &__pyx_kp_u_DIGITS, __pyx_k_DIGITS, sizeof(__pyx_k_DIGITS),
      0, 1, 0, 0 },
    { &__pyx_n_s_LEXEMES, __pyx_k_LEXEMES, sizeof(__pyx_k_LEXEMES),
      0, 0, 1, 1 },
    { &__pyx_n_s_STRINGS, __pyx_k_STRINGS, sizeof(__pyx_k_STRINGS),
      0, 0, 1, 1 },
    { &__pyx_kp_s_Users_matt_repos_spaCy_spacy_en, __pyx_k_Users_matt_repos_spaCy_spacy_en, sizeof(__pyx_k_Users_matt_repos_spaCy_spacy_en),
      0, 0, 1, 0 },
    { &__pyx_n_s_ValueError, __pyx_k_ValueError, sizeof(__pyx_k_ValueError),
      0, 0, 1, 1 },
    { &__pyx_kp_u_YEAR, __pyx_k_YEAR, sizeof(__pyx_k_YEAR),
      0, 1, 0, 0 },
    { &__pyx_n_s_chunk, __pyx_k_chunk, sizeof(__pyx_k_chunk),
      0, 0, 1, 1 },
    { &__pyx_n_s_cluster, __pyx_k_cluster, sizeof(__pyx_k_cluster),
      0, 0, 1, 1 },
    { &__pyx_n_u_en, __pyx_k_en, sizeof(__pyx_k_en),
      0, 1, 0, 1 },
    { &__pyx_n_s_end, __pyx_k_end, sizeof(__pyx_k_end),
      0, 0, 1, 1 },
    { &__pyx_n_s_enumerate, __pyx_k_enumerate, sizeof(__pyx_k_enumerate),
      0, 0, 1, 1 },
    { &__pyx_n_s_first, __pyx_k_first, sizeof(__pyx_k_first),
      0, 0, 1, 1 },
    { &__pyx_n_s_hashed, __pyx_k_hashed, sizeof(__pyx_k_hashed),
      0, 0, 1, 1 },
    { &__pyx_n_s_i, __pyx_k_i, sizeof(__pyx_k_i),
      0, 0, 1, 1 },
    { &__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import),
      0, 0, 1, 1 },
    { &__pyx_n_s_isdigit, __pyx_k_isdigit, sizeof(__pyx_k_isdigit),
      0, 0, 1, 1 },
    { &__pyx_n_s_last3, __pyx_k_last3, sizeof(__pyx_k_last3),
      0, 0, 1, 1 },
    { &__pyx_n_s_length, __pyx_k_length, sizeof(__pyx_k_length),
      0, 0, 1, 1 },
    { &__pyx_n_s_lex, __pyx_k_lex, sizeof(__pyx_k_lex),
      0, 0, 1, 1 },
    { &__pyx_n_s_load_tokenization, __pyx_k_load_tokenization, sizeof(__pyx_k_load_tokenization),
      0, 0, 1, 1 },
    { &__pyx_n_s_lower, __pyx_k_lower, sizeof(__pyx_k_lower),
      0, 0, 1, 1 },
    { &__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main),
      0, 0, 1, 1 },
    { &__pyx_n_s_normed, __pyx_k_normed, sizeof(__pyx_k_normed),
      0, 0, 1, 1 },
    { &__pyx_n_s_oft_title, __pyx_k_oft_title, sizeof(__pyx_k_oft_title),
      0, 0, 1, 1 },
    { &__pyx_n_s_oft_upper, __pyx_k_oft_upper, sizeof(__pyx_k_oft_upper),
      0, 0, 1, 1 },
    { &__pyx_n_s_prob, __pyx_k_prob, sizeof(__pyx_k_prob),
      0, 0, 1, 1 },
    { &__pyx_n_s_pyx_capi, __pyx_k_pyx_capi, sizeof(__pyx_k_pyx_capi),
      0, 0, 1, 1 },
    { &__pyx_n_s_read_tokenization, __pyx_k_read_tokenization, sizeof(__pyx_k_read_tokenization),
      0, 0, 1, 1 },
    { &__pyx_kp_u_s, __pyx_k_s, sizeof(__pyx_k_s),
      0, 1, 0, 0 },
    { &__pyx_kp_u_s_d_s, __pyx_k_s_d_s, sizeof(__pyx_k_s_d_s),
      0, 1, 0, 0 },
    { &__pyx_n_s_sic, __pyx_k_sic, sizeof(__pyx_k_sic),
      0, 0, 1, 1 },
    { &__pyx_n_s_spacy_en, __pyx_k_spacy_en, sizeof(__pyx_k_spacy_en),
      0, 0, 1, 1 },
    { &__pyx_n_s_start, __pyx_k_start, sizeof(__pyx_k_start),
      0, 0, 1, 1 },
    { &__pyx_n_s_string, __pyx_k_string, sizeof(__pyx_k_string),
      0, 0, 1, 1 },
    { &__pyx_n_s_tail, __pyx_k_tail, sizeof(__pyx_k_tail),
      0, 0, 1, 1 },
    { &__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test),
      0, 0, 1, 1 },
    { &__pyx_n_s_token_rules, __pyx_k_token_rules, sizeof(__pyx_k_token_rules),
      0, 0, 1, 1 },
    { &__pyx_n_s_token_string, __pyx_k_token_string, sizeof(__pyx_k_token_string),
      0, 0, 1, 1 },
    { &__pyx_n_s_tokens, __pyx_k_tokens, sizeof(__pyx_k_tokens),
      0, 0, 1, 1 },
    { &__pyx_n_s_util, __pyx_k_util, sizeof(__pyx_k_util),
      0, 0, 1, 1 },
    { &__pyx_n_s_word, __pyx_k_word, sizeof(__pyx_k_word),
      0, 0, 1, 1 },
    { 0, 0, 0, 0, 0, 0, 0 }
};
|
|
/* Resolve the builtins this module refers to (enumerate, ValueError) once and
 * keep them in the __pyx_builtin_* variables.
 * Returns 0 on success.  On a failed lookup the error position is stored in
 * __pyx_filename / __pyx_lineno / __pyx_clineno and -1 is returned. */
static int __Pyx_InitCachedBuiltins(void) {
    __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate);
    if (!__pyx_builtin_enumerate) {
        __pyx_filename = __pyx_f[0];
        __pyx_lineno = 31;
        __pyx_clineno = __LINE__;
        return -1;
    }

    __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError);
    if (!__pyx_builtin_ValueError) {
        __pyx_filename = __pyx_f[0];
        __pyx_lineno = 88;
        __pyx_clineno = __LINE__;
        return -1;
    }

    return 0;
}
|
|
|
|
static int __Pyx_InitCachedConstants(void) {
|
|
__Pyx_RefNannyDeclarations
|
|
__Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0);
|
|
|
|
/* "spacy/en.pyx":24
|
|
*
|
|
*
|
|
* def load_tokenization(token_rules): # <<<<<<<<<<<<<<
|
|
* cdef Lexeme* word
|
|
* cdef StringHash hashed
|
|
*/
|
|
__pyx_tuple__2 = PyTuple_Pack(9, __pyx_n_s_token_rules, __pyx_n_s_word, __pyx_n_s_hashed, __pyx_n_s_chunk, __pyx_n_s_lex, __pyx_n_s_tokens, __pyx_n_s_i, __pyx_n_s_token_string, __pyx_n_s_length); if (unlikely(!__pyx_tuple__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
__Pyx_GOTREF(__pyx_tuple__2);
|
|
__Pyx_GIVEREF(__pyx_tuple__2);
|
|
__pyx_codeobj__3 = (PyObject*)__Pyx_PyCode_New(1, 0, 9, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__2, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_matt_repos_spaCy_spacy_en, __pyx_n_s_load_tokenization, 24, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
|
|
/* "spacy/en.pyx":39
|
|
*
|
|
*
|
|
* load_tokenization(util.read_tokenization('en')) # <<<<<<<<<<<<<<
|
|
*
|
|
* cpdef Lexeme_addr lookup(unicode string) except 0:
|
|
*/
|
|
__pyx_tuple__4 = PyTuple_Pack(1, __pyx_n_u_en); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
__Pyx_GOTREF(__pyx_tuple__4);
|
|
__Pyx_GIVEREF(__pyx_tuple__4);
|
|
__Pyx_RefNannyFinishContext();
|
|
return 0;
|
|
__pyx_L1_error:;
|
|
__Pyx_RefNannyFinishContext();
|
|
return -1;
|
|
}
|
|
|
|
static int __Pyx_InitGlobals(void) {
|
|
if (__Pyx_InitStrings(__pyx_string_tab) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
|
|
__pyx_int_0 = PyInt_FromLong(0); if (unlikely(!__pyx_int_0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
__pyx_int_1 = PyInt_FromLong(1); if (unlikely(!__pyx_int_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
return 0;
|
|
__pyx_L1_error:;
|
|
return -1;
|
|
}
|
|
|
|
#if PY_MAJOR_VERSION < 3
|
|
PyMODINIT_FUNC initen(void); /*proto*/
|
|
PyMODINIT_FUNC initen(void)
|
|
#else
|
|
PyMODINIT_FUNC PyInit_en(void); /*proto*/
|
|
PyMODINIT_FUNC PyInit_en(void)
|
|
#endif
|
|
{
|
|
PyObject *__pyx_t_1 = NULL;
|
|
PyObject *__pyx_t_2 = NULL;
|
|
struct __pyx_t_5spacy_6lexeme_Lexeme __pyx_t_3;
|
|
PyObject *__pyx_t_4 = NULL;
|
|
int __pyx_lineno = 0;
|
|
const char *__pyx_filename = NULL;
|
|
int __pyx_clineno = 0;
|
|
__Pyx_RefNannyDeclarations
|
|
#if CYTHON_REFNANNY
|
|
__Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny");
|
|
if (!__Pyx_RefNanny) {
|
|
PyErr_Clear();
|
|
__Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny");
|
|
if (!__Pyx_RefNanny)
|
|
Py_FatalError("failed to import 'refnanny' module");
|
|
}
|
|
#endif
|
|
__Pyx_RefNannySetupContext("PyMODINIT_FUNC PyInit_en(void)", 0);
|
|
if ( __Pyx_check_binary_version() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
__pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
__pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
#ifdef __Pyx_CyFunction_USED
|
|
if (__Pyx_CyFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
#endif
|
|
#ifdef __Pyx_FusedFunction_USED
|
|
if (__pyx_FusedFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
#endif
|
|
#ifdef __Pyx_Generator_USED
|
|
if (__pyx_Generator_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
#endif
|
|
/*--- Library function declarations ---*/
|
|
/*--- Threads initialization code ---*/
|
|
#if defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS
|
|
#ifdef WITH_THREAD /* Python build with threading support? */
|
|
PyEval_InitThreads();
|
|
#endif
|
|
#endif
|
|
/*--- Module creation code ---*/
|
|
#if PY_MAJOR_VERSION < 3
|
|
__pyx_m = Py_InitModule4(__Pyx_NAMESTR("en"), __pyx_methods, __Pyx_DOCSTR(__pyx_k_Serve_pointers_to_Lexeme_structs), 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m);
|
|
#else
|
|
__pyx_m = PyModule_Create(&__pyx_moduledef);
|
|
#endif
|
|
if (unlikely(!__pyx_m)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
__pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
Py_INCREF(__pyx_d);
|
|
__pyx_b = PyImport_AddModule(__Pyx_NAMESTR(__Pyx_BUILTIN_MODULE_NAME)); if (unlikely(!__pyx_b)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
#if CYTHON_COMPILING_IN_PYPY
|
|
Py_INCREF(__pyx_b);
|
|
#endif
|
|
if (__Pyx_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
|
|
/*--- Initialize various global constants etc. ---*/
|
|
if (unlikely(__Pyx_InitGlobals() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
#if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT)
|
|
if (__Pyx_init_sys_getdefaultencoding_params() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
#endif
|
|
if (__pyx_module_is_main_spacy__en) {
|
|
if (__Pyx_SetAttrString(__pyx_m, "__name__", __pyx_n_s_main) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
|
|
}
|
|
#if PY_MAJOR_VERSION >= 3
|
|
{
|
|
PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
if (!PyDict_GetItemString(modules, "spacy.en")) {
|
|
if (unlikely(PyDict_SetItemString(modules, "spacy.en", __pyx_m) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
}
|
|
}
|
|
#endif
|
|
/*--- Builtin init code ---*/
|
|
if (unlikely(__Pyx_InitCachedBuiltins() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
/*--- Constants init code ---*/
|
|
if (unlikely(__Pyx_InitCachedConstants() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
/*--- Global init code ---*/
|
|
/*--- Variable export code ---*/
|
|
if (__Pyx_ExportVoidPtr(__pyx_n_s_LEXEMES, (void *)&__pyx_v_5spacy_2en_LEXEMES, "google::dense_hash_map<__pyx_t_5spacy_6lexeme_StringHash,__pyx_t_5spacy_2en_Lexeme_ptr>") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
/*--- Function export code ---*/
|
|
if (__Pyx_ExportFunction("lookup", (void (*)(void))__pyx_f_5spacy_2en_lookup, "__pyx_t_5spacy_2en_Lexeme_addr (PyObject *, int __pyx_skip_dispatch)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
if (__Pyx_ExportFunction("lookup_chunk", (void (*)(void))__pyx_f_5spacy_2en_lookup_chunk, "__pyx_t_5spacy_2en_Lexeme_addr (PyObject *, int, int, int __pyx_skip_dispatch)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
if (__Pyx_ExportFunction("hash_string", (void (*)(void))__pyx_f_5spacy_2en_hash_string, "__pyx_t_5spacy_6lexeme_StringHash (PyObject *, size_t)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
if (__Pyx_ExportFunction("unhash", (void (*)(void))__pyx_f_5spacy_2en_unhash, "PyObject *(__pyx_t_5spacy_6lexeme_StringHash, int __pyx_skip_dispatch)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
/*--- Type init code ---*/
|
|
/*--- Type import code ---*/
|
|
/*--- Variable import code ---*/
|
|
/*--- Function import code ---*/
|
|
/*--- Execution code ---*/
|
|
|
|
/* "spacy/en.pyx":13
|
|
* from ext.murmurhash cimport MurmurHash64A
|
|
* from ext.murmurhash cimport MurmurHash64B
|
|
* from . import util # <<<<<<<<<<<<<<
|
|
*
|
|
*
|
|
*/
|
|
__pyx_t_1 = PyList_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
__Pyx_GOTREF(__pyx_t_1);
|
|
__Pyx_INCREF(__pyx_n_s_util);
|
|
PyList_SET_ITEM(__pyx_t_1, 0, __pyx_n_s_util);
|
|
__Pyx_GIVEREF(__pyx_n_s_util);
|
|
__pyx_t_2 = __Pyx_Import(__pyx_n_s_, __pyx_t_1, 1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
__Pyx_GOTREF(__pyx_t_2);
|
|
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
|
__pyx_t_1 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_util); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
__Pyx_GOTREF(__pyx_t_1);
|
|
if (PyDict_SetItem(__pyx_d, __pyx_n_s_util, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 13; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
|
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
|
|
|
|
/* "spacy/en.pyx":16
|
|
*
|
|
*
|
|
* STRINGS = {} # <<<<<<<<<<<<<<
|
|
* LEXEMES = dense_hash_map[StringHash, Lexeme_ptr]()
|
|
* LEXEMES.set_empty_key(0)
|
|
*/
|
|
__pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
__Pyx_GOTREF(__pyx_t_2);
|
|
if (PyDict_SetItem(__pyx_d, __pyx_n_s_STRINGS, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
|
|
|
|
/* "spacy/en.pyx":17
|
|
*
|
|
* STRINGS = {}
|
|
* LEXEMES = dense_hash_map[StringHash, Lexeme_ptr]() # <<<<<<<<<<<<<<
|
|
* LEXEMES.set_empty_key(0)
|
|
*
|
|
*/
|
|
__pyx_v_5spacy_2en_LEXEMES = google::dense_hash_map<__pyx_t_5spacy_6lexeme_StringHash,__pyx_t_5spacy_2en_Lexeme_ptr>();
|
|
|
|
/* "spacy/en.pyx":18
|
|
* STRINGS = {}
|
|
* LEXEMES = dense_hash_map[StringHash, Lexeme_ptr]()
|
|
* LEXEMES.set_empty_key(0) # <<<<<<<<<<<<<<
|
|
*
|
|
*
|
|
*/
|
|
__pyx_v_5spacy_2en_LEXEMES.set_empty_key(0);
|
|
|
|
/* "spacy/en.pyx":21
|
|
*
|
|
*
|
|
* cdef Lexeme BLANK_WORD = Lexeme(0, 0, 0, 0, 0, 0.0, 0, False, False, NULL) # <<<<<<<<<<<<<<
|
|
*
|
|
*
|
|
*/
|
|
__pyx_t_3.sic = 0;
|
|
__pyx_t_3.lex = 0;
|
|
__pyx_t_3.normed = 0;
|
|
__pyx_t_3.last3 = 0;
|
|
__pyx_t_3.first = 0;
|
|
__pyx_t_3.prob = 0.0;
|
|
__pyx_t_3.cluster = 0;
|
|
__pyx_t_3.oft_upper = 0;
|
|
__pyx_t_3.oft_title = 0;
|
|
__pyx_t_3.tail = NULL;
|
|
__pyx_v_5spacy_2en_BLANK_WORD = __pyx_t_3;
|
|
|
|
/* "spacy/en.pyx":24
|
|
*
|
|
*
|
|
* def load_tokenization(token_rules): # <<<<<<<<<<<<<<
|
|
* cdef Lexeme* word
|
|
* cdef StringHash hashed
|
|
*/
|
|
__pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5spacy_2en_1load_tokenization, NULL, __pyx_n_s_spacy_en); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
__Pyx_GOTREF(__pyx_t_2);
|
|
if (PyDict_SetItem(__pyx_d, __pyx_n_s_load_tokenization, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
|
|
|
|
/* "spacy/en.pyx":39
|
|
*
|
|
*
|
|
* load_tokenization(util.read_tokenization('en')) # <<<<<<<<<<<<<<
|
|
*
|
|
* cpdef Lexeme_addr lookup(unicode string) except 0:
|
|
*/
|
|
__pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_load_tokenization); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
__Pyx_GOTREF(__pyx_t_2);
|
|
__pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_util); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
__Pyx_GOTREF(__pyx_t_1);
|
|
__pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_read_tokenization); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
__Pyx_GOTREF(__pyx_t_4);
|
|
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
|
__pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
__Pyx_GOTREF(__pyx_t_1);
|
|
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
|
|
__pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
__Pyx_GOTREF(__pyx_t_4);
|
|
PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_1);
|
|
__Pyx_GIVEREF(__pyx_t_1);
|
|
__pyx_t_1 = 0;
|
|
__pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_4, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
__Pyx_GOTREF(__pyx_t_1);
|
|
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
|
|
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
|
|
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
|
|
|
/* "spacy/en.pyx":1
|
|
* '''Serve pointers to Lexeme structs, given strings. Maintain a reverse index, # <<<<<<<<<<<<<<
|
|
* so that strings can be retrieved from hashes. Use 64-bit hash values and
|
|
* boldly assume no collisions.
|
|
*/
|
|
__pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
__Pyx_GOTREF(__pyx_t_1);
|
|
if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
|
|
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
|
goto __pyx_L0;
|
|
__pyx_L1_error:;
|
|
__Pyx_XDECREF(__pyx_t_1);
|
|
__Pyx_XDECREF(__pyx_t_2);
|
|
__Pyx_XDECREF(__pyx_t_4);
|
|
if (__pyx_m) {
|
|
__Pyx_AddTraceback("init spacy.en", __pyx_clineno, __pyx_lineno, __pyx_filename);
|
|
Py_DECREF(__pyx_m); __pyx_m = 0;
|
|
} else if (!PyErr_Occurred()) {
|
|
PyErr_SetString(PyExc_ImportError, "init spacy.en");
|
|
}
|
|
__pyx_L0:;
|
|
__Pyx_RefNannyFinishContext();
|
|
#if PY_MAJOR_VERSION < 3
|
|
return;
|
|
#else
|
|
return __pyx_m;
|
|
#endif
|
|
}
|
|
|
|
/* Runtime support code */
|
|
#if CYTHON_REFNANNY
/* Import module `modname`, read its "RefNannyAPI" attribute and convert that
 * Python integer to a pointer.  Returns the pointer, or NULL when the import
 * or the attribute lookup fails (the Python error is left for the caller). */
static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) {
    void *api = NULL;
    PyObject *attr = NULL;
    PyObject *module = PyImport_ImportModule((char *)modname);

    if (module) {
        attr = PyObject_GetAttrString(module, (char *)"RefNannyAPI");
        if (attr) {
            api = PyLong_AsVoidPtr(attr);
        }
    }
    Py_XDECREF(attr);
    Py_XDECREF(module);
    return (__Pyx_RefNannyAPIStruct *)api;
}
#endif /* CYTHON_REFNANNY */
|
|
|
|
/* Fetch attribute `name` from the builtins module (__pyx_b).
 * Returns the attribute, or NULL with a NameError set when the lookup
 * fails. */
static PyObject *__Pyx_GetBuiltinName(PyObject *name) {
    PyObject *found = __Pyx_PyObject_GetAttrStr(__pyx_b, name);
    if (likely(found)) {
        return found;
    }
#if PY_MAJOR_VERSION >= 3
    PyErr_Format(PyExc_NameError,
                 "name '%U' is not defined", name);
#else
    PyErr_Format(PyExc_NameError,
                 "name '%.200s' is not defined", PyString_AS_STRING(name));
#endif
    return found;
}
|
|
|
|
/* Set ValueError("too many values to unpack (expected N)") with
 * N = `expected`. */
static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected) {
    const char *message =
        "too many values to unpack (expected %" CYTHON_FORMAT_SSIZE_T "d)";
    PyErr_Format(PyExc_ValueError, message, expected);
}
|
|
|
|
/* Set ValueError("need more than N value(s) to unpack") with N = `index`;
 * the plural "s" is dropped only when N is 1. */
static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index) {
    const char *plural_suffix = (index == 1) ? "" : "s";
    PyErr_Format(PyExc_ValueError,
                 "need more than %" CYTHON_FORMAT_SSIZE_T "d value%.1s to unpack",
                 index, plural_suffix);
}
|
|
|
|
/* Inspect the pending exception after an iterator has stopped.
 * Returns 0 when nothing is pending or when the pending exception matches
 * StopIteration (which is then cleared); returns -1 for any other pending
 * exception, which is left in place. */
static CYTHON_INLINE int __Pyx_IterFinish(void) {
#if CYTHON_COMPILING_IN_CPYTHON
    PyThreadState *ts = PyThreadState_GET();
    PyObject *pending_type = ts->curexc_type;
    PyObject *pending_value, *pending_tb;

    if (likely(!pending_type)) {
        return 0;
    }
    if (unlikely(pending_type != PyExc_StopIteration) &&
            !PyErr_GivenExceptionMatches(pending_type, PyExc_StopIteration)) {
        return -1;
    }

    /* Clear the StopIteration directly on the thread state: detach the three
     * slots first, then drop the references. */
    pending_value = ts->curexc_value;
    pending_tb = ts->curexc_traceback;
    ts->curexc_type = 0;
    ts->curexc_value = 0;
    ts->curexc_traceback = 0;
    Py_DECREF(pending_type);
    Py_XDECREF(pending_value);
    Py_XDECREF(pending_tb);
    return 0;
#else
    if (likely(!PyErr_Occurred())) {
        return 0;
    }
    if (unlikely(!PyErr_ExceptionMatches(PyExc_StopIteration))) {
        return -1;
    }
    PyErr_Clear();
    return 0;
#endif
}
|
|
|
|
/* Final check of an unpacking loop, called with the result of one more
 * "next" call on the iterator.
 *
 *   retval   -- extra item produced by the iterator, or NULL if it produced
 *               none.  A non-NULL reference is consumed here.
 *   expected -- number of values the unpacking target expects; used only for
 *               the error message.
 *
 * Returns -1 with ValueError("too many values to unpack ...") set when an
 * extra item was produced; otherwise returns whatever __Pyx_IterFinish()
 * reports for the pending-exception state.
 *
 * The original had an unreachable "return 0;" after an if/else in which both
 * arms return; it is dropped here. */
static int __Pyx_IternextUnpackEndCheck(PyObject *retval, Py_ssize_t expected) {
    if (unlikely(retval)) {
        Py_DECREF(retval);
        __Pyx_RaiseTooManyValuesError(expected);
        return -1;
    }
    return __Pyx_IterFinish();
}
|
|
|
|
/* Equality test specialised for exact bytes objects.
 *
 *   s1, s2 -- objects to compare
 *   equals -- Py_EQ or Py_NE, the comparison being evaluated
 *
 * Returns the truth value of the comparison (0 or 1), or -1 with an
 * exception set when the generic rich comparison fails.  Identical pointers,
 * two exact bytes objects, and None-versus-bytes are decided without calling
 * into Python; everything else goes through PyObject_RichCompare(). */
static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) {
#if CYTHON_COMPILING_IN_PYPY
    return PyObject_RichCompareBool(s1, s2, equals);
#else
    if (s1 == s2) {
        return (equals == Py_EQ);
    }

    if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) {
        const char *bytes1, *bytes2;
        Py_ssize_t size = PyBytes_GET_SIZE(s1);

        if (size != PyBytes_GET_SIZE(s2)) {
            return (equals == Py_NE);
        }
        bytes1 = PyBytes_AS_STRING(s1);
        bytes2 = PyBytes_AS_STRING(s2);
        /* Cheap first-byte comparison before the full memcmp(). */
        if (bytes1[0] != bytes2[0]) {
            return (equals == Py_NE);
        }
        if (size == 1) {
            return (equals == Py_EQ);
        }
        {
            int diff = memcmp(bytes1, bytes2, (size_t)size);
            return (equals == Py_EQ) ? (diff == 0) : (diff != 0);
        }
    }

    /* None never equals a bytes object. */
    if ((s1 == Py_None) & PyBytes_CheckExact(s2)) {
        return (equals == Py_NE);
    }
    if ((s2 == Py_None) & PyBytes_CheckExact(s1)) {
        return (equals == Py_NE);
    }

    {
        int truth;
        PyObject *cmp = PyObject_RichCompare(s1, s2, equals);
        if (!cmp) {
            return -1;
        }
        truth = __Pyx_PyObject_IsTrue(cmp);
        Py_DECREF(cmp);
        return truth;
    }
#endif
}
|
|
|
|
/* Equality test specialised for exact unicode objects.
 *
 *   s1, s2 -- objects to compare
 *   equals -- Py_EQ or Py_NE, the comparison being evaluated
 *
 * Returns the truth value of the comparison (0 or 1), or -1 with an
 * exception set on failure.  On Python 2 an exact byte string paired with a
 * unicode object is first converted with PyUnicode_FromObject(); the
 * temporary is released on every exit that can be reached while it is held.
 * When neither operand is unicode on Python 2, the call is forwarded to
 * __Pyx_PyBytes_Equals(). */
static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) {
#if CYTHON_COMPILING_IN_PYPY
    return PyObject_RichCompareBool(s1, s2, equals);
#else
#if PY_MAJOR_VERSION < 3
    PyObject *converted = NULL;   /* unicode copy of a str operand, if any */
#endif
    int lhs_unicode, rhs_unicode;

    if (s1 == s2) {
        goto return_eq;
    }
    lhs_unicode = PyUnicode_CheckExact(s1);
    rhs_unicode = PyUnicode_CheckExact(s2);

#if PY_MAJOR_VERSION < 3
    if ((lhs_unicode & (!rhs_unicode)) && PyString_CheckExact(s2)) {
        converted = PyUnicode_FromObject(s2);
        if (unlikely(!converted)) {
            return -1;
        }
        s2 = converted;
        rhs_unicode = 1;
    } else if ((rhs_unicode & (!lhs_unicode)) && PyString_CheckExact(s1)) {
        converted = PyUnicode_FromObject(s1);
        if (unlikely(!converted)) {
            return -1;
        }
        s1 = converted;
        lhs_unicode = 1;
    } else if (((!rhs_unicode) & (!lhs_unicode))) {
        return __Pyx_PyBytes_Equals(s1, s2, equals);
    }
#endif

    if (lhs_unicode & rhs_unicode) {
        Py_ssize_t nchars;
        int kind;
        void *buf1, *buf2;
#if CYTHON_PEP393_ENABLED
        if (unlikely(PyUnicode_READY(s1) < 0) || unlikely(PyUnicode_READY(s2) < 0)) {
            return -1;
        }
#endif
        nchars = __Pyx_PyUnicode_GET_LENGTH(s1);
        if (nchars != __Pyx_PyUnicode_GET_LENGTH(s2)) {
            goto return_ne;
        }
        kind = __Pyx_PyUnicode_KIND(s1);
        if (kind != __Pyx_PyUnicode_KIND(s2)) {
            goto return_ne;
        }
        buf1 = __Pyx_PyUnicode_DATA(s1);
        buf2 = __Pyx_PyUnicode_DATA(s2);
        /* Cheap first-character comparison before the full memcmp(). */
        if (__Pyx_PyUnicode_READ(kind, buf1, 0) != __Pyx_PyUnicode_READ(kind, buf2, 0)) {
            goto return_ne;
        }
        if (nchars == 1) {
            goto return_eq;
        }
        {
            int diff = memcmp(buf1, buf2, nchars * kind);
#if PY_MAJOR_VERSION < 3
            Py_XDECREF(converted);
#endif
            return (equals == Py_EQ) ? (diff == 0) : (diff != 0);
        }
    }

    /* None never equals a unicode object. */
    if ((s1 == Py_None) & rhs_unicode) {
        goto return_ne;
    }
    if ((s2 == Py_None) & lhs_unicode) {
        goto return_ne;
    }

    {
        int truth;
        PyObject *cmp = PyObject_RichCompare(s1, s2, equals);
        if (!cmp) {
            return -1;
        }
        truth = __Pyx_PyObject_IsTrue(cmp);
        Py_DECREF(cmp);
        return truth;
    }

return_eq:
#if PY_MAJOR_VERSION < 3
    Py_XDECREF(converted);
#endif
    return (equals == Py_EQ);

return_ne:
#if PY_MAJOR_VERSION < 3
    Py_XDECREF(converted);
#endif
    return (equals == Py_NE);
#endif
}
|
|
|
|
/* Set TypeError("Argument '<name>' has incorrect type (expected <type>,
 * got <type of obj>)"). */
static void __Pyx_RaiseArgumentTypeInvalid(const char* name, PyObject *obj, PyTypeObject *type) {
    const char *expected_name = type->tp_name;
    const char *actual_name = Py_TYPE(obj)->tp_name;
    PyErr_Format(PyExc_TypeError,
                 "Argument '%.200s' has incorrect type (expected %.200s, got %.200s)",
                 name, expected_name, actual_name);
}
|
|
/* Check that argument `obj` has the declared type.
 *
 *   obj          -- the argument value
 *   type         -- required type; NULL is reported as a SystemError
 *   none_allowed -- non-zero if None is accepted in place of an instance
 *   name         -- argument name, used in the error message
 *   exact        -- non-zero to require Py_TYPE(obj) == type (on Python 2 an
 *                   exact basestring is also accepted when `type` is
 *                   PyBaseString_Type); zero to accept subtypes
 *
 * Returns 1 if the argument is acceptable, 0 with an exception set
 * otherwise. */
static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed,
    const char *name, int exact)
{
    if (unlikely(!type)) {
        PyErr_SetString(PyExc_SystemError, "Missing type object");
        return 0;
    }
    if (none_allowed && obj == Py_None) {
        return 1;
    }
    if (exact) {
        if (likely(Py_TYPE(obj) == type)) {
            return 1;
        }
#if PY_MAJOR_VERSION == 2
        if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) {
            return 1;
        }
#endif
    } else if (likely(PyObject_TypeCheck(obj, type))) {
        return 1;
    }
    __Pyx_RaiseArgumentTypeInvalid(name, obj, type);
    return 0;
}
|
|
|
|
static void __Pyx_RaiseArgtupleInvalid(
|
|
const char* func_name,
|
|
int exact,
|
|
Py_ssize_t num_min,
|
|
Py_ssize_t num_max,
|
|
Py_ssize_t num_found)
|
|
{
|
|
Py_ssize_t num_expected;
|
|
const char *more_or_less;
|
|
if (num_found < num_min) {
|
|
num_expected = num_min;
|
|
more_or_less = "at least";
|
|
} else {
|
|
num_expected = num_max;
|
|
more_or_less = "at most";
|
|
}
|
|
if (exact) {
|
|
more_or_less = "exactly";
|
|
}
|
|
PyErr_Format(PyExc_TypeError,
|
|
"%.200s() takes %.8s %" CYTHON_FORMAT_SSIZE_T "d positional argument%.1s (%" CYTHON_FORMAT_SSIZE_T "d given)",
|
|
func_name, more_or_less, num_expected,
|
|
(num_expected == 1) ? "" : "s", num_found);
|
|
}
|
|
|
|
static void __Pyx_RaiseDoubleKeywordsError(
|
|
const char* func_name,
|
|
PyObject* kw_name)
|
|
{
|
|
PyErr_Format(PyExc_TypeError,
|
|
#if PY_MAJOR_VERSION >= 3
|
|
"%s() got multiple values for keyword argument '%U'", func_name, kw_name);
|
|
#else
|
|
"%s() got multiple values for keyword argument '%s'", func_name,
|
|
PyString_AsString(kw_name));
|
|
#endif
|
|
}
|
|
|
|
/* Distribute the entries of the keyword dict `kwds` over the argument slots.
 *
 *   kwds          -- dict of keyword arguments passed by the caller
 *   argnames      -- NULL-terminated array of pointers to the argument-name
 *                    objects; the first `num_pos_args` entries name the
 *                    arguments that were already supplied positionally
 *   kwds2         -- dict that receives keywords matching no argument name,
 *                    or NULL if such keywords are an error
 *   values        -- output array, indexed like `argnames`; matched keyword
 *                    values are stored here (borrowed references)
 *   num_pos_args  -- number of positionally supplied arguments
 *   function_name -- used in error messages
 *
 * Returns 0 on success, -1 with an exception set on failure:
 * TypeError for a keyword that duplicates a positional argument, for a
 * non-string key, or for an unexpected keyword when `kwds2` is NULL; any
 * error raised by the comparison or by PyDict_SetItem() is passed through.
 *
 * Each key is matched by identity first and by string comparison second.
 *
 * Fix relative to the generated original: on Python 2 the "unexpected
 * keyword" message used PyString_AsString(key) directly as a "%.200s"
 * argument.  That call returns NULL (with an exception set) for a unicode
 * key that cannot be encoded with the default codec, and PyErr_Format() then
 * reads through the NULL pointer.  The NULL case is now caught, the encoding
 * error is cleared and "?" is printed in place of the name. */
static int __Pyx_ParseOptionalKeywords(
    PyObject *kwds,
    PyObject **argnames[],
    PyObject *kwds2,
    PyObject *values[],
    Py_ssize_t num_pos_args,
    const char* function_name)
{
    PyObject *key = 0, *value = 0;
    Py_ssize_t pos = 0;
    PyObject*** name;
    PyObject*** first_kw_arg = argnames + num_pos_args;

    while (PyDict_Next(kwds, &pos, &key, &value)) {
        /* Fast path: the key is the very same object as an argument name. */
        name = first_kw_arg;
        while (*name && (**name != key)) name++;
        if (*name) {
            values[name-argnames] = value;
            continue;
        }

        /* Slow path: compare by content. */
        name = first_kw_arg;
        #if PY_MAJOR_VERSION < 3
        if (likely(PyString_CheckExact(key)) || likely(PyString_Check(key))) {
            while (*name) {
                if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key))
                        && _PyString_Eq(**name, key)) {
                    values[name-argnames] = value;
                    break;
                }
                name++;
            }
            if (*name) continue;
            else {
                /* No keyword-capable argument matched: a match among the
                 * positionally supplied names means a duplicate. */
                PyObject*** argname = argnames;
                while (argname != first_kw_arg) {
                    if ((**argname == key) || (
                            (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key))
                             && _PyString_Eq(**argname, key))) {
                        goto arg_passed_twice;
                    }
                    argname++;
                }
            }
        } else
        #endif
        if (likely(PyUnicode_Check(key))) {
            while (*name) {
                int cmp = (**name == key) ? 0 :
                #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
                    (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :
                #endif
                    PyUnicode_Compare(**name, key);
                if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
                if (cmp == 0) {
                    values[name-argnames] = value;
                    break;
                }
                name++;
            }
            if (*name) continue;
            else {
                /* Same duplicate check as above, for unicode keys. */
                PyObject*** argname = argnames;
                while (argname != first_kw_arg) {
                    int cmp = (**argname == key) ? 0 :
                    #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
                        (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :
                    #endif
                        PyUnicode_Compare(**argname, key);
                    if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
                    if (cmp == 0) goto arg_passed_twice;
                    argname++;
                }
            }
        } else
            goto invalid_keyword_type;

        /* The key names no declared argument. */
        if (kwds2) {
            if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad;
        } else {
            goto invalid_keyword;
        }
    }
    return 0;

arg_passed_twice:
    __Pyx_RaiseDoubleKeywordsError(function_name, key);
    goto bad;

invalid_keyword_type:
    PyErr_Format(PyExc_TypeError,
        "%.200s() keywords must be strings", function_name);
    goto bad;

invalid_keyword:
    #if PY_MAJOR_VERSION < 3
    {
        const char *key_cstr = PyString_AsString(key);
        if (unlikely(!key_cstr)) {
            /* Key could not be turned into a C string: drop that error and
             * still report the unexpected keyword. */
            PyErr_Clear();
            key_cstr = "?";
        }
        PyErr_Format(PyExc_TypeError,
            "%.200s() got an unexpected keyword argument '%.200s'",
            function_name, key_cstr);
    }
    #else
    PyErr_Format(PyExc_TypeError,
        "%s() got an unexpected keyword argument '%U'",
        function_name, key);
    #endif

bad:
    return -1;
}
|
|
|
|
/* Look `name` up in the module dict (__pyx_d) and, if it is not there, among
 * the builtins via __Pyx_GetBuiltinName().
 * Returns a new reference, or NULL with the exception set by the builtin
 * lookup. */
static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name) {
#if CYTHON_COMPILING_IN_CPYTHON
    /* PyDict_GetItem() yields a borrowed reference and sets no error. */
    PyObject *found = PyDict_GetItem(__pyx_d, name);
    if (found) {
        Py_INCREF(found);
        return found;
    }
#else
    PyObject *found = PyObject_GetItem(__pyx_d, name);
    if (found) {
        return found;
    }
    /* Discard the lookup error before trying the builtins. */
    PyErr_Clear();
#endif
    return __Pyx_GetBuiltinName(name);
}
|
|
|
|
/* Generic o[j] through PyObject_GetItem().  The reference to the index
 * object `j` is consumed; a NULL `j` (failed index creation) is passed
 * through as a NULL result. */
static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j) {
    PyObject *item;
    if (!j) {
        return NULL;
    }
    item = PyObject_GetItem(o, j);
    Py_DECREF(j);
    return item;
}
|
|
/* o[i] for an object treated as a list.
 *
 *   wraparound  -- non-zero to add the list size to a negative index
 *   boundscheck -- non-zero to verify the index before the direct access
 *
 * Returns a new reference.  With bounds checking on, an out-of-range index
 * is handed to __Pyx_GetItemInt_Generic().  Outside CPython the call is
 * simply PySequence_GetItem(). */
static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i,
                                                              int wraparound, int boundscheck) {
#if CYTHON_COMPILING_IN_CPYTHON
    PyObject *item;

    if (wraparound & unlikely(i < 0)) {
        i += PyList_GET_SIZE(o);
    }
    if (boundscheck && unlikely((i < 0) | (i >= PyList_GET_SIZE(o)))) {
        return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i));
    }
    item = PyList_GET_ITEM(o, i);
    Py_INCREF(item);
    return item;
#else
    return PySequence_GetItem(o, i);
#endif
}
|
|
/* o[i] for an object treated as a tuple.
 *
 *   wraparound  -- non-zero to add the tuple size to a negative index
 *   boundscheck -- non-zero to verify the index before the direct access
 *
 * Returns a new reference.  With bounds checking on, an out-of-range index
 * is handed to __Pyx_GetItemInt_Generic().  Outside CPython the call is
 * simply PySequence_GetItem(). */
static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i,
                                                              int wraparound, int boundscheck) {
#if CYTHON_COMPILING_IN_CPYTHON
    PyObject *item;

    if (wraparound & unlikely(i < 0)) {
        i += PyTuple_GET_SIZE(o);
    }
    if (boundscheck && unlikely((i < 0) | (i >= PyTuple_GET_SIZE(o)))) {
        return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i));
    }
    item = PyTuple_GET_ITEM(o, i);
    Py_INCREF(item);
    return item;
#else
    return PySequence_GetItem(o, i);
#endif
}
|
|
/* o[i] with an integer index for an arbitrary object.
 *
 *   is_list     -- non-zero if the caller already knows `o` is a list
 *   wraparound  -- non-zero to support negative indices
 *   boundscheck -- non-zero to verify list/tuple indices before the direct
 *                  access
 *
 * Returns a new reference or NULL with an exception set.  On CPython exact
 * lists and tuples are read directly, other types with an sq_item slot are
 * called through that slot, and whatever is left (including list/tuple
 * indices that fail the bounds check) goes to __Pyx_GetItemInt_Generic()
 * with the original index `i`. */
static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i,
                                                     int is_list, int wraparound, int boundscheck) {
#if CYTHON_COMPILING_IN_CPYTHON
    if (is_list || PyList_CheckExact(o)) {
        Py_ssize_t idx = ((!wraparound) | likely(i >= 0)) ? i : i + PyList_GET_SIZE(o);
        if ((!boundscheck) || (likely((idx >= 0) & (idx < PyList_GET_SIZE(o))))) {
            PyObject *item = PyList_GET_ITEM(o, idx);
            Py_INCREF(item);
            return item;
        }
    }
    else if (PyTuple_CheckExact(o)) {
        Py_ssize_t idx = ((!wraparound) | likely(i >= 0)) ? i : i + PyTuple_GET_SIZE(o);
        if ((!boundscheck) || likely((idx >= 0) & (idx < PyTuple_GET_SIZE(o)))) {
            PyObject *item = PyTuple_GET_ITEM(o, idx);
            Py_INCREF(item);
            return item;
        }
    }
    else {
        PySequenceMethods *seq = Py_TYPE(o)->tp_as_sequence;
        if (likely(seq && seq->sq_item)) {
            if (wraparound && unlikely(i < 0) && likely(seq->sq_length)) {
                Py_ssize_t len = seq->sq_length(o);
                if (likely(len >= 0)) {
                    i += len;
                } else {
                    /* A length that overflows is ignored and the negative
                     * index is passed on unchanged; any other error from
                     * sq_length is reported to the caller. */
                    if (PyErr_ExceptionMatches(PyExc_OverflowError))
                        PyErr_Clear();
                    else
                        return NULL;
                }
            }
            return seq->sq_item(o, i);
        }
    }
#else
    if (is_list || PySequence_Check(o)) {
        return PySequence_GetItem(o, i);
    }
#endif
    return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i));
}
|
|
|
|
#if CYTHON_COMPILING_IN_CPYTHON
/* Call `func` with positional tuple `arg` and keyword dict `kw` by invoking
 * the type's tp_call slot directly (inside the recursion guard on
 * Python >= 2.6).  Objects without a tp_call slot are passed to
 * PyObject_Call().  A NULL result with no exception set is turned into a
 * SystemError. */
static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) {
    PyObject *retval;
    ternaryfunc slot = func->ob_type->tp_call;
    if (unlikely(!slot)) {
        return PyObject_Call(func, arg, kw);
    }
#if PY_VERSION_HEX >= 0x02060000
    if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) {
        return NULL;
    }
#endif
    retval = (*slot)(func, arg, kw);
#if PY_VERSION_HEX >= 0x02060000
    Py_LeaveRecursiveCall();
#endif
    if (unlikely(!retval)) {
        if (unlikely(!PyErr_Occurred())) {
            PyErr_SetString(
                PyExc_SystemError,
                "NULL result without error in PyObject_Call");
        }
    }
    return retval;
}
#endif
|
|
|
|
/* Install (type, value, tb) as the current exception, taking over the
 * caller's references and releasing whatever exception state was set before.
 * Under CPython the thread state is written directly; elsewhere
 * PyErr_Restore() is used. */
static CYTHON_INLINE void __Pyx_ErrRestore(PyObject *type, PyObject *value, PyObject *tb) {
#if CYTHON_COMPILING_IN_CPYTHON
    PyThreadState *ts = PyThreadState_GET();
    PyObject *prev_type = ts->curexc_type;
    PyObject *prev_value = ts->curexc_value;
    PyObject *prev_tb = ts->curexc_traceback;

    ts->curexc_type = type;
    ts->curexc_value = value;
    ts->curexc_traceback = tb;

    /* Release the old state only after the new one is fully in place. */
    Py_XDECREF(prev_type);
    Py_XDECREF(prev_value);
    Py_XDECREF(prev_tb);
#else
    PyErr_Restore(type, value, tb);
#endif
}
|
|
/* Move the current exception state into *type, *value and *tb and clear it.
 * Under CPython the thread state fields are read and zeroed directly;
 * elsewhere PyErr_Fetch() is used. */
static CYTHON_INLINE void __Pyx_ErrFetch(PyObject **type, PyObject **value, PyObject **tb) {
#if CYTHON_COMPILING_IN_CPYTHON
    PyThreadState *ts = PyThreadState_GET();

    *type = ts->curexc_type;
    *value = ts->curexc_value;
    *tb = ts->curexc_traceback;

    ts->curexc_type = 0;
    ts->curexc_value = 0;
    ts->curexc_traceback = 0;
#else
    PyErr_Fetch(type, value, tb);
#endif
}
|
|
|
|
#if PY_MAJOR_VERSION < 3
|
|
static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb,
|
|
CYTHON_UNUSED PyObject *cause) {
|
|
Py_XINCREF(type);
|
|
if (!value || value == Py_None)
|
|
value = NULL;
|
|
else
|
|
Py_INCREF(value);
|
|
if (!tb || tb == Py_None)
|
|
tb = NULL;
|
|
else {
|
|
Py_INCREF(tb);
|
|
if (!PyTraceBack_Check(tb)) {
|
|
PyErr_SetString(PyExc_TypeError,
|
|
"raise: arg 3 must be a traceback or None");
|
|
goto raise_error;
|
|
}
|
|
}
|
|
#if PY_VERSION_HEX < 0x02050000
|
|
if (PyClass_Check(type)) {
|
|
#else
|
|
if (PyType_Check(type)) {
|
|
#endif
|
|
#if CYTHON_COMPILING_IN_PYPY
|
|
if (!value) {
|
|
Py_INCREF(Py_None);
|
|
value = Py_None;
|
|
}
|
|
#endif
|
|
PyErr_NormalizeException(&type, &value, &tb);
|
|
} else {
|
|
if (value) {
|
|
PyErr_SetString(PyExc_TypeError,
|
|
"instance exception may not have a separate value");
|
|
goto raise_error;
|
|
}
|
|
value = type;
|
|
#if PY_VERSION_HEX < 0x02050000
|
|
if (PyInstance_Check(type)) {
|
|
type = (PyObject*) ((PyInstanceObject*)type)->in_class;
|
|
Py_INCREF(type);
|
|
} else {
|
|
type = 0;
|
|
PyErr_SetString(PyExc_TypeError,
|
|
"raise: exception must be an old-style class or instance");
|
|
goto raise_error;
|
|
}
|
|
#else
|
|
type = (PyObject*) Py_TYPE(type);
|
|
Py_INCREF(type);
|
|
if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) {
|
|
PyErr_SetString(PyExc_TypeError,
|
|
"raise: exception class must be a subclass of BaseException");
|
|
goto raise_error;
|
|
}
|
|
#endif
|
|
}
|
|
__Pyx_ErrRestore(type, value, tb);
|
|
return;
|
|
raise_error:
|
|
Py_XDECREF(value);
|
|
Py_XDECREF(type);
|
|
Py_XDECREF(tb);
|
|
return;
|
|
}
|
|
#else /* Python 3+ */
|
|
static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) {
|
|
PyObject* owned_instance = NULL;
|
|
if (tb == Py_None) {
|
|
tb = 0;
|
|
} else if (tb && !PyTraceBack_Check(tb)) {
|
|
PyErr_SetString(PyExc_TypeError,
|
|
"raise: arg 3 must be a traceback or None");
|
|
goto bad;
|
|
}
|
|
if (value == Py_None)
|
|
value = 0;
|
|
if (PyExceptionInstance_Check(type)) {
|
|
if (value) {
|
|
PyErr_SetString(PyExc_TypeError,
|
|
"instance exception may not have a separate value");
|
|
goto bad;
|
|
}
|
|
value = type;
|
|
type = (PyObject*) Py_TYPE(value);
|
|
} else if (PyExceptionClass_Check(type)) {
|
|
PyObject *instance_class = NULL;
|
|
if (value && PyExceptionInstance_Check(value)) {
|
|
instance_class = (PyObject*) Py_TYPE(value);
|
|
if (instance_class != type) {
|
|
if (PyObject_IsSubclass(instance_class, type)) {
|
|
type = instance_class;
|
|
} else {
|
|
instance_class = NULL;
|
|
}
|
|
}
|
|
}
|
|
if (!instance_class) {
|
|
PyObject *args;
|
|
if (!value)
|
|
args = PyTuple_New(0);
|
|
else if (PyTuple_Check(value)) {
|
|
Py_INCREF(value);
|
|
args = value;
|
|
} else
|
|
args = PyTuple_Pack(1, value);
|
|
if (!args)
|
|
goto bad;
|
|
owned_instance = PyObject_Call(type, args, NULL);
|
|
Py_DECREF(args);
|
|
if (!owned_instance)
|
|
goto bad;
|
|
value = owned_instance;
|
|
if (!PyExceptionInstance_Check(value)) {
|
|
PyErr_Format(PyExc_TypeError,
|
|
"calling %R should have returned an instance of "
|
|
"BaseException, not %R",
|
|
type, Py_TYPE(value));
|
|
goto bad;
|
|
}
|
|
}
|
|
} else {
|
|
PyErr_SetString(PyExc_TypeError,
|
|
"raise: exception class must be a subclass of BaseException");
|
|
goto bad;
|
|
}
|
|
#if PY_VERSION_HEX >= 0x03030000
|
|
if (cause) {
|
|
#else
|
|
if (cause && cause != Py_None) {
|
|
#endif
|
|
PyObject *fixed_cause;
|
|
if (cause == Py_None) {
|
|
fixed_cause = NULL;
|
|
} else if (PyExceptionClass_Check(cause)) {
|
|
fixed_cause = PyObject_CallObject(cause, NULL);
|
|
if (fixed_cause == NULL)
|
|
goto bad;
|
|
} else if (PyExceptionInstance_Check(cause)) {
|
|
fixed_cause = cause;
|
|
Py_INCREF(fixed_cause);
|
|
} else {
|
|
PyErr_SetString(PyExc_TypeError,
|
|
"exception causes must derive from "
|
|
"BaseException");
|
|
goto bad;
|
|
}
|
|
PyException_SetCause(value, fixed_cause);
|
|
}
|
|
PyErr_SetObject(type, value);
|
|
if (tb) {
|
|
PyThreadState *tstate = PyThreadState_GET();
|
|
PyObject* tmp_tb = tstate->curexc_traceback;
|
|
if (tb != tmp_tb) {
|
|
Py_INCREF(tb);
|
|
tstate->curexc_traceback = tb;
|
|
Py_XDECREF(tmp_tb);
|
|
}
|
|
}
|
|
bad:
|
|
Py_XDECREF(owned_instance);
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
/* Read the character at index `i` of the unicode object `ustring`.
 * `wraparound` enables negative indexing, `boundscheck` enables the range
 * check.  On a failed range check an IndexError is set and (Py_UCS4)-1 is
 * returned; (Py_UCS4)-1 is also returned when readying the string fails. */
static CYTHON_INLINE Py_UCS4 __Pyx_GetItemInt_Unicode_Fast(PyObject* ustring, Py_ssize_t i,
                                                           int wraparound, int boundscheck) {
#if CYTHON_PEP393_ENABLED
    if (unlikely(__Pyx_PyUnicode_READY(ustring) < 0)) return (Py_UCS4)-1;
#endif
    if (wraparound | boundscheck) {
        /* The length is only needed when one of the two options is on. */
        const Py_ssize_t n = __Pyx_PyUnicode_GET_LENGTH(ustring);
        if (wraparound & unlikely(i < 0))
            i += n;
        if (boundscheck && unlikely((i < 0) | (i >= n))) {
            PyErr_SetString(PyExc_IndexError, "string index out of range");
            return (Py_UCS4)-1;
        }
    }
    return __Pyx_PyUnicode_READ_CHAR(ustring, i);
}
|
|
|
|
/* Return text[start:stop] as a new unicode object.  A negative `start` or
 * `stop` is offset by the string length, `start` is clamped at 0 and `stop`
 * at the length; an empty or inverted range yields an empty string.
 * Returns NULL if readying the string fails (PEP 393 builds). */
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring(
            PyObject* text, Py_ssize_t start, Py_ssize_t stop) {
    Py_ssize_t text_len;
    Py_ssize_t span;
#if CYTHON_PEP393_ENABLED
    if (unlikely(PyUnicode_READY(text) == -1)) return NULL;
    text_len = PyUnicode_GET_LENGTH(text);
#else
    text_len = PyUnicode_GET_SIZE(text);
#endif
    if (start < 0) {
        start += text_len;
        if (start < 0) {
            start = 0;
        }
    }
    if (stop < 0) {
        stop += text_len;
    } else if (stop > text_len) {
        stop = text_len;
    }
    span = stop - start;
    if (span <= 0) {
        return PyUnicode_FromUnicode(NULL, 0);
    }
#if CYTHON_PEP393_ENABLED
    /* Step over `start` characters of PyUnicode_KIND(text) bytes each. */
    return PyUnicode_FromKindAndData(PyUnicode_KIND(text),
        PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text), stop-start);
#else
    return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text)+start, stop-start);
#endif
}
|
|
|
|
/* Generic o[j] = v.  Consumes the reference to `j`; a NULL `j` yields -1
 * without attempting the assignment.  Otherwise returns the result of
 * PyObject_SetItem(). */
static CYTHON_INLINE int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v) {
    int status = -1;
    if (j) {
        status = PyObject_SetItem(o, j, v);
        Py_DECREF(j);
    }
    return status;
}
|
|
/* Integer-indexed assignment o[i] = v with fast paths for exact lists and for
 * types that provide the sq_ass_item slot; anything not handled by a fast
 * path ends up in __Pyx_SetItemInt_Generic().  The list fast path returns 1
 * on success; the other paths return whatever the underlying call returns
 * (-1 on error). */
static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v,
                                               int is_list, int wraparound, int boundscheck) {
#if CYTHON_COMPILING_IN_CPYTHON
    if (is_list || PyList_CheckExact(o)) {
        Py_ssize_t idx = i;
        if (wraparound && unlikely(i < 0))
            idx = i + PyList_GET_SIZE(o);
        if ((!boundscheck) || likely((idx >= 0) & (idx < PyList_GET_SIZE(o)))) {
            /* Swap in the new item first, then release the old one. */
            PyObject *previous = PyList_GET_ITEM(o, idx);
            Py_INCREF(v);
            PyList_SET_ITEM(o, idx, v);
            Py_DECREF(previous);
            return 1;
        }
    } else {
        PySequenceMethods *seq = Py_TYPE(o)->tp_as_sequence;
        if (likely(seq && seq->sq_ass_item)) {
            if (wraparound && unlikely(i < 0) && likely(seq->sq_length)) {
                Py_ssize_t len = seq->sq_length(o);
                if (likely(len >= 0)) {
                    i += len;
                } else if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
                    /* Length overflow is ignored; the raw negative index is
                     * passed on to sq_ass_item. */
                    PyErr_Clear();
                } else {
                    return -1;
                }
            }
            return seq->sq_ass_item(o, i, v);
        }
    }
#else
#if CYTHON_COMPILING_IN_PYPY
    if (is_list || (PySequence_Check(o) && !PyDict_Check(o))) {
#else
    if (is_list || PySequence_Check(o)) {
#endif
        return PySequence_SetItem(o, i, v);
    }
#endif
    return __Pyx_SetItemInt_Generic(o, PyInt_FromSsize_t(i), v);
}
|
|
|
|
/* Report the currently set exception as "unraisable", using `name` as the
 * context object passed to PyErr_WriteUnraisable().  When `full_traceback`
 * is non-zero the exception is additionally printed with PyErr_PrintEx(1)
 * beforehand.  `clineno`, `lineno` and `filename` are unused. */
static void __Pyx_WriteUnraisable(const char *name, CYTHON_UNUSED int clineno,
                                  CYTHON_UNUSED int lineno, CYTHON_UNUSED const char *filename,
                                  int full_traceback) {
    PyObject *exc_type, *exc_value, *exc_tb;
    PyObject *context;

    __Pyx_ErrFetch(&exc_type, &exc_value, &exc_tb);
    if (full_traceback) {
        /* Printing consumes the restored exception, so keep an extra
         * reference to each part for the second restore below. */
        Py_XINCREF(exc_type);
        Py_XINCREF(exc_value);
        Py_XINCREF(exc_tb);
        __Pyx_ErrRestore(exc_type, exc_value, exc_tb);
        PyErr_PrintEx(1);
    }
    /* The context string is created while no exception is set. */
#if PY_MAJOR_VERSION < 3
    context = PyString_FromString(name);
#else
    context = PyUnicode_FromString(name);
#endif
    __Pyx_ErrRestore(exc_type, exc_value, exc_tb);
    if (context) {
        PyErr_WriteUnraisable(context);
        Py_DECREF(context);
    } else {
        PyErr_WriteUnraisable(Py_None);
    }
}
|
|
|
|
/* Implements `from module import name`: fetch attribute `name` from `module`.
 * A failing lookup that raised AttributeError is re-reported as ImportError;
 * any other error is left as is.  Returns a new reference or NULL. */
static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) {
    PyObject* attr = __Pyx_PyObject_GetAttrStr(module, name);
    if (likely(attr)) {
        return attr;
    }
    if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
#if PY_MAJOR_VERSION < 3
        PyErr_Format(PyExc_ImportError,
            "cannot import name %.230s", PyString_AS_STRING(name));
#else
        PyErr_Format(PyExc_ImportError,
            "cannot import name %S", name);
#endif
    }
    return attr;
}
|
|
|
|
/* Import module `name` with the given from-list and relative-import `level`,
 * using this module's globals.  Before Python 3.3 the builtin __import__ is
 * called; from 3.3 on PyImport_ImportModuleLevelObject() is used.  On
 * Python 3 a level of -1 means: if this module's name contains a dot, try a
 * relative import (level 1) first and, when that raises ImportError, retry
 * as an absolute import.  Returns a new reference or NULL. */
static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) {
    PyObject *empty_list = 0;
    PyObject *module = 0;
    PyObject *globals = 0;
    PyObject *empty_dict = 0;
    PyObject *fromlist;
#if PY_VERSION_HEX < 0x03030000
    PyObject *py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import);
    if (!py_import)
        goto bad;
#endif
    /* A missing from-list is replaced by a temporary empty list. */
    if (!from_list) {
        empty_list = PyList_New(0);
        if (!empty_list)
            goto bad;
    }
    fromlist = from_list ? from_list : empty_list;
    globals = PyModule_GetDict(__pyx_m);
    if (!globals)
        goto bad;
    empty_dict = PyDict_New();
    if (!empty_dict)
        goto bad;
#if PY_VERSION_HEX >= 0x02050000
    {
#if PY_MAJOR_VERSION >= 3
        if (level == -1) {
            if (strchr(__Pyx_MODULE_NAME, '.')) {
#if PY_VERSION_HEX < 0x03030000
                PyObject *py_level = PyInt_FromLong(1);
                if (!py_level)
                    goto bad;
                module = PyObject_CallFunctionObjArgs(py_import,
                    name, globals, empty_dict, fromlist, py_level, NULL);
                Py_DECREF(py_level);
#else
                module = PyImport_ImportModuleLevelObject(
                    name, globals, empty_dict, fromlist, 1);
#endif
                if (!module) {
                    if (!PyErr_ExceptionMatches(PyExc_ImportError))
                        goto bad;
                    PyErr_Clear();
                }
            }
            level = 0; /* try absolute import on failure */
        }
#endif
        if (!module) {
#if PY_VERSION_HEX < 0x03030000
            PyObject *py_level = PyInt_FromLong(level);
            if (!py_level)
                goto bad;
            module = PyObject_CallFunctionObjArgs(py_import,
                name, globals, empty_dict, fromlist, py_level, NULL);
            Py_DECREF(py_level);
#else
            module = PyImport_ImportModuleLevelObject(
                name, globals, empty_dict, fromlist, level);
#endif
        }
    }
#else
    if (level>0) {
        PyErr_SetString(PyExc_RuntimeError, "Relative import is not supported for Python <=2.4.");
        goto bad;
    }
    module = PyObject_CallFunctionObjArgs(py_import,
        name, globals, empty_dict, fromlist, NULL);
#endif
bad:
    /* Shared exit: `module` is still NULL on every failure path. */
#if PY_VERSION_HEX < 0x03030000
    Py_XDECREF(py_import);
#endif
    Py_XDECREF(empty_list);
    Py_XDECREF(empty_dict);
    return module;
}
|
|
|
|
/* Expands to a block that converts `x` with `func` (yielding a `func_type`)
 * and returns the result cast to `target_type`.  When `target_type` is
 * narrower than `func_type` and the value does not survive the round trip
 * through `target_type`, an OverflowError is set and (target_type)-1 is
 * returned.  Relies on `x` and `is_unsigned` being defined at the point of
 * expansion; the expansion always returns from the enclosing function. */
#define __PYX_VERIFY_RETURN_INT(target_type, func_type, func)                 \
    {                                                                         \
        func_type converted = func(x);                                        \
        if (sizeof(target_type) < sizeof(func_type) &&                        \
                unlikely(converted != (func_type) (target_type) converted)) { \
            func_type none = 0;                                               \
            if (is_unsigned && unlikely(converted < none)) {                  \
                PyErr_SetString(PyExc_OverflowError,                          \
                    "can't convert negative value to " #target_type);         \
            } else {                                                          \
                PyErr_SetString(PyExc_OverflowError,                          \
                    "value too large to convert to " #target_type);           \
            }                                                                 \
            return (target_type) -1;                                          \
        }                                                                     \
        return (target_type) converted;                                       \
    }
|
|
|
|
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
|
|
#if CYTHON_USE_PYLONG_INTERNALS
|
|
#include "longintrepr.h"
|
|
#endif
|
|
#endif
|
|
/* Convert the Python object `x` to a C `int`.
 * Python 2 int objects and long objects are converted directly with range
 * checks; any other object is first coerced with __Pyx_PyNumber_Int() and the
 * result converted recursively.  Returns (int)-1 when an error was raised. */
static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) {
    const int neg_one = (int) -1, const_zero = 0;
    /* Signedness of the target type, determined from the cast of -1. */
    const int is_unsigned = neg_one > const_zero;
#if PY_MAJOR_VERSION < 3
    if (likely(PyInt_Check(x))) {
        long as_c_long;
        if (sizeof(int) < sizeof(long)) {
            __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG)
        }
        as_c_long = PyInt_AS_LONG(x);
        if (is_unsigned && unlikely(as_c_long < 0)) {
            PyErr_SetString(PyExc_OverflowError,
                            "can't convert negative value to int");
            return (int) -1;
        }
        return (int) as_c_long;
    }
#endif
    if (likely(PyLong_Check(x))) {
        if (is_unsigned) {
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
 #if CYTHON_USE_PYLONG_INTERNALS
            /* Zero- and one-digit longs are read straight from the object. */
            if (sizeof(digit) <= sizeof(int)) {
                const Py_ssize_t ndigits = Py_SIZE(x);
                if (ndigits == 0) return 0;
                if (ndigits == 1) return (int) ((PyLongObject*)x)->ob_digit[0];
            }
 #endif
#endif
            if (unlikely(Py_SIZE(x) < 0)) {
                PyErr_SetString(PyExc_OverflowError,
                                "can't convert negative value to int");
                return (int) -1;
            }
            if (sizeof(int) <= sizeof(unsigned long)) {
                __PYX_VERIFY_RETURN_INT(int, unsigned long, PyLong_AsUnsignedLong)
            } else if (sizeof(int) <= sizeof(unsigned long long)) {
                __PYX_VERIFY_RETURN_INT(int, unsigned long long, PyLong_AsUnsignedLongLong)
            }
        } else {
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
 #if CYTHON_USE_PYLONG_INTERNALS
            if (sizeof(digit) <= sizeof(int)) {
                const Py_ssize_t ndigits = Py_SIZE(x);
                if (ndigits == 0) return 0;
                if (ndigits == 1) return +(int) ((PyLongObject*)x)->ob_digit[0];
                if (ndigits == -1) return -(int) ((PyLongObject*)x)->ob_digit[0];
            }
 #endif
#endif
            if (sizeof(int) <= sizeof(long)) {
                __PYX_VERIFY_RETURN_INT(int, long, PyLong_AsLong)
            } else if (sizeof(int) <= sizeof(long long)) {
                __PYX_VERIFY_RETURN_INT(int, long long, PyLong_AsLongLong)
            }
        }
        /* Only reached when no C integer type above is wide enough. */
        {
#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray)
            PyErr_SetString(PyExc_RuntimeError,
                            "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers");
#else
            int result;
            PyObject *as_long = __Pyx_PyNumber_Int(x);
 #if PY_MAJOR_VERSION < 3
            if (likely(as_long) && !PyLong_Check(as_long)) {
                PyObject *not_long = as_long;
                as_long = PyNumber_Long(not_long);
                Py_DECREF(not_long);
            }
 #endif
            if (likely(as_long)) {
                /* Runtime byte-order probe for _PyLong_AsByteArray(). */
                int probe = 1;
                int little_endian = (int)*(unsigned char *)&probe;
                unsigned char *buf = (unsigned char *)&result;
                int status = _PyLong_AsByteArray((PyLongObject *)as_long,
                                                 buf, sizeof(result),
                                                 little_endian, !is_unsigned);
                Py_DECREF(as_long);
                if (likely(!status))
                    return result;
            }
#endif
            return (int) -1;
        }
    }
    {
        /* Neither int nor long: coerce, then convert the coerced object. */
        int result;
        PyObject *coerced = __Pyx_PyNumber_Int(x);
        if (!coerced) return (int) -1;
        result = __Pyx_PyInt_As_int(coerced);
        Py_DECREF(coerced);
        return result;
    }
}
|
|
|
|
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
|
|
#if CYTHON_USE_PYLONG_INTERNALS
|
|
#include "longintrepr.h"
|
|
#endif
|
|
#endif
|
|
/* Convert the Python object `x` to a C `uint64_t`.
 * Python 2 int objects and long objects are converted directly with range
 * checks; any other object is first coerced with __Pyx_PyNumber_Int() and the
 * result converted recursively.  Returns (uint64_t)-1 when an error was
 * raised. */
static CYTHON_INLINE uint64_t __Pyx_PyInt_As_uint64_t(PyObject *x) {
    const uint64_t neg_one = (uint64_t) -1, const_zero = 0;
    /* Signedness of the target type, determined from the cast of -1. */
    const int is_unsigned = neg_one > const_zero;
#if PY_MAJOR_VERSION < 3
    if (likely(PyInt_Check(x))) {
        long as_c_long;
        if (sizeof(uint64_t) < sizeof(long)) {
            __PYX_VERIFY_RETURN_INT(uint64_t, long, PyInt_AS_LONG)
        }
        as_c_long = PyInt_AS_LONG(x);
        if (is_unsigned && unlikely(as_c_long < 0)) {
            PyErr_SetString(PyExc_OverflowError,
                            "can't convert negative value to uint64_t");
            return (uint64_t) -1;
        }
        return (uint64_t) as_c_long;
    }
#endif
    if (likely(PyLong_Check(x))) {
        if (is_unsigned) {
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
 #if CYTHON_USE_PYLONG_INTERNALS
            /* Zero- and one-digit longs are read straight from the object. */
            if (sizeof(digit) <= sizeof(uint64_t)) {
                const Py_ssize_t ndigits = Py_SIZE(x);
                if (ndigits == 0) return 0;
                if (ndigits == 1) return (uint64_t) ((PyLongObject*)x)->ob_digit[0];
            }
 #endif
#endif
            if (unlikely(Py_SIZE(x) < 0)) {
                PyErr_SetString(PyExc_OverflowError,
                                "can't convert negative value to uint64_t");
                return (uint64_t) -1;
            }
            if (sizeof(uint64_t) <= sizeof(unsigned long)) {
                __PYX_VERIFY_RETURN_INT(uint64_t, unsigned long, PyLong_AsUnsignedLong)
            } else if (sizeof(uint64_t) <= sizeof(unsigned long long)) {
                __PYX_VERIFY_RETURN_INT(uint64_t, unsigned long long, PyLong_AsUnsignedLongLong)
            }
        } else {
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
 #if CYTHON_USE_PYLONG_INTERNALS
            if (sizeof(digit) <= sizeof(uint64_t)) {
                const Py_ssize_t ndigits = Py_SIZE(x);
                if (ndigits == 0) return 0;
                if (ndigits == 1) return +(uint64_t) ((PyLongObject*)x)->ob_digit[0];
                if (ndigits == -1) return -(uint64_t) ((PyLongObject*)x)->ob_digit[0];
            }
 #endif
#endif
            if (sizeof(uint64_t) <= sizeof(long)) {
                __PYX_VERIFY_RETURN_INT(uint64_t, long, PyLong_AsLong)
            } else if (sizeof(uint64_t) <= sizeof(long long)) {
                __PYX_VERIFY_RETURN_INT(uint64_t, long long, PyLong_AsLongLong)
            }
        }
        /* Only reached when no C integer type above is wide enough. */
        {
#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray)
            PyErr_SetString(PyExc_RuntimeError,
                            "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers");
#else
            uint64_t result;
            PyObject *as_long = __Pyx_PyNumber_Int(x);
 #if PY_MAJOR_VERSION < 3
            if (likely(as_long) && !PyLong_Check(as_long)) {
                PyObject *not_long = as_long;
                as_long = PyNumber_Long(not_long);
                Py_DECREF(not_long);
            }
 #endif
            if (likely(as_long)) {
                /* Runtime byte-order probe for _PyLong_AsByteArray(). */
                int probe = 1;
                int little_endian = (int)*(unsigned char *)&probe;
                unsigned char *buf = (unsigned char *)&result;
                int status = _PyLong_AsByteArray((PyLongObject *)as_long,
                                                 buf, sizeof(result),
                                                 little_endian, !is_unsigned);
                Py_DECREF(as_long);
                if (likely(!status))
                    return result;
            }
#endif
            return (uint64_t) -1;
        }
    }
    {
        /* Neither int nor long: coerce, then convert the coerced object. */
        uint64_t result;
        PyObject *coerced = __Pyx_PyNumber_Int(x);
        if (!coerced) return (uint64_t) -1;
        result = __Pyx_PyInt_As_uint64_t(coerced);
        Py_DECREF(coerced);
        return result;
    }
}
|
|
|
|
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
|
|
#if CYTHON_USE_PYLONG_INTERNALS
|
|
#include "longintrepr.h"
|
|
#endif
|
|
#endif
|
|
/* Convert the Python object `x` to a C `size_t`.
 * Python 2 int objects and long objects are converted directly with range
 * checks; any other object is first coerced with __Pyx_PyNumber_Int() and the
 * result converted recursively.  Returns (size_t)-1 when an error was
 * raised. */
static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *x) {
    const size_t neg_one = (size_t) -1, const_zero = 0;
    /* Signedness of the target type, determined from the cast of -1. */
    const int is_unsigned = neg_one > const_zero;
#if PY_MAJOR_VERSION < 3
    if (likely(PyInt_Check(x))) {
        long as_c_long;
        if (sizeof(size_t) < sizeof(long)) {
            __PYX_VERIFY_RETURN_INT(size_t, long, PyInt_AS_LONG)
        }
        as_c_long = PyInt_AS_LONG(x);
        if (is_unsigned && unlikely(as_c_long < 0)) {
            PyErr_SetString(PyExc_OverflowError,
                            "can't convert negative value to size_t");
            return (size_t) -1;
        }
        return (size_t) as_c_long;
    }
#endif
    if (likely(PyLong_Check(x))) {
        if (is_unsigned) {
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
 #if CYTHON_USE_PYLONG_INTERNALS
            /* Zero- and one-digit longs are read straight from the object. */
            if (sizeof(digit) <= sizeof(size_t)) {
                const Py_ssize_t ndigits = Py_SIZE(x);
                if (ndigits == 0) return 0;
                if (ndigits == 1) return (size_t) ((PyLongObject*)x)->ob_digit[0];
            }
 #endif
#endif
            if (unlikely(Py_SIZE(x) < 0)) {
                PyErr_SetString(PyExc_OverflowError,
                                "can't convert negative value to size_t");
                return (size_t) -1;
            }
            if (sizeof(size_t) <= sizeof(unsigned long)) {
                __PYX_VERIFY_RETURN_INT(size_t, unsigned long, PyLong_AsUnsignedLong)
            } else if (sizeof(size_t) <= sizeof(unsigned long long)) {
                __PYX_VERIFY_RETURN_INT(size_t, unsigned long long, PyLong_AsUnsignedLongLong)
            }
        } else {
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
 #if CYTHON_USE_PYLONG_INTERNALS
            if (sizeof(digit) <= sizeof(size_t)) {
                const Py_ssize_t ndigits = Py_SIZE(x);
                if (ndigits == 0) return 0;
                if (ndigits == 1) return +(size_t) ((PyLongObject*)x)->ob_digit[0];
                if (ndigits == -1) return -(size_t) ((PyLongObject*)x)->ob_digit[0];
            }
 #endif
#endif
            if (sizeof(size_t) <= sizeof(long)) {
                __PYX_VERIFY_RETURN_INT(size_t, long, PyLong_AsLong)
            } else if (sizeof(size_t) <= sizeof(long long)) {
                __PYX_VERIFY_RETURN_INT(size_t, long long, PyLong_AsLongLong)
            }
        }
        /* Only reached when no C integer type above is wide enough. */
        {
#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray)
            PyErr_SetString(PyExc_RuntimeError,
                            "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers");
#else
            size_t result;
            PyObject *as_long = __Pyx_PyNumber_Int(x);
 #if PY_MAJOR_VERSION < 3
            if (likely(as_long) && !PyLong_Check(as_long)) {
                PyObject *not_long = as_long;
                as_long = PyNumber_Long(not_long);
                Py_DECREF(not_long);
            }
 #endif
            if (likely(as_long)) {
                /* Runtime byte-order probe for _PyLong_AsByteArray(). */
                int probe = 1;
                int little_endian = (int)*(unsigned char *)&probe;
                unsigned char *buf = (unsigned char *)&result;
                int status = _PyLong_AsByteArray((PyLongObject *)as_long,
                                                 buf, sizeof(result),
                                                 little_endian, !is_unsigned);
                Py_DECREF(as_long);
                if (likely(!status))
                    return result;
            }
#endif
            return (size_t) -1;
        }
    }
    {
        /* Neither int nor long: coerce, then convert the coerced object. */
        size_t result;
        PyObject *coerced = __Pyx_PyNumber_Int(x);
        if (!coerced) return (size_t) -1;
        result = __Pyx_PyInt_As_size_t(coerced);
        Py_DECREF(coerced);
        return result;
    }
}
|
|
|
|
/* Build a Python integer object from a C `uint64_t`, choosing the narrowest
 * suitable C-API constructor by comparing type sizes; when none fits, the
 * value is converted through _PyLong_FromByteArray(). */
static CYTHON_INLINE PyObject* __Pyx_PyInt_From_uint64_t(uint64_t value) {
    const uint64_t neg_one = (uint64_t) -1, const_zero = 0;
    /* Signedness of the source type, determined from the cast of -1. */
    const int is_unsigned = neg_one > const_zero;
    if (is_unsigned) {
        if (sizeof(uint64_t) < sizeof(long))
            return PyInt_FromLong((long) value);
        if (sizeof(uint64_t) <= sizeof(unsigned long))
            return PyLong_FromUnsignedLong((unsigned long) value);
        if (sizeof(uint64_t) <= sizeof(unsigned long long))
            return PyLong_FromUnsignedLongLong((unsigned long long) value);
    } else {
        if (sizeof(uint64_t) <= sizeof(long))
            return PyInt_FromLong((long) value);
        if (sizeof(uint64_t) <= sizeof(long long))
            return PyLong_FromLongLong((long long) value);
    }
    {
        /* Runtime byte-order probe for _PyLong_FromByteArray(). */
        int probe = 1;
        int little_endian = (int)*(unsigned char *)&probe;
        unsigned char *raw = (unsigned char *)&value;
        return _PyLong_FromByteArray(raw, sizeof(uint64_t),
                                     little_endian, !is_unsigned);
    }
}
|
|
|
|
/* Build a Python integer object from a C `long`, choosing the narrowest
 * suitable C-API constructor by comparing type sizes; when none fits, the
 * value is converted through _PyLong_FromByteArray(). */
static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) {
    const long neg_one = (long) -1, const_zero = 0;
    /* Signedness of the source type, determined from the cast of -1. */
    const int is_unsigned = neg_one > const_zero;
    if (is_unsigned) {
        if (sizeof(long) < sizeof(long))
            return PyInt_FromLong((long) value);
        if (sizeof(long) <= sizeof(unsigned long))
            return PyLong_FromUnsignedLong((unsigned long) value);
        if (sizeof(long) <= sizeof(unsigned long long))
            return PyLong_FromUnsignedLongLong((unsigned long long) value);
    } else {
        if (sizeof(long) <= sizeof(long))
            return PyInt_FromLong((long) value);
        if (sizeof(long) <= sizeof(long long))
            return PyLong_FromLongLong((long long) value);
    }
    {
        /* Runtime byte-order probe for _PyLong_FromByteArray(). */
        int probe = 1;
        int little_endian = (int)*(unsigned char *)&probe;
        unsigned char *raw = (unsigned char *)&value;
        return _PyLong_FromByteArray(raw, sizeof(long),
                                     little_endian, !is_unsigned);
    }
}
|
|
|
|
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
|
|
#if CYTHON_USE_PYLONG_INTERNALS
|
|
#include "longintrepr.h"
|
|
#endif
|
|
#endif
|
|
/* Convert the Python object `x` to a C `long`.
 * Python 2 int objects and long objects are converted directly with range
 * checks; any other object is first coerced with __Pyx_PyNumber_Int() and the
 * result converted recursively.  Returns (long)-1 when an error was raised. */
static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) {
    const long neg_one = (long) -1, const_zero = 0;
    /* Signedness of the target type, determined from the cast of -1. */
    const int is_unsigned = neg_one > const_zero;
#if PY_MAJOR_VERSION < 3
    if (likely(PyInt_Check(x))) {
        long as_c_long;
        if (sizeof(long) < sizeof(long)) {
            __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG)
        }
        as_c_long = PyInt_AS_LONG(x);
        if (is_unsigned && unlikely(as_c_long < 0)) {
            PyErr_SetString(PyExc_OverflowError,
                            "can't convert negative value to long");
            return (long) -1;
        }
        return (long) as_c_long;
    }
#endif
    if (likely(PyLong_Check(x))) {
        if (is_unsigned) {
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
 #if CYTHON_USE_PYLONG_INTERNALS
            /* Zero- and one-digit longs are read straight from the object. */
            if (sizeof(digit) <= sizeof(long)) {
                const Py_ssize_t ndigits = Py_SIZE(x);
                if (ndigits == 0) return 0;
                if (ndigits == 1) return (long) ((PyLongObject*)x)->ob_digit[0];
            }
 #endif
#endif
            if (unlikely(Py_SIZE(x) < 0)) {
                PyErr_SetString(PyExc_OverflowError,
                                "can't convert negative value to long");
                return (long) -1;
            }
            if (sizeof(long) <= sizeof(unsigned long)) {
                __PYX_VERIFY_RETURN_INT(long, unsigned long, PyLong_AsUnsignedLong)
            } else if (sizeof(long) <= sizeof(unsigned long long)) {
                __PYX_VERIFY_RETURN_INT(long, unsigned long long, PyLong_AsUnsignedLongLong)
            }
        } else {
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
 #if CYTHON_USE_PYLONG_INTERNALS
            if (sizeof(digit) <= sizeof(long)) {
                const Py_ssize_t ndigits = Py_SIZE(x);
                if (ndigits == 0) return 0;
                if (ndigits == 1) return +(long) ((PyLongObject*)x)->ob_digit[0];
                if (ndigits == -1) return -(long) ((PyLongObject*)x)->ob_digit[0];
            }
 #endif
#endif
            if (sizeof(long) <= sizeof(long)) {
                __PYX_VERIFY_RETURN_INT(long, long, PyLong_AsLong)
            } else if (sizeof(long) <= sizeof(long long)) {
                __PYX_VERIFY_RETURN_INT(long, long long, PyLong_AsLongLong)
            }
        }
        /* Only reached when no C integer type above is wide enough. */
        {
#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray)
            PyErr_SetString(PyExc_RuntimeError,
                            "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers");
#else
            long result;
            PyObject *as_long = __Pyx_PyNumber_Int(x);
 #if PY_MAJOR_VERSION < 3
            if (likely(as_long) && !PyLong_Check(as_long)) {
                PyObject *not_long = as_long;
                as_long = PyNumber_Long(not_long);
                Py_DECREF(not_long);
            }
 #endif
            if (likely(as_long)) {
                /* Runtime byte-order probe for _PyLong_AsByteArray(). */
                int probe = 1;
                int little_endian = (int)*(unsigned char *)&probe;
                unsigned char *buf = (unsigned char *)&result;
                int status = _PyLong_AsByteArray((PyLongObject *)as_long,
                                                 buf, sizeof(result),
                                                 little_endian, !is_unsigned);
                Py_DECREF(as_long);
                if (likely(!status))
                    return result;
            }
#endif
            return (long) -1;
        }
    }
    {
        /* Neither int nor long: coerce, then convert the coerced object. */
        long result;
        PyObject *coerced = __Pyx_PyNumber_Int(x);
        if (!coerced) return (long) -1;
        result = __Pyx_PyInt_As_long(coerced);
        Py_DECREF(coerced);
        return result;
    }
}
|
|
|
|
static int __Pyx_check_binary_version(void) {
|
|
char ctversion[4], rtversion[4];
|
|
PyOS_snprintf(ctversion, 4, "%d.%d", PY_MAJOR_VERSION, PY_MINOR_VERSION);
|
|
PyOS_snprintf(rtversion, 4, "%s", Py_GetVersion());
|
|
if (ctversion[0] != rtversion[0] || ctversion[2] != rtversion[2]) {
|
|
char message[200];
|
|
PyOS_snprintf(message, sizeof(message),
|
|
"compiletime version %s of module '%.100s' "
|
|
"does not match runtime version %s",
|
|
ctversion, __Pyx_MODULE_NAME, rtversion);
|
|
#if PY_VERSION_HEX < 0x02050000
|
|
return PyErr_Warn(NULL, message);
|
|
#else
|
|
return PyErr_WarnEx(NULL, message, 1);
|
|
#endif
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* Publish pointer `p` under key `name` in the module's __pyx_capi__ dict,
 * wrapped in a capsule (or a CObject on older Pythons) that carries the
 * signature string `sig`.  The dict is created and attached to the module if
 * it does not exist yet.  Returns 0 on success, -1 on failure. */
static int __Pyx_ExportVoidPtr(PyObject *name, void *p, const char *sig) {
    int status = -1;
    PyObject *wrapper = 0;
    PyObject *capi = PyDict_GetItem(__pyx_d, __pyx_n_s_pyx_capi);
    /* PyDict_GetItem() returns a borrowed reference (or NULL); own it so
     * both the existing-dict and new-dict paths are released the same way. */
    Py_XINCREF(capi);
    if (!capi) {
        capi = PyDict_New();
        if (!capi)
            goto done;
        if (__Pyx_PyObject_SetAttrStr(__pyx_m, __pyx_n_s_pyx_capi, capi) < 0)
            goto done;
    }
#if PY_VERSION_HEX >= 0x02070000 && !(PY_MAJOR_VERSION==3 && PY_MINOR_VERSION==0)
    wrapper = PyCapsule_New(p, sig, 0);
#else
    wrapper = PyCObject_FromVoidPtrAndDesc(p, (void *)sig, 0);
#endif
    if (!wrapper)
        goto done;
    if (PyDict_SetItem(capi, name, wrapper) < 0)
        goto done;
    status = 0;
done:
    Py_XDECREF(wrapper);
    Py_XDECREF(capi);
    return status;
}
|
|
|
|
static int __Pyx_ExportFunction(const char *name, void (*f)(void), const char *sig) {
|
|
PyObject *d = 0;
|
|
PyObject *cobj = 0;
|
|
union {
|
|
void (*fp)(void);
|
|
void *p;
|
|
} tmp;
|
|
d = PyObject_GetAttrString(__pyx_m, (char *)"__pyx_capi__");
|
|
if (!d) {
|
|
PyErr_Clear();
|
|
d = PyDict_New();
|
|
if (!d)
|
|
goto bad;
|
|
Py_INCREF(d);
|
|
if (PyModule_AddObject(__pyx_m, (char *)"__pyx_capi__", d) < 0)
|
|
goto bad;
|
|
}
|
|
tmp.fp = f;
|
|
#if PY_VERSION_HEX >= 0x02070000 && !(PY_MAJOR_VERSION==3&&PY_MINOR_VERSION==0)
|
|
cobj = PyCapsule_New(tmp.p, sig, 0);
|
|
#else
|
|
cobj = PyCObject_FromVoidPtrAndDesc(tmp.p, (void *)sig, 0);
|
|
#endif
|
|
if (!cobj)
|
|
goto bad;
|
|
if (PyDict_SetItemString(d, name, cobj) < 0)
|
|
goto bad;
|
|
Py_DECREF(cobj);
|
|
Py_DECREF(d);
|
|
return 0;
|
|
bad:
|
|
Py_XDECREF(cobj);
|
|
Py_XDECREF(d);
|
|
return -1;
|
|
}
|
|
|
|
/* Binary search over `entries[0..count)` by `code_line`.
 * Returns the index of the entry whose code_line equals `code_line`, or the
 * index at which such an entry would have to be inserted (`count` when
 * `code_line` is greater than the last entry's).  The search assumes the
 * entries are ordered by ascending code_line. */
static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) {
    int lo = 0;
    int hi = count - 1;
    int probe = 0;

    /* Past the last entry: insertion point is the end. */
    if (hi >= 0 && code_line > entries[hi].code_line)
        return count;

    while (lo < hi) {
        probe = (lo + hi) / 2;
        if (code_line < entries[probe].code_line)
            hi = probe;
        else if (code_line > entries[probe].code_line)
            lo = probe + 1;
        else
            return probe;
    }
    /* No exact hit inside the loop: decide relative to the last probe. */
    return (code_line <= entries[probe].code_line) ? probe : probe + 1;
}
|
|
static PyCodeObject *__pyx_find_code_object(int code_line) {
|
|
PyCodeObject* code_object;
|
|
int pos;
|
|
if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) {
|
|
return NULL;
|
|
}
|
|
pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
|
|
if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) {
|
|
return NULL;
|
|
}
|
|
code_object = __pyx_code_cache.entries[pos].code_object;
|
|
Py_INCREF(code_object);
|
|
return code_object;
|
|
}
|
|
/* Register `code_object` in the code-object cache under `code_line`.
 *
 * The cache keeps its own reference to every stored object.  An existing
 * entry for the same line is replaced.  The entry array starts with 64 slots
 * and grows by 64 at a time; if an allocation fails the cache is simply left
 * unchanged.  A `code_line` of 0 is ignored. */
static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) {
    int pos, i;
    __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries;
    if (unlikely(!code_line)) {
        return;
    }
    if (unlikely(!entries)) {
        /* First insertion: allocate the initial array. */
        entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry));
        if (likely(entries)) {
            __pyx_code_cache.entries = entries;
            __pyx_code_cache.max_count = 64;
            __pyx_code_cache.count = 1;
            entries[0].code_line = code_line;
            entries[0].code_object = code_object;
            Py_INCREF(code_object);
        }
        return;
    }
    pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
    if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) {
        /* Replace the existing entry.  The cache must own a reference to the
         * new object, exactly as on the insertion paths; take it before the
         * old object is released. */
        PyCodeObject* tmp = entries[pos].code_object;
        Py_INCREF(code_object);
        entries[pos].code_object = code_object;
        Py_DECREF(tmp);
        return;
    }
    if (__pyx_code_cache.count == __pyx_code_cache.max_count) {
        int new_max = __pyx_code_cache.max_count + 64;
        /* The result goes into the local first so the cache keeps its valid
         * pointer if the reallocation fails. */
        entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc(
            __pyx_code_cache.entries, new_max*sizeof(__Pyx_CodeObjectCacheEntry));
        if (unlikely(!entries)) {
            return;
        }
        __pyx_code_cache.entries = entries;
        __pyx_code_cache.max_count = new_max;
    }
    /* Shift the tail up by one slot to make room at `pos`. */
    for (i=__pyx_code_cache.count; i>pos; i--) {
        entries[i] = entries[i-1];
    }
    entries[pos].code_line = code_line;
    entries[pos].code_object = code_object;
    __pyx_code_cache.count++;
    Py_INCREF(code_object);
}
|
|
|
|
#include "compile.h"
|
|
#include "frameobject.h"
|
|
#include "traceback.h"
|
|
static PyCodeObject* __Pyx_CreateCodeObjectForTraceback(
|
|
const char *funcname, int c_line,
|
|
int py_line, const char *filename) {
|
|
PyCodeObject *py_code = 0;
|
|
PyObject *py_srcfile = 0;
|
|
PyObject *py_funcname = 0;
|
|
#if PY_MAJOR_VERSION < 3
|
|
py_srcfile = PyString_FromString(filename);
|
|
#else
|
|
py_srcfile = PyUnicode_FromString(filename);
|
|
#endif
|
|
if (!py_srcfile) goto bad;
|
|
if (c_line) {
|
|
#if PY_MAJOR_VERSION < 3
|
|
py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line);
|
|
#else
|
|
py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line);
|
|
#endif
|
|
}
|
|
else {
|
|
#if PY_MAJOR_VERSION < 3
|
|
py_funcname = PyString_FromString(funcname);
|
|
#else
|
|
py_funcname = PyUnicode_FromString(funcname);
|
|
#endif
|
|
}
|
|
if (!py_funcname) goto bad;
|
|
py_code = __Pyx_PyCode_New(
|
|
0, /*int argcount,*/
|
|
0, /*int kwonlyargcount,*/
|
|
0, /*int nlocals,*/
|
|
0, /*int stacksize,*/
|
|
0, /*int flags,*/
|
|
__pyx_empty_bytes, /*PyObject *code,*/
|
|
__pyx_empty_tuple, /*PyObject *consts,*/
|
|
__pyx_empty_tuple, /*PyObject *names,*/
|
|
__pyx_empty_tuple, /*PyObject *varnames,*/
|
|
__pyx_empty_tuple, /*PyObject *freevars,*/
|
|
__pyx_empty_tuple, /*PyObject *cellvars,*/
|
|
py_srcfile, /*PyObject *filename,*/
|
|
py_funcname, /*PyObject *name,*/
|
|
py_line, /*int firstlineno,*/
|
|
__pyx_empty_bytes /*PyObject *lnotab*/
|
|
);
|
|
Py_DECREF(py_srcfile);
|
|
Py_DECREF(py_funcname);
|
|
return py_code;
|
|
bad:
|
|
Py_XDECREF(py_srcfile);
|
|
Py_XDECREF(py_funcname);
|
|
return NULL;
|
|
}
|
|
static void __Pyx_AddTraceback(const char *funcname, int c_line,
|
|
int py_line, const char *filename) {
|
|
PyCodeObject *py_code = 0;
|
|
PyObject *py_globals = 0;
|
|
PyFrameObject *py_frame = 0;
|
|
py_code = __pyx_find_code_object(c_line ? c_line : py_line);
|
|
if (!py_code) {
|
|
py_code = __Pyx_CreateCodeObjectForTraceback(
|
|
funcname, c_line, py_line, filename);
|
|
if (!py_code) goto bad;
|
|
__pyx_insert_code_object(c_line ? c_line : py_line, py_code);
|
|
}
|
|
py_globals = PyModule_GetDict(__pyx_m);
|
|
if (!py_globals) goto bad;
|
|
py_frame = PyFrame_New(
|
|
PyThreadState_GET(), /*PyThreadState *tstate,*/
|
|
py_code, /*PyCodeObject *code,*/
|
|
py_globals, /*PyObject *globals,*/
|
|
0 /*PyObject *locals*/
|
|
);
|
|
if (!py_frame) goto bad;
|
|
py_frame->f_lineno = py_line;
|
|
PyTraceBack_Here(py_frame);
|
|
bad:
|
|
Py_XDECREF(py_code);
|
|
Py_XDECREF(py_frame);
|
|
}
|
|
|
|
/*
 * Materialise every entry of a string table as a Python object.
 *
 * The table is walked until an entry with a null `p` is found. For each
 * entry the object is built from the C data `s` (length `n - 1`) according
 * to the entry's flags and stored through `p`.
 *
 * Returns 0 on success, -1 as soon as one object cannot be created.
 */
static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {
    for (; t->p; ++t) {
        PyObject *made;
        #if PY_MAJOR_VERSION < 3
        if (t->is_unicode) {
            made = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);
        } else if (t->intern) {
            made = PyString_InternFromString(t->s);
        } else {
            made = PyString_FromStringAndSize(t->s, t->n - 1);
        }
        #else  /* Python 3+ has unicode identifiers */
        if (!(t->is_unicode | t->is_str)) {
            made = PyBytes_FromStringAndSize(t->s, t->n - 1);
        } else if (t->intern) {
            made = PyUnicode_InternFromString(t->s);
        } else if (t->encoding) {
            made = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL);
        } else {
            made = PyUnicode_FromStringAndSize(t->s, t->n - 1);
        }
        #endif
        /* The slot is written even on failure, then checked. */
        *t->p = made;
        if (!made)
            return -1;
    }
    return 0;
}
|
|
|
|
/*
 * Create a unicode object from a NUL-terminated C string by measuring it
 * with strlen() and delegating to __Pyx_PyUnicode_FromStringAndSize.
 */
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(char* c_str) {
    const size_t byte_count = strlen(c_str);
    return __Pyx_PyUnicode_FromStringAndSize(c_str, byte_count);
}
|
|
/*
 * Convenience wrapper around __Pyx_PyObject_AsStringAndSize for callers
 * that only need the character pointer; the reported length is discarded.
 */
static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) {
    Py_ssize_t discarded_length;
    char *text = __Pyx_PyObject_AsStringAndSize(o, &discarded_length);
    return text;
}
|
|
/*
 * Obtain a char pointer and byte length for `o`.
 *
 * Depending on the compile-time configuration, unicode objects are accepted
 * (via the default encoding, or restricted to ASCII), then bytearray
 * objects (outside PyPy, Python >= 2.6), and finally anything
 * PyBytes_AsStringAndSize() accepts.
 *
 * On success the pointer is returned and *length is set; on failure NULL is
 * returned.
 */
static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) {
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
    if (
#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
            __Pyx_sys_getdefaultencoding_not_ascii &&
#endif
            PyUnicode_Check(o)) {
#if PY_VERSION_HEX < 0x03030000
        char* encoded_data;
        PyObject* encoded = _PyUnicode_AsDefaultEncodedString(o, NULL);
        if (!encoded) return NULL;
        encoded_data = PyBytes_AS_STRING(encoded);
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
        {
            /* Reject any byte outside the 7-bit range. */
            const Py_ssize_t encoded_size = PyBytes_GET_SIZE(encoded);
            Py_ssize_t i;
            for (i = 0; i < encoded_size; i++) {
                if ((unsigned char) encoded_data[i] >= 128) {
                    /* Result deliberately ignored; presumably called so that
                       the ASCII codec sets the matching exception. */
                    PyUnicode_AsASCIIString(o);
                    return NULL;
                }
            }
        }
#endif /*__PYX_DEFAULT_STRING_ENCODING_IS_ASCII*/
        *length = PyBytes_GET_SIZE(encoded);
        return encoded_data;
#else /* PY_VERSION_HEX < 0x03030000 */
        if (PyUnicode_READY(o) == -1) return NULL;
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
        if (!PyUnicode_IS_ASCII(o)) {
            /* Result deliberately ignored; presumably called so that the
               ASCII codec sets the matching exception. */
            PyUnicode_AsASCIIString(o);
            return NULL;
        }
        *length = PyUnicode_GET_DATA_SIZE(o);
        return PyUnicode_AsUTF8(o);
#else /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */
        return PyUnicode_AsUTF8AndSize(o, length);
#endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */
#endif /* PY_VERSION_HEX < 0x03030000 */
    } else
#endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT */
#if !CYTHON_COMPILING_IN_PYPY
#if PY_VERSION_HEX >= 0x02060000
    if (PyByteArray_Check(o)) {
        *length = PyByteArray_GET_SIZE(o);
        return PyByteArray_AS_STRING(o);
    } else
#endif
#endif
    {
        /* Final fallback: let the bytes API extract the buffer. */
        char* buffer;
        if (unlikely(PyBytes_AsStringAndSize(o, &buffer, length) < 0)) {
            return NULL;
        }
        return buffer;
    }
}
|
|
/*
 * Truth test with shortcuts for the three singletons: Py_True yields 1,
 * Py_False and Py_None yield 0, and everything else is delegated to
 * PyObject_IsTrue().
 */
static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {
    if (x == Py_True) {
        return 1;
    }
    if (x == Py_False || x == Py_None) {
        return 0;
    }
    return PyObject_IsTrue(x);
}
|
|
/*
 * Coerce `x` to a Python integer object.
 *
 * Objects that already are int/long (Python 2) or int (Python 3) are
 * returned as a new reference. Otherwise the type's nb_int slot (or, on
 * Python 2, nb_long as a fallback) decides which conversion is attempted.
 *
 * Returns a new reference, or NULL with TypeError set when no conversion
 * slot exists ("an integer is required") or when the conversion produced a
 * non-integer object.
 */
static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) {
    PyNumberMethods *slots;
    const char *slot_name = NULL;
    PyObject *converted = NULL;
    int is_integer;

#if PY_MAJOR_VERSION < 3
    is_integer = PyInt_Check(x) || PyLong_Check(x);
#else
    is_integer = PyLong_Check(x);
#endif
    if (is_integer) {
        Py_INCREF(x);
        return x;
    }

    slots = Py_TYPE(x)->tp_as_number;
#if PY_MAJOR_VERSION < 3
    if (slots && slots->nb_int) {
        slot_name = "int";
        converted = PyNumber_Int(x);
    }
    else if (slots && slots->nb_long) {
        slot_name = "long";
        converted = PyNumber_Long(x);
    }
#else
    if (slots && slots->nb_int) {
        slot_name = "int";
        converted = PyNumber_Long(x);
    }
#endif

    if (!converted) {
        /* Either no slot was available or the conversion itself failed;
           only supply a message when none has been set yet. */
        if (!PyErr_Occurred()) {
            PyErr_SetString(PyExc_TypeError,
                            "an integer is required");
        }
        return NULL;
    }

#if PY_MAJOR_VERSION < 3
    is_integer = PyInt_Check(converted) || PyLong_Check(converted);
#else
    is_integer = PyLong_Check(converted);
#endif
    if (!is_integer) {
        PyErr_Format(PyExc_TypeError,
                     "__%.4s__ returned non-%.4s (type %.200s)",
                     slot_name, slot_name, Py_TYPE(converted)->tp_name);
        Py_DECREF(converted);
        return NULL;
    }
    return converted;
}
|
|
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
|
|
#if CYTHON_USE_PYLONG_INTERNALS
|
|
#include "longintrepr.h"
|
|
#endif
|
|
#endif
|
|
/*
 * Convert an index-like object to Py_ssize_t.
 *
 * Exact int (Python 2) and exact long objects are converted directly; when
 * the PyLong internals are enabled, values whose Py_SIZE is -1, 0 or 1 are
 * read straight from the first digit. Any other object goes through
 * PyNumber_Index() first.
 *
 * Returns the value, or -1 when PyNumber_Index() fails (the conversion
 * functions' own return values are passed through unchanged).
 */
static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
    Py_ssize_t value;
    PyObject *as_index;
#if PY_MAJOR_VERSION < 3
    if (likely(PyInt_CheckExact(b)))
        return PyInt_AS_LONG(b);
#endif
    if (likely(PyLong_CheckExact(b))) {
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 && CYTHON_USE_PYLONG_INTERNALS
        const Py_ssize_t size_field = Py_SIZE(b);
        if (size_field == 0) {
            return 0;
        } else if (size_field == 1) {
            return ((PyLongObject*)b)->ob_digit[0];
        } else if (size_field == -1) {
            return -(sdigit)((PyLongObject*)b)->ob_digit[0];
        }
        /* Larger magnitudes fall through to the generic conversion. */
#endif
#if PY_VERSION_HEX < 0x02060000
        return PyInt_AsSsize_t(b);
#else
        return PyLong_AsSsize_t(b);
#endif
    }
    as_index = PyNumber_Index(b);
    if (!as_index) return -1;
    value = PyInt_AsSsize_t(as_index);
    Py_DECREF(as_index);
    return value;
}
|
|
/*
 * Create a Python integer from a size_t.
 *
 * From Python 2.5 on this is PyInt_FromSize_t(). On older versions, values
 * up to LONG_MAX go through PyInt_FromLong(); larger ones are converted
 * from the raw bytes of `ival` as an unsigned number, with the byte order
 * detected at run time.
 */
static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {
#if PY_VERSION_HEX >= 0x02050000
    return PyInt_FromSize_t(ival);
#else
    if (ival > LONG_MAX) {
        /* The first byte of an int holding 1 is 1 only on little-endian
           machines. */
        int probe = 1;
        int is_little_endian = (int)*(unsigned char*)&probe;
        unsigned char *raw = (unsigned char *) &ival;
        return _PyLong_FromByteArray(raw, sizeof(size_t), is_little_endian, 0);
    }
    return PyInt_FromLong((long)ival);
#endif
}
|
|
|
|
|
|
#endif /* Py_PYTHON_H */
|