2014-07-07 06:21:06 +04:00
/* Generated by Cython 0.20.1 on Mon Jul 7 04:19:15 2014 */
2014-07-05 22:51:42 +04:00
# define PY_SSIZE_T_CLEAN
/* Decide whether CYTHON_USE_PYLONG_INTERNALS is enabled, unless the build
   already defined it.
   - If PYLONG_BITS_IN_DIGIT is defined before pyconfig.h has been included
     here, the flag is set to 0.
   - Otherwise pyconfig.h is included and the flag becomes 1 only when that
     header defines PYLONG_BITS_IN_DIGIT, and 0 when it does not. */
# ifndef CYTHON_USE_PYLONG_INTERNALS
# ifdef PYLONG_BITS_IN_DIGIT
# define CYTHON_USE_PYLONG_INTERNALS 0
# else
# include "pyconfig.h"
# ifdef PYLONG_BITS_IN_DIGIT
# define CYTHON_USE_PYLONG_INTERNALS 1
# else
# define CYTHON_USE_PYLONG_INTERNALS 0
# endif
# endif
# endif
# include "Python.h"
# ifndef Py_PYTHON_H
# error Python headers needed to compile C extensions, please install development version of Python.
# elif PY_VERSION_HEX < 0x02040000
# error Cython requires Python 2.4+.
# else
# define CYTHON_ABI "0_20_1"
# include <stddef.h> /* For offsetof */
# ifndef offsetof
# define offsetof(type, member) ( (size_t) & ((type*)0) -> member )
# endif
# if !defined(WIN32) && !defined(MS_WINDOWS)
# ifndef __stdcall
# define __stdcall
# endif
# ifndef __cdecl
# define __cdecl
# endif
# ifndef __fastcall
# define __fastcall
# endif
# endif
# ifndef DL_IMPORT
# define DL_IMPORT(t) t
# endif
# ifndef DL_EXPORT
# define DL_EXPORT(t) t
# endif
# ifndef PY_LONG_LONG
# define PY_LONG_LONG LONG_LONG
# endif
# ifndef Py_HUGE_VAL
# define Py_HUGE_VAL HUGE_VAL
# endif
# ifdef PYPY_VERSION
# define CYTHON_COMPILING_IN_PYPY 1
# define CYTHON_COMPILING_IN_CPYTHON 0
# else
# define CYTHON_COMPILING_IN_PYPY 0
# define CYTHON_COMPILING_IN_CPYTHON 1
# endif
# if CYTHON_COMPILING_IN_PYPY
# define Py_OptimizeFlag 0
# endif
/* Compatibility shims for Python older than 2.5 (PY_VERSION_HEX < 0x02050000).
   In that branch Py_ssize_t is typedef'd to int, its limits are INT_MAX and
   INT_MIN, and the ssize_t/index helpers are mapped onto int/long based
   equivalents.  On 2.5 and later only the build/format codes and the
   __Pyx_PyIndex_Check alias are defined. */
# if PY_VERSION_HEX < 0x02050000
typedef int Py_ssize_t ;
# define PY_SSIZE_T_MAX INT_MAX
# define PY_SSIZE_T_MIN INT_MIN
# define PY_FORMAT_SIZE_T ""
# define CYTHON_FORMAT_SSIZE_T ""
# define PyInt_FromSsize_t(z) PyInt_FromLong(z)
# define PyInt_AsSsize_t(o) __Pyx_PyInt_As_int(o)
/* Stand-in for PyNumber_Index: numbers that are not floats go through
   PyNumber_Int; anything else raises TypeError and yields a NULL object. */
# define PyNumber_Index(o) ((PyNumber_Check(o) && !PyFloat_Check(o)) ? PyNumber_Int(o) : \
( PyErr_Format ( PyExc_TypeError , \
" expected index value, got %.200s " , Py_TYPE ( o ) - > tp_name ) , \
( PyObject * ) 0 ) )
/* Index check used in this branch: a number that is neither float nor complex. */
# define __Pyx_PyIndex_Check(o) (PyNumber_Check(o) && !PyFloat_Check(o) && \
! PyComplex_Check ( o ) )
# define PyIndex_Check __Pyx_PyIndex_Check
/* PyErr_WarnEx is mapped to PyErr_Warn; the stacklevel argument is dropped. */
# define PyErr_WarnEx(category, message, stacklevel) PyErr_Warn(category, message)
# define __PYX_BUILD_PY_SSIZE_T "i"
# else
# define __PYX_BUILD_PY_SSIZE_T "n"
# define CYTHON_FORMAT_SSIZE_T "z"
# define __Pyx_PyIndex_Check PyIndex_Check
# endif
/* Backfill for Python older than 2.6 (PY_VERSION_HEX < 0x02060000): object
   header accessors, PyVarObject_HEAD_INIT, a PyType_Modified that expands to
   nothing, and the buffer-protocol struct, request flags and function-pointer
   typedefs declared below. */
# if PY_VERSION_HEX < 0x02060000
# define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
# define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
# define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)
# define PyVarObject_HEAD_INIT(type, size) \
PyObject_HEAD_INIT ( type ) size ,
# define PyType_Modified(t)
/* Local declaration of the buffer view structure for this old-Python branch. */
typedef struct {
void * buf ;
PyObject * obj ;
Py_ssize_t len ;
Py_ssize_t itemsize ;
int readonly ;
int ndim ;
char * format ;
Py_ssize_t * shape ;
Py_ssize_t * strides ;
Py_ssize_t * suboffsets ;
void * internal ;
} Py_buffer ;
/* Buffer request flags.  Several are compositions: the STRIDES flag includes
   ND, and the contiguity/INDIRECT flags each include STRIDES. */
# define PyBUF_SIMPLE 0
# define PyBUF_WRITABLE 0x0001
# define PyBUF_FORMAT 0x0004
# define PyBUF_ND 0x0008
# define PyBUF_STRIDES (0x0010 | PyBUF_ND)
# define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES)
# define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES)
# define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES)
# define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES)
# define PyBUF_RECORDS (PyBUF_STRIDES | PyBUF_FORMAT | PyBUF_WRITABLE)
# define PyBUF_FULL (PyBUF_INDIRECT | PyBUF_FORMAT | PyBUF_WRITABLE)
/* Signatures of the functions that fill in and release a Py_buffer. */
typedef int ( * getbufferproc ) ( PyObject * , Py_buffer * , int ) ;
typedef void ( * releasebufferproc ) ( PyObject * , Py_buffer * ) ;
# endif
# if PY_MAJOR_VERSION < 3
# define __Pyx_BUILTIN_MODULE_NAME "__builtin__"
# define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \
PyCode_New ( a + k , l , s , f , code , c , n , v , fv , cell , fn , name , fline , lnos )
# define __Pyx_DefaultClassType PyClass_Type
# else
# define __Pyx_BUILTIN_MODULE_NAME "builtins"
# define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \
PyCode_New ( a , k , l , s , f , code , c , n , v , fv , cell , fn , name , fline , lnos )
# define __Pyx_DefaultClassType PyType_Type
# endif
# if PY_VERSION_HEX < 0x02060000
# define PyUnicode_FromString(s) PyUnicode_Decode(s, strlen(s), "UTF-8", "strict")
# endif
# if PY_MAJOR_VERSION >= 3
# define Py_TPFLAGS_CHECKTYPES 0
# define Py_TPFLAGS_HAVE_INDEX 0
# endif
# if (PY_VERSION_HEX < 0x02060000) || (PY_MAJOR_VERSION >= 3)
# define Py_TPFLAGS_HAVE_NEWBUFFER 0
# endif
# if PY_VERSION_HEX < 0x02060000
# define Py_TPFLAGS_HAVE_VERSION_TAG 0
# endif
# if PY_VERSION_HEX < 0x02060000 && !defined(Py_TPFLAGS_IS_ABSTRACT)
# define Py_TPFLAGS_IS_ABSTRACT 0
# endif
# if PY_VERSION_HEX < 0x030400a1 && !defined(Py_TPFLAGS_HAVE_FINALIZE)
# define Py_TPFLAGS_HAVE_FINALIZE 0
# endif
/* Unicode accessor layer.  When the Python version is above 0x03030000 and
   PyUnicode_KIND is available, CYTHON_PEP393_ENABLED is 1 and the __Pyx_
   accessors forward to the PyUnicode_KIND/DATA/READ family.  Otherwise it is
   0 and the same accessors are implemented on top of the Py_UNICODE buffer
   (PyUnicode_AS_UNICODE / PyUnicode_GET_SIZE); in that branch
   __Pyx_PyUnicode_READY is the constant 0 and the "kind" is
   sizeof(Py_UNICODE). */
# if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND)
# define CYTHON_PEP393_ENABLED 1
# define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ? \
0 : _PyUnicode_Ready ( ( PyObject * ) ( op ) ) )
# define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u)
# define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
# define __Pyx_PyUnicode_KIND(u) PyUnicode_KIND(u)
# define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u)
# define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i)
# else
# define CYTHON_PEP393_ENABLED 0
# define __Pyx_PyUnicode_READY(op) (0)
# define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u)
# define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i]))
# define __Pyx_PyUnicode_KIND(u) (sizeof(Py_UNICODE))
# define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u))
/* The kind argument k is evaluated and discarded; d is read as Py_UNICODE. */
# define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i]))
# endif
# if CYTHON_COMPILING_IN_PYPY
# define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b)
# define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b)
# else
# define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b)
# define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ? \
PyNumber_Add ( a , b ) : __Pyx_PyUnicode_Concat ( a , b ) )
# endif
# define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b))
# define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b))
# if PY_MAJOR_VERSION >= 3
# define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b)
# else
# define __Pyx_PyString_Format(a, b) PyString_Format(a, b)
# endif
# if PY_MAJOR_VERSION >= 3
# define PyBaseString_Type PyUnicode_Type
# define PyStringObject PyUnicodeObject
# define PyString_Type PyUnicode_Type
# define PyString_Check PyUnicode_Check
# define PyString_CheckExact PyUnicode_CheckExact
# endif
# if PY_VERSION_HEX < 0x02060000
# define PyBytesObject PyStringObject
# define PyBytes_Type PyString_Type
# define PyBytes_Check PyString_Check
# define PyBytes_CheckExact PyString_CheckExact
# define PyBytes_FromString PyString_FromString
# define PyBytes_FromStringAndSize PyString_FromStringAndSize
# define PyBytes_FromFormat PyString_FromFormat
# define PyBytes_DecodeEscape PyString_DecodeEscape
# define PyBytes_AsString PyString_AsString
# define PyBytes_AsStringAndSize PyString_AsStringAndSize
# define PyBytes_Size PyString_Size
# define PyBytes_AS_STRING PyString_AS_STRING
# define PyBytes_GET_SIZE PyString_GET_SIZE
# define PyBytes_Repr PyString_Repr
# define PyBytes_Concat PyString_Concat
# define PyBytes_ConcatAndDel PyString_ConcatAndDel
# endif
# if PY_MAJOR_VERSION >= 3
# define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj)
# define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj)
# else
# define __Pyx_PyBaseString_Check(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj) || \
PyString_Check ( obj ) | | PyUnicode_Check ( obj ) )
# define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj))
# endif
# if PY_VERSION_HEX < 0x02060000
# define PySet_Check(obj) PyObject_TypeCheck(obj, &PySet_Type)
# define PyFrozenSet_Check(obj) PyObject_TypeCheck(obj, &PyFrozenSet_Type)
# endif
# ifndef PySet_CheckExact
# define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type)
# endif
# define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type)
# if PY_MAJOR_VERSION >= 3
# define PyIntObject PyLongObject
# define PyInt_Type PyLong_Type
# define PyInt_Check(op) PyLong_Check(op)
# define PyInt_CheckExact(op) PyLong_CheckExact(op)
# define PyInt_FromString PyLong_FromString
# define PyInt_FromUnicode PyLong_FromUnicode
# define PyInt_FromLong PyLong_FromLong
# define PyInt_FromSize_t PyLong_FromSize_t
# define PyInt_FromSsize_t PyLong_FromSsize_t
# define PyInt_AsLong PyLong_AsLong
# define PyInt_AS_LONG PyLong_AS_LONG
# define PyInt_AsSsize_t PyLong_AsSsize_t
# define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask
# define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask
# define PyNumber_Int PyNumber_Long
# endif
# if PY_MAJOR_VERSION >= 3
# define PyBoolObject PyLongObject
# endif
# if PY_VERSION_HEX < 0x030200A4
typedef long Py_hash_t ;
# define __Pyx_PyInt_FromHash_t PyInt_FromLong
# define __Pyx_PyInt_AsHash_t PyInt_AsLong
# else
# define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t
# define __Pyx_PyInt_AsHash_t PyInt_AsSsize_t
# endif
# if (PY_MAJOR_VERSION < 3) || (PY_VERSION_HEX >= 0x03010300)
# define __Pyx_PySequence_GetSlice(obj, a, b) PySequence_GetSlice(obj, a, b)
# define __Pyx_PySequence_SetSlice(obj, a, b, value) PySequence_SetSlice(obj, a, b, value)
# define __Pyx_PySequence_DelSlice(obj, a, b) PySequence_DelSlice(obj, a, b)
# else
# define __Pyx_PySequence_GetSlice(obj, a, b) (unlikely(!(obj)) ? \
( PyErr_SetString ( PyExc_SystemError , " null argument to internal routine " ) , ( PyObject * ) 0 ) : \
( likely ( ( obj ) - > ob_type - > tp_as_mapping ) ? ( PySequence_GetSlice ( obj , a , b ) ) : \
( PyErr_Format ( PyExc_TypeError , " '%.200s' object is unsliceable " , ( obj ) - > ob_type - > tp_name ) , ( PyObject * ) 0 ) ) )
# define __Pyx_PySequence_SetSlice(obj, a, b, value) (unlikely(!(obj)) ? \
( PyErr_SetString ( PyExc_SystemError , " null argument to internal routine " ) , - 1 ) : \
( likely ( ( obj ) - > ob_type - > tp_as_mapping ) ? ( PySequence_SetSlice ( obj , a , b , value ) ) : \
( PyErr_Format ( PyExc_TypeError , " '%.200s' object doesn't support slice assignment " , ( obj ) - > ob_type - > tp_name ) , - 1 ) ) )
# define __Pyx_PySequence_DelSlice(obj, a, b) (unlikely(!(obj)) ? \
( PyErr_SetString ( PyExc_SystemError , " null argument to internal routine " ) , - 1 ) : \
( likely ( ( obj ) - > ob_type - > tp_as_mapping ) ? ( PySequence_DelSlice ( obj , a , b ) ) : \
( PyErr_Format ( PyExc_TypeError , " '%.200s' object doesn't support slice deletion " , ( obj ) - > ob_type - > tp_name ) , - 1 ) ) )
# endif
# if PY_MAJOR_VERSION >= 3
# define PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : PyInstanceMethod_New(func))
# endif
# if PY_VERSION_HEX < 0x02050000
# define __Pyx_GetAttrString(o,n) PyObject_GetAttrString((o),((char *)(n)))
# define __Pyx_SetAttrString(o,n,a) PyObject_SetAttrString((o),((char *)(n)),(a))
# define __Pyx_DelAttrString(o,n) PyObject_DelAttrString((o),((char *)(n)))
# else
# define __Pyx_GetAttrString(o,n) PyObject_GetAttrString((o),(n))
# define __Pyx_SetAttrString(o,n,a) PyObject_SetAttrString((o),(n),(a))
# define __Pyx_DelAttrString(o,n) PyObject_DelAttrString((o),(n))
# endif
# if PY_VERSION_HEX < 0x02050000
# define __Pyx_NAMESTR(n) ((char *)(n))
# define __Pyx_DOCSTR(n) ((char *)(n))
# else
# define __Pyx_NAMESTR(n) (n)
# define __Pyx_DOCSTR(n) (n)
# endif
/* CYTHON_INLINE: spelling of the inline keyword for the current compiler.
   A definition supplied by the build wins; otherwise GCC-compatible compilers
   get __inline__, MSVC gets __inline, C99-or-later compilers get inline, and
   anything else gets an empty expansion. */
# ifndef CYTHON_INLINE
# if defined(__GNUC__)
# define CYTHON_INLINE __inline__
# elif defined(_MSC_VER)
# define CYTHON_INLINE __inline
# elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
# define CYTHON_INLINE inline
# else
# define CYTHON_INLINE
# endif
# endif
/* CYTHON_RESTRICT: same selection scheme for the restrict qualifier.  The
   MSVC spelling is only used when _MSC_VER >= 1400. */
# ifndef CYTHON_RESTRICT
# if defined(__GNUC__)
# define CYTHON_RESTRICT __restrict__
# elif defined(_MSC_VER) && _MSC_VER >= 1400
# define CYTHON_RESTRICT __restrict
# elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
# define CYTHON_RESTRICT restrict
# else
# define CYTHON_RESTRICT
# endif
# endif
#ifdef NAN
  /* The platform provides NAN: just narrow it to float. */
  #define __PYX_NAN() ((float) NAN)
#else
/* No NAN macro available: manufacture a float NaN from its bit pattern. */
static CYTHON_INLINE float __PYX_NAN() {
  /* Filling every byte with 0xFF gives an all-ones exponent together with a
     non-zero mantissa, which is a NaN; the sign bit does not matter, and a
     leading mantissa bit of 1 makes it a quiet NaN. */
  float nan_bits;
  memset(&nan_bits, 0xFF, sizeof(nan_bits));
  return nan_bits;
}
#endif
/* Division helpers: on Python 3 and later they forward to the true-division
   API, on Python 2 to PyNumber_Divide / PyNumber_InPlaceDivide. */
# if PY_MAJOR_VERSION >= 3
# define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y)
# define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y)
# else
# define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y)
# define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y)
# endif
# ifndef __PYX_EXTERN_C
# ifdef __cplusplus
# define __PYX_EXTERN_C extern "C"
# else
# define __PYX_EXTERN_C extern
# endif
# endif
# if defined(WIN32) || defined(MS_WINDOWS)
# define _USE_MATH_DEFINES
# endif
# include <math.h>
# define __PYX_HAVE__spacy__lexeme
# define __PYX_HAVE_API__spacy__lexeme
# include "stdint.h"
2014-07-07 06:21:06 +04:00
# include <vector>
# include "ios"
# include "new"
# include "stdexcept"
# include "typeinfo"
# include <utility>
# include "sparsehash/dense_hash_map"
# include "string.h"
# include "stdlib.h"
2014-07-05 22:51:42 +04:00
# ifdef _OPENMP
# include <omp.h>
# endif /* _OPENMP */
# ifdef PYREX_WITHOUT_ASSERTIONS
# define CYTHON_WITHOUT_ASSERTIONS
# endif
/* CYTHON_UNUSED: attribute used to mark entities that may be unused.
   Expands to __attribute__((__unused__)) for GCC when compiling C, for GCC
   3.4 or newer when compiling C++, and for the Intel compiler when it is not
   running in MSVC mode; expands to nothing everywhere else.  A definition
   supplied by the build is left untouched. */
# ifndef CYTHON_UNUSED
# if defined(__GNUC__)
# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
# define CYTHON_UNUSED __attribute__ ((__unused__))
# else
# define CYTHON_UNUSED
# endif
# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
# define CYTHON_UNUSED __attribute__ ((__unused__))
# else
# define CYTHON_UNUSED
# endif
# endif
/* One entry of the table passed to __Pyx_InitStrings.
   NOTE(review): field meanings below are inferred from their names and types
   only - confirm against the __Pyx_InitStrings implementation:
     p          - presumably where the created Python object is stored
     s, n       - presumably the C string data and its size
     encoding   - presumably the codec name used for decoding
     is_unicode, is_str, intern - presumably flags selecting the object kind
                  and whether it is interned */
typedef struct { PyObject * * p ; char * s ; const Py_ssize_t n ; const char * encoding ;
const char is_unicode ; const char is_str ; const char intern ; } __Pyx_StringTabEntry ; /*proto*/
# define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0
# define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0
# define __PYX_DEFAULT_STRING_ENCODING ""
# define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString
# define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
/* __Pyx_fits_Py_ssize_t(v, type, is_signed): non-zero when the value v of
   integer type `type` lies within the Py_ssize_t range.
   - `type` narrower than Py_ssize_t: always true.
   - `type` wider: v must be <= PY_SSIZE_T_MAX and, when is_signed is set,
     also >= PY_SSIZE_T_MIN.
   - same width: true when is_signed is set, otherwise v must be
     <= PY_SSIZE_T_MAX.
   v is expanded several times and without parentheses, so callers should
   pass a simple, side-effect-free expression.
   NOTE(review): the operators in the continuation lines appear split by
   whitespace ("| |", "& &", "= ="); verify this text against the generator
   output before compiling. */
# define __Pyx_fits_Py_ssize_t(v, type, is_signed) ( \
( sizeof ( type ) < sizeof ( Py_ssize_t ) ) | | \
( sizeof ( type ) > sizeof ( Py_ssize_t ) & & \
likely ( v < ( type ) PY_SSIZE_T_MAX | | \
v = = ( type ) PY_SSIZE_T_MAX ) & & \
( ! is_signed | | likely ( v > ( type ) PY_SSIZE_T_MIN | | \
v = = ( type ) PY_SSIZE_T_MIN ) ) ) | | \
( sizeof ( type ) = = sizeof ( Py_ssize_t ) & & \
( is_signed | | likely ( v < ( type ) PY_SSIZE_T_MAX | | \
v = = ( type ) PY_SSIZE_T_MAX ) ) ) )
static CYTHON_INLINE char * __Pyx_PyObject_AsString ( PyObject * ) ;
static CYTHON_INLINE char * __Pyx_PyObject_AsStringAndSize ( PyObject * , Py_ssize_t * length ) ;
# define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s))
# define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l)
# define __Pyx_PyBytes_FromString PyBytes_FromString
# define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize
static CYTHON_INLINE PyObject * __Pyx_PyUnicode_FromString ( char * ) ;
# if PY_MAJOR_VERSION < 3
# define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString
# define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
# else
# define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString
# define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize
# endif
# define __Pyx_PyObject_AsSString(s) ((signed char*) __Pyx_PyObject_AsString(s))
# define __Pyx_PyObject_AsUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s))
# define __Pyx_PyObject_FromUString(s) __Pyx_PyObject_FromString((char*)s)
# define __Pyx_PyBytes_FromUString(s) __Pyx_PyBytes_FromString((char*)s)
# define __Pyx_PyByteArray_FromUString(s) __Pyx_PyByteArray_FromString((char*)s)
# define __Pyx_PyStr_FromUString(s) __Pyx_PyStr_FromString((char*)s)
# define __Pyx_PyUnicode_FromUString(s) __Pyx_PyUnicode_FromString((char*)s)
# if PY_MAJOR_VERSION < 3
/* Length of a zero-terminated Py_UNICODE string.
   u must point at a buffer that contains a terminating zero code unit.
   Returns the number of code units before that terminator. */
static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u)
{
    size_t count = 0;
    /* Advance until the terminator is found; it is not counted. */
    while (u[count] != 0) {
        ++count;
    }
    return count;
}
# else
# define __Pyx_Py_UNICODE_strlen Py_UNICODE_strlen
# endif
# define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u))
# define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode
# define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode
# define __Pyx_Owned_Py_None(b) (Py_INCREF(Py_None), Py_None)
# define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False))
static CYTHON_INLINE int __Pyx_PyObject_IsTrue ( PyObject * ) ;
static CYTHON_INLINE PyObject * __Pyx_PyNumber_Int ( PyObject * x ) ;
static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t ( PyObject * ) ;
static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t ( size_t ) ;
# if CYTHON_COMPILING_IN_CPYTHON
# define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x))
# else
# define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x)
# endif
# define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x))
# if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
static int __Pyx_sys_getdefaultencoding_not_ascii ;
/* Check that Python's default encoding is compatible with a module that was
   compiled with c_string_encoding=ascii.

   Queries sys.getdefaultencoding().  If it is "ascii" the flag
   __Pyx_sys_getdefaultencoding_not_ascii is cleared.  Otherwise the flag is
   set and the 128 ASCII code points are round-tripped through the default
   encoding; the encoding is accepted only if every one of them encodes to the
   identical single byte.

   Returns 0 on success, -1 with a Python exception set on failure. */
static int __Pyx_init_sys_getdefaultencoding_params(void) {
    PyObject *sys = NULL;
    PyObject *default_encoding = NULL;
    PyObject *ascii_chars_u = NULL;
    PyObject *ascii_chars_b = NULL;
    const char *default_encoding_c;
    int result = -1;

    sys = PyImport_ImportModule("sys");
    if (sys == NULL) goto done;
    default_encoding = PyObject_CallMethod(sys, (char *) (const char *) "getdefaultencoding", NULL);
    if (default_encoding == NULL) goto done;

    /* PyBytes_AsString returns NULL (with an exception set) for a non-bytes
       object; never hand that to strcmp. */
    default_encoding_c = PyBytes_AsString(default_encoding);
    if (default_encoding_c == NULL) goto done;

    if (strcmp(default_encoding_c, "ascii") == 0) {
        __Pyx_sys_getdefaultencoding_not_ascii = 0;
    } else {
        char ascii_chars[128];
        int c;
        for (c = 0; c < 128; c++) {
            ascii_chars[c] = (char) c;
        }
        __Pyx_sys_getdefaultencoding_not_ascii = 1;
        ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL);
        if (ascii_chars_u == NULL) goto done;
        ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL);
        /* ascii_chars[0] is NUL, so a str*-style comparison would stop after
           the first byte.  Compare all 128 bytes with memcmp, and check the
           encoded length first so memcmp never reads past a shorter result. */
        if (ascii_chars_b == NULL ||
            !PyBytes_Check(ascii_chars_b) ||
            PyBytes_GET_SIZE(ascii_chars_b) != 128 ||
            memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) {
            PyErr_Format(
                PyExc_ValueError,
                " This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii. ",
                default_encoding_c);
            goto done;
        }
    }
    result = 0;

done:
    /* Single exit: every temporary is released on success and failure alike. */
    Py_XDECREF(sys);
    Py_XDECREF(default_encoding);
    Py_XDECREF(ascii_chars_u);
    Py_XDECREF(ascii_chars_b);
    return result;
}
# endif
# if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3
# define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL)
# else
# define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL)
# if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
static char * __PYX_DEFAULT_STRING_ENCODING ;
/* Capture Python's default encoding name for later string conversions.

   Queries sys.getdefaultencoding() and stores a heap-allocated copy of the
   name in __PYX_DEFAULT_STRING_ENCODING.  The copy is never freed by this
   function.

   Returns 0 on success, -1 with a Python exception set on failure. */
static int __Pyx_init_sys_getdefaultencoding_params(void) {
    PyObject *sys = NULL;
    PyObject *default_encoding = NULL;
    const char *default_encoding_c;
    char *encoding_copy;
    size_t encoding_size;
    int result = -1;

    sys = PyImport_ImportModule("sys");
    if (sys == NULL) goto done;
    default_encoding = PyObject_CallMethod(sys, (char *) (const char *) "getdefaultencoding", NULL);
    if (default_encoding == NULL) goto done;

    /* Checked accessor: yields NULL with an exception set for a non-bytes
       object instead of reading through an unchecked cast. */
    default_encoding_c = PyBytes_AsString(default_encoding);
    if (default_encoding_c == NULL) goto done;

    /* Reserve room for the terminating NUL as well as the characters. */
    encoding_size = strlen(default_encoding_c) + 1;
    encoding_copy = (char *) malloc(encoding_size);
    if (encoding_copy == NULL) {
        PyErr_NoMemory();
        goto done;
    }
    memcpy(encoding_copy, default_encoding_c, encoding_size);
    __PYX_DEFAULT_STRING_ENCODING = encoding_copy;
    result = 0;

done:
    Py_XDECREF(sys);
    Py_XDECREF(default_encoding);
    return result;
}
# endif
# endif
/* Branch-prediction hints.  With GCC newer than 2.95, likely(x) and
   unlikely(x) wrap __builtin_expect with the condition normalised to 0/1 via
   "!!"; with older GCC and with every other compiler they expand to the bare
   expression. */
# ifdef __GNUC__
/* Test for GCC > 2.95 */
# if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))
# define likely(x) __builtin_expect(!!(x), 1)
# define unlikely(x) __builtin_expect(!!(x), 0)
# else /* __GNUC__ > 2 ... */
# define likely(x) (x)
# define unlikely(x) (x)
# endif /* __GNUC__ > 2 ... */
# else /* __GNUC__ */
# define likely(x) (x)
# define unlikely(x) (x)
# endif /* __GNUC__ */
static PyObject * __pyx_m ;
static PyObject * __pyx_d ;
static PyObject * __pyx_b ;
static PyObject * __pyx_empty_tuple ;
static PyObject * __pyx_empty_bytes ;
static int __pyx_lineno ;
static int __pyx_clineno = 0 ;
static const char * __pyx_cfilenm = __FILE__ ;
static const char * __pyx_filename ;
static const char * __pyx_f [ ] = {
" lexeme.pyx " ,
} ;
2014-07-07 06:21:06 +04:00
/* "spacy/spacy.pxd":7
2014-07-05 22:51:42 +04:00
*
2014-07-07 06:21:06 +04:00
* # Circular import problems here
* ctypedef size_t Lexeme_addr # < < < < < < < < < < < < < <
* ctypedef uint64_t StringHash
* ctypedef dense_hash_map [ StringHash , Lexeme_addr ] Vocab
*/
typedef size_t __pyx_t_5spacy_5spacy_Lexeme_addr ;
/* "spacy/spacy.pxd":8
* # Circular import problems here
* ctypedef size_t Lexeme_addr
* ctypedef uint64_t StringHash # < < < < < < < < < < < < < <
* ctypedef dense_hash_map [ StringHash , Lexeme_addr ] Vocab
* ctypedef int ( * Splitter ) ( unicode word , size_t length )
*/
typedef uint64_t __pyx_t_5spacy_5spacy_StringHash ;
/* "spacy/lexeme.pxd":4
2014-07-05 22:51:42 +04:00
*
2014-07-07 06:21:06 +04:00
* # Put these above import to avoid circular import problem
2014-07-05 22:51:42 +04:00
* ctypedef int ClusterID # < < < < < < < < < < < < < <
* ctypedef uint64_t StringHash
2014-07-07 06:21:06 +04:00
* ctypedef size_t Lexeme_addr
2014-07-05 22:51:42 +04:00
*/
typedef int __pyx_t_5spacy_6lexeme_ClusterID ;
/* "spacy/lexeme.pxd":5
2014-07-07 06:21:06 +04:00
* # Put these above import to avoid circular import problem
2014-07-05 22:51:42 +04:00
* ctypedef int ClusterID
* ctypedef uint64_t StringHash # < < < < < < < < < < < < < <
2014-07-07 06:21:06 +04:00
* ctypedef size_t Lexeme_addr
2014-07-05 22:51:42 +04:00
*
*/
typedef uint64_t __pyx_t_5spacy_6lexeme_StringHash ;
2014-07-07 06:21:06 +04:00
/* "spacy/lexeme.pxd":6
* ctypedef int ClusterID
* ctypedef uint64_t StringHash
* ctypedef size_t Lexeme_addr # < < < < < < < < < < < < < <
*
* from spacy . spacy cimport Vocab
*/
typedef size_t __pyx_t_5spacy_6lexeme_Lexeme_addr ;
2014-07-05 22:51:42 +04:00
/*--- Type declarations ---*/
2014-07-07 06:21:06 +04:00
/* "spacy/spacy.pxd":9
* ctypedef size_t Lexeme_addr
* ctypedef uint64_t StringHash
* ctypedef dense_hash_map [ StringHash , Lexeme_addr ] Vocab # < < < < < < < < < < < < < <
* ctypedef int ( * Splitter ) ( unicode word , size_t length )
*
*/
typedef google : : dense_hash_map < __pyx_t_5spacy_5spacy_StringHash , __pyx_t_5spacy_5spacy_Lexeme_addr > __pyx_t_5spacy_5spacy_Vocab ;
/* "spacy/spacy.pxd":10
* ctypedef uint64_t StringHash
* ctypedef dense_hash_map [ StringHash , Lexeme_addr ] Vocab
* ctypedef int ( * Splitter ) ( unicode word , size_t length ) # < < < < < < < < < < < < < <
*
*
*/
typedef int ( * __pyx_t_5spacy_5spacy_Splitter ) ( PyObject * , size_t ) ;
2014-07-05 22:51:42 +04:00
struct __pyx_t_5spacy_6lexeme_Lexeme ;
2014-07-07 06:21:06 +04:00
/* "spacy/lexeme.pxd":36
2014-07-05 22:51:42 +04:00
* # over the Lexeme , via :
* # for field in range ( LexAttr . n ) : get_attr ( Lexeme * , field )
* cdef enum HashFields : # < < < < < < < < < < < < < <
* sic
* lex
*/
/* C form of the Cython "cdef enum HashFields" (spacy/lexeme.pxd:36).
   Enumerators take consecutive values starting at 0, so the trailing
   __pyx_e_5spacy_6lexeme_n equals 4, the number of entries before it.
   NOTE(review): the .pxd comment suggests these are used as field indices
   when iterating over a Lexeme - confirm against the callers. */
enum __pyx_t_5spacy_6lexeme_HashFields {
__pyx_e_5spacy_6lexeme_sic ,
__pyx_e_5spacy_6lexeme_lex ,
__pyx_e_5spacy_6lexeme_normed ,
__pyx_e_5spacy_6lexeme_cluster ,
__pyx_e_5spacy_6lexeme_n
} ;
2014-07-07 06:21:06 +04:00
/* "spacy/lexeme.pxd":11
* from spacy . spacy cimport Splitter
2014-07-05 22:51:42 +04:00
*
* cdef struct Lexeme : # < < < < < < < < < < < < < <
* StringHash sic # Hash of the original string
* StringHash lex # Hash of the word , with punctuation and clitics split off
*/
/* C layout of the Cython "cdef struct Lexeme" (spacy/lexeme.pxd:11).
   The descriptions of sic and lex come from the comments in the .pxd excerpt;
   the remaining descriptions are guesses from the field names and are marked
   as such. */
struct __pyx_t_5spacy_6lexeme_Lexeme {
__pyx_t_5spacy_6lexeme_StringHash sic ; /* hash of the original string */
__pyx_t_5spacy_6lexeme_StringHash lex ; /* hash of the word, with punctuation and clitics split off */
__pyx_t_5spacy_6lexeme_StringHash normed ; /* NOTE(review): presumably hash of a normalised form - confirm */
__pyx_t_5spacy_6lexeme_StringHash last3 ; /* NOTE(review): presumably hash of the last three characters - confirm */
Py_UNICODE first ; /* NOTE(review): presumably the first character of the word - confirm */
double prob ; /* NOTE(review): presumably a probability estimate; scale unknown - confirm */
__pyx_t_5spacy_6lexeme_ClusterID cluster ; /* cluster identifier (ClusterID is an int typedef) */
int oft_upper ; /* NOTE(review): presumably a flag for "often upper-case" - confirm */
int oft_title ; /* NOTE(review): presumably a flag for "often title-case" - confirm */
struct __pyx_t_5spacy_6lexeme_Lexeme * tail ; /* pointer to another Lexeme; ownership not visible here */
} ;
/* Reference-tracking hooks ("RefNanny").  CYTHON_REFNANNY defaults to 0.
   When it is non-zero, the __Pyx_INCREF/DECREF/GOTREF/GIVEREF macros call
   through the function table __Pyx_RefNanny, passing the per-function context
   and the current __LINE__.  When it is zero, INCREF/DECREF map directly to
   Py_INCREF/Py_DECREF (and the X variants to Py_XINCREF/Py_XDECREF), while
   the context macros and GOTREF/GIVEREF expand to nothing.
   NOTE(review): RefNanny appears to be a refcount-debugging facility; its
   implementation is not part of this excerpt. */
# ifndef CYTHON_REFNANNY
# define CYTHON_REFNANNY 0
# endif
# if CYTHON_REFNANNY
/* Table of callbacks supplied by the RefNanny implementation. */
typedef struct {
void ( * INCREF ) ( void * , PyObject * , int ) ;
void ( * DECREF ) ( void * , PyObject * , int ) ;
void ( * GOTREF ) ( void * , PyObject * , int ) ;
void ( * GIVEREF ) ( void * , PyObject * , int ) ;
void * ( * SetupContext ) ( const char * , int , const char * ) ;
void ( * FinishContext ) ( void * * ) ;
} __Pyx_RefNannyAPIStruct ;
static __Pyx_RefNannyAPIStruct * __Pyx_RefNanny = NULL ;
static __Pyx_RefNannyAPIStruct * __Pyx_RefNannyImportAPI ( const char * modname ) ; /*proto*/
/* Declares the per-function context handle used by the macros below. */
# define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL;
/* With thread support, SetupContext is wrapped in PyGILState_Ensure /
   PyGILState_Release when the caller passes a true acquire_gil. */
# ifdef WITH_THREAD
# define __Pyx_RefNannySetupContext(name, acquire_gil) \
if ( acquire_gil ) { \
PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure ( ) ; \
__pyx_refnanny = __Pyx_RefNanny - > SetupContext ( ( name ) , __LINE__ , __FILE__ ) ; \
PyGILState_Release ( __pyx_gilstate_save ) ; \
} else { \
__pyx_refnanny = __Pyx_RefNanny - > SetupContext ( ( name ) , __LINE__ , __FILE__ ) ; \
}
# else
# define __Pyx_RefNannySetupContext(name, acquire_gil) \
__pyx_refnanny = __Pyx_RefNanny - > SetupContext ( ( name ) , __LINE__ , __FILE__ )
# endif
# define __Pyx_RefNannyFinishContext() \
__Pyx_RefNanny - > FinishContext ( & __pyx_refnanny )
# define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
# define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
# define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
# define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
/* X variants skip the call when r is NULL. */
# define __Pyx_XINCREF(r) do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0)
# define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0)
# define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0)
# define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0)
# else
# define __Pyx_RefNannyDeclarations
# define __Pyx_RefNannySetupContext(name, acquire_gil)
# define __Pyx_RefNannyFinishContext()
# define __Pyx_INCREF(r) Py_INCREF(r)
# define __Pyx_DECREF(r) Py_DECREF(r)
# define __Pyx_GOTREF(r)
# define __Pyx_GIVEREF(r)
# define __Pyx_XINCREF(r) Py_XINCREF(r)
# define __Pyx_XDECREF(r) Py_XDECREF(r)
# define __Pyx_XGOTREF(r)
# define __Pyx_XGIVEREF(r)
# endif /* CYTHON_REFNANNY */
/* Replace-and-release helpers.  Each macro first saves the old value of r in
   a local tmp, then overwrites r, and only afterwards drops the reference to
   the saved value, so r is updated before the old object is released.
   - __Pyx_XDECREF_SET(r, v): r = v; the old value may be NULL.
   - __Pyx_DECREF_SET(r, v):  r = v; the old value is released with
     __Pyx_DECREF.
   - __Pyx_CLEAR(r):          r = NULL; the old value is released with
     __Pyx_DECREF.
   - __Pyx_XCLEAR(r):         as __Pyx_CLEAR, but does nothing when r is NULL.
   All of them declare a local named tmp and expand r more than once. */
# define __Pyx_XDECREF_SET(r, v) do { \
PyObject * tmp = ( PyObject * ) r ; \
r = v ; __Pyx_XDECREF ( tmp ) ; \
} while ( 0 )
# define __Pyx_DECREF_SET(r, v) do { \
PyObject * tmp = ( PyObject * ) r ; \
r = v ; __Pyx_DECREF ( tmp ) ; \
} while ( 0 )
# define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0)
# define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0)
2014-07-07 06:21:06 +04:00
# define __Pyx_GetItemInt_Unicode(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \
( __Pyx_fits_Py_ssize_t ( i , type , is_signed ) ? \
__Pyx_GetItemInt_Unicode_Fast ( o , ( Py_ssize_t ) i , wraparound , boundscheck ) : \
( PyErr_SetString ( PyExc_IndexError , " string index out of range " ) , ( Py_UCS4 ) - 1 ) )
static CYTHON_INLINE Py_UCS4 __Pyx_GetItemInt_Unicode_Fast ( PyObject * ustring , Py_ssize_t i ,
int wraparound , int boundscheck ) ;
2014-07-05 22:51:42 +04:00
static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t ( PyObject * ) ;
2014-07-07 06:21:06 +04:00
static CYTHON_INLINE PyObject * __Pyx_PyInt_From_long ( long value ) ;
2014-07-05 22:51:42 +04:00
static CYTHON_INLINE PyObject * __Pyx_PyInt_From_uint64_t ( uint64_t value ) ;
static CYTHON_INLINE PyObject * __Pyx_PyInt_From_int ( int value ) ;
static CYTHON_INLINE long __Pyx_PyInt_As_long ( PyObject * ) ;
static CYTHON_INLINE int __Pyx_PyInt_As_int ( PyObject * ) ;
static int __Pyx_check_binary_version ( void ) ;
2014-07-07 06:21:06 +04:00
# if CYTHON_COMPILING_IN_CPYTHON
# define __Pyx_PyObject_DelAttrStr(o,n) __Pyx_PyObject_SetAttrStr(o,n,NULL)
/* Set attribute attr_name of obj to value by calling the type's slot directly.
   Slot preference: tp_setattro first; on Python 2 builds tp_setattr next
   (given the attribute name as a C string); PyObject_SetAttr as the last
   resort.  The return value is whatever the chosen call returns. */
static CYTHON_INLINE int __Pyx_PyObject_SetAttrStr(PyObject *obj, PyObject *attr_name, PyObject *value) {
    PyTypeObject *type = Py_TYPE(obj);

    if (unlikely(!type->tp_setattro)) {
        /* No object-name setter on this type: try the older alternatives. */
#if PY_MAJOR_VERSION < 3
        if (likely(type->tp_setattr)) {
            return type->tp_setattr(obj, PyString_AS_STRING(attr_name), value);
        }
#endif
        return PyObject_SetAttr(obj, attr_name, value);
    }
    return type->tp_setattro(obj, attr_name, value);
}
# else
# define __Pyx_PyObject_DelAttrStr(o,n) PyObject_DelAttr(o,n)
# define __Pyx_PyObject_SetAttrStr(o,n,v) PyObject_SetAttr(o,n,v)
# endif
static int __Pyx_ExportVoidPtr ( PyObject * name , void * p , const char * sig ) ; /*proto*/
static int __Pyx_ExportFunction ( const char * name , void ( * f ) ( void ) , const char * sig ) ; /*proto*/
# if !defined(__Pyx_PyIdentifier_FromString)
# if PY_MAJOR_VERSION < 3
# define __Pyx_PyIdentifier_FromString(s) PyString_FromString(s)
# else
# define __Pyx_PyIdentifier_FromString(s) PyUnicode_FromString(s)
# endif
# endif
static PyObject * __Pyx_ImportModule ( const char * name ) ; /*proto*/
static int __Pyx_ImportFunction ( PyObject * module , const char * funcname , void ( * * f ) ( void ) , const char * sig ) ; /*proto*/
2014-07-05 22:51:42 +04:00
/* One cache slot: a code object together with the integer line key
   (code_line) it is stored under. */
typedef struct {
int code_line ;
PyCodeObject * code_object ;
} __Pyx_CodeObjectCacheEntry ;
/* Cache of code objects keyed by code_line.
   count     - number of entries; starts at 0
   max_count - starts at 0; NOTE(review): presumably the allocated capacity
               of entries - confirm in __pyx_insert_code_object
   entries   - entry array, NULL initially
   NOTE(review): a bisect helper is declared for this cache, which suggests
   entries are kept ordered by code_line - confirm in the implementation. */
struct __Pyx_CodeObjectCache {
int count ;
int max_count ;
__Pyx_CodeObjectCacheEntry * entries ;
} ;
/* The single module-wide cache instance, initially empty. */
static struct __Pyx_CodeObjectCache __pyx_code_cache = { 0 , 0 , NULL } ;
static int __pyx_bisect_code_objects ( __Pyx_CodeObjectCacheEntry * entries , int count , int code_line ) ;
static PyCodeObject * __pyx_find_code_object ( int code_line ) ;
static void __pyx_insert_code_object ( int code_line , PyCodeObject * code_object ) ;
static void __Pyx_AddTraceback ( const char * funcname , int c_line ,
int py_line , const char * filename ) ; /*proto*/
static int __Pyx_InitStrings ( __Pyx_StringTabEntry * t ) ; /*proto*/
/* Module declarations from 'libc.stdint' */
2014-07-07 06:21:06 +04:00
/* Module declarations from 'libcpp.vector' */
/* Module declarations from 'libcpp.utility' */
/* Module declarations from 'ext.sparsehash' */
/* Module declarations from 'spacy.spacy' */
static __pyx_t_5spacy_5spacy_Lexeme_addr ( * __pyx_f_5spacy_5spacy_lookup ) ( __pyx_t_5spacy_5spacy_Vocab & , PyObject * , __pyx_t_5spacy_5spacy_Splitter , int , PyObject * ) ; /*proto*/
static __pyx_t_5spacy_5spacy_StringHash ( * __pyx_f_5spacy_5spacy_hash_string ) ( PyObject * , size_t ) ; /*proto*/
/* Module declarations from 'spacy.string_tools' */
static PyObject * ( * __pyx_f_5spacy_12string_tools_substr ) ( PyObject * , int , int , size_t , int __pyx_skip_dispatch ) ; /*proto*/
/* Module declarations from 'libc.string' */
/* Module declarations from 'libc.stdlib' */
2014-07-05 22:51:42 +04:00
/* Module declarations from 'spacy.lexeme' */
2014-07-07 06:21:06 +04:00
/* Module-level Lexeme value; static storage, so it starts zero-filled. */
static struct __pyx_t_5spacy_6lexeme_Lexeme __pyx_v_5spacy_6lexeme_BLANK_WORD ;
2014-07-05 22:51:42 +04:00
/* C-level entry points of the cpdef accessors. Each takes the lexeme id as a
 * size_t plus the `__pyx_skip_dispatch` flag and returns the field's C type. */
static __pyx_t_5spacy_6lexeme_StringHash __pyx_f_5spacy_6lexeme_sic_of ( size_t , int __pyx_skip_dispatch ) ; /*proto*/
static __pyx_t_5spacy_6lexeme_StringHash __pyx_f_5spacy_6lexeme_lex_of ( size_t , int __pyx_skip_dispatch ) ; /*proto*/
static __pyx_t_5spacy_6lexeme_ClusterID __pyx_f_5spacy_6lexeme_cluster_of ( size_t , int __pyx_skip_dispatch ) ; /*proto*/
static Py_UNICODE __pyx_f_5spacy_6lexeme_first_of ( size_t , int __pyx_skip_dispatch ) ; /*proto*/
static double __pyx_f_5spacy_6lexeme_prob_of ( size_t , int __pyx_skip_dispatch ) ; /*proto*/
static __pyx_t_5spacy_6lexeme_StringHash __pyx_f_5spacy_6lexeme_last3_of ( size_t , int __pyx_skip_dispatch ) ; /*proto*/
static int __pyx_f_5spacy_6lexeme_is_oft_upper ( size_t , int __pyx_skip_dispatch ) ; /*proto*/
static int __pyx_f_5spacy_6lexeme_is_oft_title ( size_t , int __pyx_skip_dispatch ) ; /*proto*/
/* Dotted name of the module this translation unit implements. */
# define __Pyx_MODULE_NAME "spacy.lexeme"
/* Non-static flag, initialised to 0.
 * NOTE(review): presumably set when the module is run as __main__ - confirm
 * against the module init code, which is not visible here. */
int __pyx_module_is_main_spacy__lexeme = 0 ;
/* Implementation of 'spacy.lexeme' */
/* Prototypes of the Python-level implementation functions, one per accessor.
 * Each receives the (unused) module object and the already-converted size_t
 * lexeme id, and returns a PyObject*. */
static PyObject * __pyx_pf_5spacy_6lexeme_sic_of ( CYTHON_UNUSED PyObject * __pyx_self , size_t __pyx_v_lex_id ) ; /* proto */
static PyObject * __pyx_pf_5spacy_6lexeme_2lex_of ( CYTHON_UNUSED PyObject * __pyx_self , size_t __pyx_v_lex_id ) ; /* proto */
static PyObject * __pyx_pf_5spacy_6lexeme_4cluster_of ( CYTHON_UNUSED PyObject * __pyx_self , size_t __pyx_v_lex_id ) ; /* proto */
static PyObject * __pyx_pf_5spacy_6lexeme_6first_of ( CYTHON_UNUSED PyObject * __pyx_self , size_t __pyx_v_lex_id ) ; /* proto */
static PyObject * __pyx_pf_5spacy_6lexeme_8prob_of ( CYTHON_UNUSED PyObject * __pyx_self , size_t __pyx_v_lex_id ) ; /* proto */
static PyObject * __pyx_pf_5spacy_6lexeme_10last3_of ( CYTHON_UNUSED PyObject * __pyx_self , size_t __pyx_v_lex_id ) ; /* proto */
static PyObject * __pyx_pf_5spacy_6lexeme_12is_oft_upper ( CYTHON_UNUSED PyObject * __pyx_self , size_t __pyx_v_lex_id ) ; /* proto */
static PyObject * __pyx_pf_5spacy_6lexeme_14is_oft_title ( CYTHON_UNUSED PyObject * __pyx_self , size_t __pyx_v_lex_id ) ; /* proto */
2014-07-07 06:21:06 +04:00
/* Raw character data for the short string constants used by this module.
 * Each array has a same-suffixed PyObject* slot declared further down
 * (__pyx_kp_u_* / __pyx_n_s_*). */
/* Two literal constants taken from the .pyx source (init_lexeme uses their
 * object forms for `tail_string` and `normed`). */
static char __pyx_k_ [ ] = " " ;
static char __pyx_k__2 [ ] = " ? " ;
2014-07-05 22:51:42 +04:00
static char __pyx_k_main [ ] = " __main__ " ;
static char __pyx_k_test [ ] = " __test__ " ;
2014-07-07 06:21:06 +04:00
static char __pyx_k_pyx_capi [ ] = " __pyx_capi__ " ;
static char __pyx_k_BLANK_WORD [ ] = " BLANK_WORD " ;
/* "<function> (line N)" labels, one per accessor.
 * NOTE(review): presumably the keys under which the docstrings below are
 * registered in the module's __test__ dict - confirm in module init. */
static char __pyx_k_lex_of_line_73 [ ] = " lex_of (line 73) " ;
static char __pyx_k_sic_of_line_61 [ ] = " sic_of (line 61) " ;
static char __pyx_k_prob_of_line_117 [ ] = " prob_of (line 117) " ;
static char __pyx_k_first_of_line_106 [ ] = " first_of (line 106) " ;
static char __pyx_k_last3_of_line_130 [ ] = " last3_of (line 130) " ;
static char __pyx_k_cluster_of_line_87 [ ] = " cluster_of (line 87) " ;
static char __pyx_k_is_oft_title_line_156 [ ] = " is_oft_title (line 156) " ;
static char __pyx_k_is_oft_upper_line_141 [ ] = " is_oft_upper (line 141) " ;
2014-07-05 22:51:42 +04:00
/* Docstring text of the module and of each accessor, stored as character
 * arrays. The text is runtime data and must only be changed by editing the
 * docstrings in spacy/lexeme.pyx and regenerating this file.
 *
 * NOTE(review) - issues in the text itself, to be fixed in the .pyx:
 *   - lex_of:       `unhash(lex_of(lex_id)` is missing a closing parenthesis.
 *   - sic_of:       `u'world]` is missing its closing quote.
 *   - prob_of:      "Knesser-Ney" is presumably meant to be "Kneser-Ney".
 *   - is_oft_title: the text (the `_2` constant) opens with "Access the
 *                   `oft_upper' field" although its examples call is_oft_title.
 */
static char __pyx_k_Access_the_cluster_field_of_the [ ] = " Access the `cluster' field of the Lexeme pointed to by lex_id, which \n gives an integer representation of the cluster ID of the word, \n which should be understood as a binary address: \n \n >>> strings = (u'pineapple', u'apple', u'dapple', u'scalable') \n >>> token_ids = [lookup(s) for s in strings] \n >>> clusters = [cluster_of(t) for t in token_ids] \n >>> print [ \" {0:b \" } % cluster_of(t) for t in token_ids] \n [ \" 100111110110 \" , \" 100111100100 \" , \" 01010111011001 \" , \" 100111110110 \" ] \n \n The clusterings are unideal, but often slightly useful. \n \" pineapple \" and \" apple \" share a long prefix, indicating a similar meaning, \n while \" dapple \" is totally different. On the other hand, \" scalable \" receives \n the same cluster ID as \" pineapple \" , which is not what we'd like. \n " ;
/* Module-level docstring. */
static char __pyx_k_Accessors_for_Lexeme_properties [ ] = " Accessors for Lexeme properties, given a lex_id, which is cast to a Lexeme*. \n Mostly useful from Python-space. From Cython-space, you can just cast to \n Lexeme* yourself. \n " ;
static char __pyx_k_Access_the_first_field_of_the_Le [ ] = " Access the `first' field of the Lexeme pointed to by lex_id, which \n stores the first character of the lex string of the word. \n \n >>> lex_id = lookup(u'Hello') \n >>> unhash(first_of(lex_id)) \n u'H' \n " ;
static char __pyx_k_Access_the_last3_field_of_the_Le [ ] = " Access the `last3' field of the Lexeme pointed to by lex_id, which stores \n the hash of the last three characters of the word: \n \n >>> lex_ids = [lookup(w) for w in (u'Hello', u'!')] \n >>> [unhash(last3_of(lex_id)) for lex_id in lex_ids] \n [u'llo', u'!'] \n " ;
static char __pyx_k_Access_the_lex_field_of_the_Lexe [ ] = " Access the `lex' field of the Lexeme pointed to by lex_id. \n \n The lex field is the hash of the string you would expect to get back from \n a standard tokenizer, i.e. the word with punctuation and other non-whitespace \n delimited tokens split off. The other fields refer to properties of the \n string that the lex field stores a hash of, except sic and tail. \n \n >>> [unhash(lex_of(lex_id) for lex_id in from_string(u'Hi! world')] \n [u'Hi', u'!', u'world'] \n " ;
static char __pyx_k_Access_the_oft_upper_field_of_th [ ] = " Access the `oft_upper' field of the Lexeme pointed to by lex_id, which \n stores whether the lowered version of the string hashed by `lex' is found \n in all-upper case frequently in a large sample of text. Users are free \n to load different data, by default we use a sample from Wikipedia, with \n a threshold of 0.95, picked to maximize mutual information for POS tagging. \n \n >>> is_oft_upper(lookup(u'abc')) \n True \n >>> is_oft_upper(lookup(u'aBc')) # This must get the same answer \n True \n " ;
static char __pyx_k_Access_the_prob_field_of_the_Lex [ ] = " Access the `prob' field of the Lexeme pointed to by lex_id, which stores \n the smoothed unigram log probability of the word, as estimated from a large \n text corpus. By default, probabilities are based on counts from Gigaword, \n smoothed using Knesser-Ney; but any probabilities file can be supplied to \n load_probs. \n \n >>> prob_of(lookup(u'world')) \n -20.10340371976182 \n " ;
static char __pyx_k_Access_the_sic_field_of_the_Lexe [ ] = " Access the `sic' field of the Lexeme pointed to by lex_id. \n \n The sic field stores the hash of the whitespace-delimited string-chunk used to \n construct the Lexeme. \n \n >>> [unhash(sic_of(lex_id)) for lex_id in from_string(u'Hi! world')] \n [u'Hi!', u'', u'world] \n " ;
static char __pyx_k_Access_the_oft_upper_field_of_th_2 [ ] = " Access the `oft_upper' field of the Lexeme pointed to by lex_id, which \n stores whether the lowered version of the string hashed by `lex' is found \n title-cased frequently in a large sample of text. Users are free \n to load different data, by default we use a sample from Wikipedia, with \n a threshold of 0.3, picked to maximize mutual information for POS tagging. \n \n >>> is_oft_title(lookup(u'marcus')) \n True \n >>> is_oft_title(lookup(u'MARCUS')) # This must get the same value \n True \n " ;
2014-07-07 06:21:06 +04:00
/* Python string object slots, one per __pyx_k_* character array above (same
 * name suffix). They are declared without an initialiser.
 * NOTE(review): presumably populated by __Pyx_InitStrings during module init;
 * the string table itself is not visible in this part of the file - confirm. */
static PyObject * __pyx_kp_u_ ;
2014-07-05 22:51:42 +04:00
static PyObject * __pyx_kp_u_Access_the_cluster_field_of_the ;
static PyObject * __pyx_kp_u_Access_the_first_field_of_the_Le ;
static PyObject * __pyx_kp_u_Access_the_last3_field_of_the_Le ;
static PyObject * __pyx_kp_u_Access_the_lex_field_of_the_Lexe ;
static PyObject * __pyx_kp_u_Access_the_oft_upper_field_of_th ;
static PyObject * __pyx_kp_u_Access_the_oft_upper_field_of_th_2 ;
static PyObject * __pyx_kp_u_Access_the_prob_field_of_the_Lex ;
static PyObject * __pyx_kp_u_Access_the_sic_field_of_the_Lexe ;
2014-07-07 06:21:06 +04:00
static PyObject * __pyx_n_s_BLANK_WORD ;
static PyObject * __pyx_kp_u__2 ;
static PyObject * __pyx_kp_u_cluster_of_line_87 ;
static PyObject * __pyx_kp_u_first_of_line_106 ;
static PyObject * __pyx_kp_u_is_oft_title_line_156 ;
static PyObject * __pyx_kp_u_is_oft_upper_line_141 ;
static PyObject * __pyx_kp_u_last3_of_line_130 ;
static PyObject * __pyx_kp_u_lex_of_line_73 ;
2014-07-05 22:51:42 +04:00
static PyObject * __pyx_n_s_main ;
2014-07-07 06:21:06 +04:00
static PyObject * __pyx_kp_u_prob_of_line_117 ;
static PyObject * __pyx_n_s_pyx_capi ;
static PyObject * __pyx_kp_u_sic_of_line_61 ;
2014-07-05 22:51:42 +04:00
static PyObject * __pyx_n_s_test ;
2014-07-07 06:21:06 +04:00
/* "spacy/lexeme.pyx":16
 *
 * cdef Lexeme* init_lexeme(Vocab vocab, dict bacov, Splitter find_split,
 *                          unicode string, StringHash hashed,
 *                          int split, size_t length) except NULL
 *
 * Allocates one zero-filled Lexeme and fills it in from `string`:
 *   first   first character of `string`, or 0 when `string` is None or empty
 *   sic     `hashed`, stored as given
 *   lex     hash of substr(string, 0, split, length) when 0 < split < length,
 *           otherwise hash of the whole string
 *   normed  hash of the constant __pyx_kp_u__2 (the real normalisation call is
 *           commented out in the .pyx)
 *   last3   hash of substr(string, length - 3, length, length)
 *   tail    result of lookup() on the remainder after `split`; left at the
 *           zero from calloc when the remainder is None or empty
 * Each of lex / normed / last3 is also stored in `bacov` under its hash.
 *
 * Returns the new Lexeme. On failure returns NULL after adding a traceback
 * entry; the exception is set either here (AssertionError, TypeError,
 * MemoryError) or, presumably, by the callee that reported the failure.
 *
 * Fixes relative to the generated code:
 *   - a failed calloc() now raises MemoryError instead of being dereferenced;
 *   - the partly built Lexeme is freed on every error exit instead of leaking
 *     (`word` is NULL-initialised so this is also safe before allocation).
 *
 * NOTE(review): `length - 3` is size_t arithmetic and wraps for length < 3
 * before being narrowed to substr's `int` parameter - confirm substr copes.
 * NOTE(review): `vocab` is received by value here but handed to lookup() by
 * reference; if Vocab is a container type, lookup() works on a copy - confirm.
 */
static struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_f_5spacy_6lexeme_init_lexeme(__pyx_t_5spacy_5spacy_Vocab __pyx_v_vocab, PyObject *__pyx_v_bacov, __pyx_t_5spacy_5spacy_Splitter __pyx_v_find_split, PyObject *__pyx_v_string, __pyx_t_5spacy_6lexeme_StringHash __pyx_v_hashed, int __pyx_v_split, size_t __pyx_v_length) {
  /* NULL so the shared error exit can free() it unconditionally. */
  struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_v_word = NULL;
  PyObject *__pyx_v_tail_string = 0;
  PyObject *__pyx_v_lex = 0;
  PyObject *__pyx_v_normed = 0;
  PyObject *__pyx_v_last3 = 0;
  struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_r;
  __Pyx_RefNannyDeclarations
  long __pyx_t_1;
  int __pyx_t_2;
  Py_UCS4 __pyx_t_3;
  int __pyx_t_4;
  int __pyx_t_5;
  PyObject *__pyx_t_6 = NULL;
  Py_ssize_t __pyx_t_7;
  __pyx_t_5spacy_5spacy_StringHash __pyx_t_8;
  __pyx_t_5spacy_5spacy_Lexeme_addr __pyx_t_9;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_RefNannySetupContext(" init_lexeme ", 0);

  /* "spacy/lexeme.pyx":19 -- assert split <= length */
  #ifndef CYTHON_WITHOUT_ASSERTIONS
  if (unlikely(!Py_OptimizeFlag)) {
    if (unlikely(!((__pyx_v_split <= __pyx_v_length) != 0))) {
      PyErr_SetNone(PyExc_AssertionError);
      {__pyx_filename = __pyx_f[0]; __pyx_lineno = 19; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    }
  }
  #endif

  /* "spacy/lexeme.pyx":20 -- cdef Lexeme* word = <Lexeme*>calloc(1, sizeof(Lexeme)) */
  __pyx_v_word = ((struct __pyx_t_5spacy_6lexeme_Lexeme *)calloc(1, (sizeof(struct __pyx_t_5spacy_6lexeme_Lexeme))));
  if (unlikely(!__pyx_v_word)) {
    /* Out of memory: report it rather than writing through a NULL pointer. */
    PyErr_NoMemory();
    {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  }

  /* "spacy/lexeme.pyx":22 -- word.first = <Py_UNICODE>(string[0] if string else 0) */
  __pyx_t_2 = (__pyx_v_string != Py_None) && (PyUnicode_GET_SIZE(__pyx_v_string) != 0);
  if (__pyx_t_2) {
    __pyx_t_3 = __Pyx_GetItemInt_Unicode(__pyx_v_string, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 1);
    if (unlikely(__pyx_t_3 == (Py_UCS4)-1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 22; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __pyx_t_1 = __pyx_t_3;
  } else {
    __pyx_t_1 = 0;
  }
  __pyx_v_word->first = ((Py_UNICODE)__pyx_t_1);

  /* "spacy/lexeme.pyx":23 -- word.sic = hashed */
  __pyx_v_word->sic = __pyx_v_hashed;

  /* "spacy/lexeme.pyx":27 -- if split != 0 and split < length: */
  __pyx_t_2 = ((__pyx_v_split != 0) != 0);
  if (__pyx_t_2) {
    __pyx_t_4 = ((__pyx_v_split < __pyx_v_length) != 0);
    __pyx_t_5 = __pyx_t_4;
  } else {
    __pyx_t_5 = __pyx_t_2;
  }
  if (__pyx_t_5) {
    /* "spacy/lexeme.pyx":28 -- lex = substr(string, 0, split, length) */
    __pyx_t_6 = __pyx_f_5spacy_12string_tools_substr(__pyx_v_string, 0, __pyx_v_split, __pyx_v_length, 0);
    if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_6);
    __pyx_v_lex = ((PyObject*)__pyx_t_6);
    __pyx_t_6 = 0;

    /* "spacy/lexeme.pyx":29 -- tail_string = substr(string, split, length, length) */
    __pyx_t_6 = __pyx_f_5spacy_12string_tools_substr(__pyx_v_string, __pyx_v_split, __pyx_v_length, __pyx_v_length, 0);
    if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __Pyx_GOTREF(__pyx_t_6);
    __pyx_v_tail_string = ((PyObject*)__pyx_t_6);
    __pyx_t_6 = 0;
  } else {
    /* "spacy/lexeme.pyx":31 -- lex = string */
    __Pyx_INCREF(__pyx_v_string);
    __pyx_v_lex = __pyx_v_string;

    /* "spacy/lexeme.pyx":32 -- tail_string = <constant __pyx_kp_u_> */
    __Pyx_INCREF(__pyx_kp_u_);
    __pyx_v_tail_string = __pyx_kp_u_;
  }

  /* "spacy/lexeme.pyx":33 -- assert lex */
  #ifndef CYTHON_WITHOUT_ASSERTIONS
  if (unlikely(!Py_OptimizeFlag)) {
    __pyx_t_5 = (__pyx_v_lex != Py_None) && (PyUnicode_GET_SIZE(__pyx_v_lex) != 0);
    if (unlikely(!__pyx_t_5)) {
      PyErr_SetNone(PyExc_AssertionError);
      {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    }
  }
  #endif

  /* "spacy/lexeme.pyx":35 -- cdef unicode normed = <constant __pyx_kp_u__2> */
  __Pyx_INCREF(__pyx_kp_u__2);
  __pyx_v_normed = __pyx_kp_u__2;

  /* "spacy/lexeme.pyx":36 -- cdef unicode last3 = substr(string, length - 3, length, length) */
  __pyx_t_6 = __pyx_f_5spacy_12string_tools_substr(__pyx_v_string, (__pyx_v_length - 3), __pyx_v_length, __pyx_v_length, 0);
  if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_6);
  __pyx_v_last3 = ((PyObject*)__pyx_t_6);
  __pyx_t_6 = 0;

  /* "spacy/lexeme.pyx":38 -- assert normed */
  #ifndef CYTHON_WITHOUT_ASSERTIONS
  if (unlikely(!Py_OptimizeFlag)) {
    __pyx_t_5 = (__pyx_v_normed != Py_None) && (PyUnicode_GET_SIZE(__pyx_v_normed) != 0);
    if (unlikely(!__pyx_t_5)) {
      PyErr_SetNone(PyExc_AssertionError);
      {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    }
  }
  #endif

  /* "spacy/lexeme.pyx":39 -- assert len(normed) */
  #ifndef CYTHON_WITHOUT_ASSERTIONS
  if (unlikely(!Py_OptimizeFlag)) {
    __pyx_t_7 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_normed);
    if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    if (unlikely(!(__pyx_t_7 != 0))) {
      PyErr_SetNone(PyExc_AssertionError);
      {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    }
  }
  #endif

  /* "spacy/lexeme.pyx":41 -- word.lex = hash_string(lex, len(lex)) */
  if (unlikely(__pyx_v_lex == Py_None)) {
    PyErr_SetString(PyExc_TypeError, " object of type 'NoneType' has no len() ");
    {__pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  }
  __pyx_t_7 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_lex);
  if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_t_8 = __pyx_f_5spacy_5spacy_hash_string(__pyx_v_lex, __pyx_t_7);
  if (unlikely(__pyx_t_8 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_v_word->lex = __pyx_t_8;

  /* "spacy/lexeme.pyx":42 -- word.normed = hash_string(normed, len(normed)) */
  __pyx_t_7 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_normed);
  if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_t_8 = __pyx_f_5spacy_5spacy_hash_string(__pyx_v_normed, __pyx_t_7);
  if (unlikely(__pyx_t_8 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_v_word->normed = __pyx_t_8;

  /* "spacy/lexeme.pyx":43 -- word.last3 = hash_string(last3, len(last3)) */
  if (unlikely(__pyx_v_last3 == Py_None)) {
    PyErr_SetString(PyExc_TypeError, " object of type 'NoneType' has no len() ");
    {__pyx_filename = __pyx_f[0]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  }
  __pyx_t_7 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_last3);
  if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_t_8 = __pyx_f_5spacy_5spacy_hash_string(__pyx_v_last3, __pyx_t_7);
  if (unlikely(__pyx_t_8 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_v_word->last3 = __pyx_t_8;

  /* "spacy/lexeme.pyx":45 -- bacov[word.lex] = lex */
  if (unlikely(__pyx_v_bacov == Py_None)) {
    PyErr_SetString(PyExc_TypeError, " 'NoneType' object is not subscriptable ");
    {__pyx_filename = __pyx_f[0]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  }
  __pyx_t_6 = __Pyx_PyInt_From_uint64_t(__pyx_v_word->lex);
  if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_6);
  if (unlikely(PyDict_SetItem(__pyx_v_bacov, __pyx_t_6, __pyx_v_lex) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;

  /* "spacy/lexeme.pyx":46 -- bacov[word.normed] = normed */
  if (unlikely(__pyx_v_bacov == Py_None)) {
    PyErr_SetString(PyExc_TypeError, " 'NoneType' object is not subscriptable ");
    {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  }
  __pyx_t_6 = __Pyx_PyInt_From_uint64_t(__pyx_v_word->normed);
  if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_6);
  if (unlikely(PyDict_SetItem(__pyx_v_bacov, __pyx_t_6, __pyx_v_normed) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;

  /* "spacy/lexeme.pyx":47 -- bacov[word.last3] = last3 */
  if (unlikely(__pyx_v_bacov == Py_None)) {
    PyErr_SetString(PyExc_TypeError, " 'NoneType' object is not subscriptable ");
    {__pyx_filename = __pyx_f[0]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  }
  __pyx_t_6 = __Pyx_PyInt_From_uint64_t(__pyx_v_word->last3);
  if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_6);
  if (unlikely(PyDict_SetItem(__pyx_v_bacov, __pyx_t_6, __pyx_v_last3) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;

  /* "spacy/lexeme.pyx":50-53 -- fields the .pyx describes as "loaded later" */
  __pyx_v_word->prob = 0.0;
  __pyx_v_word->cluster = 0;
  __pyx_v_word->oft_upper = 0;
  __pyx_v_word->oft_title = 0;

  /* "spacy/lexeme.pyx":56 -- if tail_string: */
  __pyx_t_5 = (__pyx_v_tail_string != Py_None) && (PyUnicode_GET_SIZE(__pyx_v_tail_string) != 0);
  if (__pyx_t_5) {
    /* "spacy/lexeme.pyx":57 -- word.tail = <Lexeme*>lookup(vocab, bacov, find_split, -1, tail_string) */
    __pyx_t_9 = __pyx_f_5spacy_5spacy_lookup(__pyx_v_vocab, __pyx_v_bacov, __pyx_v_find_split, -1, __pyx_v_tail_string);
    if (unlikely(__pyx_t_9 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    __pyx_v_word->tail = ((struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_t_9);
  }

  /* "spacy/lexeme.pyx":58 -- return word */
  __pyx_r = __pyx_v_word;
  goto __pyx_L0;

  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_6);
  __Pyx_AddTraceback(" spacy.lexeme.init_lexeme ", __pyx_clineno, __pyx_lineno, __pyx_filename);
  /* The Lexeme has not been handed to anyone yet, so release it here;
   * free(NULL) is a no-op when the failure happened before allocation. */
  free(__pyx_v_word);
  __pyx_v_word = NULL;
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XDECREF(__pyx_v_tail_string);
  __Pyx_XDECREF(__pyx_v_lex);
  __Pyx_XDECREF(__pyx_v_normed);
  __Pyx_XDECREF(__pyx_v_last3);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
/* "spacy/lexeme.pyx":61
 *
 * cpdef StringHash sic_of(size_t lex_id) except 0
 *
 * C-level entry point. "spacy/lexeme.pyx":70 is
 *     return (<Lexeme*>lex_id).sic
 * i.e. `lex_id` is reinterpreted as a Lexeme pointer and its `sic` field is
 * returned. The pointer is not validated in any way.
 */
static PyObject *__pyx_pw_5spacy_6lexeme_1sic_of(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/
static __pyx_t_5spacy_6lexeme_StringHash __pyx_f_5spacy_6lexeme_sic_of(size_t __pyx_v_lex_id, CYTHON_UNUSED int __pyx_skip_dispatch) {
  __pyx_t_5spacy_6lexeme_StringHash __pyx_r;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext(" sic_of ", 0);

  __pyx_r = ((struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_v_lex_id)->sic;

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
/* Python wrapper for sic_of: converts the single Python argument to size_t
 * and forwards it to the __pyx_pf_ implementation. Returns NULL, after adding
 * a traceback entry, when the conversion fails. */
static PyObject *__pyx_pw_5spacy_6lexeme_1sic_of(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/
static char __pyx_doc_5spacy_6lexeme_sic_of[] = " Access the `sic' field of the Lexeme pointed to by lex_id. \n \n The sic field stores the hash of the whitespace-delimited string-chunk used to \n construct the Lexeme. \n \n >>> [unhash(sic_of(lex_id)) for lex_id in from_string(u'Hi! world')] \n [u'Hi!', u'', u'world] \n ";
static PyObject *__pyx_pw_5spacy_6lexeme_1sic_of(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id) {
  size_t __pyx_v_lex_id;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext(" sic_of (wrapper) ", 0);
  assert(__pyx_arg_lex_id);

  /* Argument unpacking: (size_t)-1 is only an error if an exception is pending. */
  __pyx_v_lex_id = __Pyx_PyInt_As_size_t(__pyx_arg_lex_id);
  if (unlikely((__pyx_v_lex_id == (size_t)-1) && PyErr_Occurred())) {
    __pyx_filename = __pyx_f[0];
    __pyx_lineno = 61;
    __pyx_clineno = __LINE__;
    __Pyx_AddTraceback(" spacy.lexeme.sic_of ", __pyx_clineno, __pyx_lineno, __pyx_filename);
    __Pyx_RefNannyFinishContext();
    return NULL;
  }

  __pyx_r = __pyx_pf_5spacy_6lexeme_sic_of(__pyx_self, ((size_t)__pyx_v_lex_id));

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
/* Python-level body of sic_of: calls the C-level accessor and boxes the
 * resulting hash as a Python integer. Returns NULL after adding a traceback
 * entry when the accessor yields 0 or the integer cannot be created.
 *
 * NOTE(review): the C-level accessor in this file only reads a struct field
 * and never sets an exception, so a stored hash of 0 takes the error exit with
 * no exception visibly set - confirm that `except 0` in the .pyx is intended. */
static PyObject *__pyx_pf_5spacy_6lexeme_sic_of(CYTHON_UNUSED PyObject *__pyx_self, size_t __pyx_v_lex_id) {
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  __pyx_t_5spacy_6lexeme_StringHash __pyx_t_1;
  PyObject *__pyx_t_2 = NULL;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_RefNannySetupContext(" sic_of ", 0);
  __Pyx_XDECREF(__pyx_r);

  __pyx_t_1 = __pyx_f_5spacy_6lexeme_sic_of(__pyx_v_lex_id, 0);
  if (unlikely(__pyx_t_1 == 0)) {
    __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  __pyx_t_2 = __Pyx_PyInt_From_uint64_t(__pyx_t_1);
  if (unlikely(!__pyx_t_2)) {
    __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  __Pyx_GOTREF(__pyx_t_2);
  /* Ownership of the new reference moves to the return slot. */
  __pyx_r = __pyx_t_2;
  __pyx_t_2 = 0;
  goto __pyx_L0;

  /* function exit code */
  __pyx_L1_error:;
  __pyx_filename = __pyx_f[0];
  __pyx_lineno = 61;
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_AddTraceback(" spacy.lexeme.sic_of ", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
2014-07-07 06:21:06 +04:00
/* "spacy/lexeme.pyx":73
 *
 * cpdef StringHash lex_of(size_t lex_id) except 0
 *
 * C-level entry point. "spacy/lexeme.pyx":84 is
 *     return (<Lexeme*>lex_id).lex
 * i.e. `lex_id` is reinterpreted as a Lexeme pointer and its `lex` field is
 * returned. The pointer is not validated in any way.
 */
static PyObject *__pyx_pw_5spacy_6lexeme_3lex_of(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/
static __pyx_t_5spacy_6lexeme_StringHash __pyx_f_5spacy_6lexeme_lex_of(size_t __pyx_v_lex_id, CYTHON_UNUSED int __pyx_skip_dispatch) {
  __pyx_t_5spacy_6lexeme_StringHash __pyx_r;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext(" lex_of ", 0);

  __pyx_r = ((struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_v_lex_id)->lex;

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
/* Python wrapper for lex_of: converts the single Python argument to size_t
 * and forwards it to the __pyx_pf_ implementation. Returns NULL, after adding
 * a traceback entry, when the conversion fails. */
static PyObject *__pyx_pw_5spacy_6lexeme_3lex_of(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/
static char __pyx_doc_5spacy_6lexeme_2lex_of[] = " Access the `lex' field of the Lexeme pointed to by lex_id. \n \n The lex field is the hash of the string you would expect to get back from \n a standard tokenizer, i.e. the word with punctuation and other non-whitespace \n delimited tokens split off. The other fields refer to properties of the \n string that the lex field stores a hash of, except sic and tail. \n \n >>> [unhash(lex_of(lex_id) for lex_id in from_string(u'Hi! world')] \n [u'Hi', u'!', u'world'] \n ";
static PyObject *__pyx_pw_5spacy_6lexeme_3lex_of(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id) {
  size_t __pyx_v_lex_id;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext(" lex_of (wrapper) ", 0);
  assert(__pyx_arg_lex_id);

  /* Argument unpacking: (size_t)-1 is only an error if an exception is pending. */
  __pyx_v_lex_id = __Pyx_PyInt_As_size_t(__pyx_arg_lex_id);
  if (unlikely((__pyx_v_lex_id == (size_t)-1) && PyErr_Occurred())) {
    __pyx_filename = __pyx_f[0];
    __pyx_lineno = 73;
    __pyx_clineno = __LINE__;
    __Pyx_AddTraceback(" spacy.lexeme.lex_of ", __pyx_clineno, __pyx_lineno, __pyx_filename);
    __Pyx_RefNannyFinishContext();
    return NULL;
  }

  __pyx_r = __pyx_pf_5spacy_6lexeme_2lex_of(__pyx_self, ((size_t)__pyx_v_lex_id));

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
/* Python-level body of lex_of: calls the C-level accessor and boxes the
 * resulting hash as a Python integer. Returns NULL after adding a traceback
 * entry when the accessor yields 0 or the integer cannot be created.
 *
 * NOTE(review): the C-level accessor in this file only reads a struct field
 * and never sets an exception, so a stored hash of 0 takes the error exit with
 * no exception visibly set - confirm that `except 0` in the .pyx is intended. */
static PyObject *__pyx_pf_5spacy_6lexeme_2lex_of(CYTHON_UNUSED PyObject *__pyx_self, size_t __pyx_v_lex_id) {
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  __pyx_t_5spacy_6lexeme_StringHash __pyx_t_1;
  PyObject *__pyx_t_2 = NULL;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_RefNannySetupContext(" lex_of ", 0);
  __Pyx_XDECREF(__pyx_r);

  __pyx_t_1 = __pyx_f_5spacy_6lexeme_lex_of(__pyx_v_lex_id, 0);
  if (unlikely(__pyx_t_1 == 0)) {
    __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  __pyx_t_2 = __Pyx_PyInt_From_uint64_t(__pyx_t_1);
  if (unlikely(!__pyx_t_2)) {
    __pyx_clineno = __LINE__;
    goto __pyx_L1_error;
  }
  __Pyx_GOTREF(__pyx_t_2);
  /* Ownership of the new reference moves to the return slot. */
  __pyx_r = __pyx_t_2;
  __pyx_t_2 = 0;
  goto __pyx_L0;

  /* function exit code */
  __pyx_L1_error:;
  __pyx_filename = __pyx_f[0];
  __pyx_lineno = 73;
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_AddTraceback(" spacy.lexeme.lex_of ", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
2014-07-07 06:21:06 +04:00
/* "spacy/lexeme.pyx":87
2014-07-05 22:51:42 +04:00
*
*
* cpdef ClusterID cluster_of ( size_t lex_id ) : # < < < < < < < < < < < < < <
* ' ' ' Access the ` cluster ' field of the Lexeme pointed to by lex_id , which
* gives an integer representation of the cluster ID of the word ,
*/
static PyObject * __pyx_pw_5spacy_6lexeme_5cluster_of ( PyObject * __pyx_self , PyObject * __pyx_arg_lex_id ) ; /*proto*/
/* C-level implementation of `cpdef ClusterID cluster_of(size_t lex_id)`
 * ("spacy/lexeme.pyx":87, body at line 103).
 *
 * Reinterprets lex_id as a pointer to a Lexeme and returns its `cluster`
 * field. lex_id is not validated here. __pyx_skip_dispatch is unused.
 */
static __pyx_t_5spacy_6lexeme_ClusterID __pyx_f_5spacy_6lexeme_cluster_of(size_t __pyx_v_lex_id, CYTHON_UNUSED int __pyx_skip_dispatch) {
    struct __pyx_t_5spacy_6lexeme_Lexeme *lexeme = (struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_v_lex_id;
    __pyx_t_5spacy_6lexeme_ClusterID cluster;
    __Pyx_RefNannyDeclarations
    __Pyx_RefNannySetupContext(" cluster_of ", 0);

    /* return (<Lexeme*>lex_id).cluster */
    cluster = lexeme->cluster;

    __Pyx_RefNannyFinishContext();
    return cluster;
}
/* Python wrapper */
static PyObject * __pyx_pw_5spacy_6lexeme_5cluster_of ( PyObject * __pyx_self , PyObject * __pyx_arg_lex_id ) ; /*proto*/
/* Docstring text for cluster_of; referenced by the "cluster_of" entry of
 * __pyx_methods.
 * NOTE(review): the doctest line using "{0:b" followed by `%` does not look
 * like a valid format expression -- presumably "{0:b}".format(...) was meant;
 * confirm in lexeme.pyx before relying on this example.
 */
static char __pyx_doc_5spacy_6lexeme_4cluster_of [ ] = " Access the `cluster' field of the Lexeme pointed to by lex_id, which \n gives an integer representation of the cluster ID of the word, \n which should be understood as a binary address: \n \n >>> strings = (u'pineapple', u'apple', u'dapple', u'scalable') \n >>> token_ids = [lookup(s) for s in strings] \n >>> clusters = [cluster_of(t) for t in token_ids] \n >>> print [ \" {0:b \" } % cluster_of(t) for t in token_ids] \n [ \" 100111110110 \" , \" 100111100100 \" , \" 01010111011001 \" , \" 100111110110 \" ] \n \n The clusterings are unideal, but often slightly useful. \n \" pineapple \" and \" apple \" share a long prefix, indicating a similar meaning, \n while \" dapple \" is totally different. On the other hand, \" scalable \" receives \n the same cluster ID as \" pineapple \" , which is not what we'd like. \n " ;
/* Python entry point for cluster_of (registered with METH_O in
 * __pyx_methods): converts the single argument to size_t and forwards it to
 * the implementation function.
 *
 * Returns the implementation's result, or NULL after adding a traceback
 * frame when the conversion yields (size_t)-1 with a Python error pending.
 */
static PyObject *__pyx_pw_5spacy_6lexeme_5cluster_of(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id) {
    size_t lex_id;
    PyObject *result = 0;
    __Pyx_RefNannyDeclarations
    __Pyx_RefNannySetupContext(" cluster_of (wrapper) ", 0);
    assert(__pyx_arg_lex_id);

    lex_id = __Pyx_PyInt_As_size_t(__pyx_arg_lex_id);
    if (unlikely((lex_id == (size_t)-1) && PyErr_Occurred())) {
        /* 87 is the .pyx line of the cpdef declaration. */
        __Pyx_AddTraceback(" spacy.lexeme.cluster_of ", __LINE__, 87, __pyx_f[0]);
    } else {
        result = __pyx_pf_5spacy_6lexeme_4cluster_of(__pyx_self, lex_id);
    }

    __Pyx_RefNannyFinishContext();
    return result;
}
/* Python-visible body of cluster_of() ("spacy/lexeme.pyx":87).
 *
 * Calls the C-level cluster_of and boxes the result through
 * __Pyx_PyInt_From_int. __pyx_self is unused.
 * Returns the boxed object, or NULL after adding a traceback frame when the
 * boxing fails.
 */
static PyObject *__pyx_pf_5spacy_6lexeme_4cluster_of(CYTHON_UNUSED PyObject *__pyx_self, size_t __pyx_v_lex_id) {
    PyObject *boxed;
    __Pyx_RefNannyDeclarations
    __Pyx_RefNannySetupContext(" cluster_of ", 0);

    boxed = __Pyx_PyInt_From_int(__pyx_f_5spacy_6lexeme_cluster_of(__pyx_v_lex_id, 0));
    if (unlikely(!boxed)) {
        __Pyx_AddTraceback(" spacy.lexeme.cluster_of ", __LINE__, 87, __pyx_f[0]);
    } else {
        __Pyx_GOTREF(boxed);
    }

    __Pyx_XGIVEREF(boxed);
    __Pyx_RefNannyFinishContext();
    return boxed;
}
2014-07-07 06:21:06 +04:00
/* "spacy/lexeme.pyx":106
2014-07-05 22:51:42 +04:00
*
*
* cpdef Py_UNICODE first_of ( size_t lex_id ) : # < < < < < < < < < < < < < <
* ' ' ' Access the ` first ' field of the Lexeme pointed to by lex_id , which
* stores the first character of the lex string of the word .
*/
static PyObject * __pyx_pw_5spacy_6lexeme_7first_of ( PyObject * __pyx_self , PyObject * __pyx_arg_lex_id ) ; /*proto*/
/* C-level implementation of `cpdef Py_UNICODE first_of(size_t lex_id)`
 * ("spacy/lexeme.pyx":106, body at line 114).
 *
 * Reinterprets lex_id as a pointer to a Lexeme and returns its `first`
 * field. lex_id is not validated here. __pyx_skip_dispatch is unused.
 */
static Py_UNICODE __pyx_f_5spacy_6lexeme_first_of(size_t __pyx_v_lex_id, CYTHON_UNUSED int __pyx_skip_dispatch) {
    struct __pyx_t_5spacy_6lexeme_Lexeme *lexeme = (struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_v_lex_id;
    Py_UNICODE first_char;
    __Pyx_RefNannyDeclarations
    __Pyx_RefNannySetupContext(" first_of ", 0);

    /* return (<Lexeme*>lex_id).first */
    first_char = lexeme->first;

    __Pyx_RefNannyFinishContext();
    return first_char;
}
/* Python wrapper */
static PyObject * __pyx_pw_5spacy_6lexeme_7first_of ( PyObject * __pyx_self , PyObject * __pyx_arg_lex_id ) ; /*proto*/
/* Docstring text for first_of; referenced by the "first_of" entry of
 * __pyx_methods.
 * NOTE(review): the example feeds first_of's result to unhash(), but the
 * Python-level function boxes the value with PyUnicode_FromOrdinal, i.e. it
 * already yields a character -- the example is presumably stale; confirm.
 */
static char __pyx_doc_5spacy_6lexeme_6first_of [ ] = " Access the `first' field of the Lexeme pointed to by lex_id, which \n stores the first character of the lex string of the word. \n \n >>> lex_id = lookup(u'Hello') \n >>> unhash(first_of(lex_id)) \n u'H' \n " ;
/* Python entry point for first_of (registered with METH_O in
 * __pyx_methods): converts the single argument to size_t and forwards it to
 * the implementation function.
 *
 * Returns the implementation's result, or NULL after adding a traceback
 * frame when the conversion yields (size_t)-1 with a Python error pending.
 */
static PyObject *__pyx_pw_5spacy_6lexeme_7first_of(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id) {
    size_t lex_id;
    PyObject *result = 0;
    __Pyx_RefNannyDeclarations
    __Pyx_RefNannySetupContext(" first_of (wrapper) ", 0);
    assert(__pyx_arg_lex_id);

    lex_id = __Pyx_PyInt_As_size_t(__pyx_arg_lex_id);
    if (unlikely((lex_id == (size_t)-1) && PyErr_Occurred())) {
        /* 106 is the .pyx line of the cpdef declaration. */
        __Pyx_AddTraceback(" spacy.lexeme.first_of ", __LINE__, 106, __pyx_f[0]);
    } else {
        result = __pyx_pf_5spacy_6lexeme_6first_of(__pyx_self, lex_id);
    }

    __Pyx_RefNannyFinishContext();
    return result;
}
/* Python-visible body of first_of() ("spacy/lexeme.pyx":106).
 *
 * Calls the C-level first_of and turns the returned code point into a
 * Python unicode object with PyUnicode_FromOrdinal. __pyx_self is unused.
 * Returns that object, or NULL after adding a traceback frame when the
 * conversion fails.
 */
static PyObject *__pyx_pf_5spacy_6lexeme_6first_of(CYTHON_UNUSED PyObject *__pyx_self, size_t __pyx_v_lex_id) {
    PyObject *boxed;
    __Pyx_RefNannyDeclarations
    __Pyx_RefNannySetupContext(" first_of ", 0);

    boxed = PyUnicode_FromOrdinal(__pyx_f_5spacy_6lexeme_first_of(__pyx_v_lex_id, 0));
    if (unlikely(!boxed)) {
        __Pyx_AddTraceback(" spacy.lexeme.first_of ", __LINE__, 106, __pyx_f[0]);
    } else {
        __Pyx_GOTREF(boxed);
    }

    __Pyx_XGIVEREF(boxed);
    __Pyx_RefNannyFinishContext();
    return boxed;
}
2014-07-07 06:21:06 +04:00
/* "spacy/lexeme.pyx":117
2014-07-05 22:51:42 +04:00
*
*
* cpdef double prob_of ( size_t lex_id ) : # < < < < < < < < < < < < < <
* ' ' ' Access the ` prob ' field of the Lexeme pointed to by lex_id , which stores
* the smoothed unigram log probability of the word , as estimated from a large
*/
static PyObject * __pyx_pw_5spacy_6lexeme_9prob_of ( PyObject * __pyx_self , PyObject * __pyx_arg_lex_id ) ; /*proto*/
/* C-level implementation of `cpdef double prob_of(size_t lex_id)`
 * ("spacy/lexeme.pyx":117).
 *
 * Reinterprets lex_id as a pointer to a Lexeme and returns its `prob` field,
 * in line with the function's docstring and with the sibling accessors
 * (cluster_of, first_of, last3_of, ...). The previous body ignored lex_id
 * and always returned 0.
 *
 * NOTE(review): this file is Cython output. The same fix presumably has to
 * be made in lexeme.pyx (the function body there appears to be empty),
 * otherwise regenerating the file brings the stub back. The `prob` member
 * name is taken from the docstring -- confirm it against the Lexeme struct
 * declaration.
 *
 * lex_id is not validated here. __pyx_skip_dispatch is unused.
 */
static double __pyx_f_5spacy_6lexeme_prob_of(size_t __pyx_v_lex_id, CYTHON_UNUSED int __pyx_skip_dispatch) {
    double __pyx_r;
    __Pyx_RefNannyDeclarations
    __Pyx_RefNannySetupContext(" prob_of ", 0);

    /* return (<Lexeme*>lex_id).prob */
    __pyx_r = ((struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_v_lex_id)->prob;

    /* function exit code */
    __Pyx_RefNannyFinishContext();
    return __pyx_r;
}
/* Python wrapper */
static PyObject * __pyx_pw_5spacy_6lexeme_9prob_of ( PyObject * __pyx_self , PyObject * __pyx_arg_lex_id ) ; /*proto*/
/* Docstring text for prob_of; referenced by the "prob_of" entry of
 * __pyx_methods.
 * NOTE(review): "Knesser-Ney" is presumably a misspelling of "Kneser-Ney";
 * the text lives in lexeme.pyx and should be corrected there.
 */
static char __pyx_doc_5spacy_6lexeme_8prob_of [ ] = " Access the `prob' field of the Lexeme pointed to by lex_id, which stores \n the smoothed unigram log probability of the word, as estimated from a large \n text corpus. By default, probabilities are based on counts from Gigaword, \n smoothed using Knesser-Ney; but any probabilities file can be supplied to \n load_probs. \n \n >>> prob_of(lookup(u'world')) \n -20.10340371976182 \n " ;
/* Python entry point for prob_of (registered with METH_O in
 * __pyx_methods): converts the single argument to size_t and forwards it to
 * the implementation function.
 *
 * Returns the implementation's result, or NULL after adding a traceback
 * frame when the conversion yields (size_t)-1 with a Python error pending.
 */
static PyObject *__pyx_pw_5spacy_6lexeme_9prob_of(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id) {
    size_t lex_id;
    PyObject *result = 0;
    __Pyx_RefNannyDeclarations
    __Pyx_RefNannySetupContext(" prob_of (wrapper) ", 0);
    assert(__pyx_arg_lex_id);

    lex_id = __Pyx_PyInt_As_size_t(__pyx_arg_lex_id);
    if (unlikely((lex_id == (size_t)-1) && PyErr_Occurred())) {
        /* 117 is the .pyx line of the cpdef declaration. */
        __Pyx_AddTraceback(" spacy.lexeme.prob_of ", __LINE__, 117, __pyx_f[0]);
    } else {
        result = __pyx_pf_5spacy_6lexeme_8prob_of(__pyx_self, lex_id);
    }

    __Pyx_RefNannyFinishContext();
    return result;
}
/* Python-visible body of prob_of() ("spacy/lexeme.pyx":117).
 *
 * Calls the C-level prob_of and boxes the double with PyFloat_FromDouble.
 * __pyx_self is unused.
 * Returns the boxed object, or NULL after adding a traceback frame when the
 * boxing fails.
 */
static PyObject *__pyx_pf_5spacy_6lexeme_8prob_of(CYTHON_UNUSED PyObject *__pyx_self, size_t __pyx_v_lex_id) {
    PyObject *boxed;
    __Pyx_RefNannyDeclarations
    __Pyx_RefNannySetupContext(" prob_of ", 0);

    boxed = PyFloat_FromDouble(__pyx_f_5spacy_6lexeme_prob_of(__pyx_v_lex_id, 0));
    if (unlikely(!boxed)) {
        __Pyx_AddTraceback(" spacy.lexeme.prob_of ", __LINE__, 117, __pyx_f[0]);
    } else {
        __Pyx_GOTREF(boxed);
    }

    __Pyx_XGIVEREF(boxed);
    __Pyx_RefNannyFinishContext();
    return boxed;
}
2014-07-07 06:21:06 +04:00
/* "spacy/lexeme.pyx":130
2014-07-05 22:51:42 +04:00
*
*
* cpdef StringHash last3_of ( size_t lex_id ) : # < < < < < < < < < < < < < <
* ' ' ' Access the ` last3 ' field of the Lexeme pointed to by lex_id , which stores
* the hash of the last three characters of the word :
*/
static PyObject * __pyx_pw_5spacy_6lexeme_11last3_of ( PyObject * __pyx_self , PyObject * __pyx_arg_lex_id ) ; /*proto*/
/* C-level implementation of `cpdef StringHash last3_of(size_t lex_id)`
 * ("spacy/lexeme.pyx":130, body at line 138).
 *
 * Reinterprets lex_id as a pointer to a Lexeme and returns its `last3`
 * field. lex_id is not validated here. __pyx_skip_dispatch is unused.
 */
static __pyx_t_5spacy_6lexeme_StringHash __pyx_f_5spacy_6lexeme_last3_of(size_t __pyx_v_lex_id, CYTHON_UNUSED int __pyx_skip_dispatch) {
    struct __pyx_t_5spacy_6lexeme_Lexeme *lexeme = (struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_v_lex_id;
    __pyx_t_5spacy_6lexeme_StringHash suffix_hash;
    __Pyx_RefNannyDeclarations
    __Pyx_RefNannySetupContext(" last3_of ", 0);

    /* return (<Lexeme*>lex_id).last3 */
    suffix_hash = lexeme->last3;

    __Pyx_RefNannyFinishContext();
    return suffix_hash;
}
/* Python wrapper */
static PyObject * __pyx_pw_5spacy_6lexeme_11last3_of ( PyObject * __pyx_self , PyObject * __pyx_arg_lex_id ) ; /*proto*/
/* Docstring text for last3_of; referenced by the "last3_of" entry of
 * __pyx_methods.
 */
static char __pyx_doc_5spacy_6lexeme_10last3_of [ ] = " Access the `last3' field of the Lexeme pointed to by lex_id, which stores \n the hash of the last three characters of the word: \n \n >>> lex_ids = [lookup(w) for w in (u'Hello', u'!')] \n >>> [unhash(last3_of(lex_id)) for lex_id in lex_ids] \n [u'llo', u'!'] \n " ;
/* Python entry point for last3_of (registered with METH_O in
 * __pyx_methods): converts the single argument to size_t and forwards it to
 * the implementation function.
 *
 * Returns the implementation's result, or NULL after adding a traceback
 * frame when the conversion yields (size_t)-1 with a Python error pending.
 */
static PyObject *__pyx_pw_5spacy_6lexeme_11last3_of(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id) {
    size_t lex_id;
    PyObject *result = 0;
    __Pyx_RefNannyDeclarations
    __Pyx_RefNannySetupContext(" last3_of (wrapper) ", 0);
    assert(__pyx_arg_lex_id);

    lex_id = __Pyx_PyInt_As_size_t(__pyx_arg_lex_id);
    if (unlikely((lex_id == (size_t)-1) && PyErr_Occurred())) {
        /* 130 is the .pyx line of the cpdef declaration. */
        __Pyx_AddTraceback(" spacy.lexeme.last3_of ", __LINE__, 130, __pyx_f[0]);
    } else {
        result = __pyx_pf_5spacy_6lexeme_10last3_of(__pyx_self, lex_id);
    }

    __Pyx_RefNannyFinishContext();
    return result;
}
/* Python-visible body of last3_of() ("spacy/lexeme.pyx":130).
 *
 * Calls the C-level last3_of and boxes the StringHash through
 * __Pyx_PyInt_From_uint64_t. __pyx_self is unused.
 * Returns the boxed object, or NULL after adding a traceback frame when the
 * boxing fails.
 */
static PyObject *__pyx_pf_5spacy_6lexeme_10last3_of(CYTHON_UNUSED PyObject *__pyx_self, size_t __pyx_v_lex_id) {
    PyObject *boxed;
    __Pyx_RefNannyDeclarations
    __Pyx_RefNannySetupContext(" last3_of ", 0);

    boxed = __Pyx_PyInt_From_uint64_t(__pyx_f_5spacy_6lexeme_last3_of(__pyx_v_lex_id, 0));
    if (unlikely(!boxed)) {
        __Pyx_AddTraceback(" spacy.lexeme.last3_of ", __LINE__, 130, __pyx_f[0]);
    } else {
        __Pyx_GOTREF(boxed);
    }

    __Pyx_XGIVEREF(boxed);
    __Pyx_RefNannyFinishContext();
    return boxed;
}
2014-07-07 06:21:06 +04:00
/* "spacy/lexeme.pyx":141
2014-07-05 22:51:42 +04:00
*
*
* cpdef bint is_oft_upper ( size_t lex_id ) : # < < < < < < < < < < < < < <
* ' ' ' Access the ` oft_upper ' field of the Lexeme pointed to by lex_id , which
* stores whether the lowered version of the string hashed by ` lex ' is found
*/
static PyObject * __pyx_pw_5spacy_6lexeme_13is_oft_upper ( PyObject * __pyx_self , PyObject * __pyx_arg_lex_id ) ; /*proto*/
/* C-level implementation of `cpdef bint is_oft_upper(size_t lex_id)`
 * ("spacy/lexeme.pyx":141, body at line 153).
 *
 * Reinterprets lex_id as a pointer to a Lexeme and returns its `oft_upper`
 * field. lex_id is not validated here. __pyx_skip_dispatch is unused.
 */
static int __pyx_f_5spacy_6lexeme_is_oft_upper(size_t __pyx_v_lex_id, CYTHON_UNUSED int __pyx_skip_dispatch) {
    struct __pyx_t_5spacy_6lexeme_Lexeme *lexeme = (struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_v_lex_id;
    int flag;
    __Pyx_RefNannyDeclarations
    __Pyx_RefNannySetupContext(" is_oft_upper ", 0);

    /* return (<Lexeme*>lex_id).oft_upper */
    flag = lexeme->oft_upper;

    __Pyx_RefNannyFinishContext();
    return flag;
}
/* Python wrapper */
static PyObject * __pyx_pw_5spacy_6lexeme_13is_oft_upper ( PyObject * __pyx_self , PyObject * __pyx_arg_lex_id ) ; /*proto*/
/* Docstring text for is_oft_upper; referenced by the "is_oft_upper" entry of
 * __pyx_methods.
 */
static char __pyx_doc_5spacy_6lexeme_12is_oft_upper [ ] = " Access the `oft_upper' field of the Lexeme pointed to by lex_id, which \n stores whether the lowered version of the string hashed by `lex' is found \n in all-upper case frequently in a large sample of text. Users are free \n to load different data, by default we use a sample from Wikipedia, with \n a threshold of 0.95, picked to maximize mutual information for POS tagging. \n \n >>> is_oft_upper(lookup(u'abc')) \n True \n >>> is_oft_upper(lookup(u'aBc')) # This must get the same answer \n True \n " ;
/* Python entry point for is_oft_upper (registered with METH_O in
 * __pyx_methods): converts the single argument to size_t and forwards it to
 * the implementation function.
 *
 * Returns the implementation's result, or NULL after adding a traceback
 * frame when the conversion yields (size_t)-1 with a Python error pending.
 */
static PyObject *__pyx_pw_5spacy_6lexeme_13is_oft_upper(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id) {
    size_t lex_id;
    PyObject *result = 0;
    __Pyx_RefNannyDeclarations
    __Pyx_RefNannySetupContext(" is_oft_upper (wrapper) ", 0);
    assert(__pyx_arg_lex_id);

    lex_id = __Pyx_PyInt_As_size_t(__pyx_arg_lex_id);
    if (unlikely((lex_id == (size_t)-1) && PyErr_Occurred())) {
        /* 141 is the .pyx line of the cpdef declaration. */
        __Pyx_AddTraceback(" spacy.lexeme.is_oft_upper ", __LINE__, 141, __pyx_f[0]);
    } else {
        result = __pyx_pf_5spacy_6lexeme_12is_oft_upper(__pyx_self, lex_id);
    }

    __Pyx_RefNannyFinishContext();
    return result;
}
/* Python-visible body of is_oft_upper() ("spacy/lexeme.pyx":141).
 *
 * Calls the C-level is_oft_upper and converts the int result with
 * __Pyx_PyBool_FromLong. __pyx_self is unused.
 * Returns the resulting object, or NULL after adding a traceback frame when
 * the conversion yields NULL.
 */
static PyObject *__pyx_pf_5spacy_6lexeme_12is_oft_upper(CYTHON_UNUSED PyObject *__pyx_self, size_t __pyx_v_lex_id) {
    PyObject *boxed;
    __Pyx_RefNannyDeclarations
    __Pyx_RefNannySetupContext(" is_oft_upper ", 0);

    boxed = __Pyx_PyBool_FromLong(__pyx_f_5spacy_6lexeme_is_oft_upper(__pyx_v_lex_id, 0));
    if (unlikely(!boxed)) {
        __Pyx_AddTraceback(" spacy.lexeme.is_oft_upper ", __LINE__, 141, __pyx_f[0]);
    } else {
        __Pyx_GOTREF(boxed);
    }

    __Pyx_XGIVEREF(boxed);
    __Pyx_RefNannyFinishContext();
    return boxed;
}
2014-07-07 06:21:06 +04:00
/* "spacy/lexeme.pyx":156
2014-07-05 22:51:42 +04:00
*
*
* cpdef bint is_oft_title ( size_t lex_id ) : # < < < < < < < < < < < < < <
* ' ' ' Access the ` oft_upper ' field of the Lexeme pointed to by lex_id , which
* stores whether the lowered version of the string hashed by ` lex ' is found
*/
static PyObject * __pyx_pw_5spacy_6lexeme_15is_oft_title ( PyObject * __pyx_self , PyObject * __pyx_arg_lex_id ) ; /*proto*/
/* C-level implementation of `cpdef bint is_oft_title(size_t lex_id)`
 * ("spacy/lexeme.pyx":156, body at line 168).
 *
 * Reinterprets lex_id as a pointer to a Lexeme and returns its `oft_title`
 * field. lex_id is not validated here. __pyx_skip_dispatch is unused.
 */
static int __pyx_f_5spacy_6lexeme_is_oft_title(size_t __pyx_v_lex_id, CYTHON_UNUSED int __pyx_skip_dispatch) {
    struct __pyx_t_5spacy_6lexeme_Lexeme *lexeme = (struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_v_lex_id;
    int flag;
    __Pyx_RefNannyDeclarations
    __Pyx_RefNannySetupContext(" is_oft_title ", 0);

    /* return (<Lexeme*>lex_id).oft_title */
    flag = lexeme->oft_title;

    __Pyx_RefNannyFinishContext();
    return flag;
}
/* Python wrapper */
static PyObject * __pyx_pw_5spacy_6lexeme_15is_oft_title ( PyObject * __pyx_self , PyObject * __pyx_arg_lex_id ) ; /*proto*/
/* Docstring text for is_oft_title; referenced by the "is_oft_title" entry of
 * __pyx_methods.
 * The first sentence used to name the `oft_upper' field (copied from
 * is_oft_upper); the C implementation reads `oft_title', so the text now
 * says so.
 * NOTE(review): this text comes from lexeme.pyx -- presumably the same
 * correction is needed there, or regeneration will undo it.
 */
static char __pyx_doc_5spacy_6lexeme_14is_oft_title [ ] = " Access the `oft_title' field of the Lexeme pointed to by lex_id, which \n stores whether the lowered version of the string hashed by `lex' is found \n title-cased frequently in a large sample of text. Users are free \n to load different data, by default we use a sample from Wikipedia, with \n a threshold of 0.3, picked to maximize mutual information for POS tagging. \n \n >>> is_oft_title(lookup(u'marcus')) \n True \n >>> is_oft_title(lookup(u'MARCUS')) # This must get the same value \n True \n " ;
/* Python entry point for is_oft_title (registered with METH_O in
 * __pyx_methods): converts the single argument to size_t and forwards it to
 * the implementation function.
 *
 * Returns the implementation's result, or NULL after adding a traceback
 * frame when the conversion yields (size_t)-1 with a Python error pending.
 */
static PyObject *__pyx_pw_5spacy_6lexeme_15is_oft_title(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id) {
    size_t lex_id;
    PyObject *result = 0;
    __Pyx_RefNannyDeclarations
    __Pyx_RefNannySetupContext(" is_oft_title (wrapper) ", 0);
    assert(__pyx_arg_lex_id);

    lex_id = __Pyx_PyInt_As_size_t(__pyx_arg_lex_id);
    if (unlikely((lex_id == (size_t)-1) && PyErr_Occurred())) {
        /* 156 is the .pyx line of the cpdef declaration. */
        __Pyx_AddTraceback(" spacy.lexeme.is_oft_title ", __LINE__, 156, __pyx_f[0]);
    } else {
        result = __pyx_pf_5spacy_6lexeme_14is_oft_title(__pyx_self, lex_id);
    }

    __Pyx_RefNannyFinishContext();
    return result;
}
/* Python-visible body of is_oft_title() ("spacy/lexeme.pyx":156).
 *
 * Calls the C-level is_oft_title and converts the int result with
 * __Pyx_PyBool_FromLong. __pyx_self is unused.
 * Returns the resulting object, or NULL after adding a traceback frame when
 * the conversion yields NULL.
 */
static PyObject *__pyx_pf_5spacy_6lexeme_14is_oft_title(CYTHON_UNUSED PyObject *__pyx_self, size_t __pyx_v_lex_id) {
    PyObject *boxed;
    __Pyx_RefNannyDeclarations
    __Pyx_RefNannySetupContext(" is_oft_title ", 0);

    boxed = __Pyx_PyBool_FromLong(__pyx_f_5spacy_6lexeme_is_oft_title(__pyx_v_lex_id, 0));
    if (unlikely(!boxed)) {
        __Pyx_AddTraceback(" spacy.lexeme.is_oft_title ", __LINE__, 156, __pyx_f[0]);
    } else {
        __Pyx_GOTREF(boxed);
    }

    __Pyx_XGIVEREF(boxed);
    __Pyx_RefNannyFinishContext();
    return boxed;
}
/* Method table of the `lexeme` module.
 * Each row gives the Python-visible name, the wrapper function that
 * implements it, the calling convention flag (METH_O for every entry here)
 * and the docstring array. The all-zero row closes the table.
 */
static PyMethodDef __pyx_methods [ ] = {
{ __Pyx_NAMESTR ( " sic_of " ) , ( PyCFunction ) __pyx_pw_5spacy_6lexeme_1sic_of , METH_O , __Pyx_DOCSTR ( __pyx_doc_5spacy_6lexeme_sic_of ) } ,
{ __Pyx_NAMESTR ( " lex_of " ) , ( PyCFunction ) __pyx_pw_5spacy_6lexeme_3lex_of , METH_O , __Pyx_DOCSTR ( __pyx_doc_5spacy_6lexeme_2lex_of ) } ,
{ __Pyx_NAMESTR ( " cluster_of " ) , ( PyCFunction ) __pyx_pw_5spacy_6lexeme_5cluster_of , METH_O , __Pyx_DOCSTR ( __pyx_doc_5spacy_6lexeme_4cluster_of ) } ,
{ __Pyx_NAMESTR ( " first_of " ) , ( PyCFunction ) __pyx_pw_5spacy_6lexeme_7first_of , METH_O , __Pyx_DOCSTR ( __pyx_doc_5spacy_6lexeme_6first_of ) } ,
{ __Pyx_NAMESTR ( " prob_of " ) , ( PyCFunction ) __pyx_pw_5spacy_6lexeme_9prob_of , METH_O , __Pyx_DOCSTR ( __pyx_doc_5spacy_6lexeme_8prob_of ) } ,
{ __Pyx_NAMESTR ( " last3_of " ) , ( PyCFunction ) __pyx_pw_5spacy_6lexeme_11last3_of , METH_O , __Pyx_DOCSTR ( __pyx_doc_5spacy_6lexeme_10last3_of ) } ,
{ __Pyx_NAMESTR ( " is_oft_upper " ) , ( PyCFunction ) __pyx_pw_5spacy_6lexeme_13is_oft_upper , METH_O , __Pyx_DOCSTR ( __pyx_doc_5spacy_6lexeme_12is_oft_upper ) } ,
{ __Pyx_NAMESTR ( " is_oft_title " ) , ( PyCFunction ) __pyx_pw_5spacy_6lexeme_15is_oft_title , METH_O , __Pyx_DOCSTR ( __pyx_doc_5spacy_6lexeme_14is_oft_title ) } ,
{ 0 , 0 , 0 , 0 }
} ;
/* Module definition record, compiled only for Python 3 and later.
 * It names the module "lexeme", supplies the module docstring constant and
 * the __pyx_methods table, sets m_size to -1 and leaves the reload,
 * traverse, clear and free slots NULL. The header initializer differs for
 * Python versions below 3.2.
 */
# if PY_MAJOR_VERSION >= 3
static struct PyModuleDef __pyx_moduledef = {
# if PY_VERSION_HEX < 0x03020000
{ PyObject_HEAD_INIT ( NULL ) NULL , 0 , NULL } ,
# else
PyModuleDef_HEAD_INIT ,
# endif
__Pyx_NAMESTR ( " lexeme " ) ,
__Pyx_DOCSTR ( __pyx_k_Accessors_for_Lexeme_properties ) , /* m_doc */
- 1 , /* m_size */
__pyx_methods /* m_methods */ ,
NULL , /* m_reload */
NULL , /* m_traverse */
NULL , /* m_clear */
NULL /* m_free */
} ;
# endif
/* Table of the module's string constants, handed to __Pyx_InitStrings by
 * __Pyx_InitGlobals. Each row pairs the address of a cached object slot
 * (__pyx_kp_u_* / __pyx_n_s_*) with its C string constant (__pyx_k_*) and
 * that constant's sizeof; the meaning of the four trailing fields is defined
 * by __Pyx_StringTabEntry, which is declared outside this excerpt.
 * The final all-zero row presumably serves as the end-of-table marker.
 */
static __Pyx_StringTabEntry __pyx_string_tab [ ] = {
2014-07-07 06:21:06 +04:00
{ & __pyx_kp_u_ , __pyx_k_ , sizeof ( __pyx_k_ ) , 0 , 1 , 0 , 0 } ,
2014-07-05 22:51:42 +04:00
{ & __pyx_kp_u_Access_the_cluster_field_of_the , __pyx_k_Access_the_cluster_field_of_the , sizeof ( __pyx_k_Access_the_cluster_field_of_the ) , 0 , 1 , 0 , 0 } ,
{ & __pyx_kp_u_Access_the_first_field_of_the_Le , __pyx_k_Access_the_first_field_of_the_Le , sizeof ( __pyx_k_Access_the_first_field_of_the_Le ) , 0 , 1 , 0 , 0 } ,
{ & __pyx_kp_u_Access_the_last3_field_of_the_Le , __pyx_k_Access_the_last3_field_of_the_Le , sizeof ( __pyx_k_Access_the_last3_field_of_the_Le ) , 0 , 1 , 0 , 0 } ,
{ & __pyx_kp_u_Access_the_lex_field_of_the_Lexe , __pyx_k_Access_the_lex_field_of_the_Lexe , sizeof ( __pyx_k_Access_the_lex_field_of_the_Lexe ) , 0 , 1 , 0 , 0 } ,
{ & __pyx_kp_u_Access_the_oft_upper_field_of_th , __pyx_k_Access_the_oft_upper_field_of_th , sizeof ( __pyx_k_Access_the_oft_upper_field_of_th ) , 0 , 1 , 0 , 0 } ,
{ & __pyx_kp_u_Access_the_oft_upper_field_of_th_2 , __pyx_k_Access_the_oft_upper_field_of_th_2 , sizeof ( __pyx_k_Access_the_oft_upper_field_of_th_2 ) , 0 , 1 , 0 , 0 } ,
{ & __pyx_kp_u_Access_the_prob_field_of_the_Lex , __pyx_k_Access_the_prob_field_of_the_Lex , sizeof ( __pyx_k_Access_the_prob_field_of_the_Lex ) , 0 , 1 , 0 , 0 } ,
{ & __pyx_kp_u_Access_the_sic_field_of_the_Lexe , __pyx_k_Access_the_sic_field_of_the_Lexe , sizeof ( __pyx_k_Access_the_sic_field_of_the_Lexe ) , 0 , 1 , 0 , 0 } ,
2014-07-07 06:21:06 +04:00
{ & __pyx_n_s_BLANK_WORD , __pyx_k_BLANK_WORD , sizeof ( __pyx_k_BLANK_WORD ) , 0 , 0 , 1 , 1 } ,
{ & __pyx_kp_u__2 , __pyx_k__2 , sizeof ( __pyx_k__2 ) , 0 , 1 , 0 , 0 } ,
{ & __pyx_kp_u_cluster_of_line_87 , __pyx_k_cluster_of_line_87 , sizeof ( __pyx_k_cluster_of_line_87 ) , 0 , 1 , 0 , 0 } ,
{ & __pyx_kp_u_first_of_line_106 , __pyx_k_first_of_line_106 , sizeof ( __pyx_k_first_of_line_106 ) , 0 , 1 , 0 , 0 } ,
{ & __pyx_kp_u_is_oft_title_line_156 , __pyx_k_is_oft_title_line_156 , sizeof ( __pyx_k_is_oft_title_line_156 ) , 0 , 1 , 0 , 0 } ,
{ & __pyx_kp_u_is_oft_upper_line_141 , __pyx_k_is_oft_upper_line_141 , sizeof ( __pyx_k_is_oft_upper_line_141 ) , 0 , 1 , 0 , 0 } ,
{ & __pyx_kp_u_last3_of_line_130 , __pyx_k_last3_of_line_130 , sizeof ( __pyx_k_last3_of_line_130 ) , 0 , 1 , 0 , 0 } ,
{ & __pyx_kp_u_lex_of_line_73 , __pyx_k_lex_of_line_73 , sizeof ( __pyx_k_lex_of_line_73 ) , 0 , 1 , 0 , 0 } ,
2014-07-05 22:51:42 +04:00
{ & __pyx_n_s_main , __pyx_k_main , sizeof ( __pyx_k_main ) , 0 , 0 , 1 , 1 } ,
2014-07-07 06:21:06 +04:00
{ & __pyx_kp_u_prob_of_line_117 , __pyx_k_prob_of_line_117 , sizeof ( __pyx_k_prob_of_line_117 ) , 0 , 1 , 0 , 0 } ,
{ & __pyx_n_s_pyx_capi , __pyx_k_pyx_capi , sizeof ( __pyx_k_pyx_capi ) , 0 , 0 , 1 , 1 } ,
{ & __pyx_kp_u_sic_of_line_61 , __pyx_k_sic_of_line_61 , sizeof ( __pyx_k_sic_of_line_61 ) , 0 , 1 , 0 , 0 } ,
2014-07-05 22:51:42 +04:00
{ & __pyx_n_s_test , __pyx_k_test , sizeof ( __pyx_k_test ) , 0 , 0 , 1 , 1 } ,
{ 0 , 0 , 0 , 0 , 0 , 0 , 0 }
} ;
/* Builtin-caching hook. This module has nothing to look up, so the function
 * does no work and reports success (0).
 */
static int __Pyx_InitCachedBuiltins(void)
{
    const int status = 0;

    return status;
}
/* Constant-caching hook. Nothing is built between the RefNanny setup and
 * finish calls; the function always returns 0.
 */
static int __Pyx_InitCachedConstants(void)
{
    int status = 0;
    __Pyx_RefNannyDeclarations

    __Pyx_RefNannySetupContext(" __Pyx_InitCachedConstants ", 0);
    __Pyx_RefNannyFinishContext();
    return status;
}
/* Initializes the module's string constants from __pyx_string_tab.
 *
 * Returns 0 on success. When __Pyx_InitStrings reports a negative result,
 * the file-level error position (__pyx_filename, __pyx_lineno,
 * __pyx_clineno) is recorded and -1 is returned.
 */
static int __Pyx_InitGlobals(void)
{
    if (__Pyx_InitStrings(__pyx_string_tab) >= 0) {
        return 0;
    }

    __pyx_filename = __pyx_f[0];
    __pyx_lineno = 1;
    __pyx_clineno = __LINE__;
    return -1;
}
/*
 * Module initialisation entry point. The preprocessor selects the name and
 * prototype: initlexeme when PY_MAJOR_VERSION < 3, PyInit_lexeme otherwise.
 *
 * The body creates the module object, wires up __builtins__, initialises the
 * string table, exports one variable and one function through capsule helpers,
 * imports C functions from " spacy.spacy " and " spacy.string_tools ", and
 * finally stores a dict of docstring entries in the module dict.
 *
 * Every failing step records file/line information and jumps to
 * __pyx_L1_error. With PY_MAJOR_VERSION < 3 the function returns nothing;
 * otherwise it returns __pyx_m, which the error path resets to 0 after
 * releasing it.
 *
 * NOTE(review): the bare date/time lines interleaved below look like
 * version-control annotation residue rather than C; confirm and strip them.
 */
# if PY_MAJOR_VERSION < 3
PyMODINIT_FUNC initlexeme ( void ) ; /*proto*/
PyMODINIT_FUNC initlexeme ( void )
# else
PyMODINIT_FUNC PyInit_lexeme ( void ) ; /*proto*/
PyMODINIT_FUNC PyInit_lexeme ( void )
# endif
{
PyObject * __pyx_t_1 = NULL ;
2014-07-07 06:21:06 +04:00
PyObject * __pyx_t_2 = NULL ;
PyObject * __pyx_t_3 = NULL ;
2014-07-05 22:51:42 +04:00
int __pyx_lineno = 0 ;
const char * __pyx_filename = NULL ;
int __pyx_clineno = 0 ;
__Pyx_RefNannyDeclarations
/* With CYTHON_REFNANNY enabled, try two module names for the refnanny API
 * and abort the process if neither can be imported. */
# if CYTHON_REFNANNY
__Pyx_RefNanny = __Pyx_RefNannyImportAPI ( " refnanny " ) ;
if ( ! __Pyx_RefNanny ) {
PyErr_Clear ( ) ;
__Pyx_RefNanny = __Pyx_RefNannyImportAPI ( " Cython.Runtime.refnanny " ) ;
if ( ! __Pyx_RefNanny )
Py_FatalError ( " failed to import 'refnanny' module " ) ;
}
# endif
__Pyx_RefNannySetupContext ( " PyMODINIT_FUNC PyInit_lexeme(void) " , 0 ) ;
if ( __Pyx_check_binary_version ( ) < 0 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
/* Shared empty tuple / empty bytes objects used elsewhere in this file. */
__pyx_empty_tuple = PyTuple_New ( 0 ) ; if ( unlikely ( ! __pyx_empty_tuple ) ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
__pyx_empty_bytes = PyBytes_FromStringAndSize ( " " , 0 ) ; if ( unlikely ( ! __pyx_empty_bytes ) ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
# ifdef __Pyx_CyFunction_USED
if ( __Pyx_CyFunction_init ( ) < 0 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
# endif
# ifdef __Pyx_FusedFunction_USED
if ( __pyx_FusedFunction_init ( ) < 0 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
# endif
# ifdef __Pyx_Generator_USED
if ( __pyx_Generator_init ( ) < 0 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
# endif
/*--- Library function declarations ---*/
/*--- Threads initialization code ---*/
# if defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS
# ifdef WITH_THREAD /* Python build with threading support? */
PyEval_InitThreads ( ) ;
# endif
# endif
/*--- Module creation code ---*/
# if PY_MAJOR_VERSION < 3
/* The result of Py_InitModule4 is given an extra reference right away. */
__pyx_m = Py_InitModule4 ( __Pyx_NAMESTR ( " lexeme " ) , __pyx_methods , __Pyx_DOCSTR ( __pyx_k_Accessors_for_Lexeme_properties ) , 0 , PYTHON_API_VERSION ) ; Py_XINCREF ( __pyx_m ) ;
# else
__pyx_m = PyModule_Create ( & __pyx_moduledef ) ;
# endif
if ( unlikely ( ! __pyx_m ) ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
/* __pyx_d caches the module dict and holds its own reference to it. */
__pyx_d = PyModule_GetDict ( __pyx_m ) ; if ( unlikely ( ! __pyx_d ) ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
Py_INCREF ( __pyx_d ) ;
__pyx_b = PyImport_AddModule ( __Pyx_NAMESTR ( __Pyx_BUILTIN_MODULE_NAME ) ) ; if ( unlikely ( ! __pyx_b ) ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
# if CYTHON_COMPILING_IN_PYPY
Py_INCREF ( __pyx_b ) ;
# endif
if ( __Pyx_SetAttrString ( __pyx_m , " __builtins__ " , __pyx_b ) < 0 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; } ;
/*--- Initialize various global constants etc. ---*/
if ( unlikely ( __Pyx_InitGlobals ( ) < 0 ) ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
# if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT)
if ( __Pyx_init_sys_getdefaultencoding_params ( ) < 0 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
# endif
/* When the module-is-main flag is set, __name__ is overwritten with
 * the interned string __pyx_n_s_main. */
if ( __pyx_module_is_main_spacy__lexeme ) {
if ( __Pyx_SetAttrString ( __pyx_m , " __name__ " , __pyx_n_s_main ) < 0 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; } ;
}
# if PY_MAJOR_VERSION >= 3
{
/* Register the module in the interpreter's module dict unless an entry
 * under the same key is already present. */
PyObject * modules = PyImport_GetModuleDict ( ) ; if ( unlikely ( ! modules ) ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
if ( ! PyDict_GetItemString ( modules , " spacy.lexeme " ) ) {
if ( unlikely ( PyDict_SetItemString ( modules , " spacy.lexeme " , __pyx_m ) < 0 ) ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
}
}
# endif
/*--- Builtin init code ---*/
if ( unlikely ( __Pyx_InitCachedBuiltins ( ) < 0 ) ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
/*--- Constants init code ---*/
if ( unlikely ( __Pyx_InitCachedConstants ( ) < 0 ) ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
/*--- Global init code ---*/
/*--- Variable export code ---*/
2014-07-07 06:21:06 +04:00
if ( __Pyx_ExportVoidPtr ( __pyx_n_s_BLANK_WORD , ( void * ) & __pyx_v_5spacy_6lexeme_BLANK_WORD , " struct __pyx_t_5spacy_6lexeme_Lexeme " ) < 0 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
2014-07-05 22:51:42 +04:00
/*--- Function export code ---*/
2014-07-07 06:21:06 +04:00
if ( __Pyx_ExportFunction ( " init_lexeme " , ( void ( * ) ( void ) ) __pyx_f_5spacy_6lexeme_init_lexeme , " struct __pyx_t_5spacy_6lexeme_Lexeme *(__pyx_t_5spacy_5spacy_Vocab, PyObject *, __pyx_t_5spacy_5spacy_Splitter, PyObject *, __pyx_t_5spacy_6lexeme_StringHash, int, size_t) " ) < 0 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
2014-07-05 22:51:42 +04:00
/*--- Type init code ---*/
/*--- Type import code ---*/
/*--- Variable import code ---*/
/*--- Function import code ---*/
2014-07-07 06:21:06 +04:00
/* Each imported module is held in a temporary only while its C functions
 * are resolved, then released. */
__pyx_t_1 = __Pyx_ImportModule ( " spacy.spacy " ) ; if ( ! __pyx_t_1 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
if ( __Pyx_ImportFunction ( __pyx_t_1 , " lookup " , ( void ( * * ) ( void ) ) & __pyx_f_5spacy_5spacy_lookup , " __pyx_t_5spacy_5spacy_Lexeme_addr (__pyx_t_5spacy_5spacy_Vocab &, PyObject *, __pyx_t_5spacy_5spacy_Splitter, int, PyObject *) " ) < 0 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
if ( __Pyx_ImportFunction ( __pyx_t_1 , " hash_string " , ( void ( * * ) ( void ) ) & __pyx_f_5spacy_5spacy_hash_string , " __pyx_t_5spacy_5spacy_StringHash (PyObject *, size_t) " ) < 0 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
Py_DECREF ( __pyx_t_1 ) ; __pyx_t_1 = 0 ;
__pyx_t_2 = __Pyx_ImportModule ( " spacy.string_tools " ) ; if ( ! __pyx_t_2 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
if ( __Pyx_ImportFunction ( __pyx_t_2 , " substr " , ( void ( * * ) ( void ) ) & __pyx_f_5spacy_12string_tools_substr , " PyObject *(PyObject *, int, int, size_t, int __pyx_skip_dispatch) " ) < 0 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
Py_DECREF ( __pyx_t_2 ) ; __pyx_t_2 = 0 ;
2014-07-05 22:51:42 +04:00
/*--- Execution code ---*/
/* "spacy/lexeme.pyx":1
* ' ' ' Accessors for Lexeme properties , given a lex_id , which is cast to a Lexeme * . # < < < < < < < < < < < < < <
* Mostly useful from Python - space . From Cython - space , you can just cast to
* Lexeme * yourself .
*/
2014-07-07 06:21:06 +04:00
/* Build a dict mapping "<name> (line N)" keys to docstring objects and
 * store it in the module dict under the key __pyx_n_s_test. */
__pyx_t_3 = PyDict_New ( ) ; if ( unlikely ( ! __pyx_t_3 ) ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
__Pyx_GOTREF ( __pyx_t_3 ) ;
if ( PyDict_SetItem ( __pyx_t_3 , __pyx_kp_u_sic_of_line_61 , __pyx_kp_u_Access_the_sic_field_of_the_Lexe ) < 0 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
if ( PyDict_SetItem ( __pyx_t_3 , __pyx_kp_u_lex_of_line_73 , __pyx_kp_u_Access_the_lex_field_of_the_Lexe ) < 0 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
if ( PyDict_SetItem ( __pyx_t_3 , __pyx_kp_u_cluster_of_line_87 , __pyx_kp_u_Access_the_cluster_field_of_the ) < 0 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
if ( PyDict_SetItem ( __pyx_t_3 , __pyx_kp_u_first_of_line_106 , __pyx_kp_u_Access_the_first_field_of_the_Le ) < 0 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
if ( PyDict_SetItem ( __pyx_t_3 , __pyx_kp_u_prob_of_line_117 , __pyx_kp_u_Access_the_prob_field_of_the_Lex ) < 0 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
if ( PyDict_SetItem ( __pyx_t_3 , __pyx_kp_u_last3_of_line_130 , __pyx_kp_u_Access_the_last3_field_of_the_Le ) < 0 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
if ( PyDict_SetItem ( __pyx_t_3 , __pyx_kp_u_is_oft_upper_line_141 , __pyx_kp_u_Access_the_oft_upper_field_of_th ) < 0 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
if ( PyDict_SetItem ( __pyx_t_3 , __pyx_kp_u_is_oft_title_line_156 , __pyx_kp_u_Access_the_oft_upper_field_of_th_2 ) < 0 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
if ( PyDict_SetItem ( __pyx_d , __pyx_n_s_test , __pyx_t_3 ) < 0 ) { __pyx_filename = __pyx_f [ 0 ] ; __pyx_lineno = 1 ; __pyx_clineno = __LINE__ ; goto __pyx_L1_error ; }
__Pyx_DECREF ( __pyx_t_3 ) ; __pyx_t_3 = 0 ;
2014-07-05 22:51:42 +04:00
goto __pyx_L0 ;
/* Error exit: release temporaries. If the module object exists, add a
 * traceback entry and drop the module; otherwise make sure some exception
 * (ImportError) is set. */
__pyx_L1_error : ;
__Pyx_XDECREF ( __pyx_t_1 ) ;
2014-07-07 06:21:06 +04:00
__Pyx_XDECREF ( __pyx_t_2 ) ;
__Pyx_XDECREF ( __pyx_t_3 ) ;
2014-07-05 22:51:42 +04:00
if ( __pyx_m ) {
__Pyx_AddTraceback ( " init spacy.lexeme " , __pyx_clineno , __pyx_lineno , __pyx_filename ) ;
Py_DECREF ( __pyx_m ) ; __pyx_m = 0 ;
} else if ( ! PyErr_Occurred ( ) ) {
PyErr_SetString ( PyExc_ImportError , " init spacy.lexeme " ) ;
}
__pyx_L0 : ;
__Pyx_RefNannyFinishContext ( ) ;
# if PY_MAJOR_VERSION < 3
return ;
# else
return __pyx_m ;
# endif
}
/* Runtime support code */
#if CYTHON_REFNANNY
/*
 * Import the module `modname`, read its RefNannyAPI attribute and convert
 * that Python integer to a pointer.
 *
 * Returns the pointer cast to the RefNanny API struct, or NULL when the
 * import, the attribute lookup or the conversion did not yield a pointer.
 * Both temporary references are released before returning.
 */
static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname)
{
    void *api = NULL;
    PyObject *attr = NULL;
    PyObject *module = PyImport_ImportModule((char *)modname);

    if (module) {
        attr = PyObject_GetAttrString(module, (char *)" RefNannyAPI ");
        if (attr) {
            api = PyLong_AsVoidPtr(attr);
        }
    }
    Py_XDECREF(attr);
    Py_XDECREF(module);
    return (__Pyx_RefNannyAPIStruct *)api;
}
#endif /* CYTHON_REFNANNY */
2014-07-07 06:21:06 +04:00
/*
 * Read the character at index `i` of the unicode object `ustring`.
 *
 * wraparound  - when set, a negative `i` is offset by the string length.
 * boundscheck - when set, an index outside [0, length) raises IndexError.
 *
 * Returns the character, or (Py_UCS4)-1 after a failed readiness check or
 * a failed bounds check. With both flags clear the index is used as given.
 */
static CYTHON_INLINE Py_UCS4 __Pyx_GetItemInt_Unicode_Fast(PyObject *ustring, Py_ssize_t i,
                                                           int wraparound, int boundscheck)
{
#if CYTHON_PEP393_ENABLED
    if (unlikely(__Pyx_PyUnicode_READY(ustring) < 0))
        return (Py_UCS4)-1;
#endif
    if (wraparound || boundscheck) {
        const Py_ssize_t n_chars = __Pyx_PyUnicode_GET_LENGTH(ustring);

        /* Bitwise AND kept on purpose: it mirrors the flag arithmetic of the template. */
        if (wraparound & unlikely(i < 0))
            i += n_chars;
        if (boundscheck && unlikely((i < 0) | (i >= n_chars))) {
            PyErr_SetString(PyExc_IndexError, " string index out of range ");
            return (Py_UCS4)-1;
        }
    }
    return __Pyx_PyUnicode_READ_CHAR(ustring, i);
}
2014-07-05 22:51:42 +04:00
/*
 * __PYX_VERIFY_RETURN_INT(target_type, func_type, func)
 *
 * Expands to a block that evaluates func(x) as a func_type and returns it
 * cast to target_type. When target_type is narrower than func_type, the
 * value must survive a round trip through target_type; otherwise
 * OverflowError is set (with a "negative value" message if is_unsigned is
 * true and the value is below zero) and (target_type)-1 is returned.
 *
 * The expansion site must have `x` and `is_unsigned` in scope, and the
 * macro always returns from the enclosing function.
 */
# define __PYX_VERIFY_RETURN_INT(target_type, func_type, func) \
{ \
func_type value = func ( x ) ; \
if ( sizeof ( target_type ) < sizeof ( func_type ) ) { \
if ( unlikely ( value ! = ( func_type ) ( target_type ) value ) ) { \
func_type zero = 0 ; \
PyErr_SetString ( PyExc_OverflowError , \
( is_unsigned & & unlikely ( value < zero ) ) ? \
" can't convert negative value to " # target_type : \
" value too large to convert to " # target_type ) ; \
return ( target_type ) - 1 ; \
} \
} \
return ( target_type ) value ; \
}
# if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
# if CYTHON_USE_PYLONG_INTERNALS
# include "longintrepr.h"
# endif
# endif
/*
 * Convert the Python object `x` to a C size_t.
 *
 * PyInt (when PY_MAJOR_VERSION < 3) and PyLong objects are converted
 * directly; any other object is first coerced with __Pyx_PyNumber_Int and
 * the function recurses on the result.
 *
 * Returns (size_t)-1 on failure. The negative-value and range checks set
 * OverflowError before returning that value.
 */
static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t ( PyObject * x ) {
/* is_unsigned is derived from the target type itself: (T)-1 > 0 only for unsigned T. */
const size_t neg_one = ( size_t ) - 1 , const_zero = 0 ;
const int is_unsigned = neg_one > const_zero ;
# if PY_MAJOR_VERSION < 3
if ( likely ( PyInt_Check ( x ) ) ) {
if ( sizeof ( size_t ) < sizeof ( long ) ) {
__PYX_VERIFY_RETURN_INT ( size_t , long , PyInt_AS_LONG )
} else {
long val = PyInt_AS_LONG ( x ) ;
if ( is_unsigned & & unlikely ( val < 0 ) ) {
PyErr_SetString ( PyExc_OverflowError ,
" can't convert negative value to size_t " ) ;
return ( size_t ) - 1 ;
}
return ( size_t ) val ;
}
} else
# endif
if ( likely ( PyLong_Check ( x ) ) ) {
if ( is_unsigned ) {
# if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
# if CYTHON_USE_PYLONG_INTERNALS
/* Shortcut: values of zero or one digit are read straight from the PyLong. */
if ( sizeof ( digit ) < = sizeof ( size_t ) ) {
switch ( Py_SIZE ( x ) ) {
case 0 : return 0 ;
case 1 : return ( size_t ) ( ( PyLongObject * ) x ) - > ob_digit [ 0 ] ;
}
}
# endif
# endif
if ( unlikely ( Py_SIZE ( x ) < 0 ) ) {
PyErr_SetString ( PyExc_OverflowError ,
" can't convert negative value to size_t " ) ;
return ( size_t ) - 1 ;
}
if ( sizeof ( size_t ) < = sizeof ( unsigned long ) ) {
__PYX_VERIFY_RETURN_INT ( size_t , unsigned long , PyLong_AsUnsignedLong )
} else if ( sizeof ( size_t ) < = sizeof ( unsigned long long ) ) {
__PYX_VERIFY_RETURN_INT ( size_t , unsigned long long , PyLong_AsUnsignedLongLong )
}
} else {
# if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
# if CYTHON_USE_PYLONG_INTERNALS
if ( sizeof ( digit ) < = sizeof ( size_t ) ) {
switch ( Py_SIZE ( x ) ) {
case 0 : return 0 ;
case 1 : return + ( size_t ) ( ( PyLongObject * ) x ) - > ob_digit [ 0 ] ;
case - 1 : return - ( size_t ) ( ( PyLongObject * ) x ) - > ob_digit [ 0 ] ;
}
}
# endif
# endif
if ( sizeof ( size_t ) < = sizeof ( long ) ) {
__PYX_VERIFY_RETURN_INT ( size_t , long , PyLong_AsLong )
} else if ( sizeof ( size_t ) < = sizeof ( long long ) ) {
__PYX_VERIFY_RETURN_INT ( size_t , long long , PyLong_AsLongLong )
}
}
/* Reached only when no sized conversion above applied: go through a raw byte array. */
{
# if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray)
PyErr_SetString ( PyExc_RuntimeError ,
" _PyLong_AsByteArray() not available in PyPy, cannot convert large numbers " ) ;
# else
size_t val ;
PyObject * v = __Pyx_PyNumber_Int ( x ) ;
# if PY_MAJOR_VERSION < 3
if ( likely ( v ) & & ! PyLong_Check ( v ) ) {
PyObject * tmp = v ;
v = PyNumber_Long ( tmp ) ;
Py_DECREF ( tmp ) ;
}
# endif
if ( likely ( v ) ) {
/* Detect host byte order at run time: the first byte of int 1 is 1 on little-endian. */
int one = 1 ; int is_little = ( int ) * ( unsigned char * ) & one ;
unsigned char * bytes = ( unsigned char * ) & val ;
int ret = _PyLong_AsByteArray ( ( PyLongObject * ) v ,
bytes , sizeof ( val ) ,
is_little , ! is_unsigned ) ;
Py_DECREF ( v ) ;
if ( likely ( ! ret ) )
return val ;
}
# endif
return ( size_t ) - 1 ;
}
} else {
/* Not an int/long: coerce to an integer object and convert that instead. */
size_t val ;
PyObject * tmp = __Pyx_PyNumber_Int ( x ) ;
if ( ! tmp ) return ( size_t ) - 1 ;
val = __Pyx_PyInt_As_size_t ( tmp ) ;
Py_DECREF ( tmp ) ;
return val ;
}
}
2014-07-07 06:21:06 +04:00
/*
 * Build a Python integer object from a C long.
 *
 * The branch structure is the generic template for an arbitrary integer
 * type: signedness is derived from the type itself, the first constructor
 * whose C type is wide enough is used, and a raw byte-array conversion is
 * the fallback when none fits. For `long` the size tests are compile-time
 * constants.
 *
 * Returns whatever the selected constructor returns.
 *
 * The date/time lines that were interleaved with the statements of this
 * function were not C and have been removed.
 */
static CYTHON_INLINE PyObject *__Pyx_PyInt_From_long(long value)
{
    const long neg_one = (long)-1, const_zero = 0;
    /* (T)-1 > 0 holds only for unsigned T. */
    const int is_unsigned = neg_one > const_zero;

    if (is_unsigned) {
        if (sizeof(long) < sizeof(long)) {
            return PyInt_FromLong((long)value);
        } else if (sizeof(long) <= sizeof(unsigned long)) {
            return PyLong_FromUnsignedLong((unsigned long)value);
        } else if (sizeof(long) <= sizeof(unsigned long long)) {
            return PyLong_FromUnsignedLongLong((unsigned long long)value);
        }
    } else {
        if (sizeof(long) <= sizeof(long)) {
            return PyInt_FromLong((long)value);
        } else if (sizeof(long) <= sizeof(long long)) {
            return PyLong_FromLongLong((long long)value);
        }
    }
    {
        /* Host byte order: the first byte of int 1 is 1 on little-endian. */
        int one = 1;
        int little = (int)*(unsigned char *)&one;
        unsigned char *bytes = (unsigned char *)&value;
        return _PyLong_FromByteArray(bytes, sizeof(long),
                                     little, !is_unsigned);
    }
}
2014-07-07 06:21:06 +04:00
/*
 * Build a Python integer object from a C uint64_t.
 *
 * Signedness is derived from the type itself. The first constructor whose
 * C type is wide enough for uint64_t is used; a raw byte-array conversion
 * is the fallback when none fits.
 *
 * Returns whatever the selected constructor returns.
 *
 * The date/time lines that were interleaved with the statements of this
 * function were not C and have been removed.
 */
static CYTHON_INLINE PyObject *__Pyx_PyInt_From_uint64_t(uint64_t value)
{
    const uint64_t neg_one = (uint64_t)-1, const_zero = 0;
    /* (T)-1 > 0 holds only for unsigned T. */
    const int is_unsigned = neg_one > const_zero;

    if (is_unsigned) {
        if (sizeof(uint64_t) < sizeof(long)) {
            return PyInt_FromLong((long)value);
        } else if (sizeof(uint64_t) <= sizeof(unsigned long)) {
            return PyLong_FromUnsignedLong((unsigned long)value);
        } else if (sizeof(uint64_t) <= sizeof(unsigned long long)) {
            return PyLong_FromUnsignedLongLong((unsigned long long)value);
        }
    } else {
        if (sizeof(uint64_t) <= sizeof(long)) {
            return PyInt_FromLong((long)value);
        } else if (sizeof(uint64_t) <= sizeof(long long)) {
            return PyLong_FromLongLong((long long)value);
        }
    }
    {
        /* Host byte order: the first byte of int 1 is 1 on little-endian. */
        int one = 1;
        int little = (int)*(unsigned char *)&one;
        unsigned char *bytes = (unsigned char *)&value;
        return _PyLong_FromByteArray(bytes, sizeof(uint64_t),
                                     little, !is_unsigned);
    }
}
2014-07-07 06:21:06 +04:00
/*
 * Build a Python integer object from a C int.
 *
 * Signedness is derived from the type itself. The first constructor whose
 * C type is wide enough for int is used; a raw byte-array conversion is
 * the fallback when none fits.
 *
 * Returns whatever the selected constructor returns.
 *
 * The date/time lines that were interleaved with the statements of this
 * function were not C and have been removed.
 */
static CYTHON_INLINE PyObject *__Pyx_PyInt_From_int(int value)
{
    const int neg_one = (int)-1, const_zero = 0;
    /* (T)-1 > 0 holds only for unsigned T. */
    const int is_unsigned = neg_one > const_zero;

    if (is_unsigned) {
        if (sizeof(int) < sizeof(long)) {
            return PyInt_FromLong((long)value);
        } else if (sizeof(int) <= sizeof(unsigned long)) {
            return PyLong_FromUnsignedLong((unsigned long)value);
        } else if (sizeof(int) <= sizeof(unsigned long long)) {
            return PyLong_FromUnsignedLongLong((unsigned long long)value);
        }
    } else {
        if (sizeof(int) <= sizeof(long)) {
            return PyInt_FromLong((long)value);
        } else if (sizeof(int) <= sizeof(long long)) {
            return PyLong_FromLongLong((long long)value);
        }
    }
    {
        /* Host byte order: the first byte of int 1 is 1 on little-endian. */
        int one = 1;
        int little = (int)*(unsigned char *)&one;
        unsigned char *bytes = (unsigned char *)&value;
        return _PyLong_FromByteArray(bytes, sizeof(int),
                                     little, !is_unsigned);
    }
}
# if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
# if CYTHON_USE_PYLONG_INTERNALS
# include "longintrepr.h"
# endif
# endif
/*
 * Convert the Python object `x` to a C long.
 *
 * PyInt (when PY_MAJOR_VERSION < 3) and PyLong objects are converted
 * directly; any other object is first coerced with __Pyx_PyNumber_Int and
 * the function recurses on the result.
 *
 * Returns (long)-1 on failure. Since -1 is also a valid long, callers
 * presumably need to check for a pending exception to tell the two apart
 * (TODO confirm at the call sites).
 */
static CYTHON_INLINE long __Pyx_PyInt_As_long ( PyObject * x ) {
/* is_unsigned is derived from the target type itself: (T)-1 > 0 only for unsigned T. */
const long neg_one = ( long ) - 1 , const_zero = 0 ;
const int is_unsigned = neg_one > const_zero ;
# if PY_MAJOR_VERSION < 3
if ( likely ( PyInt_Check ( x ) ) ) {
if ( sizeof ( long ) < sizeof ( long ) ) {
__PYX_VERIFY_RETURN_INT ( long , long , PyInt_AS_LONG )
} else {
long val = PyInt_AS_LONG ( x ) ;
if ( is_unsigned & & unlikely ( val < 0 ) ) {
PyErr_SetString ( PyExc_OverflowError ,
" can't convert negative value to long " ) ;
return ( long ) - 1 ;
}
return ( long ) val ;
}
} else
# endif
if ( likely ( PyLong_Check ( x ) ) ) {
if ( is_unsigned ) {
# if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
# if CYTHON_USE_PYLONG_INTERNALS
if ( sizeof ( digit ) < = sizeof ( long ) ) {
switch ( Py_SIZE ( x ) ) {
case 0 : return 0 ;
case 1 : return ( long ) ( ( PyLongObject * ) x ) - > ob_digit [ 0 ] ;
}
}
# endif
# endif
if ( unlikely ( Py_SIZE ( x ) < 0 ) ) {
PyErr_SetString ( PyExc_OverflowError ,
" can't convert negative value to long " ) ;
return ( long ) - 1 ;
}
if ( sizeof ( long ) < = sizeof ( unsigned long ) ) {
__PYX_VERIFY_RETURN_INT ( long , unsigned long , PyLong_AsUnsignedLong )
} else if ( sizeof ( long ) < = sizeof ( unsigned long long ) ) {
__PYX_VERIFY_RETURN_INT ( long , unsigned long long , PyLong_AsUnsignedLongLong )
}
} else {
# if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
# if CYTHON_USE_PYLONG_INTERNALS
/* Shortcut: values of zero or one digit are read straight from the PyLong. */
if ( sizeof ( digit ) < = sizeof ( long ) ) {
switch ( Py_SIZE ( x ) ) {
case 0 : return 0 ;
case 1 : return + ( long ) ( ( PyLongObject * ) x ) - > ob_digit [ 0 ] ;
case - 1 : return - ( long ) ( ( PyLongObject * ) x ) - > ob_digit [ 0 ] ;
}
}
# endif
# endif
if ( sizeof ( long ) < = sizeof ( long ) ) {
__PYX_VERIFY_RETURN_INT ( long , long , PyLong_AsLong )
} else if ( sizeof ( long ) < = sizeof ( long long ) ) {
__PYX_VERIFY_RETURN_INT ( long , long long , PyLong_AsLongLong )
}
}
/* Reached only when no sized conversion above applied: go through a raw byte array. */
{
# if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray)
PyErr_SetString ( PyExc_RuntimeError ,
" _PyLong_AsByteArray() not available in PyPy, cannot convert large numbers " ) ;
# else
long val ;
PyObject * v = __Pyx_PyNumber_Int ( x ) ;
# if PY_MAJOR_VERSION < 3
if ( likely ( v ) & & ! PyLong_Check ( v ) ) {
PyObject * tmp = v ;
v = PyNumber_Long ( tmp ) ;
Py_DECREF ( tmp ) ;
}
# endif
if ( likely ( v ) ) {
/* Detect host byte order at run time: the first byte of int 1 is 1 on little-endian. */
int one = 1 ; int is_little = ( int ) * ( unsigned char * ) & one ;
unsigned char * bytes = ( unsigned char * ) & val ;
int ret = _PyLong_AsByteArray ( ( PyLongObject * ) v ,
bytes , sizeof ( val ) ,
is_little , ! is_unsigned ) ;
Py_DECREF ( v ) ;
if ( likely ( ! ret ) )
return val ;
}
# endif
return ( long ) - 1 ;
}
} else {
/* Not an int/long: coerce to an integer object and convert that instead. */
long val ;
PyObject * tmp = __Pyx_PyNumber_Int ( x ) ;
if ( ! tmp ) return ( long ) - 1 ;
val = __Pyx_PyInt_As_long ( tmp ) ;
Py_DECREF ( tmp ) ;
return val ;
}
}
# if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
# if CYTHON_USE_PYLONG_INTERNALS
# include "longintrepr.h"
# endif
# endif
/*
 * Convert the Python object `x` to a C int.
 *
 * PyInt (when PY_MAJOR_VERSION < 3) and PyLong objects are converted
 * directly, with __PYX_VERIFY_RETURN_INT raising OverflowError when the
 * wider intermediate value does not fit in an int. Any other object is
 * first coerced with __Pyx_PyNumber_Int and the function recurses.
 *
 * Returns (int)-1 on failure. Since -1 is also a valid int, callers
 * presumably need to check for a pending exception to tell the two apart
 * (TODO confirm at the call sites).
 */
static CYTHON_INLINE int __Pyx_PyInt_As_int ( PyObject * x ) {
/* is_unsigned is derived from the target type itself: (T)-1 > 0 only for unsigned T. */
const int neg_one = ( int ) - 1 , const_zero = 0 ;
const int is_unsigned = neg_one > const_zero ;
# if PY_MAJOR_VERSION < 3
if ( likely ( PyInt_Check ( x ) ) ) {
if ( sizeof ( int ) < sizeof ( long ) ) {
__PYX_VERIFY_RETURN_INT ( int , long , PyInt_AS_LONG )
} else {
long val = PyInt_AS_LONG ( x ) ;
if ( is_unsigned & & unlikely ( val < 0 ) ) {
PyErr_SetString ( PyExc_OverflowError ,
" can't convert negative value to int " ) ;
return ( int ) - 1 ;
}
return ( int ) val ;
}
} else
# endif
if ( likely ( PyLong_Check ( x ) ) ) {
if ( is_unsigned ) {
# if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
# if CYTHON_USE_PYLONG_INTERNALS
if ( sizeof ( digit ) < = sizeof ( int ) ) {
switch ( Py_SIZE ( x ) ) {
case 0 : return 0 ;
case 1 : return ( int ) ( ( PyLongObject * ) x ) - > ob_digit [ 0 ] ;
}
}
# endif
# endif
if ( unlikely ( Py_SIZE ( x ) < 0 ) ) {
PyErr_SetString ( PyExc_OverflowError ,
" can't convert negative value to int " ) ;
return ( int ) - 1 ;
}
if ( sizeof ( int ) < = sizeof ( unsigned long ) ) {
__PYX_VERIFY_RETURN_INT ( int , unsigned long , PyLong_AsUnsignedLong )
} else if ( sizeof ( int ) < = sizeof ( unsigned long long ) ) {
__PYX_VERIFY_RETURN_INT ( int , unsigned long long , PyLong_AsUnsignedLongLong )
}
} else {
# if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
# if CYTHON_USE_PYLONG_INTERNALS
/* Shortcut: values of zero or one digit are read straight from the PyLong. */
if ( sizeof ( digit ) < = sizeof ( int ) ) {
switch ( Py_SIZE ( x ) ) {
case 0 : return 0 ;
case 1 : return + ( int ) ( ( PyLongObject * ) x ) - > ob_digit [ 0 ] ;
case - 1 : return - ( int ) ( ( PyLongObject * ) x ) - > ob_digit [ 0 ] ;
}
}
# endif
# endif
if ( sizeof ( int ) < = sizeof ( long ) ) {
__PYX_VERIFY_RETURN_INT ( int , long , PyLong_AsLong )
} else if ( sizeof ( int ) < = sizeof ( long long ) ) {
__PYX_VERIFY_RETURN_INT ( int , long long , PyLong_AsLongLong )
}
}
/* Reached only when no sized conversion above applied: go through a raw byte array. */
{
# if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray)
PyErr_SetString ( PyExc_RuntimeError ,
" _PyLong_AsByteArray() not available in PyPy, cannot convert large numbers " ) ;
# else
int val ;
PyObject * v = __Pyx_PyNumber_Int ( x ) ;
# if PY_MAJOR_VERSION < 3
if ( likely ( v ) & & ! PyLong_Check ( v ) ) {
PyObject * tmp = v ;
v = PyNumber_Long ( tmp ) ;
Py_DECREF ( tmp ) ;
}
# endif
if ( likely ( v ) ) {
/* Detect host byte order at run time: the first byte of int 1 is 1 on little-endian. */
int one = 1 ; int is_little = ( int ) * ( unsigned char * ) & one ;
unsigned char * bytes = ( unsigned char * ) & val ;
int ret = _PyLong_AsByteArray ( ( PyLongObject * ) v ,
bytes , sizeof ( val ) ,
is_little , ! is_unsigned ) ;
Py_DECREF ( v ) ;
if ( likely ( ! ret ) )
return val ;
}
# endif
return ( int ) - 1 ;
}
} else {
/* Not an int/long: coerce to an integer object and convert that instead. */
int val ;
PyObject * tmp = __Pyx_PyNumber_Int ( x ) ;
if ( ! tmp ) return ( int ) - 1 ;
val = __Pyx_PyInt_As_int ( tmp ) ;
Py_DECREF ( tmp ) ;
return val ;
}
}
/*
 * Compare the Python version this module was compiled against with the
 * version of the running interpreter.
 *
 * Both versions are rendered into 4-byte buffers and the characters at
 * index 0 and index 2 are compared. On a mismatch a warning naming both
 * versions and the module is issued and the warning call's result is
 * returned; otherwise the function returns 0.
 *
 * NOTE(review): each buffer holds at most three characters plus the
 * terminator, so only a single character of the minor version takes part
 * in the comparison. Minor versions of two or more digits that share a
 * leading digit would compare equal - confirm whether that matters for
 * the interpreters this module is expected to run on.
 */
static int __Pyx_check_binary_version ( void ) {
char ctversion [ 4 ] , rtversion [ 4 ] ;
PyOS_snprintf ( ctversion , 4 , " %d.%d " , PY_MAJOR_VERSION , PY_MINOR_VERSION ) ;
PyOS_snprintf ( rtversion , 4 , " %s " , Py_GetVersion ( ) ) ;
if ( ctversion [ 0 ] ! = rtversion [ 0 ] | | ctversion [ 2 ] ! = rtversion [ 2 ] ) {
char message [ 200 ] ;
PyOS_snprintf ( message , sizeof ( message ) ,
" compiletime version %s of module '%.100s' "
" does not match runtime version %s " ,
ctversion , __Pyx_MODULE_NAME , rtversion ) ;
/* PyErr_WarnEx is used from Python 2.5 on; older versions fall back to PyErr_Warn. */
# if PY_VERSION_HEX < 0x02050000
return PyErr_Warn ( NULL , message ) ;
# else
return PyErr_WarnEx ( NULL , message , 1 ) ;
# endif
}
return 0 ;
}
2014-07-07 06:21:06 +04:00
/*
 * Publish the pointer `p` under the key `name` in the module's C-API dict,
 * tagged with the type signature `sig`.
 *
 * The dict is looked up in the module dict under __pyx_n_s_pyx_capi; when
 * absent, a new dict is created and set as a module attribute. The pointer
 * is wrapped in a capsule (or a CObject on the older API).
 *
 * Returns 0 on success and -1 on failure. Every local reference is
 * released on both paths.
 */
static int __Pyx_ExportVoidPtr(PyObject *name, void *p, const char *sig)
{
    int status = -1;
    PyObject *wrapped = NULL;
    PyObject *capi = PyDict_GetItem(__pyx_d, __pyx_n_s_pyx_capi);

    if (capi) {
        /* PyDict_GetItem yields a borrowed reference; hold our own. */
        Py_INCREF(capi);
    } else {
        capi = PyDict_New();
        if (!capi)
            goto done;
        if (__Pyx_PyObject_SetAttrStr(__pyx_m, __pyx_n_s_pyx_capi, capi) < 0)
            goto done;
    }

#if PY_VERSION_HEX >= 0x02070000 && !(PY_MAJOR_VERSION==3 && PY_MINOR_VERSION==0)
    wrapped = PyCapsule_New(p, sig, 0);
#else
    wrapped = PyCObject_FromVoidPtrAndDesc(p, (void *)sig, 0);
#endif
    if (wrapped && PyDict_SetItem(capi, name, wrapped) >= 0)
        status = 0;

done:
    Py_XDECREF(wrapped);
    Py_XDECREF(capi);
    return status;
}
/*
 * Publish the C function `f` under `name` in the module's C-API dict,
 * tagged with the signature string `sig`.
 *
 * The dict is fetched as a module attribute; when the lookup fails the
 * error is cleared, a new dict is created and added to the module. The
 * function pointer is converted to void* through a union and wrapped in a
 * capsule (or a CObject on the older API).
 *
 * Returns 0 on success and -1 on failure.
 *
 * Reference handling: PyModule_AddObject takes over the caller's reference
 * only when it succeeds. The local reference is therefore re-acquired after
 * a successful call rather than before it, so the failure path releases the
 * new dict completely instead of leaking one reference.
 */
static int __Pyx_ExportFunction(const char *name, void (*f)(void), const char *sig)
{
    PyObject *d = 0;
    PyObject *cobj = 0;
    /* Function and object pointers are converted through a union, not a cast. */
    union {
        void (*fp)(void);
        void *p;
    } tmp;

    d = PyObject_GetAttrString(__pyx_m, (char *)" __pyx_capi__ ");
    if (!d) {
        PyErr_Clear();
        d = PyDict_New();
        if (!d)
            goto bad;
        if (PyModule_AddObject(__pyx_m, (char *)" __pyx_capi__ ", d) < 0)
            goto bad;   /* not stolen on failure: `bad` drops our only reference */
        /* The module now owns the reference we passed in; take one for local use. */
        Py_INCREF(d);
    }

    tmp.fp = f;
#if PY_VERSION_HEX >= 0x02070000 && !(PY_MAJOR_VERSION==3&&PY_MINOR_VERSION==0)
    cobj = PyCapsule_New(tmp.p, sig, 0);
#else
    cobj = PyCObject_FromVoidPtrAndDesc(tmp.p, (void *)sig, 0);
#endif
    if (!cobj)
        goto bad;
    if (PyDict_SetItemString(d, name, cobj) < 0)
        goto bad;
    Py_DECREF(cobj);
    Py_DECREF(d);
    return 0;

bad:
    Py_XDECREF(cobj);
    Py_XDECREF(d);
    return -1;
}
# ifndef __PYX_HAVE_RT_ImportModule
# define __PYX_HAVE_RT_ImportModule
static PyObject * __Pyx_ImportModule ( const char * name ) {
PyObject * py_name = 0 ;
PyObject * py_module = 0 ;
py_name = __Pyx_PyIdentifier_FromString ( name ) ;
if ( ! py_name )
goto bad ;
py_module = PyImport_Import ( py_name ) ;
Py_DECREF ( py_name ) ;
return py_module ;
bad :
Py_XDECREF ( py_name ) ;
return 0 ;
}
# endif
# ifndef __PYX_HAVE_RT_ImportFunction
# define __PYX_HAVE_RT_ImportFunction
/*
 * Resolve the C function `funcname` exported by `module` and store its
 * address in *f.
 *
 * The function is looked up in the module's C-API dict attribute. Its
 * recorded signature must equal `sig`: the capsule name is checked on the
 * capsule API, the CObject description is compared character by character
 * on the older API.
 *
 * Returns 0 on success and -1 on failure. ImportError is set when the
 * name is not exported, TypeError when the signature differs.
 */
static int __Pyx_ImportFunction ( PyObject * module , const char * funcname , void ( * * f ) ( void ) , const char * sig ) {
PyObject * d = 0 ;
PyObject * cobj = 0 ;
/* Object and function pointers are converted through a union, not a cast. */
union {
void ( * fp ) ( void ) ;
void * p ;
} tmp ;
d = PyObject_GetAttrString ( module , ( char * ) " __pyx_capi__ " ) ;
if ( ! d )
goto bad ;
/* cobj is never released in this function; only d is. */
cobj = PyDict_GetItemString ( d , funcname ) ;
if ( ! cobj ) {
PyErr_Format ( PyExc_ImportError ,
" %.200s does not export expected C function %.200s " ,
PyModule_GetName ( module ) , funcname ) ;
goto bad ;
}
# if PY_VERSION_HEX >= 0x02070000 && !(PY_MAJOR_VERSION==3 && PY_MINOR_VERSION==0)
if ( ! PyCapsule_IsValid ( cobj , sig ) ) {
PyErr_Format ( PyExc_TypeError ,
" C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s) " ,
PyModule_GetName ( module ) , funcname , sig , PyCapsule_GetName ( cobj ) ) ;
goto bad ;
}
tmp . p = PyCapsule_GetPointer ( cobj , sig ) ;
# else
{ const char * desc , * s1 , * s2 ;
desc = ( const char * ) PyCObject_GetDesc ( cobj ) ;
if ( ! desc )
goto bad ;
/* Hand-rolled string comparison: advance while both strings agree. */
s1 = desc ; s2 = sig ;
while ( * s1 ! = ' \0 ' & & * s1 = = * s2 ) { s1 + + ; s2 + + ; }
if ( * s1 ! = * s2 ) {
PyErr_Format ( PyExc_TypeError ,
" C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s) " ,
PyModule_GetName ( module ) , funcname , sig , desc ) ;
goto bad ;
}
tmp . p = PyCObject_AsVoidPtr ( cobj ) ; }
# endif
* f = tmp . fp ;
/* A null function pointer is treated as failure. */
if ( ! ( * f ) )
goto bad ;
Py_DECREF ( d ) ;
return 0 ;
bad :
Py_XDECREF ( d ) ;
return - 1 ;
}
# endif
2014-07-05 22:51:42 +04:00
/*
 * Binary search over `entries[0..count)`, which is kept ordered by
 * code_line.
 *
 * Returns the index of the entry whose code_line equals `code_line`, or
 * else an index at which such an entry could be inserted; `count` is
 * returned when `code_line` is greater than the last entry's.
 */
static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry *entries, int count, int code_line)
{
    int lo = 0;
    int hi = count - 1;
    int probe = 0;

    /* Past the last entry: the insertion point is the end of the array. */
    if (hi >= 0 && entries[hi].code_line < code_line)
        return count;

    while (lo < hi) {
        probe = (lo + hi) / 2;
        if (entries[probe].code_line > code_line) {
            hi = probe;
        } else if (entries[probe].code_line < code_line) {
            lo = probe + 1;
        } else {
            return probe;
        }
    }

    /* The decision is based on the last probed slot, not on lo/hi. */
    return (entries[probe].code_line >= code_line) ? probe : probe + 1;
}
/*
 * Look up the cached code object registered for `code_line`.
 *
 * Returns a new reference to the cached object. Returns NULL when
 * `code_line` is 0, when the cache has no entries array, or when no entry
 * with exactly that line exists.
 */
static PyCodeObject *__pyx_find_code_object(int code_line)
{
    int idx;
    PyCodeObject *hit;

    if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries))
        return NULL;

    idx = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
    if (unlikely(idx >= __pyx_code_cache.count))
        return NULL;
    if (unlikely(__pyx_code_cache.entries[idx].code_line != code_line))
        return NULL;

    hit = __pyx_code_cache.entries[idx].code_object;
    Py_INCREF(hit);
    return hit;
}
/*
 * Store `code_object` in the code-object cache under `code_line`, keeping
 * the entries ordered by line.
 *
 * - A `code_line` of 0 is ignored.
 * - The entries array is allocated with room for 64 entries on first use
 *   and grown in steps of 64.
 * - An existing entry for the same line is replaced.
 * - Allocation failures leave the cache unchanged; nothing is reported.
 *
 * The cache takes its own reference to `code_object` on every path that
 * stores it. The replace path previously stored the pointer without
 * taking a reference, unlike the other two store paths, so the entry
 * could be left dangling once the caller released its reference.
 */
static void __pyx_insert_code_object(int code_line, PyCodeObject *code_object)
{
    int pos, i;
    __Pyx_CodeObjectCacheEntry *entries = __pyx_code_cache.entries;

    if (unlikely(!code_line)) {
        return;
    }

    /* First insertion: create the array and store the single entry. */
    if (unlikely(!entries)) {
        entries = (__Pyx_CodeObjectCacheEntry *)PyMem_Malloc(64 * sizeof(__Pyx_CodeObjectCacheEntry));
        if (likely(entries)) {
            __pyx_code_cache.entries = entries;
            __pyx_code_cache.max_count = 64;
            __pyx_code_cache.count = 1;
            entries[0].code_line = code_line;
            entries[0].code_object = code_object;
            Py_INCREF(code_object);
        }
        return;
    }

    pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);

    /* Same line already cached: swap the object in place. */
    if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) {
        PyCodeObject *tmp = entries[pos].code_object;
        /* Acquire before releasing so that tmp == code_object stays safe. */
        Py_INCREF(code_object);
        entries[pos].code_object = code_object;
        Py_DECREF(tmp);
        return;
    }

    /* Grow the array when full; the old block stays valid if realloc fails. */
    if (__pyx_code_cache.count == __pyx_code_cache.max_count) {
        int new_max = __pyx_code_cache.max_count + 64;
        entries = (__Pyx_CodeObjectCacheEntry *)PyMem_Realloc(
            __pyx_code_cache.entries, new_max * sizeof(__Pyx_CodeObjectCacheEntry));
        if (unlikely(!entries)) {
            return;
        }
        __pyx_code_cache.entries = entries;
        __pyx_code_cache.max_count = new_max;
    }

    /* Shift the tail up by one slot to open a gap at pos. */
    for (i = __pyx_code_cache.count; i > pos; i--) {
        entries[i] = entries[i - 1];
    }
    entries[pos].code_line = code_line;
    entries[pos].code_object = code_object;
    __pyx_code_cache.count++;
    Py_INCREF(code_object);
}
# include "compile.h"
# include "frameobject.h"
# include "traceback.h"
/*
 * Create an empty code object that only carries location information, for
 * use in a traceback entry.
 *
 * funcname - function name to show. When c_line is non-zero the name is
 *            formatted together with the C file name (__pyx_cfilenm) and
 *            c_line.
 * c_line   - line in the generated C file, or 0 for none.
 * py_line  - stored as the code object's first line number.
 * filename - stored as the code object's file name.
 *
 * Returns the result of __Pyx_PyCode_New, or NULL when creating either
 * string object fails. The temporary string objects are released on both
 * paths.
 */
static PyCodeObject * __Pyx_CreateCodeObjectForTraceback (
const char * funcname , int c_line ,
int py_line , const char * filename ) {
PyCodeObject * py_code = 0 ;
PyObject * py_srcfile = 0 ;
PyObject * py_funcname = 0 ;
# if PY_MAJOR_VERSION < 3
py_srcfile = PyString_FromString ( filename ) ;
# else
py_srcfile = PyUnicode_FromString ( filename ) ;
# endif
if ( ! py_srcfile ) goto bad ;
if ( c_line ) {
# if PY_MAJOR_VERSION < 3
py_funcname = PyString_FromFormat ( " %s (%s:%d) " , funcname , __pyx_cfilenm , c_line ) ;
# else
py_funcname = PyUnicode_FromFormat ( " %s (%s:%d) " , funcname , __pyx_cfilenm , c_line ) ;
# endif
}
else {
# if PY_MAJOR_VERSION < 3
py_funcname = PyString_FromString ( funcname ) ;
# else
py_funcname = PyUnicode_FromString ( funcname ) ;
# endif
}
if ( ! py_funcname ) goto bad ;
/* All counts are zero and all containers are the shared empty objects. */
py_code = __Pyx_PyCode_New (
0 , /*int argcount,*/
0 , /*int kwonlyargcount,*/
0 , /*int nlocals,*/
0 , /*int stacksize,*/
0 , /*int flags,*/
__pyx_empty_bytes , /*PyObject *code,*/
__pyx_empty_tuple , /*PyObject *consts,*/
__pyx_empty_tuple , /*PyObject *names,*/
__pyx_empty_tuple , /*PyObject *varnames,*/
__pyx_empty_tuple , /*PyObject *freevars,*/
__pyx_empty_tuple , /*PyObject *cellvars,*/
py_srcfile , /*PyObject *filename,*/
py_funcname , /*PyObject *name,*/
py_line , /*int firstlineno,*/
__pyx_empty_bytes /*PyObject *lnotab*/
) ;
Py_DECREF ( py_srcfile ) ;
Py_DECREF ( py_funcname ) ;
return py_code ;
bad :
Py_XDECREF ( py_srcfile ) ;
Py_XDECREF ( py_funcname ) ;
return NULL ;
}
static void __Pyx_AddTraceback ( const char * funcname , int c_line ,
int py_line , const char * filename ) {
PyCodeObject * py_code = 0 ;
PyObject * py_globals = 0 ;
PyFrameObject * py_frame = 0 ;
py_code = __pyx_find_code_object ( c_line ? c_line : py_line ) ;
if ( ! py_code ) {
py_code = __Pyx_CreateCodeObjectForTraceback (
funcname , c_line , py_line , filename ) ;
if ( ! py_code ) goto bad ;
__pyx_insert_code_object ( c_line ? c_line : py_line , py_code ) ;
}
py_globals = PyModule_GetDict ( __pyx_m ) ;
if ( ! py_globals ) goto bad ;
py_frame = PyFrame_New (
PyThreadState_GET ( ) , /*PyThreadState *tstate,*/
py_code , /*PyCodeObject *code,*/
py_globals , /*PyObject *globals,*/
0 /*PyObject *locals*/
) ;
if ( ! py_frame ) goto bad ;
py_frame - > f_lineno = py_line ;
PyTraceBack_Here ( py_frame ) ;
bad :
Py_XDECREF ( py_code ) ;
Py_XDECREF ( py_frame ) ;
}
/*
 * Create the Python string object for every entry of the string table `t`.
 *
 * The table ends at the first entry whose `p` member is NULL.  For each
 * entry the object is built from `t->s` and stored through `t->p`; the
 * kind of object depends on the entry's is_unicode / is_str / intern /
 * encoding members and on the Python major version.  Sized constructors
 * receive `t->n - 1` (presumably `n` counts a trailing NUL -- confirm
 * against the table generator).
 *
 * Returns 0 on success, or -1 as soon as one object cannot be created.
 */
static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {
    for (; t->p; ++t) {
#if PY_MAJOR_VERSION < 3
        if (t->is_unicode) {
            *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);
        } else if (t->intern) {
            *t->p = PyString_InternFromString(t->s);
        } else {
            *t->p = PyString_FromStringAndSize(t->s, t->n - 1);
        }
#else  /* Python 3+ has unicode identifiers */
        if (!(t->is_unicode | t->is_str)) {
            *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1);
        } else if (t->intern) {
            *t->p = PyUnicode_InternFromString(t->s);
        } else if (t->encoding) {
            *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL);
        } else {
            *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);
        }
#endif
        if (!*t->p) {
            return -1;
        }
    }
    return 0;
}
/*
 * Convert a NUL-terminated C string to a Python unicode object by measuring
 * it with strlen() and delegating to __Pyx_PyUnicode_FromStringAndSize().
 */
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(char* c_str) {
    const size_t byte_count = strlen(c_str);
    return __Pyx_PyUnicode_FromStringAndSize(c_str, byte_count);
}
/*
 * Same as __Pyx_PyObject_AsStringAndSize(), for callers that do not need
 * the length: the size is written to a throwaway local.
 */
static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) {
    Py_ssize_t unused_length;
    return __Pyx_PyObject_AsStringAndSize(o, &unused_length);
}
/*
 * Return a char* view of `o` and store its size in bytes in `*length`.
 *
 * Dispatch, in order:
 *   1. unicode objects -- only when the module was built with the ASCII or
 *      the "default" C string encoding.  Under the ASCII setting a string
 *      containing a non-ASCII character is rejected: PyUnicode_AsASCIIString()
 *      is called purely to raise the matching encoding error, and NULL is
 *      returned.
 *   2. bytearray objects (not on PyPy, Python >= 2.6).
 *   3. everything else goes through PyBytes_AsStringAndSize().
 *
 * Returns NULL with a Python exception set on failure.
 */
static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) {
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
    if (
#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
            __Pyx_sys_getdefaultencoding_not_ascii &&
#endif
            PyUnicode_Check(o)) {
#if PY_VERSION_HEX < 0x03030000
        char* defenc_c;
        PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL);
        if (!defenc) return NULL;
        defenc_c = PyBytes_AS_STRING(defenc);
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
        {
            /* Scan the default-encoded bytes for anything outside 7-bit ASCII. */
            char* end = defenc_c + PyBytes_GET_SIZE(defenc);
            char* c;
            for (c = defenc_c; c < end; c++) {
                if ((unsigned char) (*c) >= 128) {
                    /* Called only for its side effect of raising the error. */
                    PyUnicode_AsASCIIString(o);
                    return NULL;
                }
            }
        }
#endif /*__PYX_DEFAULT_STRING_ENCODING_IS_ASCII*/
        *length = PyBytes_GET_SIZE(defenc);
        return defenc_c;
#else /* PY_VERSION_HEX < 0x03030000 */
        if (PyUnicode_READY(o) == -1) return NULL;
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
        if (PyUnicode_IS_ASCII(o)) {
            /* For a pure-ASCII string the UTF-8 byte count equals the number
             * of code points.  PyUnicode_GET_DATA_SIZE() must not be used
             * here: it reports the size of the deprecated Py_UNICODE buffer
             * (length * sizeof(Py_UNICODE)), which overstates the length of
             * the UTF-8 buffer returned below. */
            *length = PyUnicode_GET_LENGTH(o);
            return PyUnicode_AsUTF8(o);
        } else {
            /* Called only for its side effect of raising the error. */
            PyUnicode_AsASCIIString(o);
            return NULL;
        }
#else /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */
        return PyUnicode_AsUTF8AndSize(o, length);
#endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */
#endif /* PY_VERSION_HEX < 0x03030000 */
    } else
#endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII  || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT */
#if !CYTHON_COMPILING_IN_PYPY
#if PY_VERSION_HEX >= 0x02060000
    if (PyByteArray_Check(o)) {
        *length = PyByteArray_GET_SIZE(o);
        return PyByteArray_AS_STRING(o);
    } else
#endif
#endif
    {
        char* result;
        int r = PyBytes_AsStringAndSize(o, &result, length);
        if (unlikely(r < 0)) {
            return NULL;
        } else {
            return result;
        }
    }
}
/*
 * Truth test with an identity shortcut for the three common singletons:
 * Py_True yields 1, Py_False and Py_None yield 0.  Any other object is
 * handed to PyObject_IsTrue(), whose result is returned unchanged.
 */
static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {
    if (x == Py_True) {
        return 1;
    }
    if (x == Py_False || x == Py_None) {
        return 0;
    }
    return PyObject_IsTrue(x);
}
/*
 * Coerce `x` to a Python integer object.
 *
 * An object that already is an int/long is returned as a new reference to
 * itself.  Otherwise the type's nb_int slot (or, on Python 2, nb_long as a
 * fallback) decides which conversion is attempted.  A conversion result
 * that is not an integer object raises TypeError; so does the absence of a
 * usable slot, unless an exception is already pending.
 *
 * Returns a new reference, or NULL with an exception set.
 */
static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) {
    PyNumberMethods *methods;
    const char *slot_name = NULL;
    PyObject *converted = NULL;
    int is_integer;

#if PY_MAJOR_VERSION < 3
    is_integer = PyInt_Check(x) || PyLong_Check(x);
#else
    is_integer = PyLong_Check(x);
#endif
    if (is_integer) {
        Py_INCREF(x);
        return x;
    }

    methods = Py_TYPE(x)->tp_as_number;
    if (methods && methods->nb_int) {
        slot_name = " int ";
#if PY_MAJOR_VERSION < 3
        converted = PyNumber_Int(x);
#else
        converted = PyNumber_Long(x);
#endif
    }
#if PY_MAJOR_VERSION < 3
    else if (methods && methods->nb_long) {
        slot_name = " long ";
        converted = PyNumber_Long(x);
    }
#endif

    if (!converted) {
        /* Keep whatever error the conversion raised; only supply a generic
         * one when nothing is pending. */
        if (!PyErr_Occurred()) {
            PyErr_SetString(PyExc_TypeError,
                            " an integer is required ");
        }
        return NULL;
    }

#if PY_MAJOR_VERSION < 3
    is_integer = PyInt_Check(converted) || PyLong_Check(converted);
#else
    is_integer = PyLong_Check(converted);
#endif
    if (!is_integer) {
        PyErr_Format(PyExc_TypeError,
                     " __%.4s__ returned non-%.4s (type %.200s) ",
                     slot_name, slot_name, Py_TYPE(converted)->tp_name);
        Py_DECREF(converted);
        return NULL;
    }
    return converted;
}
# if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
# if CYTHON_USE_PYLONG_INTERNALS
# include "longintrepr.h"
# endif
# endif
/*
 * Convert an index-like object to Py_ssize_t.
 *
 * Exact int (Python 2) and exact long objects are converted directly; on
 * CPython 3 with CYTHON_USE_PYLONG_INTERNALS enabled, longs whose Py_SIZE()
 * is -1, 0 or 1 are read straight out of ob_digit[0].  Every other object
 * is first passed through PyNumber_Index().
 *
 * Returns -1 when PyNumber_Index() fails; otherwise returns whatever the
 * underlying conversion function yields.
 */
static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
    Py_ssize_t result;
    PyObject *index_obj;

#if PY_MAJOR_VERSION < 3
    if (likely(PyInt_CheckExact(b))) {
        return PyInt_AS_LONG(b);
    }
#endif

    if (likely(PyLong_CheckExact(b))) {
#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 && CYTHON_USE_PYLONG_INTERNALS
        switch (Py_SIZE(b)) {
            case  0: return 0;
            case  1: return ((PyLongObject*)b)->ob_digit[0];
            case -1: return -(sdigit)((PyLongObject*)b)->ob_digit[0];
        }
#endif
#if PY_VERSION_HEX < 0x02060000
        return PyInt_AsSsize_t(b);
#else
        return PyLong_AsSsize_t(b);
#endif
    }

    index_obj = PyNumber_Index(b);
    if (!index_obj) {
        return -1;
    }
    result = PyInt_AsSsize_t(index_obj);
    Py_DECREF(index_obj);
    return result;
}
/*
 * Create a Python integer object from a size_t.
 *
 * On Python >= 2.5 this forwards to PyInt_FromSize_t().  On older versions
 * a value that fits in a long goes through PyInt_FromLong(); a larger one
 * is built from the raw bytes of `ival` as an unsigned quantity, using the
 * byte order detected at run time.
 */
static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {
#if PY_VERSION_HEX < 0x02050000
    if (ival > LONG_MAX) {
        /* The first byte of the integer 1 is 1 only on little-endian hosts. */
        const int probe = 1;
        const int is_little_endian = (int)*(const unsigned char *)&probe;
        return _PyLong_FromByteArray((unsigned char *)&ival, sizeof(size_t),
                                     is_little_endian, 0);
    }
    return PyInt_FromLong((long)ival);
#else
    return PyInt_FromSize_t(ival);
#endif
}
# endif /* Py_PYTHON_H */