From f39dcb1d89c78313ae01580383ada5cb9862b59a Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 20 Aug 2014 17:03:44 +0200 Subject: [PATCH] * Add orthography --- spacy/orthography/__init__.pxd | 0 spacy/orthography/__init__.py | 0 spacy/orthography/__init__.pyx | 0 spacy/orthography/latin.cpp | 3567 ++++++++++++++++++++++++++++++++ spacy/orthography/latin.pxd | 33 + spacy/orthography/latin.pyx | 177 ++ 6 files changed, 3777 insertions(+) create mode 100644 spacy/orthography/__init__.pxd create mode 100644 spacy/orthography/__init__.py create mode 100644 spacy/orthography/__init__.pyx create mode 100644 spacy/orthography/latin.cpp create mode 100644 spacy/orthography/latin.pxd create mode 100644 spacy/orthography/latin.pyx diff --git a/spacy/orthography/__init__.pxd b/spacy/orthography/__init__.pxd new file mode 100644 index 000000000..e69de29bb diff --git a/spacy/orthography/__init__.py b/spacy/orthography/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/spacy/orthography/__init__.pyx b/spacy/orthography/__init__.pyx new file mode 100644 index 000000000..e69de29bb diff --git a/spacy/orthography/latin.cpp b/spacy/orthography/latin.cpp new file mode 100644 index 000000000..1f3d2c9c3 --- /dev/null +++ b/spacy/orthography/latin.cpp @@ -0,0 +1,3567 @@ +/* Generated by Cython 0.20.2 on Wed Aug 20 03:05:50 2014 */ + +#define PY_SSIZE_T_CLEAN +#ifndef CYTHON_USE_PYLONG_INTERNALS +#ifdef PYLONG_BITS_IN_DIGIT +#define CYTHON_USE_PYLONG_INTERNALS 0 +#else +#include "pyconfig.h" +#ifdef PYLONG_BITS_IN_DIGIT +#define CYTHON_USE_PYLONG_INTERNALS 1 +#else +#define CYTHON_USE_PYLONG_INTERNALS 0 +#endif +#endif +#endif +#include "Python.h" +#ifndef Py_PYTHON_H + #error Python headers needed to compile C extensions, please install development version of Python. +#elif PY_VERSION_HEX < 0x02040000 + #error Cython requires Python 2.4+. 
+#else +#define CYTHON_ABI "0_20_2" +#include /* For offsetof */ +#ifndef offsetof +#define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) +#endif +#if !defined(WIN32) && !defined(MS_WINDOWS) + #ifndef __stdcall + #define __stdcall + #endif + #ifndef __cdecl + #define __cdecl + #endif + #ifndef __fastcall + #define __fastcall + #endif +#endif +#ifndef DL_IMPORT + #define DL_IMPORT(t) t +#endif +#ifndef DL_EXPORT + #define DL_EXPORT(t) t +#endif +#ifndef PY_LONG_LONG + #define PY_LONG_LONG LONG_LONG +#endif +#ifndef Py_HUGE_VAL + #define Py_HUGE_VAL HUGE_VAL +#endif +#ifdef PYPY_VERSION +#define CYTHON_COMPILING_IN_PYPY 1 +#define CYTHON_COMPILING_IN_CPYTHON 0 +#else +#define CYTHON_COMPILING_IN_PYPY 0 +#define CYTHON_COMPILING_IN_CPYTHON 1 +#endif +#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x02070600 +#define Py_OptimizeFlag 0 +#endif +#if PY_VERSION_HEX < 0x02050000 + typedef int Py_ssize_t; + #define PY_SSIZE_T_MAX INT_MAX + #define PY_SSIZE_T_MIN INT_MIN + #define PY_FORMAT_SIZE_T "" + #define CYTHON_FORMAT_SSIZE_T "" + #define PyInt_FromSsize_t(z) PyInt_FromLong(z) + #define PyInt_AsSsize_t(o) __Pyx_PyInt_As_int(o) + #define PyNumber_Index(o) ((PyNumber_Check(o) && !PyFloat_Check(o)) ? 
PyNumber_Int(o) : \ + (PyErr_Format(PyExc_TypeError, \ + "expected index value, got %.200s", Py_TYPE(o)->tp_name), \ + (PyObject*)0)) + #define __Pyx_PyIndex_Check(o) (PyNumber_Check(o) && !PyFloat_Check(o) && \ + !PyComplex_Check(o)) + #define PyIndex_Check __Pyx_PyIndex_Check + #define PyErr_WarnEx(category, message, stacklevel) PyErr_Warn(category, message) + #define __PYX_BUILD_PY_SSIZE_T "i" +#else + #define __PYX_BUILD_PY_SSIZE_T "n" + #define CYTHON_FORMAT_SSIZE_T "z" + #define __Pyx_PyIndex_Check PyIndex_Check +#endif +#if PY_VERSION_HEX < 0x02060000 + #define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt) + #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) + #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size) + #define PyVarObject_HEAD_INIT(type, size) \ + PyObject_HEAD_INIT(type) size, + #define PyType_Modified(t) + typedef struct { + void *buf; + PyObject *obj; + Py_ssize_t len; + Py_ssize_t itemsize; + int readonly; + int ndim; + char *format; + Py_ssize_t *shape; + Py_ssize_t *strides; + Py_ssize_t *suboffsets; + void *internal; + } Py_buffer; + #define PyBUF_SIMPLE 0 + #define PyBUF_WRITABLE 0x0001 + #define PyBUF_FORMAT 0x0004 + #define PyBUF_ND 0x0008 + #define PyBUF_STRIDES (0x0010 | PyBUF_ND) + #define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES) + #define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES) + #define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES) + #define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES) + #define PyBUF_RECORDS (PyBUF_STRIDES | PyBUF_FORMAT | PyBUF_WRITABLE) + #define PyBUF_FULL (PyBUF_INDIRECT | PyBUF_FORMAT | PyBUF_WRITABLE) + typedef int (*getbufferproc)(PyObject *, Py_buffer *, int); + typedef void (*releasebufferproc)(PyObject *, Py_buffer *); +#endif +#if PY_MAJOR_VERSION < 3 + #define __Pyx_BUILTIN_MODULE_NAME "__builtin__" + #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \ + PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) + #define __Pyx_DefaultClassType 
PyClass_Type +#else + #define __Pyx_BUILTIN_MODULE_NAME "builtins" + #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \ + PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) + #define __Pyx_DefaultClassType PyType_Type +#endif +#if PY_VERSION_HEX < 0x02060000 + #define PyUnicode_FromString(s) PyUnicode_Decode(s, strlen(s), "UTF-8", "strict") +#endif +#if PY_MAJOR_VERSION >= 3 + #define Py_TPFLAGS_CHECKTYPES 0 + #define Py_TPFLAGS_HAVE_INDEX 0 +#endif +#if (PY_VERSION_HEX < 0x02060000) || (PY_MAJOR_VERSION >= 3) + #define Py_TPFLAGS_HAVE_NEWBUFFER 0 +#endif +#if PY_VERSION_HEX < 0x02060000 + #define Py_TPFLAGS_HAVE_VERSION_TAG 0 +#endif +#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TPFLAGS_IS_ABSTRACT) + #define Py_TPFLAGS_IS_ABSTRACT 0 +#endif +#if PY_VERSION_HEX < 0x030400a1 && !defined(Py_TPFLAGS_HAVE_FINALIZE) + #define Py_TPFLAGS_HAVE_FINALIZE 0 +#endif +#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) + #define CYTHON_PEP393_ENABLED 1 + #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ? 
\ + 0 : _PyUnicode_Ready((PyObject *)(op))) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) + #define __Pyx_PyUnicode_KIND(u) PyUnicode_KIND(u) + #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u) + #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) +#else + #define CYTHON_PEP393_ENABLED 0 + #define __Pyx_PyUnicode_READY(op) (0) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i])) + #define __Pyx_PyUnicode_KIND(u) (sizeof(Py_UNICODE)) + #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u)) + #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i])) +#endif +#if CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) +#else + #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ? \ + PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) +#endif +#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b)) +#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? 
PyNumber_Remainder(a, b) : PyUnicode_Format(a, b)) +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b) +#else + #define __Pyx_PyString_Format(a, b) PyString_Format(a, b) +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBaseString_Type PyUnicode_Type + #define PyStringObject PyUnicodeObject + #define PyString_Type PyUnicode_Type + #define PyString_Check PyUnicode_Check + #define PyString_CheckExact PyUnicode_CheckExact +#endif +#if PY_VERSION_HEX < 0x02060000 + #define PyBytesObject PyStringObject + #define PyBytes_Type PyString_Type + #define PyBytes_Check PyString_Check + #define PyBytes_CheckExact PyString_CheckExact + #define PyBytes_FromString PyString_FromString + #define PyBytes_FromStringAndSize PyString_FromStringAndSize + #define PyBytes_FromFormat PyString_FromFormat + #define PyBytes_DecodeEscape PyString_DecodeEscape + #define PyBytes_AsString PyString_AsString + #define PyBytes_AsStringAndSize PyString_AsStringAndSize + #define PyBytes_Size PyString_Size + #define PyBytes_AS_STRING PyString_AS_STRING + #define PyBytes_GET_SIZE PyString_GET_SIZE + #define PyBytes_Repr PyString_Repr + #define PyBytes_Concat PyString_Concat + #define PyBytes_ConcatAndDel PyString_ConcatAndDel +#endif +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj) + #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj) +#else + #define __Pyx_PyBaseString_Check(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj) || \ + PyString_Check(obj) || PyUnicode_Check(obj)) + #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj)) +#endif +#if PY_VERSION_HEX < 0x02060000 + #define PySet_Check(obj) PyObject_TypeCheck(obj, &PySet_Type) + #define PyFrozenSet_Check(obj) PyObject_TypeCheck(obj, &PyFrozenSet_Type) +#endif +#ifndef PySet_CheckExact + #define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type) +#endif +#define __Pyx_TypeCheck(obj, type) 
PyObject_TypeCheck(obj, (PyTypeObject *)type) +#if PY_MAJOR_VERSION >= 3 + #define PyIntObject PyLongObject + #define PyInt_Type PyLong_Type + #define PyInt_Check(op) PyLong_Check(op) + #define PyInt_CheckExact(op) PyLong_CheckExact(op) + #define PyInt_FromString PyLong_FromString + #define PyInt_FromUnicode PyLong_FromUnicode + #define PyInt_FromLong PyLong_FromLong + #define PyInt_FromSize_t PyLong_FromSize_t + #define PyInt_FromSsize_t PyLong_FromSsize_t + #define PyInt_AsLong PyLong_AsLong + #define PyInt_AS_LONG PyLong_AS_LONG + #define PyInt_AsSsize_t PyLong_AsSsize_t + #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask + #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask + #define PyNumber_Int PyNumber_Long +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBoolObject PyLongObject +#endif +#if PY_VERSION_HEX < 0x030200A4 + typedef long Py_hash_t; + #define __Pyx_PyInt_FromHash_t PyInt_FromLong + #define __Pyx_PyInt_AsHash_t PyInt_AsLong +#else + #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t + #define __Pyx_PyInt_AsHash_t PyInt_AsSsize_t +#endif +#if (PY_MAJOR_VERSION < 3) || (PY_VERSION_HEX >= 0x03010300) + #define __Pyx_PySequence_GetSlice(obj, a, b) PySequence_GetSlice(obj, a, b) + #define __Pyx_PySequence_SetSlice(obj, a, b, value) PySequence_SetSlice(obj, a, b, value) + #define __Pyx_PySequence_DelSlice(obj, a, b) PySequence_DelSlice(obj, a, b) +#else + #define __Pyx_PySequence_GetSlice(obj, a, b) (unlikely(!(obj)) ? \ + (PyErr_SetString(PyExc_SystemError, "null argument to internal routine"), (PyObject*)0) : \ + (likely((obj)->ob_type->tp_as_mapping) ? (PySequence_GetSlice(obj, a, b)) : \ + (PyErr_Format(PyExc_TypeError, "'%.200s' object is unsliceable", (obj)->ob_type->tp_name), (PyObject*)0))) + #define __Pyx_PySequence_SetSlice(obj, a, b, value) (unlikely(!(obj)) ? \ + (PyErr_SetString(PyExc_SystemError, "null argument to internal routine"), -1) : \ + (likely((obj)->ob_type->tp_as_mapping) ? 
(PySequence_SetSlice(obj, a, b, value)) : \ + (PyErr_Format(PyExc_TypeError, "'%.200s' object doesn't support slice assignment", (obj)->ob_type->tp_name), -1))) + #define __Pyx_PySequence_DelSlice(obj, a, b) (unlikely(!(obj)) ? \ + (PyErr_SetString(PyExc_SystemError, "null argument to internal routine"), -1) : \ + (likely((obj)->ob_type->tp_as_mapping) ? (PySequence_DelSlice(obj, a, b)) : \ + (PyErr_Format(PyExc_TypeError, "'%.200s' object doesn't support slice deletion", (obj)->ob_type->tp_name), -1))) +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : PyInstanceMethod_New(func)) +#endif +#if PY_VERSION_HEX < 0x02050000 + #define __Pyx_GetAttrString(o,n) PyObject_GetAttrString((o),((char *)(n))) + #define __Pyx_SetAttrString(o,n,a) PyObject_SetAttrString((o),((char *)(n)),(a)) + #define __Pyx_DelAttrString(o,n) PyObject_DelAttrString((o),((char *)(n))) +#else + #define __Pyx_GetAttrString(o,n) PyObject_GetAttrString((o),(n)) + #define __Pyx_SetAttrString(o,n,a) PyObject_SetAttrString((o),(n),(a)) + #define __Pyx_DelAttrString(o,n) PyObject_DelAttrString((o),(n)) +#endif +#if PY_VERSION_HEX < 0x02050000 + #define __Pyx_NAMESTR(n) ((char *)(n)) + #define __Pyx_DOCSTR(n) ((char *)(n)) +#else + #define __Pyx_NAMESTR(n) (n) + #define __Pyx_DOCSTR(n) (n) +#endif +#ifndef CYTHON_INLINE + #if defined(__GNUC__) + #define CYTHON_INLINE __inline__ + #elif defined(_MSC_VER) + #define CYTHON_INLINE __inline + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_INLINE inline + #else + #define CYTHON_INLINE + #endif +#endif +#ifndef CYTHON_RESTRICT + #if defined(__GNUC__) + #define CYTHON_RESTRICT __restrict__ + #elif defined(_MSC_VER) && _MSC_VER >= 1400 + #define CYTHON_RESTRICT __restrict + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_RESTRICT restrict + #else + #define CYTHON_RESTRICT + #endif +#endif +#ifdef NAN +#define __PYX_NAN() ((float) NAN) 
+#else +static CYTHON_INLINE float __PYX_NAN() { + /* Initialize NaN. The sign is irrelevant, an exponent with all bits 1 and + a nonzero mantissa means NaN. If the first bit in the mantissa is 1, it is + a quiet NaN. */ + float value; + memset(&value, 0xFF, sizeof(value)); + return value; +} +#endif +#ifdef __cplusplus +template +void __Pyx_call_destructor(T* x) { + x->~T(); +} +#endif + + +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y) +#else + #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y) +#endif + +#ifndef __PYX_EXTERN_C + #ifdef __cplusplus + #define __PYX_EXTERN_C extern "C" + #else + #define __PYX_EXTERN_C extern + #endif +#endif + +#if defined(WIN32) || defined(MS_WINDOWS) +#define _USE_MATH_DEFINES +#endif +#include +#define __PYX_HAVE__spacy__orthography__latin +#define __PYX_HAVE_API__spacy__orthography__latin +#include "stdint.h" +#ifdef _OPENMP +#include +#endif /* _OPENMP */ + +#ifdef PYREX_WITHOUT_ASSERTIONS +#define CYTHON_WITHOUT_ASSERTIONS +#endif + +#ifndef CYTHON_UNUSED +# if defined(__GNUC__) +# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +#endif +typedef struct {PyObject **p; char *s; const Py_ssize_t n; const char* encoding; + const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; /*proto*/ + +#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0 +#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0 +#define __PYX_DEFAULT_STRING_ENCODING "" +#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString +#define 
__Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#define __Pyx_fits_Py_ssize_t(v, type, is_signed) ( \ + (sizeof(type) < sizeof(Py_ssize_t)) || \ + (sizeof(type) > sizeof(Py_ssize_t) && \ + likely(v < (type)PY_SSIZE_T_MAX || \ + v == (type)PY_SSIZE_T_MAX) && \ + (!is_signed || likely(v > (type)PY_SSIZE_T_MIN || \ + v == (type)PY_SSIZE_T_MIN))) || \ + (sizeof(type) == sizeof(Py_ssize_t) && \ + (is_signed || likely(v < (type)PY_SSIZE_T_MAX || \ + v == (type)PY_SSIZE_T_MAX))) ) +static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject*); +static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); +#define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s)) +#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l) +#define __Pyx_PyBytes_FromString PyBytes_FromString +#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*); +#if PY_MAJOR_VERSION < 3 + #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#else + #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize +#endif +#define __Pyx_PyObject_AsSString(s) ((signed char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_FromUString(s) __Pyx_PyObject_FromString((const char*)s) +#define __Pyx_PyBytes_FromUString(s) __Pyx_PyBytes_FromString((const char*)s) +#define __Pyx_PyByteArray_FromUString(s) __Pyx_PyByteArray_FromString((const char*)s) +#define __Pyx_PyStr_FromUString(s) __Pyx_PyStr_FromString((const char*)s) +#define __Pyx_PyUnicode_FromUString(s) __Pyx_PyUnicode_FromString((const char*)s) +#if PY_MAJOR_VERSION < 3 +static CYTHON_INLINE size_t 
__Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) +{ + const Py_UNICODE *u_end = u; + while (*u_end++) ; + return (size_t)(u_end - u - 1); +} +#else +#define __Pyx_Py_UNICODE_strlen Py_UNICODE_strlen +#endif +#define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u)) +#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode +#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode +#define __Pyx_Owned_Py_None(b) (Py_INCREF(Py_None), Py_None) +#define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False)) +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); +static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x); +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); +#if CYTHON_COMPILING_IN_CPYTHON +#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x)) +#else +#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x) +#endif +#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x)) +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII +static int __Pyx_sys_getdefaultencoding_not_ascii; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + PyObject* ascii_chars_u = NULL; + PyObject* ascii_chars_b = NULL; + const char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + if (strcmp(default_encoding_c, "ascii") == 0) { + __Pyx_sys_getdefaultencoding_not_ascii = 0; + } else { + char ascii_chars[128]; + int c; + for (c = 0; c < 128; c++) { + ascii_chars[c] = c; + } + 
__Pyx_sys_getdefaultencoding_not_ascii = 1; + ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL); + if (!ascii_chars_u) goto bad; + ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL); + if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) { + PyErr_Format( + PyExc_ValueError, + "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.", + default_encoding_c); + goto bad; + } + Py_DECREF(ascii_chars_u); + Py_DECREF(ascii_chars_b); + } + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + Py_XDECREF(ascii_chars_u); + Py_XDECREF(ascii_chars_b); + return -1; +} +#endif +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3 +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL) +#else +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL)
+#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
+static char* __PYX_DEFAULT_STRING_ENCODING; /* malloc'ed copy of the name returned by sys.getdefaultencoding(); filled in by the function below */
+static int __Pyx_init_sys_getdefaultencoding_params(void) { /* Copy sys.getdefaultencoding() into __PYX_DEFAULT_STRING_ENCODING. Returns 0 on success, -1 on failure. */
+    PyObject* sys;
+    PyObject* default_encoding = NULL;
+    char* default_encoding_c;
+    sys = PyImport_ImportModule("sys");
+    if (!sys) goto bad;
+    default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL);
+    Py_DECREF(sys); /* the module object is no longer needed once the call has been made */
+    if (!default_encoding) goto bad;
+    default_encoding_c = PyBytes_AsString(default_encoding); /* points into default_encoding's own buffer, hence the copy made before the DECREF below */
+    if (!default_encoding_c) goto bad;
+    __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1); /* +1: strcpy() below also writes the terminating NUL, which strlen() does not count */
+    if (!__PYX_DEFAULT_STRING_ENCODING) goto bad; /* NOTE: no Python exception is set on this allocation-failure path */
+    strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c);
+    Py_DECREF(default_encoding);
+    return 0;
+bad:
+    Py_XDECREF(default_encoding); /* may still be NULL if the import or the call failed */
+    return -1;
+}
+#endif
+#endif + + +/* Test for GCC > 2.95 */ +#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && 
(__GNUC_MINOR__ > 95))) + #define likely(x) __builtin_expect(!!(x), 1) + #define unlikely(x) __builtin_expect(!!(x), 0) +#else /* !__GNUC__ or GCC < 2.95 */ + #define likely(x) (x) + #define unlikely(x) (x) +#endif /* __GNUC__ */ + +static PyObject *__pyx_m; +static PyObject *__pyx_d; +static PyObject *__pyx_b; +static PyObject *__pyx_empty_tuple; +static PyObject *__pyx_empty_bytes; +static int __pyx_lineno; +static int __pyx_clineno = 0; +static const char * __pyx_cfilenm= __FILE__; +static const char *__pyx_filename; + + +static const char *__pyx_f[] = { + "latin.pyx", +}; + +/* "spacy/lexeme.pxd":5 + * + * + * ctypedef int ClusterID # <<<<<<<<<<<<<< + * ctypedef uint32_t StringHash + * ctypedef size_t LexID + */ +typedef int __pyx_t_5spacy_6lexeme_ClusterID; + +/* "spacy/lexeme.pxd":6 + * + * ctypedef int ClusterID + * ctypedef uint32_t StringHash # <<<<<<<<<<<<<< + * ctypedef size_t LexID + * ctypedef char OrthFlags + */ +typedef uint32_t __pyx_t_5spacy_6lexeme_StringHash; + +/* "spacy/lexeme.pxd":7 + * ctypedef int ClusterID + * ctypedef uint32_t StringHash + * ctypedef size_t LexID # <<<<<<<<<<<<<< + * ctypedef char OrthFlags + * ctypedef char DistFlags + */ +typedef size_t __pyx_t_5spacy_6lexeme_LexID; + +/* "spacy/lexeme.pxd":8 + * ctypedef uint32_t StringHash + * ctypedef size_t LexID + * ctypedef char OrthFlags # <<<<<<<<<<<<<< + * ctypedef char DistFlags + * ctypedef uint64_t TagFlags + */ +typedef char __pyx_t_5spacy_6lexeme_OrthFlags; + +/* "spacy/lexeme.pxd":9 + * ctypedef size_t LexID + * ctypedef char OrthFlags + * ctypedef char DistFlags # <<<<<<<<<<<<<< + * ctypedef uint64_t TagFlags + * + */ +typedef char __pyx_t_5spacy_6lexeme_DistFlags; + +/* "spacy/lexeme.pxd":10 + * ctypedef char OrthFlags + * ctypedef char DistFlags + * ctypedef uint64_t TagFlags # <<<<<<<<<<<<<< + * + * + */ +typedef uint64_t __pyx_t_5spacy_6lexeme_TagFlags; + +/*--- Type declarations ---*/ +struct __pyx_t_5spacy_6lexeme_Lexeme; + +/* "spacy/lexeme.pxd":13 + * + * + * cdef 
struct Lexeme: # <<<<<<<<<<<<<< + * StringHash lex + * char* string + */ +struct __pyx_t_5spacy_6lexeme_Lexeme { + __pyx_t_5spacy_6lexeme_StringHash lex; + char *string; + size_t length; + double prob; + __pyx_t_5spacy_6lexeme_ClusterID cluster; + __pyx_t_5spacy_6lexeme_TagFlags possible_tags; + __pyx_t_5spacy_6lexeme_DistFlags dist_flags; + __pyx_t_5spacy_6lexeme_OrthFlags orth_flags; + __pyx_t_5spacy_6lexeme_StringHash *string_views; +}; + +/* "spacy/orthography/latin.pxd":1 + * cdef enum OrthFlag: # <<<<<<<<<<<<<< + * IS_ALPHA + * IS_DIGIT + */ +enum __pyx_t_5spacy_11orthography_5latin_OrthFlag { + __pyx_e_5spacy_11orthography_5latin_IS_ALPHA, + __pyx_e_5spacy_11orthography_5latin_IS_DIGIT, + __pyx_e_5spacy_11orthography_5latin_IS_PUNCT, + __pyx_e_5spacy_11orthography_5latin_IS_SPACE, + __pyx_e_5spacy_11orthography_5latin_IS_LOWER, + __pyx_e_5spacy_11orthography_5latin_IS_UPPER, + __pyx_e_5spacy_11orthography_5latin_IS_TITLE, + __pyx_e_5spacy_11orthography_5latin_IS_ASCII +}; + +/* "spacy/orthography/latin.pxd":12 + * + * + * cdef enum: # <<<<<<<<<<<<<< + * LEX + * LAST3 + */ +enum { + __pyx_e_5spacy_11orthography_5latin_LEX, + __pyx_e_5spacy_11orthography_5latin_LAST3, + __pyx_e_5spacy_11orthography_5latin_NORM, + __pyx_e_5spacy_11orthography_5latin_SHAPE +}; +#ifndef CYTHON_REFNANNY + #define CYTHON_REFNANNY 0 +#endif +#if CYTHON_REFNANNY + typedef struct { + void (*INCREF)(void*, PyObject*, int); + void (*DECREF)(void*, PyObject*, int); + void (*GOTREF)(void*, PyObject*, int); + void (*GIVEREF)(void*, PyObject*, int); + void* (*SetupContext)(const char*, int, const char*); + void (*FinishContext)(void**); + } __Pyx_RefNannyAPIStruct; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); /*proto*/ + #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL; +#ifdef WITH_THREAD + #define __Pyx_RefNannySetupContext(name, acquire_gil) \ + if (acquire_gil) { \ + 
PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure(); \ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__); \ + PyGILState_Release(__pyx_gilstate_save); \ + } else { \ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__); \ + } +#else + #define __Pyx_RefNannySetupContext(name, acquire_gil) \ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__) +#endif + #define __Pyx_RefNannyFinishContext() \ + __Pyx_RefNanny->FinishContext(&__pyx_refnanny) + #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_XINCREF(r) do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0) + #define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0) + #define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0) + #define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0) +#else + #define __Pyx_RefNannyDeclarations + #define __Pyx_RefNannySetupContext(name, acquire_gil) + #define __Pyx_RefNannyFinishContext() + #define __Pyx_INCREF(r) Py_INCREF(r) + #define __Pyx_DECREF(r) Py_DECREF(r) + #define __Pyx_GOTREF(r) + #define __Pyx_GIVEREF(r) + #define __Pyx_XINCREF(r) Py_XINCREF(r) + #define __Pyx_XDECREF(r) Py_XDECREF(r) + #define __Pyx_XGOTREF(r) + #define __Pyx_XGIVEREF(r) +#endif /* CYTHON_REFNANNY */ +#define __Pyx_XDECREF_SET(r, v) do { \ + PyObject *tmp = (PyObject *) r; \ + r = v; __Pyx_XDECREF(tmp); \ + } while (0) +#define __Pyx_DECREF_SET(r, v) do { \ + PyObject *tmp = (PyObject *) r; \ + r = v; __Pyx_DECREF(tmp); \ + } while (0) +#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) +#define 
__Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) + +static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, + const char *name, int exact); /*proto*/ + +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) { + PyTypeObject* tp = Py_TYPE(obj); + if (likely(tp->tp_getattro)) + return tp->tp_getattro(obj, attr_name); +#if PY_MAJOR_VERSION < 3 + if (likely(tp->tp_getattr)) + return tp->tp_getattr(obj, PyString_AS_STRING(attr_name)); +#endif + return PyObject_GetAttr(obj, attr_name); +} +#else +#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n) +#endif + +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); /*proto*/ +#else +#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw) +#endif + +static PyObject *__Pyx_GetBuiltinName(PyObject *name); /*proto*/ + +static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name); /*proto*/ + +static CYTHON_INLINE int __Pyx_init_unicode_iteration( + PyObject* ustring, Py_ssize_t *length, void** data, int *kind); /* proto */ + +static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *); + +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_uint32_t(uint32_t value); + +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); + +static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *); + +static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); + +static int __Pyx_check_binary_version(void); + +static int __Pyx_ExportFunction(const char *name, void (*f)(void), const char *sig); /*proto*/ + +typedef struct { + int code_line; + PyCodeObject* code_object; +} __Pyx_CodeObjectCacheEntry; +struct __Pyx_CodeObjectCache { + int count; + int max_count; + __Pyx_CodeObjectCacheEntry* entries; +}; +static struct __Pyx_CodeObjectCache __pyx_code_cache = 
{0,0,NULL}; +static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line); +static PyCodeObject *__pyx_find_code_object(int code_line); +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object); + +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename); /*proto*/ + +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/ + + +/* Module declarations from 'libc.stdint' */ + +/* Module declarations from 'spacy.lexeme' */ + +/* Module declarations from 'spacy.orthography.latin' */ +static int __pyx_f_5spacy_11orthography_5latin_is_alpha(__pyx_t_5spacy_6lexeme_LexID, int __pyx_skip_dispatch); /*proto*/ +static int __pyx_f_5spacy_11orthography_5latin_is_digit(__pyx_t_5spacy_6lexeme_LexID, int __pyx_skip_dispatch); /*proto*/ +static int __pyx_f_5spacy_11orthography_5latin_is_punct(__pyx_t_5spacy_6lexeme_LexID, int __pyx_skip_dispatch); /*proto*/ +static int __pyx_f_5spacy_11orthography_5latin_is_space(__pyx_t_5spacy_6lexeme_LexID, int __pyx_skip_dispatch); /*proto*/ +static int __pyx_f_5spacy_11orthography_5latin_is_lower(__pyx_t_5spacy_6lexeme_LexID, int __pyx_skip_dispatch); /*proto*/ +static int __pyx_f_5spacy_11orthography_5latin_is_upper(__pyx_t_5spacy_6lexeme_LexID, int __pyx_skip_dispatch); /*proto*/ +static int __pyx_f_5spacy_11orthography_5latin_is_title(__pyx_t_5spacy_6lexeme_LexID, int __pyx_skip_dispatch); /*proto*/ +static int __pyx_f_5spacy_11orthography_5latin_is_ascii(__pyx_t_5spacy_6lexeme_LexID, int __pyx_skip_dispatch); /*proto*/ +static __pyx_t_5spacy_6lexeme_StringHash __pyx_f_5spacy_11orthography_5latin_norm_of(__pyx_t_5spacy_6lexeme_LexID, int __pyx_skip_dispatch); /*proto*/ +static __pyx_t_5spacy_6lexeme_StringHash __pyx_f_5spacy_11orthography_5latin_shape_of(__pyx_t_5spacy_6lexeme_LexID, int __pyx_skip_dispatch); /*proto*/ +static __pyx_t_5spacy_6lexeme_StringHash 
__pyx_f_5spacy_11orthography_5latin_last3_of(__pyx_t_5spacy_6lexeme_LexID, int __pyx_skip_dispatch); /*proto*/ +#define __Pyx_MODULE_NAME "spacy.orthography.latin" +int __pyx_module_is_main_spacy__orthography__latin = 0; + +/* Implementation of 'spacy.orthography.latin' */ +static PyObject *__pyx_pf_5spacy_11orthography_5latin_get_normalized(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_lex); /* proto */ +static PyObject *__pyx_pf_5spacy_11orthography_5latin_2get_word_shape(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_lex); /* proto */ +static PyObject *__pyx_pf_5spacy_11orthography_5latin_4is_alpha(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id); /* proto */ +static PyObject *__pyx_pf_5spacy_11orthography_5latin_6is_digit(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id); /* proto */ +static PyObject *__pyx_pf_5spacy_11orthography_5latin_8is_punct(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id); /* proto */ +static PyObject *__pyx_pf_5spacy_11orthography_5latin_10is_space(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id); /* proto */ +static PyObject *__pyx_pf_5spacy_11orthography_5latin_12is_lower(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id); /* proto */ +static PyObject *__pyx_pf_5spacy_11orthography_5latin_14is_upper(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id); /* proto */ +static PyObject *__pyx_pf_5spacy_11orthography_5latin_16is_title(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id); /* proto */ +static PyObject *__pyx_pf_5spacy_11orthography_5latin_18is_ascii(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id); /* proto */ +static PyObject *__pyx_pf_5spacy_11orthography_5latin_20norm_of(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id); /* proto */ 
+static PyObject *__pyx_pf_5spacy_11orthography_5latin_22shape_of(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id); /* proto */ +static PyObject *__pyx_pf_5spacy_11orthography_5latin_24last3_of(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id); /* proto */ +static char __pyx_k_[] = ""; +static char __pyx_k_X[] = "X"; +static char __pyx_k_c[] = "c"; +static char __pyx_k_d[] = "d"; +static char __pyx_k_x[] = "x"; +static char __pyx_k_lex[] = "lex"; +static char __pyx_k_seq[] = "seq"; +static char __pyx_k_last[] = "last"; +static char __pyx_k_main[] = "__main__"; +static char __pyx_k_test[] = "__test__"; +static char __pyx_k_shape[] = "shape"; +static char __pyx_k_length[] = "length"; +static char __pyx_k_isalpha[] = "isalpha"; +static char __pyx_k_islower[] = "islower"; +static char __pyx_k_shape_char[] = "shape_char"; +static char __pyx_k_get_normalized[] = "get_normalized"; +static char __pyx_k_get_word_shape[] = "get_word_shape"; +static char __pyx_k_is_alpha_line_37[] = "is_alpha (line 37)"; +static char __pyx_k_is_digit_line_50[] = "is_digit (line 50)"; +static char __pyx_k_is_lower_line_90[] = "is_lower (line 90)"; +static char __pyx_k_is_punct_line_63[] = "is_punct (line 63)"; +static char __pyx_k_is_space_line_77[] = "is_space (line 77)"; +static char __pyx_k_norm_of_line_142[] = "norm_of (line 142)"; +static char __pyx_k_is_ascii_line_129[] = "is_ascii (line 129)"; +static char __pyx_k_is_title_line_116[] = "is_title (line 116)"; +static char __pyx_k_is_upper_line_103[] = "is_upper (line 103)"; +static char __pyx_k_last3_of_line_169[] = "last3_of (line 169)"; +static char __pyx_k_shape_of_line_155[] = "shape_of (line 155)"; +static char __pyx_k_spacy_orthography_latin[] = "spacy.orthography.latin"; +static char __pyx_k_Return_the_hash_of_a_normalized[] = "Return the hash of a normalized version of the string.\n\n >>> unhash(norm_of(lookupu'Hi'))\n u'hi'\n >>> unhash(norm_of(lookup(u'255667')))\n 
u'shape=dddd'\n >>> unhash(norm_of(lookup(u'...')))\n u'...'\n "; +static char __pyx_k_Users_matt_repos_spaCy_spacy_or[] = "/Users/matt/repos/spaCy/spacy/orthography/latin.pyx"; +static char __pyx_k_Access_the_last3_field_of_the_Le[] = "Access the `last3' field of the Lexeme pointed to by lex_id, which stores\n the hash of the last three characters of the word:\n >>> lex_ids = [lookup(w) for w in (u'Hello', u'!')]\n >>> [unhash(last3_of(lex_id)) for lex_id in lex_ids]\n [u'llo', u'!']\n "; +static char __pyx_k_Give_the_result_of_checking_whet[] = "Give the result of checking whether all characters belong to a punctuation\n unicode data category for a Lexeme ID.\n\n >>> is_punct(lookup(u'.'))\n True\n >>> is_punct(lookup(u'\342\201\222'))\n True\n >>> is_punct(lookup(u' '))\n False\n "; +static char __pyx_k_Give_the_result_of_unicode_isalp[] = "Give the result of unicode.isalpha() for a Lexeme ID.\n\n >>> is_alpha(lookup(u'Hello'))\n True\n >>> is_alpha(lookup(u'\330\247\331\204\330\271\330\261\330\250'))\n True\n >>> is_alpha(lookup(u'10'))\n False\n "; +static char __pyx_k_Give_the_result_of_unicode_isdig[] = "Give the result of unicode.isdigit() for a Lexeme ID.\n\n >>> is_digit(lookup(u'10'))\n True\n >>> is_digit(lookup(u'\340\271\220'))\n True\n >>> is_digit(lookup(u'one'))\n False\n "; +static char __pyx_k_Give_the_result_of_unicode_islow[] = "Give the result of unicode.islower() for a Lexeme ID.\n\n >>> is_lower(lookup(u'hi'))\n True\n >>> is_lower(lookup())\n True\n >>> is_lower(lookup(u'10'))\n False\n "; +static char __pyx_k_Give_the_result_of_unicode_isspa[] = "Give the result of unicode.isspace() for a Lexeme ID.\n\n >>> is_space(lookup(u'\t'))\n True\n >>> is_space(lookup(u''))\n True\n >>> is_space(lookup(u'Hi\n'))\n False\n "; +static char __pyx_k_Give_the_result_of_unicode_istit[] = "Give the result of unicode.istitle() for a Lexeme ID.\n\n >>> is_title(lookup(u'Hi'))\n True\n >>> is_title(lookup(u'Hi1'))\n True\n >>> is_title(lookup(u'1'))\n 
False\n "; +static char __pyx_k_Give_the_result_of_unicode_isupp[] = "Give the result of unicode.isupper() for a Lexeme ID.\n\n >>> is_upper(lookup(u'HI'))\n True\n >>> is_upper(lookup(u'H10'))\n True\n >>> is_upper(lookup(u'10'))\n False\n "; +static char __pyx_k_Return_the_hash_of_the_string_sh[] = "Return the hash of the string shape.\n\n >>> unhash(shape_of(lookupu'Hi'))\n u'Xx'\n >>> unhash(shape_of(lookup(u'255667')))\n u'dddd'\n >>> unhash(shape_of(lookup(u'...')))\n u'...'\n "; +static char __pyx_k_Give_the_result_of_checking_whet_2[] = "Give the result of checking whether all characters in the string are ascii.\n\n >>> is_ascii(lookup(u'Hi'))\n True\n >>> is_ascii(lookup(u' '))\n True\n >>> is_title(lookup(u''))\n False\n "; +static PyObject *__pyx_kp_s_; +static PyObject *__pyx_kp_u_Access_the_last3_field_of_the_Le; +static PyObject *__pyx_kp_u_Give_the_result_of_checking_whet; +static PyObject *__pyx_kp_u_Give_the_result_of_checking_whet_2; +static PyObject *__pyx_kp_u_Give_the_result_of_unicode_isalp; +static PyObject *__pyx_kp_u_Give_the_result_of_unicode_isdig; +static PyObject *__pyx_kp_u_Give_the_result_of_unicode_islow; +static PyObject *__pyx_kp_u_Give_the_result_of_unicode_isspa; +static PyObject *__pyx_kp_u_Give_the_result_of_unicode_istit; +static PyObject *__pyx_kp_u_Give_the_result_of_unicode_isupp; +static PyObject *__pyx_kp_u_Return_the_hash_of_a_normalized; +static PyObject *__pyx_kp_u_Return_the_hash_of_the_string_sh; +static PyObject *__pyx_kp_s_Users_matt_repos_spaCy_spacy_or; +static PyObject *__pyx_n_s_X; +static PyObject *__pyx_n_s_c; +static PyObject *__pyx_n_s_d; +static PyObject *__pyx_n_s_get_normalized; +static PyObject *__pyx_n_s_get_word_shape; +static PyObject *__pyx_kp_u_is_alpha_line_37; +static PyObject *__pyx_kp_u_is_ascii_line_129; +static PyObject *__pyx_kp_u_is_digit_line_50; +static PyObject *__pyx_kp_u_is_lower_line_90; +static PyObject *__pyx_kp_u_is_punct_line_63; +static PyObject *__pyx_kp_u_is_space_line_77; 
+static PyObject *__pyx_kp_u_is_title_line_116; +static PyObject *__pyx_kp_u_is_upper_line_103; +static PyObject *__pyx_n_s_isalpha; +static PyObject *__pyx_n_s_islower; +static PyObject *__pyx_n_s_last; +static PyObject *__pyx_kp_u_last3_of_line_169; +static PyObject *__pyx_n_s_length; +static PyObject *__pyx_n_s_lex; +static PyObject *__pyx_n_s_main; +static PyObject *__pyx_kp_u_norm_of_line_142; +static PyObject *__pyx_n_s_seq; +static PyObject *__pyx_n_s_shape; +static PyObject *__pyx_n_s_shape_char; +static PyObject *__pyx_kp_u_shape_of_line_155; +static PyObject *__pyx_n_s_spacy_orthography_latin; +static PyObject *__pyx_n_s_test; +static PyObject *__pyx_n_s_x; +static PyObject *__pyx_int_0; +static PyObject *__pyx_int_1; +static PyObject *__pyx_int_3; +static PyObject *__pyx_tuple__2; +static PyObject *__pyx_tuple__4; +static PyObject *__pyx_codeobj__3; +static PyObject *__pyx_codeobj__5; + +/* "spacy/orthography/latin.pyx":3 + * from spacy.lexeme cimport Lexeme + * + * def get_normalized(unicode lex): # <<<<<<<<<<<<<< + * if lex.isalpha() and lex.islower(): + * return lex + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_5spacy_11orthography_5latin_1get_normalized(PyObject *__pyx_self, PyObject *__pyx_v_lex); /*proto*/ +static PyMethodDef __pyx_mdef_5spacy_11orthography_5latin_1get_normalized = {__Pyx_NAMESTR("get_normalized"), (PyCFunction)__pyx_pw_5spacy_11orthography_5latin_1get_normalized, METH_O, __Pyx_DOCSTR(0)}; +static PyObject *__pyx_pw_5spacy_11orthography_5latin_1get_normalized(PyObject *__pyx_self, PyObject *__pyx_v_lex) { + CYTHON_UNUSED int __pyx_lineno = 0; + CYTHON_UNUSED const char *__pyx_filename = NULL; + CYTHON_UNUSED int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("get_normalized (wrapper)", 0); + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_lex), (&PyUnicode_Type), 1, "lex", 1))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} + __pyx_r = __pyx_pf_5spacy_11orthography_5latin_get_normalized(__pyx_self, ((PyObject*)__pyx_v_lex)); + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_5spacy_11orthography_5latin_get_normalized(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_lex) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + int __pyx_t_3; + int __pyx_t_4; + int __pyx_t_5; + PyObject *__pyx_t_6 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("get_normalized", 0); + + /* "spacy/orthography/latin.pyx":4 + * + * def get_normalized(unicode lex): + * if lex.isalpha() and lex.islower(): # <<<<<<<<<<<<<< + * return lex + * else: + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_lex, __pyx_n_s_isalpha); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_empty_tuple, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_3 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + if (__pyx_t_3) { + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_lex, __pyx_n_s_islower); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_empty_tuple, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_5 = __pyx_t_4; + } else { + __pyx_t_5 = __pyx_t_3; + } + if (__pyx_t_5) { + + /* "spacy/orthography/latin.pyx":5 + * def get_normalized(unicode lex): + * if lex.isalpha() and lex.islower(): + * return lex # <<<<<<<<<<<<<< + * else: + * return get_word_shape(lex) + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_v_lex); + __pyx_r = __pyx_v_lex; + goto __pyx_L0; + } + /*else*/ { + + /* "spacy/orthography/latin.pyx":7 + * return lex + * else: + * return get_word_shape(lex) # <<<<<<<<<<<<<< + * + * + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_get_word_shape); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 7; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 7; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __Pyx_INCREF(__pyx_v_lex); + PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_lex); + __Pyx_GIVEREF(__pyx_v_lex); + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_2, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 7; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_r = __pyx_t_6; + __pyx_t_6 = 0; + goto __pyx_L0; + } + + /* "spacy/orthography/latin.pyx":3 + * from spacy.lexeme cimport Lexeme + * + * def get_normalized(unicode lex): # <<<<<<<<<<<<<< + * if lex.isalpha() and lex.islower(): + * return lex + */ + + /* function exit code */ + __pyx_L1_error:; + 
__Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_AddTraceback("spacy.orthography.latin.get_normalized", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "spacy/orthography/latin.pyx":10 + * + * + * def get_word_shape(unicode lex): # <<<<<<<<<<<<<< + * cdef size_t length = len(lex) + * shape = "" + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_5spacy_11orthography_5latin_3get_word_shape(PyObject *__pyx_self, PyObject *__pyx_v_lex); /*proto*/ +static PyMethodDef __pyx_mdef_5spacy_11orthography_5latin_3get_word_shape = {__Pyx_NAMESTR("get_word_shape"), (PyCFunction)__pyx_pw_5spacy_11orthography_5latin_3get_word_shape, METH_O, __Pyx_DOCSTR(0)}; +static PyObject *__pyx_pw_5spacy_11orthography_5latin_3get_word_shape(PyObject *__pyx_self, PyObject *__pyx_v_lex) { + CYTHON_UNUSED int __pyx_lineno = 0; + CYTHON_UNUSED const char *__pyx_filename = NULL; + CYTHON_UNUSED int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("get_word_shape (wrapper)", 0); + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_lex), (&PyUnicode_Type), 1, "lex", 1))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 10; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_r = __pyx_pf_5spacy_11orthography_5latin_2get_word_shape(__pyx_self, ((PyObject*)__pyx_v_lex)); + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_5spacy_11orthography_5latin_2get_word_shape(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_lex) { + CYTHON_UNUSED size_t __pyx_v_length; + PyObject *__pyx_v_shape = NULL; + PyObject *__pyx_v_last = NULL; + PyObject *__pyx_v_shape_char = NULL; + PyObject *__pyx_v_seq = NULL; + Py_UCS4 __pyx_v_c; + PyObject *__pyx_r = NULL; + 
__Pyx_RefNannyDeclarations + Py_ssize_t __pyx_t_1; + PyObject *__pyx_t_2 = NULL; + Py_ssize_t __pyx_t_3; + void *__pyx_t_4; + int __pyx_t_5; + int __pyx_t_6; + Py_ssize_t __pyx_t_7; + int __pyx_t_8; + PyObject *__pyx_t_9 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("get_word_shape", 0); + + /* "spacy/orthography/latin.pyx":11 + * + * def get_word_shape(unicode lex): + * cdef size_t length = len(lex) # <<<<<<<<<<<<<< + * shape = "" + * last = "" + */ + if (unlikely(__pyx_v_lex == Py_None)) { + PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __pyx_t_1 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_lex); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_length = __pyx_t_1; + + /* "spacy/orthography/latin.pyx":12 + * def get_word_shape(unicode lex): + * cdef size_t length = len(lex) + * shape = "" # <<<<<<<<<<<<<< + * last = "" + * shape_char = "" + */ + __Pyx_INCREF(__pyx_kp_s_); + __pyx_v_shape = __pyx_kp_s_; + + /* "spacy/orthography/latin.pyx":13 + * cdef size_t length = len(lex) + * shape = "" + * last = "" # <<<<<<<<<<<<<< + * shape_char = "" + * seq = 0 + */ + __Pyx_INCREF(__pyx_kp_s_); + __pyx_v_last = __pyx_kp_s_; + + /* "spacy/orthography/latin.pyx":14 + * shape = "" + * last = "" + * shape_char = "" # <<<<<<<<<<<<<< + * seq = 0 + * for c in lex: + */ + __Pyx_INCREF(__pyx_kp_s_); + __pyx_v_shape_char = __pyx_kp_s_; + + /* "spacy/orthography/latin.pyx":15 + * last = "" + * shape_char = "" + * seq = 0 # <<<<<<<<<<<<<< + * for c in lex: + * if c.isalpha(): + */ + __Pyx_INCREF(__pyx_int_0); + __pyx_v_seq = __pyx_int_0; + + /* "spacy/orthography/latin.pyx":16 + * shape_char = "" + * seq = 0 + * for c in lex: # <<<<<<<<<<<<<< + * if c.isalpha(): + * if c.isupper(): + */ + if 
(unlikely(__pyx_v_lex == Py_None)) { + PyErr_SetString(PyExc_TypeError, "'NoneType' is not iterable"); + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __Pyx_INCREF(__pyx_v_lex); + __pyx_t_2 = __pyx_v_lex; + __pyx_t_6 = __Pyx_init_unicode_iteration(__pyx_t_2, (&__pyx_t_3), (&__pyx_t_4), (&__pyx_t_5)); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 16; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_3; __pyx_t_7++) { + __pyx_t_1 = __pyx_t_7; + __pyx_v_c = __Pyx_PyUnicode_READ(__pyx_t_5, __pyx_t_4, __pyx_t_1); + + /* "spacy/orthography/latin.pyx":17 + * seq = 0 + * for c in lex: + * if c.isalpha(): # <<<<<<<<<<<<<< + * if c.isupper(): + * shape_char = "X" + */ + __pyx_t_8 = Py_UNICODE_ISALPHA(__pyx_v_c); + if ((__pyx_t_8 != 0)) { + + /* "spacy/orthography/latin.pyx":18 + * for c in lex: + * if c.isalpha(): + * if c.isupper(): # <<<<<<<<<<<<<< + * shape_char = "X" + * else: + */ + __pyx_t_8 = Py_UNICODE_ISUPPER(__pyx_v_c); + if ((__pyx_t_8 != 0)) { + + /* "spacy/orthography/latin.pyx":19 + * if c.isalpha(): + * if c.isupper(): + * shape_char = "X" # <<<<<<<<<<<<<< + * else: + * shape_char = "x" + */ + __Pyx_INCREF(__pyx_n_s_X); + __Pyx_DECREF_SET(__pyx_v_shape_char, __pyx_n_s_X); + goto __pyx_L6; + } + /*else*/ { + + /* "spacy/orthography/latin.pyx":21 + * shape_char = "X" + * else: + * shape_char = "x" # <<<<<<<<<<<<<< + * elif c.isdigit(): + * shape_char = "d" + */ + __Pyx_INCREF(__pyx_n_s_x); + __Pyx_DECREF_SET(__pyx_v_shape_char, __pyx_n_s_x); + } + __pyx_L6:; + goto __pyx_L5; + } + + /* "spacy/orthography/latin.pyx":22 + * else: + * shape_char = "x" + * elif c.isdigit(): # <<<<<<<<<<<<<< + * shape_char = "d" + * else: + */ + __pyx_t_8 = Py_UNICODE_ISDIGIT(__pyx_v_c); + if ((__pyx_t_8 != 0)) { + + /* "spacy/orthography/latin.pyx":23 + * shape_char = "x" + * elif c.isdigit(): + * shape_char = "d" # <<<<<<<<<<<<<< + * else: + * shape_char = 
c + */ + __Pyx_INCREF(__pyx_n_s_d); + __Pyx_DECREF_SET(__pyx_v_shape_char, __pyx_n_s_d); + goto __pyx_L5; + } + /*else*/ { + + /* "spacy/orthography/latin.pyx":25 + * shape_char = "d" + * else: + * shape_char = c # <<<<<<<<<<<<<< + * if shape_char == last: + * seq += 1 + */ + __pyx_t_9 = PyUnicode_FromOrdinal(__pyx_v_c); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 25; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_9); + __Pyx_DECREF_SET(__pyx_v_shape_char, __pyx_t_9); + __pyx_t_9 = 0; + } + __pyx_L5:; + + /* "spacy/orthography/latin.pyx":26 + * else: + * shape_char = c + * if shape_char == last: # <<<<<<<<<<<<<< + * seq += 1 + * else: + */ + __pyx_t_9 = PyObject_RichCompare(__pyx_v_shape_char, __pyx_v_last, Py_EQ); __Pyx_XGOTREF(__pyx_t_9); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 26; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_9); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 26; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + if (__pyx_t_8) { + + /* "spacy/orthography/latin.pyx":27 + * shape_char = c + * if shape_char == last: + * seq += 1 # <<<<<<<<<<<<<< + * else: + * seq = 0 + */ + __pyx_t_9 = PyNumber_InPlaceAdd(__pyx_v_seq, __pyx_int_1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_9); + __Pyx_DECREF_SET(__pyx_v_seq, __pyx_t_9); + __pyx_t_9 = 0; + goto __pyx_L7; + } + /*else*/ { + + /* "spacy/orthography/latin.pyx":29 + * seq += 1 + * else: + * seq = 0 # <<<<<<<<<<<<<< + * last = shape_char + * if seq < 3: + */ + __Pyx_INCREF(__pyx_int_0); + __Pyx_DECREF_SET(__pyx_v_seq, __pyx_int_0); + + /* "spacy/orthography/latin.pyx":30 + * else: + * seq = 0 + * last = shape_char # <<<<<<<<<<<<<< + * if seq < 3: + * shape += shape_char + */ + 
__Pyx_INCREF(__pyx_v_shape_char); + __Pyx_DECREF_SET(__pyx_v_last, __pyx_v_shape_char); + } + __pyx_L7:; + + /* "spacy/orthography/latin.pyx":31 + * seq = 0 + * last = shape_char + * if seq < 3: # <<<<<<<<<<<<<< + * shape += shape_char + * assert shape + */ + __pyx_t_9 = PyObject_RichCompare(__pyx_v_seq, __pyx_int_3, Py_LT); __Pyx_XGOTREF(__pyx_t_9); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 31; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_9); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 31; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + if (__pyx_t_8) { + + /* "spacy/orthography/latin.pyx":32 + * last = shape_char + * if seq < 3: + * shape += shape_char # <<<<<<<<<<<<<< + * assert shape + * return shape + */ + __pyx_t_9 = PyNumber_InPlaceAdd(__pyx_v_shape, __pyx_v_shape_char); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_9); + __Pyx_DECREF_SET(__pyx_v_shape, __pyx_t_9); + __pyx_t_9 = 0; + goto __pyx_L8; + } + __pyx_L8:; + } + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "spacy/orthography/latin.pyx":33 + * if seq < 3: + * shape += shape_char + * assert shape # <<<<<<<<<<<<<< + * return shape + * + */ + #ifndef CYTHON_WITHOUT_ASSERTIONS + if (unlikely(!Py_OptimizeFlag)) { + __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_v_shape); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_t_8)) { + PyErr_SetNone(PyExc_AssertionError); + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + } + #endif + + /* "spacy/orthography/latin.pyx":34 + * shape += shape_char + * assert shape + * return shape # <<<<<<<<<<<<<< + * + * + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_v_shape); + 
__pyx_r = __pyx_v_shape; + goto __pyx_L0; + + /* "spacy/orthography/latin.pyx":10 + * + * + * def get_word_shape(unicode lex): # <<<<<<<<<<<<<< + * cdef size_t length = len(lex) + * shape = "" + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_9); + __Pyx_AddTraceback("spacy.orthography.latin.get_word_shape", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_shape); + __Pyx_XDECREF(__pyx_v_last); + __Pyx_XDECREF(__pyx_v_shape_char); + __Pyx_XDECREF(__pyx_v_seq); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "spacy/orthography/latin.pyx":37 + * + * + * cpdef bint is_alpha(LexID lex_id) except *: # <<<<<<<<<<<<<< + * """Give the result of unicode.isalpha() for a Lexeme ID. + * + */ + +static PyObject *__pyx_pw_5spacy_11orthography_5latin_5is_alpha(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/ +static int __pyx_f_5spacy_11orthography_5latin_is_alpha(__pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id, CYTHON_UNUSED int __pyx_skip_dispatch) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("is_alpha", 0); + + /* "spacy/orthography/latin.pyx":47 + * False + * """ + * return (lex_id).orth_flags & 1 << IS_ALPHA # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = (((struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_v_lex_id)->orth_flags & (1 << __pyx_e_5spacy_11orthography_5latin_IS_ALPHA)); + goto __pyx_L0; + + /* "spacy/orthography/latin.pyx":37 + * + * + * cpdef bint is_alpha(LexID lex_id) except *: # <<<<<<<<<<<<<< + * """Give the result of unicode.isalpha() for a Lexeme ID. 
+ * + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_5spacy_11orthography_5latin_5is_alpha(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/ +static char __pyx_doc_5spacy_11orthography_5latin_4is_alpha[] = "Give the result of unicode.isalpha() for a Lexeme ID.\n\n >>> is_alpha(lookup(u'Hello'))\n True\n >>> is_alpha(lookup(u'\330\247\331\204\330\271\330\261\330\250'))\n True\n >>> is_alpha(lookup(u'10'))\n False\n "; +static PyObject *__pyx_pw_5spacy_11orthography_5latin_5is_alpha(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id) { + __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("is_alpha (wrapper)", 0); + assert(__pyx_arg_lex_id); { + __pyx_v_lex_id = __Pyx_PyInt_As_size_t(__pyx_arg_lex_id); if (unlikely((__pyx_v_lex_id == (size_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + __Pyx_AddTraceback("spacy.orthography.latin.is_alpha", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_5spacy_11orthography_5latin_4is_alpha(__pyx_self, ((__pyx_t_5spacy_6lexeme_LexID)__pyx_v_lex_id)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_5spacy_11orthography_5latin_4is_alpha(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("is_alpha", 0); + 
__Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __pyx_f_5spacy_11orthography_5latin_is_alpha(__pyx_v_lex_id, 0); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("spacy.orthography.latin.is_alpha", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "spacy/orthography/latin.pyx":50 + * + * + * cpdef bint is_digit(LexID lex_id) except *: # <<<<<<<<<<<<<< + * """Give the result of unicode.isdigit() for a Lexeme ID. + * + */ + +static PyObject *__pyx_pw_5spacy_11orthography_5latin_7is_digit(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/ +static int __pyx_f_5spacy_11orthography_5latin_is_digit(__pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id, CYTHON_UNUSED int __pyx_skip_dispatch) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("is_digit", 0); + + /* "spacy/orthography/latin.pyx":60 + * False + * """ + * return (lex_id).orth_flags & 1 << IS_DIGIT # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = (((struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_v_lex_id)->orth_flags & (1 << __pyx_e_5spacy_11orthography_5latin_IS_DIGIT)); + goto __pyx_L0; + + /* "spacy/orthography/latin.pyx":50 + * + * + * cpdef bint is_digit(LexID lex_id) except *: # <<<<<<<<<<<<<< + * """Give the result of unicode.isdigit() for a Lexeme ID. 
+ * + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_5spacy_11orthography_5latin_7is_digit(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/ +static char __pyx_doc_5spacy_11orthography_5latin_6is_digit[] = "Give the result of unicode.isdigit() for a Lexeme ID.\n\n >>> is_digit(lookup(u'10'))\n True\n >>> is_digit(lookup(u'\340\271\220'))\n True\n >>> is_digit(lookup(u'one'))\n False\n "; +static PyObject *__pyx_pw_5spacy_11orthography_5latin_7is_digit(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id) { + __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("is_digit (wrapper)", 0); + assert(__pyx_arg_lex_id); { + __pyx_v_lex_id = __Pyx_PyInt_As_size_t(__pyx_arg_lex_id); if (unlikely((__pyx_v_lex_id == (size_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + __Pyx_AddTraceback("spacy.orthography.latin.is_digit", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_5spacy_11orthography_5latin_6is_digit(__pyx_self, ((__pyx_t_5spacy_6lexeme_LexID)__pyx_v_lex_id)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_5spacy_11orthography_5latin_6is_digit(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("is_digit", 0); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = 
__pyx_f_5spacy_11orthography_5latin_is_digit(__pyx_v_lex_id, 0); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("spacy.orthography.latin.is_digit", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "spacy/orthography/latin.pyx":63 + * + * + * cpdef bint is_punct(LexID lex_id) except *: # <<<<<<<<<<<<<< + * """Give the result of checking whether all characters belong to a punctuation + * unicode data category for a Lexeme ID. + */ + +static PyObject *__pyx_pw_5spacy_11orthography_5latin_9is_punct(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/ +static int __pyx_f_5spacy_11orthography_5latin_is_punct(__pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id, CYTHON_UNUSED int __pyx_skip_dispatch) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("is_punct", 0); + + /* "spacy/orthography/latin.pyx":74 + * False + * """ + * return (lex_id).orth_flags & 1 << IS_PUNCT # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = (((struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_v_lex_id)->orth_flags & (1 << __pyx_e_5spacy_11orthography_5latin_IS_PUNCT)); + goto __pyx_L0; + + /* "spacy/orthography/latin.pyx":63 + * + * + * cpdef bint is_punct(LexID lex_id) except *: # <<<<<<<<<<<<<< + * """Give the result of checking whether all characters belong to a punctuation + * unicode data category for a Lexeme ID. 
+ */ + + /* function exit code */ + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_5spacy_11orthography_5latin_9is_punct(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/ +static char __pyx_doc_5spacy_11orthography_5latin_8is_punct[] = "Give the result of checking whether all characters belong to a punctuation\n unicode data category for a Lexeme ID.\n\n >>> is_punct(lookup(u'.'))\n True\n >>> is_punct(lookup(u'\342\201\222'))\n True\n >>> is_punct(lookup(u' '))\n False\n "; +static PyObject *__pyx_pw_5spacy_11orthography_5latin_9is_punct(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id) { + __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("is_punct (wrapper)", 0); + assert(__pyx_arg_lex_id); { + __pyx_v_lex_id = __Pyx_PyInt_As_size_t(__pyx_arg_lex_id); if (unlikely((__pyx_v_lex_id == (size_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + __Pyx_AddTraceback("spacy.orthography.latin.is_punct", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_5spacy_11orthography_5latin_8is_punct(__pyx_self, ((__pyx_t_5spacy_6lexeme_LexID)__pyx_v_lex_id)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_5spacy_11orthography_5latin_8is_punct(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + 
__Pyx_RefNannySetupContext("is_punct", 0); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __pyx_f_5spacy_11orthography_5latin_is_punct(__pyx_v_lex_id, 0); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("spacy.orthography.latin.is_punct", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "spacy/orthography/latin.pyx":77 + * + * + * cpdef bint is_space(LexID lex_id) except *: # <<<<<<<<<<<<<< + * """Give the result of unicode.isspace() for a Lexeme ID. + * + */ + +static PyObject *__pyx_pw_5spacy_11orthography_5latin_11is_space(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/ +static int __pyx_f_5spacy_11orthography_5latin_is_space(__pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id, CYTHON_UNUSED int __pyx_skip_dispatch) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("is_space", 0); + + /* "spacy/orthography/latin.pyx":87 + * False + * """ + * return (lex_id).orth_flags & 1 << IS_SPACE # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = (((struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_v_lex_id)->orth_flags & (1 << __pyx_e_5spacy_11orthography_5latin_IS_SPACE)); + goto __pyx_L0; + + /* "spacy/orthography/latin.pyx":77 + * + * + * cpdef bint is_space(LexID lex_id) except *: # <<<<<<<<<<<<<< + * """Give the result of unicode.isspace() for a Lexeme ID. 
+ * + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_5spacy_11orthography_5latin_11is_space(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/ +static char __pyx_doc_5spacy_11orthography_5latin_10is_space[] = "Give the result of unicode.isspace() for a Lexeme ID.\n\n >>> is_space(lookup(u'\t'))\n True\n >>> is_space(lookup(u''))\n True\n >>> is_space(lookup(u'Hi\n'))\n False\n "; +static PyObject *__pyx_pw_5spacy_11orthography_5latin_11is_space(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id) { + __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("is_space (wrapper)", 0); + assert(__pyx_arg_lex_id); { + __pyx_v_lex_id = __Pyx_PyInt_As_size_t(__pyx_arg_lex_id); if (unlikely((__pyx_v_lex_id == (size_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 77; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + __Pyx_AddTraceback("spacy.orthography.latin.is_space", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_5spacy_11orthography_5latin_10is_space(__pyx_self, ((__pyx_t_5spacy_6lexeme_LexID)__pyx_v_lex_id)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_5spacy_11orthography_5latin_10is_space(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("is_space", 0); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = 
__pyx_f_5spacy_11orthography_5latin_is_space(__pyx_v_lex_id, 0); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 77; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 77; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("spacy.orthography.latin.is_space", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "spacy/orthography/latin.pyx":90 + * + * + * cpdef bint is_lower(LexID lex_id) except *: # <<<<<<<<<<<<<< + * """Give the result of unicode.islower() for a Lexeme ID. + * + */ + +static PyObject *__pyx_pw_5spacy_11orthography_5latin_13is_lower(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/ +static int __pyx_f_5spacy_11orthography_5latin_is_lower(__pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id, CYTHON_UNUSED int __pyx_skip_dispatch) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("is_lower", 0); + + /* "spacy/orthography/latin.pyx":100 + * False + * """ + * return (lex_id).orth_flags & 1 << IS_LOWER # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = (((struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_v_lex_id)->orth_flags & (1 << __pyx_e_5spacy_11orthography_5latin_IS_LOWER)); + goto __pyx_L0; + + /* "spacy/orthography/latin.pyx":90 + * + * + * cpdef bint is_lower(LexID lex_id) except *: # <<<<<<<<<<<<<< + * """Give the result of unicode.islower() for a Lexeme ID. 
+ * + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_5spacy_11orthography_5latin_13is_lower(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/ +static char __pyx_doc_5spacy_11orthography_5latin_12is_lower[] = "Give the result of unicode.islower() for a Lexeme ID.\n\n >>> is_lower(lookup(u'hi'))\n True\n >>> is_lower(lookup())\n True\n >>> is_lower(lookup(u'10'))\n False\n "; +static PyObject *__pyx_pw_5spacy_11orthography_5latin_13is_lower(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id) { + __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("is_lower (wrapper)", 0); + assert(__pyx_arg_lex_id); { + __pyx_v_lex_id = __Pyx_PyInt_As_size_t(__pyx_arg_lex_id); if (unlikely((__pyx_v_lex_id == (size_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + __Pyx_AddTraceback("spacy.orthography.latin.is_lower", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_5spacy_11orthography_5latin_12is_lower(__pyx_self, ((__pyx_t_5spacy_6lexeme_LexID)__pyx_v_lex_id)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_5spacy_11orthography_5latin_12is_lower(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("is_lower", 0); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = 
__pyx_f_5spacy_11orthography_5latin_is_lower(__pyx_v_lex_id, 0); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("spacy.orthography.latin.is_lower", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "spacy/orthography/latin.pyx":103 + * + * + * cpdef bint is_upper(LexID lex_id) except *: # <<<<<<<<<<<<<< + * """Give the result of unicode.isupper() for a Lexeme ID. + * + */ + +static PyObject *__pyx_pw_5spacy_11orthography_5latin_15is_upper(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/ +static int __pyx_f_5spacy_11orthography_5latin_is_upper(__pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id, CYTHON_UNUSED int __pyx_skip_dispatch) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("is_upper", 0); + + /* "spacy/orthography/latin.pyx":113 + * False + * """ + * return (lex_id).orth_flags & 1 << IS_UPPER # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = (((struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_v_lex_id)->orth_flags & (1 << __pyx_e_5spacy_11orthography_5latin_IS_UPPER)); + goto __pyx_L0; + + /* "spacy/orthography/latin.pyx":103 + * + * + * cpdef bint is_upper(LexID lex_id) except *: # <<<<<<<<<<<<<< + * """Give the result of unicode.isupper() for a Lexeme ID. 
+ * + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_5spacy_11orthography_5latin_15is_upper(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/ +static char __pyx_doc_5spacy_11orthography_5latin_14is_upper[] = "Give the result of unicode.isupper() for a Lexeme ID.\n\n >>> is_upper(lookup(u'HI'))\n True\n >>> is_upper(lookup(u'H10'))\n True\n >>> is_upper(lookup(u'10'))\n False\n "; +static PyObject *__pyx_pw_5spacy_11orthography_5latin_15is_upper(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id) { + __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("is_upper (wrapper)", 0); + assert(__pyx_arg_lex_id); { + __pyx_v_lex_id = __Pyx_PyInt_As_size_t(__pyx_arg_lex_id); if (unlikely((__pyx_v_lex_id == (size_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 103; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + __Pyx_AddTraceback("spacy.orthography.latin.is_upper", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_5spacy_11orthography_5latin_14is_upper(__pyx_self, ((__pyx_t_5spacy_6lexeme_LexID)__pyx_v_lex_id)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_5spacy_11orthography_5latin_14is_upper(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("is_upper", 0); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = 
__pyx_f_5spacy_11orthography_5latin_is_upper(__pyx_v_lex_id, 0); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 103; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 103; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("spacy.orthography.latin.is_upper", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "spacy/orthography/latin.pyx":116 + * + * + * cpdef bint is_title(LexID lex_id) except *: # <<<<<<<<<<<<<< + * """Give the result of unicode.istitle() for a Lexeme ID. + * + */ + +static PyObject *__pyx_pw_5spacy_11orthography_5latin_17is_title(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/ +static int __pyx_f_5spacy_11orthography_5latin_is_title(__pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id, CYTHON_UNUSED int __pyx_skip_dispatch) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("is_title", 0); + + /* "spacy/orthography/latin.pyx":126 + * False + * """ + * return (lex_id).orth_flags & 1 << IS_TITLE # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = (((struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_v_lex_id)->orth_flags & (1 << __pyx_e_5spacy_11orthography_5latin_IS_TITLE)); + goto __pyx_L0; + + /* "spacy/orthography/latin.pyx":116 + * + * + * cpdef bint is_title(LexID lex_id) except *: # <<<<<<<<<<<<<< + * """Give the result of unicode.istitle() for a Lexeme ID. 
+ * + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_5spacy_11orthography_5latin_17is_title(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/ +static char __pyx_doc_5spacy_11orthography_5latin_16is_title[] = "Give the result of unicode.istitle() for a Lexeme ID.\n\n >>> is_title(lookup(u'Hi'))\n True\n >>> is_title(lookup(u'Hi1'))\n True\n >>> is_title(lookup(u'1'))\n False\n "; +static PyObject *__pyx_pw_5spacy_11orthography_5latin_17is_title(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id) { + __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("is_title (wrapper)", 0); + assert(__pyx_arg_lex_id); { + __pyx_v_lex_id = __Pyx_PyInt_As_size_t(__pyx_arg_lex_id); if (unlikely((__pyx_v_lex_id == (size_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + __Pyx_AddTraceback("spacy.orthography.latin.is_title", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_5spacy_11orthography_5latin_16is_title(__pyx_self, ((__pyx_t_5spacy_6lexeme_LexID)__pyx_v_lex_id)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_5spacy_11orthography_5latin_16is_title(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("is_title", 0); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = 
__pyx_f_5spacy_11orthography_5latin_is_title(__pyx_v_lex_id, 0); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("spacy.orthography.latin.is_title", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "spacy/orthography/latin.pyx":129 + * + * + * cpdef bint is_ascii(LexID lex_id) except *: # <<<<<<<<<<<<<< + * """Give the result of checking whether all characters in the string are ascii. + * + */ + +static PyObject *__pyx_pw_5spacy_11orthography_5latin_19is_ascii(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/ +static int __pyx_f_5spacy_11orthography_5latin_is_ascii(__pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id, CYTHON_UNUSED int __pyx_skip_dispatch) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("is_ascii", 0); + + /* "spacy/orthography/latin.pyx":139 + * False + * """ + * return (lex_id).orth_flags & 1 << IS_ASCII # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = (((struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_v_lex_id)->orth_flags & (1 << __pyx_e_5spacy_11orthography_5latin_IS_ASCII)); + goto __pyx_L0; + + /* "spacy/orthography/latin.pyx":129 + * + * + * cpdef bint is_ascii(LexID lex_id) except *: # <<<<<<<<<<<<<< + * """Give the result of checking whether all characters in the string are ascii. 
+ * + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_5spacy_11orthography_5latin_19is_ascii(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/ +static char __pyx_doc_5spacy_11orthography_5latin_18is_ascii[] = "Give the result of checking whether all characters in the string are ascii.\n\n >>> is_ascii(lookup(u'Hi'))\n True\n >>> is_ascii(lookup(u' '))\n True\n >>> is_title(lookup(u''))\n False\n "; +static PyObject *__pyx_pw_5spacy_11orthography_5latin_19is_ascii(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id) { + __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("is_ascii (wrapper)", 0); + assert(__pyx_arg_lex_id); { + __pyx_v_lex_id = __Pyx_PyInt_As_size_t(__pyx_arg_lex_id); if (unlikely((__pyx_v_lex_id == (size_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 129; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + __Pyx_AddTraceback("spacy.orthography.latin.is_ascii", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_5spacy_11orthography_5latin_18is_ascii(__pyx_self, ((__pyx_t_5spacy_6lexeme_LexID)__pyx_v_lex_id)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_5spacy_11orthography_5latin_18is_ascii(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("is_ascii", 0); + __Pyx_XDECREF(__pyx_r); + 
__pyx_t_1 = __pyx_f_5spacy_11orthography_5latin_is_ascii(__pyx_v_lex_id, 0); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 129; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 129; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("spacy.orthography.latin.is_ascii", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "spacy/orthography/latin.pyx":142 + * + * + * cpdef StringHash norm_of(LexID lex_id) except 0: # <<<<<<<<<<<<<< + * """Return the hash of a normalized version of the string. + * + */ + +static PyObject *__pyx_pw_5spacy_11orthography_5latin_21norm_of(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/ +static __pyx_t_5spacy_6lexeme_StringHash __pyx_f_5spacy_11orthography_5latin_norm_of(__pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id, CYTHON_UNUSED int __pyx_skip_dispatch) { + __pyx_t_5spacy_6lexeme_StringHash __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("norm_of", 0); + + /* "spacy/orthography/latin.pyx":152 + * u'...' + * """ + * return (lex_id).string_views[NORM] # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = (((struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_v_lex_id)->string_views[__pyx_e_5spacy_11orthography_5latin_NORM]); + goto __pyx_L0; + + /* "spacy/orthography/latin.pyx":142 + * + * + * cpdef StringHash norm_of(LexID lex_id) except 0: # <<<<<<<<<<<<<< + * """Return the hash of a normalized version of the string. 
+ * + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_5spacy_11orthography_5latin_21norm_of(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/ +static char __pyx_doc_5spacy_11orthography_5latin_20norm_of[] = "Return the hash of a normalized version of the string.\n\n >>> unhash(norm_of(lookupu'Hi'))\n u'hi'\n >>> unhash(norm_of(lookup(u'255667')))\n u'shape=dddd'\n >>> unhash(norm_of(lookup(u'...')))\n u'...'\n "; +static PyObject *__pyx_pw_5spacy_11orthography_5latin_21norm_of(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id) { + __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("norm_of (wrapper)", 0); + assert(__pyx_arg_lex_id); { + __pyx_v_lex_id = __Pyx_PyInt_As_size_t(__pyx_arg_lex_id); if (unlikely((__pyx_v_lex_id == (size_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + __Pyx_AddTraceback("spacy.orthography.latin.norm_of", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_5spacy_11orthography_5latin_20norm_of(__pyx_self, ((__pyx_t_5spacy_6lexeme_LexID)__pyx_v_lex_id)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_5spacy_11orthography_5latin_20norm_of(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + __pyx_t_5spacy_6lexeme_StringHash __pyx_t_1; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + 
__Pyx_RefNannySetupContext("norm_of", 0); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __pyx_f_5spacy_11orthography_5latin_norm_of(__pyx_v_lex_id, 0); if (unlikely(__pyx_t_1 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_From_uint32_t(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 142; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("spacy.orthography.latin.norm_of", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "spacy/orthography/latin.pyx":155 + * + * + * cpdef StringHash shape_of(LexID lex_id) except 0: # <<<<<<<<<<<<<< + * """Return the hash of the string shape. + * + */ + +static PyObject *__pyx_pw_5spacy_11orthography_5latin_23shape_of(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/ +static __pyx_t_5spacy_6lexeme_StringHash __pyx_f_5spacy_11orthography_5latin_shape_of(__pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id, CYTHON_UNUSED int __pyx_skip_dispatch) { + struct __pyx_t_5spacy_6lexeme_Lexeme *__pyx_v_w; + __pyx_t_5spacy_6lexeme_StringHash __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("shape_of", 0); + + /* "spacy/orthography/latin.pyx":165 + * u'...' 
+ * """ + * cdef Lexeme* w = lex_id # <<<<<<<<<<<<<< + * return w.string_views[SHAPE] + * + */ + __pyx_v_w = ((struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_v_lex_id); + + /* "spacy/orthography/latin.pyx":166 + * """ + * cdef Lexeme* w = lex_id + * return w.string_views[SHAPE] # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = (__pyx_v_w->string_views[__pyx_e_5spacy_11orthography_5latin_SHAPE]); + goto __pyx_L0; + + /* "spacy/orthography/latin.pyx":155 + * + * + * cpdef StringHash shape_of(LexID lex_id) except 0: # <<<<<<<<<<<<<< + * """Return the hash of the string shape. + * + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_5spacy_11orthography_5latin_23shape_of(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/ +static char __pyx_doc_5spacy_11orthography_5latin_22shape_of[] = "Return the hash of the string shape.\n\n >>> unhash(shape_of(lookupu'Hi'))\n u'Xx'\n >>> unhash(shape_of(lookup(u'255667')))\n u'dddd'\n >>> unhash(shape_of(lookup(u'...')))\n u'...'\n "; +static PyObject *__pyx_pw_5spacy_11orthography_5latin_23shape_of(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id) { + __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("shape_of (wrapper)", 0); + assert(__pyx_arg_lex_id); { + __pyx_v_lex_id = __Pyx_PyInt_As_size_t(__pyx_arg_lex_id); if (unlikely((__pyx_v_lex_id == (size_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 155; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + __Pyx_AddTraceback("spacy.orthography.latin.shape_of", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = 
__pyx_pf_5spacy_11orthography_5latin_22shape_of(__pyx_self, ((__pyx_t_5spacy_6lexeme_LexID)__pyx_v_lex_id)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_5spacy_11orthography_5latin_22shape_of(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + __pyx_t_5spacy_6lexeme_StringHash __pyx_t_1; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("shape_of", 0); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __pyx_f_5spacy_11orthography_5latin_shape_of(__pyx_v_lex_id, 0); if (unlikely(__pyx_t_1 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 155; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_From_uint32_t(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 155; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("spacy.orthography.latin.shape_of", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "spacy/orthography/latin.pyx":169 + * + * + * cpdef StringHash last3_of(LexID lex_id) except 0: # <<<<<<<<<<<<<< + * '''Access the `last3' field of the Lexeme pointed to by lex_id, which stores + * the hash of the last three characters of the word: + */ + +static PyObject *__pyx_pw_5spacy_11orthography_5latin_25last3_of(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/ +static __pyx_t_5spacy_6lexeme_StringHash __pyx_f_5spacy_11orthography_5latin_last3_of(__pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id, CYTHON_UNUSED int __pyx_skip_dispatch) { + __pyx_t_5spacy_6lexeme_StringHash __pyx_r; 
+ __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("last3_of", 0); + + /* "spacy/orthography/latin.pyx":176 + * [u'llo', u'!'] + * ''' + * return (lex_id).string_views[LAST3] # <<<<<<<<<<<<<< + */ + __pyx_r = (((struct __pyx_t_5spacy_6lexeme_Lexeme *)__pyx_v_lex_id)->string_views[__pyx_e_5spacy_11orthography_5latin_LAST3]); + goto __pyx_L0; + + /* "spacy/orthography/latin.pyx":169 + * + * + * cpdef StringHash last3_of(LexID lex_id) except 0: # <<<<<<<<<<<<<< + * '''Access the `last3' field of the Lexeme pointed to by lex_id, which stores + * the hash of the last three characters of the word: + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_5spacy_11orthography_5latin_25last3_of(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id); /*proto*/ +static char __pyx_doc_5spacy_11orthography_5latin_24last3_of[] = "Access the `last3' field of the Lexeme pointed to by lex_id, which stores\n the hash of the last three characters of the word:\n >>> lex_ids = [lookup(w) for w in (u'Hello', u'!')]\n >>> [unhash(last3_of(lex_id)) for lex_id in lex_ids]\n [u'llo', u'!']\n "; +static PyObject *__pyx_pw_5spacy_11orthography_5latin_25last3_of(PyObject *__pyx_self, PyObject *__pyx_arg_lex_id) { + __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("last3_of (wrapper)", 0); + assert(__pyx_arg_lex_id); { + __pyx_v_lex_id = __Pyx_PyInt_As_size_t(__pyx_arg_lex_id); if (unlikely((__pyx_v_lex_id == (size_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 169; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + __Pyx_AddTraceback("spacy.orthography.latin.last3_of", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + 
return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_5spacy_11orthography_5latin_24last3_of(__pyx_self, ((__pyx_t_5spacy_6lexeme_LexID)__pyx_v_lex_id)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_5spacy_11orthography_5latin_24last3_of(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_5spacy_6lexeme_LexID __pyx_v_lex_id) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + __pyx_t_5spacy_6lexeme_StringHash __pyx_t_1; + PyObject *__pyx_t_2 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("last3_of", 0); + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = __pyx_f_5spacy_11orthography_5latin_last3_of(__pyx_v_lex_id, 0); if (unlikely(__pyx_t_1 == 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 169; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_From_uint32_t(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 169; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_r = __pyx_t_2; + __pyx_t_2 = 0; + goto __pyx_L0; + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_AddTraceback("spacy.orthography.latin.last3_of", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyMethodDef __pyx_methods[] = { + {__Pyx_NAMESTR("is_alpha"), (PyCFunction)__pyx_pw_5spacy_11orthography_5latin_5is_alpha, METH_O, __Pyx_DOCSTR(__pyx_doc_5spacy_11orthography_5latin_4is_alpha)}, + {__Pyx_NAMESTR("is_digit"), (PyCFunction)__pyx_pw_5spacy_11orthography_5latin_7is_digit, METH_O, __Pyx_DOCSTR(__pyx_doc_5spacy_11orthography_5latin_6is_digit)}, + {__Pyx_NAMESTR("is_punct"), (PyCFunction)__pyx_pw_5spacy_11orthography_5latin_9is_punct, METH_O, __Pyx_DOCSTR(__pyx_doc_5spacy_11orthography_5latin_8is_punct)}, + 
{__Pyx_NAMESTR("is_space"), (PyCFunction)__pyx_pw_5spacy_11orthography_5latin_11is_space, METH_O, __Pyx_DOCSTR(__pyx_doc_5spacy_11orthography_5latin_10is_space)}, + {__Pyx_NAMESTR("is_lower"), (PyCFunction)__pyx_pw_5spacy_11orthography_5latin_13is_lower, METH_O, __Pyx_DOCSTR(__pyx_doc_5spacy_11orthography_5latin_12is_lower)}, + {__Pyx_NAMESTR("is_upper"), (PyCFunction)__pyx_pw_5spacy_11orthography_5latin_15is_upper, METH_O, __Pyx_DOCSTR(__pyx_doc_5spacy_11orthography_5latin_14is_upper)}, + {__Pyx_NAMESTR("is_title"), (PyCFunction)__pyx_pw_5spacy_11orthography_5latin_17is_title, METH_O, __Pyx_DOCSTR(__pyx_doc_5spacy_11orthography_5latin_16is_title)}, + {__Pyx_NAMESTR("is_ascii"), (PyCFunction)__pyx_pw_5spacy_11orthography_5latin_19is_ascii, METH_O, __Pyx_DOCSTR(__pyx_doc_5spacy_11orthography_5latin_18is_ascii)}, + {__Pyx_NAMESTR("norm_of"), (PyCFunction)__pyx_pw_5spacy_11orthography_5latin_21norm_of, METH_O, __Pyx_DOCSTR(__pyx_doc_5spacy_11orthography_5latin_20norm_of)}, + {__Pyx_NAMESTR("shape_of"), (PyCFunction)__pyx_pw_5spacy_11orthography_5latin_23shape_of, METH_O, __Pyx_DOCSTR(__pyx_doc_5spacy_11orthography_5latin_22shape_of)}, + {__Pyx_NAMESTR("last3_of"), (PyCFunction)__pyx_pw_5spacy_11orthography_5latin_25last3_of, METH_O, __Pyx_DOCSTR(__pyx_doc_5spacy_11orthography_5latin_24last3_of)}, + {0, 0, 0, 0} +}; + +#if PY_MAJOR_VERSION >= 3 +static struct PyModuleDef __pyx_moduledef = { + #if PY_VERSION_HEX < 0x03020000 + { PyObject_HEAD_INIT(NULL) NULL, 0, NULL }, + #else + PyModuleDef_HEAD_INIT, + #endif + __Pyx_NAMESTR("latin"), + 0, /* m_doc */ + -1, /* m_size */ + __pyx_methods /* m_methods */, + NULL, /* m_reload */ + NULL, /* m_traverse */ + NULL, /* m_clear */ + NULL /* m_free */ +}; +#endif + +static __Pyx_StringTabEntry __pyx_string_tab[] = { + {&__pyx_kp_s_, __pyx_k_, sizeof(__pyx_k_), 0, 0, 1, 0}, + {&__pyx_kp_u_Access_the_last3_field_of_the_Le, __pyx_k_Access_the_last3_field_of_the_Le, sizeof(__pyx_k_Access_the_last3_field_of_the_Le), 0, 1, 0, 0}, + 
{&__pyx_kp_u_Give_the_result_of_checking_whet, __pyx_k_Give_the_result_of_checking_whet, sizeof(__pyx_k_Give_the_result_of_checking_whet), 0, 1, 0, 0}, + {&__pyx_kp_u_Give_the_result_of_checking_whet_2, __pyx_k_Give_the_result_of_checking_whet_2, sizeof(__pyx_k_Give_the_result_of_checking_whet_2), 0, 1, 0, 0}, + {&__pyx_kp_u_Give_the_result_of_unicode_isalp, __pyx_k_Give_the_result_of_unicode_isalp, sizeof(__pyx_k_Give_the_result_of_unicode_isalp), 0, 1, 0, 0}, + {&__pyx_kp_u_Give_the_result_of_unicode_isdig, __pyx_k_Give_the_result_of_unicode_isdig, sizeof(__pyx_k_Give_the_result_of_unicode_isdig), 0, 1, 0, 0}, + {&__pyx_kp_u_Give_the_result_of_unicode_islow, __pyx_k_Give_the_result_of_unicode_islow, sizeof(__pyx_k_Give_the_result_of_unicode_islow), 0, 1, 0, 0}, + {&__pyx_kp_u_Give_the_result_of_unicode_isspa, __pyx_k_Give_the_result_of_unicode_isspa, sizeof(__pyx_k_Give_the_result_of_unicode_isspa), 0, 1, 0, 0}, + {&__pyx_kp_u_Give_the_result_of_unicode_istit, __pyx_k_Give_the_result_of_unicode_istit, sizeof(__pyx_k_Give_the_result_of_unicode_istit), 0, 1, 0, 0}, + {&__pyx_kp_u_Give_the_result_of_unicode_isupp, __pyx_k_Give_the_result_of_unicode_isupp, sizeof(__pyx_k_Give_the_result_of_unicode_isupp), 0, 1, 0, 0}, + {&__pyx_kp_u_Return_the_hash_of_a_normalized, __pyx_k_Return_the_hash_of_a_normalized, sizeof(__pyx_k_Return_the_hash_of_a_normalized), 0, 1, 0, 0}, + {&__pyx_kp_u_Return_the_hash_of_the_string_sh, __pyx_k_Return_the_hash_of_the_string_sh, sizeof(__pyx_k_Return_the_hash_of_the_string_sh), 0, 1, 0, 0}, + {&__pyx_kp_s_Users_matt_repos_spaCy_spacy_or, __pyx_k_Users_matt_repos_spaCy_spacy_or, sizeof(__pyx_k_Users_matt_repos_spaCy_spacy_or), 0, 0, 1, 0}, + {&__pyx_n_s_X, __pyx_k_X, sizeof(__pyx_k_X), 0, 0, 1, 1}, + {&__pyx_n_s_c, __pyx_k_c, sizeof(__pyx_k_c), 0, 0, 1, 1}, + {&__pyx_n_s_d, __pyx_k_d, sizeof(__pyx_k_d), 0, 0, 1, 1}, + {&__pyx_n_s_get_normalized, __pyx_k_get_normalized, sizeof(__pyx_k_get_normalized), 0, 0, 1, 1}, + 
{&__pyx_n_s_get_word_shape, __pyx_k_get_word_shape, sizeof(__pyx_k_get_word_shape), 0, 0, 1, 1}, + {&__pyx_kp_u_is_alpha_line_37, __pyx_k_is_alpha_line_37, sizeof(__pyx_k_is_alpha_line_37), 0, 1, 0, 0}, + {&__pyx_kp_u_is_ascii_line_129, __pyx_k_is_ascii_line_129, sizeof(__pyx_k_is_ascii_line_129), 0, 1, 0, 0}, + {&__pyx_kp_u_is_digit_line_50, __pyx_k_is_digit_line_50, sizeof(__pyx_k_is_digit_line_50), 0, 1, 0, 0}, + {&__pyx_kp_u_is_lower_line_90, __pyx_k_is_lower_line_90, sizeof(__pyx_k_is_lower_line_90), 0, 1, 0, 0}, + {&__pyx_kp_u_is_punct_line_63, __pyx_k_is_punct_line_63, sizeof(__pyx_k_is_punct_line_63), 0, 1, 0, 0}, + {&__pyx_kp_u_is_space_line_77, __pyx_k_is_space_line_77, sizeof(__pyx_k_is_space_line_77), 0, 1, 0, 0}, + {&__pyx_kp_u_is_title_line_116, __pyx_k_is_title_line_116, sizeof(__pyx_k_is_title_line_116), 0, 1, 0, 0}, + {&__pyx_kp_u_is_upper_line_103, __pyx_k_is_upper_line_103, sizeof(__pyx_k_is_upper_line_103), 0, 1, 0, 0}, + {&__pyx_n_s_isalpha, __pyx_k_isalpha, sizeof(__pyx_k_isalpha), 0, 0, 1, 1}, + {&__pyx_n_s_islower, __pyx_k_islower, sizeof(__pyx_k_islower), 0, 0, 1, 1}, + {&__pyx_n_s_last, __pyx_k_last, sizeof(__pyx_k_last), 0, 0, 1, 1}, + {&__pyx_kp_u_last3_of_line_169, __pyx_k_last3_of_line_169, sizeof(__pyx_k_last3_of_line_169), 0, 1, 0, 0}, + {&__pyx_n_s_length, __pyx_k_length, sizeof(__pyx_k_length), 0, 0, 1, 1}, + {&__pyx_n_s_lex, __pyx_k_lex, sizeof(__pyx_k_lex), 0, 0, 1, 1}, + {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, + {&__pyx_kp_u_norm_of_line_142, __pyx_k_norm_of_line_142, sizeof(__pyx_k_norm_of_line_142), 0, 1, 0, 0}, + {&__pyx_n_s_seq, __pyx_k_seq, sizeof(__pyx_k_seq), 0, 0, 1, 1}, + {&__pyx_n_s_shape, __pyx_k_shape, sizeof(__pyx_k_shape), 0, 0, 1, 1}, + {&__pyx_n_s_shape_char, __pyx_k_shape_char, sizeof(__pyx_k_shape_char), 0, 0, 1, 1}, + {&__pyx_kp_u_shape_of_line_155, __pyx_k_shape_of_line_155, sizeof(__pyx_k_shape_of_line_155), 0, 1, 0, 0}, + {&__pyx_n_s_spacy_orthography_latin, 
__pyx_k_spacy_orthography_latin, sizeof(__pyx_k_spacy_orthography_latin), 0, 0, 1, 1}, + {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, + {&__pyx_n_s_x, __pyx_k_x, sizeof(__pyx_k_x), 0, 0, 1, 1}, + {0, 0, 0, 0, 0, 0, 0} +}; +static int __Pyx_InitCachedBuiltins(void) { + return 0; +} + +static int __Pyx_InitCachedConstants(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); + + /* "spacy/orthography/latin.pyx":3 + * from spacy.lexeme cimport Lexeme + * + * def get_normalized(unicode lex): # <<<<<<<<<<<<<< + * if lex.isalpha() and lex.islower(): + * return lex + */ + __pyx_tuple__2 = PyTuple_Pack(1, __pyx_n_s_lex); if (unlikely(!__pyx_tuple__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_tuple__2); + __Pyx_GIVEREF(__pyx_tuple__2); + __pyx_codeobj__3 = (PyObject*)__Pyx_PyCode_New(1, 0, 1, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__2, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_matt_repos_spaCy_spacy_or, __pyx_n_s_get_normalized, 3, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + + /* "spacy/orthography/latin.pyx":10 + * + * + * def get_word_shape(unicode lex): # <<<<<<<<<<<<<< + * cdef size_t length = len(lex) + * shape = "" + */ + __pyx_tuple__4 = PyTuple_Pack(7, __pyx_n_s_lex, __pyx_n_s_length, __pyx_n_s_shape, __pyx_n_s_last, __pyx_n_s_shape_char, __pyx_n_s_seq, __pyx_n_s_c); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 10; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_tuple__4); + __Pyx_GIVEREF(__pyx_tuple__4); + __pyx_codeobj__5 = (PyObject*)__Pyx_PyCode_New(1, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__4, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_matt_repos_spaCy_spacy_or, 
__pyx_n_s_get_word_shape, 10, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 10; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_RefNannyFinishContext(); + return 0; + __pyx_L1_error:; + __Pyx_RefNannyFinishContext(); + return -1; +} + +static int __Pyx_InitGlobals(void) { + if (__Pyx_InitStrings(__pyx_string_tab) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_int_0 = PyInt_FromLong(0); if (unlikely(!__pyx_int_0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_int_1 = PyInt_FromLong(1); if (unlikely(!__pyx_int_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_int_3 = PyInt_FromLong(3); if (unlikely(!__pyx_int_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + return 0; + __pyx_L1_error:; + return -1; +} + +#if PY_MAJOR_VERSION < 3 +PyMODINIT_FUNC initlatin(void); /*proto*/ +PyMODINIT_FUNC initlatin(void) +#else +PyMODINIT_FUNC PyInit_latin(void); /*proto*/ +PyMODINIT_FUNC PyInit_latin(void) +#endif +{ + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannyDeclarations + #if CYTHON_REFNANNY + __Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny"); + if (!__Pyx_RefNanny) { + PyErr_Clear(); + __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny"); + if (!__Pyx_RefNanny) + Py_FatalError("failed to import 'refnanny' module"); + } + #endif + __Pyx_RefNannySetupContext("PyMODINIT_FUNC PyInit_latin(void)", 0); + if ( __Pyx_check_binary_version() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} + __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #ifdef __Pyx_CyFunction_USED + if (__Pyx_CyFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + #ifdef __Pyx_FusedFunction_USED + if (__pyx_FusedFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + #ifdef __Pyx_Generator_USED + if (__pyx_Generator_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + /*--- Library function declarations ---*/ + /*--- Threads initialization code ---*/ + #if defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS + #ifdef WITH_THREAD /* Python build with threading support? */ + PyEval_InitThreads(); + #endif + #endif + /*--- Module creation code ---*/ + #if PY_MAJOR_VERSION < 3 + __pyx_m = Py_InitModule4(__Pyx_NAMESTR("latin"), __pyx_methods, 0, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); + #else + __pyx_m = PyModule_Create(&__pyx_moduledef); + #endif + if (unlikely(!__pyx_m)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + Py_INCREF(__pyx_d); + __pyx_b = PyImport_AddModule(__Pyx_NAMESTR(__Pyx_BUILTIN_MODULE_NAME)); if (unlikely(!__pyx_b)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #if CYTHON_COMPILING_IN_PYPY + Py_INCREF(__pyx_b); + #endif + if (__Pyx_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + /*--- Initialize various global constants etc. 
---*/ + if (unlikely(__Pyx_InitGlobals() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) + if (__Pyx_init_sys_getdefaultencoding_params() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + if (__pyx_module_is_main_spacy__orthography__latin) { + if (__Pyx_SetAttrString(__pyx_m, "__name__", __pyx_n_s_main) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + } + #if PY_MAJOR_VERSION >= 3 + { + PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!PyDict_GetItemString(modules, "spacy.orthography.latin")) { + if (unlikely(PyDict_SetItemString(modules, "spacy.orthography.latin", __pyx_m) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + } + #endif + /*--- Builtin init code ---*/ + if (unlikely(__Pyx_InitCachedBuiltins() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + /*--- Constants init code ---*/ + if (unlikely(__Pyx_InitCachedConstants() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + /*--- Global init code ---*/ + /*--- Variable export code ---*/ + /*--- Function export code ---*/ + if (__Pyx_ExportFunction("is_alpha", (void (*)(void))__pyx_f_5spacy_11orthography_5latin_is_alpha, "int (__pyx_t_5spacy_6lexeme_LexID, int __pyx_skip_dispatch)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_ExportFunction("is_digit", (void (*)(void))__pyx_f_5spacy_11orthography_5latin_is_digit, "int (__pyx_t_5spacy_6lexeme_LexID, int __pyx_skip_dispatch)") < 0) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_ExportFunction("is_punct", (void (*)(void))__pyx_f_5spacy_11orthography_5latin_is_punct, "int (__pyx_t_5spacy_6lexeme_LexID, int __pyx_skip_dispatch)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_ExportFunction("is_space", (void (*)(void))__pyx_f_5spacy_11orthography_5latin_is_space, "int (__pyx_t_5spacy_6lexeme_LexID, int __pyx_skip_dispatch)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_ExportFunction("is_lower", (void (*)(void))__pyx_f_5spacy_11orthography_5latin_is_lower, "int (__pyx_t_5spacy_6lexeme_LexID, int __pyx_skip_dispatch)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_ExportFunction("is_upper", (void (*)(void))__pyx_f_5spacy_11orthography_5latin_is_upper, "int (__pyx_t_5spacy_6lexeme_LexID, int __pyx_skip_dispatch)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_ExportFunction("is_title", (void (*)(void))__pyx_f_5spacy_11orthography_5latin_is_title, "int (__pyx_t_5spacy_6lexeme_LexID, int __pyx_skip_dispatch)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_ExportFunction("is_ascii", (void (*)(void))__pyx_f_5spacy_11orthography_5latin_is_ascii, "int (__pyx_t_5spacy_6lexeme_LexID, int __pyx_skip_dispatch)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_ExportFunction("norm_of", (void (*)(void))__pyx_f_5spacy_11orthography_5latin_norm_of, "__pyx_t_5spacy_6lexeme_StringHash (__pyx_t_5spacy_6lexeme_LexID, int __pyx_skip_dispatch)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if 
(__Pyx_ExportFunction("shape_of", (void (*)(void))__pyx_f_5spacy_11orthography_5latin_shape_of, "__pyx_t_5spacy_6lexeme_StringHash (__pyx_t_5spacy_6lexeme_LexID, int __pyx_skip_dispatch)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_ExportFunction("last3_of", (void (*)(void))__pyx_f_5spacy_11orthography_5latin_last3_of, "__pyx_t_5spacy_6lexeme_StringHash (__pyx_t_5spacy_6lexeme_LexID, int __pyx_skip_dispatch)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + /*--- Type init code ---*/ + /*--- Type import code ---*/ + /*--- Variable import code ---*/ + /*--- Function import code ---*/ + /*--- Execution code ---*/ + + /* "spacy/orthography/latin.pyx":3 + * from spacy.lexeme cimport Lexeme + * + * def get_normalized(unicode lex): # <<<<<<<<<<<<<< + * if lex.isalpha() and lex.islower(): + * return lex + */ + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5spacy_11orthography_5latin_1get_normalized, NULL, __pyx_n_s_spacy_orthography_latin); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_get_normalized, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "spacy/orthography/latin.pyx":10 + * + * + * def get_word_shape(unicode lex): # <<<<<<<<<<<<<< + * cdef size_t length = len(lex) + * shape = "" + */ + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5spacy_11orthography_5latin_3get_word_shape, NULL, __pyx_n_s_spacy_orthography_latin); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 10; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_get_word_shape, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 10; __pyx_clineno 
= __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "spacy/orthography/latin.pyx":1 + * from spacy.lexeme cimport Lexeme # <<<<<<<<<<<<<< + * + * def get_normalized(unicode lex): + */ + __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + if (PyDict_SetItem(__pyx_t_1, __pyx_kp_u_is_alpha_line_37, __pyx_kp_u_Give_the_result_of_unicode_isalp) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_1, __pyx_kp_u_is_digit_line_50, __pyx_kp_u_Give_the_result_of_unicode_isdig) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_1, __pyx_kp_u_is_punct_line_63, __pyx_kp_u_Give_the_result_of_checking_whet) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_1, __pyx_kp_u_is_space_line_77, __pyx_kp_u_Give_the_result_of_unicode_isspa) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_1, __pyx_kp_u_is_lower_line_90, __pyx_kp_u_Give_the_result_of_unicode_islow) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_1, __pyx_kp_u_is_upper_line_103, __pyx_kp_u_Give_the_result_of_unicode_isupp) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_1, __pyx_kp_u_is_title_line_116, __pyx_kp_u_Give_the_result_of_unicode_istit) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_1, __pyx_kp_u_is_ascii_line_129, __pyx_kp_u_Give_the_result_of_checking_whet_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno 
= 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_1, __pyx_kp_u_norm_of_line_142, __pyx_kp_u_Return_the_hash_of_a_normalized) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_1, __pyx_kp_u_shape_of_line_155, __pyx_kp_u_Return_the_hash_of_the_string_sh) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_1, __pyx_kp_u_last3_of_line_169, __pyx_kp_u_Access_the_last3_field_of_the_Le) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + if (__pyx_m) { + __Pyx_AddTraceback("init spacy.orthography.latin", __pyx_clineno, __pyx_lineno, __pyx_filename); + Py_DECREF(__pyx_m); __pyx_m = 0; + } else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ImportError, "init spacy.orthography.latin"); + } + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + #if PY_MAJOR_VERSION < 3 + return; + #else + return __pyx_m; + #endif +} + +/* Runtime support code */ +#if CYTHON_REFNANNY +static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) { + PyObject *m = NULL, *p = NULL; + void *r = NULL; + m = PyImport_ImportModule((char *)modname); + if (!m) goto end; + p = PyObject_GetAttrString(m, (char *)"RefNannyAPI"); + if (!p) goto end; + r = PyLong_AsVoidPtr(p); +end: + Py_XDECREF(p); + Py_XDECREF(m); + return (__Pyx_RefNannyAPIStruct *)r; +} +#endif /* CYTHON_REFNANNY */ + +static void __Pyx_RaiseArgumentTypeInvalid(const char* name, PyObject *obj, PyTypeObject *type) { + PyErr_Format(PyExc_TypeError, + "Argument '%.200s' has incorrect type (expected %.200s, got %.200s)", + name, 
type->tp_name, Py_TYPE(obj)->tp_name); +} +static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, + const char *name, int exact) +{ + if (unlikely(!type)) { + PyErr_SetString(PyExc_SystemError, "Missing type object"); + return 0; + } + if (none_allowed && obj == Py_None) return 1; + else if (exact) { + if (likely(Py_TYPE(obj) == type)) return 1; + #if PY_MAJOR_VERSION == 2 + else if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1; + #endif + } + else { + if (likely(PyObject_TypeCheck(obj, type))) return 1; + } + __Pyx_RaiseArgumentTypeInvalid(name, obj, type); + return 0; +} + +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) { + PyObject *result; + ternaryfunc call = func->ob_type->tp_call; + if (unlikely(!call)) + return PyObject_Call(func, arg, kw); +#if PY_VERSION_HEX >= 0x02060000 + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; +#endif + result = (*call)(func, arg, kw); +#if PY_VERSION_HEX >= 0x02060000 + Py_LeaveRecursiveCall(); +#endif + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +static PyObject *__Pyx_GetBuiltinName(PyObject *name) { + PyObject* result = __Pyx_PyObject_GetAttrStr(__pyx_b, name); + if (unlikely(!result)) { + PyErr_Format(PyExc_NameError, +#if PY_MAJOR_VERSION >= 3 + "name '%U' is not defined", name); +#else + "name '%.200s' is not defined", PyString_AS_STRING(name)); +#endif + } + return result; +} + +static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name) { + PyObject *result; +#if CYTHON_COMPILING_IN_CPYTHON + result = PyDict_GetItem(__pyx_d, name); + if (result) { + Py_INCREF(result); + } else { +#else + result = PyObject_GetItem(__pyx_d, name); + if (!result) { + PyErr_Clear(); 
+#endif + result = __Pyx_GetBuiltinName(name); + } + return result; +} + +static CYTHON_INLINE int __Pyx_init_unicode_iteration( + PyObject* ustring, Py_ssize_t *length, void** data, int *kind) { +#if CYTHON_PEP393_ENABLED + if (unlikely(__Pyx_PyUnicode_READY(ustring) < 0)) return -1; + *kind = PyUnicode_KIND(ustring); + *length = PyUnicode_GET_LENGTH(ustring); + *data = PyUnicode_DATA(ustring); +#else + *kind = 0; + *length = PyUnicode_GET_SIZE(ustring); + *data = (void*)PyUnicode_AS_UNICODE(ustring); +#endif + return 0; +} + +#define __PYX_VERIFY_RETURN_INT(target_type, func_type, func) \ + { \ + func_type value = func(x); \ + if (sizeof(target_type) < sizeof(func_type)) { \ + if (unlikely(value != (func_type) (target_type) value)) { \ + func_type zero = 0; \ + PyErr_SetString(PyExc_OverflowError, \ + (is_unsigned && unlikely(value < zero)) ? \ + "can't convert negative value to " #target_type : \ + "value too large to convert to " #target_type); \ + return (target_type) -1; \ + } \ + } \ + return (target_type) value; \ + } + +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS + #include "longintrepr.h" + #endif +#endif +static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *x) { + const size_t neg_one = (size_t) -1, const_zero = 0; + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(size_t) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(size_t, long, PyInt_AS_LONG) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to size_t"); + return (size_t) -1; + } + return (size_t) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS + if (sizeof(digit) <= sizeof(size_t)) { + switch (Py_SIZE(x)) { + case 0: return 0; + case 1: return (size_t) 
((PyLongObject*)x)->ob_digit[0]; + } + } + #endif +#endif + if (unlikely(Py_SIZE(x) < 0)) { + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to size_t"); + return (size_t) -1; + } + if (sizeof(size_t) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT(size_t, unsigned long, PyLong_AsUnsignedLong) + } else if (sizeof(size_t) <= sizeof(unsigned long long)) { + __PYX_VERIFY_RETURN_INT(size_t, unsigned long long, PyLong_AsUnsignedLongLong) + } + } else { +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS + if (sizeof(digit) <= sizeof(size_t)) { + switch (Py_SIZE(x)) { + case 0: return 0; + case 1: return +(size_t) ((PyLongObject*)x)->ob_digit[0]; + case -1: return -(size_t) ((PyLongObject*)x)->ob_digit[0]; + } + } + #endif +#endif + if (sizeof(size_t) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT(size_t, long, PyLong_AsLong) + } else if (sizeof(size_t) <= sizeof(long long)) { + __PYX_VERIFY_RETURN_INT(size_t, long long, PyLong_AsLongLong) + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + size_t val; + PyObject *v = __Pyx_PyNumber_Int(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (size_t) -1; + } + } else { + size_t val; + PyObject *tmp = __Pyx_PyNumber_Int(x); + if (!tmp) return (size_t) -1; + val = __Pyx_PyInt_As_size_t(tmp); + Py_DECREF(tmp); + return val; + } +} + +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_uint32_t(uint32_t value) { + const uint32_t 
neg_one = (uint32_t) -1, const_zero = 0; + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(uint32_t) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(uint32_t) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); + } else if (sizeof(uint32_t) <= sizeof(unsigned long long)) { + return PyLong_FromUnsignedLongLong((unsigned long long) value); + } + } else { + if (sizeof(uint32_t) <= sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(uint32_t) <= sizeof(long long)) { + return PyLong_FromLongLong((long long) value); + } + } + { + int one = 1; int little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&value; + return _PyLong_FromByteArray(bytes, sizeof(uint32_t), + little, !is_unsigned); + } +} + +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { + const long neg_one = (long) -1, const_zero = 0; + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(long) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(long) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); + } else if (sizeof(long) <= sizeof(unsigned long long)) { + return PyLong_FromUnsignedLongLong((unsigned long long) value); + } + } else { + if (sizeof(long) <= sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(long) <= sizeof(long long)) { + return PyLong_FromLongLong((long long) value); + } + } + { + int one = 1; int little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&value; + return _PyLong_FromByteArray(bytes, sizeof(long), + little, !is_unsigned); + } +} + +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS + #include "longintrepr.h" + #endif +#endif +static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { + const long neg_one = (long) -1, const_zero = 0; + const int 
is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(long) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to long"); + return (long) -1; + } + return (long) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS + if (sizeof(digit) <= sizeof(long)) { + switch (Py_SIZE(x)) { + case 0: return 0; + case 1: return (long) ((PyLongObject*)x)->ob_digit[0]; + } + } + #endif +#endif + if (unlikely(Py_SIZE(x) < 0)) { + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to long"); + return (long) -1; + } + if (sizeof(long) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, PyLong_AsUnsignedLong) + } else if (sizeof(long) <= sizeof(unsigned long long)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long long, PyLong_AsUnsignedLongLong) + } + } else { +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS + if (sizeof(digit) <= sizeof(long)) { + switch (Py_SIZE(x)) { + case 0: return 0; + case 1: return +(long) ((PyLongObject*)x)->ob_digit[0]; + case -1: return -(long) ((PyLongObject*)x)->ob_digit[0]; + } + } + #endif +#endif + if (sizeof(long) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT(long, long, PyLong_AsLong) + } else if (sizeof(long) <= sizeof(long long)) { + __PYX_VERIFY_RETURN_INT(long, long long, PyLong_AsLongLong) + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + long val; + PyObject *v = __Pyx_PyNumber_Int(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v 
= PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (long) -1; + } + } else { + long val; + PyObject *tmp = __Pyx_PyNumber_Int(x); + if (!tmp) return (long) -1; + val = __Pyx_PyInt_As_long(tmp); + Py_DECREF(tmp); + return val; + } +} + +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS + #include "longintrepr.h" + #endif +#endif +static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { + const int neg_one = (int) -1, const_zero = 0; + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(int) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to int"); + return (int) -1; + } + return (int) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS + if (sizeof(digit) <= sizeof(int)) { + switch (Py_SIZE(x)) { + case 0: return 0; + case 1: return (int) ((PyLongObject*)x)->ob_digit[0]; + } + } + #endif +#endif + if (unlikely(Py_SIZE(x) < 0)) { + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to int"); + return (int) -1; + } + if (sizeof(int) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, PyLong_AsUnsignedLong) + } else if (sizeof(int) <= sizeof(unsigned long long)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long long, PyLong_AsUnsignedLongLong) + } + } else { +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if 
CYTHON_USE_PYLONG_INTERNALS + if (sizeof(digit) <= sizeof(int)) { + switch (Py_SIZE(x)) { + case 0: return 0; + case 1: return +(int) ((PyLongObject*)x)->ob_digit[0]; + case -1: return -(int) ((PyLongObject*)x)->ob_digit[0]; + } + } + #endif +#endif + if (sizeof(int) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT(int, long, PyLong_AsLong) + } else if (sizeof(int) <= sizeof(long long)) { + __PYX_VERIFY_RETURN_INT(int, long long, PyLong_AsLongLong) + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + int val; + PyObject *v = __Pyx_PyNumber_Int(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (int) -1; + } + } else { + int val; + PyObject *tmp = __Pyx_PyNumber_Int(x); + if (!tmp) return (int) -1; + val = __Pyx_PyInt_As_int(tmp); + Py_DECREF(tmp); + return val; + } +} + +static int __Pyx_check_binary_version(void) { + char ctversion[4], rtversion[4]; + PyOS_snprintf(ctversion, 4, "%d.%d", PY_MAJOR_VERSION, PY_MINOR_VERSION); + PyOS_snprintf(rtversion, 4, "%s", Py_GetVersion()); + if (ctversion[0] != rtversion[0] || ctversion[2] != rtversion[2]) { + char message[200]; + PyOS_snprintf(message, sizeof(message), + "compiletime version %s of module '%.100s' " + "does not match runtime version %s", + ctversion, __Pyx_MODULE_NAME, rtversion); + #if PY_VERSION_HEX < 0x02050000 + return PyErr_Warn(NULL, message); + #else + return PyErr_WarnEx(NULL, message, 1); + #endif + } + return 0; +} + +static int __Pyx_ExportFunction(const char *name, 
void (*f)(void), const char *sig) { + PyObject *d = 0; + PyObject *cobj = 0; + union { + void (*fp)(void); + void *p; + } tmp; + d = PyObject_GetAttrString(__pyx_m, (char *)"__pyx_capi__"); + if (!d) { + PyErr_Clear(); + d = PyDict_New(); + if (!d) + goto bad; + Py_INCREF(d); + if (PyModule_AddObject(__pyx_m, (char *)"__pyx_capi__", d) < 0) + goto bad; + } + tmp.fp = f; +#if PY_VERSION_HEX >= 0x02070000 && !(PY_MAJOR_VERSION==3&&PY_MINOR_VERSION==0) + cobj = PyCapsule_New(tmp.p, sig, 0); +#else + cobj = PyCObject_FromVoidPtrAndDesc(tmp.p, (void *)sig, 0); +#endif + if (!cobj) + goto bad; + if (PyDict_SetItemString(d, name, cobj) < 0) + goto bad; + Py_DECREF(cobj); + Py_DECREF(d); + return 0; +bad: + Py_XDECREF(cobj); + Py_XDECREF(d); + return -1; +} + +static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { + int start = 0, mid = 0, end = count - 1; + if (end >= 0 && code_line > entries[end].code_line) { + return count; + } + while (start < end) { + mid = (start + end) / 2; + if (code_line < entries[mid].code_line) { + end = mid; + } else if (code_line > entries[mid].code_line) { + start = mid + 1; + } else { + return mid; + } + } + if (code_line <= entries[mid].code_line) { + return mid; + } else { + return mid + 1; + } +} +static PyCodeObject *__pyx_find_code_object(int code_line) { + PyCodeObject* code_object; + int pos; + if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) { + return NULL; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) { + return NULL; + } + code_object = __pyx_code_cache.entries[pos].code_object; + Py_INCREF(code_object); + return code_object; +} +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) { + int pos, i; + __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries; + if 
(unlikely(!code_line)) { + return; + } + if (unlikely(!entries)) { + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry)); + if (likely(entries)) { + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = 64; + __pyx_code_cache.count = 1; + entries[0].code_line = code_line; + entries[0].code_object = code_object; + Py_INCREF(code_object); + } + return; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) { + PyCodeObject* tmp = entries[pos].code_object; + entries[pos].code_object = code_object; + Py_DECREF(tmp); + return; + } + if (__pyx_code_cache.count == __pyx_code_cache.max_count) { + int new_max = __pyx_code_cache.max_count + 64; + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc( + __pyx_code_cache.entries, (size_t)new_max*sizeof(__Pyx_CodeObjectCacheEntry)); + if (unlikely(!entries)) { + return; + } + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = new_max; + } + for (i=__pyx_code_cache.count; i>pos; i--) { + entries[i] = entries[i-1]; + } + entries[pos].code_line = code_line; + entries[pos].code_object = code_object; + __pyx_code_cache.count++; + Py_INCREF(code_object); +} + +#include "compile.h" +#include "frameobject.h" +#include "traceback.h" +static PyCodeObject* __Pyx_CreateCodeObjectForTraceback( + const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = 0; + PyObject *py_srcfile = 0; + PyObject *py_funcname = 0; + #if PY_MAJOR_VERSION < 3 + py_srcfile = PyString_FromString(filename); + #else + py_srcfile = PyUnicode_FromString(filename); + #endif + if (!py_srcfile) goto bad; + if (c_line) { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + #else + py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, 
c_line); + #endif + } + else { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromString(funcname); + #else + py_funcname = PyUnicode_FromString(funcname); + #endif + } + if (!py_funcname) goto bad; + py_code = __Pyx_PyCode_New( + 0, /*int argcount,*/ + 0, /*int kwonlyargcount,*/ + 0, /*int nlocals,*/ + 0, /*int stacksize,*/ + 0, /*int flags,*/ + __pyx_empty_bytes, /*PyObject *code,*/ + __pyx_empty_tuple, /*PyObject *consts,*/ + __pyx_empty_tuple, /*PyObject *names,*/ + __pyx_empty_tuple, /*PyObject *varnames,*/ + __pyx_empty_tuple, /*PyObject *freevars,*/ + __pyx_empty_tuple, /*PyObject *cellvars,*/ + py_srcfile, /*PyObject *filename,*/ + py_funcname, /*PyObject *name,*/ + py_line, /*int firstlineno,*/ + __pyx_empty_bytes /*PyObject *lnotab*/ + ); + Py_DECREF(py_srcfile); + Py_DECREF(py_funcname); + return py_code; +bad: + Py_XDECREF(py_srcfile); + Py_XDECREF(py_funcname); + return NULL; +} +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = 0; + PyObject *py_globals = 0; + PyFrameObject *py_frame = 0; + py_code = __pyx_find_code_object(c_line ? c_line : py_line); + if (!py_code) { + py_code = __Pyx_CreateCodeObjectForTraceback( + funcname, c_line, py_line, filename); + if (!py_code) goto bad; + __pyx_insert_code_object(c_line ? 
c_line : py_line, py_code); + } + py_globals = PyModule_GetDict(__pyx_m); + if (!py_globals) goto bad; + py_frame = PyFrame_New( + PyThreadState_GET(), /*PyThreadState *tstate,*/ + py_code, /*PyCodeObject *code,*/ + py_globals, /*PyObject *globals,*/ + 0 /*PyObject *locals*/ + ); + if (!py_frame) goto bad; + py_frame->f_lineno = py_line; + PyTraceBack_Here(py_frame); +bad: + Py_XDECREF(py_code); + Py_XDECREF(py_frame); +} + +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { + while (t->p) { + #if PY_MAJOR_VERSION < 3 + if (t->is_unicode) { + *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); + } else if (t->intern) { + *t->p = PyString_InternFromString(t->s); + } else { + *t->p = PyString_FromStringAndSize(t->s, t->n - 1); + } + #else /* Python 3+ has unicode identifiers */ + if (t->is_unicode | t->is_str) { + if (t->intern) { + *t->p = PyUnicode_InternFromString(t->s); + } else if (t->encoding) { + *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL); + } else { + *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1); + } + } else { + *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1); + } + #endif + if (!*t->p) + return -1; + ++t; + } + return 0; +} + +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) { + return __Pyx_PyUnicode_FromStringAndSize(c_str, (Py_ssize_t)strlen(c_str)); +} +static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) { + Py_ssize_t ignore; + return __Pyx_PyObject_AsStringAndSize(o, &ignore); +} +static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT + if ( +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + __Pyx_sys_getdefaultencoding_not_ascii && +#endif + PyUnicode_Check(o)) { +#if PY_VERSION_HEX < 0x03030000 + char* defenc_c; + PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); + if (!defenc) return NULL; + defenc_c = 
PyBytes_AS_STRING(defenc); +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + { + char* end = defenc_c + PyBytes_GET_SIZE(defenc); + char* c; + for (c = defenc_c; c < end; c++) { + if ((unsigned char) (*c) >= 128) { + PyUnicode_AsASCIIString(o); + return NULL; + } + } + } +#endif /*__PYX_DEFAULT_STRING_ENCODING_IS_ASCII*/ + *length = PyBytes_GET_SIZE(defenc); + return defenc_c; +#else /* PY_VERSION_HEX < 0x03030000 */ + if (PyUnicode_READY(o) == -1) return NULL; +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + if (PyUnicode_IS_ASCII(o)) { + *length = PyUnicode_GET_LENGTH(o); + return PyUnicode_AsUTF8(o); + } else { + PyUnicode_AsASCIIString(o); + return NULL; + } +#else /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */ + return PyUnicode_AsUTF8AndSize(o, length); +#endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */ +#endif /* PY_VERSION_HEX < 0x03030000 */ + } else +#endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT */ +#if !CYTHON_COMPILING_IN_PYPY +#if PY_VERSION_HEX >= 0x02060000 + if (PyByteArray_Check(o)) { + *length = PyByteArray_GET_SIZE(o); + return PyByteArray_AS_STRING(o); + } else +#endif +#endif + { + char* result; + int r = PyBytes_AsStringAndSize(o, &result, length); + if (unlikely(r < 0)) { + return NULL; + } else { + return result; + } + } +} +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { + int is_true = x == Py_True; + if (is_true | (x == Py_False) | (x == Py_None)) return is_true; + else return PyObject_IsTrue(x); +} +static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) { + PyNumberMethods *m; + const char *name = NULL; + PyObject *res = NULL; +#if PY_MAJOR_VERSION < 3 + if (PyInt_Check(x) || PyLong_Check(x)) +#else + if (PyLong_Check(x)) +#endif + return Py_INCREF(x), x; + m = Py_TYPE(x)->tp_as_number; +#if PY_MAJOR_VERSION < 3 + if (m && m->nb_int) { + name = "int"; + res = PyNumber_Int(x); + } + else if (m && m->nb_long) { + name = "long"; + res = PyNumber_Long(x); + } +#else + if (m && 
m->nb_int) { + name = "int"; + res = PyNumber_Long(x); + } +#endif + if (res) { +#if PY_MAJOR_VERSION < 3 + if (!PyInt_Check(res) && !PyLong_Check(res)) { +#else + if (!PyLong_Check(res)) { +#endif + PyErr_Format(PyExc_TypeError, + "__%.4s__ returned non-%.4s (type %.200s)", + name, name, Py_TYPE(res)->tp_name); + Py_DECREF(res); + return NULL; + } + } + else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_TypeError, + "an integer is required"); + } + return res; +} +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS + #include "longintrepr.h" + #endif +#endif +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { + Py_ssize_t ival; + PyObject *x; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_CheckExact(b))) + return PyInt_AS_LONG(b); +#endif + if (likely(PyLong_CheckExact(b))) { + #if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS + switch (Py_SIZE(b)) { + case -1: return -(sdigit)((PyLongObject*)b)->ob_digit[0]; + case 0: return 0; + case 1: return ((PyLongObject*)b)->ob_digit[0]; + } + #endif + #endif + #if PY_VERSION_HEX < 0x02060000 + return PyInt_AsSsize_t(b); + #else + return PyLong_AsSsize_t(b); + #endif + } + x = PyNumber_Index(b); + if (!x) return -1; + ival = PyInt_AsSsize_t(x); + Py_DECREF(x); + return ival; +} +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) { +#if PY_VERSION_HEX < 0x02050000 + if (ival <= LONG_MAX) + return PyInt_FromLong((long)ival); + else { + unsigned char *bytes = (unsigned char *) &ival; + int one = 1; int little = (int)*(unsigned char*)&one; + return _PyLong_FromByteArray(bytes, sizeof(size_t), little, 0); + } +#else + return PyInt_FromSize_t(ival); +#endif +} + + +#endif /* Py_PYTHON_H */ diff --git a/spacy/orthography/latin.pxd b/spacy/orthography/latin.pxd new file mode 100644 index 000000000..9c3e853ec --- /dev/null +++ b/spacy/orthography/latin.pxd @@ -0,0 +1,33 @@ +cdef enum OrthFlag: + IS_ALPHA + IS_DIGIT + 
IS_PUNCT + IS_SPACE + IS_LOWER + IS_UPPER + IS_TITLE + IS_ASCII + + +cdef enum: + LEX + LAST3 + NORM + SHAPE + +from spacy.lexeme cimport LexID +from spacy.lexeme cimport StringHash + +cpdef bint is_alpha(LexID lex_id) except * +cpdef bint is_digit(LexID lex_id) except * +cpdef bint is_punct(LexID lex_id) except * +cpdef bint is_space(LexID lex_id) except * +cpdef bint is_lower(LexID lex_id) except * +cpdef bint is_upper(LexID lex_id) except * +cpdef bint is_title(LexID lex_id) except * +cpdef bint is_ascii(LexID lex_id) except * + + +cpdef StringHash norm_of(LexID lex_id) except 0 +cpdef StringHash shape_of(LexID lex_id) except 0 +cpdef StringHash last3_of(LexID lex_id) except 0 diff --git a/spacy/orthography/latin.pyx b/spacy/orthography/latin.pyx new file mode 100644 index 000000000..684eb4c2e --- /dev/null +++ b/spacy/orthography/latin.pyx @@ -0,0 +1,177 @@ +# cython: embedsignature=True +from spacy.lexeme cimport Lexeme + +def get_normalized(unicode lex): + if lex.isalpha() and lex.islower(): + return lex + else: + return get_word_shape(lex) + + +def get_word_shape(unicode lex): + cdef size_t length = len(lex) + shape = "" + last = "" + shape_char = "" + seq = 0 + for c in lex: + if c.isalpha(): + if c.isupper(): + shape_char = "X" + else: + shape_char = "x" + elif c.isdigit(): + shape_char = "d" + else: + shape_char = c + if shape_char == last: + seq += 1 + else: + seq = 0 + last = shape_char + if seq < 3: + shape += shape_char + assert shape + return shape + + +cpdef bint is_alpha(LexID lex_id) except *: + """Give the result of unicode.isalpha() for a Lexeme ID. + + >>> is_alpha(lookup(u'Hello')) + True + >>> is_alpha(lookup(u'العرب')) + True + >>> is_alpha(lookup(u'10')) + False + """ + return (lex_id).orth_flags & 1 << IS_ALPHA + + +cpdef bint is_digit(LexID lex_id) except *: + """Give the result of unicode.isdigit() for a Lexeme ID. 
+ + >>> is_digit(lookup(u'10')) + True + >>> is_digit(lookup(u'๐')) + True + >>> is_digit(lookup(u'one')) + False + """ + return (lex_id).orth_flags & 1 << IS_DIGIT + + +cpdef bint is_punct(LexID lex_id) except *: + """Give the result of checking whether all characters belong to a punctuation + unicode data category for a Lexeme ID. + + >>> is_punct(lookup(u'.')) + True + >>> is_punct(lookup(u'⁒')) + True + >>> is_punct(lookup(u' ')) + False + """ + return (lex_id).orth_flags & 1 << IS_PUNCT + + +cpdef bint is_space(LexID lex_id) except *: + """Give the result of unicode.isspace() for a Lexeme ID. + + >>> is_space(lookup(u'\t')) + True + >>> is_space(lookup(u'')) + True + >>> is_space(lookup(u'Hi\n')) + False + """ + return (lex_id).orth_flags & 1 << IS_SPACE + + +cpdef bint is_lower(LexID lex_id) except *: + """Give the result of unicode.islower() for a Lexeme ID. + + >>> is_lower(lookup(u'hi')) + True + >>> is_lower(lookup()) + True + >>> is_lower(lookup(u'10')) + False + """ + return (lex_id).orth_flags & 1 << IS_LOWER + + +cpdef bint is_upper(LexID lex_id) except *: + """Give the result of unicode.isupper() for a Lexeme ID. + + >>> is_upper(lookup(u'HI')) + True + >>> is_upper(lookup(u'H10')) + True + >>> is_upper(lookup(u'10')) + False + """ + return (lex_id).orth_flags & 1 << IS_UPPER + + +cpdef bint is_title(LexID lex_id) except *: + """Give the result of unicode.istitle() for a Lexeme ID. + + >>> is_title(lookup(u'Hi')) + True + >>> is_title(lookup(u'Hi1')) + True + >>> is_title(lookup(u'1')) + False + """ + return (lex_id).orth_flags & 1 << IS_TITLE + + +cpdef bint is_ascii(LexID lex_id) except *: + """Give the result of checking whether all characters in the string are ascii. + + >>> is_ascii(lookup(u'Hi')) + True + >>> is_ascii(lookup(u' ')) + True + >>> is_title(lookup(u'')) + False + """ + return (lex_id).orth_flags & 1 << IS_ASCII + + +cpdef StringHash norm_of(LexID lex_id) except 0: + """Return the hash of a normalized version of the string. 
+ + >>> unhash(norm_of(lookupu'Hi')) + u'hi' + >>> unhash(norm_of(lookup(u'255667'))) + u'shape=dddd' + >>> unhash(norm_of(lookup(u'...'))) + u'...' + """ + return (lex_id).string_views[NORM] + + +cpdef StringHash shape_of(LexID lex_id) except 0: + """Return the hash of the string shape. + + >>> unhash(shape_of(lookupu'Hi')) + u'Xx' + >>> unhash(shape_of(lookup(u'255667'))) + u'dddd' + >>> unhash(shape_of(lookup(u'...'))) + u'...' + """ + cdef Lexeme* w = lex_id + return w.string_views[SHAPE] + + +cpdef StringHash last3_of(LexID lex_id) except 0: + '''Access the `last3' field of the Lexeme pointed to by lex_id, which stores + the hash of the last three characters of the word: + >>> lex_ids = [lookup(w) for w in (u'Hello', u'!')] + >>> [unhash(last3_of(lex_id)) for lex_id in lex_ids] + [u'llo', u'!'] + ''' + return (lex_id).string_views[LAST3]