psycopg2/psycopg/typecast_array.c

296 lines
8.6 KiB
C
Raw Normal View History

2005-03-22 17:20:20 +03:00
/* typecast_array.c - array typecasters
*
* Copyright (C) 2005-2010 Federico Di Gregorio <fog@debian.org>
2005-03-22 17:20:20 +03:00
*
* This file is part of psycopg.
2005-03-22 17:20:20 +03:00
*
* psycopg2 is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
2005-03-22 17:20:20 +03:00
*
* In addition, as a special exception, the copyright holders give
* permission to link this program with the OpenSSL library (or with
* modified versions of OpenSSL that use the same license as OpenSSL),
* and distribute linked combinations including the two.
2005-03-22 17:20:20 +03:00
*
* You must obey the GNU Lesser General Public License in all respects for
* all of the code used other than OpenSSL.
*
* psycopg2 is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
2005-03-22 17:20:20 +03:00
*/
2005-03-23 13:32:30 +03:00
/* Size of the stack of parent lists kept by typecast_array_scan(): it bounds
   how many nested arrays can be open at the same time.  Input nesting deeper
   than this is rejected with a DataError ("excessive array dimensions"). */
#define MAX_DIMENSIONS 16
/** typecast_array_cleanup - remove the horrible [...]= stuff
 *
 * PostgreSQL prefixes an array literal with its explicit bounds when any
 * lower bound is different from one, using one bracketed group for each
 * dimension, e.g. "[0:2]={1,2,3}" or "[1:1][-2:-1]={{1,2}}".
 *
 * On success *str is moved just after the '=' sign, *len is reduced by the
 * number of characters skipped and 0 is returned.
 *
 * Return -1, leaving *str and *len untouched, if the string doesn't start
 * with '[', if a bracket group is not closed or if the groups are not
 * followed by '=' within the first *len characters.
 **/
static int
typecast_array_cleanup(const char **str, Py_ssize_t *len)
{
    const char *s = *str;
    Py_ssize_t n = *len;
    Py_ssize_t i = 0;

    if (n < 1 || s[0] != '[') return -1;

    /* skip every "[...]" group: there is one for each array dimension */
    while (i < n && s[i] == '[') {
        Py_ssize_t depth = 1;

        for (i++ ; depth > 0 && i < n ; i++) {
            if (s[i] == '[')
                depth += 1;
            else if (s[i] == ']')
                depth -= 1;
        }

        /* the input finished in the middle of a group */
        if (depth > 0) return -1;
    }

    /* the '=' must be part of the input: never look past its length */
    if (i >= n || s[i] != '=') return -1;

    *str = s + i + 1;
    *len = n - i - 1;
    return 0;
}
2005-03-22 17:20:20 +03:00
/** typecast_array_scan - scan a string looking for array items **/
2005-03-23 13:32:30 +03:00
/* values returned by typecast_array_tokenize() to describe what was found */
#define ASCAN_ERROR -1  /* the tokenizer failed (memory allocation error) */
#define ASCAN_EOF 0     /* the end of the string was reached */
#define ASCAN_BEGIN 1   /* a '{' was found */
#define ASCAN_END 2     /* a '}' was found */
#define ASCAN_TOKEN 3   /* an element: the token points into the input */
#define ASCAN_QUOTED 4  /* an element containing backslashes: the token is an
                           allocated, unescaped copy */
/* typecast_array_tokenize - return the next token found in an array literal
 *
 * Look at the `strlength` characters long string `str` starting from the
 * offset `*pos` and return:
 *
 *   ASCAN_EOF     if `*pos` is at the end of the string;
 *   ASCAN_BEGIN   if a '{' was found;
 *   ASCAN_END     if a '}' was found (a ',' following it is consumed too);
 *   ASCAN_TOKEN   if an element was found: `*token` points inside `str`,
 *                 it is not null-terminated and must not be freed;
 *   ASCAN_QUOTED  if an element containing backslashes was found: `*token`
 *                 is a null-terminated copy with the escapes removed,
 *                 allocated by PyMem_Malloc(): the caller must release it
 *                 using PyMem_Free();
 *   ASCAN_ERROR   if the copy couldn't be allocated (PyErr_NoMemory() has
 *                 been called).
 *
 * For the elements `*length` is set to the length of the token and `*quotes`
 * to 1 if the element was wrapped in double quotes, else to 0.
 *
 * `*pos` is moved to the start of the next token and never past `strlength`:
 * characters outside the declared length are not consumed, otherwise the
 * end-of-string test would not be matched by the following call.
 */
static int
typecast_array_tokenize(const char *str, Py_ssize_t strlength,
                        Py_ssize_t *pos, char** token,
                        Py_ssize_t *length, int *quotes)
{
    /* FORTRAN glory */
    Py_ssize_t i, l;
    int q, b, res;

    Dprintf("typecast_array_tokenize: '%s', "
            FORMAT_CODE_PY_SSIZE_T "/" FORMAT_CODE_PY_SSIZE_T,
            &str[*pos], *pos, strlength);

    /* we always get called with pos pointing at the start of a token, so a
       fast check is enough for ASCAN_EOF, ASCAN_BEGIN and ASCAN_END */
    if (*pos >= strlength) {
        return ASCAN_EOF;
    }
    else if (str[*pos] == '{') {
        *pos += 1;
        return ASCAN_BEGIN;
    }
    else if (str[*pos] == '}') {
        *pos += 1;
        /* only skip a separator that is really part of the input */
        if (*pos < strlength && str[*pos] == ',')
            *pos += 1;
        return ASCAN_END;
    }

    /* now we start looking for the first unquoted ',' or '}', the only two
       tokens that can limit an array element */
    q = 0; /* if q is odd we're inside quotes */
    b = 0; /* if b is 1 we just encountered a backslash */
    res = ASCAN_TOKEN;

    for (i = *pos ; i < strlength ; i++) {
        switch (str[i]) {
        case '"':
            if (b == 0)
                q += 1;
            else
                b = 0;
            break;

        case '\\':
            res = ASCAN_QUOTED;
            if (b == 0)
                b = 1;
            else
                /* we're backslashing a backslash */
                b = 0;
            break;

        case '}':
        case ',':
            if (b == 0 && ((q&1) == 0))
                goto tokenize;
            /* if the delimiter was escaped the backslash has been used:
               it must not protect the following character too */
            b = 0;
            break;

        default:
            /* reset the backslash counter */
            b = 0;
            break;
        }
    }

tokenize:
    /* remove initial quoting character and calculate raw length */
    *quotes = 0;
    l = i - *pos;
    /* a quoted element is at least two characters long: a lone '"' at the
       end of the input is passed along as it is instead of generating a
       negative length */
    if (str[*pos] == '"' && l >= 2) {
        *pos += 1;
        l -= 2;
        *quotes = 1;
    }

    if (res == ASCAN_QUOTED) {
        const char *j, *jj;
        char *buffer = PyMem_Malloc(l+1);
        if (buffer == NULL) {
            PyErr_NoMemory();
            return ASCAN_ERROR;
        }

        *token = buffer;

        for (j = str + *pos, jj = j + l; j < jj; ++j) {
            /* a backslash escapes the following character, unless it is the
               last character of the token: in that case copy it verbatim
               instead of reading after the end of the token */
            if (*j == '\\' && j + 1 < jj) { ++j; }
            *(buffer++) = *j;
        }

        *buffer = '\0';
        /* The variable that was used to indicate the size of buffer is of type
         * Py_ssize_t, so a subsegment of buffer couldn't possibly exceed
         * PY_SSIZE_T_MAX: */
        *length = (Py_ssize_t) (buffer - *token);
    }
    else {
        *token = (char *)&str[*pos];
        *length = l;
    }

    *pos = i;

    /* skip the comma and set position to the start of next token */
    if (i < strlength && str[i] == ',') *pos += 1;

    return res;
}
2012-03-04 08:38:44 +04:00
/* typecast_array_scan - convert the elements of an array literal
 *
 * Tokenize the `strlength` characters long string `str` (the content of an
 * array literal, without the outermost braces) and append the elements found
 * to the list `array`.  Each element is converted calling typecast_cast()
 * with the `base` caster and the cursor `curs`; an unquoted NULL (in any
 * case) is converted calling the caster with a NULL string.  Each '{' found
 * starts a new list, appended to the current one, which receives the
 * following elements until the matching '}'.
 *
 * Return 0 on success, -1 with an exception set on error: the conversion of
 * an element failed, a list operation failed, the arrays are nested more
 * than MAX_DIMENSIONS levels or the braces are not balanced.
 */
RAISES_NEG static int
typecast_array_scan(const char *str, Py_ssize_t strlength,
                    PyObject *curs, PyObject *base, PyObject *array)
{
    int state, quotes = 0;
    Py_ssize_t length = 0, pos = 0;
    char *token;

    /* the lists containing the one being filled, outermost first */
    PyObject *stack[MAX_DIMENSIONS];
    size_t stack_index = 0;

    while (1) {
        token = NULL;
        state = typecast_array_tokenize(str, strlength,
                                    &pos, &token, &length, &quotes);
        Dprintf("typecast_array_scan: state = %d,"
                " length = " FORMAT_CODE_PY_SSIZE_T ", token = '%s'",
                state, length, token);
        if (state == ASCAN_TOKEN || state == ASCAN_QUOTED) {
            PyObject *obj;
            int rv;

            if (!quotes && length == 4
                && (token[0] == 'n' || token[0] == 'N')
                && (token[1] == 'u' || token[1] == 'U')
                && (token[2] == 'l' || token[2] == 'L')
                && (token[3] == 'l' || token[3] == 'L'))
            {
                obj = typecast_cast(base, NULL, 0, curs);
            } else {
                obj = typecast_cast(base, token, length, curs);
            }

            /* before anything else we free the memory */
            if (state == ASCAN_QUOTED) PyMem_Free(token);
            if (obj == NULL) return -1;

            /* the list owns a reference now: drop ours even on failure */
            rv = PyList_Append(array, obj);
            Py_DECREF(obj);
            if (rv < 0) return -1;
        }

        else if (state == ASCAN_BEGIN) {
            PyObject *sub;
            int rv;

            /* check the limit before allocating anything */
            if (stack_index == MAX_DIMENSIONS) {
                PyErr_SetString(DataError, "excessive array dimensions");
                return -1;
            }

            sub = PyList_New(0);
            if (sub == NULL) return -1;

            /* the parent list keeps the new list alive: from now on we only
               use a borrowed reference to it */
            rv = PyList_Append(array, sub);
            Py_DECREF(sub);
            if (rv < 0) return -1;

            stack[stack_index++] = array;
            array = sub;
        }

        else if (state == ASCAN_ERROR) {
            return -1;
        }

        else if (state == ASCAN_END) {
            if (stack_index == 0) {
                PyErr_SetString(DataError, "unbalanced braces in array");
                return -1;
            }
            array = stack[--stack_index];
        }

        else if (state == ASCAN_EOF)
            break;
    }

    /* at the end of the input every nested array must have been closed */
    if (stack_index != 0) {
        PyErr_SetString(DataError, "unbalanced braces in array");
        return -1;
    }

    return 0;
}
2005-03-22 17:20:20 +03:00
/** GENERIC - a generic typecaster that can be used when no special actions
have to be taken on the single items **/
2005-03-22 17:20:20 +03:00
static PyObject *
typecast_GENERIC_ARRAY_cast(const char *str, Py_ssize_t len, PyObject *curs)
2005-03-22 17:20:20 +03:00
{
PyObject *obj = NULL;
PyObject *base = ((typecastObject*)((cursorObject*)curs)->caster)->bcast;
2007-04-13 05:16:22 +04:00
Dprintf("typecast_GENERIC_ARRAY_cast: str = '%s',"
" len = " FORMAT_CODE_PY_SSIZE_T, str, len);
2006-06-11 09:09:59 +04:00
2013-04-05 04:25:05 +04:00
if (str == NULL) { Py_RETURN_NONE; }
if (str[0] == '[')
typecast_array_cleanup(&str, &len);
if (str[0] != '{') {
PyErr_SetString(DataError, "array does not start with '{'");
return NULL;
}
if (str[1] == '\0') {
PyErr_SetString(DataError, "malformed array: '{'");
return NULL;
}
2005-03-23 13:32:30 +03:00
2007-04-13 05:16:22 +04:00
Dprintf("typecast_GENERIC_ARRAY_cast: str = '%s',"
" len = " FORMAT_CODE_PY_SSIZE_T, str, len);
if (!(obj = PyList_New(0))) { return NULL; }
2005-03-22 17:20:20 +03:00
/* scan the array skipping the first level of {} */
2012-03-04 08:38:44 +04:00
if (typecast_array_scan(&str[1], len-2, curs, base, obj) < 0) {
Py_CLEAR(obj);
}
2005-03-22 17:20:20 +03:00
return obj;
}
2005-03-23 14:02:13 +03:00
/** almost all the basic array typecasters are derived from GENERIC: each
    name below is a plain alias of typecast_GENERIC_ARRAY_cast **/
#define typecast_LONGINTEGERARRAY_cast typecast_GENERIC_ARRAY_cast
#define typecast_INTEGERARRAY_cast typecast_GENERIC_ARRAY_cast
2005-03-23 14:02:13 +03:00
/* these casters are plain aliases of typecast_GENERIC_ARRAY_cast too */
#define typecast_FLOATARRAY_cast typecast_GENERIC_ARRAY_cast
#define typecast_DECIMALARRAY_cast typecast_GENERIC_ARRAY_cast
#define typecast_STRINGARRAY_cast typecast_GENERIC_ARRAY_cast
2005-03-23 14:02:13 +03:00
/* these casters are plain aliases of typecast_GENERIC_ARRAY_cast too */
#define typecast_UNICODEARRAY_cast typecast_GENERIC_ARRAY_cast
#define typecast_BOOLEANARRAY_cast typecast_GENERIC_ARRAY_cast
#define typecast_DATETIMEARRAY_cast typecast_GENERIC_ARRAY_cast
#define typecast_DATEARRAY_cast typecast_GENERIC_ARRAY_cast
#define typecast_TIMEARRAY_cast typecast_GENERIC_ARRAY_cast
#define typecast_INTERVALARRAY_cast typecast_GENERIC_ARRAY_cast
#define typecast_BINARYARRAY_cast typecast_GENERIC_ARRAY_cast
#define typecast_ROWIDARRAY_cast typecast_GENERIC_ARRAY_cast