mirror of
https://github.com/psycopg/psycopg2.git
synced 2025-01-31 09:24:07 +03:00
Solid tokenization code.
This commit is contained in:
parent
cb9cec57c0
commit
75e7273d85
|
@ -1,3 +1,8 @@
|
||||||
|
2005-03-24 Federico Di Gregorio <fog@debian.org>
|
||||||
|
|
||||||
|
* psycopg/typecast_array.c (typecast_array_tokenize): much better
|
||||||
|
tokenization code.
|
||||||
|
|
||||||
2005-03-23 Federico Di Gregorio <fog@debian.org>
|
2005-03-23 Federico Di Gregorio <fog@debian.org>
|
||||||
|
|
||||||
* psycopg/typecast_basic.c: all the basic casters now respect the
|
* psycopg/typecast_basic.c: all the basic casters now respect the
|
||||||
|
|
|
@ -35,81 +35,98 @@ static int
|
||||||
typecast_array_tokenize(unsigned char *str, int strlength,
|
typecast_array_tokenize(unsigned char *str, int strlength,
|
||||||
int *pos, unsigned char** token, int *length)
|
int *pos, unsigned char** token, int *length)
|
||||||
{
|
{
|
||||||
int i, l, res = ASCAN_TOKEN;
|
/* FORTRAN glory */
|
||||||
int qs = 0; /* 2 = in quotes, 1 = quotes closed */
|
int i, j, q, b, l, res;
|
||||||
|
|
||||||
/* first we check for quotes, used when the content of the item contains
|
Dprintf("typecast_array_tokenize: '%s', %d/%d",
|
||||||
special or quoted characters */
|
&str[*pos], *pos, strlength);
|
||||||
|
|
||||||
if (str[*pos] == '"') {
|
/* we always get called with pos pointing at the start of a token, so a
|
||||||
qs = 2;
|
fast check is enough for ASCAN_EOF, ASCAN_BEGIN and ASCAN_END */
|
||||||
|
if (*pos == strlength) {
|
||||||
|
return ASCAN_EOF;
|
||||||
|
}
|
||||||
|
else if (str[*pos] == '{') {
|
||||||
*pos += 1;
|
*pos += 1;
|
||||||
|
return ASCAN_BEGIN;
|
||||||
|
}
|
||||||
|
else if (str[*pos] == '}') {
|
||||||
|
*pos += 1;
|
||||||
|
if (str[*pos] == ',')
|
||||||
|
*pos += 1;
|
||||||
|
return ASCAN_END;
|
||||||
}
|
}
|
||||||
|
|
||||||
Dprintf("typecast_array_tokenize: '%s'; %d/%d",
|
/* now we start looking for the first unquoted ',' or '}', the only two
|
||||||
&str[*pos], *pos, strlength);
|
tokens that can limit an array element */
|
||||||
|
q = 0; /* if q is odd we're inside quotes */
|
||||||
|
b = 0; /* if b is 1 we just encountered a backslash */
|
||||||
|
res = ASCAN_TOKEN;
|
||||||
|
|
||||||
for (i = *pos ; i < strlength ; i++) {
|
for (i = *pos ; i < strlength ; i++) {
|
||||||
switch (str[i]) {
|
switch (str[i]) {
|
||||||
case '{':
|
|
||||||
*pos = i+1;
|
|
||||||
return ASCAN_BEGIN;
|
|
||||||
|
|
||||||
case '}':
|
|
||||||
/* we tokenize the last item in the array and then return it to
|
|
||||||
the user together with the closing bracket marker */
|
|
||||||
res = ASCAN_END;
|
|
||||||
goto tokenize;
|
|
||||||
|
|
||||||
case '"':
|
case '"':
|
||||||
/* this will close the quoting only if the previous character was
|
if (b == 0)
|
||||||
NOT a backslash */
|
q += 1;
|
||||||
if (qs == 2 && str[i-1] != '\\') qs = 1;
|
else
|
||||||
continue;
|
b = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
case '\\':
|
case '\\':
|
||||||
/* something has been quoted, sigh, we'll need a copy buffer */
|
|
||||||
res = ASCAN_QUOTED;
|
res = ASCAN_QUOTED;
|
||||||
continue;
|
if (b == 0)
|
||||||
|
b = 1;
|
||||||
case ',':
|
|
||||||
/* if we're inside quotes we use the comma as a normal char */
|
|
||||||
if (qs == 2)
|
|
||||||
continue;
|
|
||||||
else
|
else
|
||||||
goto tokenize;
|
/* we're backslashing a backslash */
|
||||||
}
|
b = 0;
|
||||||
}
|
break;
|
||||||
|
|
||||||
res = ASCAN_EOF;
|
case '}':
|
||||||
|
case ',':
|
||||||
|
if (b == 0 && ((q&1) == 0))
|
||||||
|
goto tokenize;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
/* reset the backslash counter */
|
||||||
|
b = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
tokenize:
|
tokenize:
|
||||||
l = i - *pos - qs;
|
/* remove initial quoting character and calculate raw length */
|
||||||
|
l = i - *pos;
|
||||||
|
if (str[*pos] == '"') {
|
||||||
|
*pos += 1;
|
||||||
|
l -= 2;
|
||||||
|
}
|
||||||
|
|
||||||
/* if res is ASCAN_QUOTED we need to copy the string to a newly allocated
|
|
||||||
buffer and return it */
|
|
||||||
if (res == ASCAN_QUOTED) {
|
if (res == ASCAN_QUOTED) {
|
||||||
unsigned char *buffer = PyMem_Malloc(l+1);
|
unsigned char *buffer = PyMem_Malloc(l+1);
|
||||||
if (buffer == NULL) return ASCAN_ERROR;
|
if (buffer == NULL) return ASCAN_ERROR;
|
||||||
|
|
||||||
*token = buffer;
|
*token = buffer;
|
||||||
|
|
||||||
for (i = *pos; i < l+*pos; i++) {
|
for (j = *pos; j < *pos+l; j++) {
|
||||||
if (str[i] != '\\')
|
if (str[j] != '\\'
|
||||||
*(buffer++) = str[i];
|
|| (j > *pos && str[j-1] == '\\'))
|
||||||
|
*(buffer++) = str[j];
|
||||||
}
|
}
|
||||||
|
|
||||||
*buffer = '\0';
|
*buffer = '\0';
|
||||||
*length = (int)buffer - (int)*token;
|
*length = (int)buffer - (int)*token;
|
||||||
*pos = i+2;
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
*token = &str[*pos];
|
*token = &str[*pos];
|
||||||
*length = l;
|
*length = l;
|
||||||
*pos = i+1;
|
|
||||||
if (res == ASCAN_END && str[*pos] == ',')
|
|
||||||
*pos += 1; /* skip both the bracket and the comma */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
*pos = i;
|
||||||
|
|
||||||
|
/* skip the comma and set position to the start of next token */
|
||||||
|
if (str[i] == ',') *pos += 1;
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -117,19 +134,18 @@ static int
|
||||||
typecast_array_scan(unsigned char *str, int strlength,
|
typecast_array_scan(unsigned char *str, int strlength,
|
||||||
PyObject *curs, PyObject *base, PyObject *array)
|
PyObject *curs, PyObject *base, PyObject *array)
|
||||||
{
|
{
|
||||||
int state, length, bracket = 0, pos = 0;
|
int state, length, pos = 0;
|
||||||
unsigned char *token;
|
unsigned char *token;
|
||||||
|
|
||||||
PyObject *stack[MAX_DIMENSIONS];
|
PyObject *stack[MAX_DIMENSIONS];
|
||||||
int stack_index = 0;
|
int stack_index = 0;
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
|
token = NULL;
|
||||||
state = typecast_array_tokenize(str, strlength, &pos, &token, &length);
|
state = typecast_array_tokenize(str, strlength, &pos, &token, &length);
|
||||||
if (state == ASCAN_TOKEN
|
Dprintf("typecast_array_scan: state = %d, length = %d, token = '%s'",
|
||||||
|| state == ASCAN_QUOTED
|
state, length, token);
|
||||||
|| (state == ASCAN_EOF && bracket == 0)
|
if (state == ASCAN_TOKEN || state == ASCAN_QUOTED) {
|
||||||
|| (state == ASCAN_END && bracket == 0)) {
|
|
||||||
|
|
||||||
PyObject *obj = typecast_cast(base, token, length, curs);
|
PyObject *obj = typecast_cast(base, token, length, curs);
|
||||||
|
|
||||||
/* before anything else we free the memory */
|
/* before anything else we free the memory */
|
||||||
|
@ -139,6 +155,7 @@ typecast_array_scan(unsigned char *str, int strlength,
|
||||||
PyList_Append(array, obj);
|
PyList_Append(array, obj);
|
||||||
Py_DECREF(obj);
|
Py_DECREF(obj);
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (state == ASCAN_BEGIN) {
|
else if (state == ASCAN_BEGIN) {
|
||||||
PyObject *sub = PyList_New(0);
|
PyObject *sub = PyList_New(0);
|
||||||
if (sub == NULL) return 0;
|
if (sub == NULL) return 0;
|
||||||
|
@ -152,23 +169,19 @@ typecast_array_scan(unsigned char *str, int strlength,
|
||||||
stack[stack_index++] = array;
|
stack[stack_index++] = array;
|
||||||
array = sub;
|
array = sub;
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (state == ASCAN_ERROR) {
|
else if (state == ASCAN_ERROR) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* reset the closing bracket marker just before checking for ASCAN_END:
|
else if (state == ASCAN_END) {
|
||||||
this is to make sure we don't mistake two closing brackets for an
|
|
||||||
empty item */
|
|
||||||
bracket = 0;
|
|
||||||
|
|
||||||
if (state == ASCAN_END) {
|
|
||||||
if (--stack_index < 0)
|
if (--stack_index < 0)
|
||||||
return 0;
|
return 0;
|
||||||
array = stack[stack_index];
|
array = stack[stack_index];
|
||||||
bracket = 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (state == ASCAN_EOF) break;
|
else if (state == ASCAN_EOF)
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
|
|
|
@ -16,6 +16,6 @@ print d, '->', d[0], d[1], d[2]
|
||||||
curs.execute("SELECT ARRAY[ARRAY[1,2],ARRAY[3,4]] AS foo")
|
curs.execute("SELECT ARRAY[ARRAY[1,2],ARRAY[3,4]] AS foo")
|
||||||
print curs.fetchone()[0]
|
print curs.fetchone()[0]
|
||||||
|
|
||||||
curs.execute("SELECT ARRAY['20:00:01'::time] AS foo")
|
curs.execute("SELECT ARRAY[ARRAY[now(), now()], ARRAY[now(), now()]] AS foo")
|
||||||
print curs.description
|
print curs.description
|
||||||
print curs.fetchone()[0]
|
print curs.fetchone()[0]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user