psycopg2/psycopg/typecast_binary.c

/* typecast_binary.c - binary typecasting functions to python types
 *
 * Copyright (C) 2001-2010 Federico Di Gregorio <fog@debian.org>
 *
 * This file is part of psycopg.
 *
 * psycopg2 is free software: you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * In addition, as a special exception, the copyright holders give
 * permission to link this program with the OpenSSL library (or with
 * modified versions of OpenSSL that use the same license as OpenSSL),
 * and distribute linked combinations including the two.
 *
 * You must obey the GNU Lesser General Public License in all respects for
 * all of the code used other than OpenSSL.
 *
 * psycopg2 is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 * License for more details.
 */

#include "typecast_binary.h"

#include <stdlib.h>


/* Python object holding a memory chunk. The memory is deallocated when
   the object is destroyed. This type is used to let users directly access
   memory chunks holding unescaped binary data through the buffer interface.
 */

/* tp_dealloc slot for chunk objects.
 *
 * Release the memory chunk referenced by 'base' with PyMem_Free, then hand
 * the object itself back to the tp_free slot of its type.
 */
static void
chunk_dealloc(chunkObject *self)
{
    PyObject *ob = (PyObject *)self;

    Dprintf("chunk_dealloc: deallocating memory at %p, size "
        FORMAT_CODE_PY_SSIZE_T,
        self->base, self->len
      );

    /* The chunk memory goes first: 'self' is not valid after tp_free. */
    PyMem_Free(self->base);
    Py_TYPE(ob)->tp_free(ob);
}

/* tp_repr slot for chunk objects.
 *
 * Return a new string reporting the address and the size of the wrapped
 * memory, i.e. whatever PyString_FromFormat returns for the format below.
 */
static PyObject *
chunk_repr(chunkObject *self)
{
    PyObject *repr;

    repr = PyString_FromFormat(
        "<memory chunk at %p size " FORMAT_CODE_PY_SSIZE_T ">",
        self->base, self->len);

    return repr;
}

#if PY_MAJOR_VERSION < 3

/* Read-buffer getter used in the Python 2 buffer table below.
 *
 * A chunk only has segment 0: for that segment store the address of the
 * chunk memory in '*ptr' and return its length. For any other segment set
 * a SystemError and return -1.
 */
static Py_ssize_t
chunk_getreadbuffer(chunkObject *self, Py_ssize_t segment, void **ptr)
{
    if (segment == 0) {
        *ptr = self->base;
        return self->len;
    }

    PyErr_SetString(PyExc_SystemError,
                    "accessing non-existant buffer segment");
    return -1;
}

/* Segment-count getter used in the Python 2 buffer table below.
 *
 * Always report a single segment. If 'lenp' is not NULL also store the
 * length of the chunk there.
 */
static Py_ssize_t
chunk_getsegcount(chunkObject *self, Py_ssize_t *lenp)
{
    if (NULL == lenp) {
        return 1;
    }

    *lenp = self->len;
    return 1;
}

/* Buffer procedures for chunk objects (Python 2 flavour): only the read
 * buffer and the segment count getters are provided, the other two slots
 * are left empty.
 */
static PyBufferProcs chunk_as_buffer =
{
    (readbufferproc) chunk_getreadbuffer,   /* read buffer getter */
    NULL,                                   /* no write buffer getter */
    (segcountproc) chunk_getsegcount,       /* segment count getter */
    NULL                                    /* no char buffer getter */
};

#else

/* 3.0 buffer interface */
/* Buffer getter for chunk objects.
 *
 * Fill 'view' through PyBuffer_FillInfo, exposing the chunk memory as
 * read-only (the 'readonly' argument is 1). If that succeeds replace the
 * view's format with "c". Return the value returned by PyBuffer_FillInfo.
 */
int chunk_getbuffer(PyObject *_self, Py_buffer *view, int flags)
{
    chunkObject *chunk = (chunkObject*)_self;
    int status = PyBuffer_FillInfo(
        view, _self, chunk->base, chunk->len, 1, flags);

    if (status != 0) {
        return status;
    }

    view->format = "c";
    return status;
}

static PyBufferProcs chunk_as_buffer =
{
    chunk_getbuffer,
    NULL,
};

#endif

/* Docstring of the chunk type, stored in its tp_doc slot. */
#define chunk_doc "memory chunk"

/* Type object of the chunk objects.
 *
 * The only slots filled in are the destructor, the repr, the buffer
 * procedures, the flags and the docstring: every other slot listed here is
 * left to 0. The initializer is positional, so the order of the entries
 * must follow the layout of PyTypeObject.
 */
PyTypeObject chunkType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "psycopg2._psycopg.chunk",  /* tp_name */
    sizeof(chunkObject), 0,     /* tp_basicsize, tp_itemsize */
    (destructor) chunk_dealloc, /* tp_dealloc */
    0,                          /* tp_print */
    0,                          /* tp_getattr */
    0,                          /* tp_setattr */
    0,                          /* tp_compare */
    (reprfunc) chunk_repr,      /* tp_repr */
    0,                          /* tp_as_number */
    0,                          /* tp_as_sequence */
    0,                          /* tp_as_mapping */
    0,                          /* tp_hash */
    0,                          /* tp_call */
    0,                          /* tp_str */
    0,                          /* tp_getattro */
    0,                          /* tp_setattro */
    &chunk_as_buffer,           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /* tp_flags */
    chunk_doc                   /* tp_doc */
};


/* Forward declarations of the two bytea parsers defined at the bottom of
 * this file: typecast_BINARY_cast() calls one or the other according to the
 * format of its input. */
static char *psycopg_parse_hex(
        const char *bufin, Py_ssize_t sizein, Py_ssize_t *sizeout);
static char *psycopg_parse_escape(
        const char *bufin, Py_ssize_t sizein, Py_ssize_t *sizeout);

/* Convert a bytea value in its textual output format into a Python object
 * exposing the unescaped bytes.
 *
 * 's' is the escaped data and 'l' its length; 'curs' is not used here.
 * Both the 'hex' format (data starting with \x) and the classic 'escape'
 * format are understood.
 *
 * Return None if 's' is NULL. Otherwise return a new reference to a buffer
 * (Python 2) or memoryview (Python 3) built on a chunk object owning the
 * unescaped data. Return NULL if parsing the data or creating any of the
 * objects fails.
 *
 * The function is not static and not hidden as we use ctypes to test it.
 */
PyObject *
typecast_BINARY_cast(const char *s, Py_ssize_t l, PyObject *curs)
{
    chunkObject *chunk = NULL;
    PyObject *res = NULL;
    char *buffer = NULL;
    Py_ssize_t len;

    if (s == NULL) { Py_RETURN_NONE; }

    /* Check the length before looking at the prefix: this way we never read
     * s[1] past the declared end of the input and the hex parser, which
     * skips the first two chars, never gets less than that. */
    if (l >= 2 && s[0] == '\\' && s[1] == 'x') {
        /* This is a buffer escaped in hex format: libpq before 9.0 can't
         * parse it and we can't detect reliably the libpq version at runtime.
         * So the only robust option is to parse it ourselves - luckily it's
         * an easy format.
         */
        if (NULL == (buffer = psycopg_parse_hex(s, l, &len))) {
            goto exit;
        }
    }
    else {
        /* This is a buffer in the classic bytea format. We could hand it to
         * PQunescapeBytea, but then we would have to record whether the
         * buffer was allocated by Python or by the libpq in order to dispose
         * of it properly. Furthermore PQunescapeBytea wants a
         * null-terminated string even if we already know the length, thus
         * requiring a useless memcpy and strlen. So we just use our own,
         * better integrated, parser.
         */
        if (NULL == (buffer = psycopg_parse_escape(s, l, &len))) {
            goto exit;
        }
    }

    chunk = (chunkObject *) PyObject_New(chunkObject, &chunkType);
    if (chunk == NULL) goto exit;

    /* **Transfer** ownership of buffer's memory to the chunkObject: from now
     * on it is the chunk destructor that frees it, so forget the pointer
     * here to avoid freeing it twice at exit. */
    chunk->base = buffer;
    buffer = NULL;
    chunk->len = len;

#if PY_MAJOR_VERSION < 3
    if ((res = PyBuffer_FromObject((PyObject *)chunk, 0, chunk->len)) == NULL)
        goto exit;
#else
    if ((res = PyMemoryView_FromObject((PyObject*)chunk)) == NULL)
        goto exit;
#endif

exit:
    /* Drop our own reference to the chunk (if we got as far as creating it)
     * and free the buffer in case its ownership was not transferred. */
    Py_XDECREF((PyObject *)chunk);
    PyMem_Free(buffer);

    return res;
}


/* Lookup table from a 7 bit char code to the value of the hex digit it
 * represents: 0-9 for '0'-'9', 10-15 for both 'A'-'F' and 'a'-'f'.
 * Any other char maps to -1.
 */
#define HEX_LUT_NONE (-1)
static const char hex_lut[128] = {
    /* 0x00 */ HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
               HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
    /* 0x08 */ HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
               HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
    /* 0x10 */ HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
               HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
    /* 0x18 */ HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
               HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
    /* 0x20 */ HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
               HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
    /* 0x28 */ HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
               HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
    /* '0'  */ 0, 1, 2, 3, 4, 5, 6, 7,
    /* '8'  */ 8, 9, HEX_LUT_NONE, HEX_LUT_NONE,
               HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
    /* 0x40 */ HEX_LUT_NONE, 10, 11, 12, 13, 14, 15, HEX_LUT_NONE,
    /* 0x48 */ HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
               HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
    /* 0x50 */ HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
               HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
    /* 0x58 */ HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
               HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
    /* 0x60 */ HEX_LUT_NONE, 10, 11, 12, 13, 14, 15, HEX_LUT_NONE,
    /* 0x68 */ HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
               HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
    /* 0x70 */ HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
               HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
    /* 0x78 */ HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE,
               HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE, HEX_LUT_NONE
};
#undef HEX_LUT_NONE

/* Parse a bytea output buffer encoded in 'hex' format.
 *
 * the format is described in
 * http://www.postgresql.org/docs/current/static/datatype-binary.html
 *
 * Parse the buffer in 'bufin', whose length is 'sizein'. The first two
 * chars are assumed to be the \x prefix and are skipped without looking at
 * them. No byte at or after 'bufin + sizein' is read: the input doesn't need
 * to be null-terminated.
 *
 * Return a new buffer allocated by PyMem_Malloc and set 'sizeout' to its size.
 * In case of error set an exception and return NULL: the only error detected
 * is the failure to allocate the output buffer.
 */
static char *
psycopg_parse_hex(const char *bufin, Py_ssize_t sizein, Py_ssize_t *sizeout)
{
    const char *bufend = bufin + sizein;
    const char *pi;
    char *bufout;
    char *po;
    Py_ssize_t maxout = 0;

    if (sizein >= 2) {
        pi = bufin + 2;                 /* past the \x */
        maxout = (sizein - 2) >> 1;     /* output size upper bound */
    }
    else {
        /* Not even room for the prefix: there is nothing to parse. */
        pi = bufend;
    }

    po = bufout = PyMem_Malloc(maxout);
    if (NULL == bufout) {
        PyErr_NoMemory();
        return NULL;
    }

    /* Implementation note: we call this function upon database response, not
     * user input (because we are parsing the output format of a buffer) so we
     * don't expect errors. On bad input we reserve the right to return a bad
     * output, not an error. What we don't do, though, is to access memory
     * outside the input or the output buffer.
     *
     * The values read from the table are stored as unsigned char and what is
     * not a digit is recognised as "greater than 15", so the test doesn't
     * depend on the plain char being signed or not. The mask on the input
     * keeps the index inside the table (chars with the high bit set are
     * looked up as their 7 bit counterparts).
     */
    for (;;) {
        unsigned char hi;
        unsigned char lo;

        /* First digit of the pair, skipping whatever is not a hex digit. */
        do {
            if (pi >= bufend) { goto endloop; }
            hi = (unsigned char)hex_lut[*pi++ & '\x7f'];
        } while (hi > 0x0f);

        /* Second digit. If the input finishes here the first digit is
         * dropped: it is written nowhere, as the output buffer may have no
         * room left for it. */
        do {
            if (pi >= bufend) { goto endloop; }
            lo = (unsigned char)hex_lut[*pi++ & '\x7f'];
        } while (lo > 0x0f);

        *po++ = (char)((hi << 4) | lo);
    }
endloop:

    *sizeout = po - bufout;
    return bufout;
}

/* Parse a bytea output buffer encoded in 'escape' format.
 *
 * the format is described in
 * http://www.postgresql.org/docs/current/static/datatype-binary.html
 *
 * Parse the buffer in 'bufin', whose length is 'sizein'. No byte at or after
 * 'bufin + sizein' is read: the input doesn't need to be null-terminated.
 *
 * A backslash followed by three octal digits (the first one in the range
 * 0-3) is replaced by the byte with that value. A backslash followed by
 * anything else is dropped and the following char is copied verbatim. A
 * single backslash at the very end of the input is an incomplete escape and
 * is discarded. Every other char is copied as it is.
 *
 * Return a new buffer allocated by PyMem_Malloc and set 'sizeout' to its size.
 * In case of error set an exception and return NULL: the only error detected
 * is the failure to allocate the output buffer.
 */
static char *
psycopg_parse_escape(const char *bufin, Py_ssize_t sizein, Py_ssize_t *sizeout)
{
    const char *bufend = bufin + sizein;
    const char *pi = bufin;
    char *bufout;
    char *po;

    /* Every sequence parsed emits at most one byte and consumes at least one,
     * so the output can't be longer than the input. */
    po = bufout = PyMem_Malloc(sizein);   /* output size upper bound */
    if (NULL == bufout) {
        PyErr_NoMemory();
        return NULL;
    }

    while (pi < bufend) {
        /* Number of chars still available, the one at 'pi' included. */
        Py_ssize_t left = bufend - pi;

        if (*pi != '\\') {
            /* Unescaped char */
            *po++ = *pi++;
        }
        else if (left >= 4 &&
            (pi[1] >= '0' && pi[1] <= '3') &&
            (pi[2] >= '0' && pi[2] <= '7') &&
            (pi[3] >= '0' && pi[3] <= '7'))
        {
            /* Escaped octal value */
            *po++ = ((pi[1] - '0') << 6) |
                    ((pi[2] - '0') << 3) |
                    ((pi[3] - '0'));
            pi += 4;
        }
        else if (left >= 2) {
            /* Escaped char */
            *po++ = pi[1];
            pi += 2;
        }
        else {
            /* Backslash with nothing after it: there is no char to unescape
             * inside the input, so just skip it. */
            pi++;
        }
    }

    *sizeout = po - bufout;
    return bufout;
}