From b6a74704540360203b6704df971dce3716dcd3e0 Mon Sep 17 00:00:00 2001 From: Sean Harrington Date: Fri, 29 Jul 2016 10:53:26 -0400 Subject: [PATCH 1/9] Testing functionality of COPY ... FROM ... WITH FORMAT CSV QUOTE AS %s (without parameterizing the function yet...) --- psycopg/cursor_type.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index fe79bbf9..67bfe823 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -1343,14 +1343,17 @@ psyco_curs_copy_from(cursorObject *self, PyObject *args, PyObject *kwargs) const char *sep = "\t"; const char *null = "\\N"; + const char *quote = "'"; + const char *command = - "COPY %s%s FROM stdin WITH DELIMITER AS %s NULL AS %s"; + "COPY %s%s FROM stdin WITH DELIMITER AS %s NULL AS %s FORMAT AS CSV QUOTE AS %s"; Py_ssize_t query_size; char *query = NULL; char *columnlist = NULL; char *quoted_delimiter = NULL; char *quoted_null = NULL; + char *quoted_quote = NULL; const char *table_name; Py_ssize_t bufsize = DEFAULT_COPYBUFF; @@ -1382,15 +1385,19 @@ psyco_curs_copy_from(cursorObject *self, PyObject *args, PyObject *kwargs) goto exit; } + if (!(quoted_quote = psycopg_escape_string( + self->conn, quote, 0, NULL, NULL))) { + goto exit; + } query_size = strlen(command) + strlen(table_name) + strlen(columnlist) - + strlen(quoted_delimiter) + strlen(quoted_null) + 1; + + strlen(quoted_delimiter) + strlen(quoted_null) strlen(quoted_quote) + 1; if (!(query = PyMem_New(char, query_size))) { PyErr_NoMemory(); goto exit; } PyOS_snprintf(query, query_size, command, - table_name, columnlist, quoted_delimiter, quoted_null); + table_name, columnlist, quoted_delimiter, quoted_null, quoted_quote); Dprintf("psyco_curs_copy_from: query = %s", query); @@ -1409,6 +1416,7 @@ exit: PyMem_Free(columnlist); PyMem_Free(quoted_delimiter); PyMem_Free(quoted_null); + PyMem_Free(quoted_quote); PyMem_Free(query); return res; From 603e462d32dac87e1ec642e05a47ac930b3fd185 
Mon Sep 17 00:00:00 2001 From: Sean Harrington Date: Fri, 29 Jul 2016 11:03:59 -0400 Subject: [PATCH 2/9] Added '+' to fix syntax Error --- psycopg/cursor_type.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index 67bfe823..91b5a72f 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -1390,7 +1390,7 @@ psyco_curs_copy_from(cursorObject *self, PyObject *args, PyObject *kwargs) goto exit; } query_size = strlen(command) + strlen(table_name) + strlen(columnlist) - + strlen(quoted_delimiter) + strlen(quoted_null) strlen(quoted_quote) + 1; + + strlen(quoted_delimiter) + strlen(quoted_null) + strlen(quoted_quote) + 1; if (!(query = PyMem_New(char, query_size))) { PyErr_NoMemory(); goto exit; From 2871d09700796771c3e7165867c4964d12e691d8 Mon Sep 17 00:00:00 2001 From: Sean Harrington Date: Fri, 29 Jul 2016 11:11:29 -0400 Subject: [PATCH 3/9] Removed 'FORMAT' from copy_from command --- psycopg/cursor_type.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index 91b5a72f..31023c0f 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -1346,7 +1346,7 @@ psyco_curs_copy_from(cursorObject *self, PyObject *args, PyObject *kwargs) const char *quote = "'"; const char *command = - "COPY %s%s FROM stdin WITH DELIMITER AS %s NULL AS %s FORMAT AS CSV QUOTE AS %s"; + "COPY %s%s FROM stdin WITH DELIMITER AS %s NULL AS %s QUOTE AS %s CSV"; Py_ssize_t query_size; char *query = NULL; From ed553cf937e46eaceb7e1e83757a83eeb96d412a Mon Sep 17 00:00:00 2001 From: Sean Harrington Date: Fri, 29 Jul 2016 11:15:53 -0400 Subject: [PATCH 4/9] Changed default 'quote' to be '" " --- psycopg/cursor_type.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index 31023c0f..d5634755 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -1343,7 +1343,7 @@ 
psyco_curs_copy_from(cursorObject *self, PyObject *args, PyObject *kwargs) const char *sep = "\t"; const char *null = "\\N"; - const char *quote = "'"; + const char *quote = '"'; const char *command = "COPY %s%s FROM stdin WITH DELIMITER AS %s NULL AS %s QUOTE AS %s CSV"; From a1c7f4bf5d2797db439f23e69abf1f907211f154 Mon Sep 17 00:00:00 2001 From: Sean Harrington Date: Fri, 29 Jul 2016 11:17:23 -0400 Subject: [PATCH 5/9] Quotes need to be in the form quote_char --- psycopg/cursor_type.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index d5634755..55409799 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -1343,7 +1343,7 @@ psyco_curs_copy_from(cursorObject *self, PyObject *args, PyObject *kwargs) const char *sep = "\t"; const char *null = "\\N"; - const char *quote = '"'; + const char *quote = "\""; const char *command = "COPY %s%s FROM stdin WITH DELIMITER AS %s NULL AS %s QUOTE AS %s CSV"; From e61e3d0e7bba5fc3499b6f44061a41005df591c3 Mon Sep 17 00:00:00 2001 From: Sean Harrington Date: Fri, 29 Jul 2016 12:21:36 -0400 Subject: [PATCH 6/9] Added support for CSV and TXT formats --- psycopg/cursor_type.c | 52 +++++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index 55409799..b58ec6e9 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -1339,14 +1339,13 @@ static PyObject * psyco_curs_copy_from(cursorObject *self, PyObject *args, PyObject *kwargs) { static char *kwlist[] = { - "file", "table", "sep", "null", "size", "columns", NULL}; + "file", "table", "sep", "null", "size", "columns", "quote", "format", NULL}; const char *sep = "\t"; const char *null = "\\N"; const char *quote = "\""; + const char *format = "TXT"; - const char *command = - "COPY %s%s FROM stdin WITH DELIMITER AS %s NULL AS %s QUOTE AS %s CSV"; Py_ssize_t query_size; char *query = NULL; @@ -1356,16 
+1355,19 @@ psyco_curs_copy_from(cursorObject *self, PyObject *args, PyObject *kwargs) char *quoted_quote = NULL; const char *table_name; + const char *command; + Py_ssize_t bufsize = DEFAULT_COPYBUFF; PyObject *file, *columns = NULL, *res = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&s|ssnO", kwlist, _psyco_curs_has_read_check, &file, &table_name, &sep, &null, &bufsize, - &columns)) + &columns, &quote, &format)) { return NULL; } + EXC_IF_CURS_CLOSED(self); EXC_IF_CURS_ASYNC(self, copy_from); @@ -1384,23 +1386,45 @@ psyco_curs_copy_from(cursorObject *self, PyObject *args, PyObject *kwargs) self->conn, null, 0, NULL, NULL))) { goto exit; } - + if (!(quoted_quote = psycopg_escape_string( self->conn, quote, 0, NULL, NULL))) { goto exit; } - query_size = strlen(command) + strlen(table_name) + strlen(columnlist) - + strlen(quoted_delimiter) + strlen(quoted_null) + strlen(quoted_quote) + 1; - if (!(query = PyMem_New(char, query_size))) { - PyErr_NoMemory(); - goto exit; + + + if(strcmp("TXT", format) == 0){ + // Load by default TXT file type + command = + "COPY %s%s FROM stdin WITH DELIMITER AS %s NULL AS %s"; + query_size = strlen(command) + strlen(table_name) + strlen(columnlist) + + strlen(quoted_delimiter) + strlen(quoted_null) + 1; + + if (!(query = PyMem_New(char, query_size))) { + PyErr_NoMemory(); + goto exit; + } + + PyOS_snprintf(query, query_size, command, + table_name, columnlist, quoted_delimiter, quoted_null); + }else{ + // Load from .CSV + command = + "COPY %s%s FROM stdin WITH DELIMITER AS %s NULL AS %s QUOTE AS %s %s"; + query_size = strlen(command) + strlen(table_name) + strlen(columnlist) + + strlen(quoted_delimiter) + strlen(quoted_null) + strlen(quoted_quote) + + strlen(format) + 1; + + if (!(query = PyMem_New(char, query_size))) { + PyErr_NoMemory(); + goto exit; + } + + PyOS_snprintf(query, query_size, command, + table_name, columnlist, quoted_delimiter, quoted_null, quoted_quote, format); } - PyOS_snprintf(query, query_size, command, - 
table_name, columnlist, quoted_delimiter, quoted_null, quoted_quote); - Dprintf("psyco_curs_copy_from: query = %s", query); - self->copysize = bufsize; Py_INCREF(file); self->copyfile = file; From 28c4e476228cc9e4148d42604b253cb9cb71091b Mon Sep 17 00:00:00 2001 From: Sean Harrington Date: Fri, 29 Jul 2016 12:39:28 -0400 Subject: [PATCH 7/9] Updated 'define psyco_curs_copy_from_doc --- psycopg/cursor_type.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index b58ec6e9..cc9157a2 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -1312,7 +1312,7 @@ exit: /* extension: copy_from - implements COPY FROM */ #define psyco_curs_copy_from_doc \ -"copy_from(file, table, sep='\\t', null='\\\\N', size=8192, columns=None) -- Copy table from file." +"copy_from(file, table, sep='\\t', null='\\\\N', size=8192, columns=None, quote='\"', format='TXT') -- Copy table from file." STEALS(1) static int _psyco_curs_has_read_check(PyObject *o, PyObject **var) From 1cbe93347b28bd134191ce73aee6bb6cd4b739de Mon Sep 17 00:00:00 2001 From: Sean Harrington Date: Fri, 29 Jul 2016 12:48:34 -0400 Subject: [PATCH 8/9] Updated format_specifier in PyArg_ParseTupleAndKeywords --- psycopg/cursor_type.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index cc9157a2..ec38472e 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -1361,7 +1361,7 @@ psyco_curs_copy_from(cursorObject *self, PyObject *args, PyObject *kwargs) PyObject *file, *columns = NULL, *res = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "O&s|ssnO", kwlist, + "O&s|ssnOss", kwlist, _psyco_curs_has_read_check, &file, &table_name, &sep, &null, &bufsize, &columns, &quote, &format)) { From 74c8c45795df65cf32ffd45c0708364d8c54d909 Mon Sep 17 00:00:00 2001 From: Sean Harrington Date: Fri, 29 Jul 2016 14:52:16 -0400 Subject: [PATCH 9/9] Added tests for copy_from in support of 'quote' and 
'format' parameters now exposed by copy_from(). --- tests/test_copy.py | 47 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/tests/test_copy.py b/tests/test_copy.py index 32134215..c13cca30 100755 --- a/tests/test_copy.py +++ b/tests/test_copy.py @@ -217,6 +217,29 @@ class CopyTests(ConnectingTestCase): curs.execute("select data from tcopy;") self.assertEqual(curs.fetchone()[0], abin) + def _copy_from_csv(self, curs, nrecs, srec, copykw, mock_columns_enclosed_by): + + f = StringIO() + for i, c in izip(xrange(nrecs), cycle(string.ascii_letters)): + l = c * srec + # Enclose '{1}' and '{2}' in the quote char '{0}' (Defaults to '"') + f.write("%s,%s%s%s\n" % (i,mock_columns_enclosed_by,l,mock_columns_enclosed_by)) + + f.seek(0) + copykw['format'] = 'CSV' + + copykw['sep'] = "," + curs.copy_from(MinimalRead(f), "tcopy", **copykw) + + curs.execute("select count(*) from tcopy") + self.assertEqual(nrecs, curs.fetchone()[0]) + + curs.execute("select data from tcopy where id < %s order by id", + (len(string.ascii_letters),)) + for i, (l,) in enumerate(curs): + self.assertEqual(l, string.ascii_letters[i] * srec) + + def _copy_from(self, curs, nrecs, srec, copykw): f = StringIO() for i, c in izip(xrange(nrecs), cycle(string.ascii_letters)): @@ -366,7 +389,29 @@ conn.close() curs.execute("insert into tcopy values (10, 'hi')") self.assertRaises(ZeroDivisionError, curs.copy_to, BrokenWrite(), "tcopy") - + def test_copy_from_csv(self): + curs = self.conn.cursor() + try: + # 'Quote' should default to '"' + self._copy_from_csv(curs, nrecs=1024, srec=10*1024, copykw={}, mock_columns_enclosed_by='"') + finally: + curs.close() + def test_copy_from_csv_specify_column_enclosure(self): + curs = self.conn.cursor() + try: + self._copy_from_csv(curs, nrecs=1024, srec=10*1024, copykw={'quote': "'"}, mock_columns_enclosed_by="'") + finally: + curs.close() + def test_copy_txt_and_set_quote(self): + # this shouldn't return an error... 
+ # b/c format = 'TXT' by default, and we do not + # override this default here, quote does not get included + # in the COPY ... FROM ... command. + curs = self.conn.cursor() + try: + self._copy_from(curs, nrecs=1024, srec=10*1024, copykw={'quote': "'"}) + finally: + curs.close() decorate_all_tests(CopyTests, skip_copy_if_green)