Reduce memory footprint of Database.insert()

This commit is contained in:
Itai Shirav 2017-04-05 17:42:42 +03:00
parent ec99044fab
commit 21907966b2
2 changed files with 16 additions and 9 deletions

View File

@ -7,6 +7,7 @@ Unreleased
  - Fix problem with SELECT WITH TOTALS (pilosus)
  - Update serialization format of DateTimeField to 10 digits, zero padded (nikepan)
  - Greatly improve performance when inserting large strings (credit to M1hacka for identifying the problem)
+ - Reduce memory footprint of Database.insert()
  v0.8.0
  ------

View File

@ -50,6 +50,7 @@ class Database(object):
     def insert(self, model_instances, batch_size=1000):
         from six import next
+        from cStringIO import StringIO
         i = iter(model_instances)
         try:
             first_instance = next(i)
@ -61,22 +62,27 @@ class Database(object):
             raise DatabaseException("You can't insert into read only table")
         def gen():
-            yield self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8')
+            buf = StringIO()
+            buf.write(self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8'))
             first_instance.set_database(self)
-            yield (first_instance.to_tsv(include_readonly=False) + '\n').encode('utf-8')
+            buf.write(first_instance.to_tsv(include_readonly=False).encode('utf-8'))
+            buf.write('\n')
             # Collect lines in batches of batch_size
-            batch = []
+            lines = 2
             for instance in i:
                 instance.set_database(self)
-                batch.append(instance.to_tsv(include_readonly=False))
-                if len(batch) >= batch_size:
+                buf.write(instance.to_tsv(include_readonly=False).encode('utf-8'))
+                buf.write('\n')
+                lines += 1
+                if lines >= batch_size:
                     # Return the current batch of lines
-                    yield ('\n'.join(batch) + '\n').encode('utf-8')
+                    yield buf.getvalue()
                     # Start a new batch
-                    batch = []
+                    buf = StringIO()
+                    lines = 0
             # Return any remaining lines in partial batch
-            if batch:
-                yield ('\n'.join(batch) + '\n').encode('utf-8')
+            if lines:
+                yield buf.getvalue()
         self._send(gen())
     def count(self, model_class, conditions=None):