Reduce memory footprint of Database.insert()

This commit is contained in:
Itai Shirav 2017-04-05 17:42:42 +03:00
parent ec99044fab
commit 21907966b2
2 changed files with 16 additions and 9 deletions

View File

@ -7,6 +7,7 @@ Unreleased
- Fix problem with SELECT WITH TOTALS (pilosus)
- Update serialization format of DateTimeField to 10 digits, zero padded (nikepan)
- Greatly improve performance when inserting large strings (credit to M1hacka for identifying the problem)
- Reduce memory footprint of Database.insert()
v0.8.0
------

View File

@ -50,6 +50,7 @@ class Database(object):
def insert(self, model_instances, batch_size=1000):
from six import next
from cStringIO import StringIO
i = iter(model_instances)
try:
first_instance = next(i)
@ -61,22 +62,27 @@ class Database(object):
raise DatabaseException("You can't insert into read only table")
def gen():
yield self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8')
buf = StringIO()
buf.write(self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8'))
first_instance.set_database(self)
yield (first_instance.to_tsv(include_readonly=False) + '\n').encode('utf-8')
buf.write(first_instance.to_tsv(include_readonly=False).encode('utf-8'))
buf.write('\n')
# Collect lines in batches of batch_size
batch = []
lines = 2
for instance in i:
instance.set_database(self)
batch.append(instance.to_tsv(include_readonly=False))
if len(batch) >= batch_size:
buf.write(instance.to_tsv(include_readonly=False).encode('utf-8'))
buf.write('\n')
lines += 1
if lines >= batch_size:
# Return the current batch of lines
yield ('\n'.join(batch) + '\n').encode('utf-8')
yield buf.getvalue()
# Start a new batch
batch = []
buf = StringIO()
lines = 0
# Return any remaining lines in partial batch
if batch:
yield ('\n'.join(batch) + '\n').encode('utf-8')
if lines:
yield buf.getvalue()
self._send(gen())
def count(self, model_class, conditions=None):