mirror of
https://github.com/Infinidat/infi.clickhouse_orm.git
synced 2024-11-10 19:36:33 +03:00
Reduce memory footprint of Database.insert()
This commit is contained in:
parent
ec99044fab
commit
21907966b2
|
@ -7,6 +7,7 @@ Unreleased
|
|||
- Fix problem with SELECT WITH TOTALS (pilosus)
|
||||
- Update serialization format of DateTimeField to 10 digits, zero padded (nikepan)
|
||||
- Greatly improve performance when inserting large strings (credit to M1hacka for identifying the problem)
|
||||
- Reduce memory footprint of Database.insert()
|
||||
|
||||
v0.8.0
|
||||
------
|
||||
|
|
|
@ -50,6 +50,7 @@ class Database(object):
|
|||
|
||||
def insert(self, model_instances, batch_size=1000):
|
||||
from six import next
|
||||
from cStringIO import StringIO
|
||||
i = iter(model_instances)
|
||||
try:
|
||||
first_instance = next(i)
|
||||
|
@ -61,22 +62,27 @@ class Database(object):
|
|||
raise DatabaseException("You can't insert into read only table")
|
||||
|
||||
def gen():
|
||||
yield self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8')
|
||||
buf = StringIO()
|
||||
buf.write(self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8'))
|
||||
first_instance.set_database(self)
|
||||
yield (first_instance.to_tsv(include_readonly=False) + '\n').encode('utf-8')
|
||||
buf.write(first_instance.to_tsv(include_readonly=False).encode('utf-8'))
|
||||
buf.write('\n')
|
||||
# Collect lines in batches of batch_size
|
||||
batch = []
|
||||
lines = 2
|
||||
for instance in i:
|
||||
instance.set_database(self)
|
||||
batch.append(instance.to_tsv(include_readonly=False))
|
||||
if len(batch) >= batch_size:
|
||||
buf.write(instance.to_tsv(include_readonly=False).encode('utf-8'))
|
||||
buf.write('\n')
|
||||
lines += 1
|
||||
if lines >= batch_size:
|
||||
# Return the current batch of lines
|
||||
yield ('\n'.join(batch) + '\n').encode('utf-8')
|
||||
yield buf.getvalue()
|
||||
# Start a new batch
|
||||
batch = []
|
||||
buf = StringIO()
|
||||
lines = 0
|
||||
# Return any remaining lines in partial batch
|
||||
if batch:
|
||||
yield ('\n'.join(batch) + '\n').encode('utf-8')
|
||||
if lines:
|
||||
yield buf.getvalue()
|
||||
self._send(gen())
|
||||
|
||||
def count(self, model_class, conditions=None):
|
||||
|
|
Loading…
Reference in New Issue
Block a user