mirror of https://github.com/Infinidat/infi.clickhouse_orm.git (synced 2025-07-13 17:42:18 +03:00)

Reduce memory footprint of Database.insert()
parent ec99044fab
commit 21907966b2
@@ -7,6 +7,7 @@ Unreleased
 - Fix problem with SELECT WITH TOTALS (pilosus)
 - Update serialization format of DateTimeField to 10 digits, zero padded (nikepan)
 - Greatly improve performance when inserting large strings (credit to M1hacka for identifying the problem)
+- Reduce memory footprint of Database.insert()
 
 v0.8.0
 ------

@@ -50,6 +50,7 @@ class Database(object):
 
     def insert(self, model_instances, batch_size=1000):
         from six import next
+        from cStringIO import StringIO
         i = iter(model_instances)
         try:
             first_instance = next(i)
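The newly imported cStringIO module exists only on Python 2. Purely as a point of reference (not part of this commit; the table name and row values below are made up), the analogous in-memory byte buffer on Python 3 is io.BytesIO, since gen() fills the buffer with utf-8-encoded bytes:

    from io import BytesIO

    buf = BytesIO()                                # in-memory byte buffer, like cStringIO.StringIO above
    buf.write(u'INSERT INTO some_table FORMAT TabSeparated\n'.encode('utf-8'))  # hypothetical table name
    buf.write(u'1\t2017-06-01\n'.encode('utf-8'))  # one TSV-encoded row (made-up values)
    payload = buf.getvalue()                       # bytes ready to send as a single request body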
|
@@ -61,22 +62,27 @@ class Database(object):
             raise DatabaseException("You can't insert into read only table")
 
         def gen():
-            yield self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8')
+            buf = StringIO()
+            buf.write(self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8'))
             first_instance.set_database(self)
-            yield (first_instance.to_tsv(include_readonly=False) + '\n').encode('utf-8')
+            buf.write(first_instance.to_tsv(include_readonly=False).encode('utf-8'))
+            buf.write('\n')
             # Collect lines in batches of batch_size
-            batch = []
+            lines = 2
             for instance in i:
                 instance.set_database(self)
-                batch.append(instance.to_tsv(include_readonly=False))
-                if len(batch) >= batch_size:
+                buf.write(instance.to_tsv(include_readonly=False).encode('utf-8'))
+                buf.write('\n')
+                lines += 1
+                if lines >= batch_size:
                     # Return the current batch of lines
-                    yield ('\n'.join(batch) + '\n').encode('utf-8')
+                    yield buf.getvalue()
                     # Start a new batch
-                    batch = []
+                    buf = StringIO()
+                    lines = 0
             # Return any remaining lines in partial batch
-            if batch:
-                yield ('\n'.join(batch) + '\n').encode('utf-8')
+            if lines:
+                yield buf.getvalue()
         self._send(gen())
 
     def count(self, model_class, conditions=None):
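In short, gen() now streams the encoded rows into a single StringIO buffer and yields one byte blob per batch_size rows, instead of keeping a list of row strings and producing an additional joined-and-encoded copy of the whole batch at yield time. A minimal standalone sketch of the same batching pattern (my own illustration, not the ORM's code: batched_payloads, rows and encode_row are invented names, and io.BytesIO stands in for the Python-2-only cStringIO):

    from io import BytesIO

    def batched_payloads(rows, encode_row, batch_size=1000):
        """Yield utf-8 byte blobs containing at most batch_size TSV rows each."""
        buf = BytesIO()
        lines = 0
        for row in rows:
            buf.write(encode_row(row).encode('utf-8'))  # encode_row returns one TSV line, no trailing newline
            buf.write(b'\n')
            lines += 1
            if lines >= batch_size:
                yield buf.getvalue()   # hand the finished batch to the caller
                buf = BytesIO()        # start a fresh buffer; the old one becomes garbage
                lines = 0
        if lines:                      # flush a partial final batch, mirroring the `if lines:` check above
            yield buf.getvalue()

Peak memory is therefore bounded by roughly one batch of encoded rows rather than by the full size of model_instances, which is the point of the commit; callers can keep passing either a list or a lazy generator of instances to Database.insert() exactly as before.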