This commit is contained in:
M1ha 2017-04-06 10:03:08 +05:00
commit 9bca42fe7e
5 changed files with 37 additions and 15 deletions

View File

@ -1,11 +1,13 @@
Change Log Change Log
========== ==========
Unreleased v0.8.1
---------- ------
- Add support for ReplacingMergeTree (leenr) - Add support for ReplacingMergeTree (leenr)
- Fix problem with SELECT WITH TOTALS (pilosus) - Fix problem with SELECT WITH TOTALS (pilosus)
- Update serialization format of DateTimeField to 10 digits, zero padded (nikepan) - Update serialization format of DateTimeField to 10 digits, zero padded (nikepan)
- Greatly improve performance when inserting large strings (credit to M1hacka for identifying the problem)
- Reduce memory footprint of Database.insert()
v0.8.0 v0.8.0
------ ------

View File

@ -50,6 +50,7 @@ class Database(object):
def insert(self, model_instances, batch_size=1000): def insert(self, model_instances, batch_size=1000):
from six import next from six import next
from cStringIO import StringIO
i = iter(model_instances) i = iter(model_instances)
try: try:
first_instance = next(i) first_instance = next(i)
@ -61,22 +62,27 @@ class Database(object):
raise DatabaseException("You can't insert into read only table") raise DatabaseException("You can't insert into read only table")
def gen(): def gen():
yield self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8') buf = StringIO()
buf.write(self._substitute('INSERT INTO $table FORMAT TabSeparated\n', model_class).encode('utf-8'))
first_instance.set_database(self) first_instance.set_database(self)
yield (first_instance.to_tsv(include_readonly=False) + '\n').encode('utf-8') buf.write(first_instance.to_tsv(include_readonly=False).encode('utf-8'))
buf.write('\n')
# Collect lines in batches of batch_size # Collect lines in batches of batch_size
batch = [] lines = 2
for instance in i: for instance in i:
instance.set_database(self) instance.set_database(self)
batch.append(instance.to_tsv(include_readonly=False)) buf.write(instance.to_tsv(include_readonly=False).encode('utf-8'))
if len(batch) >= batch_size: buf.write('\n')
lines += 1
if lines >= batch_size:
# Return the current batch of lines # Return the current batch of lines
yield ('\n'.join(batch) + '\n').encode('utf-8') yield buf.getvalue()
# Start a new batch # Start a new batch
batch = [] buf = StringIO()
lines = 0
# Return any remaining lines in partial batch # Return any remaining lines in partial batch
if batch: if lines:
yield ('\n'.join(batch) + '\n').encode('utf-8') yield buf.getvalue()
self._send(gen()) self._send(gen())
def count(self, model_class, conditions=None): def count(self, model_class, conditions=None):

View File

@ -154,6 +154,11 @@ class BaseIntField(Field):
except: except:
raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value)) raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value))
def to_db_string(self, value, quote=True):
    '''
    Returns the text form of an integer value.
    The value is converted directly, without going through escape:
    numbers contain no special characters and never need quoting,
    so the `quote` argument is ignored.
    '''
    return text_type(value)
def validate(self, value): def validate(self, value):
self._range_check(value, self.min_value, self.max_value) self._range_check(value, self.min_value, self.max_value)
@ -222,6 +227,11 @@ class BaseFloatField(Field):
except: except:
raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value)) raise ValueError('Invalid value for %s - %r' % (self.__class__.__name__, value))
def to_db_string(self, value, quote=True):
    '''
    Returns the text form of a floating point value.
    The value is converted directly, without going through escape:
    numbers contain no special characters and never need quoting,
    so the `quote` argument is ignored.
    '''
    return text_type(value)
class Float32Field(BaseFloatField): class Float32Field(BaseFloatField):

View File

@ -27,6 +27,7 @@ class ModelBase(type):
fields = base_fields + [item for item in attrs.items() if isinstance(item[1], Field)] fields = base_fields + [item for item in attrs.items() if isinstance(item[1], Field)]
fields.sort(key=lambda item: item[1].creation_counter) fields.sort(key=lambda item: item[1].creation_counter)
setattr(new_cls, '_fields', fields) setattr(new_cls, '_fields', fields)
setattr(new_cls, '_writable_fields', [f for f in fields if not f[1].readonly])
return new_cls return new_cls
@classmethod @classmethod
@ -186,7 +187,7 @@ class Model(with_metaclass(ModelBase)):
:param bool include_readonly: If False, returns only fields, that can be inserted into database :param bool include_readonly: If False, returns only fields, that can be inserted into database
''' '''
data = self.__dict__ data = self.__dict__
fields = self._fields if include_readonly else [f for f in self._fields if not f[1].readonly] fields = self._fields if include_readonly else self._writable_fields
return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in fields) return '\t'.join(field.to_db_string(data[name], quote=False) for name, field in fields)
def to_dict(self, include_readonly=True, field_names=None): def to_dict(self, include_readonly=True, field_names=None):
@ -195,7 +196,7 @@ class Model(with_metaclass(ModelBase)):
:param bool include_readonly: If False, returns only fields, that can be inserted into database :param bool include_readonly: If False, returns only fields, that can be inserted into database
:param field_names: An iterable of field names to return :param field_names: An iterable of field names to return
''' '''
fields = self._fields if include_readonly else [f for f in self._fields if not f[1].readonly] fields = self._fields if include_readonly else self._writable_fields
if field_names is not None: if field_names is not None:
fields = [f for f in fields if f[0] in field_names] fields = [f for f in fields if f[0] in field_names]

View File

@ -17,15 +17,18 @@ SPECIAL_CHARS = {
SPECIAL_CHARS_REGEX = re.compile("[" + ''.join(SPECIAL_CHARS.values()) + "]") SPECIAL_CHARS_REGEX = re.compile("[" + ''.join(SPECIAL_CHARS.values()) + "]")
def escape(value, quote=True):
    '''
    If the value is a string, escapes any special characters and optionally
    surrounds it with single quotes. If the value is not a string (e.g. a number),
    converts it to one.

    :param value: the value to convert.
    :param bool quote: if True, string values are wrapped in single quotes.
    :return: the converted value as text.
    '''
    def escape_one(match):
        # Each match is a single special character; SPECIAL_CHARS maps it
        # to its escaped replacement
        return SPECIAL_CHARS[match.group(0)]

    if isinstance(value, string_types):
        # One regex substitution pass over the string instead of rebuilding
        # it character by character; a string without special characters
        # comes back unchanged
        value = SPECIAL_CHARS_REGEX.sub(escape_one, value)
        if quote:
            value = "'" + value + "'"
    return text_type(value)