Use a different schema for file cache which actually persists

Caching the inputFile values would not persist across several
days, so the cache was nearly unnecessary. Saving the id/hash of
the actual inputMedia sent is a much better, persistent idea.
This commit is contained in:
Lonami Exo 2018-01-18 09:52:39 +01:00
parent 428abebed8
commit 55efb2b104
2 changed files with 53 additions and 41 deletions

View File

@ -633,13 +633,6 @@ class TelegramBareClient:
with open(file, 'rb') as stream: with open(file, 'rb') as stream:
file = stream.read() file = stream.read()
hash_md5 = md5(file) hash_md5 = md5(file)
tuple_ = self.session.get_file(hash_md5.digest(), file_size)
if tuple_ and allow_cache:
__log__.info('File was already cached, not uploading again')
return InputFile(name=file_name,
md5_checksum=tuple_[0], id=tuple_[2], parts=tuple_[3])
elif tuple_ and not allow_cache:
self.session.clear_file(hash_md5.digest(), file_size)
else: else:
hash_md5 = None hash_md5 = None
@ -673,9 +666,6 @@ class TelegramBareClient:
if is_large: if is_large:
return InputFileBig(file_id, part_count, file_name) return InputFileBig(file_id, part_count, file_name)
else: else:
self.session.cache_file(
hash_md5.digest(), file_size, file_id, part_count)
return InputFile(file_id, part_count, file_name, return InputFile(file_id, part_count, file_name,
md5_checksum=hash_md5.hexdigest()) md5_checksum=hash_md5.hexdigest())

View File

@ -5,6 +5,7 @@ import sqlite3
import struct import struct
import time import time
from base64 import b64decode from base64 import b64decode
from enum import Enum
from os.path import isfile as file_exists from os.path import isfile as file_exists
from threading import Lock from threading import Lock
@ -12,11 +13,26 @@ from .. import utils
from ..tl import TLObject from ..tl import TLObject
from ..tl.types import ( from ..tl.types import (
PeerUser, PeerChat, PeerChannel, PeerUser, PeerChat, PeerChannel,
InputPeerUser, InputPeerChat, InputPeerChannel InputPeerUser, InputPeerChat, InputPeerChannel,
InputPhoto, InputDocument
) )
EXTENSION = '.session' EXTENSION = '.session'
CURRENT_VERSION = 2 # database version CURRENT_VERSION = 3 # database version
class _SentFileType(Enum):
DOCUMENT = 0
PHOTO = 1
@staticmethod
def from_type(cls):
if cls == InputDocument:
return _SentFileType.DOCUMENT
elif cls == InputPhoto:
return _SentFileType.PHOTO
else:
raise ValueError('The cls must be either InputDocument/InputPhoto')
class Session: class Session:
@ -130,9 +146,10 @@ class Session:
"""sent_files ( """sent_files (
md5_digest blob, md5_digest blob,
file_size integer, file_size integer,
file_id integer, type integer,
part_count integer, id integer,
primary key(md5_digest, file_size) hash integer,
primary key(md5_digest, file_size, type)
)""" )"""
) )
c.execute("insert into version values (?)", (CURRENT_VERSION,)) c.execute("insert into version values (?)", (CURRENT_VERSION,))
@ -171,18 +188,22 @@ class Session:
def _upgrade_database(self, old): def _upgrade_database(self, old):
c = self._conn.cursor() c = self._conn.cursor()
if old == 1: # old == 1 doesn't have the old sent_files so no need to drop
if old == 2:
# Old cache from old sent_files lasts then a day anyway, drop
c.execute('drop table sent_files')
self._create_table(c, """sent_files ( self._create_table(c, """sent_files (
md5_digest blob, md5_digest blob,
file_size integer, file_size integer,
file_id integer, type integer,
part_count integer, id integer,
primary key(md5_digest, file_size) hash integer,
primary key(md5_digest, file_size, type)
)""") )""")
old = 2
c.close() c.close()
def _create_table(self, c, *definitions): @staticmethod
def _create_table(c, *definitions):
""" """
Creates a table given its definition 'name (columns). Creates a table given its definition 'name (columns).
If the sqlite version is >= 3.8.2, it will use "without rowid". If the sqlite version is >= 3.8.2, it will use "without rowid".
@ -420,24 +441,25 @@ class Session:
# File processing # File processing
def get_file(self, md5_digest, file_size): def get_file(self, md5_digest, file_size, cls):
return self._conn.execute( tuple_ = self._conn.execute(
'select * from sent_files ' 'select id, hash from sent_files '
'where md5_digest = ? and file_size = ?', (md5_digest, file_size) 'where md5_digest = ? and file_size = ? and type = ?',
(md5_digest, file_size, _SentFileType.from_type(cls))
).fetchone() ).fetchone()
if tuple_:
# Both allowed classes have (id, access_hash) as parameters
return cls(tuple_[0], tuple_[1])
def cache_file(self, md5_digest, file_size, instance):
if not isinstance(instance, (InputDocument, InputPhoto)):
raise TypeError('Cannot cache %s instance' % type(instance))
def cache_file(self, md5_digest, file_size, file_id, part_count):
with self._db_lock: with self._db_lock:
self._conn.execute( self._conn.execute(
'insert into sent_files values (?,?,?,?)', 'insert into sent_files values (?,?,?,?,?)', (
(md5_digest, file_size, file_id, part_count) md5_digest, file_size,
) _SentFileType.from_type(type(instance)),
self.save() instance.id, instance.access_hash
))
def clear_file(self, md5_digest, file_size):
with self._db_lock:
self._conn.execute(
'delete from sent_files where '
'md5_digest = ? and file_size = ?', (md5_digest, file_size)
)
self.save() self.save()