From a03b1e3b6ccd7f4ebbcbe6d8dba57d6d40a2ec3a Mon Sep 17 00:00:00 2001 From: Andrea Odetti Date: Thu, 10 Oct 2019 19:53:03 +0100 Subject: [PATCH 1/8] Possible example of a readonly cache. --- diskcache/core.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/diskcache/core.py b/diskcache/core.py index c74241e..5bf2125 100644 --- a/diskcache/core.py +++ b/diskcache/core.py @@ -91,6 +91,7 @@ def __repr__(self): u'sqlite_journal_mode': u'wal', u'sqlite_mmap_size': 2 ** 26, # 64mb u'sqlite_synchronous': 1, # NORMAL + u'sqlite_read_only': False, u'disk_min_file_size': 2 ** 15, # 32kb u'disk_pickle_protocol': pickle.HIGHEST_PROTOCOL, } @@ -479,14 +480,16 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): # Set cached attributes: updates settings and sets pragmas. for key, value in sets.items(): - query = 'INSERT OR REPLACE INTO Settings VALUES (?, ?)' - sql(query, (key, value)) - self.reset(key, value) + if not self.sqlite_read_only: + query = 'INSERT OR REPLACE INTO Settings VALUES (?, ?)' + sql(query, (key, value)) + self.reset(key, value, update=not self.sqlite_read_only) for key, value in METADATA.items(): - query = 'INSERT OR IGNORE INTO Settings VALUES (?, ?)' - sql(query, (key, value)) - self.reset(key) + if not self.sqlite_read_only: + query = 'INSERT OR IGNORE INTO Settings VALUES (?, ?)' + sql(query, (key, value)) + self.reset(key, update=not self.sqlite_read_only) (self._page_size,), = sql('PRAGMA page_size').fetchall() @@ -555,10 +558,11 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): # Create tag index if requested. - if self.tag_index: # pylint: disable=no-member - self.create_tag_index() - else: - self.drop_tag_index() + if not self.sqlite_read_only: + if self.tag_index: # pylint: disable=no-member + self.create_tag_index() + else: + self.drop_tag_index() # Close and re-open database connection with given timeout. 
@@ -706,8 +710,9 @@ def _transact(self, retry=False, filename=None): begin = True self._txn_id = tid break - except sqlite3.OperationalError: - if retry: + except sqlite3.OperationalError as e: + # TODO: this is potentially an infinite loop anyway + if retry and 'readonly' not in str(e): continue if filename is not None: _disk_remove(filename) From 4c61ae481cdcf3edb09e77fc3d88d1deb8bd5327 Mon Sep 17 00:00:00 2001 From: Andrea Odetti Date: Fri, 11 Oct 2019 21:02:45 +0100 Subject: [PATCH 2/8] Possible example of a readonly cache. --- aaa.py | 12 ++++++++++++ diskcache/core.py | 22 +++++++++++++++++----- 2 files changed, 29 insertions(+), 5 deletions(-) create mode 100644 aaa.py diff --git a/aaa.py b/aaa.py new file mode 100644 index 0000000..d738c7e --- /dev/null +++ b/aaa.py @@ -0,0 +1,12 @@ +import diskcache + +a = diskcache.Cache('/tmp/abcde', sqlite_read_only=True) + +o = [1, 2, 3, 4] +a['qq'] = o + +for k in a: + print(k) + +o = [1, 2, 3, 4] +a['qqe'] = o diff --git a/diskcache/core.py b/diskcache/core.py index 5bf2125..7a54ba3 100644 --- a/diskcache/core.py +++ b/diskcache/core.py @@ -440,6 +440,8 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): ' and could not be created' % self._directory ) + self.sqlite_read_only = settings.get('sqlite_read_only', False) + sql = self._sql_retry # Setup Settings table. 
@@ -604,11 +606,21 @@ def _con(self): con = getattr(self._local, 'con', None) if con is None: - con = self._local.con = sqlite3.connect( - op.join(self._directory, DBNAME), - timeout=self._timeout, - isolation_level=None, - ) + if self.sqlite_read_only: + p = op.join(self._directory, DBNAME) + uri = f'file:{p}?mode=ro' + con = self._local.con = sqlite3.connect( + uri, + uri=True, + timeout=self._timeout, + isolation_level=None, + ) + else: + con = self._local.con = sqlite3.connect( + op.join(self._directory, DBNAME), + timeout=self._timeout, + isolation_level=None, + ) # Some SQLite pragmas work on a per-connection basis so # query the Settings table and reset the pragmas. The From cc462d3e18d5fd280122207afb2718f487115ccb Mon Sep 17 00:00:00 2001 From: Andrea Odetti Date: Sun, 13 Oct 2019 18:56:43 +0100 Subject: [PATCH 3/8] Rename sqlite_read_only -> read_only as sqlite_ prefix is used for PRAGMAs. --- aaa.py | 2 +- diskcache/core.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/aaa.py b/aaa.py index d738c7e..8b09c0e 100644 --- a/aaa.py +++ b/aaa.py @@ -1,6 +1,6 @@ import diskcache -a = diskcache.Cache('/tmp/abcde', sqlite_read_only=True) +a = diskcache.Cache('/tmp/abcde', read_only=True) o = [1, 2, 3, 4] a['qq'] = o diff --git a/diskcache/core.py b/diskcache/core.py index 7a54ba3..466918d 100644 --- a/diskcache/core.py +++ b/diskcache/core.py @@ -86,12 +86,12 @@ def __repr__(self): u'eviction_policy': u'least-recently-stored', u'size_limit': 2 ** 30, # 1gb u'cull_limit': 10, + u'read_only': False, u'sqlite_auto_vacuum': 1, # FULL u'sqlite_cache_size': 2 ** 13, # 8,192 pages u'sqlite_journal_mode': u'wal', u'sqlite_mmap_size': 2 ** 26, # 64mb u'sqlite_synchronous': 1, # NORMAL - u'sqlite_read_only': False, u'disk_min_file_size': 2 ** 15, # 32kb u'disk_pickle_protocol': pickle.HIGHEST_PROTOCOL, } @@ -440,7 +440,7 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): ' and could not be created' % 
self._directory ) - self.sqlite_read_only = settings.get('sqlite_read_only', False) + self.read_only = settings.get('read_only', False) sql = self._sql_retry @@ -482,16 +482,16 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): # Set cached attributes: updates settings and sets pragmas. for key, value in sets.items(): - if not self.sqlite_read_only: + if not self.read_only: query = 'INSERT OR REPLACE INTO Settings VALUES (?, ?)' sql(query, (key, value)) - self.reset(key, value, update=not self.sqlite_read_only) + self.reset(key, value, update=not self.read_only) for key, value in METADATA.items(): - if not self.sqlite_read_only: + if not self.read_only: query = 'INSERT OR IGNORE INTO Settings VALUES (?, ?)' sql(query, (key, value)) - self.reset(key, update=not self.sqlite_read_only) + self.reset(key, update=not self.read_only) (self._page_size,), = sql('PRAGMA page_size').fetchall() @@ -560,7 +560,7 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): # Create tag index if requested. - if not self.sqlite_read_only: + if not self.read_only: if self.tag_index: # pylint: disable=no-member self.create_tag_index() else: @@ -606,7 +606,7 @@ def _con(self): con = getattr(self._local, 'con', None) if con is None: - if self.sqlite_read_only: + if self.read_only: p = op.join(self._directory, DBNAME) uri = f'file:{p}?mode=ro' con = self._local.con = sqlite3.connect( From 19a278174f44d78c4b580619adf874a8da6cffb7 Mon Sep 17 00:00:00 2001 From: Andrea Odetti Date: Sun, 13 Oct 2019 18:57:51 +0100 Subject: [PATCH 4/8] Add comment about sqlite PRAGMAs. 
--- diskcache/core.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/diskcache/core.py b/diskcache/core.py index 466918d..1bc82cb 100644 --- a/diskcache/core.py +++ b/diskcache/core.py @@ -80,6 +80,8 @@ def __repr__(self): MODE_TEXT = 3 MODE_PICKLE = 4 +# settings starting with sqlite_ are used as PRAGMAs in sqlite +# use with care DEFAULT_SETTINGS = { u'statistics': 0, # False u'tag_index': 0, # False From a23560b8529af66e346262664275bd2871445af4 Mon Sep 17 00:00:00 2001 From: Andrea Odetti Date: Sun, 13 Oct 2019 19:05:01 +0100 Subject: [PATCH 5/8] Fix formatting for older versions of python. --- diskcache/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/diskcache/core.py b/diskcache/core.py index 1bc82cb..42e3ce6 100644 --- a/diskcache/core.py +++ b/diskcache/core.py @@ -442,6 +442,7 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): ' and could not be created' % self._directory ) + # this must be processed now because it is used in Cache._con self.read_only = settings.get('read_only', False) sql = self._sql_retry @@ -610,7 +611,7 @@ def _con(self): if con is None: if self.read_only: p = op.join(self._directory, DBNAME) - uri = f'file:{p}?mode=ro' + uri = 'file:%s?mode=ro' % p con = self._local.con = sqlite3.connect( uri, uri=True, From b2c562dc20248f767c6e63d4f810edf91d75bc2c Mon Sep 17 00:00:00 2001 From: Andrea Odetti Date: Sun, 13 Oct 2019 20:45:47 +0100 Subject: [PATCH 6/8] Use sqlite PRAGMA. 
--- aaa.py | 6 +++--- diskcache/core.py | 42 ++++++++++++++++++++---------------------- 2 files changed, 23 insertions(+), 25 deletions(-) diff --git a/aaa.py b/aaa.py index 8b09c0e..ce156f3 100644 --- a/aaa.py +++ b/aaa.py @@ -1,12 +1,12 @@ import diskcache -a = diskcache.Cache('/tmp/abcde', read_only=True) +a = diskcache.Cache('/tmp/abcde', sqlite_query_only=True) o = [1, 2, 3, 4] -a['qq'] = o +# a['qq'] = o for k in a: print(k) o = [1, 2, 3, 4] -a['qqe'] = o +# a['qqe'] = o diff --git a/diskcache/core.py b/diskcache/core.py index 42e3ce6..a869ee2 100644 --- a/diskcache/core.py +++ b/diskcache/core.py @@ -88,7 +88,7 @@ def __repr__(self): u'eviction_policy': u'least-recently-stored', u'size_limit': 2 ** 30, # 1gb u'cull_limit': 10, - u'read_only': False, + u'sqlite_query_only': False, u'sqlite_auto_vacuum': 1, # FULL u'sqlite_cache_size': 2 ** 13, # 8,192 pages u'sqlite_journal_mode': u'wal', @@ -443,7 +443,7 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): ) # this must be processed now because it is used in Cache._con - self.read_only = settings.get('read_only', False) + self.open_in_query_only = settings.get('sqlite_query_only', False) sql = self._sql_retry @@ -485,16 +485,16 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): # Set cached attributes: updates settings and sets pragmas. 
for key, value in sets.items(): - if not self.read_only: + if not self.sqlite_query_only: query = 'INSERT OR REPLACE INTO Settings VALUES (?, ?)' sql(query, (key, value)) - self.reset(key, value, update=not self.read_only) + self.reset(key, value, update=not self.sqlite_query_only) for key, value in METADATA.items(): - if not self.read_only: + if not self.sqlite_query_only: query = 'INSERT OR IGNORE INTO Settings VALUES (?, ?)' sql(query, (key, value)) - self.reset(key, update=not self.read_only) + self.reset(key, update=not self.sqlite_query_only) (self._page_size,), = sql('PRAGMA page_size').fetchall() @@ -563,7 +563,7 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): # Create tag index if requested. - if not self.read_only: + if not self.sqlite_query_only: if self.tag_index: # pylint: disable=no-member self.create_tag_index() else: @@ -572,6 +572,8 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): # Close and re-open database connection with given timeout. self.close() + # PRAGMAs are not reapplied to the next connect + # which means transient PRAGMAs as query_only are out of sync self._timeout = timeout self._sql # pylint: disable=pointless-statement @@ -609,21 +611,11 @@ def _con(self): con = getattr(self._local, 'con', None) if con is None: - if self.read_only: - p = op.join(self._directory, DBNAME) - uri = 'file:%s?mode=ro' % p - con = self._local.con = sqlite3.connect( - uri, - uri=True, - timeout=self._timeout, - isolation_level=None, - ) - else: - con = self._local.con = sqlite3.connect( - op.join(self._directory, DBNAME), - timeout=self._timeout, - isolation_level=None, - ) + con = self._local.con = sqlite3.connect( + op.join(self._directory, DBNAME), + timeout=self._timeout, + isolation_level=None, + ) # Some SQLite pragmas work on a per-connection basis so # query the Settings table and reset the pragmas. 
The @@ -640,6 +632,12 @@ def _con(self): if key.startswith('sqlite_'): self.reset(key, value, update=False) + # must be done *after* the settings update + # as the value of this pragma from settings is always 0 + if self.open_in_query_only: + read_only_pragma = 'PRAGMA query_only = 1' + con.execute(read_only_pragma) + return con From fdb2a98683f5cb9fc951e728076c47965e0c2aae Mon Sep 17 00:00:00 2001 From: Andrea Odetti Date: Sun, 13 Oct 2019 20:51:46 +0100 Subject: [PATCH 7/8] Fix comment. --- diskcache/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/diskcache/core.py b/diskcache/core.py index a869ee2..96cd2eb 100644 --- a/diskcache/core.py +++ b/diskcache/core.py @@ -573,7 +573,7 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): self.close() # PRAGMAs are not reapplied to the next connect - # which means transient PRAGMAs as query_only are out of sync + # which means transient PRAGMAs as query_only are out of sync with member variables self._timeout = timeout self._sql # pylint: disable=pointless-statement From f8681ec2f09b1fa254cf2e55a7b0124c7fff66d5 Mon Sep 17 00:00:00 2001 From: Andrea Odetti Date: Sun, 13 Oct 2019 21:23:08 +0100 Subject: [PATCH 8/8] Fix synchronisation of pragma query-only. --- aaa.py | 4 +++- diskcache/core.py | 9 +++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/aaa.py b/aaa.py index ce156f3..4248a3f 100644 --- a/aaa.py +++ b/aaa.py @@ -1,6 +1,7 @@ import diskcache a = diskcache.Cache('/tmp/abcde', sqlite_query_only=True) +print(f'Query-only: ', a.sqlite_query_only) o = [1, 2, 3, 4] # a['qq'] = o @@ -9,4 +10,5 @@ print(k) o = [1, 2, 3, 4] -# a['qqe'] = o +a['qqe'] = o + diff --git a/diskcache/core.py b/diskcache/core.py index 96cd2eb..0dc5791 100644 --- a/diskcache/core.py +++ b/diskcache/core.py @@ -572,8 +572,6 @@ def __init__(self, directory=None, timeout=60, disk=Disk, **settings): # Close and re-open database connection with given timeout. 
self.close() - # PRAGMAs are not reapplied to the next connect - # which means transient PRAGMAs as query_only are out of sync with member variables self._timeout = timeout self._sql # pylint: disable=pointless-statement @@ -632,11 +630,10 @@ def _con(self): if key.startswith('sqlite_'): self.reset(key, value, update=False) - # must be done *after* the settings update - # as the value of this pragma from settings is always 0 + # the settings read from the DB never contain query_only + # so we manually force it here, if it has been passed as a parameter if self.open_in_query_only: - read_only_pragma = 'PRAGMA query_only = 1' - con.execute(read_only_pragma) + self.reset('sqlite_query_only', 1, update=False) return con