From 55261f3172a18b10caa04db6de7b5f4680b9535f Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 27 Jul 2020 12:47:26 +0300 Subject: [PATCH 01/12] loadAt loadAt is new optional storage interface that is intended to replace loadBefore with more clean and uniform semantic. Compared to loadBefore, loadAt: 1) returns data=None and serial of the removal, when loaded object was found to be deleted. loadBefore is returning only data=None in such case. This loadAt property allows to fix DemoStorage data corruption when whiteouts in overlay part were not previously correctly taken into account. https://github.com/zopefoundation/ZODB/issues/318 2) for regular data records, does not require storages to return next_serial, in addition to (data, serial). loadBefore requirement to return both serial and next_serial is constraining storages unnecessarily, and, while for FileStorage it is free to implement, for other storages it is not - for example for NEO and RelStorage, finding out next_serial, after looking up oid@at data record, costs one more SQL query: https://lab.nexedi.com/nexedi/neoppod/blob/fb746e6b/neo/storage/database/mysqldb.py#L484-508 https://lab.nexedi.com/nexedi/neoppod/blob/fb746e6b/neo/storage/database/mysqldb.py#L477-482 https://github.com/zodb/relstorage/blob/3.1.1-1-ge7628f9/src/relstorage/storage/load.py#L259-L264 https://github.com/zodb/relstorage/blob/3.1.1-1-ge7628f9/src/relstorage/adapters/mover.py#L177-L199 next_serial is not only about execution overhead - it is semantically redundant to be there and can be removed from load return. The reason I say that next_serial can be removed is that in ZODB/py the only place, that I could find, where next_serial is used on client side is in client cache (e.g. in NEO client cache), and that cache can be remade to work without using that next_serial at all. In simple words whenever after loadAt(oid, at) -> (data, serial) query, the cache can remember data for oid in [serial, at] range. Next, when invalidation message from server is received, cache entries, that had at == client_head, are extended (at -> new_head) for oids that are not present in invalidation message, while for oids that are present in invalidation message no such extension is done. This allows to maintain cache in correct state, invalidate it when there is a need to invalidate, and not to throw away cache entries that should remain live. This of course requires ZODB server to include both modified and just-created objects into invalidation messages ( https://github.com/zopefoundation/ZEO/pull/160 , https://github.com/zopefoundation/ZODB/pull/319 ). Switching to loadAt should thus allow storages like NEO and, maybe, RelStorage, to do 2x less SQL queries on every object access. https://github.com/zopefoundation/ZODB/issues/318#issuecomment-657685745 In other words loadAt unifies return signature to always be (data, serial) instead of POSKeyError object does not exist at all None object was removed (data, serial, next_serial) regular data record used by loadBefore. This patch: - introduces new interface. - introduces ZODB.utils.loadAt helper, that uses either storage.loadAt, or, if the storage does not implement loadAt interface, tries to mimic loadAt semantic via storage.loadBefore to possible extent + emits corresponding warning. - converts MVCCAdapter to use loadAt instead of loadBefore. - changes DemoStorage to use loadAt, and this way fixes above-mentioned data corruption issue; adds corresponding test; converts DemoStorage.loadBefore to be a wrapper around DemoStorage.loadAt. - adds loadAt implementation to FileStorage and MappingStorage. - adapts other tests/code correspondingly. /cc @jimfulton, @jamadden, @vpelletier, @jmuchemb, @arnaud-fontaine, @gidzit, @klawlf82, @hannosch --- src/ZODB/BaseStorage.py | 7 +- src/ZODB/DB.py | 3 +- src/ZODB/DemoStorage.py | 85 +++++++++++++----------- src/ZODB/FileStorage/FileStorage.py | 36 ++++++++++ src/ZODB/MappingStorage.py | 19 ++++++ src/ZODB/blob.py | 7 +- src/ZODB/historical_connections.rst | 9 ++- src/ZODB/interfaces.py | 21 ++++++ src/ZODB/mvccadapter.py | 32 ++++++--- src/ZODB/tests/IExternalGC.test | 18 +++-- src/ZODB/tests/MVCCMappingStorage.py | 1 + src/ZODB/tests/hexstorage.py | 9 ++- src/ZODB/tests/testConnection.py | 9 +++ src/ZODB/tests/testDemoStorage.py | 99 ++++++++++++++++++++++++++++ src/ZODB/tests/test_storage.py | 19 +++++- src/ZODB/tests/testmvcc.py | 6 +- src/ZODB/utils.py | 52 +++++++++++++-- 17 files changed, 353 insertions(+), 79 deletions(-) diff --git a/src/ZODB/BaseStorage.py b/src/ZODB/BaseStorage.py index ec2864b47..0f5140f5b 100644 --- a/src/ZODB/BaseStorage.py +++ b/src/ZODB/BaseStorage.py @@ -59,7 +59,7 @@ class BaseStorage(UndoLogCompatible): If it stores multiple revisions, it should implement loadSerial() - loadBefore() + loadAt() Each storage will have two locks that are accessed via lock acquire and release methods bound to the instance. (Yuck.) @@ -267,9 +267,8 @@ def loadSerial(self, oid, serial): raise POSException.Unsupported( "Retrieval of historical revisions is not supported") - def loadBefore(self, oid, tid): - """Return most recent revision of oid before tid committed.""" - return None + # do not provide loadAt/loadBefore here in BaseStorage - if child forgets + # to override it - storage will always return "no data" instead of failing. def copyTransactionsFrom(self, other, verbose=0): """Copy transactions from another storage. diff --git a/src/ZODB/DB.py b/src/ZODB/DB.py index 084b5f273..4981cc1d2 100644 --- a/src/ZODB/DB.py +++ b/src/ZODB/DB.py @@ -726,8 +726,7 @@ def open(self, transaction_manager=None, at=None, before=None): - `before`: like `at`, but opens the readonly state before the tid or datetime. """ - # `at` is normalized to `before`, since we use storage.loadBefore - # as the underlying implementation of both. + # `at` is normalized to `before`. before = getTID(at, before) if (before is not None and before > self.lastTransaction() and diff --git a/src/ZODB/DemoStorage.py b/src/ZODB/DemoStorage.py index 439f07850..625fd570c 100644 --- a/src/ZODB/DemoStorage.py +++ b/src/ZODB/DemoStorage.py @@ -24,6 +24,7 @@ import random import weakref import tempfile +import warnings import ZODB.BaseStorage import ZODB.blob import ZODB.interfaces @@ -217,45 +218,55 @@ def __len__(self): # still want load for old clients (e.g. zeo servers) load = load_current - def loadBefore(self, oid, tid): - try: - result = self.changes.loadBefore(oid, tid) - except ZODB.POSException.POSKeyError: - # The oid isn't in the changes, so defer to base - return self.base.loadBefore(oid, tid) + def loadAt(self, oid, at): + data, serial = ZODB.utils.loadAt(self.changes, oid, at) + if (data is not None) or (serial != ZODB.utils.z64): + # object is present in changes either as data or deletion record. + return data, serial - if result is None: - # The oid *was* in the changes, but there aren't any - # earlier records. Maybe there are in the base. - try: - result = self.base.loadBefore(oid, tid) - except ZODB.POSException.POSKeyError: - # The oid isn't in the base, so None will be the right result - pass + # object is not present in changes at all - use base + return ZODB.utils.loadAt(self.base, oid, at) + + def loadBefore(self, oid, before): + warnings.warn("loadBefore is deprecated - use loadAt instead", + DeprecationWarning, stacklevel=2) + p64 = ZODB.utils.p64 + u64 = ZODB.utils.u64 + + if before in (maxtid, ZODB.utils.z64): + at = before + else: + at = p64(u64(before)-1) + + data, serial = self.loadAt(oid, at) + + # find out next_serial. + # it is ok to use dumb/slow implementation since loadBefore should not + # be used and is provided only for backward compatibility. + next_serial = maxtid + while 1: + _, s = self.loadAt(oid, p64(u64(next_serial)-1)) + assert s >= serial + if s == serial: + # found - next_serial is serial of the next data record + break + next_serial = s + + if next_serial == maxtid: + next_serial = None + + # next_serial found -> return/raise what loadBefore users expect + if data is None: + if next_serial is None: + # object was never created + raise ZODB.POSException.POSKeyError(oid) else: - if result and not result[-1]: - # The oid is current in the base. We need to find - # the end tid in the base by fining the first tid - # in the changes. Unfortunately, there isn't an - # api for this, so we have to walk back using - # loadBefore. - - if tid == maxtid: - # Special case: we were looking for the - # current value. We won't find anything in - # changes, so we're done. - return result - - end_tid = maxtid - t = self.changes.loadBefore(oid, end_tid) - while t: - end_tid = t[1] - t = self.changes.loadBefore(oid, end_tid) - result = result[:2] + ( - end_tid if end_tid != maxtid else None, - ) - - return result + # object was deleted + return None + + # regular data record + return data, serial, next_serial + def loadBlob(self, oid, serial): try: diff --git a/src/ZODB/FileStorage/FileStorage.py b/src/ZODB/FileStorage/FileStorage.py index 7b84d92a2..4f6d1fd44 100644 --- a/src/ZODB/FileStorage/FileStorage.py +++ b/src/ZODB/FileStorage/FileStorage.py @@ -21,6 +21,7 @@ import logging import os import time +import warnings from struct import pack from struct import unpack @@ -57,6 +58,7 @@ from ZODB.interfaces import IStorageIteration from ZODB.interfaces import IStorageRestoreable from ZODB.interfaces import IStorageUndoable +from ZODB.interfaces import IStorageLoadAt from ZODB.POSException import ConflictError from ZODB.POSException import MultipleUndoErrors from ZODB.POSException import POSKeyError @@ -133,6 +135,7 @@ def __init__(self, afile): IStorageCurrentRecordIteration, IExternalGC, IStorage, + IStorageLoadAt, ) class FileStorage( FileStorageFormatter, @@ -559,7 +562,40 @@ def loadSerial(self, oid, serial): else: return self._loadBack_impl(oid, h.back)[0] + def loadAt(self, oid, at): + """loadAt implements IStorageLoadAt.""" + with self._files.get() as _file: + try: + pos = self._lookup_pos(oid) + except POSKeyError: + # object does not exist + return None, z64 + + while 1: + h = self._read_data_header(pos, oid, _file) + if h.tid <= at: + break + pos = h.prev + if not pos: + # object not yet created as of at + return None, z64 + + # h is the most recent DataHeader with .tid <= at + if h.plen: + # regular data record + return _file.read(h.plen), h.tid + elif h.back: + # backpointer + data, _, _, _ = self._loadBack_impl(oid, h.back, + fail=False, _file=_file) + return data, h.tid + else: + # deletion + return None, h.tid + def loadBefore(self, oid, tid): + warnings.warn("loadBefore is deprecated - use loadAt instead", + DeprecationWarning, stacklevel=2) with self._files.get() as _file: pos = self._lookup_pos(oid) end_tid = None diff --git a/src/ZODB/MappingStorage.py b/src/ZODB/MappingStorage.py index 8d74bb81b..6d85dd9c8 100644 --- a/src/ZODB/MappingStorage.py +++ b/src/ZODB/MappingStorage.py @@ -19,6 +19,7 @@ import BTrees import time +import warnings import ZODB.BaseStorage import ZODB.interfaces import ZODB.POSException @@ -30,6 +31,7 @@ @zope.interface.implementer( ZODB.interfaces.IStorage, ZODB.interfaces.IStorageIteration, + ZODB.interfaces.IStorageLoadAt, ) class MappingStorage(object): """In-memory storage implementation @@ -148,9 +150,26 @@ def __len__(self): load = ZODB.utils.load_current + # ZODB.interfaces.IStorageLoadAt + @ZODB.utils.locked(opened) + def loadAt(self, oid, at): + z64 = ZODB.utils.z64 + tid_data = self._data.get(oid) + if not tid_data: + return None, z64 + if at == z64: + return None, z64 + tids_at = tid_data.keys(None, at) + if not tids_at: + return None, z64 + serial = tids_at[-1] + return tid_data[serial], serial + # ZODB.interfaces.IStorage @ZODB.utils.locked(opened) def loadBefore(self, oid, tid): + warnings.warn("loadBefore is deprecated - use loadAt instead", + DeprecationWarning, stacklevel=2) tid_data = self._data.get(oid) if tid_data: before = ZODB.utils.u64(tid) diff --git a/src/ZODB/blob.py b/src/ZODB/blob.py index 0a1420737..cf1b90e77 100644 --- a/src/ZODB/blob.py +++ b/src/ZODB/blob.py @@ -866,10 +866,10 @@ def undo(self, serial_id, transaction): for oid in self.fshelper.getOIDsForSerial(serial_id): # we want to find the serial id of the previous revision # of this blob object. - load_result = self.loadBefore(oid, serial_id) - - if load_result is None: + at_before = utils.p64(utils.u64(serial_id)-1) + _, serial_before = utils.loadAt(self, oid, at_before) + if serial_before == utils.z64: # There was no previous revision of this blob # object. The blob was created in the transaction # represented by serial_id. We copy the blob data @@ -884,7 +884,6 @@ def undo(self, serial_id, transaction): # transaction implied by "serial_id". We copy the blob # data to a new file that references the undo transaction # in case a user wishes to undo this undo. - data, serial_before, serial_after = load_result orig_fn = self.fshelper.getBlobFilename(oid, serial_before) new_fn = self.fshelper.getBlobFilename(oid, undo_serial) with open(orig_fn, "rb") as orig: diff --git a/src/ZODB/historical_connections.rst b/src/ZODB/historical_connections.rst index 14a6d4f25..a294718b2 100644 --- a/src/ZODB/historical_connections.rst +++ b/src/ZODB/historical_connections.rst @@ -36,7 +36,7 @@ development continues on a "development" head. A database can be opened historically ``at`` or ``before`` a given transaction serial or datetime. Here's a simple example. It should work with any storage -that supports ``loadBefore``. +that supports ``loadAt`` or ``loadBefore``. We'll begin our example with a fairly standard set up. We @@ -138,10 +138,9 @@ root. >>> historical_conn.root()['first']['count'] 0 -In fact, ``at`` arguments are translated into ``before`` values because the -underlying mechanism is a storage's loadBefore method. When you look at a -connection's ``before`` attribute, it is normalized into a ``before`` serial, -no matter what you pass into ``db.open``. +In fact, ``at`` arguments are translated into ``before`` values. +When you look at a connection's ``before`` attribute, it is normalized into a +``before`` serial, no matter what you pass into ``db.open``. >>> print(conn.before) None diff --git a/src/ZODB/interfaces.py b/src/ZODB/interfaces.py index e73016d78..ceeab9415 100644 --- a/src/ZODB/interfaces.py +++ b/src/ZODB/interfaces.py @@ -710,6 +710,9 @@ def __len__(): def loadBefore(oid, tid): """Load the object data written before a transaction id + ( This method is deprecated and kept for backward-compatibility. + Please use loadAt instead. ) + If there isn't data before the object before the given transaction, then None is returned, otherwise three values are returned: @@ -886,6 +889,24 @@ def prefetch(oids, tid): more than once. """ +class IStorageLoadAt(Interface): + + def loadAt(oid, at): # -> (data, serial) + """Load object data as observed at given database state. + + loadAt returns data for object with given object ID as observed by + database state ≤ at. Two values are returned: + + - The data record, + - The transaction ID of the data record. + + If the object does not exist, or is deleted as of `at` database state, + loadAt returns data=None, and serial indicates transaction ID of the + most recent deletion done in transaction with ID ≤ at, or null tid if + there is no such deletion. + + Note: no POSKeyError is raised even if object id is not in the storage. + """ class IMultiCommitStorage(IStorage): """A multi-commit storage can commit multiple transactions at once. diff --git a/src/ZODB/mvccadapter.py b/src/ZODB/mvccadapter.py index 56e95c09f..5b3088993 100644 --- a/src/ZODB/mvccadapter.py +++ b/src/ZODB/mvccadapter.py @@ -10,7 +10,7 @@ import zope.interface from . import interfaces, serialize, POSException -from .utils import p64, u64, Lock, oid_repr, tid_repr +from .utils import p64, u64, z64, maxtid, Lock, loadAt, oid_repr, tid_repr class Base(object): @@ -99,7 +99,7 @@ class MVCCAdapterInstance(Base): 'checkCurrentSerialInTransaction', 'tpc_abort', ) - _start = None # Transaction start time + _start = None # Transaction start time (before) _ltid = b'' # Last storage transaction id def __init__(self, base): @@ -151,8 +151,20 @@ def poll_invalidations(self): def load(self, oid): assert self._start is not None - r = self._storage.loadBefore(oid, self._start) - if r is None: + at = p64(u64(self._start)-1) + data, serial = loadAt(self._storage, oid, at) + if data is None: + # raise POSKeyError if object does not exist at all + # TODO raise POSKeyError always and switch to raising ReadOnlyError only when + # actually detecting that load is being affected by simultaneous pack (see below). + if serial == z64: + # XXX second call to loadAt - it will become unneeded once we + # switch to raising POSKeyError. + _, serial_exists = loadAt(self._storage, oid, maxtid) + if serial_exists == z64: + # object does not exist at all + raise POSException.POSKeyError(oid) + # object was deleted or not-yet-created. # raise ReadConflictError - not - POSKeyError due to backward # compatibility: a pack(t+δ) could be running simultaneously to our @@ -174,11 +186,14 @@ def load(self, oid): # whether pack is/was actually running and its details, take that # into account, and raise ReadConflictError only in the presence of # database being simultaneously updated from back of its log. + # + # See https://github.com/zopefoundation/ZODB/pull/322 for + # preliminary steps in this direction. raise POSException.ReadConflictError( "load %s @%s: object deleted, likely by simultaneous pack" % (oid_repr(oid), tid_repr(p64(u64(self._start) - 1)))) - return r[:2] + return data, serial def prefetch(self, oids): try: @@ -257,10 +272,11 @@ def poll_invalidations(self): new_oid = pack = store = read_only_writer def load(self, oid, version=''): - r = self._storage.loadBefore(oid, self._before) - if r is None: + at = p64(u64(self._before)-1) + data, serial = loadAt(self._storage, oid, at) + if data is None: raise POSException.POSKeyError(oid) - return r[:2] + return data, serial class UndoAdapterInstance(Base): diff --git a/src/ZODB/tests/IExternalGC.test b/src/ZODB/tests/IExternalGC.test index 52983d36f..f2296bea0 100644 --- a/src/ZODB/tests/IExternalGC.test +++ b/src/ZODB/tests/IExternalGC.test @@ -16,6 +16,7 @@ A create_storage function is provided that creates a storage. >>> transaction.commit() >>> oid0 = conn.root()[0]._p_oid >>> oid1 = conn.root()[1]._p_oid + >>> atLive = conn.root()._p_serial >>> del conn.root()[0] >>> del conn.root()[1] >>> transaction.commit() @@ -66,9 +67,10 @@ Now if we try to load data for the objects, we get a POSKeyError: We can still get the data if we load before the time we deleted. - >>> storage.loadBefore(oid0, conn.root()._p_serial) == (p0, s0, tid) + >>> from ZODB.utils import loadAt, z64 + >>> loadAt(storage, oid0, atLive) == (p0, s0) True - >>> storage.loadBefore(oid1, conn.root()._p_serial) == (p1, s1, tid) + >>> loadAt(storage, oid1, atLive) == (p1, s1) True >>> with open(storage.loadBlob(oid1, s1)) as fp: fp.read() 'some data' @@ -92,15 +94,11 @@ gone: ... POSKeyError: ... - >>> storage.loadBefore(oid0, conn.root()._p_serial) # doctest: +ELLIPSIS - Traceback (most recent call last): - ... - POSKeyError: ... + >>> loadAt(storage, oid0, atLive) == (None, z64) + True - >>> storage.loadBefore(oid1, conn.root()._p_serial) # doctest: +ELLIPSIS - Traceback (most recent call last): - ... - POSKeyError: ... + >>> loadAt(storage, oid1, atLive) == (None, z64) + True >>> storage.loadBlob(oid1, s1) # doctest: +ELLIPSIS Traceback (most recent call last): diff --git a/src/ZODB/tests/MVCCMappingStorage.py b/src/ZODB/tests/MVCCMappingStorage.py index e87b0be80..4c76b12a9 100644 --- a/src/ZODB/tests/MVCCMappingStorage.py +++ b/src/ZODB/tests/MVCCMappingStorage.py @@ -46,6 +46,7 @@ def new_instance(self): inst.new_oid = self.new_oid inst.pack = self.pack inst.loadBefore = self.loadBefore + inst.loadAt = self.loadAt inst._ltid = self._ltid inst._main_lock = self._lock return inst diff --git a/src/ZODB/tests/hexstorage.py b/src/ZODB/tests/hexstorage.py index f27c0e3ac..b4f556a7f 100644 --- a/src/ZODB/tests/hexstorage.py +++ b/src/ZODB/tests/hexstorage.py @@ -39,6 +39,13 @@ def __init__(self, base): setattr(self, name, v) zope.interface.directlyProvides(self, zope.interface.providedBy(base)) + if hasattr(base, 'loadAt') and 'loadAt' not in self.copied_methods: + def loadAt(oid, at): + data, serial = self.base.loadAt(oid, at) + if data is not None: + data = unhexlify(data[2:]) + return data, serial + self.loadAt = loadAt def __getattr__(self, name): return getattr(self.base, name) @@ -130,7 +137,7 @@ class ServerHexStorage(HexStorage): """ copied_methods = HexStorage.copied_methods + ( - 'load', 'loadBefore', 'loadSerial', 'store', 'restore', + 'load', 'loadAt', 'loadBefore', 'loadSerial', 'store', 'restore', 'iterator', 'storeBlob', 'restoreBlob', 'record_iternext', ) diff --git a/src/ZODB/tests/testConnection.py b/src/ZODB/tests/testConnection.py index 7d36cca63..a579c98d8 100644 --- a/src/ZODB/tests/testConnection.py +++ b/src/ZODB/tests/testConnection.py @@ -1314,7 +1314,16 @@ def load(self, oid, version=''): raise TypeError('StubStorage does not support versions.') return self._data[oid] + def loadAt(self, oid, at): + try: + data, serial = self._transdata[oid] + except KeyError: + return None, z64 + return data, serial + def loadBefore(self, oid, tid): + warnings.warn("loadBefore is deprecated - use loadAt instead", + DeprecationWarning, stacklevel=2) return self._data[oid] + (None, ) def store(self, oid, serial, p, version, transaction): diff --git a/src/ZODB/tests/testDemoStorage.py b/src/ZODB/tests/testDemoStorage.py index 25d32e217..90d4cb185 100644 --- a/src/ZODB/tests/testDemoStorage.py +++ b/src/ZODB/tests/testDemoStorage.py @@ -23,6 +23,7 @@ StorageTestBase, Synchronization, ) +from ZODB.tests.MinPO import MinPO import os if os.environ.get('USE_ZOPE_TESTING_DOCTEST'): @@ -33,7 +34,9 @@ import re import transaction import unittest +import ZODB.Connection import ZODB.DemoStorage +import ZODB.FileStorage import ZODB.tests.hexstorage import ZODB.tests.util import ZODB.utils @@ -264,6 +267,101 @@ def load_before_base_storage_current(): >>> base.close() """ +# additional DemoStorage tests that do not fit into common DemoStorageTests setup. +class DemoStorageTests2(ZODB.tests.util.TestCase): + def checkLoadAfterDelete(self): + """Verify that DemoStorage correctly handles load requests for objects + deleted in read-write part of the storage. + + https://github.com/zopefoundation/ZODB/issues/318 + """ + FileStorage = ZODB.FileStorage.FileStorage + DemoStorage = ZODB.DemoStorage.DemoStorage + TransactionMetaData = ZODB.Connection.TransactionMetaData + + # mkbase prepares base part of the storage. + def mkbase(): # -> zbase + zbase = FileStorage("base.fs") + db = DB(zbase) + conn = db.open() + root = conn.root() + + root['obj'] = obj = MinPO(0) + transaction.commit() + + obj.value += 1 + transaction.commit() + + conn.close() + db.close() + zbase.close() + + zbase = FileStorage("base.fs", read_only=True) + return zbase + + # prepare base + overlay + zbase = mkbase() + zoverlay = FileStorage("overlay.fs") + zdemo = DemoStorage(base=zbase, changes=zoverlay) + + # overlay: modify obj and root + db = DB(zdemo) + conn = db.open() + root = conn.root() + obj = root['obj'] + oid = obj._p_oid + obj.value += 1 + # modify root as well so that there is root revision saved in overlay that points to obj + root['x'] = 1 + transaction.commit() + atLive = obj._p_serial + + # overlay: delete obj from root making it a garbage + del root['obj'] + transaction.commit() + atUnlink = root._p_serial + + # unmount DemoStorage + conn.close() + db.close() + zdemo.close() # closes zbase and zoverlay as well + del zbase, zoverlay + + # simulate GC on base+overlay + zoverlay = FileStorage("overlay.fs") + txn = transaction.get() + txn_meta = TransactionMetaData(txn.user, txn.description, txn.extension) + zoverlay.tpc_begin(txn_meta) + zoverlay.deleteObject(oid, atLive, txn_meta) + zoverlay.tpc_vote(txn_meta) + atGC = zoverlay.tpc_finish(txn_meta) + + # remount base+overlay + zbase = FileStorage("base.fs", read_only=True) + zdemo = ZODB.DemoStorage.DemoStorage(base=zbase, changes=zoverlay) + db = DB(zdemo) + + # verify: + # load(obj, atLive) -> 2 + # load(obj, atUnlink) -> 2 (garbage, but still in DB) + # load(obj, atGC) -> POSKeyError, not 1 from base + def getObjAt(at): + conn = db.open(at=at) + obj = conn.get(oid) + self.assertIsInstance(obj, MinPO) + v = obj.value + conn.close() + return v + + self.assertEqual(getObjAt(atLive), 2) + self.assertEqual(getObjAt(atUnlink), 2) + self.assertRaises(ZODB.POSException.POSKeyError, getObjAt, atGC) + + # end + db.close() + zdemo.close() # closes zbase and zoverlay as well + + def test_suite(): suite = unittest.TestSuite(( doctest.DocTestSuite( @@ -285,4 +383,5 @@ def test_suite(): 'check')) suite.addTest(unittest.makeSuite(DemoStorageWrappedAroundHexMappingStorage, 'check')) + suite.addTest(unittest.makeSuite(DemoStorageTests2, 'check')) return suite diff --git a/src/ZODB/tests/test_storage.py b/src/ZODB/tests/test_storage.py index 6cb47ccb1..ae26a3abe 100644 --- a/src/ZODB/tests/test_storage.py +++ b/src/ZODB/tests/test_storage.py @@ -20,10 +20,11 @@ """ import bisect import unittest +import warnings from ZODB.BaseStorage import BaseStorage from ZODB import POSException -from ZODB.utils import z64 +from ZODB.utils import p64, u64, z64 from ZODB.tests import StorageTestBase from ZODB.tests import BasicStorage, MTStorage, Synchronization @@ -105,6 +106,11 @@ def _finish(self, tid, u, d, e): self._ltid = self._tid def loadBefore(self, the_oid, the_tid): + warnings.warn("loadBefore is deprecated - use loadAt instead", + DeprecationWarning, stacklevel=2) + return self._loadBefore(the_oid, the_tid) + + def _loadBefore(self, the_oid, the_tid): # It's okay if loadBefore() is really expensive, because this # storage is just used for testing. with self._lock: @@ -126,6 +132,17 @@ def loadBefore(self, the_oid, the_tid): return self._index[(the_oid, tid)], tid, end_tid + def loadAt(self, oid, at): + try: + r = self._loadBefore(oid, p64(u64(at)+1)) + except KeyError: + return None, z64 + if r is None: + # not-yet created (deleteObject not supported -> serial=0) + return None, z64 + data, serial, _ = r + return data, serial + def loadSerial(self, oid, serial): return self._index[(oid, serial)] diff --git a/src/ZODB/tests/testmvcc.py b/src/ZODB/tests/testmvcc.py index d8f13c8ca..f58f35cfb 100644 --- a/src/ZODB/tests/testmvcc.py +++ b/src/ZODB/tests/testmvcc.py @@ -35,7 +35,7 @@ ***IMPORTANT***: The MVCC approach has changed since these tests were originally written. The new approach is much simpler because we no longer call load to get the current state of an object. We call -loadBefore instead, having gotten a transaction time at the start of a +loadAt instead, having gotten a transaction time at the start of a transaction. As a result, the rhythm of the tests is a little odd, because we no longer need to probe a complex dance that doesn't exist any more. @@ -290,7 +290,7 @@ Now deactivate "b" in the first connection, and (re)fetch it. The first connection should still see 1, due to MVCC, but to get this old state -TmpStore needs to handle the loadBefore() method. +TmpStore needs to handle the loadAt() or loadBefore() methods. >>> r1["b"]._p_deactivate() @@ -322,7 +322,7 @@ Rather than add all the complexity of ZEO to these tests, the MinimalMemoryStorage has a hook. We'll write a subclass that will -deliver an invalidation when it loads (or loadBefore's) an object. +deliver an invalidation when it loads (or loadAt's) an object. The hook allows us to test the Connection code. >>> class TestStorage(MinimalMemoryStorage): diff --git a/src/ZODB/utils.py b/src/ZODB/utils.py index 1df2460d5..199950b58 100644 --- a/src/ZODB/utils.py +++ b/src/ZODB/utils.py @@ -17,6 +17,7 @@ import sys import time import threading +import warnings from binascii import hexlify, unhexlify from tempfile import mkstemp @@ -382,8 +383,51 @@ def load_current(storage, oid, version=''): some time in the future. """ assert not version - r = storage.loadBefore(oid, maxtid) - if r is None: + data, serial = loadAt(storage, oid, maxtid) + if data is None: raise ZODB.POSException.POSKeyError(oid) - assert r[2] is None - return r[:2] + return data, serial + + +_loadAtWarned = set() # of storage class +def loadAt(storage, oid, at): + """loadAt provides IStorageLoadAt semantic for all storages. + + Storages that do not implement loadAt are served via loadBefore. + """ + load_at = getattr(storage, 'loadAt', None) + if load_at is not None: + return load_at(oid, at) + + # storage does not provide IStorageLoadAt - warn + fall back to loadBefore + if type(storage) not in _loadAtWarned: + # there is potential race around _loadAtWarned access, but due to the + # GIL this race cannot result in that set corruption, and can only lead + # to us emitting the warning twice instead of just once. + # -> do not spend CPU on lock and just ignore it. + warnings.warn( + "FIXME %s does not provide loadAt - emulating it via loadBefore, but ...\n" + "\t... 1) access will be potentially slower, and\n" + "\t... 2) not full semantic of loadAt could be provided.\n" + "\t... this can lead to data corruption.\n" + "\t... -> please see https://github.com/zopefoundation/ZODB/issues/318 for details." % + type(storage), DeprecationWarning) + _loadAtWarned.add(type(storage)) + + if at == maxtid: + before = at + else: + before = p64(u64(at)+1) + + try: + r = storage.loadBefore(oid, before) + except ZODB.POSException.POSKeyError: + return (None, z64) # object does not exist at all + + if r is None: + # object was removed; however loadBefore does not tell when. + # return serial=0 - this is the "data corruption" case talked about above. + return (None, z64) + + data, serial, next_serial = r + return (data, serial) From 49f5959e73e6ba0d78187d572275eade27b61fc8 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Tue, 16 Mar 2021 17:24:14 +0300 Subject: [PATCH 02/12] DemoStorage: Add support for deleteObject (IExternalGC) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So that it is generally possible to zodbrestore[1,2] zodbdumped[3] database/transactions into DemoStorage. Because without this patch, when dump contains deletion records, e.g. txn 0285cbacc06d3a4c " " user "" description "" extension "" obj 0000000000000007 delete it fails like this: Traceback (most recent call last): ... File "/home/kirr/src/wendelin/z/zodbtools/zodbtools/zodbrestore.py", line 94, in main zodbrestore(stor, asbinstream(sys.stdin), _) File "/home/kirr/src/wendelin/z/zodbtools/zodbtools/zodbrestore.py", line 43, in zodbrestore zodbcommit(stor, at, txn) File "/home/kirr/src/wendelin/z/zodbtools/zodbtools/zodbcommit.py", line 131, in zodbcommit _() File "/home/kirr/src/wendelin/z/zodbtools/zodbtools/zodbcommit.py", line 118, in _ stor.deleteObject(obj.oid, current_serial(obj.oid), txn) AttributeError: 'DemoStorage' object has no attribute 'deleteObject' demo_test.go:105: demo:(file:///tmp/demo470143633/δ0285cbac258bf266/base.fs)/(file:///tmp/demo470143633/δ0285cbac258bf266/δ.fs): zpyrestore: exit status 1 To be able to implement DemoStorage.deleteObject, we have to fix FileStorage.deleteObject first: currently FileStorage.deleteObject raises POSKeyError if an object is not present in the database, but for situation where - demo=base+δ, - object is present in base, and - object is not present in δ it creates a problem because there is no way to add whiteout record for the object into δ. This change should be generally good; let me quote https://lab.nexedi.com/kirr/neo/commit/543041a3 for why: ---- 8< ---- Even though currently it is not possible to create such data record via FileStorage(py).deleteObject (because it raises POSKeyError if there is no previous object revision), being able to use such data records is semantically useful in overlayed DemoStorage settings, where δ part marks an object that exists only in base with delete record whiteout. It is also generally meaningful to be able to create "delete" record even if object was not previously existing: "deleteObject" is actually similar to "store" (and so should be better named as "storeDelete"). If one wants to store deletion, there should not be a reason to reject it, because deleteObject already has seatbelt in the form of oldserial, and if the user calls deleteObject(oid, oldserial=z64), he/she is already telling that "I know that this object does not exist in this storage (oldserial=z64), but still please create a deletion record for it". Once again this is useful in overlayed DemoStorage settings described above. For the reference, such whiteout deletion records pass ZODB/scripts/fstest just fine. ---- 8< ---- DemoStorage.deleteObject implementation is straightforward, but builds on loadAt[4] as precondition. /cc @jimfulton, @jamadden, @perrinjerome [1] https://lab.nexedi.com/nexedi/zodbtools/blob/129afa67/zodbtools/zodbrestore.py [2] https://lab.nexedi.com/nexedi/zodbtools/merge_requests/19 [3] https://lab.nexedi.com/nexedi/zodbtools/blob/129afa67/zodbtools/zodbdump.py [4] https://github.com/zopefoundation/ZODB/pull/323 --- src/ZODB/DemoStorage.py | 40 +++++++++++++++++++++++++++++ src/ZODB/FileStorage/FileStorage.py | 7 ++--- src/ZODB/tests/testDemoStorage.py | 8 ++++++ 3 files changed, 52 insertions(+), 3 deletions(-) diff --git a/src/ZODB/DemoStorage.py b/src/ZODB/DemoStorage.py index 625fd570c..8cf315fcf 100644 --- a/src/ZODB/DemoStorage.py +++ b/src/ZODB/DemoStorage.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- ############################################################################## # # Copyright (c) Zope Corporation and Contributors. @@ -108,11 +109,16 @@ def __init__(self, name=None, base=None, changes=None, if close_changes_on_close is None: close_changes_on_close = False else: + self._temporary_changes = False if ZODB.interfaces.IBlobStorage.providedBy(changes): zope.interface.alsoProvides(self, ZODB.interfaces.IBlobStorage) if close_changes_on_close is None: close_changes_on_close = True + if ZODB.interfaces.IExternalGC.providedBy(changes): + zope.interface.alsoProvides(self, ZODB.interfaces.IExternalGC) + self.deleteObject = self._storeDelete + self.changes = changes self.close_changes_on_close = close_changes_on_close @@ -376,6 +382,40 @@ def store(self, oid, serial, data, version, transaction): else: self.changes.store(oid, serial, data, '', transaction) + # _storeDelete serves deleteObject when .changes implements IExternalGC. + def _storeDelete(self, oid, oldserial, transaction): + if transaction is not self._transaction: + raise ZODB.POSException.StorageTransactionError(self, transaction) + + # oldserial ∈ changes -> changes.deleteObject + baseHead = self.base.lastTransaction() + if oldserial > baseHead: + self.changes.deleteObject(oid, oldserial, transaction) + return + + # oldserial ∈ base -> find out it is indeed the latest there and then + # call changes.deleteObject(oldserial=z64) + + _, serial = ZODB.utils.loadAt(self.changes, oid, self.changes.lastTransaction()) + if serial != ZODB.utils.z64: + # object has data or deletion record in changes + raise ZODB.POSException.ConflictError(oid=oid, serials=(serial, oldserial)) + + _, serial = ZODB.utils.loadAt(self.base, oid, baseHead) + if serial != oldserial: + raise ZODB.POSException.ConflictError(oid=oid, serials=(serial, oldserial)) + + # object has no data/deletion record in changes and its latest revision in base == oldserial + # -> changes.deleteObject(oldserial=z64) + correct oldserial back on conflict. + try: + self.changes.deleteObject(oid, ZODB.utils.z64, transaction) + except ZODB.POSException.ConflictError as e: + assert len(e.serials) == 2 + assert e.serials[1] == ZODB.utils.z64 + e.serials = (e.serials[0], oldserial) + raise + + def storeBlob(self, oid, oldserial, data, blobfilename, version, transaction): assert version=='', "versions aren't supported" diff --git a/src/ZODB/FileStorage/FileStorage.py b/src/ZODB/FileStorage/FileStorage.py index 4f6d1fd44..bd0bb1145 100644 --- a/src/ZODB/FileStorage/FileStorage.py +++ b/src/ZODB/FileStorage/FileStorage.py @@ -661,9 +661,10 @@ def deleteObject(self, oid, oldserial, transaction): with self._lock: old = self._index_get(oid, 0) if not old: - raise POSKeyError(oid) - h = self._read_data_header(old, oid) - committed_tid = h.tid + committed_tid = z64 + else: + h = self._read_data_header(old, oid) + committed_tid = h.tid if oldserial != committed_tid: raise ConflictError( diff --git a/src/ZODB/tests/testDemoStorage.py b/src/ZODB/tests/testDemoStorage.py index 90d4cb185..16c2c98a2 100644 --- a/src/ZODB/tests/testDemoStorage.py +++ b/src/ZODB/tests/testDemoStorage.py @@ -384,4 +384,12 @@ def test_suite(): suite.addTest(unittest.makeSuite(DemoStorageWrappedAroundHexMappingStorage, 'check')) suite.addTest(unittest.makeSuite(DemoStorageTests2, 'check')) + + def demo_for_gctest(): + from ZODB.FileStorage import FileStorage + base = ZODB.FileStorage.FileStorage('data.fs', blob_dir="data_blobs") + changes = ZODB.FileStorage.FileStorage('changes.fs', blob_dir="changes_blobs", pack_gc=False) + return ZODB.DemoStorage.DemoStorage(base=base, changes=changes) + suite.addTest(PackableStorage.IExternalGC_suite(demo_for_gctest)) + return suite From 805bf36f4442491599a6b173e46e7972c608c7fe Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Tue, 4 May 2021 16:35:25 +0300 Subject: [PATCH 03/12] loadAt -> loadBeforeEx @d-maurer suggests[1]: The ZODB logic relating to historical data (including MVCC) was largely centered around before. You have changed this to at - requiring wide spread modifications. I would much prefer to keep the before centered approach... (https://github.com/zopefoundation/ZODB/pull/323#pullrequestreview-650963363) So let's change "at"-based logic to "before"-based logic and rename the new method from loadAt to loadBeforeEx. --- src/ZODB/BaseStorage.py | 5 +++-- src/ZODB/DB.py | 3 ++- src/ZODB/DemoStorage.py | 19 +++++----------- src/ZODB/FileStorage/FileStorage.py | 16 +++++++------- src/ZODB/MappingStorage.py | 11 +++++----- src/ZODB/blob.py | 3 +-- src/ZODB/historical_connections.rst | 9 ++++---- src/ZODB/interfaces.py | 18 +++++++-------- src/ZODB/mvccadapter.py | 14 +++++------- src/ZODB/tests/IExternalGC.test | 11 +++++----- src/ZODB/tests/MVCCMappingStorage.py | 2 +- src/ZODB/tests/hexstorage.py | 10 ++++----- src/ZODB/tests/testConnection.py | 4 ++-- src/ZODB/tests/test_storage.py | 8 +++---- src/ZODB/tests/testmvcc.py | 6 ++--- src/ZODB/utils.py | 33 ++++++++++++---------------- 16 files changed, 80 insertions(+), 92 deletions(-) diff --git a/src/ZODB/BaseStorage.py b/src/ZODB/BaseStorage.py index 0f5140f5b..0cc27a25f 100644 --- a/src/ZODB/BaseStorage.py +++ b/src/ZODB/BaseStorage.py @@ -59,7 +59,8 @@ class BaseStorage(UndoLogCompatible): If it stores multiple revisions, it should implement loadSerial() - loadAt() + loadBefore() + loadBeforeEx() Each storage will have two locks that are accessed via lock acquire and release methods bound to the instance. (Yuck.) @@ -267,7 +268,7 @@ def loadSerial(self, oid, serial): raise POSException.Unsupported( "Retrieval of historical revisions is not supported") - # do not provide loadAt/loadBefore here in BaseStorage - if child forgets + # do not provide loadBeforeEx/loadBefore here in BaseStorage - if child forgets # to override it - storage will always return "no data" instead of failing. def copyTransactionsFrom(self, other, verbose=0): diff --git a/src/ZODB/DB.py b/src/ZODB/DB.py index 4981cc1d2..c9065ef24 100644 --- a/src/ZODB/DB.py +++ b/src/ZODB/DB.py @@ -726,7 +726,8 @@ def open(self, transaction_manager=None, at=None, before=None): - `before`: like `at`, but opens the readonly state before the tid or datetime. """ - # `at` is normalized to `before`. + # `at` is normalized to `before`, since we use storage.loadBeforeEx + # as the underlying implementation of both. before = getTID(at, before) if (before is not None and before > self.lastTransaction() and diff --git a/src/ZODB/DemoStorage.py b/src/ZODB/DemoStorage.py index 625fd570c..4f77fe53c 100644 --- a/src/ZODB/DemoStorage.py +++ b/src/ZODB/DemoStorage.py @@ -218,34 +218,27 @@ def __len__(self): # still want load for old clients (e.g. zeo servers) load = load_current - def loadAt(self, oid, at): - data, serial = ZODB.utils.loadAt(self.changes, oid, at) + def loadBeforeEx(self, oid, before): + data, serial = ZODB.utils.loadBeforeEx(self.changes, oid, before) if (data is not None) or (serial != ZODB.utils.z64): # object is present in changes either as data or deletion record. return data, serial # object is not present in changes at all - use base - return ZODB.utils.loadAt(self.base, oid, at) + return ZODB.utils.loadBeforeEx(self.base, oid, before) def loadBefore(self, oid, before): - warnings.warn("loadBefore is deprecated - use loadAt instead", + warnings.warn("loadBefore is deprecated - use loadBeforeEx instead", DeprecationWarning, stacklevel=2) - p64 = ZODB.utils.p64 - u64 = ZODB.utils.u64 - if before in (maxtid, ZODB.utils.z64): - at = before - else: - at = p64(u64(before)-1) - - data, serial = self.loadAt(oid, at) + data, serial = self.loadBeforeEx(oid, before) # find out next_serial. # it is ok to use dumb/slow implementation since loadBefore should not # be used and is provided only for backward compatibility. next_serial = maxtid while 1: - _, s = self.loadAt(oid, p64(u64(next_serial)-1)) + _, s = self.loadBeforeEx(oid, next_serial) assert s >= serial if s == serial: # found - next_serial is serial of the next data record diff --git a/src/ZODB/FileStorage/FileStorage.py b/src/ZODB/FileStorage/FileStorage.py index 4f6d1fd44..c4e1b9522 100644 --- a/src/ZODB/FileStorage/FileStorage.py +++ b/src/ZODB/FileStorage/FileStorage.py @@ -58,7 +58,7 @@ from ZODB.interfaces import IStorageIteration from ZODB.interfaces import IStorageRestoreable from ZODB.interfaces import IStorageUndoable -from ZODB.interfaces import IStorageLoadAt +from ZODB.interfaces import IStorageLoadBeforeEx from ZODB.POSException import ConflictError from ZODB.POSException import MultipleUndoErrors from ZODB.POSException import POSKeyError @@ -135,7 +135,7 @@ def __init__(self, afile): IStorageCurrentRecordIteration, IExternalGC, IStorage, - IStorageLoadAt, + IStorageLoadBeforeEx, ) class FileStorage( FileStorageFormatter, @@ -562,8 +562,8 @@ def loadSerial(self, oid, serial): else: return self._loadBack_impl(oid, h.back)[0] - def loadAt(self, oid, at): - """loadAt implements IStorageLoadAt.""" + def loadBeforeEx(self, oid, before): + """loadBeforeEx implements IStorageLoadBeforeEx.""" with self._files.get() as _file: try: pos = self._lookup_pos(oid) @@ -573,14 +573,14 @@ def loadAt(self, oid, at): while 1: h = self._read_data_header(pos, oid, _file) - if h.tid <= at: + if h.tid < before: break pos = h.prev if not pos: - # object not yet created as of at + # object not yet created as of >> historical_conn.root()['first']['count'] 0 -In fact, ``at`` arguments are translated into ``before`` values. -When you look at a connection's ``before`` attribute, it is normalized into a -``before`` serial, no matter what you pass into ``db.open``. +In fact, ``at`` arguments are translated into ``before`` values because the +underlying mechanism is a storage's loadBeforeEx method. When you look at a +connection's ``before`` attribute, it is normalized into a ``before`` serial, +no matter what you pass into ``db.open``. >>> print(conn.before) None diff --git a/src/ZODB/interfaces.py b/src/ZODB/interfaces.py index ceeab9415..d4de1ebbf 100644 --- a/src/ZODB/interfaces.py +++ b/src/ZODB/interfaces.py @@ -711,7 +711,7 @@ def loadBefore(oid, tid): """Load the object data written before a transaction id ( This method is deprecated and kept for backward-compatibility. - Please use loadAt instead. ) + Please use loadBeforeEx instead. ) If there isn't data before the object before the given transaction, then None is returned, otherwise three values are @@ -889,20 +889,20 @@ def prefetch(oids, tid): more than once. """ -class IStorageLoadAt(Interface): +class IStorageLoadBeforeEx(Interface): - def loadAt(oid, at): # -> (data, serial) - """Load object data as observed at given database state. + def loadBeforeEx(oid, before): # -> (data, serial) + """Load object data as observed before given database state. - loadAt returns data for object with given object ID as observed by - database state ≤ at. Two values are returned: + loadBeforeEx returns data for object with given object ID as observed by + most recent database transaction with ID < before. Two values are returned: - The data record, - The transaction ID of the data record. - If the object does not exist, or is deleted as of `at` database state, - loadAt returns data=None, and serial indicates transaction ID of the - most recent deletion done in transaction with ID ≤ at, or null tid if + If the object does not exist, or is deleted as of requested database state, + loadBeforeEx returns data=None, and serial indicates transaction ID of the + most recent deletion done in transaction with ID < before, or null tid if there is no such deletion. Note: no POSKeyError is raised even if object id is not in the storage. diff --git a/src/ZODB/mvccadapter.py b/src/ZODB/mvccadapter.py index 5b3088993..582176e77 100644 --- a/src/ZODB/mvccadapter.py +++ b/src/ZODB/mvccadapter.py @@ -10,7 +10,7 @@ import zope.interface from . import interfaces, serialize, POSException -from .utils import p64, u64, z64, maxtid, Lock, loadAt, oid_repr, tid_repr +from .utils import p64, u64, z64, maxtid, Lock, loadBeforeEx, oid_repr, tid_repr class Base(object): @@ -99,7 +99,7 @@ class MVCCAdapterInstance(Base): 'checkCurrentSerialInTransaction', 'tpc_abort', ) - _start = None # Transaction start time (before) + _start = None # Transaction start time _ltid = b'' # Last storage transaction id def __init__(self, base): @@ -151,16 +151,15 @@ def poll_invalidations(self): def load(self, oid): assert self._start is not None - at = p64(u64(self._start)-1) - data, serial = loadAt(self._storage, oid, at) + data, serial = loadBeforeEx(self._storage, oid, self._start) if data is None: # raise POSKeyError if object does not exist at all # TODO raise POSKeyError always and switch to raising ReadOnlyError only when # actually detecting that load is being affected by simultaneous pack (see below). if serial == z64: - # XXX second call to loadAt - it will become unneeded once we + # XXX second call to loadBeforeEx - it will become unneeded once we # switch to raising POSKeyError. - _, serial_exists = loadAt(self._storage, oid, maxtid) + _, serial_exists = loadBeforeEx(self._storage, oid, maxtid) if serial_exists == z64: # object does not exist at all raise POSException.POSKeyError(oid) @@ -272,8 +271,7 @@ def poll_invalidations(self): new_oid = pack = store = read_only_writer def load(self, oid, version=''): - at = p64(u64(self._before)-1) - data, serial = loadAt(self._storage, oid, at) + data, serial = loadBeforeEx(self._storage, oid, self._before) if data is None: raise POSException.POSKeyError(oid) return data, serial diff --git a/src/ZODB/tests/IExternalGC.test b/src/ZODB/tests/IExternalGC.test index f2296bea0..050fd182c 100644 --- a/src/ZODB/tests/IExternalGC.test +++ b/src/ZODB/tests/IExternalGC.test @@ -16,7 +16,6 @@ A create_storage function is provided that creates a storage. >>> transaction.commit() >>> oid0 = conn.root()[0]._p_oid >>> oid1 = conn.root()[1]._p_oid - >>> atLive = conn.root()._p_serial >>> del conn.root()[0] >>> del conn.root()[1] >>> transaction.commit() @@ -67,10 +66,10 @@ Now if we try to load data for the objects, we get a POSKeyError: We can still get the data if we load before the time we deleted. - >>> from ZODB.utils import loadAt, z64 - >>> loadAt(storage, oid0, atLive) == (p0, s0) + >>> from ZODB.utils import loadBeforeEx, z64 + >>> loadBeforeEx(storage, oid0, conn.root()._p_serial) == (p0, s0) True - >>> loadAt(storage, oid1, atLive) == (p1, s1) + >>> loadBeforeEx(storage, oid1, conn.root()._p_serial) == (p1, s1) True >>> with open(storage.loadBlob(oid1, s1)) as fp: fp.read() 'some data' @@ -94,10 +93,10 @@ gone: ... POSKeyError: ... - >>> loadAt(storage, oid0, atLive) == (None, z64) + >>> loadBeforeEx(storage, oid0, conn.root()._p_serial) == (None, z64) True - >>> loadAt(storage, oid1, atLive) == (None, z64) + >>> loadBeforeEx(storage, oid1, conn.root()._p_serial) == (None, z64) True >>> storage.loadBlob(oid1, s1) # doctest: +ELLIPSIS diff --git a/src/ZODB/tests/MVCCMappingStorage.py b/src/ZODB/tests/MVCCMappingStorage.py index 4c76b12a9..46aaf1842 100644 --- a/src/ZODB/tests/MVCCMappingStorage.py +++ b/src/ZODB/tests/MVCCMappingStorage.py @@ -46,7 +46,7 @@ def new_instance(self): inst.new_oid = self.new_oid inst.pack = self.pack inst.loadBefore = self.loadBefore - inst.loadAt = self.loadAt + inst.loadBeforeEx = self.loadBeforeEx inst._ltid = self._ltid inst._main_lock = self._lock return inst diff --git a/src/ZODB/tests/hexstorage.py b/src/ZODB/tests/hexstorage.py index b4f556a7f..25ff7655f 100644 --- a/src/ZODB/tests/hexstorage.py +++ b/src/ZODB/tests/hexstorage.py @@ -39,13 +39,13 @@ def __init__(self, base): setattr(self, name, v) zope.interface.directlyProvides(self, zope.interface.providedBy(base)) - if hasattr(base, 'loadAt') and 'loadAt' not in self.copied_methods: - def loadAt(oid, at): - data, serial = self.base.loadAt(oid, at) + if hasattr(base, 'loadBeforeEx') and 'loadBeforeEx' not in self.copied_methods: + def loadBeforeEx(oid, before): + data, serial = self.base.loadBeforeEx(oid, before) if data is not None: data = unhexlify(data[2:]) return data, serial - self.loadAt = loadAt + self.loadBeforeEx = loadBeforeEx def __getattr__(self, name): return getattr(self.base, name) @@ -137,7 +137,7 @@ class ServerHexStorage(HexStorage): """ copied_methods = HexStorage.copied_methods + ( - 'load', 'loadAt', 'loadBefore', 'loadSerial', 'store', 'restore', + 'load', 'loadBeforeEx', 'loadBefore', 'loadSerial', 'store', 'restore', 'iterator', 'storeBlob', 'restoreBlob', 'record_iternext', ) diff --git a/src/ZODB/tests/testConnection.py b/src/ZODB/tests/testConnection.py index a579c98d8..803eb183d 100644 --- a/src/ZODB/tests/testConnection.py +++ b/src/ZODB/tests/testConnection.py @@ -1314,7 +1314,7 @@ def load(self, oid, version=''): raise TypeError('StubStorage does not support versions.') return self._data[oid] - def loadAt(self, oid, at): + def loadBeforeEx(self, oid, before): try: data, serial = self._transdata[oid] except KeyError: @@ -1322,7 +1322,7 @@ def loadAt(self, oid, at): return data, serial def loadBefore(self, oid, tid): - warnings.warn("loadBefore is deprecated - use loadAt instead", + warnings.warn("loadBefore is deprecated - use loadBeforeEx instead", DeprecationWarning, stacklevel=2) return self._data[oid] + (None, ) diff --git a/src/ZODB/tests/test_storage.py b/src/ZODB/tests/test_storage.py index ae26a3abe..f60831761 100644 --- a/src/ZODB/tests/test_storage.py +++ b/src/ZODB/tests/test_storage.py @@ -24,7 +24,7 @@ from ZODB.BaseStorage import BaseStorage from ZODB import POSException -from ZODB.utils import p64, u64, z64 +from ZODB.utils import z64 from ZODB.tests import StorageTestBase from ZODB.tests import BasicStorage, MTStorage, Synchronization @@ -106,7 +106,7 @@ def _finish(self, tid, u, d, e): self._ltid = self._tid def loadBefore(self, the_oid, the_tid): - warnings.warn("loadBefore is deprecated - use loadAt instead", + warnings.warn("loadBefore is deprecated - use loadBeforeEx instead", DeprecationWarning, stacklevel=2) return self._loadBefore(the_oid, the_tid) @@ -132,9 +132,9 @@ def _loadBefore(self, the_oid, the_tid): return self._index[(the_oid, tid)], tid, end_tid - def loadAt(self, oid, at): + def loadBeforeEx(self, oid, before): try: - r = self._loadBefore(oid, p64(u64(at)+1)) + r = self._loadBefore(oid, before) except KeyError: return None, z64 if r is None: diff --git a/src/ZODB/tests/testmvcc.py b/src/ZODB/tests/testmvcc.py index f58f35cfb..963643863 100644 --- a/src/ZODB/tests/testmvcc.py +++ b/src/ZODB/tests/testmvcc.py @@ -35,7 +35,7 @@ ***IMPORTANT***: The MVCC approach has changed since these tests were originally written. The new approach is much simpler because we no longer call load to get the current state of an object. We call -loadAt instead, having gotten a transaction time at the start of a +loadBeforeEx instead, having gotten a transaction time at the start of a transaction. As a result, the rhythm of the tests is a little odd, because we no longer need to probe a complex dance that doesn't exist any more. @@ -290,7 +290,7 @@ Now deactivate "b" in the first connection, and (re)fetch it. The first connection should still see 1, due to MVCC, but to get this old state -TmpStore needs to handle the loadAt() or loadBefore() methods. +TmpStore needs to handle the loadBeforeEx() or loadBefore() methods. >>> r1["b"]._p_deactivate() @@ -322,7 +322,7 @@ Rather than add all the complexity of ZEO to these tests, the MinimalMemoryStorage has a hook. We'll write a subclass that will -deliver an invalidation when it loads (or loadAt's) an object. +deliver an invalidation when it loads (or loadBeforeEx's) an object. The hook allows us to test the Connection code. >>> class TestStorage(MinimalMemoryStorage): diff --git a/src/ZODB/utils.py b/src/ZODB/utils.py index 199950b58..9393301d0 100644 --- a/src/ZODB/utils.py +++ b/src/ZODB/utils.py @@ -383,41 +383,36 @@ def load_current(storage, oid, version=''): some time in the future. """ assert not version - data, serial = loadAt(storage, oid, maxtid) + data, serial = loadBeforeEx(storage, oid, maxtid) if data is None: raise ZODB.POSException.POSKeyError(oid) return data, serial -_loadAtWarned = set() # of storage class -def loadAt(storage, oid, at): - """loadAt provides IStorageLoadAt semantic for all storages. +_loadBeforeExWarned = set() # of storage class +def loadBeforeEx(storage, oid, before): + """loadBeforeEx provides IStorageLoadBeforeEx semantic for all storages. - Storages that do not implement loadAt are served via loadBefore. + Storages that do not implement loadBeforeEx are served via loadBefore. """ - load_at = getattr(storage, 'loadAt', None) - if load_at is not None: - return load_at(oid, at) + loadBeforeEx = getattr(storage, 'loadBeforeEx', None) + if loadBeforeEx is not None: + return loadBeforeEx(oid, before) - # storage does not provide IStorageLoadAt - warn + fall back to loadBefore - if type(storage) not in _loadAtWarned: - # there is potential race around _loadAtWarned access, but due to the + # storage does not provide IStorageLoadBeforeEx - warn + fall back to loadBefore + if type(storage) not in _loadBeforeExWarned: + # there is potential race around _loadBeforeExWarned access, but due to the # GIL this race cannot result in that set corruption, and can only lead # to us emitting the warning twice instead of just once. # -> do not spend CPU on lock and just ignore it. warnings.warn( - "FIXME %s does not provide loadAt - emulating it via loadBefore, but ...\n" + "FIXME %s does not provide loadBeforeEx - emulating it via loadBefore, but ...\n" "\t... 1) access will be potentially slower, and\n" - "\t... 2) not full semantic of loadAt could be provided.\n" + "\t... 2) not full semantic of loadBeforeEx could be provided.\n" "\t... this can lead to data corruption.\n" "\t... -> please see https://github.com/zopefoundation/ZODB/issues/318 for details." % type(storage), DeprecationWarning) - _loadAtWarned.add(type(storage)) - - if at == maxtid: - before = at - else: - before = p64(u64(at)+1) + _loadBeforeExWarned.add(type(storage)) try: r = storage.loadBefore(oid, before) From d968a9a800b36767e515c3985f742389b4d2240a Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Tue, 4 May 2021 16:40:44 +0300 Subject: [PATCH 04/12] Handle NotImplementedError raised by loadBefore/loadBeforeEx as "interface not provided" Suggested by @d-maurer: https://github.com/zopefoundation/ZODB/pull/323#discussion_r625573381 --- src/ZODB/BaseStorage.py | 11 +++++++++-- src/ZODB/utils.py | 5 ++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/ZODB/BaseStorage.py b/src/ZODB/BaseStorage.py index 0cc27a25f..92737e9ad 100644 --- a/src/ZODB/BaseStorage.py +++ b/src/ZODB/BaseStorage.py @@ -268,8 +268,15 @@ def loadSerial(self, oid, serial): raise POSException.Unsupported( "Retrieval of historical revisions is not supported") - # do not provide loadBeforeEx/loadBefore here in BaseStorage - if child forgets - # to override it - storage will always return "no data" instead of failing. + def loadBefore(self, oid, tid): + """Return most recent revision of oid before tid committed.""" + raise NotImplementedError + + def loadBeforeEx(self, oid, before): + """Return most recent revision of oid as of Date: Tue, 4 May 2021 16:48:38 +0300 Subject: [PATCH 05/12] *: Don't emit warnings on loadBefore @d-maurer suggests to keep loadBefore without deprecation (https://github.com/zopefoundation/ZODB/pull/323#pullrequestreview-650963363). -> Don't emit warnings about deprecating loadBefore. -> Keep the deprecation text in loadBefore interface, since loadBeforeEx should practically provide wider functionality without putting unnecessary constraint on storage implementations. In other words loadBefore deprecation is still there, but less aggressively advertised with the idea to make transition for outside-of-ZODB code to loadBeforeEx more smooth and with a bit more steps (we might want to reinstate the deprecation warnings at a later time). --- src/ZODB/DemoStorage.py | 4 ---- src/ZODB/FileStorage/FileStorage.py | 3 --- src/ZODB/MappingStorage.py | 3 --- src/ZODB/tests/testConnection.py | 2 -- src/ZODB/tests/test_storage.py | 8 +------- 5 files changed, 1 insertion(+), 19 deletions(-) diff --git a/src/ZODB/DemoStorage.py b/src/ZODB/DemoStorage.py index 4f77fe53c..3d44a9131 100644 --- a/src/ZODB/DemoStorage.py +++ b/src/ZODB/DemoStorage.py @@ -24,7 +24,6 @@ import random import weakref import tempfile -import warnings import ZODB.BaseStorage import ZODB.blob import ZODB.interfaces @@ -228,9 +227,6 @@ def loadBeforeEx(self, oid, before): return ZODB.utils.loadBeforeEx(self.base, oid, before) def loadBefore(self, oid, before): - warnings.warn("loadBefore is deprecated - use loadBeforeEx instead", - DeprecationWarning, stacklevel=2) - data, serial = self.loadBeforeEx(oid, before) # find out next_serial. diff --git a/src/ZODB/FileStorage/FileStorage.py b/src/ZODB/FileStorage/FileStorage.py index c4e1b9522..f1f02289e 100644 --- a/src/ZODB/FileStorage/FileStorage.py +++ b/src/ZODB/FileStorage/FileStorage.py @@ -21,7 +21,6 @@ import logging import os import time -import warnings from struct import pack from struct import unpack @@ -594,8 +593,6 @@ def loadBeforeEx(self, oid, before): return None, h.tid def loadBefore(self, oid, tid): - warnings.warn("loadBefore is deprecated - use loadBeforeEx instead", - DeprecationWarning, stacklevel=2) with self._files.get() as _file: pos = self._lookup_pos(oid) end_tid = None diff --git a/src/ZODB/MappingStorage.py b/src/ZODB/MappingStorage.py index e1260c5ed..ea80e4d87 100644 --- a/src/ZODB/MappingStorage.py +++ b/src/ZODB/MappingStorage.py @@ -19,7 +19,6 @@ import BTrees import time -import warnings import ZODB.BaseStorage import ZODB.interfaces import ZODB.POSException @@ -169,8 +168,6 @@ def loadBeforeEx(self, oid, before): # ZODB.interfaces.IStorage @ZODB.utils.locked(opened) def loadBefore(self, oid, tid): - warnings.warn("loadBefore is deprecated - use loadBeforeEx instead", - DeprecationWarning, stacklevel=2) tid_data = self._data.get(oid) if tid_data: before = ZODB.utils.u64(tid) diff --git a/src/ZODB/tests/testConnection.py b/src/ZODB/tests/testConnection.py index 803eb183d..b847061ee 100644 --- a/src/ZODB/tests/testConnection.py +++ b/src/ZODB/tests/testConnection.py @@ -1322,8 +1322,6 @@ def loadBeforeEx(self, oid, before): return data, serial def loadBefore(self, oid, tid): - warnings.warn("loadBefore is deprecated - use loadBeforeEx instead", - DeprecationWarning, stacklevel=2) return self._data[oid] + (None, ) def store(self, oid, serial, p, version, transaction): diff --git a/src/ZODB/tests/test_storage.py b/src/ZODB/tests/test_storage.py index f60831761..6fcd1a0be 100644 --- a/src/ZODB/tests/test_storage.py +++ b/src/ZODB/tests/test_storage.py @@ -20,7 +20,6 @@ """ import bisect import unittest -import warnings from ZODB.BaseStorage import BaseStorage from ZODB import POSException @@ -106,11 +105,6 @@ def _finish(self, tid, u, d, e): self._ltid = self._tid def loadBefore(self, the_oid, the_tid): - warnings.warn("loadBefore is deprecated - use loadBeforeEx instead", - DeprecationWarning, stacklevel=2) - return self._loadBefore(the_oid, the_tid) - - def _loadBefore(self, the_oid, the_tid): # It's okay if loadBefore() is really expensive, because this # storage is just used for testing. with self._lock: @@ -134,7 +128,7 @@ def _loadBefore(self, the_oid, the_tid): def loadBeforeEx(self, oid, before): try: - r = self._loadBefore(oid, before) + r = self.loadBefore(oid, before) except KeyError: return None, z64 if r is None: From 947232594c94c3b621e25a3dad18db1b0e3f87bf Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Thu, 6 May 2021 20:59:56 +0300 Subject: [PATCH 06/12] fixup! Handle NotImplementedError raised by loadBefore/loadBeforeEx as "interface not provided" loadBeforeEx uses the same docstring as loadBefore as @d-maurer suggests: https://github.com/zopefoundation/ZODB/pull/323#discussion_r626255153 --- src/ZODB/BaseStorage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ZODB/BaseStorage.py b/src/ZODB/BaseStorage.py index 92737e9ad..1f7410fe6 100644 --- a/src/ZODB/BaseStorage.py +++ b/src/ZODB/BaseStorage.py @@ -272,8 +272,8 @@ def loadBefore(self, oid, tid): """Return most recent revision of oid before tid committed.""" raise NotImplementedError - def loadBeforeEx(self, oid, before): - """Return most recent revision of oid as of Date: Thu, 6 May 2021 21:09:17 +0300 Subject: [PATCH 07/12] changes: Add draft entry for loadAt/loadBeforeEx/DemoStorage fix The changelog entry uses loadBeforeEx, but we are likely to change this name during https://github.com/zopefoundation/ZODB/pull/323 review. --- CHANGES.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 9e006fca0..dbc06debe 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,6 +5,13 @@ 5.6.1 (unreleased) ================== +- Fix DemoStorage data corruption in the presence of whiteouts. + This change is implemented via introducing new ``loadBeforeEx`` interface + that extends and corrects ``loadBefore`` semantic. + See `issue 318 `_ + and `PR 323 `_ + for details. + - Fix UnboundLocalError when running fsoids.py script. See `issue 268 `_. From c577e328fac973f8ae06dc2e2eabdda13e29ad5a Mon Sep 17 00:00:00 2001 From: Dieter Maurer Date: Tue, 11 May 2021 20:04:39 +0300 Subject: [PATCH 08/12] fixup! changes: Add draft entry for loadAt/loadBeforeEx/DemoStorage fix @d-maurer says (https://github.com/zopefoundation/ZODB/pull/323#discussion_r627929959): The changes around `loadBeforeEx` have much more impact than the `DemoStorage` fix. -------- kirr: adjusted the text a bit: "Introduces" -> "Introduce"; add "interface". @d-maurer, I hope it is ok. --- CHANGES.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index dbc06debe..baebd7d12 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,9 +5,11 @@ 5.6.1 (unreleased) ================== -- Fix DemoStorage data corruption in the presence of whiteouts. - This change is implemented via introducing new ``loadBeforeEx`` interface - that extends and corrects ``loadBefore`` semantic. +- Introduce a new ``loadBeforeEx`` interface and deprecate ``loadBefore``: + ``loadBeforeEx`` is simpler, provides better information for object delete + records and can be more efficiently implemented by many storages. + ``loadBeforeEx`` is used (and required) to fix a ``DemoStorage`` data corruption + in the presence of object delete records. See `issue 318 `_ and `PR 323 `_ for details. From 5ae48fe1d93372d7c883ffc8cedd7eb78561ce06 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Sun, 6 Jun 2021 20:16:01 +0300 Subject: [PATCH 09/12] Undeprecate loadBefore Dieter Maurer notes that loadBefore cannot be deprecated yet because ZEO essentially depends on the `end_tid` information returned by loadBefore to update its cache: https://github.com/zopefoundation/ZODB/pull/323#issuecomment-842021970 And to remove this dependency it would require to rework ZODB caching layer: https://github.com/zopefoundation/ZODB/pull/323#issuecomment-845917355 So we cannot deprecate loadBefore until this rework is implemented first. -> Remove general loadBefore deprecation, and emit loadBefore vs loadBeforeEx warning only when actually hitting a "deletion" record, because only that case is known to lead to data corruption. --- CHANGES.rst | 2 +- src/ZODB/interfaces.py | 5 ++--- src/ZODB/utils.py | 33 +++++++++++++++++---------------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index baebd7d12..5b720a428 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,7 +5,7 @@ 5.6.1 (unreleased) ================== -- Introduce a new ``loadBeforeEx`` interface and deprecate ``loadBefore``: +- Introduce a new ``loadBeforeEx`` interface that complements ``loadBefore``: ``loadBeforeEx`` is simpler, provides better information for object delete records and can be more efficiently implemented by many storages. ``loadBeforeEx`` is used (and required) to fix a ``DemoStorage`` data corruption diff --git a/src/ZODB/interfaces.py b/src/ZODB/interfaces.py index d4de1ebbf..21659e791 100644 --- a/src/ZODB/interfaces.py +++ b/src/ZODB/interfaces.py @@ -710,9 +710,6 @@ def __len__(): def loadBefore(oid, tid): """Load the object data written before a transaction id - ( This method is deprecated and kept for backward-compatibility. - Please use loadBeforeEx instead. ) - If there isn't data before the object before the given transaction, then None is returned, otherwise three values are returned: @@ -724,6 +721,8 @@ def loadBefore(oid, tid): - The transaction id of the following revision, if any, or None. If the object id isn't in the storage, then POSKeyError is raised. + + See also: IStorageLoadBeforeEx.loadBeforeEx . """ def loadSerial(oid, serial): diff --git a/src/ZODB/utils.py b/src/ZODB/utils.py index 517b17d45..095f7a920 100644 --- a/src/ZODB/utils.py +++ b/src/ZODB/utils.py @@ -402,21 +402,7 @@ def loadBeforeEx(storage, oid, before): except NotImplementedError: pass - # storage does not provide IStorageLoadBeforeEx - warn + fall back to loadBefore - if type(storage) not in _loadBeforeExWarned: - # there is potential race around _loadBeforeExWarned access, but due to the - # GIL this race cannot result in that set corruption, and can only lead - # to us emitting the warning twice instead of just once. - # -> do not spend CPU on lock and just ignore it. - warnings.warn( - "FIXME %s does not provide loadBeforeEx - emulating it via loadBefore, but ...\n" - "\t... 1) access will be potentially slower, and\n" - "\t... 2) not full semantic of loadBeforeEx could be provided.\n" - "\t... this can lead to data corruption.\n" - "\t... -> please see https://github.com/zopefoundation/ZODB/issues/318 for details." % - type(storage), DeprecationWarning) - _loadBeforeExWarned.add(type(storage)) - + # storage does not provide IStorageLoadBeforeEx - fall back to loadBefore try: r = storage.loadBefore(oid, before) except ZODB.POSException.POSKeyError: @@ -424,7 +410,22 @@ def loadBeforeEx(storage, oid, before): if r is None: # object was removed; however loadBefore does not tell when. - # return serial=0 - this is the "data corruption" case talked about above. + # return serial=0. This can, however, lead to data corruption with e.g. + # DemoStorage (https://github.com/zopefoundation/ZODB/issues/318), so + # emit corresponding warning. + if type(storage) not in _loadBeforeExWarned: + # there is potential race around _loadBeforeExWarned access, but due to the + # GIL this race cannot result in that set corruption, and can only lead + # to us emitting the warning twice instead of just once. + # -> do not spend CPU on lock and just ignore it. + warnings.warn( + "FIXME %s does not provide loadBeforeEx - emulating it via loadBefore, but ...\n" + "\t... 1) access is be potentially slower, and\n" + "\t... 2) not full semantic of loadBeforeEx could be provided.\n" + "\t... this can lead to data corruption in the presence of delete records.\n" + "\t... -> please see https://github.com/zopefoundation/ZODB/issues/318 for details." % + type(storage), PendingDeprecationWarning) + _loadBeforeExWarned.add(type(storage)) return (None, z64) data, serial, next_serial = r From daf8264652ddadcfa00ad75d4b1b71ceec1259f8 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Thu, 11 Nov 2021 23:41:26 +0300 Subject: [PATCH 10/12] Make lint happy Sorry, but some of flake8 complaints are really stupid... --- src/ZODB/DemoStorage.py | 1 - src/ZODB/FileStorage/FileStorage.py | 2 +- src/ZODB/interfaces.py | 17 +++++++++------- src/ZODB/mvccadapter.py | 12 ++++++----- src/ZODB/tests/hexstorage.py | 3 ++- src/ZODB/tests/testDemoStorage.py | 31 +++++++++++++++++------------ src/ZODB/utils.py | 25 ++++++++++++++--------- 7 files changed, 54 insertions(+), 37 deletions(-) diff --git a/src/ZODB/DemoStorage.py b/src/ZODB/DemoStorage.py index eb5eb153c..56af9b39c 100644 --- a/src/ZODB/DemoStorage.py +++ b/src/ZODB/DemoStorage.py @@ -256,7 +256,6 @@ def loadBefore(self, oid, before): # regular data record return data, serial, next_serial - def loadBlob(self, oid, serial): try: return self.changes.loadBlob(oid, serial) diff --git a/src/ZODB/FileStorage/FileStorage.py b/src/ZODB/FileStorage/FileStorage.py index 5daec0cb5..4a45798fd 100644 --- a/src/ZODB/FileStorage/FileStorage.py +++ b/src/ZODB/FileStorage/FileStorage.py @@ -599,7 +599,7 @@ def loadBeforeEx(self, oid, before): elif h.back: # backpointer data, _, _, _ = self._loadBack_impl(oid, h.back, - fail=False, _file=_file) + fail=False, _file=_file) return data, h.tid else: # deletion diff --git a/src/ZODB/interfaces.py b/src/ZODB/interfaces.py index d907dd61d..2cd71a9ec 100644 --- a/src/ZODB/interfaces.py +++ b/src/ZODB/interfaces.py @@ -889,25 +889,28 @@ def prefetch(oids, tid): more than once. """ + class IStorageLoadBeforeEx(Interface): - def loadBeforeEx(oid, before): # -> (data, serial) + def loadBeforeEx(oid, before): # -> (data, serial) """Load object data as observed before given database state. - loadBeforeEx returns data for object with given object ID as observed by - most recent database transaction with ID < before. Two values are returned: + loadBeforeEx returns data for object with given object ID as observed + by most recent database transaction with ID < before. Two values are + returned: - The data record, - The transaction ID of the data record. - If the object does not exist, or is deleted as of requested database state, - loadBeforeEx returns data=None, and serial indicates transaction ID of the - most recent deletion done in transaction with ID < before, or null tid if - there is no such deletion. + If the object does not exist, or is deleted as of requested database + state, loadBeforeEx returns data=None, and serial indicates transaction + ID of the most recent deletion done in transaction with ID < before, or + null tid if there is no such deletion. Note: no POSKeyError is raised even if object id is not in the storage. """ + class IMultiCommitStorage(IStorage): """A multi-commit storage can commit multiple transactions at once. diff --git a/src/ZODB/mvccadapter.py b/src/ZODB/mvccadapter.py index 3d09dce8e..05dee5c9e 100644 --- a/src/ZODB/mvccadapter.py +++ b/src/ZODB/mvccadapter.py @@ -10,7 +10,8 @@ import zope.interface from . import interfaces, serialize, POSException -from .utils import p64, u64, z64, maxtid, Lock, loadBeforeEx, oid_repr, tid_repr +from .utils import p64, u64, z64, maxtid, Lock, loadBeforeEx, oid_repr, \ + tid_repr class Base(object): @@ -158,11 +159,12 @@ def load(self, oid): data, serial = loadBeforeEx(self._storage, oid, self._start) if data is None: # raise POSKeyError if object does not exist at all - # TODO raise POSKeyError always and switch to raising ReadOnlyError only when - # actually detecting that load is being affected by simultaneous pack (see below). + # TODO raise POSKeyError always and switch to raising ReadOnlyError + # only when actually detecting that load is being affected by + # simultaneous pack (see below). if serial == z64: - # XXX second call to loadBeforeEx - it will become unneeded once we - # switch to raising POSKeyError. + # XXX second call to loadBeforeEx - it will become unneeded + # once we switch to raising POSKeyError. _, serial_exists = loadBeforeEx(self._storage, oid, maxtid) if serial_exists == z64: # object does not exist at all diff --git a/src/ZODB/tests/hexstorage.py b/src/ZODB/tests/hexstorage.py index 8e7ac38a8..96c64c181 100644 --- a/src/ZODB/tests/hexstorage.py +++ b/src/ZODB/tests/hexstorage.py @@ -39,7 +39,8 @@ def __init__(self, base): setattr(self, name, v) zope.interface.directlyProvides(self, zope.interface.providedBy(base)) - if hasattr(base, 'loadBeforeEx') and 'loadBeforeEx' not in self.copied_methods: + if hasattr(base, 'loadBeforeEx') and \ + 'loadBeforeEx' not in self.copied_methods: def loadBeforeEx(oid, before): data, serial = self.base.loadBeforeEx(oid, before) if data is not None: diff --git a/src/ZODB/tests/testDemoStorage.py b/src/ZODB/tests/testDemoStorage.py index 95b9d29e4..b807625c6 100644 --- a/src/ZODB/tests/testDemoStorage.py +++ b/src/ZODB/tests/testDemoStorage.py @@ -270,7 +270,10 @@ def load_before_base_storage_current(): >>> base.close() """ -# additional DemoStorage tests that do not fit into common DemoStorageTests setup. +# additional DemoStorage tests that do not fit into common DemoStorageTests +# setup. + + class DemoStorageTests2(ZODB.tests.util.TestCase): def checkLoadAfterDelete(self): """Verify that DemoStorage correctly handles load requests for objects @@ -283,11 +286,11 @@ def checkLoadAfterDelete(self): TransactionMetaData = ZODB.Connection.TransactionMetaData # mkbase prepares base part of the storage. - def mkbase(): # -> zbase + def mkbase(): # -> zbase zbase = FileStorage("base.fs") - db = DB(zbase) - conn = db.open() - root = conn.root() + db = DB(zbase) + conn = db.open() + root = conn.root() root['obj'] = obj = MinPO(0) transaction.commit() @@ -303,18 +306,19 @@ def mkbase(): # -> zbase return zbase # prepare base + overlay - zbase = mkbase() + zbase = mkbase() zoverlay = FileStorage("overlay.fs") - zdemo = DemoStorage(base=zbase, changes=zoverlay) + zdemo = DemoStorage(base=zbase, changes=zoverlay) # overlay: modify obj and root - db = DB(zdemo) + db = DB(zdemo) conn = db.open() root = conn.root() obj = root['obj'] oid = obj._p_oid obj.value += 1 - # modify root as well so that there is root revision saved in overlay that points to obj + # modify root as well so that there is root revision saved in overlay + # that points to obj root['x'] = 1 transaction.commit() atLive = obj._p_serial @@ -327,13 +331,14 @@ def mkbase(): # -> zbase # unmount DemoStorage conn.close() db.close() - zdemo.close() # closes zbase and zoverlay as well + zdemo.close() # closes zbase and zoverlay as well del zbase, zoverlay # simulate GC on base+overlay zoverlay = FileStorage("overlay.fs") txn = transaction.get() - txn_meta = TransactionMetaData(txn.user, txn.description, txn.extension) + txn_meta = TransactionMetaData(txn.user, txn.description, + txn.extension) zoverlay.tpc_begin(txn_meta) zoverlay.deleteObject(oid, atLive, txn_meta) zoverlay.tpc_vote(txn_meta) @@ -342,7 +347,7 @@ def mkbase(): # -> zbase # remount base+overlay zbase = FileStorage("base.fs", read_only=True) zdemo = ZODB.DemoStorage.DemoStorage(base=zbase, changes=zoverlay) - db = DB(zdemo) + db = DB(zdemo) # verify: # load(obj, atLive) -> 2 @@ -362,7 +367,7 @@ def getObjAt(at): # end db.close() - zdemo.close() # closes zbase and zoverlay as well + zdemo.close() # closes zbase and zoverlay as well def test_suite(): diff --git a/src/ZODB/utils.py b/src/ZODB/utils.py index 45c15b807..32612c875 100644 --- a/src/ZODB/utils.py +++ b/src/ZODB/utils.py @@ -405,7 +405,9 @@ def load_current(storage, oid, version=''): return data, serial -_loadBeforeExWarned = set() # of storage class +_loadBeforeExWarned = set() # of storage class + + def loadBeforeEx(storage, oid, before): """loadBeforeEx provides IStorageLoadBeforeEx semantic for all storages. @@ -430,16 +432,21 @@ def loadBeforeEx(storage, oid, before): # DemoStorage (https://github.com/zopefoundation/ZODB/issues/318), so # emit corresponding warning. if type(storage) not in _loadBeforeExWarned: - # there is potential race around _loadBeforeExWarned access, but due to the - # GIL this race cannot result in that set corruption, and can only lead - # to us emitting the warning twice instead of just once. - # -> do not spend CPU on lock and just ignore it. + # there is potential race around _loadBeforeExWarned access, but + # due to the GIL this race cannot result in that set corruption, + # and can only lead to us emitting the warning twice instead of + # just once. -> do not spend CPU on lock and just ignore it. warnings.warn( - "FIXME %s does not provide loadBeforeEx - emulating it via loadBefore, but ...\n" + "FIXME %s does not provide loadBeforeEx - emulating it via " + "loadBefore, but ...\n" "\t... 1) access is be potentially slower, and\n" - "\t... 2) not full semantic of loadBeforeEx could be provided.\n" - "\t... this can lead to data corruption in the presence of delete records.\n" - "\t... -> please see https://github.com/zopefoundation/ZODB/issues/318 for details." % + "\t... 2) not full semantic of loadBeforeEx could be " + "provided.\n" + "\t... this can lead to data corruption in the presence " + "of delete records.\n" + "\t... -> please see " + "https://github.com/zopefoundation/ZODB/issues/318 for " + "details." % type(storage), PendingDeprecationWarning) _loadBeforeExWarned.add(type(storage)) return (None, z64) From c3820ff48eda59f6efc14bedc1d9cc37b6d59c7d Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Thu, 18 Nov 2021 20:22:09 +0300 Subject: [PATCH 11/12] fixup! DemoStorage: Add support for deleteObject (IExternalGC) loadAt -> loadBeforeEx since it was renamed so on https://github.com/zopefoundation/ZODB/pull/323/commits/805bf36f . --- src/ZODB/DemoStorage.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/ZODB/DemoStorage.py b/src/ZODB/DemoStorage.py index cf4b29089..c2986018a 100644 --- a/src/ZODB/DemoStorage.py +++ b/src/ZODB/DemoStorage.py @@ -34,7 +34,7 @@ import zope.interface from .ConflictResolution import ConflictResolvingStorage -from .utils import load_current, maxtid +from .utils import load_current, maxtid, p64, u64 @zope.interface.implementer( @@ -384,12 +384,13 @@ def _storeDelete(self, oid, oldserial, transaction): # oldserial ∈ base -> find out it is indeed the latest there and then # call changes.deleteObject(oldserial=z64) - _, serial = ZODB.utils.loadAt(self.changes, oid, self.changes.lastTransaction()) + changesHead = self.changes.lastTransaction() + _, serial = ZODB.utils.loadBeforeEx(self.changes, oid, p64(u64(changesHead) + 1)) if serial != ZODB.utils.z64: # object has data or deletion record in changes raise ZODB.POSException.ConflictError(oid=oid, serials=(serial, oldserial)) - _, serial = ZODB.utils.loadAt(self.base, oid, baseHead) + _, serial = ZODB.utils.loadBeforeEx(self.base, oid, p64(u64(baseHead) + 1)) if serial != oldserial: raise ZODB.POSException.ConflictError(oid=oid, serials=(serial, oldserial)) From 5a1df8c0eb37ca0472bb053438c416c68ad36abe Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Thu, 18 Nov 2021 20:27:23 +0300 Subject: [PATCH 12/12] fixup! DemoStorage: Add support for deleteObject (IExternalGC) Make flake8 happy src/ZODB/DemoStorage.py:388:80: E501 line too long (89 > 79 characters) src/ZODB/DemoStorage.py:391:80: E501 line too long (87 > 79 characters) src/ZODB/DemoStorage.py:393:80: E501 line too long (83 > 79 characters) src/ZODB/DemoStorage.py:395:80: E501 line too long (87 > 79 characters) src/ZODB/DemoStorage.py:397:80: E501 line too long (100 > 79 characters) src/ZODB/DemoStorage.py:398:80: E501 line too long (86 > 79 characters) src/ZODB/DemoStorage.py:408:5: E303 too many blank lines (2) src/ZODB/tests/testDemoStorage.py:397:9: F401 'ZODB.FileStorage.FileStorage' imported but unused src/ZODB/tests/testDemoStorage.py:399:80: E501 line too long (101 > 79 characters) --- src/ZODB/DemoStorage.py | 18 +++++++++++------- src/ZODB/tests/testDemoStorage.py | 5 +++-- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/ZODB/DemoStorage.py b/src/ZODB/DemoStorage.py index c2986018a..a428281c5 100644 --- a/src/ZODB/DemoStorage.py +++ b/src/ZODB/DemoStorage.py @@ -385,17 +385,22 @@ def _storeDelete(self, oid, oldserial, transaction): # call changes.deleteObject(oldserial=z64) changesHead = self.changes.lastTransaction() - _, serial = ZODB.utils.loadBeforeEx(self.changes, oid, p64(u64(changesHead) + 1)) + _, serial = ZODB.utils.loadBeforeEx(self.changes, oid, + p64(u64(changesHead) + 1)) if serial != ZODB.utils.z64: # object has data or deletion record in changes - raise ZODB.POSException.ConflictError(oid=oid, serials=(serial, oldserial)) + raise ZODB.POSException.ConflictError(oid=oid, + serials=(serial, oldserial)) - _, serial = ZODB.utils.loadBeforeEx(self.base, oid, p64(u64(baseHead) + 1)) + _, serial = ZODB.utils.loadBeforeEx(self.base, oid, + p64(u64(baseHead) + 1)) if serial != oldserial: - raise ZODB.POSException.ConflictError(oid=oid, serials=(serial, oldserial)) + raise ZODB.POSException.ConflictError(oid=oid, + serials=(serial, oldserial)) - # object has no data/deletion record in changes and its latest revision in base == oldserial - # -> changes.deleteObject(oldserial=z64) + correct oldserial back on conflict. + # object has no data/deletion record in changes and its latest revision + # in base == oldserial. -> changes.deleteObject(oldserial=z64) + + # correct oldserial back on conflict. try: self.changes.deleteObject(oid, ZODB.utils.z64, transaction) except ZODB.POSException.ConflictError as e: @@ -404,7 +409,6 @@ def _storeDelete(self, oid, oldserial, transaction): e.serials = (e.serials[0], oldserial) raise - def storeBlob(self, oid, oldserial, data, blobfilename, version, transaction): assert version == '', "versions aren't supported" diff --git a/src/ZODB/tests/testDemoStorage.py b/src/ZODB/tests/testDemoStorage.py index 442083c87..8bcbabfeb 100644 --- a/src/ZODB/tests/testDemoStorage.py +++ b/src/ZODB/tests/testDemoStorage.py @@ -394,9 +394,10 @@ def test_suite(): suite.addTest(unittest.makeSuite(DemoStorageTests2, 'check')) def demo_for_gctest(): - from ZODB.FileStorage import FileStorage base = ZODB.FileStorage.FileStorage('data.fs', blob_dir="data_blobs") - changes = ZODB.FileStorage.FileStorage('changes.fs', blob_dir="changes_blobs", pack_gc=False) + changes = ZODB.FileStorage.FileStorage('changes.fs', + blob_dir="changes_blobs", + pack_gc=False) return ZODB.DemoStorage.DemoStorage(base=base, changes=changes) suite.addTest(PackableStorage.IExternalGC_suite(demo_for_gctest))