CLDSRV-922: do not reference null version from master in null-key mode#6190
Conversation
Hello tcarmet,My role is to assist you with the merge of this Available options
Available commands
Status report is not available. |
Incorrect fix versionThe
Considering where you are trying to merge, I ignored possible hotfix versions and I expected to find:
Please check the |
|
LGTM |
❌ 2 Tests Failed:
View the full list of 2 ❄️ flaky test(s)
To view more test analytics, go to the Test Analytics Dashboard |
1a6d248 to
af3bc64
Compare
|
LGTM |
Request integration branchesWaiting for integration branch creation to be requested by the user. To request integration branches, please comment on this pull request with the following command: Alternatively, the |
The backbeat putMetadata path stamped nullVersionId on the new master unconditionally, even when null-version compatibility mode is disabled. In null-key mode the null version is stored under the null key, so a master referencing it via nullVersionId leads the metadata backend to orphan the null key on a later version-specific delete, leaving the bucket non-empty (intermittent BucketNotEmpty on cleanup). Mirror the normal createAndStoreObject path: apply nullVersionId to the master only via extraMD (populated solely in compat mode), and fix the adjacent Object.assign that read options.extraMD instead of the versioningPreprocessing result. Add a deterministic functional test asserting the master carries nullVersionId only in compat mode, and update the routeBackbeat unit test which previously asserted the old (buggy) master nullVersionId.
af3bc64 to
45dc80a
Compare
|
LGTM |
| return routeBackbeatAPIProxy(request, response, requestContexts, log); | ||
| } | ||
|
|
||
| const useMultipleBackend = |
| async () => { | ||
| // If we create a new version of an object (so objMd is null), | ||
| // we should make sure that the masterVersion is versioned. | ||
| // If an object already exists, we just want to update the metadata | ||
| // of the existing object and not create a new one | ||
| if (versioning && !objMd) { | ||
| let masterMD; | ||
|
|
||
| try { | ||
| masterMD = await metadata.getObjectMDPromised(bucketName, objectKey, {}, log); | ||
| } catch (err) { | ||
| if (err.is?.NoSuchKey) { | ||
| log.debug('no master found for versioned object', { | ||
| method: 'putMetadata', | ||
| bucketName, | ||
| objectKey, | ||
| }); | ||
| } else { | ||
| throw err; | ||
| } |
| @@ -789,68 +804,74 @@ function putMetadata(request, response, bucketInfo, objMd, log, callback) { | |||
| }); | |||
| // Treat as success - the repair already completed | |||
| // Get the current metadata to return | |||
| return metadata.getObjectMD(bucketName, objectKey, {}, log, | |||
| (getErr, currentMD) => { | |||
| if (getErr) { | |||
| log.warn('could not retrieve metadata after repair duplicate key error', { | |||
| error: getErr, | |||
| method: 'putMetadata', | |||
| }); | |||
| // Still treat as success since repair likely completed | |||
| return next(null, md || {}); | |||
| } | |||
| return next(null, currentMD || md || {}); | |||
| }); | |||
| return metadata.getObjectMD(bucketName, objectKey, {}, log, (getErr, currentMD) => { | |||
| if (getErr) { | |||
| log.warn('could not retrieve metadata after repair duplicate key error', { | |||
| error: getErr, | |||
| method: 'putMetadata', | |||
| }); | |||
| // Still treat as success since repair likely completed | |||
| return next(null, md || {}); | |||
| } | |||
| return next(null, currentMD || md || {}); | |||
| }); | |||
| } | |||
|
|
|||
| log.error('error putting object metadata', { | |||
| error: err, | |||
| method: 'putMetadata', | |||
| }); | |||
| return next(err); | |||
| } | |||
| pushReplicationMetric(objMd, omVal, bucketName, objectKey, log); | |||
| if (objMd && | |||
| if ( | |||
| objMd && | |||
| headers['x-scal-replication-content'] !== 'METADATA' && | |||
| versionId && | |||
| // The new data location is set to null when archiving to a Cold site. | |||
| // In that case "removing old data location key" is handled by the lifecycle | |||
| // transition processor. Check the content-length as a null location can | |||
| // also be from an empty object. | |||
| (omVal['content-length'] === 0 || | |||
| (omVal.location && Array.isArray(omVal.location))) && | |||
| locationKeysHaveChanged(objMd.location, omVal.location)) { | |||
| (omVal['content-length'] === 0 || (omVal.location && Array.isArray(omVal.location))) && | |||
| locationKeysHaveChanged(objMd.location, omVal.location) | |||
| ) { | |||
| log.info('removing old data locations', { | |||
| method: 'putMetadata', | |||
| bucketName, | |||
| objectKey, | |||
| }); | |||
| async.eachLimit(objMd.location, 5, | |||
| (loc, nextEach) => dataWrapper.data.delete(loc, log, err => { | |||
| if (err) { | |||
| log.warn('error removing old data location key', { | |||
| async.eachLimit( | |||
| objMd.location, | |||
| 5, | |||
| (loc, nextEach) => | |||
| dataWrapper.data.delete(loc, log, err => { | |||
| if (err) { | |||
| log.warn('error removing old data location key', { | |||
| bucketName, | |||
| objectKey, | |||
| locationKey: loc, | |||
| error: err.message, | |||
| }); | |||
| } | |||
| // do not forward the error to let other | |||
| // locations be deleted | |||
| nextEach(); | |||
| }), | |||
| () => { | |||
| log.debug('done removing old data locations', { | |||
| method: 'putMetadata', | |||
| bucketName, | |||
| objectKey, | |||
| locationKey: loc, | |||
| error: err.message, | |||
| }); | |||
| } | |||
| // do not forward the error to let other | |||
| // locations be deleted | |||
| nextEach(); | |||
| }), | |||
| () => { | |||
| log.debug('done removing old data locations', { | |||
| method: 'putMetadata', | |||
| bucketName, | |||
| objectKey, | |||
| }); | |||
| }); | |||
| }, | |||
| ); | |||
| } | |||
| return _respond(response, md, log, next); | |||
| }); | |||
| } | |||
| ], callback); | |||
| }, | |||
| ], | |||
| callback, | |||
| ); | |||
| }); | |||
| } | |||
|
|
|||
| @@ -904,29 +925,27 @@ function putObject(request, response, log, callback) { | |||
| } | |||
| const payloadLen = parseInt(request.headers['content-length'], 10); | |||
| const backendInfo = new BackendInfo(config, storageLocation); | |||
| return dataStore(context, CIPHER, request, payloadLen, {}, backendInfo, log, | |||
| (err, retrievalInfo, md5) => { | |||
| if (err) { | |||
| log.error('error putting data', { | |||
| error: err, | |||
| method: 'putObject', | |||
| }); | |||
| return callback(err); | |||
| } | |||
| if (contentMD5 !== md5) { | |||
| return callback(errors.BadDigest); | |||
| } | |||
| const responsePayload = constructPutResponse({ | |||
| }); | ||
| return callback(err); | ||
| } | ||
| log.debug('batch delete successful', { locations }); | ||
|
|
||
| // Update inflight metrics for the data which has just been freed | ||
| const bucket = request.bucketName; | ||
| const contentLength = locations.reduce((length, loc) => length + loc.size, 0); | ||
|
|
||
| // TODO: `bucket` should probably always be passed, to be confirmed in CLDSRV-643 | ||
| // For now be leniant and skip inflight updates if it is not specified, to avoid any | ||
| // impact esp. on CRR | ||
| if (!bucket || !config.isQuotaEnabled() || contentLength == 0) { | ||
| return _respond(response, null, log, callback); | ||
| } | ||
|
|
||
| return async.waterfall([ | ||
| // eslint-disable-next-line no-unused-vars | ||
| next => metadata.getBucket(bucket, log, (err, bucketMD, raftSessionId) => next(err, bucketMD)), | ||
| (bucketMD, next) => quotaUtils.validateQuotas(request, bucketMD, request.accountQuotas, | ||
| ['objectDelete'], 'objectDelete', -contentLength, false, log, next), | ||
| ], err => { | ||
| }, | ||
| err => { | ||
| if (err) { | ||
| // Ignore error, as the data has been deleted already: only inflight count | ||
| // has not been updated, and will be eventually consistent anyway | ||
| log.warn('batch delete failed to update inflights', { | ||
| log.error('batch delete failed', { | ||
| method: 'batchDelete', | ||
| locations, | ||
| error: err, | ||
| }); | ||
| return callback(err); | ||
| } | ||
| log.debug('batch delete successful', { locations }); | ||
|
|
||
| return _respond(response, null, log, callback); | ||
| }); | ||
| }); | ||
| // Update inflight metrics for the data which has just been freed | ||
| const bucket = request.bucketName; | ||
| const contentLength = locations.reduce((length, loc) => length + loc.size, 0); | ||
|
|
||
| // TODO: `bucket` should probably always be passed, to be confirmed in CLDSRV-643 | ||
| // For now be leniant and skip inflight updates if it is not specified, to avoid any |
| locations, | ||
| error: err, | ||
| }); | ||
| } |
| @@ -1462,59 +1510,45 @@ function listLifecycle(request, response, userInfo, log, cb) { | |||
| } | |||
|
|
|||
| }, | ||
| 's3', | ||
| requestContexts, | ||
| ), | ||
| (userInfo, authorizationResults, next) => | ||
| handleAuthorizationResults(request, authorizationResults, apiMethods[0], undefined, log, err => | ||
| next(err, userInfo), | ||
| ), | ||
| (userInfo, next) => { | ||
| if (request.serverAccessLog) { | ||
| // eslint-disable-next-line no-param-reassign | ||
| request.serverAccessLog.analyticsUserName = userInfo.getIAMdisplayName(); | ||
| request.serverAccessLog.startTurnAroundTime = process.hrtime.bigint(); | ||
| } | ||
| // eslint-disable-next-line no-param-reassign | ||
| request.accountQuotas = infos?.accountQuota; | ||
| return next(err, userInfo, authorizationResults); | ||
| }, 's3', requestContexts), | ||
| (userInfo, authorizationResults, next) => handleAuthorizationResults( | ||
| request, authorizationResults, apiMethods[0], undefined, log, err => next(err, userInfo)), | ||
| (userInfo, next) => { | ||
| if (request.serverAccessLog) { | ||
| // eslint-disable-next-line no-param-reassign | ||
| request.serverAccessLog.startTurnAroundTime = process.hrtime.bigint(); | ||
| } | ||
| // TODO: understand why non-object requests (batchdelete) were not authenticated | ||
| if (!isObjectRequest) { |
| return routeNonObjectRequest(request, response, userInfo, log, next); | ||
| } | ||
| const decodedVidResult = decodeVersionId(request.query); | ||
| if (decodedVidResult instanceof Error) { |
|
LGTM |
|
/approve |
ConflictA conflict has been raised during the creation of I have not created the integration branch. Here are the steps to resolve this conflict: git fetch
git checkout -B w/9.4/bugfix/CLDSRV-922-backbeat-null-version-master origin/development/9.4
git merge origin/bugfix/CLDSRV-922-backbeat-null-version-master
# <intense conflict resolution>
git commit
git push -u origin w/9.4/bugfix/CLDSRV-922-backbeat-null-version-masterThe following options are set: approve |
Build failedThe build for commit did not succeed in branch bugfix/CLDSRV-922-backbeat-null-version-master The following options are set: approve |
|
I have successfully merged the changeset of this pull request
The following branches have NOT changed:
This pull request did not target the following hotfix branch(es) so they
Please check the status of the associated issue CLDSRV-922. Goodbye tcarmet. The following options are set: approve |
When replicating a version on top of a pre-existing null version, the destination master could end up referencing that null version in a way the storage layout no longer supports, which intermittently left a bucket non-empty after all its objects were deleted. This change makes replication follow the same null-version contract as the regular write path so the leftover key can no longer occur.
Added a deterministic test that asserts the master references the null version only in compatibility mode; it fails on the old behavior and passes on the new. The full backbeat-route functional suite was run against a bucketd-backed stack in both compatibility modes with no regressions.