From f31f88faa214c52f5229bb82a343d177c6a31822 Mon Sep 17 00:00:00 2001 From: Romain Winieski Date: Fri, 19 Jun 2026 10:44:38 +0200 Subject: [PATCH 01/17] add Resize-additionalVolumes + Tests --- .gitignore | 1 + docs/spec/volume resize.md | 161 ++++++-- pkg/k8sutil/volume_resize_validation.go | 411 ++++++++++++++----- pkg/k8sutil/volume_resize_validation_test.go | 191 ++++++++- test/e2e-helm/9_volume_resize_test.go | 126 ++++-- test/e2e/9_volume_resize_test.go | 105 +++-- 6 files changed, 795 insertions(+), 200 deletions(-) diff --git a/.gitignore b/.gitignore index 7a7feec5..773544da 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,4 @@ go.work *.swp *.swo *~ +.DS_Store diff --git a/docs/spec/volume resize.md b/docs/spec/volume resize.md index bd409867..2507d4fe 100644 --- a/docs/spec/volume resize.md +++ b/docs/spec/volume resize.md @@ -169,7 +169,7 @@ Cloud-specific limitations such as rate limits, quota exhaustion, cooldown perio #### In Scope -1. Expansion of the primary persistence-backed PVC used by a MarkLogic group. +1. Expansion of the primary persistence-backed PVC and any configured `additionalVolumeClaimTemplates` PVCs used by a MarkLogic group. 2. Declarative resize triggered by increasing the requested storage size in the CR. @@ -190,18 +190,16 @@ Cloud-specific limitations such as rate limits, quota exhaustion, cooldown perio 3. Cross-cluster storage orchestration outside the target MarkLogic resource. -4. Expansion of arbitrary `additionalVolumeClaimTemplates` in the first version, unless explicitly added in a later phase. - -5. Hard guarantees about zero service interruption in storage environments that require offline filesystem expansion. +4. Hard guarantees about zero service interruption in storage environments that require offline filesystem expansion. -6. Concurrent execution of resize with scale, upgrade, or other StatefulSet-mutating operations in v1. +5. Concurrent execution of resize with scale, upgrade, or other StatefulSet-mutating operations in v1. ## API and User Experience Contract ### **Trigger Model** -Users trigger resize by modifying the effective persistence size on `MarklogicCluster`, either through `MarklogicCluster.spec.persistence.size` or a group-specific override such as `MarklogicCluster.spec.markLogicGroups[i].persistence.size`. The operator computes the effective desired size for each managed group and reconciles resize at the `MarklogicGroup` level, where the StatefulSet, Pods, and PVCs are managed. +Users trigger resize by modifying the effective target sizes on `MarklogicCluster`: the primary persistence size (`MarklogicCluster.spec.persistence.size` or a group-specific override such as `MarklogicCluster.spec.markLogicGroups[i].persistence.size`) and/or storage requests in `additionalVolumeClaimTemplates` (cluster-level or group-level). The operator computes the effective desired target for each managed group and reconciles resize at the `MarklogicGroup` level, where the StatefulSet, Pods, and PVCs are managed. Example: @@ -214,7 +212,7 @@ spec: resizeStrategy: parallel # Options: 'parallel' (default) or 'sequential' ``` -The operator shall interpret an increase to the effective persistence size as a request to expand the persistent volumes associated with the target MarkLogic group. +The operator shall interpret an increase to any effective target PVC template size as a request to expand the associated persistent volumes for the target MarkLogic group. The operator shall not treat an unchanged size as a new resize operation. @@ -234,8 +232,9 @@ Detailed resize execution state is owned by `MarklogicGroup.status.volumeResizeS | Field | Type | Description | Notes | |---------------------------------|---------|------------------------------------------------------------------|--------------------------------------------------| -| spec.persistence.enabled | boolean | Indicates whether persistence is enabled for the target workload | Resize is valid only when persistence is enabled | +| spec.persistence.enabled | boolean | Indicates whether primary datadir persistence is enabled for the target workload | Controls the primary `datadir` PVC template only | | spec.persistence.size | string | Desired persistent volume size | Increasing this field triggers resize | +| spec.additionalVolumeClaimTemplates[].spec.resources.requests.storage | string | Desired size for an additional PVC template | Increasing this field triggers resize for that template's PVCs | | spec.persistence.resizeStrategy | enum | Strategy the operator uses when resizing PVCs | Default is parallel | #### Resize Strategy Values @@ -591,19 +590,19 @@ Validate the resize request and the execution environment before making any chan Entry criteria: -1. The effective requested size for the target MarkLogic group is greater than the current size. +1. At least one effective target PVC template size for the target MarkLogic group is greater than the current observed size for that template. 2. No conflicting resize operation is already active for the same target group. Actions: -1. Resolve the effective persistence configuration for the target group. +1. Resolve the effective storage configuration for the target group, including primary persistence and any configured additional PVC templates. 2. Identify the PVCs owned by or associated with the group. -3. Verify that persistence is enabled. +3. Verify that at least one resizable PVC template target exists. -4. Verify that the requested size is greater than the current effective size. +4. Verify that each requested template target is greater than or equal to its current effective size. 5. Verify that all target PVCs are in `Bound` state. @@ -640,7 +639,7 @@ Shared actions: 1. Determine which PVC or PVCs are eligible for resize submission in the current reconcile cycle. -2. Patch the target PVC request size to the desired `targetSize`. +2. Patch each target PVC request size to that PVC's desired target size. 3. Record progress in status. @@ -691,9 +690,9 @@ A PVC is considered checkpointed for phase advancement when the resize request h Concretely, a PVC has reached the required checkpoint when one of the following is true: -1. **Online expansion complete:** `pvc.status.capacity[storage]` >= `targetSize` AND the PVC does not have a condition of type `FileSystemResizePending` with status `True`. In this case, no Pod restart is needed for this PVC. +1. **Online expansion complete:** `pvc.status.capacity[storage]` >= the PVC's desired target size AND the PVC does not have a condition of type `FileSystemResizePending` with status `True`. In this case, no Pod restart is needed for this PVC. -2. **Offline expansion pending:** `pvc.spec.resources.requests[storage]` == `targetSize` AND the PVC has a condition of type `FileSystemResizePending` with status `True`. This confirms the block-level expansion has been acknowledged and workload-side remount is still required. +2. **Offline expansion pending:** `pvc.spec.resources.requests[storage]` == the PVC's desired target size AND the PVC has a condition of type `FileSystemResizePending` with status `True`. This confirms the block-level expansion has been acknowledged and workload-side remount is still required. The operator must record per-PVC checkpoint type (online vs. offline) in `pvcStatuses` so the later restart-handling logic can determine whether any Pod restart is needed and which Pods are eligible for selective restart. @@ -1143,7 +1142,7 @@ At minimum, the persisted recovery record must include: 2. `observedGeneration` -3. `currentSize` and `targetSize` +3. `currentSize`, `targetSize`, and per-PVC desired targets in `pvcStatuses[*].requestedSize` 4. The stable ordered target PVC list @@ -2240,7 +2239,7 @@ The operator must preserve expected behavior for existing workloads that already Compatibility expectations: -1. Existing persisted workloads continue operating normally until the requested size increases +1. Existing persisted workloads continue operating normally until a requested size increases (primary persistence and/or additional PVC templates) 2. The feature does not require users to recreate workloads @@ -2262,7 +2261,7 @@ The resize feature should be treated as a backward-compatible additive capabilit 4. Existing clients are not required to understand resize-specific status fields -If future versions materially change the workflow contract, such as adding support for resizing `additionalVolumeClaimTemplates` or changing concurrent resize semantics, those changes should be documented explicitly as contract evolutions. +If future versions materially change the workflow contract, such as changing concurrent resize semantics or introducing new target-selection rules, those changes should be documented explicitly as contract evolutions. ### Release Readiness Outcomes @@ -2299,12 +2298,6 @@ Allow only one active resize execution per `MarklogicGroup`. Snapshot the active Additionally, if the new desired size is smaller than the active `targetSize` but still larger than `currentSize`, this constitutes a conflicting intent that cannot be satisfied while the active operation is in flight. The operator must not start a second operation. The active operation completes against its snapshotted target. The new smaller-but-still-larger desired size is evaluated only after the active operation reaches a terminal state. -### Should Additional Volume Claim Templates Be Included in Scope? - -Recommendation: - -Keep v1 limited to the primary persistence-backed PVC. Additional volume claim templates should be a later feature because they expand the failure surface and may have different application-level restart implications. - ### What Is the Authoritative Completion Signal? Recommendation: @@ -2360,6 +2353,12 @@ Merge semantics for `resizeStrategy`: 2. If `resizeStrategy` is omitted at the group level but specified at the cluster level, the cluster-level value is inherited. 3. If `resizeStrategy` is omitted at both levels, the default is `parallel`. 4. This follows the same inheritance pattern as other persistence fields. + +Merge semantics for resize target scope: + +1. The effective target set includes the primary `datadir` template when persistence is enabled for the group. +2. The effective target set also includes any additional PVC templates with explicit storage requests. +3. Group-level template definitions override inherited cluster-level template definitions by template name. ## Appendix and Examples @@ -2376,7 +2375,7 @@ Merge semantics for `resizeStrategy`: | Sequential strategy | Resize one PVC at a time and wait for each before continuing | | Checkpoint | The state a PVC must reach before the resize workflow can advance past it. Concretely, either online expansion is complete (`status.capacity` >= target, no `FileSystemResizePending`) or offline expansion is acknowledged (`FileSystemResizePending` is present, confirming the block device was expanded and workload remount is required). | | currentSize | The minimum of `pvc.status.capacity[storage]` across all target PVCs at the time the resize operation begins | -| targetSize | The desired final volume size snapshotted from `spec.persistence.size` when the resize operation starts | +| targetSize | The primary operation target size snapshot (typically derived from persistence size); per-PVC targets are represented by `pvcStatuses[*].requestedSize` | | Safe partial-completion | A state where PVCs have been expanded but the StatefulSet template has not yet been synchronized. This is a tolerable intermediate state: the workload continues to function with expanded storage, and template synchronization can be retried independently. | ### Example Resize Request @@ -2390,6 +2389,14 @@ spec: persistence: enabled: true size: 100Gi + additionalVolumeClaimTemplates: + - metadata: + name: logs + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 50Gi markLogicGroups: - name: dnode replicas: 3 @@ -2407,15 +2414,15 @@ status: operationID: resize-20260331-dnode observedGeneration: 12 phase: WaitingForPVCResize - message: Waiting for PVC checkpoint completion for 2 of 3 PVCs + message: Waiting for PVC checkpoint completion for 2 of 6 PVCs currentSize: 20Gi targetSize: 100Gi deferredTargetSize: 150Gi deferredObservedGeneration: 13 resizeStrategy: sequential activePVC: datadir-dnode-1 - pvcsCheckpointed: 1 - totalPvcs: 3 + pvcsCheckpointed: 2 + totalPvcs: 6 pvcStatuses: - name: datadir-dnode-0 podName: dnode-0 @@ -2436,6 +2443,25 @@ status: observedCapacity: 20Gi state: Pending restartRequired: false + - name: logs-dnode-0 + podName: dnode-0 + requestedSize: 50Gi + observedCapacity: 50Gi + state: Checkpointed + checkpointType: OnlineComplete + restartRequired: false + - name: logs-dnode-1 + podName: dnode-1 + requestedSize: 50Gi + observedCapacity: 20Gi + state: WaitingForCheckpoint + restartRequired: false + - name: logs-dnode-2 + podName: dnode-2 + requestedSize: 20Gi + observedCapacity: 20Gi + state: Pending + restartRequired: false retryCount: 0 lastTransitionTime: "2026-03-31T10:15:00Z" firstStartedTime: "2026-03-31T10:12:34Z" @@ -2453,8 +2479,8 @@ status: currentSize: 20Gi targetSize: 100Gi resizeStrategy: sequential - pvcsCheckpointed: 3 - totalPvcs: 3 + pvcsCheckpointed: 6 + totalPvcs: 6 pvcStatuses: - name: datadir-dnode-0 podName: dnode-0 @@ -2477,6 +2503,27 @@ status: state: Checkpointed checkpointType: OnlineComplete restartRequired: false + - name: logs-dnode-0 + podName: dnode-0 + requestedSize: 50Gi + observedCapacity: 50Gi + state: Checkpointed + checkpointType: OnlineComplete + restartRequired: false + - name: logs-dnode-1 + podName: dnode-1 + requestedSize: 50Gi + observedCapacity: 50Gi + state: Checkpointed + checkpointType: OnlineComplete + restartRequired: false + - name: logs-dnode-2 + podName: dnode-2 + requestedSize: 50Gi + observedCapacity: 50Gi + state: Checkpointed + checkpointType: OnlineComplete + restartRequired: false retryCount: 0 lastTransitionTime: "2026-03-31T10:27:51Z" firstStartedTime: "2026-03-31T10:12:34Z" @@ -2497,7 +2544,7 @@ status: targetSize: 100Gi resizeStrategy: parallel pvcsCheckpointed: 0 - totalPvcs: 3 + totalPvcs: 6 pvcStatuses: - name: datadir-dnode-0 podName: dnode-0 @@ -2520,6 +2567,27 @@ status: state: Failed lastReason: StorageClassNotExpandable lastMessage: StorageClass gp2 does not allow volume expansion + - name: logs-dnode-0 + podName: dnode-0 + requestedSize: 20Gi + observedCapacity: 20Gi + state: Failed + lastReason: StorageClassNotExpandable + lastMessage: StorageClass gp2 does not allow volume expansion + - name: logs-dnode-1 + podName: dnode-1 + requestedSize: 20Gi + observedCapacity: 20Gi + state: Failed + lastReason: StorageClassNotExpandable + lastMessage: StorageClass gp2 does not allow volume expansion + - name: logs-dnode-2 + podName: dnode-2 + requestedSize: 20Gi + observedCapacity: 20Gi + state: Failed + lastReason: StorageClassNotExpandable + lastMessage: StorageClass gp2 does not allow volume expansion retryCount: 0 lastTransitionTime: "2026-03-31T10:14:02Z" firstStartedTime: "2026-03-31T10:13:10Z" @@ -2535,13 +2603,13 @@ status: observedGeneration: 12 phase: Stalled reason: PartialResizeFailure - message: 2 of 3 PVCs reached the required checkpoint; 1 PVC failed + message: 4 of 6 PVCs reached the required checkpoint; 2 PVCs failed currentSize: 20Gi targetSize: 100Gi resizeStrategy: sequential activePVC: datadir-dnode-2 - pvcsCheckpointed: 2 - totalPvcs: 3 + pvcsCheckpointed: 4 + totalPvcs: 6 pvcStatuses: - name: datadir-dnode-0 podName: dnode-0 @@ -2565,6 +2633,28 @@ status: restartRequired: false lastReason: StorageQuotaExceeded lastMessage: Provider rejected resize due to insufficient capacity + - name: logs-dnode-0 + podName: dnode-0 + requestedSize: 50Gi + observedCapacity: 50Gi + state: Checkpointed + checkpointType: OnlineComplete + restartRequired: false + - name: logs-dnode-1 + podName: dnode-1 + requestedSize: 50Gi + observedCapacity: 50Gi + state: Checkpointed + checkpointType: OnlineComplete + restartRequired: false + - name: logs-dnode-2 + podName: dnode-2 + requestedSize: 50Gi + observedCapacity: 20Gi + state: Failed + restartRequired: false + lastReason: StorageQuotaExceeded + lastMessage: Provider rejected resize due to insufficient capacity retryCount: 1 nextRetryTime: "2026-03-31T10:23:00Z" warnings: @@ -2573,6 +2663,9 @@ status: - name: datadir-dnode-2 reason: StorageQuotaExceeded message: Provider rejected resize due to insufficient capacity + - name: logs-dnode-2 + reason: StorageQuotaExceeded + message: Provider rejected resize due to insufficient capacity lastTransitionTime: "2026-03-31T10:22:00Z" firstStartedTime: "2026-03-31T10:12:34Z" ``` diff --git a/pkg/k8sutil/volume_resize_validation.go b/pkg/k8sutil/volume_resize_validation.go index 77e0a4ea..c6007367 100644 --- a/pkg/k8sutil/volume_resize_validation.go +++ b/pkg/k8sutil/volume_resize_validation.go @@ -47,18 +47,26 @@ type resizePVCDiscovery struct { missingPVCs []string notBoundPVCs []string minSize *resource.Quantity + minByTemplate map[string]*resource.Quantity + targetByPVC map[string]resource.Quantity } +type templateResizeTargets map[string]resource.Quantity + func (oc *OperatorContext) ReconcileVolumeResizeValidation() result.ReconcileResult { cr := oc.MarklogicGroup - if cr == nil || cr.Spec.Persistence == nil || !cr.Spec.Persistence.Enabled { + if cr == nil { return result.Continue() } - targetSize, err := resource.ParseQuantity(cr.Spec.Persistence.Size) + targets, err := resolveResizeTargetsFromSpec(cr) if err != nil { - return oc.failResizeValidation(marklogicv1.VolumeResizeReasonInvalidResizeRequest, fmt.Sprintf("Invalid persistence size %q", cr.Spec.Persistence.Size)) + return oc.failResizeValidation(marklogicv1.VolumeResizeReasonInvalidResizeRequest, err.Error()) + } + if len(targets) == 0 { + return result.Continue() } + primaryTarget := primaryResizeTarget(targets) currentSts, err := oc.GetStatefulSet(cr.Namespace, cr.Spec.Name) if err != nil { @@ -69,14 +77,14 @@ func (oc *OperatorContext) ReconcileVolumeResizeValidation() result.ReconcileRes return result.Error(err) } - pvcState, err := oc.discoverPrimaryPVCs(currentSts) + pvcState, err := oc.discoverPrimaryPVCs(currentSts, targets) if err != nil { return result.Error(err) } active := cr.Status.VolumeResizeStatus if isResizeOperationActive(active) { - return oc.reconcileActiveResizeOperation(active, targetSize, currentSts) + return oc.reconcileActiveResizeOperation(active, primaryTarget, currentSts) } if pvcState.minSize == nil { @@ -84,29 +92,29 @@ func (oc *OperatorContext) ReconcileVolumeResizeValidation() result.ReconcileRes return result.Continue() } - comparison := targetSize.Cmp(*pvcState.minSize) + comparison, cmpMessage := compareTargetsWithCurrent(pvcState, targets) if comparison == 0 { return result.Continue() } - if shouldIgnoreTerminalResizeRestart(cr.Status.VolumeResizeStatus, cr.Spec.Persistence.Size, cr.Generation) { + if shouldIgnoreTerminalResizeRestart(cr.Status.VolumeResizeStatus, primaryTarget.String(), cr.Generation) { return result.Continue() } - resizeStatus := oc.newResizeStatus(pvcState, targetSize.String()) + resizeStatus := oc.newResizeStatus(pvcState, primaryTarget.String()) claimed, err := oc.claimResizeStatusCAS(resizeStatus) if err != nil { return result.Error(err) } if !claimed { if isResizeOperationActive(oc.MarklogicGroup.Status.VolumeResizeStatus) { - return oc.reconcileActiveResizeOperation(oc.MarklogicGroup.Status.VolumeResizeStatus, targetSize, currentSts) + return oc.reconcileActiveResizeOperation(oc.MarklogicGroup.Status.VolumeResizeStatus, primaryTarget, currentSts) } return result.Continue() } if comparison < 0 { - oc.transitionResizePhase(resizeStatus, marklogicv1.VolumeResizePhaseFailed, marklogicv1.VolumeResizeReasonShrinkNotSupported, fmt.Sprintf("Shrink is not supported: current=%s target=%s", pvcState.minSize.String(), targetSize.String())) + oc.transitionResizePhase(resizeStatus, marklogicv1.VolumeResizePhaseFailed, marklogicv1.VolumeResizeReasonShrinkNotSupported, cmpMessage) oc.emitResizeEvent(corev1.EventTypeWarning, "VolumeResizeFailed", resizeStatus.Message) if err := oc.patchResizeStatus(resizeStatus); err != nil { return result.Error(err) @@ -174,6 +182,10 @@ func (oc *OperatorContext) ReconcileVolumeResizeValidation() result.ReconcileRes func (oc *OperatorContext) failResizeValidation(reason marklogicv1.VolumeResizeReason, message string) result.ReconcileResult { resizeStatus := oc.MarklogicGroup.Status.VolumeResizeStatus if resizeStatus == nil { + targetSize := "" + if oc.MarklogicGroup.Spec.Persistence != nil { + targetSize = oc.MarklogicGroup.Spec.Persistence.Size + } now := metav1.Now() resizeStatus = &marklogicv1.VolumeResizeStatus{ OperationID: "resize-" + generateRandomAlphaNumeric(10), @@ -181,7 +193,7 @@ func (oc *OperatorContext) failResizeValidation(reason marklogicv1.VolumeResizeR FirstStartedTime: &now, LastTransitionTime: &now, CurrentSize: "", - TargetSize: oc.MarklogicGroup.Spec.Persistence.Size, + TargetSize: targetSize, ResizeStrategy: resolveResizeStrategy(oc.MarklogicGroup.Spec.Persistence), Phase: marklogicv1.VolumeResizePhaseValidating, } @@ -302,22 +314,16 @@ func (oc *OperatorContext) processResizeSubmission(status *marklogicv1.VolumeRes return result.RequeueSoon(requeueSecs) } - targetSize, err := resource.ParseQuantity(status.TargetSize) - if err != nil { - oc.transitionResizePhase(status, marklogicv1.VolumeResizePhaseFailed, marklogicv1.VolumeResizeReasonInvalidResizeRequest, fmt.Sprintf("Invalid target size %q", status.TargetSize)) - oc.emitResizeEvent(corev1.EventTypeWarning, "VolumeResizeFailed", status.Message) - if patchErr := oc.patchResizeStatus(status); patchErr != nil { - return result.Error(patchErr) - } - return result.Done() - } - if len(status.PVCStatuses) == 0 { currentSts, stsErr := oc.GetStatefulSet(oc.MarklogicGroup.Namespace, oc.MarklogicGroup.Spec.Name) if stsErr != nil { return result.Error(stsErr) } - pvcState, discoverErr := oc.discoverPrimaryPVCs(currentSts) + targets, targetErr := resolveResizeTargetsFromSpec(oc.MarklogicGroup) + if targetErr != nil { + return result.Error(targetErr) + } + pvcState, discoverErr := oc.discoverPrimaryPVCs(currentSts, targets) if discoverErr != nil { return result.Error(discoverErr) } @@ -342,10 +348,15 @@ func (oc *OperatorContext) processResizeSubmission(status *marklogicv1.VolumeRes if observed.IsZero() { observed = requested } - entry.RequestedSize = requested.String() + entryTarget, targetErr := resizeTargetForPVCEntry(status, entry) + if targetErr != nil { + oc.markPVCFailed(status, entry.Name, marklogicv1.VolumeResizeReasonInvalidResizeRequest, targetErr.Error()) + continue + } + entry.RequestedSize = entryTarget.String() entry.ObservedCapacity = observed.String() - if requested.Cmp(targetSize) >= 0 { + if requested.Cmp(entryTarget) >= 0 { entry.State = marklogicv1.PVCResizeStateWaitingForCheckpoint entry.LastReason = "" entry.LastMessage = "Waiting for resize checkpoint" @@ -356,13 +367,13 @@ func (oc *OperatorContext) processResizeSubmission(status *marklogicv1.VolumeRes if pvc.Spec.Resources.Requests == nil { pvc.Spec.Resources.Requests = corev1.ResourceList{} } - pvc.Spec.Resources.Requests[corev1.ResourceStorage] = targetSize + pvc.Spec.Resources.Requests[corev1.ResourceStorage] = entryTarget if patchErr := oc.Client.Patch(oc.Ctx, pvc, patch); patchErr != nil { oc.markPVCFailed(status, entry.Name, marklogicv1.VolumeResizeReasonResizeFailed, patchErr.Error()) continue } - entry.RequestedSize = targetSize.String() + entry.RequestedSize = entryTarget.String() entry.State = marklogicv1.PVCResizeStateResizeSubmitted entry.LastReason = "" entry.LastMessage = "Resize request submitted" @@ -401,16 +412,6 @@ func (oc *OperatorContext) processResizeWaiting(status *marklogicv1.VolumeResize return result.RequeueSoon(requeueSecs) } - targetSize, err := resource.ParseQuantity(status.TargetSize) - if err != nil { - oc.transitionResizePhase(status, marklogicv1.VolumeResizePhaseFailed, marklogicv1.VolumeResizeReasonInvalidResizeRequest, fmt.Sprintf("Invalid target size %q", status.TargetSize)) - oc.emitResizeEvent(corev1.EventTypeWarning, "VolumeResizeFailed", status.Message) - if patchErr := oc.patchResizeStatus(status); patchErr != nil { - return result.Error(patchErr) - } - return result.Done() - } - for i := range status.PVCStatuses { entry := &status.PVCStatuses[i] if isPVCCheckpointed(entry) { @@ -429,10 +430,15 @@ func (oc *OperatorContext) processResizeWaiting(status *marklogicv1.VolumeResize observed = requested } - entry.RequestedSize = requested.String() + entryTarget, targetErr := resizeTargetForPVCEntry(status, entry) + if targetErr != nil { + oc.markPVCFailed(status, entry.Name, marklogicv1.VolumeResizeReasonInvalidResizeRequest, targetErr.Error()) + continue + } + entry.RequestedSize = entryTarget.String() entry.ObservedCapacity = observed.String() - if requested.Cmp(targetSize) >= 0 && observed.Cmp(targetSize) >= 0 { + if requested.Cmp(entryTarget) >= 0 && observed.Cmp(entryTarget) >= 0 { if hasFileSystemResizePending(pvc) { entry.State = marklogicv1.PVCResizeStateCheckpointed entry.CheckpointType = marklogicv1.PVCResizeCheckpointTypeOfflinePending @@ -502,9 +508,9 @@ func (oc *OperatorContext) processResizeWaiting(status *marklogicv1.VolumeResize } func (oc *OperatorContext) processStatefulSetSynchronization(status *marklogicv1.VolumeResizeStatus, currentSts *appsv1.StatefulSet) result.ReconcileResult { - targetSize, err := resource.ParseQuantity(status.TargetSize) + templateTargets, err := desiredTemplateTargetsFromStatus(status, currentSts.Name) if err != nil { - oc.transitionResizePhase(status, marklogicv1.VolumeResizePhaseFailed, marklogicv1.VolumeResizeReasonInvalidResizeRequest, fmt.Sprintf("Invalid target size %q", status.TargetSize)) + oc.transitionResizePhase(status, marklogicv1.VolumeResizePhaseFailed, marklogicv1.VolumeResizeReasonInvalidResizeRequest, err.Error()) oc.emitResizeEvent(corev1.EventTypeWarning, "VolumeResizeFailed", status.Message) if patchErr := oc.patchResizeStatus(status); patchErr != nil { return result.Error(patchErr) @@ -514,7 +520,7 @@ func (oc *OperatorContext) processStatefulSetSynchronization(status *marklogicv1 addResizeMarker(status, resizeMarkerSyncStarted) if !hasResizeMarker(status, resizeMarkerTemplateSynced) { - synced, syncErr := oc.syncStatefulSetDataDirTemplate(status, currentSts, targetSize) + synced, syncErr := oc.syncStatefulSetPVCTemplates(status, currentSts, templateTargets) if syncErr != nil { return oc.scheduleSyncRetryOrFail(status, marklogicv1.VolumeResizeReasonStatefulSetSyncFailed, "Failed to synchronize StatefulSet template", syncErr) } @@ -630,11 +636,20 @@ func (oc *OperatorContext) processPodsReadyWait(status *marklogicv1.VolumeResize return result.RequeueSoon(3) } - activeEntry.State = marklogicv1.PVCResizeStateRestarted - activeEntry.CheckpointType = marklogicv1.PVCResizeCheckpointTypeOfflineComplete - activeEntry.RestartRequired = false - activeEntry.LastReason = "" - activeEntry.LastMessage = "Pod restart completed" + for i := range status.PVCStatuses { + entry := &status.PVCStatuses[i] + if entry.PodName != activeEntry.PodName { + continue + } + if entry.State != marklogicv1.PVCResizeStateRestartPending { + continue + } + entry.State = marklogicv1.PVCResizeStateRestarted + entry.CheckpointType = marklogicv1.PVCResizeCheckpointTypeOfflineComplete + entry.RestartRequired = false + entry.LastReason = "" + entry.LastMessage = "Pod restart completed" + } status.ActivePVC = "" } } @@ -676,9 +691,9 @@ func (oc *OperatorContext) processResizeVerification(status *marklogicv1.VolumeR return result.RequeueSoon(requeueSecs) } - targetSize, err := resource.ParseQuantity(status.TargetSize) + templateTargets, err := desiredTemplateTargetsFromStatus(status, currentSts.Name) if err != nil { - oc.transitionResizePhase(status, marklogicv1.VolumeResizePhaseFailed, marklogicv1.VolumeResizeReasonInvalidResizeRequest, fmt.Sprintf("Invalid target size %q", status.TargetSize)) + oc.transitionResizePhase(status, marklogicv1.VolumeResizePhaseFailed, marklogicv1.VolumeResizeReasonInvalidResizeRequest, err.Error()) oc.emitResizeEvent(corev1.EventTypeWarning, "VolumeResizeFailed", status.Message) if patchErr := oc.patchResizeStatus(status); patchErr != nil { return result.Error(patchErr) @@ -691,17 +706,24 @@ func (oc *OperatorContext) processResizeVerification(status *marklogicv1.VolumeR oc.emitResizeEvent(corev1.EventTypeNormal, "VolumeResizeProgressing", "Starting resize outcome verification") } - templateRequest, hasTemplate := getStatefulSetDataDirTemplateRequest(currentSts) - if !hasTemplate { - oc.transitionResizePhase(status, marklogicv1.VolumeResizePhaseFailed, marklogicv1.VolumeResizeReasonTemplateUpdateInterrupted, "StatefulSet datadir template is missing during verification") - oc.emitResizeEvent(corev1.EventTypeWarning, "VolumeResizeFailed", status.Message) - if patchErr := oc.patchResizeStatus(status); patchErr != nil { - return result.Error(patchErr) + templatesBelowTarget := make([]string, 0) + for templateName, templateTarget := range templateTargets { + templateRequest, hasTemplate := getStatefulSetTemplateRequest(currentSts, templateName) + if !hasTemplate { + oc.transitionResizePhase(status, marklogicv1.VolumeResizePhaseFailed, marklogicv1.VolumeResizeReasonTemplateUpdateInterrupted, fmt.Sprintf("StatefulSet template %s is missing during verification", templateName)) + oc.emitResizeEvent(corev1.EventTypeWarning, "VolumeResizeFailed", status.Message) + if patchErr := oc.patchResizeStatus(status); patchErr != nil { + return result.Error(patchErr) + } + return result.Done() + } + if templateRequest.Cmp(templateTarget) < 0 { + templatesBelowTarget = append(templatesBelowTarget, fmt.Sprintf("%s=%s", templateName, templateRequest.String())) } - return result.Done() } - if templateRequest.Cmp(targetSize) < 0 { - return oc.scheduleSyncRetryOrFail(status, marklogicv1.VolumeResizeReasonStatefulSetSyncFailed, fmt.Sprintf("StatefulSet datadir template request (%s) has not reached target (%s)", templateRequest.String(), targetSize.String()), fmt.Errorf("template request below target")) + if len(templatesBelowTarget) > 0 { + sort.Strings(templatesBelowTarget) + return oc.scheduleSyncRetryOrFail(status, marklogicv1.VolumeResizeReasonStatefulSetSyncFailed, fmt.Sprintf("StatefulSet template requests are below target: %s", strings.Join(templatesBelowTarget, ",")), fmt.Errorf("template request below target")) } notFinalPVCs := make([]string, 0) @@ -718,10 +740,14 @@ func (oc *OperatorContext) processResizeVerification(status *marklogicv1.VolumeR observed = requested } - entry.RequestedSize = requested.String() + entryTarget, targetErr := resizeTargetForPVCEntry(status, entry) + if targetErr != nil { + return oc.scheduleSyncRetryOrFail(status, marklogicv1.VolumeResizeReasonInvalidResizeRequest, "Invalid PVC target during verification", targetErr) + } + entry.RequestedSize = entryTarget.String() entry.ObservedCapacity = observed.String() - if requested.Cmp(targetSize) < 0 || observed.Cmp(targetSize) < 0 { + if requested.Cmp(entryTarget) < 0 || observed.Cmp(entryTarget) < 0 { notFinalPVCs = append(notFinalPVCs, entry.Name) continue } @@ -798,7 +824,11 @@ func (oc *OperatorContext) initializePVCStatuses(status *marklogicv1.VolumeResiz if len(status.PVCStatuses) == 0 { status.PVCStatuses = make([]marklogicv1.PVCResizeStatus, 0, len(pvcState.expectedNames)) for _, name := range pvcState.expectedNames { - status.PVCStatuses = append(status.PVCStatuses, marklogicv1.PVCResizeStatus{Name: name, State: marklogicv1.PVCResizeStatePending}) + entry := marklogicv1.PVCResizeStatus{Name: name, State: marklogicv1.PVCResizeStatePending} + if target, ok := pvcState.targetByPVC[name]; ok { + entry.RequestedSize = target.String() + } + status.PVCStatuses = append(status.PVCStatuses, entry) } } status.TotalPVCs = int32(len(status.PVCStatuses)) @@ -1166,39 +1196,38 @@ func (oc *OperatorContext) scheduleSyncRetryOrFail(status *marklogicv1.VolumeRes return result.Done() } -func (oc *OperatorContext) syncStatefulSetDataDirTemplate(status *marklogicv1.VolumeResizeStatus, currentSts *appsv1.StatefulSet, targetSize resource.Quantity) (bool, error) { +func (oc *OperatorContext) syncStatefulSetPVCTemplates(status *marklogicv1.VolumeResizeStatus, currentSts *appsv1.StatefulSet, templateTargets templateResizeTargets) (bool, error) { if currentSts == nil { return false, fmt.Errorf("statefulset is nil") } - templateFound := false - templateMatchesTarget := false - for i := range currentSts.Spec.VolumeClaimTemplates { - pvcTemplate := ¤tSts.Spec.VolumeClaimTemplates[i] - if pvcTemplate.Name != dataDirPVCName { + templatesMissingFromStatefulSet := make([]string, 0) + templatesBelowTarget := make([]string, 0) + for templateName, target := range templateTargets { + current, hasTemplate := getStatefulSetTemplateRequest(currentSts, templateName) + if !hasTemplate { + templatesMissingFromStatefulSet = append(templatesMissingFromStatefulSet, templateName) continue } - templateFound = true - if pvcTemplate.Spec.Resources.Requests == nil { - break + if current.Cmp(target) < 0 { + templatesBelowTarget = append(templatesBelowTarget, templateName) } - current := pvcTemplate.Spec.Resources.Requests[corev1.ResourceStorage] - if current.Cmp(targetSize) >= 0 { - templateMatchesTarget = true - } - break } - if !templateFound { - return false, fmt.Errorf("statefulset %s has no %s volumeClaimTemplate", currentSts.Name, dataDirPVCName) + if len(templatesMissingFromStatefulSet) > 0 { + sort.Strings(templatesMissingFromStatefulSet) + return false, fmt.Errorf("statefulset %s is missing volumeClaimTemplates: %s", currentSts.Name, strings.Join(templatesMissingFromStatefulSet, ",")) } - if templateMatchesTarget { + if len(templatesBelowTarget) == 0 { addResizeMarker(status, resizeMarkerTemplateRecreated) addResizeMarker(status, resizeMarkerTemplateSynced) return false, nil } + sort.Strings(templatesBelowTarget) + _ = templatesBelowTarget + addResizeMarker(status, resizeMarkerTemplateRecreateStarted) if hasResizeMarker(status, resizeMarkerTemplateDeleted) { return true, nil @@ -1217,13 +1246,13 @@ func (oc *OperatorContext) syncStatefulSetDataDirTemplate(status *marklogicv1.Vo return true, nil } -func getStatefulSetDataDirTemplateRequest(currentSts *appsv1.StatefulSet) (resource.Quantity, bool) { +func getStatefulSetTemplateRequest(currentSts *appsv1.StatefulSet, templateName string) (resource.Quantity, bool) { if currentSts == nil { return resource.Quantity{}, false } for i := range currentSts.Spec.VolumeClaimTemplates { template := currentSts.Spec.VolumeClaimTemplates[i] - if template.Name != dataDirPVCName { + if template.Name != templateName { continue } if template.Spec.Resources.Requests == nil { @@ -1238,6 +1267,23 @@ func getStatefulSetDataDirTemplateRequest(currentSts *appsv1.StatefulSet) (resou return resource.Quantity{}, false } +func getTemplateNameFromPVCName(statefulSetName, pvcName string) (string, bool) { + ordinal := parseOrdinalFromName(pvcName) + if ordinal < 0 { + return "", false + } + withoutOrdinalSuffix := strings.TrimSuffix(pvcName, fmt.Sprintf("-%d", ordinal)) + statefulSetSuffix := fmt.Sprintf("-%s", statefulSetName) + if !strings.HasSuffix(withoutOrdinalSuffix, statefulSetSuffix) { + return "", false + } + templateName := strings.TrimSuffix(withoutOrdinalSuffix, statefulSetSuffix) + if templateName == "" { + return "", false + } + return templateName, true +} + func getOfflineRestartCandidates(status *marklogicv1.VolumeResizeStatus, statefulSetName string) []string { candidates := make([]string, 0) for idx := range status.PVCStatuses { @@ -1348,15 +1394,11 @@ func parseOrdinalFromName(name string) int { } func derivePodNameFromPVC(statefulSetName, pvcName string) string { - prefix := fmt.Sprintf("%s-%s-", dataDirPVCName, statefulSetName) - if !strings.HasPrefix(pvcName, prefix) { + ordinal := parseOrdinalFromName(pvcName) + if ordinal < 0 { return "" } - ordinal := strings.TrimPrefix(pvcName, prefix) - if ordinal == "" { - return "" - } - return fmt.Sprintf("%s-%s", statefulSetName, ordinal) + return fmt.Sprintf("%s-%d", statefulSetName, ordinal) } func hasResizeMarker(status *marklogicv1.VolumeResizeStatus, marker string) bool { @@ -1433,12 +1475,14 @@ func resolveResizeStrategy(persistence *marklogicv1.Persistence) marklogicv1.Vol return marklogicv1.VolumeResizeStrategyParallel } -func (oc *OperatorContext) discoverPrimaryPVCs(sts *appsv1.StatefulSet) (*resizePVCDiscovery, error) { +func (oc *OperatorContext) discoverPrimaryPVCs(sts *appsv1.StatefulSet, targets templateResizeTargets) (*resizePVCDiscovery, error) { state := &resizePVCDiscovery{ expectedNames: []string{}, foundPVCs: map[string]*corev1.PersistentVolumeClaim{}, missingPVCs: []string{}, notBoundPVCs: []string{}, + minByTemplate: map[string]*resource.Quantity{}, + targetByPVC: map[string]resource.Quantity{}, } replicas := int32(1) @@ -1446,37 +1490,190 @@ func (oc *OperatorContext) discoverPrimaryPVCs(sts *appsv1.StatefulSet) (*resize replicas = *sts.Spec.Replicas } - for i := int32(0); i < replicas; i++ { - name := fmt.Sprintf("%s-%s-%d", dataDirPVCName, sts.Name, i) - state.expectedNames = append(state.expectedNames, name) - pvc := &corev1.PersistentVolumeClaim{} - err := oc.Client.Get(oc.Ctx, client.ObjectKey{Namespace: sts.Namespace, Name: name}, pvc) - if err != nil { - if apierrors.IsNotFound(err) { - state.missingPVCs = append(state.missingPVCs, name) + templateNames := make([]string, 0, len(targets)) + for templateName := range targets { + templateNames = append(templateNames, templateName) + } + if len(templateNames) == 0 { + templateNames = getResizableTemplateNames(sts) + } + sort.Strings(templateNames) + for _, templateName := range templateNames { + templateTarget, hasTarget := targets[templateName] + if !hasTarget { + continue + } + for i := int32(0); i < replicas; i++ { + name := fmt.Sprintf("%s-%s-%d", templateName, sts.Name, i) + state.expectedNames = append(state.expectedNames, name) + state.targetByPVC[name] = templateTarget + pvc := &corev1.PersistentVolumeClaim{} + err := oc.Client.Get(oc.Ctx, client.ObjectKey{Namespace: sts.Namespace, Name: name}, pvc) + if err != nil { + if apierrors.IsNotFound(err) { + state.missingPVCs = append(state.missingPVCs, name) + continue + } + return nil, err + } + state.foundPVCs[name] = pvc + if pvc.Status.Phase != corev1.ClaimBound { + state.notBoundPVCs = append(state.notBoundPVCs, name) + } + + capacity := pvc.Status.Capacity[corev1.ResourceStorage] + if capacity.IsZero() { + capacity = pvc.Spec.Resources.Requests[corev1.ResourceStorage] + } + if capacity.IsZero() { continue } - return nil, err + capCopy := capacity.DeepCopy() + if state.minSize == nil || capCopy.Cmp(*state.minSize) < 0 { + state.minSize = &capCopy + } + if existing, ok := state.minByTemplate[templateName]; !ok || existing == nil || capCopy.Cmp(*existing) < 0 { + state.minByTemplate[templateName] = &capCopy + } } - state.foundPVCs[name] = pvc - if pvc.Status.Phase != corev1.ClaimBound { - state.notBoundPVCs = append(state.notBoundPVCs, name) + } + + return state, nil +} + +func getResizableTemplateNames(sts *appsv1.StatefulSet) []string { + if sts == nil { + return []string{dataDirPVCName} + } + templateNames := make([]string, 0, len(sts.Spec.VolumeClaimTemplates)) + for i := range sts.Spec.VolumeClaimTemplates { + template := sts.Spec.VolumeClaimTemplates[i] + if template.Name == "" { + continue + } + if template.Spec.Resources.Requests == nil { + continue } + if _, hasStorageRequest := template.Spec.Resources.Requests[corev1.ResourceStorage]; !hasStorageRequest { + continue + } + templateNames = append(templateNames, template.Name) + } + if len(templateNames) == 0 { + templateNames = append(templateNames, dataDirPVCName) + } + return templateNames +} - capacity := pvc.Status.Capacity[corev1.ResourceStorage] - if capacity.IsZero() { - capacity = pvc.Spec.Resources.Requests[corev1.ResourceStorage] +func resolveResizeTargetsFromSpec(group *marklogicv1.MarklogicGroup) (templateResizeTargets, error) { + targets := templateResizeTargets{} + if group == nil { + return targets, nil + } + if group.Spec.Persistence != nil && group.Spec.Persistence.Enabled { + size, err := resource.ParseQuantity(group.Spec.Persistence.Size) + if err != nil { + return nil, fmt.Errorf("invalid persistence size %q", group.Spec.Persistence.Size) } - if capacity.IsZero() { + targets[dataDirPVCName] = size + } + if group.Spec.AdditionalVolumeClaimTemplates != nil { + for _, tmpl := range *group.Spec.AdditionalVolumeClaimTemplates { + if tmpl.Name == "" || tmpl.Spec.Resources.Requests == nil { + continue + } + size, ok := tmpl.Spec.Resources.Requests[corev1.ResourceStorage] + if !ok || size.IsZero() { + continue + } + targets[tmpl.Name] = size + } + } + return targets, nil +} + +func primaryResizeTarget(targets templateResizeTargets) resource.Quantity { + if target, ok := targets[dataDirPVCName]; ok { + return target + } + names := make([]string, 0, len(targets)) + for n := range targets { + names = append(names, n) + } + sort.Strings(names) + if len(names) == 0 { + return resource.Quantity{} + } + return targets[names[0]] +} + +func compareTargetsWithCurrent(state *resizePVCDiscovery, targets templateResizeTargets) (int, string) { + hasIncrease := false + for templateName, target := range targets { + current, ok := state.minByTemplate[templateName] + if !ok || current == nil { + hasIncrease = true continue } - capCopy := capacity.DeepCopy() - if state.minSize == nil || capCopy.Cmp(*state.minSize) < 0 { - state.minSize = &capCopy + cmp := target.Cmp(*current) + if cmp < 0 { + return -1, fmt.Sprintf("Shrink is not supported for %s: current=%s target=%s", templateName, current.String(), target.String()) } + if cmp > 0 { + hasIncrease = true + } + } + if hasIncrease { + return 1, "" } + return 0, "" +} - return state, nil +func resizeTargetForPVCEntry(status *marklogicv1.VolumeResizeStatus, entry *marklogicv1.PVCResizeStatus) (resource.Quantity, error) { + if entry != nil && entry.RequestedSize != "" { + if target, err := resource.ParseQuantity(entry.RequestedSize); err == nil { + return target, nil + } + } + if status != nil && status.TargetSize != "" { + if target, err := resource.ParseQuantity(status.TargetSize); err == nil { + return target, nil + } + } + name := "" + if entry != nil { + name = entry.Name + } + return resource.Quantity{}, fmt.Errorf("unable to resolve resize target for pvc %s", name) +} + +func desiredTemplateTargetsFromStatus(status *marklogicv1.VolumeResizeStatus, statefulSetName string) (templateResizeTargets, error) { + targets := templateResizeTargets{} + if status == nil { + return targets, fmt.Errorf("volume resize status is nil") + } + for i := range status.PVCStatuses { + entry := &status.PVCStatuses[i] + templateName, ok := getTemplateNameFromPVCName(statefulSetName, entry.Name) + if !ok { + continue + } + target, err := resizeTargetForPVCEntry(status, entry) + if err != nil { + return nil, err + } + if existing, ok := targets[templateName]; !ok || target.Cmp(existing) > 0 { + targets[templateName] = target + } + } + if len(targets) == 0 { + target, err := resource.ParseQuantity(status.TargetSize) + if err != nil { + return nil, fmt.Errorf("invalid target size %q", status.TargetSize) + } + targets[dataDirPVCName] = target + } + return targets, nil } func (oc *OperatorContext) validateStorageClassExpansionAllowed(foundPVCs map[string]*corev1.PersistentVolumeClaim) error { diff --git a/pkg/k8sutil/volume_resize_validation_test.go b/pkg/k8sutil/volume_resize_validation_test.go index 3dec3b52..a282b519 100644 --- a/pkg/k8sutil/volume_resize_validation_test.go +++ b/pkg/k8sutil/volume_resize_validation_test.go @@ -63,6 +63,141 @@ func TestResizeValidationSuccessInitializesStatus(t *testing.T) { } } +func TestResizeValidationIncludesAdditionalTemplatePVCs(t *testing.T) { + additionalTemplate := corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{Name: "logs"}, + Spec: corev1.PersistentVolumeClaimSpec{ + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: resourceMustParse("30Gi")}, + }, + }, + } + oc := newResizeTestContext(t, resizeTestInput{ + desiredSize: "50Gi", + currentSize: "20Gi", + updateStrategy: appsv1.OnDeleteStatefulSetStrategyType, + additionalTemplates: []corev1.PersistentVolumeClaim{additionalTemplate}, + }) + + res := oc.ReconcileVolumeResizeValidation() + if !res.Completed() { + t.Fatalf("expected validation to complete this reconcile step") + } + if _, err := res.Output(); err != nil { + t.Fatalf("unexpected result error: %v", err) + } + + updated := getUpdatedGroup(t, oc) + status := updated.Status.VolumeResizeStatus + if status == nil { + t.Fatalf("expected volumeResizeStatus to be initialized") + } + if status.TotalPVCs != 4 { + t.Fatalf("expected totalPvcs 4 (datadir + logs for 2 replicas), got %d", status.TotalPVCs) + } + if findPVCStatus(status, "logs-dnode-0") == nil || findPVCStatus(status, "logs-dnode-1") == nil { + t.Fatalf("expected logs pvc statuses to be present") + } +} + +func TestResizeSubmissionPatchesAdditionalTemplatePVCs(t *testing.T) { + additionalTemplate := corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{Name: "logs"}, + Spec: corev1.PersistentVolumeClaimSpec{ + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: resourceMustParse("40Gi")}, + }, + }, + } + oc := newResizeTestContext(t, resizeTestInput{ + desiredSize: "50Gi", + currentSize: "20Gi", + updateStrategy: appsv1.OnDeleteStatefulSetStrategyType, + additionalTemplates: []corev1.PersistentVolumeClaim{additionalTemplate}, + }) + + // Seed additional PVCs below target so submission must patch them. + replacePVC(t, oc, newBoundPVC("logs-dnode-0", "20Gi")) + replacePVC(t, oc, newBoundPVC("logs-dnode-1", "20Gi")) + + if err := runResizeStep(t, oc); err != nil { + t.Fatalf("initial validation failed: %v", err) + } + if err := runResizeStep(t, oc); err != nil { + t.Fatalf("submission reconcile failed: %v", err) + } + + logsPVC := &corev1.PersistentVolumeClaim{} + if err := oc.Client.Get(oc.Ctx, client.ObjectKey{Name: "logs-dnode-0", Namespace: "testns"}, logsPVC); err != nil { + t.Fatalf("failed to fetch logs pvc: %v", err) + } + request := logsPVC.Spec.Resources.Requests[corev1.ResourceStorage] + if got := request.String(); got != "40Gi" { + t.Fatalf("expected logs pvc request to be 40Gi, got %s", got) + } +} + +func TestResizeVerificationStallsWhenAdditionalTemplateRequestBelowTarget(t *testing.T) { + additionalTemplate := corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{Name: "logs"}, + Spec: corev1.PersistentVolumeClaimSpec{ + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: resourceMustParse("40Gi")}, + }, + }, + } + oc := newResizeTestContext(t, resizeTestInput{ + desiredSize: "50Gi", + currentSize: "20Gi", + updateStrategy: appsv1.OnDeleteStatefulSetStrategyType, + additionalTemplates: []corev1.PersistentVolumeClaim{additionalTemplate}, + }) + + now := metav1.Now() + status := &marklogicv1.VolumeResizeStatus{ + OperationID: "resize-verify-additional", + ObservedGeneration: oc.MarklogicGroup.Generation, + Phase: marklogicv1.VolumeResizePhaseVerifyingResizeOutcome, + CurrentSize: "20Gi", + TargetSize: "50Gi", + ResizeStrategy: marklogicv1.VolumeResizeStrategyParallel, + TotalPVCs: 4, + FirstStartedTime: &now, + LastTransitionTime: &now, + PVCStatuses: []marklogicv1.PVCResizeStatus{ + {Name: "datadir-dnode-0", PodName: "dnode-0", RequestedSize: "50Gi", State: marklogicv1.PVCResizeStateCheckpointed}, + {Name: "datadir-dnode-1", PodName: "dnode-1", RequestedSize: "50Gi", State: marklogicv1.PVCResizeStateCheckpointed}, + {Name: "logs-dnode-0", PodName: "dnode-0", RequestedSize: "40Gi", State: marklogicv1.PVCResizeStateCheckpointed}, + {Name: "logs-dnode-1", PodName: "dnode-1", RequestedSize: "40Gi", State: marklogicv1.PVCResizeStateCheckpointed}, + }, + } + if err := oc.patchResizeStatus(status); err != nil { + t.Fatalf("failed to seed verification status: %v", err) + } + + if err := setStatefulSetTemplateRequestByName(oc, dataDirPVCName, "50Gi"); err != nil { + t.Fatalf("failed to set datadir template request: %v", err) + } + if err := setStatefulSetTemplateRequestByName(oc, "logs", "30Gi"); err != nil { + t.Fatalf("failed to set logs template request: %v", err) + } + + if err := runResizeStep(t, oc); err != nil { + t.Fatalf("verification step failed: %v", err) + } + + updated := getUpdatedGroup(t, oc) + if updated.Status.VolumeResizeStatus == nil { + t.Fatalf("expected resize status after verification") + } + if updated.Status.VolumeResizeStatus.Phase != marklogicv1.VolumeResizePhaseStalled { + t.Fatalf("expected Stalled phase when logs template is below target, got %s", updated.Status.VolumeResizeStatus.Phase) + } + if updated.Status.VolumeResizeStatus.Reason != marklogicv1.VolumeResizeReasonStatefulSetSyncFailed { + t.Fatalf("expected StatefulSetSyncFailed reason, got %s", updated.Status.VolumeResizeStatus.Reason) + } +} + func TestResizeValidationShrinkFails(t *testing.T) { oc := newResizeTestContext(t, resizeTestInput{desiredSize: "10Gi", currentSize: "20Gi", updateStrategy: appsv1.OnDeleteStatefulSetStrategyType}) res := oc.ReconcileVolumeResizeValidation() @@ -1332,10 +1467,11 @@ func TestJitteredResizeRetryDelaySeconds_CappedAtMax(t *testing.T) { } type resizeTestInput struct { - desiredSize string - currentSize string - updateStrategy appsv1.StatefulSetUpdateStrategyType - replicas int32 + desiredSize string + currentSize string + updateStrategy appsv1.StatefulSetUpdateStrategyType + replicas int32 + additionalTemplates []corev1.PersistentVolumeClaim } func newResizeTestContext(t *testing.T, in resizeTestInput) *OperatorContext { @@ -1378,6 +1514,14 @@ func newResizeTestContext(t *testing.T, in resizeTestInput) *OperatorContext { }, }, } + if len(in.additionalTemplates) > 0 { + templates := make([]corev1.PersistentVolumeClaim, 0, len(in.additionalTemplates)) + for i := range in.additionalTemplates { + copied := in.additionalTemplates[i].DeepCopy() + templates = append(templates, *copied) + } + group.Spec.AdditionalVolumeClaimTemplates = &templates + } sts := &appsv1.StatefulSet{ ObjectMeta: metav1.ObjectMeta{Name: "dnode", Namespace: "testns"}, @@ -1395,6 +1539,12 @@ func newResizeTestContext(t *testing.T, in resizeTestInput) *OperatorContext { }, }, } + if len(in.additionalTemplates) > 0 { + for i := range in.additionalTemplates { + copied := in.additionalTemplates[i].DeepCopy() + sts.Spec.VolumeClaimTemplates = append(sts.Spec.VolumeClaimTemplates, *copied) + } + } allowExpansion := true sc := &storagev1.StorageClass{ @@ -1407,6 +1557,17 @@ func newResizeTestContext(t *testing.T, in resizeTestInput) *OperatorContext { pvcName := fmt.Sprintf("datadir-dnode-%d", i) podName := fmt.Sprintf("dnode-%d", i) objects = append(objects, newBoundPVC(pvcName, in.currentSize), newGroupPod(podName, true)) + for _, template := range in.additionalTemplates { + if template.Name == "" || template.Spec.Resources.Requests == nil { + continue + } + target, ok := template.Spec.Resources.Requests[corev1.ResourceStorage] + if !ok || target.IsZero() { + continue + } + additionalPVCName := fmt.Sprintf("%s-dnode-%d", template.Name, i) + objects = append(objects, newBoundPVC(additionalPVCName, target.String())) + } } fakeClient := fake.NewClientBuilder(). @@ -1546,17 +1707,29 @@ func seedVerificationStatus(t *testing.T, oc *OperatorContext, operationID, targ } func setStatefulSetTemplateRequest(oc *OperatorContext, size string) error { + return setStatefulSetTemplateRequestByName(oc, dataDirPVCName, size) +} + +func setStatefulSetTemplateRequestByName(oc *OperatorContext, templateName, size string) error { sts := &appsv1.StatefulSet{} if err := oc.Client.Get(oc.Ctx, client.ObjectKey{Name: "dnode", Namespace: "testns"}, sts); err != nil { return err } - if len(sts.Spec.VolumeClaimTemplates) == 0 { - sts.Spec.VolumeClaimTemplates = []corev1.PersistentVolumeClaim{{ObjectMeta: metav1.ObjectMeta{Name: dataDirPVCName}}} + idx := -1 + for i := range sts.Spec.VolumeClaimTemplates { + if sts.Spec.VolumeClaimTemplates[i].Name == templateName { + idx = i + break + } + } + if idx == -1 { + sts.Spec.VolumeClaimTemplates = append(sts.Spec.VolumeClaimTemplates, corev1.PersistentVolumeClaim{ObjectMeta: metav1.ObjectMeta{Name: templateName}}) + idx = len(sts.Spec.VolumeClaimTemplates) - 1 } - if sts.Spec.VolumeClaimTemplates[0].Spec.Resources.Requests == nil { - sts.Spec.VolumeClaimTemplates[0].Spec.Resources.Requests = corev1.ResourceList{} + if sts.Spec.VolumeClaimTemplates[idx].Spec.Resources.Requests == nil { + sts.Spec.VolumeClaimTemplates[idx].Spec.Resources.Requests = corev1.ResourceList{} } - sts.Spec.VolumeClaimTemplates[0].Spec.Resources.Requests[corev1.ResourceStorage] = resourceMustParse(size) + sts.Spec.VolumeClaimTemplates[idx].Spec.Resources.Requests[corev1.ResourceStorage] = resourceMustParse(size) return oc.Client.Update(oc.Ctx, sts) } diff --git a/test/e2e-helm/9_volume_resize_test.go b/test/e2e-helm/9_volume_resize_test.go index e1b230f4..2e3c33eb 100644 --- a/test/e2e-helm/9_volume_resize_test.go +++ b/test/e2e-helm/9_volume_resize_test.go @@ -45,11 +45,12 @@ import ( var resizeNSNamespaces = []string{"ml-ns-resize-a", "ml-ns-resize-b"} const ( - resizeNSClusterName = "ml-ns-resize-cluster" - resizeNSGroupName = "node" - resizeNSInitialSize = "2Gi" - resizeNSTargetSize = "3Gi" - resizeNSWaitTimeout = 15 * time.Minute + resizeNSClusterName = "ml-ns-resize-cluster" + resizeNSGroupName = "node" + resizeNSExtraPVCName = "logs" + resizeNSInitialSize = "2Gi" + resizeNSTargetSize = "3Gi" + resizeNSWaitTimeout = 15 * time.Minute ) type resizeNSOutcome struct { @@ -71,8 +72,12 @@ func TestVolumeResizeNamespaceScoped(t *testing.T) { // ── Pre-flight ───────────────────────────────────────────────────────────── feature.Setup(func(ctx context.Context, t *testing.T, c *envconf.Config) context.Context { - _ = c - t.Skip("skipping namespace-scoped Helm volume-resize test: the current namespace-scoped chart RBAC does not guarantee the operator can read cluster-scoped StorageClasses or patch/update PVCs during reconciliation") + if err := assertResizeNSNamespacesWatched(); err != nil { + t.Fatalf("namespace-scoped Helm resize test misconfigured: %v", err) + } + if err := assertNSStorageClassExpandable(ctx, c.Client()); err != nil { + t.Skipf("Skipping namespace-scoped Helm volume resize test: %v", err) + } return ctx }) @@ -206,6 +211,30 @@ func assertNSStorageClassExpandable(ctx context.Context, client klient.Client) e } } +// assertResizeNSNamespacesWatched validates that all resize namespaces are in +// the Helm operator watch list configured in main_test.go. +func assertResizeNSNamespacesWatched() error { + watched := make(map[string]struct{}) + for _, ns := range strings.Split(watchedNamespaces, ",") { + ns = strings.TrimSpace(ns) + if ns == "" { + continue + } + watched[ns] = struct{}{} + } + + missing := make([]string, 0) + for _, ns := range resizeNSNamespaces { + if _, ok := watched[ns]; !ok { + missing = append(missing, ns) + } + } + if len(missing) > 0 { + return fmt.Errorf("missing from watchedNamespaces: %s", strings.Join(missing, ", ")) + } + return nil +} + // isNSDefaultStorageClass reports whether sc carries either the GA or the // legacy beta default-class annotation set to "true". func isNSDefaultStorageClass(sc storagev1.StorageClass) bool { @@ -234,6 +263,19 @@ func createNSResizeCluster(ctx context.Context, client klient.Client, ns string) Enabled: true, Size: resizeNSInitialSize, }, + AdditionalVolumeClaimTemplates: &[]corev1.PersistentVolumeClaim{ + { + ObjectMeta: metav1.ObjectMeta{Name: resizeNSExtraPVCName}, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse(resizeNSInitialSize), + }, + }, + }, + }, + }, MarkLogicGroups: []*marklogicv1.MarklogicGroups{ { Name: resizeNSGroupName, @@ -252,11 +294,17 @@ func createNSResizeCluster(ctx context.Context, client klient.Client, ns string) func triggerNSResizeAndWait(ctx context.Context, t *testing.T, client klient.Client, ns string) resizeNSOutcome { out := resizeNSOutcome{namespace: ns, initialSize: resizeNSInitialSize, requestSize: resizeNSTargetSize} - if size, err := minNSPVCCapacity(ctx, client, ns); err == nil { - out.initialSize = size + templatePrefixes := map[string]string{ + "datadir": "datadir-" + resizeNSGroupName + "-", + resizeNSExtraPVCName: resizeNSExtraPVCName + "-" + resizeNSGroupName + "-", } - patch := []byte(fmt.Sprintf(`{"spec":{"persistence":{"size":"%s"}}}`, resizeNSTargetSize)) + if sizes, err := minNSPVCCapacityByTemplate(ctx, client, ns, templatePrefixes); err == nil { + out.initialSize = formatNSTemplateSizes(sizes) + } + + patch := []byte(fmt.Sprintf(`{"spec":{"persistence":{"size":"%s"},"additionalVolumeClaimTemplates":[{"metadata":{"name":"%s"},"spec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"%s"}}}}]}}`, + resizeNSTargetSize, resizeNSExtraPVCName, resizeNSTargetSize)) cluster := &marklogicv1.MarklogicCluster{ ObjectMeta: metav1.ObjectMeta{Name: resizeNSClusterName, Namespace: ns}, } @@ -264,7 +312,7 @@ func triggerNSResizeAndWait(ctx context.Context, t *testing.T, client klient.Cli out.failReason = fmt.Sprintf("patch cluster: %v", err) return out } - t.Logf("[%s] patched MarklogicCluster persistence.size → %s", ns, resizeNSTargetSize) + t.Logf("[%s] patched MarklogicCluster persistence.size + additionalVolumeClaimTemplates[%s].storage → %s", ns, resizeNSExtraPVCName, resizeNSTargetSize) deadline := time.Now().Add(resizeNSWaitTimeout) for time.Now().Before(deadline) { @@ -276,10 +324,12 @@ func triggerNSResizeAndWait(ctx context.Context, t *testing.T, client klient.Cli if grp.Status.VolumeResizeStatus != nil { out.phase = string(grp.Status.VolumeResizeStatus.Phase) } - obs, _ := minNSPVCCapacity(ctx, client, ns) - out.observedSize = obs + sizes, _ := minNSPVCCapacityByTemplate(ctx, client, ns, templatePrefixes) + out.observedSize = formatNSTemplateSizes(sizes) - if out.phase == string(marklogicv1.VolumeResizePhaseCompleted) && nsSizesEqual(obs, resizeNSTargetSize) { + if out.phase == string(marklogicv1.VolumeResizePhaseCompleted) && + nsTemplateSizeEquals(sizes, "datadir", resizeNSTargetSize) && + nsTemplateSizeEquals(sizes, resizeNSExtraPVCName, resizeNSTargetSize) { out.passed = true return out } @@ -288,37 +338,67 @@ func triggerNSResizeAndWait(ctx context.Context, t *testing.T, client klient.Cli grp.Status.VolumeResizeStatus.Reason, grp.Status.VolumeResizeStatus.Message) return out } - t.Logf("[%s] resize in progress: phase=%s observed=%s target=%s", ns, out.phase, obs, resizeNSTargetSize) + t.Logf("[%s] resize in progress: phase=%s observed=%s target=%s", ns, out.phase, out.observedSize, resizeNSTargetSize) time.Sleep(15 * time.Second) } out.failReason = fmt.Sprintf("timeout after %s (last phase=%s observed=%s)", resizeNSWaitTimeout, out.phase, out.observedSize) return out } -func minNSPVCCapacity(ctx context.Context, client klient.Client, ns string) (string, error) { +func minNSPVCCapacityByTemplate(ctx context.Context, client klient.Client, ns string, templatePrefixes map[string]string) (map[string]string, error) { pvcs := &corev1.PersistentVolumeClaimList{} if err := client.Resources(ns).List(ctx, pvcs); err != nil { - return "", err + return nil, err } - var min *resource.Quantity + minByTemplate := make(map[string]*resource.Quantity, len(templatePrefixes)) for i := range pvcs.Items { pvc := &pvcs.Items[i] - if !strings.HasPrefix(pvc.Name, "datadir-"+resizeNSGroupName+"-") { + matchedTemplate := "" + for templateName, prefix := range templatePrefixes { + if strings.HasPrefix(pvc.Name, prefix) { + matchedTemplate = templateName + break + } + } + if matchedTemplate == "" { continue } q, ok := pvc.Status.Capacity[corev1.ResourceStorage] if !ok { continue } + min := minByTemplate[matchedTemplate] if min == nil || q.Cmp(*min) < 0 { qq := q.DeepCopy() - min = &qq + minByTemplate[matchedTemplate] = &qq } } - if min == nil { - return "", fmt.Errorf("no PVC capacity reported yet") + out := make(map[string]string, len(templatePrefixes)) + for templateName := range templatePrefixes { + if minByTemplate[templateName] == nil { + return nil, fmt.Errorf("no PVC capacity reported yet for template %s", templateName) + } + out[templateName] = minByTemplate[templateName].String() + } + return out, nil +} + +func nsTemplateSizeEquals(sizes map[string]string, templateName, target string) bool { + if sizes == nil { + return false + } + current, ok := sizes[templateName] + if !ok { + return false + } + return nsSizesEqual(current, target) +} + +func formatNSTemplateSizes(sizes map[string]string) string { + if sizes == nil { + return "" } - return min.String(), nil + return fmt.Sprintf("datadir=%s %s=%s", sizes["datadir"], resizeNSExtraPVCName, sizes[resizeNSExtraPVCName]) } func nsSizesEqual(a, b string) bool { diff --git a/test/e2e/9_volume_resize_test.go b/test/e2e/9_volume_resize_test.go index e1866ad8..d73e0f0c 100644 --- a/test/e2e/9_volume_resize_test.go +++ b/test/e2e/9_volume_resize_test.go @@ -45,11 +45,12 @@ import ( var resizeNamespaces = []string{"ml-resize-a", "ml-resize-b"} const ( - resizeClusterName = "ml-resize-cluster" - resizeGroupName = "node" - resizeInitialSize = "2Gi" - resizeTargetSize = "3Gi" - resizeWaitTimeout = 15 * time.Minute + resizeClusterName = "ml-resize-cluster" + resizeGroupName = "node" + resizeExtraPVCName = "logs" + resizeInitialSize = "2Gi" + resizeTargetSize = "3Gi" + resizeWaitTimeout = 15 * time.Minute ) // resizeOutcome captures the per-namespace result for the final summary banner. @@ -247,6 +248,19 @@ func createResizeNamespaceAndCluster(ctx context.Context, t *testing.T, client k Enabled: true, Size: resizeInitialSize, }, + AdditionalVolumeClaimTemplates: &[]corev1.PersistentVolumeClaim{ + { + ObjectMeta: metav1.ObjectMeta{Name: resizeExtraPVCName}, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse(resizeInitialSize), + }, + }, + }, + }, + }, MarkLogicGroups: []*marklogicv1.MarklogicGroups{ { Name: resizeGroupName, @@ -268,13 +282,19 @@ func createResizeNamespaceAndCluster(ctx context.Context, t *testing.T, client k func triggerAndWaitForResize(ctx context.Context, t *testing.T, client klient.Client, ns string) resizeOutcome { out := resizeOutcome{namespace: ns, initialSize: resizeInitialSize, requestSize: resizeTargetSize} - // Capture the current PVC capacity for the per-namespace summary. - if size, err := minPVCCapacity(ctx, client, ns); err == nil { - out.initialSize = size + templatePrefixes := map[string]string{ + "datadir": "datadir-" + resizeGroupName + "-", + resizeExtraPVCName: resizeExtraPVCName + "-" + resizeGroupName + "-", + } + + // Capture current capacities for the per-namespace summary. + if sizes, err := minPVCCapacityByTemplate(ctx, client, ns, templatePrefixes); err == nil { + out.initialSize = formatTemplateSizes(sizes) } - // Patch spec.persistence.size on the MarklogicCluster (triggers reconcile). - patch := []byte(fmt.Sprintf(`{"spec":{"persistence":{"size":"%s"}}}`, resizeTargetSize)) + // Patch both datadir and additional volume template target sizes. + patch := []byte(fmt.Sprintf(`{"spec":{"persistence":{"size":"%s"},"additionalVolumeClaimTemplates":[{"metadata":{"name":"%s"},"spec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"%s"}}}}]}}`, + resizeTargetSize, resizeExtraPVCName, resizeTargetSize)) cluster := &marklogicv1.MarklogicCluster{ ObjectMeta: metav1.ObjectMeta{Name: resizeClusterName, Namespace: ns}, } @@ -282,7 +302,7 @@ func triggerAndWaitForResize(ctx context.Context, t *testing.T, client klient.Cl out.failReason = fmt.Sprintf("patch cluster: %v", err) return out } - t.Logf("[%s] patched MarklogicCluster persistence.size → %s", ns, resizeTargetSize) + t.Logf("[%s] patched MarklogicCluster persistence.size + additionalVolumeClaimTemplates[%s].storage → %s", ns, resizeExtraPVCName, resizeTargetSize) // Poll until Completed or timeout. deadline := time.Now().Add(resizeWaitTimeout) @@ -295,11 +315,13 @@ func triggerAndWaitForResize(ctx context.Context, t *testing.T, client klient.Cl if grp.Status.VolumeResizeStatus != nil { out.phase = string(grp.Status.VolumeResizeStatus.Phase) } - obs, _ := minPVCCapacity(ctx, client, ns) - out.observedSize = obs + sizes, _ := minPVCCapacityByTemplate(ctx, client, ns, templatePrefixes) + out.observedSize = formatTemplateSizes(sizes) - // Success criteria: phase=Completed AND every PVC reaches the target. - if out.phase == string(marklogicv1.VolumeResizePhaseCompleted) && sizesEqual(obs, resizeTargetSize) { + // Success criteria: phase=Completed AND every target PVC family reaches the target. + if out.phase == string(marklogicv1.VolumeResizePhaseCompleted) && + templateSizeEquals(sizes, "datadir", resizeTargetSize) && + templateSizeEquals(sizes, resizeExtraPVCName, resizeTargetSize) { out.passed = true return out } @@ -309,40 +331,69 @@ func triggerAndWaitForResize(ctx context.Context, t *testing.T, client klient.Cl grp.Status.VolumeResizeStatus.Reason, grp.Status.VolumeResizeStatus.Message) return out } - t.Logf("[%s] resize in progress: phase=%s observed=%s target=%s", ns, out.phase, obs, resizeTargetSize) + t.Logf("[%s] resize in progress: phase=%s observed=%s target=%s", ns, out.phase, out.observedSize, resizeTargetSize) time.Sleep(15 * time.Second) } out.failReason = fmt.Sprintf("timeout after %s (last phase=%s observed=%s)", resizeWaitTimeout, out.phase, out.observedSize) return out } -// minPVCCapacity returns the smallest .status.capacity.storage across the PVCs -// owned by the resize StatefulSet in ns, formatted as a human-readable string. -func minPVCCapacity(ctx context.Context, client klient.Client, ns string) (string, error) { +// minPVCCapacityByTemplate returns the smallest .status.capacity.storage for +// each tracked PVC template prefix in the namespace. +func minPVCCapacityByTemplate(ctx context.Context, client klient.Client, ns string, templatePrefixes map[string]string) (map[string]string, error) { pvcs := &corev1.PersistentVolumeClaimList{} if err := client.Resources(ns).List(ctx, pvcs); err != nil { - return "", err + return nil, err } - var min *resource.Quantity + minByTemplate := make(map[string]*resource.Quantity, len(templatePrefixes)) for i := range pvcs.Items { pvc := &pvcs.Items[i] - // Restrict to PVCs whose name belongs to the resize StatefulSet. - if !strings.HasPrefix(pvc.Name, "datadir-"+resizeGroupName+"-") { + matchedTemplate := "" + for templateName, prefix := range templatePrefixes { + if strings.HasPrefix(pvc.Name, prefix) { + matchedTemplate = templateName + break + } + } + if matchedTemplate == "" { continue } q, ok := pvc.Status.Capacity[corev1.ResourceStorage] if !ok { continue } + min := minByTemplate[matchedTemplate] if min == nil || q.Cmp(*min) < 0 { qq := q.DeepCopy() - min = &qq + minByTemplate[matchedTemplate] = &qq + } + } + out := make(map[string]string, len(templatePrefixes)) + for templateName := range templatePrefixes { + if minByTemplate[templateName] == nil { + return nil, fmt.Errorf("no PVC capacity reported yet for template %s", templateName) } + out[templateName] = minByTemplate[templateName].String() } - if min == nil { - return "", fmt.Errorf("no PVC capacity reported yet") + return out, nil +} + +func templateSizeEquals(sizes map[string]string, templateName, target string) bool { + if sizes == nil { + return false + } + current, ok := sizes[templateName] + if !ok { + return false + } + return sizesEqual(current, target) +} + +func formatTemplateSizes(sizes map[string]string) string { + if sizes == nil { + return "" } - return min.String(), nil + return fmt.Sprintf("datadir=%s %s=%s", sizes["datadir"], resizeExtraPVCName, sizes[resizeExtraPVCName]) } // sizesEqual compares two storage size strings (e.g. "3Gi" == "3072Mi"). From 62782f31f59020f5c3fc84e1ed248ad1c062248c Mon Sep 17 00:00:00 2001 From: Romain Winieski Date: Fri, 19 Jun 2026 10:54:18 +0200 Subject: [PATCH 02/17] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- pkg/k8sutil/volume_resize_validation.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/k8sutil/volume_resize_validation.go b/pkg/k8sutil/volume_resize_validation.go index c6007367..8f31fa87 100644 --- a/pkg/k8sutil/volume_resize_validation.go +++ b/pkg/k8sutil/volume_resize_validation.go @@ -319,11 +319,11 @@ func (oc *OperatorContext) processResizeSubmission(status *marklogicv1.VolumeRes if stsErr != nil { return result.Error(stsErr) } - targets, targetErr := resolveResizeTargetsFromSpec(oc.MarklogicGroup) + templateTargets, targetErr := desiredTemplateTargetsFromStatus(status, currentSts.Name) if targetErr != nil { return result.Error(targetErr) } - pvcState, discoverErr := oc.discoverPrimaryPVCs(currentSts, targets) + pvcState, discoverErr := oc.discoverPrimaryPVCs(currentSts, templateTargets) if discoverErr != nil { return result.Error(discoverErr) } From 98c451fa6b2f4fb2f7a22525aee3e0cd75c47afa Mon Sep 17 00:00:00 2001 From: Romain Winieski Date: Mon, 22 Jun 2026 16:06:22 +0200 Subject: [PATCH 03/17] fix checkpoint for additional volume --- pkg/k8sutil/volume_resize_validation.go | 15 +++++++++++---- pkg/k8sutil/volume_resize_validation_test.go | 1 + 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/pkg/k8sutil/volume_resize_validation.go b/pkg/k8sutil/volume_resize_validation.go index 8f31fa87..b6023d89 100644 --- a/pkg/k8sutil/volume_resize_validation.go +++ b/pkg/k8sutil/volume_resize_validation.go @@ -438,21 +438,23 @@ func (oc *OperatorContext) processResizeWaiting(status *marklogicv1.VolumeResize entry.RequestedSize = entryTarget.String() entry.ObservedCapacity = observed.String() - if requested.Cmp(entryTarget) >= 0 && observed.Cmp(entryTarget) >= 0 { + if requested.Cmp(entryTarget) >= 0 { if hasFileSystemResizePending(pvc) { entry.State = marklogicv1.PVCResizeStateCheckpointed entry.CheckpointType = marklogicv1.PVCResizeCheckpointTypeOfflinePending entry.RestartRequired = true entry.LastReason = "" entry.LastMessage = "Offline checkpoint reached" - } else { + continue + } + if observed.Cmp(entryTarget) >= 0 { entry.State = marklogicv1.PVCResizeStateCheckpointed entry.CheckpointType = marklogicv1.PVCResizeCheckpointTypeOnlineComplete entry.RestartRequired = false entry.LastReason = "" entry.LastMessage = "Online checkpoint reached" + continue } - continue } entry.State = marklogicv1.PVCResizeStateWaitingForCheckpoint @@ -747,7 +749,7 @@ func (oc *OperatorContext) processResizeVerification(status *marklogicv1.VolumeR entry.RequestedSize = entryTarget.String() entry.ObservedCapacity = observed.String() - if requested.Cmp(entryTarget) < 0 || observed.Cmp(entryTarget) < 0 { + if requested.Cmp(entryTarget) < 0 { notFinalPVCs = append(notFinalPVCs, entry.Name) continue } @@ -761,6 +763,11 @@ func (oc *OperatorContext) processResizeVerification(status *marklogicv1.VolumeR continue } + if observed.Cmp(entryTarget) < 0 { + notFinalPVCs = append(notFinalPVCs, entry.Name) + continue + } + if entry.State == marklogicv1.PVCResizeStateRestartPending || entry.RestartRequired { notFinalPVCs = append(notFinalPVCs, entry.Name) entry.LastReason = "" diff --git a/pkg/k8sutil/volume_resize_validation_test.go b/pkg/k8sutil/volume_resize_validation_test.go index a282b519..6725854f 100644 --- a/pkg/k8sutil/volume_resize_validation_test.go +++ b/pkg/k8sutil/volume_resize_validation_test.go @@ -707,6 +707,7 @@ func TestResizeCheckpointClassificationOnlineAndOffline(t *testing.T) { replacePVC(t, oc, newBoundPVC("datadir-dnode-0", "50Gi")) offlinePVC := newBoundPVC("datadir-dnode-1", "50Gi") + offlinePVC.Status.Capacity[corev1.ResourceStorage] = resourceMustParse("20Gi") offlinePVC.Status.Conditions = []corev1.PersistentVolumeClaimCondition{{ Type: corev1.PersistentVolumeClaimFileSystemResizePending, Status: corev1.ConditionTrue, From 6cb21042c2ddde1e70e7ce706e8aa423b6cad960 Mon Sep 17 00:00:00 2001 From: Romain Winieski Date: Mon, 22 Jun 2026 17:27:11 +0200 Subject: [PATCH 04/17] try fix volume resize test #1 --- Makefile | 4 +++ pkg/k8sutil/volume_resize_validation.go | 43 +++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/Makefile b/Makefile index 487ed25f..1682dc06 100755 --- a/Makefile +++ b/Makefile @@ -302,6 +302,10 @@ e2e-test-helm-volume-resize: fi E2E_DOCKER_IMAGE=$(IMG) go test -v -count=1 -timeout 30m ./test/e2e-helm -run TestVolumeResizeNamespaceScoped +.PHONY: e2e-test-jenkins-volume-resize ## Run ONLY volume resize tests on Jenkins (cluster-scoped + namespace-scoped via Helm). Optimized for CI/CD pipeline. +e2e-test-jenkins-volume-resize: e2e-test-volume-resize e2e-test-helm-volume-resize + @echo "=====Jenkins volume resize tests complete (cluster-scoped + namespace-scoped)=====" + .PHONY: e2e-setup-minikube e2e-setup-minikube: kustomize controller-gen build docker-build minikube version diff --git a/pkg/k8sutil/volume_resize_validation.go b/pkg/k8sutil/volume_resize_validation.go index b6023d89..beddebea 100644 --- a/pkg/k8sutil/volume_resize_validation.go +++ b/pkg/k8sutil/volume_resize_validation.go @@ -729,12 +729,14 @@ func (oc *OperatorContext) processResizeVerification(status *marklogicv1.VolumeR } notFinalPVCs := make([]string, 0) + needsRestartAgain := make([]string, 0) for i := range status.PVCStatuses { entry := &status.PVCStatuses[i] pvc := &corev1.PersistentVolumeClaim{} if getErr := oc.Client.Get(oc.Ctx, client.ObjectKey{Namespace: oc.MarklogicGroup.Namespace, Name: entry.Name}, pvc); getErr != nil { return oc.scheduleSyncRetryOrFail(status, marklogicv1.VolumeResizeReasonMarkLogicHealthCheckFailed, "Failed to fetch PVC during verification", getErr) } + oc.ReqLogger.Info("DEBUG: processResizeVerification - PVC state", "name", entry.Name, "state", entry.State, "checkpointType", entry.CheckpointType, "restartRequired", entry.RestartRequired, "fileSystemResizePending", hasFileSystemResizePending(pvc)) requested := pvc.Spec.Resources.Requests[corev1.ResourceStorage] observed := pvc.Status.Capacity[corev1.ResourceStorage] @@ -760,6 +762,11 @@ func (oc *OperatorContext) processResizeVerification(status *marklogicv1.VolumeR entry.RestartRequired = true entry.LastReason = "" entry.LastMessage = "Filesystem resize still pending" + if entry.State == marklogicv1.PVCResizeStateRestarted { + needsRestartAgain = append(needsRestartAgain, entry.Name) + entry.State = marklogicv1.PVCResizeStateCheckpointed + entry.LastMessage = "Filesystem resize still pending after restart; scheduling another restart" + } continue } @@ -788,6 +795,16 @@ func (oc *OperatorContext) processResizeVerification(status *marklogicv1.VolumeR } oc.recalculatePVCProgress(status) + if len(needsRestartAgain) > 0 { + sort.Strings(needsRestartAgain) + oc.ReqLogger.Info("DEBUG: processResizeVerification - PVCs still pending after restart, triggering another restart", "pvcs", needsRestartAgain) + oc.transitionResizePhase(status, marklogicv1.VolumeResizePhaseRestartingPods, "", fmt.Sprintf("Filesystem resize still pending after restart for PVCs: %s; scheduling another restart", strings.Join(needsRestartAgain, ","))) + oc.emitResizeEvent(corev1.EventTypeNormal, "VolumeResizeProgressing", status.Message) + if patchErr := oc.patchResizeStatus(status); patchErr != nil { + return result.Error(patchErr) + } + return result.RequeueSoon(5) + } if len(notFinalPVCs) > 0 { return oc.scheduleSyncRetryOrFail(status, marklogicv1.VolumeResizeReasonMarkLogicHealthCheckFailed, fmt.Sprintf("Verification pending for PVCs: %s", strings.Join(notFinalPVCs, ",")), fmt.Errorf("final pvc state not satisfied")) } @@ -1223,6 +1240,7 @@ func (oc *OperatorContext) syncStatefulSetPVCTemplates(status *marklogicv1.Volum if len(templatesMissingFromStatefulSet) > 0 { sort.Strings(templatesMissingFromStatefulSet) + oc.ReqLogger.Info("DEBUG: syncStatefulSetPVCTemplates - templates missing from StatefulSet", "statefulSet", currentSts.Name, "missing", templatesMissingFromStatefulSet, "available", getTemplateNamesFromSTS(currentSts)) return false, fmt.Errorf("statefulset %s is missing volumeClaimTemplates: %s", currentSts.Name, strings.Join(templatesMissingFromStatefulSet, ",")) } @@ -1233,6 +1251,7 @@ func (oc *OperatorContext) syncStatefulSetPVCTemplates(status *marklogicv1.Volum } sort.Strings(templatesBelowTarget) + oc.ReqLogger.Info("DEBUG: syncStatefulSetPVCTemplates - templates below target, recreating StatefulSet", "statefulSet", currentSts.Name, "belowTarget", templatesBelowTarget, "allTemplates", getTemplateNamesFromSTS(currentSts)) _ = templatesBelowTarget addResizeMarker(status, resizeMarkerTemplateRecreateStarted) @@ -1274,6 +1293,17 @@ func getStatefulSetTemplateRequest(currentSts *appsv1.StatefulSet, templateName return resource.Quantity{}, false } +func getTemplateNamesFromSTS(sts *appsv1.StatefulSet) []string { + if sts == nil { + return nil + } + names := make([]string, 0, len(sts.Spec.VolumeClaimTemplates)) + for _, t := range sts.Spec.VolumeClaimTemplates { + names = append(names, t.Name) + } + return names +} + func getTemplateNameFromPVCName(statefulSetName, pvcName string) (string, bool) { ordinal := parseOrdinalFromName(pvcName) if ordinal < 0 { @@ -1505,6 +1535,7 @@ func (oc *OperatorContext) discoverPrimaryPVCs(sts *appsv1.StatefulSet, targets templateNames = getResizableTemplateNames(sts) } sort.Strings(templateNames) + oc.ReqLogger.Info("DEBUG: discoverPrimaryPVCs - discovering PVCs", "templateNames", templateNames, "replicas", replicas, "stsName", sts.Name) for _, templateName := range templateNames { templateTarget, hasTarget := targets[templateName] if !hasTarget { @@ -1545,6 +1576,13 @@ func (oc *OperatorContext) discoverPrimaryPVCs(sts *appsv1.StatefulSet, targets } } + templateStateMap := make(map[string]string) + for k, v := range state.minByTemplate { + if v != nil { + templateStateMap[k] = v.String() + } + } + oc.ReqLogger.Info("DEBUG: discoverPrimaryPVCs - discovery complete", "found", len(state.foundPVCs), "expected", len(state.expectedNames), "missing", state.missingPVCs, "minByTemplate", templateStateMap) return state, nil } @@ -1596,6 +1634,11 @@ func resolveResizeTargetsFromSpec(group *marklogicv1.MarklogicGroup) (templateRe targets[tmpl.Name] = size } } + targetMap := make(map[string]string) + for k, v := range targets { + targetMap[k] = v.String() + } + _ = targetMap return targets, nil } From 41e268e6a35b227a36a0d1c8da712fdf9356c985 Mon Sep 17 00:00:00 2001 From: Romain Winieski Date: Tue, 23 Jun 2026 17:28:39 +0200 Subject: [PATCH 05/17] try fix #2 --- pkg/k8sutil/volume_resize_validation.go | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/pkg/k8sutil/volume_resize_validation.go b/pkg/k8sutil/volume_resize_validation.go index beddebea..ab94032e 100644 --- a/pkg/k8sutil/volume_resize_validation.go +++ b/pkg/k8sutil/volume_resize_validation.go @@ -334,11 +334,13 @@ func (oc *OperatorContext) processResizeSubmission(status *marklogicv1.VolumeRes for _, idx := range indices { entry := &status.PVCStatuses[idx] if isPVCCheckpointed(entry) { + oc.ReqLogger.Info("DEBUG: processResizeSubmission - PVC already checkpointed, skipping", "name", entry.Name) continue } pvc := &corev1.PersistentVolumeClaim{} if getErr := oc.Client.Get(oc.Ctx, client.ObjectKey{Namespace: oc.MarklogicGroup.Namespace, Name: entry.Name}, pvc); getErr != nil { + oc.ReqLogger.Info("DEBUG: processResizeSubmission - Failed to fetch PVC", "name", entry.Name, "error", getErr.Error()) oc.markPVCFailed(status, entry.Name, marklogicv1.VolumeResizeReasonResizeFailed, getErr.Error()) continue } @@ -350,25 +352,31 @@ func (oc *OperatorContext) processResizeSubmission(status *marklogicv1.VolumeRes } entryTarget, targetErr := resizeTargetForPVCEntry(status, entry) if targetErr != nil { + oc.ReqLogger.Info("DEBUG: processResizeSubmission - Failed to resolve target", "name", entry.Name, "error", targetErr.Error()) oc.markPVCFailed(status, entry.Name, marklogicv1.VolumeResizeReasonInvalidResizeRequest, targetErr.Error()) continue } entry.RequestedSize = entryTarget.String() entry.ObservedCapacity = observed.String() + oc.ReqLogger.Info("DEBUG: processResizeSubmission - PVC size check", "name", entry.Name, "requested", requested.String(), "target", entryTarget.String(), "observed", observed.String()) + if requested.Cmp(entryTarget) >= 0 { entry.State = marklogicv1.PVCResizeStateWaitingForCheckpoint entry.LastReason = "" entry.LastMessage = "Waiting for resize checkpoint" + oc.ReqLogger.Info("DEBUG: processResizeSubmission - Request already at target, waiting for checkpoint", "name", entry.Name) continue } + oc.ReqLogger.Info("DEBUG: processResizeSubmission - Submitting PVC resize patch", "name", entry.Name, "newSize", entryTarget.String()) patch := client.MergeFrom(pvc.DeepCopy()) if pvc.Spec.Resources.Requests == nil { pvc.Spec.Resources.Requests = corev1.ResourceList{} } pvc.Spec.Resources.Requests[corev1.ResourceStorage] = entryTarget if patchErr := oc.Client.Patch(oc.Ctx, pvc, patch); patchErr != nil { + oc.ReqLogger.Info("DEBUG: processResizeSubmission - Patch failed", "name", entry.Name, "error", patchErr.Error()) oc.markPVCFailed(status, entry.Name, marklogicv1.VolumeResizeReasonResizeFailed, patchErr.Error()) continue } @@ -377,6 +385,7 @@ func (oc *OperatorContext) processResizeSubmission(status *marklogicv1.VolumeRes entry.State = marklogicv1.PVCResizeStateResizeSubmitted entry.LastReason = "" entry.LastMessage = "Resize request submitted" + oc.ReqLogger.Info("DEBUG: processResizeSubmission - Resize patch submitted successfully", "name", entry.Name) } oc.updateSequentialActivePVC(status) @@ -762,9 +771,10 @@ func (oc *OperatorContext) processResizeVerification(status *marklogicv1.VolumeR entry.RestartRequired = true entry.LastReason = "" entry.LastMessage = "Filesystem resize still pending" - if entry.State == marklogicv1.PVCResizeStateRestarted { + // If this PVC already has a checkpoint (offline or online), it means pod was restarted + // but filesystem resize is still pending - needs another restart + if isPVCCheckpointed(entry) { needsRestartAgain = append(needsRestartAgain, entry.Name) - entry.State = marklogicv1.PVCResizeStateCheckpointed entry.LastMessage = "Filesystem resize still pending after restart; scheduling another restart" } continue @@ -846,11 +856,15 @@ func (oc *OperatorContext) newResizeStatus(pvcState *resizePVCDiscovery, targetS func (oc *OperatorContext) initializePVCStatuses(status *marklogicv1.VolumeResizeStatus, pvcState *resizePVCDiscovery) { if len(status.PVCStatuses) == 0 { + oc.ReqLogger.Info("DEBUG: initializePVCStatuses - initializing from discovery", "expectedNames", pvcState.expectedNames, "count", len(pvcState.expectedNames)) status.PVCStatuses = make([]marklogicv1.PVCResizeStatus, 0, len(pvcState.expectedNames)) for _, name := range pvcState.expectedNames { entry := marklogicv1.PVCResizeStatus{Name: name, State: marklogicv1.PVCResizeStatePending} if target, ok := pvcState.targetByPVC[name]; ok { entry.RequestedSize = target.String() + oc.ReqLogger.Info("DEBUG: initializePVCStatuses - initialized PVC with target", "name", name, "target", target.String()) + } else { + oc.ReqLogger.Info("DEBUG: initializePVCStatuses - initialized PVC without target", "name", name) } status.PVCStatuses = append(status.PVCStatuses, entry) } From 254ee2ec218253ff4cb1e71f99e9313a561e1fc1 Mon Sep 17 00:00:00 2001 From: Romain Winieski Date: Tue, 23 Jun 2026 18:43:30 +0200 Subject: [PATCH 06/17] Fix #3 --- pkg/k8sutil/volume_resize_validation.go | 4 +++ pkg/k8sutil/volume_resize_validation_test.go | 36 ++++++++++++++++++++ test/e2e-helm/9_volume_resize_test.go | 6 ++++ test/e2e/9_volume_resize_test.go | 6 ++++ 4 files changed, 52 insertions(+) diff --git a/pkg/k8sutil/volume_resize_validation.go b/pkg/k8sutil/volume_resize_validation.go index ab94032e..45a1d802 100644 --- a/pkg/k8sutil/volume_resize_validation.go +++ b/pkg/k8sutil/volume_resize_validation.go @@ -775,6 +775,10 @@ func (oc *OperatorContext) processResizeVerification(status *marklogicv1.VolumeR // but filesystem resize is still pending - needs another restart if isPVCCheckpointed(entry) { needsRestartAgain = append(needsRestartAgain, entry.Name) + entry.State = marklogicv1.PVCResizeStateRestartPending + if entry.PodName == "" { + entry.PodName = derivePodNameFromPVC(currentSts.Name, entry.Name) + } entry.LastMessage = "Filesystem resize still pending after restart; scheduling another restart" } continue diff --git a/pkg/k8sutil/volume_resize_validation_test.go b/pkg/k8sutil/volume_resize_validation_test.go index 6725854f..4ff1ff21 100644 --- a/pkg/k8sutil/volume_resize_validation_test.go +++ b/pkg/k8sutil/volume_resize_validation_test.go @@ -1117,6 +1117,42 @@ func TestResizeVerificationRetryPathStallsThenResumes(t *testing.T) { } } +func TestResizeVerificationMarksRestartPendingForAdditionalRestart(t *testing.T) { + oc := newResizeTestContext(t, resizeTestInput{desiredSize: "50Gi", currentSize: "20Gi", updateStrategy: appsv1.OnDeleteStatefulSetStrategyType}) + seedVerificationStatus(t, oc, "verify-restart-again", "50Gi", "") + + status := getUpdatedGroup(t, oc).Status.VolumeResizeStatus + status.PVCStatuses[1].State = marklogicv1.PVCResizeStateRestarted + status.PVCStatuses[1].CheckpointType = marklogicv1.PVCResizeCheckpointTypeOfflinePending + status.PVCStatuses[1].RestartRequired = true + if err := oc.patchResizeStatus(status); err != nil { + t.Fatalf("failed to seed restarted pvc state: %v", err) + } + + pvc := newBoundPVC("datadir-dnode-1", "50Gi") + pvc.Status.Capacity[corev1.ResourceStorage] = resourceMustParse("20Gi") + pvc.Status.Conditions = []corev1.PersistentVolumeClaimCondition{{ + Type: corev1.PersistentVolumeClaimFileSystemResizePending, + Status: corev1.ConditionTrue, + }} + replacePVC(t, oc, pvc) + + if _, err := oc.ReconcileVolumeResizeValidation().Output(); err != nil { + t.Fatalf("unexpected error during verification restart-again pass: %v", err) + } + + updated := getUpdatedGroup(t, oc).Status.VolumeResizeStatus + if updated.Phase != marklogicv1.VolumeResizePhaseRestartingPods { + t.Fatalf("expected RestartingPods phase, got %s", updated.Phase) + } + if updated.PVCStatuses[1].State != marklogicv1.PVCResizeStateRestartPending { + t.Fatalf("expected pvc state RestartPending, got %s", updated.PVCStatuses[1].State) + } + if updated.PVCStatuses[1].PodName == "" { + t.Fatalf("expected pod name to be set for restart") + } +} + func TestResizeVerificationTemplateLagRoutesBackToStatefulSetSync(t *testing.T) { oc := newResizeTestContext(t, resizeTestInput{desiredSize: "50Gi", currentSize: "20Gi", updateStrategy: appsv1.OnDeleteStatefulSetStrategyType}) seedVerificationStatus(t, oc, "verify-template-lag", "50Gi", "") diff --git a/test/e2e-helm/9_volume_resize_test.go b/test/e2e-helm/9_volume_resize_test.go index 2e3c33eb..9f96165c 100644 --- a/test/e2e-helm/9_volume_resize_test.go +++ b/test/e2e-helm/9_volume_resize_test.go @@ -263,6 +263,12 @@ func createNSResizeCluster(ctx context.Context, client klient.Client, ns string) Enabled: true, Size: resizeNSInitialSize, }, + AdditionalVolumeMounts: &[]corev1.VolumeMount{ + { + Name: resizeNSExtraPVCName, + MountPath: "/var/opt/MarkLogic/resize-logs", + }, + }, AdditionalVolumeClaimTemplates: &[]corev1.PersistentVolumeClaim{ { ObjectMeta: metav1.ObjectMeta{Name: resizeNSExtraPVCName}, diff --git a/test/e2e/9_volume_resize_test.go b/test/e2e/9_volume_resize_test.go index d73e0f0c..d598b3c0 100644 --- a/test/e2e/9_volume_resize_test.go +++ b/test/e2e/9_volume_resize_test.go @@ -248,6 +248,12 @@ func createResizeNamespaceAndCluster(ctx context.Context, t *testing.T, client k Enabled: true, Size: resizeInitialSize, }, + AdditionalVolumeMounts: &[]corev1.VolumeMount{ + { + Name: resizeExtraPVCName, + MountPath: "/var/opt/MarkLogic/resize-logs", + }, + }, AdditionalVolumeClaimTemplates: &[]corev1.PersistentVolumeClaim{ { ObjectMeta: metav1.ObjectMeta{Name: resizeExtraPVCName}, From e522d13574da9702cd3a3e39c1767334cdef99ce Mon Sep 17 00:00:00 2001 From: Romain Winieski Date: Wed, 24 Jun 2026 10:37:52 +0200 Subject: [PATCH 07/17] running all tests --- Jenkinsfile | 64 ++++++++++++++++++----------------------------------- Makefile | 10 +++++++++ 2 files changed, 31 insertions(+), 43 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index e4e6d341..7780d425 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -233,6 +233,14 @@ void runHelmNamespaceScopedE2eTests() { """ } +void runAllE2eTests() { + withEksCredentials { + sh """ + make e2e-test-all IMG=${operatorRepo}:${VERSION} VERSION=${VERSION} + """ + } +} + // Dynamically extracts dependent container image references from their canonical // sources and triggers the BlackDuck scan job with the full CONTAINER_IMAGES list. // PUBLISH_IMAGE=true also prepends the published operator registry image. @@ -344,28 +352,26 @@ pipeline { } // ----------------------------------------------------------------------- - // E2E Tests — runs on Minikube (default) or the shared EKS cluster. - // Minikube and EKS paths are unified into the same named stages. - // The EKS cluster lock is acquired only for EKS builds, so unrelated - // Minikube builds are never blocked. Cleanup is guaranteed via - // try/finally even when earlier stages throw. + // E2E Tests — branch builds run the full suite automatically: + // cluster-scoped E2E, Helm namespace-scoped E2E, and EKS E2E. + // The EKS lock is held only while the full suite executes. // ----------------------------------------------------------------------- stage('E2E Tests') { steps { script { - if (params.TEST_ON_EKS && params.E2E_SCOPE != 'cluster') { - error "E2E_SCOPE='${params.E2E_SCOPE}' is not supported when TEST_ON_EKS=true. Use E2E_SCOPE='cluster'." + def doSetup = { runMinikubeSetup() } + def doTests = { + lock(resource: 'jenkinsKubeNinjasEksCluster', inversePrecedence: true) { + timeout(time: 3, unit: 'HOURS') { + runAllE2eTests() + } + } } - - def doSetup = { params.TEST_ON_EKS ? runEKSSetup() : runMinikubeSetup() } - def doTests = { params.TEST_ON_EKS ? runEKSE2eTests() : runE2eTests(params.E2E_SCOPE) } - def doCleanup = { + def doCleanup = { catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') { - if (params.TEST_ON_EKS) { runEKSCleanup() } else { runMinikubeCleanup() } + runMinikubeCleanup() } } - def doIstioSetup = { params.TEST_ON_EKS ? runEKSIstioSetup() : runIstioMinikubeSetup() } - def doIstioTests = { params.TEST_ON_EKS ? runEKSIstioE2eTests() : runIstioE2eTests() } def testBody = { try { @@ -374,36 +380,8 @@ pipeline { } finally { stage('Cleanup') { doCleanup() } } - // Istio stages are always declared so that Jenkins Stage View - // shows a consistent set of columns across all run types. - // When VERIFY_ISTIO_AMBIENT is false the stages are entered but - // immediately skipped, preserving their position in the view. - try { - stage('Istio Setup') { - if (params.E2E_SCOPE == 'cluster' && params.VERIFY_ISTIO_AMBIENT) { doIstioSetup() } - else { echo "Istio tests skipped (E2E_SCOPE=${params.E2E_SCOPE}, VERIFY_ISTIO_AMBIENT=${params.VERIFY_ISTIO_AMBIENT})" } - } - stage('Run Istio e2e Tests') { - if (params.E2E_SCOPE == 'cluster' && params.VERIFY_ISTIO_AMBIENT) { doIstioTests() } - else { echo "Istio tests skipped (E2E_SCOPE=${params.E2E_SCOPE}, VERIFY_ISTIO_AMBIENT=${params.VERIFY_ISTIO_AMBIENT})" } - } - } finally { - stage('Istio Cleanup') { - if (params.E2E_SCOPE == 'cluster' && params.VERIFY_ISTIO_AMBIENT) { doCleanup() } - else { echo "Istio tests skipped (E2E_SCOPE=${params.E2E_SCOPE}, VERIFY_ISTIO_AMBIENT=${params.VERIFY_ISTIO_AMBIENT})" } - } - } - } - - if (params.TEST_ON_EKS) { - lock(resource: 'jenkinsKubeNinjasEksCluster', inversePrecedence: true) { - timeout(time: 3, unit: 'HOURS') { - testBody() - } - } - } else { - testBody() } + testBody() } } } diff --git a/Makefile b/Makefile index 1682dc06..21d80172 100755 --- a/Makefile +++ b/Makefile @@ -233,6 +233,16 @@ e2e-test-helm-namespace: fi E2E_DOCKER_IMAGE=$(IMG) go test -v -count=1 -timeout 45m ./test/e2e-helm +.PHONY: e2e-test-all ## Run cluster-scoped E2E, Helm namespace-scoped E2E, and EKS E2E tests in sequence. +e2e-test-all: + @echo "=====Running cluster-scoped E2E, Helm E2E, and EKS E2E tests=====" + @set -e; \ + trap '$(MAKE) e2e-cleanup-eks' EXIT; \ + $(MAKE) e2e-test-cluster; \ + $(MAKE) e2e-test-helm-namespace; \ + $(MAKE) e2e-setup-eks; \ + $(MAKE) e2e-test-eks + .PHONY: e2e-test-volume-resize ## Run ONLY the cluster-scoped volume resize test (two namespaces in parallel) e2e-test-volume-resize: @echo "=====Running cluster-scoped volume-resize e2e test (parallel, 2 namespaces)=====" From 247e0f717876c3795a446a6291faddce360ae642 Mon Sep 17 00:00:00 2001 From: Romain Winieski Date: Wed, 24 Jun 2026 14:49:15 +0200 Subject: [PATCH 08/17] Revert "running all tests"" --- Jenkinsfile | 64 +++++++++++++++++++++++++++++++++++------------------ Makefile | 10 --------- 2 files changed, 43 insertions(+), 31 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 7780d425..e4e6d341 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -233,14 +233,6 @@ void runHelmNamespaceScopedE2eTests() { """ } -void runAllE2eTests() { - withEksCredentials { - sh """ - make e2e-test-all IMG=${operatorRepo}:${VERSION} VERSION=${VERSION} - """ - } -} - // Dynamically extracts dependent container image references from their canonical // sources and triggers the BlackDuck scan job with the full CONTAINER_IMAGES list. // PUBLISH_IMAGE=true also prepends the published operator registry image. @@ -352,26 +344,28 @@ pipeline { } // ----------------------------------------------------------------------- - // E2E Tests — branch builds run the full suite automatically: - // cluster-scoped E2E, Helm namespace-scoped E2E, and EKS E2E. - // The EKS lock is held only while the full suite executes. + // E2E Tests — runs on Minikube (default) or the shared EKS cluster. + // Minikube and EKS paths are unified into the same named stages. + // The EKS cluster lock is acquired only for EKS builds, so unrelated + // Minikube builds are never blocked. Cleanup is guaranteed via + // try/finally even when earlier stages throw. // ----------------------------------------------------------------------- stage('E2E Tests') { steps { script { - def doSetup = { runMinikubeSetup() } - def doTests = { - lock(resource: 'jenkinsKubeNinjasEksCluster', inversePrecedence: true) { - timeout(time: 3, unit: 'HOURS') { - runAllE2eTests() - } - } + if (params.TEST_ON_EKS && params.E2E_SCOPE != 'cluster') { + error "E2E_SCOPE='${params.E2E_SCOPE}' is not supported when TEST_ON_EKS=true. Use E2E_SCOPE='cluster'." } - def doCleanup = { + + def doSetup = { params.TEST_ON_EKS ? runEKSSetup() : runMinikubeSetup() } + def doTests = { params.TEST_ON_EKS ? runEKSE2eTests() : runE2eTests(params.E2E_SCOPE) } + def doCleanup = { catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') { - runMinikubeCleanup() + if (params.TEST_ON_EKS) { runEKSCleanup() } else { runMinikubeCleanup() } } } + def doIstioSetup = { params.TEST_ON_EKS ? runEKSIstioSetup() : runIstioMinikubeSetup() } + def doIstioTests = { params.TEST_ON_EKS ? runEKSIstioE2eTests() : runIstioE2eTests() } def testBody = { try { @@ -380,8 +374,36 @@ pipeline { } finally { stage('Cleanup') { doCleanup() } } + // Istio stages are always declared so that Jenkins Stage View + // shows a consistent set of columns across all run types. + // When VERIFY_ISTIO_AMBIENT is false the stages are entered but + // immediately skipped, preserving their position in the view. + try { + stage('Istio Setup') { + if (params.E2E_SCOPE == 'cluster' && params.VERIFY_ISTIO_AMBIENT) { doIstioSetup() } + else { echo "Istio tests skipped (E2E_SCOPE=${params.E2E_SCOPE}, VERIFY_ISTIO_AMBIENT=${params.VERIFY_ISTIO_AMBIENT})" } + } + stage('Run Istio e2e Tests') { + if (params.E2E_SCOPE == 'cluster' && params.VERIFY_ISTIO_AMBIENT) { doIstioTests() } + else { echo "Istio tests skipped (E2E_SCOPE=${params.E2E_SCOPE}, VERIFY_ISTIO_AMBIENT=${params.VERIFY_ISTIO_AMBIENT})" } + } + } finally { + stage('Istio Cleanup') { + if (params.E2E_SCOPE == 'cluster' && params.VERIFY_ISTIO_AMBIENT) { doCleanup() } + else { echo "Istio tests skipped (E2E_SCOPE=${params.E2E_SCOPE}, VERIFY_ISTIO_AMBIENT=${params.VERIFY_ISTIO_AMBIENT})" } + } + } + } + + if (params.TEST_ON_EKS) { + lock(resource: 'jenkinsKubeNinjasEksCluster', inversePrecedence: true) { + timeout(time: 3, unit: 'HOURS') { + testBody() + } + } + } else { + testBody() } - testBody() } } } diff --git a/Makefile b/Makefile index 21d80172..1682dc06 100755 --- a/Makefile +++ b/Makefile @@ -233,16 +233,6 @@ e2e-test-helm-namespace: fi E2E_DOCKER_IMAGE=$(IMG) go test -v -count=1 -timeout 45m ./test/e2e-helm -.PHONY: e2e-test-all ## Run cluster-scoped E2E, Helm namespace-scoped E2E, and EKS E2E tests in sequence. -e2e-test-all: - @echo "=====Running cluster-scoped E2E, Helm E2E, and EKS E2E tests=====" - @set -e; \ - trap '$(MAKE) e2e-cleanup-eks' EXIT; \ - $(MAKE) e2e-test-cluster; \ - $(MAKE) e2e-test-helm-namespace; \ - $(MAKE) e2e-setup-eks; \ - $(MAKE) e2e-test-eks - .PHONY: e2e-test-volume-resize ## Run ONLY the cluster-scoped volume resize test (two namespaces in parallel) e2e-test-volume-resize: @echo "=====Running cluster-scoped volume-resize e2e test (parallel, 2 namespaces)=====" From 2b756f822a3f6cbda7f32838308fd7ba6fd3ef17 Mon Sep 17 00:00:00 2001 From: Romain Winieski Date: Thu, 25 Jun 2026 14:53:42 +0200 Subject: [PATCH 09/17] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- pkg/k8sutil/volume_resize_validation.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/k8sutil/volume_resize_validation.go b/pkg/k8sutil/volume_resize_validation.go index 45a1d802..ad807ac2 100644 --- a/pkg/k8sutil/volume_resize_validation.go +++ b/pkg/k8sutil/volume_resize_validation.go @@ -334,7 +334,7 @@ func (oc *OperatorContext) processResizeSubmission(status *marklogicv1.VolumeRes for _, idx := range indices { entry := &status.PVCStatuses[idx] if isPVCCheckpointed(entry) { - oc.ReqLogger.Info("DEBUG: processResizeSubmission - PVC already checkpointed, skipping", "name", entry.Name) + oc.ReqLogger.V(1).Info("processResizeSubmission: PVC already checkpointed, skipping", "name", entry.Name) continue } From 753c645976592b4b39e3f0bd966890af490349a2 Mon Sep 17 00:00:00 2001 From: Romain Winieski Date: Thu, 25 Jun 2026 14:53:59 +0200 Subject: [PATCH 10/17] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- pkg/k8sutil/volume_resize_validation.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/k8sutil/volume_resize_validation.go b/pkg/k8sutil/volume_resize_validation.go index ad807ac2..481c8f55 100644 --- a/pkg/k8sutil/volume_resize_validation.go +++ b/pkg/k8sutil/volume_resize_validation.go @@ -340,7 +340,7 @@ func (oc *OperatorContext) processResizeSubmission(status *marklogicv1.VolumeRes pvc := &corev1.PersistentVolumeClaim{} if getErr := oc.Client.Get(oc.Ctx, client.ObjectKey{Namespace: oc.MarklogicGroup.Namespace, Name: entry.Name}, pvc); getErr != nil { - oc.ReqLogger.Info("DEBUG: processResizeSubmission - Failed to fetch PVC", "name", entry.Name, "error", getErr.Error()) + oc.ReqLogger.V(1).Info("processResizeSubmission: failed to fetch PVC", "name", entry.Name, "error", getErr.Error()) oc.markPVCFailed(status, entry.Name, marklogicv1.VolumeResizeReasonResizeFailed, getErr.Error()) continue } From 0c3db56c7a3fde0dc8ea714d3c18a1a45d7e5729 Mon Sep 17 00:00:00 2001 From: Romain Winieski Date: Thu, 25 Jun 2026 14:54:12 +0200 Subject: [PATCH 11/17] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- pkg/k8sutil/volume_resize_validation.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/k8sutil/volume_resize_validation.go b/pkg/k8sutil/volume_resize_validation.go index 481c8f55..e1abec68 100644 --- a/pkg/k8sutil/volume_resize_validation.go +++ b/pkg/k8sutil/volume_resize_validation.go @@ -745,7 +745,7 @@ func (oc *OperatorContext) processResizeVerification(status *marklogicv1.VolumeR if getErr := oc.Client.Get(oc.Ctx, client.ObjectKey{Namespace: oc.MarklogicGroup.Namespace, Name: entry.Name}, pvc); getErr != nil { return oc.scheduleSyncRetryOrFail(status, marklogicv1.VolumeResizeReasonMarkLogicHealthCheckFailed, "Failed to fetch PVC during verification", getErr) } - oc.ReqLogger.Info("DEBUG: processResizeVerification - PVC state", "name", entry.Name, "state", entry.State, "checkpointType", entry.CheckpointType, "restartRequired", entry.RestartRequired, "fileSystemResizePending", hasFileSystemResizePending(pvc)) + oc.ReqLogger.V(1).Info("processResizeVerification: PVC state", "name", entry.Name, "state", entry.State, "checkpointType", entry.CheckpointType, "restartRequired", entry.RestartRequired, "fileSystemResizePending", hasFileSystemResizePending(pvc)) requested := pvc.Spec.Resources.Requests[corev1.ResourceStorage] observed := pvc.Status.Capacity[corev1.ResourceStorage] From 5536477ec12de4c7c974d886c3f8598073064520 Mon Sep 17 00:00:00 2001 From: Romain Winieski Date: Thu, 25 Jun 2026 14:54:22 +0200 Subject: [PATCH 12/17] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- pkg/k8sutil/volume_resize_validation.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/k8sutil/volume_resize_validation.go b/pkg/k8sutil/volume_resize_validation.go index e1abec68..1c14519a 100644 --- a/pkg/k8sutil/volume_resize_validation.go +++ b/pkg/k8sutil/volume_resize_validation.go @@ -860,7 +860,7 @@ func (oc *OperatorContext) newResizeStatus(pvcState *resizePVCDiscovery, targetS func (oc *OperatorContext) initializePVCStatuses(status *marklogicv1.VolumeResizeStatus, pvcState *resizePVCDiscovery) { if len(status.PVCStatuses) == 0 { - oc.ReqLogger.Info("DEBUG: initializePVCStatuses - initializing from discovery", "expectedNames", pvcState.expectedNames, "count", len(pvcState.expectedNames)) + oc.ReqLogger.V(1).Info("initializePVCStatuses: initializing from discovery", "expectedNames", pvcState.expectedNames, "count", len(pvcState.expectedNames)) status.PVCStatuses = make([]marklogicv1.PVCResizeStatus, 0, len(pvcState.expectedNames)) for _, name := range pvcState.expectedNames { entry := marklogicv1.PVCResizeStatus{Name: name, State: marklogicv1.PVCResizeStatePending} From e3271591ece54e1d6d21c6172f659a47d440c8dc Mon Sep 17 00:00:00 2001 From: Romain Winieski Date: Thu, 25 Jun 2026 14:54:35 +0200 Subject: [PATCH 13/17] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- pkg/k8sutil/volume_resize_validation.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/k8sutil/volume_resize_validation.go b/pkg/k8sutil/volume_resize_validation.go index 1c14519a..bfaef5df 100644 --- a/pkg/k8sutil/volume_resize_validation.go +++ b/pkg/k8sutil/volume_resize_validation.go @@ -1270,7 +1270,6 @@ func (oc *OperatorContext) syncStatefulSetPVCTemplates(status *marklogicv1.Volum sort.Strings(templatesBelowTarget) oc.ReqLogger.Info("DEBUG: syncStatefulSetPVCTemplates - templates below target, recreating StatefulSet", "statefulSet", currentSts.Name, "belowTarget", templatesBelowTarget, "allTemplates", getTemplateNamesFromSTS(currentSts)) - _ = templatesBelowTarget addResizeMarker(status, resizeMarkerTemplateRecreateStarted) if hasResizeMarker(status, resizeMarkerTemplateDeleted) { From c5ed72f8518293565b2dcba181b7c39bd0cbc340 Mon Sep 17 00:00:00 2001 From: Romain Winieski Date: Thu, 25 Jun 2026 14:54:54 +0200 Subject: [PATCH 14/17] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- pkg/k8sutil/volume_resize_validation.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pkg/k8sutil/volume_resize_validation.go b/pkg/k8sutil/volume_resize_validation.go index bfaef5df..6a595858 100644 --- a/pkg/k8sutil/volume_resize_validation.go +++ b/pkg/k8sutil/volume_resize_validation.go @@ -1651,11 +1651,6 @@ func resolveResizeTargetsFromSpec(group *marklogicv1.MarklogicGroup) (templateRe targets[tmpl.Name] = size } } - targetMap := make(map[string]string) - for k, v := range targets { - targetMap[k] = v.String() - } - _ = targetMap return targets, nil } From d9f3ae3379cebc86a54651b9c44f9b67ab80bdac Mon Sep 17 00:00:00 2001 From: Romain Winieski Date: Thu, 25 Jun 2026 15:20:41 +0200 Subject: [PATCH 15/17] remove DEBUG log --- pkg/k8sutil/volume_resize_validation.go | 32 +++++++++++++++---------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/pkg/k8sutil/volume_resize_validation.go b/pkg/k8sutil/volume_resize_validation.go index 6a595858..a4bcc4cd 100644 --- a/pkg/k8sutil/volume_resize_validation.go +++ b/pkg/k8sutil/volume_resize_validation.go @@ -352,31 +352,31 @@ func (oc *OperatorContext) processResizeSubmission(status *marklogicv1.VolumeRes } entryTarget, targetErr := resizeTargetForPVCEntry(status, entry) if targetErr != nil { - oc.ReqLogger.Info("DEBUG: processResizeSubmission - Failed to resolve target", "name", entry.Name, "error", targetErr.Error()) + oc.ReqLogger.V(1).Info("processResizeSubmission: failed to resolve target", "name", entry.Name, "error", targetErr.Error()) oc.markPVCFailed(status, entry.Name, marklogicv1.VolumeResizeReasonInvalidResizeRequest, targetErr.Error()) continue } entry.RequestedSize = entryTarget.String() entry.ObservedCapacity = observed.String() - oc.ReqLogger.Info("DEBUG: processResizeSubmission - PVC size check", "name", entry.Name, "requested", requested.String(), "target", entryTarget.String(), "observed", observed.String()) + oc.ReqLogger.V(1).Info("processResizeSubmission: PVC size check", "name", entry.Name, "requested", requested.String(), "target", entryTarget.String(), "observed", observed.String()) if requested.Cmp(entryTarget) >= 0 { entry.State = marklogicv1.PVCResizeStateWaitingForCheckpoint entry.LastReason = "" entry.LastMessage = "Waiting for resize checkpoint" - oc.ReqLogger.Info("DEBUG: processResizeSubmission - Request already at target, waiting for checkpoint", "name", entry.Name) + oc.ReqLogger.V(1).Info("processResizeSubmission: request already at target, waiting for checkpoint", "name", entry.Name) continue } - oc.ReqLogger.Info("DEBUG: processResizeSubmission - Submitting PVC resize patch", "name", entry.Name, "newSize", entryTarget.String()) + oc.ReqLogger.V(1).Info("processResizeSubmission: submitting PVC resize patch", "name", entry.Name, "newSize", entryTarget.String()) patch := client.MergeFrom(pvc.DeepCopy()) if pvc.Spec.Resources.Requests == nil { pvc.Spec.Resources.Requests = corev1.ResourceList{} } pvc.Spec.Resources.Requests[corev1.ResourceStorage] = entryTarget if patchErr := oc.Client.Patch(oc.Ctx, pvc, patch); patchErr != nil { - oc.ReqLogger.Info("DEBUG: processResizeSubmission - Patch failed", "name", entry.Name, "error", patchErr.Error()) + oc.ReqLogger.V(1).Info("processResizeSubmission: patch failed", "name", entry.Name, "error", patchErr.Error()) oc.markPVCFailed(status, entry.Name, marklogicv1.VolumeResizeReasonResizeFailed, patchErr.Error()) continue } @@ -385,7 +385,7 @@ func (oc *OperatorContext) processResizeSubmission(status *marklogicv1.VolumeRes entry.State = marklogicv1.PVCResizeStateResizeSubmitted entry.LastReason = "" entry.LastMessage = "Resize request submitted" - oc.ReqLogger.Info("DEBUG: processResizeSubmission - Resize patch submitted successfully", "name", entry.Name) + oc.ReqLogger.V(1).Info("processResizeSubmission: resize patch submitted successfully", "name", entry.Name) } oc.updateSequentialActivePVC(status) @@ -811,7 +811,7 @@ func (oc *OperatorContext) processResizeVerification(status *marklogicv1.VolumeR oc.recalculatePVCProgress(status) if len(needsRestartAgain) > 0 { sort.Strings(needsRestartAgain) - oc.ReqLogger.Info("DEBUG: processResizeVerification - PVCs still pending after restart, triggering another restart", "pvcs", needsRestartAgain) + oc.ReqLogger.V(1).Info("processResizeVerification: PVCs still pending after restart, triggering another restart", "pvcs", needsRestartAgain) oc.transitionResizePhase(status, marklogicv1.VolumeResizePhaseRestartingPods, "", fmt.Sprintf("Filesystem resize still pending after restart for PVCs: %s; scheduling another restart", strings.Join(needsRestartAgain, ","))) oc.emitResizeEvent(corev1.EventTypeNormal, "VolumeResizeProgressing", status.Message) if patchErr := oc.patchResizeStatus(status); patchErr != nil { @@ -866,9 +866,9 @@ func (oc *OperatorContext) initializePVCStatuses(status *marklogicv1.VolumeResiz entry := marklogicv1.PVCResizeStatus{Name: name, State: marklogicv1.PVCResizeStatePending} if target, ok := pvcState.targetByPVC[name]; ok { entry.RequestedSize = target.String() - oc.ReqLogger.Info("DEBUG: initializePVCStatuses - initialized PVC with target", "name", name, "target", target.String()) + oc.ReqLogger.V(1).Info("initializePVCStatuses: initialized PVC with target", "name", name, "target", target.String()) } else { - oc.ReqLogger.Info("DEBUG: initializePVCStatuses - initialized PVC without target", "name", name) + oc.ReqLogger.V(1).Info("initializePVCStatuses: initialized PVC without target", "name", name) } status.PVCStatuses = append(status.PVCStatuses, entry) } @@ -1258,7 +1258,7 @@ func (oc *OperatorContext) syncStatefulSetPVCTemplates(status *marklogicv1.Volum if len(templatesMissingFromStatefulSet) > 0 { sort.Strings(templatesMissingFromStatefulSet) - oc.ReqLogger.Info("DEBUG: syncStatefulSetPVCTemplates - templates missing from StatefulSet", "statefulSet", currentSts.Name, "missing", templatesMissingFromStatefulSet, "available", getTemplateNamesFromSTS(currentSts)) + oc.ReqLogger.V(1).Info("syncStatefulSetPVCTemplates: templates missing from StatefulSet", "statefulSet", currentSts.Name, "missing", templatesMissingFromStatefulSet, "available", getTemplateNamesFromSTS(currentSts)) return false, fmt.Errorf("statefulset %s is missing volumeClaimTemplates: %s", currentSts.Name, strings.Join(templatesMissingFromStatefulSet, ",")) } @@ -1269,7 +1269,8 @@ func (oc *OperatorContext) syncStatefulSetPVCTemplates(status *marklogicv1.Volum } sort.Strings(templatesBelowTarget) - oc.ReqLogger.Info("DEBUG: syncStatefulSetPVCTemplates - templates below target, recreating StatefulSet", "statefulSet", currentSts.Name, "belowTarget", templatesBelowTarget, "allTemplates", getTemplateNamesFromSTS(currentSts)) + oc.ReqLogger.V(1).Info("syncStatefulSetPVCTemplates: templates below target, recreating StatefulSet", "statefulSet", currentSts.Name, "belowTarget", templatesBelowTarget, "allTemplates", getTemplateNamesFromSTS(currentSts)) + _ = templatesBelowTarget addResizeMarker(status, resizeMarkerTemplateRecreateStarted) if hasResizeMarker(status, resizeMarkerTemplateDeleted) { @@ -1552,7 +1553,7 @@ func (oc *OperatorContext) discoverPrimaryPVCs(sts *appsv1.StatefulSet, targets templateNames = getResizableTemplateNames(sts) } sort.Strings(templateNames) - oc.ReqLogger.Info("DEBUG: discoverPrimaryPVCs - discovering PVCs", "templateNames", templateNames, "replicas", replicas, "stsName", sts.Name) + oc.ReqLogger.V(1).Info("discoverPrimaryPVCs: discovering PVCs", "templateNames", templateNames, "replicas", replicas, "stsName", sts.Name) for _, templateName := range templateNames { templateTarget, hasTarget := targets[templateName] if !hasTarget { @@ -1599,7 +1600,7 @@ func (oc *OperatorContext) discoverPrimaryPVCs(sts *appsv1.StatefulSet, targets templateStateMap[k] = v.String() } } - oc.ReqLogger.Info("DEBUG: discoverPrimaryPVCs - discovery complete", "found", len(state.foundPVCs), "expected", len(state.expectedNames), "missing", state.missingPVCs, "minByTemplate", templateStateMap) + oc.ReqLogger.V(1).Info("discoverPrimaryPVCs: discovery complete", "found", len(state.foundPVCs), "expected", len(state.expectedNames), "missing", state.missingPVCs, "minByTemplate", templateStateMap) return state, nil } @@ -1651,6 +1652,11 @@ func resolveResizeTargetsFromSpec(group *marklogicv1.MarklogicGroup) (templateRe targets[tmpl.Name] = size } } + targetMap := make(map[string]string) + for k, v := range targets { + targetMap[k] = v.String() + } + _ = targetMap return targets, nil } From 2d5605f4d0a786b1f85f2060c854d686e6f7f6c5 Mon Sep 17 00:00:00 2001 From: Romain Winieski Date: Thu, 25 Jun 2026 15:50:37 +0200 Subject: [PATCH 16/17] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- pkg/k8sutil/volume_resize_validation.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/k8sutil/volume_resize_validation.go b/pkg/k8sutil/volume_resize_validation.go index a4bcc4cd..9718bf15 100644 --- a/pkg/k8sutil/volume_resize_validation.go +++ b/pkg/k8sutil/volume_resize_validation.go @@ -1270,7 +1270,6 @@ func (oc *OperatorContext) syncStatefulSetPVCTemplates(status *marklogicv1.Volum sort.Strings(templatesBelowTarget) oc.ReqLogger.V(1).Info("syncStatefulSetPVCTemplates: templates below target, recreating StatefulSet", "statefulSet", currentSts.Name, "belowTarget", templatesBelowTarget, "allTemplates", getTemplateNamesFromSTS(currentSts)) - _ = templatesBelowTarget addResizeMarker(status, resizeMarkerTemplateRecreateStarted) if hasResizeMarker(status, resizeMarkerTemplateDeleted) { From 8054cecfc799882870c2a45f4d9ab6efa93f5c53 Mon Sep 17 00:00:00 2001 From: Romain Winieski Date: Thu, 25 Jun 2026 15:50:47 +0200 Subject: [PATCH 17/17] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- pkg/k8sutil/volume_resize_validation.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pkg/k8sutil/volume_resize_validation.go b/pkg/k8sutil/volume_resize_validation.go index 9718bf15..e49a08a3 100644 --- a/pkg/k8sutil/volume_resize_validation.go +++ b/pkg/k8sutil/volume_resize_validation.go @@ -1651,11 +1651,6 @@ func resolveResizeTargetsFromSpec(group *marklogicv1.MarklogicGroup) (templateRe targets[tmpl.Name] = size } } - targetMap := make(map[string]string) - for k, v := range targets { - targetMap[k] = v.String() - } - _ = targetMap return targets, nil }