diff --git a/build/components/versions.yml b/build/components/versions.yml index e5feff83d0..b229dfec31 100644 --- a/build/components/versions.yml +++ b/build/components/versions.yml @@ -3,7 +3,7 @@ firmware: libvirt: v10.9.0 edk2: stable202411 core: - 3p-kubevirt: dvp/set-memory-limits-while-hotplugging + 3p-kubevirt: dvp/hotplug-cpu-prefer-cores-over-sockets 3p-containerized-data-importer: v1.60.3-v12n.16 distribution: 2.8.3 package: diff --git a/images/virt-artifact/werf.inc.yaml b/images/virt-artifact/werf.inc.yaml index 860b1a58c1..f3e1714dd6 100644 --- a/images/virt-artifact/werf.inc.yaml +++ b/images/virt-artifact/werf.inc.yaml @@ -15,7 +15,7 @@ secrets: shell: install: - | - echo rebuild 1 + echo rebuild 11 echo "Git clone {{ $gitRepoName }} repository..." git clone --depth=1 $(cat /run/secrets/SOURCE_REPO)/{{ $gitRepoUrl }} --branch {{ $tag }} /src/kubevirt diff --git a/images/virtualization-artifact/pkg/controller/kvbuilder/kvvm.go b/images/virtualization-artifact/pkg/controller/kvbuilder/kvvm.go index d6ab79887d..a9f53558bb 100644 --- a/images/virtualization-artifact/pkg/controller/kvbuilder/kvvm.go +++ b/images/virtualization-artifact/pkg/controller/kvbuilder/kvvm.go @@ -20,6 +20,8 @@ import ( "fmt" "maps" "os" + "strconv" + "strings" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -52,6 +54,16 @@ const ( EnableMemoryHotplugThreshold = 1 * 1024 * 1024 * 1024 // 1 Gi (no hotplug for VMs with less than 1Gi) ) +const ( + // VCPUTopologyDynamicCoresAnnotation annotation indicates "distributed by sockets" or "dynamic cores number" VCPU topology. + VCPUTopologyDynamicCoresAnnotation = "internal.virtualization.deckhouse.io/vcpu-topology-dynamic-cores" + + CPUResourcesRequestsFractionAnnotation = "internal.virtualization.deckhouse.io/cpu-resources-requests-fraction" + + // CPUMaxCoresPerSocket is a maximum number of cores per socket. + CPUMaxCoresPerSocket = 16 +) + type KVVMOptions struct { EnableParavirtualization bool OsType v1alpha2.OsType @@ -251,25 +263,35 @@ func (b *KVVM) SetCPU(cores int, coreFraction string) error { if domainSpec.CPU == nil { domainSpec.CPU = &virtv1.CPU{} } - cpuRequest, err := GetCPURequest(cores, coreFraction) + + fraction, err := GetCPUFraction(coreFraction) if err != nil { return err } - cpuLimit := GetCPULimit(cores) - if domainSpec.Resources.Requests == nil { - domainSpec.Resources.Requests = make(map[corev1.ResourceName]resource.Quantity) - } - if domainSpec.Resources.Limits == nil { - domainSpec.Resources.Limits = make(map[corev1.ResourceName]resource.Quantity) - } - domainSpec.Resources.Requests[corev1.ResourceCPU] = *cpuRequest - domainSpec.Resources.Limits[corev1.ResourceCPU] = *cpuLimit - - socketsNeeded, coresNeeded := vm.CalculateCoresAndSockets(cores) - - domainSpec.CPU.Cores = uint32(coresNeeded) - domainSpec.CPU.Sockets = uint32(socketsNeeded) - domainSpec.CPU.MaxSockets = uint32(socketsNeeded) + b.SetKVVMIAnnotation(CPUResourcesRequestsFractionAnnotation, strconv.Itoa(fraction)) + + //cpuRequest, err := GetCPURequest(cores, coreFraction) + //if err != nil { + // return err + //} + //cpuLimit := GetCPULimit(cores) + //if domainSpec.Resources.Requests == nil { + // domainSpec.Resources.Requests = make(map[corev1.ResourceName]resource.Quantity) + //} + //if domainSpec.Resources.Limits == nil { + // domainSpec.Resources.Limits = make(map[corev1.ResourceName]resource.Quantity) + //} + //domainSpec.Resources.Requests[corev1.ResourceCPU] = *cpuRequest + //domainSpec.Resources.Limits[corev1.ResourceCPU] = *cpuLimit + + socketsNeeded, coresPerSocketNeeded := vm.CalculateCoresAndSockets(cores) + + // Use "dynamic cores" hotplug strategy. + // Workaround: swap cores and sockets in domainSpec to bypass vm-validator webhook. + b.SetKVVMIAnnotation(VCPUTopologyDynamicCoresAnnotation, "") + domainSpec.CPU.Cores = uint32(socketsNeeded) + domainSpec.CPU.Sockets = uint32(coresPerSocketNeeded) + domainSpec.CPU.MaxSockets = CPUMaxCoresPerSocket return nil } @@ -342,6 +364,40 @@ func isVMRunningWithMemoryResources(kvvm *virtv1.VirtualMachine) bool { return hasMemoryRequests && hasMemoryLimits } +func GetCPUFraction(cpuFraction string) (int, error) { + if cpuFraction == "" { + return 100, nil + } + fraction := intstr.FromString(cpuFraction) + value, _, err := getIntOrPercentValueSafely(&fraction) + if err != nil { + return 0, fmt.Errorf("invalid value for cpu fraction: %v", err) + } + return value, nil +} + +func getIntOrPercentValueSafely(intOrStr *intstr.IntOrString) (int, bool, error) { + switch intOrStr.Type { + case intstr.Int: + return intOrStr.IntValue(), false, nil + case intstr.String: + isPercent := false + s := intOrStr.StrVal + if strings.HasSuffix(s, "%") { + isPercent = true + s = strings.TrimSuffix(intOrStr.StrVal, "%") + } else { + return 0, false, fmt.Errorf("invalid type: string is not a percentage") + } + v, err := strconv.Atoi(s) + if err != nil { + return 0, false, fmt.Errorf("invalid value %q: %v", intOrStr.StrVal, err) + } + return int(v), isPercent, nil + } + return 0, false, fmt.Errorf("invalid type: neither int nor percentage") +} + func GetCPURequest(cores int, coreFraction string) (*resource.Quantity, error) { if coreFraction == "" { return GetCPULimit(cores), nil diff --git a/images/virtualization-artifact/pkg/controller/vm/internal/statistic.go b/images/virtualization-artifact/pkg/controller/vm/internal/statistic.go index 0b0f504e15..3541772793 100644 --- a/images/virtualization-artifact/pkg/controller/vm/internal/statistic.go +++ b/images/virtualization-artifact/pkg/controller/vm/internal/statistic.go @@ -90,27 +90,24 @@ func (h *StatisticHandler) syncResources(changed *v1alpha2.VirtualMachine, var ( cpuKVVMIRequest resource.Quantity memorySize resource.Quantity - cores int topology v1alpha2.Topology coreFraction string ) if kvvmi == nil { memorySize = changed.Spec.Memory.Size - cores = changed.Spec.CPU.Cores - coreFraction = changed.Spec.CPU.CoreFraction - sockets, coresPerSocket := vm.CalculateCoresAndSockets(cores) + sockets, coresPerSocket := vm.CalculateCoresAndSockets(changed.Spec.CPU.Cores) topology = v1alpha2.Topology{CoresPerSocket: coresPerSocket, Sockets: sockets} + coreFraction = changed.Spec.CPU.CoreFraction } else { cpuKVVMIRequest = kvvmi.Spec.Domain.Resources.Requests[corev1.ResourceCPU] memorySize = kvvmi.Spec.Domain.Resources.Requests[corev1.ResourceMemory] - cores = h.getCoresByKVVMI(kvvmi) coreFraction = h.getCoreFractionByKVVMI(kvvmi) topology = h.getCurrentTopologyByKVVMI(kvvmi) } resources = v1alpha2.ResourcesStatus{ CPU: v1alpha2.CPUStatus{ - Cores: cores, + Cores: topology.CoresPerSocket * topology.Sockets, CoreFraction: coreFraction, RequestedCores: cpuKVVMIRequest, Topology: topology, @@ -165,18 +162,31 @@ func (h *StatisticHandler) syncResources(changed *v1alpha2.VirtualMachine, changed.Status.Resources = resources } +// getCoresByKVVMI +// TODO refactor: no need to get cores from limits after enabling CPU hotplug, kvvmi.Spec.Domain.CPU should be enough. func (h *StatisticHandler) getCoresByKVVMI(kvvmi *virtv1.VirtualMachineInstance) int { if kvvmi == nil { return -1 } - cpuKVVMILimit := kvvmi.Spec.Domain.Resources.Limits[corev1.ResourceCPU] - return int(cpuKVVMILimit.Value()) + + cpuKVVMILimit, hasLimits := kvvmi.Spec.Domain.Resources.Limits[corev1.ResourceCPU] + if hasLimits { + return int(cpuKVVMILimit.Value()) + } + + return 1 } func (h *StatisticHandler) getCoreFractionByKVVMI(kvvmi *virtv1.VirtualMachineInstance) string { if kvvmi == nil { return "" } + // Fraction is stored in annotation after enabling CPU hotplug. + cpuFractionStr, hasAnno := kvvmi.Annotations[""] + if hasAnno { + return cpuFractionStr + "%" + } + // Also support previous implementation: calculate from requests and limits values. cpuKVVMIRequest := kvvmi.Spec.Domain.Resources.Requests[corev1.ResourceCPU] return strconv.Itoa(int(cpuKVVMIRequest.MilliValue())*100/(h.getCoresByKVVMI(kvvmi)*1000)) + "%" } diff --git a/images/virtualization-artifact/pkg/controller/vm/internal/sync_kvvm.go b/images/virtualization-artifact/pkg/controller/vm/internal/sync_kvvm.go index 0b57588c3f..679d9cd398 100644 --- a/images/virtualization-artifact/pkg/controller/vm/internal/sync_kvvm.go +++ b/images/virtualization-artifact/pkg/controller/vm/internal/sync_kvvm.go @@ -255,7 +255,7 @@ func (h *SyncKvvmHandler) Name() string { } func (h *SyncKvvmHandler) isWaiting(vm *v1alpha2.VirtualMachine) bool { - return !checkVirtualMachineConfiguration(vm) + return !virtualMachineDependenciesAreReady(vm) } func (h *SyncKvvmHandler) syncKVVM(ctx context.Context, s state.VirtualMachineState, allChanges vmchange.SpecChanges) (bool, error) { diff --git a/images/virtualization-artifact/pkg/controller/vm/internal/sync_metadata.go b/images/virtualization-artifact/pkg/controller/vm/internal/sync_metadata.go index df72bb67d9..349c7ec27b 100644 --- a/images/virtualization-artifact/pkg/controller/vm/internal/sync_metadata.go +++ b/images/virtualization-artifact/pkg/controller/vm/internal/sync_metadata.go @@ -22,6 +22,7 @@ import ( "fmt" "maps" + "github.com/deckhouse/virtualization-controller/pkg/controller/kvbuilder" corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -188,10 +189,20 @@ func (h *SyncMetadataHandler) patchLabelsAndAnnotations(ctx context.Context, obj return h.client.Patch(ctx, obj, client.RawPatch(types.JSONPatchType, bytes)) } +var annotationsToKeep = []string{ + annotations.AnnNetworksSpec, + virtv1.AllowPodBridgeNetworkLiveMigrationAnnotation, + netmanager.AnnoIPAddressCNIRequest, + virtv1.USBMigrationStrategyAnn, + kvbuilder.CPUResourcesRequestsFractionAnnotation, + kvbuilder.VCPUTopologyDynamicCoresAnnotation, +} + // updateKVVMSpecTemplateMetadataAnnotations ensures that the special network annotation is present if it exists. // It also removes well-known annotations that are dangerous to propagate. func (h *SyncMetadataHandler) updateKVVMSpecTemplateMetadataAnnotations(currAnno, newAnno map[string]string) map[string]string { res := make(map[string]string, len(newAnno)) + for k, v := range newAnno { if k == annotations.AnnVMLastAppliedSpec || k == annotations.AnnVMClassLastAppliedSpec { continue @@ -200,20 +211,11 @@ func (h *SyncMetadataHandler) updateKVVMSpecTemplateMetadataAnnotations(currAnno res[k] = v } - if v, ok := currAnno[annotations.AnnNetworksSpec]; ok { - res[annotations.AnnNetworksSpec] = v - } - - if v, ok := currAnno[virtv1.AllowPodBridgeNetworkLiveMigrationAnnotation]; ok { - res[virtv1.AllowPodBridgeNetworkLiveMigrationAnnotation] = v - } - - if v, ok := currAnno[netmanager.AnnoIPAddressCNIRequest]; ok { - res[netmanager.AnnoIPAddressCNIRequest] = v - } - - if v, ok := currAnno[virtv1.USBMigrationStrategyAnn]; ok { - res[virtv1.USBMigrationStrategyAnn] = v + // Restore annotations set by kvbuilder. + for _, keepAnno := range annotationsToKeep { + if v, ok := currAnno[keepAnno]; ok { + res[keepAnno] = v + } } return commonvm.RemoveNonPropagatableAnnotations(res) diff --git a/images/virtualization-artifact/pkg/controller/vm/internal/sync_power_state.go b/images/virtualization-artifact/pkg/controller/vm/internal/sync_power_state.go index f3939f0ff0..fbb1143601 100644 --- a/images/virtualization-artifact/pkg/controller/vm/internal/sync_power_state.go +++ b/images/virtualization-artifact/pkg/controller/vm/internal/sync_power_state.go @@ -127,7 +127,7 @@ func (h *SyncPowerStateHandler) syncPowerState( }) changed := s.VirtualMachine().Changed() - isConfigurationApplied := checkVirtualMachineConfiguration(changed) + isConfigurationApplied := virtualMachineDependenciesAreReady(changed) maintenance, _ := conditions.GetCondition(vmcondition.TypeMaintenance, changed.Status.Conditions) var vmAction VMAction diff --git a/images/virtualization-artifact/pkg/controller/vm/internal/util.go b/images/virtualization-artifact/pkg/controller/vm/internal/util.go index c47ee80776..9bd9ef9639 100644 --- a/images/virtualization-artifact/pkg/controller/vm/internal/util.go +++ b/images/virtualization-artifact/pkg/controller/vm/internal/util.go @@ -95,14 +95,15 @@ type PhaseGetter func(vm *v1alpha2.VirtualMachine, kvvm *virtv1.VirtualMachine) var mapPhases = map[virtv1.VirtualMachinePrintableStatus]PhaseGetter{ // VirtualMachineStatusStopped indicates that the virtual machine is currently stopped and isn't expected to start. virtv1.VirtualMachineStatusStopped: func(vm *v1alpha2.VirtualMachine, kvvm *virtv1.VirtualMachine) v1alpha2.MachinePhase { - if vm != nil && kvvm != nil { - if !checkVirtualMachineConfiguration(vm) && - kvvm != nil && kvvm.Annotations[annotations.AnnVMStartRequested] == "true" { - return v1alpha2.MachinePending - } + if vm == nil { + return v1alpha2.MachineStopped + } + + if !virtualMachineDependenciesAreReady(vm) && kvvm.Annotations[annotations.AnnVMStartRequested] == "true" { + return v1alpha2.MachinePending } - if vm != nil && vm.Status.Phase == v1alpha2.MachinePending && + if vm.Status.Phase == v1alpha2.MachinePending && (vm.Spec.RunPolicy == v1alpha2.AlwaysOnPolicy || vm.Spec.RunPolicy == v1alpha2.AlwaysOnUnlessStoppedManually) { return v1alpha2.MachinePending } @@ -184,6 +185,11 @@ var mapPhases = map[virtv1.VirtualMachinePrintableStatus]PhaseGetter{ virtv1.VirtualMachineStatusWaitingForVolumeBinding: func(_ *v1alpha2.VirtualMachine, _ *virtv1.VirtualMachine) v1alpha2.MachinePhase { return v1alpha2.MachinePending }, + // VirtualMachineStatusWaitingForReceiver indicates that this virtual machine is a receiver VM and + // migration should start next. + virtv1.VirtualMachineStatusWaitingForReceiver: func(_ *v1alpha2.VirtualMachine, _ *virtv1.VirtualMachine) v1alpha2.MachinePhase { + return v1alpha2.MachineMigrating + }, kvvmEmptyPhase: func(_ *v1alpha2.VirtualMachine, _ *virtv1.VirtualMachine) v1alpha2.MachinePhase { return v1alpha2.MachinePending @@ -251,7 +257,8 @@ func podFinal(pod corev1.Pod) bool { return pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed } -func checkVirtualMachineConfiguration(vm *v1alpha2.VirtualMachine) bool { +// virtualMachineDependenciesAreReady returns whether VM +func virtualMachineDependenciesAreReady(vm *v1alpha2.VirtualMachine) bool { for _, c := range vm.Status.Conditions { switch vmcondition.Type(c.Type) { case vmcondition.TypeBlockDevicesReady: diff --git a/images/virtualization-artifact/pkg/controller/vmchange/comparators.go b/images/virtualization-artifact/pkg/controller/vmchange/comparators.go index 106bfad7ef..3f2cce7de5 100644 --- a/images/virtualization-artifact/pkg/controller/vmchange/comparators.go +++ b/images/virtualization-artifact/pkg/controller/vmchange/comparators.go @@ -19,6 +19,7 @@ package vmchange import ( "k8s.io/apimachinery/pkg/api/resource" + "github.com/deckhouse/virtualization-controller/pkg/common/vm" "github.com/deckhouse/virtualization-controller/pkg/controller/kvbuilder" "github.com/deckhouse/virtualization/api/core/v1alpha2" ) @@ -127,7 +128,15 @@ func compareBootloader(current, desired *v1alpha2.VirtualMachineSpec) []FieldCha // compareCPU returns changes in the cpu section. func compareCPU(current, desired *v1alpha2.VirtualMachineSpec) []FieldChange { - coresChanges := compareInts("cpu.cores", current.CPU.Cores, desired.CPU.Cores, 0, ActionRestart) + // Cores can be changed "on the fly" using CPU Hotplug ... + coresChangedAction := ActionApplyImmediate + // ... but sockets count change requires a reboot. + currentSockets, _ := vm.CalculateCoresAndSockets(current.CPU.Cores) + desiredSockets, _ := vm.CalculateCoresAndSockets(desired.CPU.Cores) + if currentSockets != desiredSockets { + coresChangedAction = ActionRestart + } + coresChanges := compareInts("cpu.cores", current.CPU.Cores, desired.CPU.Cores, 0, coresChangedAction) fractionChanges := compareStrings("cpu.coreFraction", current.CPU.CoreFraction, desired.CPU.CoreFraction, DefaultCPUCoreFraction, ActionRestart) // Yield full replace if both fields changed. diff --git a/images/virtualization-artifact/pkg/controller/workload-updater/internal/handler/hotplug.go b/images/virtualization-artifact/pkg/controller/workload-updater/internal/handler/hotplug.go index c2bbdeb6f2..64845b963a 100644 --- a/images/virtualization-artifact/pkg/controller/workload-updater/internal/handler/hotplug.go +++ b/images/virtualization-artifact/pkg/controller/workload-updater/internal/handler/hotplug.go @@ -18,6 +18,7 @@ package handler import ( "context" + "fmt" corev1 "k8s.io/api/core/v1" virtv1 "kubevirt.io/api/core/v1" @@ -56,7 +57,12 @@ func (h *HotplugHandler) Handle(ctx context.Context, vm *v1alpha2.VirtualMachine } cond, _ := conditions.GetKVVMICondition(virtv1.VirtualMachineInstanceMemoryChange, kvvmi.Status.Conditions) - if cond.Status != corev1.ConditionTrue { + isMemoryHotplug := cond.Status == corev1.ConditionTrue + + cond, _ = conditions.GetKVVMICondition(virtv1.VirtualMachineInstanceVCPUChange, kvvmi.Status.Conditions) + isCPUHotplug := cond.Status == corev1.ConditionTrue + + if !isCPUHotplug && !isMemoryHotplug { return reconcile.Result{}, nil } @@ -76,5 +82,5 @@ func (h *HotplugHandler) Name() string { } func getHotplugResourcesSum(vm *v1alpha2.VirtualMachine) string { - return vm.Spec.Memory.Size.String() + return fmt.Sprintf("cpu.cores=%d,memory.size=%s", vm.Spec.CPU.Cores, vm.Spec.Memory.Size.String()) } diff --git a/images/virtualization-artifact/pkg/controller/workload-updater/internal/watcher/kvvmi.go b/images/virtualization-artifact/pkg/controller/workload-updater/internal/watcher/kvvmi.go index 59eec3b3cd..5046b7b9f9 100644 --- a/images/virtualization-artifact/pkg/controller/workload-updater/internal/watcher/kvvmi.go +++ b/images/virtualization-artifact/pkg/controller/workload-updater/internal/watcher/kvvmi.go @@ -55,8 +55,18 @@ func (w *KVVMIWatcher) Watch(mgr manager.Manager, ctr controller.Controller) err DeleteFunc: func(e event.TypedDeleteEvent[*virtv1.VirtualMachineInstance]) bool { return false }, UpdateFunc: func(e event.TypedUpdateEvent[*virtv1.VirtualMachineInstance]) bool { nodePlacementCondition, _ := conditions.GetKVVMICondition(conditions.VirtualMachineInstanceNodePlacementNotMatched, e.ObjectNew.Status.Conditions) + if nodePlacementCondition.Status == corev1.ConditionTrue { + return true + } hotMemoryChangeCondition, _ := conditions.GetKVVMICondition(virtv1.VirtualMachineInstanceMemoryChange, e.ObjectNew.Status.Conditions) - return nodePlacementCondition.Status == corev1.ConditionTrue || hotMemoryChangeCondition.Status == corev1.ConditionTrue + if hotMemoryChangeCondition.Status == corev1.ConditionTrue { + return true + } + hotCPUChangeCondition, _ := conditions.GetKVVMICondition(virtv1.VirtualMachineInstanceVCPUChange, e.ObjectNew.Status.Conditions) + if hotCPUChangeCondition.Status == corev1.ConditionTrue { + return true + } + return false }, }, ),