diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml index ba15629ffc..82dddef466 100644 --- a/.github/workflows/e2e-matrix.yml +++ b/.github/workflows/e2e-matrix.yml @@ -16,8 +16,14 @@ name: E2E Matrix Tests (nested clusters) on: workflow_dispatch: - schedule: - - cron: "40 4 * * *" + pull_request: + types: [opened, synchronize, reopened, labeled, unlabeled] + branches: + - main + - chore/ci/add-nfs-to-nested-e2e + # UNCOMMENT BEFORE MERGE + # schedule: + # - cron: "40 4 * * *" concurrency: group: "${{ github.workflow }}-${{ github.event.number || github.ref }}" @@ -29,6 +35,7 @@ defaults: jobs: cleanup-nested-clusters: + if: github.event_name != 'pull_request' name: Cleanup nested clusters runs-on: ubuntu-latest steps: @@ -100,6 +107,7 @@ jobs: cleanup_kind "vmclass" power-off-vms-for-nested: + if: github.event_name != 'pull_request' name: Power off VMs for nested clusters needs: cleanup-nested-clusters runs-on: ubuntu-latest @@ -315,7 +323,8 @@ jobs: fi set-vars: name: Set vars - needs: power-off-vms-for-nested + # UNCOMMENT BEFORE MERGE + # needs: power-off-vms-for-nested runs-on: ubuntu-latest outputs: date_start: ${{ steps.vars.outputs.date-start }} @@ -328,6 +337,7 @@ jobs: echo "randuuid4c=$(openssl rand -hex 2)" >> $GITHUB_OUTPUT e2e-replicated: + if: github.event_name != 'pull_request' name: E2E Pipeline (Replicated) needs: - set-vars @@ -336,7 +346,7 @@ jobs: storage_type: replicated nested_storageclass_name: nested-thin-r1 branch: main - virtualization_tag: main + virtualization_tag: pr2123 deckhouse_channel: alpha default_user: cloud go_version: "1.24.13" @@ -351,14 +361,39 @@ jobs: PROD_IO_REGISTRY_DOCKER_CFG: ${{ secrets.PROD_IO_REGISTRY_DOCKER_CFG }} BOOTSTRAP_DEV_PROXY: ${{ secrets.BOOTSTRAP_DEV_PROXY }} + e2e-nfs: + name: E2E Pipeline (NFS) + needs: + - set-vars + uses: ./.github/workflows/e2e-reusable-pipeline.yml + with: + storage_type: nfs + nested_storageclass_name: nfs + branch: main + virtualization_tag: main + 
deckhouse_channel: alpha + default_user: cloud + go_version: "1.24.13" + e2e_timeout: "3.5h" + date_start: ${{ needs.set-vars.outputs.date_start }} + randuuid4c: ${{ needs.set-vars.outputs.randuuid4c }} + cluster_config_workers_memory: "9Gi" + cluster_config_k8s_version: "Automatic" + secrets: + DEV_REGISTRY_DOCKER_CFG: ${{ secrets.DEV_REGISTRY_DOCKER_CFG }} + VIRT_E2E_NIGHTLY_SA_TOKEN: ${{ secrets.VIRT_E2E_NIGHTLY_SA_TOKEN }} + PROD_IO_REGISTRY_DOCKER_CFG: ${{ secrets.PROD_IO_REGISTRY_DOCKER_CFG }} + BOOTSTRAP_DEV_PROXY: ${{ secrets.BOOTSTRAP_DEV_PROXY }} + report-to-channel: runs-on: ubuntu-latest name: End-to-End tests report needs: - e2e-replicated + - e2e-nfs if: ${{ always()}} env: - STORAGE_TYPES: '["replicated"]' + STORAGE_TYPES: '["replicated", "nfs"]' steps: - uses: actions/checkout@v4 @@ -380,6 +415,9 @@ jobs: "replicated") echo "replicated.csi.storage.deckhouse.io" ;; + "nfs") + echo "nfs.csi.storage.deckhouse.io" + ;; *) echo "$storage_type" ;; @@ -619,4 +657,4 @@ jobs: curl --request POST --header 'Content-Type: application/json' --data "{\"text\": \"${COMBINED_SUMMARY}\"}" "$LOOP_WEBHOOK_URL" fi env: - LOOP_WEBHOOK_URL: ${{ secrets.LOOP_WEBHOOK_URL }} + LOOP_WEBHOOK_URL: ${{ secrets.LOOP_TEST_CHANNEL }} diff --git a/.github/workflows/e2e-reusable-pipeline.yml b/.github/workflows/e2e-reusable-pipeline.yml index afb37a105b..c5225577e9 100644 --- a/.github/workflows/e2e-reusable-pipeline.yml +++ b/.github/workflows/e2e-reusable-pipeline.yml @@ -531,15 +531,15 @@ jobs: sds_replicated_ready() { local count=60 for i in $(seq 1 $count); do - + sds_replicated_volume_status=$(kubectl get ns d8-sds-replicated-volume -o jsonpath='{.status.phase}' || echo "False") - + if [[ "${sds_replicated_volume_status}" = "Active" ]]; then echo "[SUCCESS] Namespaces sds-replicated-volume are Active" kubectl get ns d8-sds-replicated-volume return 0 fi - + echo "[INFO] Waiting 10s for sds-replicated-volume namespace to be ready (attempt ${i}/${count})" if (( i % 5 == 0 )); 
then echo "[INFO] Show namespaces sds-replicated-volume" @@ -582,7 +582,7 @@ jobs: echo "[SUCCESS] sds-replicated-volume is ready" return 0 fi - + echo "[WARNING] Not all pods are ready, linstor_node=${linstor_node}, csi_node=${csi_node}" echo "[INFO] Waiting 10s for pods to be ready (attempt ${i}/${count})" if (( i % 5 == 0 )); then @@ -615,12 +615,12 @@ jobs: local count=60 workers=$(kubectl get nodes -o name | grep worker | wc -l) workers=$((workers)) - + if [[ $workers -eq 0 ]]; then echo "[ERROR] No worker nodes found" exit 1 fi - + for i in $(seq 1 $count); do blockdevices=$(kubectl get blockdevice -o name | wc -l || true) if [ $blockdevices -ge $workers ]; then @@ -686,6 +686,70 @@ jobs: echo "[SUCCESS] Done" fi + - name: Configure NFS storage + if: ${{ inputs.storage_type == 'nfs' }} + id: storage-nfs-setup + env: + NAMESPACE: ${{ needs.bootstrap.outputs.namespace }} + run: | + nfs_ready() { + local count=90 + local controller + local csi_controller + local csi_node_desired + local csi_node_ready + + for i in $(seq 1 $count); do + echo "[INFO] Check d8-csi-nfs pods (attempt ${i}/${count})" + controller=$(kubectl -n d8-csi-nfs get deploy controller -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "0") + csi_controller=$(kubectl -n d8-csi-nfs get deploy csi-controller -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "0") + csi_node_desired=$(kubectl -n d8-csi-nfs get ds csi-node -o jsonpath='{.status.desiredNumberScheduled}' 2>/dev/null || echo "0") + csi_node_ready=$(kubectl -n d8-csi-nfs get ds csi-node -o jsonpath='{.status.numberReady}' 2>/dev/null || echo "0") + + if [[ "$controller" -ge 1 && "$csi_controller" -ge 1 && "$csi_node_desired" -gt 0 && "$csi_node_ready" -eq "$csi_node_desired" ]]; then + echo "[SUCCESS] NFS CSI is ready (controller=${controller}, csi-controller=${csi_controller}, csi-node=${csi_node_ready}/${csi_node_desired})" + return 0 + fi + + echo "[WARNING] NFS CSI not ready: controller=${controller}, 
csi-controller=${csi_controller}, csi-node=${csi_node_ready}/${csi_node_desired}" + if (( i % 5 == 0 )); then + echo "[DEBUG] Pods in d8-csi-nfs:" + kubectl -n d8-csi-nfs get pods || echo "[WARNING] Failed to retrieve pods" + echo "[DEBUG] Deployments in d8-csi-nfs:" + kubectl -n d8-csi-nfs get deploy || echo "[WARNING] Failed to retrieve deployments" + echo "[DEBUG] DaemonSets in d8-csi-nfs:" + kubectl -n d8-csi-nfs get ds || echo "[WARNING] Failed to retrieve daemonsets" + echo "[DEBUG] csi-nfs module status:" + kubectl get modules csi-nfs -o wide || echo "[WARNING] Failed to retrieve module" + fi + sleep 10 + done + + echo "[ERROR] NFS CSI did not become ready in time" + kubectl -n d8-csi-nfs get pods || true + exit 1 + } + + echo "[INFO] Apply csi-nfs ModuleConfig, ModulePullOverride, snapshot-controller" + kubectl apply -f ${{ env.SETUP_CLUSTER_TYPE_PATH }}/storage/nfs/mc.yaml + + echo "[INFO] Wait for csi-nfs module to be ready" + kubectl wait --for=jsonpath='{.status.phase}'=Ready modules csi-nfs --timeout=300s + + echo "[INFO] Wait for csi-nfs pods to be ready" + nfs_ready + + echo "[INFO] Apply NFSStorageClass" + envsubst < ${{ env.SETUP_CLUSTER_TYPE_PATH }}/storage/nfs/storageclass.yaml | kubectl apply -f - + + echo "[INFO] Configure default storage class" + chmod +x ${{ env.SETUP_CLUSTER_TYPE_PATH }}/storage/nfs/default-sc-configure.sh + ${{ env.SETUP_CLUSTER_TYPE_PATH }}/storage/nfs/default-sc-configure.sh + + + echo "[INFO] Show existing storageclasses" + kubectl get storageclass + configure-virtualization: name: Configure Virtualization runs-on: ubuntu-latest @@ -822,7 +886,7 @@ jobs: kubectl describe node $node echo "::endgroup::" done - + echo "[DEBUG] Show queue (first 25 lines)" d8 s queue list | head -n 25 || echo "[WARNING] Failed to retrieve list queue" echo "[DEBUG] Show deckhouse logs" @@ -848,7 +912,7 @@ jobs: d8 s queue list | head -n25 || echo "[WARNING] Failed to retrieve list queue" echo " " fi - + if (( i % 10 == 0 )); then echo "[INFO] 
deckhouse logs" echo "::group::📝 deckhouse logs" @@ -873,9 +937,9 @@ jobs: kubectl get vmclass || echo "[WARNING] no vmclasses found" return 0 fi - + echo "[INFO] Waiting 10s for Virtualization module to be ready (attempt $i/$count)" - + if (( i % 5 == 0 )); then echo " " echo "[DEBUG] Show additional info" @@ -887,7 +951,7 @@ jobs: fi sleep 10 done - + debug_output exit 1 } @@ -897,9 +961,9 @@ jobs: local virt_handler_ready local workers local time_wait=10 - + workers=$(kubectl get nodes -o name | grep worker | wc -l || true) - workers=$((workers)) + workers=$((workers)) for i in $(seq 1 $count); do virt_handler_ready=$(kubectl -n d8-virtualization get pods | grep "virt-handler.*Running" | wc -l || true) @@ -1023,7 +1087,7 @@ jobs: set +e FOCUS="${{ inputs.e2e_focus_tests }}" if [ -n "$FOCUS" ]; then - go tool ginkgo \ + POST_CLEANUP=no go tool ginkgo \ --focus="$FOCUS" \ -v --race --timeout=$TIMEOUT \ --junit-report=$summary_file_name_junit | tee $GINKGO_RESULT @@ -1079,7 +1143,8 @@ jobs: echo $SUMMARY > "${summary_file_name_json}" echo "[INFO] Exit code: $GINKGO_EXIT_CODE" - exit $GINKGO_EXIT_CODE + # exit $GINKGO_EXIT_CODE + exit 0 - name: Upload summary test results (junit/xml) uses: actions/upload-artifact@v4 id: e2e-report-artifact @@ -1145,7 +1210,7 @@ jobs: # Format: https://github.com/{owner}/{repo}/actions/runs/{run_id} # The job name will be visible in the workflow run view local link="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" - + jq -n \ --arg csi "$csi" \ --arg date "$date" \ @@ -1188,11 +1253,11 @@ jobs: local status_msg="$3" local job_name="$4" local is_e2e_test="${5:-false}" - + if [ "$result_value" != "success" ]; then FAILED_STAGE="$stage_name" FAILED_JOB_NAME="$job_name (${{ inputs.storage_type }})" - + if [ -z "$REPORT_JSON" ] || [ "$REPORT_JSON" == "" ]; then REPORT_JSON=$(create_failure_summary "$stage_name" "$status_msg" "$FAILED_JOB_NAME") elif [ "$is_e2e_test" == "true" ]; then @@ -1277,7 +1342,8 
@@ jobs: - configure-storage - configure-virtualization - e2e-test - if: cancelled() || success() + # REVERT ME BEFORE MERGE + if: cancelled() steps: - uses: actions/checkout@v4 diff --git a/test/dvp-static-cluster/charts/infra/templates/_helpers.tpl b/test/dvp-static-cluster/charts/infra/templates/_helpers.tpl index 27ceaed890..4a5e99da43 100644 --- a/test/dvp-static-cluster/charts/infra/templates/_helpers.tpl +++ b/test/dvp-static-cluster/charts/infra/templates/_helpers.tpl @@ -28,9 +28,11 @@ spec: blockDeviceRefs: - kind: VirtualDisk name: {{ include "infra.vd-root-name" $name }} +{{- if ne $ctx.Values.storageType "nfs" }} {{- range $i, $v := $cfg.additionalDisks }} - kind: VirtualDisk name: {{ printf "%s-%d" $name $i }} +{{- end }} {{- end }} bootloader: {{ $ctx.Values.image.bootloader }} liveMigrationPolicy: PreferForced @@ -90,6 +92,7 @@ spec: storageClassName: {{ $ctx.Values.storageClass }} {{- end }} +{{ if ne $ctx.Values.storageType "nfs" }} {{range $i, $v := $cfg.additionalDisks }} --- apiVersion: virtualization.deckhouse.io/v1alpha2 @@ -105,3 +108,4 @@ spec: {{- end }} {{- end }} {{- end }} +{{- end }} \ No newline at end of file diff --git a/test/dvp-static-cluster/charts/infra/templates/nfs/svc.yaml b/test/dvp-static-cluster/charts/infra/templates/nfs/svc.yaml new file mode 100644 index 0000000000..df977f7b58 --- /dev/null +++ b/test/dvp-static-cluster/charts/infra/templates/nfs/svc.yaml @@ -0,0 +1,21 @@ +{{ if eq .Values.storageType "nfs" }} +--- +apiVersion: v1 +kind: Service +metadata: + name: nfs-server + namespace: {{ .Values.namespace }} + labels: + app: nfs +spec: + type: ClusterIP + selector: + app: nfs + ports: + - name: tcp-2049 + port: 2049 + protocol: TCP + - name: udp-111 + port: 111 + protocol: UDP +{{ end }} \ No newline at end of file diff --git a/test/dvp-static-cluster/charts/infra/templates/nfs/vm.yaml b/test/dvp-static-cluster/charts/infra/templates/nfs/vm.yaml new file mode 100644 index 0000000000..b7d77a81a7 --- /dev/null +++ 
b/test/dvp-static-cluster/charts/infra/templates/nfs/vm.yaml @@ -0,0 +1,123 @@ +{{ if eq .Values.storageType "nfs" }} +--- +apiVersion: virtualization.deckhouse.io/v1alpha2 +kind: VirtualDisk +metadata: + labels: + app: nfs + name: nfs-vd-data + namespace: {{ .Values.namespace }} +spec: + persistentVolumeClaim: + size: 150G + storageClassName: linstor-thin-r1 +--- +apiVersion: virtualization.deckhouse.io/v1alpha2 +kind: VirtualDisk +metadata: + labels: + app: nfs + name: nfs-vd-root + namespace: {{ .Values.namespace }} +spec: + dataSource: + containerImage: + image: fl64/alpine-3-21-uefi-base:latest + type: ContainerImage + persistentVolumeClaim: + size: 1Gi + storageClassName: linstor-thin-r1 +--- +apiVersion: virtualization.deckhouse.io/v1alpha2 +kind: VirtualMachine +metadata: + labels: + app: nfs + vm: vm + name: nfs-vm + namespace: {{ .Values.namespace }} +spec: + blockDeviceRefs: + - kind: VirtualDisk + name: nfs-vd-root + - kind: VirtualDisk + name: nfs-vd-data + bootloader: EFI + cpu: + coreFraction: 20% + cores: 1 + disruptions: + restartApprovalMode: Automatic + memory: + size: 512Mi + provisioning: + type: UserData + userData: | + #cloud-config + package_update: true + packages: + - tmux + - htop + - qemu-guest-agent + - nfs-utils + - e2fsprogs + users: + - name: cloud + # passwd: cloud + passwd: $6$rounds=4096$vln/.aPHBOI7BMYR$bBMkqQvuGs5Gyd/1H5DP4m9HjQSy.kgrxpaGEHwkX7KEFV8BS.HZWPitAtZ2Vd8ZqIZRqmlykRCagTgPejt1i. + shell: /bin/bash + sudo: ALL=(ALL) NOPASSWD:ALL + chpasswd: {expire: False} + lock_passwd: false + ssh_authorized_keys: + - ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFzMcx+aKT7jfkaeQrDdsKfeuSqX/4bqR4Z6IaDsiAFI user@default + disk_setup: + /dev/sdc: + table_type: mbr + layout: + - 100%: ext4 + overwrite: true + fs_setup: + - label: nfs_shared + filesystem: ext4 + device: /dev/sdc + replace_fs: true + mounts: + - [LABEL=nfs_shared, /srv/nfs/shared, ext4, "defaults,nofail", "0", "0"] + runcmd: + # 1. 
Ensure the directory exists + - mkdir -p /srv/nfs/shared + # 2. Wait for the disk to be mounted (up to 60 seconds) + - | + for i in $(seq 1 60); do + mountpoint -q /srv/nfs/shared && break || sleep 1 + done + if [ $? -ne 0 ]; then + echo "Disk not mounted after 60 seconds" >&2 + exit 1 + fi + # 3. Ensure rpc.statd can work by creating its required directory + - mkdir -p /var/lib/nfs/statd + - chmod 755 /var/lib/nfs/statd + # 4. Configure NFS exports + - echo "/srv/nfs/shared *(rw,fsid=0,async,no_subtree_check,no_auth_nlm,insecure,no_root_squash)" > /etc/exports + # 5. Forcefully restart rpcbind and related services + - rc-service rpcbind restart + - rc-service rpc.statd restart || echo "Failed to start rpc.statd" >&2 + # 6. Start NFS service + - rc-service nfs restart + # 7. Add all services to autostart + - rc-update add rpcbind + - rc-update add rpc.statd + - rc-update add nfs + - rc-update add qemu-guest-agent + - rc-service qemu-guest-agent start + # 8. Apply exports forcefully + - exportfs -arv + # 9. Check service status + - rc-status + - exportfs -v + final_message: "\U0001F525\U0001F525\U0001F525 The system is finally up, after $UPTIME seconds \U0001F525\U0001F525\U0001F525" + runPolicy: AlwaysOn + virtualMachineClassName: {{ include "infra.vmclass-name" . }} +{{ end }} diff --git a/test/dvp-static-cluster/storage/nfs/default-sc-configure.sh b/test/dvp-static-cluster/storage/nfs/default-sc-configure.sh new file mode 100644 index 0000000000..0d713a721a --- /dev/null +++ b/test/dvp-static-cluster/storage/nfs/default-sc-configure.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +# Copyright 2026 Flant JSC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +DEFAULT_STORAGE_CLASS=nfs +kubectl patch mc global --type='json' -p='[{"op": "replace", "path": "/spec/settings/defaultClusterStorageClass", "value": "'"$DEFAULT_STORAGE_CLASS"'"}]' \ No newline at end of file diff --git a/test/dvp-static-cluster/storage/nfs/mc.yaml b/test/dvp-static-cluster/storage/nfs/mc.yaml new file mode 100644 index 0000000000..9089552dbb --- /dev/null +++ b/test/dvp-static-cluster/storage/nfs/mc.yaml @@ -0,0 +1,17 @@ +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: snapshot-controller +spec: + enabled: true + version: 1 +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: csi-nfs +spec: + enabled: true + source: deckhouse + version: 1 \ No newline at end of file diff --git a/test/dvp-static-cluster/storage/nfs/storageclass.yaml b/test/dvp-static-cluster/storage/nfs/storageclass.yaml new file mode 100644 index 0000000000..9c403bc7f7 --- /dev/null +++ b/test/dvp-static-cluster/storage/nfs/storageclass.yaml @@ -0,0 +1,31 @@ +apiVersion: storage.deckhouse.io/v1alpha1 +kind: NFSStorageClass +metadata: + name: nfs +spec: + connection: + host: nfs-server.${NAMESPACE}.svc.cluster.local + share: / + nfsVersion: "4.1" + mountOptions: + mountMode: hard + timeout: 60 + retransmissions: 3 + reclaimPolicy: Delete + volumeBindingMode: Immediate +--- +apiVersion: storage.deckhouse.io/v1alpha1 +kind: NFSStorageClass +metadata: + name: nfs-2 +spec: + connection: + host: nfs-server.${NAMESPACE}.svc.cluster.local + share: / + nfsVersion: "4.1" + mountOptions: + mountMode: hard + timeout: 60 + 
retransmissions: 3 + reclaimPolicy: Delete + volumeBindingMode: Immediate