From b132fd85a569996c3d87ff54f1226037e7e390ca Mon Sep 17 00:00:00 2001 From: Moshe Kabala Date: Mon, 15 Jun 2026 16:58:39 +0300 Subject: [PATCH] uninstall: add --custom flag to interactively pick extra data to delete A normal uninstall removes the cluster and (depending on --purge/--cleanup/ --clear-data) wipes data in coarse, all-or-nothing groups. This adds a --custom flag that performs a normal uninstall (cluster removal) and then interactively asks which extra data to delete, per item: - Application data, per store: MongoDB, MinIO, Elasticsearch, Keycloak, plus an "all application data" rollup - Install config: manifests, hostname - Cache: container image cache (containerd dir or Docker volume), Zot registry data, helm chart cache Implementation: - pkg/local: add storage/mongodb and storage/minio dir constants and a RemoveDataSubDir helper (sudo fallback) for targeted removals. - pkg/server: add CustomTarget enum and UninstallCustom, sharing the cluster-removal preamble with the existing Uninstall. Deletions are best-effort; the first error is returned so the command exits non-zero. - cmd/server: add --custom (mutually exclusive with --purge/--cleanup/ --clear-data), a survey multi-select menu, and a confirmation that lists exactly what will be deleted. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- cmd/server/uninstall.go | 123 +++++++++++++++++++++++++++++++++++++++- pkg/local/utils.go | 18 ++++++ pkg/server/uninstall.go | 106 ++++++++++++++++++++++++++++++++-- 3 files changed, 241 insertions(+), 6 deletions(-) diff --git a/cmd/server/uninstall.go b/cmd/server/uninstall.go index 7ee38fb7f..2c17f9f5b 100644 --- a/cmd/server/uninstall.go +++ b/cmd/server/uninstall.go @@ -1,6 +1,10 @@ package server import ( + "context" + "fmt" + + "github.com/AlecAivazis/survey/v2" "github.com/spf13/cobra" "github.com/tensorleap/helm-charts/pkg/local" "github.com/tensorleap/helm-charts/pkg/log" @@ -11,12 +15,34 @@ type UninstallFlags struct { Purge bool Cleanup bool ClearData bool + Custom bool } func (flags *UninstallFlags) AddToCommand(cmd *cobra.Command) { cmd.Flags().BoolVar(&flags.Purge, "purge", false, "Remove all data and cached files") cmd.Flags().BoolVar(&flags.Cleanup, "cleanup", false, "Cleanup cached data (registry, containerd, helm-cache)") cmd.Flags().BoolVar(&flags.ClearData, "clear-data", false, "Clear application data (storage, manifests) but keep cache") + cmd.Flags().BoolVar(&flags.Custom, "custom", false, "Interactively choose exactly which extra data to delete on top of a normal uninstall") +} + +// customUninstallOption pairs a user-facing menu label with the target it maps +// to. The order here is the order shown in the prompt. 
+type customUninstallOption struct { + label string + target server.CustomTarget +} + +var customUninstallOptions = []customUninstallOption{ + {"Projects, datasets & job metadata (MongoDB)", server.TargetMongo}, + {"Datasets, model weights & artifacts (MinIO)", server.TargetMinio}, + {"Analyses, insights & sample data (Elasticsearch)", server.TargetElastic}, + {"User accounts & login (Keycloak)", server.TargetKeycloak}, + {"All application data (everything above)", server.TargetAllAppData}, + {"Install config — versions & params (manifests)", server.TargetManifests}, + {"Install hostname", server.TargetHostname}, + {"Container image cache (containerd)", server.TargetImageCache}, + {"In-cluster registry data (Zot)", server.TargetRegistry}, + {"Helm chart cache", server.TargetHelmCache}, } func NewUninstallCmd() *cobra.Command { @@ -24,7 +50,7 @@ func NewUninstallCmd() *cobra.Command { cmd := &cobra.Command{ Use: "uninstall", Short: "Remove local Tensorleap installation", - Long: `Remove local Tensorleap installation`, + Long: `Remove local Tensorleap installation. 
Use --custom to interactively pick exactly which extra data to delete.`, RunE: func(cmd *cobra.Command, args []string) error { _, err := server.InitDataDirFunc(cmd.Context(), "") if err != nil { @@ -41,10 +67,16 @@ func NewUninstallCmd() *cobra.Command { func RunUninstallCmd(cmd *cobra.Command, flags *UninstallFlags) error { log.SetCommandName("uninstall") + + if flags.Custom && (flags.Purge || flags.Cleanup || flags.ClearData) { + return fmt.Errorf("--custom cannot be combined with --purge, --cleanup, or --clear-data") + } + log.SendCloudReport("info", "Starting uninstall", "Starting", &map[string]interface{}{ "purge": flags.Purge, "cleanup": flags.Cleanup, "clearData": flags.ClearData, + "custom": flags.Custom, }) close, err := local.SetupInfra("uninstall") if err != nil { @@ -53,6 +85,11 @@ func RunUninstallCmd(cmd *cobra.Command, flags *UninstallFlags) error { defer close() ctx := cmd.Context() + + if flags.Custom { + return runCustomUninstall(ctx) + } + err = server.Uninstall(ctx, flags.Purge, flags.Cleanup, flags.ClearData) if err != nil { log.SendCloudReport("error", "Failed to uninstall", "Failed", &map[string]interface{}{"error": err.Error()}) @@ -63,6 +100,90 @@ func RunUninstallCmd(cmd *cobra.Command, flags *UninstallFlags) error { return nil } +// runCustomUninstall prompts for the extra data to delete, confirms, then runs +// the uninstall. The cluster is always removed regardless of the selection. 
+func runCustomUninstall(ctx context.Context) error { + targets, err := promptCustomUninstallTargets() + if err != nil { + return err + } + + if len(targets) > 0 { + confirmed, err := confirmCustomUninstall(targets) + if err != nil { + return err + } + if !confirmed { + log.Println("Uninstall cancelled") + return nil + } + } else { + log.Println("No extra data selected — performing a normal uninstall (removing the cluster only)") + } + + log.SendCloudReport("info", "Starting custom uninstall", "Running", &map[string]interface{}{"targets": targets}) + if err := server.UninstallCustom(ctx, targets); err != nil { + log.SendCloudReport("error", "Failed to uninstall", "Failed", &map[string]interface{}{"error": err.Error()}) + return err + } + + log.SendCloudReport("info", "Successfully completed uninstall", "Success", nil) + return nil +} + +func promptCustomUninstallTargets() ([]server.CustomTarget, error) { + options := make([]string, len(customUninstallOptions)) + for i, o := range customUninstallOptions { + options[i] = o.label + } + + selectedLabels := []string{} + prompt := &survey.MultiSelect{ + Message: "Select extra data to delete (space to toggle, enter to confirm). 
A normal uninstall removes the cluster regardless:", + Options: options, + } + if err := survey.AskOne(prompt, &selectedLabels); err != nil { + return nil, err + } + + labelToTarget := make(map[string]server.CustomTarget, len(customUninstallOptions)) + for _, o := range customUninstallOptions { + labelToTarget[o.label] = o.target + } + + targets := make([]server.CustomTarget, 0, len(selectedLabels)) + for _, l := range selectedLabels { + if t, ok := labelToTarget[l]; ok { + targets = append(targets, t) + } + } + return targets, nil +} + +func confirmCustomUninstall(targets []server.CustomTarget) (bool, error) { + selected := make(map[server.CustomTarget]bool, len(targets)) + for _, t := range targets { + selected[t] = true + } + + log.Println("The following will be permanently deleted (in addition to removing the Tensorleap cluster):") + for _, o := range customUninstallOptions { + if selected[o.target] { + log.Printf(" - %s", o.label) + } + } + + confirm := false + prompt := &survey.Confirm{ + Message: "Proceed? This cannot be undone.", + Default: false, + } + if err := survey.AskOne(prompt, &confirm); err != nil { + return false, err + } + return confirm, nil +} + func init() { RootCommand.AddCommand(NewUninstallCmd()) } diff --git a/pkg/local/utils.go b/pkg/local/utils.go index 231121b53..446a13c8f 100644 --- a/pkg/local/utils.go +++ b/pkg/local/utils.go @@ -21,6 +21,8 @@ const ( STORAGE_DIR_NAME = "storage" KEYCLOAK_DB_STORAGE_DIR_NAME = "storage/keycloak" ELASTIC_STORAGE_DIR_NAME = "storage/elasticsearch" + MONGODB_STORAGE_DIR_NAME = "storage/mongodb" + MINIO_STORAGE_DIR_NAME = "storage/minio" HOSTNAME_FILE = "hostname" MANIFEST_DIR_NAME = "manifests" INSTALLATION_PARAMS_FILE_NAME = "params.yaml" @@ -229,6 +231,22 @@ func ClearAppData() error { return nil } +// RemoveDataSubDir removes a single path (file or directory) under the server +// data dir, falling back to sudo when a permission error blocks direct removal. 
+// Used by the custom uninstall to delete only the items the user selected. The sudo fallback invokes rm directly (no shell), so a path containing spaces or shell metacharacters is passed through verbatim as one argument. +func RemoveDataSubDir(subDir string) error { + target := path.Join(GetServerDataDir(), subDir) + log.Infof("Removing: %s", target) + if err := os.RemoveAll(target); err != nil { + rmCmd := exec.Command("sudo", "rm", "-rf", "--", target) + if err := rmCmd.Run(); err != nil { + log.SendCloudReport("error", "Failed removing data path", "Failed", &map[string]interface{}{"path": target, "error": err.Error()}) + return err + } + } + return nil +} + func GetInstallationManifestPath() string { return path.Join(GetServerDataDir(), MANIFEST_DIR_NAME, INSTALLATION_MANIFEST_FILE_NAME) } diff --git a/pkg/server/uninstall.go b/pkg/server/uninstall.go index fec39cf14..e06bc1049 100644 --- a/pkg/server/uninstall.go +++ b/pkg/server/uninstall.go @@ -11,16 +11,112 @@ import ( const legacySidecarRegistryName = "k3d-tensorleap-registry" -func Uninstall(ctx context.Context, purge bool, cleanup bool, clearData bool) (err error) { - err = k3d.UninstallCluster(ctx) - if err != nil { +// CustomTarget identifies a single piece of data the custom uninstall can +// delete. A normal uninstall (cluster removal) always runs regardless; these +// are the extra opt-in deletions. +type CustomTarget string + +const ( + TargetMongo CustomTarget = "mongodb" + TargetMinio CustomTarget = "minio" + TargetElastic CustomTarget = "elasticsearch" + TargetKeycloak CustomTarget = "keycloak" + TargetAllAppData CustomTarget = "all-app-data" + TargetManifests CustomTarget = "manifests" + TargetHostname CustomTarget = "hostname" + TargetImageCache CustomTarget = "image-cache" + TargetRegistry CustomTarget = "registry" + TargetHelmCache CustomTarget = "helm-cache" +) + +// removeClusterAndLegacySidecar performs the baseline uninstall shared by every +// mode: delete the k3d cluster, then best-effort remove the legacy pre-Zot +// sidecar registry container. 
+func removeClusterAndLegacySidecar(ctx context.Context) error { + if err := k3d.UninstallCluster(ctx); err != nil { return err } - - // Best-effort cleanup of legacy k3d sidecar registry container (pre-Zot installs) if rmErr := docker.TryRemoveContainer(ctx, legacySidecarRegistryName); rmErr != nil { log.Warnf("Failed to remove legacy registry container: %v", rmErr) } + return nil +} + +// UninstallCustom runs a normal uninstall (removing the cluster) and then +// deletes only the extra data identified by targets. Deletions are best-effort: +// a failure is logged and the rest still run; the first error is returned so the +// command exits non-zero. +func UninstallCustom(ctx context.Context, targets []CustomTarget) error { + if err := removeClusterAndLegacySidecar(ctx); err != nil { + return err + } + + selected := make(map[CustomTarget]bool, len(targets)) + for _, t := range targets { + selected[t] = true + } + + var firstErr error + remove := func(subDir string) { + if err := local.RemoveDataSubDir(subDir); err != nil { + log.Warnf("Failed to remove %s: %v", subDir, err) + if firstErr == nil { + firstErr = err + } + } + } + + // Application data. The "all" rollup supersedes the per-store selections. + if selected[TargetAllAppData] { + remove(local.STORAGE_DIR_NAME) + } else { + if selected[TargetMongo] { + remove(local.MONGODB_STORAGE_DIR_NAME) + } + if selected[TargetMinio] { + remove(local.MINIO_STORAGE_DIR_NAME) + } + if selected[TargetElastic] { + remove(local.ELASTIC_STORAGE_DIR_NAME) + } + if selected[TargetKeycloak] { + remove(local.KEYCLOAK_DB_STORAGE_DIR_NAME) + } + } + + // Installation config. + if selected[TargetManifests] { + remove(local.MANIFEST_DIR_NAME) + } + if selected[TargetHostname] { + remove(local.HOSTNAME_FILE) + } + + // Cache. The container image cache lives as a local dir or a Docker volume + // depending on the platform, so clear both forms. 
+ if selected[TargetImageCache] { + remove(local.CONTAINERD_DIR_NAME) + if err := k3d.RemoveImageCachingVolume(ctx); err != nil { + log.Warnf("Failed to remove image caching volume: %v", err) + if firstErr == nil { + firstErr = err + } + } + } + if selected[TargetRegistry] { + remove(local.REGISTRY_DIR_NAME) + } + if selected[TargetHelmCache] { + remove(local.HELM_CACHE_DIR_NAME) + } + + return firstErr +} + +func Uninstall(ctx context.Context, purge bool, cleanup bool, clearData bool) (err error) { + if err = removeClusterAndLegacySidecar(ctx); err != nil { + return err + } if cleanup || purge { err = k3d.RemoveImageCachingVolume(ctx)