From 85d57fc839c56ed8c370c898571eaedabce7c32b Mon Sep 17 00:00:00 2001 From: Asaf Yehezkel Date: Thu, 25 Jun 2026 15:53:21 +0300 Subject: [PATCH] Propagate proxy env to engine PUSH jobs (pippin builder) The pippin image-dependencies-builder init container runs its own dind daemon, which does not inherit the k3d node's containerd proxy/mirror config. Behind a corporate proxy this makes it fail to pull the engine-generic base image from public.ecr.aws (DNS + egress). Capture HTTP(S)_PROXY/NO_PROXY from the installer's own environment and inject them into the engine job template's init container, augmenting NO_PROXY with in-cluster targets (Zot registry, MinIO, cluster CIDRs, .svc/.cluster.local) so local traffic bypasses the proxy. No-op when no proxy is configured. node-server's setEnvParams merges by name and never rebuilds the env list, so these chart-declared vars are preserved. Co-Authored-By: Claude Opus 4.8 --- charts/tensorleap/Chart.yaml | 2 +- charts/tensorleap/charts/engine/Chart.yaml | 2 +- .../templates/engine-job-template-cm.yaml | 18 ++++++++ charts/tensorleap/charts/engine/values.yaml | 6 +++ pkg/helm/utils.go | 4 ++ pkg/helm/utils_test.go | 3 ++ pkg/server/installation_params.go | 45 +++++++++++++++++++ pkg/server/installation_params_test.go | 31 +++++++++++++ 8 files changed, 109 insertions(+), 2 deletions(-) diff --git a/charts/tensorleap/Chart.yaml b/charts/tensorleap/Chart.yaml index 522c5e0c6..c695124c6 100644 --- a/charts/tensorleap/Chart.yaml +++ b/charts/tensorleap/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v2 name: tensorleap type: application -version: 1.6.34 +version: 1.6.35 dependencies: - name: ingress-nginx version: 4.10.0 diff --git a/charts/tensorleap/charts/engine/Chart.yaml b/charts/tensorleap/charts/engine/Chart.yaml index 9251ae14d..3ccebc402 100644 --- a/charts/tensorleap/charts/engine/Chart.yaml +++ b/charts/tensorleap/charts/engine/Chart.yaml @@ -1,4 +1,4 @@ apiVersion: v2 name: tensorleap-engine type: application -version: 1.0.614 +version: 1.0.615 diff --git a/charts/tensorleap/charts/engine/templates/engine-job-template-cm.yaml b/charts/tensorleap/charts/engine/templates/engine-job-template-cm.yaml index fe0db53a3..065e37909 100644 --- a/charts/tensorleap/charts/engine/templates/engine-job-template-cm.yaml +++ b/charts/tensorleap/charts/engine/templates/engine-job-template-cm.yaml @@ -39,6 +39,24 @@ data: value: /shared/logs - name: IS_K3D value: "true" + {{- if .Values.http_proxy }} + - name: HTTP_PROXY + value: {{ .Values.http_proxy | quote }} + - name: http_proxy + value: {{ .Values.http_proxy | quote }} + {{- end }} + {{- if .Values.https_proxy }} + - name: HTTPS_PROXY + value: {{ .Values.https_proxy | quote }} + - name: https_proxy + value: {{ .Values.https_proxy | quote }} + {{- end }} + {{- if .Values.no_proxy }} + - name: NO_PROXY + value: {{ .Values.no_proxy | quote }} + - name: no_proxy + value: {{ .Values.no_proxy | quote }} + {{- end }} volumeMounts: - name: shared-logs mountPath: /shared/logs diff --git a/charts/tensorleap/charts/engine/values.yaml b/charts/tensorleap/charts/engine/values.yaml index db03bb954..21e2f74ce 100644 --- a/charts/tensorleap/charts/engine/values.yaml +++ b/charts/tensorleap/charts/engine/values.yaml @@ -11,6 +11,12 @@ target_repo: tensorleap-registry:5000 generic_calculator_image: "public.ecr.aws/tensorleap/engine-generic" generic_py_ver: py38 #select from: py38, py39, py310 +# Outbound proxy for engine PUSH jobs (the pippin image-dependencies-builder dind). +# Set by the installer from its own HTTP(S)_PROXY/NO_PROXY env; empty = no proxy. +http_proxy: "" +https_proxy: "" +no_proxy: "" + localDataDirectories: [] gpu: false gpuTolerations: [] diff --git a/pkg/helm/utils.go b/pkg/helm/utils.go index a7673799a..3f81f595b 100644 --- a/pkg/helm/utils.go +++ b/pkg/helm/utils.go @@ -37,6 +37,7 @@ type ServerHelmValuesParams struct { Tls TLSParams `json:"tls"` HostName string `json:"hostname"` DatadogEnv map[string]string `json:"datadogEnv"` + ProxyEnv map[string]string `json:"proxyEnv"` KeycloakEnabled bool `json:"keycloakEnabled"` DisableAuth bool `json:"disableAuth"` InstalledServerVersion string `json:"installedServerVersion"` @@ -255,6 +256,9 @@ func CreateTensorleapChartValues(params *ServerHelmValuesParams) (Record, error) "tensorleap-engine": Record{ "gpu": params.Gpu, "localDataDirectories": params.LocalDataDirectories, + "http_proxy": params.ProxyEnv["http_proxy"], + "https_proxy": params.ProxyEnv["https_proxy"], + "no_proxy": params.ProxyEnv["no_proxy"], }, "tensorleap-node-server": Record{ "enableKeycloak": params.KeycloakEnabled, diff --git a/pkg/helm/utils_test.go b/pkg/helm/utils_test.go index abd87b362..78604245a 100644 --- a/pkg/helm/utils_test.go +++ b/pkg/helm/utils_test.go @@ -27,6 +27,9 @@ func TestCreateTensorleapChartValues(t *testing.T) { "tensorleap-engine": Record{ "gpu": params.Gpu, "localDataDirectories": params.LocalDataDirectories, + "http_proxy": "", + "https_proxy": "", + "no_proxy": "", }, "tensorleap-node-server": Record{ "enableKeycloak": params.KeycloakEnabled, diff --git a/pkg/server/installation_params.go b/pkg/server/installation_params.go index 00909b4d6..d14db219e 100644 --- a/pkg/server/installation_params.go +++ b/pkg/server/installation_params.go @@ -838,6 +838,8 @@ func (params *InstallationParams) GetServerHelmValuesParams(versionTag string) * datadogEnvs := params.GetDatadogEnvs() + proxyEnvs := params.GetEngineProxyEnv() + localBucketPath := path.Join(local.GetServerDataDir(), local.STORAGE_DIR_NAME, "minio", "session") return &helm.ServerHelmValuesParams{ @@ -850,6 +852,7 @@ func (params *InstallationParams) GetServerHelmValuesParams(versionTag string) * ProxyUrl: params.ProxyUrl, Tls: *tlsParams, DatadogEnv: datadogEnvs, + ProxyEnv: proxyEnvs, KeycloakEnabled: !params.DisabledAuth, DisableAuth: params.DisabledAuth, InstalledServerVersion: versionTag, @@ -876,6 +879,48 @@ func (params *InstallationParams) GetDatadogEnvs() map[string]string { return data } +// engineJobNoProxyEntries are appended to the user's NO_PROXY so that engine PUSH +// jobs reach in-cluster services (Zot registry, MinIO, k8s API/DNS) directly +// instead of routing them through the corporate proxy. +var engineJobNoProxyEntries = []string{ + "tensorleap-registry", + "tensorleap-minio", + "localhost", + "127.0.0.1", + ".svc", + ".svc.cluster.local", + ".cluster.local", + "10.42.0.0/16", + "10.43.0.0/16", +} + +// GetEngineProxyEnv returns the outbound-proxy env to inject into engine PUSH jobs +// (notably the pippin image-dependencies-builder dind, which runs its own daemon and +// does not inherit the node's containerd proxy/mirror config). Values are captured +// from the installer's own environment; returns nil when no proxy is configured. +func (params *InstallationParams) GetEngineProxyEnv() map[string]string { + httpProxy := lookupFirstEnv("HTTP_PROXY", "http_proxy") + httpsProxy := lookupFirstEnv("HTTPS_PROXY", "https_proxy") + if httpProxy == "" && httpsProxy == "" { + return nil + } + noProxy := lookupFirstEnv("NO_PROXY", "no_proxy") + return map[string]string{ + "http_proxy": httpProxy, + "https_proxy": httpsProxy, + "no_proxy": k3d.AddToNoProxy(noProxy, engineJobNoProxyEntries), + } +} + +func lookupFirstEnv(keys ...string) string { + for _, key := range keys { + if value, ok := os.LookupEnv(key); ok && value != "" { + return value + } + } + return "" +} + func (params *InstallationParams) GetInfraHelmValuesParams(syncRegistries []helm.ZotSyncRegistry, registryImage string) *helm.InfraHelmValuesParams { nvidiaGpuVisibleDevices := "" diff --git a/pkg/server/installation_params_test.go b/pkg/server/installation_params_test.go index 1012e2b61..9e061ad86 100644 --- a/pkg/server/installation_params_test.go +++ b/pkg/server/installation_params_test.go @@ -91,3 +91,34 @@ func TestGetServerHelmValuesParams(t *testing.T) { assert.False(t, helmParams.KeycloakEnabled, "Keycloak should be disabled when DisabledAuth is true") }) } + +func TestGetEngineProxyEnv(t *testing.T) { + params := &InstallationParams{} + + t.Run("returns nil when no proxy is set", func(t *testing.T) { + t.Setenv("HTTP_PROXY", "") + t.Setenv("http_proxy", "") + t.Setenv("HTTPS_PROXY", "") + t.Setenv("https_proxy", "") + assert.Nil(t, params.GetEngineProxyEnv()) + }) + + t.Run("captures proxy and augments no_proxy with in-cluster entries", func(t *testing.T) { + t.Setenv("HTTPS_PROXY", "http://proxy:3128") + t.Setenv("NO_PROXY", ".renault.fr") + + env := params.GetEngineProxyEnv() + assert.Equal(t, "http://proxy:3128", env["https_proxy"]) + assert.Contains(t, env["no_proxy"], ".renault.fr") + assert.Contains(t, env["no_proxy"], "tensorleap-registry") + assert.Contains(t, env["no_proxy"], "tensorleap-minio") + assert.Contains(t, env["no_proxy"], "10.43.0.0/16") + }) + + t.Run("prefers uppercase but falls back to lowercase", func(t *testing.T) { + t.Setenv("HTTP_PROXY", "") + t.Setenv("http_proxy", "http://lower:3128") + env := params.GetEngineProxyEnv() + assert.Equal(t, "http://lower:3128", env["http_proxy"]) + }) +}