From e655f59b43bcbfa03663b1aaf40207acfb05d97b Mon Sep 17 00:00:00 2001
From: joeyczheng <joeyczheng@tencent.com>
Date: Tue, 2 Jun 2026 15:02:22 +0800
Subject: [PATCH 1/2] feat(CubeAPI): support e2b V3 template-build protocol
 end-to-end

Add V3 routes: POST /v3/templates, GET .../files/{hash}, POST /v2/.../builds/{bid}, GET .../status (handlers/templates_v3.rs)

Signed-off-by: joeyczheng <joeyczheng@tencent.com>
---
 CubeAPI/Cargo.lock                            |    7 +
 CubeAPI/Cargo.toml                            |    2 +-
 CubeAPI/src/config/mod.rs                     |   59 +
 CubeAPI/src/constants.rs                      |    4 +
 CubeAPI/src/handlers/mod.rs                   |    2 +
 CubeAPI/src/handlers/registry.rs              |  240 +++
 CubeAPI/src/handlers/templates.rs             |   26 +-
 CubeAPI/src/handlers/templates_v3.rs          |   70 +
 CubeAPI/src/main.rs                           |   51 +
 CubeAPI/src/models/mod.rs                     |  273 +++-
 CubeAPI/src/routes.rs                         |   74 +-
 CubeAPI/src/services/builds.rs                |  189 +++
 CubeAPI/src/services/mod.rs                   |   12 +-
 CubeAPI/src/services/sandboxes.rs             |  136 +-
 CubeAPI/src/services/templates.rs             | 1447 +++++++++++++++--
 deploy/one-click/scripts/one-click/up.sh      |    2 +-
 docs/.vitepress/config.mjs                    |    2 +
 docs/guide/tutorials/template-from-e2b-sdk.md |  395 +++++
 docs/guide/tutorials/template-from-image.md   |    6 +
 .../guide/tutorials/template-from-e2b-sdk.md  |  396 +++++
 .../zh/guide/tutorials/template-from-image.md |    6 +
 21 files changed, 3195 insertions(+), 204 deletions(-)
 create mode 100644 CubeAPI/src/handlers/registry.rs
 create mode 100644 CubeAPI/src/handlers/templates_v3.rs
 create mode 100644 CubeAPI/src/services/builds.rs
 create mode 100644 docs/guide/tutorials/template-from-e2b-sdk.md
 create mode 100644 docs/zh/guide/tutorials/template-from-e2b-sdk.md
diff --git a/CubeAPI/Cargo.lock b/CubeAPI/Cargo.lock
index f547088be..4178201e6 100644
--- a/CubeAPI/Cargo.lock
+++ b/CubeAPI/Cargo.lock
@@ -2425,6 +2425,12 @@ dependencies = [
  "digest",
 ]
 
+[[package]]
+name = "sha1_smol"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbfa15b3dddfee50a0fff136974b3e1bde555604ba463834a7eb7deb6417705d"
+
 [[package]]
 name = "sha2"
 version = "0.10.9"
@@ -3298,6 +3304,7 @@ dependencies = [
  "getrandom 0.4.2",
  "js-sys",
  "serde_core",
+ "sha1_smol",
  "wasm-bindgen",
 ]
 
diff --git a/CubeAPI/Cargo.toml b/CubeAPI/Cargo.toml
index aaa9c4d47..d5e52f563 100644
--- a/CubeAPI/Cargo.toml
+++ b/CubeAPI/Cargo.toml
@@ -54,7 +54,7 @@ config = "0.13"
 dotenvy = "0.15"
 
 # ── UUID ──────────────────────────────────────────────────────────────────
-uuid = { version = "1", features = ["v4", "serde"] }
+uuid = { version = "1", features = ["v4", "v5", "serde"] }
 
 # ── High-concurrency in-memory state ──────────────────────────────────────
 # Lock-free concurrent HashMap: O(1) reads without global lock
diff --git a/CubeAPI/src/config/mod.rs b/CubeAPI/src/config/mod.rs
index 07036b95e..417573283 100644
--- a/CubeAPI/src/config/mod.rs
+++ b/CubeAPI/src/config/mod.rs
@@ -64,6 +64,59 @@ pub struct ServerConfig {
     /// Example: mysql://cube:cube_pass@127.0.0.1:3306/cube_mvp
     #[serde(default = "default_database_url")]
     pub database_url: Option<String>,
+
+    /// E2B-compatible OCI registry upstream URL. When set, /v2/* requests are
+    /// reverse-proxied to this address so that `e2b template build` (which uses
+    /// `docker push`) can upload images that CubeMaster will later consume.
+    ///
+    /// Recommended deployment: run `distribution/distribution` (CNCF Registry)
+    /// as a sidecar listening on 127.0.0.1:5000 and set
+    /// CUBE_API_REGISTRY_UPSTREAM=http://127.0.0.1:5000.
+    ///
+    /// When unset, /v2/* is rejected with `AppError::NotImplemented` (not 503)
+    /// and `dockerfile`-based template requests are rejected with 501.
+    #[serde(default)]
+    pub registry_upstream: Option<String>,
+
+    /// Public host (no scheme) advertised to E2B clients as the docker-push
+    /// target, e.g. "cube.example.com". Defaults to the Host header of the
+    /// originating /templates request when unset.
+    #[serde(default)]
+    pub registry_public_host: Option<String>,
+
+    /// Repository namespace prefix for uploaded build images. The full image
+    /// reference returned to CubeMaster will be:
+    ///   <registry_pull_host>/<repo_prefix>/<templateID>:<buildID>
+    /// Default: "e2b".
+    #[serde(default = "default_registry_repo_prefix")]
+    pub registry_repo_prefix: String,
+
+    /// Internal registry host CubeMaster nodes should pull from (e.g.
+    /// "10.0.0.1:5000"). Defaults to `registry_upstream` host:port when unset.
+    #[serde(default)]
+    pub registry_pull_host: Option<String>,
+
+    /// Optional shared secret printed back as `registry.password` in
+    /// POST /templates responses. Empty → "_anon".
+    #[serde(default)]
+    pub registry_token: Option<String>,
+
+    /// Default `writable_layer_size` to send to CubeMaster when the client
+    /// (e.g. the E2B Python SDK) does not specify one. CubeMaster validates
+    /// this field as required, so a non-empty default is needed for the V3
+    /// flow to work out of the box.
+    ///
+    /// Env var: CUBE_API_DEFAULT_WRITABLE_LAYER_SIZE  |  Default: "1G".
+    #[serde(default = "default_writable_layer_size")]
+    pub default_writable_layer_size: String,
+}
+
+fn default_registry_repo_prefix() -> String {
+    String::from("e2b")
+}
+
+fn default_writable_layer_size() -> String {
+    std::env::var("CUBE_API_DEFAULT_WRITABLE_LAYER_SIZE").unwrap_or_else(|_| "1G".to_string())
 }
 
 fn default_bind() -> String {
@@ -142,6 +195,12 @@ impl Default for ServerConfig {
             log_prefix: default_log_prefix(),
             auth_callback_url: None,
             database_url: default_database_url(),
+            registry_upstream: None,
+            registry_public_host: None,
+            registry_repo_prefix: default_registry_repo_prefix(),
+            registry_pull_host: None,
+            registry_token: None,
+            default_writable_layer_size: default_writable_layer_size(),
         }
     }
 }
diff --git a/CubeAPI/src/constants.rs b/CubeAPI/src/constants.rs
index 2667875d7..199bc13b9 100644
--- a/CubeAPI/src/constants.rs
+++ b/CubeAPI/src/constants.rs
@@ -6,3 +6,7 @@
 
 /// Reported `envdVersion` for sandbox APIs (create, connect, list, get, resume, etc.).
 pub const ENVD_VERSION: &str = "0.2.0";
+
+/// E2B `envd` listens on this port inside every sandbox.
+pub const ENVD_PORT: u32 = 49983;
+pub const ENVD_PORT_STR: &str = "49983";
diff --git a/CubeAPI/src/handlers/mod.rs b/CubeAPI/src/handlers/mod.rs
index 1c92c91db..d5281077e 100644
--- a/CubeAPI/src/handlers/mod.rs
+++ b/CubeAPI/src/handlers/mod.rs
@@ -6,7 +6,9 @@ pub mod agenthub;
 pub mod cluster;
 pub mod config;
 pub mod health;
+pub mod registry;
 pub mod sandboxes;
 pub mod snapshots;
 pub mod store;
 pub mod templates;
+pub mod templates_v3;
diff --git a/CubeAPI/src/handlers/registry.rs b/CubeAPI/src/handlers/registry.rs
new file mode 100644
index 000000000..2d751b795
--- /dev/null
+++ b/CubeAPI/src/handlers/registry.rs
@@ -0,0 +1,240 @@
+// Copyright (c) 2024 Tencent Inc.
+// SPDX-License-Identifier: Apache-2.0
+//
+
+
+use axum::{
+    body::{Body, Bytes},
+    extract::{Path, Request, State},
+    http::{header, HeaderMap, HeaderName, HeaderValue, Method, StatusCode},
+    response::Response,
+};
+use std::str::FromStr;
+
+use crate::{
+    error::{AppError, AppResult},
+    state::AppState,
+};
+
+/// Headers never forwarded in either direction: the hop-by-hop set plus `host`.
+const HOP_BY_HOP: &[&str] = &[
+    "connection",
+    "keep-alive",
+    "proxy-authenticate",
+    "proxy-authorization",
+    "te",
+    "trailer",
+    "transfer-encoding",
+    "upgrade",
+    "host",
+];
+
+/// `GET /v2/` — registry ping. Relays the upstream's own `/v2/` response and
+/// fails with `registry_disabled()` when no upstream is configured.
+pub async fn ping(State(state): State<AppState>) -> AppResult<Response> {
+    let upstream = state
+        .config
+        .registry_upstream
+        .as_deref()
+        .filter(|s| !s.is_empty())
+        .ok_or_else(registry_disabled)?;
+
+    forward(&state, Method::GET, upstream, "/v2/", "", &HeaderMap::new(), Bytes::new()).await
+}
+
+/// `ANY /v2/*path` — reverse-proxy to the upstream; body is buffered (≤ 512 MiB).
+pub async fn proxy(
+    State(state): State<AppState>,
+    Path(path): Path<String>,
+    request: Request,
+) -> AppResult<Response> {
+    let upstream = state
+        .config
+        .registry_upstream
+        .as_deref()
+        .filter(|s| !s.is_empty())
+        .ok_or_else(registry_disabled)?
+        .to_string();
+
+    let method = request.method().clone();
+    let query = request.uri().query().unwrap_or("").to_string();
+    let headers = request.headers().clone();
+    let body = match axum::body::to_bytes(request.into_body(), 512 * 1024 * 1024).await {
+        Ok(b) => b,
+        Err(e) => {
+            return Err(AppError::BadRequest(format!(
+                "failed to read /v2/* request body: {}",
+                e
+            )))
+        }
+    };
+
+    let normalized = normalize_subpath(&path);
+    let response = forward(&state, method.clone(), &upstream, &normalized, &query, &headers, body)
+        .await?;
+
+    // A successful PUT to a manifest path is treated as "image pushed": the
+    // template service is notified with the tag of the pushed manifest.
+    if method == Method::PUT && response.status().is_success() {
+        if let Some(parsed) = parse_manifest_path(&normalized) {
+            // The reference is either a tag (expected to be the buildID) or a
+            // `sha256:` digest; digest pushes are skipped as they carry no tag.
+            if !parsed.tag.starts_with("sha256:") {
+                tracing::info!(
+                    repo = %parsed.repo,
+                    tag = %parsed.tag,
+                    "manifest pushed; marking build as image-pushed"
+                );
+                state.services.templates.mark_image_pushed(&parsed.tag);
+            }
+        }
+    }
+
+    Ok(response)
+}
+
+/// Send one request to the registry upstream and relay its response, dropping
+/// `HOP_BY_HOP` headers in both directions; the reply always carries a
+/// `docker-distribution-api-version` header.
+async fn forward(
+    state: &AppState,
+    method: Method,
+    upstream: &str,
+    path: &str,
+    query: &str,
+    in_headers: &HeaderMap,
+    body: Bytes,
+) -> AppResult<Response> {
+    let upstream = upstream.trim_end_matches('/');
+    let sep = if path.starts_with('/') { "" } else { "/" };
+    let mut url = format!("{}{}{}", upstream, sep, path);
+    if !query.is_empty() {
+        url.push('?');
+        url.push_str(query);
+    }
+
+    let mut req = state.http_client.request(method, &url);
+
+    for (name, value) in in_headers {
+        if !HOP_BY_HOP.contains(&name.as_str()) {
+            req = req.header(name.clone(), value.clone());
+        }
+    }
+
+    if !body.is_empty() {
+        // `Bytes` converts into the request body without copying the payload.
+        req = req.body(body);
+    }
+
+    let upstream_resp = req.send().await.map_err(|e| {
+        tracing::error!(error = %e, url = %url, "registry upstream request failed");
+        AppError::Internal(anyhow::anyhow!("registry upstream unreachable: {}", e))
+    })?;
+
+    let status = upstream_resp.status();
+    let mut headers = HeaderMap::new();
+    for (name, value) in upstream_resp.headers() {
+        let key = name.as_str();
+        if HOP_BY_HOP.contains(&key) || key == "content-length" {
+            continue;
+        }
+        if let (Ok(name), Ok(value)) = (
+            HeaderName::from_str(key),
+            HeaderValue::from_bytes(value.as_bytes()),
+        ) {
+            // `append`, not `insert`: a repeated header (e.g. several
+            // `WWW-Authenticate` challenges) must reach the client in full.
+            headers.append(name, value);
+        }
+    }
+
+    let body_bytes = upstream_resp
+        .bytes()
+        .await
+        .map_err(|e| AppError::Internal(anyhow::anyhow!("registry response read failed: {}", e)))?;
+
+    let mut response = Response::builder()
+        .status(StatusCode::from_u16(status.as_u16()).unwrap_or(StatusCode::BAD_GATEWAY))
+        .body(Body::from(body_bytes))
+        .map_err(|e| AppError::Internal(anyhow::anyhow!("response build failed: {}", e)))?;
+
+    *response.headers_mut() = headers;
+    response
+        .headers_mut()
+        .entry(header::HeaderName::from_static("docker-distribution-api-version"))
+        .or_insert(HeaderValue::from_static("registry/2.0"));
+
+    Ok(response)
+}
+
+/// Error for the `/v2/*` handlers when `registry_upstream` is unset or empty;
+/// the message names the environment variable that enables the proxy.
+fn registry_disabled() -> AppError {
+    let hint = "registry upstream is not configured: set CUBE_API_REGISTRY_UPSTREAM \
+                to enable the bundled OCI registry";
+    AppError::NotImplemented(hint.to_string())
+}
+
+/// Turn the captured sub-path into an absolute `/v2...` path: `/v2…` is kept,
+/// `v2/…` gains a leading `/`, anything else is prefixed with `/v2/`.
+fn normalize_subpath(path: &str) -> String {
+    match path {
+        p if p.starts_with("/v2") => p.to_string(),
+        p if p.starts_with("v2/") => format!("/{}", p),
+        p => format!("/v2/{}", p.trim_start_matches('/')),
+    }
+}
+
+/// Repository and reference taken from a `/v2/<repo>/manifests/<ref>` path.
+#[derive(Debug)]
+struct ManifestPath {
+    /// Repository name; may itself contain `/` separators.
+    repo: String,
+    /// Text after the last `/manifests/` segment (a tag or a digest).
+    tag: String,
+}
+
+/// Split `/v2/<repo>/manifests/<tag>` at the last `/manifests/` separator.
+/// Yields `None` for any other path, or when `<repo>` or `<tag>` is empty.
+fn parse_manifest_path(path: &str) -> Option<ManifestPath> {
+    let (repo, tag) = path.strip_prefix("/v2/")?.rsplit_once("/manifests/")?;
+    if repo.is_empty() || tag.is_empty() {
+        return None;
+    }
+    Some(ManifestPath {
+        repo: repo.to_string(),
+        tag: tag.to_string(),
+    })
+}
+
+impl ManifestPath {
+    #[allow(dead_code)]
+    fn rebuild(&self) -> String {
+        format!("/v2/{}/manifests/{}", self.repo, self.tag)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parse_manifest_path_accepts_namespaced_repo() {
+        let p = parse_manifest_path("/v2/e2b/tpl-abc/manifests/bld-001").unwrap();
+        assert_eq!(p.repo, "e2b/tpl-abc");
+        assert_eq!(p.tag, "bld-001");
+    }
+
+    #[test]
+    fn parse_manifest_path_rejects_blob_paths() {
+        assert!(parse_manifest_path("/v2/e2b/tpl-abc/blobs/sha256:abc").is_none());
+        assert!(parse_manifest_path("/v2/").is_none());
+    }
+
+    #[test]
+    fn normalize_subpath_handles_axum_capture_variants() {
+        assert_eq!(normalize_subpath("v2/foo/bar"), "/v2/foo/bar");
+        assert_eq!(normalize_subpath("/foo/bar"), "/v2/foo/bar");
+        assert_eq!(normalize_subpath("/v2/foo/bar"), "/v2/foo/bar");
+    }
+}
diff --git a/CubeAPI/src/handlers/templates.rs b/CubeAPI/src/handlers/templates.rs
index 861510b47..a5dbf72c3 100644
--- a/CubeAPI/src/handlers/templates.rs
+++ b/CubeAPI/src/handlers/templates.rs
@@ -146,44 +146,44 @@ pub async fn delete_template(
 
 pub async fn start_template_build(
     State(state): State<AppState>,
-    Path((template_id, _build_id)): Path<(String, String)>,
+    Path((template_id, build_id)): Path<(String, String)>,
 ) -> AppResult<impl IntoResponse> {
     let job = state
         .services
         .templates
-        .start_template_build(template_id)
+        .start_template_build(template_id, Some(build_id))
         .await?;
     Ok((StatusCode::ACCEPTED, Json(job)))
 }
 
 // ─── GET /templates/:templateID/builds/:buildID/status ────────────────────────
 
-#[derive(Debug, Deserialize)]
+#[derive(Debug, Deserialize, Default)]
 pub struct BuildStatusQuery {
-    #[serde(default)]
-    #[allow(dead_code)]
+    /// E2B SDK polls with `?logsOffset=N` to receive only the new lines
+    /// added since the last response. Snake-case alias is accepted too.
+    #[serde(rename = "logsOffset", alias = "logs_offset", default)]
     pub logs_offset: i32,
 }
 
 pub async fn get_template_build_status(
     State(state): State<AppState>,
     Path((template_id, build_id)): Path<(String, String)>,
-    Query(_params): Query<BuildStatusQuery>,
+    Query(params): Query<BuildStatusQuery>,
 ) -> AppResult<impl IntoResponse> {
     let out = state
         .services
         .templates
-        .get_template_build_status(&template_id, &build_id)
+        .get_template_build_status(&template_id, &build_id, params.logs_offset)
         .await?;
     Ok((StatusCode::OK, Json(out)))
 }
 
 // ─── GET /templates/:templateID/builds/:buildID/logs ─────────────────────────
 
-#[derive(Debug, Deserialize)]
+#[derive(Debug, Deserialize, Default)]
 pub struct BuildLogsQuery {
-    #[serde(default)]
-    #[allow(dead_code)]
+    #[serde(rename = "logsOffset", alias = "offset", alias = "logs_offset", default)]
     pub offset: i32,
     #[serde(default = "default_log_limit")]
     #[allow(dead_code)]
@@ -195,13 +195,13 @@ fn default_log_limit() -> i32 {
 
 pub async fn get_template_build_logs(
     State(state): State<AppState>,
-    Path((_template_id, build_id)): Path<(String, String)>,
-    Query(_params): Query<BuildLogsQuery>,
+    Path((template_id, build_id)): Path<(String, String)>,
+    Query(params): Query<BuildLogsQuery>,
 ) -> AppResult<impl IntoResponse> {
     let logs = state
         .services
         .templates
-        .get_template_build_logs(&build_id)
+        .get_template_build_logs(&template_id, &build_id, params.offset)
         .await?;
     Ok((StatusCode::OK, Json(logs)))
 }
diff --git a/CubeAPI/src/handlers/templates_v3.rs b/CubeAPI/src/handlers/templates_v3.rs
new file mode 100644
index 000000000..4319765b8
--- /dev/null
+++ b/CubeAPI/src/handlers/templates_v3.rs
@@ -0,0 +1,70 @@
+// Copyright (c) 2026 Tencent Inc.
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use axum::{
+    extract::{Path, Query, State},
+    http::StatusCode,
+    response::IntoResponse,
+    Json,
+};
+
+use crate::{
+    error::AppResult,
+    models::{
+        V2TemplateBuildStart, V3BuildStatusQuery, V3TemplateBuildRequest,
+    },
+    state::AppState,
+};
+
+/// `POST /v3/templates` — register a template and its first build; replies `202 Accepted`.
+pub async fn v3_create_template(
+    State(state): State<AppState>,
+    Json(body): Json<V3TemplateBuildRequest>,
+) -> AppResult<impl IntoResponse> {
+    let resp = state.services.templates.v3_create_template(body)?;
+    Ok((StatusCode::ACCEPTED, Json(resp)))
+}
+
+/// `GET /templates/{templateID}/files/{hash}` — file-cache probe used by the
+/// SDK before uploading build context tarballs. Replies `201 Created` with
+/// whatever `v3_get_file_upload` returns (intended: always `present=true`,
+/// since CubeMaster only consumes `from_image` references — TODO confirm).
+pub async fn v3_get_files_hash(
+    State(state): State<AppState>,
+    Path((template_id, hash)): Path<(String, String)>,
+) -> AppResult<impl IntoResponse> {
+    let resp = state
+        .services
+        .templates
+        .v3_get_file_upload(&template_id, &hash)?;
+    Ok((StatusCode::CREATED, Json(resp)))
+}
+
+/// `POST /v2/templates/{templateID}/builds/{buildID}` — start the build; `202`, empty body.
+pub async fn v2_trigger_build(
+    State(state): State<AppState>,
+    Path((template_id, build_id)): Path<(String, String)>,
+    Json(body): Json<V2TemplateBuildStart>,
+) -> AppResult<impl IntoResponse> {
+    state
+        .services
+        .templates
+        .v3_trigger_build(template_id, build_id, body)
+        .await?;
+    Ok(StatusCode::ACCEPTED)
+}
+
+/// `GET /templates/{templateID}/builds/{buildID}/status?logsOffset=N&limit=M` — poll a build.
+pub async fn v3_get_build_status(
+    State(state): State<AppState>,
+    Path((template_id, build_id)): Path<(String, String)>,
+    Query(params): Query<V3BuildStatusQuery>,
+) -> AppResult<impl IntoResponse> {
+    let info = state
+        .services
+        .templates
+        .v3_get_build_status(&template_id, &build_id, params.logs_offset, params.limit)
+        .await?;
+    Ok((StatusCode::OK, Json(info)))
+}
diff --git a/CubeAPI/src/main.rs b/CubeAPI/src/main.rs
index a2f6fca9b..0392eba69 100644
--- a/CubeAPI/src/main.rs
+++ b/CubeAPI/src/main.rs
@@ -116,6 +116,39 @@ struct Cli {
     #[arg(long, value_name = "DOMAIN")]
     sandbox_domain: Option<String>,
 
+    /// Upstream OCI registry URL used for `e2b template build` image push
+    /// (default: unset). When unset, /v2/* is rejected as not implemented and
+    /// dockerfile-based requests fail with 501.
+    ///
+    /// Overrides the CUBE_API_REGISTRY_UPSTREAM environment variable.
+    #[arg(long, value_name = "URL")]
+    registry_upstream: Option<String>,
+
+    /// Public host advertised to E2B clients for docker push (no scheme).
+    /// Default: the request Host header at template-create time.
+    #[arg(long, value_name = "HOST")]
+    registry_public_host: Option<String>,
+
+    /// Repo prefix applied to pushed build images (default: "e2b").
+    #[arg(long, value_name = "PREFIX")]
+    registry_repo_prefix: Option<String>,
+
+    /// Internal registry host CubeMaster nodes pull from (e.g.
+    /// "10.0.0.1:5000"). Defaults to upstream registry host:port.
+    #[arg(long, value_name = "HOST")]
+    registry_pull_host: Option<String>,
+
+    /// Shared password returned to E2B clients as registry.password.
+    #[arg(long, value_name = "TOKEN")]
+    registry_token: Option<String>,
+
+    /// Default `writable_layer_size` to send to CubeMaster when the client
+    /// (e.g. E2B V3 SDK) does not provide one. Default: "1G".
+    ///
+    /// Overrides CUBE_API_DEFAULT_WRITABLE_LAYER_SIZE.
+    #[arg(long, value_name = "SIZE")]
+    default_writable_layer_size: Option<String>,
+
     /// Export the current OpenAPI spec to a YAML file and exit.
     #[arg(long, value_name = "PATH")]
     export_openapi: Option<String>,
@@ -168,6 +201,24 @@ fn main() -> anyhow::Result<()> {
     if let Some(v) = cli.sandbox_domain {
         cfg.sandbox_domain = v;
     }
+    if let Some(v) = cli.registry_upstream {
+        cfg.registry_upstream = Some(v);
+    }
+    if let Some(v) = cli.registry_public_host {
+        cfg.registry_public_host = Some(v);
+    }
+    if let Some(v) = cli.registry_repo_prefix {
+        cfg.registry_repo_prefix = v;
+    }
+    if let Some(v) = cli.registry_pull_host {
+        cfg.registry_pull_host = Some(v);
+    }
+    if let Some(v) = cli.registry_token {
+        cfg.registry_token = Some(v);
+    }
+    if let Some(v) = cli.default_writable_layer_size {
+        cfg.default_writable_layer_size = v;
+    }
 
     // ── Tracing (stdout) ───────────────────────────────────────────────────
     // RUST_LOG env var takes precedence; --debug / --log-level / config is fallback.
diff --git a/CubeAPI/src/models/mod.rs b/CubeAPI/src/models/mod.rs
index eebb38f79..bf3bef242 100644
--- a/CubeAPI/src/models/mod.rs
+++ b/CubeAPI/src/models/mod.rs
@@ -466,8 +466,28 @@ pub struct TemplateDetail {
 }
 
 /// Body for POST /templates (create from image).
-#[derive(Debug, Deserialize, Validate, ToSchema)]
+///
+/// Two mutually exclusive modes are supported on the same endpoint, matching
+/// both the **CubeSandbox-native** and **E2B-standard** template build flows:
+///
+/// 1. CubeSandbox-native (`image` is provided): CubeMaster will pull
+///    `image` from an external OCI registry and build the rootfs directly.
+///    All extra fields (`exposed_ports`, `cpu`, `memory`, ...) override the
+///    image defaults.
+///
+/// 2. E2B-standard (`dockerfile` is provided): the server allocates a
+///    `templateID` + `buildID`, returns a short-lived push credential, and the
+///    client (`e2b template build`) pushes the locally-built image to the
+///    bundled OCI registry. The actual rootfs build is then triggered by
+///    `POST /templates/{tid}/builds/{bid}`.
+///
+/// Field naming follows the E2B SDK conventions where they collide with
+/// CubeSandbox legacy fields: camelCase is the primary spelling, and
+/// snake_case `start_cmd`/`ready_cmd` are accepted as aliases.
+#[derive(Debug, Deserialize, Validate, Clone, ToSchema)]
+#[allow(dead_code)]
 pub struct CreateTemplateRequest {
+    // ── Common fields (both modes) ─────────────────────────────────────────
     /// Deprecated and ignored. Template IDs are always generated server-side
     /// with the `tpl-` prefix; clients must use the returned `templateID`.
     #[serde(rename = "templateID", default)]
@@ -475,9 +495,27 @@ pub struct CreateTemplateRequest {
     pub template_id: String,
     #[serde(rename = "instanceType", default)]
     pub instance_type: Option<String>,
-    /// Container image reference, e.g. `registry.example.com/code:latest`.
-    #[validate(length(min = 1))]
-    pub image: String,
+
+    /// Optional human-readable alias (E2B field: `alias`).
+    #[serde(default)]
+    pub alias: Option<String>,
+
+    /// E2B `teamID`. Currently only logged; reserved for multi-tenant rollout.
+    #[serde(rename = "teamID", default)]
+    pub team_id: Option<String>,
+
+    /// Container image reference (CubeSandbox-native mode), e.g.
+    /// `registry.example.com/code:latest`. Mutually exclusive with `dockerfile`.
+    #[serde(default)]
+    pub image: Option<String>,
+
+    /// Inline Dockerfile content (E2B-standard mode). Currently NOT built
+    /// server-side — the client is expected to build & push the image locally
+    /// using the credentials returned by this endpoint. Stored verbatim for
+    /// future in-cluster builds.
+    #[serde(default)]
+    pub dockerfile: Option<String>,
+
     /// Writable layer size for the rootfs, e.g. "1G".
     #[serde(rename = "writableLayerSize", default)]
     pub writable_layer_size: Option<String>,
@@ -490,15 +528,32 @@ pub struct CreateTemplateRequest {
     /// HTTP probe path, e.g. "/health". Defaults to "/health" when `probePort` is set.
     #[serde(rename = "probePath", default)]
     pub probe_path: Option<String>,
-    /// CPU in millicores, e.g. 2000 means 2000m.
+
+    /// CPU in millicores (legacy CubeSandbox field).
     #[serde(default)]
     pub cpu: Option<u32>,
-    /// Memory in MiB, e.g. 2000.
+    /// Memory in MiB (legacy CubeSandbox field).
     #[serde(default)]
     pub memory: Option<u32>,
-    /// Environment variables as "KEY=VALUE" strings.
+
+    /// E2B-style integer CPU count (cores). Mapped to `cpuCount * 1000`
+    /// millicores when the legacy `cpu` field is not supplied.
+    #[serde(rename = "cpuCount", default)]
+    pub cpu_count: Option<u32>,
+
+    /// E2B-style memory in MiB. Mapped to `memory` when the legacy field is
+    /// not supplied.
+    #[serde(rename = "memoryMB", default)]
+    pub memory_mb: Option<u32>,
+
+    /// Environment variables as "KEY=VALUE" strings (legacy CubeSandbox).
     #[serde(default)]
     pub env: Option<Vec<String>>,
+
+    /// E2B-style env-vars map. Merged into `env` when present.
+    #[serde(rename = "envVars", default)]
+    pub env_vars: Option<HashMap<String, String>>,
+
     /// Allow internet (public) access.
     #[serde(rename = "allowInternetAccess", default)]
     pub allow_internet_access: Option<bool>,
@@ -529,6 +584,16 @@ pub struct CreateTemplateRequest {
     /// Denied outbound CIDRs for CubeVS egress policy.
     #[serde(rename = "denyOut", default)]
     pub deny_out: Option<Vec<String>>,
+
+    /// E2B-style `startCmd`: shell command to execute inside the container
+    /// once the rootfs is mounted. Mapped to CubeMaster `args`.
+    #[serde(rename = "startCmd", alias = "start_cmd", default)]
+    pub start_cmd: Option<String>,
+
+    /// E2B-style `readyCmd`: shell command used as readiness probe.
+    /// Translated into a CubeMaster `Probe.Exec` when `probe_port` is empty.
+    #[serde(rename = "readyCmd", alias = "ready_cmd", default)]
+    pub ready_cmd: Option<String>,
 }
 
 /// Body for POST /templates/:id (rebuild).
@@ -539,21 +604,60 @@ pub struct RebuildTemplateRequest {
 }
 
 /// Job envelope returned by create / rebuild.
-#[derive(Debug, Serialize, ToSchema)]
+///
+/// E2B's CLI expects (besides the bare job state):
+///   - `buildID`     — opaque token that subsequent `/builds/{buildID}/...`
+///                     calls use to refer to *this* attempt.
+///   - `uploadUrl`   — URL the CLI should `docker push` to.
+///   - `registry`    — credentials matched against `Authorization` on /v2/*.
+///
+/// `uploadUrl`/`registry` are omitted when unset and `buildID` is purely additive,
+/// so clients that only read `templateID`/`status` continue to deserialize.
+#[derive(Debug, Serialize, ToSchema, Default)]
 pub struct TemplateBuildJob {
     #[serde(rename = "jobID")]
     pub job_id: String,
     #[serde(rename = "templateID")]
     pub template_id: String,
+    /// E2B-required identifier of this build attempt. Equals `jobID` when
+    /// CubeMaster returns one; otherwise a server-side uuid.
+    #[serde(rename = "buildID")]
+    pub build_id: String,
     pub status: String,
     pub phase: String,
     pub progress: i32,
     #[serde(rename = "errorMessage", skip_serializing_if = "String::is_empty")]
     pub error_message: String,
+
+    /// E2B-style `uploadUrl`: where the CLI should push the locally-built
+    /// dockerfile image. Same as `registry.url` for convenience.
+    #[serde(rename = "uploadUrl", skip_serializing_if = "Option::is_none")]
+    pub upload_url: Option<String>,
+
+    /// Registry credentials advertised to E2B clients.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub registry: Option<RegistryCredential>,
+}
+
+/// Short-lived push credential returned alongside a new template build.
+#[derive(Debug, Serialize, Clone, ToSchema)]
+pub struct RegistryCredential {
+    /// Full base URL of the registry endpoint, e.g. `https://cube.example.com`.
+    pub url: String,
+    /// Repository the client should push to, e.g. `e2b/tpl-abc:bld-001`.
+    pub repository: String,
+    /// Username for `docker login` / Basic auth.
+    pub username: String,
+    /// Password for `docker login` / Basic auth.
+    pub password: String,
 }
 
 /// Response for GET /templates/:id/builds/:bid/status
-#[derive(Debug, Serialize, ToSchema)]
+///
+/// E2B's CLI polls this endpoint with `?logsOffset=N` and expects:
+///   - `status`        : "building" | "ready" | "error" | "uploading" | ...
+///   - `logs: string[]`: the new lines added since the previous offset.
+#[derive(Debug, Serialize, ToSchema, Default)]
 pub struct TemplateBuildStatus {
     #[serde(rename = "buildID")]
     pub build_id: String,
@@ -562,6 +666,12 @@ pub struct TemplateBuildStatus {
     pub status: String,
     pub progress: i32,
     pub message: String,
+    /// Incremental log lines starting from the offset given in the query.
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub logs: Vec<String>,
+    /// Offset to send back next round to receive only newer lines.
+    #[serde(rename = "logsOffset", skip_serializing_if = "Option::is_none")]
+    pub logs_offset: Option<i32>,
 }
 
 // ─── Cluster & Nodes ───────────────────────────────────────────────────────
@@ -717,3 +827,148 @@ pub struct VersionMatrixView {
     pub components: Vec<ComponentMatrixRowView>,
     pub nodes: Vec<NodeVersionRowView>,
 }
+
+// ─── E2B V3 protocol (real e2b SDK contract) ──────────────────────────────
+//
+// The e2b Python/JS SDK calls these four endpoints (camelCase JSON):
+//
+//   1. POST /v3/templates                      → register, returns
+//                                                {templateID, buildID, ...}
+//   2. GET  /templates/{tid}/files/{hash}      → resolve cache, returns
+//                                                {present, url?}
+//   3. POST /v2/templates/{tid}/builds/{bid}   → trigger build, body has
+//                                                fromImage / startCmd /
+//                                                readyCmd / steps / ...
+//   4. GET  /templates/{tid}/builds/{bid}/status?logsOffset=N&limit=M
+//                                              → poll, returns
+//                                                {buildID, templateID,
+//                                                 status, logs[], logEntries[]}
+#[derive(Debug, Deserialize, Default, ToSchema)]
+#[allow(dead_code)] // NOTE(review): presumably not every field is read yet — confirm
+pub struct V3TemplateBuildRequest {
+    /// New-style "name" or "name:tag". The SDK *prefers* this over `alias`.
+    #[serde(default)]
+    pub name: Option<String>,
+    /// Deprecated predecessor of `name`. Some older SDKs still send this.
+    #[serde(default)]
+    pub alias: Option<String>,
+    /// Tag list to attach to the resulting build.
+    #[serde(default)]
+    pub tags: Option<Vec<String>>,
+    /// CPU cores (whole number).
+    #[serde(rename = "cpuCount", default)]
+    pub cpu_count: Option<u32>,
+    /// Memory in MiB.
+    #[serde(rename = "memoryMB", default)]
+    pub memory_mb: Option<u32>,
+    /// Team identifier — currently only logged.
+    #[serde(rename = "teamID", default)]
+    pub team_id: Option<String>,
+}
+
+/// Response for `POST /v3/templates` — must match `TemplateRequestResponseV3`
+/// exactly: the SDK calls `from_dict` and **fails fast on missing keys**.
+#[derive(Debug, Serialize, ToSchema)]
+pub struct V3TemplateBuildResponse {
+    #[serde(rename = "templateID")]
+    pub template_id: String,
+    #[serde(rename = "buildID")]
+    pub build_id: String,
+    pub names: Vec<String>,
+    pub aliases: Vec<String>,
+    pub tags: Vec<String>,
+    pub public: bool,
+}
+
+/// Response for `GET /templates/{tid}/files/{hash}` — the SDK only checks
+/// `present`/`url` and (when `present=false`) PUTs the tarball to `url`.
+#[derive(Debug, Serialize, ToSchema)]
+pub struct V3TemplateFileUpload {
+    pub present: bool,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub url: Option<String>,
+}
+
+/// Body for `POST /v2/templates/{tid}/builds/{bid}` — the moment the build is
+/// actually dispatched to CubeMaster.
+#[derive(Debug, Deserialize, Default, ToSchema)]
+#[allow(dead_code)] // several fields are accepted but not consumed yet (see field docs)
+pub struct V2TemplateBuildStart {
+    /// Skip-cache flag.
+    #[serde(default)]
+    pub force: Option<bool>,
+    /// External base image (CubeMaster `SourceImageRef`).
+    #[serde(rename = "fromImage", default)]
+    pub from_image: Option<String>,
+    /// Optional registry credential block (AWS/GCP/General). Stored verbatim
+    /// for now; CubeMaster doesn't yet consume it.
+    #[serde(rename = "fromImageRegistry", default)]
+    pub from_image_registry: Option<serde_json::Value>,
+    /// Reuse another already-built CubeSandbox template as the base.
+    #[serde(rename = "fromTemplate", default)]
+    pub from_template: Option<String>,
+    /// E2B `readyCmd` — translated into CubeMaster `Probe.Exec`.
+    #[serde(rename = "readyCmd", default)]
+    pub ready_cmd: Option<String>,
+    /// E2B `startCmd` — translated into container `args`.
+    #[serde(rename = "startCmd", default)]
+    pub start_cmd: Option<String>,
+    /// Multi-step build instructions (RUN/COPY/ENV/...). Currently only used
+    /// for hashing & log breadcrumbs; full Dockerfile-equivalent semantics
+    /// require the in-cluster builder (Phase 4).
+    #[serde(default)]
+    pub steps: Option<Vec<serde_json::Value>>,
+}
+
+/// Response for `GET /templates/{tid}/builds/{bid}/status` — must round-trip
+/// to E2B's `TemplateBuildInfo` to satisfy the SDK's strict `from_dict`.
+#[derive(Debug, Serialize, ToSchema, Default)]
+pub struct V3TemplateBuildInfo {
+    #[serde(rename = "buildID")]
+    pub build_id: String,
+    #[serde(rename = "templateID")]
+    pub template_id: String,
+    /// One of: "waiting" | "building" | "ready" | "error".
+    pub status: String,
+    /// Plain log lines (already filtered by `logsOffset`).
+    pub logs: Vec<String>,
+    /// Structured log entries — same content with timestamps + level.
+    #[serde(rename = "logEntries")]
+    pub log_entries: Vec<V3BuildLogEntry>,
+    /// Failure reason payload (only when `status == "error"`).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reason: Option<V3BuildStatusReason>,
+}
+
+#[derive(Debug, Serialize, ToSchema)]
+pub struct V3BuildLogEntry {
+    pub timestamp: DateTime<Utc>,
+    pub message: String,
+    /// "debug" | "info" | "warn" | "error"
+    pub level: String,
+}
+
+#[derive(Debug, Serialize, ToSchema)]
+pub struct V3BuildStatusReason {
+    #[serde(rename = "stepIndex", skip_serializing_if = "Option::is_none")]
+    pub step_index: Option<i32>,
+    pub message: String,
+}
+
+/// Query string for the `GET /templates/{tid}/builds/{bid}/status` endpoint.
+#[derive(Debug, Deserialize, Default, IntoParams)]
+#[into_params(parameter_in = Query)]
+#[allow(dead_code)]
+pub struct V3BuildStatusQuery {
+    #[serde(rename = "logsOffset", alias = "logs_offset", default)]
+    pub logs_offset: i32,
+    #[serde(default = "default_v3_log_limit")]
+    pub limit: i32, // NOTE(review): derived `Default` gives 0 here, serde gives 100
+    #[serde(default)]
+    pub level: Option<String>,
+}
+
+fn default_v3_log_limit() -> i32 {
+    100
+}
+
diff --git a/CubeAPI/src/routes.rs b/CubeAPI/src/routes.rs
index 676343d7d..7402fc5f9 100644
--- a/CubeAPI/src/routes.rs
+++ b/CubeAPI/src/routes.rs
@@ -18,7 +18,10 @@ use tower_http::{
 };
 
 use crate::{
-    handlers::{agenthub, cluster, config, health, sandboxes, snapshots, store, templates},
+    handlers::{
+        agenthub, cluster, config, health, registry, sandboxes, snapshots, store, templates,
+        templates_v3,
+    },
     middleware::{auth::unified_auth, rate_limit::rate_limit},
     state::AppState,
 };
@@ -58,10 +61,15 @@ pub fn build_router(state: AppState) -> Router {
             ),
         SNAPSHOT_LONG_ROUTE_TIMEOUT,
     );
+    let registry_router = apply_http_layers(
+        build_registry_router(&state),
+        SNAPSHOT_LONG_ROUTE_TIMEOUT,
+    );
 
     Router::new()
         .merge(standard_router)
         .merge(snapshot_long_router)
+        .merge(registry_router)
         .with_state(state)
 }
 
@@ -170,6 +178,20 @@ fn build_template_routes(state: &AppState, auth_configured: bool) -> Router<AppS
     let routes = Router::new()
         .route("/templates", get(templates::list_templates))
         .route("/templates", post(templates::create_template))
+        // ── E2B V3 protocol (real SDK contract) ───────────────
+        // SDK calls these in order: POST /v3/templates → GET files/{hash}
+        // → POST /v2/.../builds/{bid} → GET .../status. Routes are mounted
+        // at the same level as /templates so both `/v3/...` and `/v2/...`
+        // segments are reachable.
+        .route("/v3/templates", post(templates_v3::v3_create_template))
+        .route(
+            "/templates/:templateID/files/:hash",
+            get(templates_v3::v3_get_files_hash),
+        )
+        .route(
+            "/v2/templates/:templateID/builds/:buildID",
+            post(templates_v3::v2_trigger_build),
+        )
         .route("/templates/:templateID", get(templates::get_template))
         .route("/templates/:templateID", post(templates::rebuild_template))
         .route("/templates/:templateID", patch(templates::update_template))
@@ -179,7 +201,7 @@ fn build_template_routes(state: &AppState, auth_configured: bool) -> Router<AppS
         )
         .route(
             "/templates/:templateID/builds/:buildID/status",
-            get(templates::get_template_build_status),
+            get(templates_v3::v3_get_build_status),
         )
         .route(
             "/templates/:templateID/builds/:buildID/logs",
@@ -296,6 +318,14 @@ fn build_agenthub_routes(state: &AppState, auth_configured: bool) -> Router<AppS
     with_auth(routes, state, auth_configured)
 }
 
+fn build_registry_router(_state: &AppState) -> Router<AppState> {
+    use axum::routing::{any, get};
+    Router::new() // NOTE(review): no `with_auth` here, unlike `build_agenthub_routes` — confirm
+        .route("/v2/", get(registry::ping)) // `/v2` root with a trailing slash ...
+        .route("/v2", get(registry::ping)) // ... and without one
+        .route("/v2/*path", any(registry::proxy)) // wildcard `/v2/...` paths, any HTTP method
+}
+
 fn with_auth(
     routes: Router<AppState>,
     state: &AppState,
@@ -526,4 +556,42 @@ mod tests {
             resp.text(),
         );
     }
-}
+
+    /// Regression: e2b Python SDK `Template.build()` calls `POST /v3/templates`
+    /// first; prior to the V3 routes we returned 404 with an empty body, which
+    /// surfaced to users as `BuildException: 404: b''`. After the fix, the
+    /// route exists and returns the V3 envelope.
+    #[tokio::test]
+    async fn v3_template_build_routes_are_reachable() {
+        let server = test_server().await;
+
+        // POST /v3/templates → 202 with templateID/buildID/names/aliases/tags/public
+        let resp = server
+            .post("/v3/templates")
+            .json(&serde_json::json!({
+                "name": "my-tpl:dev",
+                "cpuCount": 1,
+                "memoryMB": 1024,
+            }))
+            .await;
+        resp.assert_status(StatusCode::ACCEPTED);
+        let body: serde_json::Value = resp.json();
+        assert!(body["templateID"].as_str().is_some());
+        assert!(body["buildID"].as_str().is_some());
+        assert!(body["names"].as_array().is_some());
+        assert!(body["aliases"].as_array().is_some());
+        assert_eq!(body["public"].as_bool(), Some(false));
+        // The trailing `:dev` should have been folded into tags.
+        let tags = body["tags"].as_array().expect("tags array");
+        assert!(tags.iter().any(|t| t == "dev"));
+
+        // GET /templates/{tid}/files/{hash} → 201 with present=true (cache hit)
+        let tid = body["templateID"].as_str().unwrap();
+        let r = server
+            .get(&format!("/templates/{}/files/abc123", tid))
+            .await;
+        r.assert_status(StatusCode::CREATED);
+        let fb: serde_json::Value = r.json();
+        assert_eq!(fb["present"].as_bool(), Some(true));
+    }
+}
\ No newline at end of file
diff --git a/CubeAPI/src/services/builds.rs b/CubeAPI/src/services/builds.rs
new file mode 100644
index 000000000..34ffd2d05
--- /dev/null
+++ b/CubeAPI/src/services/builds.rs
@@ -0,0 +1,189 @@
+// Copyright (c) 2026 Tencent Inc.
+// SPDX-License-Identifier: Apache-2.0
+//
+
+//! Build registry — keeps E2B-compatible per-build context in memory.
+//!
+//! When a client invokes the E2B-style `POST /templates`, we allocate a fresh
+//! `(templateID, buildID)` pair and remember:
+//!
+//!   - the create request snapshot (so `POST /templates/{tid}/builds/{bid}`
+//!     can resolve into the actual CubeMaster pipeline),
+//!   - the docker-push registry credentials we just handed back to the client,
+//!   - an append-only log buffer so the polling-based `?logsOffset=N` protocol
+//!     keeps working,
+//!   - the CubeMaster `jobID` once the build is dispatched, used by every
+//!     subsequent status / logs lookup.
+//!
+//! The store is in-memory; only the per-build log buffer is bounded (entries are never
+//! evicted), and a restart of CubeAPI invalidates inflight builds. This is acceptable for a
+//! build flow that always reaches a terminal state within minutes — durable persistence can
+//! be added later as a separate storage trait without changing the call sites.
+
+use chrono::{DateTime, Utc};
+use dashmap::DashMap;
+use std::sync::Arc;
+use uuid::Uuid;
+
+use crate::models::{CreateTemplateRequest, RegistryCredential};
+
+/// Build lifecycle stage, in the vocabulary the E2B CLI expects.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum BuildStage {
+    /// Template registered and push credentials issued; image not uploaded yet.
+    WaitingPush,
+    /// Image has been pushed and the CubeMaster pipeline is running.
+    Building,
+    /// The image-build pipeline completed successfully.
+    Ready,
+    /// The image-build pipeline failed.
+    Error,
+}
+
+impl BuildStage {
+    /// Lowercase string form of the stage ("waiting", "building", ...).
+    pub fn as_str(self) -> &'static str {
+        match self {
+            Self::WaitingPush => "waiting",
+            Self::Building => "building",
+            Self::Ready => "ready",
+            Self::Error => "error",
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+#[allow(dead_code)] // NOTE(review): presumably some fields are write-only for now — confirm
+pub struct BuildContext {
+    pub template_id: String,
+    pub build_id: String,
+    /// Original create request — replayed when the client calls
+    /// `POST /templates/{tid}/builds/{bid}`.
+    pub create_request: Arc<CreateTemplateRequest>,
+    /// Registry credentials issued at create time. Pull host (used by
+    /// CubeMaster) is encoded into `image_ref` so the rest of the system
+    /// stays oblivious of registry internals.
+    pub credential: RegistryCredential,
+    /// Image reference CubeMaster will pull from once the client has pushed.
+    pub image_ref: String,
+    /// CubeMaster `jobID` — empty until the build is actually dispatched.
+    pub job_id: String,
+    /// Append-only log lines (timestamps + plain message).
+    pub logs: Vec<BuildLogLine>,
+    pub stage: BuildStage,
+    pub progress: i32,
+    pub message: String,
+    pub created_at: DateTime<Utc>,
+
+    // ── V3 protocol-only metadata (populated by POST /v3/templates) ────────
+    /// Template name (E2B `name`), e.g. "my-template" or "my-template:v1".
+    pub name: String,
+    /// Tag list assigned at create time; the trailing ":tag" of `name` is
+    /// prepended to this list when present.
+    pub tags: Vec<String>,
+    /// CPU cores requested via E2B `cpuCount`.
+    pub cpu_count: u32,
+    /// Memory in MiB requested via E2B `memoryMB`.
+    pub memory_mb: u32,
+    /// Aliases list returned to the client (currently == [name without tag]).
+    pub aliases: Vec<String>,
+}
+
+#[derive(Debug, Clone)] // one timestamped entry of `BuildContext::logs`
+pub struct BuildLogLine {
+    pub timestamp: DateTime<Utc>,
+    pub line: String,
+}
+
+/// Thread-safe, in-process build registry.
+#[derive(Clone, Default)]
+pub struct BuildRegistry {
+    inner: Arc<DashMap<String, BuildContext>>,
+}
+
+impl BuildRegistry {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Register a brand-new build attempt under a freshly allocated
+    /// `bld-<uuid>` id. Returns a clone of the stored context (which carries
+    /// that `build_id`) for read-only use by the caller.
+    pub fn create(
+        &self,
+        template_id: String,
+        request: CreateTemplateRequest,
+        credential: RegistryCredential,
+        image_ref: String,
+    ) -> BuildContext {
+        let build_id = format!("bld-{}", Uuid::new_v4().simple());
+        let ctx = BuildContext {
+            template_id: template_id.clone(),
+            build_id: build_id.clone(),
+            create_request: Arc::new(request),
+            credential,
+            image_ref,
+            job_id: String::new(),
+            logs: Vec::new(),
+            stage: BuildStage::WaitingPush,
+            progress: 0,
+            message: "build registered, waiting for image push".to_string(),
+            created_at: Utc::now(),
+            name: String::new(),
+            tags: Vec::new(),
+            cpu_count: 0,
+            memory_mb: 0,
+            aliases: Vec::new(),
+        };
+
+        // The bid entry is authoritative; the (tid, bid) entry mirrors it.
+        self.inner.insert(build_id.clone(), ctx.clone());
+        self.inner.insert(compose_key(&template_id, &build_id), ctx.clone());
+        ctx
+    }
+
+    pub fn get(&self, build_id: &str) -> Option<BuildContext> {
+        self.inner.get(build_id).map(|r| r.value().clone())
+    }
+
+    pub fn get_by_pair(&self, template_id: &str, build_id: &str) -> Option<BuildContext> {
+        self.inner
+            .get(build_id) // authoritative copy; the (tid, bid) entry only mirrors it
+            .or_else(|| self.inner.get(&compose_key(template_id, build_id)))
+            .map(|r| r.value().clone())
+    }
+
+    /// Mutate a build in place under its entry lock (concurrent updates are
+    /// no longer lost), then mirror the result to the `(tid, bid)` index.
+    pub fn update<F>(&self, build_id: &str, mutate: F) -> Option<BuildContext>
+    where
+        F: FnOnce(&mut BuildContext),
+    {
+        let mut entry = self.inner.get_mut(build_id)?;
+        mutate(entry.value_mut()); // runs under the lock: must not re-enter the registry
+        let ctx = entry.value().clone();
+        drop(entry); // release the shard lock before touching another key
+        self.inner.insert(compose_key(&ctx.template_id, &ctx.build_id), ctx.clone());
+        Some(ctx)
+    }
+
+    /// Append one log line (no-op for an unknown build); keeps at most the newest 10k lines.
+    pub fn append_log(&self, build_id: &str, line: impl Into<String>) {
+        let line = line.into();
+        self.update(build_id, |ctx| {
+            ctx.logs.push(BuildLogLine {
+                timestamp: Utc::now(),
+                line,
+            });
+            const MAX_LOGS: usize = 10_000;
+            if ctx.logs.len() > MAX_LOGS {
+                let excess = ctx.logs.len() - MAX_LOGS;
+                ctx.logs.drain(0..excess);
+            }
+        });
+    }
+}
+
+fn compose_key(template_id: &str, build_id: &str) -> String {
+    [template_id, build_id].join("::") // "<template_id>::<build_id>"
+}
diff --git a/CubeAPI/src/services/mod.rs b/CubeAPI/src/services/mod.rs
index 29a4edf73..43cb524a7 100644
--- a/CubeAPI/src/services/mod.rs
+++ b/CubeAPI/src/services/mod.rs
@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
+pub mod builds;
 pub mod cluster;
 pub mod sandboxes;
 pub mod snapshots;
@@ -15,10 +16,13 @@ pub struct AppServices {
     pub sandboxes: sandboxes::SandboxService,
     pub snapshots: snapshots::SnapshotService,
     pub templates: templates::TemplateService,
+    #[allow(dead_code)]
+    pub builds: builds::BuildRegistry,
 }
 
 impl AppServices {
     pub fn new(config: &ServerConfig, cubemaster: CubeMasterClient) -> Self {
+        let builds = builds::BuildRegistry::new();
         Self {
             cluster: cluster::ClusterService::new(cubemaster.clone()),
             sandboxes: sandboxes::SandboxService::new(
@@ -30,7 +34,13 @@ impl AppServices {
                 cubemaster.clone(),
                 config.instance_type.clone(),
             ),
-            templates: templates::TemplateService::new(cubemaster, config.instance_type.clone()),
+            templates: templates::TemplateService::new(
+                cubemaster,
+                config.instance_type.clone(),
+                builds.clone(),
+                config.clone(),
+            ),
+            builds,
         }
     }
 }
diff --git a/CubeAPI/src/services/sandboxes.rs b/CubeAPI/src/services/sandboxes.rs
index 0b56b4efb..28d69c643 100644
--- a/CubeAPI/src/services/sandboxes.rs
+++ b/CubeAPI/src/services/sandboxes.rs
@@ -7,7 +7,7 @@ use std::collections::HashMap;
 use uuid::Uuid;
 
 use crate::{
-    constants::ENVD_VERSION,
+    constants::{ENVD_PORT_STR, ENVD_VERSION},
     cubemaster::{
         datetime_from_unix_nanos, extract_template_id, CreateSandboxRequest, CubeMasterClient,
         CubeMasterError, CubeVSContext, DeleteSandboxRequest, ListSandboxRequest, SandboxInfo,
@@ -27,6 +27,17 @@ const RET_CODE_NOT_FOUND: i32 = 130404;
 const RET_CODE_CONFLICT: i32 = 130409;
 const HOSTDIR_MOUNT_KEY: &str = "host-mount";
 
+/// CubeMaster annotation key for the list of TCP ports we want cubelet to
+/// expose on the host (colon-separated, e.g. `"49983:8080"`). The port list
+/// is consumed in `CubeMaster/pkg/service/sandbox/util.go::getExposedPorts`
+/// and ends up in the per-sandbox redis metadata read by cube-proxy.
+const ANNO_EXPOSED_PORTS: &str = "com.exposed_ports";
+
+/// Optional `metadata` key that lets callers override the exposed port list
+/// for a specific sandbox (colon-separated). When absent we just publish the
+/// default envd port so the e2b SDK can connect.
+const META_EXPOSED_PORTS: &str = "exposed-ports";
+
 #[derive(Clone)]
 pub struct SandboxService {
     cubemaster: CubeMasterClient,
@@ -126,12 +137,28 @@ impl SandboxService {
             ),
         ]);
 
+        // The e2b SDK always talks to envd on port 49983 via the
+        // `<port>-<sandbox_id>.<domain>` host scheme. cube-proxy looks up the
+        // host-port mapping for that container port in redis, which is only
+        // populated when the sandbox creation request advertises it through
+        // the `com.exposed_ports` annotation. Keep this annotation in sync,
+        // but allow callers to override the list via `metadata.exposed-ports`
+        // (colon-separated) when they need to expose more ports.
         let labels = body.metadata.map(|mut meta| {
             if let Some(value) = meta.remove(HOSTDIR_MOUNT_KEY) {
                 annotations.insert(HOSTDIR_MOUNT_KEY.to_string(), value);
             }
+            if let Some(raw) = meta.remove(META_EXPOSED_PORTS) {
+                annotations.insert(
+                    ANNO_EXPOSED_PORTS.to_string(),
+                    merge_exposed_ports(&raw),
+                );
+            }
             meta
         });
+        annotations
+            .entry(ANNO_EXPOSED_PORTS.to_string())
+            .or_insert_with(|| ENVD_PORT_STR.to_string());
 
         let req = CreateSandboxRequest {
             request_id: new_request_id(),
@@ -146,15 +173,57 @@ impl SandboxService {
             cubevs_context: build_cubevs_context(body.allow_internet_access, body.network.as_ref()),
         };
 
+        tracing::info!(
+            template_id = %template_id,
+            request_id = %req.request_id,
+            instance_type = %req.instance_type,
+            exposed_ports = %req.annotations
+                .get(ANNO_EXPOSED_PORTS)
+                .cloned()
+                .unwrap_or_else(|| "<unset>".to_string()),
+            annotations = ?req.annotations,
+            "creating sandbox from template"
+        );
+
         let resp = self
             .cubemaster
             .create_sandbox(&req)
             .await
-            .map_err(internal_error)?;
+            .map_err(|e| {
+                tracing::error!(
+                    template_id = %template_id,
+                    request_id = %req.request_id,
+                    error = %e,
+                    "cubemaster create_sandbox transport failed"
+                );
+                internal_error(format!(
+                    "cubemaster transport failed (templateID={}, requestID={}): {}",
+                    template_id, req.request_id, e
+                ))
+            })?;
 
-        resp.ret.into_result().map_err(internal_error)?;
+        let sandbox_id = resp.sandbox_id.clone();
+        let resp_request_id = resp.request_id.clone();
+        if let Err(e) = resp.ret.into_result() {
+            tracing::error!(
+                template_id = %template_id,
+                request_id = %req.request_id,
+                cubemaster_error = %e,
+                "cubemaster rejected sandbox creation \
+                 — likely a microVM-level failure (rootfs mount / agent restore). \
+                 Inspect cube-agent and cubelet logs for stderr from \
+                 'do_exec_mount' / 'start_exec_process'."
+            );
+            return Err(internal_error(format!(
+                "sandbox creation failed for templateID={} (requestID={}): {} \
+                 — this happens at the microVM layer (cube-agent restore/mount); \
+                 check cube-agent / cubelet logs on the host for the underlying \
+                 mount error",
+                template_id, req.request_id, e
+            )));
+        }
 
-        Ok(self.sandbox_response(template_id, resp.sandbox_id, resp.request_id))
+        Ok(self.sandbox_response(template_id, sandbox_id, resp_request_id))
     }
 
     pub async fn kill_sandbox(&self, sandbox_id: &str) -> AppResult<()> {
@@ -470,6 +539,33 @@ fn internal_error(error: impl std::fmt::Display) -> AppError {
     AppError::Internal(anyhow::anyhow!(error.to_string()))
 }
 
+/// Merge a caller-supplied colon-separated port list with the mandatory envd
+/// port (49983), preserving order, removing duplicates and silently dropping
+/// entries that are not valid TCP ports (1-65535). The result is the value to
+/// set on the `com.exposed_ports` annotation that CubeMaster understands.
+fn merge_exposed_ports(raw: &str) -> String {
+    let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
+    let mut ordered: Vec<String> = Vec::new();
+    for part in raw.split(':') {
+        // Skip bad entries silently — CubeMaster's getExposedPorts would
+        // otherwise reject the whole sandbox creation with an InvalidParam
+        // error. `NonZeroU16` also rejects 0 and anything above 65535.
+        let port = match part.trim().parse::<std::num::NonZeroU16>() {
+            Ok(port) => port,
+            Err(_) => continue,
+        };
+        // Dedupe on the canonical decimal form so "080" and "80" collapse.
+        let canonical = port.to_string();
+        if seen.insert(canonical.clone()) {
+            ordered.push(canonical);
+        }
+    }
+    if seen.insert(ENVD_PORT_STR.to_string()) {
+        ordered.push(ENVD_PORT_STR.to_string());
+    }
+    ordered.join(":")
+}
+
 fn sandbox_not_found_or_internal(e: CubeMasterError, sandbox_id: &str) -> AppError {
     if e.is_not_found() {
         AppError::NotFound(format!("sandbox {} not found", sandbox_id))
@@ -635,10 +731,40 @@ pub(crate) fn build_cubevs_context(
 mod tests {
     use std::collections::HashMap;
 
-    use super::{build_cubevs_context, filter_by_metadata, from_cubemaster_info};
+    use super::{build_cubevs_context, filter_by_metadata, from_cubemaster_info, merge_exposed_ports};
     use crate::cubemaster::{ListSandboxResponse, SandboxInfo};
     use crate::models::{SandboxNetworkConfig, SandboxState};
 
+    #[test]
+    fn merge_exposed_ports_appends_envd_port_when_missing() {
+        // Caller supplied two ports but no envd port → 49983 must be appended.
+        assert_eq!(merge_exposed_ports("80:8080"), "80:8080:49983");
+    }
+
+    #[test]
+    fn merge_exposed_ports_keeps_envd_port_position_when_already_listed() {
+        // 49983 already listed first → no duplicate, original order preserved.
+        assert_eq!(merge_exposed_ports("49983:8080"), "49983:8080");
+    }
+
+    #[test]
+    fn merge_exposed_ports_skips_non_numeric_and_dedupes() {
+        // Empty segments, dupes and garbage tokens get filtered without
+        // poisoning the request (CubeMaster would otherwise reject the whole
+        // sandbox create).
+        assert_eq!(
+            merge_exposed_ports("80::80:abc:8080:49983"),
+            "80:8080:49983"
+        );
+    }
+
+    #[test]
+    fn merge_exposed_ports_handles_empty_input() {
+        // Empty caller input still publishes the envd port so cube-proxy can
+        // route the e2b SDK's `49983-<sandbox>.<domain>` host.
+        assert_eq!(merge_exposed_ports(""), "49983");
+    }
+
     #[test]
     fn metadata_filter_matches_all_pairs() {
         let metadata = HashMap::from([
diff --git a/CubeAPI/src/services/templates.rs b/CubeAPI/src/services/templates.rs
index 605769074..e65473145 100644
--- a/CubeAPI/src/services/templates.rs
+++ b/CubeAPI/src/services/templates.rs
@@ -2,9 +2,12 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
+use std::collections::HashMap;
+
 use uuid::Uuid;
 
 use crate::{
+    config::ServerConfig,
     cubemaster::{
         CreateTemplateContainerOverrides, CreateTemplateCubeVSContext, CreateTemplateEnv,
         CreateTemplateFromImageReq, CreateTemplateResources, CubeMasterClient, CubeMasterError,
@@ -13,22 +16,34 @@ use crate::{
     },
     error::{AppError, AppResult},
     models::{
-        CreateTemplateRequest, RebuildTemplateRequest, TemplateBuildJob, TemplateBuildStatus,
-        TemplateDetail, TemplateSummary,
+        CreateTemplateRequest, RebuildTemplateRequest, RegistryCredential, TemplateBuildJob,
+        TemplateBuildStatus, TemplateDetail, TemplateSummary, V2TemplateBuildStart,
+        V3BuildLogEntry, V3BuildStatusReason, V3TemplateBuildInfo, V3TemplateBuildRequest,
+        V3TemplateBuildResponse,
     },
+    services::builds::{BuildRegistry, BuildStage},
 };
 
 #[derive(Clone)]
 pub struct TemplateService {
     cubemaster: CubeMasterClient,
     instance_type: String,
+    builds: BuildRegistry,
+    config: ServerConfig,
 }
 
 impl TemplateService {
-    pub fn new(cubemaster: CubeMasterClient, instance_type: String) -> Self {
+    pub fn new(
+        cubemaster: CubeMasterClient,
+        instance_type: String,
+        builds: BuildRegistry,
+        config: ServerConfig,
+    ) -> Self {
         Self {
             cubemaster,
             instance_type,
+            builds,
+            config,
         }
     }
 
@@ -74,13 +89,7 @@ impl TemplateService {
             .as_ref()
             .and_then(|v| v.get("network_type"))
             .and_then(|v| v.as_str())
-            .and_then(|s| {
-                if s.is_empty() {
-                    None
-                } else {
-                    Some(s.to_string())
-                }
-            });
+            .and_then(|s| if s.is_empty() { None } else { Some(s.to_string()) });
         let allow_internet_access = resp
             .create_request
             .as_ref()
@@ -101,45 +110,147 @@ impl TemplateService {
         })
     }
 
+    /// Create a new template.
+    ///
+    /// Two paths converge here; E2B mode wins whenever `dockerfile` is set:
+    ///
+    ///  - **CubeSandbox-native** (`image` set, no `dockerfile`): immediately
+    ///    dispatches to CubeMaster `POST /cube/template/from-image` and returns
+    ///    the resulting job. No registry credential is issued.
+    ///
+    ///  - **E2B-standard** (`dockerfile` set, or `image` absent): allocates a
+    ///    fresh `buildID`, returns the docker-push credential pointing at the
+    ///    bundled OCI registry, and *does not* trigger CubeMaster yet — the
+    ///    client must complete the docker push and then call
+    ///    `POST /templates/{tid}/builds/{bid}` to dispatch the actual rootfs
+    ///    build.
     pub async fn create_template(
         &self,
         body: CreateTemplateRequest,
     ) -> AppResult<TemplateBuildJob> {
-        if body.image.trim().is_empty() {
-            return Err(AppError::BadRequest("image is required".to_string()));
+        if body.dockerfile.is_some() || body.image.is_none() {
+            return self.create_template_e2b_mode(body).await;
         }
+        self.create_template_native_mode(body).await
+    }
 
-        let dns_servers = validate_dns_servers(body.dns.as_deref())?;
-        let container_overrides = build_template_container_overrides(&body, dns_servers.as_deref());
-        let cubevs_context = build_template_cubevs_context(&body);
-
-        let req = CreateTemplateFromImageReq {
-            request_id: new_request_id(),
-            instance_type: body
-                .instance_type
-                .unwrap_or_else(|| self.instance_type.clone()),
-            // template_id is intentionally left empty — CubeMaster always
-            // auto-generates it with the "tpl-" prefix via
-            // normalizeTemplateImageRequest.
-            template_id: String::new(),
-            source_image_ref: body.image.trim().to_string(),
-            writable_layer_size: body.writable_layer_size,
-            exposed_ports: body.exposed_ports,
-            network_type: non_empty_option(body.network_type),
-            registry_username: non_empty_option(body.registry_username),
-            registry_password: non_empty_option(body.registry_password),
-            distribution_scope: non_empty_vec(body.nodes),
-            container_overrides,
-            cubevs_context,
-        };
-
+    /// Native path: the request's `image` already names an OCI reference, so
+    /// the build is handed to CubeMaster right away.
+    async fn create_template_native_mode(
+        &self,
+        body: CreateTemplateRequest,
+    ) -> AppResult<TemplateBuildJob> {
+        let source_ref = body.image.as_deref().map(str::trim).unwrap_or_default().to_string();
+        if source_ref.is_empty() {
+            return Err(AppError::BadRequest("image is required".to_string()));
+        }
+        // Reject malformed DNS entries here so the caller gets a clear error
+        // before anything is sent to CubeMaster.
+        validate_dns_servers(body.dns.as_deref())?;
+        let req = self.build_cubemaster_request(&body, source_ref);
         let resp = self
             .cubemaster
             .create_template_from_image(&req)
             .await
             .map_err(map_err)?;
+        Ok(to_job(resp, None))
+    }
+
+    /// Path 2: E2B-standard — allocate `(templateID, buildID)`, return docker
+    /// push credentials. Actual build is dispatched by `start_template_build`.
+    async fn create_template_e2b_mode(
+        &self,
+        mut body: CreateTemplateRequest,
+    ) -> AppResult<TemplateBuildJob> {
+        let upstream = self.config.registry_upstream.as_deref().unwrap_or("");
+        if upstream.trim().is_empty() {
+            return Err(AppError::NotImplemented(
+                "registry upstream is not configured: set CUBE_API_REGISTRY_UPSTREAM \
+                 to enable e2b-style template build (dockerfile push)"
+                    .to_string(),
+            ));
+        }
+
+        // Allocate / honour template id.
+        let template_id = match body.template_id.trim() {
+            "" => format!("tpl-{}", Uuid::new_v4().simple()),
+            requested => requested.to_string(),
+        };
+        // The deferred dispatch forwards the cached request's `template_id` to
+        // CubeMaster, so store the resolved id there before caching the request.
+        body.template_id = template_id.clone();
 
-        Ok(to_job(resp))
+        // Decide repo + tag and the public URL the CLI should push to.
+        let repo_prefix = match self.config.registry_repo_prefix.trim() {
+            "" => "e2b",
+            prefix => prefix,
+        };
+        let public_host = self
+            .config
+            .registry_public_host
+            .clone()
+            .or_else(|| host_from_url(upstream))
+            .unwrap_or_else(|| "localhost".to_string());
+
+        let credential_url = if upstream.starts_with("https://") || upstream.starts_with("http://") {
+            // strip path, keep scheme://host:port
+            base_url(upstream).to_string()
+        } else {
+            format!("https://{}", public_host)
+        };
+
+        let credential = RegistryCredential {
+            url: credential_url,
+            repository: format!("{}/{}", repo_prefix, template_id),
+            username: "_token".to_string(),
+            password: self
+                .config
+                .registry_token
+                .clone()
+                .unwrap_or_else(|| "_anon".to_string()),
+        };
+
+        // Image ref CubeMaster will pull from once push is complete.
+        let pull_host = self
+            .config
+            .registry_pull_host
+            .clone()
+            .or_else(|| host_from_url(upstream))
+            .unwrap_or_else(|| public_host.clone());
+        let image_ref_template = format!("{}/{}/{}", pull_host, repo_prefix, template_id);
+
+        // Reserve the build context up-front; the buildID becomes the docker tag.
+        let ctx = self.builds.create(
+            template_id.clone(),
+            body,
+            credential.clone(),
+            image_ref_template.clone(),
+        );
+        let image_ref_full = format!("{}:{}", image_ref_template, ctx.build_id);
+
+        // Patch the stored ref to include the buildID-as-tag now that we know it.
+        self.builds.update(&ctx.build_id, |c| {
+            c.image_ref = image_ref_full.clone();
+        });
+        self.builds.append_log(
+            &ctx.build_id,
+            format!(
+                "[register] templateID={} buildID={} repo={}",
+                template_id, ctx.build_id, credential.repository
+            ),
+        );
+
+        Ok(TemplateBuildJob {
+            job_id: ctx.build_id.clone(),
+            template_id: template_id.clone(),
+            build_id: ctx.build_id.clone(),
+            status: "accepted".to_string(),
+            phase: "waiting".to_string(),
+            progress: 0,
+            error_message: String::new(),
+            upload_url: Some(credential.url.clone()),
+            registry: Some(credential),
+        })
     }
 
     pub async fn rebuild_template(
@@ -154,8 +265,7 @@ impl TemplateService {
         };
 
         let resp = self.cubemaster.redo_template(&req).await.map_err(map_err)?;
-
-        Ok(to_job(resp))
+        Ok(to_job(resp, None))
     }
 
     pub async fn delete_template(
@@ -179,23 +289,124 @@ impl TemplateService {
         Ok(())
     }
 
-    pub async fn start_template_build(&self, template_id: String) -> AppResult<TemplateBuildJob> {
+    /// Dispatch the CubeMaster build for a `(template_id, build_id)` pair held
+    /// in the in-memory build registry, using its cached request and image ref.
+    /// When `build_id` is `None` or the pair is not registered (e.g. the CLI
+    /// targets a CubeSandbox-native template), falls back to `redo_template`.
+    pub async fn start_template_build(
+        &self,
+        template_id: String,
+        build_id: Option<String>,
+    ) -> AppResult<TemplateBuildJob> {
+        if let Some(bid) = build_id.as_deref() {
+            if let Some(ctx) = self.builds.get_by_pair(&template_id, bid) {
+                self.builds.append_log(
+                    bid,
+                    format!("[dispatch] image_ref={}", ctx.image_ref),
+                );
+
+                let req = self
+                    .build_cubemaster_request(&ctx.create_request, ctx.image_ref.clone());
+                let resp = self
+                    .cubemaster
+                    .create_template_from_image(&req)
+                    .await
+                    .map_err(map_err)?;
+
+                let job = resp.job.clone().unwrap_or_else(default_template_job);
+                let job_id = job.job_id.clone();
+                self.builds.update(bid, |c| {
+                    c.job_id = job_id.clone();
+                    c.stage = BuildStage::Building;
+                    c.message = "build dispatched to cubemaster".to_string();
+                });
+
+                return Ok(to_job(resp, Some(bid.to_string())));
+            }
+        }
+
+        // No registered build context: keep the legacy `redo` behaviour.
         let req = RedoTemplateReq {
             request_id: new_request_id(),
             template_id,
             extra: Default::default(),
         };
-
         let resp = self.cubemaster.redo_template(&req).await.map_err(map_err)?;
-
-        Ok(to_job(resp))
+        Ok(to_job(resp, build_id))
     }
 
     pub async fn get_template_build_status(
         &self,
         template_id: &str,
         build_id: &str,
+        logs_offset: i32,
     ) -> AppResult<TemplateBuildStatus> {
+        // E2B mode: serve from the in-memory build registry, falling back to
+        // CubeMaster for the canonical job state when it's been dispatched.
+        if let Some(ctx) = self.builds.get_by_pair(template_id, build_id) {
+            let mut status = ctx.stage.as_str().to_string();
+            let mut progress = ctx.progress;
+            let mut message = ctx.message.clone();
+
+            if !ctx.job_id.is_empty() {
+                if let Ok(remote) = self
+                    .cubemaster
+                    .get_template_build_status(&ctx.job_id)
+                    .await
+                {
+                    status = remap_cubemaster_status(&remote.status);
+                    progress = remote.progress;
+                    message = remote.message.clone();
+
+                    // Persist progress / terminal state into the local registry.
+                    let new_stage = match status.as_str() {
+                        "ready" => BuildStage::Ready,
+                        "error" => BuildStage::Error,
+                        _ => BuildStage::Building,
+                    };
+                    self.builds.update(build_id, |c| {
+                        c.stage = new_stage;
+                        c.progress = progress;
+                        c.message = message.clone();
+                    });
+
+                    if !remote.message.is_empty() {
+                        self.builds.append_log(
+                            build_id,
+                            format!("[{}] {}", remote.status, remote.message),
+                        );
+                    }
+                }
+            }
+
+            // Slice logs starting at the requested offset.
+            let total = ctx.logs.len() as i32;
+            let offset = logs_offset.max(0).min(total);
+            let lines: Vec<String> = self
+                .builds
+                .get(build_id)
+                .map(|c| {
+                    c.logs
+                        .iter()
+                        .skip(offset as usize)
+                        .map(|l| format!("{} {}", l.timestamp.to_rfc3339(), l.line))
+                        .collect()
+                })
+                .unwrap_or_default();
+            let next_offset = offset + lines.len() as i32;
+
+            return Ok(TemplateBuildStatus {
+                build_id: build_id.to_string(),
+                template_id: template_id.to_string(),
+                status,
+                progress,
+                message,
+                logs: lines,
+                logs_offset: Some(next_offset),
+            });
+        }
+
+        // Legacy native mode: forward to CubeMaster directly (no log buffer).
         let resp = self
             .cubemaster
             .get_template_build_status(build_id)
@@ -205,27 +416,713 @@ impl TemplateService {
         Ok(TemplateBuildStatus {
             build_id: string_or(resp.build_id, build_id),
             template_id: string_or(resp.template_id, template_id),
-            status: resp.status,
+            status: remap_cubemaster_status(&resp.status),
             progress: resp.progress,
             message: resp.message,
+            logs: Vec::new(),
+            logs_offset: None,
+        })
+    }
+
+    /// Build status plus buffered log lines from `offset`, rendered as a JSON object.
+    pub async fn get_template_build_logs(
+        &self,
+        template_id: &str,
+        build_id: &str,
+        offset: i32,
+    ) -> AppResult<serde_json::Value> {
+        let snapshot = self
+            .get_template_build_status(template_id, build_id, offset)
+            .await?;
+        Ok(serde_json::json!({
+            "buildID": snapshot.build_id,
+            "templateID": snapshot.template_id,
+            "status": snapshot.status,
+            "progress": snapshot.progress,
+            "logs": snapshot.logs,
+            "logsOffset": snapshot.logs_offset,
+        }))
+    }
+
+    /// Record that the image for `build_id` reached the registry (meant for the
+    /// registry handler after the manifest PUT). Repeat calls only add a log line.
+    pub fn mark_image_pushed(&self, build_id: &str) {
+        self.builds.update(build_id, |entry| {
+            entry.append_log_inline("[push] image upload complete");
+            if let BuildStage::WaitingPush = entry.stage {
+                entry.message = String::from("image uploaded, waiting for build dispatch");
+                entry.stage = BuildStage::Building;
+            }
+        });
+    }
+
+    /// Assemble the CubeMaster create-from-image payload for `body`.
+    ///
+    /// Shared by every dispatch path so their behaviour stays in lockstep.
+    ///
+    /// * `body` – the caller's template request; optional fields that are
+    ///   unset or blank are omitted from the payload.
+    /// * `image_ref` – forwarded unchanged as `source_image_ref`.
+    ///
+    /// `container_overrides` and `cubevs_context` are only emitted when at
+    /// least one of their inputs is present, and `writable_layer_size` falls
+    /// back to the configured default when the request leaves it blank.
+    fn build_cubemaster_request(
+        &self,
+        body: &CreateTemplateRequest,
+        image_ref: String,
+    ) -> CreateTemplateFromImageReq {
+        let probe = build_probe(body);
+        let resources = build_resources(body);
+        let envs = merge_envs(body);
+        let command = non_empty_vec(body.command.clone());
+        let args = non_empty_vec(body.args.clone());
+
+        // DNS entries are expected to be validated by the caller; here they
+        // are only trimmed, with blank entries dropped.
+        let dns_servers: Vec<String> = body
+            .dns
+            .iter()
+            .flatten()
+            .map(|server| server.trim().to_string())
+            .filter(|server| !server.is_empty())
+            .collect();
+        let dns_config = (!dns_servers.is_empty()).then(|| DnsConfig {
+            servers: dns_servers,
+            searches: Vec::new(),
+        });
+
+        // Send container overrides only when something was actually requested.
+        let has_overrides = probe.is_some()
+            || resources.is_some()
+            || envs.is_some()
+            || command.is_some()
+            || args.is_some()
+            || dns_config.is_some();
+        let container_overrides = has_overrides.then(|| CreateTemplateContainerOverrides {
+            command,
+            args,
+            probe,
+            resources,
+            envs,
+            dns_config,
+        });
+
+        // Same rule for the network policy block.
+        let allow_out = body.allow_out.clone().unwrap_or_default();
+        let deny_out = body.deny_out.clone().unwrap_or_default();
+        let has_network_policy =
+            body.allow_internet_access.is_some() || !allow_out.is_empty() || !deny_out.is_empty();
+        let cubevs_context = has_network_policy.then(|| CreateTemplateCubeVSContext {
+            allow_internet_access: body.allow_internet_access,
+            allow_out,
+            deny_out,
+        });
+
+        // CubeMaster treats `writable_layer_size` as required, and the E2B V3
+        // SDK never sends it. Use the request's value when it is non-blank,
+        // otherwise the configured default (env
+        // CUBE_API_DEFAULT_WRITABLE_LAYER_SIZE, "1G" by default); a blank
+        // default still yields `None`.
+        let is_set = |size: &String| !size.trim().is_empty();
+        let writable_layer_size = body
+            .writable_layer_size
+            .clone()
+            .filter(is_set)
+            .or_else(|| Some(self.config.default_writable_layer_size.clone()).filter(is_set));
+
+        CreateTemplateFromImageReq {
+            request_id: new_request_id(),
+            instance_type: body
+                .instance_type
+                .clone()
+                .unwrap_or_else(|| self.instance_type.clone()),
+            template_id: body.template_id.clone(),
+            source_image_ref: image_ref,
+            writable_layer_size,
+            exposed_ports: body.exposed_ports.clone(),
+            network_type: non_empty_option(body.network_type.clone()),
+            registry_username: non_empty_option(body.registry_username.clone()),
+            registry_password: non_empty_option(body.registry_password.clone()),
+            distribution_scope: non_empty_vec(body.nodes.clone()),
+            container_overrides,
+            cubevs_context,
+        }
+    }
+
+    // ── V3 protocol (real e2b SDK contract) ────────────────────────────────
+
+    /// `POST /v3/templates` — register a template + build attempt.
+    ///
+    /// Returns the V3 envelope shape the SDK strictly expects. The templateID
+    /// is derived from the trimmed name via `stable_template_id`, so repeated
+    /// builds against one name reuse the same templateID (matching E2B's
+    /// "alias is also a primary key" semantics); the buildID comes from the registry.
+    pub fn v3_create_template(
+        &self,
+        body: V3TemplateBuildRequest,
+    ) -> AppResult<V3TemplateBuildResponse> {
+        // Resolve final name + tag list (the SDK packs "name:tag" or relies on
+        // the explicit `tags` array). A blank `name` falls back to `alias`.
+        let raw_name = body
+            .name
+            .as_deref()
+            .map(str::trim)
+            .filter(|s| !s.is_empty())
+            .or_else(|| body.alias.as_deref().map(str::trim).filter(|s| !s.is_empty()))
+            .ok_or_else(|| AppError::BadRequest("template name is required".to_string()))?;
+        // An empty inline tag ("name:") is dropped rather than kept in the name.
+        let (name_part, name_tag) = match raw_name.split_once(':') {
+            Some((name, tag)) => (name.trim(), Some(tag.trim()).filter(|t| !t.is_empty())),
+            None => (raw_name, None),
+        };
+        if name_part.is_empty() {
+            return Err(AppError::BadRequest("template name is required".to_string()));
+        }
+        let name_part = name_part.to_string();
+        let mut tags = body.tags.clone().unwrap_or_default();
+        if let Some(tag) = name_tag.map(str::to_string) {
+            if !tags.contains(&tag) {
+                tags.insert(0, tag);
+            }
+        }
+
+        let template_id = stable_template_id(&name_part);
+
+        // Build the legacy request shell so the V2 trigger step has uniform
+        // metadata regardless of whether create-time fields are sparse.
+        let create_req = CreateTemplateRequest {
+            template_id: template_id.clone(),
+            instance_type: None,
+            alias: Some(name_part.clone()),
+            team_id: body.team_id.clone(),
+            image: None,
+            dockerfile: None,
+            writable_layer_size: None,
+            exposed_ports: None,
+            probe_port: None,
+            probe_path: None,
+            cpu: None,
+            memory: None,
+            cpu_count: body.cpu_count,
+            memory_mb: body.memory_mb,
+            env: None,
+            env_vars: None,
+            allow_internet_access: None,
+            network_type: None,
+            nodes: None,
+            registry_username: None,
+            registry_password: None,
+            command: None,
+            args: None,
+            dns: None,
+            allow_out: None,
+            deny_out: None,
+            start_cmd: None,
+            ready_cmd: None,
+        };
+
+        // Reserve a build context. The registry credential only serves the OCI-push
+        // flow (`/v2/...` proxy); SDK V3 ships a tarball via `/templates/.../files/`.
+        let credential = self.issue_registry_credential(&template_id);
+        let pull_host = self
+            .config
+            .registry_pull_host
+            .clone()
+            .or_else(|| self.config.registry_upstream.as_deref().and_then(host_from_url))
+            .or_else(|| self.config.registry_public_host.clone())
+            .unwrap_or_else(|| "localhost".to_string());
+        let repo_prefix = match self.config.registry_repo_prefix.trim() {
+            "" => "e2b",
+            prefix => prefix,
+        };
+        let image_ref_template = format!("{}/{}/{}", pull_host, repo_prefix, template_id);
+
+        let ctx = self.builds.create(
+            template_id.clone(),
+            create_req,
+            credential,
+            image_ref_template.clone(),
+        );
+
+        let build_id = ctx.build_id.clone();
+        let cpu_count = body.cpu_count.unwrap_or(2);
+        let memory_mb = body.memory_mb.unwrap_or(1024);
+        self.builds.update(&build_id, |c| {
+            c.image_ref = format!("{}:{}", image_ref_template, build_id);
+            c.name = name_part.clone();
+            c.tags = tags.clone();
+            c.cpu_count = cpu_count;
+            c.memory_mb = memory_mb;
+            c.aliases = vec![name_part.clone()];
+            c.message = "template registered, awaiting build trigger".to_string();
+        });
+        self.builds.append_log(
+            &build_id,
+            format!(
+                "[register-v3] templateID={} buildID={} name={} cpu={} memMB={}",
+                template_id, build_id, name_part, cpu_count, memory_mb
+            ),
+        );
+
+        Ok(V3TemplateBuildResponse {
+            template_id,
+            build_id,
+            names: vec![name_part.clone()],
+            aliases: vec![name_part],
+            tags,
+            public: false,
         })
     }
 
-    pub async fn get_template_build_logs(&self, build_id: &str) -> AppResult<serde_json::Value> {
+    /// `GET /templates/{tid}/files/{hash}` — file-cache probe. Always answers
+    /// `present: true` so the SDK skips uploads; build contexts are not consumed yet.
+    pub fn v3_get_file_upload(
+        &self,
+        _template_id: &str,
+        _files_hash: &str,
+    ) -> AppResult<crate::models::V3TemplateFileUpload> {
+        Ok(crate::models::V3TemplateFileUpload {
+            present: true,
+            url: None,
+        })
+    }
+
+    /// `POST /v2/templates/{tid}/builds/{bid}` — the real "start build" call.
+    ///
+    /// At this point CubeMaster needs an OCI image reference. We resolve one
+    /// in this priority order:
+    ///
+    ///   1. `body.from_image`  — the standard E2B flow, e.g.
+    ///                            `python:3.11-slim`.
+    ///   2. `body.from_template` — another CubeSandbox template, passed on as
+    ///      a synthesised `cube://<templateID>` reference.
+    ///   3. The image ref cached at registration time,
+    ///      `<repo>/<templateID>:<buildID>` in the bundled registry.
+    ///
+    /// `start_cmd`/`ready_cmd` are copied into the cached request; a URL found
+    /// in `ready_cmd` is turned into an HttpGet probe (Exec is unsupported).
+    pub async fn v3_trigger_build(
+        &self,
+        template_id: String,
+        build_id: String,
+        body: V2TemplateBuildStart,
+    ) -> AppResult<()> {
+        let ctx = self
+            .builds
+            .get_by_pair(&template_id, &build_id)
+            .ok_or_else(|| {
+                AppError::NotFound(format!(
+                    "build {} of template {} is unknown — call POST /v3/templates first",
+                    build_id, template_id
+                ))
+            })?;
+
+        // Resolve the source image.
+        let source_image = if let Some(img) = body
+            .from_image
+            .as_ref()
+            .map(|s| s.trim().to_string())
+            .filter(|s| !s.is_empty())
+        {
+            img
+        } else if let Some(parent) = body
+            .from_template
+            .as_ref()
+            .map(|s| s.trim().to_string())
+            .filter(|s| !s.is_empty())
+        {
+            // Re-use an already-built CubeSandbox template as the base. We
+            // synthesise a CubeMaster reference of the form `cube://<tid>`,
+            // letting downstream callers resolve it. Adjust to your local
+            // convention if needed.
+            format!("cube://{}", parent)
+        } else if !ctx.image_ref.is_empty() {
+            ctx.image_ref.clone()
+        } else {
+            return Err(AppError::BadRequest(
+                "either fromImage, fromTemplate, or a previously-pushed image is required"
+                    .to_string(),
+            ));
+        };
+
+        // Patch the cached create_request with the V2-time fields and dispatch.
+        let mut create_req: CreateTemplateRequest = (*ctx.create_request).clone();
+        if create_req.start_cmd.is_none() {
+            create_req.start_cmd = body.start_cmd.clone();
+        }
+        if create_req.ready_cmd.is_none() {
+            create_req.ready_cmd = body.ready_cmd.clone();
+        }
+
+        self.builds.append_log(
+            &build_id,
+            format!(
+                "[dispatch-v3] from_image={} start_cmd={:?} ready_cmd={:?} steps={}",
+                source_image,
+                body.start_cmd.as_deref().unwrap_or(""),
+                body.ready_cmd.as_deref().unwrap_or(""),
+                body.steps.as_ref().map(|s| s.len()).unwrap_or(0),
+            ),
+        );
+
+        // Cubelet/CubeMaster only support TcpSocket | Ping | HttpGet probes,
+        // so the E2B `readyCmd` (a shell snippet) cannot be forwarded
+        // verbatim. To still honour the SDK's `wait_for_url(...)` semantics
+        // we attempt a best-effort parse of the well-known
+        // `http://<host>:<port>/<path>` form embedded in the readyCmd. When
+        // that succeeds (and the caller did not already pin probe_port/path
+        // via the v3 body), we synthesise an HttpGet probe so cubelet's
+        // `doProbe` blocks until the user process is actually listening
+        // before sandbox creation returns.
+        let ready_cmd = body
+            .ready_cmd
+            .as_deref()
+            .map(str::trim)
+            .filter(|s| !s.is_empty());
+        if let Some(cmd) = ready_cmd {
+            match parse_ready_url(cmd) {
+                Some((port, path)) if create_req.probe_port.is_none() => {
+                    create_req.probe_port = Some(port);
+                    if create_req.probe_path.is_none() {
+                        create_req.probe_path = Some(path.clone());
+                    }
+                    self.builds.append_log(
+                        &build_id,
+                        format!(
+                            "[dispatch-v3] readyCmd parsed → HttpGet probe on \
+                             port={} path={} (probe blocks sandbox creation \
+                             until ready)",
+                            port, path
+                        ),
+                    );
+                }
+                Some(_) => {
+                    // probe_port already set by caller — keep their override
+                    // but make the precedence explicit in the build log.
+                    self.builds.append_log(
+                        &build_id,
+                        "[dispatch-v3] readyCmd parsed but probePort was \
+                         supplied explicitly — keeping caller's probePort \
+                         and ignoring the URL inside readyCmd",
+                    );
+                }
+                None
+                    if create_req.probe_port.is_none()
+                        && create_req
+                            .exposed_ports
+                            .as_ref()
+                            .map(|p| p.is_empty())
+                            .unwrap_or(true) =>
+                {
+                    self.builds.append_log(
+                        &build_id,
+                        "[dispatch-v3] note: readyCmd is recorded but could \
+                         not be parsed into an HttpGet probe (only \
+                         `http://host:port/path` URLs are recognised); \
+                         supply `probePort` (or build with `exposedPorts`) \
+                         to enable readiness checks",
+                    );
+                }
+                None => {
+                    // Caller already supplied probe_port or exposed_ports;
+                    // build_probe() will pick those up on its own.
+                }
+            }
+        }
+
+        let req = self.build_cubemaster_request(&create_req, source_image.clone());
         let resp = self
             .cubemaster
-            .get_template_build_status(build_id)
+            .create_template_from_image(&req)
             .await
             .map_err(map_err)?;
 
-        let line = build_log_line(&resp.status, resp.progress, &resp.message);
+        let job = resp.job.unwrap_or_else(default_template_job);
+        let job_id = job.job_id.clone();
+        self.builds.update(&build_id, |c| {
+            c.job_id = job_id.clone();
+            c.stage = BuildStage::Building;
+            c.message = "build dispatched to cubemaster".to_string();
+        });
 
-        Ok(serde_json::json!({
-            "buildID": build_id,
-            "status": resp.status,
-            "progress": resp.progress,
-            "lines": [line],
-        }))
+        Ok(())
+    }
+
+    /// `GET /templates/{tid}/builds/{bid}/status` — V3 status envelope.
+    ///
+    /// `limit <= 0` selects the default page of 100 log lines. Each log entry
+    /// carries the timestamp recorded when the line was written, when parseable.
+    pub async fn v3_get_build_status(
+        &self,
+        template_id: &str,
+        build_id: &str,
+        logs_offset: i32,
+        limit: i32,
+    ) -> AppResult<V3TemplateBuildInfo> {
+        // Reuse the existing get_template_build_status (which already knows
+        // how to refresh against CubeMaster), then convert into the V3 shape.
+        let internal = self
+            .get_template_build_status(template_id, build_id, logs_offset)
+            .await?;
+
+        let limit = if limit <= 0 { 100 } else { limit as usize };
+        let logs: Vec<String> = internal.logs.iter().take(limit).cloned().collect();
+        // Lines arrive as "<rfc3339> <text>"; recover that timestamp instead of
+        // stamping every entry with the time of this request.
+        let log_entries: Vec<V3BuildLogEntry> = logs
+            .iter()
+            .map(|line| V3BuildLogEntry {
+                timestamp: line
+                    .split_once(' ')
+                    .and_then(|(ts, _)| chrono::DateTime::parse_from_rfc3339(ts).ok())
+                    .map(|ts| ts.with_timezone(&chrono::Utc))
+                    .unwrap_or_else(chrono::Utc::now),
+                message: line.clone(),
+                level: "info".to_string(),
+            })
+            .collect();
+
+        let status = match internal.status.as_str() {
+            "ready" => "ready",
+            "error" => "error",
+            "waiting" | "pending" => "waiting",
+            _ => "building",
+        }
+        .to_string();
+
+        let reason = (status == "error").then(|| V3BuildStatusReason {
+            step_index: None,
+            message: if internal.message.is_empty() {
+                "build failed".to_string()
+            } else {
+                internal.message.clone()
+            },
+        });
+
+        Ok(V3TemplateBuildInfo {
+            build_id: internal.build_id,
+            template_id: internal.template_id,
+            status,
+            logs,
+            log_entries,
+            reason,
+        })
+    }
+
+    fn issue_registry_credential(&self, template_id: &str) -> RegistryCredential {
+        let upstream = self.config.registry_upstream.as_deref().unwrap_or("");
+        let url = if upstream.starts_with("http://") || upstream.starts_with("https://") {
+            base_url(upstream)
+        } else if let Some(host) = self.config.registry_public_host.clone() {
+            format!("https://{}", host)
+        } else {
+            "http://localhost".to_string()
+        };
+        let repo_prefix = if self.config.registry_repo_prefix.trim().is_empty() {
+            "e2b"
+        } else {
+            self.config.registry_repo_prefix.trim()
+        };
+        RegistryCredential {
+            url,
+            repository: format!("{}/{}", repo_prefix, template_id),
+            username: "_token".to_string(),
+            password: self
+                .config
+                .registry_token
+                .clone()
+                .unwrap_or_else(|| "_anon".to_string()),
+        }
+    }
+}
+
+// ─── helpers ───────────────────────────────────────────────────────────────
+
+/// Build the CubeMaster `Probe` from the user's intent.
+///
+/// **Important — limitations imposed by the downstream stack**:
+///
+///   - Cubelet (`Cubelet/services/cubebox/check.go::checkProbe`) only accepts
+///     `TcpSocket | Ping | HttpGet` handlers. Anything else is rejected with
+///     `invalid probe.probe_handler  param`.
+///   - CubeMaster's `handleProbeHandler` (in `pkg/service/sandbox/util.go`)
+///     similarly has no Exec branch — passing one yields an empty handler
+///     object, which Cubelet then rejects.
+///
+/// As a result the E2B-style `readyCmd` (a shell snippet) **cannot** be
+/// translated into a CubeMaster probe. We only synthesise a probe when the
+/// caller (or template store) gives us an explicit port. `readyCmd` is
+/// recorded into the build log for diagnostic purposes (see
+/// `v3_trigger_build`) but never forwarded to CubeMaster as a probe.
+fn build_probe(body: &CreateTemplateRequest) -> Option<Probe> {
+    let port = body
+        .probe_port
+        .or_else(|| body.exposed_ports.as_ref().and_then(|p| p.first().copied()))?;
+
+    Some(Probe {
+        probe_handler: ProbeHandler {
+            http_get: Some(HttpGetAction {
+                path: body
+                    .probe_path
+                    .clone()
+                    .unwrap_or_else(|| "/health".to_string()),
+                port,
+                host: None,
+                scheme: None,
+            }),
+            exec: None,
+        },
+        timeout_ms: Some(30_000),
+        period_ms: Some(500),
+        success_threshold: Some(1),
+        failure_threshold: Some(60),
+    })
+}
+
+/// Best-effort parser for the SDK's `wait_for_url(...)` ready command.
+///
+/// The E2B SDK ultimately sends the ready check as a free-form shell snippet
+/// in `readyCmd`, e.g.
+///
+///   * `wait_for_url("http://localhost:49999/health")`
+///   * `curl -fsS http://127.0.0.1:8080/ready`
+///   * `until curl -fsS http://0.0.0.0:3000; do sleep 1; done`
+///
+/// Any of these collapses to "HTTP GET on `<port><path>` of the sandbox" once
+/// you discard the surrounding shell. We extract `(port, path)` from the
+/// first `http(s)://<host>:<port>[/path]` substring whose host is a localhost
+/// alias and whose port is explicit and non-zero; URLs failing those checks
+/// (proxies, off-VM services) are skipped, never turned into a probe target.
+///
+/// Returns `None` when no such URL is found within the first 64 candidates —
+/// callers fall back to `probe_port` / `exposedPorts` or skip the probe.
+fn parse_ready_url(ready_cmd: &str) -> Option<(u16, String)> {
+    // Iterate over each `http(s)://` occurrence; the first one that yields a
+    // loopback `(port, path)` wins. Scanning is bounded at 64 candidates.
+    let mut search = ready_cmd;
+    for _ in 0..64 {
+        let scheme_idx = search.find("http")?;
+        let after_http = &search[scheme_idx..];
+        let rest = after_http
+            .strip_prefix("https://")
+            .or_else(|| after_http.strip_prefix("http://"));
+        let rest = match rest {
+            Some(r) => r,
+            None => {
+                // Found "http" but not as a scheme — step past the (ASCII) `h`.
+                search = &search[scheme_idx + 1..];
+                continue;
+            }
+        };
+
+        // `rest` now points at `<host>[:<port>][/path...][?query]...` followed
+        // by whatever shell tokens come next (space, `"`, `'`, `)`, `;`, ...).
+        let end = rest
+            .find(|c: char| {
+                matches!(
+                    c,
+                    ' ' | '\t' | '\n' | '"' | '\'' | ')' | ';' | '|' | '&' | '`' | '<' | '>'
+                )
+            })
+            .unwrap_or(rest.len());
+        let url_body = &rest[..end];
+        // If this URL is rejected below, scanning resumes right after it.
+        search = &rest[end..];
+
+        // Split host[:port] / path[?query]
+        let (authority, path_with_query) = match url_body.find('/') {
+            Some(i) => (&url_body[..i], &url_body[i..]),
+            None => (url_body, ""),
+        };
+
+        // Drop ?query — probes don't carry it.
+        let path = match path_with_query.find('?') {
+            Some(i) => &path_with_query[..i],
+            None => path_with_query,
+        };
+
+        // Authority must contain an explicit port and resolve to a localhost
+        // alias — otherwise we refuse to invent a probe target from this URL.
+        let (host, port_str) = match authority.rsplit_once(':') {
+            Some(pair) => pair,
+            None => continue,
+        };
+        if !is_localhost_alias(host) {
+            continue;
+        }
+        let port = match port_str.parse::<u16>() {
+            Ok(p) if p != 0 => p,
+            _ => continue,
+        };
+
+        let path = if path.is_empty() {
+            "/".to_string()
+        } else {
+            path.to_string()
+        };
+        return Some((port, path));
+    }
+    None
+}
+
+/// `wait_for_url` only makes sense when pointed at the sandbox itself, so we
+/// limit the host whitelist to the well-known loopback aliases. Anything else
+/// is almost certainly a misconfiguration we'd rather surface than silently
+/// translate into a probe.
+fn is_localhost_alias(host: &str) -> bool {
+    matches!(
+        host,
+        "localhost" | "127.0.0.1" | "0.0.0.0" | "::1" | "[::1]"
+    )
+}
+
+fn build_resources(body: &CreateTemplateRequest) -> Option<CreateTemplateResources> {
+    // E2B `cpuCount` (cores) → millicores, saturating instead of overflowing on
+    // absurd counts; legacy `cpu` (already millicores) wins when both are set.
+    let cpu_millicores = body.cpu.or_else(|| body.cpu_count.map(|n| n.saturating_mul(1000)));
+    let mem_mb = body.memory.or(body.memory_mb);
+
+    if cpu_millicores.is_none() && mem_mb.is_none() {
+        return None;
+    }
+
+    Some(CreateTemplateResources {
+        cpu: cpu_millicores.map(|v| format!("{}m", v)),
+        mem: mem_mb.map(|v| format!("{}Mi", v)),
+    })
+}
+
+fn merge_envs(body: &CreateTemplateRequest) -> Option<Vec<CreateTemplateEnv>> {
+    let mut out: HashMap<String, String> = HashMap::new();
+
+    if let Some(envs) = &body.env {
+        for s in envs {
+            let mut parts = s.splitn(2, '=');
+            if let Some(k) = parts.next() {
+                let k = k.trim().to_string();
+                if k.is_empty() {
+                    continue;
+                }
+                let v = parts.next().unwrap_or("").to_string();
+                out.insert(k, v);
+            }
+        }
+    }
+    if let Some(map) = &body.env_vars {
+        for (k, v) in map {
+            out.insert(k.clone(), v.clone());
+        }
+    }
+
+    if out.is_empty() {
+        None
+    } else {
+        Some(
+            out.into_iter()
+                .map(|(key, value)| CreateTemplateEnv { key, value })
+                .collect(),
+        )
     }
 }
 
@@ -261,23 +1158,21 @@ fn string_or(value: String, fallback: &str) -> String {
     }
 }
 
-fn build_log_line(status: &str, progress: i32, message: &str) -> String {
-    if message.is_empty() {
-        format!("[{}] progress={}%", status, progress)
-    } else {
-        format!("[{}] {}", status, message)
-    }
-}
-
-fn to_job(resp: TemplateJobResponse) -> TemplateBuildJob {
+fn to_job(resp: TemplateJobResponse, build_id_override: Option<String>) -> TemplateBuildJob {
     let job = resp.job.unwrap_or_else(default_template_job);
+    let build_id = build_id_override
+        .filter(|s| !s.is_empty())
+        .unwrap_or_else(|| job.job_id.clone());
     TemplateBuildJob {
         job_id: job.job_id,
         template_id: job.template_id,
+        build_id,
         status: job.status,
         phase: job.phase,
         progress: job.progress,
         error_message: job.error_message,
+        upload_url: None,
+        registry: None,
     }
 }
 
@@ -336,122 +1231,124 @@ fn validate_dns_servers(servers: Option<&[String]>) -> AppResult<Option<Vec<Stri
     }
 }
 
-fn build_template_probe(body: &CreateTemplateRequest) -> Option<Probe> {
-    body.probe_port
-        .or_else(|| body.exposed_ports.as_ref().and_then(|p| p.first().copied()))
-        .map(|port| Probe {
-            probe_handler: ProbeHandler {
-                http_get: Some(HttpGetAction {
-                    path: body
-                        .probe_path
-                        .clone()
-                        .unwrap_or_else(|| "/health".to_string()),
-                    port,
-                    host: None,
-                    scheme: None,
-                }),
-                exec: None,
-            },
-            timeout_ms: Some(30000),
-            period_ms: Some(500),
-            success_threshold: Some(1),
-            failure_threshold: Some(60),
-        })
+/// Translate CubeMaster-internal phase strings into E2B-style status tokens.
+fn remap_cubemaster_status(raw: &str) -> String {
+    match raw.trim().to_lowercase().as_str() {
+        "" => "pending".to_string(),
+        "ready" | "succeeded" | "success" | "completed" | "complete" => "ready".to_string(),
+        "failed" | "error" | "errored" => "error".to_string(),
+        // CubeMaster intermediate phases — bucket all of them into "building"
+        // to match what the E2B CLI expects.
+        "pending" | "queued" | "running" | "pulling" | "extracting" | "rootfs"
+        | "snapshotting" | "distributing" | "uploading" | "ready_pending" => "building".to_string(),
+        other => other.to_string(),
+    }
 }
 
-fn build_template_resources(body: &CreateTemplateRequest) -> Option<CreateTemplateResources> {
-    if body.cpu.is_none() && body.memory.is_none() {
-        return None;
+fn host_from_url(url: &str) -> Option<String> {
+    // Best-effort URL parse without pulling in a new crate: skip the scheme
+    // (if any), then keep the authority that precedes the first '/'.
+    let after_scheme = match url.split_once("://") {
+        Some((_, rest)) => rest,
+        None => url,
+    };
+    let host = after_scheme.split('/').next().unwrap_or("");
+    if host.is_empty() {
+        None
+    } else {
+        Some(host.to_string())
     }
-    Some(CreateTemplateResources {
-        cpu: body.cpu.map(|v| format!("{v}m")),
-        mem: body.memory.map(|v| format!("{v}Mi")),
-    })
 }
 
-fn build_template_envs(body: &CreateTemplateRequest) -> Option<Vec<CreateTemplateEnv>> {
-    body.env
-        .as_ref()
-        .map(|envs| {
-            envs.iter()
-                .filter_map(|s| {
-                    let mut parts = s.splitn(2, '=');
-                    let key = parts.next()?.trim().to_string();
-                    let value = parts.next().unwrap_or("").to_string();
-                    if key.is_empty() {
-                        None
-                    } else {
-                        Some(CreateTemplateEnv { key, value })
-                    }
-                })
-                .collect::<Vec<_>>()
-        })
-        .filter(|envs| !envs.is_empty())
+/// Hash `name` into a stable templateID so repeated `Template.build()` calls
+/// against the same name reuse the same ID: `tpl-` followed by the first 16
+/// hex chars of a v5 UUID derived from the DNS namespace + name.
+fn stable_template_id(name: &str) -> String {
+    let ns = uuid::Uuid::NAMESPACE_DNS;
+    let id = uuid::Uuid::new_v5(&ns, name.as_bytes());
+    let simple = id.simple().to_string();
+    format!("tpl-{}", &simple[..16])
 }
 
-fn build_template_container_overrides(
-    body: &CreateTemplateRequest,
-    dns_servers: Option<&[String]>,
-) -> Option<CreateTemplateContainerOverrides> {
-    let command = non_empty_vec(body.command.clone());
-    let args = non_empty_vec(body.args.clone());
-    let probe = build_template_probe(body);
-    let resources = build_template_resources(body);
-    let envs = build_template_envs(body);
-    let dns_config = dns_servers.map(|servers| DnsConfig {
-        servers: servers.to_vec(),
-        searches: Vec::new(),
-    });
-
-    if command.is_none()
-        && args.is_none()
-        && probe.is_none()
-        && resources.is_none()
-        && envs.is_none()
-        && dns_config.is_none()
-    {
-        return None;
+fn base_url(url: &str) -> String {
+    if let Some(rest) = url.strip_prefix("http://") {
+        let host = rest.split('/').next().unwrap_or("");
+        format!("http://{}", host)
+    } else if let Some(rest) = url.strip_prefix("https://") {
+        let host = rest.split('/').next().unwrap_or("");
+        format!("https://{}", host)
+    } else {
+        url.to_string()
     }
-
-    Some(CreateTemplateContainerOverrides {
-        command,
-        args,
-        probe,
-        resources,
-        envs,
-        dns_config,
-    })
 }
 
-fn build_template_cubevs_context(body: &CreateTemplateRequest) -> Option<CreateTemplateCubeVSContext> {
-    let allow_out = body.allow_out.clone().unwrap_or_default();
-    let deny_out = body.deny_out.clone().unwrap_or_default();
-    if body.allow_internet_access.is_none() && allow_out.is_empty() && deny_out.is_empty() {
-        return None;
+// Adapter helper used inside dashmap update closures.
+impl crate::services::builds::BuildContext {
+    pub(crate) fn append_log_inline(&mut self, line: impl Into<String>) {
+        self.logs.push(crate::services::builds::BuildLogLine {
+            timestamp: chrono::Utc::now(),
+            line: line.into(),
+        });
     }
-    Some(CreateTemplateCubeVSContext {
-        allow_internet_access: body.allow_internet_access,
-        allow_out,
-        deny_out,
-    })
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::models::CreateTemplateRequest;
 
+    fn empty_request() -> CreateTemplateRequest {
+        CreateTemplateRequest {
+            template_id: String::new(),
+            instance_type: None,
+            alias: None,
+            team_id: None,
+            image: None,
+            dockerfile: None,
+            writable_layer_size: None,
+            exposed_ports: None,
+            probe_port: None,
+            probe_path: None,
+            cpu: None,
+            memory: None,
+            cpu_count: None,
+            memory_mb: None,
+            env: None,
+            env_vars: None,
+            allow_internet_access: None,
+            network_type: None,
+            nodes: None,
+            registry_username: None,
+            registry_password: None,
+            command: None,
+            args: None,
+            dns: None,
+            allow_out: None,
+            deny_out: None,
+            start_cmd: None,
+            ready_cmd: None,
+        }
+    }
+
+    #[allow(dead_code)]
     fn sample_request() -> CreateTemplateRequest {
         CreateTemplateRequest {
             template_id: String::new(),
             instance_type: Some("cubebox".to_string()),
-            image: "python:3.11-slim".to_string(),
+            alias: None,
+            team_id: None,
+            image: Some("python:3.11-slim".to_string()),
+            dockerfile: None,
             writable_layer_size: Some("1G".to_string()),
             exposed_ports: Some(vec![8080]),
             probe_port: Some(8080),
             probe_path: Some("/health".to_string()),
             cpu: Some(2000),
             memory: Some(2048),
+            cpu_count: None,
+            memory_mb: None,
             env: Some(vec!["A=1".to_string()]),
+            env_vars: None,
             allow_internet_access: Some(true),
             network_type: Some("tap".to_string()),
             nodes: Some(vec!["node-1".to_string()]),
@@ -462,41 +1359,249 @@ mod tests {
             dns: Some(vec!["8.8.8.8".to_string(), "1.1.1.1".to_string()]),
             allow_out: Some(vec!["172.67.0.0/16".to_string()]),
             deny_out: Some(vec!["10.0.0.0/8".to_string()]),
+            start_cmd: None,
+            ready_cmd: None,
         }
     }
 
     #[test]
-    fn build_template_container_overrides_maps_cli_fields() {
-        let body = sample_request();
-        let overrides = build_template_container_overrides(&body, Some(&["8.8.8.8".to_string()]))
-            .expect("overrides");
+    fn validate_dns_servers_rejects_invalid_ip() {
+        let err = validate_dns_servers(Some(&["not-an-ip".to_string()])).unwrap_err();
+        assert!(matches!(err, AppError::BadRequest(_)));
+    }
+
+    #[test]
+    fn build_resources_maps_e2b_cpu_count_to_millicores() {
+        let mut req = empty_request();
+        req.cpu_count = Some(2);
+        req.memory_mb = Some(4096);
+        let r = build_resources(&req).expect("resources should be present");
+        assert_eq!(r.cpu.as_deref(), Some("2000m"));
+        assert_eq!(r.mem.as_deref(), Some("4096Mi"));
+    }
+
+    #[test]
+    fn build_resources_prefers_legacy_fields_when_both_supplied() {
+        let mut req = empty_request();
+        req.cpu = Some(500); // millicores
+        req.cpu_count = Some(8);
+        req.memory = Some(512);
+        req.memory_mb = Some(8192);
+        let r = build_resources(&req).expect("resources should be present");
+        assert_eq!(r.cpu.as_deref(), Some("500m"));
+        assert_eq!(r.mem.as_deref(), Some("512Mi"));
+    }
+
+    #[test]
+    fn merge_envs_overrides_kv_strings_with_envvars_map() {
+        let mut req = empty_request();
+        req.env = Some(vec!["FOO=bar".to_string(), "EMPTY=".to_string()]);
+        req.env_vars = Some({
+            let mut m = HashMap::new();
+            m.insert("FOO".to_string(), "baz".to_string()); // wins
+            m.insert("EXTRA".to_string(), "yes".to_string());
+            m
+        });
+        let mut envs = merge_envs(&req).expect("envs should be present");
+        envs.sort_by(|a, b| a.key.cmp(&b.key));
+        assert_eq!(envs.len(), 3);
+        let foo = envs.iter().find(|e| e.key == "FOO").unwrap();
+        assert_eq!(foo.value, "baz");
+    }
 
+    #[test]
+    fn build_probe_picks_http_get_when_port_provided() {
+        let mut req = empty_request();
+        req.probe_port = Some(8080);
+        req.probe_path = Some("/healthz".to_string());
+        let probe = build_probe(&req).expect("probe should be present");
+        assert!(probe.probe_handler.http_get.is_some());
+        let http = probe.probe_handler.http_get.unwrap();
+        assert_eq!(http.port, 8080);
+        assert_eq!(http.path, "/healthz");
+    }
+
+    /// Regression: previously we synthesised an Exec probe from
+    /// `readyCmd`, but neither CubeMaster nor Cubelet support Exec probes
+    /// (`invalid probe.probe_handler  param`). The fix is to **not** emit a
+    /// probe at all when the caller hasn't provided a port — Cubelet treats
+    /// nil probes as "no readiness check", which is the right thing to do.
+    #[test]
+    fn build_probe_returns_none_when_only_ready_cmd_is_provided() {
+        let mut req = empty_request();
+        req.ready_cmd = Some("curl -fsS localhost:1234/ok".to_string());
+        // No probe_port, no exposed_ports → no probe.
+        assert!(build_probe(&req).is_none());
+    }
+
+    /// Regression: when the caller provides exposedPorts but no explicit
+    /// probe_port, we still want an HttpGet probe on the first exposed port
+    /// (matches our previous behaviour and keeps templates that listed ports
+    /// working out of the box).
+    #[test]
+    fn build_probe_picks_http_get_from_first_exposed_port() {
+        let mut req = empty_request();
+        req.exposed_ports = Some(vec![3000, 8080]);
+        let probe = build_probe(&req).expect("probe should be present");
+        let http = probe.probe_handler.http_get.expect("http probe");
+        assert_eq!(http.port, 3000);
+        assert!(probe.probe_handler.exec.is_none());
+    }
+
+    #[test]
+    fn parse_ready_url_extracts_port_and_path_from_localhost_url() {
         assert_eq!(
-            overrides.command,
-            Some(vec!["/bin/sh".to_string(), "-c".to_string()])
+            parse_ready_url("wait_for_url(\"http://localhost:49999/health\")"),
+            Some((49999, "/health".to_string()))
         );
-        assert_eq!(overrides.args, Some(vec!["sleep infinity".to_string()]));
+    }
+
+    #[test]
+    fn parse_ready_url_handles_curl_with_127_0_0_1_and_query_string() {
         assert_eq!(
-            overrides.dns_config.as_ref().map(|d| d.servers.clone()),
-            Some(vec!["8.8.8.8".to_string()])
+            parse_ready_url("curl -fsS http://127.0.0.1:8080/ready?retries=3 || exit 1"),
+            Some((8080, "/ready".to_string()))
         );
-        assert!(overrides.probe.is_some());
-        assert!(overrides.resources.is_some());
-        assert_eq!(overrides.envs.as_ref().map(|envs| envs.len()), Some(1));
     }
 
     #[test]
-    fn build_template_cubevs_context_includes_egress_rules() {
-        let body = sample_request();
-        let ctx = build_template_cubevs_context(&body).expect("cubevs");
-        assert_eq!(ctx.allow_internet_access, Some(true));
-        assert_eq!(ctx.allow_out, vec!["172.67.0.0/16".to_string()]);
-        assert_eq!(ctx.deny_out, vec!["10.0.0.0/8".to_string()]);
+    fn parse_ready_url_defaults_path_to_root_when_omitted() {
+        assert_eq!(
+            parse_ready_url("until nc -z 0.0.0.0:3000; do sleep 0.2; done; \
+                             curl http://0.0.0.0:3000"),
+            Some((3000, "/".to_string()))
+        );
     }
 
     #[test]
-    fn validate_dns_servers_rejects_invalid_ip() {
-        let err = validate_dns_servers(Some(&["not-an-ip".to_string()])).unwrap_err();
-        assert!(matches!(err, AppError::BadRequest(_)));
+    fn parse_ready_url_rejects_non_loopback_hosts() {
+        // We must not silently rewrite a probe to point at an external
+        // service — that would generate noisy traffic and probably never
+        // succeed against the sandbox itself.
+        assert_eq!(
+            parse_ready_url("curl http://api.example.com:443/healthz"),
+            None
+        );
+    }
+
+    #[test]
+    fn parse_ready_url_returns_none_when_no_url_is_present() {
+        assert_eq!(parse_ready_url("/usr/local/bin/wait-for-it.sh --quiet"), None);
+        assert_eq!(parse_ready_url(""), None);
+        assert_eq!(parse_ready_url("curl localhost:1234"), None); // missing http://
+    }
+
+    #[test]
+    fn parse_ready_url_requires_explicit_port() {
+        // Probes must target a specific port — defaulting to 80/443 here
+        // would mask real misconfigurations.
+        assert_eq!(parse_ready_url("curl http://localhost/health"), None);
+    }
+
+    #[test]
+    fn parse_ready_url_rejects_zero_port() {
+        assert_eq!(parse_ready_url("curl http://127.0.0.1:0/"), None);
+    }
+
+    #[test]
+    fn host_from_url_extracts_host_with_port() {
+        assert_eq!(host_from_url("http://10.0.0.1:5000"), Some("10.0.0.1:5000".to_string()));
+        assert_eq!(
+            host_from_url("https://registry.example.com/path"),
+            Some("registry.example.com".to_string())
+        );
+    }
+
+    #[test]
+    fn base_url_strips_path_keeps_scheme() {
+        assert_eq!(base_url("http://10.0.0.1:5000/v2/"), "http://10.0.0.1:5000");
+        assert_eq!(
+            base_url("https://reg.example.com/foo/bar"),
+            "https://reg.example.com"
+        );
+    }
+
+    #[test]
+    fn remap_cubemaster_status_normalizes_phases_to_e2b_tokens() {
+        assert_eq!(remap_cubemaster_status(""), "pending");
+        assert_eq!(remap_cubemaster_status("Ready"), "ready");
+        assert_eq!(remap_cubemaster_status("succeeded"), "ready");
+        assert_eq!(remap_cubemaster_status("Failed"), "error");
+        assert_eq!(remap_cubemaster_status("PULLING"), "building");
+        assert_eq!(remap_cubemaster_status("distributing"), "building");
+        assert_eq!(remap_cubemaster_status("custom_phase"), "custom_phase");
+    }
+
+    fn make_service(registry_upstream: Option<String>) -> TemplateService {
+        let mut cfg = ServerConfig::default();
+        cfg.registry_upstream = registry_upstream;
+        cfg.registry_public_host = Some("cube.example.com".to_string());
+        cfg.registry_repo_prefix = "e2b".to_string();
+        let http = reqwest::Client::new();
+        let cm = CubeMasterClient::new("http://127.0.0.1:9", http);
+        TemplateService::new(cm, "cubebox".to_string(), BuildRegistry::new(), cfg)
+    }
+
+    #[tokio::test]
+    async fn create_template_e2b_mode_rejects_when_registry_disabled() {
+        let svc = make_service(None);
+        let mut req = empty_request();
+        req.dockerfile = Some("FROM ubuntu".to_string());
+        let err = svc.create_template(req).await.expect_err("should fail");
+        assert!(matches!(err, AppError::NotImplemented(_)));
+    }
+
+    #[tokio::test]
+    async fn create_template_e2b_mode_returns_push_credential_and_registers_build() {
+        let svc = make_service(Some("http://127.0.0.1:5000".to_string()));
+        let mut req = empty_request();
+        req.dockerfile = Some("FROM ubuntu\nCMD echo hi".to_string());
+        let job = svc
+            .create_template(req)
+            .await
+            .expect("e2b create should succeed");
+
+        // Build identity is well-formed and emitted in both legacy & E2B fields.
+        assert!(!job.template_id.is_empty());
+        assert!(job.template_id.starts_with("tpl-"));
+        assert!(job.build_id.starts_with("bld-"));
+        assert_eq!(job.status, "accepted");
+        assert_eq!(job.phase, "waiting");
+
+        // Push credential URL is the scheme + host of the configured upstream.
+        let cred = job.registry.expect("registry credential");
+        assert_eq!(cred.url, "http://127.0.0.1:5000");
+        assert!(cred.repository.starts_with("e2b/tpl-"));
+        assert_eq!(cred.username, "_token");
+
+        // Internal BuildRegistry now knows about this build and stores the
+        // image_ref CubeMaster will later pull from.
+        let ctx = svc
+            .builds
+            .get(&job.build_id)
+            .expect("build context should be registered");
+        assert!(ctx.image_ref.starts_with("127.0.0.1:5000/e2b/"));
+        assert!(ctx.image_ref.ends_with(&format!(":{}", job.build_id)));
+    }
+
+    /// Regression: CubeMaster validates `writable_layer_size` as required and
+    /// the E2B V3 SDK never sends it. Verify the service injects the
+    /// configured default so the request reaches CubeMaster non-empty.
+    #[test]
+    fn build_cubemaster_request_fills_default_writable_layer_size() {
+        let svc = make_service(None);
+        let req = empty_request();
+        let cm_req = svc.build_cubemaster_request(&req, "image:tag".to_string());
+        assert_eq!(cm_req.writable_layer_size.as_deref(), Some("1G"));
+    }
+
+    #[test]
+    fn build_cubemaster_request_preserves_caller_writable_layer_size() {
+        let svc = make_service(None);
+        let mut req = empty_request();
+        req.writable_layer_size = Some("4G".to_string());
+        let cm_req = svc.build_cubemaster_request(&req, "image:tag".to_string());
+        assert_eq!(cm_req.writable_layer_size.as_deref(), Some("4G"));
     }
 }
+
diff --git a/deploy/one-click/scripts/one-click/up.sh b/deploy/one-click/scripts/one-click/up.sh
index ef6b8d38c..0d9a6e583 100755
--- a/deploy/one-click/scripts/one-click/up.sh
+++ b/deploy/one-click/scripts/one-click/up.sh
@@ -85,7 +85,7 @@ start_with_pidfile \
 
 start_with_pidfile \
   "cube-api" \
-  "export LOG_DIR=\"${CUBE_API_LOG_DIR}\" CUBE_API_BIND=\"${CUBE_API_BIND:-0.0.0.0:3000}\" CUBE_API_SANDBOX_DOMAIN=\"${CUBE_API_SANDBOX_DOMAIN:-cube.app}\"; ${CUBE_API_OPTIONAL_EXPORTS}\"${CUBE_API_BIN}\""
+  "export LOG_DIR=\"${CUBE_API_LOG_DIR}\" CUBE_API_BIND=\"${CUBE_API_BIND:-0.0.0.0:3000}\" CUBE_API_SANDBOX_DOMAIN=\"${CUBE_API_SANDBOX_DOMAIN:-cube.app}\"; ${CUBE_API_OPTIONAL_EXPORTS}\"${CUBE_API_BIN}\" --registry-upstream http://127.0.0.1:5000 --registry-public-host cube.app --registry-pull-host  127.0.0.1:5000 --registry-repo-prefix e2b"
 
 start_with_pidfile \
   "cubelet" \
diff --git a/docs/.vitepress/config.mjs b/docs/.vitepress/config.mjs
index 0c4554694..e5283bc17 100644
--- a/docs/.vitepress/config.mjs
+++ b/docs/.vitepress/config.mjs
@@ -127,6 +127,7 @@ export default withMermaid(defineConfig({
             {
               text: 'Tutorials',
               items: [
+                { text: 'Create Templates with the e2b SDK', link: '/guide/tutorials/template-from-e2b-sdk' },
                 { text: 'Create Templates from OCI Image', link: '/guide/tutorials/template-from-image' },
                 { text: 'Examples', link: '/guide/tutorials/examples' },
                 { text: 'Custom Image', link: '/guide/tutorials/bring-your-own-image' }
@@ -229,6 +230,7 @@ export default withMermaid(defineConfig({
             {
               text: '场景教程',
               items: [
+                { text: '使用 e2b SDK 创建模板', link: '/zh/guide/tutorials/template-from-e2b-sdk' },
                 { text: '从 OCI 镜像制作模板', link: '/zh/guide/tutorials/template-from-image' },
                 { text: '示例项目', link: '/zh/guide/tutorials/examples' },
                 { text: '自定义镜像', link: '/zh/guide/tutorials/bring-your-own-image' }
diff --git a/docs/guide/tutorials/template-from-e2b-sdk.md b/docs/guide/tutorials/template-from-e2b-sdk.md
new file mode 100644
index 000000000..f70983f8b
--- /dev/null
+++ b/docs/guide/tutorials/template-from-e2b-sdk.md
@@ -0,0 +1,395 @@
+---
+title: Create Templates with the e2b SDK
+lang: en-US
+description: End-to-end practical guide for building CubeSandbox templates with the e2b Python / JS SDK — V3 protocol contract, OCI Registry reverse proxy, wait_for_url probe bridging, deployment configuration, and troubleshooting.
+---
+
+# Create Templates with the e2b SDK
+
+CubeSandbox is wire-compatible with the [e2b](https://e2b.dev/) **V3 template and sandbox protocol**. Starting from a ready-made e2b-style image, this page walks through how to use the official e2b Python / JS SDK to **register → build → run** a template on a CubeSandbox cluster, plus the technical reference and best practices that go with it.
+
+> Available in **CubeSandbox v0.2.3+**.
+>
+> - For the `cubemastercli` workflow, see [Create Templates from OCI Image](./template-from-image.md);
+> - For adding envd to an existing image first, see [Bring Your Own Image](./bring-your-own-image.md).
+
+---
+
+## 1. Overall architecture
+
+How the e2b SDK client, CubeAPI, CubeMaster, and the bundled OCI Registry cooperate:
+
+```mermaid
+flowchart LR
+    subgraph Client[e2b client]
+      SDK[Python / JS SDK<br/>Template.build]
+      CLI[e2b CLI<br/>docker push]
+    end
+
+    subgraph Edge[CubeAPI edge]
+      V3[V3 template routes<br/>/v3/templates<br/>/templates/.../files/<br/>/v2/.../builds/<br/>.../status]
+      REG[OCI proxy<br/>/v2/*]
+      Reg[(OCI Registry<br/>distribution/distribution<br/>:5000)]
+    end
+
+    subgraph CP[Control plane]
+      Master[CubeMaster]
+      Cubelet[cubelet]
+    end
+
+    SDK -->|HTTPS| V3
+    CLI -->|docker push| REG
+    REG --> Reg
+    V3 --> Master
+    Master -->|RunSandbox + AppSnapshot| Cubelet
+    Cubelet -->|doProbe HttpGet| Cubelet
+```
+
+Key points:
+
+1. **CubeAPI** acts as the e2b V3 protocol edge, translating V3 calls into CubeMaster's internal `CreateTemplateFromImage` / build-job semantics.
+2. **OCI Registry** is an independent sidecar (default `distribution/distribution` on `127.0.0.1:5000`); CubeAPI exposes `/v2/*` as a verbatim reverse proxy for `docker push`.
+3. Once **CubeMaster + cubelet** see a `<registry>/<repo_prefix>/<templateID>:<buildID>` reference, the rest of the pipeline (OCI image → ext4 rootfs → temporary sandbox → probe → snapshot → register) is the same as any other build path.
+
+---
+
+## 2. Quick start
+
+> Prerequisite: you already have an image **with envd (49983)** built per [Bring Your Own Image](./bring-your-own-image.md) and pushed to a registry the cluster can reach (the `from_image` reference below).
+
+### 2.1 Install the SDK and configure the environment
+
+```bash
+pip install e2b python-dotenv
+```
+
+Drop CubeAPI's endpoint and your API key into a project-root `.env`:
+
+```dotenv
+E2B_API_KEY=your-cube-api-key            # any value if CubeAPI auth is disabled
+E2B_DOMAIN=cube.example.com              # CubeAPI ingress (no scheme)
+```
+
+### 2.2 Define the template
+
+```python
+# build_template.py
+
+from dotenv import load_dotenv
+from e2b import Template, default_build_logger, wait_for_url
+
+load_dotenv()
+
+if __name__ == '__main__':
+    template = (
+        Template()
+        .from_image("cube-sandbox-cn.tencentcloudcr.com/cube-sandbox/sandbox-code:latest")    # ← or swap in your own image
+        .set_start_cmd(
+            "sudo /root/.jupyter/start-up.sh",
+            wait_for_url("http://localhost:49999/health")   # <- becomes the build-time readiness probe
+        )
+    )
+    Template.build(
+        template,
+        'template-tag-code',
+        cpu_count=1,
+        memory_mb=1024,
+        on_build_logs=default_build_logger(),
+    )
+```
+
+### 2.3 Build + use
+
+```bash
+python build_template.py
+# Once "[7/7] READY" prints, you can create sandboxes
+```
+
+```python
+# use_sandbox.py
+from e2b_code_interpreter import Sandbox  # pip install e2b-code-interpreter
+
+sbx = Sandbox(template="template-tag-code", timeout=120)
+print(sbx.run_code("print('hello from cube sandbox')").text)
+sbx.kill()
+```
+
+In the happy path the **first `run_code` works immediately — no `time.sleep` needed**. As long as `wait_for_url` blocked the build until the user process was actually ready, the snapshot already captures that ready state.
+
+---
+
+## 3. Technical reference
+
+### 3.1 V3 protocol endpoint contract
+
+CubeAPI exposes the four V3 endpoints the e2b SDK speaks:
+
+| # | Method + path | Handler | Purpose |
+|---|---|---|---|
+| ① | `POST /v3/templates` | `templates_v3::v3_create_template` | Register a template + allocate the first build attempt; returns `{templateID, buildID, names, aliases, tags, public}` |
+| ② | `GET /templates/{tid}/files/{hash}` | `templates_v3::v3_get_files_hash` | Cache probe before SDK uploads a build context tarball; CubeAPI always answers `present=true` so the SDK skips upload (the V3 flow currently consumes only `from_image`) |
+| ③ | `POST /v2/templates/{tid}/builds/{bid}` | `templates_v3::v2_trigger_build` | Actually triggers the build: resolves `from_image` / `from_template` / a previously-pushed image and dispatches a `CreateTemplateFromImageReq` to CubeMaster |
+| ④ | `GET /templates/{tid}/builds/{bid}/status` | `templates_v3::v3_get_build_status` | Polls build status; returns the strict `{buildID, templateID, status, logs[], logEntries[], reason?}` envelope the SDK expects |
+
+End-to-end SDK call timeline:
+
+```mermaid
+sequenceDiagram
+    participant SDK as e2b SDK
+    participant CLI as e2b CLI / docker
+    participant API as CubeAPI
+    participant Reg as OCI Registry
+    participant Master as CubeMaster
+    participant Cubelet as cubelet
+
+    SDK->>API: POST /v3/templates {name, cpuCount, memoryMB}
+    API-->>SDK: 202 {templateID, buildID, ...}
+
+    Note over SDK,Reg: Push only happens for Dockerfile builds;<br/>a pure from_image flow skips the file probe and manifest push below<br/>and goes straight to the trigger-build call
+    SDK->>API: GET /templates/{tid}/files/{hash}
+    API-->>SDK: 201 {present:true}
+    CLI->>API: PUT /v2/<repo>/manifests/<bid>
+    API->>Reg: forward
+    Reg-->>API: 201 Created
+    API->>API: mark_image_pushed(bid)
+    API-->>CLI: 201 Created
+
+    SDK->>API: POST /v2/templates/{tid}/builds/{bid}<br/>{fromImage, startCmd, readyCmd, ...}
+    API->>API: parse_ready_url → probe_port/path
+    API->>Master: CreateTemplateFromImage + Probe.HttpGet
+    API-->>SDK: 202 Accepted
+
+    loop poll every N seconds
+      SDK->>API: GET /.../builds/{bid}/status?logsOffset=K
+      API->>Master: get_template_build_status
+      API-->>SDK: 200 {status, logs[], reason?}
+    end
+
+    Master->>Cubelet: AppSnapshot(req with Probe)
+    Cubelet->>Cubelet: doProbe blocks until user process is ready
+    Cubelet-->>Master: snapshot captures ready state
+    Master-->>API: build READY
+    API-->>SDK: status="ready"
+```
+
+### 3.2 OCI Registry reverse proxy
+
+CubeAPI exposes `/v2/*` as a verbatim reverse proxy that forwards e2b CLI / docker push traffic to an upstream OCI Registry. Notable design points:
+
+| Behaviour | Notes |
+|---|---|
+| **Bypasses `unified_auth`** | docker push uses the registry's own Basic / Bearer credentials, which are in a separate trust domain from CubeAPI's `Authorization: Bearer <api-key>`; therefore `/v2/*` does not run through `unified_auth`. |
+| **240 s timeout** | A single layer-blob PUT can take minutes, so `/v2/*` lives on its own 240 s `TimeoutLayer`, separate from the default 30 s router (see `routes.rs::SNAPSHOT_LONG_ROUTE_TIMEOUT`). |
+| **Hop-by-hop header stripping** | Per RFC 7230 §6.1, `connection` / `keep-alive` / `transfer-encoding` etc. are stripped in both directions to keep HTTP/1.1 implementations on either end happy. |
+| **`mark_image_pushed` hook** | When `PUT /v2/<repo>/manifests/<tag>` succeeds, CubeAPI uses `<tag>` as the `buildID` and moves the matching BuildContext to the `Building` stage so the subsequent trigger-build call can dispatch immediately. |
+| **Graceful degradation** | If `registry_upstream` is unset, every `/v2/*` request returns 503 `registry_disabled`; pure `from_image` flows still work in this deployment shape. |
+
+The default deployment **enables** this stack out of the box (`deploy/one-click/scripts/one-click/up.sh`):
+
+If you don't have a registry yet, you can quickly start one with `docker run -d -p 5000:5000 --restart always --name registry registry:3`.
+
+```bash
+cube-api \
+  --registry-upstream     http://127.0.0.1:5000 \
+  --registry-public-host  cube.app \
+  --registry-pull-host    127.0.0.1:5000 \
+  --registry-repo-prefix  e2b
+```
+
+See [Section 4 — Deployment Configuration](#_4-deployment-configuration) for details.
+
+### 3.3 `wait_for_url` and the readiness probe
+
+`wait_for_url(...)` is the key to the "create-and-immediately-use" property of templates. Semantically: **during template build**, wait for the URL to return 2xx **before** snapshotting — every sandbox restored from such a template comes back with the user process already serving traffic, so `sbx.run_code(...)` works immediately.
+
+#### How the bridging works
+
+The e2b SDK serialises `wait_for_url(...)` into a shell-form `readyCmd` (ultimately `curl ...`). CubeAPI does **not** run the shell — instead, in `services/templates.rs::v3_trigger_build` it does a lightweight parse:
+
+1. Find an `http(s)://<host>:<port>[/<path>]` URL inside `readyCmd`;
+2. Require `host` to be a loopback alias (`localhost` / `127.0.0.1` / `0.0.0.0` / `::1` / `[::1]`) — never invent a probe target pointing at the public internet;
+3. Require an explicit, non-zero port;
+4. On success, populate `probe_port` / `probe_path`, which `build_probe()` turns into a `Probe.HttpGet` and forwards to CubeMaster;
+5. Cubelet **blocks** on this probe (`doProbe`) after container creation, only committing the snapshot once it returns 2xx.
+
+The whole bridging is transparent — no extra SDK-side configuration needed.
+
+#### Parsing rules at a glance
+
+| `readyCmd` input | Parsed result | Notes |
+|---|---|---|
+| `wait_for_url("http://localhost:49999/health")` | `(49999, "/health")` | Canonical form |
+| <code>curl -fsS http://127.0.0.1:8080/ready?retries=3 \|\| exit 1</code> | `(8080, "/ready")` | Query string is stripped |
+| `until nc -z 0.0.0.0:3000; do sleep 0.2; done; curl http://0.0.0.0:3000` | `(3000, "/")` | Path defaults to `/` when omitted |
+| `curl http://api.example.com:443/healthz` | ❌ `None` | Non-loopback hosts rejected |
+| `curl http://localhost/health` | ❌ `None` | Port must be explicit |
+| `curl http://127.0.0.1:0/` | ❌ `None` | Port must be > 0 |
+| `/usr/local/bin/wait-for-it.sh --quiet` | ❌ `None` | No recognisable URL |
+
+#### Three-tier source priority
+
+`probe_port` is resolved in this order:
+
+1. **Caller override** — `probePort` / `probePath` in the V3 request body;
+2. **`readyCmd` parsing** — auto-extracted from `wait_for_url(...)` / `curl ...`;
+3. **`exposedPorts[0]` + `/health`** — last-resort fallback (preserves legacy behaviour).
+
+If any tier fires, `Probe.HttpGet` is generated. If all three are empty, **no probe is emitted** — sandbox creation returns the moment `Create` completes (today's behaviour); still works, but users may need a `time.sleep`.
+
+#### Probe parameters (cubelet defaults)
+
+| Field | Default | Meaning |
+|---|---|---|
+| `timeout_ms` | 30 000 | Total budget for the probe loop (30 s) |
+| `period_ms` | 500 | Probe every 500 ms |
+| `success_threshold` | 1 | First 2xx wins |
+| `failure_threshold` | 60 | Up to 60 failures (~30 s) before giving up |
+
+> If your user process needs more than 30 s to come up (rare), use `cubemastercli`'s explicit override path, or extend CubeAPI in a follow-up to expose a `probeTimeoutMs` field.
+
+### 3.4 Build state machine
+
+CubeAPI keeps an in-memory `BuildRegistry` tracking every `(templateID, buildID)` lifecycle (`services/builds.rs`):
+
+```
+WaitingPush ──manifest PUT succeeds──► Building ──CubeMaster job terminal──► Ready / Error
+```
+
+| Stage | Meaning |
+|---|---|
+| `WaitingPush` | Template registered, registry credentials issued, waiting for client docker push |
+| `Building` | manifest PUT succeeded / trigger-build received; CubeMaster pipeline running |
+| `Ready` | Template build successful, sandboxes can use it |
+| `Error` | Build failed; `reason.message` contains the CubeMaster error |
+
+Each `BuildContext` also keeps: the original `CreateTemplateRequest` (replayed at trigger time), registry credentials, CubeMaster `jobID`, an append-only log buffer (capped at 10 000 lines, head-trimmed on overflow), and the V3-specific fields (`name` / `tags` / `cpuCount` / `memoryMB` / `aliases`).
+
+CubeAPI restart loses the in-memory state — a deliberate trade-off: builds normally reach a terminal state in minutes, and a build truncated mid-flight is naturally retried by the SDK. When stronger consistency is needed, swap the `BuildRegistry` backend to durable storage (the trait abstraction is in place).
+
+### 3.5 ID and timeout rules
+
+#### `templateID`
+
+Derived from `name` via UUIDv5 (DNS namespace), with the `tpl-` prefix:
+
+```rust
+fn stable_template_id(name: &str) -> String {
+    let id = Uuid::new_v5(&Uuid::NAMESPACE_DNS, name.as_bytes());
+    format!("tpl-{}", &id.simple().to_string()[..16])
+}
+```
+
+- Same `name` always maps to the **same** `templateID`, matching e2b's "alias is also a primary key" semantics;
+- Re-building the same template name reuses the `templateID`, avoiding stale templates in the control plane.
+
+#### `buildID`
+
+Allocated fresh on every `POST /v3/templates`: `bld-<uuid_v4_simple>`. Stateless, unguessable.
+
+#### Timeout tiers
+
+| Routes | Timeout | Reason |
+|---|---|---|
+| Default (e.g. `/v3/templates`, `.../builds/{bid}/status`) | 30 s | Regular synchronous calls |
+| Long routes (`POST /sandboxes/:id/snapshots`, `POST /sandboxes/:id/rollback`, `DELETE /templates/:id`) | 240 s | Synchronous calls into cubelet's LVM/snapshot cleanup |
+| OCI Registry proxy (`/v2/*`) | 240 s | Large layer-blob PUTs can take minutes |
+
+This is implemented in `routes.rs` by wrapping each sub-router in its own `TimeoutLayer` and `Router::merge`-ing them together. The `merge_preserves_per_router_timeout_layers` unit test specifically guards this invariant.
+
+---
+
+## 4. Deployment configuration
+
+### 4.1 One-click defaults
+
+`deploy/one-click/scripts/one-click/up.sh` already starts CubeAPI with:
+
+```bash
+--registry-upstream     http://127.0.0.1:5000   # local distribution sidecar
+--registry-public-host  cube.app                # docker push target advertised to clients
+--registry-pull-host    127.0.0.1:5000          # CubeMaster node-side pull address
+--registry-repo-prefix  e2b                     # image namespace
+```
+
+So out-of-the-box `e2b template build` + docker push **just work** in a standard deployment. For other deployment shapes, pass the corresponding flags below.
+
+### 4.2 Full parameter reference
+
+| CLI flag | Env var | Default | Meaning |
+|---|---|---|---|
+| `--registry-upstream URL` | `CUBE_API_REGISTRY_UPSTREAM` | *unset* | Upstream OCI Registry URL; when unset `/v2/*` returns 503 and dockerfile flows are rejected |
+| `--registry-public-host HOST` | `CUBE_API_REGISTRY_PUBLIC_HOST` | request Host header | Hostname advertised to clients for docker push |
+| `--registry-pull-host HOST` | `CUBE_API_REGISTRY_PULL_HOST` | upstream's host:port | Internal address CubeMaster nodes use to pull images |
+| `--registry-repo-prefix PREFIX` | `CUBE_API_REGISTRY_REPO_PREFIX` | `e2b` | Repo namespace for pushed images |
+| `--registry-token TOKEN` | `CUBE_API_REGISTRY_TOKEN` | `_anon` | The `registry.password` field returned by `POST /templates` |
+| `--default-writable-layer-size SIZE` | `CUBE_API_DEFAULT_WRITABLE_LAYER_SIZE` | `1G` | Default `writable_layer_size` when the client doesn't provide one (CubeMaster validates this field as required) |
+| `--sandbox-domain DOMAIN` | `CUBE_API_SANDBOX_DOMAIN` | `cube.app` | The `domain` field on sandbox API responses |
+| `--auth-callback-url URL` | `AUTH_CALLBACK_URL` | *unset* | Callback URL for unified auth (see [Authentication](../authentication.md)) |
+
+### 4.3 Hooking up a private / restricted OCI Registry
+
+The most common case is pushing to your team's private registry. Three steps:
+
+1. **Deploy a registry that speaks OCI Distribution v1** (CNCF `distribution/distribution`, Harbor, AWS ECR, GCR all qualify);
+2. **CubeAPI side**: set `--registry-upstream` to point at it; `--registry-public-host` is whatever hostname users docker push to (typically your ingress);
+3. **CubeMaster side**: make sure `--registry-pull-host` resolves on the cluster network — if the registry is on another machine, **don't** use `127.0.0.1`.
+
+If the registry has htpasswd / token-server auth, the docker client's `Authorization` header is forwarded verbatim by CubeAPI — no special handling needed at the API layer.
+
+---
+
+## 5. Best practices
+
+### 5.1 Image preparation
+
+**Hard constraint**: any image used as a CubeSandbox template must have envd listening on `:49983` at startup. Two fastest paths:
+
+| Path | Best for | How |
+|---|---|---|
+| **`FROM ghcr.io/tencentcloud/cubesandbox-base:2026.16`** | Greenfield business images | Base image ships with envd + `cube-entrypoint.sh`, which backgrounds envd for you |
+| **`COPY --from=cubesandbox-base ...`** | Existing business images (e.g. `e2bdev/code-interpreter`) | Inject envd binary + entrypoint into your image, switch ENTRYPOINT to `cube-entrypoint.sh` |
+
+Detailed Dockerfile templates, the `cube-entrypoint.sh` contract, and local smoke tests are in [Bring Your Own Image](./bring-your-own-image.md).
+
+> ⚠️ **Don't use `e2bdev/code-interpreter:latest` directly**: it ships e2b's upstream init but not the envd CubeSandbox needs, so the build-time probe will hit `connection refused` and time out.
+
+### 5.2 SDK usage
+
+- **Always use the two-arg `set_start_cmd(cmd, wait_for_url(...))`** form so the build blocks on actual readiness;
+- The `wait_for_url` URL must be of the form `http(s)://<loopback>:<port>[/<path>]` — host must be a loopback alias (`localhost` / `127.0.0.1` / `0.0.0.0` / `[::1]`) and the port must be explicit;
+- The `from_image(...)` reference must be **pullable from CubeMaster nodes**;
+- `cpu_count` / `memory_mb` set the template default; override per `Sandbox(...)` call as needed;
+- A build log line like `[dispatch-v3] readyCmd parsed → HttpGet probe on port=... path=...` confirms the bridging fired.
+
+### 5.3 Sandbox usage
+
+- **No `time.sleep` needed**: as long as the build's `wait_for_url` actually waited, the first `run_code` is immediately usable;
+- Reusing a single sandbox across `run_code` calls is an order of magnitude cheaper than creating new sandboxes;
+- Always `sbx.kill()` explicitly instead of relying on timeout reclamation.
+
+---
+
+## 6. Troubleshooting
+
+| Symptom | Root cause | Fix |
+|---|---|---|
+| `BuildException: 404: b''` | CubeAPI lacks the V3 routes — likely v0.2.2 or earlier | Upgrade to v0.2.3+ |
+| Build stuck in `PULLING_IMAGE` | CubeMaster nodes can't pull the image | Use a cluster-reachable registry; for private registries check `--registry-pull-host` |
+| Build log says `readyCmd is recorded but not enforced` | URL parsing failed | Check that `wait_for_url` carries `http://localhost:<port>[/<path>]`, host is a loopback alias, port is explicit |
+| Build log says `readyCmd parsed`, but build still times out | Probe runs but the user process really isn't ready | Verify locally: `docker run` and `curl 127.0.0.1:<port>/<path>`. Confirm `cube-entrypoint.sh` `exec`'s the user command rather than fork-and-exit |
+| `Sandbox(template=...)` then `run_code` returns 502 | User process still warming up (probe ineffective) | Upgrade to v0.2.3+; confirm build log contains `readyCmd parsed → HttpGet probe`; check inter-node port reachability — see [Networking (CubeVS)](../../architecture/network.md) |
+| `run_code` returns `404 not found` | envd is not running inside the sandbox | envd was not injected, or ENTRYPOINT was overridden — see [Bring Your Own Image](./bring-your-own-image.md#_3-alternative-injecting-envd-into-an-existing-image) |
+| docker push returns `503 registry_disabled` | CubeAPI `--registry-upstream` is not set | Enable the OCI proxy per [Deployment Configuration](#_4-deployment-configuration) |
+| docker push returns `request timeout` | layer blob upload exceeded the 240 s long timeout | Check upstream registry storage IO; or shrink layers (`--squash` / multi-stage builds) |
+
+For more template-related issues see [Templates Troubleshooting](../troubleshooting/templates.md).
+
+---
+
+## 7. Further reading
+
+- [Bring Your Own Image](./bring-your-own-image.md) — Dockerfile templates, `cube-entrypoint.sh` contract, local smoke tests
+- [Create Templates from OCI Image](./template-from-image.md) — explicit `--probe` / `--probe-path` configuration via `cubemastercli`
+- [Networking (CubeVS)](../../architecture/network.md) — how cross-node port forwarding works
+- [Templates Troubleshooting](../troubleshooting/templates.md) — common build-time issues
+- [Authentication](../authentication.md) — `unified_auth` middleware and API key configuration
diff --git a/docs/guide/tutorials/template-from-image.md b/docs/guide/tutorials/template-from-image.md
index 2f4591745..6b476c14b 100644
--- a/docs/guide/tutorials/template-from-image.md
+++ b/docs/guide/tutorials/template-from-image.md
@@ -244,3 +244,9 @@ template deleted: tpl-748094d2f2374b0a8a37e6ec
 | `status: FAILED` after BUILDING | Build error (disk full, Dockerfile issue, etc.) | Re-run `tpl status --job-id <id> --json` and inspect `last_error` |
 | `distribution: 0/N ready` after READY | Artifact distribution still in progress (normal briefly) | Wait and re-run `tpl info`; if stuck check Cubelet logs on target nodes |
 | Sandbox fails readiness probe | Service not listening on the expected port/path at startup | Verify your container starts the HTTP server before signalling ready; adjust `--probe-path` if needed |
+
+---
+
+## Further reading
+
+The `--probe` / `--probe-path` flags above target the `cubemastercli` workflow. If you build templates through the [e2b](https://e2b.dev/) Python / JS SDK (`Template().set_start_cmd(..., wait_for_url(...))`), **you don't have to specify probe parameters by hand** — CubeAPI parses `(port, path)` straight out of `wait_for_url(...)` and synthesises an equivalent HttpGet probe. See [Create Templates with the e2b SDK](./template-from-e2b-sdk.md).
diff --git a/docs/zh/guide/tutorials/template-from-e2b-sdk.md b/docs/zh/guide/tutorials/template-from-e2b-sdk.md
new file mode 100644
index 000000000..96a087c10
--- /dev/null
+++ b/docs/zh/guide/tutorials/template-from-e2b-sdk.md
@@ -0,0 +1,396 @@
+---
+title: 通过 e2b SDK 创建模板
+lang: zh-CN
+description: 在 CubeSandbox 上使用 e2b Python / JS SDK 制作模板的端到端实践指南，含 V3 协议契约、OCI Registry 反代、wait_for_url 就绪探针桥接、运维配置与故障排查。
+---
+
+# 通过 e2b SDK 创建模板
+
+CubeSandbox 在协议层完整兼容了 [e2b](https://e2b.dev/) **V3 模板与沙箱协议**。本文从一份"现成的 e2b 风格镜像"出发，讲清楚如何使用 e2b 官方 Python / JS SDK 在 CubeSandbox 集群上 **创建模板 → 构建 → 创建沙箱执行代码** 的完整路径，并给出技术参考和最佳实践。
+
+> 适用版本：CubeSandbox **v0.2.3+**。
+>
+> - 如果你想用 `cubemastercli` 命令行制作模板，请参考[从 OCI 镜像制作模板](./template-from-image.md)；
+> - 如果你只是想给现有镜像加上 envd，请先读[自带镜像接入 (envd)](./bring-your-own-image.md)。
+
+---
+
+## 一、整体架构
+
+e2b SDK 客户端、CubeAPI、CubeMaster、bundled OCI Registry 之间的协作关系：
+
+```mermaid
+flowchart LR
+    subgraph Client[e2b 客户端]
+      SDK[Python / JS SDK<br/>Template.build]
+      CLI[e2b CLI<br/>docker push]
+    end
+
+    subgraph Edge[CubeAPI 边缘服务]
+      V3[V3 模板路由<br/>/v3/templates<br/>/templates/.../files/<br/>/v2/.../builds/<br/>.../status]
+      REG[OCI 反代<br/>/v2/*]
+      Reg[(OCI Registry<br/>distribution/distribution<br/>:5000)]
+    end
+
+    subgraph CP[控制面]
+      Master[CubeMaster]
+      Cubelet[cubelet]
+    end
+
+    SDK -->|HTTPS| V3
+    CLI -->|docker push| REG
+    REG --> Reg
+    V3 --> Master
+    Master -->|RunSandbox + AppSnapshot| Cubelet
+    Cubelet -->|doProbe HttpGet| Cubelet
+```
+
+要点：
+
+1. **CubeAPI** 充当 e2b V3 协议的"协议边缘"，把 V3 调用翻译成 CubeMaster 内部的 `CreateTemplateFromImage` / 构建作业语义。
+2. **OCI Registry** 是一个独立的 sidecar（默认 `distribution/distribution`，监听 `127.0.0.1:5000`），CubeAPI 用 `/v2/*` 路由原样反向代理 docker push 流量。
+3. **CubeMaster + cubelet** 收到 `<registry>/<repo_prefix>/<templateID>:<buildID>` 形式的镜像引用后，再走 OCI 镜像 → ext4 rootfs → 创建临时 sandbox → 探活 → 快照 → 注册的常规流水线。
+
+---
+
+## 二、快速开始
+
+> 前置：你已经按 [自带镜像接入](./bring-your-own-image.md) 准备好了一个**自带 envd（49983）**的镜像，并推送到了一个集群可达的 OCI Registry（即下面这个 `from_image` 中的镜像）。
+
+### 2.1 安装 SDK 并配置环境
+
+```bash
+pip install e2b python-dotenv
+```
+
+把 CubeAPI 入口和 API Key 写进项目根的 `.env` 文件：
+
+```dotenv
+E2B_API_KEY=e2b_0000000000000000000000000000000000000000 # 如果 CubeAPI 没启用鉴权，这里填任意值
+E2B_API_URL=http://localhost:3000
+SSL_CERT_FILE="/root/.local/share/mkcert/rootCA.pem"
+```
+
+### 2.2 写模板定义
+
+```python
+# build_template.py
+
+from dotenv import load_dotenv
+from e2b import Template, default_build_logger, wait_for_url
+
+load_dotenv()
+
+if __name__ == '__main__':
+    template = (
+        Template()
+        .from_image("cube-sandbox-cn.tencentcloudcr.com/cube-sandbox/sandbox-code:latest")    # ← 也可以改成自己的镜像
+        .set_start_cmd(
+            "sudo /root/.jupyter/start-up.sh",
+            wait_for_url("http://localhost:49999/health")   # <- 将被用作构建期的就绪探针（probe）
+        )
+    )
+    Template.build(
+        template,
+        'template-tag-code',
+        cpu_count=1,
+        memory_mb=1024,
+        on_build_logs=default_build_logger(),
+    )
+```
+
+### 2.3 构建 + 使用
+
+```bash
+python build_template.py
+# 看到 "[7/7] READY" 后即可创建沙箱
+```
+
+```python
+# use_sandbox.py
+from e2b_code_interpreter import Sandbox  # pip install e2b-code-interpreter
+
+sbx = Sandbox(template="template-tag-code", timeout=120)
+print(sbx.run_code("print('hello from cube sandbox')").text)
+sbx.kill()
+```
+
+正常情况下：**第一次 `run_code` 立即可用，不需要 `time.sleep`**——只要构建期 `wait_for_url` 真的等到业务 ready，沙箱恢复完成那一刻业务进程就已在监听。
+
+---
+
+## 三、技术参考
+
+### 3.1 V3 协议端点契约
+
+CubeAPI 暴露下列 4 个 V3 协议端点（与 e2b 上游 SDK 一一对应）：
+
+| 顺序 | 方法 + 路径 | Handler | 作用 |
+|---|---|---|---|
+| ① | `POST /v3/templates` | `templates_v3::v3_create_template` | 注册模板 + 分配第一次 build attempt，返回 `{templateID, buildID, names, aliases, tags, public}` |
+| ② | `GET /templates/{tid}/files/{hash}` | `templates_v3::v3_get_files_hash` | SDK 上传 build context 前的缓存探测；CubeAPI 当前固定返回 `present=true` 让 SDK 跳过上传（V3 流目前只走 `from_image`） |
+| ③ | `POST /v2/templates/{tid}/builds/{bid}` | `templates_v3::v2_trigger_build` | 真正触发构建：解析 `from_image` / `from_template` / 已推送镜像，组装 `CreateTemplateFromImageReq` 派发到 CubeMaster |
+| ④ | `GET /templates/{tid}/builds/{bid}/status` | `templates_v3::v3_get_build_status` | 轮询构建状态，返回 e2b 严格匹配的 `{buildID, templateID, status, logs[], logEntries[], reason?}` 信封 |
+
+整条 SDK 调用链时序：
+
+```mermaid
+sequenceDiagram
+    participant SDK as e2b SDK
+    participant CLI as e2b CLI / docker
+    participant API as CubeAPI
+    participant Reg as OCI Registry
+    participant Master as CubeMaster
+    participant Cubelet as cubelet
+
+    SDK->>API: POST /v3/templates {name, cpuCount, memoryMB}
+    API-->>SDK: 202 {templateID, buildID, ...}
+
+    Note over SDK,Reg: 仅当走 Dockerfile build 时才有 push 流；<br/>纯 from_image 流跳过下面的文件探测与 manifest push，<br/>直接进入触发构建调用
+    SDK->>API: GET /templates/{tid}/files/{hash}
+    API-->>SDK: 201 {present:true}
+    CLI->>API: PUT /v2/<repo>/manifests/<bid>
+    API->>Reg: 反代
+    Reg-->>API: 201 Created
+    API->>API: mark_image_pushed(bid)
+    API-->>CLI: 201 Created
+
+    SDK->>API: POST /v2/templates/{tid}/builds/{bid}<br/>{fromImage, startCmd, readyCmd, ...}
+    API->>API: parse_ready_url → probe_port/path
+    API->>Master: CreateTemplateFromImage + Probe.HttpGet
+    API-->>SDK: 202 Accepted
+
+    loop 每 N 秒轮询
+      SDK->>API: GET /.../builds/{bid}/status?logsOffset=K
+      API->>Master: get_template_build_status
+      API-->>SDK: 200 {status, logs[], reason?}
+    end
+
+    Master->>Cubelet: AppSnapshot(req with Probe)
+    Cubelet->>Cubelet: doProbe 阻塞探针 → 业务 ready
+    Cubelet-->>Master: snapshot 已包含 ready 状态
+    Master-->>API: build READY
+    API-->>SDK: status="ready"
+```
+
+### 3.2 OCI Registry 反代
+
+CubeAPI 通过一组 `/v2/*` 路由把 e2b CLI / docker push 的流量原样反代到上游 OCI Registry。关键设计：
+
+| 行为 | 说明 |
+|---|---|
+| **绕过 unified_auth** | docker push 用的是 registry 自己签发的 Basic / Bearer，与 CubeAPI 的 `Authorization: Bearer <api-key>` 不在同一个域，因此 `/v2/*` 路径不走 `unified_auth` 中间件。 |
+| **240 s 超时** | 单个 layer blob PUT 可能耗时数分钟，因此 `/v2/*` 路径独享一组 240 s 的 `TimeoutLayer`，与默认的 30 s 路由分开（详见 `routes.rs::SNAPSHOT_LONG_ROUTE_TIMEOUT`）。 |
+| **Hop-by-hop 头剥离** | 转发前后都按 RFC 7230 §6.1 剥掉 `connection` / `keep-alive` / `transfer-encoding` 等连接级头，保证两端 HTTP/1.1 实现兼容。 |
+| **`mark_image_pushed` 钩子** | 当 `PUT /v2/<repo>/manifests/<tag>` 成功时，CubeAPI 用 `<tag>` 作为 `buildID` 标记对应的 BuildContext 进入 `Building` 阶段，让随后的 trigger build 调用可以无缝衔接。 |
+| **未配置时降级** | 若 `registry_upstream` 未配置，`/v2/*` 一律返回 503 `registry_disabled`；这种部署形态下纯 `from_image` 流仍可工作。 |
+
+部署时**默认开启**这条链路（`deploy/one-click/scripts/one-click/up.sh` 中已配置）：
+
+如果还没有镜像仓库，可以通过 `docker run -d -p 5000:5000 --restart always --name registry registry:3` 快速启动一个。
+
+```bash
+cube-api \
+  --registry-upstream     http://127.0.0.1:5000 \
+  --registry-public-host  cube.app \
+  --registry-pull-host    127.0.0.1:5000 \
+  --registry-repo-prefix  e2b
+```
+
+详见下文[四、运维配置](#四运维配置)。
+
+### 3.3 `wait_for_url` 与就绪探针桥接
+
+`wait_for_url(...)` 是模板"创建即可用"语义的关键。它的语义是：**模板构建期间** 等到指定 URL 返回 2xx **再** 对沙箱做快照——这样所有从该模板恢复的沙箱都已经"业务在监听"，SDK `sbx.run_code(...)` 立即可用。
+
+#### 桥接逻辑
+
+e2b SDK 把 `wait_for_url(...)` 序列化为一段 shell 形式的 `readyCmd`（最终是 `curl ...`）。CubeAPI 不直接执行这段 shell，而是在 `services/templates.rs::v3_trigger_build` 中做一次轻量解析：
+
+1. 在 `readyCmd` 中找 `http(s)://<host>:<port>[/<path>]` 形式的 URL；
+2. 校验 `host` 必须是 loopback 别名（`localhost` / `127.0.0.1` / `0.0.0.0` / `::1` / `[::1]`）—— 防止意外把探针指向外部服务；
+3. 校验端口必须显式给出且 ≠ 0；
+4. 解析成功 → 自动填入 `probe_port` / `probe_path`，由 `build_probe()` 生成原生 `Probe.HttpGet` 透传给 CubeMaster；
+5. cubelet 在容器创建后 **阻塞性** 轮询该探针（`doProbe`），直到 2xx 才 commit 快照。
+
+整条链路对用户完全透明，SDK 端**不需要**额外配置。
+
+#### 解析规则一览
+
+| `readyCmd` 输入 | 解析结果 | 备注 |
+|---|---|---|
+| `wait_for_url("http://localhost:49999/health")` | `(49999, "/health")` | 标准用法 |
+| <code>curl -fsS http://127.0.0.1:8080/ready?retries=3 \|\| exit 1</code> | `(8080, "/ready")` | query string 自动剥掉 |
+| `until nc -z 0.0.0.0:3000; do sleep 0.2; done; curl http://0.0.0.0:3000` | `(3000, "/")` | 路径缺省时填 `/` |
+| `curl http://api.example.com:443/healthz` | ❌ `None` | 非 loopback 主机会被拒绝 |
+| `curl http://localhost/health` | ❌ `None` | 必须显式给出端口 |
+| `curl http://127.0.0.1:0/` | ❌ `None` | 端口必须 > 0 |
+| `/usr/local/bin/wait-for-it.sh --quiet` | ❌ `None` | 没有可识别的 URL |
+
+#### 三级优先级
+
+`probe_port` 的来源按以下优先级解析：
+
+1. **caller 显式设置** — V3 请求体中的 `probePort` / `probePath`；
+2. **`readyCmd` 解析** — 自动从 `wait_for_url(...)` / `curl ...` 中抽取；
+3. **`exposedPorts[0]` + `/health`** — 兜底（与 e2b 历史行为兼容）。
+
+任意一级生效即生成 `Probe.HttpGet`；三级全部失效则**不生成探针**，退化为"`Create` 一返回就视为 ready"行为，仍可工作但 SDK 端可能需要 `time.sleep`。
+
+#### 探针参数（cubelet 默认行为）
+
+| 字段 | 默认值 | 含义 |
+|---|---|---|
+| `timeout_ms` | 30 000 | 整个探针流程的总预算（30 秒） |
+| `period_ms` | 500 | 每 500 ms 探一次 |
+| `success_threshold` | 1 | 第一次 2xx 即视为 ready |
+| `failure_threshold` | 60 | 最多 60 次失败（约 30 s）后宣告失败 |
+
+> 业务启动需要超过 30 秒（罕见）时，可以走 `cubemastercli` 显式覆盖路径，或在后续向 CubeAPI 增加 `probeTimeoutMs` 字段。
+
+### 3.4 Build 状态机
+
+CubeAPI 在内存里维护一个 `BuildRegistry` 来跟踪每个 `(templateID, buildID)` 的生命周期（`services/builds.rs`）：
+
+```
+WaitingPush ─push manifest成功─► Building ─CubeMaster job终态─► Ready / Error
+```
+
+| 阶段 | 含义 |
+|---|---|
+| `WaitingPush` | template 已注册，registry 凭据已签发，等待客户端 docker push |
+| `Building` | manifest PUT 成功 / 触发 build 调用收到，CubeMaster 流水线运行中 |
+| `Ready` | 模板构建成功，可被沙箱使用 |
+| `Error` | 构建失败，`reason.message` 中包含 CubeMaster 的错误 |
+
+`BuildContext` 同时保留：原始 `CreateTemplateRequest`（重放用）、registry 凭据、CubeMaster `jobID`、append-only 日志缓冲（最多 10 000 行，溢出滚动）、SDK 期望的 V3 字段（`name` / `tags` / `cpuCount` / `memoryMB` / `aliases`）。
+
+CubeAPI 重启会丢失内存状态——这是一个有意识的取舍：build 流通常在数分钟内到达终态，中途被打断的 build 会由 SDK 自然重试。需要更强一致性时，可以把 `BuildRegistry` 后端切到持久化存储（trait 已留好抽象点）。
+
+### 3.5 ID 与超时规则
+
+#### `templateID`
+
+由 `name` 通过 UUIDv5（DNS 命名空间）派生，前缀 `tpl-`：
+
+```rust
+fn stable_template_id(name: &str) -> String {
+    let id = Uuid::new_v5(&Uuid::NAMESPACE_DNS, name.as_bytes());
+    format!("tpl-{}", &id.simple().to_string()[..16])
+}
+```
+
+- 同一个 `name` 永远映射到**同一个** `templateID`，与 e2b 的 "alias 也是主键" 语义一致；
+- 重复构建同名模板会复用 `templateID`，避免在控制面留下孤立模板。
+
+#### `buildID`
+
+每次 `POST /v3/templates` 现场分配：`bld-<uuid_v4_simple>`，无状态、不可猜测。
+
+#### 超时分级
+
+| 路由 | 超时 | 原因 |
+|---|---|---|
+| 默认（如 `/v3/templates` / `.../builds/{bid}/status`） | 30 s | 普通同步调用 |
+| 长路由（`POST /sandboxes/:id/snapshots`、`POST /sandboxes/:id/rollback`、`DELETE /templates/:id`） | 240 s | 同步调用 cubelet 的 LVM/快照清理 |
+| OCI Registry 反代（`/v2/*`） | 240 s | 大 layer blob PUT 可能数分钟 |
+
+在 `routes.rs` 中通过把不同子 router 用各自的 `TimeoutLayer` 包起来再 `Router::merge` 实现——`merge_preserves_per_router_timeout_layers` 单测专门覆盖了这个 invariant。
+
+---
+
+## 四、运维配置
+
+### 4.1 一键部署默认值
+
+`deploy/one-click/scripts/one-click/up.sh` 启动 CubeAPI 时已经默认带上：
+
+```bash
+--registry-upstream     http://127.0.0.1:5000   # 同机 distribution sidecar
+--registry-public-host  cube.app                # 对外 docker push 域名
+--registry-pull-host    127.0.0.1:5000          # CubeMaster 节点拉镜像地址
+--registry-repo-prefix  e2b                     # 镜像 namespace
+```
+
+这意味着在标准部署下，e2b CLI 的 `docker push` **开箱可用**。如果你用别的方式部署，请按下表把对应参数显式传给 `cube-api`。
+
+### 4.2 完整参数表
+
+| CLI 参数 | 环境变量 | 默认 | 含义 |
+|---|---|---|---|
+| `--registry-upstream URL` | `CUBE_API_REGISTRY_UPSTREAM` | *unset* | 上游 OCI Registry 的 URL；未设置时 `/v2/*` 返回 503，dockerfile 流被拒 |
+| `--registry-public-host HOST` | `CUBE_API_REGISTRY_PUBLIC_HOST` | 取请求 Host 头 | 对客户端公布的 docker push 主机名 |
+| `--registry-pull-host HOST` | `CUBE_API_REGISTRY_PULL_HOST` | upstream 的 host:port | CubeMaster 节点拉镜像用的内部地址 |
+| `--registry-repo-prefix PREFIX` | `CUBE_API_REGISTRY_REPO_PREFIX` | `e2b` | 推送镜像的 repo namespace |
+| `--registry-token TOKEN` | `CUBE_API_REGISTRY_TOKEN` | `_anon` | `POST /templates` 响应里 `registry.password` 字段 |
+| `--default-writable-layer-size SIZE` | `CUBE_API_DEFAULT_WRITABLE_LAYER_SIZE` | `1G` | 客户端没传 `writable_layer_size` 时的默认值（CubeMaster 强校验该字段） |
+| `--sandbox-domain DOMAIN` | `CUBE_API_SANDBOX_DOMAIN` | `cube.app` | 沙箱响应里 `domain` 字段 |
+| `--auth-callback-url URL` | `AUTH_CALLBACK_URL` | *unset* | 启用统一鉴权时回调 URL（详见[鉴权](../authentication.md)） |
+
+### 4.3 私有 / 受限 OCI Registry 接入
+
+最常见的场景是把 docker push 推到团队的私有 registry。三步配置：
+
+1. **部署一个支持 OCI Distribution v1 的 registry**（CNCF `distribution/distribution`、Harbor、AWS ECR、GCR 都行）；
+2. CubeAPI 端：`--registry-upstream` 指向该 registry，`--registry-public-host` 是用户 docker push 的目标主机名（通常是你的 ingress 域名）；
+3. CubeMaster 端：确保 `--registry-pull-host` 指向 CubeMaster 节点能拉到镜像的内网地址（如果 registry 在另一台机器上，**不要**用 `127.0.0.1`）。
+
+如果 registry 自带 htpasswd / token server 鉴权，docker 客户端的 `Authorization` 头会被 CubeAPI 原样透传到上游——不需要在 CubeAPI 这一层做特殊处理。
+
+---
+
+## 五、最佳实践
+
+### 5.1 镜像准备
+
+**强约束**：任何用作 CubeSandbox 模板的镜像，启动后必须在 `:49983` 上有 envd 监听。两条最快的路径：
+
+| 路径 | 适合 | 操作 |
+|---|---|---|
+| **`FROM ghcr.io/tencentcloud/cubesandbox-base:2026.16`** | 全新业务镜像 | base 镜像已预装 envd + `cube-entrypoint.sh`，自动后台拉起 envd |
+| **`COPY --from=cubesandbox-base ...`** | 已有业务镜像（如 `e2bdev/code-interpreter`） | 把 envd 二进制和入口脚本注入现有镜像，再把 ENTRYPOINT 换成 `cube-entrypoint.sh` |
+
+详细的 Dockerfile 样板、`cube-entrypoint.sh` 契约、本地 smoke test 见 [自带镜像接入](./bring-your-own-image.md)。
+
+> ⚠️ **不要直接拿 `e2bdev/code-interpreter:latest` 制作模板**：它只有 e2b 上游的 init，没有 CubeSandbox 需要的 envd，模板创建时探针会以 `connection refused` 一路失败到超时。
+
+### 5.2 SDK 用法
+
+- **始终用 `set_start_cmd(cmd, wait_for_url(...))` 二参形式**，让构建期阻塞到业务 ready；
+- `wait_for_url` 的 URL 必须是 `http(s)://<loopback>:<port>[/<path>]` 形式，host 必须是 `localhost` / `127.0.0.1` / `0.0.0.0` 之一；
+- `from_image(...)` 中的镜像引用必须是 **CubeMaster 节点能 pull 到** 的 registry；
+- `cpu_count` / `memory_mb` 是模板默认资源，可在 `Sandbox(...)` 调用时按需覆盖；
+- 看到 build log 中出现 `[dispatch-v3] readyCmd parsed → HttpGet probe on port=... path=...` 即代表桥接成功。
+
+### 5.3 沙箱使用
+
+- **不需要 `time.sleep`**：只要构建期 `wait_for_url` 真的等到 ready，沙箱第一次 `run_code` 直接可用；
+- 多次调用 `sbx.run_code(...)` 复用同一个沙箱比反复创建新沙箱开销小一个量级；
+- 用完显式 `sbx.kill()` 而不是依赖超时回收。
+
+---
+
+## 六、故障排查
+
+| 现象 | 根因 | 处理 |
+|---|---|---|
+| `BuildException: 404: b''` | CubeAPI 没有 V3 路由，多半是 v0.2.2 及更早版本 | 升级到 v0.2.3+ |
+| build 卡在 `PULLING_IMAGE` | CubeMaster 节点拉不到镜像 | 用集群可达的 registry；私有 registry 检查 `--registry-pull-host` |
+| build 日志出现 `readyCmd is recorded but not enforced` | URL 没被解析出来 | 检查 `wait_for_url` 是否写成 `http://localhost:<port>[/<path>]`，host 必须是 loopback，端口必须显式 |
+| build 日志出现 `readyCmd parsed`，但 build 仍超时失败 | 探针在跑、但业务真的没 ready | 用该镜像在本地 `docker run` 起一个容器，再在容器内 `curl 127.0.0.1:<port>/<path>` 验证；确认 `cube-entrypoint.sh` 是 `exec` 业务而不是 fork-and-exit |
+| `Sandbox(template=...)` 后立即 `run_code` 报 502 | 业务还在启动中（探针没真正生效）| 先升级到 v0.2.3+；再确认 build 日志里有 `readyCmd parsed → HttpGet probe`；最后检查跨节点端口连通性，参见[CubeVS 网络模型](../../architecture/network.md) |
+| `run_code` 报 `404 not found` | sandbox 内 envd 没起来 | 镜像里没注入 envd 或 ENTRYPOINT 被覆盖；按 [自带镜像接入](./bring-your-own-image.md#_3-备选-往现有镜像里注入-envd) 处理 |
+| docker push 报 `503 registry_disabled` | CubeAPI 未配置 `--registry-upstream` | 按 [运维配置](#四运维配置) 启用 OCI Registry 反代 |
+| docker push 报 `request timeout` | layer blob 上传慢、超过 240 s 长超时 | 检查上游 registry 的存储后端 IO；或临时减小单个 layer 的体积（拆分过大的 `RUN` / `COPY` 步骤、用多阶段构建剔除构建期产物；注意 `--squash` 会把所有层合并成一个更大的 layer，反而更容易超时） |
+
+更多模板共性问题见 [模板相关排障](../troubleshooting/templates.md)。
+
+---
+
+## 七、进一步阅读
+
+- [自带镜像接入 (envd)](./bring-your-own-image.md) — Dockerfile 模板、`cube-entrypoint.sh` 契约、本地 smoke test
+- [从 OCI 镜像制作模板](./template-from-image.md) — `cubemastercli` 路径下的 `--probe` / `--probe-path` 显式探针配置
+- [CubeVS 网络模型](../../architecture/network.md) — 跨节点端口转发原理
+- [模板相关排障](../troubleshooting/templates.md) — 模板构建常见故障
+- [鉴权](../authentication.md) — `unified_auth` 中间件与 API key 配置
diff --git a/docs/zh/guide/tutorials/template-from-image.md b/docs/zh/guide/tutorials/template-from-image.md
index f7b44a197..7bb9bfba4 100644
--- a/docs/zh/guide/tutorials/template-from-image.md
+++ b/docs/zh/guide/tutorials/template-from-image.md
@@ -224,3 +224,9 @@ template deleted: tpl-748094d2f2374b0a8a37e6ec
 | `status: FAILED`（BUILDING 阶段） | 构建错误（磁盘满、Dockerfile 问题等） | 执行 `tpl status --job-id <id> --json` 查看 `last_error` 字段 |
 | `distribution: 0/N ready`（状态已 READY） | artifact 分发仍在进行（短暂正常） | 等待后重新执行 `tpl info`；若长时间未恢复检查目标节点的 Cubelet 日志 |
 | 沙箱启动后就绪探针一直失败 | 容器内服务未在预期端口/路径监听，或服务尚未完全就绪时 HTTP server 已提前启动 | 确认 HTTP server 在应用完全就绪后再启动；检查 `--probe-path` 是否正确 |
+
+---
+
+## 延伸阅读
+
+上述所有 `--probe` / `--probe-path` 参数面向的是 `cubemastercli` 路径。如果你使用 [e2b](https://e2b.dev/) Python / JS SDK 制作模板（`Template().set_start_cmd(..., wait_for_url(...))`），**不需要手写探针参数** —— CubeAPI 会从 `wait_for_url(...)` 中自动解析出 `(port, path)` 并生成同样的 HttpGet 探针。详见主线教程：[使用 e2b SDK 创建模板](./template-from-e2b-sdk.md)。

From 8f8f6fa6a7a9f308448c16f480d64b90277b0ace Mon Sep 17 00:00:00 2001
From: joeyczheng <joeyczheng@tencent.com>
Date: Wed, 10 Jun 2026 17:48:57 +0800
Subject: [PATCH 2/2] fix(CubeAPI): harden e2b V3 template-build pipeline

Security:
  * Per-build short-lived push credentials (`bld_<random>` + 256-bit
    password) replace the global `_token` shared secret.
  * Registry reverse-proxy validates Basic auth against an in-memory
    credential index and enforces repo scoping; 401 with WWW-Authenticate
    + 403 on cross-build access. Per-credential rate limit added.
  * `mark_image_pushed` cross-checks the manifest repo, not just the tag.
  * Startup WARN when public bind meets unauthenticated loopback upstream.

Resource bounds:
  * `BuildRegistry` gains TTL + size-cap + background GC; in-flight builds
    are never evicted. New `build_registry_*` config knobs.
  * Registry proxy streams request/response bodies end-to-end instead of
    buffering, so concurrent multi-GiB pushes no longer pin the heap.
  * V3 build-pipeline routes moved to `with_auth_and_rate_limit`.

Correctness:
  * `image_pushed` flag is the single source of truth for "client really
    pushed"; OCI fallback no longer dispatches against an unpushed ref.
  * `fromTemplate` and `dockerfile`/`steps`-only builds are rejected
    with 501 until a real resolver/builder ships, instead of failing
    obscurely deeper in the pipeline.
  * `BuildLogLine.timestamp` is preserved across status polls.
  * Probe synthesis comments fixed: `readyCmd` -> `Probe.HttpGet`, not
    `Probe.Exec`.

Signed-off-by: joeyczheng <joeyczheng@tencent.com>
---
 CubeAPI/Cargo.lock                   |  16 +
 CubeAPI/Cargo.toml                   |   2 +-
 CubeAPI/src/config/mod.rs            |  67 ++
 CubeAPI/src/handlers/registry.rs     | 447 ++++++++++++-
 CubeAPI/src/handlers/templates_v3.rs |  20 +
 CubeAPI/src/middleware/rate_limit.rs | 109 +++-
 CubeAPI/src/models/mod.rs            |  29 +-
 CubeAPI/src/routes.rs                | 185 +++++-
 CubeAPI/src/services/builds.rs       | 479 +++++++++++++-
 CubeAPI/src/services/mod.rs          |   4 +-
 CubeAPI/src/services/templates.rs    | 911 +++++++++++++++++++++++++--
 CubeAPI/src/state.rs                 |  60 +-
 12 files changed, 2229 insertions(+), 100 deletions(-)

diff --git a/CubeAPI/Cargo.lock b/CubeAPI/Cargo.lock
index 4178201e6..dfc1586de 100644
--- a/CubeAPI/Cargo.lock
+++ b/CubeAPI/Cargo.lock
@@ -2161,6 +2161,7 @@ dependencies = [
  "base64 0.22.1",
  "bytes",
  "futures-core",
+ "futures-util",
  "http 1.4.0",
  "http-body",
  "http-body-util",
@@ -2180,12 +2181,14 @@ dependencies = [
  "sync_wrapper",
  "tokio",
  "tokio-rustls",
+ "tokio-util",
  "tower 0.5.3",
  "tower-http 0.6.11",
  "tower-service",
  "url",
  "wasm-bindgen",
  "wasm-bindgen-futures",
+ "wasm-streams",
  "web-sys",
  "webpki-roots 1.0.7",
 ]
@@ -3488,6 +3491,19 @@ dependencies = [
  "wasmparser",
 ]
 
+[[package]]
+name = "wasm-streams"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
+dependencies = [
+ "futures-util",
+ "js-sys",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "web-sys",
+]
+
 [[package]]
 name = "wasmparser"
 version = "0.244.0"
diff --git a/CubeAPI/Cargo.toml b/CubeAPI/Cargo.toml
index d5e52f563..6191bd3ed 100644
--- a/CubeAPI/Cargo.toml
+++ b/CubeAPI/Cargo.toml
@@ -69,7 +69,7 @@ governor = { version = "0.6", features = ["dashmap"] }
 
 # ── HTTP client (orchestrator / backend calls) ────────────────────────────
 # connection pool built-in, rustls for TLS
-reqwest = { version = "0.12", features = ["json", "rustls-tls"], default-features = false }
+reqwest = { version = "0.12", features = ["json", "rustls-tls", "stream"], default-features = false }
 
 # ── Validation ────────────────────────────────────────────────────────────
 validator = { version = "0.16", features = ["derive"] }
diff --git a/CubeAPI/src/config/mod.rs b/CubeAPI/src/config/mod.rs
index 417573283..3cc7b57b8 100644
--- a/CubeAPI/src/config/mod.rs
+++ b/CubeAPI/src/config/mod.rs
@@ -75,6 +75,32 @@ pub struct ServerConfig {
     ///
     /// When unset, /v2/* returns 503 and `dockerfile`-based template requests
     /// are rejected with 501.
+    ///
+    /// ## Security contract — read this before exposing CubeAPI publicly
+    ///
+    /// CubeAPI itself enforces **per-build, short-lived push credentials**
+    /// on every `/v2/*` path other than the unauthenticated `GET /v2/` ping
+    /// (which is required by the docker / oci-distribution handshake). The
+    /// credential is minted at build-creation time, returned to the SDK in
+    /// the `registry` field of the build response, indexed inside the
+    /// in-memory `BuildRegistry`, and is repo-scoped: it can only push /
+    /// pull blobs and manifests under `<repo_prefix>/<templateID>`. It is
+    /// dropped when the build reaches its terminal stage (TTL- or
+    /// size-cap-evicted by `BuildRegistry`).
+    ///
+    /// **Strongly recommended** in addition: run an authenticated upstream
+    /// (e.g. `distribution/distribution` with htpasswd) and bind CubeAPI
+    /// itself behind TLS + an HTTP authenticator. Both layers together
+    /// match the depth of access control most operators expect from a
+    /// public OCI registry.
+    ///
+    /// **Not safe**: setting `registry_upstream` to an unauthenticated
+    /// upstream *and* binding CubeAPI on a public interface without TLS.
+    /// CubeAPI's own credential gate covers the bulk of the attack
+    /// surface, but it cannot stop a network attacker from observing the
+    /// per-build password in transit. CubeAPI logs a `WARN` at startup
+    /// when this combination is detected (see
+    /// `AppState::log_registry_security_posture`).
     #[serde(default)]
     pub registry_upstream: Option<String>,
 
@@ -109,12 +135,50 @@ pub struct ServerConfig {
     /// Env var: CUBE_API_DEFAULT_WRITABLE_LAYER_SIZE  |  Default: "1G".
     #[serde(default = "default_writable_layer_size")]
     pub default_writable_layer_size: String,
+
+    /// How long (seconds) a *terminal* build (Ready / Error) is kept in the
+    /// in-memory `BuildRegistry` after reaching its terminal stage. Past this
+    /// TTL the build context (create request, credentials, logs, …) is
+    /// evicted by the background GC.
+    ///
+    /// 0 disables TTL-based eviction (only the size cap will fire).
+    /// Default: 3600 (1 hour) — comfortably covers slow log pollers without
+    /// retaining old builds for the lifetime of the process.
+    #[serde(default = "default_build_registry_terminal_ttl_secs")]
+    pub build_registry_terminal_ttl_secs: u64,
+
+    /// Hard upper bound on the number of *logical* builds tracked in the
+    /// `BuildRegistry`. When exceeded, the oldest terminal builds are
+    /// evicted FIFO regardless of TTL. In-flight builds are never evicted by
+    /// this cap (a warning is logged if the cap can't be honoured because
+    /// every entry is still in-flight).
+    ///
+    /// 0 disables the cap (only TTL applies). Default: 5000.
+    #[serde(default = "default_build_registry_max_entries")]
+    pub build_registry_max_entries: usize,
+
+    /// Interval (seconds) at which the background GC task scans the
+    /// `BuildRegistry` for TTL-expired terminal builds. Default: 300 (5 min).
+    /// 0 disables the background task entirely (size-cap eviction at
+    /// `create()` time still applies).
+    #[serde(default = "default_build_registry_gc_interval_secs")]
+    pub build_registry_gc_interval_secs: u64,
 }
 
 fn default_registry_repo_prefix() -> String {
     "e2b".to_string()
 }
 
+fn default_build_registry_terminal_ttl_secs() -> u64 {
+    3600
+}
+fn default_build_registry_max_entries() -> usize {
+    5000
+}
+fn default_build_registry_gc_interval_secs() -> u64 {
+    300
+}
+
 fn default_writable_layer_size() -> String {
     std::env::var("CUBE_API_DEFAULT_WRITABLE_LAYER_SIZE").unwrap_or_else(|_| "1G".to_string())
 }
@@ -201,6 +265,9 @@ impl Default for ServerConfig {
             registry_pull_host: None,
             registry_token: None,
             default_writable_layer_size: default_writable_layer_size(),
+            build_registry_terminal_ttl_secs: default_build_registry_terminal_ttl_secs(),
+            build_registry_max_entries: default_build_registry_max_entries(),
+            build_registry_gc_interval_secs: default_build_registry_gc_interval_secs(),
         }
     }
 }
diff --git a/CubeAPI/src/handlers/registry.rs b/CubeAPI/src/handlers/registry.rs
index 2d751b795..743841833 100644
--- a/CubeAPI/src/handlers/registry.rs
+++ b/CubeAPI/src/handlers/registry.rs
@@ -2,17 +2,20 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-
 use axum::{
-    body::{Body, Bytes},
+    body::Body,
     extract::{Path, Request, State},
     http::{header, HeaderMap, HeaderName, HeaderValue, Method, StatusCode},
     response::Response,
 };
+use base64::{engine::general_purpose::STANDARD as BASE64, Engine as _};
+use futures::TryStreamExt;
 use std::str::FromStr;
 
 use crate::{
     error::{AppError, AppResult},
+    models::ApiError,
+    services::builds::BuildContext,
     state::AppState,
 };
 
@@ -29,8 +32,19 @@ const HOP_BY_HOP: &[&str] = &[
     "host",
 ];
 
+/// Realm string echoed back in `WWW-Authenticate` challenges so docker /
+/// oci-distribution clients know to retry with `Authorization: Basic`.
+const REALM: &str = "cubeapi-registry";
+
 /// `GET /v2/` — registry ping. Always returns `200 OK` with the version header
 /// when an upstream is configured.
+///
+/// Note we deliberately do **not** require `Authorization` on the ping. The
+/// docker / oci-distribution v2 protocol uses an unauthenticated GET /v2/ as
+/// the discovery handshake — it's how the client learns the realm of the
+/// auth challenge in the first place. Requiring auth here would break every
+/// CLI client at the very first round-trip. The actual blob/manifest paths
+/// in `proxy()` *do* require credentials, so this is not a bypass.
 pub async fn ping(State(state): State<AppState>) -> AppResult<Response> {
     let upstream = state
         .config
@@ -39,10 +53,43 @@ pub async fn ping(State(state): State<AppState>) -> AppResult<Response> {
         .filter(|s| !s.is_empty())
         .ok_or_else(registry_disabled)?;
 
-    forward(&state, Method::GET, upstream, "/v2/", "", &HeaderMap::new(), Bytes::new()).await
+    forward(
+        &state,
+        Method::GET,
+        upstream,
+        "/v2/",
+        "",
+        &HeaderMap::new(),
+        None,
+    )
+    .await
 }
 
 /// `ANY /v2/*path` — generic reverse-proxy.
+///
+/// Both the request body (Docker/OCI blob PATCH/PUT can be GiB-sized) and the
+/// upstream response body (blob GET) are forwarded as streams; nothing is ever
+/// fully buffered in CubeAPI's heap. This keeps memory pressure bounded
+/// regardless of layer size or upload concurrency.
+///
+/// ## Defence in depth
+///
+/// Before any upstream forwarding happens, we enforce **two CubeAPI-layer
+/// access controls** that do *not* rely on the upstream registry having its
+/// own auth configured:
+///
+///   1. **Per-build credential validation** — the inbound `Authorization:
+///      Basic` header must decode to a `(username, password)` pair that we
+///      ourselves issued via `mint_registry_credential` and is still
+///      attached to a *live* build. Missing / malformed / unknown / wrong
+///      password → `401 Unauthorized` with a `WWW-Authenticate: Basic`
+///      challenge so the docker client retries the standard way.
+///   2. **Repo scoping** — once the credential resolves to a `BuildContext`,
+///      we require the request's `<repo>` segment (everything between
+///      `/v2/` and the next protocol verb) to match the repo embedded in
+///      that build's `image_ref`. So even a holder of a valid build A
+///      credential cannot push, pull or fingerprint blobs/manifests under
+///      build B's repository — the request is rejected with `403 Forbidden`.
 pub async fn proxy(
     State(state): State<AppState>,
     Path(path): Path<String>,
@@ -59,33 +106,95 @@ pub async fn proxy(
     let method = request.method().clone();
     let query = request.uri().query().unwrap_or("").to_string();
     let headers = request.headers().clone();
-    let body = match axum::body::to_bytes(request.into_body(), 512 * 1024 * 1024).await {
-        Ok(b) => b,
-        Err(e) => {
-            return Err(AppError::BadRequest(format!(
-                "failed to read /v2/* request body: {}",
-                e
-            )))
+    let normalized = normalize_subpath(&path);
+
+    let ctx = match resolve_build_credential(&state, &headers) {
+        CredentialOutcome::Authenticated(ctx) => ctx,
+        CredentialOutcome::Missing => {
+            tracing::debug!(path = %normalized, "registry request without Authorization");
+            return Ok(challenge_response(
+                StatusCode::UNAUTHORIZED,
+                "authentication required",
+            ));
+        }
+        CredentialOutcome::Malformed => {
+            tracing::debug!(path = %normalized, "registry request with malformed Authorization");
+            return Ok(challenge_response(
+                StatusCode::UNAUTHORIZED,
+                "malformed Authorization header",
+            ));
+        }
+        CredentialOutcome::Rejected => {
+            tracing::warn!(
+                path = %normalized,
+                "registry request with unknown or invalid build credential"
+            );
+            return Ok(challenge_response(
+                StatusCode::UNAUTHORIZED,
+                "invalid build credential",
+            ));
         }
     };
 
-    let normalized = normalize_subpath(&path);
-    let response = forward(&state, method.clone(), &upstream, &normalized, &query, &headers, body)
-        .await?;
+    if let Some(repo) = parse_repo(&normalized) {
+        if !repo_allowed(&ctx, repo) {
+            tracing::warn!(
+                build_id = %ctx.build_id,
+                requested_repo = %repo,
+                expected_image_ref = %ctx.image_ref,
+                "registry credential used against unauthorised repository"
+            );
+            return Ok(forbidden_response(
+                "credential is scoped to a different repository",
+            ));
+        }
+    }
+    else if normalized != "/v2/" {
+        tracing::warn!(
+            build_id = %ctx.build_id,
+            path = %normalized,
+            "registry credential used against non-repository endpoint"
+        );
+        return Ok(forbidden_response(
+            "credential is not authorised for this endpoint",
+        ));
+    }
+
+    let body_stream = request
+        .into_body()
+        .into_data_stream()
+        .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e));
+    let upstream_body = reqwest::Body::wrap_stream(body_stream);
+
+    let response = forward(
+        &state,
+        method.clone(),
+        &upstream,
+        &normalized,
+        &query,
+        &headers,
+        Some(upstream_body),
+    )
+    .await?;
 
     // After a successful manifest PUT we mark the build as image-pushed so
-    // that the orchestrator stage proceeds.
+    // that the orchestrator stage proceeds. We only need the status — the
+    // manifest body itself is being streamed back to the client untouched.
     if method == Method::PUT && response.status().is_success() {
         if let Some(parsed) = parse_manifest_path(&normalized) {
             // tag carries either the buildID (preferred) or a digest. Pull the
             // build context by tag first, then fall back to no-op.
             if !parsed.tag.starts_with("sha256:") {
                 tracing::info!(
+                    build_id = %ctx.build_id,
                     repo = %parsed.repo,
                     tag = %parsed.tag,
                     "manifest pushed; marking build as image-pushed"
                 );
-                state.services.templates.mark_image_pushed(&parsed.tag);
+                state
+                    .services
+                    .templates
+                    .mark_image_pushed(&parsed.tag, &parsed.repo);
             }
         }
     }
@@ -100,7 +209,7 @@ async fn forward(
     path: &str,
     query: &str,
     in_headers: &HeaderMap,
-    body: Bytes,
+    body: Option<reqwest::Body>,
 ) -> AppResult<Response> {
     let upstream = upstream.trim_end_matches('/');
     let path = if path.starts_with('/') {
@@ -124,8 +233,8 @@ async fn forward(
         req = req.header(name.clone(), value.clone());
     }
 
-    if !body.is_empty() {
-        req = req.body(body.to_vec());
+    if let Some(body) = body {
+        req = req.body(body);
     }
 
     let upstream_resp = req.send().await.map_err(|e| {
@@ -137,7 +246,7 @@ async fn forward(
     let mut headers = HeaderMap::new();
     for (name, value) in upstream_resp.headers() {
         let key = name.as_str().to_ascii_lowercase();
-        if HOP_BY_HOP.contains(&key.as_str()) || key == "content-length" {
+        if HOP_BY_HOP.contains(&key.as_str()) {
             continue;
         }
         if let (Ok(name), Ok(value)) = (
@@ -148,14 +257,14 @@ async fn forward(
         }
     }
 
-    let body_bytes = upstream_resp
-        .bytes()
-        .await
-        .map_err(|e| AppError::Internal(anyhow::anyhow!("registry response read failed: {}", e)))?;
+    let resp_stream = upstream_resp
+        .bytes_stream()
+        .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e));
+    let resp_body = Body::from_stream(resp_stream);
 
     let mut response = Response::builder()
         .status(StatusCode::from_u16(status.as_u16()).unwrap_or(StatusCode::BAD_GATEWAY))
-        .body(Body::from(body_bytes))
+        .body(resp_body)
         .map_err(|e| AppError::Internal(anyhow::anyhow!("response build failed: {}", e)))?;
 
     *response.headers_mut() = headers;
@@ -214,9 +323,164 @@ impl ManifestPath {
     }
 }
 
+enum CredentialOutcome {
+    /// Header present, base64-decoded `user:pass` matches a live build
+    /// whose stored password equals the presented one.
+    Authenticated(BuildContext),
+    /// No `Authorization` header at all. Triggers the standard
+    /// `WWW-Authenticate: Basic` challenge.
+    Missing,
+    /// Header present but not a valid `Basic <b64(user:pass)>` envelope
+    /// (wrong scheme, bad base64, no colon, …).
+    Malformed,
+    /// Header is well-formed but the username is unknown, the build has
+    /// already been evicted, or the password does not match.
+    ///
+    /// Note: we deliberately do not distinguish "unknown user" from "bad
+    /// password" in the response, to avoid an enumeration oracle.
+    /// NOTE(review): the resolver and `proxy()` log both cases identically.
+    Rejected,
+}
+
+/// Map the inbound `Authorization` header to a [`CredentialOutcome`].
+/// The auth scheme is matched case-insensitively (RFC 9110 §11.1), so
+/// `Basic`, `basic` and `BASIC` are all accepted.
+fn resolve_build_credential(state: &AppState, headers: &HeaderMap) -> CredentialOutcome {
+    let Some(raw) = headers.get(header::AUTHORIZATION) else {
+        return CredentialOutcome::Missing;
+    };
+    let Ok(value) = raw.to_str() else {
+        return CredentialOutcome::Malformed;
+    };
+    let Some(b64) = value
+        .split_once(' ')
+        .and_then(|(scheme, rest)| scheme.eq_ignore_ascii_case("basic").then_some(rest))
+    else {
+        return CredentialOutcome::Malformed;
+    };
+    let decoded = BASE64.decode(b64.trim()).ok();
+    let Some(decoded) = decoded.and_then(|bytes| String::from_utf8(bytes).ok()) else {
+        return CredentialOutcome::Malformed;
+    };
+    let Some((user, pass)) = decoded.split_once(':') else {
+        return CredentialOutcome::Malformed;
+    };
+    // Unknown user and wrong password share one outcome (no enumeration oracle).
+    let Some(ctx) = state.services.builds.find_by_registry_username(user) else {
+        return CredentialOutcome::Rejected;
+    };
+    if !constant_time_eq_strings(pass, &ctx.credential.password) {
+        return CredentialOutcome::Rejected;
+    }
+    CredentialOutcome::Authenticated(ctx)
+}
+
+/// Compare two secrets without bailing out at the first differing byte.
+/// Empty inputs never match. A length mismatch is folded into the result
+/// rather than handled by a separate loop whose outcome is thrown away
+/// (the optimiser is free to delete such a loop entirely).
+fn constant_time_eq_strings(a: &str, b: &str) -> bool {
+    if a.is_empty() || b.is_empty() {
+        return false;
+    }
+    let (a, b) = (a.as_bytes(), b.as_bytes());
+    // Seed with the length check so unequal lengths can never compare equal.
+    let mut diff = u8::from(a.len() != b.len());
+    // Walk the longer input; bytes missing on the shorter side read as 0, so
+    // the iteration count depends on the lengths only, not on the contents.
+    for i in 0..a.len().max(b.len()) {
+        let x = a.get(i).copied().unwrap_or(0);
+        let y = b.get(i).copied().unwrap_or(0);
+        diff |= x ^ y;
+    }
+    diff == 0
+}
+
+/// Extract the `<repo>` segment from a v2 distribution path
+/// (`/v2/<repo>/{blobs,manifests,tags,referrers}/...`). Returns `None` for
+/// the bare ping (`/v2/`), catalog endpoints, paths outside the v2 layout,
+/// and paths containing `\`, `%` or a `.` / `..` segment.
+fn parse_repo(path: &str) -> Option<&str> {
+    let stripped = path.strip_prefix("/v2/")?;
+    if stripped.is_empty() || stripped.starts_with('_') {
+        return None;
+    }
+    // The forwarded URL is re-parsed by reqwest (`url` crate), which folds
+    // `\` into `/` and collapses `..` / `%2e%2e`; without this guard
+    // `a/manifests/../../b/blobs/x` is scoped as `a` but reaches repo `b`.
+    let dotted = stripped.split('/').any(|seg| matches!(seg, "." | ".."));
+    if dotted || stripped.contains(|c: char| matches!(c, '\\' | '%')) {
+        return None;
+    }
+    // Use the right-most verb of any kind so a verb-like repo component
+    // cannot make the scope check see a shorter repo than the upstream.
+    let verbs = ["/manifests/", "/blobs/", "/tags/", "/referrers/"];
+    let idx = verbs.iter().filter_map(|verb| stripped.rfind(verb)).max()?;
+    (idx > 0).then(|| &stripped[..idx])
+}
+
+/// A credential may only touch the repository embedded in its own build's
+/// `image_ref`; the comparison is exact, so sibling or prefixed repository
+/// names are refused, as is a build whose `image_ref` has no repo part.
+fn repo_allowed(ctx: &BuildContext, repo: &str) -> bool {
+    image_ref_repo(&ctx.image_ref).as_deref() == Some(repo)
+}
+
+/// Reduce `host[:port]/<repo>[:tag][@digest]` to `<repo>`. The host is cut
+/// at the first `/` *before* the tag is stripped, so a `host:port` colon in
+/// an untagged reference is never mistaken for the tag separator.
+fn image_ref_repo(image_ref: &str) -> Option<String> {
+    let (_, rest) = image_ref.split_once('/')?;
+    let name = rest.split_once('@').map_or(rest, |(name, _)| name);
+    let name = name.rsplit_once(':').map_or(name, |(name, _)| name);
+    (!name.is_empty()).then(|| name.to_string())
+}
+
+fn challenge_response(status: StatusCode, message: &str) -> Response {
+    let body = serde_json::to_vec(&ApiError::new(status.as_u16() as i32, message.to_string()))
+        .unwrap_or_default();
+    let mut resp = Response::builder()
+        .status(status)
+        .body(Body::from(body))
+        .expect("static challenge response is always well-formed");
+    resp.headers_mut().insert(
+        header::CONTENT_TYPE,
+        HeaderValue::from_static("application/json"),
+    );
+    resp.headers_mut().insert(
+        header::WWW_AUTHENTICATE,
+        HeaderValue::from_str(&format!("Basic realm=\"{}\"", REALM))
+            .expect("REALM is ASCII"),
+    );
+    resp.headers_mut().insert(
+        HeaderName::from_static("docker-distribution-api-version"),
+        HeaderValue::from_static("registry/2.0"),
+    );
+    resp
+}
+
+/// Build the JSON `403 Forbidden` response, tagged with the distribution
+/// API version header, carrying `message` as the error text.
+fn forbidden_response(message: &str) -> Response {
+    let payload = ApiError::new(403, message.to_string());
+    let mut resp = Response::builder()
+        .status(StatusCode::FORBIDDEN)
+        .body(Body::from(serde_json::to_vec(&payload).unwrap_or_default()))
+        .expect("static forbidden response is always well-formed");
+    let out = resp.headers_mut();
+    out.insert(header::CONTENT_TYPE, HeaderValue::from_static("application/json"));
+    out.insert(
+        HeaderName::from_static("docker-distribution-api-version"),
+        HeaderValue::from_static("registry/2.0"),
+    );
+    resp
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::models::CreateTemplateRequest;
+    use crate::services::builds::{BuildRegistry, EvictionPolicy};
 
     #[test]
     fn parse_manifest_path_accepts_namespaced_repo() {
@@ -237,4 +501,139 @@ mod tests {
         assert_eq!(normalize_subpath("/foo/bar"), "/v2/foo/bar");
         assert_eq!(normalize_subpath("/v2/foo/bar"), "/v2/foo/bar");
     }
+
+    // ── repo / image_ref helpers ─────────────────────────────────────
+
+    #[test]
+    fn parse_repo_extracts_namespaced_repo_from_each_verb() {
+        // Manifests, blobs, blob uploads, tag listing and referrers: the repo
+        // segment is identical in every sample, only the trailing verb differs.
+        let samples = [
+            "/v2/e2b/tpl-abc/manifests/bld-001",
+            "/v2/e2b/tpl-abc/blobs/sha256:abc",
+            "/v2/e2b/tpl-abc/blobs/uploads/uuid-123",
+            "/v2/e2b/tpl-abc/tags/list",
+            "/v2/e2b/tpl-abc/referrers/sha256:abc",
+        ];
+        for path in samples {
+            let repo = parse_repo(path);
+            assert_eq!(repo, Some("e2b/tpl-abc"), "parse_repo failed for {}", path);
+        }
+    }
+
+    #[test]
+    fn parse_repo_rejects_non_repo_endpoints() {
+        // Ping root, catalog, a verb with no repo before it, and a path without `/v2/`.
+        for path in ["/v2/", "/v2/_catalog", "/v2/manifests/foo", "foo/bar"] {
+            assert_eq!(parse_repo(path), None, "unexpected repo for {}", path);
+        }
+    }
+
+    #[test]
+    fn image_ref_repo_strips_host_and_tag() {
+        // (image reference, expected repo): host:port with a tag, then bare host, no tag.
+        let cases = [
+            ("127.0.0.1:5000/e2b/tpl-abc:bld-deadbeef", "e2b/tpl-abc"),
+            ("registry.example.com/e2b/tpl-abc", "e2b/tpl-abc"),
+        ];
+        for (image_ref, repo) in cases {
+            assert_eq!(image_ref_repo(image_ref).as_deref(), Some(repo));
+        }
+    }
+
+    #[test]
+    fn repo_allowed_rejects_prefix_collisions() {
+        let mut ctx = sample_context();
+        ctx.image_ref = String::from("127.0.0.1:5000/e2b/tpl-abc:bld-001");
+        let allowed = |repo: &str| repo_allowed(&ctx, repo);
+        assert!(allowed("e2b/tpl-abc")); // the exact repo embedded in image_ref
+        assert!(!allowed("e2b/tpl-abc-evil") && !allowed("evil/tpl-abc")); // look-alikes
+    }
+
+    #[test]
+    fn constant_time_eq_strings_basic_correctness() {
+        assert!(constant_time_eq_strings("abc", "abc"));
+        // Every other pairing must be rejected — including two empty strings.
+        let mismatches = [("abc", "abd"), ("abc", "abcd"), ("", ""), ("", "abc"), ("abc", "")];
+        for (a, b) in mismatches {
+            assert!(!constant_time_eq_strings(a, b), "{:?} vs {:?}", a, b);
+        }
+    }
+
+    // ── credential resolution against an in-memory BuildRegistry ─────
+
+    fn sample_request() -> CreateTemplateRequest {
+        CreateTemplateRequest {
+            template_id: String::new(), // empty id; every other field below is left unset
+            instance_type: None,
+            alias: None,
+            team_id: None,
+            image: None,
+            dockerfile: None,
+            writable_layer_size: None,
+            exposed_ports: None,
+            probe_port: None,
+            probe_path: None,
+            cpu: None,
+            memory: None,
+            cpu_count: None,
+            memory_mb: None,
+            env: None,
+            env_vars: None,
+            allow_internet_access: None,
+            network_type: None,
+            nodes: None,
+            registry_username: None,
+            registry_password: None,
+            command: None,
+            args: None,
+            dns: None,
+            allow_out: None,
+            deny_out: None,
+            start_cmd: None,
+            ready_cmd: None,
+        }
+    }
+
+    fn sample_context() -> BuildContext {
+        // Throwaway registry with every eviction knob off; only the context is kept.
+        BuildRegistry::with_policy(EvictionPolicy::unbounded()).create(
+            "tpl-abc".to_string(),
+            sample_request(),
+            crate::models::RegistryCredential {
+                url: "http://127.0.0.1:5000".to_string(),
+                repository: "e2b/tpl-abc".to_string(),
+                username: "bld_test_user".to_string(),
+                password: "bld_test_pass_secret".to_string(),
+            },
+            "127.0.0.1:5000/e2b/tpl-abc:bld".to_string(),
+        )
+    }
+
+    #[test]
+    fn build_registry_indexes_credential_username() {
+        let reg = BuildRegistry::with_policy(EvictionPolicy::unbounded());
+        let created = reg.create(
+            "tpl-x".to_string(),
+            sample_request(),
+            crate::models::RegistryCredential {
+                url: "http://127.0.0.1:5000".to_string(),
+                repository: "e2b/tpl-x".to_string(),
+                username: "bld_unique_user".to_string(),
+                password: "secret".to_string(),
+            },
+            "127.0.0.1:5000/e2b/tpl-x:bld".to_string(),
+        );
+        // The minted username resolves to this build; an unknown one resolves to nothing.
+        let found = reg.find_by_registry_username("bld_unique_user").unwrap();
+        assert_eq!(found.build_id, created.build_id);
+        assert!(reg.find_by_registry_username("bld_other_user").is_none());
+    }
+
+    #[test]
+    fn parse_manifest_tag_uses_build_id_after_credential_check() {
+        let parsed = parse_manifest_path("/v2/e2b/tpl-abc/manifests/bld-deadbeef");
+        assert_eq!(parsed.unwrap().tag, "bld-deadbeef"); // tag is the last path segment
+    }
 }
diff --git a/CubeAPI/src/handlers/templates_v3.rs b/CubeAPI/src/handlers/templates_v3.rs
index 4319765b8..21bc94738 100644
--- a/CubeAPI/src/handlers/templates_v3.rs
+++ b/CubeAPI/src/handlers/templates_v3.rs
@@ -30,6 +30,26 @@ pub async fn v3_create_template(
 /// SDK before uploading build context tarballs. We always answer
 /// `present=true` because the current CubeMaster pipeline only consumes
 /// `from_image` references (no Dockerfile-from-context build yet).
+///
+/// ### Why `201 Created` on a successful GET?
+///
+/// ref: https://github.com/e2b-dev/infra/blob/db88eee0fd5df4a5c90e544faa5c7b44c6719b51/packages/api/internal/handlers/template_layer_files_upload.go#L71
+/// This is intentional and matches the upstream E2B Infra contract: the same
+/// endpoint is overloaded as both a *cache probe* and an *upload-slot
+/// allocator*. On cache miss the server returns `201 Created` together with
+/// a freshly minted presigned upload URL; on cache hit it returns the same
+/// `201` without a URL so the SDK can branch purely on the `present` flag
+/// without also having to discriminate by status code. Several E2B SDK
+/// versions hard-code this: anything other than `2xx` is treated as a
+/// fatal error, and at least the JS SDK additionally asserts on `201` for
+/// the upload-allocator branch.
+///
+/// Switching to `200 OK` here would be more REST-correct, but it would
+/// silently break SDK clients in the wild that still do
+/// `if (status !== 201) throw ...`. Until we either own all client paths
+/// or upstream relaxes the contract, we stick with `201` and pin it via
+/// the `v3_template_build_routes_are_reachable` route test in
+/// `routes.rs` so it can't drift unnoticed.
 pub async fn v3_get_files_hash(
     State(state): State<AppState>,
     Path((template_id, hash)): Path<(String, String)>,
diff --git a/CubeAPI/src/middleware/rate_limit.rs b/CubeAPI/src/middleware/rate_limit.rs
index f399d9d86..cde895d0e 100644
--- a/CubeAPI/src/middleware/rate_limit.rs
+++ b/CubeAPI/src/middleware/rate_limit.rs
@@ -5,10 +5,13 @@
 use crate::error::AppError;
 use crate::state::AppState;
 use axum::{
-    extract::{Request, State},
+    extract::{ConnectInfo, Request, State},
+    http::header,
     middleware::Next,
     response::Response,
 };
+use base64::{engine::general_purpose::STANDARD as BASE64, Engine as _};
+use std::net::SocketAddr;
 
 /// Per-API-key token bucket rate limiter middleware.
 /// Reads the X-API-Key header and checks the shared governor limiter.
@@ -33,3 +36,107 @@ pub async fn rate_limit(
         )),
     }
 }
+
+/// Rate-limit middleware specialised for the OCI registry reverse-proxy.
+///
+/// Docker / oci-distribution clients do **not** send `X-API-Key`; they
+/// authenticate with `Authorization: Basic <b64(user:pass)>` instead. The
+/// generic `rate_limit` middleware would therefore collapse every docker
+/// client onto the single "anonymous" bucket, which is unusable: a
+/// runaway client could lock every other operator out of pushing layers.
+///
+/// We pick a key in this priority order:
+///
+///   1. `Authorization: Basic` username (i.e. the per-build `bld_<…>`
+///      token we minted in `mint_registry_credential`). One bucket per
+///      build is the natural granularity — a misbehaving build
+///      doesn't impact others.
+///   2. Peer socket address (`ConnectInfo`). Catches the unauthenticated
+///      `GET /v2/` ping flood and any other anonymous traffic.
+///   3. The literal string `"reg:anonymous"` as the absolute fallback,
+///      should `ConnectInfo` somehow be missing.
+///
+/// All keys are prefixed with `reg:` so they live in a disjoint key space
+/// from the sandbox API's `X-API-Key` buckets — a sandbox abuser cannot
+/// starve the registry path and vice versa, even though both share the
+/// same governor instance and quota.
+pub async fn registry_rate_limit(
+    State(state): State<AppState>,
+    request: Request,
+    next: Next,
+) -> Result<Response, AppError> {
+    // Resolve the bucket before `request` is moved into the downstream service.
+    let bucket = registry_key_for(&request);
+    if state.rate_limiter.check_key(&bucket).is_err() {
+        return Err(AppError::TooManyRequests(
+            "Registry rate limit exceeded for this credential. Slow down.".to_string(),
+        ));
+    }
+    Ok(next.run(request).await)
+}
+
+/// Picks the limiter bucket for a request: the (unverified) Basic-auth username,
+/// else the peer IP from `ConnectInfo`, else the shared `reg:anonymous` key.
+fn registry_key_for(request: &Request) -> String {
+    let peer_ip = || request.extensions().get::<ConnectInfo<SocketAddr>>().map(|c| c.0.ip());
+    basic_auth_username(request)
+        .map(|user| format!("reg:user:{}", user))
+        .or_else(|| peer_ip().map(|ip| format!("reg:ip:{}", ip)))
+        .unwrap_or_else(|| "reg:anonymous".to_string())
+}
+
+/// Username from `Authorization: Basic <b64(user:pass)>`, or `None` when the
+/// header is absent or malformed. The scheme is matched case-insensitively
+/// (RFC 7235); the credential is not verified here, it only keys a bucket.
+fn basic_auth_username(request: &Request) -> Option<String> {
+    let raw = request.headers().get(header::AUTHORIZATION)?.to_str().ok()?;
+    let (scheme, b64) = raw.split_once(' ')?;
+    if !scheme.eq_ignore_ascii_case("Basic") {
+        return None;
+    }
+    let decoded = BASE64.decode(b64.trim()).ok()?;
+    let (user, _pass) = std::str::from_utf8(&decoded).ok()?.split_once(':')?;
+    (!user.is_empty()).then(|| user.to_string())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use axum::body::Body;
+    use axum::http::{HeaderValue, Request as HttpRequest};
+
+    fn req_with_auth(value: Option<&str>) -> Request {
+        let mut builder = HttpRequest::builder().uri("/v2/foo/blobs/sha256:abc");
+        if let Some(v) = value {
+            builder = builder.header(header::AUTHORIZATION, HeaderValue::from_str(v).unwrap());
+        }
+        builder.body(Body::empty()).unwrap()
+    }
+
+    #[test]
+    fn registry_key_uses_basic_username_when_present() {
+        let r = req_with_auth(Some("Basic YmxkX3VzZXI6c2VjcmV0"));
+        assert_eq!(registry_key_for(&r), "reg:user:bld_user");
+    }
+
+    #[test]
+    fn registry_key_falls_back_to_peer_ip_without_credentials() {
+        let mut r = req_with_auth(None);
+        r.extensions_mut().insert(ConnectInfo(SocketAddr::from(([10, 0, 0, 7], 4242))));
+        assert_eq!(registry_key_for(&r), "reg:ip:10.0.0.7");
+    }
+
+    #[test]
+    fn registry_key_falls_back_to_anonymous_without_connect_info() {
+        assert_eq!(registry_key_for(&req_with_auth(None)), "reg:anonymous");
+    }
+
+    #[test]
+    fn registry_key_ignores_malformed_authorization() {
+        let key = |v: &str| registry_key_for(&req_with_auth(Some(v)));
+        assert_eq!(key("Bearer some-token"), "reg:anonymous"); // wrong scheme
+        assert_eq!(key("Basic !!!not-base64!!!"), "reg:anonymous"); // invalid base64
+        assert_eq!(key("Basic bm9jb2xvbg=="), "reg:anonymous"); // "nocolon"
+        assert_eq!(key("Basic OnB3"), "reg:anonymous"); // ":pw" — empty user
+    }
+}
diff --git a/CubeAPI/src/models/mod.rs b/CubeAPI/src/models/mod.rs
index bf3bef242..acfc992b5 100644
--- a/CubeAPI/src/models/mod.rs
+++ b/CubeAPI/src/models/mod.rs
@@ -591,7 +591,17 @@ pub struct CreateTemplateRequest {
     pub start_cmd: Option<String>,
 
     /// E2B-style `readyCmd`: shell command used as readiness probe.
-    /// Translated into a CubeMaster `Probe.Exec` when `probe_port` is empty.
+    ///
+    /// **Not** forwarded to the container as a shell command — neither
+    /// CubeMaster nor Cubelet support `Probe.Exec`, so we cannot run an
+    /// arbitrary shell snippet as a readiness check end-to-end. Instead
+    /// `services/templates.rs::v3_trigger_build` performs a best-effort
+    /// parse of an `http(s)://<host>:<port>[/<path>]` URL embedded in the
+    /// snippet (the shape produced by the e2b SDK's `wait_for_url(...)`)
+    /// and synthesises a CubeMaster `Probe.HttpGet` from it. If no URL
+    /// can be extracted (or no `probe_port` is supplied alongside), the
+    /// `readyCmd` is recorded in the build log only and **no probe is
+    /// emitted** — Cubelet treats that as "no readiness check".
     #[serde(rename = "readyCmd", alias = "ready_cmd", default)]
     pub ready_cmd: Option<String>,
 }
@@ -905,9 +915,24 @@ pub struct V2TemplateBuildStart {
     #[serde(rename = "fromImageRegistry", default)]
     pub from_image_registry: Option<serde_json::Value>,
     /// Reuse another already-built CubeSandbox template as the base.
+    ///
+    /// **Currently rejected with `501 Not Implemented`** by
+    /// `services/templates.rs::v3_trigger_build`: the downstream stack
+    /// (CubeMaster `template_image.go` → `docker pull`) has no resolver
+    /// for the `cube://<templateID>` source scheme, so honouring this
+    /// field at the API layer would only push the failure into the build
+    /// worker as an opaque `invalid reference format`. Resolve the parent
+    /// template to a concrete OCI reference and pass it via `fromImage`
+    /// instead, until the resolver lands end-to-end.
     #[serde(rename = "fromTemplate", default)]
     pub from_template: Option<String>,
-    /// E2B `readyCmd` — translated into CubeMaster `Probe.Exec`.
+    /// E2B `readyCmd` — best-effort translated into a CubeMaster
+    /// `Probe.HttpGet` by extracting the `http(s)://host:port[/path]`
+    /// URL embedded in the snippet (CubeMaster/Cubelet don't support
+    /// `Probe.Exec`, so the shell snippet itself is *not* run). When no
+    /// URL can be parsed and the V3 body doesn't carry `probePort` /
+    /// `exposedPorts`, no probe is emitted — see
+    /// `services/templates.rs::parse_ready_url` and `build_probe`.
     #[serde(rename = "readyCmd", default)]
     pub ready_cmd: Option<String>,
     /// E2B `startCmd` — translated into container `args`.
diff --git a/CubeAPI/src/routes.rs b/CubeAPI/src/routes.rs
index 7402fc5f9..9b6c997ec 100644
--- a/CubeAPI/src/routes.rs
+++ b/CubeAPI/src/routes.rs
@@ -22,7 +22,10 @@ use crate::{
         agenthub, cluster, config, health, registry, sandboxes, snapshots, store, templates,
         templates_v3,
     },
-    middleware::{auth::unified_auth, rate_limit::rate_limit},
+    middleware::{
+        auth::unified_auth,
+        rate_limit::{rate_limit, registry_rate_limit},
+    },
     state::AppState,
 };
 
@@ -175,9 +178,23 @@ fn build_template_routes(state: &AppState, auth_configured: bool) -> Router<AppS
     // `snapshot_long_router` (240 s).  Keeping it here would re-introduce the
     // 30 s "the API gave up but the master is still deleting" race we just
     // closed off when we promoted the master path to a synchronous contract.
-    let routes = Router::new()
-        .route("/templates", get(templates::list_templates))
-        .route("/templates", post(templates::create_template))
+    //
+    // We split this into two sub-routers because the *build pipeline*
+    // routes (V3 create + cache probe + trigger + status, plus the legacy
+    // V2 build trigger) each allocate non-trivial backend state — an
+    // in-memory `BuildContext` (request snapshot, registry credentials,
+    // log buffer, terminal-eviction bookkeeping), and on the trigger
+    // path an actual CubeMaster build pipeline. Auth alone does not
+    // bound that cost: a single authenticated client can fan out
+    // arbitrarily many build allocations per second. We therefore put
+    // these specific routes behind `with_auth_and_rate_limit`, sharing
+    // the same per-API-key 100 req/s governor that already protects
+    // the sandbox surface. Plain template management (list / get /
+    // create / rebuild / update / read-only logs) stays on `with_auth`
+    // because it forwards directly to CubeMaster's CRUD layer, which
+    // CubeMaster itself rate-limits, and these endpoints do not
+    // allocate registry state.
+    let build_pipeline_routes = Router::new()
         // ── E2B V3 protocol (real SDK contract) ───────────────
         // SDK calls these in order: POST /v3/templates → GET files/{hash}
         // → POST /v2/.../builds/{bid} → GET .../status. Routes are mounted
@@ -192,23 +209,39 @@ fn build_template_routes(state: &AppState, auth_configured: bool) -> Router<AppS
             "/v2/templates/:templateID/builds/:buildID",
             post(templates_v3::v2_trigger_build),
         )
-        .route("/templates/:templateID", get(templates::get_template))
-        .route("/templates/:templateID", post(templates::rebuild_template))
-        .route("/templates/:templateID", patch(templates::update_template))
+        // Legacy v2 build trigger — same backend cost as the v3 trigger
+        // (CubeMaster `create_template_from_image` pipeline), so it
+        // belongs on the rate-limited lane too.
         .route(
             "/templates/:templateID/builds/:buildID",
             post(templates::start_template_build),
         )
+        // Status polling: the SDK polls this in a tight loop; on every
+        // call the service synchronously hits CubeMaster
+        // (`get_template_build_status`) and writes into the build
+        // registry's log buffer, so it is not free.
         .route(
             "/templates/:templateID/builds/:buildID/status",
             get(templates_v3::v3_get_build_status),
-        )
+        );
+    let build_pipeline_routes =
+        with_auth_and_rate_limit(build_pipeline_routes, state, auth_configured);
+
+    let management_routes = Router::new()
+        .route("/templates", get(templates::list_templates))
+        .route("/templates", post(templates::create_template))
+        .route("/templates/:templateID", get(templates::get_template))
+        .route("/templates/:templateID", post(templates::rebuild_template))
+        .route("/templates/:templateID", patch(templates::update_template))
         .route(
             "/templates/:templateID/builds/:buildID/logs",
             get(templates::get_template_build_logs),
         );
+    let management_routes = with_auth(management_routes, state, auth_configured);
 
-    with_auth(routes, state, auth_configured)
+    Router::new()
+        .merge(build_pipeline_routes)
+        .merge(management_routes)
 }
 
 /// Template/snapshot deletion lives on the long (240 s) router because it is
@@ -318,12 +351,26 @@ fn build_agenthub_routes(state: &AppState, auth_configured: bool) -> Router<AppS
     with_auth(routes, state, auth_configured)
 }
 
-fn build_registry_router(_state: &AppState) -> Router<AppState> {
+fn build_registry_router(state: &AppState) -> Router<AppState> {
     use axum::routing::{any, get};
+    // Registry routes deliberately do NOT go through `unified_auth`. The OCI
+    // distribution v2 protocol uses a dedicated credential domain (Basic auth
+    // against the per-build push token we minted in `mint_registry_credential`),
+    // and that validation lives inside `registry::proxy` itself — it must run
+    // *after* the docker client's two-step `GET /v2/` → `WWW-Authenticate` →
+    // retry-with-Basic handshake, which `unified_auth` would short-circuit.
+    //
+    // The reverse-proxy is, however, attached to a per-build /
+    // per-source-IP rate-limit bucket so a single misbehaving CLI cannot
+    // saturate the upstream. See `registry_rate_limit` for the keying rules.
     Router::new()
         .route("/v2/", get(registry::ping))
         .route("/v2", get(registry::ping))
         .route("/v2/*path", any(registry::proxy))
+        .layer(middleware::from_fn_with_state(
+            state.clone(),
+            registry_rate_limit,
+        ))
 }
 
 fn with_auth(
@@ -594,4 +641,122 @@ mod tests {
         let fb: serde_json::Value = r.json();
         assert_eq!(fb["present"].as_bool(), Some(true));
     }
+
+    /// Starts an in-process HTTP server whose `POST /` always answers `200 OK`
+    /// and returns its base URL (`http://127.0.0.1:<port>/`) together with the
+    /// `JoinHandle` of the tokio task serving it.
+    ///
+    /// The rate-limit regression tests below need it because
+    /// `with_auth_and_rate_limit` only attaches the rate limiter when
+    /// `auth_callback_url` is configured, so exercising the production layer
+    /// stack requires a real, reachable auth callback.
+    async fn spawn_always_200_auth_server() -> (String, tokio::task::JoinHandle<()>) {
+        use axum::routing::post;
+        let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
+            .await
+            .expect("bind mock auth server");
+        let url = format!("http://{}/", listener.local_addr().expect("addr"));
+        let app = axum::Router::new().route("/", post(|| async { axum::http::StatusCode::OK }));
+        let server = tokio::spawn(async move {
+            let _ = axum::serve(listener, app).await;
+        });
+        (url, server)
+    }
+
+    #[tokio::test]
+    async fn v3_build_pipeline_routes_are_rate_limited() {
+        let (auth_url, _auth_handle) = spawn_always_200_auth_server().await;
+
+        let mut config = ServerConfig::default();
+        config.cubemaster_url = "http://127.0.0.1:9".to_string();
+        config.auth_callback_url = Some(auth_url);
+        config.rate_limit_per_sec = 1;
+
+        let state = AppState::new(config, arc(NoopLogger)).await;
+        let server = TestServer::new(build_router(state)).expect("router should build");
+
+        let mut rate_limited = 0usize;
+        let mut passed = 0usize;
+        for _ in 0..20 {
+            let resp = server
+                .post("/v3/templates")
+                .add_header(
+                    axum::http::HeaderName::from_static("x-api-key"),
+                    axum::http::HeaderValue::from_static("abuser"),
+                )
+                .json(&serde_json::json!({
+                    "name": "my-tpl:dev",
+                    "cpuCount": 1,
+                    "memoryMB": 1024,
+                }))
+                .await;
+            match resp.status_code() {
+                StatusCode::TOO_MANY_REQUESTS => rate_limited += 1,
+                // Success or handler error alike: the request got past the gate.
+                _ => passed += 1,
+            }
+        }
+
+        assert!(
+            rate_limited > 0,
+            "expected at least one POST /v3/templates to be 429-throttled \
+             with rate_limit_per_sec=1; got {} non-429 responses and 0 \
+             throttled across 20 requests — the rate-limit middleware is \
+             not attached to V3 build pipeline routes",
+            passed,
+        );
+        assert!(
+            passed > 0,
+            "expected at least one POST /v3/templates to pass the gate \
+             (the very first burst token); got {} 429s and 0 passes — \
+             auth or rate-limit is mis-configured in the test harness",
+            rate_limited,
+        );
+    }
+
+    /// Counterpart to the build-pipeline test: the plain template
+    /// *management* endpoints (list / get / CRUD) are wrapped in `with_auth`
+    /// only, not `with_auth_and_rate_limit`, since they forward straight
+    /// to CubeMaster's CRUD layer and allocate no in-process build
+    /// state. This test pins that boundary so a later "rate limit
+    /// everything" change cannot silently break operator workflows
+    /// that legitimately list templates faster than the shared
+    /// governor would allow.
+    #[tokio::test]
+    async fn template_management_routes_are_not_rate_limited() {
+        let (auth_url, _auth_handle) = spawn_always_200_auth_server().await;
+
+        let mut config = ServerConfig::default();
+        config.cubemaster_url = "http://127.0.0.1:9".to_string();
+        config.auth_callback_url = Some(auth_url);
+        config.rate_limit_per_sec = 1;
+
+        let state = AppState::new(config, arc(NoopLogger)).await;
+        let server = TestServer::new(build_router(state)).expect("router should build");
+
+        // Fire the same 20-request burst at the management surface. With
+        // quota=1 a rate-limited lane would answer 429 almost immediately;
+        // this lane has no rate-limit layer, so no response may be a 429.
+        // The loop stops at the first throttled response it sees.
+        let api_key = axum::http::HeaderName::from_static("x-api-key");
+        let caller = axum::http::HeaderValue::from_static("abuser");
+        let mut saw_throttle = false;
+        for _ in 0..20 {
+            let resp = server
+                .get("/templates")
+                .add_header(api_key.clone(), caller.clone())
+                .await;
+            saw_throttle = resp.status_code() == StatusCode::TOO_MANY_REQUESTS;
+            if saw_throttle {
+                break;
+            }
+        }
+
+        assert!(
+            !saw_throttle,
+            "GET /templates is on the auth-only lane and must NOT be \
+             rate-limited; observing 429 here would mean the management \
+             sub-router was accidentally folded into with_auth_and_rate_limit"
+        );
+    }
 }
\ No newline at end of file
diff --git a/CubeAPI/src/services/builds.rs b/CubeAPI/src/services/builds.rs
index 34ffd2d05..6bdc1fa84 100644
--- a/CubeAPI/src/services/builds.rs
+++ b/CubeAPI/src/services/builds.rs
@@ -15,14 +15,36 @@
 //!   - the CubeMaster `jobID` once the build is dispatched, used by every
 //!     subsequent status / logs lookup.
 //!
-//! The store is in-memory + bounded; restart of CubeAPI invalidates inflight
-//! builds. This is acceptable for a build flow that always reaches a terminal
-//! state within minutes — durable persistence can be added later as a separate
-//! storage trait without changing the call sites.
+//! ## Eviction
+//!
+//! The registry is bounded by **two complementary policies** so a long-running
+//! CubeAPI process can't accumulate completed builds forever:
+//!
+//! 1. **TTL on terminal builds** — when a build transitions into
+//!    `BuildStage::Ready` / `BuildStage::Error`, we stamp `terminal_at` and
+//!    push it onto an ordered FIFO. A background tokio task wakes up every
+//!    `gc_interval` and pops everything past `terminal_ttl`. In-flight builds
+//!    (`WaitingPush`, `Building`) are never evicted by TTL.
+//!
+//! 2. **Hard size cap** — `create()` checks the cap and synchronously evicts
+//!    the oldest terminal builds FIFO until the live count is at or below the
+//!    cap. If every entry is still in-flight, we log a warning and let the
+//!    cap be exceeded rather than killing an active build mid-flight.
+//!
+//! Both knobs come from `ServerConfig::build_registry_*` and default to
+//! `(ttl=1h, cap=5000, gc_interval=5min)`. Setting any of them to `0`
+//! disables that specific protection.
+//!
+//! Restart of CubeAPI invalidates inflight builds. This is acceptable for a
+//! build flow that always reaches a terminal state within minutes — durable
+//! persistence can be added later as a separate storage trait without
+//! changing the call sites.
 
-use chrono::{DateTime, Utc};
+use chrono::{DateTime, Duration, Utc};
 use dashmap::DashMap;
-use std::sync::Arc;
+use std::collections::VecDeque;
+use std::sync::{Arc, Mutex};
+use std::time::Duration as StdDuration;
 use uuid::Uuid;
 
 use crate::models::{CreateTemplateRequest, RegistryCredential};
@@ -50,6 +72,12 @@ impl BuildStage {
             BuildStage::Error => "error",
         }
     }
+
+    /// Reports whether the stage is `Ready` or `Error` — the absorbing states
+    /// the pipeline never leaves, and the gate for TTL eviction.
+    pub fn is_terminal(self) -> bool {
+        matches!(self, Self::Ready | Self::Error)
+    }
 }
 
 #[derive(Debug, Clone)]
@@ -66,6 +94,22 @@ pub struct BuildContext {
     pub credential: RegistryCredential,
     /// Image reference CubeMaster will pull from once the client has pushed.
     pub image_ref: String,
+    /// Authoritative "the client has actually completed an OCI manifest
+    /// PUT against `image_ref`" flag. Set **exclusively** by
+    /// `TemplateService::mark_image_pushed` after both:
+    ///
+    ///   - the manifest's `repo` segment matches the one we minted at
+    ///     create time (cross-check guarding against tag collisions), and
+    ///   - the upstream registry returned a `2xx` for the PUT.
+    ///
+    /// Consumers (especially `v3_trigger_build`) MUST gate the
+    /// "fall back to `ctx.image_ref` as the source image" branch on this
+    /// field, not on `stage`. `image_ref` is *predicted* at create time
+    /// and its non-emptiness alone does not prove anything was pushed;
+    /// `stage` is also an indirect proxy that the v2/v3 dispatch paths
+    /// mutate for unrelated reasons. This boolean is the only safe
+    /// correctness signal.
+    pub image_pushed: bool,
     /// CubeMaster `jobID` — empty until the build is actually dispatched.
     pub job_id: String,
     /// Append-only log lines (timestamps + plain message).
@@ -74,6 +118,9 @@ pub struct BuildContext {
     pub progress: i32,
     pub message: String,
     pub created_at: DateTime<Utc>,
+    /// Wall-clock time at which `stage` first became terminal. `None` while
+    /// the build is still in-flight. Drives the TTL-based eviction path.
+    pub terminal_at: Option<DateTime<Utc>>,
 
     // ── V3 protocol-only metadata (populated by POST /v3/templates) ────────
     /// Template name (E2B `name`), e.g. "my-template" or "my-template:v1".
@@ -95,10 +142,62 @@ pub struct BuildLogLine {
     pub line: String,
 }
 
+#[derive(Debug, Clone, Copy)]
+pub struct EvictionPolicy {
+    /// How long a terminal build is kept after reaching Ready/Error.
+    /// `None` disables TTL-based eviction (a configured `0` maps to `None`).
+    pub terminal_ttl: Option<Duration>,
+    /// Hard cap on the number of distinct builds; `None` disables the cap.
+    pub max_entries: Option<usize>,
+    /// Background GC scan interval; `None` disables the background task
+    /// (size-cap eviction at create-time still runs).
+    pub gc_interval: Option<StdDuration>,
+}
+
+impl EvictionPolicy {
+    pub fn from_config(cfg: &crate::config::ServerConfig) -> Self {
+        let ttl_secs = cfg.build_registry_terminal_ttl_secs; // 0 => knob off (`None`)
+        let cap = cfg.build_registry_max_entries;
+        let gc_secs = cfg.build_registry_gc_interval_secs;
+        Self {
+            terminal_ttl: (ttl_secs > 0).then(|| Duration::seconds(ttl_secs as i64)),
+            max_entries: (cap > 0).then_some(cap),
+            gc_interval: (gc_secs > 0).then(|| StdDuration::from_secs(gc_secs)),
+        }
+    }
+
+    pub fn unbounded() -> Self {
+        Self {
+            terminal_ttl: None, // no TTL, no size cap, no background GC
+            max_entries: None,
+            gc_interval: None,
+        }
+    }
+}
+
+/// One entry on the FIFO of terminal builds awaiting eviction. We keep the
+/// `template_id` here so the GC path can clear *both* index keys
+/// (`bid` and `tid::bid`) without a round-trip through the DashMap.
+#[derive(Debug, Clone)]
+struct TerminalEntry {
+    build_id: String,
+    template_id: String,
+    terminal_at: DateTime<Utc>,
+}
+
 /// Thread-safe, in-process build registry.
-#[derive(Clone, Default)]
+#[derive(Clone)]
 pub struct BuildRegistry {
     inner: Arc<DashMap<String, BuildContext>>,
+    username_index: Arc<DashMap<String, String>>,
+    terminal: Arc<Mutex<VecDeque<TerminalEntry>>>,
+    policy: EvictionPolicy,
+}
+
+impl Default for BuildRegistry {
+    fn default() -> Self {
+        Self::with_policy(EvictionPolicy::unbounded()) // no TTL, no size cap, no GC task
+    }
+}
 
 impl BuildRegistry {
@@ -106,6 +205,42 @@ impl BuildRegistry {
         Self::default()
     }
 
+    pub fn with_policy(policy: EvictionPolicy) -> Self {
+        Self {
+            inner: Arc::default(),
+            username_index: Arc::default(),
+            terminal: Arc::default(),
+            policy,
+        }
+    }
+
+    /// Spawn the background TTL GC task. NOT idempotent: every call spawns
+    /// another task, so call it exactly once, from `AppServices`
+    /// construction. Returns `None` when `policy.gc_interval` is `None` (GC
+    /// disabled), which is convenient for unit tests.
+    pub fn spawn_gc(&self) -> Option<tokio::task::JoinHandle<()>> {
+        let interval = self.policy.gc_interval?;
+        let registry = self.clone();
+        let handle = tokio::spawn(async move {
+            let mut ticker = tokio::time::interval(interval);
+            // Delay after a stall; the first `tick()` skips the immediate firing.
+            ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
+            ticker.tick().await;
+            loop {
+                ticker.tick().await;
+                let evicted = registry.evict_expired(Utc::now());
+                if evicted > 0 {
+                    tracing::debug!(
+                        evicted,
+                        live = registry.inner.len(),
+                        "build registry GC swept terminal builds"
+                    );
+                }
+            }
+        });
+        Some(handle)
+    }
+
     /// Register a brand-new build attempt. Returns the freshly allocated
     /// build_id alongside the stored context (cloned for read-only use by the
     /// caller).
@@ -123,12 +258,14 @@ impl BuildRegistry {
             create_request: Arc::new(request),
             credential,
             image_ref,
+            image_pushed: false,
             job_id: String::new(),
             logs: Vec::new(),
             stage: BuildStage::WaitingPush,
             progress: 0,
             message: "build registered, waiting for image push".to_string(),
             created_at: Utc::now(),
+            terminal_at: None,
             name: String::new(),
             tags: Vec::new(),
             cpu_count: 0,
@@ -136,12 +273,23 @@ impl BuildRegistry {
             aliases: Vec::new(),
         };
 
-        // Index under both bid and (tid, bid) so lookups by either key work.
         self.inner.insert(build_id.clone(), ctx.clone());
         self.inner.insert(compose_key(&template_id, &build_id), ctx.clone());
+
+        let uname = ctx.credential.username.clone();
+        if !uname.is_empty() {
+            self.username_index.insert(uname, build_id.clone());
+        }
+        self.enforce_size_cap();
+
         ctx
     }
 
+    /// Resolve the build registered under the registry `username`, if any.
+    pub fn find_by_registry_username(&self, username: &str) -> Option<BuildContext> {
+        self.get(self.username_index.get(username)?.value())
+    }
+
     pub fn get(&self, build_id: &str) -> Option<BuildContext> {
         self.inner.get(build_id).map(|r| r.value().clone())
     }
@@ -153,13 +301,29 @@ impl BuildRegistry {
             .map(|r| r.value().clone())
     }
 
-    /// Apply a mutation to a build context. Updates both index entries.
+    /// Apply a mutation to a build context. Updates both index entries and,
+    /// if the closure transitions the build into a terminal stage, stamps
+    /// `terminal_at` and queues the build for TTL eviction.
     pub fn update<F>(&self, build_id: &str, mutate: F) -> Option<BuildContext>
     where
         F: FnOnce(&mut BuildContext),
     {
         let mut ctx = self.inner.get(build_id).map(|r| r.value().clone())?;
+        let was_terminal = ctx.stage.is_terminal();
         mutate(&mut ctx);
+        let now_terminal = ctx.stage.is_terminal();
+
+        if !was_terminal && now_terminal {
+            let stamp = Utc::now();
+            ctx.terminal_at = Some(stamp);
+            self.push_terminal(TerminalEntry {
+                build_id: ctx.build_id.clone(),
+                template_id: ctx.template_id.clone(),
+                terminal_at: stamp,
+            });
+        } else if was_terminal && !now_terminal {
+            ctx.terminal_at = None;
+        }
 
         let pair_key = compose_key(&ctx.template_id, &ctx.build_id);
         self.inner.insert(build_id.to_string(), ctx.clone());
@@ -167,7 +331,6 @@ impl BuildRegistry {
         Some(ctx)
     }
 
-    /// Append one log line. Truncates the head to bound memory at ~10k lines.
     pub fn append_log(&self, build_id: &str, line: impl Into<String>) {
         let line = line.into();
         self.update(build_id, |ctx| {
@@ -182,8 +345,304 @@ impl BuildRegistry {
             }
         });
     }
+
+    /// Sweep the terminal FIFO, evicting builds with `terminal_at + ttl <= now`.
+    ///
+    /// Returns how many *logical* builds (not index entries) were removed; `0`
+    /// when TTL eviction is disabled. `pub(crate)` with an injected `now` so
+    /// tests can drive the GC deterministically without the background task.
+    pub(crate) fn evict_expired(&self, now: DateTime<Utc>) -> usize {
+        let Some(ttl) = self.policy.terminal_ttl else {
+            return 0;
+        };
+        let cutoff = now - ttl;
+
+        // Pops the FIFO head only while it is past the cutoff. The queue lock
+        // is scoped to this closure, so it is released before each eviction
+        // touches the DashMaps.
+        let pop_due = || {
+            let mut queue = self.terminal.lock().expect("terminal queue poisoned");
+            if matches!(queue.front(), Some(head) if head.terminal_at <= cutoff) {
+                queue.pop_front()
+            } else {
+                None
+            }
+        };
+
+        // Entries that `try_evict_one` declines are popped but not counted.
+        std::iter::from_fn(pop_due)
+            .filter(|entry| self.try_evict_one(entry))
+            .count()
+    }
+
+    /// Enforce `max_entries` by evicting terminal builds, oldest first.
+    ///
+    /// Intended to run right after `create()`. Pops the terminal FIFO until
+    /// the live build count is at or below the cap or the FIFO is empty;
+    /// in-flight builds are never evicted, so the cap can be exceeded (with
+    /// a warning) while every remaining build is still running.
+    fn enforce_size_cap(&self) {
+        let Some(cap) = self.policy.max_entries else {
+            return;
+        };
+        // Each build is indexed under two keys, hence the halving.
+        let mut live = self.inner.len() / 2;
+        while live > cap {
+            // The guard is a temporary of this statement, so the queue lock is
+            // released before `try_evict_one` runs.
+            let oldest = self
+                .terminal
+                .lock()
+                .expect("terminal queue poisoned")
+                .pop_front();
+            let Some(entry) = oldest else {
+                // FIFO drained: re-measure, and warn if still over the cap.
+                if self.inner.len() / 2 > cap {
+                    tracing::warn!(
+                        cap,
+                        live = self.inner.len() / 2,
+                        "build registry exceeds max_entries but every remaining build is in-flight; \
+                         not evicting active builds. Increase build_registry_max_entries or wait \
+                         for in-flight builds to terminate."
+                    );
+                }
+                return;
+            };
+            if self.try_evict_one(&entry) {
+                live = live.saturating_sub(1);
+            }
+        }
+    }
+
+    /// Remove both index entries for one terminal build.
+    /// Returns `true` if anything was actually removed.
+    /// A `false` return covers three benign races:
+    ///   - the build was already evicted (e.g. via duplicate FIFO entry),
+    ///   - the build was un-set back to non-terminal (we refuse to drop
+    ///     in-flight contexts here),
+    ///   - `entry` is stale: the build left and re-entered a terminal stage,
+    ///     so its `terminal_at` is newer and a later FIFO entry owns it.
+    fn try_evict_one(&self, entry: &TerminalEntry) -> bool {
+        // One lookup validates the entry and captures the username. The map
+        // guard is a temporary of this statement, so it is released before
+        // the removals below.
+        let username = match self.inner.get(&entry.build_id) {
+            Some(ctx)
+                if ctx.value().stage.is_terminal()
+                    && ctx.value().terminal_at == Some(entry.terminal_at) =>
+            {
+                ctx.value().credential.username.clone()
+            }
+            _ => return false,
+        };
+        let removed_bid = self.inner.remove(&entry.build_id).is_some();
+        let removed_pair = self
+            .inner
+            .remove(&compose_key(&entry.template_id, &entry.build_id))
+            .is_some();
+        if !username.is_empty() {
+            self.username_index
+                .remove_if(&username, |_, v| v == &entry.build_id);
+        }
+        removed_bid || removed_pair
+    }
+
+    /// Queue `entry` for eviction; a no-op when neither TTL nor cap is set.
+    fn push_terminal(&self, entry: TerminalEntry) {
+        if self.policy.terminal_ttl.is_some() || self.policy.max_entries.is_some() {
+            let mut fifo = self.terminal.lock().expect("terminal queue poisoned");
+            fifo.push_back(entry);
+        }
+    }
+
+    /// Test-only probe: how many entries currently sit on the terminal FIFO
+    /// (stale entries included, since they are only dropped when popped).
+    #[cfg(test)]
+    fn terminal_queue_len(&self) -> usize {
+        let fifo = self.terminal.lock().expect("terminal queue poisoned");
+        fifo.len()
+    }
+
+    #[cfg(test)]
+    pub(crate) fn live_count(&self) -> usize {
+        self.inner.len() / 2
+    }
 }
 
 fn compose_key(template_id: &str, build_id: &str) -> String {
     format!("{}::{}", template_id, build_id)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::models::CreateTemplateRequest;
+
+    fn empty_request() -> CreateTemplateRequest {
+        CreateTemplateRequest {
+            template_id: String::new(),
+            instance_type: None,
+            alias: None,
+            team_id: None,
+            image: None,
+            dockerfile: None,
+            writable_layer_size: None,
+            exposed_ports: None,
+            probe_port: None,
+            probe_path: None,
+            cpu: None,
+            memory: None,
+            cpu_count: None,
+            memory_mb: None,
+            env: None,
+            env_vars: None,
+            allow_internet_access: None,
+            network_type: None,
+            nodes: None,
+            registry_username: None,
+            registry_password: None,
+            command: None,
+            args: None,
+            dns: None,
+            allow_out: None,
+            deny_out: None,
+            start_cmd: None,
+            ready_cmd: None,
+        }
+    }
+
+    fn empty_credential() -> RegistryCredential {
+        RegistryCredential {
+            url: "http://127.0.0.1:5000".to_string(),
+            repository: "e2b/tpl".to_string(),
+            username: "_token".to_string(),
+            password: "secret".to_string(),
+        }
+    }
+
+    fn make_registry(ttl_secs: i64, cap: usize) -> BuildRegistry {
+        BuildRegistry::with_policy(EvictionPolicy {
+            terminal_ttl: (ttl_secs > 0).then(|| Duration::seconds(ttl_secs)),
+            max_entries: (cap > 0).then_some(cap),
+            gc_interval: None,
+        })
+    }
+
+    fn create_one(reg: &BuildRegistry, tid: &str) -> String {
+        reg.create(
+            tid.to_string(),
+            empty_request(),
+            empty_credential(),
+            format!("127.0.0.1:5000/e2b/{}:bld", tid),
+        )
+        .build_id
+    }
+
+    fn mark_ready(reg: &BuildRegistry, bid: &str) {
+        reg.update(bid, |c| c.stage = BuildStage::Ready)
+            .expect("build present");
+    }
+
+    #[test]
+    fn terminal_transition_stamps_terminal_at_and_enqueues() {
+        let reg = make_registry(3600, 0);
+        let bid = create_one(&reg, "tpl-a");
+        assert_eq!(reg.terminal_queue_len(), 0);
+
+        mark_ready(&reg, &bid);
+        let ctx = reg.get(&bid).unwrap();
+        assert!(ctx.terminal_at.is_some(), "terminal_at must be set");
+        assert!(ctx.stage.is_terminal());
+        assert_eq!(reg.terminal_queue_len(), 1);
+    }
+
+    #[test]
+    fn duplicate_terminal_updates_do_not_grow_the_fifo() {
+        let reg = make_registry(3600, 0);
+        let bid = create_one(&reg, "tpl-a");
+        mark_ready(&reg, &bid);
+        for _ in 0..5 {
+            reg.update(&bid, |c| c.message = "noise".to_string());
+        }
+        assert_eq!(
+            reg.terminal_queue_len(),
+            1,
+            "FIFO must dedupe rising-edge transitions"
+        );
+    }
+
+    #[test]
+    fn evict_expired_drops_terminal_builds_past_ttl() {
+        let reg = make_registry(60, 0);
+        let bid_a = create_one(&reg, "tpl-a");
+        let bid_b = create_one(&reg, "tpl-b");
+        mark_ready(&reg, &bid_a);
+        mark_ready(&reg, &bid_b);
+
+        assert_eq!(reg.evict_expired(Utc::now() + Duration::seconds(30)), 0);
+        assert_eq!(reg.live_count(), 2);
+
+        let removed = reg.evict_expired(Utc::now() + Duration::seconds(120));
+        assert_eq!(removed, 2);
+        assert_eq!(reg.live_count(), 0);
+        assert!(reg.get(&bid_a).is_none());
+        assert!(reg.get(&bid_b).is_none());
+        assert_eq!(reg.terminal_queue_len(), 0);
+    }
+
+    #[test]
+    fn evict_expired_leaves_in_flight_builds_alone() {
+        let reg = make_registry(60, 0);
+        let bid_done = create_one(&reg, "tpl-done");
+        let bid_live = create_one(&reg, "tpl-live");
+        mark_ready(&reg, &bid_done);
+
+        let removed = reg.evict_expired(Utc::now() + Duration::seconds(120));
+        assert_eq!(removed, 1);
+        assert!(reg.get(&bid_done).is_none(), "terminal build evicted");
+        assert!(reg.get(&bid_live).is_some(), "in-flight build retained");
+    }
+
+    #[test]
+    fn size_cap_evicts_oldest_terminal_first_at_create_time() {
+        let reg = make_registry(0, 2); // cap = 2, no TTL.
+        let bid_a = create_one(&reg, "tpl-a");
+        let bid_b = create_one(&reg, "tpl-b");
+        mark_ready(&reg, &bid_a);
+        mark_ready(&reg, &bid_b);
+
+        let _bid_c = create_one(&reg, "tpl-c");
+        assert!(reg.get(&bid_a).is_none(), "oldest terminal evicted");
+        assert!(reg.get(&bid_b).is_some());
+        assert!(reg.live_count() <= 2);
+    }
+
+    #[test]
+    fn size_cap_does_not_evict_active_builds() {
+        let reg = make_registry(0, 1);
+        let bid_live_1 = create_one(&reg, "tpl-x");
+        let bid_live_2 = create_one(&reg, "tpl-y");
+        assert!(reg.get(&bid_live_1).is_some());
+        assert!(reg.get(&bid_live_2).is_some());
+    }
+
+    #[test]
+    fn evict_expired_skips_builds_that_left_terminal_state() {
+        let reg = make_registry(60, 0);
+        let bid = create_one(&reg, "tpl-a");
+        mark_ready(&reg, &bid);
+        reg.update(&bid, |c| c.stage = BuildStage::Building);
+
+        let removed = reg.evict_expired(Utc::now() + Duration::seconds(120));
+        assert_eq!(removed, 0);
+        assert!(reg.get(&bid).is_some());
+    }
+
+    #[test]
+    fn unbounded_registry_does_not_queue_terminal_entries() {
+        let reg = BuildRegistry::with_policy(EvictionPolicy::unbounded());
+        let bid = create_one(&reg, "tpl-a");
+        mark_ready(&reg, &bid);
+        assert_eq!(reg.terminal_queue_len(), 0);
+    }
+}
diff --git a/CubeAPI/src/services/mod.rs b/CubeAPI/src/services/mod.rs
index 43cb524a7..3e9699084 100644
--- a/CubeAPI/src/services/mod.rs
+++ b/CubeAPI/src/services/mod.rs
@@ -22,7 +22,9 @@ pub struct AppServices {
 
 impl AppServices {
     pub fn new(config: &ServerConfig, cubemaster: CubeMasterClient) -> Self {
-        let builds = builds::BuildRegistry::new();
+        let policy = builds::EvictionPolicy::from_config(config);
+        let builds = builds::BuildRegistry::with_policy(policy);
+        let _gc = builds.spawn_gc();
         Self {
             cluster: cluster::ClusterService::new(cubemaster.clone()),
             sandboxes: sandboxes::SandboxService::new(
diff --git a/CubeAPI/src/services/templates.rs b/CubeAPI/src/services/templates.rs
index e65473145..8ba51dbc8 100644
--- a/CubeAPI/src/services/templates.rs
+++ b/CubeAPI/src/services/templates.rs
@@ -199,16 +199,10 @@ impl TemplateService {
             format!("https://{}", public_host)
         };
 
-        let credential = RegistryCredential {
-            url: credential_url,
-            repository: format!("{}/{}", repo_prefix, template_id),
-            username: "_token".to_string(),
-            password: self
-                .config
-                .registry_token
-                .clone()
-                .unwrap_or_else(|| "_anon".to_string()),
-        };
+        let credential = mint_registry_credential(
+            credential_url,
+            format!("{}/{}", repo_prefix, template_id),
+        );
 
         // Image ref CubeMaster will pull from once push is complete.
         let pull_host = self
@@ -446,9 +440,47 @@ impl TemplateService {
 
     /// Mark a build as image-pushed (called by the registry handler once the
     /// manifest PUT for `repo:tag` succeeds). Idempotent.
-    pub fn mark_image_pushed(&self, build_id: &str) {
+    /// Advance a build from `WaitingPush` → `Building` after the registry
+    /// reverse-proxy observed a successful manifest PUT.
+    ///
+    /// **Defence in depth**: while build IDs are 128-bit UUIDs and therefore
+    /// hard to guess, we still cross-check that the manifest's repository
+    /// path matches the one we minted at create time. This stops a leaked
+    /// (or copy-pasted) build_id from being advanced by a manifest pushed
+    /// against an unrelated repo, and surfaces config drift in the registry
+    /// path-prefix as a warning rather than a silent state transition.
+    ///
+    /// `repo` is the path between `/v2/` and `/manifests/<tag>`, e.g.
+    /// `e2b/tpl-abc123` for `PUT /v2/e2b/tpl-abc123/manifests/bld-...`.
+    pub fn mark_image_pushed(&self, build_id: &str, repo: &str) {
+        let Some(ctx) = self.builds.get(build_id) else {
+            tracing::debug!(
+                build_id = %build_id,
+                repo = %repo,
+                "manifest PUT received for unknown build_id; ignoring"
+            );
+            return;
+        };
+
+        if !manifest_repo_matches(&ctx.image_ref, repo) {
+            tracing::warn!(
+                build_id = %build_id,
+                got_repo = %repo,
+                expected_image_ref = %ctx.image_ref,
+                "manifest PUT repo does not match the image_ref \
+                 minted for this build; refusing to advance build state"
+            );
+            return;
+        }
+
         self.builds.update(build_id, |ctx| {
             ctx.append_log_inline("[push] image upload complete");
+            // `image_pushed` is the single, authoritative signal that a
+            // manifest landed under our predicted `image_ref`. It survives
+            // any subsequent stage mutation (e.g. by status pollers) and
+            // is what `v3_trigger_build`'s OCI-distribution fallback
+            // gates on — *not* `stage`, which is an indirect proxy.
+            ctx.image_pushed = true;
             if matches!(ctx.stage, BuildStage::WaitingPush) {
                 ctx.stage = BuildStage::Building;
                 ctx.message = "image uploaded, waiting for build dispatch".to_string();
@@ -677,11 +709,65 @@ impl TemplateService {
 
     /// `GET /templates/{tid}/files/{hash}` — file-cache probe.
     ///
-    /// Until the in-cluster builder lands we don't actually consume uploaded
-    /// tarballs. We answer `present=true` so the SDK skips uploading; this is
-    /// safe because `from_image`-based builds (the only flow CubeMaster
-    /// currently supports) don't need the build context.
-    pub fn v3_get_file_upload(&self, _template_id: &str, _files_hash: &str) -> AppResult<crate::models::V3TemplateFileUpload> {
+    /// ## Contract (paired with `v3_trigger_build`)
+    ///
+    /// The E2B SDK calls this endpoint to ask "do you already have the
+    /// build-context tarball identified by `<hash>`?". A `present=true`
+    /// answer makes the SDK *skip* uploading the tarball, on the assumption
+    /// that the server-side builder will read it from cache. CubeAPI does
+    /// **not** currently run an in-cluster Dockerfile/steps builder, so
+    /// strictly speaking we don't have any tarball cache at all.
+    ///
+    /// We still answer `present=true` here for two reasons:
+    ///
+    ///   1. The SDK calls this endpoint *unconditionally* before every
+    ///      build, including pure `fromImage` flows that don't need a
+    ///      tarball at all. Returning `present=false` would force the SDK
+    ///      to PUT a (typically empty) tarball to a URL we don't have
+    ///      anywhere to put.
+    ///   2. We compensate by enforcing a strict fail-fast in
+    ///      `v3_trigger_build`: unless the dispatch body carries a `fromImage`
+    ///      or the image was already pushed to the bundled registry, we reject
+    ///      with `501 Not Implemented` (`fromTemplate` included; `409 Conflict`
+    ///      if the push has not landed yet) and a message naming the supported
+    ///      flows. That means a `dockerfile` / `steps`-driven build can never
+    ///      silently succeed against a non-existent tarball — it just fails
+    ///      one round-trip later than it would in upstream e2b-infra.
+    ///
+    /// **A `present=true` reply from this endpoint is therefore not a
+    /// promise that we accepted a tarball.** It is exactly the
+    /// "no-op, please proceed" hint the SDK needs to advance to the
+    /// `POST /v2/.../builds/{bid}` step where the real validation lives.
+    ///
+    /// Until the in-cluster builder lands (Phase 4) the warning emitted
+    /// here gives operators an observability hook for "someone is trying
+    /// to use a context-based build against a CubeAPI that can't honour
+    /// it" without having to read trigger-time logs.
+    ///
+    /// The handler returns `201 Created` (not `200 OK`) on purpose — see the
+    /// doc comment on `handlers::templates_v3::v3_get_files_hash` for the
+    /// E2B SDK compatibility rationale.
+    pub fn v3_get_file_upload(
+        &self,
+        template_id: &str,
+        files_hash: &str,
+    ) -> AppResult<crate::models::V3TemplateFileUpload> {
+        // Cheap heuristic: an SDK invoking the empty-context flow (pure
+        // `fromImage`) typically still hashes *something*, so we can't tell
+        // dockerfile vs. fromImage apart purely from `files_hash`. Emit a
+        // single warn so operators can grep for it; trigger-time fail-fast
+        // is the authoritative gate.
+        tracing::warn!(
+            template_id = %template_id,
+            files_hash = %files_hash,
+            "files-hash cache probe answered present=true unconditionally; \
+             CubeAPI does not run an in-cluster context builder. \
+             Dockerfile-/steps-based builds will be rejected at \
+             POST /v2/templates/{{tid}}/builds/{{bid}} with 501. \
+             Use `fromImage` (or `docker push` via the bundled OCI registry) \
+             to drive the build."
+        );
+
         Ok(crate::models::V3TemplateFileUpload {
             present: true,
             url: None,
@@ -696,12 +782,34 @@ impl TemplateService {
     ///   1. `body.from_image`  — the standard E2B flow, e.g.
     ///                            `python:3.11-slim`.
     ///   2. The image already pushed to the bundled registry under
-    ///      `<repo>/<templateID>:<buildID>` (when the OCI Distribution path
-    ///      was used).
-    ///   3. `body.from_template` — copy from another known CubeSandbox
-    ///      template (resolved via CubeMaster `get_template`).
+    ///      `<repo>/<templateID>:<buildID>` — only used when
+    ///      `BuildContext::image_pushed` is `true`, i.e. the registry
+    ///      reverse proxy has observed a successful manifest PUT and
+    ///      `mark_image_pushed` cross-checked the repo. We deliberately
+    ///      do **not** key off `stage` or "image_ref is non-empty"
+    ///      here: `image_ref` is *predicted* at create time and would
+    ///      otherwise let us dispatch CubeMaster against a registry
+    ///      slot that holds nothing, with the failure surfacing later
+    ///      as `manifest unknown` during pull. When `image_ref` is
+    ///      non-empty but `image_pushed` is still `false`, we surface
+    ///      that mismatch as **`409 Conflict`** so the SDK can retry
+    ///      after `docker push` completes.
+    ///   3. `body.from_template` — **rejected with 501** until a
+    ///      downstream resolver for `cube://<templateID>` exists in
+    ///      CubeMaster/Cubelet. Today CubeMaster feeds `SourceImageRef`
+    ///      straight into `docker pull`, so a synthesised `cube://...`
+    ///      ref would silently break image resolution; we fail fast at
+    ///      the API layer instead. Callers who want this flow should
+    ///      resolve the parent template themselves and pass the resulting
+    ///      OCI reference through `from_image`.
     ///
-    /// `start_cmd` becomes container `args`; `ready_cmd` becomes a Probe.Exec.
+    /// `start_cmd` becomes container `args`; `ready_cmd` is *not* forwarded
+    /// as an exec probe — CubeMaster/Cubelet only accept TcpSocket / Ping /
+    /// HttpGet handlers, so we instead best-effort parse an embedded
+    /// `http(s)://host:port[/path]` URL out of the readyCmd and synthesise
+    /// a `Probe.HttpGet`. If no URL can be parsed and no `probePort` /
+    /// `exposedPorts` are supplied, no probe is emitted at all — see
+    /// `parse_ready_url` and `build_probe` for the precise rules.
     pub async fn v3_trigger_build(
         &self,
         template_id: String,
@@ -732,18 +840,154 @@ impl TemplateService {
             .map(|s| s.trim().to_string())
             .filter(|s| !s.is_empty())
         {
-            // Re-use an already-built CubeSandbox template as the base. We
-            // synthesise a CubeMaster reference of the form `cube://<tid>`,
-            // letting downstream callers resolve it. Adjust to your local
-            // convention if needed.
-            format!("cube://{}", parent)
-        } else if !ctx.image_ref.is_empty() {
+            // `fromTemplate` is **not yet wired end-to-end**: CubeMaster's
+            // template_image.go feeds `SourceImageRef` straight into
+            // `docker pull` / `docker image inspect`, and there is no
+            // resolver for a `cube://<tid>` scheme anywhere downstream
+            // (Cubelet, CubeMaster, builder). If we synthesised a
+            // `cube://<parent>` ref here, the build would *look* accepted
+            // at the API layer and only fail several seconds later inside
+            // the build worker with an opaque `docker pull cube://...:
+            // invalid reference format` error — exactly the kind of
+            // "looks supported but isn't" footgun reviewers flagged.
+            //
+            // Until the downstream resolver lands (tracked separately),
+            // surface the gap explicitly. Operators who actually want this
+            // flow today can resolve `parent` themselves and pass the
+            // resulting OCI ref via `fromImage`.
+            self.builds.append_log(
+                &build_id,
+                format!(
+                    "[dispatch-v3] rejecting build: fromTemplate={} is not \
+                     supported by this deployment — no downstream resolver \
+                     for `cube://<templateID>` exists in CubeMaster/Cubelet \
+                     yet. Resolve the parent template to an OCI image \
+                     reference and pass it via `fromImage` instead.",
+                    parent,
+                ),
+            );
+            return Err(AppError::NotImplemented(format!(
+                "build {} of template {} requested `fromTemplate={}`, but \
+                 CubeAPI cannot honour it: the downstream stack \
+                 (CubeMaster/Cubelet) does not yet understand the \
+                 `cube://<templateID>` source scheme and would attempt to \
+                 `docker pull` it verbatim. Pass `fromImage` with the \
+                 already-resolved OCI reference of the parent template, \
+                 or wait for the cube:// resolver to ship.",
+                build_id, template_id, parent,
+            )));
+        } else if ctx.image_pushed {
+            // OCI Distribution path: the caller has actually completed an
+            // OCI manifest PUT against `image_ref` — `mark_image_pushed`
+            // verified the repo and flipped `image_pushed` to true. We
+            // can safely dispatch CubeMaster against the predicted ref
+            // because we *know* a manifest now lives under it.
+            //
+            // Note we deliberately do NOT key off `stage != WaitingPush`
+            // here. `stage` is mutated by status pollers and the v2
+            // dispatch path for unrelated reasons; using it as a proxy
+            // for "client pushed" would re-open the very gap the
+            // reviewer flagged: dispatching against `ctx.image_ref` even
+            // though it's just the *predicted* path minted at create
+            // time, with no manifest behind it. The CubeMaster pull
+            // would then fail several seconds later with `manifest
+            // unknown` — exactly the kind of late-stage error this guard
+            // is meant to prevent.
+            debug_assert!(
+                !ctx.image_ref.is_empty(),
+                "image_pushed=true must imply non-empty image_ref"
+            );
             ctx.image_ref.clone()
         } else {
-            return Err(AppError::BadRequest(
-                "either fromImage, fromTemplate, or a previously-pushed image is required"
-                    .to_string(),
-            ));
+            // Distinguish three remaining failure modes so the error
+            // message tells the operator *exactly* what to do.
+            let has_steps = body
+                .steps
+                .as_ref()
+                .map(|s| !s.is_empty())
+                .unwrap_or(false);
+            if has_steps {
+                // (1) `steps[]` build with no fromImage — needs an
+                //     in-cluster context builder we don't run.
+                self.builds.append_log(
+                    &build_id,
+                    "[dispatch-v3] rejecting build: steps[] supplied but \
+                     CubeAPI has no in-cluster context builder; supply \
+                     fromImage or push a pre-built image to the bundled \
+                     OCI registry instead",
+                );
+                return Err(AppError::NotImplemented(format!(
+                    "dockerfile-/steps-based builds are not supported by \
+                     this CubeAPI deployment (build {} of template {} \
+                     supplied {} step(s) without a fromImage). Either set \
+                     `fromImage` to a base OCI reference, or `docker push` \
+                     a pre-built image to the bundled registry under \
+                     `<repo_prefix>/<templateID>:<buildID>` before calling \
+                     this endpoint.",
+                    build_id,
+                    template_id,
+                    body.steps.as_ref().map(|s| s.len()).unwrap_or(0),
+                )));
+            }
+            if !ctx.image_ref.is_empty() {
+                // (2) `image_ref` is non-empty (it was predicted at
+                //     create time) but the manifest never landed — the
+                //     SDK skipped (or hasn't yet completed) `docker
+                //     push`. Reviewer-driven guard: do NOT silently
+                //     dispatch CubeMaster against an empty registry
+                //     slot. Surface the mismatch *before* CubeMaster
+                //     starts pulling, with an actionable hint.
+                self.builds.append_log(
+                    &build_id,
+                    format!(
+                        "[dispatch-v3] rejecting build: predicted image_ref \
+                         {} exists but no successful manifest PUT has been \
+                         observed by the registry reverse proxy yet \
+                         (image_pushed=false). Dispatching now would only \
+                         move the failure into CubeMaster's pull stage as \
+                         `manifest unknown`.",
+                        ctx.image_ref,
+                    ),
+                );
+                return Err(AppError::Conflict(format!(
+                    "build {} of template {} has not received the \
+                     OCI manifest PUT yet: the registry reverse proxy \
+                     has not observed a successful `PUT \
+                     /v2/<repo>/manifests/{}` against `image_ref={}`. \
+                     Note: the SDK's `GET /templates/{{tid}}/files/{{hash}}` \
+                     cache probe always returns `present=true` and is \
+                     *not* a commitment that any image was accepted — \
+                     see `v3_get_file_upload` for the contract. Either \
+                     wait for `docker push` to complete and retry, or \
+                     supply `fromImage` to bypass the bundled registry \
+                     path.",
+                    build_id, template_id, build_id, ctx.image_ref,
+                )));
+            }
+            // (3) Neither steps nor fromImage nor any push — the SDK
+            //     probably believed the build context was already cached
+            //     server-side (because `/files/{hash}` answered
+            //     present=true); see `v3_get_file_upload` for the
+            //     contract. We surface a 501 here so the failure mode
+            //     is unambiguous.
+            self.builds.append_log(
+                &build_id,
+                "[dispatch-v3] rejecting build: no fromImage / fromTemplate \
+                 and no image was pushed to the bundled registry before \
+                 dispatch; CubeAPI cannot synthesise a source image from a \
+                 build-context tarball alone",
+            );
+            return Err(AppError::NotImplemented(format!(
+                "build {} of template {} cannot be dispatched: this \
+                 CubeAPI deployment does not run an in-cluster build-context \
+                 builder, so a `fromImage` (or pre-pushed registry image, \
+                 or `fromTemplate`) is required. The SDK's \
+                 `GET /templates/{{tid}}/files/{{hash}}` cache probe \
+                 returns `present=true` unconditionally and is *not* a \
+                 commitment that any tarball was accepted — see the \
+                 server-side docs on `v3_get_file_upload` for the contract.",
+                build_id, template_id,
+            )));
         };
 
         // Patch the cached create_request with the V2-time fields and dispatch.
@@ -870,14 +1114,73 @@ impl TemplateService {
             .take(limit)
             .cloned()
             .collect();
-        let log_entries: Vec<V3BuildLogEntry> = logs
-            .iter()
-            .map(|line| V3BuildLogEntry {
-                timestamp: chrono::Utc::now(),
-                message: line.clone(),
-                level: "info".to_string(),
-            })
-            .collect();
+
+        // Reviewer-flagged bug: previously `log_entries` stamped each line
+        // with `Utc::now()` at poll time, so the *same* historical line
+        // would receive a fresh timestamp on every status poll — making
+        // `logEntries[i].timestamp` jitter forwards in time even though
+        // the line itself never changed.
+        //
+        // The structured timestamps already exist on
+        // `BuildContext.logs[i].timestamp` (`BuildLogLine`) and were
+        // stamped at log-write time by `BuildRegistry::append_log`. We
+        // reach into the registry to pull those write-time timestamps
+        // back out, taking care to:
+        //
+        //   - apply the *same* `(logs_offset, limit)` window that
+        //     `get_template_build_status` used to produce
+        //     `internal.logs`, so the i-th entry of `logs` lines up
+        //     with the i-th entry of `log_entries`;
+        //   - clamp the entry count to `logs.len()` so we never emit
+        //     more `log_entries` than `logs` even if a concurrent
+        //     poll appended new lines between the two reads.
+        //
+        // The narrow corner case where the build context has been
+        // evicted between the `get_template_build_status` call above
+        // and this read (e.g. terminal-state eviction firing during
+        // an in-flight poll on the same build) falls back to
+        // stamping every line with poll-time `Utc::now()` — exactly
+        // what the historical code did, so behaviour is no worse
+        // than before, and we still preserve the
+        // `logs.len() == log_entries.len()` invariant the SDK relies
+        // on.
+        let log_entries: Vec<V3BuildLogEntry> = match self.builds.get(build_id) {
+            Some(ctx) => {
+                let total = ctx.logs.len();
+                let start = (logs_offset.max(0) as usize).min(total);
+                ctx.logs
+                    .iter()
+                    .skip(start)
+                    .take(logs.len())
+                    .map(|entry| V3BuildLogEntry {
+                        timestamp: entry.timestamp,
+                        message: entry.line.clone(),
+                        level: "info".to_string(),
+                    })
+                    .collect()
+            }
+            None => {
+                tracing::debug!(
+                    template_id = %template_id,
+                    build_id = %build_id,
+                    "build context vanished between status poll and \
+                     log-entry materialisation; falling back to \
+                     poll-time timestamps for V3 logEntries"
+                );
+                logs.iter()
+                    .map(|line| V3BuildLogEntry {
+                        timestamp: chrono::Utc::now(),
+                        message: line.clone(),
+                        level: "info".to_string(),
+                    })
+                    .collect()
+            }
+        };
+        debug_assert_eq!(
+            logs.len(),
+            log_entries.len(),
+            "V3 logs and logEntries must be aligned 1:1"
+        );
 
         let status = match internal.status.as_str() {
             "ready" => "ready",
@@ -924,16 +1227,11 @@ impl TemplateService {
         } else {
             self.config.registry_repo_prefix.trim()
         };
-        RegistryCredential {
-            url,
-            repository: format!("{}/{}", repo_prefix, template_id),
-            username: "_token".to_string(),
-            password: self
-                .config
-                .registry_token
-                .clone()
-                .unwrap_or_else(|| "_anon".to_string()),
-        }
+        // Per-build short-lived credential — see `mint_registry_credential`
+        // and the matching comment in `create_template_e2b_mode` for the
+        // rationale (username is the routing key into `username_index`,
+        // password is verified by the registry reverse-proxy).
+        mint_registry_credential(url, format!("{}/{}", repo_prefix, template_id))
     }
 }
 
@@ -1282,6 +1580,44 @@ fn base_url(url: &str) -> String {
     }
 }
 
+/// Mint a per-build credential: `password` is two v4 UUIDs (32 bytes) in
+/// URL-safe base64; `username` is `bld_` + the first 22 chars of that password.
+fn mint_registry_credential(url: String, repository: String) -> RegistryCredential {
+    use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _};
+    let raw = [*Uuid::new_v4().as_bytes(), *Uuid::new_v4().as_bytes()].concat();
+    let token = URL_SAFE_NO_PAD.encode(raw);
+    RegistryCredential {
+        url,
+        repository,
+        username: format!("bld_{}", &token[..22]),
+        password: token,
+    }
+}
+
+/// Whether `repo` is exactly the repository segment that `image_ref_repo`
+/// extracts from `image_ref`. A ref with no extractable repo never
+/// matches; the comparison is plain string equality.
+fn manifest_repo_matches(image_ref: &str, repo: &str) -> bool {
+    image_ref_repo(image_ref).as_deref() == Some(repo)
+}
+
+/// Extract the `repo` segment from `<host>[:port]/<repo>[:<tag>]`.
+///
+/// A trailing `:<tag>` is dropped, then everything after the first `/` is
+/// returned; the first segment is always treated as the host, so a
+/// host-less `team/app:tag` yields `app`. `None` when there is no `/` or
+/// when the repo would be empty.
+fn image_ref_repo(image_ref: &str) -> Option<String> {
+    // A ':' is a tag separator only when it follows the last '/'; any
+    // earlier ':' belongs to the host's port.
+    let tag_at = image_ref
+        .rfind(':')
+        .filter(|&colon| image_ref.rfind('/').map_or(false, |slash| colon > slash));
+    let untagged = tag_at.map_or(image_ref, |colon| &image_ref[..colon]);
+    let (_host, repo) = untagged.split_once('/')?;
+    (!repo.is_empty()).then(|| repo.to_string())
+}
+
 // Adapter helper used inside dashmap update closures.
 impl crate::services::builds::BuildContext {
     pub(crate) fn append_log_inline(&mut self, line: impl Into<String>) {
@@ -1521,6 +1857,127 @@ mod tests {
         );
     }
 
+    /// Host (bare or with a port) and the optional tag are stripped; every
+    /// path segment after the first `/` is kept, however deep the repo
+    /// path is.
+    #[test]
+    fn image_ref_repo_extracts_repo_with_host_port_and_tag() {
+        let cases = [
+            ("127.0.0.1:5000/e2b/tpl-abc:bld-123", "e2b/tpl-abc"),
+            ("registry.example.com/team/tpl-xyz", "team/tpl-xyz"),
+            ("reg.local:443/x/y/z:latest", "x/y/z"),
+        ];
+        for (image_ref, expected) in cases {
+            let got = image_ref_repo(image_ref);
+            assert_eq!(got.as_deref(), Some(expected));
+        }
+    }
+
+    #[test]
+    fn image_ref_repo_returns_none_for_malformed_input() {
+        // In order: no '/', nothing after '/', nothing between '/' and ':tag'.
+        let refs = ["only-host", "host.example.com/", "host:5000/:tag"];
+        assert_eq!(refs.map(image_ref_repo), [None, None, None]);
+    }
+
+    #[test]
+    fn manifest_repo_matches_accepts_canonical_image_ref() {
+        // Host with port, two-segment repo and a tag: the repo must match.
+        let image_ref = "127.0.0.1:5000/e2b/tpl-abc:bld-123";
+        let matched = manifest_repo_matches(image_ref, "e2b/tpl-abc");
+        assert!(matched);
+    }
+
+    /// The repo must match as a whole path: a foreign namespace and a
+    /// different template name under the right namespace are both
+    /// refused.
+    #[test]
+    fn manifest_repo_matches_rejects_mismatched_repo() {
+        let image_ref = "127.0.0.1:5000/e2b/tpl-abc:bld-123";
+        for wrong_repo in ["attacker/tpl-abc", "e2b/tpl-other"] {
+            let matched = manifest_repo_matches(image_ref, wrong_repo);
+            assert!(!matched);
+        }
+    }
+
+    #[test] // host-less ref: `e2b` is taken as the host, so the parsed repo is `tpl-abc`
+    fn manifest_repo_matches_rejects_malformed_image_ref() {
+        assert!(!manifest_repo_matches("e2b/tpl-abc:bld-123", "e2b/tpl-abc"));
+    }
+
+    #[test]
+    fn mark_image_pushed_advances_stage_when_repo_matches() {
+        let svc = make_service(Some("http://127.0.0.1:5000".to_string()));
+        let cred = RegistryCredential {
+            url: "http://127.0.0.1:5000".to_string(),
+            repository: "e2b/tpl-abc".to_string(),
+            username: "_token".to_string(),
+            password: "secret".to_string(),
+        };
+        let ctx = svc.builds.create(
+            "tpl-abc".to_string(),
+            empty_request(),
+            cred,
+            "127.0.0.1:5000/e2b/tpl-abc:bld-deadbeef".to_string(),
+        );
+        svc.builds.update(&ctx.build_id, |c| {
+            c.image_ref = format!("127.0.0.1:5000/e2b/tpl-abc:{}", c.build_id);
+        });
+        // Report a push for exactly the repo that `image_ref` points at.
+        svc.mark_image_pushed(&ctx.build_id, "e2b/tpl-abc");
+        // The accepted push must show up in both the stage and the flag.
+        let after = svc.builds.get(&ctx.build_id).expect("ctx");
+        assert_eq!(after.stage, BuildStage::Building);
+        assert!(
+            after.image_pushed,
+            "mark_image_pushed must flip image_pushed=true on success — \
+             this is the authoritative signal v3_trigger_build's \
+             OCI fallback gates on"
+        );
+    }
+
+    #[test]
+    fn mark_image_pushed_refuses_when_repo_does_not_match() {
+        let svc = make_service(Some("http://127.0.0.1:5000".to_string()));
+        let cred = RegistryCredential {
+            url: "http://127.0.0.1:5000".to_string(),
+            repository: "e2b/tpl-abc".to_string(),
+            username: "_token".to_string(),
+            password: "secret".to_string(),
+        };
+        let ctx = svc.builds.create(
+            "tpl-abc".to_string(),
+            empty_request(),
+            cred,
+            "127.0.0.1:5000/e2b/tpl-abc:bld-deadbeef".to_string(),
+        );
+        svc.builds.update(&ctx.build_id, |c| {
+            c.image_ref = format!("127.0.0.1:5000/e2b/tpl-abc:{}", c.build_id);
+        });
+        // Report a push for a repo that differs from the one in `image_ref`.
+        svc.mark_image_pushed(&ctx.build_id, "attacker/tpl-abc");
+        // Neither the stage nor the pushed flag may change on a mismatch.
+        let after = svc.builds.get(&ctx.build_id).expect("ctx");
+        assert_eq!(
+            after.stage,
+            BuildStage::WaitingPush,
+            "stage must not advance when repo mismatches"
+        );
+        assert!(
+            !after.image_pushed,
+            "image_pushed must stay false when the repo cross-check \
+             fails — otherwise v3_trigger_build would later dispatch \
+             against an unverified slot"
+        );
+    }
+
+    #[test] // an unknown build id must be ignored without creating a registry entry
+    fn mark_image_pushed_is_noop_for_unknown_build_id() {
+        let svc = make_service(Some("http://127.0.0.1:5000".to_string()));
+        svc.mark_image_pushed("bld-does-not-exist", "e2b/tpl-abc");
+        assert!(svc.builds.get("bld-does-not-exist").is_none());
+    }
+
     #[test]
     fn remap_cubemaster_status_normalizes_phases_to_e2b_tokens() {
         assert_eq!(remap_cubemaster_status(""), "pending");
@@ -1572,7 +2029,38 @@ mod tests {
         let cred = job.registry.expect("registry credential");
         assert_eq!(cred.url, "http://127.0.0.1:5000");
         assert!(cred.repository.starts_with("e2b/tpl-"));
-        assert_eq!(cred.username, "_token");
+        // Per-build short-lived credential: username is `bld_<…>` (i.e. NOT
+        // the legacy global `_token`), and password is a high-entropy
+        // random string that the registry reverse-proxy validates against
+        // the in-memory BuildRegistry on every push request. See
+        // `mint_registry_credential` for the rationale.
+        assert!(
+            cred.username.starts_with("bld_"),
+            "expected per-build username (bld_<…>), got {:?}",
+            cred.username
+        );
+        assert!(
+            cred.password.len() >= 32,
+            "expected high-entropy random password, got {} chars",
+            cred.password.len()
+        );
+        assert_ne!(
+            cred.username, "_token",
+            "the legacy shared `_token` username must not regress — \
+             it would defeat per-build credential validation"
+        );
+        // Issuing a second build must produce a different credential pair
+        // (i.e. RNG is wired up properly and we're not handing every build
+        // the same secret).
+        let mut req2 = empty_request();
+        req2.dockerfile = Some("FROM ubuntu".to_string());
+        let job2 = svc
+            .create_template(req2)
+            .await
+            .expect("second e2b create should succeed");
+        let cred2 = job2.registry.expect("second registry credential");
+        assert_ne!(cred.username, cred2.username);
+        assert_ne!(cred.password, cred2.password);
 
         // Internal BuildRegistry now knows about this build and stores the
         // image_ref CubeMaster will later pull from.
@@ -1584,6 +2072,329 @@ mod tests {
         assert!(ctx.image_ref.ends_with(&format!(":{}", job.build_id)));
     }
 
+    #[tokio::test]
+    async fn v3_trigger_build_rejects_steps_without_from_image_with_501() {
+        let svc = make_service(Some("http://127.0.0.1:5000".to_string()));
+        let mut req = empty_request();
+        req.dockerfile = Some("FROM ubuntu".to_string());
+        let job = svc
+            .create_template(req)
+            .await
+            .expect("e2b create should succeed");
+        // Trigger with build steps only: no fromImage, and no push was recorded.
+        let body = V2TemplateBuildStart {
+            steps: Some(vec![serde_json::json!({"type": "RUN", "args": ["echo hi"]})]),
+            ..Default::default()
+        };
+        let err = svc
+            .v3_trigger_build(job.template_id.clone(), job.build_id.clone(), body)
+            .await
+            .expect_err("steps-only build must be rejected, not dispatched");
+        // The rejection must be NotImplemented, naming the mode and the build.
+        match err {
+            AppError::NotImplemented(msg) => {
+                assert!(
+                    msg.contains("dockerfile-/steps-based builds are not supported"),
+                    "unexpected NotImplemented message: {msg}"
+                );
+                assert!(msg.contains(&job.build_id));
+            }
+            other => panic!("expected NotImplemented, got {other:?}"),
+        }
+        // A rejected trigger must leave the build waiting for a push.
+        let ctx = svc
+            .builds
+            .get(&job.build_id)
+            .expect("build context preserved on failure");
+        assert_eq!(ctx.stage, BuildStage::WaitingPush);
+    }
+
+    #[tokio::test]
+    async fn v3_trigger_build_does_not_use_unpushed_image_ref() {
+        let svc = make_service(Some("http://127.0.0.1:5000".to_string()));
+        let mut req = empty_request();
+        req.dockerfile = Some("FROM ubuntu".to_string());
+        let job = svc
+            .create_template(req)
+            .await
+            .expect("e2b create should succeed");
+        // Precondition: image_ref is already set, but no push was recorded.
+        let ctx = svc
+            .builds
+            .get(&job.build_id)
+            .expect("build context exists");
+        assert!(!ctx.image_pushed, "fresh build must not be marked pushed");
+        assert!(
+            !ctx.image_ref.is_empty(),
+            "image_ref is predicted at create time and should already \
+             be populated — exactly the trap this guard prevents"
+        );
+        // Trigger with an empty body, i.e. without any explicit source.
+        let body = V2TemplateBuildStart::default();
+        let err = svc
+            .v3_trigger_build(job.template_id.clone(), job.build_id.clone(), body)
+            .await
+            .expect_err("unpushed builds must not be dispatched against the predicted ref");
+        // Expect a Conflict whose message is actionable for the operator.
+        match err {
+            AppError::Conflict(msg) => {
+                assert!(
+                    msg.contains("manifest PUT"),
+                    "error must name the missing operation: {msg}"
+                );
+                assert!(
+                    msg.contains(&job.build_id),
+                    "error must include the build_id: {msg}"
+                );
+                assert!(
+                    msg.contains("fromImage"),
+                    "error must point operators at the fromImage \
+                     workaround: {msg}"
+                );
+            }
+            other => panic!("expected Conflict, got {other:?}"),
+        }
+        // The failed trigger must not have marked the image as pushed.
+        let ctx = svc
+            .builds
+            .get(&job.build_id)
+            .expect("build context preserved on failure");
+        assert!(!ctx.image_pushed);
+    }
+
+    #[tokio::test]
+    async fn v3_trigger_build_uses_image_ref_after_mark_image_pushed_flips_flag() {
+        let svc = make_service(Some("http://127.0.0.1:5000".to_string()));
+        let mut req = empty_request();
+        req.dockerfile = Some("FROM ubuntu".to_string());
+        let job = svc
+            .create_template(req)
+            .await
+            .expect("e2b create should succeed");
+        // Align image_ref with the repo reported below, then record the push.
+        svc.builds.update(&job.build_id, |c| {
+            c.image_ref = format!("127.0.0.1:5000/e2b/tpl-abc:{}", c.build_id);
+        });
+        svc.mark_image_pushed(&job.build_id, "e2b/tpl-abc");
+        let ctx = svc.builds.get(&job.build_id).expect("ctx exists");
+        assert!(
+            ctx.image_pushed,
+            "mark_image_pushed must flip image_pushed=true"
+        );
+        // Trigger with an empty body; the recorded push is the only source.
+        let body = V2TemplateBuildStart::default();
+        let err = svc
+            .v3_trigger_build(job.template_id.clone(), job.build_id.clone(), body)
+            .await
+            .expect_err(
+                "cubemaster is unreachable in unit tests, so dispatch \
+                 will fail at transport — but the source-resolution \
+                 branch must already have been satisfied",
+            );
+        // Whatever failed, it must not be one of the source-resolution guards.
+        assert!(
+            !matches!(err, AppError::Conflict(_)),
+            "image_pushed=true must defuse the 409 guard: {err:?}"
+        );
+        assert!(
+            !matches!(err, AppError::NotImplemented(_)),
+            "image_pushed=true must defuse the 501 source-resolution \
+             guard: {err:?}"
+        );
+    }
+
+    #[tokio::test]
+    async fn v3_get_build_status_preserves_log_write_timestamps_across_polls() {
+        let svc = make_service(Some("http://127.0.0.1:5000".to_string()));
+        let mut req = empty_request();
+        req.dockerfile = Some("FROM ubuntu".to_string());
+        let job = svc
+            .create_template(req)
+            .await
+            .expect("e2b create should succeed");
+        // Count the log lines already present right after creation, if any.
+        let baseline_len = svc
+            .builds
+            .get(&job.build_id)
+            .expect("ctx exists")
+            .logs
+            .len();
+        // Seed three more lines on top of that baseline.
+        svc.builds.append_log(&job.build_id, "first line");
+        svc.builds.append_log(&job.build_id, "second line");
+        svc.builds.append_log(&job.build_id, "third line");
+        // Ground truth: the timestamps exactly as stored in the build context.
+        let expected_ts: Vec<_> = svc
+            .builds
+            .get(&job.build_id)
+            .expect("ctx exists")
+            .logs
+            .iter()
+            .map(|l| l.timestamp)
+            .collect();
+        let expected_total = baseline_len + 3;
+        assert_eq!(
+            expected_ts.len(),
+            expected_total,
+            "test setup must seed exactly three additional log lines"
+        );
+        // Poll 1: every entry must carry its stored write-time timestamp.
+        let first = svc
+            .v3_get_build_status(&job.template_id, &job.build_id, 0, 1000)
+            .await
+            .expect("first status poll should succeed");
+        assert_eq!(first.log_entries.len(), expected_total);
+        assert_eq!(first.logs.len(), first.log_entries.len());
+        for (i, entry) in first.log_entries.iter().enumerate() {
+            assert_eq!(
+                entry.timestamp, expected_ts[i],
+                "logEntries[{i}].timestamp must match the write-time \
+                 BuildLogLine.timestamp, not Utc::now() at poll time"
+            );
+        }
+        assert_eq!(first.log_entries[baseline_len].message, "first line");
+        assert_eq!(first.log_entries[baseline_len + 1].message, "second line");
+        assert_eq!(first.log_entries[baseline_len + 2].message, "third line");
+        // Let the wall clock advance so a poll-time re-stamp would be visible.
+        tokio::time::sleep(std::time::Duration::from_millis(5)).await;
+        // Poll 2: timestamps and messages must be identical to poll 1.
+        let second = svc
+            .v3_get_build_status(&job.template_id, &job.build_id, 0, 1000)
+            .await
+            .expect("second status poll should succeed");
+        assert_eq!(second.log_entries.len(), expected_total);
+        for (i, entry) in second.log_entries.iter().enumerate() {
+            assert_eq!(
+                entry.timestamp, first.log_entries[i].timestamp,
+                "logEntries[{i}].timestamp must be stable across \
+                 polls — reviewer-flagged regression: previously \
+                 each poll re-stamped lines with Utc::now() so the \
+                 same historical line drifted forwards in time"
+            );
+            assert_eq!(
+                entry.message, first.log_entries[i].message,
+                "log message must match across polls"
+            );
+        }
+        // Poll 3, after one more append: old entries unchanged, new tail present.
+        svc.builds.append_log(&job.build_id, "fourth line");
+        let third = svc
+            .v3_get_build_status(&job.template_id, &job.build_id, 0, 1000)
+            .await
+            .expect("third status poll should succeed");
+        assert_eq!(third.log_entries.len(), expected_total + 1);
+        for i in 0..expected_total {
+            assert_eq!(
+                third.log_entries[i].timestamp, first.log_entries[i].timestamp,
+                "appending a new line must not perturb existing \
+                 logEntries[{i}].timestamp"
+            );
+        }
+        assert_eq!(third.log_entries[expected_total].message, "fourth line");
+        assert!(
+            third.log_entries[expected_total].timestamp
+                >= first.log_entries[expected_total - 1].timestamp,
+            "newly appended line must carry a write-time timestamp \
+             at or after the previous tail"
+        );
+    }
+
+    #[tokio::test]
+    async fn v3_get_build_status_log_entries_respect_logs_offset() {
+        let svc = make_service(Some("http://127.0.0.1:5000".to_string()));
+        let mut req = empty_request();
+        req.dockerfile = Some("FROM ubuntu".to_string());
+        let job = svc
+            .create_template(req)
+            .await
+            .expect("e2b create should succeed");
+        // Lines already present after creation shift where the seeded ones start.
+        let baseline_len = svc
+            .builds
+            .get(&job.build_id)
+            .expect("ctx")
+            .logs
+            .len();
+        // Seed four lines so that an offset can split them two and two.
+        svc.builds.append_log(&job.build_id, "alpha");
+        svc.builds.append_log(&job.build_id, "beta");
+        svc.builds.append_log(&job.build_id, "gamma");
+        svc.builds.append_log(&job.build_id, "delta");
+        // Ground-truth timestamps, indexed by absolute position in the log.
+        let expected_ts: Vec<_> = svc
+            .builds
+            .get(&job.build_id)
+            .expect("ctx")
+            .logs
+            .iter()
+            .map(|l| l.timestamp)
+            .collect();
+        // Skip the baseline plus "alpha" and "beta"; the limit is not reached.
+        let skip = (baseline_len + 2) as i32;
+        let resp = svc
+            .v3_get_build_status(&job.template_id, &job.build_id, skip, 1000)
+            .await
+            .expect("paged status poll should succeed");
+        // Only "gamma" and "delta" remain, each with its own stored timestamp.
+        assert_eq!(resp.log_entries.len(), 2);
+        assert_eq!(resp.logs.len(), resp.log_entries.len());
+        assert_eq!(resp.log_entries[0].message, "gamma");
+        assert_eq!(resp.log_entries[1].message, "delta");
+        assert_eq!(
+            resp.log_entries[0].timestamp,
+            expected_ts[baseline_len + 2]
+        );
+        assert_eq!(
+            resp.log_entries[1].timestamp,
+            expected_ts[baseline_len + 3]
+        );
+    }
+
+    #[tokio::test]
+    async fn v3_trigger_build_rejects_from_template_with_501_until_resolver_lands() {
+        let svc = make_service(Some("http://127.0.0.1:5000".to_string()));
+        let mut req = empty_request();
+        req.dockerfile = Some("FROM ubuntu".to_string());
+        let job = svc
+            .create_template(req)
+            .await
+            .expect("e2b create should succeed");
+        // Trigger with a parent template as the only source.
+        let body = V2TemplateBuildStart {
+            from_template: Some("tpl-parent-xyz".to_string()),
+            ..Default::default()
+        };
+        let err = svc
+            .v3_trigger_build(job.template_id.clone(), job.build_id.clone(), body)
+            .await
+            .expect_err("fromTemplate must be rejected, not silently dispatched as cube://...");
+        // The error must echo the parent, suggest fromImage and name the scheme.
+        match err {
+            AppError::NotImplemented(msg) => {
+                assert!(
+                    msg.contains("tpl-parent-xyz"),
+                    "error must echo the rejected parent: {msg}"
+                );
+                assert!(
+                    msg.contains("fromImage"),
+                    "error must point operators at the fromImage workaround: {msg}"
+                );
+                assert!(
+                    msg.contains("cube://"),
+                    "error should name the unimplemented scheme so \
+                     operators can grep release notes for it: {msg}"
+                );
+            }
+            other => panic!("expected NotImplemented, got {other:?}"),
+        }
+        // A rejected trigger must leave the build waiting for a push.
+        let ctx = svc
+            .builds
+            .get(&job.build_id)
+            .expect("build context preserved on failure");
+        assert_eq!(ctx.stage, BuildStage::WaitingPush);
+    }
+
     /// Regression: CubeMaster validates `writable_layer_size` as required and
     /// the E2B V3 SDK never sends it. Verify the service injects the
     /// configured default so the request reaches CubeMaster non-empty.
diff --git a/CubeAPI/src/state.rs b/CubeAPI/src/state.rs
index 201a4dc52..c88ec960c 100644
--- a/CubeAPI/src/state.rs
+++ b/CubeAPI/src/state.rs
@@ -66,13 +66,71 @@ impl AppState {
             None => None,
         };
 
-        Self {
+        let s = Self {
             rate_limiter,
             http_client,
             services,
             logger,
             config: Arc::new(config),
             agenthub_store,
+        };
+        s.log_registry_security_posture();
+        s
+    }
+
+    /// Emit a single startup line summarising whether the bundled OCI
+    /// registry reverse-proxy is on, and — if so — whether the per-build
+    /// push credentials can cross the network in the clear.
+    ///
+    /// Any bind that is not loopback (`127.x`, `[::1]`, `localhost`) counts
+    /// as network-exposed, not just the `0.0.0.0` / `[::]` wildcards. We
+    /// don't refuse to start: this is a one-click developer-experience
+    /// product, and a hard failure would surprise users running on a
+    /// single VM behind a firewall. But we do log loudly so production
+    /// operators see the warning during the first deploy.
+    fn log_registry_security_posture(&self) {
+        let upstream = self
+            .config
+            .registry_upstream
+            .as_deref()
+            .map(str::trim)
+            .filter(|s| !s.is_empty());
+        let Some(upstream) = upstream else {
+            tracing::info!("registry reverse-proxy disabled (CUBE_API_REGISTRY_UPSTREAM unset)");
+            return;
+        };
+        // Every non-loopback bind is network-reachable, not only the wildcards.
+        let bind = self.config.bind.as_str();
+        let bind_is_loopback = bind.starts_with("127.")
+            || bind.starts_with("[::1]")
+            || bind.starts_with("localhost");
+        let upstream_is_loopback = upstream.contains("127.0.0.1")
+            || upstream.contains("localhost")
+            || upstream.contains("[::1]");
+        if !bind_is_loopback && upstream_is_loopback {
+            tracing::warn!(
+                bind = %bind,
+                upstream = %upstream,
+                "registry reverse-proxy is enabled with an unauthenticated loopback \
+                 upstream while CubeAPI binds on a public interface. CubeAPI's own \
+                 per-build credential gate is in force, but build push tokens will \
+                 cross the network in clear text unless this listener is fronted \
+                 by TLS. Either: (a) terminate TLS in a reverse proxy in front of \
+                 CubeAPI, or (b) run distribution/distribution with htpasswd auth \
+                 and rely on the upstream's own TLS+auth. See ServerConfig::registry_upstream."
+            );
+        } else if bind_is_loopback {
+            tracing::info!(
+                bind = %bind,
+                upstream = %upstream,
+                "registry reverse-proxy enabled on a loopback bind; safe for development"
+            );
+        } else {
+            tracing::info!(
+                bind = %bind,
+                upstream = %upstream,
+                "registry reverse-proxy enabled; per-build credential gate is in force"
+            );
         }
     }
 }