proximafusion · krystophny · Jun 14, 2026 · Jun 14, 2026 · Jun 14, 2026 · Jun 14, 2026
@@ -174,3 +174,43 @@ endif()
 
 install(TARGETS _vmecpp LIBRARY DESTINATION vmecpp/cpp/.)
 install(TARGETS indata2json DESTINATION vmecpp/cpp/third_party/indata2json/)
+
+# Optional Enzyme automatic-differentiation targets.
+# Enzyme (https://enzyme.mit.edu) differentiates LLVM IR via a Clang plugin, so
+# these targets need a Clang frontend and the matching ClangEnzyme plugin. OFF
+# by default; the production build and all existing targets are unaffected.
+# Enable with:
+#   -DVMECPP_ENABLE_ENZYME=ON -DVMECPP_ENZYME_PLUGIN=/path/to/ClangEnzyme-NN.so
+option(VMECPP_ENABLE_ENZYME "Build Enzyme autodiff targets" OFF)
+if(VMECPP_ENABLE_ENZYME)
+  if(NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+    message(FATAL_ERROR
+      "VMECPP_ENABLE_ENZYME requires a Clang compiler (got "
+      "${CMAKE_CXX_COMPILER_ID}); Enzyme attaches as a Clang plugin.")
+  endif()
+  set(VMECPP_ENZYME_PLUGIN "" CACHE FILEPATH "Path to ClangEnzyme-NN.so")
+  if(NOT VMECPP_ENZYME_PLUGIN OR NOT EXISTS "${VMECPP_ENZYME_PLUGIN}")
+    message(FATAL_ERROR
+      "VMECPP_ENABLE_ENZYME=ON requires "
+      "-DVMECPP_ENZYME_PLUGIN=/path/to/ClangEnzyme-NN.so")
+  endif()
+  message(STATUS "Enzyme plugin: ${VMECPP_ENZYME_PLUGIN}")
+  enable_testing()
+  add_executable(enzyme_smoke_test
+    ${PROJECT_SOURCE_DIR}/src/vmecpp/cpp/vmecpp/common/enzyme/enzyme_smoke_test.cc)
+  # Enzyme runs as an optimization-time pass; it needs optimizations enabled and
+  # the plugin attached. -O2 guards against a Debug (-O0) configuration where the
+  # AD pass would otherwise not run.
+  target_compile_options(enzyme_smoke_test PRIVATE
+    -O2 -fplugin=${VMECPP_ENZYME_PLUGIN})
+  add_test(NAME enzyme_smoke COMMAND enzyme_smoke_test)
+
+  # Exact autodiff (forward and reverse) of a real VMEC nonlinear kernel: the
+  # half-grid Jacobian. Self-contained over flat buffers; checks Jv and J^T u
+  # against finite differences and against each other (adjoint identity).
+  add_executable(jacobian_kernel_autodiff_test
+    ${PROJECT_SOURCE_DIR}/src/vmecpp/cpp/vmecpp/common/enzyme/jacobian_kernel_autodiff_test.cc)
+  target_compile_options(jacobian_kernel_autodiff_test PRIVATE
+    -O2 -fplugin=${VMECPP_ENZYME_PLUGIN})
+  add_test(NAME jacobian_kernel_autodiff COMMAND jacobian_kernel_autodiff_test)
+endif()
@@ -0,0 +1,52 @@
+// SPDX-FileCopyrightText: 2024-present Proxima Fusion GmbH
+// <info@proximafusion.com>
+//
+// SPDX-License-Identifier: MIT
+#ifndef VMECPP_COMMON_ENZYME_ENZYME_H_
+#define VMECPP_COMMON_ENZYME_ENZYME_H_
+
+// Thin declarations for the Enzyme automatic-differentiation intrinsics.
+//
+// Enzyme (https://enzyme.mit.edu) differentiates at the LLVM-IR level via a
+// Clang plugin (ClangEnzyme-NN.so). These intrinsics are resolved by that
+// plugin; without it the symbols do not link, so any translation unit that
+// calls them must be compiled with -fplugin=<ClangEnzyme>. The CMake option
+// VMECPP_ENABLE_ENZYME wires that flag and is OFF by default.
+//
+// Differentiation activity is selected per argument with the marker globals
+// below: pass `enzyme_dup, primal_ptr, shadow_ptr` for an active buffer (the
+// gradient accumulates into shadow_ptr) and `enzyme_const, value` for an input
+// held fixed. Enzyme matches these markers by symbol name.
+//
+// Constraint that shapes how differentiable kernels here are written: Enzyme's
+// allocation analysis does not track Eigen's aligned allocator, so a heap
+// temporary from an Eigen expression (e.g. a dynamic-size `A * x`) crossing the
+// differentiated call aborts with "freeing without malloc". Differentiable
+// kernels therefore operate on caller-owned buffers via Eigen::Map and avoid
+// allocating expression temporaries on the differentiated path.
+
+// The marker globals and the __enzyme_* intrinsic names are part of the Enzyme
+// ABI: the plugin matches them by exact symbol name, so they cannot be const or
+// renamed to satisfy the in-tree naming/identifier lint rules.
+
+// NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables)
+extern int enzyme_dup;
+extern int enzyme_const;
+extern int enzyme_dupnoneed;
+extern int enzyme_out;
+// NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables)
+
+// Reverse mode: returns nothing useful here; gradients land in the shadow
+// buffers passed alongside each `enzyme_dup` argument.
+template <typename... Args>
+void __enzyme_autodiff(  // NOLINT(bugprone-reserved-identifier,readability-identifier-naming)
+    void*, Args...);
+
+// Forward mode: propagates the seed in the shadow argument to the directional
+// derivative of the result.
+template <typename Return, typename... Args>
+Return
+__enzyme_fwddiff(  // NOLINT(bugprone-reserved-identifier,readability-identifier-naming)
+    void*, Args...);
+
+#endif  // VMECPP_COMMON_ENZYME_ENZYME_H_
@@ -0,0 +1,97 @@
+// SPDX-FileCopyrightText: 2024-present Proxima Fusion GmbH
+// <info@proximafusion.com>
+//
+// SPDX-License-Identifier: MIT
+
+// Toolchain smoke test for the Enzyme autodiff build.
+//
+// It checks the two properties the differentiable VMEC++ kernels rely on:
+//   1. The ClangEnzyme plugin is attached and resolves the autodiff intrinsics.
+//   2. Enzyme differentiates a scalar objective expressed over Eigen::Map'd
+//      caller buffers (the pattern all differentiable kernels here use).
+//
+// The objective is f(x) = sum_i 0.5 * w_i * x_i^2 + c_i * x_i, with the closed
+// form gradient df/dx_i = w_i * x_i + c_i. Reverse- and forward-mode Enzyme
+// gradients are checked against that closed form and against central finite
+// differences. Exit code 0 on success, 1 on any mismatch.
+
+#include <Eigen/Dense>
+#include <cmath>
+#include <cstdio>
+
+#include "vmecpp/common/enzyme/enzyme.h"
+
+namespace {
+
+using Eigen::VectorXd;
+
+// Quadratic objective over caller-owned buffers. No allocating Eigen
+// expression temporaries cross the differentiated boundary.
+__attribute__((noinline)) double Objective(const double* x, const double* w,
+                                           const double* c, int n) {
+  Eigen::Map<const VectorXd> xv(x, n);
+  Eigen::Map<const VectorXd> wv(w, n);
+  Eigen::Map<const VectorXd> cv(c, n);
+  double sum = 0.0;
+  for (int i = 0; i < n; ++i) {
+    sum += 0.5 * wv[i] * xv[i] * xv[i] + cv[i] * xv[i];
+  }
+  return sum;
+}
+
+double MaxAbsDiff(const VectorXd& a, const VectorXd& b) {
+  return (a - b).cwiseAbs().maxCoeff();
+}
+
+}  // namespace
+
+int main() {
+  const int n = 8;
+  VectorXd x = VectorXd::LinSpaced(n, 1.0, n);
+  VectorXd w = VectorXd::LinSpaced(n, 2.0, 2.0 + 0.5 * (n - 1));
+  VectorXd c = VectorXd::Constant(n, 0.25);
+
+  VectorXd analytic(n);
+  for (int i = 0; i < n; ++i) analytic[i] = w[i] * x[i] + c[i];
+
+  // Reverse mode: gradient accumulates into the shadow buffer.
+  VectorXd g_rev = VectorXd::Zero(n);
+  __enzyme_autodiff((void*)Objective, enzyme_dup, x.data(), g_rev.data(),
+                    enzyme_const, w.data(), enzyme_const, c.data(),
+                    enzyme_const, n);
+
+  // Forward mode: one directional derivative per coordinate seed.
+  VectorXd g_fwd = VectorXd::Zero(n);
+  for (int j = 0; j < n; ++j) {
+    VectorXd seed = VectorXd::Zero(n);
+    seed[j] = 1.0;
+    g_fwd[j] = __enzyme_fwddiff<double>(
+        (void*)Objective, enzyme_dup, x.data(), seed.data(), enzyme_const,
+        w.data(), enzyme_const, c.data(), enzyme_const, n);
+  }
+
+  // Central finite differences.
+  VectorXd g_fd = VectorXd::Zero(n);
+  const double h = 1e-6;
+  for (int j = 0; j < n; ++j) {
+    VectorXd xp = x, xm = x;
+    xp[j] += h;
+    xm[j] -= h;
+    g_fd[j] = (Objective(xp.data(), w.data(), c.data(), n) -
+               Objective(xm.data(), w.data(), c.data(), n)) /
+              (2.0 * h);
+  }
+
+  const double err_rev = MaxAbsDiff(g_rev, analytic);
+  const double err_fwd = MaxAbsDiff(g_fwd, analytic);
+  const double err_fd = MaxAbsDiff(g_rev, g_fd);
+
+  std::printf("enzyme smoke test (n=%d)\n", n);
+  std::printf("  max|reverse - analytic| = %.3e\n", err_rev);
+  std::printf("  max|forward - analytic| = %.3e\n", err_fwd);
+  std::printf("  max|reverse - finite-diff| = %.3e\n", err_fd);
+
+  const bool ok = err_rev < 1e-10 && err_fwd < 1e-10 && err_fd < 1e-5;
+  std::printf("%s\n", ok ? "PASS" : "FAIL");
+  return ok ? 0 : 1;
+}
@@ -0,0 +1,172 @@
+// SPDX-FileCopyrightText: 2024-present Proxima Fusion GmbH
+// <info@proximafusion.com>
+//
+// SPDX-License-Identifier: MIT
+
+// Exact autodiff of a real VMEC nonlinear kernel, forward vs reverse mode.
+//
+// JacobianKernel reproduces IdealMhdModel::computeJacobian: it maps the
+// full-grid geometry (R, Z and their poloidal derivatives, even/odd parity) to
+// the half-grid metric quantities r12, ru12, zu12, rs, zs and the Jacobian tau.
+// tau is nonlinear in the geometry (products ru12*zs - rs*zu12, ...), so its
+// Jacobian is a genuine building block of the MHD force Hessian (the force is
+// the gradient of VMEC's functional; chain rule composes this kernel's Jacobian
+// with the linear spectral transforms to give the Hessian-vector product).
+//
+// The kernel is written allocation-free over flat buffers (scalar locals,
+// Eigen-free), which is the form Enzyme differentiates. We take:
+//   * a Jacobian-vector product  J v   by forward mode  (__enzyme_fwddiff), and
+//   * a vector-Jacobian product  J^T u  by reverse mode (__enzyme_autodiff),
+// check both against central finite differences, and time them so the
+// forward/reverse trade-off is explicit.
+
+#include <algorithm>
+#include <chrono>
+#include <cmath>
+#include <cstdio>
+#include <numeric>
+#include <random>
+#include <vector>
+
+#include "vmecpp/common/enzyme/enzyme.h"
+#include "vmecpp/vmec/ideal_mhd_model/jacobian_kernel.h"
+
+// Types used as Enzyme intrinsic arguments need external linkage (an anonymous
+// namespace type cannot instantiate the __enzyme_* templates).
+
+// JacobianKernel is the shared production kernel ComputeHalfGridJacobian
+// (jacobian_kernel.h); differentiated here exactly as the solver uses it.
+
+constexpr int kNgeom = 8;  // r1e r1o z1e z1o rue ruo zue zuo
+constexpr int kNout = 6;   // r12 ru12 zu12 rs zs tau
+
+struct Problem {
+  int nZnT, nsH, n_in, n_out;
+  std::vector<double> geom;  // kNgeom blocks of (nsH+1)*nZnT
+  std::vector<double> sqrtSH;
+  double deltaS, dSHalfDsInterp;
+};
+
+Problem MakeProblem(int nZnT, int nsH) {
+  Problem p{nZnT, nsH, kNgeom * (nsH + 1) * nZnT, kNout * nsH * nZnT, {}, {},
+            0.0,  0.0};
+  std::mt19937 rng(7);
+  std::uniform_real_distribution<double> d(0.5, 1.5);
+  p.geom.resize(p.n_in);
+  for (double& x : p.geom) x = d(rng);
+  p.sqrtSH.resize(nsH);
+  for (int j = 0; j < nsH; ++j) p.sqrtSH[j] = std::sqrt(0.05 + 0.9 * j / nsH);
+  p.deltaS = 0.1;
+  p.dSHalfDsInterp = 0.25;
+  return p;
+}
+
+// Evaluate the kernel from a flat geometry buffer into a flat output buffer.
+__attribute__((noinline)) void Eval(const double* geom, const Problem& p,
+                                    double* out) {
+  const int s = (p.nsH + 1) * p.nZnT;
+  const int o = p.nsH * p.nZnT;
+  vmecpp::ComputeHalfGridJacobian(
+      geom, geom + s, geom + 2 * s, geom + 3 * s, geom + 4 * s, geom + 5 * s,
+      geom + 6 * s, geom + 7 * s, p.sqrtSH.data(), p.deltaS, p.dSHalfDsInterp,
+      p.nZnT, /*nsMinF1=*/0, /*nsMinH=*/0,
+      /*nsMaxH=*/p.nsH, out, out + o, out + 2 * o, out + 3 * o, out + 4 * o,
+      out + 5 * o);
+}
+
+double MaxAbs(const std::vector<double>& a, const std::vector<double>& b) {
+  double m = 0.0;
+  for (size_t i = 0; i < a.size(); ++i)
+    m = std::fmax(m, std::fabs(a[i] - b[i]));
+  return m;
+}
+
+// Scalar objective L(geom) = 0.5 * sum of squares of all kernel outputs. out is
+// caller-owned scratch (an intermediate of the differentiated call). This is
+// the scalar-over-buffers form Enzyme differentiates in both modes.
+__attribute__((noinline)) double Loss(const double* geom, double* out,
+                                      const Problem* p) {
+  Eval(geom, *p, out);
+  double s = 0.0;
+  for (int i = 0; i < p->n_out; ++i) s += 0.5 * out[i] * out[i];
+  return s;
+}
+
+int main() {
+  const Problem p = MakeProblem(/*nZnT=*/32, /*nsH=*/12);
+  std::mt19937 rng(11);
+  std::uniform_real_distribution<double> dist(-1.0, 1.0);
+  std::vector<double> v(p.n_in);
+  for (double& x : v) x = dist(rng);
+
+  // Reverse mode: full gradient dL/dgeom in one pass.
+  std::vector<double> g_rev(p.n_in, 0.0), out_s(p.n_out, 0.0),
+      out_sh(p.n_out, 0.0);
+  __enzyme_autodiff((void*)Loss, enzyme_dup, p.geom.data(), g_rev.data(),
+                    enzyme_dup, out_s.data(), out_sh.data(), enzyme_const, &p);
+
+  // Forward mode: directional derivative dL . v in one pass.
+  std::vector<double> out_t(p.n_out, 0.0);
+  const double d_fwd = __enzyme_fwddiff<double>(
+      (void*)Loss, enzyme_dup, p.geom.data(), v.data(), enzyme_dup,
+      out_s.data(), out_t.data(), enzyme_const, &p);
+
+  // Central finite differences of the directional derivative.
+  const double h = 1e-6;
+  std::vector<double> gp(p.geom), gm(p.geom), os(p.n_out);
+  for (int i = 0; i < p.n_in; ++i) {
+    gp[i] = p.geom[i] + h * v[i];
+    gm[i] = p.geom[i] - h * v[i];
+  }
+  const double d_fd =
+      (Loss(gp.data(), os.data(), &p) - Loss(gm.data(), os.data(), &p)) /
+      (2 * h);
+  const double d_rev =
+      std::inner_product(g_rev.begin(), g_rev.end(), v.begin(), 0.0);
+
+  const double scale = std::fabs(d_fd) + 1e-300;
+  printf("exact autodiff of VMEC Jacobian kernel (n_in=%d n_out=%d)\n", p.n_in,
+         p.n_out);
+  printf("  reverse  dL.v = %.10e   (rel err vs FD %.2e)\n", d_rev,
+         std::fabs(d_rev - d_fd) / scale);
+  printf("  forward  dL.v = %.10e   (rel err vs FD %.2e)\n", d_fwd,
+         std::fabs(d_fwd - d_fd) / scale);
+  printf("  forward/reverse agreement: %.2e\n",
+         std::fabs(d_fwd - d_rev) / (std::fabs(d_rev) + 1e-300));
+
+  // Performance: reverse returns the whole gradient (n_in) in one pass; forward
+  // returns one directional derivative per pass. For a scalar objective reverse
+  // wins; for a single Jacobian/Hessian-vector product forward is the cheaper
+  // primitive. Time both.
+  const int reps = 2000;
+  auto t0 = std::chrono::steady_clock::now();
+  for (int r = 0; r < reps; ++r) {
+    std::fill(g_rev.begin(), g_rev.end(), 0.0);
+    __enzyme_autodiff((void*)Loss, enzyme_dup, p.geom.data(), g_rev.data(),
+                      enzyme_dup, out_s.data(), out_sh.data(), enzyme_const,
+                      &p);
+  }
+  auto t1 = std::chrono::steady_clock::now();
+  for (int r = 0; r < reps; ++r) {
+    out_t.assign(p.n_out, 0.0);
+    volatile double s = __enzyme_fwddiff<double>(
+        (void*)Loss, enzyme_dup, p.geom.data(), v.data(), enzyme_dup,
+        out_s.data(), out_t.data(), enzyme_const, &p);
+    (void)s;
+  }
+  auto t2 = std::chrono::steady_clock::now();
+  const double us_rev =
+      std::chrono::duration<double, std::micro>(t1 - t0).count() / reps;
+  const double us_fwd =
+      std::chrono::duration<double, std::micro>(t2 - t1).count() / reps;
+  printf(
+      "  performance: reverse %.2f us/pass (full gradient), forward %.2f "
+      "us/pass (one direction)\n",
+      us_rev, us_fwd);
+
+  const bool ok = std::fabs(d_rev - d_fd) < 1e-5 * scale &&
+                  std::fabs(d_fwd - d_fd) < 1e-5 * scale &&
+                  std::fabs(d_fwd - d_rev) < 1e-9 * (std::fabs(d_rev) + 1e-300);
+  printf("%s\n", ok ? "PASS" : "FAIL");
+  return ok ? 0 : 1;
+}
@@ -82,7 +82,16 @@ cc_test(
 cc_library(
     name = "ideal_mhd_model",
     srcs = ["ideal_mhd_model.cc"],
-    hdrs = ["ideal_mhd_model.h", "dft_data.h"],
+# The shared force-chain kernels and the autodiff composition/JVP header are
+    # header-only and included by ideal_mhd_model.cc; declare whatever exists on
+    # this branch so the bazel sandbox can see them. The Enzyme JVP entry point
+    # (exact_force_jvp.cc) is built only by the CMake VMECPP_ENABLE_ENZYME path,
+    # not by bazel; its use in ideal_mhd_model.cc is guarded by that define.
+    hdrs = ["ideal_mhd_model.h", "dft_data.h"] + glob([
+        "*_kernel.h",
+        "local_force_composition.h",
+        "exact_force_jvp.h",
+    ]),
     visibility = ["//visibility:public"],
     deps = [
         ":dft_toroidal",