Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
eb91683
build: bump CMake abseil pin to 20260107.1 for Clang >= 21
krystophny Jun 14, 2026
a9180e0
enzyme: opt-in Clang/Enzyme build option and AD smoke test
krystophny Jun 14, 2026
66664d3
enzyme: exact autodiff of the VMEC Jacobian kernel (forward vs reverse)
krystophny Jun 14, 2026
ebd1699
ideal_mhd_model: share the Jacobian kernel between solver and autodiff
krystophny Jun 14, 2026
128d8ba
ideal_mhd_model: share the metric kernel (gsqrt, guu, guv, gvv)
krystophny Jun 14, 2026
c893db1
ideal_mhd_model: share the contravariant-field kernel (bsupu, bsupv)
krystophny Jun 14, 2026
9fe801c
ideal_mhd_model: share the covariant-field kernel (bsubu, bsubv)
krystophny Jun 14, 2026
377497c
ideal_mhd_model: share the magnetic-pressure kernel
krystophny Jun 14, 2026
fbbf4cc
bazel: declare force-chain kernel headers in ideal_mhd_model (sandbox…
krystophny Jun 14, 2026
1989dd0
ci: skip benchmark result upload on fork PRs (token is read-only)
krystophny Jun 14, 2026
ed9da6e
ci: build VMEC2000 from source so the compat test runs on numpy 2
krystophny Jun 14, 2026
dac979b
test: skip vmecpp-only indata fields in the VMEC2000 compat subset
krystophny Jun 14, 2026
d46732c
ci: sync VMEC2000-from-source build, benchmark fork guard, abseil com…
krystophny Jun 15, 2026
92edab5
Merge remote-tracking branch 'upstream/main' into HEAD
krystophny Jun 15, 2026
d809ead
ideal_mhd_model: mark Jacobian metric kernel buffers __restrict
krystophny Jun 16, 2026
20be28e
output_quantities: compare jcuru/jcurv at a looser opt-in tolerance
krystophny Jun 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -174,3 +174,43 @@ endif()

install(TARGETS _vmecpp LIBRARY DESTINATION vmecpp/cpp/.)
install(TARGETS indata2json DESTINATION vmecpp/cpp/third_party/indata2json/)

# Optional Enzyme automatic-differentiation targets.
# Enzyme (https://enzyme.mit.edu) differentiates LLVM IR via a Clang plugin, so
# these targets need a Clang frontend and the matching ClangEnzyme plugin. OFF
# by default; the production build and all existing targets are unaffected.
# Enable with:
# -DVMECPP_ENABLE_ENZYME=ON -DVMECPP_ENZYME_PLUGIN=/path/to/ClangEnzyme-NN.so
option(VMECPP_ENABLE_ENZYME "Build Enzyme autodiff targets" OFF)
if(VMECPP_ENABLE_ENZYME)
if(NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang")
message(FATAL_ERROR
"VMECPP_ENABLE_ENZYME requires a Clang compiler (got "
"${CMAKE_CXX_COMPILER_ID}); Enzyme attaches as a Clang plugin.")
endif()
set(VMECPP_ENZYME_PLUGIN "" CACHE FILEPATH "Path to ClangEnzyme-NN.so")
if(NOT VMECPP_ENZYME_PLUGIN OR NOT EXISTS "${VMECPP_ENZYME_PLUGIN}")
message(FATAL_ERROR
"VMECPP_ENABLE_ENZYME=ON requires "
"-DVMECPP_ENZYME_PLUGIN=/path/to/ClangEnzyme-NN.so")
endif()
message(STATUS "Enzyme plugin: ${VMECPP_ENZYME_PLUGIN}")
enable_testing()
add_executable(enzyme_smoke_test
${PROJECT_SOURCE_DIR}/src/vmecpp/cpp/vmecpp/common/enzyme/enzyme_smoke_test.cc)
# Enzyme runs as an optimization-time pass; it needs optimizations enabled and
# the plugin attached. -O2 guards against a Debug (-O0) configuration where the
# AD pass would otherwise not run.
target_compile_options(enzyme_smoke_test PRIVATE
-O2 -fplugin=${VMECPP_ENZYME_PLUGIN})
add_test(NAME enzyme_smoke COMMAND enzyme_smoke_test)

# Exact autodiff (forward and reverse) of a real VMEC nonlinear kernel: the
# half-grid Jacobian. Self-contained over flat buffers; checks Jv and J^T u
# against finite differences and against each other (adjoint identity).
add_executable(jacobian_kernel_autodiff_test
${PROJECT_SOURCE_DIR}/src/vmecpp/cpp/vmecpp/common/enzyme/jacobian_kernel_autodiff_test.cc)
target_compile_options(jacobian_kernel_autodiff_test PRIVATE
-O2 -fplugin=${VMECPP_ENZYME_PLUGIN})
add_test(NAME jacobian_kernel_autodiff COMMAND jacobian_kernel_autodiff_test)
endif()
52 changes: 52 additions & 0 deletions src/vmecpp/cpp/vmecpp/common/enzyme/enzyme.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// SPDX-FileCopyrightText: 2024-present Proxima Fusion GmbH
// <info@proximafusion.com>
//
// SPDX-License-Identifier: MIT
#ifndef VMECPP_COMMON_ENZYME_ENZYME_H_
#define VMECPP_COMMON_ENZYME_ENZYME_H_

// Thin declarations for the Enzyme automatic-differentiation intrinsics.
//
// Enzyme (https://enzyme.mit.edu) differentiates at the LLVM-IR level via a
// Clang plugin (ClangEnzyme-NN.so). These intrinsics are resolved by that
// plugin; without it the symbols do not link, so any translation unit that
// calls them must be compiled with -fplugin=<ClangEnzyme>. The CMake option
// VMECPP_ENABLE_ENZYME wires that flag and is OFF by default.
//
// Differentiation activity is selected per argument with the marker globals
// below: pass `enzyme_dup, primal_ptr, shadow_ptr` for an active buffer (the
// gradient accumulates into shadow_ptr) and `enzyme_const, value` for an input
// held fixed. Enzyme matches these markers by symbol name.
//
// Constraint that shapes how differentiable kernels here are written: Enzyme's
// allocation analysis does not track Eigen's aligned allocator, so a heap
// temporary from an Eigen expression (e.g. a dynamic-size `A * x`) crossing the
// differentiated call aborts with "freeing without malloc". Differentiable
// kernels therefore operate on caller-owned buffers via Eigen::Map and avoid
// allocating expression temporaries on the differentiated path.

// The marker globals and the __enzyme_* intrinsic names are part of the Enzyme
// ABI: the plugin matches them by exact symbol name, so they cannot be const or
// renamed to satisfy the in-tree naming/identifier lint rules.

// NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables)
extern int enzyme_dup;
extern int enzyme_const;
extern int enzyme_dupnoneed;
extern int enzyme_out;
// NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables)

// Reverse mode: returns nothing useful here; gradients land in the shadow
// buffers passed alongside each `enzyme_dup` argument.
template <typename... Args>
void __enzyme_autodiff( // NOLINT(bugprone-reserved-identifier,readability-identifier-naming)
void*, Args...);

// Forward mode: propagates the seed in the shadow argument to the directional
// derivative of the result.
template <typename Return, typename... Args>
Return
__enzyme_fwddiff( // NOLINT(bugprone-reserved-identifier,readability-identifier-naming)
void*, Args...);

#endif // VMECPP_COMMON_ENZYME_ENZYME_H_
97 changes: 97 additions & 0 deletions src/vmecpp/cpp/vmecpp/common/enzyme/enzyme_smoke_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// SPDX-FileCopyrightText: 2024-present Proxima Fusion GmbH
// <info@proximafusion.com>
//
// SPDX-License-Identifier: MIT

// Toolchain smoke test for the Enzyme autodiff build.
//
// It checks the two properties the differentiable VMEC++ kernels rely on:
// 1. The ClangEnzyme plugin is attached and resolves the autodiff intrinsics.
// 2. Enzyme differentiates a scalar objective expressed over Eigen::Map'd
// caller buffers (the pattern all differentiable kernels here use).
//
// The objective is f(x) = sum_i 0.5 * w_i * x_i^2 + c_i * x_i, with the closed
// form gradient df/dx_i = w_i * x_i + c_i. Reverse- and forward-mode Enzyme
// gradients are checked against that closed form and against central finite
// differences. Exit code 0 on success, 1 on any mismatch.

#include <Eigen/Dense>
#include <cmath>
#include <cstdio>

#include "vmecpp/common/enzyme/enzyme.h"

namespace {

using Eigen::VectorXd;

// Quadratic objective over caller-owned buffers. No allocating Eigen
// expression temporaries cross the differentiated boundary.
__attribute__((noinline)) double Objective(const double* x, const double* w,
const double* c, int n) {
Eigen::Map<const VectorXd> xv(x, n);
Eigen::Map<const VectorXd> wv(w, n);
Eigen::Map<const VectorXd> cv(c, n);
double sum = 0.0;
for (int i = 0; i < n; ++i) {
sum += 0.5 * wv[i] * xv[i] * xv[i] + cv[i] * xv[i];
}
return sum;
}

double MaxAbsDiff(const VectorXd& a, const VectorXd& b) {
return (a - b).cwiseAbs().maxCoeff();
}

} // namespace

int main() {
const int n = 8;
VectorXd x = VectorXd::LinSpaced(n, 1.0, n);
VectorXd w = VectorXd::LinSpaced(n, 2.0, 2.0 + 0.5 * (n - 1));
VectorXd c = VectorXd::Constant(n, 0.25);

VectorXd analytic(n);
for (int i = 0; i < n; ++i) analytic[i] = w[i] * x[i] + c[i];

// Reverse mode: gradient accumulates into the shadow buffer.
VectorXd g_rev = VectorXd::Zero(n);
__enzyme_autodiff((void*)Objective, enzyme_dup, x.data(), g_rev.data(),
enzyme_const, w.data(), enzyme_const, c.data(),
enzyme_const, n);

// Forward mode: one directional derivative per coordinate seed.
VectorXd g_fwd = VectorXd::Zero(n);
for (int j = 0; j < n; ++j) {
VectorXd seed = VectorXd::Zero(n);
seed[j] = 1.0;
g_fwd[j] = __enzyme_fwddiff<double>(
(void*)Objective, enzyme_dup, x.data(), seed.data(), enzyme_const,
w.data(), enzyme_const, c.data(), enzyme_const, n);
}

// Central finite differences.
VectorXd g_fd = VectorXd::Zero(n);
const double h = 1e-6;
for (int j = 0; j < n; ++j) {
VectorXd xp = x, xm = x;
xp[j] += h;
xm[j] -= h;
g_fd[j] = (Objective(xp.data(), w.data(), c.data(), n) -
Objective(xm.data(), w.data(), c.data(), n)) /
(2.0 * h);
}

const double err_rev = MaxAbsDiff(g_rev, analytic);
const double err_fwd = MaxAbsDiff(g_fwd, analytic);
const double err_fd = MaxAbsDiff(g_rev, g_fd);

std::printf("enzyme smoke test (n=%d)\n", n);
std::printf(" max|reverse - analytic| = %.3e\n", err_rev);
std::printf(" max|forward - analytic| = %.3e\n", err_fwd);
std::printf(" max|reverse - finite-diff| = %.3e\n", err_fd);

const bool ok = err_rev < 1e-10 && err_fwd < 1e-10 && err_fd < 1e-5;
std::printf("%s\n", ok ? "PASS" : "FAIL");
return ok ? 0 : 1;
}
172 changes: 172 additions & 0 deletions src/vmecpp/cpp/vmecpp/common/enzyme/jacobian_kernel_autodiff_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
// SPDX-FileCopyrightText: 2024-present Proxima Fusion GmbH
// <info@proximafusion.com>
//
// SPDX-License-Identifier: MIT

// Exact autodiff of a real VMEC nonlinear kernel, forward vs reverse mode.
//
// JacobianKernel reproduces IdealMhdModel::computeJacobian: it maps the
// full-grid geometry (R, Z and their poloidal derivatives, even/odd parity) to
// the half-grid metric quantities r12, ru12, zu12, rs, zs and the Jacobian tau.
// tau is nonlinear in the geometry (products ru12*zs - rs*zu12, ...), so its
// Jacobian is a genuine building block of the MHD force Hessian (the force is
// the gradient of VMEC's functional; chain rule composes this kernel's Jacobian
// with the linear spectral transforms to give the Hessian-vector product).
//
// The kernel is written allocation-free over flat buffers (scalar locals,
// Eigen-free), which is the form Enzyme differentiates. We take:
// * a Jacobian-vector product J v by forward mode (__enzyme_fwddiff), and
// * a vector-Jacobian product J^T u by reverse mode (__enzyme_autodiff),
// check both against central finite differences, and time them so the
// forward/reverse trade-off is explicit.

#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdio>
#include <numeric>
#include <random>
#include <vector>

#include "vmecpp/common/enzyme/enzyme.h"
#include "vmecpp/vmec/ideal_mhd_model/jacobian_kernel.h"

// Types used as Enzyme intrinsic arguments need external linkage (an anonymous
// namespace type cannot instantiate the __enzyme_* templates).

// JacobianKernel is the shared production kernel ComputeHalfGridJacobian
// (jacobian_kernel.h); differentiated here exactly as the solver uses it.

constexpr int kNgeom = 8; // r1e r1o z1e z1o rue ruo zue zuo
constexpr int kNout = 6; // r12 ru12 zu12 rs zs tau

struct Problem {
int nZnT, nsH, n_in, n_out;
std::vector<double> geom; // kNgeom blocks of (nsH+1)*nZnT
std::vector<double> sqrtSH;
double deltaS, dSHalfDsInterp;
};

Problem MakeProblem(int nZnT, int nsH) {
Problem p{nZnT, nsH, kNgeom * (nsH + 1) * nZnT, kNout * nsH * nZnT, {}, {},
0.0, 0.0};
std::mt19937 rng(7);
std::uniform_real_distribution<double> d(0.5, 1.5);
p.geom.resize(p.n_in);
for (double& x : p.geom) x = d(rng);
p.sqrtSH.resize(nsH);
for (int j = 0; j < nsH; ++j) p.sqrtSH[j] = std::sqrt(0.05 + 0.9 * j / nsH);
p.deltaS = 0.1;
p.dSHalfDsInterp = 0.25;
return p;
}

// Evaluate the kernel from a flat geometry buffer into a flat output buffer.
__attribute__((noinline)) void Eval(const double* geom, const Problem& p,
double* out) {
const int s = (p.nsH + 1) * p.nZnT;
const int o = p.nsH * p.nZnT;
vmecpp::ComputeHalfGridJacobian(
geom, geom + s, geom + 2 * s, geom + 3 * s, geom + 4 * s, geom + 5 * s,
geom + 6 * s, geom + 7 * s, p.sqrtSH.data(), p.deltaS, p.dSHalfDsInterp,
p.nZnT, /*nsMinF1=*/0, /*nsMinH=*/0,
/*nsMaxH=*/p.nsH, out, out + o, out + 2 * o, out + 3 * o, out + 4 * o,
out + 5 * o);
}

double MaxAbs(const std::vector<double>& a, const std::vector<double>& b) {
double m = 0.0;
for (size_t i = 0; i < a.size(); ++i)
m = std::fmax(m, std::fabs(a[i] - b[i]));
return m;
}

// Scalar objective L(geom) = 0.5 * sum of squares of all kernel outputs. out is
// caller-owned scratch (an intermediate of the differentiated call). This is
// the scalar-over-buffers form Enzyme differentiates in both modes.
__attribute__((noinline)) double Loss(const double* geom, double* out,
const Problem* p) {
Eval(geom, *p, out);
double s = 0.0;
for (int i = 0; i < p->n_out; ++i) s += 0.5 * out[i] * out[i];
return s;
}

int main() {
const Problem p = MakeProblem(/*nZnT=*/32, /*nsH=*/12);
std::mt19937 rng(11);
std::uniform_real_distribution<double> dist(-1.0, 1.0);
std::vector<double> v(p.n_in);
for (double& x : v) x = dist(rng);

// Reverse mode: full gradient dL/dgeom in one pass.
std::vector<double> g_rev(p.n_in, 0.0), out_s(p.n_out, 0.0),
out_sh(p.n_out, 0.0);
__enzyme_autodiff((void*)Loss, enzyme_dup, p.geom.data(), g_rev.data(),
enzyme_dup, out_s.data(), out_sh.data(), enzyme_const, &p);

// Forward mode: directional derivative dL . v in one pass.
std::vector<double> out_t(p.n_out, 0.0);
const double d_fwd = __enzyme_fwddiff<double>(
(void*)Loss, enzyme_dup, p.geom.data(), v.data(), enzyme_dup,
out_s.data(), out_t.data(), enzyme_const, &p);

// Central finite differences of the directional derivative.
const double h = 1e-6;
std::vector<double> gp(p.geom), gm(p.geom), os(p.n_out);
for (int i = 0; i < p.n_in; ++i) {
gp[i] = p.geom[i] + h * v[i];
gm[i] = p.geom[i] - h * v[i];
}
const double d_fd =
(Loss(gp.data(), os.data(), &p) - Loss(gm.data(), os.data(), &p)) /
(2 * h);
const double d_rev =
std::inner_product(g_rev.begin(), g_rev.end(), v.begin(), 0.0);

const double scale = std::fabs(d_fd) + 1e-300;
printf("exact autodiff of VMEC Jacobian kernel (n_in=%d n_out=%d)\n", p.n_in,
p.n_out);
printf(" reverse dL.v = %.10e (rel err vs FD %.2e)\n", d_rev,
std::fabs(d_rev - d_fd) / scale);
printf(" forward dL.v = %.10e (rel err vs FD %.2e)\n", d_fwd,
std::fabs(d_fwd - d_fd) / scale);
printf(" forward/reverse agreement: %.2e\n",
std::fabs(d_fwd - d_rev) / (std::fabs(d_rev) + 1e-300));

// Performance: reverse returns the whole gradient (n_in) in one pass; forward
// returns one directional derivative per pass. For a scalar objective reverse
// wins; for a single Jacobian/Hessian-vector product forward is the cheaper
// primitive. Time both.
const int reps = 2000;
auto t0 = std::chrono::steady_clock::now();
for (int r = 0; r < reps; ++r) {
std::fill(g_rev.begin(), g_rev.end(), 0.0);
__enzyme_autodiff((void*)Loss, enzyme_dup, p.geom.data(), g_rev.data(),
enzyme_dup, out_s.data(), out_sh.data(), enzyme_const,
&p);
}
auto t1 = std::chrono::steady_clock::now();
for (int r = 0; r < reps; ++r) {
out_t.assign(p.n_out, 0.0);
volatile double s = __enzyme_fwddiff<double>(
(void*)Loss, enzyme_dup, p.geom.data(), v.data(), enzyme_dup,
out_s.data(), out_t.data(), enzyme_const, &p);
(void)s;
}
auto t2 = std::chrono::steady_clock::now();
const double us_rev =
std::chrono::duration<double, std::micro>(t1 - t0).count() / reps;
const double us_fwd =
std::chrono::duration<double, std::micro>(t2 - t1).count() / reps;
printf(
" performance: reverse %.2f us/pass (full gradient), forward %.2f "
"us/pass (one direction)\n",
us_rev, us_fwd);

const bool ok = std::fabs(d_rev - d_fd) < 1e-5 * scale &&
std::fabs(d_fwd - d_fd) < 1e-5 * scale &&
std::fabs(d_fwd - d_rev) < 1e-9 * (std::fabs(d_rev) + 1e-300);
printf("%s\n", ok ? "PASS" : "FAIL");
return ok ? 0 : 1;
}
11 changes: 10 additions & 1 deletion src/vmecpp/cpp/vmecpp/vmec/ideal_mhd_model/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,16 @@ cc_test(
cc_library(
name = "ideal_mhd_model",
srcs = ["ideal_mhd_model.cc"],
hdrs = ["ideal_mhd_model.h", "dft_data.h"],
# The shared force-chain kernels and the autodiff composition/JVP header are
# header-only and included by ideal_mhd_model.cc; declare whatever exists on
# this branch so the bazel sandbox can see them. The Enzyme JVP entry point
# (exact_force_jvp.cc) is built only by the CMake VMECPP_ENABLE_ENZYME path,
# not by bazel; its use in ideal_mhd_model.cc is guarded by that define.
hdrs = ["ideal_mhd_model.h", "dft_data.h"] + glob([
"*_kernel.h",
"local_force_composition.h",
"exact_force_jvp.h",
]),
visibility = ["//visibility:public"],
deps = [
":dft_toroidal",
Expand Down
Loading
Loading