Skip to content

Implement project_options for unified compiler settings#721

Draft
eessmann wants to merge 1 commit into QuEST-Kit:devel from
eessmann:comp_fix
Draft

Implement project_options for unified compiler settings#721
eessmann wants to merge 1 commit into QuEST-Kit:devel from
eessmann:comp_fix

Conversation

@eessmann
Copy link
Copy Markdown
Contributor

Found the needed flags to disable NaN checking on clang-based compilers without the need for fast-math flags.

Add a project_options.cmake file to centralize compiler configuration and standardize warnings and features for C/C++. Update CMakeLists.txt to use project_options, enhancing code organization and maintainability.

Add a new `project_options.cmake` file to configure consistent compiler warnings and features across different compilers for both C and C++ standard libraries. Migrate existing compilation setup in `CMakeLists.txt` to use the newly created `project_options` interface library, ensuring cleaner code and better manageability.
@otbrown
Copy link
Copy Markdown
Collaborator

otbrown commented Apr 13, 2026

As discussed, we'll need to run quite a few checks on this one, but great that we might be able to do away with -Ofast!

Pre-merge checks:

  • Add CPU_FAST_MATH as a CMake option to re-enable -Ofast on CPU subroutines as an escape hatch in case a user finds a platform where just the complex flags don't recover performance sufficiently and/or they really want to live dangerously.
  • Test CPU correctness under LLVM.
  • Test CPU performance under LLVM.
  • Test CPU correctness under GNU.
  • Test CPU performance under GNU.
  • Test CPU correctness under Apple Clang.
  • Test CPU performance under Apple Clang.
  • Test CPU correctness under MSVC.
  • Test CPU performance under MSVC.
  • Test GPU correctness under nvcc.
  • Test GPU performance under nvcc.
  • Test GPU correctness under hipcc.
  • Test GPU performance under hipcc.

For correctness testing we obviously want to see the test suite pass. For performance let's just A/B test something generic (the QFT, or maybe the Trotter time evolution) against the -Ofast version. It can be slower, but not a lot slower. We can figure out what exactly that means as we go 😉

@eessmann
Copy link
Copy Markdown
Contributor Author

I am using the following code as a benchmark:

/** @file
 * A minimum C++23 example of running QuEST, reporting
 * the execution environment, preparing a 24-qubit
 * sinusoidal state, applying the QFT, and verifying
 * the expected frequency-domain peaks.
 *
 * @author Tyson Jones
 */

#include "quest.h"

#include <algorithm>
#include <cmath>
#include <complex>
#include <format>
#include <numbers>
#include <print>
#include <utility>
#include <vector>

namespace {

// Number of qubits in the register created by main().
constexpr int numQubits{24};

// Frequency bin of the sinusoid that main() prepares before the QFT.
constexpr qindex sinusoidFrequency{1};

// Upper bound (2^18) on how many amplitudes are staged per setQuregAmps() call.
constexpr qindex initChunkSize{qindex{1} << 18};

// Absolute tolerance used by the spectrum verification checks.
constexpr qreal verificationTolerance{static_cast<qreal>(1e-4)};

/**
 * Prints a formatted line, but only from the process whose QuEST
 * environment reports rank 0; every other rank returns without output.
 *
 * @param fmt  compile-time checked format string
 * @param args values substituted into @p fmt
 */
template <typename... Args>
void logRoot(std::format_string<Args...> fmt, Args&&... args) {

    const bool isRoot = (getQuESTEnv().rank == 0);
    if (!isRoot)
        return;

    std::println(fmt, std::forward<Args>(args)...);
}

/**
 * Reports whether two values agree to within an absolute tolerance.
 *
 * @param actual    the measured value
 * @param expected  the reference value
 * @param tolerance the largest accepted absolute difference (inclusive)
 * @return true when |actual - expected| <= tolerance
 */
bool isClose(qreal actual, qreal expected, qreal tolerance) {

    const qreal deviation = std::abs(actual - expected);
    return deviation <= tolerance;
}

/**
 * Overwrites the amplitudes of @p qureg with the real-valued sinusoid
 *
 *     amp[x] = sqrt(2 / numAmps) * sin(2 pi * frequency * x / numAmps)
 *
 * The amplitudes are staged in a reusable buffer of at most initChunkSize
 * entries and handed to setQuregAmps() one chunk at a time, so the full
 * state is never duplicated in a temporary.
 *
 * @param qureg     register whose amplitudes are overwritten
 * @param frequency frequency bin of the sinusoid
 */
void initSinusoidalState(Qureg qureg, qindex frequency) {

    const qindex totalAmps = qureg.numAmps;

    const qreal tau = static_cast<qreal>(2) * std::numbers::pi_v<qreal>;
    const qreal scale = std::sqrt(static_cast<qreal>(2) / static_cast<qreal>(totalAmps));

    // One staging buffer, sized for the largest chunk and reused for each chunk.
    std::vector<qcomp> buffer(static_cast<std::size_t>(std::min(initChunkSize, totalAmps)));

    qindex chunkStart = 0;
    while (chunkStart < totalAmps) {

        // The final chunk may be shorter than initChunkSize.
        const qindex chunkLen = std::min(initChunkSize, totalAmps - chunkStart);

        for (qindex k = 0; k < chunkLen; ++k) {
            const qindex globalInd = chunkStart + k;
            const qreal angle = tau * static_cast<qreal>(frequency) * static_cast<qreal>(globalInd)
                / static_cast<qreal>(totalAmps);

            buffer[static_cast<std::size_t>(k)] = qcomp(scale * std::sin(angle), 0);
        }

        setQuregAmps(qureg, chunkStart, buffer.data(), chunkLen);
        chunkStart += initChunkSize;
    }
}

/**
 * Quantities gathered by verifyQftSpectrum() after the QFT, together with
 * the overall verdict. Every member is value-initialised so that a
 * default-constructed result holds well-defined contents.
 *
 * NOTE: the member order is relied upon by designated initialisers, so it
 * must not be rearranged.
 */
struct VerificationResult {
    qreal prob1{};           // probability of basis state 1
    qreal probLast{};        // probability of the last basis state (numAmps - 1)
    qreal probOther{};       // probability outside those two states, clamped at zero
    qreal totalProbAfter{};  // total probability of the register after the QFT
    qcomp amp1{};            // amplitude of basis state 1
    qcomp ampLast{};         // amplitude of the last basis state
    bool passed{};           // true when every verification check succeeded
};

/**
 * Inspects the register after the QFT and checks that it has the spectrum
 * expected from the prepared sinusoid: two peaks, at basis state 1 and at
 * the last basis state.
 *
 * The result passes when all of the following hold (each comparison using
 * verificationTolerance):
 *   - both peak probabilities are close to 0.5,
 *   - the probability outside the two peaks is small,
 *   - the real parts of both peak amplitudes are small,
 *   - the imaginary part is positive at state 1 and negative at the last state.
 *
 * @param qureg register to inspect
 * @return the measured probabilities and amplitudes, plus the pass/fail verdict
 */
VerificationResult verifyQftSpectrum(Qureg qureg) {

    const qindex mirrorBin = qureg.numAmps - 1;

    VerificationResult outcome{};

    // Probabilities of the two peaks and of everything else.
    outcome.prob1 = calcProbOfBasisState(qureg, 1);
    outcome.probLast = calcProbOfBasisState(qureg, mirrorBin);
    outcome.totalProbAfter = calcTotalProb(qureg);

    // Clamp at zero so rounding cannot report a negative leakage.
    outcome.probOther = std::max(
        static_cast<qreal>(0), outcome.totalProbAfter - outcome.prob1 - outcome.probLast);

    // Complex amplitudes of the two peaks.
    outcome.amp1 = getQuregAmp(qureg, 1);
    outcome.ampLast = getQuregAmp(qureg, mirrorBin);

    const auto isNearHalf = [](qreal prob) {
        return isClose(prob, static_cast<qreal>(0.5), verificationTolerance);
    };
    const auto hasSmallReal = [](const qcomp& amp) {
        return std::abs(std::real(amp)) <= verificationTolerance;
    };

    const bool balancedPeaks = isNearHalf(outcome.prob1) && isNearHalf(outcome.probLast);
    const bool lowLeakage = outcome.probOther <= verificationTolerance;
    const bool smallReals = hasSmallReal(outcome.amp1) && hasSmallReal(outcome.ampLast);
    const bool signsAgree = std::imag(outcome.amp1) > 0 && std::imag(outcome.ampLast) < 0;

    outcome.passed = balancedPeaks && lowLeakage && smallReals && signsAgree;
    return outcome;
}

} // namespace

/**
 * Benchmark driver: initialises QuEST, prepares a sinusoidal state on a
 * numQubits-qubit register, applies the full QFT, verifies the resulting
 * spectrum and reports the findings from the root rank.
 *
 * @return 0 when the spectrum verification passes, 1 otherwise
 */
int main() {

    initQuESTEnv();
    reportQuESTEnv();

    Qureg qureg = createForcedQureg(numQubits);
    reportQuregParams(qureg);

    // Keep the reporter output short.
    setMaxNumReportedItems(8, 8);
    setMaxNumReportedSigFigs(4);

    // Pass the arguments straight to logRoot rather than pre-formatting with
    // std::format: the text is identical, but no temporary string is built
    // and ranks that do not print skip the formatting altogether.
    logRoot("Preparing a {}-qubit sinusoidal state with {} amplitudes.", qureg.numQubits, qureg.numAmps);
    logRoot("Using sinusoid frequency bin {}.", sinusoidFrequency);

    initSinusoidalState(qureg, sinusoidFrequency);

    const qreal totalProbBefore = calcTotalProb(qureg);
    logRoot("Total probability before QFT: {:.6f}", totalProbBefore);
    logRoot("State before QFT (truncated by QuEST reporter):");
    reportQureg(qureg);

    logRoot("Applying the full QFT to all {} qubits.", qureg.numQubits);
    applyFullQuantumFourierTransform(qureg, false);

    // The result is a plain value, so it stays usable after the register is destroyed.
    const VerificationResult result = verifyQftSpectrum(qureg);

    logRoot("Total probability after QFT: {:.6f}", result.totalProbAfter);
    logRoot("State after QFT (truncated by QuEST reporter):");
    reportQureg(qureg);

    const qindex lastBin = qureg.numAmps - 1;
    logRoot("Expected dominant bins: {} and {}.", sinusoidFrequency, lastBin);
    logRoot("Probability at bin {}: {:.6f}", sinusoidFrequency, result.prob1);
    logRoot("Probability at bin {}: {:.6f}", lastBin, result.probLast);
    logRoot("Probability outside the dominant bins: {:.6f}", result.probOther);
    logRoot("Amplitude at bin {}: {:.6f} {:+.6f}i",
        sinusoidFrequency, std::real(result.amp1), std::imag(result.amp1));
    logRoot("Amplitude at bin {}: {:.6f} {:+.6f}i",
        lastBin, std::real(result.ampLast), std::imag(result.ampLast));

    if (!result.passed)
        logRoot("Verification failed: the QFT did not produce the expected sinusoidal spectrum.");
    else
        logRoot("Verification passed: the QFT expanded the sinusoid into the expected frequency peaks.");

    destroyQureg(qureg);
    finalizeQuESTEnv();

    // The exit status carries the verdict so scripts can detect a failed run.
    return result.passed ? 0 : 1;
}

@eessmann
Copy link
Copy Markdown
Contributor Author

Here are the benchmark results for clang on macOS:

Command Mean [s] Min [s] Max [s] Relative
clang-baseline 1.151 ± 0.022 1.125 1.201 1.00
clang-update 1.168 ± 0.025 1.139 1.206 1.02 ± 0.03

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants