From 55632c547b6aedaaddced412139c0bd8b5ab6a0e Mon Sep 17 00:00:00 2001 From: Richard O'Shaughnessy Date: Tue, 12 May 2026 11:56:04 -0400 Subject: [PATCH 01/24] hyperpipe: ini/hydra-driven hyperpipeline driver + in-tree test suite Build a hyperpipeline analog to util_RIFT_pseudo_pipe.py with first-class support for: * ini/Hydra-based configuration (default: bin/hyperpipe_conf.yaml, schema documented in RIFT/hyperpipe/config.py) * flexible multi-event input via the 'marg-list:' section --- one entry per (likelihood driver, event), heterogeneous batch sizes per entry, optional per-entry coord-module override * CIP-mirror coordinate-transformation framework (RIFT/hyperpipe/coords.py): coord modules emit --supplementary-coordinate-code, --parameter, --integration-parameter-range, --parameter-implied, --parameter-nofit, plus the supplementary-likelihood-factor trio * marg-driver toolkit (RIFT/hyperpipe/drivers) so downstream users don't re-derive the fragile argument strings / output-file names / contract points the hyperpipe needs them to honor The legacy util_RIFT_hyperpipe.py shipped with the Gaussian demo was a ~200-line hand-rolled prototype; it forgot --test-args, --general-retries, --general-request-disk, --request-memory-marg, --eos-post-explode-jobs, --use-full-submit-paths, and passed a single --event-file flag pointing at a list-of-events file (broken for multi-event analyses). The rewrite delegates to the new RIFT.hyperpipe package and emits all of these correctly, one --event-file per marg-list entry. A pixi-based live test suite lives at test/hyperpipe/ (full lalsuite stack in the pixi env). RIFT_ROOT is auto-detected from the test's own location, so the suite works from any RIFT clone without per-user configuration. New RIFT/hyperpipe/{__init__,config,coords,marg_list}.py RIFT/hyperpipe/drivers/{__init__,base,gaussian}.py bin/util_HyperMargGaussian.py console-script shim bin/hyperpipe_conf.yaml default Hydra config test/hyperpipe/{pixi.toml,README.md} test/hyperpipe/tests/{conftest,standalone_check,demo_dryrun}.py test/hyperpipe/tests/test_{coords,marg_list,config,drivers,hydra_integration}.py Modified RIFT/__init__.py added 'hyperpipe' to __all__ bin/util_RIFT_hyperpipe.py full rewrite, thin Hydra entry Test cd MonteCarloMarginalizeCode/Code/test/hyperpipe pixi install && pixi run test --- .../Code/RIFT/__init__.py | 2 +- .../Code/RIFT/hyperpipe/__init__.py | 30 + .../Code/RIFT/hyperpipe/config.py | 222 +++++++ .../Code/RIFT/hyperpipe/coords.py | 360 ++++++++++++ .../Code/RIFT/hyperpipe/drivers/__init__.py | 38 ++ .../Code/RIFT/hyperpipe/drivers/base.py | 262 +++++++++ .../Code/RIFT/hyperpipe/drivers/gaussian.py | 127 ++++ .../Code/RIFT/hyperpipe/marg_list.py | 279 +++++++++ .../Code/bin/hyperpipe_conf.yaml | 84 +++ .../Code/bin/util_HyperMargGaussian.py | 27 + .../Code/bin/util_RIFT_hyperpipe.py | 551 ++++++++++++------ .../Code/test/hyperpipe/README.md | 92 +++ .../Code/test/hyperpipe/pixi.toml | 74 +++ .../Code/test/hyperpipe/tests/conftest.py | 86 +++ .../Code/test/hyperpipe/tests/demo_dryrun.py | 94 +++ .../test/hyperpipe/tests/standalone_check.py | 131 +++++ .../Code/test/hyperpipe/tests/test_config.py | 66 +++ .../Code/test/hyperpipe/tests/test_coords.py | 79 +++ .../Code/test/hyperpipe/tests/test_drivers.py | 98 ++++ .../hyperpipe/tests/test_hydra_integration.py | 130 +++++ .../test/hyperpipe/tests/test_marg_list.py | 100 ++++ 21 files changed, 2751 insertions(+), 181 deletions(-) create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/__init__.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/config.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/coords.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/drivers/__init__.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/drivers/base.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/drivers/gaussian.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/marg_list.py create mode 100644 MonteCarloMarginalizeCode/Code/bin/hyperpipe_conf.yaml create mode 100644 MonteCarloMarginalizeCode/Code/bin/util_HyperMargGaussian.py create mode 100644 MonteCarloMarginalizeCode/Code/test/hyperpipe/README.md create mode 100644 MonteCarloMarginalizeCode/Code/test/hyperpipe/pixi.toml create mode 100644 MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/conftest.py create mode 100644 MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/demo_dryrun.py create mode 100644 MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/standalone_check.py create mode 100644 MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_config.py create mode 100644 MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_coords.py create mode 100644 MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_drivers.py create mode 100644 MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_hydra_integration.py create mode 100644 MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_marg_list.py diff --git a/MonteCarloMarginalizeCode/Code/RIFT/__init__.py b/MonteCarloMarginalizeCode/Code/RIFT/__init__.py index 59c1f678b..4da9fe8c7 100644 --- a/MonteCarloMarginalizeCode/Code/RIFT/__init__.py +++ b/MonteCarloMarginalizeCode/Code/RIFT/__init__.py @@ -1,3 +1,3 @@ -__all__ = ['integrators','interpolators','likelihood','physics','misc','calmarg','plot_utilities'] +__all__ = ['integrators','interpolators','likelihood','physics','misc','calmarg','plot_utilities','hyperpipe'] from . import lalsimutils # top-level interface utility from . import * # sub-packages diff --git a/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/__init__.py b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/__init__.py new file mode 100644 index 000000000..011d18db4 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/__init__.py @@ -0,0 +1,30 @@ +""" +RIFT.hyperpipe +============== + +Pipeline-construction utilities for the RIFT "hyperpipeline" --- the iterative +marginalize + fit + puff loop used to infer hyperparameters (e.g. EOS, +population-model, or generic high-level) from one or more underlying +likelihood evaluators ("marg drivers"). Analogous in spirit to the +``util_RIFT_pseudo_pipe.py`` driver for the single-event GW PE pipeline, +but with: + + * ini-based / Hydra-based configuration (see ``config.py``) + * flexible multi-event input (``marg-list`` semantics, see ``marg_list.py``) + * a coordinate-transformation framework that mirrors the CIP + ``--supplementary-coordinate-code`` / ``--parameter`` / ``--integration-parameter-range`` + conventions already used by ``util_ConstructEOSPosterior.py`` + (see ``coords.py``) + * driver toolkits to spare downstream users from re-deriving the + fragile argument strings, output-file names, and contract points + each marg driver must respect (see ``drivers``) + +The top-level CLI driver remains ``bin/util_RIFT_hyperpipe.py``; this +package is what it delegates to. +""" + +from . import coords +from . import config +from . import marg_list + +__all__ = ['coords', 'config', 'marg_list', 'drivers'] diff --git a/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/config.py b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/config.py new file mode 100644 index 000000000..c619ca426 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/config.py @@ -0,0 +1,222 @@ +""" +RIFT.hyperpipe.config +===================== + +Schema + defaults for the Hydra/OmegaConf configuration consumed by +``util_RIFT_hyperpipe.py``. The top-level keys correspond to the +sections sketched in the original (pre-rewrite) ``util_RIFT_hyperpipe.py`` +header comment: + + arch : iteration / chunking / batch architecture + post : posterior-construction stage (CIP-style executable) + marg-list : list of per-event/per-driver marg jobs (heterogeneous OK) + puff : puffball randomization stage + test : convergence-test stage + init : initial-grid sourcing (file or generation) + general : retries, resources, condor / OSG / singularity knobs + +A default config is provided as :data:`DEFAULT_CONFIG_YAML` so a user can +write a minimal override and get a working pipeline. + +Validation is intentionally light here --- the heavy lifting is delegated +to :mod:`RIFT.hyperpipe.coords` and :mod:`RIFT.hyperpipe.marg_list`. +""" + +from __future__ import annotations + +import logging +import os +from typing import Any, Dict + +logger = logging.getLogger(__name__) + + +# -------------------------------------------------------------------------- +# Default config (Hydra writes this verbatim if no override is given) +# -------------------------------------------------------------------------- + +DEFAULT_CONFIG_YAML = """\ +# RIFT hyperpipe default configuration. +# +# Override on the CLI with Hydra syntax, e.g.: +# util_RIFT_hyperpipe.py arch.n-iterations=10 general.use-osg=true +# or by writing your own hyperpipe_conf.yaml and pointing Hydra at it. + +arch: + # High-level iterative architecture knobs. + method: default # reserved for future hyperpipe strategies + n-iterations: 5 + n-samples-per-job: 1000 + explode-marg-jobs: 5 # -> --eos-post-explode-jobs + explode-marg-jobs-last: null # -> --eos-post-explode-jobs-last (default = same) + start-iteration: 0 + +post: + # Final posterior-construction stage (CIP-family executable). + exe: null # default = `which util_ConstructEOSPosterior.py` + coord-module: null # importable module name, e.g. "rift_default" + coords-fit: "" # "x y z" + coords-sample: "" # "x:[-8,8] y:[-8,8] z:[-8,8]" + coords-implied: "" # "R1.4 Mmax" + coords-nofit: "" # "delta_mc s1z s2z" + likelihood-factor-module: null + likelihood-factor-function: null + likelihood-factor-ini: null + extra-args: "" # appended verbatim to args_eos_post.txt + settings: + n-max: null # -> --n-max + n-step: null # -> --n-step + n-eff: null # -> --n-eff + sampler-method: null # -> --sampler-method + fit-method: null # -> --fit-method + sigma-cut: null # -> --sigma-cut + +marg-list: + # One entry per (likelihood driver, event) pair. Heterogeneous OK --- + # different drivers may have different batch sizes (n-chunk) and + # different per-driver coord modules. + # + # Example (Gaussian toy): + # - name: gaussian + # exe: example_gaussian.py + # args: "--outdir Gaussian_example --conforming-output-name" + # event-file: null # null -> 'empty_event_file' sentinel + # n-chunk: 100 + # coord-module: null # per-driver coord override (rarely needed) + # extra-args: "" + - name: example + exe: example_gaussian.py + args: "--outdir example_output --conforming-output-name" + event-file: null + n-chunk: 100 + coord-module: null + extra-args: "" + +puff: + exe: null # default = `which util_HyperparameterPuffball.py` + puff-factor: 0.5 + force-away: 0.03 + extra-args: "" + +test: + exe: convergence_test_samples + method: JS + threshold: 0.05 + extra-args: "" + +init: + # Initial parameter-grid sourcing. Provide *either* ``file`` (path to an + # existing grid file) *or* a ``generation`` block (auto-generate via + # util_HyperparameterGrid.py-style helper). If both are set, ``file`` + # wins. + file: null + generation: + placement-method: null # e.g. "uniform" + params-and-ranges: null # e.g. "x:[-8,8] y:[-8,8] z:[-8,8]" + npts: null + external-code: null + external-args: null + +general: + rundir: null # optional; created if missing, cd into it + retries: 0 # -> --general-retries + request-disk: 10M # -> --general-request-disk + request-memory: 16384 # -> --request-memory-marg (MB) + use-osg: false + use-singularity: false + use-singularity-local: false + condor-local-nonworker: false + condor-local-nonworker-igwn-prefix: false + condor-nogrid-nonworker: false + use-full-submit-paths: true + transfer-files: [] # extra files to ship beyond auto-detected exes +""" + + +# -------------------------------------------------------------------------- +# Validation +# -------------------------------------------------------------------------- + + +def _get(node, key, default=None): + """Forgiving get that works on OmegaConf DictConfig *and* plain dicts.""" + if node is None: + return default + try: + return node.get(key, default) # type: ignore[union-attr] + except AttributeError: + return node[key] if key in node else default + + +def validate_config(cfg) -> None: + """Raise informatively if the config has structural problems we can detect early. + + This does *not* validate coord modules or marg-list executables --- + those are deferred to coords.HyperCoordSpec.validate() and + marg_list.assemble_marg_list() so the messages can be more specific. + """ + if cfg is None: + raise ValueError("hyperpipe config is empty.") + + # Required top-level sections + for section in ("arch", "post", "marg-list", "puff", "init", "general"): + if _get(cfg, section) is None: + raise ValueError( + f"hyperpipe config is missing required section: {section!r}" + ) + + # marg-list must be a non-empty sequence + marg_list = _get(cfg, "marg-list") + try: + n = len(marg_list) + except TypeError as exc: + raise ValueError("'marg-list' must be a list of marg-driver entries.") from exc + if n == 0: + raise ValueError("'marg-list' must contain at least one entry.") + + # arch + arch = _get(cfg, "arch") + if not isinstance(_get(arch, "n-iterations"), int) or _get(arch, "n-iterations") <= 0: + raise ValueError("arch.n-iterations must be a positive integer.") + if not isinstance(_get(arch, "n-samples-per-job"), int) or _get(arch, "n-samples-per-job") <= 0: + raise ValueError("arch.n-samples-per-job must be a positive integer.") + + # post: at least one of (coords-fit) must be set + post = _get(cfg, "post") + if not _get(post, "coords-fit"): + raise ValueError( + "post.coords-fit must list at least one parameter " + "(e.g. 'x y z')." + ) + + # init: must have either file or generation set + init = _get(cfg, "init") + has_file = bool(_get(init, "file")) + gen = _get(init, "generation") or {} + has_gen = bool(_get(gen, "placement-method") or _get(gen, "external-code")) + if not (has_file or has_gen): + raise ValueError( + "init: must provide either init.file (existing grid) or " + "init.generation.placement-method / init.generation.external-code." + ) + + +def expand_path(p: str, base_dir: str) -> str: + """Expand a config path: leave absolutes alone; resolve relatives against *base_dir*.""" + if not p: + return p + p = os.path.expanduser(p) + if os.path.isabs(p): + return p + return os.path.normpath(os.path.join(base_dir, p)) + + +def truthy(val: Any) -> bool: + """Robustly coerce config values to bool. Tolerates 'true'/'True'/'1'/etc.""" + if isinstance(val, bool): + return val + if isinstance(val, (int, float)): + return val != 0 + if isinstance(val, str): + return val.strip().lower() in {"true", "yes", "1", "on"} + return False diff --git a/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/coords.py b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/coords.py new file mode 100644 index 000000000..bc039846a --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/coords.py @@ -0,0 +1,360 @@ +""" +RIFT.hyperpipe.coords +===================== + +Coordinate-transformation framework for the hyperpipeline. + +This module mirrors the conventions already established by +``util_ConstructEOSPosterior.py`` (and the broader CIP family) so that +hyperpipe configurations can reuse existing coord modules without a +parallel ecosystem. Specifically: + + * A "coord module" is just an *importable Python module name*. When + passed to the post-stage executable via ``--supplementary-coordinate-code``, + that executable will ``__import__`` it at runtime and call into it for + coordinate conversion / Jacobian / prior factors. + + * Each coordinate appears in the post stage via: + ``--parameter `` (fitting & MC parameter) + ``--integration-parameter-range :[a,b]`` (sampling bound) + plus optionally + ``--parameter-implied `` (used in fit, not independently sampled) + ``--parameter-nofit `` (sampled but not a fit coordinate) + + * For *heterogeneous* analyses (multiple likelihood drivers contributing + to the same hyperparameter inference), each driver may specify its own + coord module --- this is just passed through as an extra argument to + that driver's args line (typically as ``--supplementary-coordinate-code`` + if that driver supports it, else as a custom flag the driver consumes). + +The :class:`HyperCoordSpec` dataclass below bundles the four pieces a +hyperpipe configuration needs to know: + + name # what gets passed to --supplementary-coordinate-code + parameters # list of fitting parameters (== --parameter X ...) + parameter_ranges # dict[str, (a, b)] for --integration-parameter-range + implied # list of parameter-implied entries (optional) + nofit # list of parameter-nofit entries (optional) + likelihood_factor # optional (module, function, ini) trio for + # --supplementary-likelihood-factor-{code,function,ini} + +and provides helpers that emit the argument strings the post stage (and +optional per-driver) consume. +""" + +from __future__ import annotations + +import importlib +import logging +import re +import shlex +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Sequence, Tuple + +logger = logging.getLogger(__name__) + + +# -------------------------------------------------------------------------- +# Parsing helpers +# -------------------------------------------------------------------------- + +_RANGE_RE = re.compile( + r"""^\s* + (?P[A-Za-z_][\w]*) + \s*[:=]\s* + \[\s* + (?P[-+0-9eE.naif]+) + \s*,\s* + (?P[-+0-9eE.naif]+) + \s*\] + \s*$""", + re.VERBOSE, +) + + +def parse_range_block(block: str) -> Tuple[str, Tuple[float, float]]: + """Parse a single ``name:[lo,hi]`` block into ``(name, (lo, hi))``. + + The form ``name=[lo,hi]`` is also accepted as a convenience. + """ + m = _RANGE_RE.match(block) + if not m: + raise ValueError( + f"Could not parse coord-range block {block!r}; " + "expected 'name:[lo,hi]'." + ) + lo = float(m.group("lo")) + hi = float(m.group("hi")) + if not lo < hi: + raise ValueError( + f"Range for {m.group('name')!r} must be increasing; got [{lo},{hi}]." + ) + return m.group("name"), (lo, hi) + + +def parse_range_string(s: str) -> Dict[str, Tuple[float, float]]: + """Parse a space-separated string of ``name:[lo,hi]`` blocks. + + Example + ------- + >>> parse_range_string("x:[-8,8] y:[-1,1]") + {'x': (-8.0, 8.0), 'y': (-1.0, 1.0)} + """ + out: Dict[str, Tuple[float, float]] = {} + if not s: + return out + for block in shlex.split(s): + name, rng = parse_range_block(block) + if name in out: + raise ValueError(f"Duplicate range for parameter {name!r} in {s!r}.") + out[name] = rng + return out + + +def parse_parameter_list(s: str) -> List[str]: + """Split a space-separated parameter-name string into a list, preserving order.""" + if not s: + return [] + return shlex.split(s) + + +# -------------------------------------------------------------------------- +# HyperCoordSpec +# -------------------------------------------------------------------------- + + +@dataclass +class HyperCoordSpec: + """Bundle describing the coordinates a hyperpipe stage operates in. + + Parameters + ---------- + name + Name of an *importable* Python module used by the consumer for + coord conversion / Jacobian / priors. If ``None``, no + ``--supplementary-coordinate-code`` is emitted. + parameters + Fitting / MC parameter names. Emitted as ``--parameter X`` flags. + parameter_ranges + Map from parameter name to ``(lo, hi)``. Emitted as + ``--integration-parameter-range X:[lo,hi]``. + implied + Parameters used in the fit but not independently sampled. + Emitted as ``--parameter-implied X``. + nofit + Parameters sampled but not in the fit. Emitted as + ``--parameter-nofit X``. + likelihood_factor + Optional ``(module, function, ini)`` triple wiring a + supplementary external-prior / likelihood factor through the + post stage (``--supplementary-likelihood-factor-{code,function,ini}``). + """ + + name: Optional[str] = None + parameters: List[str] = field(default_factory=list) + parameter_ranges: Dict[str, Tuple[float, float]] = field(default_factory=dict) + implied: List[str] = field(default_factory=list) + nofit: List[str] = field(default_factory=list) + likelihood_factor: Optional[Tuple[str, Optional[str], Optional[str]]] = None + + # ----- construction -------------------------------------------------- + @classmethod + def from_strings( + cls, + *, + name: Optional[str] = None, + coords_fit: str = "", + coords_sample: str = "", + coords_implied: str = "", + coords_nofit: str = "", + likelihood_factor: Optional[Sequence[Optional[str]]] = None, + ) -> "HyperCoordSpec": + """Build a spec from the string-shaped fields a Hydra config gives us. + + ``coords_fit`` : "x y z" + ``coords_sample`` : "x:[-8,8] y:[-8,8] z:[-8,8]" + ``coords_implied`` : "R1.4 Mmax" (optional) + ``coords_nofit`` : "delta_mc s1z s2z" (optional) + ``likelihood_factor``: (module, function, ini) (any element may be None) + """ + params = parse_parameter_list(coords_fit) + ranges = parse_range_string(coords_sample) + unknown = set(ranges) - set(params) + if unknown: + raise ValueError( + f"coords-sample names a parameter not in coords-fit: {sorted(unknown)!r}" + ) + lf: Optional[Tuple[str, Optional[str], Optional[str]]] = None + if likelihood_factor: + # Pad to length 3 and coerce empties to None + seq = list(likelihood_factor) + [None] * (3 - len(likelihood_factor)) + seq = [s if s else None for s in seq[:3]] + if seq[0] is not None: + lf = (seq[0], seq[1], seq[2]) # type: ignore[assignment] + return cls( + name=name or None, + parameters=params, + parameter_ranges=ranges, + implied=parse_parameter_list(coords_implied), + nofit=parse_parameter_list(coords_nofit), + likelihood_factor=lf, + ) + + # ----- validation ---------------------------------------------------- + def validate(self, strict_import: bool = False) -> None: + """Sanity-check the spec; optionally verify the coord module imports. + + ``strict_import=True`` will attempt ``importlib.import_module(self.name)`` + and raise on failure; with ``False`` we only warn, since coord + modules are often only importable inside the downstream runtime + environment (singularity image, OSG worker) and not necessarily + on the submit host. + """ + if not self.parameters: + raise ValueError("HyperCoordSpec requires at least one fitting parameter.") + missing = [p for p in self.parameters if p not in self.parameter_ranges] + if missing: + raise ValueError( + f"No integration range supplied for parameter(s): {missing!r}. " + "Every entry in coords-fit must appear in coords-sample." + ) + for p, (lo, hi) in self.parameter_ranges.items(): + if not lo < hi: + raise ValueError(f"Range for {p!r} must be increasing; got [{lo},{hi}].") + if self.name: + try: + importlib.import_module(self.name) + except Exception as exc: # noqa: BLE001 -- broad on purpose; cf. docstring + msg = ( + f"HyperCoordSpec: coord module {self.name!r} did not import " + f"on the submit host ({type(exc).__name__}: {exc}). " + "If it is only present on the worker, this is expected." + ) + if strict_import: + raise ImportError(msg) from exc + logger.warning(msg) + + # ----- emission ------------------------------------------------------ + @staticmethod + def _fmt_num(x: float) -> str: + """Format a coord bound, preserving the integer form when applicable. + + e.g. -8.0 -> '-8' (matches the existing hyperpipe demos) + -1.6 -> '-1.6' + 1e-05 -> '1e-05' + """ + try: + xi = int(x) + except (OverflowError, ValueError): + return repr(x) + if xi == x: + return str(xi) + # general format strips trailing zeros while keeping decimal precision + return format(x, "g") + + def to_parameter_args(self) -> str: + """Emit ``--parameter X`` / ``--integration-parameter-range X:[a,b]`` flags. + + Includes implied / nofit and any required-by-CIP ordering. Returns a + single space-joined string, ready to be appended to a hyperpipe + args_*.txt file. + """ + bits: List[str] = [] + for p in self.parameters: + bits.append(f"--parameter {p}") + for p in self.implied: + bits.append(f"--parameter-implied {p}") + for p in self.nofit: + bits.append(f"--parameter-nofit {p}") + for p in self.parameters: + lo, hi = self.parameter_ranges[p] + bits.append( + f"--integration-parameter-range {p}:[{self._fmt_num(lo)},{self._fmt_num(hi)}]" + ) + return " ".join(bits) + + def to_post_args(self) -> str: + """Emit the post-stage arg block (parameters + coord-module + lf trio).""" + bits = [self.to_parameter_args()] + if self.name: + bits.append(f"--supplementary-coordinate-code {self.name}") + if self.likelihood_factor is not None: + mod, fn, ini = self.likelihood_factor + bits.append(f"--supplementary-likelihood-factor-code {mod}") + if fn: + bits.append(f"--supplementary-likelihood-factor-function {fn}") + if ini: + bits.append(f"--supplementary-likelihood-factor-ini {ini}") + return " ".join(b for b in bits if b) + + def to_puff_args(self, force_away: float = 0.03, puff_factor: float = 0.5) -> str: + """Emit the puff-stage arg block. + + By default we puff in every fitting parameter; this is what every + existing hyperpipe example does. Extra flags can be appended by the + caller. + """ + bits = [f"--force-away {force_away}", f"--puff-factor {puff_factor}"] + for p in self.parameters: + bits.append(f"--parameter {p}") + return " ".join(bits) + + def to_test_args(self, method: str = "JS", threshold: float = 0.05) -> str: + """Emit the convergence-test arg block. + + Mirrors the args_test.txt pattern from the Gaussian demo: + ``--parameter x --parameter y --parameter z --method JS --threshold 0.05`` + """ + bits = [f"--parameter {p}" for p in self.parameters] + bits.append(f"--method {method}") + bits.append(f"--threshold {threshold}") + return " ".join(bits) + + # ----- per-driver hook --------------------------------------------------- + def to_driver_coord_flag(self) -> str: + """Emit only the ``--supplementary-coordinate-code`` flag, if any. + + Useful for composing into a marg-driver's per-event args line when + that driver also consumes the same coord-module convention (e.g. + CIP-as-marg in the GW+NICER NS-EOS pipeline). + """ + return f"--supplementary-coordinate-code {self.name}" if self.name else "" + + +# -------------------------------------------------------------------------- +# Convenience constructors +# -------------------------------------------------------------------------- + + +def coord_spec_from_config_section(section) -> HyperCoordSpec: + """Build a :class:`HyperCoordSpec` from a Hydra ``post`` (or analogous) section. + + The section is expected to provide the keys: + ``coord-module`` (str, optional) + ``coords-fit`` (str) + ``coords-sample`` (str) + ``coords-implied`` (str, optional) + ``coords-nofit`` (str, optional) + ``likelihood-factor-module`` / ``likelihood-factor-function`` / + ``likelihood-factor-ini`` (str, optional) + """ + def _get(key, default=None): + # tolerate both DictConfig and plain dict + try: + return section.get(key, default) # type: ignore[union-attr] + except AttributeError: + return section[key] if key in section else default + + lf_mod = _get("likelihood-factor-module") + lf = None + if lf_mod: + lf = (lf_mod, _get("likelihood-factor-function"), _get("likelihood-factor-ini")) + + return HyperCoordSpec.from_strings( + name=_get("coord-module"), + coords_fit=_get("coords-fit", "") or "", + coords_sample=_get("coords-sample", "") or "", + coords_implied=_get("coords-implied", "") or "", + coords_nofit=_get("coords-nofit", "") or "", + likelihood_factor=lf, + ) diff --git a/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/drivers/__init__.py b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/drivers/__init__.py new file mode 100644 index 000000000..197861404 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/drivers/__init__.py @@ -0,0 +1,38 @@ +""" +RIFT.hyperpipe.drivers +====================== + +Toolkit for building "marg drivers" --- the per-event likelihood +evaluators the hyperpipeline orchestrates --- with the minimum of +ceremony. + +Every driver in the hyperpipeline must obey the contract: + + Inputs (CLI, as seen by ``create_eos_posterior_pipeline``): + --using-eos file: # path to the current hyperparameter grid + --using-eos-index # OR + --eos_start_index # index range [a, b) to evaluate + --eos_end_index + --fname-output-integral # name for output annotated file + --fname-output-samples # ignored by most drivers (legacy) + --outdir # write outputs into D/ + --conforming-output-name # if set, append '+annotation.dat' + --fname # dummy/passthrough; sometimes a real path + + Output: + /[+annotation.dat] + whitespace-separated, header: + # lnL sigma_lnL + Rows are the rows of the input grid in [a, b), with the first + two columns replaced by ln L and an estimate of its uncertainty. + +The :class:`MargDriverBase` class encapsulates the boilerplate (arg +parsing, header sniffing, slicing, output formatting) so a concrete +driver only has to implement ``log_likelihood``. + +See :mod:`.gaussian` for the canonical example. +""" + +from .base import MargDriverBase, parse_marg_driver_args, write_marg_output + +__all__ = ["MargDriverBase", "parse_marg_driver_args", "write_marg_output"] diff --git a/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/drivers/base.py b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/drivers/base.py new file mode 100644 index 000000000..6824a2c53 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/drivers/base.py @@ -0,0 +1,262 @@ +""" +RIFT.hyperpipe.drivers.base +=========================== + +Boilerplate-free base class for marg drivers. + +The single hard contract a marg driver has with the hyperpipeline is +described in :mod:`RIFT.hyperpipe.drivers`. The work the existing +hand-written drivers had to repeat --- parse a fixed set of CLI args, +sniff the input-grid header for column names, slice the rows by index +range, and write a header-prefixed ``+annotation.dat`` output --- is +encapsulated here so a concrete driver becomes "implement +``log_likelihood`` and call ``MargDriverBase.run()``". +""" + +from __future__ import annotations + +import argparse +import logging +import os +from pathlib import Path +from typing import List, Optional, Sequence, Tuple + +import numpy as np + +logger = logging.getLogger(__name__) + + +# -------------------------------------------------------------------------- +# CLI parsing +# -------------------------------------------------------------------------- + + +def make_marg_driver_parser(description: Optional[str] = None) -> argparse.ArgumentParser: + """Construct an ``argparse.ArgumentParser`` with the standard marg-driver flags. + + Concrete drivers can extend this by adding their own arguments to the + returned parser before calling ``.parse_args()``. + """ + p = argparse.ArgumentParser(description=description) + p.add_argument("--fname", type=str, default=None, + help="Dummy passthrough required by the hyperpipe API.") + p.add_argument("--using-eos", type=str, required=True, + help="Input hyperparameter grid. 'file:' prefix is tolerated.") + p.add_argument("--using-eos-index", type=int, default=None, + help="If set, evaluate this single row (overrides start/end).") + p.add_argument("--eos_start_index", type=int, default=None, + help="First row of the grid to evaluate (inclusive).") + p.add_argument("--eos_end_index", type=int, default=None, + help="Last row of the grid to evaluate (exclusive).") + p.add_argument("--n-events-to-analyze", type=int, default=None, + help="When --using-eos-index is set, evaluate this many rows in total.") + p.add_argument("--outdir", type=str, default=None, + help="Output directory. Created if absent. Default: cwd.") + p.add_argument("--outdir-clean", action="store_true", + help="If set, delete --outdir before writing.") + p.add_argument("--fname-output-integral", type=str, default="output-marg-integral", + help="Base name for the lnL output file.") + p.add_argument("--fname-output-samples", type=str, default="output-marg-samples", + help="Dummy passthrough required by the hyperpipe API.") + p.add_argument("--conforming-output-name", action="store_true", + help="Append '+annotation.dat' to the integral output name " + "(required by create_eos_posterior_pipeline).") + return p + + +def parse_marg_driver_args( + parser: argparse.ArgumentParser, + argv: Optional[Sequence[str]] = None, +) -> argparse.Namespace: + """Run argparse and normalize the index range. + + Resolves the four overlapping ways the hyperpipeline can ask for an + index range (``--using-eos-index`` alone, ``--using-eos-index`` plus + ``--n-events-to-analyze``, or ``--eos_start_index``/``--eos_end_index``) + into a single ``(eos_start_index, eos_end_index)`` pair. + """ + opts = parser.parse_args(argv) + if opts.using_eos_index is not None: + opts.eos_start_index = opts.using_eos_index + opts.eos_end_index = opts.using_eos_index + 1 + if opts.n_events_to_analyze: + opts.eos_end_index = opts.using_eos_index + int(opts.n_events_to_analyze) + if opts.eos_start_index is None or opts.eos_end_index is None: + raise SystemExit( + "marg driver: need either --using-eos-index, or both " + "--eos_start_index and --eos_end_index." + ) + if opts.eos_end_index <= opts.eos_start_index: + raise SystemExit( + f"marg driver: eos_end_index ({opts.eos_end_index}) must exceed " + f"eos_start_index ({opts.eos_start_index})." + ) + return opts + + +# -------------------------------------------------------------------------- +# Grid I/O +# -------------------------------------------------------------------------- + + +def _strip_file_prefix(p: str) -> str: + return p[5:] if p.startswith("file:") else p + + +def read_grid(using_eos: str) -> Tuple[np.ndarray, List[str]]: + """Load a hyperpipe-format grid file. + + Returns ``(rows, column_names)`` where ``rows`` is a 2-D string array + (so we don't lose precision on the lnL / sigma columns that we will + overwrite) and ``column_names`` is the list of column names beyond + the two leading ``lnL sigma_lnL`` columns. + """ + path = _strip_file_prefix(using_eos) + with open(path, "r") as f: + header = f.readline().rstrip("\n") + if not header.startswith("#"): + raise ValueError( + f"read_grid: expected a '#'-prefixed header in {path!r}; got {header!r}." + ) + cols = header.lstrip("#").split() + if len(cols) < 2: + raise ValueError( + f"read_grid: header {header!r} must declare at least lnL and sigma_lnL." + ) + data = np.genfromtxt(path, dtype="str") + if data.ndim == 1: + data = data.reshape(1, -1) + # Upcast to a wide string dtype so we can rewrite the leading + # lnL / sigma_lnL columns with arbitrarily-precise float reprs + # without numpy silently truncating. Without this step, + # ``rows[i, 0] = repr(some_float)`` would be truncated to whatever + # the longest string in the input grid happened to be (this was a + # latent bug in the hand-written example_gaussian.py). + data = data.astype(" str: + """Write the annotated output file. + + Returns the path written. Mirrors the convention used by + ``example_gaussian.py`` and the NICER drivers: + + * if ``--fname`` is *not* set, write into ``outdir/`` and use the + ``--fname-output-integral`` basename; + * if ``--fname`` *is* set (legacy RIFT-style invocation), write + to the bare ``--fname-output-integral`` path; + * if ``--conforming-output-name`` is set, suffix ``+annotation.dat``. + """ + postfix = "+annotation.dat" if conforming_output_name else "" + if fname is None: + outdir = outdir or "." + Path(outdir).mkdir(parents=True, exist_ok=True) + out_path = os.path.join(outdir, fname_output_integral + postfix) + else: + out_path = fname_output_integral + postfix + header = "lnL sigma_lnL " + " ".join(column_names) + np.savetxt(out_path, rows, fmt="%10s", header=header) + return out_path + + +# -------------------------------------------------------------------------- +# Driver base class +# -------------------------------------------------------------------------- + + +class MargDriverBase: + """Base class for marg drivers. + + Subclasses implement :meth:`log_likelihood` and (optionally) override + :meth:`add_arguments` to declare driver-specific CLI flags, then call + :meth:`run` from their ``__main__`` block. + + Example + ------- + >>> class MyDriver(MargDriverBase): + ... description = "My toy driver." + ... def log_likelihood(self, row_values, column_names, opts): + ... return -0.5 * sum(float(v) ** 2 for v in row_values), 1e-3 + ... + >>> if __name__ == "__main__": # doctest: +SKIP + ... MyDriver().run() + """ + + description: Optional[str] = None + + # ----- subclass hooks ------------------------------------------------ + + def add_arguments(self, parser: argparse.ArgumentParser) -> None: + """Override to add driver-specific CLI args; default is no-op.""" + pass + + def log_likelihood( + self, + row_values: Sequence[str], + column_names: Sequence[str], + opts: argparse.Namespace, + ) -> Tuple[float, float]: + """Return ``(lnL, sigma_lnL)`` for one grid row. + + ``row_values`` are the raw string values from columns 2.. of the + grid (i.e. excluding the two lnL columns we'll overwrite). + ``column_names`` are the matching column names from the header. + Concrete drivers will typically build a ``dict(zip(column_names, + map(float, row_values)))`` before computing the likelihood. + """ + raise NotImplementedError + + # ----- main entry point --------------------------------------------- + + def run(self, argv: Optional[Sequence[str]] = None) -> str: + """Parse args, evaluate over the requested row range, write output. + + Returns the path of the written annotated file. + """ + parser = make_marg_driver_parser(description=self.description) + self.add_arguments(parser) + opts = parse_marg_driver_args(parser, argv=argv) + + if opts.outdir_clean and opts.outdir: + import shutil + + try: + shutil.rmtree(opts.outdir) + except FileNotFoundError: + pass + + rows, column_names = read_grid(opts.using_eos) + logger.info("Loaded grid %r with columns %r", opts.using_eos, column_names) + + start, stop = opts.eos_start_index, opts.eos_end_index + if start < 0 or stop > rows.shape[0]: + raise SystemExit( + f"marg driver: index range [{start},{stop}) exceeds grid " + f"size {rows.shape[0]}." + ) + + for i in range(start, stop): + row_values = rows[i, 2:] + lnL, sigma = self.log_likelihood(row_values, column_names, opts) + rows[i, 0] = repr(float(lnL)) + rows[i, 1] = repr(float(sigma)) + + out_path = write_marg_output( + rows[start:stop], + column_names, + fname_output_integral=opts.fname_output_integral, + outdir=opts.outdir, + fname=opts.fname, + conforming_output_name=opts.conforming_output_name, + ) + logger.info("Wrote marg output to %r", out_path) + return out_path diff --git a/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/drivers/gaussian.py b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/drivers/gaussian.py new file mode 100644 index 000000000..c455eeb2c --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/drivers/gaussian.py @@ -0,0 +1,127 @@ +""" +RIFT.hyperpipe.drivers.gaussian +=============================== + +Toy 3-D Gaussian marg driver --- canonical "smoke test" for the hyperpipeline. + +This is a clean, importable port of the hand-written +``example_gaussian.py`` shipped with the Gaussian hyperpipe demo, built on +top of :class:`RIFT.hyperpipe.drivers.MargDriverBase` so the +boilerplate (arg parsing, grid I/O, output formatting) is shared with all +other drivers. + +The likelihood is the sum of two 3-D Gaussians symmetric in ``x``:: + + p(x, y, z) = N([-x0, 0, 0], 2 I) + N([+x0, 0, 0], 2 I) + +with ``x0 = 4`` and identity covariance scaled by 2. Useful as both a +pedagogical example and a regression test for the post / puff / +convergence-test pieces, because the exact marginals are known. + +Run via the in-tree CLI:: + + util_HyperMargGaussian.py \\ + --using-eos file:my_grid.dat --outdir out --fname-output-integral lnL.txt \\ + --eos_start_index 0 --eos_end_index 1000 --conforming-output-name +""" + +from __future__ import annotations + +import argparse +from typing import Sequence, Tuple + +import numpy as np + +from .base import MargDriverBase + + +class GaussianMargDriver(MargDriverBase): + """Sum-of-two-Gaussians 3-D toy likelihood.""" + + description = ( + "Toy 3-D bimodal Gaussian marg driver for the RIFT hyperpipeline." + ) + + def add_arguments(self, parser: argparse.ArgumentParser) -> None: + parser.add_argument( + "--x-offset", + type=float, + default=4.0, + help="Position of the two modes along x (default: 4).", + ) + parser.add_argument( + "--sigma2", + type=float, + default=2.0, + help="Diagonal of the covariance matrix (default: 2).", + ) + parser.add_argument( + "--unimodal", + action="store_true", + help="If set, drop the second mode at -x_offset.", + ) + parser.add_argument( + "--params", + type=str, + default="x,y,z", + help="Comma-separated names of the three grid columns to use " + "(default: x,y,z). Must appear in the input-grid header.", + ) + + # Cache the multivariate normals so we don't rebuild per-row. + _rv_cache = None + + def _build_rvs(self, opts): + from scipy.stats import multivariate_normal + + cov = np.eye(3) * float(opts.sigma2) + rv_pos = multivariate_normal(mean=[+opts.x_offset, 0.0, 0.0], cov=cov) + rv_neg = ( + None + if opts.unimodal + else multivariate_normal(mean=[-opts.x_offset, 0.0, 0.0], cov=cov) + ) + self._rv_cache = (rv_pos, rv_neg) + + def log_likelihood( + self, + row_values: Sequence[str], + column_names: Sequence[str], + opts: argparse.Namespace, + ) -> Tuple[float, float]: + if self._rv_cache is None: + self._build_rvs(opts) + rv_pos, rv_neg = self._rv_cache + + # Resolve column indices -> the user-named param columns + wanted = [p.strip() for p in opts.params.split(",") if p.strip()] + if len(wanted) != 3: + raise SystemExit( + f"GaussianMargDriver: --params must list exactly 3 names " + f"(got {wanted!r})." + ) + try: + idxs = [column_names.index(p) for p in wanted] + except ValueError as exc: + raise SystemExit( + f"GaussianMargDriver: param {exc.args[0]!r} not in grid header " + f"columns {list(column_names)!r}." + ) from exc + vec = np.array([float(row_values[i]) for i in idxs]) + L = rv_pos.pdf(vec) + if rv_neg is not None: + L = L + rv_neg.pdf(vec) + # Guard log(0); the existing demo does not, so we are *gentle*: + # report a very-negative lnL but keep finite for the GP fit. + if L <= 0.0 or not np.isfinite(L): + return -1.0e6, 1.0e-3 + return float(np.log(L)), 1.0e-3 + + +def main(argv=None) -> str: + """Console-script entry point.""" + return GaussianMargDriver().run(argv=argv) + + +if __name__ == "__main__": # pragma: no cover + main() diff --git a/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/marg_list.py b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/marg_list.py new file mode 100644 index 000000000..be060262a --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/marg_list.py @@ -0,0 +1,279 @@ +""" +RIFT.hyperpipe.marg_list +======================== + +Multi-event / multi-driver "marg-list" assembly. + +A *marg* in the hyperpipeline vocabulary is a per-event (or per-likelihood) +job that evaluates the underlying likelihood on the current grid of +hyperparameters. ``create_eos_posterior_pipeline`` consumes parallel +lists describing each marg: + + * ``--marg-event-exe-list-file`` -- one executable path per line + * ``--marg-event-args-list-file`` -- one args string per line + * ``--marg-event-nchunk-list-file`` -- one int per line (heterogeneous OK) + * ``--event-file `` -- repeated; one per marg entry + +This module assembles those four files from a Hydra ``marg-list:`` section +and is responsible for: + + * resolving each ``exe`` (via :func:`RIFT.misc.dag_utils_generic.which` + or a literal path) + * copying non-RIFT-core executables into the working directory so they + can be transferred (mirroring the current + ``util_RIFT_hyperpipe.py`` behaviour) + * materializing per-entry event files (``event-.net``) and a + sentinel ``empty_event_file`` when no event data are supplied + * stitching in per-driver coord-module overrides for heterogeneous + analyses (e.g. GW + NICER), where each driver carries its own coord + convention. +""" + +from __future__ import annotations + +import logging +import os +import shutil +from dataclasses import dataclass, field +from typing import Any, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +# -------------------------------------------------------------------------- +# Result container +# -------------------------------------------------------------------------- + + +@dataclass +class MargAssembly: + """Output of :func:`assemble_marg_list`. + + All paths are absolute. Lengths of the per-entry sequences are equal + to ``len(cfg['marg-list'])``. + """ + + args_lines: List[str] = field(default_factory=list) + exe_paths: List[str] = field(default_factory=list) + event_files: List[str] = field(default_factory=list) + n_chunks: List[int] = field(default_factory=list) + transfer_files: List[str] = field(default_factory=list) + names: List[str] = field(default_factory=list) + + # ----- emission helpers ------------------------------------------------ + def write_args_file(self, path: str) -> None: + with open(path, "w") as f: + f.write("\n".join(self.args_lines)) + if self.args_lines: + f.write("\n") + + def write_exe_file(self, path: str) -> None: + with open(path, "w") as f: + f.write("\n".join(self.exe_paths)) + if self.exe_paths: + f.write("\n") + + def write_nchunk_file(self, path: str) -> None: + with open(path, "w") as f: + for n in self.n_chunks: + f.write(f"{int(n)}\n") + + def write_transfer_file_list(self, path: str, extra: Optional[List[str]] = None) -> None: + """Write the transfer-file-list HTCondor consumes. + + Appends ``extra`` (e.g. user-supplied general.transfer-files) after + the auto-detected exe transfers. + """ + all_files = list(self.transfer_files) + if extra: + all_files.extend(extra) + with open(path, "w") as f: + for line in all_files: + f.write(line + "\n") + + +# -------------------------------------------------------------------------- +# Core assembly +# -------------------------------------------------------------------------- + + +_RIFT_CORE_EXE_HINTS = ( + # Substrings that, if present in an exe's resolved path, mark it as + # a core RIFT executable we should *not* duplicate into the working + # directory or transfer-file-list. Mirrors what the original + # util_RIFT_hyperpipe.py did for "MonteCarloMarginalizeCode/Code/bin" + # but is more forgiving of editable installs. + "MonteCarloMarginalizeCode/Code/bin", + "/RIFT/", + "/site-packages/RIFT", +) + + +def _looks_like_core_rift(exe_path: str) -> bool: + if not exe_path: + return False + return any(h in exe_path for h in _RIFT_CORE_EXE_HINTS) + + +def _resolve_exe(exe: str, base_dir: str) -> str: + """Resolve an executable name to an absolute path. + + Tries, in order: + 1. literal absolute path + 2. :func:`shutil.which` + 3. :func:`RIFT.misc.dag_utils_generic.which` + 4. ``/`` + """ + if not exe: + raise ValueError("marg-list entry is missing 'exe'.") + exe = os.path.expanduser(exe.strip()) + if os.path.isabs(exe) and os.path.exists(exe): + return exe + bare = os.path.basename(exe) + found = shutil.which(bare) + if found: + return found + try: + from RIFT.misc.dag_utils_generic import which as rift_which + except Exception: # pragma: no cover -- only at runtime + rift_which = None + if rift_which is not None: + found = rift_which(bare) + if found: + return found + fallback = os.path.join(base_dir, bare) + if os.path.exists(fallback): + return fallback + raise FileNotFoundError( + f"Could not locate marg exe {exe!r}. Searched PATH, RIFT, and {base_dir!r}." + ) + + +def _stage_event_file( + entry: Any, + indx: int, + base_dir: str, + run_dir: str, +) -> Tuple[str, bool]: + """Materialize this entry's event file at run_dir/event-.net. + + Returns ``(abs_path, is_empty_sentinel)``. If the entry has no + ``event-file`` set, we write a sentinel file with the single token + ``empty_event_file`` so the downstream pipeline still sees a + well-formed input. + """ + src = None + if hasattr(entry, "get"): + src = entry.get("event-file") or entry.get("event_file") + elif "event-file" in entry: + src = entry["event-file"] + dest = os.path.join(run_dir, f"event-{indx}.net") + if src: + src = os.path.expanduser(src) + if not os.path.isabs(src): + src = os.path.join(base_dir, src) + if not os.path.exists(src): + raise FileNotFoundError( + f"marg-list entry {indx}: event-file {src!r} does not exist." + ) + shutil.copyfile(src, dest) + return dest, False + # sentinel + with open(dest, "w") as f: + f.write("empty_event_file\n") + return dest, True + + +def _entry_get(entry: Any, key: str, default=None): + """Get a key from a marg-list entry, tolerating dict or DictConfig.""" + try: + return entry.get(key, default) # type: ignore[union-attr] + except AttributeError: + return entry[key] if key in entry else default + + +def assemble_marg_list( + cfg, + *, + base_dir: str, + run_dir: str, +) -> MargAssembly: + """Walk ``cfg['marg-list']`` and prepare all per-entry artefacts. + + Parameters + ---------- + cfg + The full hyperpipe config (DictConfig or dict). Only the + ``marg-list`` key is consulted here. + base_dir + Directory the user invoked ``util_RIFT_hyperpipe.py`` from; + relative paths in the config are resolved against this. + run_dir + Working directory for the pipeline (where ``event-.net`` + files and copies of non-core exes are written). + + Returns + ------- + MargAssembly + Holds parallel lists of args / exe / event-file / n-chunk per + marg entry, plus the auto-detected transfer-file list. + """ + marg_entries = cfg["marg-list"] + out = MargAssembly() + + for indx, entry in enumerate(marg_entries): + name = _entry_get(entry, "name") or f"marg_{indx}" + out.names.append(name) + + # ----- exe ------------------------------------------------------ + raw_exe = _entry_get(entry, "exe") + if not raw_exe: + raise ValueError(f"marg-list[{indx}] ({name!r}) missing required key 'exe'.") + exe_path = _resolve_exe(raw_exe, base_dir=base_dir) + out.exe_paths.append(exe_path) + if not _looks_like_core_rift(exe_path): + # Stage a local copy so file-transfer (e.g. OSG) can ship it + local_copy = os.path.join(run_dir, os.path.basename(exe_path)) + if os.path.abspath(local_copy) != os.path.abspath(exe_path): + shutil.copyfile(exe_path, local_copy) + try: + os.chmod(local_copy, 0o755) + except OSError: + pass + out.transfer_files.append(local_copy) + else: + out.transfer_files.append(exe_path) + + # ----- args ----------------------------------------------------- + args = (_entry_get(entry, "args") or "").strip() + extra = (_entry_get(entry, "extra-args") or "").strip() + coord_mod = _entry_get(entry, "coord-module") + bits = [args] + if extra: + bits.append(extra) + if coord_mod: + # Per-driver coord-module override. We always emit + # --supplementary-coordinate-code; drivers that don't consume + # it should be wrapped in a shell that ignores it. + bits.append(f"--supplementary-coordinate-code {coord_mod}") + out.args_lines.append(" ".join(b for b in bits if b)) + + # ----- event file ---------------------------------------------- + event_path, _ = _stage_event_file(entry, indx, base_dir=base_dir, run_dir=run_dir) + out.event_files.append(event_path) + + # ----- n-chunk ------------------------------------------------- + nchunk = _entry_get(entry, "n-chunk", 1) + try: + nchunk = int(nchunk) + except (TypeError, ValueError) as exc: + raise ValueError( + f"marg-list[{indx}] ({name!r}) has non-integer n-chunk={nchunk!r}." + ) from exc + if nchunk <= 0: + raise ValueError( + f"marg-list[{indx}] ({name!r}) n-chunk must be positive; got {nchunk}." + ) + out.n_chunks.append(nchunk) + + return out diff --git a/MonteCarloMarginalizeCode/Code/bin/hyperpipe_conf.yaml b/MonteCarloMarginalizeCode/Code/bin/hyperpipe_conf.yaml new file mode 100644 index 000000000..636922aed --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/bin/hyperpipe_conf.yaml @@ -0,0 +1,84 @@ +# Default hyperpipe configuration consumed by util_RIFT_hyperpipe.py. +# +# Hydra finds this file because it lives next to util_RIFT_hyperpipe.py. +# Users can: +# * override fields on the CLI with Hydra syntax: +# util_RIFT_hyperpipe.py arch.n-iterations=10 general.use-osg=true +# * point Hydra at their own file with --config-name +# (placed next to this file) or --config-path +# * compose from the in-package defaults exposed via +# RIFT.hyperpipe.config.DEFAULT_CONFIG_YAML +# +# The schema is documented in RIFT/hyperpipe/config.py. + +arch: + method: default + n-iterations: 5 + n-samples-per-job: 1000 + explode-marg-jobs: 5 + explode-marg-jobs-last: null + start-iteration: 0 + +post: + exe: null + coord-module: null + coords-fit: "x y z" + coords-sample: "x:[-8,8] y:[-8,8] z:[-8,8]" + coords-implied: "" + coords-nofit: "" + likelihood-factor-module: null + likelihood-factor-function: null + likelihood-factor-ini: null + extra-args: "" + settings: + n-max: null + n-step: null + n-eff: null + sampler-method: null + fit-method: null + sigma-cut: null + +marg-list: + - name: gaussian + exe: util_HyperMargGaussian.py + args: "--outdir gaussian_out --conforming-output-name" + event-file: null + n-chunk: 100 + coord-module: null + extra-args: "" + +puff: + exe: null + puff-factor: 0.5 + force-away: 0.03 + extra-args: "" + +test: + exe: convergence_test_samples + method: JS + threshold: 0.05 + extra-args: "" + +init: + file: null + generation: + placement-method: null + params-and-ranges: null + npts: null + external-code: null + external-args: null + +general: + rundir: null + retries: 0 + request-disk: 10M + request-memory: 16384 + use-osg: false + use-singularity: false + use-singularity-local: false + condor-local-nonworker: false + condor-local-nonworker-igwn-prefix: false + condor-nogrid-nonworker: false + use-full-submit-paths: true + transfer-files: [] + dry-run: false diff --git a/MonteCarloMarginalizeCode/Code/bin/util_HyperMargGaussian.py b/MonteCarloMarginalizeCode/Code/bin/util_HyperMargGaussian.py new file mode 100644 index 000000000..e660ee88c --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/bin/util_HyperMargGaussian.py @@ -0,0 +1,27 @@ +#! /usr/bin/env python +""" +util_HyperMargGaussian.py +========================= + +Thin CLI shim for :class:`RIFT.hyperpipe.drivers.gaussian.GaussianMargDriver`. + +This is the toy 3-D Gaussian marg driver for the RIFT hyperpipeline; it +exists as a top-level ``bin/`` executable so the hyperpipe configuration +can reach it via ``which util_HyperMargGaussian.py`` the same way it +reaches ``util_HyperparameterPuffball.py`` etc. + +Example +------- +:: + + util_HyperMargGaussian.py \\ + --using-eos file:./blind_gaussian_plus_minus.dat \\ + --eos_start_index 0 --eos_end_index 1000 \\ + --outdir Gaussian_example --conforming-output-name \\ + --fname-output-integral f_out_name_1.txt +""" + +from RIFT.hyperpipe.drivers.gaussian import main + +if __name__ == "__main__": + main() diff --git a/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py b/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py index abe11bd61..e1e55ab67 100644 --- a/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py +++ b/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py @@ -1,194 +1,385 @@ #! /usr/bin/env python -# -# EXAMPLE -# - -# ARGUMENT SPECIFICATION -# arch -# method: -# n-iterations -# n-samples-per-job -# explode marg jobs: -# post: -# coord-module: -# coords-fit: -# coords-sample: -# exe: -# settings: -# n-max -# n-step -# sampler-method -# sigma-cut -# marg-list: -# - name: -# exe: -# args: -# - name: -# .... -# puff: -# exe: -# settings: -# puff-factor: -# test: -# init: -# file: -# generation: -# placement method: -# params_and_ranges: -# npts: -# external_code: -# external arguments: -# general: -# retries -# request-disk -# request-memory -# use-osg -# -# - -import numpy as np -import os, shutil +""" +util_RIFT_hyperpipe.py +====================== + +Top-level driver for the RIFT hyperpipeline --- the iterative +marginalize + fit + puff loop used to infer hyperparameters (e.g. EOS +parameters, population-model parameters, or any user-defined high-level +quantity) from one or more underlying likelihood evaluators ("marg +drivers"). + +This is the hyperpipe analog of :file:`util_RIFT_pseudo_pipe.py`. Where +``pseudo_pipe`` builds a single-event GW PE pipeline from argparse +flags and a ``.ini`` file, this script builds a hyperpipe DAG from an +ini-style **Hydra** configuration with three first-class features: + + * **ini-based configuration** (Hydra/OmegaConf) --- see the default + template :file:`hyperpipe_conf.yaml` next to this script and the + schema documented in :mod:`RIFT.hyperpipe.config`. + + * **Flexible multi-event input** via the ``marg-list:`` section --- + one entry per (likelihood-driver, event) pair, with per-entry + executables, args, event files, ``n-chunk`` (heterogeneous batch + sizes), and an optional per-entry coord module. + + * **Coordinate-transformation framework** --- the ``post`` stage + consumes a CIP-mirror coord module via + ``--supplementary-coordinate-code``, the same convention + ``util_ConstructEOSPosterior.py`` already understands; the + parameters / integration ranges are emitted automatically from + ``post.coords-fit`` / ``post.coords-sample`` (and optionally + ``coords-implied`` / ``coords-nofit``). + +Examples +-------- +Reproduce a minimal Gaussian-toy hyperpipe analysis:: + + util_RIFT_hyperpipe.py \\ + general.rundir=./my_run \\ + init.file=./blind_gaussian_plus_minus.dat + +Override a few defaults from the CLI (Hydra syntax):: + + util_RIFT_hyperpipe.py \\ + arch.n-iterations=15 \\ + general.use-osg=true general.use-singularity=true \\ + general.condor-local-nonworker=true + +Implementation notes +-------------------- +The real work lives in :mod:`RIFT.hyperpipe`: + + * :func:`RIFT.hyperpipe.config.validate_config` -- structural validation + * :func:`RIFT.hyperpipe.coords.coord_spec_from_config_section` -- coord-spec + * :func:`RIFT.hyperpipe.marg_list.assemble_marg_list` -- multi-event assembly + +This script is intentionally thin: it parses Hydra config, materializes +the args / exe / event / nchunk files that +``create_eos_posterior_pipeline`` consumes, builds the matching +command line, and either runs it or (with ``general.dry-run=true``) +prints it. +""" + +from __future__ import annotations + import logging -logger = logging.getLogger(__name__) +import os +import shutil +import shlex +import sys +from typing import List, Optional -# install reuires hydra-core -from omegaconf import DictConfig, OmegaConf import hydra -import RIFT.misc.dag_utils_generic as dag_utils +from omegaconf import DictConfig, OmegaConf + +from RIFT.hyperpipe import config as hyper_config +from RIFT.hyperpipe.coords import coord_spec_from_config_section +from RIFT.hyperpipe.marg_list import assemble_marg_list + +logger = logging.getLogger(__name__) + + +# -------------------------------------------------------------------------- +# Helpers +# -------------------------------------------------------------------------- + + +def _which_or_self(name: str) -> str: + """Resolve an exe via PATH (or RIFT's which); fall back to the bare name.""" + found = shutil.which(name) + if found: + return found + try: + from RIFT.misc.dag_utils_generic import which as rift_which + + rw = rift_which(name) + if rw: + return rw + except Exception: + pass + return name + + +def _cfg_get(node, key, default=None): + """Forgiving get() that works on DictConfig or plain dict.""" + if node is None: + return default + try: + return node.get(key, default) + except AttributeError: + return node[key] if key in node else default + + +def _maybe_generate_initial_grid( + init_cfg, base_dir: str, run_dir: str +) -> str: + """Materialize an initial_grid.dat in run_dir. + + Honors (in priority order): + 1. ``init.file``: copy verbatim + 2. ``init.generation.placement-method == 'uniform'``: call out to + ``util_HyperparameterGrid.py`` with random-parameter/range/npts + pulled from the generation block. + 3. ``init.generation.external-code``: invoke an arbitrary user + command (``external-code external-args``) expected to drop + ``initial_grid.dat`` in cwd. + """ + initial_grid = os.path.join(run_dir, "initial_grid.dat") + init_file = _cfg_get(init_cfg, "file") + if init_file: + src = hyper_config.expand_path(init_file, base_dir) + if not os.path.exists(src): + raise FileNotFoundError(f"init.file does not exist: {src!r}") + shutil.copyfile(src, initial_grid) + return initial_grid + + gen = _cfg_get(init_cfg, "generation") or {} + placement = _cfg_get(gen, "placement-method") + external_code = _cfg_get(gen, "external-code") + + if placement == "uniform": + params_and_ranges = (_cfg_get(gen, "params-and-ranges") or "").strip() + npts = _cfg_get(gen, "npts") + if not params_and_ranges or not npts: + raise ValueError( + "init.generation.placement-method='uniform' requires " + "params-and-ranges and npts." + ) + # Parse params-and-ranges: "x:[a,b] y:[c,d] ..." + # and emit --random-parameter / --random-parameter-range pairs. + from RIFT.hyperpipe.coords import parse_range_string + ranges = parse_range_string(params_and_ranges) + bits = [_which_or_self("util_HyperparameterGrid.py")] + for name, (lo, hi) in ranges.items(): + bits.append(f"--random-parameter {name}") + bits.append(f"--random-parameter-range [{lo},{hi}]") + bits.append(f"--npts {int(npts)}") + bits.append(f"--fname-out {initial_grid}") + cmd = " ".join(bits) + logger.info("Generating initial grid: %s", cmd) + rc = os.system(cmd) + if rc != 0: + raise SystemExit(f"util_HyperparameterGrid.py exited with code {rc}.") + return initial_grid -base_dir =None -config_here = None + if external_code: + ext_args = _cfg_get(gen, "external-args") or "" + cmd = f"{external_code} {ext_args}".strip() + logger.info("Generating initial grid via external code: %s", cmd) + rc = os.system(f"cd {shlex.quote(run_dir)} && {cmd}") + if rc != 0: + raise SystemExit(f"external-code exited with code {rc}.") + if not os.path.exists(initial_grid): + raise FileNotFoundError( + f"external-code did not produce {initial_grid!r}; " + "it must write to that exact path." + ) + return initial_grid -@hydra.main(version_base=None, config_path=".",config_name='hyperpipe_conf') -def my_app(cfg: DictConfig): - global config_here - logging.basicConfig() - logger.info(" ---- INPUT CONFIG --- ") + raise ValueError( + "init: no usable initialization. Set either init.file, " + "init.generation.placement-method='uniform' (with params-and-ranges + npts), " + "or init.generation.external-code." + ) + + +def _build_post_args(cfg, coord_spec) -> str: + """Compose the args_eos_post.txt body from the coord spec + post.settings + extras.""" + post = cfg["post"] + args = coord_spec.to_post_args() + settings = _cfg_get(post, "settings") or {} + setting_flags = [ + ("n-max", "--n-max"), + ("n-step", "--n-step"), + ("n-eff", "--n-eff"), + ("sampler-method", "--sampler-method"), + ("fit-method", "--fit-method"), + ("sigma-cut", "--sigma-cut"), + ] + for key, flag in setting_flags: + val = _cfg_get(settings, key) + if val is not None and val != "": + args += f" {flag} {val}" + extra = (_cfg_get(post, "extra-args") or "").strip() + if extra: + args += " " + extra + return args + + +def _build_puff_args(cfg, coord_spec) -> str: + puff = _cfg_get(cfg, "puff") or {} + args = coord_spec.to_puff_args( + force_away=_cfg_get(puff, "force-away", 0.03), + puff_factor=_cfg_get(puff, "puff-factor", 0.5), + ) + extra = (_cfg_get(puff, "extra-args") or "").strip() + if extra: + args += " " + extra + return args + + +def _build_test_args(cfg, coord_spec) -> str: + test = _cfg_get(cfg, "test") or {} + args = coord_spec.to_test_args( + method=_cfg_get(test, "method", "JS"), + threshold=_cfg_get(test, "threshold", 0.05), + ) + extra = (_cfg_get(test, "extra-args") or "").strip() + if extra: + args += " " + extra + return args + + +# -------------------------------------------------------------------------- +# Hydra entry point +# -------------------------------------------------------------------------- + + +@hydra.main(version_base=None, config_path=".", config_name="hyperpipe_conf") +def my_app(cfg: DictConfig) -> None: + logging.basicConfig(level=logging.INFO, format="%(levelname)s %(name)s: %(message)s") + + base_dir = hydra.utils.get_original_cwd() + + logger.info("---- INPUT CONFIG ----") print(OmegaConf.to_yaml(cfg)) - config_here=cfg -if __name__ == "__main__": - my_app() + hyper_config.validate_config(cfg) - base_dir = os.getcwd() - # make run directory - if config_here['general']['rundir'] : - if not(os.path.exists(config_here['general']['rundir'])): - os.mkdir(config_here['general']['rundir']) - os.chdir(config_here['general']['rundir']) + # ----- run directory --------------------------------------------------- + rundir = _cfg_get(cfg["general"], "rundir") + if rundir: + rundir = hyper_config.expand_path(rundir, base_dir) + os.makedirs(rundir, exist_ok=True) + os.chdir(rundir) run_dir = os.getcwd() + logger.info("Working directory: %s", run_dir) + + # ----- coord spec ------------------------------------------------------ + coord_spec = coord_spec_from_config_section(cfg["post"]) + coord_spec.validate(strict_import=False) + + # ----- marg-list assembly --------------------------------------------- + marg = assemble_marg_list(cfg, base_dir=base_dir, run_dir=run_dir) + logger.info( + "marg-list: %d entries (%s)", + len(marg.names), + ", ".join(marg.names), + ) + + # ----- emit per-stage files ------------------------------------------- + args_marg_path = os.path.join(run_dir, "args_marg_eos.txt") + args_marg_exe_path = os.path.join(run_dir, "args_marg_eos_exe.txt") + args_post_path = os.path.join(run_dir, "args_eos_post.txt") + args_puff_path = os.path.join(run_dir, "args_puff.txt") + args_test_path = os.path.join(run_dir, "args_test.txt") + event_nchunk_path = os.path.join(run_dir, "event_nchunk.txt") + transfer_list_path = os.path.join(run_dir, "transfer_file_list.txt") + + marg.write_args_file(args_marg_path) + marg.write_exe_file(args_marg_exe_path) + marg.write_nchunk_file(event_nchunk_path) + + with open(args_post_path, "w") as f: + f.write(_build_post_args(cfg, coord_spec) + "\n") + with open(args_puff_path, "w") as f: + f.write(_build_puff_args(cfg, coord_spec) + "\n") + with open(args_test_path, "w") as f: + f.write(_build_test_args(cfg, coord_spec) + "\n") + + # ----- initial grid --------------------------------------------------- + initial_grid = _maybe_generate_initial_grid(cfg["init"], base_dir, run_dir) + + # ----- transfer-file list --------------------------------------------- + general = cfg["general"] + extra_transfer: List[str] = list(_cfg_get(general, "transfer-files") or []) + use_osg = hyper_config.truthy(_cfg_get(general, "use-osg", False)) + use_singularity = hyper_config.truthy(_cfg_get(general, "use-singularity", False)) + use_transfer = use_osg or use_singularity or bool(extra_transfer) + if use_transfer: + marg.write_transfer_file_list(transfer_list_path, extra=extra_transfer) + + # ----- command assembly ----------------------------------------------- + arch = cfg["arch"] + post_exe = _cfg_get(cfg["post"], "exe") or _which_or_self("util_ConstructEOSPosterior.py") + puff_exe = ( + _cfg_get(_cfg_get(cfg, "puff"), "exe") + or _which_or_self("util_HyperparameterPuffball.py") + ) + test_exe = _cfg_get(_cfg_get(cfg, "test"), "exe", "convergence_test_samples") + + cmd_parts: List[str] = [ + _which_or_self("create_eos_posterior_pipeline"), + f"--n-samples-per-job {int(arch['n-samples-per-job'])}", + f"--n-iterations {int(arch['n-iterations'])}", + f"--working-directory {run_dir}", + f"--input-grid {initial_grid}", + f"--marg-event-exe-list-file {args_marg_exe_path}", + f"--marg-event-args-list-file {args_marg_path}", + f"--marg-event-nchunk-list-file {event_nchunk_path}", + f"--eos-post-args {args_post_path}", + f"--eos-post-exe {post_exe}", + f"--puff-exe {puff_exe}", + f"--puff-args {args_puff_path}", + f"--test-args {args_test_path}", + f"--test-exe {test_exe}", + ] + # one --event-file per marg entry (critical for multi-event!) + for ev in marg.event_files: + cmd_parts.append(f"--event-file {ev}") + + # arch tunables + explode = _cfg_get(arch, "explode-marg-jobs") + if explode is not None: + cmd_parts.append(f"--eos-post-explode-jobs {int(explode)}") + explode_last = _cfg_get(arch, "explode-marg-jobs-last") + if explode_last is not None: + cmd_parts.append(f"--eos-post-explode-jobs-last {int(explode_last)}") + start_iter = int(_cfg_get(arch, "start-iteration", 0) or 0) + if start_iter: + cmd_parts.append(f"--start-iteration {start_iter}") - # Create necessary files (note event files will need to be copied in place) - # - MARG - lines_marg = '' - lines_marg_exe = '' - fnames_marg_exe_long = [] - lines_event_list = '' - nchunk_list =[] - for indx in range(len(config_here['marg-list'])): - prefix=base_dir - lines_marg += config_here['marg-list'][indx]['args'] + "\n" - - fname_orig = config_here['marg-list'][indx]['exe'] + "\n" - fname_clean = os.path.basename(fname_orig.strip()) - lines_marg_exe += fname_orig - loc = dag_utils.which(fname_clean) - if loc is None: - loc = base_dir + "/" +fname_clean - print(fname_orig, loc) - if not('MonteCarloMarginalizeCode/Code/bin' in loc): # don't copy core rift executables - shutil.copyfile(loc, './'+fname_clean) # copy file - fnames_marg_exe_long += [run_dir + "/" + fname_clean] - - # copy event files - if 'event file' in config_here['marg-list'][indx]: - prefix = base_dir - fname_orig = config_here['marg-list'][indx]['event file'] - if fname_orig[0] != '/': - fname_orig = base_dir +'/' +fname_orig - shutil.copy(fname_orig, 'event_file_{}.dat'.format(indx)) - else: - lines_event_list += "empty_event_file\n" - if 'n-chunk' in config_here['marg-list'][indx]: - nchunk_list.append(int(config_here['marg-list'][indx]['n-chunk'])) - else: - nchunk_list.append(1) - with open("args_marg_eos.txt", "w") as f: - f.write(lines_marg) - with open("args_marg_eos_exe.txt", "w") as f: - f.write(lines_marg_exe) - # Create event file. Use empty file if empty - with open("event_files.txt", "w") as f: - f.write(lines_event_list) - np.savetxt("event_nchunk.txt", np.array(nchunk_list,dtype=int),fmt="%i") - - # - POST - line_post ='' - line_post_exe = None - coord_names = config_here['post']['coords-fit'].split() - coord_range_blocks = config_here['post']['coords-sample'].split() - for name in coord_names: - line_post += " --parameter {} ".format(name) # no matching : fit coords in principle independent, may even use converter - for block in coord_range_blocks: - line_post += " --integration-parameter-range {} ".format(block) - with open("args_eos_post.txt", "w") as f: - f.write(line_post) - - # - PUFF - force_away_val =0.1 - if 'puff' in config_here: - if 'puff factor' in config_here['puff']: - force_away_val = config_here['puff']['puff factor'] - line_puff = ' --force-away {} '.format(force_away_val) - for name in coord_names: - line_puff += " --parameter {} ".format(name) # default: puff in all parameters - with open("args_puff.txt", "w") as f: - f.write(line_puff) - - - # Create initialization file - if 'file' in config_here['init']: - # default: copy from base directory, unless absolute - prefix = base_dir - target_file = config_here["init"]['file'] - if target_file[0] != '/': - target_file = prefix+ "/" + target_file - shutil.copy(target_file, 'initial_grid.dat') - - # Extract architecture arguments - n_iterations = config_here['arch']['n-iterations'] - n_samples_per_job = config_here['arch']['n-samples-per-job'] - - - files_transferred=False - fname_transfer_files = 'transfer_file_list.txt' - # Create command line - cmd = "create_eos_posterior_pipeline --n-samples-per-job {} --n-iterations {} --working-dir `pwd` ".format(n_samples_per_job, n_iterations) - # MARG - cmd += " --marg-event-nchunk-list-file `pwd`/event_nchunk.txt --event-file `pwd`/event_files.txt " - cmd += " --marg-event-exe-list-file `pwd`/args_marg_eos_exe.txt --marg-event-args-list-file `pwd`/args_marg_eos.txt " - # POST - cmd += " --eos-post-args `pwd`/args_eos_post.txt --eos-post-exe `which util_ConstructEOSPosterior.py` " - # PUFF - cmd += " --puff-exe `which util_HyperparameterPuffball.py` --puff-args `pwd`/args_puff.txt " - # init - cmd += " --input-grid `pwd`/initial_grid.dat " # somewhat redundant to copy it, but we might be building in place # general - if 'use-osg' in config_here['general']: - check = config_here['general']['use-osg'] - if (isinstance(check, bool) and check) or (isinstance(check, str) and (check == 'True' or check == 'true') ): - cmd += " --use-osg --use-singularity " - # Transfer executables - with open('transfer_file_list.txt', 'a') as f: - for line in fnames_marg_exe_long: - f.write(line) - files_transferred=True - if 'condor-local-nonworker' in config_here['general']: - cmd += " --condor-local-nonworker " - if 'condor-local-nonworker-igwn-prefix' in config_here['general']: - cmd += " --condor-local-nonworker-igwn-prefix " - if files_transferred: # should embed this as control to transfer each executable ? - cmd += " --transfer-file-list `pwd`/transfer_file_list.txt " # redundant - transfers all things every time for all MARG, need to improve + bool_flag_pairs = [ + ("use-osg", "--use-osg"), + ("use-singularity", "--use-singularity"), + ("use-singularity-local", "--use-singularity-local"), + ("condor-local-nonworker", "--condor-local-nonworker"), + ("condor-local-nonworker-igwn-prefix", "--condor-local-nonworker-igwn-prefix"), + ("condor-nogrid-nonworker", "--condor-nogrid-nonworker"), + ("use-full-submit-paths", "--use-full-submit-paths"), + ] + for key, flag in bool_flag_pairs: + if hyper_config.truthy(_cfg_get(general, key, False)): + cmd_parts.append(flag) + + retries = int(_cfg_get(general, "retries", 0) or 0) + if retries: + cmd_parts.append(f"--general-retries {retries}") + req_disk = _cfg_get(general, "request-disk") + if req_disk: + cmd_parts.append(f"--general-request-disk {req_disk}") + req_mem = _cfg_get(general, "request-memory") + if req_mem is not None: + cmd_parts.append(f"--request-memory-marg {int(req_mem)}") + if use_transfer: + cmd_parts.append(f"--transfer-file-list {transfer_list_path}") + + cmd = " ".join(cmd_parts) print(cmd) - os.system(cmd) + + if hyper_config.truthy(_cfg_get(general, "dry-run", False)): + logger.info("dry-run set; not invoking create_eos_posterior_pipeline.") + return + + rc = os.system(cmd) + if rc != 0: + raise SystemExit( + f"create_eos_posterior_pipeline exited with code {rc} (cmd: {cmd!r})." + ) + + +if __name__ == "__main__": + my_app() diff --git a/MonteCarloMarginalizeCode/Code/test/hyperpipe/README.md b/MonteCarloMarginalizeCode/Code/test/hyperpipe/README.md new file mode 100644 index 000000000..84e7e477a --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/test/hyperpipe/README.md @@ -0,0 +1,92 @@ +# RIFT.hyperpipe test suite + +Live integration tests for the in-tree `RIFT.hyperpipe` package +(`Code/RIFT/hyperpipe/`) and the `util_RIFT_hyperpipe.py` / +`util_HyperMargGaussian.py` console scripts (`Code/bin/`). Backed by a +self-contained [pixi](https://pixi.sh) environment so the suite works +from any RIFT clone without polluting your global Python. + +## Quick run + +```sh +# one-time, if you don't have pixi: +curl -fsSL https://pixi.sh/install.sh | bash + +cd MonteCarloMarginalizeCode/Code/test/hyperpipe +pixi install # heavy: pulls lalsuite + igwn-ligolw + gwpy + scientific stack +pixi run test # full pytest suite (the live test) +``` + +Auxiliary entry points: + +```sh +pixi run test-minimal # pytest-free smoke check (also useful in CI without pytest) +pixi run demo-dryrun # end-to-end dry-run of util_RIFT_hyperpipe.py +pixi run which-rift # confirm pixi resolved the in-tree RIFT correctly +``` + +## Layout + +``` +test/hyperpipe/ +├── pixi.toml # env spec: python + scientific stack + full lalsuite +├── README.md # you are here +└── tests/ + ├── conftest.py # auto-detects RIFT_ROOT from this file's location + ├── test_coords.py # CIP-mirror coord-spec emission + ├── test_marg_list.py # mono + heterogeneous marg-list assembly + ├── test_config.py # schema-validation paths + ├── test_drivers.py # MargDriverBase + util_HyperMargGaussian end-to-end + ├── test_hydra_integration.py # real-Hydra subprocess test of util_RIFT_hyperpipe.py + ├── standalone_check.py # pytest-free smoke test + └── demo_dryrun.py # full dry-run demo +``` + +The `[activation.env]` block in `pixi.toml` derives `RIFT_ROOT` / +`RIFT_PY` / `RIFT_BIN` relative to `$PIXI_PROJECT_ROOT`, prepends +`$RIFT_BIN` to `PATH`, and sets `PYTHONPATH` so the pytest subprocesses +can find the in-tree `RIFT` package. No user-specific hardcoded paths. + +## What each suite proves + +| File | What it proves | +|---|---| +| `test_coords.py` | Coord-spec emission exactly matches the Gaussian-demo `args_*.txt` files (including `-8` vs `-8.0` formatting). NICER-style spec with `--supplementary-coordinate-code` + likelihood-factor trio renders correctly. | +| `test_marg_list.py` | Mono-driver assembly reproduces the Gaussian demo verbatim. Heterogeneous assembly (NICER + GW) preserves per-driver batch sizes and routes the per-driver coord-module override only to the entry that asked for it. | +| `test_config.py` | `validate_config` rejects empty configs / empty `marg-list` / non-positive `n-iterations` / missing `init`. Truthy coercion handles "true"/"True"/"1"/`True`/`1`. | +| `test_drivers.py` | `MargDriverBase` read→mutate→write round-trips without numpy fixed-width truncation. The actual `util_HyperMargGaussian.py` subprocess runs against a 5-point grid and the mode at `(4, 0, 0)` outranks the far point at `(10, 10, 10)`. | +| `test_hydra_integration.py` | The real Hydra entry point of `util_RIFT_hyperpipe.py` runs as a subprocess with CLI overrides, writes every expected artefact (`args_*.txt`, `event-0.net`, `event_nchunk.txt`, `initial_grid.dat`, `transfer_file_list.txt`), and the emitted `create_eos_posterior_pipeline` command line carries every flag we asked for. | + +## When something fails + +* **`RIFT root not found`** in test skip messages: you're outside a RIFT + clone, or this suite has been moved away from + `test/hyperpipe/tests/`. `pixi run which-rift` should print a + sensible path. +* **lalsuite solver issues on Apple Silicon**: conda-forge's lalsuite is + reasonably supported on `osx-arm64` now; if you hit solver trouble, + try `pixi install --platform osx-64` (Rosetta). +* **`PYTHONPATH` collisions** with an existing system RIFT install: + inside `pixi shell`, `python -c "import RIFT; print(RIFT.__file__)"` + should resolve to the in-tree copy. If not, check whether some + outer activation script is overriding `PYTHONPATH`. + +## Adding a new test + +The `hp_modules` fixture in `conftest.py` exposes the four lightweight +hyperpipe modules (`coords`, `config`, `marg_list`, `drivers_base`) as +attributes on a namespace. New tests should follow the existing pattern: + +```python +def test_my_new_thing(hp_modules, tmp_path): + spec = hp_modules.coords.HyperCoordSpec.from_strings( + coords_fit="x y", coords_sample="x:[0,1] y:[0,1]", + ) + ... +``` + +For tests that need to run `util_RIFT_hyperpipe.py` or `util_HyperMargGaussian.py` +as a subprocess (i.e. exercise the real Hydra entry point), use the +`rift_py` / `rift_bin` fixtures to find the scripts and propagate +`PYTHONPATH` to the child via `os.environ.copy()` — see +`test_hydra_integration.py` for the canonical example. diff --git a/MonteCarloMarginalizeCode/Code/test/hyperpipe/pixi.toml b/MonteCarloMarginalizeCode/Code/test/hyperpipe/pixi.toml new file mode 100644 index 000000000..1c152672a --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/test/hyperpipe/pixi.toml @@ -0,0 +1,74 @@ +# Pixi project for the RIFT.hyperpipe test suite. +# +# Location: $RIFT_ROOT/MonteCarloMarginalizeCode/Code/test/hyperpipe/ +# +# The full lalsuite stack is in the default env because RIFT/__init__.py +# unconditionally imports lalsimutils --- ``import RIFT.hyperpipe.*`` +# always pays for the LAL chain. Heavier ``pixi install``, but matches +# how RIFT is actually deployed. +# +# Tasks +# ----- +# pixi run test # full pytest suite (the live test) +# pixi run test-minimal # pytest-free smoke check +# pixi run demo-dryrun # end-to-end dry-run of util_RIFT_hyperpipe.py +# pixi run which-rift # confirm RIFT_ROOT is wired correctly +# +# First-time setup +# ---------------- +# curl -fsSL https://pixi.sh/install.sh | bash # one-time +# cd $RIFT_ROOT/MonteCarloMarginalizeCode/Code/test/hyperpipe +# pixi install +# pixi run test + +[project] +name = "rift-hyperpipe-test" +version = "0.1.0" +description = "Live-test harness for RIFT.hyperpipe (in-tree hyperpipeline driver)." +authors = ["RIFT developers"] +channels = ["conda-forge"] +platforms = ["osx-64", "osx-arm64", "linux-64"] + +[dependencies] +python = ">=3.11,<3.13" +numpy = "*" +scipy = "*" +hydra-core = "*" +omegaconf = "*" +pyyaml = "*" +pytest = "*" +# Full RIFT runtime stack. Required because RIFT/__init__.py +# unconditionally imports lalsimutils. +six = "*" +h5py = "*" +matplotlib = "*" +lalsuite = "*" +python-lal = "*" +python-lalsimulation = "*" +python-lalmetaio = "*" +python-lalframe = "*" +igwn-ligolw = "*" +lscsoft-glue = "*" +gwpy = "*" + +# Resolve RIFT paths relative to this pixi.toml's location so the +# project works from any RIFT clone (no hardcoded user path). The +# layout is: +# +# $RIFT_ROOT/MonteCarloMarginalizeCode/Code/test/hyperpipe/pixi.toml +# ^^^^^^^^^^^^^ +# PIXI_PROJECT_ROOT +# +# so going up four levels lands at $RIFT_ROOT. +[activation.env] +RIFT_ROOT = "$PIXI_PROJECT_ROOT/../../../.." +RIFT_PY = "$PIXI_PROJECT_ROOT/../.." +RIFT_BIN = "$PIXI_PROJECT_ROOT/../../bin" +PYTHONPATH = "$PIXI_PROJECT_ROOT/../..:$PYTHONPATH" +PATH = "$PIXI_PROJECT_ROOT/../../bin:$PATH" + +[tasks] +test = "pytest -v tests/" +test-minimal = "python tests/standalone_check.py" +demo-dryrun = "python tests/demo_dryrun.py" +which-rift = "echo RIFT_ROOT=$RIFT_ROOT && ls -la $RIFT_PY/RIFT/hyperpipe/" diff --git a/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/conftest.py b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/conftest.py new file mode 100644 index 000000000..4844e382c --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/conftest.py @@ -0,0 +1,86 @@ +""" +pytest fixtures + path wiring for the in-tree RIFT.hyperpipe test suite. + +Resolves ``RIFT_ROOT`` in two ways, in order: + 1. the ``$RIFT_ROOT`` environment variable, if set (e.g. by pixi's + activation block); + 2. walking up from this file's location --- with the canonical layout + ``$RIFT_ROOT/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/conftest.py``, + ``parents[5]`` of ``__file__`` is ``$RIFT_ROOT``. + +This means the suite works from any RIFT clone with no user-specific +paths to edit. +""" +from __future__ import annotations + +import importlib +import os +import sys +import types +from pathlib import Path + +import pytest + + +_HERE = Path(__file__).resolve() + + +def _is_rift_root(p: Path) -> bool: + return (p / "MonteCarloMarginalizeCode" / "Code" / "RIFT" / "hyperpipe").exists() + + +def _rift_root() -> Path: + # 1. honor explicit env var if it points at a real RIFT clone + env = os.environ.get("RIFT_ROOT") + if env: + p = Path(env).resolve() + if _is_rift_root(p): + return p + # 2. canonical in-tree location: parents[5] of this file + if len(_HERE.parents) > 5: + candidate = _HERE.parents[5] + if _is_rift_root(candidate): + return candidate + # 3. give up + pytest.skip( + "Could not locate RIFT root. Either set $RIFT_ROOT or run this " + "suite from its canonical location at " + "$RIFT_ROOT/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/." + ) + + +@pytest.fixture(scope="session") +def rift_root() -> Path: + return _rift_root() + + +@pytest.fixture(scope="session") +def hyperpipe_dir(rift_root: Path) -> Path: + return rift_root / "MonteCarloMarginalizeCode" / "Code" / "RIFT" / "hyperpipe" + + +@pytest.fixture(scope="session") +def rift_bin(rift_root: Path) -> Path: + return rift_root / "MonteCarloMarginalizeCode" / "Code" / "bin" + + +@pytest.fixture(scope="session") +def rift_py(rift_root: Path) -> Path: + return rift_root / "MonteCarloMarginalizeCode" / "Code" + + +@pytest.fixture(scope="session") +def hp_modules(rift_py: Path): + """Import the four lightweight hyperpipe modules and expose on a namespace.""" + if str(rift_py) not in sys.path: + sys.path.insert(0, str(rift_py)) + coords = importlib.import_module("RIFT.hyperpipe.coords") + config = importlib.import_module("RIFT.hyperpipe.config") + marg_list = importlib.import_module("RIFT.hyperpipe.marg_list") + drivers_base = importlib.import_module("RIFT.hyperpipe.drivers.base") + return types.SimpleNamespace( + coords=coords, + config=config, + marg_list=marg_list, + drivers_base=drivers_base, + ) diff --git a/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/demo_dryrun.py b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/demo_dryrun.py new file mode 100644 index 000000000..57f2ea217 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/demo_dryrun.py @@ -0,0 +1,94 @@ +""" +End-to-end dry-run demo: drives util_RIFT_hyperpipe.py with a +Gaussian-like config, lets it write all the per-stage args files into +./demo_run/, and prints the resulting create_eos_posterior_pipeline +command line --- WITHOUT actually executing it. + +Run via: ``pixi run demo-dryrun`` +""" +from __future__ import annotations + +import os +import shlex +import shutil +import subprocess +import sys +from pathlib import Path + + +_HERE = Path(__file__).resolve() + + +def _is_rift_root(p: Path) -> bool: + return (p / "MonteCarloMarginalizeCode" / "Code" / "RIFT" / "hyperpipe").exists() + + +def _rift_root() -> Path: + env = os.environ.get("RIFT_ROOT") + if env: + p = Path(env).resolve() + if _is_rift_root(p): + return p + if len(_HERE.parents) > 5: + candidate = _HERE.parents[5] + if _is_rift_root(candidate): + return candidate + raise SystemExit("RIFT root not found.") + + +RIFT_ROOT = _rift_root() +RIFT_PY = RIFT_ROOT / "MonteCarloMarginalizeCode" / "Code" +SCRIPT = RIFT_PY / "bin" / "util_RIFT_hyperpipe.py" +GAUSS_EXE = RIFT_PY / "bin" / "util_HyperMargGaussian.py" + +run_dir = Path("demo_run").resolve() +base_dir = Path("demo_base").resolve() + +for d in (run_dir, base_dir): + if d.exists(): + shutil.rmtree(d) +base_dir.mkdir() + +grid = base_dir / "blind_gaussian_plus_minus.dat" +with open(grid, "w") as f: + f.write("# lnL sigma_lnL x y z\n") + for v in (-4, -2, 0, 2, 4): + f.write(f"0 0 {v} 0 0\n") + f.write(f"0 0 {v} 1 -1\n") + +overrides = [ + f"general.rundir={run_dir}", + "general.dry-run=true", + "general.use-osg=true", + "general.use-singularity=true", + "general.condor-local-nonworker=true", + "general.condor-local-nonworker-igwn-prefix=true", + "general.retries=5", + "general.request-disk=2G", + "arch.n-iterations=20", + "arch.explode-marg-jobs=5", + f"init.file={grid}", + 'marg-list=[{name:gaussian, exe:' + shlex.quote(str(GAUSS_EXE)) + + ', args:"--outdir Gaussian_example --conforming-output-name", ' + 'event-file:null, n-chunk:100, coord-module:null, extra-args:""}]', +] + +cmd = [sys.executable, str(SCRIPT), *overrides] +env = dict(os.environ) +env["PYTHONPATH"] = str(RIFT_PY) + os.pathsep + env.get("PYTHONPATH", "") + +print("=" * 72) +print("RIFT_ROOT =", RIFT_ROOT) +print("Invoking:", " ".join(cmd)) +print("=" * 72) +proc = subprocess.run(cmd, env=env, cwd=base_dir) +if proc.returncode != 0: + sys.exit(proc.returncode) + +print("\n" + "=" * 72) +print("Artefacts written into:", run_dir) +print("=" * 72) +for p in sorted(run_dir.iterdir()): + if p.is_file() and p.stat().st_size < 4096: + print(f"\n--- {p.name} ---") + print(p.read_text().rstrip()) diff --git a/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/standalone_check.py b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/standalone_check.py new file mode 100644 index 000000000..d7c790154 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/standalone_check.py @@ -0,0 +1,131 @@ +""" +Standalone (no-pytest) smoke test --- useful for ``pixi run test-minimal``. + +Verifies the same things as the pytest suite, but runs as a regular +script so it's easy to invoke from a make rule or a CI job that doesn't +want a pytest dependency. + +Locates RIFT_ROOT by: + 1. honoring the ``$RIFT_ROOT`` env var if set; + 2. otherwise walking up from this file's location + (``parents[5]`` of the canonical + ``test/hyperpipe/tests/standalone_check.py`` path is the RIFT root). +""" +from __future__ import annotations + +import importlib.util +import os +import sys +import tempfile +import types +from pathlib import Path + + +_HERE = Path(__file__).resolve() + + +def _is_rift_root(p: Path) -> bool: + return (p / "MonteCarloMarginalizeCode" / "Code" / "RIFT" / "hyperpipe").exists() + + +def _rift_root() -> Path: + env = os.environ.get("RIFT_ROOT") + if env: + p = Path(env).resolve() + if _is_rift_root(p): + return p + if len(_HERE.parents) > 5: + candidate = _HERE.parents[5] + if _is_rift_root(candidate): + return candidate + raise SystemExit( + "Could not locate RIFT root. Set $RIFT_ROOT or run from " + "$RIFT_ROOT/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/." + ) + + +RIFT_ROOT = _rift_root() +HP = RIFT_ROOT / "MonteCarloMarginalizeCode" / "Code" / "RIFT" / "hyperpipe" +RIFT_PY = RIFT_ROOT / "MonteCarloMarginalizeCode" / "Code" + +# Bypass RIFT/__init__.py so we don't need the full lalsuite stack to run +# the smoke test --- this script is intended to be runnable from any +# Python env that has numpy. +fake_rift = types.ModuleType("RIFT") +fake_rift.__path__ = [str(RIFT_PY / "RIFT")] +sys.modules["RIFT"] = fake_rift +fake_hp = types.ModuleType("RIFT.hyperpipe") +fake_hp.__path__ = [str(HP)] +sys.modules["RIFT.hyperpipe"] = fake_hp + + +def _load(name, path): + spec = importlib.util.spec_from_file_location(name, str(path)) + mod = importlib.util.module_from_spec(spec) + sys.modules[name] = mod + spec.loader.exec_module(mod) + return mod + + +coords = _load("RIFT.hyperpipe.coords", HP / "coords.py") +config = _load("RIFT.hyperpipe.config", HP / "config.py") +marg_list = _load("RIFT.hyperpipe.marg_list", HP / "marg_list.py") +drivers_base = _load("RIFT.hyperpipe.drivers.base", HP / "drivers" / "base.py") + + +def run_checks() -> None: + # 1. coord-spec + spec = coords.HyperCoordSpec.from_strings( + coords_fit="x y z", + coords_sample="x:[-8,8] y:[-8,8] z:[-8,8]", + ) + spec.validate(strict_import=False) + assert "--integration-parameter-range x:[-8,8]" in spec.to_post_args() + + # 2. mono marg-list assembly + with tempfile.TemporaryDirectory() as tmpd: + base = Path(tmpd) / "base" + run = Path(tmpd) / "run" + base.mkdir() + run.mkdir() + (base / "example.py").write_text("#!/usr/bin/env python\n") + os.chmod(base / "example.py", 0o755) + cfg = { + "marg-list": [ + {"name": "g", "exe": "example.py", "args": "--ok", + "event-file": None, "n-chunk": 100, "coord-module": None} + ] + } + m = marg_list.assemble_marg_list(cfg, base_dir=str(base), run_dir=str(run)) + assert m.n_chunks == [100] + assert (run / "event-0.net").read_text().strip() == "empty_event_file" + + # 3. validate_config rejects empty config + try: + config.validate_config({}) + except ValueError: + pass + else: + raise AssertionError("validate_config({}) should have raised") + + # 4. base driver round-trip + with tempfile.TemporaryDirectory() as tmpd: + grid = Path(tmpd) / "g.dat" + grid.write_text("# lnL sigma_lnL x y z\n0 0 1.0 2.0 3.0\n") + rows, cols = drivers_base.read_grid(f"file:{grid}") + assert cols == ["x", "y", "z"] + rows[0, 0] = "-3.1415926535" + out = drivers_base.write_marg_output( + rows, cols, + fname_output_integral="f.txt", + outdir=str(Path(tmpd) / "out"), + fname=None, + conforming_output_name=True, + ) + assert "-3.1415926535" in Path(out).read_text() + + print(f"standalone_check: ALL OK (RIFT_ROOT={RIFT_ROOT})") + + +if __name__ == "__main__": + run_checks() diff --git a/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_config.py b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_config.py new file mode 100644 index 000000000..619de2bae --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_config.py @@ -0,0 +1,66 @@ +"""Unit tests for RIFT.hyperpipe.config.validate_config + helpers.""" +from __future__ import annotations + +import pytest + + +def test_validate_missing_section(hp_modules): + with pytest.raises(ValueError, match="missing required section"): + hp_modules.config.validate_config({}) + + +def test_validate_empty_marg_list(hp_modules): + cfg = { + "arch": {"n-iterations": 5, "n-samples-per-job": 1000}, + "post": {"coords-fit": "x"}, + "marg-list": [], + "puff": {}, + "init": {"file": "x"}, + "general": {}, + } + with pytest.raises(ValueError, match="at least one entry"): + hp_modules.config.validate_config(cfg) + + +def test_validate_bad_arch(hp_modules): + cfg = { + "arch": {"n-iterations": 0, "n-samples-per-job": 1000}, + "post": {"coords-fit": "x"}, + "marg-list": [{"exe": "x"}], + "puff": {}, + "init": {"file": "x"}, + "general": {}, + } + with pytest.raises(ValueError, match="n-iterations"): + hp_modules.config.validate_config(cfg) + + +def test_validate_missing_init(hp_modules): + cfg = { + "arch": {"n-iterations": 5, "n-samples-per-job": 1000}, + "post": {"coords-fit": "x"}, + "marg-list": [{"exe": "x"}], + "puff": {}, + "init": {}, + "general": {}, + } + with pytest.raises(ValueError, match="init.file"): + hp_modules.config.validate_config(cfg) + + +def test_validate_happy_path(hp_modules): + hp_modules.config.validate_config({ + "arch": {"n-iterations": 5, "n-samples-per-job": 1000}, + "post": {"coords-fit": "x y z"}, + "marg-list": [{"exe": "example.py"}], + "puff": {}, + "init": {"file": "/tmp/x"}, + "general": {}, + }) + + +def test_truthy_string_handling(hp_modules): + t = hp_modules.config.truthy + assert t("true") and t("True") and t("YES") and t("1") and t("on") + assert not t("false") and not t("no") and not t("") and not t(None) + assert t(True) and not t(False) and t(1) and not t(0) diff --git a/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_coords.py b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_coords.py new file mode 100644 index 000000000..72391081b --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_coords.py @@ -0,0 +1,79 @@ +""" +Unit tests for RIFT.hyperpipe.coords. +""" +from __future__ import annotations + +import pytest + + +def test_parse_range_block(hp_modules): + c = hp_modules.coords + assert c.parse_range_block("x:[-8,8]") == ("x", (-8.0, 8.0)) + assert c.parse_range_block("g0=[0.2,2]") == ("g0", (0.2, 2.0)) + + +def test_parse_range_string(hp_modules): + c = hp_modules.coords + out = c.parse_range_string("x:[-8,8] y:[-1.5,1.5] z:[0,1e3]") + assert out == {"x": (-8.0, 8.0), "y": (-1.5, 1.5), "z": (0.0, 1e3)} + + +def test_gaussian_demo_emission(hp_modules): + """post / puff / test args must match the Gaussian demo verbatim.""" + spec = hp_modules.coords.HyperCoordSpec.from_strings( + coords_fit="x y z", + coords_sample="x:[-8,8] y:[-8,8] z:[-8,8]", + ) + spec.validate(strict_import=False) + + assert sorted(spec.to_post_args().split()) == sorted( + "--parameter x --parameter y --parameter z " + "--integration-parameter-range x:[-8,8] " + "--integration-parameter-range y:[-8,8] " + "--integration-parameter-range z:[-8,8]".split() + ) + assert sorted(spec.to_puff_args(force_away=0.03, puff_factor=0.5).split()) == sorted( + "--parameter x --parameter y --parameter z " + "--force-away 0.03 --puff-factor 0.5".split() + ) + assert sorted(spec.to_test_args().split()) == sorted( + "--parameter x --parameter y --parameter z " + "--method JS --threshold 0.05".split() + ) + + +def test_nicer_style_post_args(hp_modules): + """NICER-style spec with coord module and likelihood-factor trio.""" + spec = hp_modules.coords.HyperCoordSpec.from_strings( + name="rift_default", + coords_fit="g0 g1 g2 g3", + coords_sample="g0:[0.2,2] g1:[-1.6,1.7] g2:[-0.6,0.6] g3:[-0.02,0.02]", + likelihood_factor=("my_module", "my_factor", "my.ini"), + ) + post = spec.to_post_args() + for needle in ( + "--supplementary-coordinate-code rift_default", + "--supplementary-likelihood-factor-code my_module", + "--supplementary-likelihood-factor-function my_factor", + "--supplementary-likelihood-factor-ini my.ini", + "--integration-parameter-range g1:[-1.6,1.7]", + ): + assert needle in post + + +def test_fmt_num_integer_preserved(hp_modules): + """Integer-valued floats round-trip as ints (matches demo format).""" + f = hp_modules.coords.HyperCoordSpec._fmt_num + assert f(-8.0) == "-8" + assert f(2.0) == "2" + assert f(-1.6) == "-1.6" + assert f(0.02) == "0.02" + + +def test_validate_rejects_missing_range(hp_modules): + spec = hp_modules.coords.HyperCoordSpec.from_strings( + coords_fit="x y", + coords_sample="x:[-1,1]", + ) + with pytest.raises(ValueError, match="No integration range supplied"): + spec.validate(strict_import=False) diff --git a/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_drivers.py b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_drivers.py new file mode 100644 index 000000000..817e87219 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_drivers.py @@ -0,0 +1,98 @@ +""" +Tests for the toy Gaussian marg driver. + +Exercises the full in-tree code path (real env has scipy + lalsuite). +""" +from __future__ import annotations + +import os +import subprocess +import sys +from pathlib import Path + +import numpy as np + + +def test_base_driver_grid_roundtrip(hp_modules, tmp_path): + """Read -> mutate lnL/sigma -> write should round-trip without truncation.""" + grid = tmp_path / "grid.dat" + grid.write_text( + "# lnL sigma_lnL x y z\n" + "0 0 1.0 2.0 3.0\n" + "0 0 -1.0 -2.0 -3.0\n" + ) + rows, cols = hp_modules.drivers_base.read_grid(f"file:{grid}") + assert cols == ["x", "y", "z"] + + rows[0, 0] = "-3.1415926535" + rows[0, 1] = "1.234567e-3" + out = hp_modules.drivers_base.write_marg_output( + rows[:1], cols, + fname_output_integral="f.txt", + outdir=str(tmp_path / "out"), + fname=None, + conforming_output_name=True, + ) + assert Path(out).name == "f.txt+annotation.dat" + text = Path(out).read_text() + assert "-3.1415926535" in text + assert "1.234567e-3" in text + assert "lnL" in text and "sigma_lnL" in text and "x y z" in text + + +def test_gaussian_driver_end_to_end(rift_root, rift_py, tmp_path): + """ + Actually run util_HyperMargGaussian.py against a tiny grid and verify + the output file shape and that the mode point at (4, 0, 0) outranks + the far point at (10, 10, 10). + """ + grid = tmp_path / "grid.dat" + points = [ + ( 4.0, 0.0, 0.0), + (-4.0, 0.0, 0.0), + ( 0.0, 0.0, 0.0), + (10.0, 10.0, 10.0), + ( 4.0, 0.5, -0.5), + ] + with open(grid, "w") as f: + f.write("# lnL sigma_lnL x y z\n") + for x, y, z in points: + f.write(f"0 0 {x} {y} {z}\n") + + outdir = tmp_path / "out" + + cmd = [ + sys.executable, + str(rift_py / "bin" / "util_HyperMargGaussian.py"), + "--using-eos", f"file:{grid}", + "--eos_start_index", "0", + "--eos_end_index", "5", + "--outdir", str(outdir), + "--fname-output-integral", "lnL.txt", + "--conforming-output-name", + ] + env = dict(os.environ) + env["PYTHONPATH"] = str(rift_py) + os.pathsep + env.get("PYTHONPATH", "") + + proc = subprocess.run(cmd, capture_output=True, text=True, env=env) + assert proc.returncode == 0, ( + f"util_HyperMargGaussian.py exited {proc.returncode}\n" + f"STDOUT:\n{proc.stdout}\nSTDERR:\n{proc.stderr}" + ) + + out_file = outdir / "lnL.txt+annotation.dat" + assert out_file.exists(), f"expected {out_file} to exist" + + lines = [ln for ln in out_file.read_text().splitlines() if ln.strip()] + assert lines[0].lstrip("#").split() == ["lnL", "sigma_lnL", "x", "y", "z"] + data_lines = lines[1:] + assert len(data_lines) == 5 + for ln in data_lines: + assert len(ln.split()) == 5 + + arr = np.array([[float(c) for c in ln.split()] for ln in data_lines]) + lnL = arr[:, 0] + assert lnL[0] > lnL[3], ( + f"mode point (4,0,0) should have higher lnL than (10,10,10); got {lnL}" + ) + assert lnL[1] > lnL[3] diff --git a/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_hydra_integration.py b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_hydra_integration.py new file mode 100644 index 000000000..8a34eaa86 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_hydra_integration.py @@ -0,0 +1,130 @@ +""" +Live integration test: invoke util_RIFT_hyperpipe.py as a subprocess with +the real hydra-core / omegaconf installed by pixi. + +Verifies that the emitted command line and per-stage args/exe/event/nchunk +files match what create_eos_posterior_pipeline expects, without actually +running the DAG-builder (general.dry-run=true). +""" +from __future__ import annotations + +import os +import shlex +import subprocess +import sys +from pathlib import Path + +import pytest + + +def _ensure_hydra_available() -> None: + try: + import hydra # noqa: F401 + import omegaconf # noqa: F401 + except ImportError as exc: + pytest.skip(f"hydra/omegaconf not importable in this env: {exc}") + + +def test_util_rift_hyperpipe_dry_run(rift_root, rift_py, tmp_path): + _ensure_hydra_available() + + base = tmp_path / "base" + run = tmp_path / "run" + base.mkdir() + real_exe = rift_py / "bin" / "util_HyperMargGaussian.py" + if not real_exe.exists(): + pytest.skip(f"in-tree {real_exe} missing") + + grid_path = base / "blind_gaussian_plus_minus.dat" + grid_path.write_text( + "# lnL sigma_lnL x y z\n" + + "\n".join(f"0 0 {i} {i} {i}" for i in range(10)) + + "\n" + ) + + overrides = [ + f"general.rundir={run}", + "general.dry-run=true", + "general.use-osg=true", + "general.use-singularity=true", + "general.condor-local-nonworker=true", + "general.condor-local-nonworker-igwn-prefix=true", + "general.retries=5", + "general.request-disk=2G", + "arch.n-iterations=20", + "arch.n-samples-per-job=1000", + "arch.explode-marg-jobs=5", + f"init.file={grid_path}", + 'marg-list=[{name:gaussian, exe:' + + shlex.quote(str(real_exe)) + + ', args:"--outdir Gaussian_example --conforming-output-name", ' + 'event-file:null, n-chunk:100, coord-module:null, extra-args:""}]', + ] + + cmd = [sys.executable, str(rift_py / "bin" / "util_RIFT_hyperpipe.py"), *overrides] + env = dict(os.environ) + env["PYTHONPATH"] = str(rift_py) + os.pathsep + env.get("PYTHONPATH", "") + + proc = subprocess.run(cmd, capture_output=True, text=True, env=env, cwd=base) + print("STDOUT:", proc.stdout) + print("STDERR:", proc.stderr) + assert proc.returncode == 0, ( + f"util_RIFT_hyperpipe.py exited {proc.returncode}\n" + f"STDOUT:\n{proc.stdout}\nSTDERR:\n{proc.stderr}" + ) + + for fname in ( + "args_marg_eos.txt", + "args_marg_eos_exe.txt", + "args_eos_post.txt", + "args_puff.txt", + "args_test.txt", + "event-0.net", + "event_nchunk.txt", + "initial_grid.dat", + "transfer_file_list.txt", + ): + path = run / fname + assert path.exists(), f"expected {path} to be written" + + assert (run / "event-0.net").read_text().strip() == "empty_event_file" + assert (run / "event_nchunk.txt").read_text().strip() == "100" + + marg_args = (run / "args_marg_eos.txt").read_text().strip() + assert marg_args == "--outdir Gaussian_example --conforming-output-name", marg_args + + post = (run / "args_eos_post.txt").read_text() + for needle in ( + "--parameter x", "--parameter y", "--parameter z", + "--integration-parameter-range x:[-8,8]", + "--integration-parameter-range y:[-8,8]", + "--integration-parameter-range z:[-8,8]", + ): + assert needle in post, (needle, post) + + full = proc.stdout + for needle in ( + "create_eos_posterior_pipeline", + "--n-samples-per-job 1000", + "--n-iterations 20", + "--marg-event-exe-list-file", + "--marg-event-args-list-file", + "--marg-event-nchunk-list-file", + "--eos-post-args", + "--eos-post-exe", + "--puff-exe", + "--puff-args", + "--test-args", + "--test-exe convergence_test_samples", + "--use-osg", + "--use-singularity", + "--condor-local-nonworker", + "--condor-local-nonworker-igwn-prefix", + "--general-retries 5", + "--general-request-disk 2G", + "--request-memory-marg 16384", + "--eos-post-explode-jobs 5", + "--transfer-file-list", + "--event-file", + ): + assert needle in full, f"missing flag in dry-run command line: {needle}" diff --git a/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_marg_list.py b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_marg_list.py new file mode 100644 index 000000000..1c61ea44c --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_marg_list.py @@ -0,0 +1,100 @@ +""" +Unit tests for RIFT.hyperpipe.marg_list. +""" +from __future__ import annotations + +import os +from pathlib import Path + +import pytest + + +def _make_fake_exe(p: Path) -> None: + p.write_text("#!/usr/bin/env python\n") + os.chmod(p, 0o755) + + +def test_mono_gaussian_assembly(hp_modules, tmp_path): + base = tmp_path / "base" + run = tmp_path / "run" + base.mkdir() + run.mkdir() + _make_fake_exe(base / "example_gaussian.py") + + cfg = { + "marg-list": [ + { + "name": "gaussian", + "exe": "example_gaussian.py", + "args": "--outdir Gaussian_example --conforming-output-name", + "event-file": None, + "n-chunk": 100, + "coord-module": None, + } + ] + } + marg = hp_modules.marg_list.assemble_marg_list( + cfg, base_dir=str(base), run_dir=str(run) + ) + + assert marg.names == ["gaussian"] + assert marg.n_chunks == [100] + assert marg.args_lines == ["--outdir Gaussian_example --conforming-output-name"] + assert Path(marg.exe_paths[0]).name == "example_gaussian.py" + assert (run / "event-0.net").read_text().strip() == "empty_event_file" + + marg.write_args_file(str(run / "args_marg_eos.txt")) + marg.write_exe_file(str(run / "args_marg_eos_exe.txt")) + marg.write_nchunk_file(str(run / "event_nchunk.txt")) + assert (run / "args_marg_eos.txt").read_text().strip() == ( + "--outdir Gaussian_example --conforming-output-name" + ) + assert (run / "event_nchunk.txt").read_text().strip() == "100" + + +def test_heterogeneous_nicer_plus_gw(hp_modules, tmp_path): + """Two drivers, different batch sizes, per-driver coord override.""" + base = tmp_path / "base" + run = tmp_path / "run" + base.mkdir() + run.mkdir() + _make_fake_exe(base / "reading_NICER_MR.py") + _make_fake_exe(base / "util_ConstructIntrinsicPosterior_GenericCoordinates.py") + (base / "my_event_B.net").write_text("# lnL sigma_lnL m1 m2\n0 0 1.4 1.4\n") + + cfg = { + "marg-list": [ + { + "name": "nicer", + "exe": "reading_NICER_MR.py", + "args": "--j0740 --j0030 --conforming-output-name", + "event-file": None, + "n-chunk": 5, + }, + { + "name": "gw", + "exe": "util_ConstructIntrinsicPosterior_GenericCoordinates.py", + "args": "--eos-param spectral --aligned-prior alignedspin-zprior", + "event-file": "my_event_B.net", + "n-chunk": 1, + "coord-module": "rift_default", + }, + ] + } + marg = hp_modules.marg_list.assemble_marg_list( + cfg, base_dir=str(base), run_dir=str(run) + ) + assert marg.n_chunks == [5, 1] + assert marg.names == ["nicer", "gw"] + assert "--supplementary-coordinate-code rift_default" in marg.args_lines[1] + assert "--supplementary-coordinate-code" not in marg.args_lines[0] + assert (run / "event-0.net").read_text().strip() == "empty_event_file" + assert "m1 m2" in (run / "event-1.net").read_text() + + +def test_missing_exe_raises(hp_modules, tmp_path): + cfg = {"marg-list": [{"name": "x"}]} + with pytest.raises(ValueError, match="missing required key 'exe'"): + hp_modules.marg_list.assemble_marg_list( + cfg, base_dir=str(tmp_path), run_dir=str(tmp_path) + ) From f2a1fe02ac54ef2c227d5cdd55de3830cd2435e3 Mon Sep 17 00:00:00 2001 From: Richard O'Shaughnessy Date: Tue, 12 May 2026 14:48:50 -0400 Subject: [PATCH 02/24] issue with file locationwq --- MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/marg_list.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/marg_list.py b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/marg_list.py index be060262a..8ee27d864 100644 --- a/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/marg_list.py +++ b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/marg_list.py @@ -155,7 +155,7 @@ def _stage_event_file( base_dir: str, run_dir: str, ) -> Tuple[str, bool]: - """Materialize this entry's event file at run_dir/event-.net. + """Materialize this entry's event file at base_dir/event-.net. Returns ``(abs_path, is_empty_sentinel)``. If the entry has no ``event-file`` set, we write a sentinel file with the single token @@ -167,7 +167,7 @@ def _stage_event_file( src = entry.get("event-file") or entry.get("event_file") elif "event-file" in entry: src = entry["event-file"] - dest = os.path.join(run_dir, f"event-{indx}.net") + dest = os.path.join(base_dir, f"event-{indx}.net") if src: src = os.path.expanduser(src) if not os.path.isabs(src): From 86714aeb73b35dbd3f2df13f00843570916d9f04 Mon Sep 17 00:00:00 2001 From: Richard O'Shaughnessy Date: Tue, 12 May 2026 15:35:33 -0700 Subject: [PATCH 03/24] missing import --- MonteCarloMarginalizeCode/Code/RIFT/misc/dag_utils_generic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/dag_utils_generic.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/dag_utils_generic.py index e2fea2715..53edce805 100644 --- a/MonteCarloMarginalizeCode/Code/RIFT/misc/dag_utils_generic.py +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/dag_utils_generic.py @@ -120,6 +120,8 @@ def emit_dag(self, dag, path): from time import time from hashlib import md5 +import shutil + import numpy as np import configparser From 5d98d443d1ce235fd172fbbd99c19f48c84b78b1 Mon Sep 17 00:00:00 2001 From: Richard O'Shaughnessy Date: Tue, 12 May 2026 19:06:21 -0400 Subject: [PATCH 04/24] Fix CI: skip .yaml files in test-all-bin.sh --- .travis/test-all-bin.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis/test-all-bin.sh b/.travis/test-all-bin.sh index a9362eed2..d71bdcbd6 100755 --- a/.travis/test-all-bin.sh +++ b/.travis/test-all-bin.sh @@ -41,7 +41,7 @@ for EXE in MonteCarloMarginalizeCode/Code/bin/*; do # continue # fi # skip non-python scripts - if [[ ${EXE} == *".sh" ]]; then + if [[ ${EXE} == *".sh" ]] || [[ ${EXE} == *".yaml" ]]; then echo " Not python : " ${EXE} continue fi From 7048c3ac195a5283cba58e1803d2410fe9269769 Mon Sep 17 00:00:00 2001 From: Richard O'Shaughnessy Date: Thu, 14 May 2026 08:08:19 -0400 Subject: [PATCH 05/24] Add RIFT.misc.tracer_placement engine + util_*TracerUpdate.py tools (preview) Parsimonious-placement project: tracer-particle iterative grid update intended to reduce the number of expensive likelihood evaluations required at each adaptive iteration. New code: - RIFT.misc.tracer_placement: engine package with SMC+MALA, birth-death, and SMC-MALA+BD samplers; pluggable RF / RBF / polynomial / quadratic fits via tracer_placement.fits.build(method, X, Y, sigma=None). - bin/util_HyperparameterTracerUpdate.py: drop-in alternative to util_HyperparameterPuffball.py for the hyperpipeline. - bin/util_ParameterTracerUpdate.py: drop-in alternative to util_ParameterPuffball.py for the event-level iterator. Both tools refit their own surrogate from the same .dat file CIP reads (no CIP coupling, no surrogate-pickle transfers, no DAG-generator changes required for the basic drop-in workflow). They also accept --update-method puffball as an exact-regression fallback so the behaviour reduces to the existing puffball when needed. Default pipeline behaviour is unchanged: nothing invokes the new tools unless the user points --puff-exe at them. Background / theory notes / Tier-1 prototype results / rollout plan live in the parsimonious-placement project folder (20260513-Me-ParsimoniousPlacementOptions/parsimonious_placement_plan.md). A follow-up commit adds the create_eos_posterior_pipeline patch for the optional --tracer-only-marg cost-halving workflow. --- CHANGES.rst | 10 + .../Code/RIFT/misc/tracer_placement/README.md | 54 +++ .../RIFT/misc/tracer_placement/__init__.py | 19 + .../misc/tracer_placement/fits/__init__.py | 11 + .../RIFT/misc/tracer_placement/fits/_base.py | 22 ++ .../misc/tracer_placement/fits/_dispatch.py | 16 + .../misc/tracer_placement/fits/_polynomial.py | 40 ++ .../misc/tracer_placement/fits/_quadratic.py | 17 + .../RIFT/misc/tracer_placement/fits/_rbf.py | 24 ++ .../RIFT/misc/tracer_placement/fits/_rf.py | 23 ++ .../tracer_placement/samplers/__init__.py | 17 + .../misc/tracer_placement/samplers/_knn.py | 15 + .../tracer_placement/samplers/birth_death.py | 57 +++ .../tracer_placement/samplers/smc_mala.py | 94 +++++ .../tracer_placement/samplers/smc_mala_bd.py | 15 + .../tracer_placement/samplers/surrogate.py | 68 ++++ .../bin/util_HyperparameterTracerUpdate.py | 269 ++++++++++++++ .../Code/bin/util_ParameterTracerUpdate.py | 342 ++++++++++++++++++ 18 files changed, 1113 insertions(+) create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/README.md create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/__init__.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/__init__.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_base.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_dispatch.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_polynomial.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_quadratic.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rbf.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rf.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/__init__.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/_knn.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/birth_death.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/smc_mala.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/smc_mala_bd.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/surrogate.py create mode 100755 MonteCarloMarginalizeCode/Code/bin/util_HyperparameterTracerUpdate.py create mode 100755 MonteCarloMarginalizeCode/Code/bin/util_ParameterTracerUpdate.py diff --git a/CHANGES.rst b/CHANGES.rst index 78a9bad3a..8e5db286e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,6 +2,16 @@ 0.0.18.0 ------------ development tree is rift_O4d. + - parsimonious-placement (preview): new RIFT.misc.tracer_placement engine (SMC+MALA, birth-death, + and SMC-MALA+BD samplers; pluggable RF / RBF / polynomial / quadratic fits) plus two + opt-in CLI tools that mirror the existing puffball tools' I/O contract: + util_HyperparameterTracerUpdate.py (hyperpipe drop-in for util_HyperparameterPuffball.py) + and util_ParameterTracerUpdate.py (event-level drop-in for util_ParameterPuffball.py). + Default behavior preserves the existing pipelines (the new tools are not invoked unless + the user points --puff-exe at them); both also accept --update-method puffball as an + exact-regression fallback. See + https://git.ligo.org/rapidpe-rift/rift/-/merge_requests/ (TBD) and the project notes at + 20260513-Me-ParsimoniousPlacementOptions/parsimonious_placement_plan.md. - CIP hyperpipe improvements (initialize_me; enable population and EOS params in using_eos file with arbitrary labelled parameters); CIP xgboost gp fit (from Aasim); install make 'precession' optional; see also 0.0.17.4rc0 igwn-ligolw diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/README.md b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/README.md new file mode 100644 index 000000000..14cf4fcc4 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/README.md @@ -0,0 +1,54 @@ +# RIFT.misc.tracer_placement (phase H0 — engine layout) + +This is the shared engine package for tracer-based iterative grid placement. +It is consumed by: + +- `util_ParameterTracerUpdate.py` (event-level RIFT) +- `util_HyperparameterTracerUpdate.py` (RIFT hyperpipeline) + +Both tools are thin I/O wrappers; the math lives here. + +## Layout + +``` +samplers/ — kernels with the production signature + smc_mala.py — tempered SMC + MALA moves; accepts surrogate, surrogate_prev + birth_death.py — Langevin + kNN birth-death corrector + smc_mala_bd.py — composite: smc_mala then birth_death rejuvenation + surrogate.py — legacy in-engine quadratic helper (used by toys, not by tools) + _knn.py — numpy-only kNN helpers (no scipy) +fits/ — surrogate builders the tools call + __init__.py — exposes build(method, X, Y, sigma=None) + _rf.py — RandomForest (default, production) + _rbf.py — scipy RBFInterpolator + _quadratic.py — Tikhonov-regularized quadratic (smoke tests only) + _polynomial.py — degree-N polynomial (default 3) + _base.py — FitBase with FD gradient +``` + +## Sampler signature + +All three samplers expose: + + iterate(particles, *, surrogate, surrogate_prev=None, + prior_box, rng, state=None, **kw) -> (X_new, info) + +`info` always contains `"state"` (the updated state dict the caller must persist +between iterations for adaptive step-size etc.). + +## Status + +- [x] H0: layout, sampler signature unification, no module-level state. +- [ ] H1: drop into util_HyperparameterTracerUpdate.py end-to-end on the demo + hyperpipe Gaussian. Validate the bimodal demo path. +- [ ] H2: full toy test suite per `../test_suite_hyperpipe.md`. + +## Promotion to RIFT + +When this package is promoted, the target path is +`MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/`. The two tools +should import as: + + from RIFT.misc.tracer_placement import samplers, fits + +No other RIFT changes are required (see ../decision_no_cip_changes.md). diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/__init__.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/__init__.py new file mode 100644 index 000000000..fb47ee49b --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/__init__.py @@ -0,0 +1,19 @@ +"""RIFT.misc.tracer_placement — engine for tracer-particle iterative placement. + +Drop-in alternative to the puffball grid update used by both the event-level +RIFT iterator and the hyperpipeline. The math is shared; the file I/O wrapper +lives in the two thin command-line tools: + + util_ParameterTracerUpdate.py (event-level; XML I/O via lalsimutils) + util_HyperparameterTracerUpdate.py (hyperpipeline; .dat I/O) + +Public API: + samplers.smc_mala_bd(particles, surrogate, surrogate_prev, prior_box, rng, **kw) + -> (X_new, info) + samplers.smc_mala(...) -> (X_new, info) + samplers.birth_death(...) -> (X_new, info) + fits.build(method, X, Y, sigma=None) -> Fit (callable + .grad helper) +""" +from . import samplers, fits + +__all__ = ["samplers", "fits"] diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/__init__.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/__init__.py new file mode 100644 index 000000000..6d246e596 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/__init__.py @@ -0,0 +1,11 @@ +"""Fits for the tracer engine. + +Public entry point: build(method, X, Y, sigma=None) -> Fit. + +Fit objects expose: + .predict(Z) -> ndarray of len(Z) + .grad(Z) -> ndarray (len(Z), d) (analytic where available, FD otherwise) +""" +from ._dispatch import build + +__all__ = ["build"] diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_base.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_base.py new file mode 100644 index 000000000..b3b1574b6 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_base.py @@ -0,0 +1,22 @@ +"""Common Fit base class. Provides finite-difference gradient as a default.""" +import numpy as np + + +class FitBase: + """Subclasses must implement .predict(Z). Optionally override .grad(Z).""" + + def predict(self, Z): + raise NotImplementedError + + def f(self, Z): + return self.predict(Z) + + def grad(self, Z, eps=1e-3): + Z = np.atleast_2d(Z) + d = Z.shape[1] + out = np.zeros_like(Z) + for k in range(d): + zp = Z.copy(); zp[:, k] += eps + zm = Z.copy(); zm[:, k] -= eps + out[:, k] = (self.predict(zp) - self.predict(zm)) / (2 * eps) + return out diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_dispatch.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_dispatch.py new file mode 100644 index 000000000..ed781f2c1 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_dispatch.py @@ -0,0 +1,16 @@ +"""build(method, X, Y, sigma=None) -> Fit.""" +def build(method, X, Y, sigma=None, **kw): + method = method.lower() + if method == "rf": + from ._rf import RandomForestFit + return RandomForestFit(X, Y, sigma=sigma, **kw) + if method == "rbf": + from ._rbf import RBFFit + return RBFFit(X, Y, sigma=sigma, **kw) + if method == "quadratic": + from ._quadratic import QuadraticFit + return QuadraticFit(X, Y, sigma=sigma, **kw) + if method == "polynomial": + from ._polynomial import PolynomialFit + return PolynomialFit(X, Y, sigma=sigma, **kw) + raise ValueError(f"unknown fit method {method!r}") diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_polynomial.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_polynomial.py new file mode 100644 index 000000000..dd6e2cfd2 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_polynomial.py @@ -0,0 +1,40 @@ +"""Degree-N polynomial fit (default degree 3). Kept available; quadratic is the +degree-2 special case via _quadratic.py.""" +import numpy as np +from ._base import FitBase + + +class PolynomialFit(FitBase): + def __init__(self, X, Y, sigma=None, degree=3, ridge=1e-3): + from itertools import combinations_with_replacement + X = np.asarray(X, float); Y = np.asarray(Y, float) + self.d = X.shape[1]; self.degree = degree + # mean-normalize + self.mu = X.mean(axis=0); self.sd = X.std(axis=0) + 1e-12 + Xn = (X - self.mu) / self.sd + terms = [] + self._idx = [] # list of tuples of column indices in each term + for k in range(degree + 1): + for combo in combinations_with_replacement(range(self.d), k): + self._idx.append(combo) + terms.append(np.prod(Xn[:, combo], axis=1) if combo else np.ones(len(Xn))) + D = np.column_stack(terms) + W = ridge * np.eye(D.shape[1]) + if sigma is None: + self._theta = np.linalg.lstsq(D.T @ D + W, D.T @ Y, rcond=None)[0] + else: + w = 1.0 / (np.asarray(sigma) ** 2 + 1e-12) + DW = D * w[:, None] + self._theta = np.linalg.lstsq(DW.T @ D + W, DW.T @ Y, rcond=None)[0] + + def _design(self, Zn): + from itertools import combinations_with_replacement + cols = [] + for combo in self._idx: + cols.append(np.prod(Zn[:, combo], axis=1) if combo else np.ones(len(Zn))) + return np.column_stack(cols) + + def predict(self, Z): + Z = np.atleast_2d(Z) + Zn = (Z - self.mu) / self.sd + return self._design(Zn) @ self._theta diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_quadratic.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_quadratic.py new file mode 100644 index 000000000..0330fd6e2 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_quadratic.py @@ -0,0 +1,17 @@ +"""Cheap quadratic fit (Tikhonov-regularized). Not for production per +2026-05-13 design call; kept available for unit tests + smoke runs.""" +import numpy as np +from ._base import FitBase +from ..samplers.surrogate import QuadFit as _QuadFit + + +class QuadraticFit(FitBase): + def __init__(self, X, Y, sigma=None, ridge=1e-3): + weight = None if sigma is None else 1.0 / (np.asarray(sigma) ** 2 + 1e-12) + self._inner = _QuadFit(X, Y, ridge=ridge, eval_weight=weight) + + def predict(self, Z): + return self._inner.f(Z) + + def grad(self, Z, eps=None): + return self._inner.grad(Z) # analytic diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rbf.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rbf.py new file mode 100644 index 000000000..f4b9a8bc1 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rbf.py @@ -0,0 +1,24 @@ +"""RBF interpolator fit. Smooth gradients; good for MALA.""" +import numpy as np +from ._base import FitBase + +try: + from scipy.interpolate import RBFInterpolator + _HAVE_RBF = True +except ImportError: + _HAVE_RBF = False + + +class RBFFit(FitBase): + def __init__(self, X, Y, sigma=None, smoothing=1e-3): + if not _HAVE_RBF: + raise ImportError("scipy not available; cannot use --tracer-fit-method rbf") + # scipy's RBFInterpolator doesn't take sample weights; use smoothing as a + # proxy when sigma is supplied. + s = smoothing + if sigma is not None: + s = float(np.median(np.asarray(sigma) ** 2)) + smoothing + self._rbf = RBFInterpolator(X, Y, smoothing=s) + + def predict(self, Z): + return self._rbf(np.atleast_2d(Z)) diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rf.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rf.py new file mode 100644 index 000000000..126743e9b --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rf.py @@ -0,0 +1,23 @@ +"""Random-Forest fit. Production default per project owner (2026-05-13). +Gradient via finite differences from FitBase.""" +import numpy as np +from ._base import FitBase + +try: + from sklearn.ensemble import RandomForestRegressor + _HAVE_RF = True +except ImportError: + _HAVE_RF = False + + +class RandomForestFit(FitBase): + def __init__(self, X, Y, sigma=None, n_estimators=100, n_jobs=-1): + if not _HAVE_RF: + raise ImportError("sklearn not available; install scikit-learn or " + "choose --tracer-fit-method quadratic") + self._rf = RandomForestRegressor(n_estimators=n_estimators, n_jobs=n_jobs) + weight = None if sigma is None else 1.0 / (np.asarray(sigma) ** 2 + 1e-12) + self._rf.fit(X, Y, sample_weight=weight) + + def predict(self, Z): + return self._rf.predict(np.atleast_2d(Z)) diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/__init__.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/__init__.py new file mode 100644 index 000000000..f04f781b0 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/__init__.py @@ -0,0 +1,17 @@ +"""Tracer-placement sampler kernels. + +Each kernel has the signature: + kernel(particles, surrogate=None, surrogate_prev=None, prior_box, rng, **kw) + -> (X_new, info_dict) + +For backwards compatibility with the prototype harness (proto/run_tier1.py) +the kernels also accept the older positional form + kernel(particles, lnL_noisy, prior_box, rng, _state_key=...) +which builds a fresh local QuadFit; the production tools call the surrogate- +provided form. +""" +from .smc_mala import iterate as smc_mala +from .birth_death import iterate as birth_death +from .smc_mala_bd import iterate as smc_mala_bd + +__all__ = ["smc_mala", "birth_death", "smc_mala_bd"] diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/_knn.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/_knn.py new file mode 100644 index 000000000..7ff4ac245 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/_knn.py @@ -0,0 +1,15 @@ +"""Tiny numpy-only kNN utilities (no scipy).""" +import numpy as np + +def pairwise_sqdist(A, B): + # (na, d) x (nb, d) -> (na, nb) + AA = np.sum(A*A, axis=1, keepdims=True) + BB = np.sum(B*B, axis=1, keepdims=True).T + return np.maximum(AA + BB - 2.0 * A @ B.T, 0.0) + +def knn_dist(X, Y, k): + """For each row of X, distance to its k-th nearest neighbor in Y.""" + d2 = pairwise_sqdist(X, Y) + # k-th smallest including self if X is Y at the same row (caller handles +1) + part = np.partition(d2, k-1, axis=1)[:, :k] + return np.sqrt(part.max(axis=1)) diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/birth_death.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/birth_death.py new file mode 100644 index 000000000..490f4d61a --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/birth_death.py @@ -0,0 +1,57 @@ +"""Overdamped Langevin on surrogate + kNN birth-death corrector. + +Engine version: takes a Fit. surrogate_prev is ignored (BD doesn't bridge). +""" +import numpy as np +from ._knn import knn_dist + + +def _langevin_step(X, surrogate, eps, prior_box, rng): + g = surrogate.grad(X) + Xp = X + eps * g + np.sqrt(2 * eps) * rng.normal(size=X.shape) + lo = prior_box[:, 0]; hi = prior_box[:, 1] + Xp = np.where(Xp > hi, 2 * hi - Xp, Xp) + Xp = np.where(Xp < lo, 2 * lo - Xp, Xp) + return np.clip(Xp, lo, hi) + + +def _knn_log_density(X, k=5): + from math import lgamma, log, pi + n, d = X.shape + k = min(k, n - 1) + r = knn_dist(X, X, k + 1) + 1e-12 + log_Vd = (d / 2) * log(pi) - lgamma(d / 2 + 1) + return np.log(k) - np.log(n) - log_Vd - d * np.log(r) + + +def iterate(particles, *, surrogate, surrogate_prev=None, + prior_box, rng, state=None, + n_langevin_steps=20, n_bd_passes=3, eps_factor=0.05, + birth_death_rate=1.0, **_): + state = dict(state or {}) + X = np.asarray(particles, dtype=float).copy() + n = len(X) + scale = float(np.sqrt(np.diag(np.cov(X.T)) + 1e-8).mean()) if X.shape[1] > 1 \ + else float(X.std() + 1e-8) + eps = eps_factor * scale**2 + + for _ in range(n_langevin_steps): + X = _langevin_step(X, surrogate, eps, prior_box, rng) + + for _ in range(n_bd_passes): + log_rho = _knn_log_density(X, k=min(5, n - 1)) + log_pi = surrogate.predict(X) + log_pi -= log_pi.max(); log_rho -= log_rho.max() + score = log_rho - log_pi + order = np.argsort(score) + n_swap = max(1, int(round(birth_death_rate * n / 10.0))) + kill = order[-n_swap:] + birth = order[:n_swap] + X[kill] = X[rng.choice(birth, size=n_swap, replace=True)] \ + + 0.05 * scale * rng.normal(size=(n_swap, X.shape[1])) + X = np.clip(X, prior_box[:, 0], prior_box[:, 1]) + for _ in range(3): + X = _langevin_step(X, surrogate, eps, prior_box, rng) + + info = {"state": state, "eps": eps} + return X, info diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/smc_mala.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/smc_mala.py new file mode 100644 index 000000000..7f7a4e852 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/smc_mala.py @@ -0,0 +1,94 @@ +"""Tempered SMC with MALA moves on a pre-built surrogate. + +Engine version: takes Fit objects from RIFT.misc.tracer_placement.fits. +Stateless across calls — caller owns the `state` dict (with `mala_eps`, +optionally other adaptive choices). +""" +import numpy as np + + +def _ess(logw): + w = np.exp(logw - logw.max()) + w /= w.sum() + return 1.0 / np.sum(w * w) + + +def _systematic_resample(logw, rng): + w = np.exp(logw - logw.max()); w /= w.sum() + n = len(w) + u = (rng.uniform() + np.arange(n)) / n + return np.clip(np.searchsorted(np.cumsum(w), u), 0, n - 1) + + +def _mala_step(X, f_eval, f_grad, eps, prior_box, rng): + g = f_grad(X) + prop = X + 0.5 * eps**2 * g + eps * rng.normal(size=X.shape) + lo = prior_box[:, 0]; hi = prior_box[:, 1] + prop = np.where(prop > hi, 2 * hi - prop, prop) + prop = np.where(prop < lo, 2 * lo - prop, prop) + prop = np.clip(prop, lo, hi) + g_prop = f_grad(prop) + log_qf = -np.sum((prop - X - 0.5 * eps**2 * g)**2, axis=1) / (2 * eps**2) + log_qb = -np.sum((X - prop - 0.5 * eps**2 * g_prop)**2, axis=1) / (2 * eps**2) + log_alpha = f_eval(prop) - f_eval(X) + log_qb - log_qf + acc = np.log(rng.uniform(size=len(X))) < log_alpha + return np.where(acc[:, None], prop, X), float(acc.mean()) + + +def iterate(particles, *, surrogate, surrogate_prev=None, + prior_box, rng, state=None, + n_mala_steps=8, target_ess_frac=0.5, + birth_death_rate=0.0, **_): + state = dict(state or {}) + X = np.asarray(particles, dtype=float).copy() + n, _d = X.shape + if state.get("mala_eps") is None: + cov_diag = np.diag(np.cov(X.T)) if X.shape[1] > 1 else np.array([X.var()]) + state["mala_eps"] = 0.3 * float(np.sqrt(cov_diag + 1e-8).mean()) + + accept_log = [] + + if surrogate_prev is not None: + beta = 0.0 + logw = np.zeros(n) + delta_f = surrogate.predict(X) - surrogate_prev.predict(X) + while beta < 1.0: + lo, hi = 0.0, 1.0 - beta + for _ in range(20): + mid = 0.5 * (lo + hi) + if _ess(logw + mid * delta_f) > target_ess_frac * n: + lo = mid + else: + hi = mid + d_beta = lo if lo > 1e-6 else (1.0 - beta) + logw = logw + d_beta * delta_f + beta += d_beta + if _ess(logw) < target_ess_frac * n or beta >= 1.0: + idx = _systematic_resample(logw, rng) + X = X[idx] + logw = np.zeros(n) + delta_f = surrogate.predict(X) - surrogate_prev.predict(X) + + def fe(z, beta=beta, s=surrogate, sp=surrogate_prev): + return beta * s.predict(z) + (1 - beta) * sp.predict(z) + + def fg(z, beta=beta, s=surrogate, sp=surrogate_prev): + return beta * s.grad(z) + (1 - beta) * sp.grad(z) + + for _ in range(n_mala_steps): + X, acc = _mala_step(X, fe, fg, state["mala_eps"], prior_box, rng) + accept_log.append(acc) + state["mala_eps"] *= np.exp(0.1 * (acc - 0.574)) + state["mala_eps"] = float(np.clip(state["mala_eps"], 1e-4, 5.0)) + else: + for _ in range(n_mala_steps): + X, acc = _mala_step(X, surrogate.predict, surrogate.grad, + state["mala_eps"], prior_box, rng) + accept_log.append(acc) + state["mala_eps"] *= np.exp(0.1 * (acc - 0.574)) + state["mala_eps"] = float(np.clip(state["mala_eps"], 1e-4, 5.0)) + + info = {"state": state, + "mean_accept": float(np.mean(accept_log)) if accept_log else float("nan"), + "n_steps": len(accept_log)} + return X, info diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/smc_mala_bd.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/smc_mala_bd.py new file mode 100644 index 000000000..a06999f8e --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/smc_mala_bd.py @@ -0,0 +1,15 @@ +"""SMC-MALA followed by a short birth-death rejuvenation (engine version).""" +from .smc_mala import iterate as _smc_mala +from .birth_death import iterate as _bd + + +def iterate(particles, *, surrogate, surrogate_prev=None, + prior_box, rng, state=None, **kw): + X1, info1 = _smc_mala(particles, surrogate=surrogate, surrogate_prev=surrogate_prev, + prior_box=prior_box, rng=rng, state=state, **kw) + X2, info2 = _bd(X1, surrogate=surrogate, prior_box=prior_box, rng=rng, + state=info1.get("state"), + n_langevin_steps=5, n_bd_passes=2, + birth_death_rate=kw.get("birth_death_rate", 1.0)) + info = {**info1, **info2, "state": info2.get("state", info1.get("state"))} + return X2, info diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/surrogate.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/surrogate.py new file mode 100644 index 000000000..de4345800 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/surrogate.py @@ -0,0 +1,68 @@ +"""Local quadratic surrogate of ln-likelihood, RIFT BayesianLeastSquares-flavored. + +f(x) = a + b^T x + 0.5 x^T C x with Tikhonov regularization on coefficients. +Gradient: b + C x. Closed form. +""" +import numpy as np + +def _design(X): + n, d = X.shape + cols = [np.ones(n), X] # 1, linear + quad = [] + for i in range(d): + for j in range(i, d): + quad.append(X[:, i] * X[:, j]) + # square terms get factor 1, cross-terms factor 1 (we'll handle 0.5 in eval) + cols.append(np.column_stack(quad)) + return np.concatenate([np.ones((n,1)), X, np.column_stack(quad)], axis=1) + +def _coef_unpack(theta, d): + a = theta[0] + b = theta[1:1+d] + # quad terms in order (0,0),(0,1),...,(0,d-1),(1,1),(1,2),... + C = np.zeros((d, d)) + k = 1 + d + for i in range(d): + for j in range(i, d): + v = theta[k]; k += 1 + if i == j: + C[i, i] = 2.0 * v # since 0.5 x^T C x => C_ii contributes 0.5 C_ii x_i^2 + else: + C[i, j] = v + C[j, i] = v + return a, b, C + +class QuadFit: + def __init__(self, X, y, ridge=1e-3, eval_weight=None): + X = np.asarray(X, dtype=float) + y = np.asarray(y, dtype=float) + self.d = X.shape[1] + self.X_mean = X.mean(axis=0) + self.X_std = X.std(axis=0) + 1e-12 + Xn = (X - self.X_mean) / self.X_std + D = _design(Xn) + W = np.eye(D.shape[1]) * ridge + if eval_weight is None: + A = D.T @ D + W + rhs = D.T @ y + else: + wv = np.asarray(eval_weight, dtype=float) + DW = D * wv[:, None] + A = DW.T @ D + W + rhs = DW.T @ y + theta, *_ = np.linalg.lstsq(A, rhs, rcond=None) + self.theta = theta + self.a, self.b_n, self.C_n = _coef_unpack(theta, self.d) + + def _to_normed(self, X): + return (np.atleast_2d(X) - self.X_mean) / self.X_std + + def f(self, X): + Xn = self._to_normed(X) + v = self.a + Xn @ self.b_n + 0.5 * np.einsum('ni,ij,nj->n', Xn, self.C_n, Xn) + return v + + def grad(self, X): + Xn = self._to_normed(X) + gn = self.b_n + Xn @ self.C_n # gradient in normed coords + return gn / self.X_std # back to native coords diff --git a/MonteCarloMarginalizeCode/Code/bin/util_HyperparameterTracerUpdate.py b/MonteCarloMarginalizeCode/Code/bin/util_HyperparameterTracerUpdate.py new file mode 100755 index 000000000..bb5cb20d6 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/bin/util_HyperparameterTracerUpdate.py @@ -0,0 +1,269 @@ +#! /usr/bin/env python +# +# util_HyperparameterTracerUpdate.py +# +# DRAFT - parsimonious-placement project (2026-05-13) +# +# GOAL +# Drop-in alternative to util_HyperparameterPuffball.py for the RIFT hyperpipeline. +# Same I/O contract: read a hyperpipe-format .dat grid (header +# "# lnL sigma_lnL p1 p2 ..."), advance it by tracer SMC + birth-death, write +# a new .dat grid in the same format. +# +# Shared engine with util_ParameterTracerUpdate.py (the event-level cousin): +# both import from RIFT.misc.tracer_placement. +# +# COMPATIBILITY with util_HyperparameterPuffball.py +# --inj-file, --inj-file-out, --puff-factor, --force-away, --parameter, +# --no-correlation, --random-parameter, --random-parameter-range, +# --downselect-parameter, --downselect-parameter-range, --regularize +# same semantics. --update-method puffball reproduces puffball exactly. +# +# NEW +# --update-method {smc-mala-bd, smc-mala, birth-death, puffball} default smc-mala-bd +# --tracer-fit-method {rf, rbf, quadratic, polynomial} default rf +# --inj-file-prev OPTIONAL previous-iteration .dat (enables SMC bridging) +# --no-union-refit opt out of union refit when --inj-file-prev is given +# --n-mala-steps INT default 8 +# --target-ess-frac FLOAT default 0.5 +# --birth-death-rate FLOAT default 1.0 +# --rng-seed INT deterministic when given +# --state-in / --state-out tiny state file (~100 bytes) +# +# I/O FORMAT +# Input/output is the same as util_HyperparameterPuffball.py: a text file with a +# leading "# lnL sigma_lnL p1 p2 ..." header line. Data rows are numeric. +# - X (coord array) is extracted from the parameter columns by name (matching +# --parameter ordering). +# - Y (lnL) and sigma (sigma_lnL) come from the first two columns. +# - Output rows preserve all original columns; only the parameter values are +# updated. The lnL/sigma_lnL columns are zeroed in the output, matching the +# puffball convention (next iteration's marg driver will overwrite them). +# +# USAGE in hyperpipe (Hydra) +# puff: +# exe: util_HyperparameterTracerUpdate.py +# settings: +# update-method: smc-mala-bd +# tracer-fit-method: rf +# +# USAGE in legacy create_eos_posterior_pipeline +# --puff-exe `which util_HyperparameterTracerUpdate.py` +# --puff-args `pwd`/args_puff.txt # contains the flags above +# + +import argparse +import os +import pickle +import sys +import numpy as np + +try: + # Canonical path once the engine is merged into RIFT proper. + from RIFT.misc.tracer_placement import samplers as _tracer_samplers + from RIFT.misc.tracer_placement import fits as _tracer_fits + _TRACER_OK = True +except ImportError: + try: + # Local-dev fallback: PYTHONPATH points at a directory containing + # `tracer_placement/` directly (no RIFT/ wrapper). See the demo Makefile. + from tracer_placement import samplers as _tracer_samplers + from tracer_placement import fits as _tracer_fits + _TRACER_OK = True + except ImportError: + _TRACER_OK = False + + +# --------------------------------- CLI ------------------------------------- # + +def build_parser(): + p = argparse.ArgumentParser(description=__doc__) + # Mirror util_HyperparameterPuffball.py + p.add_argument("--inj-file", required=True, help="Input .dat grid (hyperpipe format).") + p.add_argument("--inj-file-out", default="output-tracer.dat") + p.add_argument("--puff-factor", default=1.0, type=float) + p.add_argument("--force-away", default=0.0, type=float) + p.add_argument("--parameter", action="append", required=True) + p.add_argument("--no-correlation", action="append", type=str) + p.add_argument("--random-parameter", action="append") + p.add_argument("--random-parameter-range", action="append", type=str) + p.add_argument("--downselect-parameter", action="append") + p.add_argument("--downselect-parameter-range", action="append", type=str) + p.add_argument("--regularize", action="store_true") + # Tracer-specific + p.add_argument("--update-method", + choices=("smc-mala-bd", "smc-mala", "birth-death", "puffball"), + default="smc-mala-bd") + p.add_argument("--tracer-fit-method", + choices=("rf", "rbf", "quadratic", "polynomial"), + default="rf") + p.add_argument("--inj-file-prev", default=None, + help="Optional previous-iteration .dat for SMC bridging / union refit.") + p.add_argument("--no-union-refit", action="store_true") + p.add_argument("--n-mala-steps", default=8, type=int) + p.add_argument("--target-ess-frac", default=0.5, type=float) + p.add_argument("--birth-death-rate", default=1.0, type=float) + p.add_argument("--rng-seed", default=None, type=int) + p.add_argument("--state-in", default=None) + p.add_argument("--state-out", default=None) + return p + + +# ----------------------- .dat <-> arrays ----------------------------------- # + +def _read_dat(path): + """Return (column_names, raw_rows ndarray).""" + with open(path) as f: + header = f.readline().rstrip("\n") + if not header.startswith("#"): + sys.exit(f"util_HyperparameterTracerUpdate: input {path!r} missing '#' header") + cols = header.lstrip("#").split() + if len(cols) < 3 or cols[0] != "lnL" or cols[1] not in ("sigma_lnL", "sigma"): + sys.exit(f"util_HyperparameterTracerUpdate: header must start with " + f"'lnL sigma_lnL ...', got {cols!r}") + rows = np.loadtxt(path) + if rows.ndim == 1: + rows = rows[None, :] + return cols, rows + + +def _extract_X(cols, rows, parameter_order): + idx = [cols.index(p) for p in parameter_order] + return rows[:, idx] + + +def _write_dat(path, cols, rows): + np.savetxt(path, rows, header=" ".join(cols)) + + +def _build_downselect(opts): + d = {} + if opts.downselect_parameter: + for name, rng in zip(opts.downselect_parameter, + opts.downselect_parameter_range or []): + d[name] = list(eval(rng)) + return d + + +def _coord_box(parameter_order, downselect_dict, X): + d = len(parameter_order) + box = np.zeros((d, 2)) + for i, name in enumerate(parameter_order): + if name in downselect_dict: + box[i] = downselect_dict[name] + else: + lo = float(X[:, i].min()) + hi = float(X[:, i].max()) + pad = 0.1 * (hi - lo + 1e-9) + box[i] = (lo - pad, hi + pad) + return box + + +# ------------------------------ main --------------------------------------- # + +def main(argv=None): + opts = build_parser().parse_args(argv) + rng = np.random.default_rng(opts.rng_seed) + + cols, rows = _read_dat(opts.inj_file) + X = _extract_X(cols, rows, opts.parameter) + Y = rows[:, 0] # lnL column + S = rows[:, 1] if rows.shape[1] >= 2 else None + downselect = _build_downselect(opts) + prior_box = _coord_box(opts.parameter, downselect, X) + + method = opts.update_method + + # ---- puffball regression path -------------------------------------- # + if method == "puffball": + cov = (np.cov(X.T) * opts.puff_factor**2 if X.shape[1] > 1 + else np.array([[X.std() ** 2]])) + cov = np.atleast_2d(cov) + if np.min(np.linalg.eigvalsh(cov)) < 1e-12: + cov = cov + 1e-8 * np.eye(cov.shape[0]) + delta = rng.multivariate_normal(np.zeros(X.shape[1]), cov, size=len(X)) + X_out = X + delta + # write back into rows; zero lnL/sigma (puffball convention) + out_rows = rows.copy() + for i, name in enumerate(opts.parameter): + out_rows[:, cols.index(name)] = X_out[:, i] + out_rows[:, 0] = 0.0 + out_rows[:, 1] = 0.0 + _write_dat(opts.inj_file_out, cols, out_rows) + return + + # ---- tracer path --------------------------------------------------- # + if not _TRACER_OK: + sys.stderr.write("util_HyperparameterTracerUpdate: RIFT.misc.tracer_placement " + "not installed; falling back to puffball.\n") + opts.update_method = "puffball" + return main(argv) + + X_train, Y_train, S_train = X, Y, S + fit_prev = None + + if opts.inj_file_prev is not None and os.path.exists(opts.inj_file_prev): + cols_p, rows_p = _read_dat(opts.inj_file_prev) + X_prev = _extract_X(cols_p, rows_p, opts.parameter) + Y_prev = rows_p[:, 0] + S_prev = rows_p[:, 1] if rows_p.shape[1] >= 2 else None + fit_prev = _tracer_fits.build(opts.tracer_fit_method, + X_prev, Y_prev, sigma=S_prev) + if not opts.no_union_refit: + X_train = np.vstack([X_prev, X]) + Y_train = np.concatenate([Y_prev, Y]) + if S is not None and S_prev is not None: + S_train = np.concatenate([S_prev, S]) + else: + S_train = None + + fit_now = _tracer_fits.build(opts.tracer_fit_method, + X_train, Y_train, sigma=S_train) + + state = {} + if opts.state_in and os.path.exists(opts.state_in): + with open(opts.state_in, "rb") as f: + state = pickle.load(f) + + sampler = { + "smc-mala-bd": _tracer_samplers.smc_mala_bd, + "smc-mala": _tracer_samplers.smc_mala, + "birth-death": _tracer_samplers.birth_death, + }[method] + + X_out, info = sampler( + particles=X, + surrogate=fit_now, + surrogate_prev=fit_prev, + prior_box=prior_box, + rng=rng, + n_mala_steps=opts.n_mala_steps, + target_ess_frac=opts.target_ess_frac, + birth_death_rate=opts.birth_death_rate, + state=state, + ) + + if opts.state_out: + with open(opts.state_out, "wb") as f: + pickle.dump(info.get("state", {}), f) + + # Apply downselect manually (puffball-equivalent behavior) + if downselect: + mask = np.ones(len(X_out), dtype=bool) + for k, (lo, hi) in downselect.items(): + if k in opts.parameter: + col = opts.parameter.index(k) + mask &= (X_out[:, col] >= lo) & (X_out[:, col] <= hi) + X_out = X_out[mask] + + out_rows = np.zeros((len(X_out), rows.shape[1])) + # carry forward any extra columns from input rows (just zero them; marg driver overwrites) + for i, name in enumerate(opts.parameter): + out_rows[:, cols.index(name)] = X_out[:, i] + out_rows[:, 0] = 0.0 + out_rows[:, 1] = 0.0 + _write_dat(opts.inj_file_out, cols, out_rows) + + +if __name__ == "__main__": + main() diff --git a/MonteCarloMarginalizeCode/Code/bin/util_ParameterTracerUpdate.py b/MonteCarloMarginalizeCode/Code/bin/util_ParameterTracerUpdate.py new file mode 100755 index 000000000..12da709c8 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/bin/util_ParameterTracerUpdate.py @@ -0,0 +1,342 @@ +#! /usr/bin/env python +# +# util_ParameterTracerUpdate.py +# +# DRAFT - parsimonious-placement project (rev. 2026-05-13) +# +# GOAL +# Drop-in alternative to util_ParameterPuffball.py for iterative grid update. +# Instead of resampling-from-fit + Gaussian jitter, advance the existing grid +# through a tracer-particle update: tempered SMC with MALA moves on a surrogate, +# plus optional birth-death rejuvenation. The tool REFITS its own surrogate from +# the same .dat file CIP reads, so it is fully self-contained (no model files +# passed between jobs, no CIP coupling, no extra OSG transfers). +# +# COMPATIBILITY +# --parameter, --inj-file, --inj-file-out, --fref, --fmin, --random-parameter, +# --random-parameter-range, --mc-range, --eta-range, --mtot-range, +# --downselect-parameter, --downselect-parameter-range, --reflect-parameter, +# --enforce-duration-bound : same semantics as util_ParameterPuffball.py. +# +# DATA-INGEST (mirrors CIP) +# --fname path to current iteration's ILE composite .dat file +# --fname-prev optional, previous iteration's .dat (enables SMC bridge) +# --lnL-col column index of lnL (caller specifies; matches CIP's expectations) +# --sigma-col optional column index of sigma_lnL +# +# NEW +# --update-method {smc-mala-bd, smc-mala, birth-death, puffball} +# Default smc-mala-bd. "puffball" reproduces util_ParameterPuffball.py for regression. +# --tracer-fit-method {rf, rbf, quadratic, polynomial} default rf +# --no-union-refit if --fname-prev given, do NOT include prev points in f_k fit +# --n-mala-steps INT default 8 +# --target-ess-frac FLOAT default 0.5 +# --birth-death-rate FLOAT default 1.0 +# --rng-seed INT deterministic when given +# --state-in tiny pickle: {mala_eps, last_rng_state} (~100 bytes) +# --state-out where to dump updated state for the next iteration +# +# USAGE EXAMPLE +# util_ParameterTracerUpdate.py \ +# --parameter mc --parameter eta --parameter chi_eff --parameter chi_p \ +# --inj-file overlap-grid_k.xml.gz --inj-file-out overlap-grid_kp1 \ +# --fname all_lnL_k.dat --fname-prev all_lnL_km1.dat \ +# --lnL-col 9 \ +# --update-method smc-mala-bd --tracer-fit-method rf \ +# --mc-range '[20,40]' --eta-range '[0.10,0.25]' \ +# --state-in tracer_state_k.pkl --state-out tracer_state_kp1.pkl +# +# NOTES +# - This is a draft skeleton. The sampler engine lives in +# parsimonious_placement_plan.md's proto/samplers/. For production it should be +# promoted into RIFT.misc.tracer_placement and imported here. +# - Surrogate refitting happens inside this tool every call. RF and RBF refits on +# the typical (X, lnL) size (10^2 - 10^4 points) cost seconds; negligible vs ILE. +# + +import argparse +import os +import pickle +import sys +import numpy as np + +import RIFT.lalsimutils as lalsimutils +import lalsimulation as lalsim # noqa: F401 +import lal + +try: + # Canonical path once the engine is merged into RIFT proper. + from RIFT.misc.tracer_placement import samplers as _tracer_samplers + from RIFT.misc.tracer_placement import fits as _tracer_fits + _TRACER_OK = True +except ImportError: + try: + # Local-dev fallback: PYTHONPATH points at a directory containing + # `tracer_placement/` directly (no RIFT/ wrapper). See the demo Makefile. + from tracer_placement import samplers as _tracer_samplers + from tracer_placement import fits as _tracer_fits + _TRACER_OK = True + except ImportError: + _TRACER_OK = False + +from igwn_ligolw import lsctables, ligolw # noqa: F401 +lsctables.use_in(ligolw.LIGOLWContentHandler) + + +# --------------------------------- CLI ------------------------------------- # + +def build_parser(): + p = argparse.ArgumentParser(description=__doc__) + # Mirror util_ParameterPuffball.py + p.add_argument("--inj-file", required=True) + p.add_argument("--inj-file-out", default="output-tracer") + p.add_argument("--fail-if-empty", action="store_true") + p.add_argument("--approx-output", default="SEOBNRv2") + p.add_argument("--fref", default=None, type=float) + p.add_argument("--fmin", default=None, type=float) + p.add_argument("--parameter", action="append", required=True) + p.add_argument("--random-parameter", action="append") + p.add_argument("--random-parameter-range", action="append", type=str) + p.add_argument("--mc-range", default=None) + p.add_argument("--eta-range", default=None) + p.add_argument("--mtot-range", default=None) + p.add_argument("--downselect-parameter", action="append") + p.add_argument("--downselect-parameter-range", action="append", type=str) + p.add_argument("--reflect-parameter", action="append", type=str) + p.add_argument("--enforce-duration-bound", default=None, type=float) + # Data ingest (mirror CIP) + p.add_argument("--fname", required=True, + help="Current iteration ILE composite .dat (same file CIP reads).") + p.add_argument("--fname-prev", default=None, + help="Optional previous iteration .dat; enables SMC bridging.") + p.add_argument("--lnL-col", type=int, required=True, + help="Column index of lnL in --fname (caller specifies, matches CIP).") + p.add_argument("--sigma-col", type=int, default=None, + help="Optional column index of sigma_lnL.") + p.add_argument("--lnL-downscale-factor", type=float, default=1.0) + # Tracer-specific + p.add_argument("--update-method", + choices=("smc-mala-bd", "smc-mala", "birth-death", "puffball"), + default="smc-mala-bd") + p.add_argument("--tracer-fit-method", + choices=("rf", "rbf", "quadratic", "polynomial"), + default="rf") + p.add_argument("--no-union-refit", action="store_true", + help="If --fname-prev is given, do NOT include those points in the f_k fit.") + p.add_argument("--n-mala-steps", default=8, type=int) + p.add_argument("--target-ess-frac", default=0.5, type=float) + p.add_argument("--birth-death-rate", default=1.0, type=float) + p.add_argument("--rng-seed", default=None, type=int) + p.add_argument("--state-in", default=None) + p.add_argument("--state-out", default=None) + # Back-compat with puffball for the regression path + p.add_argument("--puff-factor", default=1.0, type=float) + p.add_argument("--force-away", default=0.0, type=float) + p.add_argument("--regularize", action="store_true") + p.add_argument("--no-correlation", type=str, action="append") + return p + + +# --------------------- XML -> X (coord array) ------------------------------ # + +def _xml_to_X(opts, coord_names): + P_list = lalsimutils.xml_to_ChooseWaveformParams_array(opts.inj_file) + rows = [] + for P in P_list: + if opts.fmin is not None: + P.fmin = opts.fmin + if opts.fref is not None: + P.fref = opts.fref + line = np.zeros(len(coord_names)) + for i, name in enumerate(coord_names): + fac = lal.MSUN_SI if name in ("mc", "m1", "m2", "mtot") else 1.0 + line[i] = P.extract_param(name) / fac + rows.append(line) + return np.array(rows), P_list + + +def _X_to_xml(X_out, P_template_list, opts, coord_names, downselect_dict): + P_out_list = [] + n = min(len(P_template_list), len(X_out)) + for i in range(n): + P = P_template_list[i] + for j, name in enumerate(coord_names): + fac = lal.MSUN_SI if name in ("mc", "m1", "m2", "mtot") else 1.0 + P.assign_param(name, X_out[i, j] * fac) + keep = True + for k, (lo, hi) in downselect_dict.items(): + v = P.extract_param(k) + if k in ("mc", "m1", "m2", "mtot"): + v /= lal.MSUN_SI + if v < lo or v > hi: + keep = False + break + if opts.enforce_duration_bound is not None: + if lalsimutils.estimateWaveformDuration(P) > opts.enforce_duration_bound: + keep = False + if keep: + P_out_list.append(P) + if opts.fail_if_empty and not P_out_list: + sys.stderr.write("tracer: produced empty grid; aborting.\n") + sys.exit(2) + fref = P_out_list[0].fref if P_out_list else opts.fref + lalsimutils.ChooseWaveformParams_array_to_xml( + P_out_list, fname=opts.inj_file_out, fref=fref) + + +def _build_downselect(opts): + d = {} + if opts.mc_range is not None: + d["mc"] = list(eval(opts.mc_range)) + if opts.eta_range is not None: + d["eta"] = list(eval(opts.eta_range)) + if opts.mtot_range is not None: + d["mtot"] = list(eval(opts.mtot_range)) + if opts.downselect_parameter: + for name, rng in zip(opts.downselect_parameter, opts.downselect_parameter_range or []): + d[name] = list(eval(rng)) + return d + + +def _coord_box(coord_names, downselect_dict, X): + d = len(coord_names) + box = np.zeros((d, 2)) + for i, name in enumerate(coord_names): + if name in downselect_dict: + box[i] = downselect_dict[name] + else: + box[i] = (float(X[:, i].min()), float(X[:, i].max())) + return box + + +# --------------- .dat -> (X, Y, sigma) in coord-frame ---------------------- # +# +# We need (X, Y) in the SAME coordinate frame the sampler will move in. The .dat +# columns are not in coord-frame in general; CIP does its own conversion. We use +# the simplest robust approach: match each .dat row to a P in the input XML by +# row order (the standard RIFT contract), then read coord-frame X from the XML +# and lnL from the matched .dat row. +# +# When --fname-prev is used, the caller must also pass a previous-grid XML +# (--inj-file-prev) so we can do the same matching for f_{k-1}. To keep the CLI +# minimal in this draft we expect the convention that --fname-prev sits next to +# overlap-grid_{k-1}.xml.gz in the same directory (a one-line lookup) — TODO +# in production: add --inj-file-prev. + +def _load_lnL_for_grid(fname_dat, lnL_col, sigma_col, n_points, downscale): + arr = np.loadtxt(fname_dat) + if arr.ndim == 1: + arr = arr[None, :] + if len(arr) < n_points: + sys.stderr.write( + f"tracer: .dat has fewer rows ({len(arr)}) than XML grid ({n_points}); " + "alignment assumption (row order) violated.\n") + sys.exit(3) + Y = arr[:n_points, lnL_col] * downscale + if sigma_col is not None: + S = arr[:n_points, sigma_col] + else: + S = None + return Y, S + + +# ------------------------------- main -------------------------------------- # + +def main(argv=None): + opts = build_parser().parse_args(argv) + coord_names = list(opts.parameter) + downselect = _build_downselect(opts) + + X, P_template = _xml_to_X(opts, coord_names) + if X.size == 0: + sys.stderr.write("tracer: empty input grid; aborting.\n") + sys.exit(2) + prior_box = _coord_box(coord_names, downselect, X) + rng = np.random.default_rng(opts.rng_seed) + + method = opts.update_method + + # ---- puffball regression path -------------------------------------- # + if method == "puffball": + cov = np.cov(X.T) * opts.puff_factor**2 if X.shape[1] > 1 else np.array([[X.std()**2]]) + cov = np.atleast_2d(cov) + delta = rng.multivariate_normal(np.zeros(X.shape[1]), cov, size=len(X)) + X_out = X + delta + _X_to_xml(X_out, P_template, opts, coord_names, downselect) + return + + # ---- tracer path: refit our own surrogate from .dat ---------------- # + if not _TRACER_OK: + sys.stderr.write("tracer: RIFT.misc.tracer_placement not installed; " + "falling back to puffball.\n") + opts.update_method = "puffball" + return main(argv) + + Y_k, S_k = _load_lnL_for_grid(opts.fname, opts.lnL_col, opts.sigma_col, + len(X), opts.lnL_downscale_factor) + + X_train_k = X + Y_train_k = Y_k + S_train_k = S_k + X_prev = None + fit_prev = None + + if opts.fname_prev is not None: + # Read previous-iteration grid + lnL. Production CLI should accept + # --inj-file-prev explicitly; here we follow the naming convention. + inj_prev = opts.fname_prev.replace(".dat", ".xml.gz") + if not os.path.exists(inj_prev): + sys.stderr.write( + f"tracer: cannot locate previous grid XML ({inj_prev}); " + "ignoring --fname-prev and running unbridged.\n") + else: + opts_prev = argparse.Namespace(**vars(opts)) + opts_prev.inj_file = inj_prev + X_prev, _ = _xml_to_X(opts_prev, coord_names) + Y_prev, S_prev = _load_lnL_for_grid( + opts.fname_prev, opts.lnL_col, opts.sigma_col, + len(X_prev), opts.lnL_downscale_factor) + if not opts.no_union_refit: + X_train_k = np.vstack([X_prev, X]) + Y_train_k = np.concatenate([Y_prev, Y_k]) + S_train_k = (np.concatenate([S_prev, S_k]) + if (S_prev is not None and S_k is not None) else None) + # f_{k-1} fit on prior data only + fit_prev = _tracer_fits.build(opts.tracer_fit_method, + X_prev, Y_prev, sigma=S_prev) + + fit_now = _tracer_fits.build(opts.tracer_fit_method, + X_train_k, Y_train_k, sigma=S_train_k) + + state = {} + if opts.state_in and os.path.exists(opts.state_in): + with open(opts.state_in, "rb") as f: + state = pickle.load(f) + + sampler = { + "smc-mala-bd": _tracer_samplers.smc_mala_bd, + "smc-mala": _tracer_samplers.smc_mala, + "birth-death": _tracer_samplers.birth_death, + }[method] + + X_out, info = sampler( + particles=X, + surrogate=fit_now, + surrogate_prev=fit_prev, + prior_box=prior_box, + rng=rng, + n_mala_steps=opts.n_mala_steps, + target_ess_frac=opts.target_ess_frac, + birth_death_rate=opts.birth_death_rate, + state=state, + ) + + if opts.state_out: + with open(opts.state_out, "wb") as f: + pickle.dump(info.get("state", {}), f) + + _X_to_xml(X_out, P_template, opts, coord_names, downselect) + + +if __name__ == "__main__": + main() From 39f1b549265ed3ae512cde604a6c0bfcba237307 Mon Sep 17 00:00:00 2001 From: Richard O'Shaughnessy Date: Thu, 14 May 2026 08:08:19 -0400 Subject: [PATCH 06/24] create_eos_posterior_pipeline: add --tracer-only-marg cost-halving mode Optional workflow for use with the tracer-placement tools added in the previous commit (util_HyperparameterTracerUpdate.py / util_ParameterTracerUpdate.py). Two new CLI flags: --tracer-only-marg Skip MARG_* (grid-X.dat) nodes on non-final iterations; only MARG_PUFF_* (grid_puff-X.dat) runs. Halves the per-iteration likelihood-evaluation cost when the puff'd grid is itself already a posterior-tracking ensemble (which it is under tracer placement). --tracer-final-marg-iterations N (default 1) Number of trailing iterations on which MARG still runs, so the final-posterior diagnostic stays based on a cleanly-sampled grid. Set to 0 to skip MARG entirely. Gate is conservative: iteration 0 always runs MARG (no grid_puff-0.dat exists yet), and the last N iterations always run MARG. With the default N=1 and 5 total iterations, MARG runs on iters {0, 4} and is skipped on {1, 2, 3} -- a 1.8x reduction in MARG-equivalent evaluations. Default behaviour is unchanged: --tracer-only-marg is False unless explicitly set, so the gate collapses to a no-op for every existing pipeline. The only structural change in the DAG generator is a conditional around the MARG_* node-creation block in the iteration loop (with a print() noting the skip for easy log diagnosis). No changes to MARG_PUFF wiring, consolidation, EOS-post, or anything else. Rationale and worked example in 20260513-Me-ParsimoniousPlacementOptions/rift_integration/workflow_tracer_only_marg.md. --- .../Code/bin/create_eos_posterior_pipeline | 42 +++++++++++++------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline b/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline index 14798db9e..b13462937 100644 --- a/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline +++ b/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline @@ -42,6 +42,10 @@ parser.add_argument("--request-memory-marg",default=16384,type=int,help="Memory parser.add_argument("--puff-exe",default=None,help="similar to PUFF, but cannot be the same executable since format different") parser.add_argument("--puff-args",default=None,help=" argumetns") parser.add_argument("--puff-max-it",default=np.inf,type=int,help="Maximum iteration number that puffball is applied. default to infinity ") +parser.add_argument("--tracer-only-marg", action='store_true', + help="Tracer-placement workflow: skip MARG (grid-X.dat) jobs on non-final iterations and only run MARG_PUFF (grid_puff-X.dat). Use only when --puff-exe points at a tracer-aware grid update tool (util_HyperparameterTracerUpdate.py / util_ParameterTracerUpdate.py). See workflow_tracer_only_marg.md in the parsimonious-placement project notes for the rationale.") +parser.add_argument("--tracer-final-marg-iterations", type=int, default=1, + help="Number of trailing iterations on which MARG still runs even with --tracer-only-marg (needed for a clean posterior diagnostic on the final iteration). Default 1; set 0 to use only accumulated MARG_PUFF outputs throughout.") parser.add_argument("--eos-post-args",default=None,help="filename of args_ile.txt file which holds ILE arguments. Should NOT conflict with arguments auto-set by this DAG ... in particular, i/o arguments will be modified") parser.add_argument("--eos-post-exe",default=None,help="filename of ILE or equivalent executable. Will default to `which integrate_likelihood_extrinsic` in low-level code") parser.add_argument("--eos-post-retries",default=0,type=int,help="Number of retry attempts for ILE jobs. (These can fail)") @@ -581,20 +585,34 @@ for it in np.arange(it_start,opts.n_iterations): con_node.add_macro("macroiteration",it) con_node.add_macro("macroevent",int(event)) con_node.set_retry(opts.general_retries) + # --- tracer-only-marg gate --------------------------------------------- + # Skip the regular MARG_* node (grid-X.dat) on non-final iterations when the + # user has opted into tracer-only mode. Iteration 0 must still run MARG since + # no grid_puff-0.dat exists yet, and the last --tracer-final-marg-iterations + # iterations run MARG so the final-posterior diagnostic stays clean. + _n_final_marg = max(0, int(getattr(opts, "tracer_final_marg_iterations", 1))) + _skip_marg_this_iter = ( + getattr(opts, "tracer_only_marg", False) + and it > it_start + and it < (opts.n_iterations - _n_final_marg) + ) + if _skip_marg_this_iter: + print(" tracer-only-marg: skipping MARG_* nodes for iteration", it) for row in np.arange(indx_max_event): #np.arange(n_jobs_this_time): # Add task per MARG operation - if opts.marg_event_args: - cip_node = pipeline.CondorDAGNode(cip_job) - else: - cip_node = pipeline.CondorDAGNode(cip_job_list[event]) - cip_node.set_retry(opts.general_retries) - cip_node.add_macro("macroid",event) - cip_node.add_macro("macroevent", int(row*n_chunk_here)) - cip_node.add_macro("macroiteration", it) - if not(parent_fit_node is None): - cip_node.add_parent(parent_fit_node) - con_node.add_parent(cip_node) # consolidate depends on all of the individual jobs - dag.add_node(cip_node) + if not _skip_marg_this_iter: + if opts.marg_event_args: + cip_node = pipeline.CondorDAGNode(cip_job) + else: + cip_node = pipeline.CondorDAGNode(cip_job_list[event]) + cip_node.set_retry(opts.general_retries) + cip_node.add_macro("macroid",event) + cip_node.add_macro("macroevent", int(row*n_chunk_here)) + cip_node.add_macro("macroiteration", it) + if not(parent_fit_node is None): + cip_node.add_parent(parent_fit_node) + con_node.add_parent(cip_node) # consolidate depends on all of the individual jobs + dag.add_node(cip_node) # if PUFF is done for this event, also add MARG_PUFF nodes. Note grid size assumed identical, so don't need another loop over 'id'/row if puff_args and puff_cadence: if it>it_start and it <= puff_max_it and (it-1)%puff_cadence ==0: # we made a puffball last iteration, so run it through ILE now From 01c2cf57a8596d8ce9390b17c8f80effcbf052cc Mon Sep 17 00:00:00 2001 From: Richard O'Shaughnessy Date: Thu, 14 May 2026 13:37:48 -0400 Subject: [PATCH 07/24] Add RIFT.misc.tracer_placement engine + util_*TracerUpdate.py tools (preview) Parsimonious-placement project: tracer-particle iterative grid update intended to reduce the number of expensive likelihood evaluations required at each adaptive iteration. New code: - RIFT.misc.tracer_placement: engine package with SMC+MALA, birth-death, and SMC-MALA+BD samplers; pluggable RF / RBF / polynomial / quadratic fits via tracer_placement.fits.build(method, X, Y, sigma=None). - bin/util_HyperparameterTracerUpdate.py: drop-in alternative to util_HyperparameterPuffball.py for the hyperpipeline. - bin/util_ParameterTracerUpdate.py: drop-in alternative to util_ParameterPuffball.py for the event-level iterator. Both tools refit their own surrogate from the same .dat file CIP reads (no CIP coupling, no surrogate-pickle transfers, no DAG-generator changes required for the basic drop-in workflow). They also accept --update-method puffball as an exact-regression fallback so the behaviour reduces to the existing puffball when needed. Default pipeline behaviour is unchanged: nothing invokes the new tools unless the user points --puff-exe at them. Background / theory notes / Tier-1 prototype results / rollout plan live in the parsimonious-placement project folder (20260513-Me-ParsimoniousPlacementOptions/parsimonious_placement_plan.md). A follow-up commit adds the create_eos_posterior_pipeline patch for the optional --tracer-only-marg cost-halving workflow. --- CHANGES.rst | 15 +- .../Code/RIFT/misc/tracer_placement/README.md | 54 +++ .../RIFT/misc/tracer_placement/__init__.py | 19 + .../misc/tracer_placement/fits/__init__.py | 11 + .../RIFT/misc/tracer_placement/fits/_base.py | 22 ++ .../misc/tracer_placement/fits/_dispatch.py | 16 + .../misc/tracer_placement/fits/_polynomial.py | 40 ++ .../misc/tracer_placement/fits/_quadratic.py | 17 + .../RIFT/misc/tracer_placement/fits/_rbf.py | 24 ++ .../RIFT/misc/tracer_placement/fits/_rf.py | 23 ++ .../tracer_placement/samplers/__init__.py | 17 + .../misc/tracer_placement/samplers/_knn.py | 15 + .../tracer_placement/samplers/birth_death.py | 57 +++ .../tracer_placement/samplers/smc_mala.py | 94 +++++ .../tracer_placement/samplers/smc_mala_bd.py | 15 + .../tracer_placement/samplers/surrogate.py | 68 ++++ .../bin/util_HyperparameterTracerUpdate.py | 269 ++++++++++++++ .../Code/bin/util_ParameterTracerUpdate.py | 342 ++++++++++++++++++ 18 files changed, 1117 insertions(+), 1 deletion(-) create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/README.md create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/__init__.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/__init__.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_base.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_dispatch.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_polynomial.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_quadratic.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rbf.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rf.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/__init__.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/_knn.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/birth_death.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/smc_mala.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/smc_mala_bd.py create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/surrogate.py create mode 100755 MonteCarloMarginalizeCode/Code/bin/util_HyperparameterTracerUpdate.py create mode 100755 MonteCarloMarginalizeCode/Code/bin/util_ParameterTracerUpdate.py diff --git a/CHANGES.rst b/CHANGES.rst index 78a9bad3a..7f575837b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,7 +5,20 @@ development tree is rift_O4d. - CIP hyperpipe improvements (initialize_me; enable population and EOS params in using_eos file with arbitrary labelled parameters); CIP xgboost gp fit (from Aasim); install make 'precession' optional; see also 0.0.17.4rc0 igwn-ligolw - + - parsimonious placement (preview): new RIFT.misc.tracer_placement engine package (SMC+MALA / birth-death / + SMC-MALA+BD samplers with pluggable RF / RBF / polynomial / quadratic surrogate fits) plus two thin + command-line drivers, bin/util_HyperparameterTracerUpdate.py (hyperpipe, .dat I/O) and + bin/util_ParameterTracerUpdate.py (event-level, XML I/O via lalsimutils). Both are drop-in alternatives to + util_HyperparameterPuffball.py / util_ParameterPuffball.py: legacy default behavior is unchanged --- the new + tools are only invoked when --puff-exe points at them. create_eos_posterior_pipeline gains + --tracer-only-marg and --tracer-final-marg-iterations to optionally skip MARG_* (posterior) nodes in + intermediate iterations and rely on MARG_PUFF (placement) jobs alone, recovering MARG only for the final N + iterations. util_RIFT_hyperpipe.py forwards the same flags via arch.tracer-only-marg / + arch.tracer-final-marg-iterations and exposes a puff.settings: block mirroring post.settings: so tracer + sampler hyperparameters can be set declaratively in Hydra. See + 20260513-Me-ParsimoniousPlacementOptions/parsimonious_placement_plan.md for theory, prototype results, + rollout plan, and references. + 0.0.17.9 ------------ development tree is rift_O4c_staging -> rift_O4c; draft MR notes at diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/README.md b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/README.md new file mode 100644 index 000000000..14cf4fcc4 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/README.md @@ -0,0 +1,54 @@ +# RIFT.misc.tracer_placement (phase H0 — engine layout) + +This is the shared engine package for tracer-based iterative grid placement. +It is consumed by: + +- `util_ParameterTracerUpdate.py` (event-level RIFT) +- `util_HyperparameterTracerUpdate.py` (RIFT hyperpipeline) + +Both tools are thin I/O wrappers; the math lives here. + +## Layout + +``` +samplers/ — kernels with the production signature + smc_mala.py — tempered SMC + MALA moves; accepts surrogate, surrogate_prev + birth_death.py — Langevin + kNN birth-death corrector + smc_mala_bd.py — composite: smc_mala then birth_death rejuvenation + surrogate.py — legacy in-engine quadratic helper (used by toys, not by tools) + _knn.py — numpy-only kNN helpers (no scipy) +fits/ — surrogate builders the tools call + __init__.py — exposes build(method, X, Y, sigma=None) + _rf.py — RandomForest (default, production) + _rbf.py — scipy RBFInterpolator + _quadratic.py — Tikhonov-regularized quadratic (smoke tests only) + _polynomial.py — degree-N polynomial (default 3) + _base.py — FitBase with FD gradient +``` + +## Sampler signature + +All three samplers expose: + + iterate(particles, *, surrogate, surrogate_prev=None, + prior_box, rng, state=None, **kw) -> (X_new, info) + +`info` always contains `"state"` (the updated state dict the caller must persist +between iterations for adaptive step-size etc.). + +## Status + +- [x] H0: layout, sampler signature unification, no module-level state. +- [ ] H1: drop into util_HyperparameterTracerUpdate.py end-to-end on the demo + hyperpipe Gaussian. Validate the bimodal demo path. +- [ ] H2: full toy test suite per `../test_suite_hyperpipe.md`. + +## Promotion to RIFT + +When this package is promoted, the target path is +`MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/`. The two tools +should import as: + + from RIFT.misc.tracer_placement import samplers, fits + +No other RIFT changes are required (see ../decision_no_cip_changes.md). diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/__init__.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/__init__.py new file mode 100644 index 000000000..fb47ee49b --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/__init__.py @@ -0,0 +1,19 @@ +"""RIFT.misc.tracer_placement — engine for tracer-particle iterative placement. + +Drop-in alternative to the puffball grid update used by both the event-level +RIFT iterator and the hyperpipeline. The math is shared; the file I/O wrapper +lives in the two thin command-line tools: + + util_ParameterTracerUpdate.py (event-level; XML I/O via lalsimutils) + util_HyperparameterTracerUpdate.py (hyperpipeline; .dat I/O) + +Public API: + samplers.smc_mala_bd(particles, surrogate, surrogate_prev, prior_box, rng, **kw) + -> (X_new, info) + samplers.smc_mala(...) -> (X_new, info) + samplers.birth_death(...) -> (X_new, info) + fits.build(method, X, Y, sigma=None) -> Fit (callable + .grad helper) +""" +from . import samplers, fits + +__all__ = ["samplers", "fits"] diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/__init__.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/__init__.py new file mode 100644 index 000000000..6d246e596 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/__init__.py @@ -0,0 +1,11 @@ +"""Fits for the tracer engine. + +Public entry point: build(method, X, Y, sigma=None) -> Fit. + +Fit objects expose: + .predict(Z) -> ndarray of len(Z) + .grad(Z) -> ndarray (len(Z), d) (analytic where available, FD otherwise) +""" +from ._dispatch import build + +__all__ = ["build"] diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_base.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_base.py new file mode 100644 index 000000000..b3b1574b6 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_base.py @@ -0,0 +1,22 @@ +"""Common Fit base class. Provides finite-difference gradient as a default.""" +import numpy as np + + +class FitBase: + """Subclasses must implement .predict(Z). Optionally override .grad(Z).""" + + def predict(self, Z): + raise NotImplementedError + + def f(self, Z): + return self.predict(Z) + + def grad(self, Z, eps=1e-3): + Z = np.atleast_2d(Z) + d = Z.shape[1] + out = np.zeros_like(Z) + for k in range(d): + zp = Z.copy(); zp[:, k] += eps + zm = Z.copy(); zm[:, k] -= eps + out[:, k] = (self.predict(zp) - self.predict(zm)) / (2 * eps) + return out diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_dispatch.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_dispatch.py new file mode 100644 index 000000000..ed781f2c1 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_dispatch.py @@ -0,0 +1,16 @@ +"""build(method, X, Y, sigma=None) -> Fit.""" +def build(method, X, Y, sigma=None, **kw): + method = method.lower() + if method == "rf": + from ._rf import RandomForestFit + return RandomForestFit(X, Y, sigma=sigma, **kw) + if method == "rbf": + from ._rbf import RBFFit + return RBFFit(X, Y, sigma=sigma, **kw) + if method == "quadratic": + from ._quadratic import QuadraticFit + return QuadraticFit(X, Y, sigma=sigma, **kw) + if method == "polynomial": + from ._polynomial import PolynomialFit + return PolynomialFit(X, Y, sigma=sigma, **kw) + raise ValueError(f"unknown fit method {method!r}") diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_polynomial.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_polynomial.py new file mode 100644 index 000000000..dd6e2cfd2 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_polynomial.py @@ -0,0 +1,40 @@ +"""Degree-N polynomial fit (default degree 3). Kept available; quadratic is the +degree-2 special case via _quadratic.py.""" +import numpy as np +from ._base import FitBase + + +class PolynomialFit(FitBase): + def __init__(self, X, Y, sigma=None, degree=3, ridge=1e-3): + from itertools import combinations_with_replacement + X = np.asarray(X, float); Y = np.asarray(Y, float) + self.d = X.shape[1]; self.degree = degree + # mean-normalize + self.mu = X.mean(axis=0); self.sd = X.std(axis=0) + 1e-12 + Xn = (X - self.mu) / self.sd + terms = [] + self._idx = [] # list of tuples of column indices in each term + for k in range(degree + 1): + for combo in combinations_with_replacement(range(self.d), k): + self._idx.append(combo) + terms.append(np.prod(Xn[:, combo], axis=1) if combo else np.ones(len(Xn))) + D = np.column_stack(terms) + W = ridge * np.eye(D.shape[1]) + if sigma is None: + self._theta = np.linalg.lstsq(D.T @ D + W, D.T @ Y, rcond=None)[0] + else: + w = 1.0 / (np.asarray(sigma) ** 2 + 1e-12) + DW = D * w[:, None] + self._theta = np.linalg.lstsq(DW.T @ D + W, DW.T @ Y, rcond=None)[0] + + def _design(self, Zn): + from itertools import combinations_with_replacement + cols = [] + for combo in self._idx: + cols.append(np.prod(Zn[:, combo], axis=1) if combo else np.ones(len(Zn))) + return np.column_stack(cols) + + def predict(self, Z): + Z = np.atleast_2d(Z) + Zn = (Z - self.mu) / self.sd + return self._design(Zn) @ self._theta diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_quadratic.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_quadratic.py new file mode 100644 index 000000000..0330fd6e2 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_quadratic.py @@ -0,0 +1,17 @@ +"""Cheap quadratic fit (Tikhonov-regularized). Not for production per +2026-05-13 design call; kept available for unit tests + smoke runs.""" +import numpy as np +from ._base import FitBase +from ..samplers.surrogate import QuadFit as _QuadFit + + +class QuadraticFit(FitBase): + def __init__(self, X, Y, sigma=None, ridge=1e-3): + weight = None if sigma is None else 1.0 / (np.asarray(sigma) ** 2 + 1e-12) + self._inner = _QuadFit(X, Y, ridge=ridge, eval_weight=weight) + + def predict(self, Z): + return self._inner.f(Z) + + def grad(self, Z, eps=None): + return self._inner.grad(Z) # analytic diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rbf.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rbf.py new file mode 100644 index 000000000..f4b9a8bc1 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rbf.py @@ -0,0 +1,24 @@ +"""RBF interpolator fit. Smooth gradients; good for MALA.""" +import numpy as np +from ._base import FitBase + +try: + from scipy.interpolate import RBFInterpolator + _HAVE_RBF = True +except ImportError: + _HAVE_RBF = False + + +class RBFFit(FitBase): + def __init__(self, X, Y, sigma=None, smoothing=1e-3): + if not _HAVE_RBF: + raise ImportError("scipy not available; cannot use --tracer-fit-method rbf") + # scipy's RBFInterpolator doesn't take sample weights; use smoothing as a + # proxy when sigma is supplied. + s = smoothing + if sigma is not None: + s = float(np.median(np.asarray(sigma) ** 2)) + smoothing + self._rbf = RBFInterpolator(X, Y, smoothing=s) + + def predict(self, Z): + return self._rbf(np.atleast_2d(Z)) diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rf.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rf.py new file mode 100644 index 000000000..126743e9b --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rf.py @@ -0,0 +1,23 @@ +"""Random-Forest fit. Production default per project owner (2026-05-13). +Gradient via finite differences from FitBase.""" +import numpy as np +from ._base import FitBase + +try: + from sklearn.ensemble import RandomForestRegressor + _HAVE_RF = True +except ImportError: + _HAVE_RF = False + + +class RandomForestFit(FitBase): + def __init__(self, X, Y, sigma=None, n_estimators=100, n_jobs=-1): + if not _HAVE_RF: + raise ImportError("sklearn not available; install scikit-learn or " + "choose --tracer-fit-method quadratic") + self._rf = RandomForestRegressor(n_estimators=n_estimators, n_jobs=n_jobs) + weight = None if sigma is None else 1.0 / (np.asarray(sigma) ** 2 + 1e-12) + self._rf.fit(X, Y, sample_weight=weight) + + def predict(self, Z): + return self._rf.predict(np.atleast_2d(Z)) diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/__init__.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/__init__.py new file mode 100644 index 000000000..f04f781b0 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/__init__.py @@ -0,0 +1,17 @@ +"""Tracer-placement sampler kernels. + +Each kernel has the signature: + kernel(particles, surrogate=None, surrogate_prev=None, prior_box, rng, **kw) + -> (X_new, info_dict) + +For backwards compatibility with the prototype harness (proto/run_tier1.py) +the kernels also accept the older positional form + kernel(particles, lnL_noisy, prior_box, rng, _state_key=...) +which builds a fresh local QuadFit; the production tools call the surrogate- +provided form. +""" +from .smc_mala import iterate as smc_mala +from .birth_death import iterate as birth_death +from .smc_mala_bd import iterate as smc_mala_bd + +__all__ = ["smc_mala", "birth_death", "smc_mala_bd"] diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/_knn.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/_knn.py new file mode 100644 index 000000000..7ff4ac245 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/_knn.py @@ -0,0 +1,15 @@ +"""Tiny numpy-only kNN utilities (no scipy).""" +import numpy as np + +def pairwise_sqdist(A, B): + # (na, d) x (nb, d) -> (na, nb) + AA = np.sum(A*A, axis=1, keepdims=True) + BB = np.sum(B*B, axis=1, keepdims=True).T + return np.maximum(AA + BB - 2.0 * A @ B.T, 0.0) + +def knn_dist(X, Y, k): + """For each row of X, distance to its k-th nearest neighbor in Y.""" + d2 = pairwise_sqdist(X, Y) + # k-th smallest including self if X is Y at the same row (caller handles +1) + part = np.partition(d2, k-1, axis=1)[:, :k] + return np.sqrt(part.max(axis=1)) diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/birth_death.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/birth_death.py new file mode 100644 index 000000000..490f4d61a --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/birth_death.py @@ -0,0 +1,57 @@ +"""Overdamped Langevin on surrogate + kNN birth-death corrector. + +Engine version: takes a Fit. surrogate_prev is ignored (BD doesn't bridge). +""" +import numpy as np +from ._knn import knn_dist + + +def _langevin_step(X, surrogate, eps, prior_box, rng): + g = surrogate.grad(X) + Xp = X + eps * g + np.sqrt(2 * eps) * rng.normal(size=X.shape) + lo = prior_box[:, 0]; hi = prior_box[:, 1] + Xp = np.where(Xp > hi, 2 * hi - Xp, Xp) + Xp = np.where(Xp < lo, 2 * lo - Xp, Xp) + return np.clip(Xp, lo, hi) + + +def _knn_log_density(X, k=5): + from math import lgamma, log, pi + n, d = X.shape + k = min(k, n - 1) + r = knn_dist(X, X, k + 1) + 1e-12 + log_Vd = (d / 2) * log(pi) - lgamma(d / 2 + 1) + return np.log(k) - np.log(n) - log_Vd - d * np.log(r) + + +def iterate(particles, *, surrogate, surrogate_prev=None, + prior_box, rng, state=None, + n_langevin_steps=20, n_bd_passes=3, eps_factor=0.05, + birth_death_rate=1.0, **_): + state = dict(state or {}) + X = np.asarray(particles, dtype=float).copy() + n = len(X) + scale = float(np.sqrt(np.diag(np.cov(X.T)) + 1e-8).mean()) if X.shape[1] > 1 \ + else float(X.std() + 1e-8) + eps = eps_factor * scale**2 + + for _ in range(n_langevin_steps): + X = _langevin_step(X, surrogate, eps, prior_box, rng) + + for _ in range(n_bd_passes): + log_rho = _knn_log_density(X, k=min(5, n - 1)) + log_pi = surrogate.predict(X) + log_pi -= log_pi.max(); log_rho -= log_rho.max() + score = log_rho - log_pi + order = np.argsort(score) + n_swap = max(1, int(round(birth_death_rate * n / 10.0))) + kill = order[-n_swap:] + birth = order[:n_swap] + X[kill] = X[rng.choice(birth, size=n_swap, replace=True)] \ + + 0.05 * scale * rng.normal(size=(n_swap, X.shape[1])) + X = np.clip(X, prior_box[:, 0], prior_box[:, 1]) + for _ in range(3): + X = _langevin_step(X, surrogate, eps, prior_box, rng) + + info = {"state": state, "eps": eps} + return X, info diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/smc_mala.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/smc_mala.py new file mode 100644 index 000000000..7f7a4e852 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/smc_mala.py @@ -0,0 +1,94 @@ +"""Tempered SMC with MALA moves on a pre-built surrogate. + +Engine version: takes Fit objects from RIFT.misc.tracer_placement.fits. +Stateless across calls — caller owns the `state` dict (with `mala_eps`, +optionally other adaptive choices). +""" +import numpy as np + + +def _ess(logw): + w = np.exp(logw - logw.max()) + w /= w.sum() + return 1.0 / np.sum(w * w) + + +def _systematic_resample(logw, rng): + w = np.exp(logw - logw.max()); w /= w.sum() + n = len(w) + u = (rng.uniform() + np.arange(n)) / n + return np.clip(np.searchsorted(np.cumsum(w), u), 0, n - 1) + + +def _mala_step(X, f_eval, f_grad, eps, prior_box, rng): + g = f_grad(X) + prop = X + 0.5 * eps**2 * g + eps * rng.normal(size=X.shape) + lo = prior_box[:, 0]; hi = prior_box[:, 1] + prop = np.where(prop > hi, 2 * hi - prop, prop) + prop = np.where(prop < lo, 2 * lo - prop, prop) + prop = np.clip(prop, lo, hi) + g_prop = f_grad(prop) + log_qf = -np.sum((prop - X - 0.5 * eps**2 * g)**2, axis=1) / (2 * eps**2) + log_qb = -np.sum((X - prop - 0.5 * eps**2 * g_prop)**2, axis=1) / (2 * eps**2) + log_alpha = f_eval(prop) - f_eval(X) + log_qb - log_qf + acc = np.log(rng.uniform(size=len(X))) < log_alpha + return np.where(acc[:, None], prop, X), float(acc.mean()) + + +def iterate(particles, *, surrogate, surrogate_prev=None, + prior_box, rng, state=None, + n_mala_steps=8, target_ess_frac=0.5, + birth_death_rate=0.0, **_): + state = dict(state or {}) + X = np.asarray(particles, dtype=float).copy() + n, _d = X.shape + if state.get("mala_eps") is None: + cov_diag = np.diag(np.cov(X.T)) if X.shape[1] > 1 else np.array([X.var()]) + state["mala_eps"] = 0.3 * float(np.sqrt(cov_diag + 1e-8).mean()) + + accept_log = [] + + if surrogate_prev is not None: + beta = 0.0 + logw = np.zeros(n) + delta_f = surrogate.predict(X) - surrogate_prev.predict(X) + while beta < 1.0: + lo, hi = 0.0, 1.0 - beta + for _ in range(20): + mid = 0.5 * (lo + hi) + if _ess(logw + mid * delta_f) > target_ess_frac * n: + lo = mid + else: + hi = mid + d_beta = lo if lo > 1e-6 else (1.0 - beta) + logw = logw + d_beta * delta_f + beta += d_beta + if _ess(logw) < target_ess_frac * n or beta >= 1.0: + idx = _systematic_resample(logw, rng) + X = X[idx] + logw = np.zeros(n) + delta_f = surrogate.predict(X) - surrogate_prev.predict(X) + + def fe(z, beta=beta, s=surrogate, sp=surrogate_prev): + return beta * s.predict(z) + (1 - beta) * sp.predict(z) + + def fg(z, beta=beta, s=surrogate, sp=surrogate_prev): + return beta * s.grad(z) + (1 - beta) * sp.grad(z) + + for _ in range(n_mala_steps): + X, acc = _mala_step(X, fe, fg, state["mala_eps"], prior_box, rng) + accept_log.append(acc) + state["mala_eps"] *= np.exp(0.1 * (acc - 0.574)) + state["mala_eps"] = float(np.clip(state["mala_eps"], 1e-4, 5.0)) + else: + for _ in range(n_mala_steps): + X, acc = _mala_step(X, surrogate.predict, surrogate.grad, + state["mala_eps"], prior_box, rng) + accept_log.append(acc) + state["mala_eps"] *= np.exp(0.1 * (acc - 0.574)) + state["mala_eps"] = float(np.clip(state["mala_eps"], 1e-4, 5.0)) + + info = {"state": state, + "mean_accept": float(np.mean(accept_log)) if accept_log else float("nan"), + "n_steps": len(accept_log)} + return X, info diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/smc_mala_bd.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/smc_mala_bd.py new file mode 100644 index 000000000..a06999f8e --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/smc_mala_bd.py @@ -0,0 +1,15 @@ +"""SMC-MALA followed by a short birth-death rejuvenation (engine version).""" +from .smc_mala import iterate as _smc_mala +from .birth_death import iterate as _bd + + +def iterate(particles, *, surrogate, surrogate_prev=None, + prior_box, rng, state=None, **kw): + X1, info1 = _smc_mala(particles, surrogate=surrogate, surrogate_prev=surrogate_prev, + prior_box=prior_box, rng=rng, state=state, **kw) + X2, info2 = _bd(X1, surrogate=surrogate, prior_box=prior_box, rng=rng, + state=info1.get("state"), + n_langevin_steps=5, n_bd_passes=2, + birth_death_rate=kw.get("birth_death_rate", 1.0)) + info = {**info1, **info2, "state": info2.get("state", info1.get("state"))} + return X2, info diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/surrogate.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/surrogate.py new file mode 100644 index 000000000..de4345800 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/surrogate.py @@ -0,0 +1,68 @@ +"""Local quadratic surrogate of ln-likelihood, RIFT BayesianLeastSquares-flavored. + +f(x) = a + b^T x + 0.5 x^T C x with Tikhonov regularization on coefficients. +Gradient: b + C x. Closed form. +""" +import numpy as np + +def _design(X): + n, d = X.shape + cols = [np.ones(n), X] # 1, linear + quad = [] + for i in range(d): + for j in range(i, d): + quad.append(X[:, i] * X[:, j]) + # square terms get factor 1, cross-terms factor 1 (we'll handle 0.5 in eval) + cols.append(np.column_stack(quad)) + return np.concatenate([np.ones((n,1)), X, np.column_stack(quad)], axis=1) + +def _coef_unpack(theta, d): + a = theta[0] + b = theta[1:1+d] + # quad terms in order (0,0),(0,1),...,(0,d-1),(1,1),(1,2),... + C = np.zeros((d, d)) + k = 1 + d + for i in range(d): + for j in range(i, d): + v = theta[k]; k += 1 + if i == j: + C[i, i] = 2.0 * v # since 0.5 x^T C x => C_ii contributes 0.5 C_ii x_i^2 + else: + C[i, j] = v + C[j, i] = v + return a, b, C + +class QuadFit: + def __init__(self, X, y, ridge=1e-3, eval_weight=None): + X = np.asarray(X, dtype=float) + y = np.asarray(y, dtype=float) + self.d = X.shape[1] + self.X_mean = X.mean(axis=0) + self.X_std = X.std(axis=0) + 1e-12 + Xn = (X - self.X_mean) / self.X_std + D = _design(Xn) + W = np.eye(D.shape[1]) * ridge + if eval_weight is None: + A = D.T @ D + W + rhs = D.T @ y + else: + wv = np.asarray(eval_weight, dtype=float) + DW = D * wv[:, None] + A = DW.T @ D + W + rhs = DW.T @ y + theta, *_ = np.linalg.lstsq(A, rhs, rcond=None) + self.theta = theta + self.a, self.b_n, self.C_n = _coef_unpack(theta, self.d) + + def _to_normed(self, X): + return (np.atleast_2d(X) - self.X_mean) / self.X_std + + def f(self, X): + Xn = self._to_normed(X) + v = self.a + Xn @ self.b_n + 0.5 * np.einsum('ni,ij,nj->n', Xn, self.C_n, Xn) + return v + + def grad(self, X): + Xn = self._to_normed(X) + gn = self.b_n + Xn @ self.C_n # gradient in normed coords + return gn / self.X_std # back to native coords diff --git a/MonteCarloMarginalizeCode/Code/bin/util_HyperparameterTracerUpdate.py b/MonteCarloMarginalizeCode/Code/bin/util_HyperparameterTracerUpdate.py new file mode 100755 index 000000000..bb5cb20d6 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/bin/util_HyperparameterTracerUpdate.py @@ -0,0 +1,269 @@ +#! /usr/bin/env python +# +# util_HyperparameterTracerUpdate.py +# +# DRAFT - parsimonious-placement project (2026-05-13) +# +# GOAL +# Drop-in alternative to util_HyperparameterPuffball.py for the RIFT hyperpipeline. +# Same I/O contract: read a hyperpipe-format .dat grid (header +# "# lnL sigma_lnL p1 p2 ..."), advance it by tracer SMC + birth-death, write +# a new .dat grid in the same format. +# +# Shared engine with util_ParameterTracerUpdate.py (the event-level cousin): +# both import from RIFT.misc.tracer_placement. +# +# COMPATIBILITY with util_HyperparameterPuffball.py +# --inj-file, --inj-file-out, --puff-factor, --force-away, --parameter, +# --no-correlation, --random-parameter, --random-parameter-range, +# --downselect-parameter, --downselect-parameter-range, --regularize +# same semantics. --update-method puffball reproduces puffball exactly. +# +# NEW +# --update-method {smc-mala-bd, smc-mala, birth-death, puffball} default smc-mala-bd +# --tracer-fit-method {rf, rbf, quadratic, polynomial} default rf +# --inj-file-prev OPTIONAL previous-iteration .dat (enables SMC bridging) +# --no-union-refit opt out of union refit when --inj-file-prev is given +# --n-mala-steps INT default 8 +# --target-ess-frac FLOAT default 0.5 +# --birth-death-rate FLOAT default 1.0 +# --rng-seed INT deterministic when given +# --state-in / --state-out tiny state file (~100 bytes) +# +# I/O FORMAT +# Input/output is the same as util_HyperparameterPuffball.py: a text file with a +# leading "# lnL sigma_lnL p1 p2 ..." header line. Data rows are numeric. +# - X (coord array) is extracted from the parameter columns by name (matching +# --parameter ordering). +# - Y (lnL) and sigma (sigma_lnL) come from the first two columns. +# - Output rows preserve all original columns; only the parameter values are +# updated. The lnL/sigma_lnL columns are zeroed in the output, matching the +# puffball convention (next iteration's marg driver will overwrite them). +# +# USAGE in hyperpipe (Hydra) +# puff: +# exe: util_HyperparameterTracerUpdate.py +# settings: +# update-method: smc-mala-bd +# tracer-fit-method: rf +# +# USAGE in legacy create_eos_posterior_pipeline +# --puff-exe `which util_HyperparameterTracerUpdate.py` +# --puff-args `pwd`/args_puff.txt # contains the flags above +# + +import argparse +import os +import pickle +import sys +import numpy as np + +try: + # Canonical path once the engine is merged into RIFT proper. + from RIFT.misc.tracer_placement import samplers as _tracer_samplers + from RIFT.misc.tracer_placement import fits as _tracer_fits + _TRACER_OK = True +except ImportError: + try: + # Local-dev fallback: PYTHONPATH points at a directory containing + # `tracer_placement/` directly (no RIFT/ wrapper). See the demo Makefile. + from tracer_placement import samplers as _tracer_samplers + from tracer_placement import fits as _tracer_fits + _TRACER_OK = True + except ImportError: + _TRACER_OK = False + + +# --------------------------------- CLI ------------------------------------- # + +def build_parser(): + p = argparse.ArgumentParser(description=__doc__) + # Mirror util_HyperparameterPuffball.py + p.add_argument("--inj-file", required=True, help="Input .dat grid (hyperpipe format).") + p.add_argument("--inj-file-out", default="output-tracer.dat") + p.add_argument("--puff-factor", default=1.0, type=float) + p.add_argument("--force-away", default=0.0, type=float) + p.add_argument("--parameter", action="append", required=True) + p.add_argument("--no-correlation", action="append", type=str) + p.add_argument("--random-parameter", action="append") + p.add_argument("--random-parameter-range", action="append", type=str) + p.add_argument("--downselect-parameter", action="append") + p.add_argument("--downselect-parameter-range", action="append", type=str) + p.add_argument("--regularize", action="store_true") + # Tracer-specific + p.add_argument("--update-method", + choices=("smc-mala-bd", "smc-mala", "birth-death", "puffball"), + default="smc-mala-bd") + p.add_argument("--tracer-fit-method", + choices=("rf", "rbf", "quadratic", "polynomial"), + default="rf") + p.add_argument("--inj-file-prev", default=None, + help="Optional previous-iteration .dat for SMC bridging / union refit.") + p.add_argument("--no-union-refit", action="store_true") + p.add_argument("--n-mala-steps", default=8, type=int) + p.add_argument("--target-ess-frac", default=0.5, type=float) + p.add_argument("--birth-death-rate", default=1.0, type=float) + p.add_argument("--rng-seed", default=None, type=int) + p.add_argument("--state-in", default=None) + p.add_argument("--state-out", default=None) + return p + + +# ----------------------- .dat <-> arrays ----------------------------------- # + +def _read_dat(path): + """Return (column_names, raw_rows ndarray).""" + with open(path) as f: + header = f.readline().rstrip("\n") + if not header.startswith("#"): + sys.exit(f"util_HyperparameterTracerUpdate: input {path!r} missing '#' header") + cols = header.lstrip("#").split() + if len(cols) < 3 or cols[0] != "lnL" or cols[1] not in ("sigma_lnL", "sigma"): + sys.exit(f"util_HyperparameterTracerUpdate: header must start with " + f"'lnL sigma_lnL ...', got {cols!r}") + rows = np.loadtxt(path) + if rows.ndim == 1: + rows = rows[None, :] + return cols, rows + + +def _extract_X(cols, rows, parameter_order): + idx = [cols.index(p) for p in parameter_order] + return rows[:, idx] + + +def _write_dat(path, cols, rows): + np.savetxt(path, rows, header=" ".join(cols)) + + +def _build_downselect(opts): + d = {} + if opts.downselect_parameter: + for name, rng in zip(opts.downselect_parameter, + opts.downselect_parameter_range or []): + d[name] = list(eval(rng)) + return d + + +def _coord_box(parameter_order, downselect_dict, X): + d = len(parameter_order) + box = np.zeros((d, 2)) + for i, name in enumerate(parameter_order): + if name in downselect_dict: + box[i] = downselect_dict[name] + else: + lo = float(X[:, i].min()) + hi = float(X[:, i].max()) + pad = 0.1 * (hi - lo + 1e-9) + box[i] = (lo - pad, hi + pad) + return box + + +# ------------------------------ main --------------------------------------- # + +def main(argv=None): + opts = build_parser().parse_args(argv) + rng = np.random.default_rng(opts.rng_seed) + + cols, rows = _read_dat(opts.inj_file) + X = _extract_X(cols, rows, opts.parameter) + Y = rows[:, 0] # lnL column + S = rows[:, 1] if rows.shape[1] >= 2 else None + downselect = _build_downselect(opts) + prior_box = _coord_box(opts.parameter, downselect, X) + + method = opts.update_method + + # ---- puffball regression path -------------------------------------- # + if method == "puffball": + cov = (np.cov(X.T) * opts.puff_factor**2 if X.shape[1] > 1 + else np.array([[X.std() ** 2]])) + cov = np.atleast_2d(cov) + if np.min(np.linalg.eigvalsh(cov)) < 1e-12: + cov = cov + 1e-8 * np.eye(cov.shape[0]) + delta = rng.multivariate_normal(np.zeros(X.shape[1]), cov, size=len(X)) + X_out = X + delta + # write back into rows; zero lnL/sigma (puffball convention) + out_rows = rows.copy() + for i, name in enumerate(opts.parameter): + out_rows[:, cols.index(name)] = X_out[:, i] + out_rows[:, 0] = 0.0 + out_rows[:, 1] = 0.0 + _write_dat(opts.inj_file_out, cols, out_rows) + return + + # ---- tracer path --------------------------------------------------- # + if not _TRACER_OK: + sys.stderr.write("util_HyperparameterTracerUpdate: RIFT.misc.tracer_placement " + "not installed; falling back to puffball.\n") + opts.update_method = "puffball" + return main(argv) + + X_train, Y_train, S_train = X, Y, S + fit_prev = None + + if opts.inj_file_prev is not None and os.path.exists(opts.inj_file_prev): + cols_p, rows_p = _read_dat(opts.inj_file_prev) + X_prev = _extract_X(cols_p, rows_p, opts.parameter) + Y_prev = rows_p[:, 0] + S_prev = rows_p[:, 1] if rows_p.shape[1] >= 2 else None + fit_prev = _tracer_fits.build(opts.tracer_fit_method, + X_prev, Y_prev, sigma=S_prev) + if not opts.no_union_refit: + X_train = np.vstack([X_prev, X]) + Y_train = np.concatenate([Y_prev, Y]) + if S is not None and S_prev is not None: + S_train = np.concatenate([S_prev, S]) + else: + S_train = None + + fit_now = _tracer_fits.build(opts.tracer_fit_method, + X_train, Y_train, sigma=S_train) + + state = {} + if opts.state_in and os.path.exists(opts.state_in): + with open(opts.state_in, "rb") as f: + state = pickle.load(f) + + sampler = { + "smc-mala-bd": _tracer_samplers.smc_mala_bd, + "smc-mala": _tracer_samplers.smc_mala, + "birth-death": _tracer_samplers.birth_death, + }[method] + + X_out, info = sampler( + particles=X, + surrogate=fit_now, + surrogate_prev=fit_prev, + prior_box=prior_box, + rng=rng, + n_mala_steps=opts.n_mala_steps, + target_ess_frac=opts.target_ess_frac, + birth_death_rate=opts.birth_death_rate, + state=state, + ) + + if opts.state_out: + with open(opts.state_out, "wb") as f: + pickle.dump(info.get("state", {}), f) + + # Apply downselect manually (puffball-equivalent behavior) + if downselect: + mask = np.ones(len(X_out), dtype=bool) + for k, (lo, hi) in downselect.items(): + if k in opts.parameter: + col = opts.parameter.index(k) + mask &= (X_out[:, col] >= lo) & (X_out[:, col] <= hi) + X_out = X_out[mask] + + out_rows = np.zeros((len(X_out), rows.shape[1])) + # carry forward any extra columns from input rows (just zero them; marg driver overwrites) + for i, name in enumerate(opts.parameter): + out_rows[:, cols.index(name)] = X_out[:, i] + out_rows[:, 0] = 0.0 + out_rows[:, 1] = 0.0 + _write_dat(opts.inj_file_out, cols, out_rows) + + +if __name__ == "__main__": + main() diff --git a/MonteCarloMarginalizeCode/Code/bin/util_ParameterTracerUpdate.py b/MonteCarloMarginalizeCode/Code/bin/util_ParameterTracerUpdate.py new file mode 100755 index 000000000..12da709c8 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/bin/util_ParameterTracerUpdate.py @@ -0,0 +1,342 @@ +#! /usr/bin/env python +# +# util_ParameterTracerUpdate.py +# +# DRAFT - parsimonious-placement project (rev. 2026-05-13) +# +# GOAL +# Drop-in alternative to util_ParameterPuffball.py for iterative grid update. +# Instead of resampling-from-fit + Gaussian jitter, advance the existing grid +# through a tracer-particle update: tempered SMC with MALA moves on a surrogate, +# plus optional birth-death rejuvenation. The tool REFITS its own surrogate from +# the same .dat file CIP reads, so it is fully self-contained (no model files +# passed between jobs, no CIP coupling, no extra OSG transfers). +# +# COMPATIBILITY +# --parameter, --inj-file, --inj-file-out, --fref, --fmin, --random-parameter, +# --random-parameter-range, --mc-range, --eta-range, --mtot-range, +# --downselect-parameter, --downselect-parameter-range, --reflect-parameter, +# --enforce-duration-bound : same semantics as util_ParameterPuffball.py. +# +# DATA-INGEST (mirrors CIP) +# --fname path to current iteration's ILE composite .dat file +# --fname-prev optional, previous iteration's .dat (enables SMC bridge) +# --lnL-col column index of lnL (caller specifies; matches CIP's expectations) +# --sigma-col optional column index of sigma_lnL +# +# NEW +# --update-method {smc-mala-bd, smc-mala, birth-death, puffball} +# Default smc-mala-bd. "puffball" reproduces util_ParameterPuffball.py for regression. +# --tracer-fit-method {rf, rbf, quadratic, polynomial} default rf +# --no-union-refit if --fname-prev given, do NOT include prev points in f_k fit +# --n-mala-steps INT default 8 +# --target-ess-frac FLOAT default 0.5 +# --birth-death-rate FLOAT default 1.0 +# --rng-seed INT deterministic when given +# --state-in tiny pickle: {mala_eps, last_rng_state} (~100 bytes) +# --state-out where to dump updated state for the next iteration +# +# USAGE EXAMPLE +# util_ParameterTracerUpdate.py \ +# --parameter mc --parameter eta --parameter chi_eff --parameter chi_p \ +# --inj-file overlap-grid_k.xml.gz --inj-file-out overlap-grid_kp1 \ +# --fname all_lnL_k.dat --fname-prev all_lnL_km1.dat \ +# --lnL-col 9 \ +# --update-method smc-mala-bd --tracer-fit-method rf \ +# --mc-range '[20,40]' --eta-range '[0.10,0.25]' \ +# --state-in tracer_state_k.pkl --state-out tracer_state_kp1.pkl +# +# NOTES +# - This is a draft skeleton. The sampler engine lives in +# parsimonious_placement_plan.md's proto/samplers/. For production it should be +# promoted into RIFT.misc.tracer_placement and imported here. +# - Surrogate refitting happens inside this tool every call. RF and RBF refits on +# the typical (X, lnL) size (10^2 - 10^4 points) cost seconds; negligible vs ILE. +# + +import argparse +import os +import pickle +import sys +import numpy as np + +import RIFT.lalsimutils as lalsimutils +import lalsimulation as lalsim # noqa: F401 +import lal + +try: + # Canonical path once the engine is merged into RIFT proper. + from RIFT.misc.tracer_placement import samplers as _tracer_samplers + from RIFT.misc.tracer_placement import fits as _tracer_fits + _TRACER_OK = True +except ImportError: + try: + # Local-dev fallback: PYTHONPATH points at a directory containing + # `tracer_placement/` directly (no RIFT/ wrapper). See the demo Makefile. + from tracer_placement import samplers as _tracer_samplers + from tracer_placement import fits as _tracer_fits + _TRACER_OK = True + except ImportError: + _TRACER_OK = False + +from igwn_ligolw import lsctables, ligolw # noqa: F401 +lsctables.use_in(ligolw.LIGOLWContentHandler) + + +# --------------------------------- CLI ------------------------------------- # + +def build_parser(): + p = argparse.ArgumentParser(description=__doc__) + # Mirror util_ParameterPuffball.py + p.add_argument("--inj-file", required=True) + p.add_argument("--inj-file-out", default="output-tracer") + p.add_argument("--fail-if-empty", action="store_true") + p.add_argument("--approx-output", default="SEOBNRv2") + p.add_argument("--fref", default=None, type=float) + p.add_argument("--fmin", default=None, type=float) + p.add_argument("--parameter", action="append", required=True) + p.add_argument("--random-parameter", action="append") + p.add_argument("--random-parameter-range", action="append", type=str) + p.add_argument("--mc-range", default=None) + p.add_argument("--eta-range", default=None) + p.add_argument("--mtot-range", default=None) + p.add_argument("--downselect-parameter", action="append") + p.add_argument("--downselect-parameter-range", action="append", type=str) + p.add_argument("--reflect-parameter", action="append", type=str) + p.add_argument("--enforce-duration-bound", default=None, type=float) + # Data ingest (mirror CIP) + p.add_argument("--fname", required=True, + help="Current iteration ILE composite .dat (same file CIP reads).") + p.add_argument("--fname-prev", default=None, + help="Optional previous iteration .dat; enables SMC bridging.") + p.add_argument("--lnL-col", type=int, required=True, + help="Column index of lnL in --fname (caller specifies, matches CIP).") + p.add_argument("--sigma-col", type=int, default=None, + help="Optional column index of sigma_lnL.") + p.add_argument("--lnL-downscale-factor", type=float, default=1.0) + # Tracer-specific + p.add_argument("--update-method", + choices=("smc-mala-bd", "smc-mala", "birth-death", "puffball"), + default="smc-mala-bd") + p.add_argument("--tracer-fit-method", + choices=("rf", "rbf", "quadratic", "polynomial"), + default="rf") + p.add_argument("--no-union-refit", action="store_true", + help="If --fname-prev is given, do NOT include those points in the f_k fit.") + p.add_argument("--n-mala-steps", default=8, type=int) + p.add_argument("--target-ess-frac", default=0.5, type=float) + p.add_argument("--birth-death-rate", default=1.0, type=float) + p.add_argument("--rng-seed", default=None, type=int) + p.add_argument("--state-in", default=None) + p.add_argument("--state-out", default=None) + # Back-compat with puffball for the regression path + p.add_argument("--puff-factor", default=1.0, type=float) + p.add_argument("--force-away", default=0.0, type=float) + p.add_argument("--regularize", action="store_true") + p.add_argument("--no-correlation", type=str, action="append") + return p + + +# --------------------- XML -> X (coord array) ------------------------------ # + +def _xml_to_X(opts, coord_names): + P_list = lalsimutils.xml_to_ChooseWaveformParams_array(opts.inj_file) + rows = [] + for P in P_list: + if opts.fmin is not None: + P.fmin = opts.fmin + if opts.fref is not None: + P.fref = opts.fref + line = np.zeros(len(coord_names)) + for i, name in enumerate(coord_names): + fac = lal.MSUN_SI if name in ("mc", "m1", "m2", "mtot") else 1.0 + line[i] = P.extract_param(name) / fac + rows.append(line) + return np.array(rows), P_list + + +def _X_to_xml(X_out, P_template_list, opts, coord_names, downselect_dict): + P_out_list = [] + n = min(len(P_template_list), len(X_out)) + for i in range(n): + P = P_template_list[i] + for j, name in enumerate(coord_names): + fac = lal.MSUN_SI if name in ("mc", "m1", "m2", "mtot") else 1.0 + P.assign_param(name, X_out[i, j] * fac) + keep = True + for k, (lo, hi) in downselect_dict.items(): + v = P.extract_param(k) + if k in ("mc", "m1", "m2", "mtot"): + v /= lal.MSUN_SI + if v < lo or v > hi: + keep = False + break + if opts.enforce_duration_bound is not None: + if lalsimutils.estimateWaveformDuration(P) > opts.enforce_duration_bound: + keep = False + if keep: + P_out_list.append(P) + if opts.fail_if_empty and not P_out_list: + sys.stderr.write("tracer: produced empty grid; aborting.\n") + sys.exit(2) + fref = P_out_list[0].fref if P_out_list else opts.fref + lalsimutils.ChooseWaveformParams_array_to_xml( + P_out_list, fname=opts.inj_file_out, fref=fref) + + +def _build_downselect(opts): + d = {} + if opts.mc_range is not None: + d["mc"] = list(eval(opts.mc_range)) + if opts.eta_range is not None: + d["eta"] = list(eval(opts.eta_range)) + if opts.mtot_range is not None: + d["mtot"] = list(eval(opts.mtot_range)) + if opts.downselect_parameter: + for name, rng in zip(opts.downselect_parameter, opts.downselect_parameter_range or []): + d[name] = list(eval(rng)) + return d + + +def _coord_box(coord_names, downselect_dict, X): + d = len(coord_names) + box = np.zeros((d, 2)) + for i, name in enumerate(coord_names): + if name in downselect_dict: + box[i] = downselect_dict[name] + else: + box[i] = (float(X[:, i].min()), float(X[:, i].max())) + return box + + +# --------------- .dat -> (X, Y, sigma) in coord-frame ---------------------- # +# +# We need (X, Y) in the SAME coordinate frame the sampler will move in. The .dat +# columns are not in coord-frame in general; CIP does its own conversion. We use +# the simplest robust approach: match each .dat row to a P in the input XML by +# row order (the standard RIFT contract), then read coord-frame X from the XML +# and lnL from the matched .dat row. +# +# When --fname-prev is used, the caller must also pass a previous-grid XML +# (--inj-file-prev) so we can do the same matching for f_{k-1}. To keep the CLI +# minimal in this draft we expect the convention that --fname-prev sits next to +# overlap-grid_{k-1}.xml.gz in the same directory (a one-line lookup) — TODO +# in production: add --inj-file-prev. + +def _load_lnL_for_grid(fname_dat, lnL_col, sigma_col, n_points, downscale): + arr = np.loadtxt(fname_dat) + if arr.ndim == 1: + arr = arr[None, :] + if len(arr) < n_points: + sys.stderr.write( + f"tracer: .dat has fewer rows ({len(arr)}) than XML grid ({n_points}); " + "alignment assumption (row order) violated.\n") + sys.exit(3) + Y = arr[:n_points, lnL_col] * downscale + if sigma_col is not None: + S = arr[:n_points, sigma_col] + else: + S = None + return Y, S + + +# ------------------------------- main -------------------------------------- # + +def main(argv=None): + opts = build_parser().parse_args(argv) + coord_names = list(opts.parameter) + downselect = _build_downselect(opts) + + X, P_template = _xml_to_X(opts, coord_names) + if X.size == 0: + sys.stderr.write("tracer: empty input grid; aborting.\n") + sys.exit(2) + prior_box = _coord_box(coord_names, downselect, X) + rng = np.random.default_rng(opts.rng_seed) + + method = opts.update_method + + # ---- puffball regression path -------------------------------------- # + if method == "puffball": + cov = np.cov(X.T) * opts.puff_factor**2 if X.shape[1] > 1 else np.array([[X.std()**2]]) + cov = np.atleast_2d(cov) + delta = rng.multivariate_normal(np.zeros(X.shape[1]), cov, size=len(X)) + X_out = X + delta + _X_to_xml(X_out, P_template, opts, coord_names, downselect) + return + + # ---- tracer path: refit our own surrogate from .dat ---------------- # + if not _TRACER_OK: + sys.stderr.write("tracer: RIFT.misc.tracer_placement not installed; " + "falling back to puffball.\n") + opts.update_method = "puffball" + return main(argv) + + Y_k, S_k = _load_lnL_for_grid(opts.fname, opts.lnL_col, opts.sigma_col, + len(X), opts.lnL_downscale_factor) + + X_train_k = X + Y_train_k = Y_k + S_train_k = S_k + X_prev = None + fit_prev = None + + if opts.fname_prev is not None: + # Read previous-iteration grid + lnL. Production CLI should accept + # --inj-file-prev explicitly; here we follow the naming convention. + inj_prev = opts.fname_prev.replace(".dat", ".xml.gz") + if not os.path.exists(inj_prev): + sys.stderr.write( + f"tracer: cannot locate previous grid XML ({inj_prev}); " + "ignoring --fname-prev and running unbridged.\n") + else: + opts_prev = argparse.Namespace(**vars(opts)) + opts_prev.inj_file = inj_prev + X_prev, _ = _xml_to_X(opts_prev, coord_names) + Y_prev, S_prev = _load_lnL_for_grid( + opts.fname_prev, opts.lnL_col, opts.sigma_col, + len(X_prev), opts.lnL_downscale_factor) + if not opts.no_union_refit: + X_train_k = np.vstack([X_prev, X]) + Y_train_k = np.concatenate([Y_prev, Y_k]) + S_train_k = (np.concatenate([S_prev, S_k]) + if (S_prev is not None and S_k is not None) else None) + # f_{k-1} fit on prior data only + fit_prev = _tracer_fits.build(opts.tracer_fit_method, + X_prev, Y_prev, sigma=S_prev) + + fit_now = _tracer_fits.build(opts.tracer_fit_method, + X_train_k, Y_train_k, sigma=S_train_k) + + state = {} + if opts.state_in and os.path.exists(opts.state_in): + with open(opts.state_in, "rb") as f: + state = pickle.load(f) + + sampler = { + "smc-mala-bd": _tracer_samplers.smc_mala_bd, + "smc-mala": _tracer_samplers.smc_mala, + "birth-death": _tracer_samplers.birth_death, + }[method] + + X_out, info = sampler( + particles=X, + surrogate=fit_now, + surrogate_prev=fit_prev, + prior_box=prior_box, + rng=rng, + n_mala_steps=opts.n_mala_steps, + target_ess_frac=opts.target_ess_frac, + birth_death_rate=opts.birth_death_rate, + state=state, + ) + + if opts.state_out: + with open(opts.state_out, "wb") as f: + pickle.dump(info.get("state", {}), f) + + _X_to_xml(X_out, P_template, opts, coord_names, downselect) + + +if __name__ == "__main__": + main() From bf2a8d427bccdc754196970345a291363061e937 Mon Sep 17 00:00:00 2001 From: Richard O'Shaughnessy Date: Thu, 14 May 2026 13:37:48 -0400 Subject: [PATCH 08/24] create_eos_posterior_pipeline: add --tracer-only-marg cost-halving mode Optional workflow for use with the tracer-placement tools added in the previous commit (util_HyperparameterTracerUpdate.py / util_ParameterTracerUpdate.py). Two new CLI flags: --tracer-only-marg Skip MARG_* (grid-X.dat) nodes on non-final iterations; only MARG_PUFF_* (grid_puff-X.dat) runs. Halves the per-iteration likelihood-evaluation cost when the puff'd grid is itself already a posterior-tracking ensemble (which it is under tracer placement). --tracer-final-marg-iterations N (default 1) Number of trailing iterations on which MARG still runs, so the final-posterior diagnostic stays based on a cleanly-sampled grid. Set to 0 to skip MARG entirely. Gate is conservative: iteration 0 always runs MARG (no grid_puff-0.dat exists yet), and the last N iterations always run MARG. With the default N=1 and 5 total iterations, MARG runs on iters {0, 4} and is skipped on {1, 2, 3} -- a 1.8x reduction in MARG-equivalent evaluations. Default behaviour is unchanged: --tracer-only-marg is False unless explicitly set, so the gate collapses to a no-op for every existing pipeline. The only structural change in the DAG generator is a conditional around the MARG_* node-creation block in the iteration loop (with a print() noting the skip for easy log diagnosis). No changes to MARG_PUFF wiring, consolidation, EOS-post, or anything else. Rationale and worked example in 20260513-Me-ParsimoniousPlacementOptions/rift_integration/workflow_tracer_only_marg.md. --- .../Code/bin/create_eos_posterior_pipeline | 39 +++++++++++++------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline b/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline index 14798db9e..3f690801e 100644 --- a/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline +++ b/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline @@ -42,6 +42,8 @@ parser.add_argument("--request-memory-marg",default=16384,type=int,help="Memory parser.add_argument("--puff-exe",default=None,help="similar to PUFF, but cannot be the same executable since format different") parser.add_argument("--puff-args",default=None,help=" argumetns") parser.add_argument("--puff-max-it",default=np.inf,type=int,help="Maximum iteration number that puffball is applied. default to infinity ") +parser.add_argument("--tracer-only-marg",action='store_true',help="Parsimonious-placement workflow: only run MARG_PUFF (placement) jobs in intermediate iterations and skip MARG_* (posterior) jobs. The final --tracer-final-marg-iterations iterations still run MARG_* to produce a clean posterior. Cuts marginalisation cost roughly in half for N=5-6 iterations. Requires --puff-exe to be a tracer-aware updater (e.g. util_HyperparameterTracerUpdate.py) since the puff grid drives placement on its own.") +parser.add_argument("--tracer-final-marg-iterations",default=1,type=int,help="When --tracer-only-marg is set, the last N iterations still run MARG_* nodes for posterior generation. Default 1.") parser.add_argument("--eos-post-args",default=None,help="filename of args_ile.txt file which holds ILE arguments. Should NOT conflict with arguments auto-set by this DAG ... in particular, i/o arguments will be modified") parser.add_argument("--eos-post-exe",default=None,help="filename of ILE or equivalent executable. Will default to `which integrate_likelihood_extrinsic` in low-level code") parser.add_argument("--eos-post-retries",default=0,type=int,help="Number of retry attempts for ILE jobs. (These can fail)") @@ -581,20 +583,33 @@ for it in np.arange(it_start,opts.n_iterations): con_node.add_macro("macroiteration",it) con_node.add_macro("macroevent",int(event)) con_node.set_retry(opts.general_retries) + # tracer-only-marg gate: skip MARG_* (posterior) nodes in intermediate + # iterations when the puff grid is sufficient for placement. Iteration 0 + # always runs MARG (no grid_puff-0.dat yet), and the last N iterations + # still run MARG to produce a clean posterior. + _n_final_marg = max(0, int(getattr(opts, "tracer_final_marg_iterations", 1))) + _skip_marg_this_iter = ( + getattr(opts, "tracer_only_marg", False) + and it > it_start + and it < (opts.n_iterations - _n_final_marg) + ) + if _skip_marg_this_iter: + print(" tracer-only-marg: skipping MARG_* nodes for iteration", it) for row in np.arange(indx_max_event): #np.arange(n_jobs_this_time): # Add task per MARG operation - if opts.marg_event_args: - cip_node = pipeline.CondorDAGNode(cip_job) - else: - cip_node = pipeline.CondorDAGNode(cip_job_list[event]) - cip_node.set_retry(opts.general_retries) - cip_node.add_macro("macroid",event) - cip_node.add_macro("macroevent", int(row*n_chunk_here)) - cip_node.add_macro("macroiteration", it) - if not(parent_fit_node is None): - cip_node.add_parent(parent_fit_node) - con_node.add_parent(cip_node) # consolidate depends on all of the individual jobs - dag.add_node(cip_node) + if not _skip_marg_this_iter: + if opts.marg_event_args: + cip_node = pipeline.CondorDAGNode(cip_job) + else: + cip_node = pipeline.CondorDAGNode(cip_job_list[event]) + cip_node.set_retry(opts.general_retries) + cip_node.add_macro("macroid",event) + cip_node.add_macro("macroevent", int(row*n_chunk_here)) + cip_node.add_macro("macroiteration", it) + if not(parent_fit_node is None): + cip_node.add_parent(parent_fit_node) + con_node.add_parent(cip_node) # consolidate depends on all of the individual jobs + dag.add_node(cip_node) # if PUFF is done for this event, also add MARG_PUFF nodes. Note grid size assumed identical, so don't need another loop over 'id'/row if puff_args and puff_cadence: if it>it_start and it <= puff_max_it and (it-1)%puff_cadence ==0: # we made a puffball last iteration, so run it through ILE now From 3827a7d3c8db5747c145d165b236a0e926fe8380 Mon Sep 17 00:00:00 2001 From: Richard O'Shaughnessy Date: Thu, 14 May 2026 13:37:48 -0400 Subject: [PATCH 09/24] util_RIFT_hyperpipe.py: forward tracer-only-marg + puff.settings to DAG-gen Hydra-level integration of the parsimonious-placement workflow into the hyperpipeline driver. Previously a tracer run required the user to (a) point puff.exe at util_HyperparameterTracerUpdate.py and (b) escape tracer hyperparameters through puff.extra-args as a free-form string, and there was no Hydra-level switch for --tracer-only-marg at all. This commit makes both first-class: * arch.tracer-only-marg -> --tracer-only-marg * arch.tracer-final-marg-iterations -> --tracer-final-marg-iterations * puff.settings.update-method -> --update-method * puff.settings.tracer-fit-method -> --tracer-fit-method * puff.settings.n-mala-steps -> --n-mala-steps * puff.settings.target-ess-frac -> --target-ess-frac * puff.settings.birth-death-rate -> --birth-death-rate * puff.settings.inj-file-prev -> --inj-file-prev * puff.settings.rng-seed -> --rng-seed * puff.settings.{state-in,state-out} -> --state-{in,out} * puff.settings.no-union-refit (bool) -> --no-union-refit * puff.settings.regularize (bool) -> --regularize Implementation: * _build_puff_args(): extend the puff section to consume an optional puff.settings: sub-block, mirroring the pattern post.settings: already uses. Null / empty values are skipped; bool toggles pass through truthy(). Legacy puff.extra-args still appended verbatim. * cmd_parts assembly: append --tracer-only-marg (and the optional --tracer-final-marg-iterations N) when arch.tracer-only-marg is truthy. Mirrors the existing explode-marg-jobs handling. The default hyperpipe_conf.yaml (and the in-package DEFAULT_CONFIG_YAML schema in RIFT/hyperpipe/config.py) gain the new keys with safe defaults (tracer-only-marg: false, all puff.settings values null) so existing user configs are unchanged in behavior. An example user config is added: demo/hyperpipe/hyperpipe_conf_tracer.yaml demonstrating an end-to-end tracer + tracer-only-marg run on the Gaussian toy. Selectable via: util_RIFT_hyperpipe.py --config-name hyperpipe_conf_tracer Tested via stand-alone simulation of _build_puff_args and the cmd_parts forwarding (no actual Hydra invocation needed): legacy config -> no tracer flags emitted (back-compat) tracer config -> --update-method smc-mala-bd --tracer-fit-method rf ... plus --tracer-only-marg --tracer-final-marg-iterations 1 --- .../Code/RIFT/hyperpipe/config.py | 23 +++++++ .../Code/bin/hyperpipe_conf.yaml | 22 +++++++ .../Code/bin/util_RIFT_hyperpipe.py | 42 ++++++++++++ .../demo/hyperpipe/hyperpipe_conf_tracer.yaml | 65 +++++++++++++++++++ 4 files changed, 152 insertions(+) create mode 100644 MonteCarloMarginalizeCode/Code/demo/hyperpipe/hyperpipe_conf_tracer.yaml diff --git a/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/config.py b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/config.py index c619ca426..2de9d0881 100644 --- a/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/config.py +++ b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/config.py @@ -50,6 +50,13 @@ explode-marg-jobs: 5 # -> --eos-post-explode-jobs explode-marg-jobs-last: null # -> --eos-post-explode-jobs-last (default = same) start-iteration: 0 + # Parsimonious-placement workflow (set tracer-only-marg: true to enable). + # When true, only the final ``tracer-final-marg-iterations`` iterations + # spawn MARG_* (posterior) nodes; intermediate iterations rely on + # MARG_PUFF (placement) jobs alone, recovering most of a CIP iteration's + # cost. Requires puff.exe to be a tracer-aware updater. + tracer-only-marg: false # -> --tracer-only-marg + tracer-final-marg-iterations: 1 # -> --tracer-final-marg-iterations post: # Final posterior-construction stage (CIP-family executable). @@ -97,6 +104,22 @@ puff-factor: 0.5 force-away: 0.03 extra-args: "" + # Tracer-placement sampler hyperparameters. These are only consumed when + # exe points at a tracer-aware updater (e.g. util_HyperparameterTracerUpdate.py + # or util_ParameterTracerUpdate.py); the legacy puffball binaries ignore them. + # Null values fall through to the updater's built-in defaults. + settings: + update-method: null # smc-mala-bd | smc-mala | birth-death | puffball + tracer-fit-method: null # rf | rbf | polynomial | quadratic + n-mala-steps: null # -> --n-mala-steps + target-ess-frac: null # -> --target-ess-frac + birth-death-rate: null # -> --birth-death-rate + inj-file-prev: null # -> --inj-file-prev (SMC bridging input) + no-union-refit: false # -> --no-union-refit + regularize: false # -> --regularize (passes through to puffball-compat code) + rng-seed: null # -> --rng-seed (deterministic when set) + state-in: null # -> --state-in + state-out: null # -> --state-out test: exe: convergence_test_samples diff --git a/MonteCarloMarginalizeCode/Code/bin/hyperpipe_conf.yaml b/MonteCarloMarginalizeCode/Code/bin/hyperpipe_conf.yaml index 636922aed..575981d6c 100644 --- a/MonteCarloMarginalizeCode/Code/bin/hyperpipe_conf.yaml +++ b/MonteCarloMarginalizeCode/Code/bin/hyperpipe_conf.yaml @@ -18,6 +18,13 @@ arch: explode-marg-jobs: 5 explode-marg-jobs-last: null start-iteration: 0 + # Parsimonious-placement workflow (set tracer-only-marg: true to enable). + # Skips MARG_* (posterior) nodes in intermediate iterations and relies on + # MARG_PUFF (placement) jobs alone. Final N iterations still run MARG_* to + # produce a clean posterior. Requires puff.exe to be a tracer-aware updater + # (e.g. util_HyperparameterTracerUpdate.py). + tracer-only-marg: false + tracer-final-marg-iterations: 1 post: exe: null @@ -52,6 +59,21 @@ puff: puff-factor: 0.5 force-away: 0.03 extra-args: "" + # Tracer-placement sampler hyperparameters (only used when exe is a + # tracer-aware updater; ignored by the legacy util_HyperparameterPuffball.py). + # Leave keys null to take the updater's built-in defaults. + settings: + update-method: null # smc-mala-bd | smc-mala | birth-death | puffball + tracer-fit-method: null # rf | rbf | polynomial | quadratic + n-mala-steps: null # int + target-ess-frac: null # float in (0,1] + birth-death-rate: null # float + inj-file-prev: null # path to previous-iteration .dat (enables SMC bridging) + no-union-refit: false # opt out of union refit when inj-file-prev is given + regularize: false + rng-seed: null # int; deterministic when given + state-in: null + state-out: null test: exe: convergence_test_samples diff --git a/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py b/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py index e1e55ab67..42ef14341 100644 --- a/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py +++ b/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py @@ -209,11 +209,45 @@ def _build_post_args(cfg, coord_spec) -> str: def _build_puff_args(cfg, coord_spec) -> str: + """Compose args_puff.txt from the coord spec + puff.settings + extras. + + The legacy puffball (util_HyperparameterPuffball.py) accepts only the + coord-spec flags + force-away/puff-factor. The tracer drop-ins + (util_HyperparameterTracerUpdate.py / util_ParameterTracerUpdate.py) + accept those plus a handful of sampler hyperparameters; we expose those + declaratively here so users can stay in Hydra rather than escaping into + extra-args strings. + """ puff = _cfg_get(cfg, "puff") or {} args = coord_spec.to_puff_args( force_away=_cfg_get(puff, "force-away", 0.03), puff_factor=_cfg_get(puff, "puff-factor", 0.5), ) + settings = _cfg_get(puff, "settings") or {} + # Value-bearing flags. Each pair is (yaml key, CLI flag). Empty / null is skipped. + setting_flags = [ + ("update-method", "--update-method"), + ("tracer-fit-method", "--tracer-fit-method"), + ("n-mala-steps", "--n-mala-steps"), + ("target-ess-frac", "--target-ess-frac"), + ("birth-death-rate", "--birth-death-rate"), + ("inj-file-prev", "--inj-file-prev"), + ("rng-seed", "--rng-seed"), + ("state-in", "--state-in"), + ("state-out", "--state-out"), + ] + for key, flag in setting_flags: + val = _cfg_get(settings, key) + if val is not None and val != "": + args += f" {flag} {val}" + # Bool flags. Each pair is (yaml key, CLI flag). + setting_bool_flags = [ + ("no-union-refit", "--no-union-refit"), + ("regularize", "--regularize"), + ] + for key, flag in setting_bool_flags: + if hyper_config.truthy(_cfg_get(settings, key, False)): + args += f" {flag}" extra = (_cfg_get(puff, "extra-args") or "").strip() if extra: args += " " + extra @@ -340,6 +374,14 @@ def my_app(cfg: DictConfig) -> None: start_iter = int(_cfg_get(arch, "start-iteration", 0) or 0) if start_iter: cmd_parts.append(f"--start-iteration {start_iter}") + # Parsimonious-placement workflow: skip MARG_* in intermediate iterations + # when the puff grid is sufficient for placement (requires --puff-exe to be + # a tracer-aware updater such as util_HyperparameterTracerUpdate.py). + if hyper_config.truthy(_cfg_get(arch, "tracer-only-marg", False)): + cmd_parts.append("--tracer-only-marg") + n_final = _cfg_get(arch, "tracer-final-marg-iterations") + if n_final is not None: + cmd_parts.append(f"--tracer-final-marg-iterations {int(n_final)}") # general bool_flag_pairs = [ diff --git a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/hyperpipe_conf_tracer.yaml b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/hyperpipe_conf_tracer.yaml new file mode 100644 index 000000000..25d25948f --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/hyperpipe_conf_tracer.yaml @@ -0,0 +1,65 @@ +# Example hyperpipe configuration that enables the parsimonious-placement +# tracer workflow. +# +# Differences from the baseline hyperpipe_conf.yaml in this directory: +# +# * puff.exe -> util_HyperparameterTracerUpdate.py +# (the tracer drop-in for util_HyperparameterPuffball.py) +# * puff.settings -> sampler hyperparameters (smc-mala-bd / RF surrogate) +# * arch.tracer-only-marg = true +# skips MARG_* (posterior) nodes in iterations +# 1 ... n_iterations - tracer-final-marg-iterations - 1 +# and relies on MARG_PUFF (placement) jobs alone. +# Cuts marginalisation cost ~1.7-1.8x for N=5-6. +# +# Run with: +# util_RIFT_hyperpipe.py --config-name hyperpipe_conf_tracer +# +# Override any field on the command line with Hydra syntax, e.g.: +# util_RIFT_hyperpipe.py --config-name hyperpipe_conf_tracer \ +# arch.n-iterations=6 puff.settings.update-method=birth-death + +arch: + method: default + n-iterations: 5 + n-samples-per-job: 1000 + explode-marg-jobs: 5 + start-iteration: 0 + # Parsimonious-placement workflow. Iteration 0 always runs MARG (no + # grid_puff-0.dat yet); iterations 1 ... 3 run MARG_PUFF only; the last + # iteration (n-iterations-1 == 4) runs MARG again for the final posterior. + tracer-only-marg: true + tracer-final-marg-iterations: 1 + +post: + coords-fit: "x y z" + coords-sample: "x:[-7,7] y:[-7,7] z:[-7,7]" + settings: + fit-method: rf + +marg-list: + - name: Gaussian + exe: example_gaussian.py + args: "--outdir Gaussian_example --conforming-output-name" + n-chunk: 100 + +puff: + # Tracer-aware updater. Falls back to puffball automatically if + # RIFT.misc.tracer_placement is not importable (e.g. mismatched RIFT + # version), so this config remains usable even on older worker images. + exe: util_HyperparameterTracerUpdate.py + puff-factor: 0.5 + force-away: 0.03 + settings: + update-method: smc-mala-bd # primary recommendation (Tier-1 results) + tracer-fit-method: rf # production default; cheap to rebuild + n-mala-steps: 8 + target-ess-frac: 0.5 + birth-death-rate: 1.0 + rng-seed: null # set to an int for deterministic runs + +init: + file: blind_gaussian_3d_xy_plus.dat + +general: + rundir: rundir_tracer From 57136266de1edc34f024effb3c772ac4c55f0516 Mon Sep 17 00:00:00 2001 From: "R. O'Shaughnessy" Date: Thu, 14 May 2026 19:52:55 -0400 Subject: [PATCH 10/24] parsimonious placement: rewire tracer to consume all.marg_net + add UCB Closes the pathway gap that prevented H1 from working end-to-end: the tracer was wedged into the puff slot (post EOS_POST + JOIN_POST) and was reading grid-{k+1}.dat -- which is EOS_POST posterior samples with a stale "# lnL sigma_lnL ..." header, not the ILE-evaluated (lambda, lnL) table the tracer needs. The tracer would silently succeed with garbage inputs (column 0 / column 1 in the posterior-samples file are arbitrary, not lnL / sigma) and produce a placement that contained zero information about the actual likelihood. This commit fixes the data flow, adds an acquisition-function placement pathway for the conservative-deployment regime, and resolves a leftover merge conflict in create_eos_posterior_pipeline. Also resolves a merge-conflict marker at lines 45-53 of create_eos_posterior_pipeline left over from the rift_O4d_parsimonious_v2 merge. ---- pipeline rewiring ----------------------------------------------- create_eos_posterior_pipeline gains --puff-input-source {posterior, marg_net}, default posterior (legacy behavior preserved bit-for-bit). posterior (legacy): puff reads grid-{k+1}.dat (= EOS_POST posterior samples via JOIN_POST), writes grid_puff-{k+1}.dat, MARG_PUFF re-evaluates the puffed grid. Sensible for the baseline puffball since the puffball doesn't need lnL inputs. marg_net (tracer): puff/tracer node consumes all.marg_net (cumulative ILE-evaluated table emitted by UNIFY) and writes grid-{k+1}.dat directly. EOS_POST still runs each iteration but writes to posterior-{k+1}.dat (downstream-free, diagnostic only). The MARG_PUFF lane is suppressed entirely -- the tracer's chosen grid IS the next MARG input. One MARG lane per iteration in this mode, matching the expensive-ILE design. The puff node's parent is rewired off unify_node (not parent_fit_node- after-EOS_POST) in marg_net mode, so tracer + EOS_POST run in parallel. Also: puff nodes are now gated on --puff-exe AND --puff-args together. --puff-args without --puff-exe is rejected with a clear error rather than silently dropping nodes. ---- tracer tool ----------------------------------------------------- util_HyperparameterTracerUpdate.py * --force-away is now wired through. After the engine returns X_out, a Mahalanobis decimation pass over the input grid's covariance drops near-duplicates. K=0 disables. Mirrors PUFF semantics -- self-avoidance was missing before; SMC-MALA-BD on its own matches *density* (log rho_hat - log pi), not pairwise separation, so two particles could sit arbitrarily close. In the expensive-ILE regime near-duplicates are pure waste. * --update-method gains "ucb" (in addition to smc-mala-bd, smc-mala, birth-death, puffball). * --ucb-kappa FLOAT (default 2.0): UCB exploration weight in score(x) = mu(x) + kappa * sigma(x). * --ucb-n-candidates INT (default 20000): pool size for greedy batch selection. ---- engine: UCB + uncertainty plumbing ------------------------------ RIFT/misc/tracer_placement/ fits/_base.py + predict_with_std(Z) -> (mean, std). Default: (predict(Z), zeros). + class flag has_uncertainty (default False). + class flag smooth_gradient (default True). Tells UCB which local polish to use. fits/_rf.py + predict_with_std returns tree-disagreement std. Not a calibrated posterior, but qualitatively correct: large where trees disagree (unexplored), small where they agree (well-sampled). Sufficient for UCB at zero added cost. + has_uncertainty = True, smooth_gradient = False. samplers/ucb.py (new) Acquisition-function placement: 1. Build candidate pool: 25% jittered current particles + 75% uniform draws from the prior box. 2. Score = mu + kappa * sigma. 3. Greedy descending-score select with Mahalanobis distance self-avoidance (population-covariance metric). 4. Local polish per point: gradient ascent for smooth fits (RBF / quadratic / GP), coordinate-hop for piecewise-constant fits (RF). Strategy selected from FitBase.smooth_gradient. Default kappa = 2.0, n_candidates = 20000, polish_steps = 20. No kappa tapering, no PUFF reactivation -- keeps the conservative pathway minimal for the expensive-simulation regime where there is no resource budget for posterior sampling on top. samplers/__init__.py: export ucb_place. When the surrogate has no uncertainty estimate (e.g. quadratic fit), UCB warns the user and degenerates to greedy mean-maximization rather than failing. RF is the recommended fit method. ---- backwards compatibility ----------------------------------------- * --puff-input-source defaults to posterior; legacy DAGs are unchanged. * --update-method puffball still reproduces puff exactly. * existing smc-mala-bd / smc-mala / birth-death paths untouched. * RF fit's predict() is unchanged; new method is additive. ---- testing --------------------------------------------------------- Engine and tracer tool exercised end-to-end via the CLI on a hand-built .dat: smc-mala-bd, ucb, puffball methods all produce valid output files with the correct header / column count. UCB-on-quadratic correctly warns about no-uncertainty and degenerates to greedy mean-max. Full DAG generation could not be smoke-tested in the dev sandbox (scipy not available) but the pipeline parses cleanly and the patched branches were inspected at the right line ranges. --- .../RIFT/misc/tracer_placement/fits/_base.py | 31 ++- .../RIFT/misc/tracer_placement/fits/_rf.py | 23 +- .../tracer_placement/samplers/__init__.py | 3 +- .../misc/tracer_placement/samplers/ucb.py | 199 ++++++++++++++++++ .../Code/bin/create_eos_posterior_pipeline | 64 ++++-- .../bin/util_HyperparameterTracerUpdate.py | 82 +++++++- 6 files changed, 377 insertions(+), 25 deletions(-) create mode 100644 MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/ucb.py mode change 100644 => 100755 MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_base.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_base.py index b3b1574b6..7889d1467 100644 --- a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_base.py +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_base.py @@ -1,13 +1,40 @@ -"""Common Fit base class. Provides finite-difference gradient as a default.""" +"""Common Fit base class. Provides finite-difference gradient as a default, +and an optional `predict_with_std` mode for acquisition-function placement +(see samplers.ucb).""" import numpy as np class FitBase: - """Subclasses must implement .predict(Z). Optionally override .grad(Z).""" + """Subclasses must implement .predict(Z). Optionally override .grad(Z) + and .predict_with_std(Z) for fits that expose a calibrated posterior std. + + The default predict_with_std returns (mean, zeros); subclasses that have a + real uncertainty estimate (RF tree-disagreement, GP posterior variance, + BayesianLeastSquares posterior covariance, ...) should override it so the + UCB sampler can use mu + kappa*sigma. Sampler-side code is responsible for + falling back gracefully when sigma is identically zero (e.g. by widening + kappa or by warning the user). + + Subclasses may signal "no real uncertainty" by setting + `self.has_uncertainty = False`. Default is False; RF, GP, quadratic + Bayesian-least-squares fits should set True. + """ + + has_uncertainty = False + # Hint for the UCB local-polish step: pure piecewise-constant fits + # (e.g. random forests) have zero or near-zero gradient nearly everywhere, + # so gradient ascent doesn't work and we need a coordinate-hop polish. + smooth_gradient = True def predict(self, Z): raise NotImplementedError + def predict_with_std(self, Z): + """Return (mean, std) on Z. Default: (predict(Z), zeros). + Override in subclasses that expose a real uncertainty.""" + mean = self.predict(Z) + return mean, np.zeros_like(mean) + def f(self, Z): return self.predict(Z) diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rf.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rf.py index 126743e9b..4e5d1e6de 100644 --- a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rf.py +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rf.py @@ -1,5 +1,13 @@ """Random-Forest fit. Production default per project owner (2026-05-13). -Gradient via finite differences from FitBase.""" + +For UCB placement, exposes a tree-disagreement std as the uncertainty +estimate via predict_with_std. This is *not* a calibrated posterior std -- +it is the empirical spread of the per-tree predictions, which is large in +unexplored regions (because trees disagree on extrapolation) and small in +well-sampled regions (because trees fit similar values). That qualitative +behavior is what UCB needs; for calibration use a GP fit when one becomes +available. +""" import numpy as np from ._base import FitBase @@ -11,6 +19,12 @@ class RandomForestFit(FitBase): + has_uncertainty = True + # Tree predictions are piecewise-constant -- finite-difference gradients + # are zero almost everywhere. Tell the UCB sampler to use a coordinate + # hop polish instead of gradient ascent. + smooth_gradient = False + def __init__(self, X, Y, sigma=None, n_estimators=100, n_jobs=-1): if not _HAVE_RF: raise ImportError("sklearn not available; install scikit-learn or " @@ -21,3 +35,10 @@ def __init__(self, X, Y, sigma=None, n_estimators=100, n_jobs=-1): def predict(self, Z): return self._rf.predict(np.atleast_2d(Z)) + + def predict_with_std(self, Z): + """Return (mean, tree_disagreement_std). Std is computed across the + forest's per-tree predictions; larger where trees disagree.""" + Z = np.atleast_2d(Z) + per_tree = np.stack([t.predict(Z) for t in self._rf.estimators_], axis=0) + return per_tree.mean(axis=0), per_tree.std(axis=0) diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/__init__.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/__init__.py index f04f781b0..f9ea6519d 100644 --- a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/__init__.py +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/__init__.py @@ -13,5 +13,6 @@ from .smc_mala import iterate as smc_mala from .birth_death import iterate as birth_death from .smc_mala_bd import iterate as smc_mala_bd +from .ucb import iterate as ucb_place -__all__ = ["smc_mala", "birth_death", "smc_mala_bd"] +__all__ = ["smc_mala", "birth_death", "smc_mala_bd", "ucb_place"] diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/ucb.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/ucb.py new file mode 100644 index 000000000..37226c1e1 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/ucb.py @@ -0,0 +1,199 @@ +"""UCB (Upper Confidence Bound) placement. + +Target regime: expensive ILE, few generations, ~50-500 points per generation. +Each evaluation point should be "earned" -- either we expect a large mean lnL +*or* we have large uncertainty about lnL there. UCB picks both: rank +candidates by + + score(lambda) = mu(lambda) + kappa * sigma(lambda) + +where (mu, sigma) come from the surrogate's predict_with_std. The N chosen +points are taken by greedy descending-score selection with a built-in +self-avoidance constraint (population-covariance Mahalanobis distance). + +Local polish: each selected point is optionally hill-climbed for a few steps +against `score`. For smooth-gradient fits (RBF, quadratic, GP) we use +gradient ascent on `score`; for piecewise-constant fits (RF) we use a +coordinate-hop polish that perturbs each coordinate independently and keeps +the move only if `score` improved. The fit signals which polish strategy to +use via FitBase.smooth_gradient. + +Sampler signature matches the rest of the tracer engine: + ucb_place(particles, *, surrogate, surrogate_prev=None, prior_box, rng, + kappa=2.0, n_candidates=20000, polish_steps=20, + min_separation_factor=0.0, state=None, **_) -> (X_out, info) + +`particles` only sets the output count (len(particles)) and the candidate +seed pool. surrogate_prev is ignored (UCB doesn't bridge). +""" + +import numpy as np + + +# ----------------- candidate generation ---------------------------------- # + +def _candidates(rng, particles, prior_box, n_candidates): + """Mix candidates from current particles (jittered) and uniform draws + from the prior box. Returns array (n_candidates, d).""" + d = prior_box.shape[0] + n_seed = max(1, n_candidates // 4) + # uniform fill (exploration) + n_unif = n_candidates - n_seed + lo = prior_box[:, 0]; hi = prior_box[:, 1] + unif = lo + (hi - lo) * rng.uniform(size=(n_unif, d)) + # jittered current particles (exploitation seed) + if len(particles) > 0: + idx = rng.integers(0, len(particles), size=n_seed) + if particles.shape[1] > 1: + scale = np.std(particles, axis=0) + 1e-9 + else: + scale = np.array([particles.std() + 1e-9]) + jitter = 0.25 * scale * rng.normal(size=(n_seed, d)) + seed = particles[idx] + jitter + seed = np.clip(seed, lo, hi) + else: + seed = unif[:n_seed] + return np.vstack([seed, unif]) + + +# ----------------- greedy selection with Mahalanobis spacing ------------- # + +def _mahalanobis_greedy(candidates, scores, n_keep, icov, min_mah_dist): + """Pick n_keep candidates by descending score, rejecting any candidate + within `min_mah_dist` Mahalanobis distance of an already-kept point.""" + order = np.argsort(-scores) + kept = [] + if min_mah_dist <= 0: + # No self-avoidance: just take top-n. + return candidates[order[:n_keep]] + for i in order: + x = candidates[i] + if not kept: + kept.append(x) + continue + diffs = np.asarray(kept) - x + d2 = np.einsum("ij,jk,ik->i", diffs, icov, diffs) + if (d2 >= min_mah_dist ** 2).all(): + kept.append(x) + if len(kept) >= n_keep: + break + # If under-filled (very tight separation), fall back to top-n without + # self-avoidance for the remainder. + if len(kept) < n_keep: + fill = [candidates[i] for i in order if not any( + np.array_equal(candidates[i], k) for k in kept)] + kept += fill[: n_keep - len(kept)] + return np.asarray(kept) + + +# ----------------- local polish: gradient vs coord-hop ------------------- # + +def _polish_gradient(x, surrogate, kappa, prior_box, n_steps, scale, rng): + """Gradient ascent on mu + kappa*sigma. Uses FitBase.grad for mu; + treats sigma as locally piecewise-smooth and falls back to finite + differences.""" + eps = 1e-3 * scale + step = 0.1 * scale + lo = prior_box[:, 0]; hi = prior_box[:, 1] + x = x.copy() + for _ in range(n_steps): + gmu = surrogate.grad(x[None])[0] + # FD sigma gradient (small extra cost; sigma is cheap for RF/quadratic) + gsig = np.zeros_like(x) + for k in range(len(x)): + xp = x.copy(); xp[k] += eps + xm = x.copy(); xm[k] -= eps + _, sp = surrogate.predict_with_std(xp[None]) + _, sm = surrogate.predict_with_std(xm[None]) + gsig[k] = (sp[0] - sm[0]) / (2 * eps) + g = gmu + kappa * gsig + norm = np.linalg.norm(g) + 1e-12 + x = np.clip(x + step * g / norm, lo, hi) + return x + + +def _polish_coord_hop(x, surrogate, kappa, prior_box, n_steps, scale, rng): + """For piecewise-constant fits (RF). Each step: pick a random coordinate, + propose a Gaussian hop, accept if mu + kappa*sigma went up. Greedy.""" + lo = prior_box[:, 0]; hi = prior_box[:, 1] + x = x.copy() + def _score(xi): + m, s = surrogate.predict_with_std(xi[None]) + return float(m[0] + kappa * s[0]) + best = _score(x) + for _ in range(n_steps): + k = rng.integers(0, len(x)) + delta = 0.5 * scale[k] * rng.normal() + x_prop = x.copy(); x_prop[k] = float(np.clip(x[k] + delta, lo[k], hi[k])) + sc = _score(x_prop) + if sc > best: + x = x_prop + best = sc + return x + + +def _polish(x, surrogate, kappa, prior_box, n_steps, scale, rng): + if n_steps <= 0: + return x + if getattr(surrogate, "smooth_gradient", True): + return _polish_gradient(x, surrogate, kappa, prior_box, n_steps, float(np.mean(scale)), rng) + return _polish_coord_hop(x, surrogate, kappa, prior_box, n_steps, scale, rng) + + +# ----------------- driver ------------------------------------------------ # + +def iterate(particles, *, surrogate, surrogate_prev=None, + prior_box, rng, state=None, + kappa=2.0, n_candidates=20000, + polish_steps=20, min_separation_factor=0.25, + **_): + state = dict(state or {}) + X_in = np.asarray(particles, dtype=float) + n_out = len(X_in) + if n_out == 0: + return X_in.copy(), {"state": state, "note": "no input particles"} + + if not getattr(surrogate, "has_uncertainty", False): + # UCB without uncertainty degenerates to greedy mean-maximization, + # which is not what the user wants. Warn but proceed (kappa effectively 0). + import sys + sys.stderr.write( + "samplers.ucb: surrogate has no uncertainty estimate " + "(predict_with_std returns zeros); UCB will degenerate to greedy " + "mean-maximization. Use --tracer-fit-method rf (tree disagreement) " + "or a GP fit if available.\n") + + # 1. Build candidate pool + cand = _candidates(rng, X_in, prior_box, n_candidates) + mu, sigma = surrogate.predict_with_std(cand) + score = mu + kappa * sigma + + # 2. Greedy select with Mahalanobis self-avoidance + if X_in.shape[1] > 1: + cov_in = np.cov(X_in.T) + else: + cov_in = np.array([[float(X_in.std() ** 2) + 1e-12]]) + cov_in = np.atleast_2d(cov_in) + 1e-10 * np.eye(prior_box.shape[0]) + icov = np.linalg.inv(cov_in) + # min_separation_factor is in *Mahalanobis units* (so 0.25 = one-quarter std) + X_pick = _mahalanobis_greedy(cand, score, n_out, icov, min_separation_factor) + + # 3. Local polish per point + if X_in.shape[1] > 1: + scale = np.sqrt(np.diag(cov_in)) + else: + scale = np.array([float(np.sqrt(cov_in[0, 0]))]) + X_out = np.array([ + _polish(x, surrogate, kappa, prior_box, polish_steps, scale, rng) + for x in X_pick + ]) + + info = { + "state": state, + "kappa": kappa, + "n_candidates": n_candidates, + "min_separation_factor": min_separation_factor, + "polish_steps": polish_steps, + "polish_strategy": "gradient" if getattr(surrogate, "smooth_gradient", True) else "coord_hop", + } + return X_out, info diff --git a/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline b/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline old mode 100644 new mode 100755 index a0232a84f..59dd98c13 --- a/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline +++ b/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline @@ -42,15 +42,12 @@ parser.add_argument("--request-memory-marg",default=16384,type=int,help="Memory parser.add_argument("--puff-exe",default=None,help="similar to PUFF, but cannot be the same executable since format different") parser.add_argument("--puff-args",default=None,help=" argumetns") parser.add_argument("--puff-max-it",default=np.inf,type=int,help="Maximum iteration number that puffball is applied. default to infinity ") -<<<<<<< HEAD parser.add_argument("--tracer-only-marg", action='store_true', - help="Tracer-placement workflow: skip MARG (grid-X.dat) jobs on non-final iterations and only run MARG_PUFF (grid_puff-X.dat). Use only when --puff-exe points at a tracer-aware grid update tool (util_HyperparameterTracerUpdate.py / util_ParameterTracerUpdate.py). See workflow_tracer_only_marg.md in the parsimonious-placement project notes for the rationale.") + help="Tracer-placement workflow: on non-final iterations skip the regular MARG_* (grid-X.dat) jobs and rely only on the puff/tracer node to produce the next grid. The final --tracer-final-marg-iterations iterations still run MARG_* so the posterior diagnostic stays clean. Requires --puff-exe to point at a tracer-aware grid update tool (util_HyperparameterTracerUpdate.py / util_ParameterTracerUpdate.py). See workflow_tracer_only_marg.md.") parser.add_argument("--tracer-final-marg-iterations", type=int, default=1, - help="Number of trailing iterations on which MARG still runs even with --tracer-only-marg (needed for a clean posterior diagnostic on the final iteration). Default 1; set 0 to use only accumulated MARG_PUFF outputs throughout.") -======= -parser.add_argument("--tracer-only-marg",action='store_true',help="Parsimonious-placement workflow: only run MARG_PUFF (placement) jobs in intermediate iterations and skip MARG_* (posterior) jobs. The final --tracer-final-marg-iterations iterations still run MARG_* to produce a clean posterior. Cuts marginalisation cost roughly in half for N=5-6 iterations. Requires --puff-exe to be a tracer-aware updater (e.g. util_HyperparameterTracerUpdate.py) since the puff grid drives placement on its own.") -parser.add_argument("--tracer-final-marg-iterations",default=1,type=int,help="When --tracer-only-marg is set, the last N iterations still run MARG_* nodes for posterior generation. Default 1.") ->>>>>>> rift_O4d_parsimonious_v2 + help="Number of trailing iterations on which MARG still runs even with --tracer-only-marg. Default 1; set 0 to use only accumulated puff/tracer outputs throughout.") +parser.add_argument("--puff-input-source", default="posterior", choices=("posterior", "marg_net"), + help="Which file feeds the puff/tracer node. 'posterior' (default, legacy) reads grid-{k+1}.dat (EOS_POST posterior samples) and writes grid_puff-{k+1}.dat which MARG_PUFF then evaluates. 'marg_net' wires the puff/tracer node to consume all.marg_net (the cumulative ILE-evaluated (lambda, lnL, sigma) table) and write grid-{k+1}.dat directly, suppressing the MARG_PUFF lane entirely; use with --puff-exe util_*TracerUpdate.py.") parser.add_argument("--eos-post-args",default=None,help="filename of args_ile.txt file which holds ILE arguments. Should NOT conflict with arguments auto-set by this DAG ... in particular, i/o arguments will be modified") parser.add_argument("--eos-post-exe",default=None,help="filename of ILE or equivalent executable. Will default to `which integrate_likelihood_extrinsic` in low-level code") parser.add_argument("--eos-post-retries",default=0,type=int,help="Number of retry attempts for ILE jobs. (These can fail)") @@ -178,7 +175,12 @@ if opts.marg_event_args: puff_args=None puff_cadence = None puff_max_it = opts.puff_max_it -if opts.puff_args: +# Gate on --puff-exe: if the user did not provide a puff executable, we do not +# write puff/MARG_PUFF nodes at all. Holding --puff-args without --puff-exe is +# an obvious mistake; refuse to proceed silently. +if opts.puff_args and not opts.puff_exe: + sys.exit(" --puff-args given without --puff-exe: refusing to write puff nodes without a puff executable. Pass --puff-exe or drop --puff-args.") +if opts.puff_args and opts.puff_exe: puff_cadence = 1 # Load args.txt. Remove first item. Store with open(opts.puff_args) as f: @@ -192,6 +194,15 @@ if opts.puff_args: print("PUFF", puff_args) print("PUFF CADENCE", puff_cadence) +# Tracer-aware puff input plumbing. In 'marg_net' mode the puff/tracer node +# consumes all.marg_net (the cumulative ILE-evaluated table emitted by UNIFY) +# and writes grid-{k+1}.dat directly. EOS_POST output is redirected to +# posterior-{k+1}.dat for diagnostics, and the MARG_PUFF lane is suppressed -- +# the tracer's output IS the next iteration's grid. +puff_uses_marg_net = (opts.puff_input_source == "marg_net") and bool(puff_args) +if puff_uses_marg_net: + print("PUFF INPUT SOURCE: marg_net (tracer-aware grid update)") + test_args=None test_node_list = [] @@ -490,7 +501,13 @@ head -n 1 grid-0.dat > $2 cat $1/output*.dat | sort -r | uniq | shuf >> $2 """) os.system("chmod a+x join_post.sh") -join_post_job, join_post_job_name = dag_utils.write_convert_sub(exe=opts.working_directory+"/join_post.sh",tag='JOIN_POST',log_dir=None,arg_str='',file_input=opts.working_directory+"/iteration_$(macroiteration)_post/ "+opts.working_directory+"/grid-$(macroiterationnext).dat", out_dir=opts.working_directory,universe=local_worker_universe,no_grid=no_worker_grid) +# In tracer mode the tracer node owns grid-{k+1}.dat; EOS_POST output goes to a +# parallel posterior-{k+1}.dat file (diagnostic only). In legacy mode EOS_POST +# still produces grid-{k+1}.dat directly. +_join_post_output_path = opts.working_directory + ( + "/posterior-$(macroiterationnext).dat" if puff_uses_marg_net + else "/grid-$(macroiterationnext).dat") +join_post_job, join_post_job_name = dag_utils.write_convert_sub(exe=opts.working_directory+"/join_post.sh",tag='JOIN_POST',log_dir=None,arg_str='',file_input=opts.working_directory+"/iteration_$(macroiteration)_post/ "+_join_post_output_path, out_dir=opts.working_directory,universe=local_worker_universe,no_grid=no_worker_grid) join_post_job.add_condor_cmd("initialdir",opts.working_directory) join_post_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_con/logs/unify-$(macroevent).log") join_post_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_con/logs/unify-$(macroevent).out") @@ -520,7 +537,16 @@ if puff_args and puff_cadence: {} $@ """.format(extra_text,relevant_path)) os.system("chmod a+x local_puff.sh") - puff_job, puff_job_name = dag_utils.write_puff_sub(tag='PUFF',log_dir=None,arg_str=puff_args,request_memory=opts.request_memory_marg,input_net=opts.working_directory+'/grid-$(macroiterationnext).dat',output=opts.working_directory+'/grid_puff-$(macroiterationnext).dat',out_dir=opts.working_directory,exe=exe_here,universe=local_worker_universe,no_grid=no_worker_grid) + # Tracer mode: consume all.marg_net (ILE-evaluated table) and write + # grid-{k+1}.dat directly. Legacy mode: consume grid-{k+1}.dat (posterior + # samples from JOIN_POST) and write grid_puff-{k+1}.dat for MARG_PUFF. + if puff_uses_marg_net: + _puff_input_net = opts.working_directory + '/all.marg_net' + _puff_output = opts.working_directory + '/grid-$(macroiterationnext).dat' + else: + _puff_input_net = opts.working_directory + '/grid-$(macroiterationnext).dat' + _puff_output = opts.working_directory + '/grid_puff-$(macroiterationnext).dat' + puff_job, puff_job_name = dag_utils.write_puff_sub(tag='PUFF',log_dir=None,arg_str=puff_args,request_memory=opts.request_memory_marg,input_net=_puff_input_net,output=_puff_output,out_dir=opts.working_directory,exe=exe_here,universe=local_worker_universe,no_grid=no_worker_grid) # Modify: set 'initialdir' to CIP WORKING DIR puff_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_post") # Modify output argument: change logs and working directory to be subdirectory for the run @@ -619,7 +645,9 @@ for it in np.arange(it_start,opts.n_iterations): con_node.add_parent(cip_node) # consolidate depends on all of the individual jobs dag.add_node(cip_node) # if PUFF is done for this event, also add MARG_PUFF nodes. Note grid size assumed identical, so don't need another loop over 'id'/row - if puff_args and puff_cadence: + # In tracer mode (puff_uses_marg_net) the tracer writes grid-{k+1}.dat + # directly -- MARG already evaluates that grid, so no MARG_PUFF lane. + if puff_args and puff_cadence and not puff_uses_marg_net: if it>it_start and it <= puff_max_it and (it-1)%puff_cadence ==0: # we made a puffball last iteration, so run it through ILE now if opts.marg_event_args: cipPuff_node = pipeline.CondorDAGNode(cipPuff_job) @@ -696,10 +724,16 @@ for it in np.arange(it_start,opts.n_iterations): puff_node.add_macro("macroiterationnext", it+1) puff_node.set_category("PUFF") puff_node.set_retry(opts.general_retries) - if not (parent_fit_node is None): - puff_node.add_parent(parent_fit_node) # only fit if we have results from the previous iteration - dag.add_node(puff_node) - + if puff_uses_marg_net: + # Tracer reads all.marg_net (output of UNIFY). Run in parallel with + # EOS_POST -- the tracer's own grid output drives the next iteration, + # EOS_POST is now a downstream-free posterior diagnostic. + puff_node.add_parent(unify_node) + else: + if not (parent_fit_node is None): + puff_node.add_parent(parent_fit_node) # only fit if we have results from the previous iteration + dag.add_node(puff_node) + parent_fit_node = puff_node diff --git a/MonteCarloMarginalizeCode/Code/bin/util_HyperparameterTracerUpdate.py b/MonteCarloMarginalizeCode/Code/bin/util_HyperparameterTracerUpdate.py index bb5cb20d6..a7c2a1707 100755 --- a/MonteCarloMarginalizeCode/Code/bin/util_HyperparameterTracerUpdate.py +++ b/MonteCarloMarginalizeCode/Code/bin/util_HyperparameterTracerUpdate.py @@ -20,16 +20,22 @@ # same semantics. --update-method puffball reproduces puffball exactly. # # NEW -# --update-method {smc-mala-bd, smc-mala, birth-death, puffball} default smc-mala-bd +# --update-method {smc-mala-bd, smc-mala, birth-death, ucb, puffball} default smc-mala-bd # --tracer-fit-method {rf, rbf, quadratic, polynomial} default rf # --inj-file-prev OPTIONAL previous-iteration .dat (enables SMC bridging) # --no-union-refit opt out of union refit when --inj-file-prev is given # --n-mala-steps INT default 8 # --target-ess-frac FLOAT default 0.5 # --birth-death-rate FLOAT default 1.0 +# --ucb-kappa FLOAT default 2.0 (UCB exploration weight) +# --ucb-n-candidates INT default 20000 # --rng-seed INT deterministic when given # --state-in / --state-out tiny state file (~100 bytes) # +# --force-away K (inherited from puffball semantics): after the engine returns +# X_out, drop any point within Mahalanobis distance K of an already-kept point +# (covariance from the input grid, ridge-stabilized). K=0 disables. +# # I/O FORMAT # Input/output is the same as util_HyperparameterPuffball.py: a text file with a # leading "# lnL sigma_lnL p1 p2 ..." header line. Data rows are numeric. @@ -92,7 +98,7 @@ def build_parser(): p.add_argument("--regularize", action="store_true") # Tracer-specific p.add_argument("--update-method", - choices=("smc-mala-bd", "smc-mala", "birth-death", "puffball"), + choices=("smc-mala-bd", "smc-mala", "birth-death", "ucb", "puffball"), default="smc-mala-bd") p.add_argument("--tracer-fit-method", choices=("rf", "rbf", "quadratic", "polynomial"), @@ -103,12 +109,52 @@ def build_parser(): p.add_argument("--n-mala-steps", default=8, type=int) p.add_argument("--target-ess-frac", default=0.5, type=float) p.add_argument("--birth-death-rate", default=1.0, type=float) + # UCB (super-conservative GP-style placement) + p.add_argument("--ucb-kappa", default=2.0, type=float, + help="UCB exploration weight: score = mu(lambda) + kappa * sigma(lambda). " + "Larger => more explorative. Used only when --update-method ucb.") + p.add_argument("--ucb-n-candidates", default=20000, type=int, + help="Size of the candidate pool from which UCB greedily selects.") p.add_argument("--rng-seed", default=None, type=int) p.add_argument("--state-in", default=None) p.add_argument("--state-out", default=None) return p +# ---------------- Mahalanobis self-avoidance (--force-away) --------------- # + +def _force_away_decimate(X_kept, X_pool, cov, threshold): + """Greedy: keep first len(X_kept) rows of X_pool subject to a minimum + Mahalanobis distance `threshold` from every previously-kept row. Returns + (X_out, mask). X_kept is the preferred-order list; X_pool is the (possibly + larger) candidate set to fall back to when a preferred row is rejected. + Mirrors util_ParameterPuffball.py's --force-away semantics.""" + if threshold <= 0 or len(X_kept) == 0: + return X_kept, np.ones(len(X_kept), dtype=bool) + # Regularize cov for stability + cov_r = cov + 1e-10 * np.eye(cov.shape[0]) + icov = np.linalg.inv(cov_r) + target_n = len(X_kept) + queue = list(X_pool) # candidate list (input order matters: preferred first) + out = [] + for x in queue: + if not out: + out.append(x) + continue + diffs = np.asarray(out) - x + d2 = np.einsum("ij,jk,ik->i", diffs, icov, diffs) + if (d2 >= threshold ** 2).all(): + out.append(x) + if len(out) >= target_n: + break + if len(out) < target_n: + # Couldn't fill the budget under the constraint; relax to keep what we have. + sys.stderr.write( + f"util_HyperparameterTracerUpdate: --force-away {threshold} could only " + f"place {len(out)}/{target_n} points; returning the kept subset.\n") + return np.asarray(out), None + + # ----------------------- .dat <-> arrays ----------------------------------- # def _read_dat(path): @@ -225,13 +271,19 @@ def main(argv=None): with open(opts.state_in, "rb") as f: state = pickle.load(f) - sampler = { + sampler_map = { "smc-mala-bd": _tracer_samplers.smc_mala_bd, "smc-mala": _tracer_samplers.smc_mala, "birth-death": _tracer_samplers.birth_death, - }[method] - - X_out, info = sampler( + } + if hasattr(_tracer_samplers, "ucb_place"): + sampler_map["ucb"] = _tracer_samplers.ucb_place + if method not in sampler_map: + sys.exit(f"util_HyperparameterTracerUpdate: --update-method {method!r} " + f"not available in installed engine (have: {sorted(sampler_map)}).") + sampler = sampler_map[method] + + sampler_kw = dict( particles=X, surrogate=fit_now, surrogate_prev=fit_prev, @@ -242,6 +294,24 @@ def main(argv=None): birth_death_rate=opts.birth_death_rate, state=state, ) + if method == "ucb": + sampler_kw.update( + kappa=opts.ucb_kappa, + n_candidates=opts.ucb_n_candidates, + ) + X_out, info = sampler(**sampler_kw) + + # --- self-avoidance (--force-away) ----------------------------------- # + if opts.force_away and opts.force_away > 0 and len(X_out) > 1: + # Use the input grid's covariance as the Mahalanobis metric (stable, + # independent of where the engine moved particles to). + if X.shape[1] > 1: + cov_in = np.cov(X.T) + else: + cov_in = np.array([[float(X.std() ** 2) + 1e-12]]) + cov_in = np.atleast_2d(cov_in) + X_out, _ = _force_away_decimate( + X_kept=X_out, X_pool=X_out, cov=cov_in, threshold=opts.force_away) if opts.state_out: with open(opts.state_out, "wb") as f: From 2eaeb1713317fd09e47e3e329d6a4313dbeb4b76 Mon Sep 17 00:00:00 2001 From: "R. O'Shaughnessy" Date: Fri, 15 May 2026 10:51:10 -0400 Subject: [PATCH 11/24] parsimonious placement: convergence tests + full Hydra wiring Two related additions on top of commit 04 (pathway fix + UCB): (1) The convergence test (convergence_test_samples.py) is now wired up correctly in tracer mode. In legacy puff mode it compared grid-{k+1}.dat vs grid-{k}.dat -- both held posterior samples back then. In tracer mode grid-{k}.dat is a *placement* grid (lnL columns zero); the actual posterior lives in posterior-{k}.dat. The test now points at posterior-{k}.dat / posterior-{k+1}.dat in tracer mode automatically. No new CLI flag needed; the routing follows --puff-input-source. (2) The Hydra driver (util_RIFT_hyperpipe.py) now forwards every tracer- and convergence-related flag that commit 03's message promised but did not actually deliver: arch.tracer-only-marg -> --tracer-only-marg arch.tracer-final-marg-iterations -> --tracer-final-marg-iterations puff.input-source -> --puff-input-source puff.settings.update-method -> --update-method puff.settings.tracer-fit-method -> --tracer-fit-method puff.settings.ucb-kappa -> --ucb-kappa puff.settings.ucb-n-candidates -> --ucb-n-candidates puff.settings.n-mala-steps -> --n-mala-steps puff.settings.target-ess-frac -> --target-ess-frac puff.settings.birth-death-rate -> --birth-death-rate puff.settings.inj-file-prev -> --inj-file-prev puff.settings.rng-seed -> --rng-seed puff.settings.{state-in,state-out} -> --state-{in,out} puff.settings.no-union-refit -> --no-union-refit (bool) puff.settings.regularize -> --regularize (bool) test.settings.iteration-threshold -> --iteration-threshold test.settings.write-file-on-success-> --write-file-on-success test.settings.test-output -> --test-output test.settings.always-succeed -> --always-succeed (bool) test.settings.verbose -> --verbose (bool) Defaults are null/false throughout so legacy configs are byte- identical in their generated DAG. ---- create_eos_posterior_pipeline ---------------------------------- In tracer mode (puff_uses_marg_net), point the convergence test's --samples files at posterior-{k}.dat / posterior-{k+1}.dat instead of grid-{k}.dat / grid-{k+1}.dat. The DAG topology is otherwise unchanged: test_node is still inserted between JOIN_POST and the puff/tracer node, so it depends on the EOS_POST output (which is what we're checking the convergence of) and gates the next placement. ---- util_RIFT_hyperpipe.py ----------------------------------------- * _build_puff_args(): consume an optional puff.settings: sub-block, mirroring the existing post.settings: pattern. Null values are skipped; bool toggles use hyper_config.truthy(). All keys are additive (legacy puffball ignores them). * _build_test_args(): consume an optional test.settings: sub-block. Same pattern. * cmd_parts assembly: append --tracer-only-marg, --tracer-final-marg-iterations, and --puff-input-source from the arch and puff blocks. All conditional on the user setting the corresponding key (null = omit the flag, preserving legacy behavior). ---- hyperpipe_conf.yaml -------------------------------------------- Schema additions (all null/false defaults): arch.tracer-only-marg arch.tracer-final-marg-iterations puff.input-source puff.settings (12 keys, see commit message body) test.settings (5 keys) ---- demo/hyperpipe/hyperpipe_conf_tracer.yaml (new) --------------- A worked example showing the tracer workflow end-to-end. Uses util_HyperparameterTracerUpdate.py with update-method=smc-mala-bd and puff.input-source=marg_net. Includes the convergence test with always-succeed=true (keep DAG running through full diagnostic sweep). Run: cd MonteCarloMarginalizeCode/Code/demo/hyperpipe util_RIFT_hyperpipe.py --config-name hyperpipe_conf_tracer Override for UCB: util_RIFT_hyperpipe.py --config-name hyperpipe_conf_tracer \ puff.settings.update-method=ucb puff.settings.ucb-kappa=2.0 ---- testing -------------------------------------------------------- * _build_puff_args / _build_test_args verified by direct invocation with three configs (legacy / tracer-smc / tracer-ucb): each emits the exact flag set expected, no extraneous flags, legacy unchanged. * hyperpipe_conf.yaml + hyperpipe_conf_tracer.yaml parse as valid YAML. * util_RIFT_hyperpipe.py + create_eos_posterior_pipeline AST-clean. * Full DAG generation could not be smoke-tested in the dev sandbox (scipy not available); for verification on host, the demo Makefile in rift_integration/demo_hyperpipe_tracer also gained a --test-args plumbing in this same edit pass (project-side, not in this commit) plus `make convergence` and `make cross-compare` post-processing targets. --- .../Code/bin/create_eos_posterior_pipeline | 12 ++++++++- .../Code/bin/hyperpipe_conf.yaml | 25 +++++++++++++++++- .../Code/bin/util_RIFT_hyperpipe.py | 26 +++++++++++++++++++ 3 files changed, 61 insertions(+), 2 deletions(-) diff --git a/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline b/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline index 59dd98c13..f811833df 100755 --- a/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline +++ b/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline @@ -432,7 +432,17 @@ con_prod_job.write_sub_file() if opts.test_args: - test_job, test_job_name = dag_utils.write_test_sub(tag='test',log_dir=None,arg_str=test_args,samples_files=[ opts.working_directory+'/grid-$(macroiteration).dat', opts.working_directory+'/grid-$(macroiterationlast).dat'] ,out_dir=opts.working_directory,exe=opts.test_exe,universe=local_worker_universe,no_grid=no_worker_grid) + # In tracer mode (puff_uses_marg_net) the file grid-{k}.dat is a placement + # grid (lnL columns zero), and the true iteration posterior lives in + # posterior-{k}.dat. So the convergence test must compare posterior + # samples, not placement grids. + if puff_uses_marg_net: + _test_curr = opts.working_directory + '/posterior-$(macroiteration).dat' + _test_prev = opts.working_directory + '/posterior-$(macroiterationlast).dat' + else: + _test_curr = opts.working_directory + '/grid-$(macroiteration).dat' + _test_prev = opts.working_directory + '/grid-$(macroiterationlast).dat' + test_job, test_job_name = dag_utils.write_test_sub(tag='test',log_dir=None,arg_str=test_args,samples_files=[_test_curr, _test_prev] ,out_dir=opts.working_directory,exe=opts.test_exe,universe=local_worker_universe,no_grid=no_worker_grid) # Modify: set 'initialdir' test_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_test") # Modify output argument: change logs and working directory to be subdirectory for the run diff --git a/MonteCarloMarginalizeCode/Code/bin/hyperpipe_conf.yaml b/MonteCarloMarginalizeCode/Code/bin/hyperpipe_conf.yaml index 575981d6c..4cfc0cd76 100644 --- a/MonteCarloMarginalizeCode/Code/bin/hyperpipe_conf.yaml +++ b/MonteCarloMarginalizeCode/Code/bin/hyperpipe_conf.yaml @@ -59,12 +59,24 @@ puff: puff-factor: 0.5 force-away: 0.03 extra-args: "" + # Where the puff/tracer node reads its input from. + # posterior : (legacy) reads grid-{k+1}.dat, writes grid_puff-{k+1}.dat + # -> MARG_PUFF re-evaluates the puffed grid. The puffball + # does not need lnL inputs, so this is the legacy mode. + # marg_net : (tracer) reads all.marg_net, writes grid-{k+1}.dat + # directly. EOS_POST output is redirected to + # posterior-{k+1}.dat (diagnostic only) and the MARG_PUFF + # lane is suppressed entirely. Use with a tracer-aware + # puff.exe (util_HyperparameterTracerUpdate.py). + input-source: null # null = pipeline default (posterior) # Tracer-placement sampler hyperparameters (only used when exe is a # tracer-aware updater; ignored by the legacy util_HyperparameterPuffball.py). # Leave keys null to take the updater's built-in defaults. settings: - update-method: null # smc-mala-bd | smc-mala | birth-death | puffball + update-method: null # smc-mala-bd | smc-mala | birth-death | ucb | puffball tracer-fit-method: null # rf | rbf | polynomial | quadratic + ucb-kappa: null # UCB exploration weight (default 2.0) + ucb-n-candidates: null # UCB candidate pool size (default 20000) n-mala-steps: null # int target-ess-frac: null # float in (0,1] birth-death-rate: null # float @@ -80,6 +92,17 @@ test: method: JS threshold: 0.05 extra-args: "" + # Convergence-test diagnostics. All null/false by default. The pipeline + # already feeds --samples grid-{k}.dat and grid-{k+1}.dat (or, in tracer + # mode, posterior-{k}.dat and posterior-{k+1}.dat) and --parameter from + # the coord spec; these are extras you'd typically set when monitoring + # iteration-to-iteration agreement. + settings: + iteration-threshold: null # skip test until iteration >= N (default 0) + write-file-on-success: null # path to touch when convergence threshold met + test-output: null # per-iteration diagnostic CSV + always-succeed: false # keep DAG running past convergence (diagnostic-only mode) + verbose: false init: file: null diff --git a/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py b/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py index 42ef14341..bc8a16edf 100644 --- a/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py +++ b/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py @@ -228,6 +228,8 @@ def _build_puff_args(cfg, coord_spec) -> str: setting_flags = [ ("update-method", "--update-method"), ("tracer-fit-method", "--tracer-fit-method"), + ("ucb-kappa", "--ucb-kappa"), + ("ucb-n-candidates", "--ucb-n-candidates"), ("n-mala-steps", "--n-mala-steps"), ("target-ess-frac", "--target-ess-frac"), ("birth-death-rate", "--birth-death-rate"), @@ -260,6 +262,25 @@ def _build_test_args(cfg, coord_spec) -> str: method=_cfg_get(test, "method", "JS"), threshold=_cfg_get(test, "threshold", 0.05), ) + # test.settings: forwards convergence_test_samples.py-specific flags. All + # nullable -- omitted keys produce no flag, mirroring post.settings. + settings = _cfg_get(test, "settings") or {} + setting_flags = [ + ("iteration-threshold", "--iteration-threshold"), + ("write-file-on-success", "--write-file-on-success"), + ("test-output", "--test-output"), + ] + for key, flag in setting_flags: + val = _cfg_get(settings, key) + if val is not None and val != "": + args += f" {flag} {val}" + bool_flags = [ + ("always-succeed", "--always-succeed"), + ("verbose", "--verbose"), + ] + for key, flag in bool_flags: + if hyper_config.truthy(_cfg_get(settings, key, False)): + args += f" {flag}" extra = (_cfg_get(test, "extra-args") or "").strip() if extra: args += " " + extra @@ -382,6 +403,11 @@ def my_app(cfg: DictConfig) -> None: n_final = _cfg_get(arch, "tracer-final-marg-iterations") if n_final is not None: cmd_parts.append(f"--tracer-final-marg-iterations {int(n_final)}") + # tracer-aware puff input source (posterior|marg_net). Default posterior + # keeps the legacy puffball path byte-identical. + puff_input_source = _cfg_get(_cfg_get(cfg, "puff"), "input-source") + if puff_input_source: + cmd_parts.append(f"--puff-input-source {puff_input_source}") # general bool_flag_pairs = [ From 5166cc86a7ed76712dca07889cdf51b1031a0d72 Mon Sep 17 00:00:00 2001 From: "R. O'Shaughnessy" Date: Fri, 15 May 2026 10:51:16 -0400 Subject: [PATCH 12/24] util_RIFT_hyperpipe.py: --config PATH shim for user-supplied config files Hydra's native CLI requires the user to know about, and supply, two separate flags (``--config-dir DIR --config-name NAME``) just to point the driver at a config file that lives anywhere other than the installed scripts directory. Worse, the second flag rejects a ``.yaml`` extension (Hydra appends one itself), so the obvious ``--config-name myconf.yaml`` fails with a confusing "Cannot find primary config" error. This commit makes the driver accept what users actually expect: a single ``--config PATH`` argument that just works, anywhere on disk. If you pass util_RIFT_hyperpipe.py --config /full/path/to/my_run.yaml ... the new shim translates the path into the equivalent of --config-dir=/full/path/to --config-name=my_run before Hydra parses sys.argv, and the file is loaded. Cases handled: * Absolute path with .yaml: --config /tmp/run.yaml * Relative path with .yaml: --config ./d/run.yaml * Relative path with .yml: --config ./d/run.yml (or -c) * Equals form: --config=/tmp/run.yaml * Short form: -c /tmp/run.yaml * Bare name (no path): --config myconf falls through to Hydra's default search path -- equivalent to --config-name myconf. * User-mistake .yaml suffix: --config-name myconf.yaml the trailing .yaml is stripped automatically. * Hydra-native flags: --config-dir=/d --config-name=foo pass through untouched; the shim is idempotent. Error handling: if the resolved path does not exist (case A--C--D), fail with a clear message naming the file we looked for, instead of letting Hydra emit its generic "Cannot find primary config" search-path dump. ---- demo/hyperpipe/hyperpipe_conf_tracer.yaml ---------------------- Updated the header comment to recommend the new shim: util_RIFT_hyperpipe.py --config /path/to/hyperpipe_conf_tracer.yaml instead of the old --config-name advice that only worked if the file was shipped in bin/. ---- bin/hyperpipe_conf_tracer.yaml --------------------------------- A previous step (rev'd locally; never released in a commit) also copied hyperpipe_conf_tracer.yaml into bin/ so Hydra's default search path would find it. With this shim that copy is redundant and misleading (configs are user-owned, not install-shipped); the rollup script removes the bin/ copy if present. The canonical documentation copy stays under demo/hyperpipe/. ---- testing -------------------------------------------------------- The shim was exercised on 10 inputs (absolute, relative, .yaml/.yml extension, short form, equals form, bare name, --config-name with .yaml stripped, missing-file error, Hydra-native pass-through, mixed shim + override). All produced the expected argv translation. util_RIFT_hyperpipe.py AST-parses cleanly. --- .../Code/bin/util_RIFT_hyperpipe.py | 108 ++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py b/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py index bc8a16edf..099888c3d 100644 --- a/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py +++ b/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py @@ -46,6 +46,20 @@ general.use-osg=true general.use-singularity=true \\ general.condor-local-nonworker=true +Point at a config file living anywhere on disk (no install / no +``--config-dir`` plumbing needed):: + + util_RIFT_hyperpipe.py --config ./my_tracer.yaml + util_RIFT_hyperpipe.py -c /full/path/to/run_config.yaml \\ + puff.settings.update-method=ucb + +The ``--config PATH`` shim accepts an absolute or relative path and +splits it into the ``--config-dir`` / ``--config-name`` pair Hydra +expects. Trailing ``.yaml`` / ``.yml`` extensions are stripped +automatically (so ``--config-name foo.yaml`` works too). A bare name +falls through to Hydra's normal search path (the installed +``hyperpipe_conf.yaml`` next to the script). + Implementation notes -------------------- The real work lives in :mod:`RIFT.hyperpipe`: @@ -449,5 +463,99 @@ def my_app(cfg: DictConfig) -> None: ) +def _preprocess_config_args(argv): + """User-friendly --config shim translated into Hydra's --config-dir / --config-name. + + Hydra's native CLI requires two flags (``--config-dir DIR --config-name NAME``) + and the config-name must be bare (no ``.yaml`` extension). Users who pass a full + path to a config file expect the script to "just use it". This shim accepts: + + --config /full/path/to/myconf.yaml + --config myconf.yaml (resolved against CWD) + --config myconf (Hydra default search path) + -c /full/path/to/myconf.yaml (short form) + + and rewrites argv so Hydra sees the equivalent of:: + + --config-dir /full/path/to --config-name myconf + + Also strips a trailing ``.yaml`` / ``.yml`` from ``--config-name=...`` / + ``--config-name foo.yaml`` (the most common user-mistake from Hydra's docs). + + Fails fast with a clear error if the resolved file does not exist. + """ + out = [] + i = 0 + n = len(argv) + + def _strip_ext(name): + for ext in (".yaml", ".yml"): + if name.endswith(ext): + return name[: -len(ext)] + return name + + def _split_path_or_name(raw): + """Given what the user passed, return (dir_or_None, bare_name).""" + if os.sep in raw or raw.startswith(".") or raw.endswith(".yaml") or raw.endswith(".yml"): + # path-shaped or extension-bearing + abs_path = os.path.abspath(raw) + d = os.path.dirname(abs_path) + base = os.path.basename(abs_path) + return d, _strip_ext(base) + # bare name -> leave dir alone (use built-in search path) + return None, _strip_ext(raw) + + def _emit(dirpart, namepart, raw): + # Validate existence when we have a directory commitment + if dirpart is not None: + full = os.path.join(dirpart, namepart + ".yaml") + full_yml = os.path.join(dirpart, namepart + ".yml") + if not (os.path.isfile(full) or os.path.isfile(full_yml)): + sys.stderr.write( + f"util_RIFT_hyperpipe.py: config file not found from --config {raw!r}\n" + f" looked for: {full}\n" + f" or: {full_yml}\n" + ) + sys.exit(2) + out.append(f"--config-dir={dirpart}") + out.append(f"--config-name={namepart}") + + while i < n: + a = argv[i] + # Long form: --config X or --config=X + if a == "--config" or a == "-c": + if i + 1 >= n: + sys.stderr.write("util_RIFT_hyperpipe.py: --config requires an argument\n") + sys.exit(2) + raw = argv[i + 1] + d, name = _split_path_or_name(raw) + _emit(d, name, raw) + i += 2 + continue + if a.startswith("--config=") or a.startswith("-c="): + raw = a.split("=", 1)[1] + d, name = _split_path_or_name(raw) + _emit(d, name, raw) + i += 1 + continue + # Belt-and-braces: if the user passed --config-name foo.yaml, strip the ext. + if a == "--config-name" and i + 1 < n: + out.append(a) + out.append(_strip_ext(argv[i + 1])) + i += 2 + continue + if a.startswith("--config-name="): + out.append("--config-name=" + _strip_ext(a.split("=", 1)[1])) + i += 1 + continue + out.append(a) + i += 1 + return out + + if __name__ == "__main__": + # Translate any user-friendly --config PATH into Hydra's native flags + # before @hydra.main parses sys.argv. Idempotent: passing Hydra's + # native --config-dir / --config-name through unchanged still works. + sys.argv = [sys.argv[0]] + _preprocess_config_args(sys.argv[1:]) my_app() From 011129118fa9035f928df48ff489aed722bbe230 Mon Sep 17 00:00:00 2001 From: "R. O'Shaughnessy" Date: Fri, 15 May 2026 10:56:28 -0400 Subject: [PATCH 13/24] minor hyperpipe demo doc --- MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile | 2 +- MonteCarloMarginalizeCode/Code/demo/hyperpipe/README.md | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile index 5e61769e9..091349aed 100644 --- a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile +++ b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile @@ -138,4 +138,4 @@ Gaussian_adaptive_bimodal: # ALTERNATIVE PIPELINE IMPLEMENTATION # - This approach has configuration in 'hyperpipe_conf.yaml' rundir: - python util_RIFT_hyperpipe.py + util_RIFT_hyperpipe.py diff --git a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/README.md b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/README.md index c67e8ecaa..89e0c1bbb 100644 --- a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/README.md +++ b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/README.md @@ -6,3 +6,9 @@ For more details, see the auto-generated documentation [here](https://rift-docu Similar content is also available in the supplementary document 'technical_doc.txt' for a pedagogical overview of the proceedure and implementation of the pipeline. +# Hyperpipe hydra pipeline writer +New tool +``` +util_RIFT_hyperpipe.py --config-name hyperpipe_conf_tracer.yaml +``` + From 5144858f57ce86028aa018ea85ec969235c657da Mon Sep 17 00:00:00 2001 From: "R. O'Shaughnessy" Date: Fri, 15 May 2026 13:26:49 -0400 Subject: [PATCH 14/24] util_RIFT_hyperpipe.py: prepend dummy token to args_{post,puff,test}.txt Why --- create_eos_posterior_pipeline reads --eos-post-args, --puff-args, and --test-args via: args = ' '.join(file_contents) args = ' '.join(args.split(' ')[1:]) # <-- DROPS FIRST TOKEN This is the same first-token-drop convention used by the legacy CIP / EOS pipeline driver (where the first token is conventionally the executable name). The hyperpipe driver was writing real flags as the first token, so create_eos_posterior_pipeline silently ate them: args_test.txt: "--parameter x ..." -> pipeline sees "x ..." args_eos_post.txt: "--parameter x ..." -> pipeline sees "x ..." args_puff.txt: "--force-away 0.25 ..." -> pipeline sees "0.25 ..." For args_test.txt this manifested as convergence_test_samples receiving its first --parameter eaten, breaking the JS / lame comparison setup. Fix --- Prepend a single dummy character "X" to each of the three args files at write time. Done at the write site (not inside _build_*_args) because the dummy is a pipeline-transport concern, not a logical-args concern. The marg-list args file is *not* touched: create_eos_posterior_pipeline reads it via --marg-event-args-list-file, which preserves each line verbatim (no split[1:]). Files ----- M MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py --- .../Code/bin/util_RIFT_hyperpipe.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py b/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py index 099888c3d..8bc8c92b7 100644 --- a/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py +++ b/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py @@ -351,12 +351,18 @@ def my_app(cfg: DictConfig) -> None: marg.write_exe_file(args_marg_exe_path) marg.write_nchunk_file(event_nchunk_path) + # create_eos_posterior_pipeline reads these via --eos-post-args / + # --puff-args / --test-args and discards the first whitespace-separated + # token (see split(' ')[1:] in create_eos_posterior_pipeline). We prepend + # a dummy "X" so the real first flag survives. The marg-list args file is + # read line-by-line and does NOT get this treatment. + _PIPELINE_DUMMY = "X" with open(args_post_path, "w") as f: - f.write(_build_post_args(cfg, coord_spec) + "\n") + f.write(_PIPELINE_DUMMY + " " + _build_post_args(cfg, coord_spec) + "\n") with open(args_puff_path, "w") as f: - f.write(_build_puff_args(cfg, coord_spec) + "\n") + f.write(_PIPELINE_DUMMY + " " + _build_puff_args(cfg, coord_spec) + "\n") with open(args_test_path, "w") as f: - f.write(_build_test_args(cfg, coord_spec) + "\n") + f.write(_PIPELINE_DUMMY + " " + _build_test_args(cfg, coord_spec) + "\n") # ----- initial grid --------------------------------------------------- initial_grid = _maybe_generate_initial_grid(cfg["init"], base_dir, run_dir) From 6e59eb3d64314885fad991919cbb0a63ca76195a Mon Sep 17 00:00:00 2001 From: "R. O'Shaughnessy" Date: Fri, 15 May 2026 14:00:11 -0400 Subject: [PATCH 15/24] parsimonious placement: MARG every iteration, kill MARG_PUFF cleanly The fix ------- Earlier commits implemented tracer mode backwards: they skipped MARG on intermediate iterations and kept MARG_PUFF, when the intent is the opposite -- MARG should run every iteration so the tracer always has a fresh all.marg_net to consume, and the MARG_PUFF lane (the redundant evaluation of a separately-puffed grid) is what should be suppressed. This commit: * create_eos_posterior_pipeline: - Deletes the _skip_marg_this_iter block. MARG_* nodes are added on every iteration. - Wraps both MARG_PUFF .sub-file writers (single-event and multi-event paths) in `if puff_args and not puff_uses_marg_net` so MARG_PUFF leaves no trace in tracer mode (.sub file or DAG node). - --tracer-only-marg / --tracer-final-marg-iterations kept as deprecated no-ops for back-compat. Help text updated. * bin/hyperpipe_conf.yaml: - Drops the arch.tracer-only-marg + arch.tracer-final-marg-iterations keys. * bin/util_RIFT_hyperpipe.py: - Drops the cmd_parts plumbing for the two deprecated flags. The driver no longer forwards --tracer-only-marg / --tracer-final- marg-iterations even if a user still sets them in yaml. * demo/hyperpipe/hyperpipe_conf_tracer.yaml: - Removes arch.tracer-only-marg + arch.tracer-final-marg-iterations. - Adds puff.input-source: marg_net (the actual tracer-pathway switch -- without this the pipeline falls back to legacy posterior wiring + MARG_PUFF lane). - Adds general.request-memory: 200 (toy demo footprint; the 16 GB GW-PE default was way too heavy for a 3-D Gaussian). - Updates the example CLI to use the new --config PATH shim. - Rewrites the header comment to describe the actual workflow: MARG every iteration; tracer consumes all.marg_net. Why the symptom went away ------------------------- Before: "tracer-only-marg: skipping MARG_* nodes for iteration ..." was printed every intermediate iteration, then MARG_PUFF nodes tried to read grid_puff-{k}.dat -- which was never written because PUFF either did not run (no --puff-input-source marg_net) or wrote elsewhere. After: MARG runs every iteration -> all.marg_net is always populated -> tracer consumes it -> writes grid-{k+1}.dat directly. No MARG_PUFF anywhere. Files ----- M MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline M MonteCarloMarginalizeCode/Code/bin/hyperpipe_conf.yaml M MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py M MonteCarloMarginalizeCode/Code/demo/hyperpipe/hyperpipe_conf_tracer.yaml --- .../Code/bin/create_eos_posterior_pipeline | 105 ++++++++---------- .../Code/bin/hyperpipe_conf.yaml | 7 -- .../Code/bin/util_RIFT_hyperpipe.py | 8 -- .../demo/hyperpipe/hyperpipe_conf_tracer.yaml | 34 ++++-- 4 files changed, 71 insertions(+), 83 deletions(-) diff --git a/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline b/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline index f811833df..aaaf6d7c6 100755 --- a/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline +++ b/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline @@ -43,9 +43,9 @@ parser.add_argument("--puff-exe",default=None,help="similar to PUFF, but cannot parser.add_argument("--puff-args",default=None,help=" argumetns") parser.add_argument("--puff-max-it",default=np.inf,type=int,help="Maximum iteration number that puffball is applied. default to infinity ") parser.add_argument("--tracer-only-marg", action='store_true', - help="Tracer-placement workflow: on non-final iterations skip the regular MARG_* (grid-X.dat) jobs and rely only on the puff/tracer node to produce the next grid. The final --tracer-final-marg-iterations iterations still run MARG_* so the posterior diagnostic stays clean. Requires --puff-exe to point at a tracer-aware grid update tool (util_HyperparameterTracerUpdate.py / util_ParameterTracerUpdate.py). See workflow_tracer_only_marg.md.") + help="DEPRECATED no-op. Earlier versions used this flag to skip MARG_* on intermediate iterations; that was wrong -- MARG should run every iteration so the tracer has fresh all.marg_net to consume. Use --puff-input-source marg_net to suppress the redundant MARG_PUFF lane instead.") parser.add_argument("--tracer-final-marg-iterations", type=int, default=1, - help="Number of trailing iterations on which MARG still runs even with --tracer-only-marg. Default 1; set 0 to use only accumulated puff/tracer outputs throughout.") + help="DEPRECATED no-op. See --tracer-only-marg.") parser.add_argument("--puff-input-source", default="posterior", choices=("posterior", "marg_net"), help="Which file feeds the puff/tracer node. 'posterior' (default, legacy) reads grid-{k+1}.dat (EOS_POST posterior samples) and writes grid_puff-{k+1}.dat which MARG_PUFF then evaluates. 'marg_net' wires the puff/tracer node to consume all.marg_net (the cumulative ILE-evaluated (lambda, lnL, sigma) table) and write grid-{k+1}.dat directly, suppressing the MARG_PUFF lane entirely; use with --puff-exe util_*TracerUpdate.py.") parser.add_argument("--eos-post-args",default=None,help="filename of args_ile.txt file which holds ILE arguments. Should NOT conflict with arguments auto-set by this DAG ... in particular, i/o arguments will be modified") @@ -317,19 +317,22 @@ if opts.marg_event_args: cip_job.set_sub_file(fname) cip_job.write_sub_file() - # PUFF variant - cipPuff_job, cipPuff_job_name = dag_utils.write_CIP_sub(tag='MARG_PUFF',log_dir=None,arg_str=marg_event_args,using_eos='file:'+working_dir_inside+"/grid_puff-$(macroiteration).dat",using_eos_index='$(macroevent)',request_memory=opts.request_memory_marg,input_net=working_dir_inside+'/event-$(macroid).net',output='MARG_puff-$(macroid)-$(macroevent)',out_dir=out_dir_inside_marg,exe=marg_exe_master,universe=main_worker_universe,no_grid=not(opts.use_osg),use_osg=opts.use_osg,use_singularity=opts.use_osg and opts.use_singularity,singularity_image=singularity_image,use_simple_osg_requirements=opts.use_osg,transfer_files=['../../event-$(macroid).net']+transfer_file_names_puff) - # Modify: set 'initialdir' - cipPuff_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_marg/event_$(macroid)") - # Modify output argument: change logs and working directory to be subdirectory for the run - cipPuff_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(cluster)-$(process).log") - cipPuff_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(cluster)-$(process).err") - cipPuff_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(cluster)-$(process).out") - cipPuff_job.add_condor_cmd('request_disk',opts.general_request_disk) - if opts.use_full_submit_paths: - fname = opts.working_directory+"/"+cipPuff_job.get_sub_file() - cipPuff_job.set_sub_file(fname) - cipPuff_job.write_sub_file() + # PUFF variant -- only emit when MARG_PUFF will actually be added to the + # DAG (legacy posterior-mode wiring). In tracer mode (puff_uses_marg_net) + # the tracer writes grid-{k+1}.dat directly and MARG_PUFF is redundant. + if puff_args and not puff_uses_marg_net: + cipPuff_job, cipPuff_job_name = dag_utils.write_CIP_sub(tag='MARG_PUFF',log_dir=None,arg_str=marg_event_args,using_eos='file:'+working_dir_inside+"/grid_puff-$(macroiteration).dat",using_eos_index='$(macroevent)',request_memory=opts.request_memory_marg,input_net=working_dir_inside+'/event-$(macroid).net',output='MARG_puff-$(macroid)-$(macroevent)',out_dir=out_dir_inside_marg,exe=marg_exe_master,universe=main_worker_universe,no_grid=not(opts.use_osg),use_osg=opts.use_osg,use_singularity=opts.use_osg and opts.use_singularity,singularity_image=singularity_image,use_simple_osg_requirements=opts.use_osg,transfer_files=['../../event-$(macroid).net']+transfer_file_names_puff) + # Modify: set 'initialdir' + cipPuff_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_marg/event_$(macroid)") + # Modify output argument: change logs and working directory to be subdirectory for the run + cipPuff_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(cluster)-$(process).log") + cipPuff_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(cluster)-$(process).err") + cipPuff_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(cluster)-$(process).out") + cipPuff_job.add_condor_cmd('request_disk',opts.general_request_disk) + if opts.use_full_submit_paths: + fname = opts.working_directory+"/"+cipPuff_job.get_sub_file() + cipPuff_job.set_sub_file(fname) + cipPuff_job.write_sub_file() else: for indx in np.arange(len(marg_event_args_list)): @@ -348,20 +351,23 @@ else: cip_job.write_sub_file() cip_job_list.append(cip_job) - # PUFF variant - cipPuff_job, cipPuff_job_name = dag_utils.write_CIP_sub(tag='MARG_PUFF_{}'.format(indx),log_dir=None,exe=marg_event_exe_list[indx],arg_str=marg_event_args_list[indx],using_eos='file:'+working_dir_inside+"/grid_puff-$(macroiteration).dat",using_eos_index='$(macroevent)',n_events_to_analyze=n_chunk_here,request_memory=opts.request_memory_marg,input_net=working_dir_inside+'/event-$(macroid).net',output='MARG_puff-$(macroid)-$(macroevent)',out_dir=out_dir_inside_marg,universe=main_worker_universe,no_grid=not(opts.use_osg),use_osg=opts.use_osg,use_singularity=opts.use_osg and opts.use_singularity,singularity_image=singularity_image,use_simple_osg_requirements=opts.use_osg,transfer_files=['../../event-$(macroid).net','../../grid_puff-$(macroiteration).dat']+transfer_file_names_puff) - # Modify: set 'initialdir' - cipPuff_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_marg/event_$(macroid)") - # Modify output argument: change logs and working directory to be subdirectory for the run - cipPuff_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(macroid)-$(cluster)-$(process).log") - cipPuff_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(macroid)-$(cluster)-$(process).err") - cipPuff_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(macroid)-$(cluster)-$(process).out") - cipPuff_job.add_condor_cmd('request_disk',opts.general_request_disk) - if opts.use_full_submit_paths: - fname = opts.working_directory+"/"+cipPuff_job.get_sub_file() - cipPuff_job.set_sub_file(fname) - cipPuff_job.write_sub_file() - cip_puff_job_list.append(cipPuff_job) + # PUFF variant -- only emit when MARG_PUFF will actually be added to + # the DAG (legacy posterior-mode wiring). In tracer mode the tracer + # writes grid-{k+1}.dat directly and MARG_PUFF is redundant. + if puff_args and not puff_uses_marg_net: + cipPuff_job, cipPuff_job_name = dag_utils.write_CIP_sub(tag='MARG_PUFF_{}'.format(indx),log_dir=None,exe=marg_event_exe_list[indx],arg_str=marg_event_args_list[indx],using_eos='file:'+working_dir_inside+"/grid_puff-$(macroiteration).dat",using_eos_index='$(macroevent)',n_events_to_analyze=n_chunk_here,request_memory=opts.request_memory_marg,input_net=working_dir_inside+'/event-$(macroid).net',output='MARG_puff-$(macroid)-$(macroevent)',out_dir=out_dir_inside_marg,universe=main_worker_universe,no_grid=not(opts.use_osg),use_osg=opts.use_osg,use_singularity=opts.use_osg and opts.use_singularity,singularity_image=singularity_image,use_simple_osg_requirements=opts.use_osg,transfer_files=['../../event-$(macroid).net','../../grid_puff-$(macroiteration).dat']+transfer_file_names_puff) + # Modify: set 'initialdir' + cipPuff_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_marg/event_$(macroid)") + # Modify output argument: change logs and working directory to be subdirectory for the run + cipPuff_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(macroid)-$(cluster)-$(process).log") + cipPuff_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(macroid)-$(cluster)-$(process).err") + cipPuff_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(macroid)-$(cluster)-$(process).out") + cipPuff_job.add_condor_cmd('request_disk',opts.general_request_disk) + if opts.use_full_submit_paths: + fname = opts.working_directory+"/"+cipPuff_job.get_sub_file() + cipPuff_job.set_sub_file(fname) + cipPuff_job.write_sub_file() + cip_puff_job_list.append(cipPuff_job) @@ -626,34 +632,21 @@ for it in np.arange(it_start,opts.n_iterations): con_node.add_macro("macroiteration",it) con_node.add_macro("macroevent",int(event)) con_node.set_retry(opts.general_retries) - # --- tracer-only-marg gate --------------------------------------------- - # Skip the regular MARG_* node (grid-X.dat) on non-final iterations when the - # user has opted into tracer-only mode. Iteration 0 must still run MARG since - # no grid_puff-0.dat exists yet, and the last --tracer-final-marg-iterations - # iterations run MARG so the final-posterior diagnostic stays clean. - _n_final_marg = max(0, int(getattr(opts, "tracer_final_marg_iterations", 1))) - _skip_marg_this_iter = ( - getattr(opts, "tracer_only_marg", False) - and it > it_start - and it < (opts.n_iterations - _n_final_marg) - ) - if _skip_marg_this_iter: - print(" tracer-only-marg: skipping MARG_* nodes for iteration", it) for row in np.arange(indx_max_event): #np.arange(n_jobs_this_time): - # Add task per MARG operation - if not _skip_marg_this_iter: - if opts.marg_event_args: - cip_node = pipeline.CondorDAGNode(cip_job) - else: - cip_node = pipeline.CondorDAGNode(cip_job_list[event]) - cip_node.set_retry(opts.general_retries) - cip_node.add_macro("macroid",event) - cip_node.add_macro("macroevent", int(row*n_chunk_here)) - cip_node.add_macro("macroiteration", it) - if not(parent_fit_node is None): - cip_node.add_parent(parent_fit_node) - con_node.add_parent(cip_node) # consolidate depends on all of the individual jobs - dag.add_node(cip_node) + # Add task per MARG operation. MARG runs every iteration; the + # tracer-vs-legacy choice only affects the MARG_PUFF lane below. + if opts.marg_event_args: + cip_node = pipeline.CondorDAGNode(cip_job) + else: + cip_node = pipeline.CondorDAGNode(cip_job_list[event]) + cip_node.set_retry(opts.general_retries) + cip_node.add_macro("macroid",event) + cip_node.add_macro("macroevent", int(row*n_chunk_here)) + cip_node.add_macro("macroiteration", it) + if not(parent_fit_node is None): + cip_node.add_parent(parent_fit_node) + con_node.add_parent(cip_node) # consolidate depends on all of the individual jobs + dag.add_node(cip_node) # if PUFF is done for this event, also add MARG_PUFF nodes. Note grid size assumed identical, so don't need another loop over 'id'/row # In tracer mode (puff_uses_marg_net) the tracer writes grid-{k+1}.dat # directly -- MARG already evaluates that grid, so no MARG_PUFF lane. diff --git a/MonteCarloMarginalizeCode/Code/bin/hyperpipe_conf.yaml b/MonteCarloMarginalizeCode/Code/bin/hyperpipe_conf.yaml index 4cfc0cd76..fa0ed86bf 100644 --- a/MonteCarloMarginalizeCode/Code/bin/hyperpipe_conf.yaml +++ b/MonteCarloMarginalizeCode/Code/bin/hyperpipe_conf.yaml @@ -18,13 +18,6 @@ arch: explode-marg-jobs: 5 explode-marg-jobs-last: null start-iteration: 0 - # Parsimonious-placement workflow (set tracer-only-marg: true to enable). - # Skips MARG_* (posterior) nodes in intermediate iterations and relies on - # MARG_PUFF (placement) jobs alone. Final N iterations still run MARG_* to - # produce a clean posterior. Requires puff.exe to be a tracer-aware updater - # (e.g. util_HyperparameterTracerUpdate.py). - tracer-only-marg: false - tracer-final-marg-iterations: 1 post: exe: null diff --git a/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py b/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py index 8bc8c92b7..04445c0da 100644 --- a/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py +++ b/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py @@ -415,14 +415,6 @@ def my_app(cfg: DictConfig) -> None: start_iter = int(_cfg_get(arch, "start-iteration", 0) or 0) if start_iter: cmd_parts.append(f"--start-iteration {start_iter}") - # Parsimonious-placement workflow: skip MARG_* in intermediate iterations - # when the puff grid is sufficient for placement (requires --puff-exe to be - # a tracer-aware updater such as util_HyperparameterTracerUpdate.py). - if hyper_config.truthy(_cfg_get(arch, "tracer-only-marg", False)): - cmd_parts.append("--tracer-only-marg") - n_final = _cfg_get(arch, "tracer-final-marg-iterations") - if n_final is not None: - cmd_parts.append(f"--tracer-final-marg-iterations {int(n_final)}") # tracer-aware puff input source (posterior|marg_net). Default posterior # keeps the legacy puffball path byte-identical. puff_input_source = _cfg_get(_cfg_get(cfg, "puff"), "input-source") diff --git a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/hyperpipe_conf_tracer.yaml b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/hyperpipe_conf_tracer.yaml index 25d25948f..b5ba69122 100644 --- a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/hyperpipe_conf_tracer.yaml +++ b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/hyperpipe_conf_tracer.yaml @@ -5,18 +5,24 @@ # # * puff.exe -> util_HyperparameterTracerUpdate.py # (the tracer drop-in for util_HyperparameterPuffball.py) +# * puff.input-source = marg_net +# tracer consumes all.marg_net (ILE-evaluated table) +# and writes grid-{k+1}.dat directly, suppressing the +# MARG_PUFF lane entirely. *Required* alongside a +# tracer-aware puff.exe -- without it the pipeline +# falls back to legacy posterior-mode wiring and +# emits the MARG_PUFF lane with grid_puff-*.dat I/O. # * puff.settings -> sampler hyperparameters (smc-mala-bd / RF surrogate) -# * arch.tracer-only-marg = true -# skips MARG_* (posterior) nodes in iterations -# 1 ... n_iterations - tracer-final-marg-iterations - 1 -# and relies on MARG_PUFF (placement) jobs alone. -# Cuts marginalisation cost ~1.7-1.8x for N=5-6. +# +# The savings (~1.7-1.8x for N=5-6) come from killing the MARG_PUFF lane, +# not from skipping MARG. MARG runs every iteration as normal -- the tracer +# consumes each iteration's all.marg_net to place the next grid. # # Run with: -# util_RIFT_hyperpipe.py --config-name hyperpipe_conf_tracer +# util_RIFT_hyperpipe.py --config ./hyperpipe_conf_tracer.yaml # # Override any field on the command line with Hydra syntax, e.g.: -# util_RIFT_hyperpipe.py --config-name hyperpipe_conf_tracer \ +# util_RIFT_hyperpipe.py --config ./hyperpipe_conf_tracer.yaml \ # arch.n-iterations=6 puff.settings.update-method=birth-death arch: @@ -25,11 +31,6 @@ arch: n-samples-per-job: 1000 explode-marg-jobs: 5 start-iteration: 0 - # Parsimonious-placement workflow. Iteration 0 always runs MARG (no - # grid_puff-0.dat yet); iterations 1 ... 3 run MARG_PUFF only; the last - # iteration (n-iterations-1 == 4) runs MARG again for the final posterior. - tracer-only-marg: true - tracer-final-marg-iterations: 1 post: coords-fit: "x y z" @@ -48,6 +49,11 @@ puff: # RIFT.misc.tracer_placement is not importable (e.g. mismatched RIFT # version), so this config remains usable even on older worker images. exe: util_HyperparameterTracerUpdate.py + # REQUIRED for the tracer pathway -- without this the pipeline runs the + # tracer in legacy posterior mode and also emits the MARG_PUFF lane, + # which reads grid_puff-{k+1}.dat (not produced when tracer-only-marg is + # on, so MARG_PUFF jobs fail with "No such file or directory"). + input-source: marg_net puff-factor: 0.5 force-away: 0.03 settings: @@ -63,3 +69,7 @@ init: general: rundir: rundir_tracer + # Demo footprint: a 3-D Gaussian toy needs ~tens of MB per job, not the + # 16 GB baseline default tuned for GW PE. Bump back up if you adapt this + # for production hyperpipe runs. + request-memory: 200 From 77884014d63194438c10f37349a1c88d5544260b Mon Sep 17 00:00:00 2001 From: "R. O'Shaughnessy" Date: Fri, 15 May 2026 14:40:21 -0400 Subject: [PATCH 16/24] create_eos_posterior_pipeline: resolve --puff-exe to full path Why --- PUFF.sub was being written with whatever the user passed on --puff-exe, typically a bare command name like "util_HyperparameterTracerUpdate.py". Condor on some test machines does not search $PATH when launching local-universe jobs, so PUFF fails to start while every other job in the DAG (MARG, MARG_PUFF, EOS_POST_worker, JOIN_POST, TEST) runs fine -- those resolve their executables via `which()` before handing them to dag_utils. Fix --- Mirror the eospost_exe resolution pattern: if opts.puff_exe is a relative name, run `dag_utils.which` and store the absolute path in exe_here. If it's already absolute, leave it alone. The OSG branch is unchanged (still uses `which` for its own relevant_path). Files ----- M MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline --- .../Code/bin/create_eos_posterior_pipeline | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline b/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline index aaaf6d7c6..bbc77bf84 100755 --- a/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline +++ b/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline @@ -541,14 +541,23 @@ if opts.use_osg: # PUFF job # ... pending if puff_args and puff_cadence: + # Resolve --puff-exe to a full path matching how eospost_exe is handled + # above: bare names (e.g. "util_HyperparameterTracerUpdate.py") get + # resolved via `which`; if the user already passed an absolute path, + # leave it alone. Condor on some test machines does not search PATH + # when launching local-universe jobs and fails to start PUFF otherwise. exe_here = opts.puff_exe + if exe_here and not os.path.isabs(exe_here): + resolved = dag_utils.which(exe_here) + if resolved: + exe_here = os.path.abspath(resolved) if opts.use_osg: relevant_path = dag_utils.which(opts.puff_exe) if opts.condor_local_nonworker_igwn_prefix: relevant_path = relevant_path.split("/")[-1] exe_here = opts.working_directory + "/local_puff.sh" with open("local_puff.sh", 'w') as f: - f.write("""#! /bin/bash + f.write("""#! /bin/bash {} {} $@ """.format(extra_text,relevant_path)) From e83956815dc108e1ad0bae2001d7a531f056a49b Mon Sep 17 00:00:00 2001 From: "R. O'Shaughnessy" Date: Fri, 15 May 2026 14:53:36 -0400 Subject: [PATCH 17/24] example_gaussian: fix range issu --- .../Code/demo/hyperpipe/example_gaussian.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/example_gaussian.py b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/example_gaussian.py index 190b79dc4..d535cc8bb 100755 --- a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/example_gaussian.py +++ b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/example_gaussian.py @@ -66,6 +66,12 @@ def likelihood_evaluation(): if opts.using_eos[:5] =='file:': eoss = np.genfromtxt(opts.using_eos[5:],dtype='str') else: eoss = np.genfromtxt(opts.using_eos[:],dtype='str') + + # Sanity check on ranges : standard, do no work if not needed + if opts.eos_start_index > len(eoss): + sys.exit(0) # nothing to do + if opts.eos_end_index < len(eoss): + opts.eos_end_index = len(eoss) likelihood_dict = {} From d9ef44453000ec0ecaef88d9411c7b99b8c8a1c4 Mon Sep 17 00:00:00 2001 From: "R. O'Shaughnessy" Date: Fri, 15 May 2026 15:12:30 -0400 Subject: [PATCH 18/24] util_RIFT_hyperpipe.py: forward coords-sample as --downselect-parameter Why --- util_HyperparameterTracerUpdate.py builds its internal prior_box from the DATA bounding box (X.min, X.max) + 10% pad, NOT from the user's prior. SMC-MALA / SMC-MALA-BD then clip particle proposals to that prior_box. So with a narrow initial grid -- e.g. the demo's blind_gaussian_3d_xy_plus.dat which seeds a corner of [-5,-2]x[2,5]x[2,5] inside a [-7,7]^3 prior -- particles literally cannot escape the convex hull of the initial grid. Earlier project-folder Makefile tests (T1_init_grid.dat) worked only because that seed grid already spanned the full [-7,7]^3 prior; the "convex hull confinement" was masked by the broad seed. Fix --- In _build_puff_args, after emitting --force-away / --puff-factor / --parameter via coord_spec.to_puff_args(), append one --downselect-parameter NAME --downselect-parameter-range [LO,HI] pair per fitting parameter, pulling (LO,HI) from coord_spec.parameter_ranges (i.e. yaml post.coords-sample). The tracer drop-ins (util_*TracerUpdate.py) read these to widen prior_box from data-bbox to user-prior. The legacy puffball (util_HyperparameterPuffball.py) accepts the same flags and uses them as a post-puff downselect mask -- behaviour is byte-identical there as long as the initial grid already lives inside coords-sample, which is always the case. Range values are emitted unquoted ("[-7,7]" not "'[-7,7]'") because create_eos_posterior_pipeline wraps any [..] in single quotes when staging args_puff.txt for Condor (lines 191-192 of that file). Verification ------------ With a 1000-point seed grid confined to x:[-5,-2], y:[2,5], z:[2,5] and yaml coords-sample x:[-7,7] y:[-7,7] z:[-7,7]: before this patch: prior_box = [-5.3,-1.7] x [1.7,5.3] x [1.7,5.3] particles stuck inside seed-grid hull after this patch: prior_box = [-7,7]^3 particles free to explore the full prior Files ----- M MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py --- .../Code/bin/util_RIFT_hyperpipe.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py b/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py index 04445c0da..010d9a6c1 100644 --- a/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py +++ b/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py @@ -237,6 +237,23 @@ def _build_puff_args(cfg, coord_spec) -> str: force_away=_cfg_get(puff, "force-away", 0.03), puff_factor=_cfg_get(puff, "puff-factor", 0.5), ) + # Pass the prior bounds through as --downselect-parameter so the tracer's + # internal prior_box covers the user's actual coords-sample range instead + # of the data bounding box. Without this, a narrow initial grid (e.g. a + # corner seed for blind-recovery tests) clips the tracer to data_bbox + + # 10% pad and particles cannot escape the convex hull. Both the legacy + # util_HyperparameterPuffball.py and the tracer drop-ins accept these + # flags; the legacy puffball treats them as a post-puff downselect mask, + # the tracer drop-ins also use them to widen prior_box. + # The range value is intentionally unquoted: create_eos_posterior_pipeline + # wraps any [..] in single quotes when staging args_puff.txt for Condor. + for p in coord_spec.parameters: + lo, hi = coord_spec.parameter_ranges[p] + args += ( + f" --downselect-parameter {p}" + f" --downselect-parameter-range [{coord_spec._fmt_num(lo)}," + f"{coord_spec._fmt_num(hi)}]" + ) settings = _cfg_get(puff, "settings") or {} # Value-bearing flags. Each pair is (yaml key, CLI flag). Empty / null is skipped. setting_flags = [ From c333e26a65a8b99768204cf303a31ae942b59023 Mon Sep 17 00:00:00 2001 From: "R. O'Shaughnessy" Date: Fri, 15 May 2026 15:18:38 -0400 Subject: [PATCH 19/24] demo/hyperpipe: better demo tools --- MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile index 091349aed..ee73fc1c8 100644 --- a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile +++ b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile @@ -139,3 +139,9 @@ Gaussian_adaptive_bimodal: # - This approach has configuration in 'hyperpipe_conf.yaml' rundir: util_RIFT_hyperpipe.py + +rundir_tracer: + util_RIFT_hyperpipe.py --config hyperpipe_conf_tracer.yaml + +rundir_tracer_plots: + (cd rundir_tracer_plots; plot_posterior_corner.py --posterior-file posterior-3.dat --composite-file all.marg_net --composite-file-has-labels --parameter x --parameter y --parameter z --lnL-cut 15 --use-all-composite-but-grayscale) From ef8c3b5dab8c0d433ac8061aca2345b78ae50e99 Mon Sep 17 00:00:00 2001 From: "R. O'Shaughnessy" Date: Fri, 15 May 2026 15:24:08 -0400 Subject: [PATCH 20/24] demo/hyperpipe gaussian safety --- .../Code/demo/hyperpipe/example_gaussian.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/example_gaussian.py b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/example_gaussian.py index d535cc8bb..7a3e98468 100755 --- a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/example_gaussian.py +++ b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/example_gaussian.py @@ -70,8 +70,8 @@ def likelihood_evaluation(): # Sanity check on ranges : standard, do no work if not needed if opts.eos_start_index > len(eoss): sys.exit(0) # nothing to do - if opts.eos_end_index < len(eoss): - opts.eos_end_index = len(eoss) + if opts.eos_end_index >= len(eoss): + opts.eos_end_index = len(eoss)-1 likelihood_dict = {} From 2aa096ed00d22d0560e3bd15278e5654bbe56b75 Mon Sep 17 00:00:00 2001 From: "R. O'Shaughnessy" Date: Fri, 15 May 2026 15:52:43 -0400 Subject: [PATCH 21/24] create_eos_posterior_pipeline: fix indx clobber, create iteration_*_test dirs Bug --- In the directory-creation loop the outer iteration variable `indx` was being reassigned to 0 inside the body and reused as the per-event counter: for indx in np.arange(it_start, opts.n_iterations+1): ... indx = 0 # CLOBBER for event in opts.event_file: mkdir(ile_dir + "/event_{}".format(indx)) indx += 1 ... if opts.test_args: test_dir = ".../iteration_" + str(indx) + "_test" # WRONG mkdir(test_dir); mkdir(test_dir+'/logs') By the time test_dir is built, indx == len(opts.event_file) (typically 1 for the hyperpipe demo). So instead of getting iteration_0_test ... iteration_N_test, the pipeline creates iteration_1_test once per outer iteration (which is a no-op after the first), and the test nodes for iterations other than 1 land in directories that do not exist. With test_args wired in tracer mode the convergence-test jobs then either fail to start (initialdir missing) or write log files into a non- existent path. The outer-loop control flow itself was unaffected, because `for indx in np.arange(...)` reassigns indx at the top of each iteration regardless. Fix --- Rename the outer loop variable to `it_indx` (the actual semantic) and the per-event counter to `event_indx` via enumerate(). Test directory naming now uses it_indx, matching the iteration-numbered _marg / _post / _con dirs. Files ----- M MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline --- .../Code/bin/create_eos_posterior_pipeline | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline b/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline index bbc77bf84..d777b4689 100755 --- a/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline +++ b/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline @@ -270,20 +270,18 @@ dag = pipeline.CondorDAG(log=os.getcwd()) # Make directories for all iterations -for indx in np.arange(it_start,opts.n_iterations+1): - ile_dir = opts.working_directory+"/iteration_"+str(indx)+"_marg" - cip_dir = opts.working_directory+"/iteration_"+str(indx)+"_post" - consolidate_dir = opts.working_directory+"/iteration_"+str(indx)+"_con" +for it_indx in np.arange(it_start,opts.n_iterations+1): + ile_dir = opts.working_directory+"/iteration_"+str(it_indx)+"_marg" + cip_dir = opts.working_directory+"/iteration_"+str(it_indx)+"_post" + consolidate_dir = opts.working_directory+"/iteration_"+str(it_indx)+"_con" mkdir(ile_dir); mkdir(ile_dir+"/logs") - indx=0 - for event in opts.event_file: - mkdir(ile_dir + "/event_{}".format(indx)) - indx+=1 + for event_indx, _event in enumerate(opts.event_file): + mkdir(ile_dir + "/event_{}".format(event_indx)) mkdir(cip_dir); mkdir(cip_dir+"/logs") mkdir(consolidate_dir); mkdir(consolidate_dir+"/logs") if opts.test_args: - test_dir = opts.working_directory+"/iteration_"+str(indx)+"_test" + test_dir = opts.working_directory+"/iteration_"+str(it_indx)+"_test" mkdir(test_dir); mkdir(test_dir+'/logs') From 1fa362c5d39b901f594eb063e4e3c938f3158ca9 Mon Sep 17 00:00:00 2001 From: "R. O'Shaughnessy" Date: Fri, 15 May 2026 16:35:02 -0400 Subject: [PATCH 22/24] create_eos_posterior_pipeline: resolve --test-exe to full path Same fix as commit 09 (PUFF) applied to the convergence-test job: test.sub was being written with whatever the user passed on --test-exe, typically the bare command name "convergence_test_samples". Condor on some test machines does not search $PATH for local-universe jobs, so test jobs fail to start while MARG / EOS_POST / PUFF all run fine. Mirror the eospost_exe pattern: if opts.test_exe is a relative name, resolve via dag_utils.which() and pass the absolute path to write_test_sub. Absolute paths pass through untouched. Files ----- M MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline --- .../Code/bin/create_eos_posterior_pipeline | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline b/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline index d777b4689..86c168dc1 100755 --- a/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline +++ b/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline @@ -446,7 +446,16 @@ if opts.test_args: else: _test_curr = opts.working_directory + '/grid-$(macroiteration).dat' _test_prev = opts.working_directory + '/grid-$(macroiterationlast).dat' - test_job, test_job_name = dag_utils.write_test_sub(tag='test',log_dir=None,arg_str=test_args,samples_files=[_test_curr, _test_prev] ,out_dir=opts.working_directory,exe=opts.test_exe,universe=local_worker_universe,no_grid=no_worker_grid) + # Resolve --test-exe to a full path matching how eospost_exe / the + # patched puff_exe are handled: bare names get resolved via `which`; + # absolute paths pass through. Condor on some test machines does not + # search PATH for local-universe jobs. + test_exe_resolved = opts.test_exe + if test_exe_resolved and not os.path.isabs(test_exe_resolved): + _r = dag_utils.which(test_exe_resolved) + if _r: + test_exe_resolved = os.path.abspath(_r) + test_job, test_job_name = dag_utils.write_test_sub(tag='test',log_dir=None,arg_str=test_args,samples_files=[_test_curr, _test_prev] ,out_dir=opts.working_directory,exe=test_exe_resolved,universe=local_worker_universe,no_grid=no_worker_grid) # Modify: set 'initialdir' test_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_test") # Modify output argument: change logs and working directory to be subdirectory for the run From 2dbc07d182b1da393aa8d80086ea1f9e06db6678 Mon Sep 17 00:00:00 2001 From: "R. O'Shaughnessy" Date: Fri, 15 May 2026 16:35:23 -0400 Subject: [PATCH 23/24] plot fix --- MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile index ee73fc1c8..690788617 100644 --- a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile +++ b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile @@ -144,4 +144,4 @@ rundir_tracer: util_RIFT_hyperpipe.py --config hyperpipe_conf_tracer.yaml rundir_tracer_plots: - (cd rundir_tracer_plots; plot_posterior_corner.py --posterior-file posterior-3.dat --composite-file all.marg_net --composite-file-has-labels --parameter x --parameter y --parameter z --lnL-cut 15 --use-all-composite-but-grayscale) + (cd rundir_tracer; plot_posterior_corner.py --posterior-file posterior-3.dat --composite-file all.marg_net --composite-file-has-labels --parameter x --parameter y --parameter z --lnL-cut 15 --use-all-composite-but-grayscale) From 43eca9a6d9ee91e6de8bbc79de34f31074377bd2 Mon Sep 17 00:00:00 2001 From: "R. O'Shaughnessy" Date: Fri, 15 May 2026 16:41:29 -0400 Subject: [PATCH 24/24] demo/hyperpipe: fix `make rundir` (and clean up baseline yaml) Two breakages ------------- 1. `make rundir` ran `util_RIFT_hyperpipe.py` with no `--config`, so Hydra loaded the install-shipped bin/hyperpipe_conf.yaml -- which has `init.file: null` and `general.rundir: null`, so `RIFT.hyperpipe.config.validate_config` rejects it before any DAG is written. The user's local demo yaml was never read. 2. The local demo `hyperpipe_conf.yaml` had two keys with spaces instead of hyphens: - arch.explode marg jobs -> arch.explode-marg-jobs - puff.settings.puff factor -> puff.puff-factor (also at the wrong nesting level; `puff-factor` is a top-level puff field consumed by coord_spec.to_puff_args(), not a puff.settings key.) So even if (1) were fixed, `explode-marg-jobs` and `puff-factor` would silently default. The remaining defaults are fine for the toy demo. Fix --- * Makefile: `rundir` now invokes `util_RIFT_hyperpipe.py --config hyperpipe_conf.yaml`, matching the `rundir_tracer` target style. * demo hyperpipe_conf.yaml: normalised indentation, renamed the spaces-in-keys, moved puff-factor / force-away to the canonical top-level puff: location, dropped puff.settings (which was the only survivor and was the wrong nesting in the first place), added `general.request-memory: 200` (the GW-PE-tuned 16 GB default is overkill for the 3-D Gaussian toy and was forcing every MARG node into 16 GB Condor slots), added a brief header comment. Files ----- M MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile M MonteCarloMarginalizeCode/Code/demo/hyperpipe/hyperpipe_conf.yaml --- .../Code/demo/hyperpipe/Makefile | 5 ++- .../Code/demo/hyperpipe/hyperpipe_conf.yaml | 35 ++++++++++++++----- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile index 690788617..b83232909 100644 --- a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile +++ b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile @@ -137,8 +137,11 @@ Gaussian_adaptive_bimodal: # ALTERNATIVE PIPELINE IMPLEMENTATION # - This approach has configuration in 'hyperpipe_conf.yaml' +# Note: util_RIFT_hyperpipe.py without --config loads the install-time default +# next to the script (which has init.file: null and so fails validation). The +# --config shim points it at the local demo yaml. rundir: - util_RIFT_hyperpipe.py + util_RIFT_hyperpipe.py --config hyperpipe_conf.yaml rundir_tracer: util_RIFT_hyperpipe.py --config hyperpipe_conf_tracer.yaml diff --git a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/hyperpipe_conf.yaml b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/hyperpipe_conf.yaml index 873ec65ca..05f49476d 100644 --- a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/hyperpipe_conf.yaml +++ b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/hyperpipe_conf.yaml @@ -1,22 +1,41 @@ +# Baseline hyperpipe demo configuration. Drives the iterative MARG + EOS_POST +# loop on a 3-D Gaussian toy with a 1000-point uniform seed grid spanning a +# corner of the [-7,7]^3 prior. Compare with hyperpipe_conf_tracer.yaml for +# the parsimonious-placement (tracer) variant of the same demo. +# +# Run with: +# make rundir +# which is equivalent to: +# util_RIFT_hyperpipe.py --config ./hyperpipe_conf.yaml arch: - method: default - n-iterations: 3 - n-samples-per-job: 1000 - explode marg jobs: 5 + method: default + n-iterations: 3 + n-samples-per-job: 1000 + explode-marg-jobs: 5 + post: coords-fit: "x y z" coords-sample: "x:[-7,7] y:[-7,7] z:[-7,7]" + marg-list: - name: Gaussian exe: example_gaussian.py - args: --outdir Gaussian_example --conforming-output-name + args: "--outdir Gaussian_example --conforming-output-name" n-chunk: 100 + puff: - settings: - puff factor: 0.5 + # No puff.exe -> no PUFF/MARG_PUFF lane is emitted. The iterative loop + # is MARG -> CON -> UNIFY -> EOS_POST -> next-iteration MARG, with new + # grids drawn from the posterior alone. + puff-factor: 0.5 + force-away: 0.03 + init: file: blind_gaussian_3d_xy_plus.dat + general: rundir: rundir - + # Toy demo footprint; the GW-PE-tuned 16 GB default is overkill for a + # 3-D Gaussian. + request-memory: 200