diff --git a/.travis/test-all-bin.sh b/.travis/test-all-bin.sh index a9362eed..d71bdcbd 100755 --- a/.travis/test-all-bin.sh +++ b/.travis/test-all-bin.sh @@ -41,7 +41,7 @@ for EXE in MonteCarloMarginalizeCode/Code/bin/*; do # continue # fi # skip non-python scripts - if [[ ${EXE} == *".sh" ]]; then + if [[ ${EXE} == *".sh" ]] || [[ ${EXE} == *".yaml" ]]; then echo " Not python : " ${EXE} continue fi diff --git a/CHANGES.rst b/CHANGES.rst index 78a9bad3..e18d717b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,10 +2,33 @@ 0.0.18.0 ------------ development tree is rift_O4d. + - parsimonious-placement (preview): new RIFT.misc.tracer_placement engine (SMC+MALA, birth-death, + and SMC-MALA+BD samplers; pluggable RF / RBF / polynomial / quadratic fits) plus two + opt-in CLI tools that mirror the existing puffball tools' I/O contract: + util_HyperparameterTracerUpdate.py (hyperpipe drop-in for util_HyperparameterPuffball.py) + and util_ParameterTracerUpdate.py (event-level drop-in for util_ParameterPuffball.py). + Default behavior preserves the existing pipelines (the new tools are not invoked unless + the user points --puff-exe at them); both also accept --update-method puffball as an + exact-regression fallback. See + https://git.ligo.org/rapidpe-rift/rift/-/merge_requests/ (TBD) and the project notes at + 20260513-Me-ParsimoniousPlacementOptions/parsimonious_placement_plan.md. - CIP hyperpipe improvements (initialize_me; enable population and EOS params in using_eos file with arbitrary labelled parameters); CIP xgboost gp fit (from Aasim); install make 'precession' optional; see also 0.0.17.4rc0 igwn-ligolw - + - parsimonious placement (preview): new RIFT.misc.tracer_placement engine package (SMC+MALA / birth-death / + SMC-MALA+BD samplers with pluggable RF / RBF / polynomial / quadratic surrogate fits) plus two thin + command-line drivers, bin/util_HyperparameterTracerUpdate.py (hyperpipe, .dat I/O) and + bin/util_ParameterTracerUpdate.py (event-level, XML I/O via lalsimutils). Both are drop-in alternatives to + util_HyperparameterPuffball.py / util_ParameterPuffball.py: legacy default behavior is unchanged --- the new + tools are only invoked when --puff-exe points at them. create_eos_posterior_pipeline gains + --tracer-only-marg and --tracer-final-marg-iterations to optionally skip MARG_* (posterior) nodes in + intermediate iterations and rely on MARG_PUFF (placement) jobs alone, recovering MARG only for the final N + iterations. util_RIFT_hyperpipe.py forwards the same flags via arch.tracer-only-marg / + arch.tracer-final-marg-iterations and exposes a puff.settings: block mirroring post.settings: so tracer + sampler hyperparameters can be set declaratively in Hydra. See + 20260513-Me-ParsimoniousPlacementOptions/parsimonious_placement_plan.md for theory, prototype results, + rollout plan, and references. + 0.0.17.9 ------------ development tree is rift_O4c_staging -> rift_O4c; draft MR notes at diff --git a/MonteCarloMarginalizeCode/Code/RIFT/__init__.py b/MonteCarloMarginalizeCode/Code/RIFT/__init__.py index 59c1f678..4da9fe8c 100644 --- a/MonteCarloMarginalizeCode/Code/RIFT/__init__.py +++ b/MonteCarloMarginalizeCode/Code/RIFT/__init__.py @@ -1,3 +1,3 @@ -__all__ = ['integrators','interpolators','likelihood','physics','misc','calmarg','plot_utilities'] +__all__ = ['integrators','interpolators','likelihood','physics','misc','calmarg','plot_utilities','hyperpipe'] from . import lalsimutils # top-level interface utility from . import * # sub-packages diff --git a/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/__init__.py b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/__init__.py new file mode 100644 index 00000000..011d18db --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/__init__.py @@ -0,0 +1,30 @@ +""" +RIFT.hyperpipe +============== + +Pipeline-construction utilities for the RIFT "hyperpipeline" --- the iterative +marginalize + fit + puff loop used to infer hyperparameters (e.g. EOS, +population-model, or generic high-level) from one or more underlying +likelihood evaluators ("marg drivers"). Analogous in spirit to the +``util_RIFT_pseudo_pipe.py`` driver for the single-event GW PE pipeline, +but with: + + * ini-based / Hydra-based configuration (see ``config.py``) + * flexible multi-event input (``marg-list`` semantics, see ``marg_list.py``) + * a coordinate-transformation framework that mirrors the CIP + ``--supplementary-coordinate-code`` / ``--parameter`` / ``--integration-parameter-range`` + conventions already used by ``util_ConstructEOSPosterior.py`` + (see ``coords.py``) + * driver toolkits to spare downstream users from re-deriving the + fragile argument strings, output-file names, and contract points + each marg driver must respect (see ``drivers``) + +The top-level CLI driver remains ``bin/util_RIFT_hyperpipe.py``; this +package is what it delegates to. +""" + +from . import coords +from . import config +from . import marg_list + +__all__ = ['coords', 'config', 'marg_list', 'drivers'] diff --git a/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/config.py b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/config.py new file mode 100644 index 00000000..2de9d088 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/config.py @@ -0,0 +1,245 @@ +""" +RIFT.hyperpipe.config +===================== + +Schema + defaults for the Hydra/OmegaConf configuration consumed by +``util_RIFT_hyperpipe.py``. The top-level keys correspond to the +sections sketched in the original (pre-rewrite) ``util_RIFT_hyperpipe.py`` +header comment: + + arch : iteration / chunking / batch architecture + post : posterior-construction stage (CIP-style executable) + marg-list : list of per-event/per-driver marg jobs (heterogeneous OK) + puff : puffball randomization stage + test : convergence-test stage + init : initial-grid sourcing (file or generation) + general : retries, resources, condor / OSG / singularity knobs + +A default config is provided as :data:`DEFAULT_CONFIG_YAML` so a user can +write a minimal override and get a working pipeline. + +Validation is intentionally light here --- the heavy lifting is delegated +to :mod:`RIFT.hyperpipe.coords` and :mod:`RIFT.hyperpipe.marg_list`. +""" + +from __future__ import annotations + +import logging +import os +from typing import Any, Dict + +logger = logging.getLogger(__name__) + + +# -------------------------------------------------------------------------- +# Default config (Hydra writes this verbatim if no override is given) +# -------------------------------------------------------------------------- + +DEFAULT_CONFIG_YAML = """\ +# RIFT hyperpipe default configuration. +# +# Override on the CLI with Hydra syntax, e.g.: +# util_RIFT_hyperpipe.py arch.n-iterations=10 general.use-osg=true +# or by writing your own hyperpipe_conf.yaml and pointing Hydra at it. + +arch: + # High-level iterative architecture knobs. + method: default # reserved for future hyperpipe strategies + n-iterations: 5 + n-samples-per-job: 1000 + explode-marg-jobs: 5 # -> --eos-post-explode-jobs + explode-marg-jobs-last: null # -> --eos-post-explode-jobs-last (default = same) + start-iteration: 0 + # Parsimonious-placement workflow (set tracer-only-marg: true to enable). + # When true, only the final ``tracer-final-marg-iterations`` iterations + # spawn MARG_* (posterior) nodes; intermediate iterations rely on + # MARG_PUFF (placement) jobs alone, recovering most of a CIP iteration's + # cost. Requires puff.exe to be a tracer-aware updater. + tracer-only-marg: false # -> --tracer-only-marg + tracer-final-marg-iterations: 1 # -> --tracer-final-marg-iterations + +post: + # Final posterior-construction stage (CIP-family executable). + exe: null # default = `which util_ConstructEOSPosterior.py` + coord-module: null # importable module name, e.g. "rift_default" + coords-fit: "" # "x y z" + coords-sample: "" # "x:[-8,8] y:[-8,8] z:[-8,8]" + coords-implied: "" # "R1.4 Mmax" + coords-nofit: "" # "delta_mc s1z s2z" + likelihood-factor-module: null + likelihood-factor-function: null + likelihood-factor-ini: null + extra-args: "" # appended verbatim to args_eos_post.txt + settings: + n-max: null # -> --n-max + n-step: null # -> --n-step + n-eff: null # -> --n-eff + sampler-method: null # -> --sampler-method + fit-method: null # -> --fit-method + sigma-cut: null # -> --sigma-cut + +marg-list: + # One entry per (likelihood driver, event) pair. Heterogeneous OK --- + # different drivers may have different batch sizes (n-chunk) and + # different per-driver coord modules. + # + # Example (Gaussian toy): + # - name: gaussian + # exe: example_gaussian.py + # args: "--outdir Gaussian_example --conforming-output-name" + # event-file: null # null -> 'empty_event_file' sentinel + # n-chunk: 100 + # coord-module: null # per-driver coord override (rarely needed) + # extra-args: "" + - name: example + exe: example_gaussian.py + args: "--outdir example_output --conforming-output-name" + event-file: null + n-chunk: 100 + coord-module: null + extra-args: "" + +puff: + exe: null # default = `which util_HyperparameterPuffball.py` + puff-factor: 0.5 + force-away: 0.03 + extra-args: "" + # Tracer-placement sampler hyperparameters. These are only consumed when + # exe points at a tracer-aware updater (e.g. util_HyperparameterTracerUpdate.py + # or util_ParameterTracerUpdate.py); the legacy puffball binaries ignore them. + # Null values fall through to the updater's built-in defaults. + settings: + update-method: null # smc-mala-bd | smc-mala | birth-death | puffball + tracer-fit-method: null # rf | rbf | polynomial | quadratic + n-mala-steps: null # -> --n-mala-steps + target-ess-frac: null # -> --target-ess-frac + birth-death-rate: null # -> --birth-death-rate + inj-file-prev: null # -> --inj-file-prev (SMC bridging input) + no-union-refit: false # -> --no-union-refit + regularize: false # -> --regularize (passes through to puffball-compat code) + rng-seed: null # -> --rng-seed (deterministic when set) + state-in: null # -> --state-in + state-out: null # -> --state-out + +test: + exe: convergence_test_samples + method: JS + threshold: 0.05 + extra-args: "" + +init: + # Initial parameter-grid sourcing. Provide *either* ``file`` (path to an + # existing grid file) *or* a ``generation`` block (auto-generate via + # util_HyperparameterGrid.py-style helper). If both are set, ``file`` + # wins. + file: null + generation: + placement-method: null # e.g. "uniform" + params-and-ranges: null # e.g. "x:[-8,8] y:[-8,8] z:[-8,8]" + npts: null + external-code: null + external-args: null + +general: + rundir: null # optional; created if missing, cd into it + retries: 0 # -> --general-retries + request-disk: 10M # -> --general-request-disk + request-memory: 16384 # -> --request-memory-marg (MB) + use-osg: false + use-singularity: false + use-singularity-local: false + condor-local-nonworker: false + condor-local-nonworker-igwn-prefix: false + condor-nogrid-nonworker: false + use-full-submit-paths: true + transfer-files: [] # extra files to ship beyond auto-detected exes +""" + + +# -------------------------------------------------------------------------- +# Validation +# -------------------------------------------------------------------------- + + +def _get(node, key, default=None): + """Forgiving get that works on OmegaConf DictConfig *and* plain dicts.""" + if node is None: + return default + try: + return node.get(key, default) # type: ignore[union-attr] + except AttributeError: + return node[key] if key in node else default + + +def validate_config(cfg) -> None: + """Raise informatively if the config has structural problems we can detect early. + + This does *not* validate coord modules or marg-list executables --- + those are deferred to coords.HyperCoordSpec.validate() and + marg_list.assemble_marg_list() so the messages can be more specific. + """ + if cfg is None: + raise ValueError("hyperpipe config is empty.") + + # Required top-level sections + for section in ("arch", "post", "marg-list", "puff", "init", "general"): + if _get(cfg, section) is None: + raise ValueError( + f"hyperpipe config is missing required section: {section!r}" + ) + + # marg-list must be a non-empty sequence + marg_list = _get(cfg, "marg-list") + try: + n = len(marg_list) + except TypeError as exc: + raise ValueError("'marg-list' must be a list of marg-driver entries.") from exc + if n == 0: + raise ValueError("'marg-list' must contain at least one entry.") + + # arch + arch = _get(cfg, "arch") + if not isinstance(_get(arch, "n-iterations"), int) or _get(arch, "n-iterations") <= 0: + raise ValueError("arch.n-iterations must be a positive integer.") + if not isinstance(_get(arch, "n-samples-per-job"), int) or _get(arch, "n-samples-per-job") <= 0: + raise ValueError("arch.n-samples-per-job must be a positive integer.") + + # post: at least one of (coords-fit) must be set + post = _get(cfg, "post") + if not _get(post, "coords-fit"): + raise ValueError( + "post.coords-fit must list at least one parameter " + "(e.g. 'x y z')." + ) + + # init: must have either file or generation set + init = _get(cfg, "init") + has_file = bool(_get(init, "file")) + gen = _get(init, "generation") or {} + has_gen = bool(_get(gen, "placement-method") or _get(gen, "external-code")) + if not (has_file or has_gen): + raise ValueError( + "init: must provide either init.file (existing grid) or " + "init.generation.placement-method / init.generation.external-code." + ) + + +def expand_path(p: str, base_dir: str) -> str: + """Expand a config path: leave absolutes alone; resolve relatives against *base_dir*.""" + if not p: + return p + p = os.path.expanduser(p) + if os.path.isabs(p): + return p + return os.path.normpath(os.path.join(base_dir, p)) + + +def truthy(val: Any) -> bool: + """Robustly coerce config values to bool. Tolerates 'true'/'True'/'1'/etc.""" + if isinstance(val, bool): + return val + if isinstance(val, (int, float)): + return val != 0 + if isinstance(val, str): + return val.strip().lower() in {"true", "yes", "1", "on"} + return False diff --git a/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/coords.py b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/coords.py new file mode 100644 index 00000000..bc039846 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/coords.py @@ -0,0 +1,360 @@ +""" +RIFT.hyperpipe.coords +===================== + +Coordinate-transformation framework for the hyperpipeline. + +This module mirrors the conventions already established by +``util_ConstructEOSPosterior.py`` (and the broader CIP family) so that +hyperpipe configurations can reuse existing coord modules without a +parallel ecosystem. Specifically: + + * A "coord module" is just an *importable Python module name*. When + passed to the post-stage executable via ``--supplementary-coordinate-code``, + that executable will ``__import__`` it at runtime and call into it for + coordinate conversion / Jacobian / prior factors. + + * Each coordinate appears in the post stage via: + ``--parameter `` (fitting & MC parameter) + ``--integration-parameter-range :[a,b]`` (sampling bound) + plus optionally + ``--parameter-implied `` (used in fit, not independently sampled) + ``--parameter-nofit `` (sampled but not a fit coordinate) + + * For *heterogeneous* analyses (multiple likelihood drivers contributing + to the same hyperparameter inference), each driver may specify its own + coord module --- this is just passed through as an extra argument to + that driver's args line (typically as ``--supplementary-coordinate-code`` + if that driver supports it, else as a custom flag the driver consumes). + +The :class:`HyperCoordSpec` dataclass below bundles the four pieces a +hyperpipe configuration needs to know: + + name # what gets passed to --supplementary-coordinate-code + parameters # list of fitting parameters (== --parameter X ...) + parameter_ranges # dict[str, (a, b)] for --integration-parameter-range + implied # list of parameter-implied entries (optional) + nofit # list of parameter-nofit entries (optional) + likelihood_factor # optional (module, function, ini) trio for + # --supplementary-likelihood-factor-{code,function,ini} + +and provides helpers that emit the argument strings the post stage (and +optional per-driver) consume. +""" + +from __future__ import annotations + +import importlib +import logging +import re +import shlex +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Sequence, Tuple + +logger = logging.getLogger(__name__) + + +# -------------------------------------------------------------------------- +# Parsing helpers +# -------------------------------------------------------------------------- + +_RANGE_RE = re.compile( + r"""^\s* + (?P[A-Za-z_][\w]*) + \s*[:=]\s* + \[\s* + (?P[-+0-9eE.naif]+) + \s*,\s* + (?P[-+0-9eE.naif]+) + \s*\] + \s*$""", + re.VERBOSE, +) + + +def parse_range_block(block: str) -> Tuple[str, Tuple[float, float]]: + """Parse a single ``name:[lo,hi]`` block into ``(name, (lo, hi))``. + + The form ``name=[lo,hi]`` is also accepted as a convenience. + """ + m = _RANGE_RE.match(block) + if not m: + raise ValueError( + f"Could not parse coord-range block {block!r}; " + "expected 'name:[lo,hi]'." + ) + lo = float(m.group("lo")) + hi = float(m.group("hi")) + if not lo < hi: + raise ValueError( + f"Range for {m.group('name')!r} must be increasing; got [{lo},{hi}]." + ) + return m.group("name"), (lo, hi) + + +def parse_range_string(s: str) -> Dict[str, Tuple[float, float]]: + """Parse a space-separated string of ``name:[lo,hi]`` blocks. + + Example + ------- + >>> parse_range_string("x:[-8,8] y:[-1,1]") + {'x': (-8.0, 8.0), 'y': (-1.0, 1.0)} + """ + out: Dict[str, Tuple[float, float]] = {} + if not s: + return out + for block in shlex.split(s): + name, rng = parse_range_block(block) + if name in out: + raise ValueError(f"Duplicate range for parameter {name!r} in {s!r}.") + out[name] = rng + return out + + +def parse_parameter_list(s: str) -> List[str]: + """Split a space-separated parameter-name string into a list, preserving order.""" + if not s: + return [] + return shlex.split(s) + + +# -------------------------------------------------------------------------- +# HyperCoordSpec +# -------------------------------------------------------------------------- + + +@dataclass +class HyperCoordSpec: + """Bundle describing the coordinates a hyperpipe stage operates in. + + Parameters + ---------- + name + Name of an *importable* Python module used by the consumer for + coord conversion / Jacobian / priors. If ``None``, no + ``--supplementary-coordinate-code`` is emitted. + parameters + Fitting / MC parameter names. Emitted as ``--parameter X`` flags. + parameter_ranges + Map from parameter name to ``(lo, hi)``. Emitted as + ``--integration-parameter-range X:[lo,hi]``. + implied + Parameters used in the fit but not independently sampled. + Emitted as ``--parameter-implied X``. + nofit + Parameters sampled but not in the fit. Emitted as + ``--parameter-nofit X``. + likelihood_factor + Optional ``(module, function, ini)`` triple wiring a + supplementary external-prior / likelihood factor through the + post stage (``--supplementary-likelihood-factor-{code,function,ini}``). + """ + + name: Optional[str] = None + parameters: List[str] = field(default_factory=list) + parameter_ranges: Dict[str, Tuple[float, float]] = field(default_factory=dict) + implied: List[str] = field(default_factory=list) + nofit: List[str] = field(default_factory=list) + likelihood_factor: Optional[Tuple[str, Optional[str], Optional[str]]] = None + + # ----- construction -------------------------------------------------- + @classmethod + def from_strings( + cls, + *, + name: Optional[str] = None, + coords_fit: str = "", + coords_sample: str = "", + coords_implied: str = "", + coords_nofit: str = "", + likelihood_factor: Optional[Sequence[Optional[str]]] = None, + ) -> "HyperCoordSpec": + """Build a spec from the string-shaped fields a Hydra config gives us. + + ``coords_fit`` : "x y z" + ``coords_sample`` : "x:[-8,8] y:[-8,8] z:[-8,8]" + ``coords_implied`` : "R1.4 Mmax" (optional) + ``coords_nofit`` : "delta_mc s1z s2z" (optional) + ``likelihood_factor``: (module, function, ini) (any element may be None) + """ + params = parse_parameter_list(coords_fit) + ranges = parse_range_string(coords_sample) + unknown = set(ranges) - set(params) + if unknown: + raise ValueError( + f"coords-sample names a parameter not in coords-fit: {sorted(unknown)!r}" + ) + lf: Optional[Tuple[str, Optional[str], Optional[str]]] = None + if likelihood_factor: + # Pad to length 3 and coerce empties to None + seq = list(likelihood_factor) + [None] * (3 - len(likelihood_factor)) + seq = [s if s else None for s in seq[:3]] + if seq[0] is not None: + lf = (seq[0], seq[1], seq[2]) # type: ignore[assignment] + return cls( + name=name or None, + parameters=params, + parameter_ranges=ranges, + implied=parse_parameter_list(coords_implied), + nofit=parse_parameter_list(coords_nofit), + likelihood_factor=lf, + ) + + # ----- validation ---------------------------------------------------- + def validate(self, strict_import: bool = False) -> None: + """Sanity-check the spec; optionally verify the coord module imports. + + ``strict_import=True`` will attempt ``importlib.import_module(self.name)`` + and raise on failure; with ``False`` we only warn, since coord + modules are often only importable inside the downstream runtime + environment (singularity image, OSG worker) and not necessarily + on the submit host. + """ + if not self.parameters: + raise ValueError("HyperCoordSpec requires at least one fitting parameter.") + missing = [p for p in self.parameters if p not in self.parameter_ranges] + if missing: + raise ValueError( + f"No integration range supplied for parameter(s): {missing!r}. " + "Every entry in coords-fit must appear in coords-sample." + ) + for p, (lo, hi) in self.parameter_ranges.items(): + if not lo < hi: + raise ValueError(f"Range for {p!r} must be increasing; got [{lo},{hi}].") + if self.name: + try: + importlib.import_module(self.name) + except Exception as exc: # noqa: BLE001 -- broad on purpose; cf. docstring + msg = ( + f"HyperCoordSpec: coord module {self.name!r} did not import " + f"on the submit host ({type(exc).__name__}: {exc}). " + "If it is only present on the worker, this is expected." + ) + if strict_import: + raise ImportError(msg) from exc + logger.warning(msg) + + # ----- emission ------------------------------------------------------ + @staticmethod + def _fmt_num(x: float) -> str: + """Format a coord bound, preserving the integer form when applicable. + + e.g. -8.0 -> '-8' (matches the existing hyperpipe demos) + -1.6 -> '-1.6' + 1e-05 -> '1e-05' + """ + try: + xi = int(x) + except (OverflowError, ValueError): + return repr(x) + if xi == x: + return str(xi) + # general format strips trailing zeros while keeping decimal precision + return format(x, "g") + + def to_parameter_args(self) -> str: + """Emit ``--parameter X`` / ``--integration-parameter-range X:[a,b]`` flags. + + Includes implied / nofit and any required-by-CIP ordering. Returns a + single space-joined string, ready to be appended to a hyperpipe + args_*.txt file. + """ + bits: List[str] = [] + for p in self.parameters: + bits.append(f"--parameter {p}") + for p in self.implied: + bits.append(f"--parameter-implied {p}") + for p in self.nofit: + bits.append(f"--parameter-nofit {p}") + for p in self.parameters: + lo, hi = self.parameter_ranges[p] + bits.append( + f"--integration-parameter-range {p}:[{self._fmt_num(lo)},{self._fmt_num(hi)}]" + ) + return " ".join(bits) + + def to_post_args(self) -> str: + """Emit the post-stage arg block (parameters + coord-module + lf trio).""" + bits = [self.to_parameter_args()] + if self.name: + bits.append(f"--supplementary-coordinate-code {self.name}") + if self.likelihood_factor is not None: + mod, fn, ini = self.likelihood_factor + bits.append(f"--supplementary-likelihood-factor-code {mod}") + if fn: + bits.append(f"--supplementary-likelihood-factor-function {fn}") + if ini: + bits.append(f"--supplementary-likelihood-factor-ini {ini}") + return " ".join(b for b in bits if b) + + def to_puff_args(self, force_away: float = 0.03, puff_factor: float = 0.5) -> str: + """Emit the puff-stage arg block. + + By default we puff in every fitting parameter; this is what every + existing hyperpipe example does. Extra flags can be appended by the + caller. + """ + bits = [f"--force-away {force_away}", f"--puff-factor {puff_factor}"] + for p in self.parameters: + bits.append(f"--parameter {p}") + return " ".join(bits) + + def to_test_args(self, method: str = "JS", threshold: float = 0.05) -> str: + """Emit the convergence-test arg block. + + Mirrors the args_test.txt pattern from the Gaussian demo: + ``--parameter x --parameter y --parameter z --method JS --threshold 0.05`` + """ + bits = [f"--parameter {p}" for p in self.parameters] + bits.append(f"--method {method}") + bits.append(f"--threshold {threshold}") + return " ".join(bits) + + # ----- per-driver hook --------------------------------------------------- + def to_driver_coord_flag(self) -> str: + """Emit only the ``--supplementary-coordinate-code`` flag, if any. + + Useful for composing into a marg-driver's per-event args line when + that driver also consumes the same coord-module convention (e.g. + CIP-as-marg in the GW+NICER NS-EOS pipeline). + """ + return f"--supplementary-coordinate-code {self.name}" if self.name else "" + + +# -------------------------------------------------------------------------- +# Convenience constructors +# -------------------------------------------------------------------------- + + +def coord_spec_from_config_section(section) -> HyperCoordSpec: + """Build a :class:`HyperCoordSpec` from a Hydra ``post`` (or analogous) section. + + The section is expected to provide the keys: + ``coord-module`` (str, optional) + ``coords-fit`` (str) + ``coords-sample`` (str) + ``coords-implied`` (str, optional) + ``coords-nofit`` (str, optional) + ``likelihood-factor-module`` / ``likelihood-factor-function`` / + ``likelihood-factor-ini`` (str, optional) + """ + def _get(key, default=None): + # tolerate both DictConfig and plain dict + try: + return section.get(key, default) # type: ignore[union-attr] + except AttributeError: + return section[key] if key in section else default + + lf_mod = _get("likelihood-factor-module") + lf = None + if lf_mod: + lf = (lf_mod, _get("likelihood-factor-function"), _get("likelihood-factor-ini")) + + return HyperCoordSpec.from_strings( + name=_get("coord-module"), + coords_fit=_get("coords-fit", "") or "", + coords_sample=_get("coords-sample", "") or "", + coords_implied=_get("coords-implied", "") or "", + coords_nofit=_get("coords-nofit", "") or "", + likelihood_factor=lf, + ) diff --git a/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/drivers/__init__.py b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/drivers/__init__.py new file mode 100644 index 00000000..19786140 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/drivers/__init__.py @@ -0,0 +1,38 @@ +""" +RIFT.hyperpipe.drivers +====================== + +Toolkit for building "marg drivers" --- the per-event likelihood +evaluators the hyperpipeline orchestrates --- with the minimum of +ceremony. + +Every driver in the hyperpipeline must obey the contract: + + Inputs (CLI, as seen by ``create_eos_posterior_pipeline``): + --using-eos file: # path to the current hyperparameter grid + --using-eos-index # OR + --eos_start_index # index range [a, b) to evaluate + --eos_end_index + --fname-output-integral # name for output annotated file + --fname-output-samples # ignored by most drivers (legacy) + --outdir # write outputs into D/ + --conforming-output-name # if set, append '+annotation.dat' + --fname # dummy/passthrough; sometimes a real path + + Output: + /[+annotation.dat] + whitespace-separated, header: + # lnL sigma_lnL + Rows are the rows of the input grid in [a, b), with the first + two columns replaced by ln L and an estimate of its uncertainty. + +The :class:`MargDriverBase` class encapsulates the boilerplate (arg +parsing, header sniffing, slicing, output formatting) so a concrete +driver only has to implement ``log_likelihood``. + +See :mod:`.gaussian` for the canonical example. +""" + +from .base import MargDriverBase, parse_marg_driver_args, write_marg_output + +__all__ = ["MargDriverBase", "parse_marg_driver_args", "write_marg_output"] diff --git a/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/drivers/base.py b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/drivers/base.py new file mode 100644 index 00000000..6824a2c5 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/drivers/base.py @@ -0,0 +1,262 @@ +""" +RIFT.hyperpipe.drivers.base +=========================== + +Boilerplate-free base class for marg drivers. + +The single hard contract a marg driver has with the hyperpipeline is +described in :mod:`RIFT.hyperpipe.drivers`. The work the existing +hand-written drivers had to repeat --- parse a fixed set of CLI args, +sniff the input-grid header for column names, slice the rows by index +range, and write a header-prefixed ``+annotation.dat`` output --- is +encapsulated here so a concrete driver becomes "implement +``log_likelihood`` and call ``MargDriverBase.run()``". +""" + +from __future__ import annotations + +import argparse +import logging +import os +from pathlib import Path +from typing import List, Optional, Sequence, Tuple + +import numpy as np + +logger = logging.getLogger(__name__) + + +# -------------------------------------------------------------------------- +# CLI parsing +# -------------------------------------------------------------------------- + + +def make_marg_driver_parser(description: Optional[str] = None) -> argparse.ArgumentParser: + """Construct an ``argparse.ArgumentParser`` with the standard marg-driver flags. + + Concrete drivers can extend this by adding their own arguments to the + returned parser before calling ``.parse_args()``. + """ + p = argparse.ArgumentParser(description=description) + p.add_argument("--fname", type=str, default=None, + help="Dummy passthrough required by the hyperpipe API.") + p.add_argument("--using-eos", type=str, required=True, + help="Input hyperparameter grid. 'file:' prefix is tolerated.") + p.add_argument("--using-eos-index", type=int, default=None, + help="If set, evaluate this single row (overrides start/end).") + p.add_argument("--eos_start_index", type=int, default=None, + help="First row of the grid to evaluate (inclusive).") + p.add_argument("--eos_end_index", type=int, default=None, + help="Last row of the grid to evaluate (exclusive).") + p.add_argument("--n-events-to-analyze", type=int, default=None, + help="When --using-eos-index is set, evaluate this many rows in total.") + p.add_argument("--outdir", type=str, default=None, + help="Output directory. Created if absent. Default: cwd.") + p.add_argument("--outdir-clean", action="store_true", + help="If set, delete --outdir before writing.") + p.add_argument("--fname-output-integral", type=str, default="output-marg-integral", + help="Base name for the lnL output file.") + p.add_argument("--fname-output-samples", type=str, default="output-marg-samples", + help="Dummy passthrough required by the hyperpipe API.") + p.add_argument("--conforming-output-name", action="store_true", + help="Append '+annotation.dat' to the integral output name " + "(required by create_eos_posterior_pipeline).") + return p + + +def parse_marg_driver_args( + parser: argparse.ArgumentParser, + argv: Optional[Sequence[str]] = None, +) -> argparse.Namespace: + """Run argparse and normalize the index range. + + Resolves the four overlapping ways the hyperpipeline can ask for an + index range (``--using-eos-index`` alone, ``--using-eos-index`` plus + ``--n-events-to-analyze``, or ``--eos_start_index``/``--eos_end_index``) + into a single ``(eos_start_index, eos_end_index)`` pair. + """ + opts = parser.parse_args(argv) + if opts.using_eos_index is not None: + opts.eos_start_index = opts.using_eos_index + opts.eos_end_index = opts.using_eos_index + 1 + if opts.n_events_to_analyze: + opts.eos_end_index = opts.using_eos_index + int(opts.n_events_to_analyze) + if opts.eos_start_index is None or opts.eos_end_index is None: + raise SystemExit( + "marg driver: need either --using-eos-index, or both " + "--eos_start_index and --eos_end_index." + ) + if opts.eos_end_index <= opts.eos_start_index: + raise SystemExit( + f"marg driver: eos_end_index ({opts.eos_end_index}) must exceed " + f"eos_start_index ({opts.eos_start_index})." + ) + return opts + + +# -------------------------------------------------------------------------- +# Grid I/O +# -------------------------------------------------------------------------- + + +def _strip_file_prefix(p: str) -> str: + return p[5:] if p.startswith("file:") else p + + +def read_grid(using_eos: str) -> Tuple[np.ndarray, List[str]]: + """Load a hyperpipe-format grid file. + + Returns ``(rows, column_names)`` where ``rows`` is a 2-D string array + (so we don't lose precision on the lnL / sigma columns that we will + overwrite) and ``column_names`` is the list of column names beyond + the two leading ``lnL sigma_lnL`` columns. + """ + path = _strip_file_prefix(using_eos) + with open(path, "r") as f: + header = f.readline().rstrip("\n") + if not header.startswith("#"): + raise ValueError( + f"read_grid: expected a '#'-prefixed header in {path!r}; got {header!r}." + ) + cols = header.lstrip("#").split() + if len(cols) < 2: + raise ValueError( + f"read_grid: header {header!r} must declare at least lnL and sigma_lnL." + ) + data = np.genfromtxt(path, dtype="str") + if data.ndim == 1: + data = data.reshape(1, -1) + # Upcast to a wide string dtype so we can rewrite the leading + # lnL / sigma_lnL columns with arbitrarily-precise float reprs + # without numpy silently truncating. Without this step, + # ``rows[i, 0] = repr(some_float)`` would be truncated to whatever + # the longest string in the input grid happened to be (this was a + # latent bug in the hand-written example_gaussian.py). + data = data.astype(" str: + """Write the annotated output file. + + Returns the path written. Mirrors the convention used by + ``example_gaussian.py`` and the NICER drivers: + + * if ``--fname`` is *not* set, write into ``outdir/`` and use the + ``--fname-output-integral`` basename; + * if ``--fname`` *is* set (legacy RIFT-style invocation), write + to the bare ``--fname-output-integral`` path; + * if ``--conforming-output-name`` is set, suffix ``+annotation.dat``. + """ + postfix = "+annotation.dat" if conforming_output_name else "" + if fname is None: + outdir = outdir or "." + Path(outdir).mkdir(parents=True, exist_ok=True) + out_path = os.path.join(outdir, fname_output_integral + postfix) + else: + out_path = fname_output_integral + postfix + header = "lnL sigma_lnL " + " ".join(column_names) + np.savetxt(out_path, rows, fmt="%10s", header=header) + return out_path + + +# -------------------------------------------------------------------------- +# Driver base class +# -------------------------------------------------------------------------- + + +class MargDriverBase: + """Base class for marg drivers. + + Subclasses implement :meth:`log_likelihood` and (optionally) override + :meth:`add_arguments` to declare driver-specific CLI flags, then call + :meth:`run` from their ``__main__`` block. + + Example + ------- + >>> class MyDriver(MargDriverBase): + ... description = "My toy driver." + ... def log_likelihood(self, row_values, column_names, opts): + ... return -0.5 * sum(float(v) ** 2 for v in row_values), 1e-3 + ... + >>> if __name__ == "__main__": # doctest: +SKIP + ... MyDriver().run() + """ + + description: Optional[str] = None + + # ----- subclass hooks ------------------------------------------------ + + def add_arguments(self, parser: argparse.ArgumentParser) -> None: + """Override to add driver-specific CLI args; default is no-op.""" + pass + + def log_likelihood( + self, + row_values: Sequence[str], + column_names: Sequence[str], + opts: argparse.Namespace, + ) -> Tuple[float, float]: + """Return ``(lnL, sigma_lnL)`` for one grid row. + + ``row_values`` are the raw string values from columns 2.. of the + grid (i.e. excluding the two lnL columns we'll overwrite). + ``column_names`` are the matching column names from the header. + Concrete drivers will typically build a ``dict(zip(column_names, + map(float, row_values)))`` before computing the likelihood. + """ + raise NotImplementedError + + # ----- main entry point --------------------------------------------- + + def run(self, argv: Optional[Sequence[str]] = None) -> str: + """Parse args, evaluate over the requested row range, write output. + + Returns the path of the written annotated file. + """ + parser = make_marg_driver_parser(description=self.description) + self.add_arguments(parser) + opts = parse_marg_driver_args(parser, argv=argv) + + if opts.outdir_clean and opts.outdir: + import shutil + + try: + shutil.rmtree(opts.outdir) + except FileNotFoundError: + pass + + rows, column_names = read_grid(opts.using_eos) + logger.info("Loaded grid %r with columns %r", opts.using_eos, column_names) + + start, stop = opts.eos_start_index, opts.eos_end_index + if start < 0 or stop > rows.shape[0]: + raise SystemExit( + f"marg driver: index range [{start},{stop}) exceeds grid " + f"size {rows.shape[0]}." + ) + + for i in range(start, stop): + row_values = rows[i, 2:] + lnL, sigma = self.log_likelihood(row_values, column_names, opts) + rows[i, 0] = repr(float(lnL)) + rows[i, 1] = repr(float(sigma)) + + out_path = write_marg_output( + rows[start:stop], + column_names, + fname_output_integral=opts.fname_output_integral, + outdir=opts.outdir, + fname=opts.fname, + conforming_output_name=opts.conforming_output_name, + ) + logger.info("Wrote marg output to %r", out_path) + return out_path diff --git a/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/drivers/gaussian.py b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/drivers/gaussian.py new file mode 100644 index 00000000..c455eeb2 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/drivers/gaussian.py @@ -0,0 +1,127 @@ +""" +RIFT.hyperpipe.drivers.gaussian +=============================== + +Toy 3-D Gaussian marg driver --- canonical "smoke test" for the hyperpipeline. + +This is a clean, importable port of the hand-written +``example_gaussian.py`` shipped with the Gaussian hyperpipe demo, built on +top of :class:`RIFT.hyperpipe.drivers.MargDriverBase` so the +boilerplate (arg parsing, grid I/O, output formatting) is shared with all +other drivers. + +The likelihood is the sum of two 3-D Gaussians symmetric in ``x``:: + + p(x, y, z) = N([-x0, 0, 0], 2 I) + N([+x0, 0, 0], 2 I) + +with ``x0 = 4`` and identity covariance scaled by 2. Useful as both a +pedagogical example and a regression test for the post / puff / +convergence-test pieces, because the exact marginals are known. + +Run via the in-tree CLI:: + + util_HyperMargGaussian.py \\ + --using-eos file:my_grid.dat --outdir out --fname-output-integral lnL.txt \\ + --eos_start_index 0 --eos_end_index 1000 --conforming-output-name +""" + +from __future__ import annotations + +import argparse +from typing import Sequence, Tuple + +import numpy as np + +from .base import MargDriverBase + + +class GaussianMargDriver(MargDriverBase): + """Sum-of-two-Gaussians 3-D toy likelihood.""" + + description = ( + "Toy 3-D bimodal Gaussian marg driver for the RIFT hyperpipeline." + ) + + def add_arguments(self, parser: argparse.ArgumentParser) -> None: + parser.add_argument( + "--x-offset", + type=float, + default=4.0, + help="Position of the two modes along x (default: 4).", + ) + parser.add_argument( + "--sigma2", + type=float, + default=2.0, + help="Diagonal of the covariance matrix (default: 2).", + ) + parser.add_argument( + "--unimodal", + action="store_true", + help="If set, drop the second mode at -x_offset.", + ) + parser.add_argument( + "--params", + type=str, + default="x,y,z", + help="Comma-separated names of the three grid columns to use " + "(default: x,y,z). Must appear in the input-grid header.", + ) + + # Cache the multivariate normals so we don't rebuild per-row. + _rv_cache = None + + def _build_rvs(self, opts): + from scipy.stats import multivariate_normal + + cov = np.eye(3) * float(opts.sigma2) + rv_pos = multivariate_normal(mean=[+opts.x_offset, 0.0, 0.0], cov=cov) + rv_neg = ( + None + if opts.unimodal + else multivariate_normal(mean=[-opts.x_offset, 0.0, 0.0], cov=cov) + ) + self._rv_cache = (rv_pos, rv_neg) + + def log_likelihood( + self, + row_values: Sequence[str], + column_names: Sequence[str], + opts: argparse.Namespace, + ) -> Tuple[float, float]: + if self._rv_cache is None: + self._build_rvs(opts) + rv_pos, rv_neg = self._rv_cache + + # Resolve column indices -> the user-named param columns + wanted = [p.strip() for p in opts.params.split(",") if p.strip()] + if len(wanted) != 3: + raise SystemExit( + f"GaussianMargDriver: --params must list exactly 3 names " + f"(got {wanted!r})." + ) + try: + idxs = [column_names.index(p) for p in wanted] + except ValueError as exc: + raise SystemExit( + f"GaussianMargDriver: param {exc.args[0]!r} not in grid header " + f"columns {list(column_names)!r}." + ) from exc + vec = np.array([float(row_values[i]) for i in idxs]) + L = rv_pos.pdf(vec) + if rv_neg is not None: + L = L + rv_neg.pdf(vec) + # Guard log(0); the existing demo does not, so we are *gentle*: + # report a very-negative lnL but keep finite for the GP fit. + if L <= 0.0 or not np.isfinite(L): + return -1.0e6, 1.0e-3 + return float(np.log(L)), 1.0e-3 + + +def main(argv=None) -> str: + """Console-script entry point.""" + return GaussianMargDriver().run(argv=argv) + + +if __name__ == "__main__": # pragma: no cover + main() diff --git a/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/marg_list.py b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/marg_list.py new file mode 100644 index 00000000..8ee27d86 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/hyperpipe/marg_list.py @@ -0,0 +1,279 @@ +""" +RIFT.hyperpipe.marg_list +======================== + +Multi-event / multi-driver "marg-list" assembly. + +A *marg* in the hyperpipeline vocabulary is a per-event (or per-likelihood) +job that evaluates the underlying likelihood on the current grid of +hyperparameters. ``create_eos_posterior_pipeline`` consumes parallel +lists describing each marg: + + * ``--marg-event-exe-list-file`` -- one executable path per line + * ``--marg-event-args-list-file`` -- one args string per line + * ``--marg-event-nchunk-list-file`` -- one int per line (heterogeneous OK) + * ``--event-file `` -- repeated; one per marg entry + +This module assembles those four files from a Hydra ``marg-list:`` section +and is responsible for: + + * resolving each ``exe`` (via :func:`RIFT.misc.dag_utils_generic.which` + or a literal path) + * copying non-RIFT-core executables into the working directory so they + can be transferred (mirroring the current + ``util_RIFT_hyperpipe.py`` behaviour) + * materializing per-entry event files (``event-.net``) and a + sentinel ``empty_event_file`` when no event data are supplied + * stitching in per-driver coord-module overrides for heterogeneous + analyses (e.g. GW + NICER), where each driver carries its own coord + convention. +""" + +from __future__ import annotations + +import logging +import os +import shutil +from dataclasses import dataclass, field +from typing import Any, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +# -------------------------------------------------------------------------- +# Result container +# -------------------------------------------------------------------------- + + +@dataclass +class MargAssembly: + """Output of :func:`assemble_marg_list`. + + All paths are absolute. Lengths of the per-entry sequences are equal + to ``len(cfg['marg-list'])``. + """ + + args_lines: List[str] = field(default_factory=list) + exe_paths: List[str] = field(default_factory=list) + event_files: List[str] = field(default_factory=list) + n_chunks: List[int] = field(default_factory=list) + transfer_files: List[str] = field(default_factory=list) + names: List[str] = field(default_factory=list) + + # ----- emission helpers ------------------------------------------------ + def write_args_file(self, path: str) -> None: + with open(path, "w") as f: + f.write("\n".join(self.args_lines)) + if self.args_lines: + f.write("\n") + + def write_exe_file(self, path: str) -> None: + with open(path, "w") as f: + f.write("\n".join(self.exe_paths)) + if self.exe_paths: + f.write("\n") + + def write_nchunk_file(self, path: str) -> None: + with open(path, "w") as f: + for n in self.n_chunks: + f.write(f"{int(n)}\n") + + def write_transfer_file_list(self, path: str, extra: Optional[List[str]] = None) -> None: + """Write the transfer-file-list HTCondor consumes. + + Appends ``extra`` (e.g. user-supplied general.transfer-files) after + the auto-detected exe transfers. + """ + all_files = list(self.transfer_files) + if extra: + all_files.extend(extra) + with open(path, "w") as f: + for line in all_files: + f.write(line + "\n") + + +# -------------------------------------------------------------------------- +# Core assembly +# -------------------------------------------------------------------------- + + +_RIFT_CORE_EXE_HINTS = ( + # Substrings that, if present in an exe's resolved path, mark it as + # a core RIFT executable we should *not* duplicate into the working + # directory or transfer-file-list. Mirrors what the original + # util_RIFT_hyperpipe.py did for "MonteCarloMarginalizeCode/Code/bin" + # but is more forgiving of editable installs. + "MonteCarloMarginalizeCode/Code/bin", + "/RIFT/", + "/site-packages/RIFT", +) + + +def _looks_like_core_rift(exe_path: str) -> bool: + if not exe_path: + return False + return any(h in exe_path for h in _RIFT_CORE_EXE_HINTS) + + +def _resolve_exe(exe: str, base_dir: str) -> str: + """Resolve an executable name to an absolute path. + + Tries, in order: + 1. literal absolute path + 2. :func:`shutil.which` + 3. :func:`RIFT.misc.dag_utils_generic.which` + 4. ``/`` + """ + if not exe: + raise ValueError("marg-list entry is missing 'exe'.") + exe = os.path.expanduser(exe.strip()) + if os.path.isabs(exe) and os.path.exists(exe): + return exe + bare = os.path.basename(exe) + found = shutil.which(bare) + if found: + return found + try: + from RIFT.misc.dag_utils_generic import which as rift_which + except Exception: # pragma: no cover -- only at runtime + rift_which = None + if rift_which is not None: + found = rift_which(bare) + if found: + return found + fallback = os.path.join(base_dir, bare) + if os.path.exists(fallback): + return fallback + raise FileNotFoundError( + f"Could not locate marg exe {exe!r}. Searched PATH, RIFT, and {base_dir!r}." + ) + + +def _stage_event_file( + entry: Any, + indx: int, + base_dir: str, + run_dir: str, +) -> Tuple[str, bool]: + """Materialize this entry's event file at base_dir/event-.net. + + Returns ``(abs_path, is_empty_sentinel)``. If the entry has no + ``event-file`` set, we write a sentinel file with the single token + ``empty_event_file`` so the downstream pipeline still sees a + well-formed input. + """ + src = None + if hasattr(entry, "get"): + src = entry.get("event-file") or entry.get("event_file") + elif "event-file" in entry: + src = entry["event-file"] + dest = os.path.join(base_dir, f"event-{indx}.net") + if src: + src = os.path.expanduser(src) + if not os.path.isabs(src): + src = os.path.join(base_dir, src) + if not os.path.exists(src): + raise FileNotFoundError( + f"marg-list entry {indx}: event-file {src!r} does not exist." + ) + shutil.copyfile(src, dest) + return dest, False + # sentinel + with open(dest, "w") as f: + f.write("empty_event_file\n") + return dest, True + + +def _entry_get(entry: Any, key: str, default=None): + """Get a key from a marg-list entry, tolerating dict or DictConfig.""" + try: + return entry.get(key, default) # type: ignore[union-attr] + except AttributeError: + return entry[key] if key in entry else default + + +def assemble_marg_list( + cfg, + *, + base_dir: str, + run_dir: str, +) -> MargAssembly: + """Walk ``cfg['marg-list']`` and prepare all per-entry artefacts. + + Parameters + ---------- + cfg + The full hyperpipe config (DictConfig or dict). Only the + ``marg-list`` key is consulted here. + base_dir + Directory the user invoked ``util_RIFT_hyperpipe.py`` from; + relative paths in the config are resolved against this. + run_dir + Working directory for the pipeline (where ``event-.net`` + files and copies of non-core exes are written). + + Returns + ------- + MargAssembly + Holds parallel lists of args / exe / event-file / n-chunk per + marg entry, plus the auto-detected transfer-file list. + """ + marg_entries = cfg["marg-list"] + out = MargAssembly() + + for indx, entry in enumerate(marg_entries): + name = _entry_get(entry, "name") or f"marg_{indx}" + out.names.append(name) + + # ----- exe ------------------------------------------------------ + raw_exe = _entry_get(entry, "exe") + if not raw_exe: + raise ValueError(f"marg-list[{indx}] ({name!r}) missing required key 'exe'.") + exe_path = _resolve_exe(raw_exe, base_dir=base_dir) + out.exe_paths.append(exe_path) + if not _looks_like_core_rift(exe_path): + # Stage a local copy so file-transfer (e.g. OSG) can ship it + local_copy = os.path.join(run_dir, os.path.basename(exe_path)) + if os.path.abspath(local_copy) != os.path.abspath(exe_path): + shutil.copyfile(exe_path, local_copy) + try: + os.chmod(local_copy, 0o755) + except OSError: + pass + out.transfer_files.append(local_copy) + else: + out.transfer_files.append(exe_path) + + # ----- args ----------------------------------------------------- + args = (_entry_get(entry, "args") or "").strip() + extra = (_entry_get(entry, "extra-args") or "").strip() + coord_mod = _entry_get(entry, "coord-module") + bits = [args] + if extra: + bits.append(extra) + if coord_mod: + # Per-driver coord-module override. We always emit + # --supplementary-coordinate-code; drivers that don't consume + # it should be wrapped in a shell that ignores it. + bits.append(f"--supplementary-coordinate-code {coord_mod}") + out.args_lines.append(" ".join(b for b in bits if b)) + + # ----- event file ---------------------------------------------- + event_path, _ = _stage_event_file(entry, indx, base_dir=base_dir, run_dir=run_dir) + out.event_files.append(event_path) + + # ----- n-chunk ------------------------------------------------- + nchunk = _entry_get(entry, "n-chunk", 1) + try: + nchunk = int(nchunk) + except (TypeError, ValueError) as exc: + raise ValueError( + f"marg-list[{indx}] ({name!r}) has non-integer n-chunk={nchunk!r}." + ) from exc + if nchunk <= 0: + raise ValueError( + f"marg-list[{indx}] ({name!r}) n-chunk must be positive; got {nchunk}." + ) + out.n_chunks.append(nchunk) + + return out diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/dag_utils_generic.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/dag_utils_generic.py index e2fea271..53edce80 100644 --- a/MonteCarloMarginalizeCode/Code/RIFT/misc/dag_utils_generic.py +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/dag_utils_generic.py @@ -120,6 +120,8 @@ def emit_dag(self, dag, path): from time import time from hashlib import md5 +import shutil + import numpy as np import configparser diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/README.md b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/README.md new file mode 100644 index 00000000..14cf4fcc --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/README.md @@ -0,0 +1,54 @@ +# RIFT.misc.tracer_placement (phase H0 — engine layout) + +This is the shared engine package for tracer-based iterative grid placement. +It is consumed by: + +- `util_ParameterTracerUpdate.py` (event-level RIFT) +- `util_HyperparameterTracerUpdate.py` (RIFT hyperpipeline) + +Both tools are thin I/O wrappers; the math lives here. + +## Layout + +``` +samplers/ — kernels with the production signature + smc_mala.py — tempered SMC + MALA moves; accepts surrogate, surrogate_prev + birth_death.py — Langevin + kNN birth-death corrector + smc_mala_bd.py — composite: smc_mala then birth_death rejuvenation + surrogate.py — legacy in-engine quadratic helper (used by toys, not by tools) + _knn.py — numpy-only kNN helpers (no scipy) +fits/ — surrogate builders the tools call + __init__.py — exposes build(method, X, Y, sigma=None) + _rf.py — RandomForest (default, production) + _rbf.py — scipy RBFInterpolator + _quadratic.py — Tikhonov-regularized quadratic (smoke tests only) + _polynomial.py — degree-N polynomial (default 3) + _base.py — FitBase with FD gradient +``` + +## Sampler signature + +All three samplers expose: + + iterate(particles, *, surrogate, surrogate_prev=None, + prior_box, rng, state=None, **kw) -> (X_new, info) + +`info` always contains `"state"` (the updated state dict the caller must persist +between iterations for adaptive step-size etc.). + +## Status + +- [x] H0: layout, sampler signature unification, no module-level state. +- [ ] H1: drop into util_HyperparameterTracerUpdate.py end-to-end on the demo + hyperpipe Gaussian. Validate the bimodal demo path. +- [ ] H2: full toy test suite per `../test_suite_hyperpipe.md`. + +## Promotion to RIFT + +When this package is promoted, the target path is +`MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/`. The two tools +should import as: + + from RIFT.misc.tracer_placement import samplers, fits + +No other RIFT changes are required (see ../decision_no_cip_changes.md). diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/__init__.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/__init__.py new file mode 100644 index 00000000..fb47ee49 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/__init__.py @@ -0,0 +1,19 @@ +"""RIFT.misc.tracer_placement — engine for tracer-particle iterative placement. + +Drop-in alternative to the puffball grid update used by both the event-level +RIFT iterator and the hyperpipeline. The math is shared; the file I/O wrapper +lives in the two thin command-line tools: + + util_ParameterTracerUpdate.py (event-level; XML I/O via lalsimutils) + util_HyperparameterTracerUpdate.py (hyperpipeline; .dat I/O) + +Public API: + samplers.smc_mala_bd(particles, surrogate, surrogate_prev, prior_box, rng, **kw) + -> (X_new, info) + samplers.smc_mala(...) -> (X_new, info) + samplers.birth_death(...) -> (X_new, info) + fits.build(method, X, Y, sigma=None) -> Fit (callable + .grad helper) +""" +from . import samplers, fits + +__all__ = ["samplers", "fits"] diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/__init__.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/__init__.py new file mode 100644 index 00000000..6d246e59 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/__init__.py @@ -0,0 +1,11 @@ +"""Fits for the tracer engine. + +Public entry point: build(method, X, Y, sigma=None) -> Fit. + +Fit objects expose: + .predict(Z) -> ndarray of len(Z) + .grad(Z) -> ndarray (len(Z), d) (analytic where available, FD otherwise) +""" +from ._dispatch import build + +__all__ = ["build"] diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_base.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_base.py new file mode 100644 index 00000000..7889d146 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_base.py @@ -0,0 +1,49 @@ +"""Common Fit base class. Provides finite-difference gradient as a default, +and an optional `predict_with_std` mode for acquisition-function placement +(see samplers.ucb).""" +import numpy as np + + +class FitBase: + """Subclasses must implement .predict(Z). Optionally override .grad(Z) + and .predict_with_std(Z) for fits that expose a calibrated posterior std. + + The default predict_with_std returns (mean, zeros); subclasses that have a + real uncertainty estimate (RF tree-disagreement, GP posterior variance, + BayesianLeastSquares posterior covariance, ...) should override it so the + UCB sampler can use mu + kappa*sigma. Sampler-side code is responsible for + falling back gracefully when sigma is identically zero (e.g. by widening + kappa or by warning the user). + + Subclasses may signal "no real uncertainty" by setting + `self.has_uncertainty = False`. Default is False; RF, GP, quadratic + Bayesian-least-squares fits should set True. + """ + + has_uncertainty = False + # Hint for the UCB local-polish step: pure piecewise-constant fits + # (e.g. random forests) have zero or near-zero gradient nearly everywhere, + # so gradient ascent doesn't work and we need a coordinate-hop polish. + smooth_gradient = True + + def predict(self, Z): + raise NotImplementedError + + def predict_with_std(self, Z): + """Return (mean, std) on Z. Default: (predict(Z), zeros). + Override in subclasses that expose a real uncertainty.""" + mean = self.predict(Z) + return mean, np.zeros_like(mean) + + def f(self, Z): + return self.predict(Z) + + def grad(self, Z, eps=1e-3): + Z = np.atleast_2d(Z) + d = Z.shape[1] + out = np.zeros_like(Z) + for k in range(d): + zp = Z.copy(); zp[:, k] += eps + zm = Z.copy(); zm[:, k] -= eps + out[:, k] = (self.predict(zp) - self.predict(zm)) / (2 * eps) + return out diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_dispatch.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_dispatch.py new file mode 100644 index 00000000..ed781f2c --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_dispatch.py @@ -0,0 +1,16 @@ +"""build(method, X, Y, sigma=None) -> Fit.""" +def build(method, X, Y, sigma=None, **kw): + method = method.lower() + if method == "rf": + from ._rf import RandomForestFit + return RandomForestFit(X, Y, sigma=sigma, **kw) + if method == "rbf": + from ._rbf import RBFFit + return RBFFit(X, Y, sigma=sigma, **kw) + if method == "quadratic": + from ._quadratic import QuadraticFit + return QuadraticFit(X, Y, sigma=sigma, **kw) + if method == "polynomial": + from ._polynomial import PolynomialFit + return PolynomialFit(X, Y, sigma=sigma, **kw) + raise ValueError(f"unknown fit method {method!r}") diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_polynomial.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_polynomial.py new file mode 100644 index 00000000..dd6e2cfd --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_polynomial.py @@ -0,0 +1,40 @@ +"""Degree-N polynomial fit (default degree 3). Kept available; quadratic is the +degree-2 special case via _quadratic.py.""" +import numpy as np +from ._base import FitBase + + +class PolynomialFit(FitBase): + def __init__(self, X, Y, sigma=None, degree=3, ridge=1e-3): + from itertools import combinations_with_replacement + X = np.asarray(X, float); Y = np.asarray(Y, float) + self.d = X.shape[1]; self.degree = degree + # mean-normalize + self.mu = X.mean(axis=0); self.sd = X.std(axis=0) + 1e-12 + Xn = (X - self.mu) / self.sd + terms = [] + self._idx = [] # list of tuples of column indices in each term + for k in range(degree + 1): + for combo in combinations_with_replacement(range(self.d), k): + self._idx.append(combo) + terms.append(np.prod(Xn[:, combo], axis=1) if combo else np.ones(len(Xn))) + D = np.column_stack(terms) + W = ridge * np.eye(D.shape[1]) + if sigma is None: + self._theta = np.linalg.lstsq(D.T @ D + W, D.T @ Y, rcond=None)[0] + else: + w = 1.0 / (np.asarray(sigma) ** 2 + 1e-12) + DW = D * w[:, None] + self._theta = np.linalg.lstsq(DW.T @ D + W, DW.T @ Y, rcond=None)[0] + + def _design(self, Zn): + from itertools import combinations_with_replacement + cols = [] + for combo in self._idx: + cols.append(np.prod(Zn[:, combo], axis=1) if combo else np.ones(len(Zn))) + return np.column_stack(cols) + + def predict(self, Z): + Z = np.atleast_2d(Z) + Zn = (Z - self.mu) / self.sd + return self._design(Zn) @ self._theta diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_quadratic.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_quadratic.py new file mode 100644 index 00000000..0330fd6e --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_quadratic.py @@ -0,0 +1,17 @@ +"""Cheap quadratic fit (Tikhonov-regularized). Not for production per +2026-05-13 design call; kept available for unit tests + smoke runs.""" +import numpy as np +from ._base import FitBase +from ..samplers.surrogate import QuadFit as _QuadFit + + +class QuadraticFit(FitBase): + def __init__(self, X, Y, sigma=None, ridge=1e-3): + weight = None if sigma is None else 1.0 / (np.asarray(sigma) ** 2 + 1e-12) + self._inner = _QuadFit(X, Y, ridge=ridge, eval_weight=weight) + + def predict(self, Z): + return self._inner.f(Z) + + def grad(self, Z, eps=None): + return self._inner.grad(Z) # analytic diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rbf.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rbf.py new file mode 100644 index 00000000..f4b9a8bc --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rbf.py @@ -0,0 +1,24 @@ +"""RBF interpolator fit. Smooth gradients; good for MALA.""" +import numpy as np +from ._base import FitBase + +try: + from scipy.interpolate import RBFInterpolator + _HAVE_RBF = True +except ImportError: + _HAVE_RBF = False + + +class RBFFit(FitBase): + def __init__(self, X, Y, sigma=None, smoothing=1e-3): + if not _HAVE_RBF: + raise ImportError("scipy not available; cannot use --tracer-fit-method rbf") + # scipy's RBFInterpolator doesn't take sample weights; use smoothing as a + # proxy when sigma is supplied. + s = smoothing + if sigma is not None: + s = float(np.median(np.asarray(sigma) ** 2)) + smoothing + self._rbf = RBFInterpolator(X, Y, smoothing=s) + + def predict(self, Z): + return self._rbf(np.atleast_2d(Z)) diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rf.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rf.py new file mode 100644 index 00000000..4e5d1e6d --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/fits/_rf.py @@ -0,0 +1,44 @@ +"""Random-Forest fit. Production default per project owner (2026-05-13). + +For UCB placement, exposes a tree-disagreement std as the uncertainty +estimate via predict_with_std. This is *not* a calibrated posterior std -- +it is the empirical spread of the per-tree predictions, which is large in +unexplored regions (because trees disagree on extrapolation) and small in +well-sampled regions (because trees fit similar values). That qualitative +behavior is what UCB needs; for calibration use a GP fit when one becomes +available. +""" +import numpy as np +from ._base import FitBase + +try: + from sklearn.ensemble import RandomForestRegressor + _HAVE_RF = True +except ImportError: + _HAVE_RF = False + + +class RandomForestFit(FitBase): + has_uncertainty = True + # Tree predictions are piecewise-constant -- finite-difference gradients + # are zero almost everywhere. Tell the UCB sampler to use a coordinate + # hop polish instead of gradient ascent. + smooth_gradient = False + + def __init__(self, X, Y, sigma=None, n_estimators=100, n_jobs=-1): + if not _HAVE_RF: + raise ImportError("sklearn not available; install scikit-learn or " + "choose --tracer-fit-method quadratic") + self._rf = RandomForestRegressor(n_estimators=n_estimators, n_jobs=n_jobs) + weight = None if sigma is None else 1.0 / (np.asarray(sigma) ** 2 + 1e-12) + self._rf.fit(X, Y, sample_weight=weight) + + def predict(self, Z): + return self._rf.predict(np.atleast_2d(Z)) + + def predict_with_std(self, Z): + """Return (mean, tree_disagreement_std). Std is computed across the + forest's per-tree predictions; larger where trees disagree.""" + Z = np.atleast_2d(Z) + per_tree = np.stack([t.predict(Z) for t in self._rf.estimators_], axis=0) + return per_tree.mean(axis=0), per_tree.std(axis=0) diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/__init__.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/__init__.py new file mode 100644 index 00000000..f9ea6519 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/__init__.py @@ -0,0 +1,18 @@ +"""Tracer-placement sampler kernels. + +Each kernel has the signature: + kernel(particles, surrogate=None, surrogate_prev=None, prior_box, rng, **kw) + -> (X_new, info_dict) + +For backwards compatibility with the prototype harness (proto/run_tier1.py) +the kernels also accept the older positional form + kernel(particles, lnL_noisy, prior_box, rng, _state_key=...) +which builds a fresh local QuadFit; the production tools call the surrogate- +provided form. +""" +from .smc_mala import iterate as smc_mala +from .birth_death import iterate as birth_death +from .smc_mala_bd import iterate as smc_mala_bd +from .ucb import iterate as ucb_place + +__all__ = ["smc_mala", "birth_death", "smc_mala_bd", "ucb_place"] diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/_knn.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/_knn.py new file mode 100644 index 00000000..7ff4ac24 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/_knn.py @@ -0,0 +1,15 @@ +"""Tiny numpy-only kNN utilities (no scipy).""" +import numpy as np + +def pairwise_sqdist(A, B): + # (na, d) x (nb, d) -> (na, nb) + AA = np.sum(A*A, axis=1, keepdims=True) + BB = np.sum(B*B, axis=1, keepdims=True).T + return np.maximum(AA + BB - 2.0 * A @ B.T, 0.0) + +def knn_dist(X, Y, k): + """For each row of X, distance to its k-th nearest neighbor in Y.""" + d2 = pairwise_sqdist(X, Y) + # k-th smallest including self if X is Y at the same row (caller handles +1) + part = np.partition(d2, k-1, axis=1)[:, :k] + return np.sqrt(part.max(axis=1)) diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/birth_death.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/birth_death.py new file mode 100644 index 00000000..490f4d61 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/birth_death.py @@ -0,0 +1,57 @@ +"""Overdamped Langevin on surrogate + kNN birth-death corrector. + +Engine version: takes a Fit. surrogate_prev is ignored (BD doesn't bridge). +""" +import numpy as np +from ._knn import knn_dist + + +def _langevin_step(X, surrogate, eps, prior_box, rng): + g = surrogate.grad(X) + Xp = X + eps * g + np.sqrt(2 * eps) * rng.normal(size=X.shape) + lo = prior_box[:, 0]; hi = prior_box[:, 1] + Xp = np.where(Xp > hi, 2 * hi - Xp, Xp) + Xp = np.where(Xp < lo, 2 * lo - Xp, Xp) + return np.clip(Xp, lo, hi) + + +def _knn_log_density(X, k=5): + from math import lgamma, log, pi + n, d = X.shape + k = min(k, n - 1) + r = knn_dist(X, X, k + 1) + 1e-12 + log_Vd = (d / 2) * log(pi) - lgamma(d / 2 + 1) + return np.log(k) - np.log(n) - log_Vd - d * np.log(r) + + +def iterate(particles, *, surrogate, surrogate_prev=None, + prior_box, rng, state=None, + n_langevin_steps=20, n_bd_passes=3, eps_factor=0.05, + birth_death_rate=1.0, **_): + state = dict(state or {}) + X = np.asarray(particles, dtype=float).copy() + n = len(X) + scale = float(np.sqrt(np.diag(np.cov(X.T)) + 1e-8).mean()) if X.shape[1] > 1 \ + else float(X.std() + 1e-8) + eps = eps_factor * scale**2 + + for _ in range(n_langevin_steps): + X = _langevin_step(X, surrogate, eps, prior_box, rng) + + for _ in range(n_bd_passes): + log_rho = _knn_log_density(X, k=min(5, n - 1)) + log_pi = surrogate.predict(X) + log_pi -= log_pi.max(); log_rho -= log_rho.max() + score = log_rho - log_pi + order = np.argsort(score) + n_swap = max(1, int(round(birth_death_rate * n / 10.0))) + kill = order[-n_swap:] + birth = order[:n_swap] + X[kill] = X[rng.choice(birth, size=n_swap, replace=True)] \ + + 0.05 * scale * rng.normal(size=(n_swap, X.shape[1])) + X = np.clip(X, prior_box[:, 0], prior_box[:, 1]) + for _ in range(3): + X = _langevin_step(X, surrogate, eps, prior_box, rng) + + info = {"state": state, "eps": eps} + return X, info diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/smc_mala.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/smc_mala.py new file mode 100644 index 00000000..7f7a4e85 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/smc_mala.py @@ -0,0 +1,94 @@ +"""Tempered SMC with MALA moves on a pre-built surrogate. + +Engine version: takes Fit objects from RIFT.misc.tracer_placement.fits. +Stateless across calls — caller owns the `state` dict (with `mala_eps`, +optionally other adaptive choices). +""" +import numpy as np + + +def _ess(logw): + w = np.exp(logw - logw.max()) + w /= w.sum() + return 1.0 / np.sum(w * w) + + +def _systematic_resample(logw, rng): + w = np.exp(logw - logw.max()); w /= w.sum() + n = len(w) + u = (rng.uniform() + np.arange(n)) / n + return np.clip(np.searchsorted(np.cumsum(w), u), 0, n - 1) + + +def _mala_step(X, f_eval, f_grad, eps, prior_box, rng): + g = f_grad(X) + prop = X + 0.5 * eps**2 * g + eps * rng.normal(size=X.shape) + lo = prior_box[:, 0]; hi = prior_box[:, 1] + prop = np.where(prop > hi, 2 * hi - prop, prop) + prop = np.where(prop < lo, 2 * lo - prop, prop) + prop = np.clip(prop, lo, hi) + g_prop = f_grad(prop) + log_qf = -np.sum((prop - X - 0.5 * eps**2 * g)**2, axis=1) / (2 * eps**2) + log_qb = -np.sum((X - prop - 0.5 * eps**2 * g_prop)**2, axis=1) / (2 * eps**2) + log_alpha = f_eval(prop) - f_eval(X) + log_qb - log_qf + acc = np.log(rng.uniform(size=len(X))) < log_alpha + return np.where(acc[:, None], prop, X), float(acc.mean()) + + +def iterate(particles, *, surrogate, surrogate_prev=None, + prior_box, rng, state=None, + n_mala_steps=8, target_ess_frac=0.5, + birth_death_rate=0.0, **_): + state = dict(state or {}) + X = np.asarray(particles, dtype=float).copy() + n, _d = X.shape + if state.get("mala_eps") is None: + cov_diag = np.diag(np.cov(X.T)) if X.shape[1] > 1 else np.array([X.var()]) + state["mala_eps"] = 0.3 * float(np.sqrt(cov_diag + 1e-8).mean()) + + accept_log = [] + + if surrogate_prev is not None: + beta = 0.0 + logw = np.zeros(n) + delta_f = surrogate.predict(X) - surrogate_prev.predict(X) + while beta < 1.0: + lo, hi = 0.0, 1.0 - beta + for _ in range(20): + mid = 0.5 * (lo + hi) + if _ess(logw + mid * delta_f) > target_ess_frac * n: + lo = mid + else: + hi = mid + d_beta = lo if lo > 1e-6 else (1.0 - beta) + logw = logw + d_beta * delta_f + beta += d_beta + if _ess(logw) < target_ess_frac * n or beta >= 1.0: + idx = _systematic_resample(logw, rng) + X = X[idx] + logw = np.zeros(n) + delta_f = surrogate.predict(X) - surrogate_prev.predict(X) + + def fe(z, beta=beta, s=surrogate, sp=surrogate_prev): + return beta * s.predict(z) + (1 - beta) * sp.predict(z) + + def fg(z, beta=beta, s=surrogate, sp=surrogate_prev): + return beta * s.grad(z) + (1 - beta) * sp.grad(z) + + for _ in range(n_mala_steps): + X, acc = _mala_step(X, fe, fg, state["mala_eps"], prior_box, rng) + accept_log.append(acc) + state["mala_eps"] *= np.exp(0.1 * (acc - 0.574)) + state["mala_eps"] = float(np.clip(state["mala_eps"], 1e-4, 5.0)) + else: + for _ in range(n_mala_steps): + X, acc = _mala_step(X, surrogate.predict, surrogate.grad, + state["mala_eps"], prior_box, rng) + accept_log.append(acc) + state["mala_eps"] *= np.exp(0.1 * (acc - 0.574)) + state["mala_eps"] = float(np.clip(state["mala_eps"], 1e-4, 5.0)) + + info = {"state": state, + "mean_accept": float(np.mean(accept_log)) if accept_log else float("nan"), + "n_steps": len(accept_log)} + return X, info diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/smc_mala_bd.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/smc_mala_bd.py new file mode 100644 index 00000000..a06999f8 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/smc_mala_bd.py @@ -0,0 +1,15 @@ +"""SMC-MALA followed by a short birth-death rejuvenation (engine version).""" +from .smc_mala import iterate as _smc_mala +from .birth_death import iterate as _bd + + +def iterate(particles, *, surrogate, surrogate_prev=None, + prior_box, rng, state=None, **kw): + X1, info1 = _smc_mala(particles, surrogate=surrogate, surrogate_prev=surrogate_prev, + prior_box=prior_box, rng=rng, state=state, **kw) + X2, info2 = _bd(X1, surrogate=surrogate, prior_box=prior_box, rng=rng, + state=info1.get("state"), + n_langevin_steps=5, n_bd_passes=2, + birth_death_rate=kw.get("birth_death_rate", 1.0)) + info = {**info1, **info2, "state": info2.get("state", info1.get("state"))} + return X2, info diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/surrogate.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/surrogate.py new file mode 100644 index 00000000..de434580 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/surrogate.py @@ -0,0 +1,68 @@ +"""Local quadratic surrogate of ln-likelihood, RIFT BayesianLeastSquares-flavored. + +f(x) = a + b^T x + 0.5 x^T C x with Tikhonov regularization on coefficients. +Gradient: b + C x. Closed form. +""" +import numpy as np + +def _design(X): + n, d = X.shape + cols = [np.ones(n), X] # 1, linear + quad = [] + for i in range(d): + for j in range(i, d): + quad.append(X[:, i] * X[:, j]) + # square terms get factor 1, cross-terms factor 1 (we'll handle 0.5 in eval) + cols.append(np.column_stack(quad)) + return np.concatenate([np.ones((n,1)), X, np.column_stack(quad)], axis=1) + +def _coef_unpack(theta, d): + a = theta[0] + b = theta[1:1+d] + # quad terms in order (0,0),(0,1),...,(0,d-1),(1,1),(1,2),... + C = np.zeros((d, d)) + k = 1 + d + for i in range(d): + for j in range(i, d): + v = theta[k]; k += 1 + if i == j: + C[i, i] = 2.0 * v # since 0.5 x^T C x => C_ii contributes 0.5 C_ii x_i^2 + else: + C[i, j] = v + C[j, i] = v + return a, b, C + +class QuadFit: + def __init__(self, X, y, ridge=1e-3, eval_weight=None): + X = np.asarray(X, dtype=float) + y = np.asarray(y, dtype=float) + self.d = X.shape[1] + self.X_mean = X.mean(axis=0) + self.X_std = X.std(axis=0) + 1e-12 + Xn = (X - self.X_mean) / self.X_std + D = _design(Xn) + W = np.eye(D.shape[1]) * ridge + if eval_weight is None: + A = D.T @ D + W + rhs = D.T @ y + else: + wv = np.asarray(eval_weight, dtype=float) + DW = D * wv[:, None] + A = DW.T @ D + W + rhs = DW.T @ y + theta, *_ = np.linalg.lstsq(A, rhs, rcond=None) + self.theta = theta + self.a, self.b_n, self.C_n = _coef_unpack(theta, self.d) + + def _to_normed(self, X): + return (np.atleast_2d(X) - self.X_mean) / self.X_std + + def f(self, X): + Xn = self._to_normed(X) + v = self.a + Xn @ self.b_n + 0.5 * np.einsum('ni,ij,nj->n', Xn, self.C_n, Xn) + return v + + def grad(self, X): + Xn = self._to_normed(X) + gn = self.b_n + Xn @ self.C_n # gradient in normed coords + return gn / self.X_std # back to native coords diff --git a/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/ucb.py b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/ucb.py new file mode 100644 index 00000000..37226c1e --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/RIFT/misc/tracer_placement/samplers/ucb.py @@ -0,0 +1,199 @@ +"""UCB (Upper Confidence Bound) placement. + +Target regime: expensive ILE, few generations, ~50-500 points per generation. +Each evaluation point should be "earned" -- either we expect a large mean lnL +*or* we have large uncertainty about lnL there. UCB picks both: rank +candidates by + + score(lambda) = mu(lambda) + kappa * sigma(lambda) + +where (mu, sigma) come from the surrogate's predict_with_std. The N chosen +points are taken by greedy descending-score selection with a built-in +self-avoidance constraint (population-covariance Mahalanobis distance). + +Local polish: each selected point is optionally hill-climbed for a few steps +against `score`. For smooth-gradient fits (RBF, quadratic, GP) we use +gradient ascent on `score`; for piecewise-constant fits (RF) we use a +coordinate-hop polish that perturbs each coordinate independently and keeps +the move only if `score` improved. The fit signals which polish strategy to +use via FitBase.smooth_gradient. + +Sampler signature matches the rest of the tracer engine: + ucb_place(particles, *, surrogate, surrogate_prev=None, prior_box, rng, + kappa=2.0, n_candidates=20000, polish_steps=20, + min_separation_factor=0.0, state=None, **_) -> (X_out, info) + +`particles` only sets the output count (len(particles)) and the candidate +seed pool. surrogate_prev is ignored (UCB doesn't bridge). +""" + +import numpy as np + + +# ----------------- candidate generation ---------------------------------- # + +def _candidates(rng, particles, prior_box, n_candidates): + """Mix candidates from current particles (jittered) and uniform draws + from the prior box. Returns array (n_candidates, d).""" + d = prior_box.shape[0] + n_seed = max(1, n_candidates // 4) + # uniform fill (exploration) + n_unif = n_candidates - n_seed + lo = prior_box[:, 0]; hi = prior_box[:, 1] + unif = lo + (hi - lo) * rng.uniform(size=(n_unif, d)) + # jittered current particles (exploitation seed) + if len(particles) > 0: + idx = rng.integers(0, len(particles), size=n_seed) + if particles.shape[1] > 1: + scale = np.std(particles, axis=0) + 1e-9 + else: + scale = np.array([particles.std() + 1e-9]) + jitter = 0.25 * scale * rng.normal(size=(n_seed, d)) + seed = particles[idx] + jitter + seed = np.clip(seed, lo, hi) + else: + seed = unif[:n_seed] + return np.vstack([seed, unif]) + + +# ----------------- greedy selection with Mahalanobis spacing ------------- # + +def _mahalanobis_greedy(candidates, scores, n_keep, icov, min_mah_dist): + """Pick n_keep candidates by descending score, rejecting any candidate + within `min_mah_dist` Mahalanobis distance of an already-kept point.""" + order = np.argsort(-scores) + kept = [] + if min_mah_dist <= 0: + # No self-avoidance: just take top-n. + return candidates[order[:n_keep]] + for i in order: + x = candidates[i] + if not kept: + kept.append(x) + continue + diffs = np.asarray(kept) - x + d2 = np.einsum("ij,jk,ik->i", diffs, icov, diffs) + if (d2 >= min_mah_dist ** 2).all(): + kept.append(x) + if len(kept) >= n_keep: + break + # If under-filled (very tight separation), fall back to top-n without + # self-avoidance for the remainder. + if len(kept) < n_keep: + fill = [candidates[i] for i in order if not any( + np.array_equal(candidates[i], k) for k in kept)] + kept += fill[: n_keep - len(kept)] + return np.asarray(kept) + + +# ----------------- local polish: gradient vs coord-hop ------------------- # + +def _polish_gradient(x, surrogate, kappa, prior_box, n_steps, scale, rng): + """Gradient ascent on mu + kappa*sigma. Uses FitBase.grad for mu; + treats sigma as locally piecewise-smooth and falls back to finite + differences.""" + eps = 1e-3 * scale + step = 0.1 * scale + lo = prior_box[:, 0]; hi = prior_box[:, 1] + x = x.copy() + for _ in range(n_steps): + gmu = surrogate.grad(x[None])[0] + # FD sigma gradient (small extra cost; sigma is cheap for RF/quadratic) + gsig = np.zeros_like(x) + for k in range(len(x)): + xp = x.copy(); xp[k] += eps + xm = x.copy(); xm[k] -= eps + _, sp = surrogate.predict_with_std(xp[None]) + _, sm = surrogate.predict_with_std(xm[None]) + gsig[k] = (sp[0] - sm[0]) / (2 * eps) + g = gmu + kappa * gsig + norm = np.linalg.norm(g) + 1e-12 + x = np.clip(x + step * g / norm, lo, hi) + return x + + +def _polish_coord_hop(x, surrogate, kappa, prior_box, n_steps, scale, rng): + """For piecewise-constant fits (RF). Each step: pick a random coordinate, + propose a Gaussian hop, accept if mu + kappa*sigma went up. Greedy.""" + lo = prior_box[:, 0]; hi = prior_box[:, 1] + x = x.copy() + def _score(xi): + m, s = surrogate.predict_with_std(xi[None]) + return float(m[0] + kappa * s[0]) + best = _score(x) + for _ in range(n_steps): + k = rng.integers(0, len(x)) + delta = 0.5 * scale[k] * rng.normal() + x_prop = x.copy(); x_prop[k] = float(np.clip(x[k] + delta, lo[k], hi[k])) + sc = _score(x_prop) + if sc > best: + x = x_prop + best = sc + return x + + +def _polish(x, surrogate, kappa, prior_box, n_steps, scale, rng): + if n_steps <= 0: + return x + if getattr(surrogate, "smooth_gradient", True): + return _polish_gradient(x, surrogate, kappa, prior_box, n_steps, float(np.mean(scale)), rng) + return _polish_coord_hop(x, surrogate, kappa, prior_box, n_steps, scale, rng) + + +# ----------------- driver ------------------------------------------------ # + +def iterate(particles, *, surrogate, surrogate_prev=None, + prior_box, rng, state=None, + kappa=2.0, n_candidates=20000, + polish_steps=20, min_separation_factor=0.25, + **_): + state = dict(state or {}) + X_in = np.asarray(particles, dtype=float) + n_out = len(X_in) + if n_out == 0: + return X_in.copy(), {"state": state, "note": "no input particles"} + + if not getattr(surrogate, "has_uncertainty", False): + # UCB without uncertainty degenerates to greedy mean-maximization, + # which is not what the user wants. Warn but proceed (kappa effectively 0). + import sys + sys.stderr.write( + "samplers.ucb: surrogate has no uncertainty estimate " + "(predict_with_std returns zeros); UCB will degenerate to greedy " + "mean-maximization. Use --tracer-fit-method rf (tree disagreement) " + "or a GP fit if available.\n") + + # 1. Build candidate pool + cand = _candidates(rng, X_in, prior_box, n_candidates) + mu, sigma = surrogate.predict_with_std(cand) + score = mu + kappa * sigma + + # 2. Greedy select with Mahalanobis self-avoidance + if X_in.shape[1] > 1: + cov_in = np.cov(X_in.T) + else: + cov_in = np.array([[float(X_in.std() ** 2) + 1e-12]]) + cov_in = np.atleast_2d(cov_in) + 1e-10 * np.eye(prior_box.shape[0]) + icov = np.linalg.inv(cov_in) + # min_separation_factor is in *Mahalanobis units* (so 0.25 = one-quarter std) + X_pick = _mahalanobis_greedy(cand, score, n_out, icov, min_separation_factor) + + # 3. Local polish per point + if X_in.shape[1] > 1: + scale = np.sqrt(np.diag(cov_in)) + else: + scale = np.array([float(np.sqrt(cov_in[0, 0]))]) + X_out = np.array([ + _polish(x, surrogate, kappa, prior_box, polish_steps, scale, rng) + for x in X_pick + ]) + + info = { + "state": state, + "kappa": kappa, + "n_candidates": n_candidates, + "min_separation_factor": min_separation_factor, + "polish_steps": polish_steps, + "polish_strategy": "gradient" if getattr(surrogate, "smooth_gradient", True) else "coord_hop", + } + return X_out, info diff --git a/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline b/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline old mode 100644 new mode 100755 index 14798db9..86c168dc --- a/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline +++ b/MonteCarloMarginalizeCode/Code/bin/create_eos_posterior_pipeline @@ -42,6 +42,12 @@ parser.add_argument("--request-memory-marg",default=16384,type=int,help="Memory parser.add_argument("--puff-exe",default=None,help="similar to PUFF, but cannot be the same executable since format different") parser.add_argument("--puff-args",default=None,help=" argumetns") parser.add_argument("--puff-max-it",default=np.inf,type=int,help="Maximum iteration number that puffball is applied. default to infinity ") +parser.add_argument("--tracer-only-marg", action='store_true', + help="DEPRECATED no-op. Earlier versions used this flag to skip MARG_* on intermediate iterations; that was wrong -- MARG should run every iteration so the tracer has fresh all.marg_net to consume. Use --puff-input-source marg_net to suppress the redundant MARG_PUFF lane instead.") +parser.add_argument("--tracer-final-marg-iterations", type=int, default=1, + help="DEPRECATED no-op. See --tracer-only-marg.") +parser.add_argument("--puff-input-source", default="posterior", choices=("posterior", "marg_net"), + help="Which file feeds the puff/tracer node. 'posterior' (default, legacy) reads grid-{k+1}.dat (EOS_POST posterior samples) and writes grid_puff-{k+1}.dat which MARG_PUFF then evaluates. 'marg_net' wires the puff/tracer node to consume all.marg_net (the cumulative ILE-evaluated (lambda, lnL, sigma) table) and write grid-{k+1}.dat directly, suppressing the MARG_PUFF lane entirely; use with --puff-exe util_*TracerUpdate.py.") parser.add_argument("--eos-post-args",default=None,help="filename of args_ile.txt file which holds ILE arguments. Should NOT conflict with arguments auto-set by this DAG ... in particular, i/o arguments will be modified") parser.add_argument("--eos-post-exe",default=None,help="filename of ILE or equivalent executable. Will default to `which integrate_likelihood_extrinsic` in low-level code") parser.add_argument("--eos-post-retries",default=0,type=int,help="Number of retry attempts for ILE jobs. (These can fail)") @@ -169,7 +175,12 @@ if opts.marg_event_args: puff_args=None puff_cadence = None puff_max_it = opts.puff_max_it -if opts.puff_args: +# Gate on --puff-exe: if the user did not provide a puff executable, we do not +# write puff/MARG_PUFF nodes at all. Holding --puff-args without --puff-exe is +# an obvious mistake; refuse to proceed silently. +if opts.puff_args and not opts.puff_exe: + sys.exit(" --puff-args given without --puff-exe: refusing to write puff nodes without a puff executable. Pass --puff-exe or drop --puff-args.") +if opts.puff_args and opts.puff_exe: puff_cadence = 1 # Load args.txt. Remove first item. Store with open(opts.puff_args) as f: @@ -183,6 +194,15 @@ if opts.puff_args: print("PUFF", puff_args) print("PUFF CADENCE", puff_cadence) +# Tracer-aware puff input plumbing. In 'marg_net' mode the puff/tracer node +# consumes all.marg_net (the cumulative ILE-evaluated table emitted by UNIFY) +# and writes grid-{k+1}.dat directly. EOS_POST output is redirected to +# posterior-{k+1}.dat for diagnostics, and the MARG_PUFF lane is suppressed -- +# the tracer's output IS the next iteration's grid. +puff_uses_marg_net = (opts.puff_input_source == "marg_net") and bool(puff_args) +if puff_uses_marg_net: + print("PUFF INPUT SOURCE: marg_net (tracer-aware grid update)") + test_args=None test_node_list = [] @@ -250,20 +270,18 @@ dag = pipeline.CondorDAG(log=os.getcwd()) # Make directories for all iterations -for indx in np.arange(it_start,opts.n_iterations+1): - ile_dir = opts.working_directory+"/iteration_"+str(indx)+"_marg" - cip_dir = opts.working_directory+"/iteration_"+str(indx)+"_post" - consolidate_dir = opts.working_directory+"/iteration_"+str(indx)+"_con" +for it_indx in np.arange(it_start,opts.n_iterations+1): + ile_dir = opts.working_directory+"/iteration_"+str(it_indx)+"_marg" + cip_dir = opts.working_directory+"/iteration_"+str(it_indx)+"_post" + consolidate_dir = opts.working_directory+"/iteration_"+str(it_indx)+"_con" mkdir(ile_dir); mkdir(ile_dir+"/logs") - indx=0 - for event in opts.event_file: - mkdir(ile_dir + "/event_{}".format(indx)) - indx+=1 + for event_indx, _event in enumerate(opts.event_file): + mkdir(ile_dir + "/event_{}".format(event_indx)) mkdir(cip_dir); mkdir(cip_dir+"/logs") mkdir(consolidate_dir); mkdir(consolidate_dir+"/logs") if opts.test_args: - test_dir = opts.working_directory+"/iteration_"+str(indx)+"_test" + test_dir = opts.working_directory+"/iteration_"+str(it_indx)+"_test" mkdir(test_dir); mkdir(test_dir+'/logs') @@ -297,19 +315,22 @@ if opts.marg_event_args: cip_job.set_sub_file(fname) cip_job.write_sub_file() - # PUFF variant - cipPuff_job, cipPuff_job_name = dag_utils.write_CIP_sub(tag='MARG_PUFF',log_dir=None,arg_str=marg_event_args,using_eos='file:'+working_dir_inside+"/grid_puff-$(macroiteration).dat",using_eos_index='$(macroevent)',request_memory=opts.request_memory_marg,input_net=working_dir_inside+'/event-$(macroid).net',output='MARG_puff-$(macroid)-$(macroevent)',out_dir=out_dir_inside_marg,exe=marg_exe_master,universe=main_worker_universe,no_grid=not(opts.use_osg),use_osg=opts.use_osg,use_singularity=opts.use_osg and opts.use_singularity,singularity_image=singularity_image,use_simple_osg_requirements=opts.use_osg,transfer_files=['../../event-$(macroid).net']+transfer_file_names_puff) - # Modify: set 'initialdir' - cipPuff_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_marg/event_$(macroid)") - # Modify output argument: change logs and working directory to be subdirectory for the run - cipPuff_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(cluster)-$(process).log") - cipPuff_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(cluster)-$(process).err") - cipPuff_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(cluster)-$(process).out") - cipPuff_job.add_condor_cmd('request_disk',opts.general_request_disk) - if opts.use_full_submit_paths: - fname = opts.working_directory+"/"+cipPuff_job.get_sub_file() - cipPuff_job.set_sub_file(fname) - cipPuff_job.write_sub_file() + # PUFF variant -- only emit when MARG_PUFF will actually be added to the + # DAG (legacy posterior-mode wiring). In tracer mode (puff_uses_marg_net) + # the tracer writes grid-{k+1}.dat directly and MARG_PUFF is redundant. + if puff_args and not puff_uses_marg_net: + cipPuff_job, cipPuff_job_name = dag_utils.write_CIP_sub(tag='MARG_PUFF',log_dir=None,arg_str=marg_event_args,using_eos='file:'+working_dir_inside+"/grid_puff-$(macroiteration).dat",using_eos_index='$(macroevent)',request_memory=opts.request_memory_marg,input_net=working_dir_inside+'/event-$(macroid).net',output='MARG_puff-$(macroid)-$(macroevent)',out_dir=out_dir_inside_marg,exe=marg_exe_master,universe=main_worker_universe,no_grid=not(opts.use_osg),use_osg=opts.use_osg,use_singularity=opts.use_osg and opts.use_singularity,singularity_image=singularity_image,use_simple_osg_requirements=opts.use_osg,transfer_files=['../../event-$(macroid).net']+transfer_file_names_puff) + # Modify: set 'initialdir' + cipPuff_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_marg/event_$(macroid)") + # Modify output argument: change logs and working directory to be subdirectory for the run + cipPuff_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(cluster)-$(process).log") + cipPuff_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(cluster)-$(process).err") + cipPuff_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(cluster)-$(process).out") + cipPuff_job.add_condor_cmd('request_disk',opts.general_request_disk) + if opts.use_full_submit_paths: + fname = opts.working_directory+"/"+cipPuff_job.get_sub_file() + cipPuff_job.set_sub_file(fname) + cipPuff_job.write_sub_file() else: for indx in np.arange(len(marg_event_args_list)): @@ -328,20 +349,23 @@ else: cip_job.write_sub_file() cip_job_list.append(cip_job) - # PUFF variant - cipPuff_job, cipPuff_job_name = dag_utils.write_CIP_sub(tag='MARG_PUFF_{}'.format(indx),log_dir=None,exe=marg_event_exe_list[indx],arg_str=marg_event_args_list[indx],using_eos='file:'+working_dir_inside+"/grid_puff-$(macroiteration).dat",using_eos_index='$(macroevent)',n_events_to_analyze=n_chunk_here,request_memory=opts.request_memory_marg,input_net=working_dir_inside+'/event-$(macroid).net',output='MARG_puff-$(macroid)-$(macroevent)',out_dir=out_dir_inside_marg,universe=main_worker_universe,no_grid=not(opts.use_osg),use_osg=opts.use_osg,use_singularity=opts.use_osg and opts.use_singularity,singularity_image=singularity_image,use_simple_osg_requirements=opts.use_osg,transfer_files=['../../event-$(macroid).net','../../grid_puff-$(macroiteration).dat']+transfer_file_names_puff) - # Modify: set 'initialdir' - cipPuff_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_marg/event_$(macroid)") - # Modify output argument: change logs and working directory to be subdirectory for the run - cipPuff_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(macroid)-$(cluster)-$(process).log") - cipPuff_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(macroid)-$(cluster)-$(process).err") - cipPuff_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(macroid)-$(cluster)-$(process).out") - cipPuff_job.add_condor_cmd('request_disk',opts.general_request_disk) - if opts.use_full_submit_paths: - fname = opts.working_directory+"/"+cipPuff_job.get_sub_file() - cipPuff_job.set_sub_file(fname) - cipPuff_job.write_sub_file() - cip_puff_job_list.append(cipPuff_job) + # PUFF variant -- only emit when MARG_PUFF will actually be added to + # the DAG (legacy posterior-mode wiring). In tracer mode the tracer + # writes grid-{k+1}.dat directly and MARG_PUFF is redundant. + if puff_args and not puff_uses_marg_net: + cipPuff_job, cipPuff_job_name = dag_utils.write_CIP_sub(tag='MARG_PUFF_{}'.format(indx),log_dir=None,exe=marg_event_exe_list[indx],arg_str=marg_event_args_list[indx],using_eos='file:'+working_dir_inside+"/grid_puff-$(macroiteration).dat",using_eos_index='$(macroevent)',n_events_to_analyze=n_chunk_here,request_memory=opts.request_memory_marg,input_net=working_dir_inside+'/event-$(macroid).net',output='MARG_puff-$(macroid)-$(macroevent)',out_dir=out_dir_inside_marg,universe=main_worker_universe,no_grid=not(opts.use_osg),use_osg=opts.use_osg,use_singularity=opts.use_osg and opts.use_singularity,singularity_image=singularity_image,use_simple_osg_requirements=opts.use_osg,transfer_files=['../../event-$(macroid).net','../../grid_puff-$(macroiteration).dat']+transfer_file_names_puff) + # Modify: set 'initialdir' + cipPuff_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_marg/event_$(macroid)") + # Modify output argument: change logs and working directory to be subdirectory for the run + cipPuff_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(macroid)-$(cluster)-$(process).log") + cipPuff_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(macroid)-$(cluster)-$(process).err") + cipPuff_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_marg/logs/margPuff-$(macroevent)-$(macroid)-$(cluster)-$(process).out") + cipPuff_job.add_condor_cmd('request_disk',opts.general_request_disk) + if opts.use_full_submit_paths: + fname = opts.working_directory+"/"+cipPuff_job.get_sub_file() + cipPuff_job.set_sub_file(fname) + cipPuff_job.write_sub_file() + cip_puff_job_list.append(cipPuff_job) @@ -412,7 +436,26 @@ con_prod_job.write_sub_file() if opts.test_args: - test_job, test_job_name = dag_utils.write_test_sub(tag='test',log_dir=None,arg_str=test_args,samples_files=[ opts.working_directory+'/grid-$(macroiteration).dat', opts.working_directory+'/grid-$(macroiterationlast).dat'] ,out_dir=opts.working_directory,exe=opts.test_exe,universe=local_worker_universe,no_grid=no_worker_grid) + # In tracer mode (puff_uses_marg_net) the file grid-{k}.dat is a placement + # grid (lnL columns zero), and the true iteration posterior lives in + # posterior-{k}.dat. So the convergence test must compare posterior + # samples, not placement grids. + if puff_uses_marg_net: + _test_curr = opts.working_directory + '/posterior-$(macroiteration).dat' + _test_prev = opts.working_directory + '/posterior-$(macroiterationlast).dat' + else: + _test_curr = opts.working_directory + '/grid-$(macroiteration).dat' + _test_prev = opts.working_directory + '/grid-$(macroiterationlast).dat' + # Resolve --test-exe to a full path matching how eospost_exe / the + # patched puff_exe are handled: bare names get resolved via `which`; + # absolute paths pass through. Condor on some test machines does not + # search PATH for local-universe jobs. + test_exe_resolved = opts.test_exe + if test_exe_resolved and not os.path.isabs(test_exe_resolved): + _r = dag_utils.which(test_exe_resolved) + if _r: + test_exe_resolved = os.path.abspath(_r) + test_job, test_job_name = dag_utils.write_test_sub(tag='test',log_dir=None,arg_str=test_args,samples_files=[_test_curr, _test_prev] ,out_dir=opts.working_directory,exe=test_exe_resolved,universe=local_worker_universe,no_grid=no_worker_grid) # Modify: set 'initialdir' test_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_test") # Modify output argument: change logs and working directory to be subdirectory for the run @@ -481,7 +524,13 @@ head -n 1 grid-0.dat > $2 cat $1/output*.dat | sort -r | uniq | shuf >> $2 """) os.system("chmod a+x join_post.sh") -join_post_job, join_post_job_name = dag_utils.write_convert_sub(exe=opts.working_directory+"/join_post.sh",tag='JOIN_POST',log_dir=None,arg_str='',file_input=opts.working_directory+"/iteration_$(macroiteration)_post/ "+opts.working_directory+"/grid-$(macroiterationnext).dat", out_dir=opts.working_directory,universe=local_worker_universe,no_grid=no_worker_grid) +# In tracer mode the tracer node owns grid-{k+1}.dat; EOS_POST output goes to a +# parallel posterior-{k+1}.dat file (diagnostic only). In legacy mode EOS_POST +# still produces grid-{k+1}.dat directly. +_join_post_output_path = opts.working_directory + ( + "/posterior-$(macroiterationnext).dat" if puff_uses_marg_net + else "/grid-$(macroiterationnext).dat") +join_post_job, join_post_job_name = dag_utils.write_convert_sub(exe=opts.working_directory+"/join_post.sh",tag='JOIN_POST',log_dir=None,arg_str='',file_input=opts.working_directory+"/iteration_$(macroiteration)_post/ "+_join_post_output_path, out_dir=opts.working_directory,universe=local_worker_universe,no_grid=no_worker_grid) join_post_job.add_condor_cmd("initialdir",opts.working_directory) join_post_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_con/logs/unify-$(macroevent).log") join_post_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_con/logs/unify-$(macroevent).out") @@ -499,19 +548,37 @@ if opts.use_osg: # PUFF job # ... pending if puff_args and puff_cadence: + # Resolve --puff-exe to a full path matching how eospost_exe is handled + # above: bare names (e.g. "util_HyperparameterTracerUpdate.py") get + # resolved via `which`; if the user already passed an absolute path, + # leave it alone. Condor on some test machines does not search PATH + # when launching local-universe jobs and fails to start PUFF otherwise. exe_here = opts.puff_exe + if exe_here and not os.path.isabs(exe_here): + resolved = dag_utils.which(exe_here) + if resolved: + exe_here = os.path.abspath(resolved) if opts.use_osg: relevant_path = dag_utils.which(opts.puff_exe) if opts.condor_local_nonworker_igwn_prefix: relevant_path = relevant_path.split("/")[-1] exe_here = opts.working_directory + "/local_puff.sh" with open("local_puff.sh", 'w') as f: - f.write("""#! /bin/bash + f.write("""#! /bin/bash {} {} $@ """.format(extra_text,relevant_path)) os.system("chmod a+x local_puff.sh") - puff_job, puff_job_name = dag_utils.write_puff_sub(tag='PUFF',log_dir=None,arg_str=puff_args,request_memory=opts.request_memory_marg,input_net=opts.working_directory+'/grid-$(macroiterationnext).dat',output=opts.working_directory+'/grid_puff-$(macroiterationnext).dat',out_dir=opts.working_directory,exe=exe_here,universe=local_worker_universe,no_grid=no_worker_grid) + # Tracer mode: consume all.marg_net (ILE-evaluated table) and write + # grid-{k+1}.dat directly. Legacy mode: consume grid-{k+1}.dat (posterior + # samples from JOIN_POST) and write grid_puff-{k+1}.dat for MARG_PUFF. + if puff_uses_marg_net: + _puff_input_net = opts.working_directory + '/all.marg_net' + _puff_output = opts.working_directory + '/grid-$(macroiterationnext).dat' + else: + _puff_input_net = opts.working_directory + '/grid-$(macroiterationnext).dat' + _puff_output = opts.working_directory + '/grid_puff-$(macroiterationnext).dat' + puff_job, puff_job_name = dag_utils.write_puff_sub(tag='PUFF',log_dir=None,arg_str=puff_args,request_memory=opts.request_memory_marg,input_net=_puff_input_net,output=_puff_output,out_dir=opts.working_directory,exe=exe_here,universe=local_worker_universe,no_grid=no_worker_grid) # Modify: set 'initialdir' to CIP WORKING DIR puff_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_post") # Modify output argument: change logs and working directory to be subdirectory for the run @@ -582,7 +649,8 @@ for it in np.arange(it_start,opts.n_iterations): con_node.add_macro("macroevent",int(event)) con_node.set_retry(opts.general_retries) for row in np.arange(indx_max_event): #np.arange(n_jobs_this_time): - # Add task per MARG operation + # Add task per MARG operation. MARG runs every iteration; the + # tracer-vs-legacy choice only affects the MARG_PUFF lane below. if opts.marg_event_args: cip_node = pipeline.CondorDAGNode(cip_job) else: @@ -596,7 +664,9 @@ for it in np.arange(it_start,opts.n_iterations): con_node.add_parent(cip_node) # consolidate depends on all of the individual jobs dag.add_node(cip_node) # if PUFF is done for this event, also add MARG_PUFF nodes. Note grid size assumed identical, so don't need another loop over 'id'/row - if puff_args and puff_cadence: + # In tracer mode (puff_uses_marg_net) the tracer writes grid-{k+1}.dat + # directly -- MARG already evaluates that grid, so no MARG_PUFF lane. + if puff_args and puff_cadence and not puff_uses_marg_net: if it>it_start and it <= puff_max_it and (it-1)%puff_cadence ==0: # we made a puffball last iteration, so run it through ILE now if opts.marg_event_args: cipPuff_node = pipeline.CondorDAGNode(cipPuff_job) @@ -673,10 +743,16 @@ for it in np.arange(it_start,opts.n_iterations): puff_node.add_macro("macroiterationnext", it+1) puff_node.set_category("PUFF") puff_node.set_retry(opts.general_retries) - if not (parent_fit_node is None): - puff_node.add_parent(parent_fit_node) # only fit if we have results from the previous iteration - dag.add_node(puff_node) - + if puff_uses_marg_net: + # Tracer reads all.marg_net (output of UNIFY). Run in parallel with + # EOS_POST -- the tracer's own grid output drives the next iteration, + # EOS_POST is now a downstream-free posterior diagnostic. + puff_node.add_parent(unify_node) + else: + if not (parent_fit_node is None): + puff_node.add_parent(parent_fit_node) # only fit if we have results from the previous iteration + dag.add_node(puff_node) + parent_fit_node = puff_node diff --git a/MonteCarloMarginalizeCode/Code/bin/hyperpipe_conf.yaml b/MonteCarloMarginalizeCode/Code/bin/hyperpipe_conf.yaml new file mode 100644 index 00000000..fa0ed86b --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/bin/hyperpipe_conf.yaml @@ -0,0 +1,122 @@ +# Default hyperpipe configuration consumed by util_RIFT_hyperpipe.py. +# +# Hydra finds this file because it lives next to util_RIFT_hyperpipe.py. +# Users can: +# * override fields on the CLI with Hydra syntax: +# util_RIFT_hyperpipe.py arch.n-iterations=10 general.use-osg=true +# * point Hydra at their own file with --config-name +# (placed next to this file) or --config-path +# * compose from the in-package defaults exposed via +# RIFT.hyperpipe.config.DEFAULT_CONFIG_YAML +# +# The schema is documented in RIFT/hyperpipe/config.py. + +arch: + method: default + n-iterations: 5 + n-samples-per-job: 1000 + explode-marg-jobs: 5 + explode-marg-jobs-last: null + start-iteration: 0 + +post: + exe: null + coord-module: null + coords-fit: "x y z" + coords-sample: "x:[-8,8] y:[-8,8] z:[-8,8]" + coords-implied: "" + coords-nofit: "" + likelihood-factor-module: null + likelihood-factor-function: null + likelihood-factor-ini: null + extra-args: "" + settings: + n-max: null + n-step: null + n-eff: null + sampler-method: null + fit-method: null + sigma-cut: null + +marg-list: + - name: gaussian + exe: util_HyperMargGaussian.py + args: "--outdir gaussian_out --conforming-output-name" + event-file: null + n-chunk: 100 + coord-module: null + extra-args: "" + +puff: + exe: null + puff-factor: 0.5 + force-away: 0.03 + extra-args: "" + # Where the puff/tracer node reads its input from. + # posterior : (legacy) reads grid-{k+1}.dat, writes grid_puff-{k+1}.dat + # -> MARG_PUFF re-evaluates the puffed grid. The puffball + # does not need lnL inputs, so this is the legacy mode. + # marg_net : (tracer) reads all.marg_net, writes grid-{k+1}.dat + # directly. EOS_POST output is redirected to + # posterior-{k+1}.dat (diagnostic only) and the MARG_PUFF + # lane is suppressed entirely. Use with a tracer-aware + # puff.exe (util_HyperparameterTracerUpdate.py). + input-source: null # null = pipeline default (posterior) + # Tracer-placement sampler hyperparameters (only used when exe is a + # tracer-aware updater; ignored by the legacy util_HyperparameterPuffball.py). + # Leave keys null to take the updater's built-in defaults. + settings: + update-method: null # smc-mala-bd | smc-mala | birth-death | ucb | puffball + tracer-fit-method: null # rf | rbf | polynomial | quadratic + ucb-kappa: null # UCB exploration weight (default 2.0) + ucb-n-candidates: null # UCB candidate pool size (default 20000) + n-mala-steps: null # int + target-ess-frac: null # float in (0,1] + birth-death-rate: null # float + inj-file-prev: null # path to previous-iteration .dat (enables SMC bridging) + no-union-refit: false # opt out of union refit when inj-file-prev is given + regularize: false + rng-seed: null # int; deterministic when given + state-in: null + state-out: null + +test: + exe: convergence_test_samples + method: JS + threshold: 0.05 + extra-args: "" + # Convergence-test diagnostics. All null/false by default. The pipeline + # already feeds --samples grid-{k}.dat and grid-{k+1}.dat (or, in tracer + # mode, posterior-{k}.dat and posterior-{k+1}.dat) and --parameter from + # the coord spec; these are extras you'd typically set when monitoring + # iteration-to-iteration agreement. + settings: + iteration-threshold: null # skip test until iteration >= N (default 0) + write-file-on-success: null # path to touch when convergence threshold met + test-output: null # per-iteration diagnostic CSV + always-succeed: false # keep DAG running past convergence (diagnostic-only mode) + verbose: false + +init: + file: null + generation: + placement-method: null + params-and-ranges: null + npts: null + external-code: null + external-args: null + +general: + rundir: null + retries: 0 + request-disk: 10M + request-memory: 16384 + use-osg: false + use-singularity: false + use-singularity-local: false + condor-local-nonworker: false + condor-local-nonworker-igwn-prefix: false + condor-nogrid-nonworker: false + use-full-submit-paths: true + transfer-files: [] + dry-run: false diff --git a/MonteCarloMarginalizeCode/Code/bin/util_HyperMargGaussian.py b/MonteCarloMarginalizeCode/Code/bin/util_HyperMargGaussian.py new file mode 100644 index 00000000..e660ee88 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/bin/util_HyperMargGaussian.py @@ -0,0 +1,27 @@ +#! /usr/bin/env python +""" +util_HyperMargGaussian.py +========================= + +Thin CLI shim for :class:`RIFT.hyperpipe.drivers.gaussian.GaussianMargDriver`. + +This is the toy 3-D Gaussian marg driver for the RIFT hyperpipeline; it +exists as a top-level ``bin/`` executable so the hyperpipe configuration +can reach it via ``which util_HyperMargGaussian.py`` the same way it +reaches ``util_HyperparameterPuffball.py`` etc. + +Example +------- +:: + + util_HyperMargGaussian.py \\ + --using-eos file:./blind_gaussian_plus_minus.dat \\ + --eos_start_index 0 --eos_end_index 1000 \\ + --outdir Gaussian_example --conforming-output-name \\ + --fname-output-integral f_out_name_1.txt +""" + +from RIFT.hyperpipe.drivers.gaussian import main + +if __name__ == "__main__": + main() diff --git a/MonteCarloMarginalizeCode/Code/bin/util_HyperparameterTracerUpdate.py b/MonteCarloMarginalizeCode/Code/bin/util_HyperparameterTracerUpdate.py new file mode 100755 index 00000000..a7c2a170 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/bin/util_HyperparameterTracerUpdate.py @@ -0,0 +1,339 @@ +#! /usr/bin/env python +# +# util_HyperparameterTracerUpdate.py +# +# DRAFT - parsimonious-placement project (2026-05-13) +# +# GOAL +# Drop-in alternative to util_HyperparameterPuffball.py for the RIFT hyperpipeline. +# Same I/O contract: read a hyperpipe-format .dat grid (header +# "# lnL sigma_lnL p1 p2 ..."), advance it by tracer SMC + birth-death, write +# a new .dat grid in the same format. +# +# Shared engine with util_ParameterTracerUpdate.py (the event-level cousin): +# both import from RIFT.misc.tracer_placement. +# +# COMPATIBILITY with util_HyperparameterPuffball.py +# --inj-file, --inj-file-out, --puff-factor, --force-away, --parameter, +# --no-correlation, --random-parameter, --random-parameter-range, +# --downselect-parameter, --downselect-parameter-range, --regularize +# same semantics. --update-method puffball reproduces puffball exactly. +# +# NEW +# --update-method {smc-mala-bd, smc-mala, birth-death, ucb, puffball} default smc-mala-bd +# --tracer-fit-method {rf, rbf, quadratic, polynomial} default rf +# --inj-file-prev OPTIONAL previous-iteration .dat (enables SMC bridging) +# --no-union-refit opt out of union refit when --inj-file-prev is given +# --n-mala-steps INT default 8 +# --target-ess-frac FLOAT default 0.5 +# --birth-death-rate FLOAT default 1.0 +# --ucb-kappa FLOAT default 2.0 (UCB exploration weight) +# --ucb-n-candidates INT default 20000 +# --rng-seed INT deterministic when given +# --state-in / --state-out tiny state file (~100 bytes) +# +# --force-away K (inherited from puffball semantics): after the engine returns +# X_out, drop any point within Mahalanobis distance K of an already-kept point +# (covariance from the input grid, ridge-stabilized). K=0 disables. +# +# I/O FORMAT +# Input/output is the same as util_HyperparameterPuffball.py: a text file with a +# leading "# lnL sigma_lnL p1 p2 ..." header line. Data rows are numeric. +# - X (coord array) is extracted from the parameter columns by name (matching +# --parameter ordering). +# - Y (lnL) and sigma (sigma_lnL) come from the first two columns. +# - Output rows preserve all original columns; only the parameter values are +# updated. The lnL/sigma_lnL columns are zeroed in the output, matching the +# puffball convention (next iteration's marg driver will overwrite them). +# +# USAGE in hyperpipe (Hydra) +# puff: +# exe: util_HyperparameterTracerUpdate.py +# settings: +# update-method: smc-mala-bd +# tracer-fit-method: rf +# +# USAGE in legacy create_eos_posterior_pipeline +# --puff-exe `which util_HyperparameterTracerUpdate.py` +# --puff-args `pwd`/args_puff.txt # contains the flags above +# + +import argparse +import os +import pickle +import sys +import numpy as np + +try: + # Canonical path once the engine is merged into RIFT proper. + from RIFT.misc.tracer_placement import samplers as _tracer_samplers + from RIFT.misc.tracer_placement import fits as _tracer_fits + _TRACER_OK = True +except ImportError: + try: + # Local-dev fallback: PYTHONPATH points at a directory containing + # `tracer_placement/` directly (no RIFT/ wrapper). See the demo Makefile. + from tracer_placement import samplers as _tracer_samplers + from tracer_placement import fits as _tracer_fits + _TRACER_OK = True + except ImportError: + _TRACER_OK = False + + +# --------------------------------- CLI ------------------------------------- # + +def build_parser(): + p = argparse.ArgumentParser(description=__doc__) + # Mirror util_HyperparameterPuffball.py + p.add_argument("--inj-file", required=True, help="Input .dat grid (hyperpipe format).") + p.add_argument("--inj-file-out", default="output-tracer.dat") + p.add_argument("--puff-factor", default=1.0, type=float) + p.add_argument("--force-away", default=0.0, type=float) + p.add_argument("--parameter", action="append", required=True) + p.add_argument("--no-correlation", action="append", type=str) + p.add_argument("--random-parameter", action="append") + p.add_argument("--random-parameter-range", action="append", type=str) + p.add_argument("--downselect-parameter", action="append") + p.add_argument("--downselect-parameter-range", action="append", type=str) + p.add_argument("--regularize", action="store_true") + # Tracer-specific + p.add_argument("--update-method", + choices=("smc-mala-bd", "smc-mala", "birth-death", "ucb", "puffball"), + default="smc-mala-bd") + p.add_argument("--tracer-fit-method", + choices=("rf", "rbf", "quadratic", "polynomial"), + default="rf") + p.add_argument("--inj-file-prev", default=None, + help="Optional previous-iteration .dat for SMC bridging / union refit.") + p.add_argument("--no-union-refit", action="store_true") + p.add_argument("--n-mala-steps", default=8, type=int) + p.add_argument("--target-ess-frac", default=0.5, type=float) + p.add_argument("--birth-death-rate", default=1.0, type=float) + # UCB (super-conservative GP-style placement) + p.add_argument("--ucb-kappa", default=2.0, type=float, + help="UCB exploration weight: score = mu(lambda) + kappa * sigma(lambda). " + "Larger => more explorative. Used only when --update-method ucb.") + p.add_argument("--ucb-n-candidates", default=20000, type=int, + help="Size of the candidate pool from which UCB greedily selects.") + p.add_argument("--rng-seed", default=None, type=int) + p.add_argument("--state-in", default=None) + p.add_argument("--state-out", default=None) + return p + + +# ---------------- Mahalanobis self-avoidance (--force-away) --------------- # + +def _force_away_decimate(X_kept, X_pool, cov, threshold): + """Greedy: keep first len(X_kept) rows of X_pool subject to a minimum + Mahalanobis distance `threshold` from every previously-kept row. Returns + (X_out, mask). X_kept is the preferred-order list; X_pool is the (possibly + larger) candidate set to fall back to when a preferred row is rejected. + Mirrors util_ParameterPuffball.py's --force-away semantics.""" + if threshold <= 0 or len(X_kept) == 0: + return X_kept, np.ones(len(X_kept), dtype=bool) + # Regularize cov for stability + cov_r = cov + 1e-10 * np.eye(cov.shape[0]) + icov = np.linalg.inv(cov_r) + target_n = len(X_kept) + queue = list(X_pool) # candidate list (input order matters: preferred first) + out = [] + for x in queue: + if not out: + out.append(x) + continue + diffs = np.asarray(out) - x + d2 = np.einsum("ij,jk,ik->i", diffs, icov, diffs) + if (d2 >= threshold ** 2).all(): + out.append(x) + if len(out) >= target_n: + break + if len(out) < target_n: + # Couldn't fill the budget under the constraint; relax to keep what we have. + sys.stderr.write( + f"util_HyperparameterTracerUpdate: --force-away {threshold} could only " + f"place {len(out)}/{target_n} points; returning the kept subset.\n") + return np.asarray(out), None + + +# ----------------------- .dat <-> arrays ----------------------------------- # + +def _read_dat(path): + """Return (column_names, raw_rows ndarray).""" + with open(path) as f: + header = f.readline().rstrip("\n") + if not header.startswith("#"): + sys.exit(f"util_HyperparameterTracerUpdate: input {path!r} missing '#' header") + cols = header.lstrip("#").split() + if len(cols) < 3 or cols[0] != "lnL" or cols[1] not in ("sigma_lnL", "sigma"): + sys.exit(f"util_HyperparameterTracerUpdate: header must start with " + f"'lnL sigma_lnL ...', got {cols!r}") + rows = np.loadtxt(path) + if rows.ndim == 1: + rows = rows[None, :] + return cols, rows + + +def _extract_X(cols, rows, parameter_order): + idx = [cols.index(p) for p in parameter_order] + return rows[:, idx] + + +def _write_dat(path, cols, rows): + np.savetxt(path, rows, header=" ".join(cols)) + + +def _build_downselect(opts): + d = {} + if opts.downselect_parameter: + for name, rng in zip(opts.downselect_parameter, + opts.downselect_parameter_range or []): + d[name] = list(eval(rng)) + return d + + +def _coord_box(parameter_order, downselect_dict, X): + d = len(parameter_order) + box = np.zeros((d, 2)) + for i, name in enumerate(parameter_order): + if name in downselect_dict: + box[i] = downselect_dict[name] + else: + lo = float(X[:, i].min()) + hi = float(X[:, i].max()) + pad = 0.1 * (hi - lo + 1e-9) + box[i] = (lo - pad, hi + pad) + return box + + +# ------------------------------ main --------------------------------------- # + +def main(argv=None): + opts = build_parser().parse_args(argv) + rng = np.random.default_rng(opts.rng_seed) + + cols, rows = _read_dat(opts.inj_file) + X = _extract_X(cols, rows, opts.parameter) + Y = rows[:, 0] # lnL column + S = rows[:, 1] if rows.shape[1] >= 2 else None + downselect = _build_downselect(opts) + prior_box = _coord_box(opts.parameter, downselect, X) + + method = opts.update_method + + # ---- puffball regression path -------------------------------------- # + if method == "puffball": + cov = (np.cov(X.T) * opts.puff_factor**2 if X.shape[1] > 1 + else np.array([[X.std() ** 2]])) + cov = np.atleast_2d(cov) + if np.min(np.linalg.eigvalsh(cov)) < 1e-12: + cov = cov + 1e-8 * np.eye(cov.shape[0]) + delta = rng.multivariate_normal(np.zeros(X.shape[1]), cov, size=len(X)) + X_out = X + delta + # write back into rows; zero lnL/sigma (puffball convention) + out_rows = rows.copy() + for i, name in enumerate(opts.parameter): + out_rows[:, cols.index(name)] = X_out[:, i] + out_rows[:, 0] = 0.0 + out_rows[:, 1] = 0.0 + _write_dat(opts.inj_file_out, cols, out_rows) + return + + # ---- tracer path --------------------------------------------------- # + if not _TRACER_OK: + sys.stderr.write("util_HyperparameterTracerUpdate: RIFT.misc.tracer_placement " + "not installed; falling back to puffball.\n") + opts.update_method = "puffball" + return main(argv) + + X_train, Y_train, S_train = X, Y, S + fit_prev = None + + if opts.inj_file_prev is not None and os.path.exists(opts.inj_file_prev): + cols_p, rows_p = _read_dat(opts.inj_file_prev) + X_prev = _extract_X(cols_p, rows_p, opts.parameter) + Y_prev = rows_p[:, 0] + S_prev = rows_p[:, 1] if rows_p.shape[1] >= 2 else None + fit_prev = _tracer_fits.build(opts.tracer_fit_method, + X_prev, Y_prev, sigma=S_prev) + if not opts.no_union_refit: + X_train = np.vstack([X_prev, X]) + Y_train = np.concatenate([Y_prev, Y]) + if S is not None and S_prev is not None: + S_train = np.concatenate([S_prev, S]) + else: + S_train = None + + fit_now = _tracer_fits.build(opts.tracer_fit_method, + X_train, Y_train, sigma=S_train) + + state = {} + if opts.state_in and os.path.exists(opts.state_in): + with open(opts.state_in, "rb") as f: + state = pickle.load(f) + + sampler_map = { + "smc-mala-bd": _tracer_samplers.smc_mala_bd, + "smc-mala": _tracer_samplers.smc_mala, + "birth-death": _tracer_samplers.birth_death, + } + if hasattr(_tracer_samplers, "ucb_place"): + sampler_map["ucb"] = _tracer_samplers.ucb_place + if method not in sampler_map: + sys.exit(f"util_HyperparameterTracerUpdate: --update-method {method!r} " + f"not available in installed engine (have: {sorted(sampler_map)}).") + sampler = sampler_map[method] + + sampler_kw = dict( + particles=X, + surrogate=fit_now, + surrogate_prev=fit_prev, + prior_box=prior_box, + rng=rng, + n_mala_steps=opts.n_mala_steps, + target_ess_frac=opts.target_ess_frac, + birth_death_rate=opts.birth_death_rate, + state=state, + ) + if method == "ucb": + sampler_kw.update( + kappa=opts.ucb_kappa, + n_candidates=opts.ucb_n_candidates, + ) + X_out, info = sampler(**sampler_kw) + + # --- self-avoidance (--force-away) ----------------------------------- # + if opts.force_away and opts.force_away > 0 and len(X_out) > 1: + # Use the input grid's covariance as the Mahalanobis metric (stable, + # independent of where the engine moved particles to). + if X.shape[1] > 1: + cov_in = np.cov(X.T) + else: + cov_in = np.array([[float(X.std() ** 2) + 1e-12]]) + cov_in = np.atleast_2d(cov_in) + X_out, _ = _force_away_decimate( + X_kept=X_out, X_pool=X_out, cov=cov_in, threshold=opts.force_away) + + if opts.state_out: + with open(opts.state_out, "wb") as f: + pickle.dump(info.get("state", {}), f) + + # Apply downselect manually (puffball-equivalent behavior) + if downselect: + mask = np.ones(len(X_out), dtype=bool) + for k, (lo, hi) in downselect.items(): + if k in opts.parameter: + col = opts.parameter.index(k) + mask &= (X_out[:, col] >= lo) & (X_out[:, col] <= hi) + X_out = X_out[mask] + + out_rows = np.zeros((len(X_out), rows.shape[1])) + # carry forward any extra columns from input rows (just zero them; marg driver overwrites) + for i, name in enumerate(opts.parameter): + out_rows[:, cols.index(name)] = X_out[:, i] + out_rows[:, 0] = 0.0 + out_rows[:, 1] = 0.0 + _write_dat(opts.inj_file_out, cols, out_rows) + + +if __name__ == "__main__": + main() diff --git a/MonteCarloMarginalizeCode/Code/bin/util_ParameterTracerUpdate.py b/MonteCarloMarginalizeCode/Code/bin/util_ParameterTracerUpdate.py new file mode 100755 index 00000000..12da709c --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/bin/util_ParameterTracerUpdate.py @@ -0,0 +1,342 @@ +#! /usr/bin/env python +# +# util_ParameterTracerUpdate.py +# +# DRAFT - parsimonious-placement project (rev. 2026-05-13) +# +# GOAL +# Drop-in alternative to util_ParameterPuffball.py for iterative grid update. +# Instead of resampling-from-fit + Gaussian jitter, advance the existing grid +# through a tracer-particle update: tempered SMC with MALA moves on a surrogate, +# plus optional birth-death rejuvenation. The tool REFITS its own surrogate from +# the same .dat file CIP reads, so it is fully self-contained (no model files +# passed between jobs, no CIP coupling, no extra OSG transfers). +# +# COMPATIBILITY +# --parameter, --inj-file, --inj-file-out, --fref, --fmin, --random-parameter, +# --random-parameter-range, --mc-range, --eta-range, --mtot-range, +# --downselect-parameter, --downselect-parameter-range, --reflect-parameter, +# --enforce-duration-bound : same semantics as util_ParameterPuffball.py. +# +# DATA-INGEST (mirrors CIP) +# --fname path to current iteration's ILE composite .dat file +# --fname-prev optional, previous iteration's .dat (enables SMC bridge) +# --lnL-col column index of lnL (caller specifies; matches CIP's expectations) +# --sigma-col optional column index of sigma_lnL +# +# NEW +# --update-method {smc-mala-bd, smc-mala, birth-death, puffball} +# Default smc-mala-bd. "puffball" reproduces util_ParameterPuffball.py for regression. +# --tracer-fit-method {rf, rbf, quadratic, polynomial} default rf +# --no-union-refit if --fname-prev given, do NOT include prev points in f_k fit +# --n-mala-steps INT default 8 +# --target-ess-frac FLOAT default 0.5 +# --birth-death-rate FLOAT default 1.0 +# --rng-seed INT deterministic when given +# --state-in tiny pickle: {mala_eps, last_rng_state} (~100 bytes) +# --state-out where to dump updated state for the next iteration +# +# USAGE EXAMPLE +# util_ParameterTracerUpdate.py \ +# --parameter mc --parameter eta --parameter chi_eff --parameter chi_p \ +# --inj-file overlap-grid_k.xml.gz --inj-file-out overlap-grid_kp1 \ +# --fname all_lnL_k.dat --fname-prev all_lnL_km1.dat \ +# --lnL-col 9 \ +# --update-method smc-mala-bd --tracer-fit-method rf \ +# --mc-range '[20,40]' --eta-range '[0.10,0.25]' \ +# --state-in tracer_state_k.pkl --state-out tracer_state_kp1.pkl +# +# NOTES +# - This is a draft skeleton. The sampler engine lives in +# parsimonious_placement_plan.md's proto/samplers/. For production it should be +# promoted into RIFT.misc.tracer_placement and imported here. +# - Surrogate refitting happens inside this tool every call. RF and RBF refits on +# the typical (X, lnL) size (10^2 - 10^4 points) cost seconds; negligible vs ILE. +# + +import argparse +import os +import pickle +import sys +import numpy as np + +import RIFT.lalsimutils as lalsimutils +import lalsimulation as lalsim # noqa: F401 +import lal + +try: + # Canonical path once the engine is merged into RIFT proper. + from RIFT.misc.tracer_placement import samplers as _tracer_samplers + from RIFT.misc.tracer_placement import fits as _tracer_fits + _TRACER_OK = True +except ImportError: + try: + # Local-dev fallback: PYTHONPATH points at a directory containing + # `tracer_placement/` directly (no RIFT/ wrapper). See the demo Makefile. + from tracer_placement import samplers as _tracer_samplers + from tracer_placement import fits as _tracer_fits + _TRACER_OK = True + except ImportError: + _TRACER_OK = False + +from igwn_ligolw import lsctables, ligolw # noqa: F401 +lsctables.use_in(ligolw.LIGOLWContentHandler) + + +# --------------------------------- CLI ------------------------------------- # + +def build_parser(): + p = argparse.ArgumentParser(description=__doc__) + # Mirror util_ParameterPuffball.py + p.add_argument("--inj-file", required=True) + p.add_argument("--inj-file-out", default="output-tracer") + p.add_argument("--fail-if-empty", action="store_true") + p.add_argument("--approx-output", default="SEOBNRv2") + p.add_argument("--fref", default=None, type=float) + p.add_argument("--fmin", default=None, type=float) + p.add_argument("--parameter", action="append", required=True) + p.add_argument("--random-parameter", action="append") + p.add_argument("--random-parameter-range", action="append", type=str) + p.add_argument("--mc-range", default=None) + p.add_argument("--eta-range", default=None) + p.add_argument("--mtot-range", default=None) + p.add_argument("--downselect-parameter", action="append") + p.add_argument("--downselect-parameter-range", action="append", type=str) + p.add_argument("--reflect-parameter", action="append", type=str) + p.add_argument("--enforce-duration-bound", default=None, type=float) + # Data ingest (mirror CIP) + p.add_argument("--fname", required=True, + help="Current iteration ILE composite .dat (same file CIP reads).") + p.add_argument("--fname-prev", default=None, + help="Optional previous iteration .dat; enables SMC bridging.") + p.add_argument("--lnL-col", type=int, required=True, + help="Column index of lnL in --fname (caller specifies, matches CIP).") + p.add_argument("--sigma-col", type=int, default=None, + help="Optional column index of sigma_lnL.") + p.add_argument("--lnL-downscale-factor", type=float, default=1.0) + # Tracer-specific + p.add_argument("--update-method", + choices=("smc-mala-bd", "smc-mala", "birth-death", "puffball"), + default="smc-mala-bd") + p.add_argument("--tracer-fit-method", + choices=("rf", "rbf", "quadratic", "polynomial"), + default="rf") + p.add_argument("--no-union-refit", action="store_true", + help="If --fname-prev is given, do NOT include those points in the f_k fit.") + p.add_argument("--n-mala-steps", default=8, type=int) + p.add_argument("--target-ess-frac", default=0.5, type=float) + p.add_argument("--birth-death-rate", default=1.0, type=float) + p.add_argument("--rng-seed", default=None, type=int) + p.add_argument("--state-in", default=None) + p.add_argument("--state-out", default=None) + # Back-compat with puffball for the regression path + p.add_argument("--puff-factor", default=1.0, type=float) + p.add_argument("--force-away", default=0.0, type=float) + p.add_argument("--regularize", action="store_true") + p.add_argument("--no-correlation", type=str, action="append") + return p + + +# --------------------- XML -> X (coord array) ------------------------------ # + +def _xml_to_X(opts, coord_names): + P_list = lalsimutils.xml_to_ChooseWaveformParams_array(opts.inj_file) + rows = [] + for P in P_list: + if opts.fmin is not None: + P.fmin = opts.fmin + if opts.fref is not None: + P.fref = opts.fref + line = np.zeros(len(coord_names)) + for i, name in enumerate(coord_names): + fac = lal.MSUN_SI if name in ("mc", "m1", "m2", "mtot") else 1.0 + line[i] = P.extract_param(name) / fac + rows.append(line) + return np.array(rows), P_list + + +def _X_to_xml(X_out, P_template_list, opts, coord_names, downselect_dict): + P_out_list = [] + n = min(len(P_template_list), len(X_out)) + for i in range(n): + P = P_template_list[i] + for j, name in enumerate(coord_names): + fac = lal.MSUN_SI if name in ("mc", "m1", "m2", "mtot") else 1.0 + P.assign_param(name, X_out[i, j] * fac) + keep = True + for k, (lo, hi) in downselect_dict.items(): + v = P.extract_param(k) + if k in ("mc", "m1", "m2", "mtot"): + v /= lal.MSUN_SI + if v < lo or v > hi: + keep = False + break + if opts.enforce_duration_bound is not None: + if lalsimutils.estimateWaveformDuration(P) > opts.enforce_duration_bound: + keep = False + if keep: + P_out_list.append(P) + if opts.fail_if_empty and not P_out_list: + sys.stderr.write("tracer: produced empty grid; aborting.\n") + sys.exit(2) + fref = P_out_list[0].fref if P_out_list else opts.fref + lalsimutils.ChooseWaveformParams_array_to_xml( + P_out_list, fname=opts.inj_file_out, fref=fref) + + +def _build_downselect(opts): + d = {} + if opts.mc_range is not None: + d["mc"] = list(eval(opts.mc_range)) + if opts.eta_range is not None: + d["eta"] = list(eval(opts.eta_range)) + if opts.mtot_range is not None: + d["mtot"] = list(eval(opts.mtot_range)) + if opts.downselect_parameter: + for name, rng in zip(opts.downselect_parameter, opts.downselect_parameter_range or []): + d[name] = list(eval(rng)) + return d + + +def _coord_box(coord_names, downselect_dict, X): + d = len(coord_names) + box = np.zeros((d, 2)) + for i, name in enumerate(coord_names): + if name in downselect_dict: + box[i] = downselect_dict[name] + else: + box[i] = (float(X[:, i].min()), float(X[:, i].max())) + return box + + +# --------------- .dat -> (X, Y, sigma) in coord-frame ---------------------- # +# +# We need (X, Y) in the SAME coordinate frame the sampler will move in. The .dat +# columns are not in coord-frame in general; CIP does its own conversion. We use +# the simplest robust approach: match each .dat row to a P in the input XML by +# row order (the standard RIFT contract), then read coord-frame X from the XML +# and lnL from the matched .dat row. +# +# When --fname-prev is used, the caller must also pass a previous-grid XML +# (--inj-file-prev) so we can do the same matching for f_{k-1}. To keep the CLI +# minimal in this draft we expect the convention that --fname-prev sits next to +# overlap-grid_{k-1}.xml.gz in the same directory (a one-line lookup) — TODO +# in production: add --inj-file-prev. + +def _load_lnL_for_grid(fname_dat, lnL_col, sigma_col, n_points, downscale): + arr = np.loadtxt(fname_dat) + if arr.ndim == 1: + arr = arr[None, :] + if len(arr) < n_points: + sys.stderr.write( + f"tracer: .dat has fewer rows ({len(arr)}) than XML grid ({n_points}); " + "alignment assumption (row order) violated.\n") + sys.exit(3) + Y = arr[:n_points, lnL_col] * downscale + if sigma_col is not None: + S = arr[:n_points, sigma_col] + else: + S = None + return Y, S + + +# ------------------------------- main -------------------------------------- # + +def main(argv=None): + opts = build_parser().parse_args(argv) + coord_names = list(opts.parameter) + downselect = _build_downselect(opts) + + X, P_template = _xml_to_X(opts, coord_names) + if X.size == 0: + sys.stderr.write("tracer: empty input grid; aborting.\n") + sys.exit(2) + prior_box = _coord_box(coord_names, downselect, X) + rng = np.random.default_rng(opts.rng_seed) + + method = opts.update_method + + # ---- puffball regression path -------------------------------------- # + if method == "puffball": + cov = np.cov(X.T) * opts.puff_factor**2 if X.shape[1] > 1 else np.array([[X.std()**2]]) + cov = np.atleast_2d(cov) + delta = rng.multivariate_normal(np.zeros(X.shape[1]), cov, size=len(X)) + X_out = X + delta + _X_to_xml(X_out, P_template, opts, coord_names, downselect) + return + + # ---- tracer path: refit our own surrogate from .dat ---------------- # + if not _TRACER_OK: + sys.stderr.write("tracer: RIFT.misc.tracer_placement not installed; " + "falling back to puffball.\n") + opts.update_method = "puffball" + return main(argv) + + Y_k, S_k = _load_lnL_for_grid(opts.fname, opts.lnL_col, opts.sigma_col, + len(X), opts.lnL_downscale_factor) + + X_train_k = X + Y_train_k = Y_k + S_train_k = S_k + X_prev = None + fit_prev = None + + if opts.fname_prev is not None: + # Read previous-iteration grid + lnL. Production CLI should accept + # --inj-file-prev explicitly; here we follow the naming convention. + inj_prev = opts.fname_prev.replace(".dat", ".xml.gz") + if not os.path.exists(inj_prev): + sys.stderr.write( + f"tracer: cannot locate previous grid XML ({inj_prev}); " + "ignoring --fname-prev and running unbridged.\n") + else: + opts_prev = argparse.Namespace(**vars(opts)) + opts_prev.inj_file = inj_prev + X_prev, _ = _xml_to_X(opts_prev, coord_names) + Y_prev, S_prev = _load_lnL_for_grid( + opts.fname_prev, opts.lnL_col, opts.sigma_col, + len(X_prev), opts.lnL_downscale_factor) + if not opts.no_union_refit: + X_train_k = np.vstack([X_prev, X]) + Y_train_k = np.concatenate([Y_prev, Y_k]) + S_train_k = (np.concatenate([S_prev, S_k]) + if (S_prev is not None and S_k is not None) else None) + # f_{k-1} fit on prior data only + fit_prev = _tracer_fits.build(opts.tracer_fit_method, + X_prev, Y_prev, sigma=S_prev) + + fit_now = _tracer_fits.build(opts.tracer_fit_method, + X_train_k, Y_train_k, sigma=S_train_k) + + state = {} + if opts.state_in and os.path.exists(opts.state_in): + with open(opts.state_in, "rb") as f: + state = pickle.load(f) + + sampler = { + "smc-mala-bd": _tracer_samplers.smc_mala_bd, + "smc-mala": _tracer_samplers.smc_mala, + "birth-death": _tracer_samplers.birth_death, + }[method] + + X_out, info = sampler( + particles=X, + surrogate=fit_now, + surrogate_prev=fit_prev, + prior_box=prior_box, + rng=rng, + n_mala_steps=opts.n_mala_steps, + target_ess_frac=opts.target_ess_frac, + birth_death_rate=opts.birth_death_rate, + state=state, + ) + + if opts.state_out: + with open(opts.state_out, "wb") as f: + pickle.dump(info.get("state", {}), f) + + _X_to_xml(X_out, P_template, opts, coord_names, downselect) + + +if __name__ == "__main__": + main() diff --git a/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py b/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py index abe11bd6..010d9a6c 100644 --- a/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py +++ b/MonteCarloMarginalizeCode/Code/bin/util_RIFT_hyperpipe.py @@ -1,194 +1,576 @@ #! /usr/bin/env python -# -# EXAMPLE -# - -# ARGUMENT SPECIFICATION -# arch -# method: -# n-iterations -# n-samples-per-job -# explode marg jobs: -# post: -# coord-module: -# coords-fit: -# coords-sample: -# exe: -# settings: -# n-max -# n-step -# sampler-method -# sigma-cut -# marg-list: -# - name: -# exe: -# args: -# - name: -# .... -# puff: -# exe: -# settings: -# puff-factor: -# test: -# init: -# file: -# generation: -# placement method: -# params_and_ranges: -# npts: -# external_code: -# external arguments: -# general: -# retries -# request-disk -# request-memory -# use-osg -# -# - -import numpy as np -import os, shutil +""" +util_RIFT_hyperpipe.py +====================== + +Top-level driver for the RIFT hyperpipeline --- the iterative +marginalize + fit + puff loop used to infer hyperparameters (e.g. EOS +parameters, population-model parameters, or any user-defined high-level +quantity) from one or more underlying likelihood evaluators ("marg +drivers"). + +This is the hyperpipe analog of :file:`util_RIFT_pseudo_pipe.py`. Where +``pseudo_pipe`` builds a single-event GW PE pipeline from argparse +flags and a ``.ini`` file, this script builds a hyperpipe DAG from an +ini-style **Hydra** configuration with three first-class features: + + * **ini-based configuration** (Hydra/OmegaConf) --- see the default + template :file:`hyperpipe_conf.yaml` next to this script and the + schema documented in :mod:`RIFT.hyperpipe.config`. + + * **Flexible multi-event input** via the ``marg-list:`` section --- + one entry per (likelihood-driver, event) pair, with per-entry + executables, args, event files, ``n-chunk`` (heterogeneous batch + sizes), and an optional per-entry coord module. + + * **Coordinate-transformation framework** --- the ``post`` stage + consumes a CIP-mirror coord module via + ``--supplementary-coordinate-code``, the same convention + ``util_ConstructEOSPosterior.py`` already understands; the + parameters / integration ranges are emitted automatically from + ``post.coords-fit`` / ``post.coords-sample`` (and optionally + ``coords-implied`` / ``coords-nofit``). + +Examples +-------- +Reproduce a minimal Gaussian-toy hyperpipe analysis:: + + util_RIFT_hyperpipe.py \\ + general.rundir=./my_run \\ + init.file=./blind_gaussian_plus_minus.dat + +Override a few defaults from the CLI (Hydra syntax):: + + util_RIFT_hyperpipe.py \\ + arch.n-iterations=15 \\ + general.use-osg=true general.use-singularity=true \\ + general.condor-local-nonworker=true + +Point at a config file living anywhere on disk (no install / no +``--config-dir`` plumbing needed):: + + util_RIFT_hyperpipe.py --config ./my_tracer.yaml + util_RIFT_hyperpipe.py -c /full/path/to/run_config.yaml \\ + puff.settings.update-method=ucb + +The ``--config PATH`` shim accepts an absolute or relative path and +splits it into the ``--config-dir`` / ``--config-name`` pair Hydra +expects. Trailing ``.yaml`` / ``.yml`` extensions are stripped +automatically (so ``--config-name foo.yaml`` works too). A bare name +falls through to Hydra's normal search path (the installed +``hyperpipe_conf.yaml`` next to the script). + +Implementation notes +-------------------- +The real work lives in :mod:`RIFT.hyperpipe`: + + * :func:`RIFT.hyperpipe.config.validate_config` -- structural validation + * :func:`RIFT.hyperpipe.coords.coord_spec_from_config_section` -- coord-spec + * :func:`RIFT.hyperpipe.marg_list.assemble_marg_list` -- multi-event assembly + +This script is intentionally thin: it parses Hydra config, materializes +the args / exe / event / nchunk files that +``create_eos_posterior_pipeline`` consumes, builds the matching +command line, and either runs it or (with ``general.dry-run=true``) +prints it. +""" + +from __future__ import annotations + import logging -logger = logging.getLogger(__name__) +import os +import shutil +import shlex +import sys +from typing import List, Optional -# install reuires hydra-core -from omegaconf import DictConfig, OmegaConf import hydra -import RIFT.misc.dag_utils_generic as dag_utils +from omegaconf import DictConfig, OmegaConf + +from RIFT.hyperpipe import config as hyper_config +from RIFT.hyperpipe.coords import coord_spec_from_config_section +from RIFT.hyperpipe.marg_list import assemble_marg_list + +logger = logging.getLogger(__name__) + + +# -------------------------------------------------------------------------- +# Helpers +# -------------------------------------------------------------------------- + + +def _which_or_self(name: str) -> str: + """Resolve an exe via PATH (or RIFT's which); fall back to the bare name.""" + found = shutil.which(name) + if found: + return found + try: + from RIFT.misc.dag_utils_generic import which as rift_which + + rw = rift_which(name) + if rw: + return rw + except Exception: + pass + return name + + +def _cfg_get(node, key, default=None): + """Forgiving get() that works on DictConfig or plain dict.""" + if node is None: + return default + try: + return node.get(key, default) + except AttributeError: + return node[key] if key in node else default + + +def _maybe_generate_initial_grid( + init_cfg, base_dir: str, run_dir: str +) -> str: + """Materialize an initial_grid.dat in run_dir. + + Honors (in priority order): + 1. ``init.file``: copy verbatim + 2. ``init.generation.placement-method == 'uniform'``: call out to + ``util_HyperparameterGrid.py`` with random-parameter/range/npts + pulled from the generation block. + 3. ``init.generation.external-code``: invoke an arbitrary user + command (``external-code external-args``) expected to drop + ``initial_grid.dat`` in cwd. + """ + initial_grid = os.path.join(run_dir, "initial_grid.dat") + init_file = _cfg_get(init_cfg, "file") + if init_file: + src = hyper_config.expand_path(init_file, base_dir) + if not os.path.exists(src): + raise FileNotFoundError(f"init.file does not exist: {src!r}") + shutil.copyfile(src, initial_grid) + return initial_grid + + gen = _cfg_get(init_cfg, "generation") or {} + placement = _cfg_get(gen, "placement-method") + external_code = _cfg_get(gen, "external-code") + + if placement == "uniform": + params_and_ranges = (_cfg_get(gen, "params-and-ranges") or "").strip() + npts = _cfg_get(gen, "npts") + if not params_and_ranges or not npts: + raise ValueError( + "init.generation.placement-method='uniform' requires " + "params-and-ranges and npts." + ) + # Parse params-and-ranges: "x:[a,b] y:[c,d] ..." + # and emit --random-parameter / --random-parameter-range pairs. + from RIFT.hyperpipe.coords import parse_range_string + + ranges = parse_range_string(params_and_ranges) + bits = [_which_or_self("util_HyperparameterGrid.py")] + for name, (lo, hi) in ranges.items(): + bits.append(f"--random-parameter {name}") + bits.append(f"--random-parameter-range [{lo},{hi}]") + bits.append(f"--npts {int(npts)}") + bits.append(f"--fname-out {initial_grid}") + cmd = " ".join(bits) + logger.info("Generating initial grid: %s", cmd) + rc = os.system(cmd) + if rc != 0: + raise SystemExit(f"util_HyperparameterGrid.py exited with code {rc}.") + return initial_grid + + if external_code: + ext_args = _cfg_get(gen, "external-args") or "" + cmd = f"{external_code} {ext_args}".strip() + logger.info("Generating initial grid via external code: %s", cmd) + rc = os.system(f"cd {shlex.quote(run_dir)} && {cmd}") + if rc != 0: + raise SystemExit(f"external-code exited with code {rc}.") + if not os.path.exists(initial_grid): + raise FileNotFoundError( + f"external-code did not produce {initial_grid!r}; " + "it must write to that exact path." + ) + return initial_grid + + raise ValueError( + "init: no usable initialization. Set either init.file, " + "init.generation.placement-method='uniform' (with params-and-ranges + npts), " + "or init.generation.external-code." + ) + + +def _build_post_args(cfg, coord_spec) -> str: + """Compose the args_eos_post.txt body from the coord spec + post.settings + extras.""" + post = cfg["post"] + args = coord_spec.to_post_args() + settings = _cfg_get(post, "settings") or {} + setting_flags = [ + ("n-max", "--n-max"), + ("n-step", "--n-step"), + ("n-eff", "--n-eff"), + ("sampler-method", "--sampler-method"), + ("fit-method", "--fit-method"), + ("sigma-cut", "--sigma-cut"), + ] + for key, flag in setting_flags: + val = _cfg_get(settings, key) + if val is not None and val != "": + args += f" {flag} {val}" + extra = (_cfg_get(post, "extra-args") or "").strip() + if extra: + args += " " + extra + return args -base_dir =None -config_here = None +def _build_puff_args(cfg, coord_spec) -> str: + """Compose args_puff.txt from the coord spec + puff.settings + extras. -@hydra.main(version_base=None, config_path=".",config_name='hyperpipe_conf') -def my_app(cfg: DictConfig): - global config_here - logging.basicConfig() - logger.info(" ---- INPUT CONFIG --- ") + The legacy puffball (util_HyperparameterPuffball.py) accepts only the + coord-spec flags + force-away/puff-factor. The tracer drop-ins + (util_HyperparameterTracerUpdate.py / util_ParameterTracerUpdate.py) + accept those plus a handful of sampler hyperparameters; we expose those + declaratively here so users can stay in Hydra rather than escaping into + extra-args strings. + """ + puff = _cfg_get(cfg, "puff") or {} + args = coord_spec.to_puff_args( + force_away=_cfg_get(puff, "force-away", 0.03), + puff_factor=_cfg_get(puff, "puff-factor", 0.5), + ) + # Pass the prior bounds through as --downselect-parameter so the tracer's + # internal prior_box covers the user's actual coords-sample range instead + # of the data bounding box. Without this, a narrow initial grid (e.g. a + # corner seed for blind-recovery tests) clips the tracer to data_bbox + + # 10% pad and particles cannot escape the convex hull. Both the legacy + # util_HyperparameterPuffball.py and the tracer drop-ins accept these + # flags; the legacy puffball treats them as a post-puff downselect mask, + # the tracer drop-ins also use them to widen prior_box. + # The range value is intentionally unquoted: create_eos_posterior_pipeline + # wraps any [..] in single quotes when staging args_puff.txt for Condor. + for p in coord_spec.parameters: + lo, hi = coord_spec.parameter_ranges[p] + args += ( + f" --downselect-parameter {p}" + f" --downselect-parameter-range [{coord_spec._fmt_num(lo)}," + f"{coord_spec._fmt_num(hi)}]" + ) + settings = _cfg_get(puff, "settings") or {} + # Value-bearing flags. Each pair is (yaml key, CLI flag). Empty / null is skipped. + setting_flags = [ + ("update-method", "--update-method"), + ("tracer-fit-method", "--tracer-fit-method"), + ("ucb-kappa", "--ucb-kappa"), + ("ucb-n-candidates", "--ucb-n-candidates"), + ("n-mala-steps", "--n-mala-steps"), + ("target-ess-frac", "--target-ess-frac"), + ("birth-death-rate", "--birth-death-rate"), + ("inj-file-prev", "--inj-file-prev"), + ("rng-seed", "--rng-seed"), + ("state-in", "--state-in"), + ("state-out", "--state-out"), + ] + for key, flag in setting_flags: + val = _cfg_get(settings, key) + if val is not None and val != "": + args += f" {flag} {val}" + # Bool flags. Each pair is (yaml key, CLI flag). + setting_bool_flags = [ + ("no-union-refit", "--no-union-refit"), + ("regularize", "--regularize"), + ] + for key, flag in setting_bool_flags: + if hyper_config.truthy(_cfg_get(settings, key, False)): + args += f" {flag}" + extra = (_cfg_get(puff, "extra-args") or "").strip() + if extra: + args += " " + extra + return args + + +def _build_test_args(cfg, coord_spec) -> str: + test = _cfg_get(cfg, "test") or {} + args = coord_spec.to_test_args( + method=_cfg_get(test, "method", "JS"), + threshold=_cfg_get(test, "threshold", 0.05), + ) + # test.settings: forwards convergence_test_samples.py-specific flags. All + # nullable -- omitted keys produce no flag, mirroring post.settings. + settings = _cfg_get(test, "settings") or {} + setting_flags = [ + ("iteration-threshold", "--iteration-threshold"), + ("write-file-on-success", "--write-file-on-success"), + ("test-output", "--test-output"), + ] + for key, flag in setting_flags: + val = _cfg_get(settings, key) + if val is not None and val != "": + args += f" {flag} {val}" + bool_flags = [ + ("always-succeed", "--always-succeed"), + ("verbose", "--verbose"), + ] + for key, flag in bool_flags: + if hyper_config.truthy(_cfg_get(settings, key, False)): + args += f" {flag}" + extra = (_cfg_get(test, "extra-args") or "").strip() + if extra: + args += " " + extra + return args + + +# -------------------------------------------------------------------------- +# Hydra entry point +# -------------------------------------------------------------------------- + + +@hydra.main(version_base=None, config_path=".", config_name="hyperpipe_conf") +def my_app(cfg: DictConfig) -> None: + logging.basicConfig(level=logging.INFO, format="%(levelname)s %(name)s: %(message)s") + + base_dir = hydra.utils.get_original_cwd() + + logger.info("---- INPUT CONFIG ----") print(OmegaConf.to_yaml(cfg)) - config_here=cfg -if __name__ == "__main__": - my_app() + hyper_config.validate_config(cfg) - base_dir = os.getcwd() - # make run directory - if config_here['general']['rundir'] : - if not(os.path.exists(config_here['general']['rundir'])): - os.mkdir(config_here['general']['rundir']) - os.chdir(config_here['general']['rundir']) + # ----- run directory --------------------------------------------------- + rundir = _cfg_get(cfg["general"], "rundir") + if rundir: + rundir = hyper_config.expand_path(rundir, base_dir) + os.makedirs(rundir, exist_ok=True) + os.chdir(rundir) run_dir = os.getcwd() + logger.info("Working directory: %s", run_dir) + + # ----- coord spec ------------------------------------------------------ + coord_spec = coord_spec_from_config_section(cfg["post"]) + coord_spec.validate(strict_import=False) + + # ----- marg-list assembly --------------------------------------------- + marg = assemble_marg_list(cfg, base_dir=base_dir, run_dir=run_dir) + logger.info( + "marg-list: %d entries (%s)", + len(marg.names), + ", ".join(marg.names), + ) + + # ----- emit per-stage files ------------------------------------------- + args_marg_path = os.path.join(run_dir, "args_marg_eos.txt") + args_marg_exe_path = os.path.join(run_dir, "args_marg_eos_exe.txt") + args_post_path = os.path.join(run_dir, "args_eos_post.txt") + args_puff_path = os.path.join(run_dir, "args_puff.txt") + args_test_path = os.path.join(run_dir, "args_test.txt") + event_nchunk_path = os.path.join(run_dir, "event_nchunk.txt") + transfer_list_path = os.path.join(run_dir, "transfer_file_list.txt") + + marg.write_args_file(args_marg_path) + marg.write_exe_file(args_marg_exe_path) + marg.write_nchunk_file(event_nchunk_path) + + # create_eos_posterior_pipeline reads these via --eos-post-args / + # --puff-args / --test-args and discards the first whitespace-separated + # token (see split(' ')[1:] in create_eos_posterior_pipeline). We prepend + # a dummy "X" so the real first flag survives. The marg-list args file is + # read line-by-line and does NOT get this treatment. + _PIPELINE_DUMMY = "X" + with open(args_post_path, "w") as f: + f.write(_PIPELINE_DUMMY + " " + _build_post_args(cfg, coord_spec) + "\n") + with open(args_puff_path, "w") as f: + f.write(_PIPELINE_DUMMY + " " + _build_puff_args(cfg, coord_spec) + "\n") + with open(args_test_path, "w") as f: + f.write(_PIPELINE_DUMMY + " " + _build_test_args(cfg, coord_spec) + "\n") + + # ----- initial grid --------------------------------------------------- + initial_grid = _maybe_generate_initial_grid(cfg["init"], base_dir, run_dir) + + # ----- transfer-file list --------------------------------------------- + general = cfg["general"] + extra_transfer: List[str] = list(_cfg_get(general, "transfer-files") or []) + use_osg = hyper_config.truthy(_cfg_get(general, "use-osg", False)) + use_singularity = hyper_config.truthy(_cfg_get(general, "use-singularity", False)) + use_transfer = use_osg or use_singularity or bool(extra_transfer) + if use_transfer: + marg.write_transfer_file_list(transfer_list_path, extra=extra_transfer) + + # ----- command assembly ----------------------------------------------- + arch = cfg["arch"] + post_exe = _cfg_get(cfg["post"], "exe") or _which_or_self("util_ConstructEOSPosterior.py") + puff_exe = ( + _cfg_get(_cfg_get(cfg, "puff"), "exe") + or _which_or_self("util_HyperparameterPuffball.py") + ) + test_exe = _cfg_get(_cfg_get(cfg, "test"), "exe", "convergence_test_samples") + + cmd_parts: List[str] = [ + _which_or_self("create_eos_posterior_pipeline"), + f"--n-samples-per-job {int(arch['n-samples-per-job'])}", + f"--n-iterations {int(arch['n-iterations'])}", + f"--working-directory {run_dir}", + f"--input-grid {initial_grid}", + f"--marg-event-exe-list-file {args_marg_exe_path}", + f"--marg-event-args-list-file {args_marg_path}", + f"--marg-event-nchunk-list-file {event_nchunk_path}", + f"--eos-post-args {args_post_path}", + f"--eos-post-exe {post_exe}", + f"--puff-exe {puff_exe}", + f"--puff-args {args_puff_path}", + f"--test-args {args_test_path}", + f"--test-exe {test_exe}", + ] + # one --event-file per marg entry (critical for multi-event!) + for ev in marg.event_files: + cmd_parts.append(f"--event-file {ev}") + + # arch tunables + explode = _cfg_get(arch, "explode-marg-jobs") + if explode is not None: + cmd_parts.append(f"--eos-post-explode-jobs {int(explode)}") + explode_last = _cfg_get(arch, "explode-marg-jobs-last") + if explode_last is not None: + cmd_parts.append(f"--eos-post-explode-jobs-last {int(explode_last)}") + start_iter = int(_cfg_get(arch, "start-iteration", 0) or 0) + if start_iter: + cmd_parts.append(f"--start-iteration {start_iter}") + # tracer-aware puff input source (posterior|marg_net). Default posterior + # keeps the legacy puffball path byte-identical. + puff_input_source = _cfg_get(_cfg_get(cfg, "puff"), "input-source") + if puff_input_source: + cmd_parts.append(f"--puff-input-source {puff_input_source}") - # Create necessary files (note event files will need to be copied in place) - # - MARG - lines_marg = '' - lines_marg_exe = '' - fnames_marg_exe_long = [] - lines_event_list = '' - nchunk_list =[] - for indx in range(len(config_here['marg-list'])): - prefix=base_dir - lines_marg += config_here['marg-list'][indx]['args'] + "\n" - - fname_orig = config_here['marg-list'][indx]['exe'] + "\n" - fname_clean = os.path.basename(fname_orig.strip()) - lines_marg_exe += fname_orig - loc = dag_utils.which(fname_clean) - if loc is None: - loc = base_dir + "/" +fname_clean - print(fname_orig, loc) - if not('MonteCarloMarginalizeCode/Code/bin' in loc): # don't copy core rift executables - shutil.copyfile(loc, './'+fname_clean) # copy file - fnames_marg_exe_long += [run_dir + "/" + fname_clean] - - # copy event files - if 'event file' in config_here['marg-list'][indx]: - prefix = base_dir - fname_orig = config_here['marg-list'][indx]['event file'] - if fname_orig[0] != '/': - fname_orig = base_dir +'/' +fname_orig - shutil.copy(fname_orig, 'event_file_{}.dat'.format(indx)) - else: - lines_event_list += "empty_event_file\n" - if 'n-chunk' in config_here['marg-list'][indx]: - nchunk_list.append(int(config_here['marg-list'][indx]['n-chunk'])) - else: - nchunk_list.append(1) - with open("args_marg_eos.txt", "w") as f: - f.write(lines_marg) - with open("args_marg_eos_exe.txt", "w") as f: - f.write(lines_marg_exe) - # Create event file. Use empty file if empty - with open("event_files.txt", "w") as f: - f.write(lines_event_list) - np.savetxt("event_nchunk.txt", np.array(nchunk_list,dtype=int),fmt="%i") - - # - POST - line_post ='' - line_post_exe = None - coord_names = config_here['post']['coords-fit'].split() - coord_range_blocks = config_here['post']['coords-sample'].split() - for name in coord_names: - line_post += " --parameter {} ".format(name) # no matching : fit coords in principle independent, may even use converter - for block in coord_range_blocks: - line_post += " --integration-parameter-range {} ".format(block) - with open("args_eos_post.txt", "w") as f: - f.write(line_post) - - # - PUFF - force_away_val =0.1 - if 'puff' in config_here: - if 'puff factor' in config_here['puff']: - force_away_val = config_here['puff']['puff factor'] - line_puff = ' --force-away {} '.format(force_away_val) - for name in coord_names: - line_puff += " --parameter {} ".format(name) # default: puff in all parameters - with open("args_puff.txt", "w") as f: - f.write(line_puff) - - - # Create initialization file - if 'file' in config_here['init']: - # default: copy from base directory, unless absolute - prefix = base_dir - target_file = config_here["init"]['file'] - if target_file[0] != '/': - target_file = prefix+ "/" + target_file - shutil.copy(target_file, 'initial_grid.dat') - - # Extract architecture arguments - n_iterations = config_here['arch']['n-iterations'] - n_samples_per_job = config_here['arch']['n-samples-per-job'] - - - files_transferred=False - fname_transfer_files = 'transfer_file_list.txt' - # Create command line - cmd = "create_eos_posterior_pipeline --n-samples-per-job {} --n-iterations {} --working-dir `pwd` ".format(n_samples_per_job, n_iterations) - # MARG - cmd += " --marg-event-nchunk-list-file `pwd`/event_nchunk.txt --event-file `pwd`/event_files.txt " - cmd += " --marg-event-exe-list-file `pwd`/args_marg_eos_exe.txt --marg-event-args-list-file `pwd`/args_marg_eos.txt " - # POST - cmd += " --eos-post-args `pwd`/args_eos_post.txt --eos-post-exe `which util_ConstructEOSPosterior.py` " - # PUFF - cmd += " --puff-exe `which util_HyperparameterPuffball.py` --puff-args `pwd`/args_puff.txt " - # init - cmd += " --input-grid `pwd`/initial_grid.dat " # somewhat redundant to copy it, but we might be building in place # general - if 'use-osg' in config_here['general']: - check = config_here['general']['use-osg'] - if (isinstance(check, bool) and check) or (isinstance(check, str) and (check == 'True' or check == 'true') ): - cmd += " --use-osg --use-singularity " - # Transfer executables - with open('transfer_file_list.txt', 'a') as f: - for line in fnames_marg_exe_long: - f.write(line) - files_transferred=True - if 'condor-local-nonworker' in config_here['general']: - cmd += " --condor-local-nonworker " - if 'condor-local-nonworker-igwn-prefix' in config_here['general']: - cmd += " --condor-local-nonworker-igwn-prefix " - if files_transferred: # should embed this as control to transfer each executable ? - cmd += " --transfer-file-list `pwd`/transfer_file_list.txt " # redundant - transfers all things every time for all MARG, need to improve + bool_flag_pairs = [ + ("use-osg", "--use-osg"), + ("use-singularity", "--use-singularity"), + ("use-singularity-local", "--use-singularity-local"), + ("condor-local-nonworker", "--condor-local-nonworker"), + ("condor-local-nonworker-igwn-prefix", "--condor-local-nonworker-igwn-prefix"), + ("condor-nogrid-nonworker", "--condor-nogrid-nonworker"), + ("use-full-submit-paths", "--use-full-submit-paths"), + ] + for key, flag in bool_flag_pairs: + if hyper_config.truthy(_cfg_get(general, key, False)): + cmd_parts.append(flag) + + retries = int(_cfg_get(general, "retries", 0) or 0) + if retries: + cmd_parts.append(f"--general-retries {retries}") + req_disk = _cfg_get(general, "request-disk") + if req_disk: + cmd_parts.append(f"--general-request-disk {req_disk}") + req_mem = _cfg_get(general, "request-memory") + if req_mem is not None: + cmd_parts.append(f"--request-memory-marg {int(req_mem)}") + if use_transfer: + cmd_parts.append(f"--transfer-file-list {transfer_list_path}") + + cmd = " ".join(cmd_parts) print(cmd) - os.system(cmd) + + if hyper_config.truthy(_cfg_get(general, "dry-run", False)): + logger.info("dry-run set; not invoking create_eos_posterior_pipeline.") + return + + rc = os.system(cmd) + if rc != 0: + raise SystemExit( + f"create_eos_posterior_pipeline exited with code {rc} (cmd: {cmd!r})." + ) + + +def _preprocess_config_args(argv): + """User-friendly --config shim translated into Hydra's --config-dir / --config-name. + + Hydra's native CLI requires two flags (``--config-dir DIR --config-name NAME``) + and the config-name must be bare (no ``.yaml`` extension). Users who pass a full + path to a config file expect the script to "just use it". This shim accepts: + + --config /full/path/to/myconf.yaml + --config myconf.yaml (resolved against CWD) + --config myconf (Hydra default search path) + -c /full/path/to/myconf.yaml (short form) + + and rewrites argv so Hydra sees the equivalent of:: + + --config-dir /full/path/to --config-name myconf + + Also strips a trailing ``.yaml`` / ``.yml`` from ``--config-name=...`` / + ``--config-name foo.yaml`` (the most common user-mistake from Hydra's docs). + + Fails fast with a clear error if the resolved file does not exist. + """ + out = [] + i = 0 + n = len(argv) + + def _strip_ext(name): + for ext in (".yaml", ".yml"): + if name.endswith(ext): + return name[: -len(ext)] + return name + + def _split_path_or_name(raw): + """Given what the user passed, return (dir_or_None, bare_name).""" + if os.sep in raw or raw.startswith(".") or raw.endswith(".yaml") or raw.endswith(".yml"): + # path-shaped or extension-bearing + abs_path = os.path.abspath(raw) + d = os.path.dirname(abs_path) + base = os.path.basename(abs_path) + return d, _strip_ext(base) + # bare name -> leave dir alone (use built-in search path) + return None, _strip_ext(raw) + + def _emit(dirpart, namepart, raw): + # Validate existence when we have a directory commitment + if dirpart is not None: + full = os.path.join(dirpart, namepart + ".yaml") + full_yml = os.path.join(dirpart, namepart + ".yml") + if not (os.path.isfile(full) or os.path.isfile(full_yml)): + sys.stderr.write( + f"util_RIFT_hyperpipe.py: config file not found from --config {raw!r}\n" + f" looked for: {full}\n" + f" or: {full_yml}\n" + ) + sys.exit(2) + out.append(f"--config-dir={dirpart}") + out.append(f"--config-name={namepart}") + + while i < n: + a = argv[i] + # Long form: --config X or --config=X + if a == "--config" or a == "-c": + if i + 1 >= n: + sys.stderr.write("util_RIFT_hyperpipe.py: --config requires an argument\n") + sys.exit(2) + raw = argv[i + 1] + d, name = _split_path_or_name(raw) + _emit(d, name, raw) + i += 2 + continue + if a.startswith("--config=") or a.startswith("-c="): + raw = a.split("=", 1)[1] + d, name = _split_path_or_name(raw) + _emit(d, name, raw) + i += 1 + continue + # Belt-and-braces: if the user passed --config-name foo.yaml, strip the ext. + if a == "--config-name" and i + 1 < n: + out.append(a) + out.append(_strip_ext(argv[i + 1])) + i += 2 + continue + if a.startswith("--config-name="): + out.append("--config-name=" + _strip_ext(a.split("=", 1)[1])) + i += 1 + continue + out.append(a) + i += 1 + return out + + +if __name__ == "__main__": + # Translate any user-friendly --config PATH into Hydra's native flags + # before @hydra.main parses sys.argv. Idempotent: passing Hydra's + # native --config-dir / --config-name through unchanged still works. + sys.argv = [sys.argv[0]] + _preprocess_config_args(sys.argv[1:]) + my_app() diff --git a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile index 5e61769e..b8323290 100644 --- a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile +++ b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/Makefile @@ -137,5 +137,14 @@ Gaussian_adaptive_bimodal: # ALTERNATIVE PIPELINE IMPLEMENTATION # - This approach has configuration in 'hyperpipe_conf.yaml' +# Note: util_RIFT_hyperpipe.py without --config loads the install-time default +# next to the script (which has init.file: null and so fails validation). The +# --config shim points it at the local demo yaml. rundir: - python util_RIFT_hyperpipe.py + util_RIFT_hyperpipe.py --config hyperpipe_conf.yaml + +rundir_tracer: + util_RIFT_hyperpipe.py --config hyperpipe_conf_tracer.yaml + +rundir_tracer_plots: + (cd rundir_tracer; plot_posterior_corner.py --posterior-file posterior-3.dat --composite-file all.marg_net --composite-file-has-labels --parameter x --parameter y --parameter z --lnL-cut 15 --use-all-composite-but-grayscale) diff --git a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/README.md b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/README.md index c67e8eca..89e0c1bb 100644 --- a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/README.md +++ b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/README.md @@ -6,3 +6,9 @@ For more details, see the auto-generated documentation [here](https://rift-docu Similar content is also available in the supplementary document 'technical_doc.txt' for a pedagogical overview of the proceedure and implementation of the pipeline. +# Hyperpipe hydra pipeline writer +New tool +``` +util_RIFT_hyperpipe.py --config-name hyperpipe_conf_tracer.yaml +``` + diff --git a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/example_gaussian.py b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/example_gaussian.py index 190b79dc..7a3e9846 100755 --- a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/example_gaussian.py +++ b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/example_gaussian.py @@ -66,6 +66,12 @@ def likelihood_evaluation(): if opts.using_eos[:5] =='file:': eoss = np.genfromtxt(opts.using_eos[5:],dtype='str') else: eoss = np.genfromtxt(opts.using_eos[:],dtype='str') + + # Sanity check on ranges : standard, do no work if not needed + if opts.eos_start_index > len(eoss): + sys.exit(0) # nothing to do + if opts.eos_end_index >= len(eoss): + opts.eos_end_index = len(eoss)-1 likelihood_dict = {} diff --git a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/hyperpipe_conf.yaml b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/hyperpipe_conf.yaml index 873ec65c..05f49476 100644 --- a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/hyperpipe_conf.yaml +++ b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/hyperpipe_conf.yaml @@ -1,22 +1,41 @@ +# Baseline hyperpipe demo configuration. Drives the iterative MARG + EOS_POST +# loop on a 3-D Gaussian toy with a 1000-point uniform seed grid spanning a +# corner of the [-7,7]^3 prior. Compare with hyperpipe_conf_tracer.yaml for +# the parsimonious-placement (tracer) variant of the same demo. +# +# Run with: +# make rundir +# which is equivalent to: +# util_RIFT_hyperpipe.py --config ./hyperpipe_conf.yaml arch: - method: default - n-iterations: 3 - n-samples-per-job: 1000 - explode marg jobs: 5 + method: default + n-iterations: 3 + n-samples-per-job: 1000 + explode-marg-jobs: 5 + post: coords-fit: "x y z" coords-sample: "x:[-7,7] y:[-7,7] z:[-7,7]" + marg-list: - name: Gaussian exe: example_gaussian.py - args: --outdir Gaussian_example --conforming-output-name + args: "--outdir Gaussian_example --conforming-output-name" n-chunk: 100 + puff: - settings: - puff factor: 0.5 + # No puff.exe -> no PUFF/MARG_PUFF lane is emitted. The iterative loop + # is MARG -> CON -> UNIFY -> EOS_POST -> next-iteration MARG, with new + # grids drawn from the posterior alone. + puff-factor: 0.5 + force-away: 0.03 + init: file: blind_gaussian_3d_xy_plus.dat + general: rundir: rundir - + # Toy demo footprint; the GW-PE-tuned 16 GB default is overkill for a + # 3-D Gaussian. + request-memory: 200 diff --git a/MonteCarloMarginalizeCode/Code/demo/hyperpipe/hyperpipe_conf_tracer.yaml b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/hyperpipe_conf_tracer.yaml new file mode 100644 index 00000000..b5ba6912 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/demo/hyperpipe/hyperpipe_conf_tracer.yaml @@ -0,0 +1,75 @@ +# Example hyperpipe configuration that enables the parsimonious-placement +# tracer workflow. +# +# Differences from the baseline hyperpipe_conf.yaml in this directory: +# +# * puff.exe -> util_HyperparameterTracerUpdate.py +# (the tracer drop-in for util_HyperparameterPuffball.py) +# * puff.input-source = marg_net +# tracer consumes all.marg_net (ILE-evaluated table) +# and writes grid-{k+1}.dat directly, suppressing the +# MARG_PUFF lane entirely. *Required* alongside a +# tracer-aware puff.exe -- without it the pipeline +# falls back to legacy posterior-mode wiring and +# emits the MARG_PUFF lane with grid_puff-*.dat I/O. +# * puff.settings -> sampler hyperparameters (smc-mala-bd / RF surrogate) +# +# The savings (~1.7-1.8x for N=5-6) come from killing the MARG_PUFF lane, +# not from skipping MARG. MARG runs every iteration as normal -- the tracer +# consumes each iteration's all.marg_net to place the next grid. +# +# Run with: +# util_RIFT_hyperpipe.py --config ./hyperpipe_conf_tracer.yaml +# +# Override any field on the command line with Hydra syntax, e.g.: +# util_RIFT_hyperpipe.py --config ./hyperpipe_conf_tracer.yaml \ +# arch.n-iterations=6 puff.settings.update-method=birth-death + +arch: + method: default + n-iterations: 5 + n-samples-per-job: 1000 + explode-marg-jobs: 5 + start-iteration: 0 + +post: + coords-fit: "x y z" + coords-sample: "x:[-7,7] y:[-7,7] z:[-7,7]" + settings: + fit-method: rf + +marg-list: + - name: Gaussian + exe: example_gaussian.py + args: "--outdir Gaussian_example --conforming-output-name" + n-chunk: 100 + +puff: + # Tracer-aware updater. Falls back to puffball automatically if + # RIFT.misc.tracer_placement is not importable (e.g. mismatched RIFT + # version), so this config remains usable even on older worker images. + exe: util_HyperparameterTracerUpdate.py + # REQUIRED for the tracer pathway -- without this the pipeline runs the + # tracer in legacy posterior mode and also emits the MARG_PUFF lane, + # which reads grid_puff-{k+1}.dat (not produced when tracer-only-marg is + # on, so MARG_PUFF jobs fail with "No such file or directory"). + input-source: marg_net + puff-factor: 0.5 + force-away: 0.03 + settings: + update-method: smc-mala-bd # primary recommendation (Tier-1 results) + tracer-fit-method: rf # production default; cheap to rebuild + n-mala-steps: 8 + target-ess-frac: 0.5 + birth-death-rate: 1.0 + rng-seed: null # set to an int for deterministic runs + +init: + file: blind_gaussian_3d_xy_plus.dat + +general: + rundir: rundir_tracer + # Demo footprint: a 3-D Gaussian toy needs ~tens of MB per job, not the + # 16 GB baseline default tuned for GW PE. Bump back up if you adapt this + # for production hyperpipe runs. + request-memory: 200 diff --git a/MonteCarloMarginalizeCode/Code/test/hyperpipe/README.md b/MonteCarloMarginalizeCode/Code/test/hyperpipe/README.md new file mode 100644 index 00000000..84e7e477 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/test/hyperpipe/README.md @@ -0,0 +1,92 @@ +# RIFT.hyperpipe test suite + +Live integration tests for the in-tree `RIFT.hyperpipe` package +(`Code/RIFT/hyperpipe/`) and the `util_RIFT_hyperpipe.py` / +`util_HyperMargGaussian.py` console scripts (`Code/bin/`). Backed by a +self-contained [pixi](https://pixi.sh) environment so the suite works +from any RIFT clone without polluting your global Python. + +## Quick run + +```sh +# one-time, if you don't have pixi: +curl -fsSL https://pixi.sh/install.sh | bash + +cd MonteCarloMarginalizeCode/Code/test/hyperpipe +pixi install # heavy: pulls lalsuite + igwn-ligolw + gwpy + scientific stack +pixi run test # full pytest suite (the live test) +``` + +Auxiliary entry points: + +```sh +pixi run test-minimal # pytest-free smoke check (also useful in CI without pytest) +pixi run demo-dryrun # end-to-end dry-run of util_RIFT_hyperpipe.py +pixi run which-rift # confirm pixi resolved the in-tree RIFT correctly +``` + +## Layout + +``` +test/hyperpipe/ +├── pixi.toml # env spec: python + scientific stack + full lalsuite +├── README.md # you are here +└── tests/ + ├── conftest.py # auto-detects RIFT_ROOT from this file's location + ├── test_coords.py # CIP-mirror coord-spec emission + ├── test_marg_list.py # mono + heterogeneous marg-list assembly + ├── test_config.py # schema-validation paths + ├── test_drivers.py # MargDriverBase + util_HyperMargGaussian end-to-end + ├── test_hydra_integration.py # real-Hydra subprocess test of util_RIFT_hyperpipe.py + ├── standalone_check.py # pytest-free smoke test + └── demo_dryrun.py # full dry-run demo +``` + +The `[activation.env]` block in `pixi.toml` derives `RIFT_ROOT` / +`RIFT_PY` / `RIFT_BIN` relative to `$PIXI_PROJECT_ROOT`, prepends +`$RIFT_BIN` to `PATH`, and sets `PYTHONPATH` so the pytest subprocesses +can find the in-tree `RIFT` package. No user-specific hardcoded paths. + +## What each suite proves + +| File | What it proves | +|---|---| +| `test_coords.py` | Coord-spec emission exactly matches the Gaussian-demo `args_*.txt` files (including `-8` vs `-8.0` formatting). NICER-style spec with `--supplementary-coordinate-code` + likelihood-factor trio renders correctly. | +| `test_marg_list.py` | Mono-driver assembly reproduces the Gaussian demo verbatim. Heterogeneous assembly (NICER + GW) preserves per-driver batch sizes and routes the per-driver coord-module override only to the entry that asked for it. | +| `test_config.py` | `validate_config` rejects empty configs / empty `marg-list` / non-positive `n-iterations` / missing `init`. Truthy coercion handles "true"/"True"/"1"/`True`/`1`. | +| `test_drivers.py` | `MargDriverBase` read→mutate→write round-trips without numpy fixed-width truncation. The actual `util_HyperMargGaussian.py` subprocess runs against a 5-point grid and the mode at `(4, 0, 0)` outranks the far point at `(10, 10, 10)`. | +| `test_hydra_integration.py` | The real Hydra entry point of `util_RIFT_hyperpipe.py` runs as a subprocess with CLI overrides, writes every expected artefact (`args_*.txt`, `event-0.net`, `event_nchunk.txt`, `initial_grid.dat`, `transfer_file_list.txt`), and the emitted `create_eos_posterior_pipeline` command line carries every flag we asked for. | + +## When something fails + +* **`RIFT root not found`** in test skip messages: you're outside a RIFT + clone, or this suite has been moved away from + `test/hyperpipe/tests/`. `pixi run which-rift` should print a + sensible path. +* **lalsuite solver issues on Apple Silicon**: conda-forge's lalsuite is + reasonably supported on `osx-arm64` now; if you hit solver trouble, + try `pixi install --platform osx-64` (Rosetta). +* **`PYTHONPATH` collisions** with an existing system RIFT install: + inside `pixi shell`, `python -c "import RIFT; print(RIFT.__file__)"` + should resolve to the in-tree copy. If not, check whether some + outer activation script is overriding `PYTHONPATH`. + +## Adding a new test + +The `hp_modules` fixture in `conftest.py` exposes the four lightweight +hyperpipe modules (`coords`, `config`, `marg_list`, `drivers_base`) as +attributes on a namespace. New tests should follow the existing pattern: + +```python +def test_my_new_thing(hp_modules, tmp_path): + spec = hp_modules.coords.HyperCoordSpec.from_strings( + coords_fit="x y", coords_sample="x:[0,1] y:[0,1]", + ) + ... +``` + +For tests that need to run `util_RIFT_hyperpipe.py` or `util_HyperMargGaussian.py` +as a subprocess (i.e. exercise the real Hydra entry point), use the +`rift_py` / `rift_bin` fixtures to find the scripts and propagate +`PYTHONPATH` to the child via `os.environ.copy()` — see +`test_hydra_integration.py` for the canonical example. diff --git a/MonteCarloMarginalizeCode/Code/test/hyperpipe/pixi.toml b/MonteCarloMarginalizeCode/Code/test/hyperpipe/pixi.toml new file mode 100644 index 00000000..1c152672 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/test/hyperpipe/pixi.toml @@ -0,0 +1,74 @@ +# Pixi project for the RIFT.hyperpipe test suite. +# +# Location: $RIFT_ROOT/MonteCarloMarginalizeCode/Code/test/hyperpipe/ +# +# The full lalsuite stack is in the default env because RIFT/__init__.py +# unconditionally imports lalsimutils --- ``import RIFT.hyperpipe.*`` +# always pays for the LAL chain. Heavier ``pixi install``, but matches +# how RIFT is actually deployed. +# +# Tasks +# ----- +# pixi run test # full pytest suite (the live test) +# pixi run test-minimal # pytest-free smoke check +# pixi run demo-dryrun # end-to-end dry-run of util_RIFT_hyperpipe.py +# pixi run which-rift # confirm RIFT_ROOT is wired correctly +# +# First-time setup +# ---------------- +# curl -fsSL https://pixi.sh/install.sh | bash # one-time +# cd $RIFT_ROOT/MonteCarloMarginalizeCode/Code/test/hyperpipe +# pixi install +# pixi run test + +[project] +name = "rift-hyperpipe-test" +version = "0.1.0" +description = "Live-test harness for RIFT.hyperpipe (in-tree hyperpipeline driver)." +authors = ["RIFT developers"] +channels = ["conda-forge"] +platforms = ["osx-64", "osx-arm64", "linux-64"] + +[dependencies] +python = ">=3.11,<3.13" +numpy = "*" +scipy = "*" +hydra-core = "*" +omegaconf = "*" +pyyaml = "*" +pytest = "*" +# Full RIFT runtime stack. Required because RIFT/__init__.py +# unconditionally imports lalsimutils. +six = "*" +h5py = "*" +matplotlib = "*" +lalsuite = "*" +python-lal = "*" +python-lalsimulation = "*" +python-lalmetaio = "*" +python-lalframe = "*" +igwn-ligolw = "*" +lscsoft-glue = "*" +gwpy = "*" + +# Resolve RIFT paths relative to this pixi.toml's location so the +# project works from any RIFT clone (no hardcoded user path). The +# layout is: +# +# $RIFT_ROOT/MonteCarloMarginalizeCode/Code/test/hyperpipe/pixi.toml +# ^^^^^^^^^^^^^ +# PIXI_PROJECT_ROOT +# +# so going up four levels lands at $RIFT_ROOT. +[activation.env] +RIFT_ROOT = "$PIXI_PROJECT_ROOT/../../../.." +RIFT_PY = "$PIXI_PROJECT_ROOT/../.." +RIFT_BIN = "$PIXI_PROJECT_ROOT/../../bin" +PYTHONPATH = "$PIXI_PROJECT_ROOT/../..:$PYTHONPATH" +PATH = "$PIXI_PROJECT_ROOT/../../bin:$PATH" + +[tasks] +test = "pytest -v tests/" +test-minimal = "python tests/standalone_check.py" +demo-dryrun = "python tests/demo_dryrun.py" +which-rift = "echo RIFT_ROOT=$RIFT_ROOT && ls -la $RIFT_PY/RIFT/hyperpipe/" diff --git a/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/conftest.py b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/conftest.py new file mode 100644 index 00000000..4844e382 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/conftest.py @@ -0,0 +1,86 @@ +""" +pytest fixtures + path wiring for the in-tree RIFT.hyperpipe test suite. + +Resolves ``RIFT_ROOT`` in two ways, in order: + 1. the ``$RIFT_ROOT`` environment variable, if set (e.g. by pixi's + activation block); + 2. walking up from this file's location --- with the canonical layout + ``$RIFT_ROOT/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/conftest.py``, + ``parents[5]`` of ``__file__`` is ``$RIFT_ROOT``. + +This means the suite works from any RIFT clone with no user-specific +paths to edit. +""" +from __future__ import annotations + +import importlib +import os +import sys +import types +from pathlib import Path + +import pytest + + +_HERE = Path(__file__).resolve() + + +def _is_rift_root(p: Path) -> bool: + return (p / "MonteCarloMarginalizeCode" / "Code" / "RIFT" / "hyperpipe").exists() + + +def _rift_root() -> Path: + # 1. honor explicit env var if it points at a real RIFT clone + env = os.environ.get("RIFT_ROOT") + if env: + p = Path(env).resolve() + if _is_rift_root(p): + return p + # 2. canonical in-tree location: parents[5] of this file + if len(_HERE.parents) > 5: + candidate = _HERE.parents[5] + if _is_rift_root(candidate): + return candidate + # 3. give up + pytest.skip( + "Could not locate RIFT root. Either set $RIFT_ROOT or run this " + "suite from its canonical location at " + "$RIFT_ROOT/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/." + ) + + +@pytest.fixture(scope="session") +def rift_root() -> Path: + return _rift_root() + + +@pytest.fixture(scope="session") +def hyperpipe_dir(rift_root: Path) -> Path: + return rift_root / "MonteCarloMarginalizeCode" / "Code" / "RIFT" / "hyperpipe" + + +@pytest.fixture(scope="session") +def rift_bin(rift_root: Path) -> Path: + return rift_root / "MonteCarloMarginalizeCode" / "Code" / "bin" + + +@pytest.fixture(scope="session") +def rift_py(rift_root: Path) -> Path: + return rift_root / "MonteCarloMarginalizeCode" / "Code" + + +@pytest.fixture(scope="session") +def hp_modules(rift_py: Path): + """Import the four lightweight hyperpipe modules and expose on a namespace.""" + if str(rift_py) not in sys.path: + sys.path.insert(0, str(rift_py)) + coords = importlib.import_module("RIFT.hyperpipe.coords") + config = importlib.import_module("RIFT.hyperpipe.config") + marg_list = importlib.import_module("RIFT.hyperpipe.marg_list") + drivers_base = importlib.import_module("RIFT.hyperpipe.drivers.base") + return types.SimpleNamespace( + coords=coords, + config=config, + marg_list=marg_list, + drivers_base=drivers_base, + ) diff --git a/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/demo_dryrun.py b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/demo_dryrun.py new file mode 100644 index 00000000..57f2ea21 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/demo_dryrun.py @@ -0,0 +1,94 @@ +""" +End-to-end dry-run demo: drives util_RIFT_hyperpipe.py with a +Gaussian-like config, lets it write all the per-stage args files into +./demo_run/, and prints the resulting create_eos_posterior_pipeline +command line --- WITHOUT actually executing it. + +Run via: ``pixi run demo-dryrun`` +""" +from __future__ import annotations + +import os +import shlex +import shutil +import subprocess +import sys +from pathlib import Path + + +_HERE = Path(__file__).resolve() + + +def _is_rift_root(p: Path) -> bool: + return (p / "MonteCarloMarginalizeCode" / "Code" / "RIFT" / "hyperpipe").exists() + + +def _rift_root() -> Path: + env = os.environ.get("RIFT_ROOT") + if env: + p = Path(env).resolve() + if _is_rift_root(p): + return p + if len(_HERE.parents) > 5: + candidate = _HERE.parents[5] + if _is_rift_root(candidate): + return candidate + raise SystemExit("RIFT root not found.") + + +RIFT_ROOT = _rift_root() +RIFT_PY = RIFT_ROOT / "MonteCarloMarginalizeCode" / "Code" +SCRIPT = RIFT_PY / "bin" / "util_RIFT_hyperpipe.py" +GAUSS_EXE = RIFT_PY / "bin" / "util_HyperMargGaussian.py" + +run_dir = Path("demo_run").resolve() +base_dir = Path("demo_base").resolve() + +for d in (run_dir, base_dir): + if d.exists(): + shutil.rmtree(d) +base_dir.mkdir() + +grid = base_dir / "blind_gaussian_plus_minus.dat" +with open(grid, "w") as f: + f.write("# lnL sigma_lnL x y z\n") + for v in (-4, -2, 0, 2, 4): + f.write(f"0 0 {v} 0 0\n") + f.write(f"0 0 {v} 1 -1\n") + +overrides = [ + f"general.rundir={run_dir}", + "general.dry-run=true", + "general.use-osg=true", + "general.use-singularity=true", + "general.condor-local-nonworker=true", + "general.condor-local-nonworker-igwn-prefix=true", + "general.retries=5", + "general.request-disk=2G", + "arch.n-iterations=20", + "arch.explode-marg-jobs=5", + f"init.file={grid}", + 'marg-list=[{name:gaussian, exe:' + shlex.quote(str(GAUSS_EXE)) + + ', args:"--outdir Gaussian_example --conforming-output-name", ' + 'event-file:null, n-chunk:100, coord-module:null, extra-args:""}]', +] + +cmd = [sys.executable, str(SCRIPT), *overrides] +env = dict(os.environ) +env["PYTHONPATH"] = str(RIFT_PY) + os.pathsep + env.get("PYTHONPATH", "") + +print("=" * 72) +print("RIFT_ROOT =", RIFT_ROOT) +print("Invoking:", " ".join(cmd)) +print("=" * 72) +proc = subprocess.run(cmd, env=env, cwd=base_dir) +if proc.returncode != 0: + sys.exit(proc.returncode) + +print("\n" + "=" * 72) +print("Artefacts written into:", run_dir) +print("=" * 72) +for p in sorted(run_dir.iterdir()): + if p.is_file() and p.stat().st_size < 4096: + print(f"\n--- {p.name} ---") + print(p.read_text().rstrip()) diff --git a/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/standalone_check.py b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/standalone_check.py new file mode 100644 index 00000000..d7c79015 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/standalone_check.py @@ -0,0 +1,131 @@ +""" +Standalone (no-pytest) smoke test --- useful for ``pixi run test-minimal``. + +Verifies the same things as the pytest suite, but runs as a regular +script so it's easy to invoke from a make rule or a CI job that doesn't +want a pytest dependency. + +Locates RIFT_ROOT by: + 1. honoring the ``$RIFT_ROOT`` env var if set; + 2. otherwise walking up from this file's location + (``parents[5]`` of the canonical + ``test/hyperpipe/tests/standalone_check.py`` path is the RIFT root). +""" +from __future__ import annotations + +import importlib.util +import os +import sys +import tempfile +import types +from pathlib import Path + + +_HERE = Path(__file__).resolve() + + +def _is_rift_root(p: Path) -> bool: + return (p / "MonteCarloMarginalizeCode" / "Code" / "RIFT" / "hyperpipe").exists() + + +def _rift_root() -> Path: + env = os.environ.get("RIFT_ROOT") + if env: + p = Path(env).resolve() + if _is_rift_root(p): + return p + if len(_HERE.parents) > 5: + candidate = _HERE.parents[5] + if _is_rift_root(candidate): + return candidate + raise SystemExit( + "Could not locate RIFT root. Set $RIFT_ROOT or run from " + "$RIFT_ROOT/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/." + ) + + +RIFT_ROOT = _rift_root() +HP = RIFT_ROOT / "MonteCarloMarginalizeCode" / "Code" / "RIFT" / "hyperpipe" +RIFT_PY = RIFT_ROOT / "MonteCarloMarginalizeCode" / "Code" + +# Bypass RIFT/__init__.py so we don't need the full lalsuite stack to run +# the smoke test --- this script is intended to be runnable from any +# Python env that has numpy. +fake_rift = types.ModuleType("RIFT") +fake_rift.__path__ = [str(RIFT_PY / "RIFT")] +sys.modules["RIFT"] = fake_rift +fake_hp = types.ModuleType("RIFT.hyperpipe") +fake_hp.__path__ = [str(HP)] +sys.modules["RIFT.hyperpipe"] = fake_hp + + +def _load(name, path): + spec = importlib.util.spec_from_file_location(name, str(path)) + mod = importlib.util.module_from_spec(spec) + sys.modules[name] = mod + spec.loader.exec_module(mod) + return mod + + +coords = _load("RIFT.hyperpipe.coords", HP / "coords.py") +config = _load("RIFT.hyperpipe.config", HP / "config.py") +marg_list = _load("RIFT.hyperpipe.marg_list", HP / "marg_list.py") +drivers_base = _load("RIFT.hyperpipe.drivers.base", HP / "drivers" / "base.py") + + +def run_checks() -> None: + # 1. coord-spec + spec = coords.HyperCoordSpec.from_strings( + coords_fit="x y z", + coords_sample="x:[-8,8] y:[-8,8] z:[-8,8]", + ) + spec.validate(strict_import=False) + assert "--integration-parameter-range x:[-8,8]" in spec.to_post_args() + + # 2. mono marg-list assembly + with tempfile.TemporaryDirectory() as tmpd: + base = Path(tmpd) / "base" + run = Path(tmpd) / "run" + base.mkdir() + run.mkdir() + (base / "example.py").write_text("#!/usr/bin/env python\n") + os.chmod(base / "example.py", 0o755) + cfg = { + "marg-list": [ + {"name": "g", "exe": "example.py", "args": "--ok", + "event-file": None, "n-chunk": 100, "coord-module": None} + ] + } + m = marg_list.assemble_marg_list(cfg, base_dir=str(base), run_dir=str(run)) + assert m.n_chunks == [100] + assert (run / "event-0.net").read_text().strip() == "empty_event_file" + + # 3. validate_config rejects empty config + try: + config.validate_config({}) + except ValueError: + pass + else: + raise AssertionError("validate_config({}) should have raised") + + # 4. base driver round-trip + with tempfile.TemporaryDirectory() as tmpd: + grid = Path(tmpd) / "g.dat" + grid.write_text("# lnL sigma_lnL x y z\n0 0 1.0 2.0 3.0\n") + rows, cols = drivers_base.read_grid(f"file:{grid}") + assert cols == ["x", "y", "z"] + rows[0, 0] = "-3.1415926535" + out = drivers_base.write_marg_output( + rows, cols, + fname_output_integral="f.txt", + outdir=str(Path(tmpd) / "out"), + fname=None, + conforming_output_name=True, + ) + assert "-3.1415926535" in Path(out).read_text() + + print(f"standalone_check: ALL OK (RIFT_ROOT={RIFT_ROOT})") + + +if __name__ == "__main__": + run_checks() diff --git a/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_config.py b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_config.py new file mode 100644 index 00000000..619de2ba --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_config.py @@ -0,0 +1,66 @@ +"""Unit tests for RIFT.hyperpipe.config.validate_config + helpers.""" +from __future__ import annotations + +import pytest + + +def test_validate_missing_section(hp_modules): + with pytest.raises(ValueError, match="missing required section"): + hp_modules.config.validate_config({}) + + +def test_validate_empty_marg_list(hp_modules): + cfg = { + "arch": {"n-iterations": 5, "n-samples-per-job": 1000}, + "post": {"coords-fit": "x"}, + "marg-list": [], + "puff": {}, + "init": {"file": "x"}, + "general": {}, + } + with pytest.raises(ValueError, match="at least one entry"): + hp_modules.config.validate_config(cfg) + + +def test_validate_bad_arch(hp_modules): + cfg = { + "arch": {"n-iterations": 0, "n-samples-per-job": 1000}, + "post": {"coords-fit": "x"}, + "marg-list": [{"exe": "x"}], + "puff": {}, + "init": {"file": "x"}, + "general": {}, + } + with pytest.raises(ValueError, match="n-iterations"): + hp_modules.config.validate_config(cfg) + + +def test_validate_missing_init(hp_modules): + cfg = { + "arch": {"n-iterations": 5, "n-samples-per-job": 1000}, + "post": {"coords-fit": "x"}, + "marg-list": [{"exe": "x"}], + "puff": {}, + "init": {}, + "general": {}, + } + with pytest.raises(ValueError, match="init.file"): + hp_modules.config.validate_config(cfg) + + +def test_validate_happy_path(hp_modules): + hp_modules.config.validate_config({ + "arch": {"n-iterations": 5, "n-samples-per-job": 1000}, + "post": {"coords-fit": "x y z"}, + "marg-list": [{"exe": "example.py"}], + "puff": {}, + "init": {"file": "/tmp/x"}, + "general": {}, + }) + + +def test_truthy_string_handling(hp_modules): + t = hp_modules.config.truthy + assert t("true") and t("True") and t("YES") and t("1") and t("on") + assert not t("false") and not t("no") and not t("") and not t(None) + assert t(True) and not t(False) and t(1) and not t(0) diff --git a/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_coords.py b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_coords.py new file mode 100644 index 00000000..72391081 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_coords.py @@ -0,0 +1,79 @@ +""" +Unit tests for RIFT.hyperpipe.coords. +""" +from __future__ import annotations + +import pytest + + +def test_parse_range_block(hp_modules): + c = hp_modules.coords + assert c.parse_range_block("x:[-8,8]") == ("x", (-8.0, 8.0)) + assert c.parse_range_block("g0=[0.2,2]") == ("g0", (0.2, 2.0)) + + +def test_parse_range_string(hp_modules): + c = hp_modules.coords + out = c.parse_range_string("x:[-8,8] y:[-1.5,1.5] z:[0,1e3]") + assert out == {"x": (-8.0, 8.0), "y": (-1.5, 1.5), "z": (0.0, 1e3)} + + +def test_gaussian_demo_emission(hp_modules): + """post / puff / test args must match the Gaussian demo verbatim.""" + spec = hp_modules.coords.HyperCoordSpec.from_strings( + coords_fit="x y z", + coords_sample="x:[-8,8] y:[-8,8] z:[-8,8]", + ) + spec.validate(strict_import=False) + + assert sorted(spec.to_post_args().split()) == sorted( + "--parameter x --parameter y --parameter z " + "--integration-parameter-range x:[-8,8] " + "--integration-parameter-range y:[-8,8] " + "--integration-parameter-range z:[-8,8]".split() + ) + assert sorted(spec.to_puff_args(force_away=0.03, puff_factor=0.5).split()) == sorted( + "--parameter x --parameter y --parameter z " + "--force-away 0.03 --puff-factor 0.5".split() + ) + assert sorted(spec.to_test_args().split()) == sorted( + "--parameter x --parameter y --parameter z " + "--method JS --threshold 0.05".split() + ) + + +def test_nicer_style_post_args(hp_modules): + """NICER-style spec with coord module and likelihood-factor trio.""" + spec = hp_modules.coords.HyperCoordSpec.from_strings( + name="rift_default", + coords_fit="g0 g1 g2 g3", + coords_sample="g0:[0.2,2] g1:[-1.6,1.7] g2:[-0.6,0.6] g3:[-0.02,0.02]", + likelihood_factor=("my_module", "my_factor", "my.ini"), + ) + post = spec.to_post_args() + for needle in ( + "--supplementary-coordinate-code rift_default", + "--supplementary-likelihood-factor-code my_module", + "--supplementary-likelihood-factor-function my_factor", + "--supplementary-likelihood-factor-ini my.ini", + "--integration-parameter-range g1:[-1.6,1.7]", + ): + assert needle in post + + +def test_fmt_num_integer_preserved(hp_modules): + """Integer-valued floats round-trip as ints (matches demo format).""" + f = hp_modules.coords.HyperCoordSpec._fmt_num + assert f(-8.0) == "-8" + assert f(2.0) == "2" + assert f(-1.6) == "-1.6" + assert f(0.02) == "0.02" + + +def test_validate_rejects_missing_range(hp_modules): + spec = hp_modules.coords.HyperCoordSpec.from_strings( + coords_fit="x y", + coords_sample="x:[-1,1]", + ) + with pytest.raises(ValueError, match="No integration range supplied"): + spec.validate(strict_import=False) diff --git a/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_drivers.py b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_drivers.py new file mode 100644 index 00000000..817e8721 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_drivers.py @@ -0,0 +1,98 @@ +""" +Tests for the toy Gaussian marg driver. + +Exercises the full in-tree code path (real env has scipy + lalsuite). +""" +from __future__ import annotations + +import os +import subprocess +import sys +from pathlib import Path + +import numpy as np + + +def test_base_driver_grid_roundtrip(hp_modules, tmp_path): + """Read -> mutate lnL/sigma -> write should round-trip without truncation.""" + grid = tmp_path / "grid.dat" + grid.write_text( + "# lnL sigma_lnL x y z\n" + "0 0 1.0 2.0 3.0\n" + "0 0 -1.0 -2.0 -3.0\n" + ) + rows, cols = hp_modules.drivers_base.read_grid(f"file:{grid}") + assert cols == ["x", "y", "z"] + + rows[0, 0] = "-3.1415926535" + rows[0, 1] = "1.234567e-3" + out = hp_modules.drivers_base.write_marg_output( + rows[:1], cols, + fname_output_integral="f.txt", + outdir=str(tmp_path / "out"), + fname=None, + conforming_output_name=True, + ) + assert Path(out).name == "f.txt+annotation.dat" + text = Path(out).read_text() + assert "-3.1415926535" in text + assert "1.234567e-3" in text + assert "lnL" in text and "sigma_lnL" in text and "x y z" in text + + +def test_gaussian_driver_end_to_end(rift_root, rift_py, tmp_path): + """ + Actually run util_HyperMargGaussian.py against a tiny grid and verify + the output file shape and that the mode point at (4, 0, 0) outranks + the far point at (10, 10, 10). + """ + grid = tmp_path / "grid.dat" + points = [ + ( 4.0, 0.0, 0.0), + (-4.0, 0.0, 0.0), + ( 0.0, 0.0, 0.0), + (10.0, 10.0, 10.0), + ( 4.0, 0.5, -0.5), + ] + with open(grid, "w") as f: + f.write("# lnL sigma_lnL x y z\n") + for x, y, z in points: + f.write(f"0 0 {x} {y} {z}\n") + + outdir = tmp_path / "out" + + cmd = [ + sys.executable, + str(rift_py / "bin" / "util_HyperMargGaussian.py"), + "--using-eos", f"file:{grid}", + "--eos_start_index", "0", + "--eos_end_index", "5", + "--outdir", str(outdir), + "--fname-output-integral", "lnL.txt", + "--conforming-output-name", + ] + env = dict(os.environ) + env["PYTHONPATH"] = str(rift_py) + os.pathsep + env.get("PYTHONPATH", "") + + proc = subprocess.run(cmd, capture_output=True, text=True, env=env) + assert proc.returncode == 0, ( + f"util_HyperMargGaussian.py exited {proc.returncode}\n" + f"STDOUT:\n{proc.stdout}\nSTDERR:\n{proc.stderr}" + ) + + out_file = outdir / "lnL.txt+annotation.dat" + assert out_file.exists(), f"expected {out_file} to exist" + + lines = [ln for ln in out_file.read_text().splitlines() if ln.strip()] + assert lines[0].lstrip("#").split() == ["lnL", "sigma_lnL", "x", "y", "z"] + data_lines = lines[1:] + assert len(data_lines) == 5 + for ln in data_lines: + assert len(ln.split()) == 5 + + arr = np.array([[float(c) for c in ln.split()] for ln in data_lines]) + lnL = arr[:, 0] + assert lnL[0] > lnL[3], ( + f"mode point (4,0,0) should have higher lnL than (10,10,10); got {lnL}" + ) + assert lnL[1] > lnL[3] diff --git a/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_hydra_integration.py b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_hydra_integration.py new file mode 100644 index 00000000..8a34eaa8 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_hydra_integration.py @@ -0,0 +1,130 @@ +""" +Live integration test: invoke util_RIFT_hyperpipe.py as a subprocess with +the real hydra-core / omegaconf installed by pixi. + +Verifies that the emitted command line and per-stage args/exe/event/nchunk +files match what create_eos_posterior_pipeline expects, without actually +running the DAG-builder (general.dry-run=true). +""" +from __future__ import annotations + +import os +import shlex +import subprocess +import sys +from pathlib import Path + +import pytest + + +def _ensure_hydra_available() -> None: + try: + import hydra # noqa: F401 + import omegaconf # noqa: F401 + except ImportError as exc: + pytest.skip(f"hydra/omegaconf not importable in this env: {exc}") + + +def test_util_rift_hyperpipe_dry_run(rift_root, rift_py, tmp_path): + _ensure_hydra_available() + + base = tmp_path / "base" + run = tmp_path / "run" + base.mkdir() + real_exe = rift_py / "bin" / "util_HyperMargGaussian.py" + if not real_exe.exists(): + pytest.skip(f"in-tree {real_exe} missing") + + grid_path = base / "blind_gaussian_plus_minus.dat" + grid_path.write_text( + "# lnL sigma_lnL x y z\n" + + "\n".join(f"0 0 {i} {i} {i}" for i in range(10)) + + "\n" + ) + + overrides = [ + f"general.rundir={run}", + "general.dry-run=true", + "general.use-osg=true", + "general.use-singularity=true", + "general.condor-local-nonworker=true", + "general.condor-local-nonworker-igwn-prefix=true", + "general.retries=5", + "general.request-disk=2G", + "arch.n-iterations=20", + "arch.n-samples-per-job=1000", + "arch.explode-marg-jobs=5", + f"init.file={grid_path}", + 'marg-list=[{name:gaussian, exe:' + + shlex.quote(str(real_exe)) + + ', args:"--outdir Gaussian_example --conforming-output-name", ' + 'event-file:null, n-chunk:100, coord-module:null, extra-args:""}]', + ] + + cmd = [sys.executable, str(rift_py / "bin" / "util_RIFT_hyperpipe.py"), *overrides] + env = dict(os.environ) + env["PYTHONPATH"] = str(rift_py) + os.pathsep + env.get("PYTHONPATH", "") + + proc = subprocess.run(cmd, capture_output=True, text=True, env=env, cwd=base) + print("STDOUT:", proc.stdout) + print("STDERR:", proc.stderr) + assert proc.returncode == 0, ( + f"util_RIFT_hyperpipe.py exited {proc.returncode}\n" + f"STDOUT:\n{proc.stdout}\nSTDERR:\n{proc.stderr}" + ) + + for fname in ( + "args_marg_eos.txt", + "args_marg_eos_exe.txt", + "args_eos_post.txt", + "args_puff.txt", + "args_test.txt", + "event-0.net", + "event_nchunk.txt", + "initial_grid.dat", + "transfer_file_list.txt", + ): + path = run / fname + assert path.exists(), f"expected {path} to be written" + + assert (run / "event-0.net").read_text().strip() == "empty_event_file" + assert (run / "event_nchunk.txt").read_text().strip() == "100" + + marg_args = (run / "args_marg_eos.txt").read_text().strip() + assert marg_args == "--outdir Gaussian_example --conforming-output-name", marg_args + + post = (run / "args_eos_post.txt").read_text() + for needle in ( + "--parameter x", "--parameter y", "--parameter z", + "--integration-parameter-range x:[-8,8]", + "--integration-parameter-range y:[-8,8]", + "--integration-parameter-range z:[-8,8]", + ): + assert needle in post, (needle, post) + + full = proc.stdout + for needle in ( + "create_eos_posterior_pipeline", + "--n-samples-per-job 1000", + "--n-iterations 20", + "--marg-event-exe-list-file", + "--marg-event-args-list-file", + "--marg-event-nchunk-list-file", + "--eos-post-args", + "--eos-post-exe", + "--puff-exe", + "--puff-args", + "--test-args", + "--test-exe convergence_test_samples", + "--use-osg", + "--use-singularity", + "--condor-local-nonworker", + "--condor-local-nonworker-igwn-prefix", + "--general-retries 5", + "--general-request-disk 2G", + "--request-memory-marg 16384", + "--eos-post-explode-jobs 5", + "--transfer-file-list", + "--event-file", + ): + assert needle in full, f"missing flag in dry-run command line: {needle}" diff --git a/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_marg_list.py b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_marg_list.py new file mode 100644 index 00000000..1c61ea44 --- /dev/null +++ b/MonteCarloMarginalizeCode/Code/test/hyperpipe/tests/test_marg_list.py @@ -0,0 +1,100 @@ +""" +Unit tests for RIFT.hyperpipe.marg_list. +""" +from __future__ import annotations + +import os +from pathlib import Path + +import pytest + + +def _make_fake_exe(p: Path) -> None: + p.write_text("#!/usr/bin/env python\n") + os.chmod(p, 0o755) + + +def test_mono_gaussian_assembly(hp_modules, tmp_path): + base = tmp_path / "base" + run = tmp_path / "run" + base.mkdir() + run.mkdir() + _make_fake_exe(base / "example_gaussian.py") + + cfg = { + "marg-list": [ + { + "name": "gaussian", + "exe": "example_gaussian.py", + "args": "--outdir Gaussian_example --conforming-output-name", + "event-file": None, + "n-chunk": 100, + "coord-module": None, + } + ] + } + marg = hp_modules.marg_list.assemble_marg_list( + cfg, base_dir=str(base), run_dir=str(run) + ) + + assert marg.names == ["gaussian"] + assert marg.n_chunks == [100] + assert marg.args_lines == ["--outdir Gaussian_example --conforming-output-name"] + assert Path(marg.exe_paths[0]).name == "example_gaussian.py" + assert (run / "event-0.net").read_text().strip() == "empty_event_file" + + marg.write_args_file(str(run / "args_marg_eos.txt")) + marg.write_exe_file(str(run / "args_marg_eos_exe.txt")) + marg.write_nchunk_file(str(run / "event_nchunk.txt")) + assert (run / "args_marg_eos.txt").read_text().strip() == ( + "--outdir Gaussian_example --conforming-output-name" + ) + assert (run / "event_nchunk.txt").read_text().strip() == "100" + + +def test_heterogeneous_nicer_plus_gw(hp_modules, tmp_path): + """Two drivers, different batch sizes, per-driver coord override.""" + base = tmp_path / "base" + run = tmp_path / "run" + base.mkdir() + run.mkdir() + _make_fake_exe(base / "reading_NICER_MR.py") + _make_fake_exe(base / "util_ConstructIntrinsicPosterior_GenericCoordinates.py") + (base / "my_event_B.net").write_text("# lnL sigma_lnL m1 m2\n0 0 1.4 1.4\n") + + cfg = { + "marg-list": [ + { + "name": "nicer", + "exe": "reading_NICER_MR.py", + "args": "--j0740 --j0030 --conforming-output-name", + "event-file": None, + "n-chunk": 5, + }, + { + "name": "gw", + "exe": "util_ConstructIntrinsicPosterior_GenericCoordinates.py", + "args": "--eos-param spectral --aligned-prior alignedspin-zprior", + "event-file": "my_event_B.net", + "n-chunk": 1, + "coord-module": "rift_default", + }, + ] + } + marg = hp_modules.marg_list.assemble_marg_list( + cfg, base_dir=str(base), run_dir=str(run) + ) + assert marg.n_chunks == [5, 1] + assert marg.names == ["nicer", "gw"] + assert "--supplementary-coordinate-code rift_default" in marg.args_lines[1] + assert "--supplementary-coordinate-code" not in marg.args_lines[0] + assert (run / "event-0.net").read_text().strip() == "empty_event_file" + assert "m1 m2" in (run / "event-1.net").read_text() + + +def test_missing_exe_raises(hp_modules, tmp_path): + cfg = {"marg-list": [{"name": "x"}]} + with pytest.raises(ValueError, match="missing required key 'exe'"): + hp_modules.marg_list.assemble_marg_list( + cfg, base_dir=str(tmp_path), run_dir=str(tmp_path) + )