From d2a75ef5862dbc67a26f98f64a7d478d663286ae Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Tue, 21 Apr 2026 15:32:35 +0200 Subject: [PATCH 01/29] Add inference + experiment configs for new ICON-REA-L interpolator Co-authored-by: Francesco Zanetta <62377868+frazane@users.noreply.github.com> Co-authored-by: Michele Cattaneo <44707621+MicheleCattaneo@users.noreply.github.com> Co-authored-by: Hugues de Laroussilhe --- ...ich1-oper-try-interpolator_for-Santis.yaml | 72 ++++++ ...rs-ich1-oper-try-interpolator_initial.yaml | 68 +++++ ...-oper-try-interpolator_minimal_Claude.yaml | 68 +++++ debugging_summary_interpolator_2.txt | 195 ++++++++++++++ ...gm-interpolator-global_trimedge_multi.yaml | 82 ++++++ .../sgm-interpolator-ich1-oper-patch.yaml | 241 ++++++++++++++++++ 6 files changed, 726 insertions(+) create mode 100644 config/forecasters-ich1-oper-try-interpolator_for-Santis.yaml create mode 100644 config/forecasters-ich1-oper-try-interpolator_initial.yaml create mode 100644 config/forecasters-ich1-oper-try-interpolator_minimal_Claude.yaml create mode 100644 debugging_summary_interpolator_2.txt create mode 100644 resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml create mode 100644 resources/inference/metadata/sgm-interpolator-ich1-oper-patch.yaml diff --git a/config/forecasters-ich1-oper-try-interpolator_for-Santis.yaml b/config/forecasters-ich1-oper-try-interpolator_for-Santis.yaml new file mode 100644 index 00000000..729bc100 --- /dev/null +++ b/config/forecasters-ich1-oper-try-interpolator_for-Santis.yaml @@ -0,0 +1,72 @@ +# yaml-language-server: $schema=../workflow/tools/config.schema.json +description: | + Evaluate skill of new time-interpolator (trained on ICON-REA-L, fine-tuned on KENDA-ICON-CH1) + driven by ICON-CH1 stage_E forecaster, using anemoi-inference patch from issue #482. + VERSION FOR SANTIS -- includes fixes for issues 2 (anemoi-datasets) and 3 (runner rename). + Does NOT include Balfrin-specific path remapping. 
+ NOTE: requires sgm-interpolator-global_trimedge_multi.yaml inference config (runner: time_multi_interpolator). + NOTE: may also need dataset date-range fix (issue 6) if training datasets end at 2024-12-31 on Santis. + +dates: + start: 2025-03-01T00:00 + end: 2025-03-02T00:00 + frequency: 24h + + +runs: + + - interpolator: + inference_resources: + slurm_partition: normal-shared + checkpoint: https://mlflow.ecmwf.int/#/experiments/456/runs/f9279244ed6f4c458597bdcf335ab36f + label: interpolator_ICON-REA-L + steps: 0/120/1 + config: resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml + forecaster: + checkpoint: https://service.meteoswiss.ch/mlstore#/experiments/602/runs/fd63e17043014af59170c7beca516b95 + config: resources/inference/configs/sgm-multidataset-forecaster-global-ich1-oper.yaml + steps: 0/120/6 + extra_requirements: + - git+https://github.com/ecmwf/anemoi-inference.git@e369b1a90313e9701db13f63364a467aa281cf36 + extra_requirements: + - git+https://github.com/ecmwf/anemoi-inference.git@e369b1a90313e9701db13f63364a467aa281cf36 + - anemoi-datasets==0.5.35 + + +baselines: + - baseline: + baseline_id: ICON-CH2-EPS + label: ICON-CH2-ctrl + root: /scratch/mch/cmerker/ICON-CH2-EPS + steps: 0/120/6 + +truth: + label: KENDA-CH1 + root: /store_new/mch/msopr/ml/datasets/mch-ich1-1km-2024-2025-1h-pl13-v1.0.zarr + +stratification: + regions: + - jura + - mittelland + - voralpen + - alpennordhang + - innerealpentaeler + - alpensuedseite + root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517 + +locations: + output_root: output/ + +profile: + executor: slurm + global_resources: + gpus: 16 + default_resources: + slurm_partition: "postproc" + cpus_per_task: 1 + mem_mb_per_cpu: 1800 + runtime: "1h" + gpus: 0 + jobs: 50 + batch_rules: + plot_forecast_frame: 32 diff --git a/config/forecasters-ich1-oper-try-interpolator_initial.yaml b/config/forecasters-ich1-oper-try-interpolator_initial.yaml new file mode 100644 index 00000000..7f11199f --- 
/dev/null +++ b/config/forecasters-ich1-oper-try-interpolator_initial.yaml @@ -0,0 +1,68 @@ +# yaml-language-server: $schema=../workflow/tools/config.schema.json +description: | + Evaluate skill of new time-interpolator (trained on ICON-REA-L, fine-tuned on KENDA-ICON-CH1) + driven by ICON-CH1 stage_E forecaster, using anemoi-inference patch from issue #482. + INITIAL VERSION -- before any debugging fixes. + +dates: + start: 2025-03-01T00:00 + end: 2025-03-02T00:00 + frequency: 24h + + +runs: + + - interpolator: + inference_resources: + slurm_partition: normal-shared + checkpoint: https://mlflow.ecmwf.int/#/experiments/456/runs/f9279244ed6f4c458597bdcf335ab36f + label: interpolator_ICON-REA-L + steps: 0/120/1 + config: resources/inference/configs/sgm-interpolator-global_trimedge.yaml + forecaster: + checkpoint: https://service.meteoswiss.ch/mlstore#/experiments/602/runs/fd63e17043014af59170c7beca516b95 + config: resources/inference/configs/sgm-multidataset-forecaster-global-ich1-oper.yaml + steps: 0/120/6 + extra_requirements: + - git+https://github.com/ecmwf/anemoi-inference.git@e369b1a90313e9701db13f63364a467aa281cf36 + extra_requirements: + - git+https://github.com/ecmwf/anemoi-inference.git@e369b1a90313e9701db13f63364a467aa281cf36 + + +baselines: + - baseline: + baseline_id: ICON-CH2-EPS + label: ICON-CH2-ctrl + root: /scratch/mch/cmerker/ICON-CH2-EPS + steps: 0/120/6 + +truth: + label: KENDA-CH1 + root: /store_new/mch/msopr/ml/datasets/mch-ich1-1km-2024-2025-1h-pl13-v1.0.zarr + +stratification: + regions: + - jura + - mittelland + - voralpen + - alpennordhang + - innerealpentaeler + - alpensuedseite + root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517 + +locations: + output_root: output/ + +profile: + executor: slurm + global_resources: + gpus: 16 + default_resources: + slurm_partition: "postproc" + cpus_per_task: 1 + mem_mb_per_cpu: 1800 + runtime: "1h" + gpus: 0 + jobs: 50 + batch_rules: + plot_forecast_frame: 32 diff --git 
a/config/forecasters-ich1-oper-try-interpolator_minimal_Claude.yaml b/config/forecasters-ich1-oper-try-interpolator_minimal_Claude.yaml new file mode 100644 index 00000000..74b98dac --- /dev/null +++ b/config/forecasters-ich1-oper-try-interpolator_minimal_Claude.yaml @@ -0,0 +1,68 @@ +# yaml-language-server: $schema=../workflow/tools/config.schema.json +description: | + Evaluate skill of new time-interpolator (trained on ICON-REA-L, fine-tuned on KENDA-ICON-CH1) + driven by ICON-CH1 stage_E forecaster, using anemoi-inference patch from issue #482. + +dates: + start: 2025-03-01T00:00 + end: 2025-03-02T00:00 + frequency: 24h + + +runs: + + - interpolator: + inference_resources: + slurm_partition: normal-shared + checkpoint: /scratch/mch/miccatta/ICON_interpolator_checkpoints/checkpoint_stage-C-interpolator-n320-6hto1h-reduced-variables/f9279244ed6f4c458597bdcf335ab36f/inference-anemoi-by_epoch-epoch_000-step_001000.ckpt + label: interpolator_ICON-REA-L + steps: 0/120/1 + config: resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml + forecaster: + checkpoint: https://service.meteoswiss.ch/mlstore#/experiments/602/runs/fd63e17043014af59170c7beca516b95 + config: resources/inference/configs/sgm-multidataset-forecaster-global-ich1-oper.yaml + steps: 0/120/6 + extra_requirements: + - git+https://github.com/ecmwf/anemoi-inference.git@e369b1a90313e9701db13f63364a467aa281cf36 + extra_requirements: + - git+https://github.com/ecmwf/anemoi-inference.git@e369b1a90313e9701db13f63364a467aa281cf36 + - anemoi-datasets==0.5.35 + + +baselines: + - baseline: + baseline_id: ICON-CH2-EPS + label: ICON-CH2-ctrl + root: /scratch/mch/cmerker/ICON-CH2-EPS + steps: 0/120/6 + +truth: + label: KENDA-CH1 + root: /store_new/mch/msopr/ml/datasets/mch-ich1-1km-2024-2025-1h-pl13-v1.0.zarr + +stratification: + regions: + - jura + - mittelland + - voralpen + - alpennordhang + - innerealpentaeler + - alpensuedseite + root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517 + 
+locations: + output_root: output/ + +profile: + executor: slurm + global_resources: + gpus: 16 + default_resources: + slurm_partition: "postproc" + cpus_per_task: 1 + mem_mb_per_cpu: 1800 + runtime: "1h" + gpus: 0 + jobs: 50 + batch_rules: + plot_forecast_frame: 32 diff --git a/debugging_summary_interpolator_2.txt b/debugging_summary_interpolator_2.txt new file mode 100644 index 00000000..f2bee469 --- /dev/null +++ b/debugging_summary_interpolator_2.txt @@ -0,0 +1,195 @@ +Debugging summary: running the new ICON-REA-L interpolator in evalML on Balfrin +================================================================================ + +Interpolator checkpoint: https://mlflow.ecmwf.int/#/experiments/456/runs/f9279244ed6f4c458597bdcf335ab36f + Local path: /scratch/mch/miccatta/ICON_interpolator_checkpoints/checkpoint_stage-C-interpolator-n320-6hto1h-reduced-variables/f9279244ed6f4c458597bdcf335ab36f/inference-anemoi-by_epoch-epoch_000-step_001000.ckpt +Forecaster checkpoint: https://service.meteoswiss.ch/mlstore#/experiments/602/runs/fd63e17043014af59170c7beca516b95 +anemoi-inference patch: https://github.com/ecmwf/anemoi-inference/issues/482 (commit e369b1a) + +The interpolator checkpoint was trained on Santis. Running it on Balfrin required +fixing 10 issues. Issues 1, 5, 6, 7 are Balfrin-specific (cross-site portability). +Issues 2, 3, 4, 8, 9, 10 affect both Santis and Balfrin. + +Status as of 2026-04-16 17:30: inference runs to completion (all 20 interpolation +windows, GRIB output written). The remaining failure is in the evalML verification +step (verif_metrics), not in inference itself. See CURRENT STATUS below. + + +Issue 1: Checkpoint not accessible via MLflow [Balfrin-only] +------------------------------------------------------------ +The MLflow script resolved the checkpoint URL to a path on miccatta's home dir: + /users/miccatta/symlinks-scratch/anemoi-outputs/checkpoint_stage-C-interpolator-n320-6hto1h-reduced-variables/... 
+This path does not exist on Balfrin (likely a broken symlink or permissions issue). + +Fix: Use the local checkpoint path directly in the evalML config: + /scratch/mch/miccatta/ICON_interpolator_checkpoints/... + + +Issue 2: Bad git hash for anemoi-datasets in checkpoint metadata [Both] +----------------------------------------------------------------------- +The auto-extracted requirements from the checkpoint contained a bogus git commit +hash for anemoi-datasets (d2e9f8c7...) that doesn't exist in the public repo. + +Root cause: https://github.com/ecmwf/anemoi-utils/issues/284 +When .venv is inside a git repo, gather_provenance_info() incorrectly records the +parent repo's SHA for almost all packages. Fix PR: https://github.com/ecmwf/anemoi-utils/pull/285 + +Fix: Add `anemoi-datasets==0.5.35` to the interpolator's extra_requirements in +the evalML config, which overrides the broken git hash with the correct PyPI release. + + +Issue 3: Runner renamed in anemoi-inference [Both] +-------------------------------------------------- +The anemoi-inference version from the #482 patch (commit e369b1a) does not have a +`time-interpolator` runner. It was renamed to `time-multi-interpolator`. + +Available runners: default, external-graph, time-multi-interpolator, parallel, +plugin, simple, testing, no-model. + +Fix: Created a new inference config (sgm-interpolator-global_trimedge_multi.yaml) +based on sgm-interpolator-global_trimedge.yaml with: + runner: time_multi_interpolator + + +Issue 4: patch_metadata path convention in inference config [Both] +------------------------------------------------------------------ +The evalML workflow copies files from resources/inference/metadata/ into the run's +resources/ directory. 
So the inference config must reference patch files as: + patch_metadata: resources/sgm-interpolator-ich1-oper-patch.yaml +NOT: + patch_metadata: resources/inference/metadata/sgm-interpolator-ich1-oper-patch.yaml + +Fix: Corrected the path in the inference config. + + +Issue 5: Training dataset paths from Santis not available on Balfrin [Balfrin-only] +----------------------------------------------------------------------------------- +The checkpoint metadata embeds the training dataset paths from Santis: + /capstor/store/mch/msopr/ml/datasets/mch-realch1-fdb-1km-2005-2025-1h-pl13-ifsnames-1h-precip-v1.0.zarr + /capstor/store/mch/msopr/ml/datasets/aifs-ea-an-oper-0001-mars-n320-1979-2024-1h-v2-with-era51.zarr + +/capstor/store/mch is not mounted on Balfrin. The same datasets exist under /store_new/mch/... + +Fix: Created a patch_metadata file (resources/inference/metadata/sgm-interpolator-ich1-oper-patch.yaml) +that remaps the dataset paths -- following the same pattern as the forecaster's +sgm-multidataset-ich1-oper-patch.yaml. + + +Issue 6: Training datasets don't cover experiment dates (2025) [Balfrin-only] +----------------------------------------------------------------------------- +The original training datasets (mch-realch1-fdb-..., aifs-ea-...) only have data +through 2024-12-31, but the experiment starts 2025-03-01. The constant forcings +loader requires the forecast date to exist in the dataset, even for time-invariant +fields (z, lsm). + +Fix: In the patch_metadata, swapped to datasets that cover 2025: + mch-ich1-1km-2024-2025-1h-pl13-ifsnames-v1.0.zarr (LAM, 1h) + aifs-od-an-oper-0001-mars-n320-2016-2025-6h-v1-combined-land.zarr (global, 6h) + +Note: On Santis this may also be an issue if the training datasets end at 2024-12-31. + + +Issue 7: Frequency mismatch between LAM (1h) and global (6h) datasets [Balfrin-only] +------------------------------------------------------------------------------------- +The replacement global dataset (aifs-od-...) 
is 6-hourly, but the checkpoint +metadata specifies frequency=1h. The dataset open call fails with: + "Requested frequency 1h is not a multiple of the dataset frequency 6:00:00" + +This only affects constant forcings (z, lsm) -- the main input comes from GRIB files. + +Fix: Override frequency to 6h in the patch_metadata at the correct nesting level: + config.dataloader.test.datasets.data.dataset_config.frequency: 6h + +Important: the frequency field must be inside dataset_config (alongside dataset), +NOT alongside start/end (which are at the dataloader level). The deep merge +applies at the dataset_config level. + + +Issue 8: No source0/trimedge_mask in checkpoint [Both] +------------------------------------------------------ +The inference config (copied from the CO2 interpolator) referenced a +source0/trimedge_mask supporting array for pre/post-processing of boundary points. +This checkpoint was NOT trained with edge trimming -- the array doesn't exist. + +Available supporting arrays: latitudes, longitudes, lam_0/cutout_mask, +global/cutout_mask, source0/latitudes, source0/longitudes, source1/latitudes, +source1/longitudes. + +Fix: Removed both the extract_mask pre-processor and assign_mask post-processor +that referenced source0/trimedge_mask from the inference config. + + +Issue 9: GRIB output template mismatch [Both] +---------------------------------------------- +Two sub-issues: + +a) The LAM output used COSMO templates (templates_index_cosmo.yaml) which match on + grid=0.02 or grid=0.01. This checkpoint outputs on an ICON-CH1 grid. + Fix: Switched to templates_index_icon.yaml. + +b) Even with ICON templates, the template lookup failed because the output variables + use IFS naming (10u, 2t, msl...) but the ICON templates match on ICON param names + (U_10M, T_2M, PMSL...). The forecaster works because its patch_metadata includes + a variables_metadata section that maps IFS names to ICON GRIB params. 
+ Fix: Added the full variables_metadata section (copied from the forecaster's + sgm-multidataset-ich1-oper-patch.yaml) to the interpolator's patch_metadata. + + +Issue 10: Assertion error in verbose output printing [Both] +----------------------------------------------------------- +With verbosity=1, the _print_output_tensor method asserts that the first tensor +dimension is 1 or multi_step_input (2), but the multi-output interpolator produces +6 time steps. This is a bug in anemoi-inference (the verbose printing code doesn't +account for multi-output interpolators). + +Fix: Set verbosity: 0 in the inference config to skip the debug tensor printing. + +Note: This should be reported as a bug to anemoi-inference. + + +CURRENT STATUS: Verification step failure (not an inference issue) +------------------------------------------------------------------ +The interpolator inference now runs to completion (all 20 windows, 120h lead time, +hourly output). GRIB files are written for both LAM and IFS grids. + +The remaining failure is in evalML's verif_metrics step: + AttributeError: 'Dataset' object has no attribute 'ref_time' +The verification reader can't parse the output GRIB files -- likely missing +reference time metadata in the GRIB encoding. This needs investigation in the +evalML verification code or the GRIB encoding configuration. 
+ + +Files created/modified +---------------------- +New files: + resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml + - Inference config for the new interpolator + - runner: time_multi_interpolator + - No trimedge pre/post-processors + - ICON templates instead of COSMO + - verbosity: 0 + + resources/inference/metadata/sgm-interpolator-ich1-oper-patch.yaml + - Checkpoint metadata patch for Balfrin + - Remaps dataset paths from /capstor/store/mch to /store_new/mch + - Uses 2025-covering datasets for constant forcings + - Overrides frequency to 6h (for the 6h global dataset) + - Full variables_metadata for IFS-to-ICON param name mapping + + config/forecasters-ich1-oper-try-interpolator_minimal_Claude.yaml + - evalML experiment config for Balfrin + - Local checkpoint path + - anemoi-datasets==0.5.35 override + + config/forecasters-ich1-oper-try-interpolator_initial.yaml + - The initial config before any fixes (for reference) + + config/forecasters-ich1-oper-try-interpolator_for-Santis.yaml + - Config for running on Santis (no Balfrin-specific path remapping) + - Includes fixes for issues 2, 3 (universal issues) + + debugging_summary_interpolator.txt + - First version of this summary (covers issues 1-6) + + debugging_summary_interpolator_2.txt + - This file (covers all 10 issues + current status) diff --git a/resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml b/resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml new file mode 100644 index 00000000..0bbb5fbc --- /dev/null +++ b/resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml @@ -0,0 +1,82 @@ +runner: time_multi_interpolator + +input: + cutout: + - lam_0: + grib: + path: forecaster/20* + namer: &namer + rules: + - - shortName: T + - t_{level} + - - shortName: U + - u_{level} + - - shortName: V + - v_{level} + - - shortName: W + - w_{level} + - - shortName: QV + - q_{level} + - - shortName: FI + - z_{level} + - - shortName: PMSL + - msl 
+ - - shortName: FIS + - z + - - shortName: PS + - sp + - - shortName: T_2M + - 2t + - - shortName: TD_2M + - 2d + - - shortName: T_G + - skt + - - shortName: U_10M + - 10u + - - shortName: V_10M + - 10v + - - shortName: FR_LAND + - lsm + - - shortName: TOT_PREC + - tp + - global: + grib: + path: forecaster/ifs* + namer: *namer + +constant_forcings: + test: + use_original_paths: true + +patch_metadata: resources/sgm-interpolator-ich1-oper-patch.yaml + +output: + tee: + - grib: + path: grib/{dateTime}_{step:03}.grib + encoding: + typeOfGeneratingProcess: 2 + templates: + samples: resources/templates_index_icon.yaml + post_processors: + - extract_mask: # removes global points + mask: "lam_0/cutout_mask" + as_slice: true + + - grib: + path: grib/ifs-{dateTime}_{step:03}.grib + encoding: + typeOfGeneratingProcess: 2 + templates: + samples: resources/templates_index_ifs.yaml + post_processors: + - extract_mask: # removes lam points + mask: "lam_0/cutout_mask" + as_slice: true + inverse: true + - assign_mask: # fill local/global overlapping points with nan + mask: "global/cutout_mask" + +verbosity: 0 +allow_nans: true +output_frequency: "1h" diff --git a/resources/inference/metadata/sgm-interpolator-ich1-oper-patch.yaml b/resources/inference/metadata/sgm-interpolator-ich1-oper-patch.yaml new file mode 100644 index 00000000..b1dde369 --- /dev/null +++ b/resources/inference/metadata/sgm-interpolator-ich1-oper-patch.yaml @@ -0,0 +1,241 @@ +config: + dataloader: + test: + datasets: + data: + dataset_config: + dataset: + cutout: + - dataset: /store_new/mch/msopr/ml/datasets/mch-ich1-1km-2024-2025-1h-pl13-ifsnames-v1.0.zarr + - dataset: /store_new/mch/msopr/ml/datasets/aifs-od-an-oper-0001-mars-n320-2016-2025-6h-v1-combined-land.zarr + frequency: 6h + start: null + end: null + +dataset: + data: + constant_fields: [z, lsm] + variables_metadata: + 10u: + mars: + date: 20050101 + levtype: sfc + param: U_10M + step: 12 + time: 0 + 10v: + mars: + date: 20050101 + levtype: sfc + 
param: V_10M + step: 12 + time: 0 + 2d: + mars: + date: 20050101 + levtype: sfc + param: TD_2M + step: 12 + time: 0 + 2t: + mars: + date: 20050101 + levtype: sfc + param: T_2M + step: 12 + time: 0 + cos_julian_day: + computed_forcing: true + constant_in_time: false + cos_latitude: + computed_forcing: true + constant_in_time: true + cos_local_time: + computed_forcing: true + constant_in_time: false + cos_longitude: + computed_forcing: true + constant_in_time: true + insolation: + computed_forcing: true + constant_in_time: false + lsm: + constant_in_time: true + mars: + date: 20050101 + levtype: sfc + param: FR_LAND + step: 0 + time: 12 + msl: + mars: + date: 20050101 + levtype: sfc + param: PMSL + step: 12 + time: 0 + q_100: + mars: {date: 20050101, levelist: 100, levtype: pl, param: QV, step: 12, time: 0} + q_1000: + mars: {date: 20050101, levelist: 1000, levtype: pl, param: QV, step: 12, time: 0} + q_150: + mars: {date: 20050101, levelist: 150, levtype: pl, param: QV, step: 12, time: 0} + q_200: + mars: {date: 20050101, levelist: 200, levtype: pl, param: QV, step: 12, time: 0} + q_250: + mars: {date: 20050101, levelist: 250, levtype: pl, param: QV, step: 12, time: 0} + q_300: + mars: {date: 20050101, levelist: 300, levtype: pl, param: QV, step: 12, time: 0} + q_400: + mars: {date: 20050101, levelist: 400, levtype: pl, param: QV, step: 12, time: 0} + q_50: + mars: {date: 20050101, levelist: 50, levtype: pl, param: QV, step: 12, time: 0} + q_500: + mars: {date: 20050101, levelist: 500, levtype: pl, param: QV, step: 12, time: 0} + q_700: + mars: {date: 20050101, levelist: 700, levtype: pl, param: QV, step: 12, time: 0} + q_850: + mars: {date: 20050101, levelist: 850, levtype: pl, param: QV, step: 12, time: 0} + q_925: + mars: {date: 20050101, levelist: 925, levtype: pl, param: QV, step: 12, time: 0} + sin_julian_day: + computed_forcing: true + constant_in_time: false + sin_latitude: + computed_forcing: true + constant_in_time: true + sin_local_time: + 
computed_forcing: true + constant_in_time: false + sin_longitude: + computed_forcing: true + constant_in_time: true + sp: + mars: + date: 20050101 + levtype: sfc + param: PS + step: 12 + time: 0 + skt: + mars: + date: 20050101 + levtype: sfc + param: T_G + step: 12 + time: 0 + t_100: + mars: {date: 20050101, levelist: 100, levtype: pl, param: T, step: 12, time: 0} + t_1000: + mars: {date: 20050101, levelist: 1000, levtype: pl, param: T, step: 12, time: 0} + t_150: + mars: {date: 20050101, levelist: 150, levtype: pl, param: T, step: 12, time: 0} + t_200: + mars: {date: 20050101, levelist: 200, levtype: pl, param: T, step: 12, time: 0} + t_250: + mars: {date: 20050101, levelist: 250, levtype: pl, param: T, step: 12, time: 0} + t_300: + mars: {date: 20050101, levelist: 300, levtype: pl, param: T, step: 12, time: 0} + t_400: + mars: {date: 20050101, levelist: 400, levtype: pl, param: T, step: 12, time: 0} + t_50: + mars: {date: 20050101, levelist: 50, levtype: pl, param: T, step: 12, time: 0} + t_500: + mars: {date: 20050101, levelist: 500, levtype: pl, param: T, step: 12, time: 0} + t_700: + mars: {date: 20050101, levelist: 700, levtype: pl, param: T, step: 12, time: 0} + t_850: + mars: {date: 20050101, levelist: 850, levtype: pl, param: T, step: 12, time: 0} + t_925: + mars: {date: 20050101, levelist: 925, levtype: pl, param: T, step: 12, time: 0} + tp: + mars: + date: 20050101 + levtype: sfc + param: TOT_PREC + step: 12 + time: 0 + period: + - 1h + process: accumulation + u_100: + mars: {date: 20050101, levelist: 100, levtype: pl, param: U, step: 12, time: 0} + u_1000: + mars: {date: 20050101, levelist: 1000, levtype: pl, param: U, step: 12, time: 0} + u_150: + mars: {date: 20050101, levelist: 150, levtype: pl, param: U, step: 12, time: 0} + u_200: + mars: {date: 20050101, levelist: 200, levtype: pl, param: U, step: 12, time: 0} + u_250: + mars: {date: 20050101, levelist: 250, levtype: pl, param: U, step: 12, time: 0} + u_300: + mars: {date: 20050101, levelist: 300, 
levtype: pl, param: U, step: 12, time: 0} + u_400: + mars: {date: 20050101, levelist: 400, levtype: pl, param: U, step: 12, time: 0} + u_50: + mars: {date: 20050101, levelist: 50, levtype: pl, param: U, step: 12, time: 0} + u_500: + mars: {date: 20050101, levelist: 500, levtype: pl, param: U, step: 12, time: 0} + u_700: + mars: {date: 20050101, levelist: 700, levtype: pl, param: U, step: 12, time: 0} + u_850: + mars: {date: 20050101, levelist: 850, levtype: pl, param: U, step: 12, time: 0} + u_925: + mars: {date: 20050101, levelist: 925, levtype: pl, param: U, step: 12, time: 0} + v_100: + mars: {date: 20050101, levelist: 100, levtype: pl, param: V, step: 12, time: 0} + v_1000: + mars: {date: 20050101, levelist: 1000, levtype: pl, param: V, step: 12, time: 0} + v_150: + mars: {date: 20050101, levelist: 150, levtype: pl, param: V, step: 12, time: 0} + v_200: + mars: {date: 20050101, levelist: 200, levtype: pl, param: V, step: 12, time: 0} + v_250: + mars: {date: 20050101, levelist: 250, levtype: pl, param: V, step: 12, time: 0} + v_300: + mars: {date: 20050101, levelist: 300, levtype: pl, param: V, step: 12, time: 0} + v_400: + mars: {date: 20050101, levelist: 400, levtype: pl, param: V, step: 12, time: 0} + v_50: + mars: {date: 20050101, levelist: 50, levtype: pl, param: V, step: 12, time: 0} + v_500: + mars: {date: 20050101, levelist: 500, levtype: pl, param: V, step: 12, time: 0} + v_700: + mars: {date: 20050101, levelist: 700, levtype: pl, param: V, step: 12, time: 0} + v_850: + mars: {date: 20050101, levelist: 850, levtype: pl, param: V, step: 12, time: 0} + v_925: + mars: {date: 20050101, levelist: 925, levtype: pl, param: V, step: 12, time: 0} + z: + constant_in_time: true + mars: + date: 20050101 + levelist: null + levtype: sfc + param: FIS + step: 0 + time: 12 + z_100: + mars: {date: 20050101, levelist: 100, levtype: pl, param: FI, step: 12, time: 0} + z_1000: + mars: {date: 20050101, levelist: 1000, levtype: pl, param: FI, step: 12, time: 0} + z_150: + 
mars: {date: 20050101, levelist: 150, levtype: pl, param: FI, step: 12, time: 0} + z_200: + mars: {date: 20050101, levelist: 200, levtype: pl, param: FI, step: 12, time: 0} + z_250: + mars: {date: 20050101, levelist: 250, levtype: pl, param: FI, step: 12, time: 0} + z_300: + mars: {date: 20050101, levelist: 300, levtype: pl, param: FI, step: 12, time: 0} + z_400: + mars: {date: 20050101, levelist: 400, levtype: pl, param: FI, step: 12, time: 0} + z_50: + mars: {date: 20050101, levelist: 50, levtype: pl, param: FI, step: 12, time: 0} + z_500: + mars: {date: 20050101, levelist: 500, levtype: pl, param: FI, step: 12, time: 0} + z_700: + mars: {date: 20050101, levelist: 700, levtype: pl, param: FI, step: 12, time: 0} + z_850: + mars: {date: 20050101, levelist: 850, levtype: pl, param: FI, step: 12, time: 0} + z_925: + mars: {date: 20050101, levelist: 925, levtype: pl, param: FI, step: 12, time: 0} From f60a9ab71c107baf013cb52c789e3a5d51e545c6 Mon Sep 17 00:00:00 2001 From: Jonas Bhend Date: Wed, 22 Apr 2026 08:29:42 +0200 Subject: [PATCH 02/29] Renamed interpolator config and switch to hourly baseline --- ...interpolator_minimal_Claude.yaml => interpolators-ich1.yaml} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename config/{forecasters-ich1-oper-try-interpolator_minimal_Claude.yaml => interpolators-ich1.yaml} (98%) diff --git a/config/forecasters-ich1-oper-try-interpolator_minimal_Claude.yaml b/config/interpolators-ich1.yaml similarity index 98% rename from config/forecasters-ich1-oper-try-interpolator_minimal_Claude.yaml rename to config/interpolators-ich1.yaml index 74b98dac..ba6544d6 100644 --- a/config/forecasters-ich1-oper-try-interpolator_minimal_Claude.yaml +++ b/config/interpolators-ich1.yaml @@ -34,7 +34,7 @@ baselines: baseline_id: ICON-CH2-EPS label: ICON-CH2-ctrl root: /scratch/mch/cmerker/ICON-CH2-EPS - steps: 0/120/6 + steps: 0/120/1 truth: label: KENDA-CH1 From 5f86d81877ea7abd43e3ade2c1eb9918b1c2b6ae Mon Sep 17 00:00:00 2001 From: Jonas Bhend 
Date: Wed, 22 Apr 2026 08:32:03 +0200 Subject: [PATCH 03/29] Remove intermediate configs --- ...ich1-oper-try-interpolator_for-Santis.yaml | 72 ------------------- ...rs-ich1-oper-try-interpolator_initial.yaml | 68 ------------------ 2 files changed, 140 deletions(-) delete mode 100644 config/forecasters-ich1-oper-try-interpolator_for-Santis.yaml delete mode 100644 config/forecasters-ich1-oper-try-interpolator_initial.yaml diff --git a/config/forecasters-ich1-oper-try-interpolator_for-Santis.yaml b/config/forecasters-ich1-oper-try-interpolator_for-Santis.yaml deleted file mode 100644 index 729bc100..00000000 --- a/config/forecasters-ich1-oper-try-interpolator_for-Santis.yaml +++ /dev/null @@ -1,72 +0,0 @@ -# yaml-language-server: $schema=../workflow/tools/config.schema.json -description: | - Evaluate skill of new time-interpolator (trained on ICON-REA-L, fine-tuned on KENDA-ICON-CH1) - driven by ICON-CH1 stage_E forecaster, using anemoi-inference patch from issue #482. - VERSION FOR SANTIS -- includes fixes for issues 2 (anemoi-datasets) and 3 (runner rename). - Does NOT include Balfrin-specific path remapping. - NOTE: requires sgm-interpolator-global_trimedge_multi.yaml inference config (runner: time_multi_interpolator). - NOTE: may also need dataset date-range fix (issue 6) if training datasets end at 2024-12-31 on Santis. 
- -dates: - start: 2025-03-01T00:00 - end: 2025-03-02T00:00 - frequency: 24h - - -runs: - - - interpolator: - inference_resources: - slurm_partition: normal-shared - checkpoint: https://mlflow.ecmwf.int/#/experiments/456/runs/f9279244ed6f4c458597bdcf335ab36f - label: interpolator_ICON-REA-L - steps: 0/120/1 - config: resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml - forecaster: - checkpoint: https://service.meteoswiss.ch/mlstore#/experiments/602/runs/fd63e17043014af59170c7beca516b95 - config: resources/inference/configs/sgm-multidataset-forecaster-global-ich1-oper.yaml - steps: 0/120/6 - extra_requirements: - - git+https://github.com/ecmwf/anemoi-inference.git@e369b1a90313e9701db13f63364a467aa281cf36 - extra_requirements: - - git+https://github.com/ecmwf/anemoi-inference.git@e369b1a90313e9701db13f63364a467aa281cf36 - - anemoi-datasets==0.5.35 - - -baselines: - - baseline: - baseline_id: ICON-CH2-EPS - label: ICON-CH2-ctrl - root: /scratch/mch/cmerker/ICON-CH2-EPS - steps: 0/120/6 - -truth: - label: KENDA-CH1 - root: /store_new/mch/msopr/ml/datasets/mch-ich1-1km-2024-2025-1h-pl13-v1.0.zarr - -stratification: - regions: - - jura - - mittelland - - voralpen - - alpennordhang - - innerealpentaeler - - alpensuedseite - root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517 - -locations: - output_root: output/ - -profile: - executor: slurm - global_resources: - gpus: 16 - default_resources: - slurm_partition: "postproc" - cpus_per_task: 1 - mem_mb_per_cpu: 1800 - runtime: "1h" - gpus: 0 - jobs: 50 - batch_rules: - plot_forecast_frame: 32 diff --git a/config/forecasters-ich1-oper-try-interpolator_initial.yaml b/config/forecasters-ich1-oper-try-interpolator_initial.yaml deleted file mode 100644 index 7f11199f..00000000 --- a/config/forecasters-ich1-oper-try-interpolator_initial.yaml +++ /dev/null @@ -1,68 +0,0 @@ -# yaml-language-server: $schema=../workflow/tools/config.schema.json -description: | - Evaluate skill of new time-interpolator 
(trained on ICON-REA-L, fine-tuned on KENDA-ICON-CH1) - driven by ICON-CH1 stage_E forecaster, using anemoi-inference patch from issue #482. - INITIAL VERSION -- before any debugging fixes. - -dates: - start: 2025-03-01T00:00 - end: 2025-03-02T00:00 - frequency: 24h - - -runs: - - - interpolator: - inference_resources: - slurm_partition: normal-shared - checkpoint: https://mlflow.ecmwf.int/#/experiments/456/runs/f9279244ed6f4c458597bdcf335ab36f - label: interpolator_ICON-REA-L - steps: 0/120/1 - config: resources/inference/configs/sgm-interpolator-global_trimedge.yaml - forecaster: - checkpoint: https://service.meteoswiss.ch/mlstore#/experiments/602/runs/fd63e17043014af59170c7beca516b95 - config: resources/inference/configs/sgm-multidataset-forecaster-global-ich1-oper.yaml - steps: 0/120/6 - extra_requirements: - - git+https://github.com/ecmwf/anemoi-inference.git@e369b1a90313e9701db13f63364a467aa281cf36 - extra_requirements: - - git+https://github.com/ecmwf/anemoi-inference.git@e369b1a90313e9701db13f63364a467aa281cf36 - - -baselines: - - baseline: - baseline_id: ICON-CH2-EPS - label: ICON-CH2-ctrl - root: /scratch/mch/cmerker/ICON-CH2-EPS - steps: 0/120/6 - -truth: - label: KENDA-CH1 - root: /store_new/mch/msopr/ml/datasets/mch-ich1-1km-2024-2025-1h-pl13-v1.0.zarr - -stratification: - regions: - - jura - - mittelland - - voralpen - - alpennordhang - - innerealpentaeler - - alpensuedseite - root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517 - -locations: - output_root: output/ - -profile: - executor: slurm - global_resources: - gpus: 16 - default_resources: - slurm_partition: "postproc" - cpus_per_task: 1 - mem_mb_per_cpu: 1800 - runtime: "1h" - gpus: 0 - jobs: 50 - batch_rules: - plot_forecast_frame: 32 From 5a243804c8c9e31669e01b3d2ed0ea9d5868531f Mon Sep 17 00:00:00 2001 From: Jonas Bhend Date: Wed, 22 Apr 2026 14:01:54 +0200 Subject: [PATCH 04/29] Add accumulation post-processor to ensure that TOT_PREC is accumulated (as in baseline) --- 
.../configs/sgm-interpolator-global_trimedge_multi.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml b/resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml index 0bbb5fbc..67ba859e 100644 --- a/resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml +++ b/resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml @@ -62,6 +62,9 @@ output: - extract_mask: # removes global points mask: "lam_0/cutout_mask" as_slice: true + - accumulate_from_start_of_forecast: # accumulate tp from start of forecast + accumulations: + - tp - grib: path: grib/ifs-{dateTime}_{step:03}.grib @@ -76,6 +79,9 @@ output: inverse: true - assign_mask: # fill local/global overlapping points with nan mask: "global/cutout_mask" + - accumulate_from_start_of_forecast: # accumulate tp from start of forecast + accumulations: + - tp verbosity: 0 allow_nans: true From ba0029bba658a2444bef47f142ae8d1f705c23b5 Mon Sep 17 00:00:00 2001 From: Jonas Bhend Date: Wed, 22 Apr 2026 14:03:44 +0200 Subject: [PATCH 05/29] Update checkpoints and baselines --- config/interpolators-ich1.yaml | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/config/interpolators-ich1.yaml b/config/interpolators-ich1.yaml index ba6544d6..1df7f2a7 100644 --- a/config/interpolators-ich1.yaml +++ b/config/interpolators-ich1.yaml @@ -1,7 +1,8 @@ # yaml-language-server: $schema=../workflow/tools/config.schema.json description: | Evaluate skill of new time-interpolator (trained on ICON-REA-L, fine-tuned on KENDA-ICON-CH1) - driven by ICON-CH1 stage_E forecaster, using anemoi-inference patch from issue #482. + driven by ICON-CH1 stage_E forecaster with subgrid orography, using anemoi-inference patch from + issue #482. 
dates: start: 2025-03-01T00:00 @@ -10,16 +11,15 @@ dates: runs: - - interpolator: inference_resources: slurm_partition: normal-shared - checkpoint: /scratch/mch/miccatta/ICON_interpolator_checkpoints/checkpoint_stage-C-interpolator-n320-6hto1h-reduced-variables/f9279244ed6f4c458597bdcf335ab36f/inference-anemoi-by_epoch-epoch_000-step_001000.ckpt + checkpoint: /store_new/mch/msopr/ml/tmp/inference-last.ckpt label: interpolator_ICON-REA-L steps: 0/120/1 config: resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml forecaster: - checkpoint: https://service.meteoswiss.ch/mlstore#/experiments/602/runs/fd63e17043014af59170c7beca516b95 + checkpoint: https://service.meteoswiss.ch/mlstore#/experiments/602/runs/c30490b6ba064e4db03b430f3a2595ad config: resources/inference/configs/sgm-multidataset-forecaster-global-ich1-oper.yaml steps: 0/120/6 extra_requirements: @@ -33,8 +33,13 @@ baselines: - baseline: baseline_id: ICON-CH2-EPS label: ICON-CH2-ctrl - root: /scratch/mch/cmerker/ICON-CH2-EPS + root: /store_new/mch/msopr/ml/ICON-CH2-EPS steps: 0/120/1 + - baseline: + baseline_id: ICON-CH1-EPS + label: ICON-CH1-ctrl + root: /store_new/mch/msopr/ml/ICON-CH1-EPS + steps: 0/33/1 truth: label: KENDA-CH1 From 31fcb657b1a11e0ba78e8a819c9b2fca7e95249e Mon Sep 17 00:00:00 2001 From: Jonas Bhend Date: Wed, 22 Apr 2026 14:04:14 +0200 Subject: [PATCH 06/29] Fix trailing whitespace --- config/interpolators-ich1.yaml | 2 +- .../configs/sgm-interpolator-global_trimedge_multi.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/interpolators-ich1.yaml b/config/interpolators-ich1.yaml index 1df7f2a7..0eeba0b9 100644 --- a/config/interpolators-ich1.yaml +++ b/config/interpolators-ich1.yaml @@ -1,7 +1,7 @@ # yaml-language-server: $schema=../workflow/tools/config.schema.json description: | Evaluate skill of new time-interpolator (trained on ICON-REA-L, fine-tuned on KENDA-ICON-CH1) - driven by ICON-CH1 stage_E forecaster with subgrid orography, 
using anemoi-inference patch from + driven by ICON-CH1 stage_E forecaster with subgrid orography, using anemoi-inference patch from issue #482. dates: diff --git a/resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml b/resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml index 67ba859e..37f7927d 100644 --- a/resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml +++ b/resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml @@ -63,7 +63,7 @@ output: mask: "lam_0/cutout_mask" as_slice: true - accumulate_from_start_of_forecast: # accumulate tp from start of forecast - accumulations: + accumulations: - tp - grib: @@ -80,7 +80,7 @@ output: - assign_mask: # fill local/global overlapping points with nan mask: "global/cutout_mask" - accumulate_from_start_of_forecast: # accumulate tp from start of forecast - accumulations: + accumulations: - tp verbosity: 0 From 91cdf6136c9ddef65de122a7130246d3b2d1acae Mon Sep 17 00:00:00 2001 From: Jonas Bhend Date: Wed, 22 Apr 2026 15:59:41 +0200 Subject: [PATCH 07/29] Inlined debugging comments and remove debugging text file --- ...1.yaml => interpolators-ich1-balfrin.yaml} | 5 +- debugging_summary_interpolator_2.txt | 195 ------------------ ...gm-interpolator-global_trimedge_multi.yaml | 2 + .../sgm-interpolator-ich1-oper-patch.yaml | 5 + 4 files changed, 11 insertions(+), 196 deletions(-) rename config/{interpolators-ich1.yaml => interpolators-ich1-balfrin.yaml} (88%) delete mode 100644 debugging_summary_interpolator_2.txt diff --git a/config/interpolators-ich1.yaml b/config/interpolators-ich1-balfrin.yaml similarity index 88% rename from config/interpolators-ich1.yaml rename to config/interpolators-ich1-balfrin.yaml index 0eeba0b9..45a195f5 100644 --- a/config/interpolators-ich1.yaml +++ b/config/interpolators-ich1-balfrin.yaml @@ -6,7 +6,7 @@ description: | dates: start: 2025-03-01T00:00 - end: 2025-03-02T00:00 + end: 2025-03-03T00:00 frequency: 24h @@ -14,6 +14,8 
@@ runs: - interpolator: inference_resources: slurm_partition: normal-shared + # for checkpoints trained on a different HPC, using mlflow doesn't work due to difference in + # paths, so we directly specify the checkpoint path here checkpoint: /store_new/mch/msopr/ml/tmp/inference-last.ckpt label: interpolator_ICON-REA-L steps: 0/120/1 @@ -26,6 +28,7 @@ runs: - git+https://github.com/ecmwf/anemoi-inference.git@e369b1a90313e9701db13f63364a467aa281cf36 extra_requirements: - git+https://github.com/ecmwf/anemoi-inference.git@e369b1a90313e9701db13f63364a467aa281cf36 + # pinned anemoi-datasets because of ecmwf/anemoi-utils#284, can be removed when fixed - anemoi-datasets==0.5.35 diff --git a/debugging_summary_interpolator_2.txt b/debugging_summary_interpolator_2.txt deleted file mode 100644 index f2bee469..00000000 --- a/debugging_summary_interpolator_2.txt +++ /dev/null @@ -1,195 +0,0 @@ -Debugging summary: running the new ICON-REA-L interpolator in evalML on Balfrin -================================================================================ - -Interpolator checkpoint: https://mlflow.ecmwf.int/#/experiments/456/runs/f9279244ed6f4c458597bdcf335ab36f - Local path: /scratch/mch/miccatta/ICON_interpolator_checkpoints/checkpoint_stage-C-interpolator-n320-6hto1h-reduced-variables/f9279244ed6f4c458597bdcf335ab36f/inference-anemoi-by_epoch-epoch_000-step_001000.ckpt -Forecaster checkpoint: https://service.meteoswiss.ch/mlstore#/experiments/602/runs/fd63e17043014af59170c7beca516b95 -anemoi-inference patch: https://github.com/ecmwf/anemoi-inference/issues/482 (commit e369b1a) - -The interpolator checkpoint was trained on Santis. Running it on Balfrin required -fixing 10 issues. Issues 1, 5, 6, 7 are Balfrin-specific (cross-site portability). -Issues 2, 3, 4, 8, 9, 10 affect both Santis and Balfrin. - -Status as of 2026-04-16 17:30: inference runs to completion (all 20 interpolation -windows, GRIB output written). 
The remaining failure is in the evalML verification -step (verif_metrics), not in inference itself. See issue 10. - - -Issue 1: Checkpoint not accessible via MLflow [Balfrin-only] ------------------------------------------------------------- -The MLflow script resolved the checkpoint URL to a path on miccatta's home dir: - /users/miccatta/symlinks-scratch/anemoi-outputs/checkpoint_stage-C-interpolator-n320-6hto1h-reduced-variables/... -This path does not exist on Balfrin (likely a broken symlink or permissions issue). - -Fix: Use the local checkpoint path directly in the evalML config: - /scratch/mch/miccatta/ICON_interpolator_checkpoints/... - - -Issue 2: Bad git hash for anemoi-datasets in checkpoint metadata [Both] ------------------------------------------------------------------------ -The auto-extracted requirements from the checkpoint contained a bogus git commit -hash for anemoi-datasets (d2e9f8c7...) that doesn't exist in the public repo. - -Root cause: https://github.com/ecmwf/anemoi-utils/issues/284 -When .venv is inside a git repo, gather_provenance_info() incorrectly records the -parent repo's SHA for almost all packages. Fix PR: https://github.com/ecmwf/anemoi-utils/pull/285 - -Fix: Add `anemoi-datasets==0.5.35` to the interpolator's extra_requirements in -the evalML config, which overrides the broken git hash with the correct PyPI release. - - -Issue 3: Runner renamed in anemoi-inference [Both] --------------------------------------------------- -The anemoi-inference version from the #482 patch (commit e369b1a) does not have a -`time-interpolator` runner. It was renamed to `time-multi-interpolator`. - -Available runners: default, external-graph, time-multi-interpolator, parallel, -plugin, simple, testing, no-model. 
- -Fix: Created a new inference config (sgm-interpolator-global_trimedge_multi.yaml) -based on sgm-interpolator-global_trimedge.yaml with: - runner: time_multi_interpolator - - -Issue 4: patch_metadata path convention in inference config [Both] ------------------------------------------------------------------- -The evalML workflow copies files from resources/inference/metadata/ into the run's -resources/ directory. So the inference config must reference patch files as: - patch_metadata: resources/sgm-interpolator-ich1-oper-patch.yaml -NOT: - patch_metadata: resources/inference/metadata/sgm-interpolator-ich1-oper-patch.yaml - -Fix: Corrected the path in the inference config. - - -Issue 5: Training dataset paths from Santis not available on Balfrin [Balfrin-only] ------------------------------------------------------------------------------------ -The checkpoint metadata embeds the training dataset paths from Santis: - /capstor/store/mch/msopr/ml/datasets/mch-realch1-fdb-1km-2005-2025-1h-pl13-ifsnames-1h-precip-v1.0.zarr - /capstor/store/mch/msopr/ml/datasets/aifs-ea-an-oper-0001-mars-n320-1979-2024-1h-v2-with-era51.zarr - -/capstor/store/mch is not mounted on Balfrin. The same datasets exist under /store_new/mch/... - -Fix: Created a patch_metadata file (resources/inference/metadata/sgm-interpolator-ich1-oper-patch.yaml) -that remaps the dataset paths -- following the same pattern as the forecaster's -sgm-multidataset-ich1-oper-patch.yaml. - - -Issue 6: Training datasets don't cover experiment dates (2025) [Balfrin-only] ------------------------------------------------------------------------------ -The original training datasets (mch-realch1-fdb-..., aifs-ea-...) only have data -through 2024-12-31, but the experiment starts 2025-03-01. The constant forcings -loader requires the forecast date to exist in the dataset, even for time-invariant -fields (z, lsm). 
- -Fix: In the patch_metadata, swapped to datasets that cover 2025: - mch-ich1-1km-2024-2025-1h-pl13-ifsnames-v1.0.zarr (LAM, 1h) - aifs-od-an-oper-0001-mars-n320-2016-2025-6h-v1-combined-land.zarr (global, 6h) - -Note: On Santis this may also be an issue if the training datasets end at 2024-12-31. - - -Issue 7: Frequency mismatch between LAM (1h) and global (6h) datasets [Balfrin-only] -------------------------------------------------------------------------------------- -The replacement global dataset (aifs-od-...) is 6-hourly, but the checkpoint -metadata specifies frequency=1h. The dataset open call fails with: - "Requested frequency 1h is not a multiple of the dataset frequency 6:00:00" - -This only affects constant forcings (z, lsm) -- the main input comes from GRIB files. - -Fix: Override frequency to 6h in the patch_metadata at the correct nesting level: - config.dataloader.test.datasets.data.dataset_config.frequency: 6h - -Important: the frequency field must be inside dataset_config (alongside dataset), -NOT alongside start/end (which are at the dataloader level). The deep merge -applies at the dataset_config level. - - -Issue 8: No source0/trimedge_mask in checkpoint [Both] ------------------------------------------------------- -The inference config (copied from the CO2 interpolator) referenced a -source0/trimedge_mask supporting array for pre/post-processing of boundary points. -This checkpoint was NOT trained with edge trimming -- the array doesn't exist. - -Available supporting arrays: latitudes, longitudes, lam_0/cutout_mask, -global/cutout_mask, source0/latitudes, source0/longitudes, source1/latitudes, -source1/longitudes. - -Fix: Removed both the extract_mask pre-processor and assign_mask post-processor -that referenced source0/trimedge_mask from the inference config. 
- - -Issue 9: GRIB output template mismatch [Both] ----------------------------------------------- -Two sub-issues: - -a) The LAM output used COSMO templates (templates_index_cosmo.yaml) which match on - grid=0.02 or grid=0.01. This checkpoint outputs on an ICON-CH1 grid. - Fix: Switched to templates_index_icon.yaml. - -b) Even with ICON templates, the template lookup failed because the output variables - use IFS naming (10u, 2t, msl...) but the ICON templates match on ICON param names - (U_10M, T_2M, PMSL...). The forecaster works because its patch_metadata includes - a variables_metadata section that maps IFS names to ICON GRIB params. - Fix: Added the full variables_metadata section (copied from the forecaster's - sgm-multidataset-ich1-oper-patch.yaml) to the interpolator's patch_metadata. - - -Issue 10: Assertion error in verbose output printing [Both] ------------------------------------------------------------ -With verbosity=1, the _print_output_tensor method asserts that the first tensor -dimension is 1 or multi_step_input (2), but the multi-output interpolator produces -6 time steps. This is a bug in anemoi-inference (the verbose printing code doesn't -account for multi-output interpolators). - -Fix: Set verbosity: 0 in the inference config to skip the debug tensor printing. - -Note: This should be reported as a bug to anemoi-inference. - - -CURRENT STATUS: Verification step failure (not an inference issue) ------------------------------------------------------------------- -The interpolator inference now runs to completion (all 20 windows, 120h lead time, -hourly output). GRIB files are written for both LAM and IFS grids. - -The remaining failure is in evalML's verif_metrics step: - AttributeError: 'Dataset' object has no attribute 'ref_time' -The verification reader can't parse the output GRIB files -- likely missing -reference time metadata in the GRIB encoding. 
This needs investigation in the -evalML verification code or the GRIB encoding configuration. - - -Files created/modified ----------------------- -New files: - resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml - - Inference config for the new interpolator - - runner: time_multi_interpolator - - No trimedge pre/post-processors - - ICON templates instead of COSMO - - verbosity: 0 - - resources/inference/metadata/sgm-interpolator-ich1-oper-patch.yaml - - Checkpoint metadata patch for Balfrin - - Remaps dataset paths from /capstor/store/mch to /store_new/mch - - Uses 2025-covering datasets for constant forcings - - Overrides frequency to 6h (for the 6h global dataset) - - Full variables_metadata for IFS-to-ICON param name mapping - - config/forecasters-ich1-oper-try-interpolator_minimal_Claude.yaml - - evalML experiment config for Balfrin - - Local checkpoint path - - anemoi-datasets==0.5.35 override - - config/forecasters-ich1-oper-try-interpolator_initial.yaml - - The initial config before any fixes (for reference) - - config/forecasters-ich1-oper-try-interpolator_for-Santis.yaml - - Config for running on Santis (no Balfrin-specific path remapping) - - Includes fixes for issues 2, 3 (universal issues) - - debugging_summary_interpolator.txt - - First version of this summary (covers issues 1-6) - - debugging_summary_interpolator_2.txt - - This file (covers all 10 issues + current status) diff --git a/resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml b/resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml index 37f7927d..ae353d4d 100644 --- a/resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml +++ b/resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml @@ -65,6 +65,7 @@ output: - accumulate_from_start_of_forecast: # accumulate tp from start of forecast accumulations: - tp + # here, the trimedge mask can be specified when available - grib: path: 
grib/ifs-{dateTime}_{step:03}.grib @@ -83,6 +84,7 @@ output: accumulations: - tp +# silenced due to bug in anemoi-inference for multi-step interpolators, can be removed when fixed verbosity: 0 allow_nans: true output_frequency: "1h" diff --git a/resources/inference/metadata/sgm-interpolator-ich1-oper-patch.yaml b/resources/inference/metadata/sgm-interpolator-ich1-oper-patch.yaml index b1dde369..1695812c 100644 --- a/resources/inference/metadata/sgm-interpolator-ich1-oper-patch.yaml +++ b/resources/inference/metadata/sgm-interpolator-ich1-oper-patch.yaml @@ -6,8 +6,11 @@ config: dataset_config: dataset: cutout: + # training datasets not available on system due to training on different system + # and/or training period not in dataset - dataset: /store_new/mch/msopr/ml/datasets/mch-ich1-1km-2024-2025-1h-pl13-ifsnames-v1.0.zarr - dataset: /store_new/mch/msopr/ml/datasets/aifs-od-an-oper-0001-mars-n320-2016-2025-6h-v1-combined-land.zarr + # avoid errors from frequency mismatch between LAM (1h) and global (6h) datasets when aligning datasets frequency: 6h start: null end: null @@ -15,6 +18,8 @@ config: dataset: data: constant_fields: [z, lsm] + # variables_metadata is necessary due to missing variables_metadata section in interpolator patch_metadata + # else mapping with grib templates fails variables_metadata: 10u: mars: From 0088c37e85d9745b47403a5cc0ddaa3ca07a8277 Mon Sep 17 00:00:00 2001 From: Jonas Bhend Date: Thu, 23 Apr 2026 08:52:49 +0200 Subject: [PATCH 08/29] move computation to inner loop to avoid dask graph bloat --- src/verification/__init__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/verification/__init__.py b/src/verification/__init__.py index 97a62505..c6104a4d 100644 --- a/src/verification/__init__.py +++ b/src/verification/__init__.py @@ -216,12 +216,12 @@ def verify( score = xr.concat(score, dim="region") fcst_statistics = xr.concat(fcst_statistics, dim="region") obs_statistics = xr.concat(obs_statistics, 
dim="region") - statistics.append(xr.concat([fcst_statistics, obs_statistics], dim="source")) - scores.append(score) + param_statistics = xr.concat([fcst_statistics, obs_statistics], dim="source") + # Compute eagerly per parameter to prevent dask graph bloat + scores.append(_merge_metrics([score])) + statistics.append(_merge_metrics([param_statistics])) - scores = _merge_metrics(scores) - statistics = _merge_metrics(statistics) - out = xr.merge([scores, statistics], join="outer", compat="no_conflicts") + out = xr.merge(scores + statistics, join="outer", compat="no_conflicts") LOG.info("Computed metrics in %.2f seconds", time.time() - start) LOG.info("Metrics dataset: \n%s", out) return out From 9b07bd0f366ce3f5cc2e281f706a4bbf804280e4 Mon Sep 17 00:00:00 2001 From: Jonas Bhend Date: Thu, 23 Apr 2026 09:05:18 +0200 Subject: [PATCH 09/29] Use allocated CPU resources in dataset computation --- src/verification/__init__.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/verification/__init__.py b/src/verification/__init__.py index c6104a4d..9bc043b8 100644 --- a/src/verification/__init__.py +++ b/src/verification/__init__.py @@ -1,4 +1,5 @@ import logging +import os import time from pathlib import Path @@ -122,11 +123,11 @@ def _compute_statistics( return stats -def _merge_metrics(ds: xr.Dataset) -> xr.Dataset: +def _merge_metrics(ds: xr.Dataset, num_workers: int = 4) -> xr.Dataset: out = xr.merge(ds, compat="no_conflicts") if "ref_time" not in out.dims: out = out.expand_dims("ref_time").set_coords("ref_time") - out = out.compute(num_workers=4, scheduler="threads") + out = out.compute(num_workers=num_workers, scheduler="threads") return out @@ -147,6 +148,7 @@ def verify( obs_label: str, regions: list[str] | None = None, dim: list[str] | None = None, + num_workers: int | None = None, ) -> xr.Dataset: """ Compare two xarray Datasets (fcst and obs) and return pandas DataFrame with @@ -154,6 +156,12 @@ def verify( """ start = 
time.time() + if num_workers is None: + try: + num_workers = len(os.sched_getaffinity(0)) + except AttributeError: + num_workers = max((os.cpu_count() or 6) - 2, 1) + if dim is None: if "x" in fcst.dims and "y" in fcst.dims: dim = ["x", "y"] @@ -218,8 +226,8 @@ def verify( obs_statistics = xr.concat(obs_statistics, dim="region") param_statistics = xr.concat([fcst_statistics, obs_statistics], dim="source") # Compute eagerly per parameter to prevent dask graph bloat - scores.append(_merge_metrics([score])) - statistics.append(_merge_metrics([param_statistics])) + scores.append(_merge_metrics([score], num_workers=num_workers)) + statistics.append(_merge_metrics([param_statistics], num_workers=num_workers)) out = xr.merge(scores + statistics, join="outer", compat="no_conflicts") LOG.info("Computed metrics in %.2f seconds", time.time() - start) From 7b0b2477c8afb762d9bf587a5b88892afe35fdfb Mon Sep 17 00:00:00 2001 From: Jonas Bhend Date: Thu, 23 Apr 2026 09:28:23 +0200 Subject: [PATCH 10/29] Update config/interpolators-ich1-balfrin.yaml Co-authored-by: Daniele Nerini --- config/interpolators-ich1-balfrin.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/config/interpolators-ich1-balfrin.yaml b/config/interpolators-ich1-balfrin.yaml index 45a195f5..73397e2b 100644 --- a/config/interpolators-ich1-balfrin.yaml +++ b/config/interpolators-ich1-balfrin.yaml @@ -30,9 +30,6 @@ runs: - git+https://github.com/ecmwf/anemoi-inference.git@e369b1a90313e9701db13f63364a467aa281cf36 # pinned anemoi-datasets because of ecmwf/anemoi-utils#284, can be removed when fixed - anemoi-datasets==0.5.35 - - -baselines: - baseline: baseline_id: ICON-CH2-EPS label: ICON-CH2-ctrl From 7fcbf8acc4e6a57007c238a9ae45b9b2cb85239b Mon Sep 17 00:00:00 2001 From: Jonas Bhend Date: Thu, 23 Apr 2026 09:28:38 +0200 Subject: [PATCH 11/29] Update config/interpolators-ich1-balfrin.yaml Co-authored-by: Daniele Nerini --- config/interpolators-ich1-balfrin.yaml | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/config/interpolators-ich1-balfrin.yaml b/config/interpolators-ich1-balfrin.yaml index 73397e2b..5cb8e175 100644 --- a/config/interpolators-ich1-balfrin.yaml +++ b/config/interpolators-ich1-balfrin.yaml @@ -17,7 +17,7 @@ runs: # for checkpoints trained on a different HPC, using mlflow doesn't work due to difference in # paths, so we directly specify the checkpoint path here checkpoint: /store_new/mch/msopr/ml/tmp/inference-last.ckpt - label: interpolator_ICON-REA-L + label: Varda-Single steps: 0/120/1 config: resources/inference/configs/sgm-interpolator-global_trimedge_multi.yaml forecaster: From 2d25a535a5ad8d3d64463aa29824bf8843a24f64 Mon Sep 17 00:00:00 2001 From: Jonas Bhend Date: Thu, 23 Apr 2026 09:31:51 +0200 Subject: [PATCH 12/29] renamed interpolator config --- .../{interpolators-ich1-balfrin.yaml => interpolators-ich1.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename config/{interpolators-ich1-balfrin.yaml => interpolators-ich1.yaml} (100%) diff --git a/config/interpolators-ich1-balfrin.yaml b/config/interpolators-ich1.yaml similarity index 100% rename from config/interpolators-ich1-balfrin.yaml rename to config/interpolators-ich1.yaml From 24848f1c23d328ac60027a1f31bf6d7dbae90b74 Mon Sep 17 00:00:00 2001 From: Carlos Osuna Date: Tue, 28 Apr 2026 09:08:21 +0200 Subject: [PATCH 13/29] generalize the plot of meteograms to any location --- src/evalml/config.py | 21 ++++++++++++++++++ workflow/Snakefile | 36 +++++++++++++++++++------------ workflow/rules/plot.smk | 2 +- workflow/tools/config.schema.json | 36 +++++++++++++++++++++++++++++++ 4 files changed, 80 insertions(+), 15 deletions(-) diff --git a/src/evalml/config.py b/src/evalml/config.py index a4eb497b..26fb2346 100644 --- a/src/evalml/config.py +++ b/src/evalml/config.py @@ -208,6 +208,23 @@ class BaselineItem(BaseModel): baseline: BaselineConfig +class ShowcaseConfig(BaseModel): + """Configuration for the showcase workflow.""" + + meteograms: bool = Field( + 
default=True, + description="Whether to generate meteograms (time series plots at stations).", + ) + animations: bool = Field( + default=True, + description="Whether to generate forecast animations (GIFs per param and region).", + ) + stations: List[str] = Field( + default=["GVE", "KLO", "LUG"], + description="List of PeakWeather station IDs to generate meteograms for.", + ) + + class Locations(BaseModel): """Locations of data and services used in the workflow.""" @@ -318,6 +335,10 @@ class ConfigModel(BaseModel): stratification: Stratification locations: Locations profile: Profile + showcase: ShowcaseConfig = Field( + default_factory=ShowcaseConfig, + description="Settings for the showcase workflow.", + ) model_config = { "extra": "forbid", # fail on misspelled keys diff --git a/workflow/Snakefile b/workflow/Snakefile index fb0cbdf9..a9d67316 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -138,21 +138,29 @@ rule experiment_all: rule showcase_all: """Target rule for showcase workflow.""" input: - expand( - rules.make_forecast_animation.output, - init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES], - run_id=CANDIDATES, - param=["T_2M", "SP_10M"], - region=["globe", "europe", "switzerland"], - showcase=EXPERIMENT_NAME, + ( + expand( + rules.make_forecast_animation.output, + init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES], + run_id=CANDIDATES, + param=["T_2M", "SP_10M"], + region=["globe", "europe", "switzerland"], + showcase=EXPERIMENT_NAME, + ) + if config["showcase"]["animations"] + else [] ), - expand( - rules.plot_meteogram.output, - init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES], - run_id=CANDIDATES, - param=["T_2M", "SP_10M"], - sta=["GVE", "KLO", "LUG"], - showcase=EXPERIMENT_NAME, + ( + expand( + rules.plot_meteogram.output, + init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES], + run_id=CANDIDATES, + param=["T_2M", "SP_10M"], + sta=config["showcase"]["stations"], + showcase=EXPERIMENT_NAME, + ) + if 
config["showcase"]["meteograms"] + else [] ), diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index 73badb2b..be483c16 100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -38,7 +38,7 @@ rule plot_meteogram: resources: slurm_partition="postproc", cpus_per_task=1, - runtime="10m", + runtime="60m", params: ana_label=lambda wc: config["truth"]["label"], fcst_grib=lambda wc: ( diff --git a/workflow/tools/config.schema.json b/workflow/tools/config.schema.json index 20f65126..75663a79 100644 --- a/workflow/tools/config.schema.json +++ b/workflow/tools/config.schema.json @@ -465,6 +465,38 @@ "title": "Profile", "type": "object" }, + "ShowcaseConfig": { + "description": "Configuration for the showcase workflow.", + "properties": { + "meteograms": { + "default": true, + "description": "Whether to generate meteograms (time series plots at stations).", + "title": "Meteograms", + "type": "boolean" + }, + "animations": { + "default": true, + "description": "Whether to generate forecast animations (GIFs per param and region).", + "title": "Animations", + "type": "boolean" + }, + "stations": { + "default": [ + "GVE", + "KLO", + "LUG" + ], + "description": "List of PeakWeather station IDs to generate meteograms for.", + "items": { + "type": "string" + }, + "title": "Stations", + "type": "array" + } + }, + "title": "ShowcaseConfig", + "type": "object" + }, "Stratification": { "description": "Stratification settings for the analysis.", "properties": { @@ -589,6 +621,10 @@ }, "profile": { "$ref": "#/$defs/Profile" + }, + "showcase": { + "$ref": "#/$defs/ShowcaseConfig", + "description": "Settings for the showcase workflow." 
} }, "required": [ From f3e1c30cd3cc1ec4f206786d60f036f7173613b9 Mon Sep 17 00:00:00 2001 From: Carlos Osuna Date: Fri, 1 May 2026 17:12:04 +0200 Subject: [PATCH 14/29] config for regions and params --- src/evalml/config.py | 29 ++++++++++++++++++++++ src/plotting/__init__.py | 9 +++++++ src/plotting/compat.py | 11 ++++++-- workflow/Snakefile | 6 ++--- workflow/rules/common.smk | 21 ++++++++++++++++ workflow/rules/plot.smk | 10 ++++++++ workflow/scripts/plot_forecast_frame.mo.py | 27 ++++++++++++++++++-- 7 files changed, 106 insertions(+), 7 deletions(-) diff --git a/src/evalml/config.py b/src/evalml/config.py index 26fb2346..08610bf8 100644 --- a/src/evalml/config.py +++ b/src/evalml/config.py @@ -208,6 +208,22 @@ class BaselineItem(BaseModel): baseline: BaselineConfig +class RegionConfig(BaseModel): + """A custom map region defined by name, extent, and projection.""" + + name: str = Field(..., description="Name for the custom region (used as wildcard).") + extent: List[float] | None = Field( + None, + description="Geographic extent as [lon_min, lon_max, lat_min, lat_max] in PlateCarree coordinates. None means full globe.", + ) + projection: str = Field( + "orthographic", + description="Projection name (must be a key in plotting._PROJECTIONS, e.g. 'orthographic').", + ) + + model_config = {"extra": "forbid"} + + class ShowcaseConfig(BaseModel): """Configuration for the showcase workflow.""" @@ -219,10 +235,23 @@ class ShowcaseConfig(BaseModel): default=True, description="Whether to generate forecast animations (GIFs per param and region).", ) + params: List[str] = Field( + default=["T_2M", "SP_10M"], + description="List of parameters to generate animations and meteograms for.", + ) stations: List[str] = Field( default=["GVE", "KLO", "LUG"], description="List of PeakWeather station IDs to generate meteograms for.", ) + regions: List[str | RegionConfig] = Field( + default=["globe", "europe", "switzerland"], + description=( + "Regions to generate animations for. 
Each entry is either a named region " + "(e.g. 'globe', 'europe', 'switzerland') defined in plotting.DOMAINS, " + "or a custom region dict with 'name', optional 'extent' " + "[lon_min, lon_max, lat_min, lat_max], and optional 'projection'." + ), + ) class Locations(BaseModel): diff --git a/src/plotting/__init__.py b/src/plotting/__init__.py index ce5e4e63..810468ad 100644 --- a/src/plotting/__init__.py +++ b/src/plotting/__init__.py @@ -20,6 +20,15 @@ # Mapping of region names to their geographic extent and projection # extent [lon_min, lon_max, lat_min, lat_max] in PlateCarree coordinates +def get_projection(name: str) -> "ccrs.Projection": + """Look up a projection by name.""" + if name not in _PROJECTIONS: + raise ValueError( + f"Unknown projection {name!r}. Available: {list(_PROJECTIONS)}" + ) + return _PROJECTIONS[name] + + DOMAINS = { "globe": { "extent": None, # full globe view diff --git a/src/plotting/compat.py b/src/plotting/compat.py index 665287e0..7c4d14d3 100644 --- a/src/plotting/compat.py +++ b/src/plotting/compat.py @@ -18,8 +18,15 @@ def load_state_from_grib( fds = data_source.FileDataSource(datafiles=[str(file)]) ds = grib_decoder.load(fds, {"param": paramlist}) state = {} - lats = ds[paramlist[0]].lat.data.flatten() - lons = ds[paramlist[0]].lon.data.flatten() + ref_param = next((p for p in (paramlist or []) if p in ds), None) + if ref_param is None: + raise ValueError( + f"None of the requested params {paramlist} found in {file}. " + "The GRIB file may not contain these fields at this lead time " + "(e.g. accumulated fields like TOT_PREC are undefined at step 0)." 
+ ) + lats = ds[ref_param].lat.data.flatten() + lons = ds[ref_param].lon.data.flatten() state["forecast_reference_time"] = reftime state["valid_time"] = reftime + pd.to_timedelta(lead_time_hours, unit="h") state["longitudes"] = lons diff --git a/workflow/Snakefile b/workflow/Snakefile index dd804911..3e564742 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -143,8 +143,8 @@ rule showcase_all: rules.make_forecast_animation.output, init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES], run_id=CANDIDATES, - param=["T_2M", "SP_10M"], - region=["globe", "europe", "switzerland"], + param=config["showcase"]["params"], + region=list(SHOWCASE_REGIONS.keys()), showcase=EXPERIMENT_NAME, ) if config["showcase"]["animations"] @@ -155,7 +155,7 @@ rule showcase_all: rules.plot_meteogram.output, init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES], run_id=CANDIDATES, - param=["T_2M", "SP_10M"], + param=config["showcase"]["params"], sta=config["showcase"]["stations"], showcase=EXPERIMENT_NAME, ) diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index e858c80f..bcb8c18f 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -80,6 +80,26 @@ def parse_regions(): return regions_txt +def parse_showcase_regions(): + """Parse showcase regions from config. + + Returns a dict mapping region name -> {extent, projection}. + Named regions (strings) have extent=None and projection=None, + meaning the plot script will fall back to the DOMAINS lookup. + Custom regions carry their explicit extent and projection. 
+ """ + result = {} + for r in config["showcase"]["regions"]: + if isinstance(r, str): + result[r] = {"extent": None, "projection": None} + else: + result[r["name"]] = { + "extent": r.get("extent"), + "projection": r.get("projection", "orthographic"), + } + return result + + # ============================================================================ # Run entries configuration management # ============================================================================ @@ -294,6 +314,7 @@ def master_hash() -> str: REGIONS = parse_regions() +SHOWCASE_REGIONS = parse_showcase_regions() REFTIMES = parse_reference_times() RUN_CONFIGS = collect_all_runs() ENV_CONFIGS = collect_all_envs() diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index 06a02526..5bb312be 100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -92,6 +92,7 @@ rule plot_forecast_frame: / "data/runs/{run_id}/{init_time}/frames/frame_{leadtime}_{param}_{region}.png", wildcard_constraints: leadtime=r"\d+", # only digits + region="|".join(map(re.escape, SHOWCASE_REGIONS.keys())), resources: slurm_partition="postproc", cpus_per_task=1, @@ -100,12 +101,21 @@ rule plot_forecast_frame: grib_out_dir=lambda wc: ( Path(OUT_ROOT) / f"data/runs/{wc.run_id}/{wc.init_time}/grib" ).resolve(), + region_extra=lambda wc: ( + "--extent {} --projection {}".format( + " ".join(map(str, SHOWCASE_REGIONS[wc.region]["extent"])), + SHOWCASE_REGIONS[wc.region]["projection"], + ) + if SHOWCASE_REGIONS.get(wc.region, {}).get("extent") is not None + else "" + ), shell: """ export ECCODES_DEFINITION_PATH=$(realpath .venv/share/eccodes-cosmo-resources/definitions) python {input.script} \ --input {params.grib_out_dir} --date {wildcards.init_time} --outfn {output[0]} \ --param {wildcards.param} --leadtime {wildcards.leadtime} --region {wildcards.region} \ + {params.region_extra} # interactive editing (needs to set localrule: True and use only one core) # marimo edit {input.script} -- \ # --input 
{params.grib_out_dir} --date {wildcards.init_time} --outfn {output[0]}\ diff --git a/workflow/scripts/plot_forecast_frame.mo.py b/workflow/scripts/plot_forecast_frame.mo.py index b8592141..eee4fe04 100644 --- a/workflow/scripts/plot_forecast_frame.mo.py +++ b/workflow/scripts/plot_forecast_frame.mo.py @@ -15,6 +15,7 @@ def _(): import numpy as np from plotting import DOMAINS + from plotting import get_projection from plotting import StatePlotter from plotting.colormap_defaults import CMAP_DEFAULTS from plotting.compat import load_state_from_grib @@ -29,6 +30,7 @@ def _(): logging, np, DOMAINS, + get_projection, ccrs, ) @@ -53,6 +55,20 @@ def _(ArgumentParser, Path): parser.add_argument("--leadtime", type=str, help="leadtime") parser.add_argument("--param", type=str, help="parameter") parser.add_argument("--region", type=str, help="name of region") + parser.add_argument( + "--extent", + type=float, + nargs=4, + default=None, + metavar=("LON_MIN", "LON_MAX", "LAT_MIN", "LAT_MAX"), + help="custom geographic extent in PlateCarree coordinates; overrides DOMAINS lookup", + ) + parser.add_argument( + "--projection", + type=str, + default=None, + help="projection name (e.g. 
'orthographic'); used only together with --extent", + ) args = parser.parse_args() grib_dir = Path(args.input) @@ -191,6 +207,7 @@ def _( StatePlotter, args, get_style, + get_projection, outfn, param, preprocess_field, @@ -205,11 +222,17 @@ def _( state["latitudes"], outfn.parent, ) + if args.extent is not None: + _projection = get_projection(args.projection or "orthographic") + _extent = args.extent + else: + _projection = DOMAINS[region]["projection"] + _extent = DOMAINS[region]["extent"] fig = plotter.init_geoaxes( nrows=1, ncols=1, - projection=DOMAINS[region]["projection"], - bbox=DOMAINS[region]["extent"], + projection=_projection, + bbox=_extent, name=region, size=(6, 6), ) From 51bb6fc16f582865122d99706b4d3edb290901ad Mon Sep 17 00:00:00 2001 From: Carlos Osuna Date: Mon, 4 May 2026 09:55:13 +0200 Subject: [PATCH 15/29] update the schema --- workflow/tools/config.schema.json | 70 +++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/workflow/tools/config.schema.json b/workflow/tools/config.schema.json index 75663a79..7537d296 100644 --- a/workflow/tools/config.schema.json +++ b/workflow/tools/config.schema.json @@ -465,6 +465,44 @@ "title": "Profile", "type": "object" }, + "RegionConfig": { + "additionalProperties": false, + "description": "A custom map region defined by name, extent, and projection.", + "properties": { + "name": { + "description": "Name for the custom region (used as wildcard).", + "title": "Name", + "type": "string" + }, + "extent": { + "anyOf": [ + { + "items": { + "type": "number" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Geographic extent as [lon_min, lon_max, lat_min, lat_max] in PlateCarree coordinates. None means full globe.", + "title": "Extent" + }, + "projection": { + "default": "orthographic", + "description": "Projection name (must be a key in plotting._PROJECTIONS, e.g. 
'orthographic').", + "title": "Projection", + "type": "string" + } + }, + "required": [ + "name" + ], + "title": "RegionConfig", + "type": "object" + }, "ShowcaseConfig": { "description": "Configuration for the showcase workflow.", "properties": { @@ -480,6 +518,18 @@ "title": "Animations", "type": "boolean" }, + "params": { + "default": [ + "T_2M", + "SP_10M" + ], + "description": "List of parameters to generate animations and meteograms for.", + "items": { + "type": "string" + }, + "title": "Params", + "type": "array" + }, "stations": { "default": [ "GVE", @@ -492,6 +542,26 @@ }, "title": "Stations", "type": "array" + }, + "regions": { + "default": [ + "globe", + "europe", + "switzerland" + ], + "description": "Regions to generate animations for. Each entry is either a named region (e.g. 'globe', 'europe', 'switzerland') defined in plotting.DOMAINS, or a custom region dict with 'name', optional 'extent' [lon_min, lon_max, lat_min, lat_max], and optional 'projection'.", + "items": { + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/$defs/RegionConfig" + } + ] + }, + "title": "Regions", + "type": "array" } }, "title": "ShowcaseConfig", From e2db0c9ba47fe6f9586e5902f0db395e41188b4a Mon Sep 17 00:00:00 2001 From: Carlos Osuna Date: Mon, 4 May 2026 10:22:40 +0200 Subject: [PATCH 16/29] fix use a default when no params or regions are defined --- workflow/Snakefile | 4 ++-- workflow/rules/common.smk | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 3e564742..5dfab820 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -143,7 +143,7 @@ rule showcase_all: rules.make_forecast_animation.output, init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES], run_id=CANDIDATES, - param=config["showcase"]["params"], + param=SHOWCASE_PARAMS, region=list(SHOWCASE_REGIONS.keys()), showcase=EXPERIMENT_NAME, ) @@ -155,7 +155,7 @@ rule showcase_all: rules.plot_meteogram.output, 
init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES], run_id=CANDIDATES, - param=config["showcase"]["params"], + param=SHOWCASE_PARAMS, sta=config["showcase"]["stations"], showcase=EXPERIMENT_NAME, ) diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index bcb8c18f..1327819e 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -89,7 +89,7 @@ def parse_showcase_regions(): Custom regions carry their explicit extent and projection. """ result = {} - for r in config["showcase"]["regions"]: + for r in config.get("showcase", {}).get("regions", ["globe", "europe", "switzerland"]): if isinstance(r, str): result[r] = {"extent": None, "projection": None} else: @@ -315,6 +315,7 @@ def master_hash() -> str: REGIONS = parse_regions() SHOWCASE_REGIONS = parse_showcase_regions() +SHOWCASE_PARAMS = config.get("showcase", {}).get("params", ["T_2M", "SP_10M"]) REFTIMES = parse_reference_times() RUN_CONFIGS = collect_all_runs() ENV_CONFIGS = collect_all_envs() From 1144291eed774397afdeadbf93ff949a7d2fbff3 Mon Sep 17 00:00:00 2001 From: Carlos Osuna Date: Mon, 4 May 2026 10:52:08 +0200 Subject: [PATCH 17/29] linting --- workflow/rules/common.smk | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 1327819e..0dd9c460 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -89,7 +89,9 @@ def parse_showcase_regions(): Custom regions carry their explicit extent and projection. 
""" result = {} - for r in config.get("showcase", {}).get("regions", ["globe", "europe", "switzerland"]): + for r in config.get("showcase", {}).get( + "regions", ["globe", "europe", "switzerland"] + ): if isinstance(r, str): result[r] = {"extent": None, "projection": None} else: From f2180a15cb9c3b467ed4ea0f987e459c586321d8 Mon Sep 17 00:00:00 2001 From: Carlos Osuna Date: Mon, 4 May 2026 17:32:05 +0200 Subject: [PATCH 18/29] add a showcase config example to a config --- config/interpolators-ich1.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/config/interpolators-ich1.yaml b/config/interpolators-ich1.yaml index 5cb8e175..417a0897 100644 --- a/config/interpolators-ich1.yaml +++ b/config/interpolators-ich1.yaml @@ -55,6 +55,22 @@ stratification: - alpensuedseite root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517 +showcase: + params: + - T_2M + - SP_10M + - TOT_PREC + meteograms: false + animations: true + regions: + - europe + - switzerland + - name: alpine_arc + extent: [-16.0, 25.0, 30.0, 65.0] + projection: orthographic + + stations: [JUN] #, COV, GOR, WFJ, SAE, SAM, DAV, ZER, ANT, VSBAS, BRT, LTB, GOS, CEV, BIA] + locations: output_root: output/ From c26dd24d234b1fbb44eba74123941ec5b26d939d Mon Sep 17 00:00:00 2001 From: Carlos Osuna Date: Tue, 5 May 2026 12:28:52 +0200 Subject: [PATCH 19/29] regions -> domains --- src/evalml/config.py | 6 +++--- workflow/rules/common.smk | 10 +++++----- workflow/rules/plot.smk | 2 +- workflow/tools/config.schema.json | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/evalml/config.py b/src/evalml/config.py index 08610bf8..9f305ec1 100644 --- a/src/evalml/config.py +++ b/src/evalml/config.py @@ -243,12 +243,12 @@ class ShowcaseConfig(BaseModel): default=["GVE", "KLO", "LUG"], description="List of PeakWeather station IDs to generate meteograms for.", ) - regions: List[str | RegionConfig] = Field( + domains: List[str | RegionConfig] = Field( default=["globe", 
"europe", "switzerland"], description=( - "Regions to generate animations for. Each entry is either a named region " + "Domains to generate animations for. Each entry is either a named domain " "(e.g. 'globe', 'europe', 'switzerland') defined in plotting.DOMAINS, " - "or a custom region dict with 'name', optional 'extent' " + "or a custom domain dict with 'name', optional 'extent' " "[lon_min, lon_max, lat_min, lat_max], and optional 'projection'." ), ) diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 0dd9c460..e1a5a8b4 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -81,16 +81,16 @@ def parse_regions(): def parse_showcase_regions(): - """Parse showcase regions from config. + """Parse showcase domains from config. - Returns a dict mapping region name -> {extent, projection}. - Named regions (strings) have extent=None and projection=None, + Returns a dict mapping domain name -> {extent, projection}. + Named domains (strings) have extent=None and projection=None, meaning the plot script will fall back to the DOMAINS lookup. - Custom regions carry their explicit extent and projection. + Custom domains carry their explicit extent and projection. 
""" result = {} for r in config.get("showcase", {}).get( - "regions", ["globe", "europe", "switzerland"] + "domains", ["globe", "europe", "switzerland"] ): if isinstance(r, str): result[r] = {"extent": None, "projection": None} diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index 29df048d..bd52d978 100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -116,7 +116,7 @@ rule plot_forecast_frame: python {input.script} \ --input {params.grib_out_dir} --date {wildcards.init_time} --outfn {output[0]} \ --param {wildcards.param} --leadtime {wildcards.leadtime} --region {wildcards.region} \ - {params.region_extra} + {params.region_extra} \ --accu {params.accu} \ # interactive editing (needs to set localrule: True and use only one core) # marimo edit {input.script} -- \ diff --git a/workflow/tools/config.schema.json b/workflow/tools/config.schema.json index 7537d296..7ee2cb1f 100644 --- a/workflow/tools/config.schema.json +++ b/workflow/tools/config.schema.json @@ -543,13 +543,13 @@ "title": "Stations", "type": "array" }, - "regions": { + "domains": { "default": [ "globe", "europe", "switzerland" ], - "description": "Regions to generate animations for. Each entry is either a named region (e.g. 'globe', 'europe', 'switzerland') defined in plotting.DOMAINS, or a custom region dict with 'name', optional 'extent' [lon_min, lon_max, lat_min, lat_max], and optional 'projection'.", + "description": "Domains to generate animations for. Each entry is either a named domain (e.g. 
'globe', 'europe', 'switzerland') defined in plotting.DOMAINS, or a custom domain dict with 'name', optional 'extent' [lon_min, lon_max, lat_min, lat_max], and optional 'projection'.", "items": { "anyOf": [ { @@ -560,7 +560,7 @@ } ] }, - "title": "Regions", + "title": "Domains", "type": "array" } }, From 2fe543ae0d166b97753dfca87e02362f67e80f2b Mon Sep 17 00:00:00 2001 From: Carlos Osuna Date: Tue, 5 May 2026 20:24:48 +0200 Subject: [PATCH 20/29] fix for TOT_PREC (missing on step 0) --- src/data_input/__init__.py | 7 +++++-- workflow/rules/plot.smk | 12 +++++++++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/data_input/__init__.py b/src/data_input/__init__.py index 71d99ca1..49956303 100644 --- a/src/data_input/__init__.py +++ b/src/data_input/__init__.py @@ -118,11 +118,14 @@ def load_fct_data_from_grib( ds[var] = da.rename({"z": da.attrs["vcoord_type"]}) ds = xr.merge([ds[p].rename(p) for p in ds], compat="no_conflicts") lead_times = np.array(steps, dtype="timedelta64[h]") - # Restrict to the requested lead times so that the TOT_PREC disaggregation + # Reindex to the requested lead times so that the TOT_PREC disaggregation # below operates on the correct step interval even if the GRIB contains # extra (e.g. hourly) steps beyond those requested — e.g. when consuming # output from an interpolator emulator or a baseline with sub-step output. - ds = ds.sel(lead_time=lead_times) + # reindex (rather than sel) fills missing steps with NaN instead of raising + # KeyError — needed because accumulated fields like TOT_PREC are absent at + # step 0 in the GRIB, and the NaN-fill logic below handles that case. 
+ ds = ds.reindex(lead_time=lead_times) if "TOT_PREC" in ds.data_vars: ## Disaggregate TOT_PREC from cumulative-from-start (expected when the ## accumulate_from_start_of_forecast post-processor is enabled in diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index bd52d978..6d35c248 100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -137,11 +137,17 @@ def get_leadtimes(wc): rule make_forecast_animation: localrule: True + wildcard_constraints: + param="|".join(map(re.escape, SHOWCASE_PARAMS)), + region="|".join(map(re.escape, SHOWCASE_REGIONS.keys())), input: - expand( + lambda wc: expand( rules.plot_forecast_frame.output, - leadtime=lambda wc: get_leadtimes(wc), - allow_missing=True, + run_id=wc.run_id, + init_time=wc.init_time, + param=wc.param, + region=wc.region, + leadtime=get_leadtimes(wc), ), output: OUT_ROOT From 68487bab5ff7b04d9b0e8e8f4f886d37fa3b1707 Mon Sep 17 00:00:00 2001 From: Carlos Osuna Date: Tue, 5 May 2026 20:28:03 +0200 Subject: [PATCH 21/29] update the extent of alpine arc --- config/interpolators-ich1.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/interpolators-ich1.yaml b/config/interpolators-ich1.yaml index 417a0897..b89d5e09 100644 --- a/config/interpolators-ich1.yaml +++ b/config/interpolators-ich1.yaml @@ -66,7 +66,7 @@ showcase: - europe - switzerland - name: alpine_arc - extent: [-16.0, 25.0, 30.0, 65.0] + extent: [3.0, 17.0, 43.5, 48.5] projection: orthographic stations: [JUN] #, COV, GOR, WFJ, SAE, SAM, DAV, ZER, ANT, VSBAS, BRT, LTB, GOS, CEV, BIA] From 8e76a1bdaf57c0cb56cb7d140f3bc7295ab7b415 Mon Sep 17 00:00:00 2001 From: Carlos Osuna Date: Tue, 5 May 2026 20:38:22 +0200 Subject: [PATCH 22/29] add forgotten rename --- config/interpolators-ich1.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/interpolators-ich1.yaml b/config/interpolators-ich1.yaml index b89d5e09..5fd24a73 100644 --- a/config/interpolators-ich1.yaml +++ 
b/config/interpolators-ich1.yaml @@ -62,7 +62,7 @@ showcase: - TOT_PREC meteograms: false animations: true - regions: + domains: - europe - switzerland - name: alpine_arc From c9bb0eddd06d4aa183b7c976e1789dd41b495d42 Mon Sep 17 00:00:00 2001 From: Carlos Osuna Date: Wed, 6 May 2026 11:15:43 +0200 Subject: [PATCH 23/29] add an animation speed --- src/evalml/config.py | 5 +++++ workflow/rules/plot.smk | 6 +++++- workflow/tools/config.schema.json | 7 +++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/evalml/config.py b/src/evalml/config.py index 9f305ec1..14f15424 100644 --- a/src/evalml/config.py +++ b/src/evalml/config.py @@ -252,6 +252,11 @@ class ShowcaseConfig(BaseModel): "[lon_min, lon_max, lat_min, lat_max], and optional 'projection'." ), ) + animation_speed: float = Field( + default=10.0, + gt=0, + description="Animation playback speed in simulated hours per second.", + ) class Locations(BaseModel): diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index 6d35c248..0fd3aa6b 100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -153,7 +153,11 @@ rule make_forecast_animation: OUT_ROOT / "results/{showcase}/{run_id}/{init_time}/{init_time}_{param}_{region}.gif", params: - delay=lambda wc: 10 * int(RUN_CONFIGS[wc.run_id]["steps"].split("/")[2]), + delay=lambda wc: round( + int(RUN_CONFIGS[wc.run_id]["steps"].split("/")[2]) + / config["showcase"].get("animation_speed", 10.0) + * 100 + ), shell: """ convert -delay {params.delay} -loop 0 {input} {output} diff --git a/workflow/tools/config.schema.json b/workflow/tools/config.schema.json index 7ee2cb1f..33285626 100644 --- a/workflow/tools/config.schema.json +++ b/workflow/tools/config.schema.json @@ -562,6 +562,13 @@ }, "title": "Domains", "type": "array" + }, + "animation_speed": { + "default": 10.0, + "description": "Animation playback speed in simulated hours per second.", + "exclusiveMinimum": 0, + "title": "Animation Speed", + "type": "number" } }, "title": 
"ShowcaseConfig", From c72310f87cdc11a499d1a40fd12ed74c27e4e52a Mon Sep 17 00:00:00 2001 From: Carlos Osuna Date: Thu, 7 May 2026 16:13:30 +0200 Subject: [PATCH 24/29] make comparisons side by side --- config/forecasters-ich1.yaml | 47 +++-- config/interpolators-ich1.yaml | 39 ++-- src/data_input/__init__.py | 3 +- src/evalml/config.py | 65 +++++-- src/plotting/compat.py | 56 +++++- workflow/Snakefile | 20 ++- workflow/rules/common.smk | 130 +++++++++++++- workflow/rules/plot.smk | 142 ++++++++++++++- workflow/scripts/plot_combine_animations.py | 137 ++++++++++++++ workflow/scripts/plot_zarr_frame.py | 187 ++++++++++++++++++++ workflow/tools/config.schema.json | 132 +++++++++----- 11 files changed, 856 insertions(+), 102 deletions(-) create mode 100644 workflow/scripts/plot_combine_animations.py create mode 100644 workflow/scripts/plot_zarr_frame.py diff --git a/config/forecasters-ich1.yaml b/config/forecasters-ich1.yaml index 983479ed..727686d0 100644 --- a/config/forecasters-ich1.yaml +++ b/config/forecasters-ich1.yaml @@ -3,30 +3,20 @@ description: | Evaluate skill of ICON-CH1 single. 
dates: - start: 2024-01-01T00:00 - end: 2024-01-02T00:00 - frequency: 6h +# start: 2024-01-01T00:00 +# end: 2024-01-02T00:00 +# frequency: 6h + - 2025-03-27T06:00 -runs: - - forecaster: - checkpoint: https://servicedepl.meteoswiss.ch/mlstore#/experiments/409/runs/b30acf68520a4bbd8324c44666561696 - label: stage_C_icon_1km - steps: 0/120/6 - config: resources/inference/configs/sgm-forecaster-global-ich1.yaml - extra_requirements: - - earthkit-utils<0.2.0 - - earthkit-data<0.19.0 - - git+https://github.com/ecmwf/anemoi-inference.git@main +runs: - forecaster: - checkpoint: /scratch/mch/apennino/output/checkpoint/973b78b8b39543949abc2b154c5f98d9/inference-last.ckpt - label: stage_C_icon_1km_hidden_11 + checkpoint: https://service.meteoswiss.ch/mlstore#/experiments/602/runs/c30490b6ba064e4db03b430f3a2595ad + label: stage_E steps: 0/120/6 - config: resources/inference/configs/sgm-forecaster-global-ich1.yaml + config: resources/inference/configs/sgm-multidataset-forecaster-global-ich1-oper.yaml extra_requirements: - - earthkit-utils<0.2.0 - - earthkit-data<0.19.0 - - git+https://github.com/ecmwf/anemoi-inference.git@main + - git+https://github.com/ecmwf/anemoi-inference.git@e369b1a90313e9701db13f63364a467aa281cf36 # - forecaster: # checkpoint: https://huggingface.co/met-no/bris_cloudy-skies/blob/main/2025-10/cloudy-skies-2025-10.ckpt @@ -58,6 +48,25 @@ stratification: - alpensuedseite root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517 +showcase: + params: + - T_2M + - SP_10M + - TOT_PREC + meteograms: + enabled: false + stations: [JUN] #, COV, GOR, WFJ, SAE, SAM, DAV, ZER, ANT, VSBAS, BRT, LTB, GOS, CEV, BIA] + animations: + enabled: true + domains: + - europe + - switzerland + - globe +# - name: alpine_arc +# extent: [3.0, 17.0, 43.5, 48.5] +# projection: orthographic + speed: 10 # simulated hours per second + locations: output_root: output/ diff --git a/config/interpolators-ich1.yaml b/config/interpolators-ich1.yaml index 5fd24a73..caf48aaf 100644 --- 
a/config/interpolators-ich1.yaml +++ b/config/interpolators-ich1.yaml @@ -5,10 +5,10 @@ description: | issue #482. dates: - start: 2025-03-01T00:00 - end: 2025-03-03T00:00 - frequency: 24h - +# start: 2025-03-01T00:00 +# end: 2025-03-03T00:00 +# frequency: 24h + - 2025-03-27T06:00 runs: - interpolator: @@ -24,6 +24,7 @@ runs: checkpoint: https://service.meteoswiss.ch/mlstore#/experiments/602/runs/c30490b6ba064e4db03b430f3a2595ad config: resources/inference/configs/sgm-multidataset-forecaster-global-ich1-oper.yaml steps: 0/120/6 + label: stage_E extra_requirements: - git+https://github.com/ecmwf/anemoi-inference.git@e369b1a90313e9701db13f63364a467aa281cf36 extra_requirements: @@ -60,17 +61,25 @@ showcase: - T_2M - SP_10M - TOT_PREC - meteograms: false - animations: true - domains: - - europe - - switzerland - - name: alpine_arc - extent: [3.0, 17.0, 43.5, 48.5] - projection: orthographic - - stations: [JUN] #, COV, GOR, WFJ, SAE, SAM, DAV, ZER, ANT, VSBAS, BRT, LTB, GOS, CEV, BIA] - + meteograms: + enabled: false + stations: [JUN] #, COV, GOR, WFJ, SAE, SAM, DAV, ZER, ANT, VSBAS, BRT, LTB, GOS, CEV, BIA] + animations: + enabled: true + domains: + - europe + - switzerland + - globe +# - name: alpine_arc +# extent: [3.0, 17.0, 43.5, 48.5] +# projection: orthographic + speed: 10 # simulated hours per second + runs: + - Varda-Single + comparisons: + - left: Varda-Single # nested forecaster (non-candidate) + right: KENDA-CH1 # interpolator (candidate) + locations: output_root: output/ diff --git a/src/data_input/__init__.py b/src/data_input/__init__.py index 49956303..32e0b45c 100644 --- a/src/data_input/__init__.py +++ b/src/data_input/__init__.py @@ -61,7 +61,8 @@ def load_analysis_data_from_zarr( "PMSL": "msl", "TOT_PREC": "tp", } - tot_prec_string = "TOT_PREC_6H" if min(np.diff(steps)) == 6 else "TOT_PREC_1H" + _diffs = np.diff(steps) + tot_prec_string = "TOT_PREC_6H" if len(_diffs) > 0 and min(_diffs) == 6 else "TOT_PREC_1H" PARAMS_MAP_COSMO1 = { v: 
v.replace("TOT_PREC", tot_prec_string) for v in PARAMS_MAP_COSMO2.keys() } diff --git a/src/evalml/config.py b/src/evalml/config.py index 14f15424..542949cf 100644 --- a/src/evalml/config.py +++ b/src/evalml/config.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Dict, List, Any, ClassVar, FrozenSet +from typing import Dict, List, Any, ClassVar, FrozenSet, Optional from pydantic import BaseModel, Field, RootModel, field_validator @@ -224,25 +224,33 @@ class RegionConfig(BaseModel): model_config = {"extra": "forbid"} -class ShowcaseConfig(BaseModel): - """Configuration for the showcase workflow.""" +class AnimationComparison(BaseModel): + """A side-by-side comparison animation between two runs.""" + + left: str = Field(..., description="Label of the run shown in the left panel.") + right: str = Field(..., description="Label of the run shown in the right panel.") + + +class MeteogramConfig(BaseModel): + """Configuration for meteogram generation.""" - meteograms: bool = Field( + enabled: bool = Field( default=True, description="Whether to generate meteograms (time series plots at stations).", ) - animations: bool = Field( - default=True, - description="Whether to generate forecast animations (GIFs per param and region).", - ) - params: List[str] = Field( - default=["T_2M", "SP_10M"], - description="List of parameters to generate animations and meteograms for.", - ) stations: List[str] = Field( default=["GVE", "KLO", "LUG"], description="List of PeakWeather station IDs to generate meteograms for.", ) + + +class AnimationsConfig(BaseModel): + """Configuration for animation generation.""" + + enabled: bool = Field( + default=True, + description="Whether to generate forecast animations (GIFs per param and region).", + ) domains: List[str | RegionConfig] = Field( default=["globe", "europe", "switzerland"], description=( @@ -252,11 +260,42 @@ class ShowcaseConfig(BaseModel): "[lon_min, lon_max, lat_min, lat_max], and optional 'projection'." 
), ) - animation_speed: float = Field( + speed: float = Field( default=10.0, gt=0, description="Animation playback speed in simulated hours per second.", ) + runs: Optional[List[str]] = Field( + default=None, + description=( + "Labels of runs to generate individual animations for. " + "Defaults to all candidate runs when omitted." + ), + ) + comparisons: List[AnimationComparison] = Field( + default=[], + description=( + "Side-by-side two-panel comparison animations. Each entry specifies " + "the labels of the left and right panel runs." + ), + ) + + +class ShowcaseConfig(BaseModel): + """Configuration for the showcase workflow.""" + + params: List[str] = Field( + default=["T_2M", "SP_10M"], + description="List of parameters to generate animations and meteograms for.", + ) + meteograms: MeteogramConfig = Field( + default_factory=MeteogramConfig, + description="Configuration for meteogram generation.", + ) + animations: AnimationsConfig = Field( + default_factory=AnimationsConfig, + description="Configuration for animation generation.", + ) class Locations(BaseModel): diff --git a/src/plotting/compat.py b/src/plotting/compat.py index 7c4d14d3..bf2ad9e2 100644 --- a/src/plotting/compat.py +++ b/src/plotting/compat.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timedelta from pathlib import Path import earthkit.data as ekd @@ -77,6 +77,60 @@ def load_state_from_grib( return state +def load_state_from_zarr( + zarr_root: Path, + reftime: datetime, + lead_time_hours: int, + params: list[str], + source_type: str = "analysis", +) -> dict: + """Load a single time step from a zarr source into the state dict used by StatePlotter. + + Parameters + ---------- + zarr_root: + Path to the zarr dataset. + reftime: + Forecast reference time (init time). + lead_time_hours: + Lead time in hours to load. + params: + List of parameter names (ICON convention, e.g. ``['U_10M', 'V_10M']``). 
+ source_type: + ``'analysis'`` for truth zarrs (loads via ``load_analysis_data_from_zarr``), + ``'baseline'`` for baseline forecast zarrs. + """ + from data_input import load_analysis_data_from_zarr, load_baseline_from_zarr + + steps = [lead_time_hours] + + if source_type == "analysis": + ds = load_analysis_data_from_zarr(zarr_root, reftime, steps, params) + ds_t = ds.isel(time=0) if "time" in ds.dims else ds.squeeze() + else: + ds = load_baseline_from_zarr(zarr_root, reftime, steps, params) + ds_t = ds.isel(lead_time=0) if "lead_time" in ds.dims else ds.squeeze() + + lat = ds_t.lat.values.flatten() + lon = ds_t.lon.values.flatten() + + hull = MultiPoint(list(zip(lon.tolist(), lat.tolist()))).convex_hull + state = { + "forecast_reference_time": reftime, + "valid_time": reftime + timedelta(hours=lead_time_hours), + "longitudes": lon, + "latitudes": lat, + "lam_envelope": gpd.GeoSeries([hull], crs="EPSG:4326"), + "fields": {}, + } + for param in params: + if param in ds_t.data_vars: + state["fields"][param] = ds_t[param].values.flatten() + else: + state["fields"][param] = np.full(lat.size, np.nan, dtype=float) + return state + + def load_state_from_raw( file: Path, paramlist: list[str] | None = None ) -> dict[str, np.ndarray | dict[str, np.ndarray]]: diff --git a/workflow/Snakefile b/workflow/Snakefile index 5dfab820..956a5985 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -142,12 +142,24 @@ rule showcase_all: expand( rules.make_forecast_animation.output, init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES], - run_id=CANDIDATES, + run_id=SHOWCASE_ANIMATION_RUN_IDS, + param=SHOWCASE_PARAMS, + region=list(SHOWCASE_REGIONS.keys()), + showcase=EXPERIMENT_NAME, + ) + if config["showcase"]["animations"]["enabled"] + else [] + ), + ( + expand( + rules.make_comparison_animation.output, + init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES], + comparison_id=[c["id"] for c in SHOWCASE_COMPARISONS], param=SHOWCASE_PARAMS, region=list(SHOWCASE_REGIONS.keys()), 
showcase=EXPERIMENT_NAME, ) - if config["showcase"]["animations"] + if config["showcase"]["animations"]["enabled"] and SHOWCASE_COMPARISONS else [] ), ( @@ -156,10 +168,10 @@ rule showcase_all: init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES], run_id=CANDIDATES, param=SHOWCASE_PARAMS, - sta=config["showcase"]["stations"], + sta=config["showcase"]["meteograms"]["stations"], showcase=EXPERIMENT_NAME, ) - if config["showcase"]["meteograms"] + if config["showcase"]["meteograms"]["enabled"] else [] ), diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index e1a5a8b4..2438c967 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -89,7 +89,7 @@ def parse_showcase_regions(): Custom domains carry their explicit extent and projection. """ result = {} - for r in config.get("showcase", {}).get( + for r in config.get("showcase", {}).get("animations", {}).get( "domains", ["globe", "europe", "switzerland"] ): if isinstance(r, str): @@ -323,3 +323,131 @@ RUN_CONFIGS = collect_all_runs() ENV_CONFIGS = collect_all_envs() BASELINE_CONFIGS = collect_all_baselines() EXPERIMENT_PARTICIPANTS = collect_experiment_participants() + + +# ============================================================================ +# Showcase animation helpers +# ============================================================================ + + +def sanitize_label(label: str) -> str: + """Sanitize a run label for use as a path component.""" + import re as _re + return _re.sub(r"[^a-zA-Z0-9_-]", "_", label) + + +def collect_zarr_sources() -> dict: + """Collect zarr-based sources (truth + baselines) keyed by their label. + + Returns a dict mapping label -> {root, step, total_hours, source_type}. 
+ """ + sources = {} + + # Truth (analysis) + truth_cfg = config.get("truth", {}) + if truth_cfg and "root" in truth_cfg: + label = truth_cfg.get("label", "truth") + sources[label] = { + "root": truth_cfg["root"], + "step": 1, + "total_hours": 120, + "source_type": "analysis", + } + + # Baselines + for baseline_id, cfg in BASELINE_CONFIGS.items(): + label = cfg.get("label", baseline_id) + _, total, step = map(int, cfg.get("steps", "0/120/1").split("/")) + sources[label] = { + "root": cfg["root"], + "step": step, + "total_hours": total, + "source_type": "baseline", + } + + return sources + + +def _resolve_label(label: str) -> dict: + """Resolve a label to a source descriptor used in comparison entries. + + Returns a dict with: + type — ``'run'`` or ``'zarr'`` + run_id — present when type == 'run' + label — present when type == 'zarr' + step — time step in hours + """ + # ML runs (candidates and non-candidates such as nested forecasters) + for run_id, cfg in RUN_CONFIGS.items(): + if cfg.get("label") == label: + return { + "type": "run", + "run_id": run_id, + "step": int(cfg["steps"].split("/")[2]), + } + # Zarr sources (truth / baselines) + if label in ZARR_SOURCES: + z = ZARR_SOURCES[label] + return {"type": "zarr", "label": label, "step": z["step"]} + + available_runs = sorted({cfg.get("label") for cfg in RUN_CONFIGS.values() if cfg.get("label")}) + available_zarr = sorted(ZARR_SOURCES.keys()) + raise ValueError( + f"No source found with label {label!r}. " + f"ML run labels: {available_runs}. " + f"Zarr source labels: {available_zarr}." + ) + + +def label_to_run_id(label: str) -> str: + """Return the run_id for the given label (ML runs only). + + Searches both candidate and non-candidate runs (e.g. nested forecasters). + Raises ValueError if not found. 
+ """ + for run_id, cfg in RUN_CONFIGS.items(): + if cfg.get("label") == label: + return run_id + available = sorted({cfg.get("label") for cfg in RUN_CONFIGS.values() if cfg.get("label")}) + raise ValueError( + f"No run found with label {label!r}. Available ML run labels: {available}" + ) + + +def parse_showcase_animation_runs() -> list: + """Return the run_ids to animate individually. + + If ``animations.runs`` is set in the showcase config, filter by those labels + (ML runs only; zarr sources have their own animation pipeline). + Otherwise return all candidate run_ids. + """ + labels = config.get("showcase", {}).get("animations", {}).get("runs") + if labels is None: + return list(collect_all_candidates().keys()) + return [label_to_run_id(label) for label in labels] + + +def parse_showcase_comparisons() -> list: + """Parse ``animations.comparisons`` from the showcase config. + + Each returned entry has: + id — sanitised ``{left_label}_vs_{right_label}`` path component + left — source descriptor (type, run_id/label, step) + right — source descriptor (type, run_id/label, step) + """ + comparisons = config.get("showcase", {}).get("animations", {}).get("comparisons", []) + result = [] + for c in comparisons: + left_label = c["left"] + right_label = c["right"] + result.append({ + "id": f"{sanitize_label(left_label)}_vs_{sanitize_label(right_label)}", + "left": _resolve_label(left_label), + "right": _resolve_label(right_label), + }) + return result + + +ZARR_SOURCES = collect_zarr_sources() +SHOWCASE_ANIMATION_RUN_IDS = parse_showcase_animation_runs() +SHOWCASE_COMPARISONS = parse_showcase_comparisons() diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index 0fd3aa6b..f0d9c1b7 100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -138,6 +138,7 @@ def get_leadtimes(wc): rule make_forecast_animation: localrule: True wildcard_constraints: + run_id="|".join(map(re.escape, RUN_CONFIGS.keys())), param="|".join(map(re.escape, SHOWCASE_PARAMS)), 
region="|".join(map(re.escape, SHOWCASE_REGIONS.keys())), input: @@ -155,10 +156,149 @@ rule make_forecast_animation: params: delay=lambda wc: round( int(RUN_CONFIGS[wc.run_id]["steps"].split("/")[2]) - / config["showcase"].get("animation_speed", 10.0) + / config["showcase"]["animations"].get("speed", 10.0) * 100 ), shell: """ convert -delay {params.delay} -loop 0 {input} {output} """ + + +def _comparison_by_id(comparison_id: str) -> dict: + """Look up a SHOWCASE_COMPARISONS entry by its id wildcard.""" + for c in SHOWCASE_COMPARISONS: + if c["id"] == comparison_id: + return c + raise ValueError(f"No comparison with id {comparison_id!r}") + + +def _side_gif_path(side: dict, wc) -> list: + """Return the GIF path list for one side of a comparison (run or zarr).""" + if side["type"] == "run": + return expand( + rules.make_forecast_animation.output, + run_id=side["run_id"], + init_time=wc.init_time, + param=wc.param, + region=wc.region, + showcase=wc.showcase, + ) + else: + return expand( + rules.make_zarr_animation.output, + source_id=side["label"], + init_time=wc.init_time, + param=wc.param, + region=wc.region, + showcase=wc.showcase, + ) + + +def get_zarr_leadtimes(wc): + """Get lead times for a zarr source, skipping step 0 for TOT_PREC.""" + cfg = ZARR_SOURCES[wc.source_id] + step = cfg["step"] + total = cfg["total_hours"] + start = step # always skip lead time 0 (no meaningful accumulation at t=0) + return [f"{i:03}" for i in range(start, total + 1, step)] + + +rule plot_zarr_frame: + input: + script="workflow/scripts/plot_zarr_frame.py", + output: + OUT_ROOT + / "data/zarr/{source_id}/{init_time}/frames/frame_{leadtime}_{param}_{region}.png", + wildcard_constraints: + source_id="|".join(map(re.escape, ZARR_SOURCES.keys())) or "NEVER", + leadtime=r"\d+", + region="|".join(map(re.escape, SHOWCASE_REGIONS.keys())), + resources: + slurm_partition="postproc", + cpus_per_task=1, + runtime="10m", + params: + zarr_path=lambda wc: ZARR_SOURCES[wc.source_id]["root"], + 
source_type=lambda wc: ZARR_SOURCES[wc.source_id]["source_type"], + region_extra=lambda wc: ( + "--extent {} --projection {}".format( + " ".join(map(str, SHOWCASE_REGIONS[wc.region]["extent"])), + SHOWCASE_REGIONS[wc.region]["projection"], + ) + if SHOWCASE_REGIONS.get(wc.region, {}).get("extent") is not None + else "" + ), + accu=lambda wc: ZARR_SOURCES[wc.source_id]["step"], + shell: + """ + export ECCODES_DEFINITION_PATH=$(realpath .venv/share/eccodes-cosmo-resources/definitions) + python {input.script} \ + --zarr {params.zarr_path} \ + --source_type {params.source_type} \ + --date {wildcards.init_time} \ + --outfn {output} \ + --param {wildcards.param} \ + --leadtime {wildcards.leadtime} \ + --region {wildcards.region} \ + {params.region_extra} \ + --accu {params.accu} + """ + + +rule make_zarr_animation: + localrule: True + wildcard_constraints: + source_id="|".join(map(re.escape, ZARR_SOURCES.keys())) or "NEVER", + param="|".join(map(re.escape, SHOWCASE_PARAMS)), + region="|".join(map(re.escape, SHOWCASE_REGIONS.keys())), + input: + lambda wc: expand( + rules.plot_zarr_frame.output, + source_id=wc.source_id, + init_time=wc.init_time, + param=wc.param, + region=wc.region, + leadtime=get_zarr_leadtimes(wc), + ), + output: + OUT_ROOT + / "results/{showcase}/zarr/{source_id}/{init_time}/{init_time}_{param}_{region}.gif", + params: + delay=lambda wc: round( + ZARR_SOURCES[wc.source_id]["step"] + / config["showcase"]["animations"].get("speed", 10.0) + * 100 + ), + shell: + """ + convert -delay {params.delay} -loop 0 {input} {output} + """ + + +rule make_comparison_animation: + """Side-by-side two-panel animation comparing two sources, synced in simulated time.""" + localrule: True + wildcard_constraints: + param="|".join(map(re.escape, SHOWCASE_PARAMS)), + region="|".join(map(re.escape, SHOWCASE_REGIONS.keys())), + comparison_id="|".join(map(re.escape, [c["id"] for c in SHOWCASE_COMPARISONS])) or "NEVER", + input: + left=lambda wc: 
_side_gif_path(_comparison_by_id(wc.comparison_id)["left"], wc), + right=lambda wc: _side_gif_path(_comparison_by_id(wc.comparison_id)["right"], wc), + script="workflow/scripts/plot_combine_animations.py", + output: + OUT_ROOT + / "results/{showcase}/comparisons/{comparison_id}/{init_time}/{init_time}_{param}_{region}.gif", + params: + left_step=lambda wc: _comparison_by_id(wc.comparison_id)["left"]["step"], + right_step=lambda wc: _comparison_by_id(wc.comparison_id)["right"]["step"], + speed=config["showcase"]["animations"].get("speed", 10.0), + shell: + """ + python {input.script} \ + --left {input.left} --left_step {params.left_step} \ + --right {input.right} --right_step {params.right_step} \ + --output {output} \ + --speed {params.speed} + """ diff --git a/workflow/scripts/plot_combine_animations.py b/workflow/scripts/plot_combine_animations.py new file mode 100644 index 00000000..1c5d95ab --- /dev/null +++ b/workflow/scripts/plot_combine_animations.py @@ -0,0 +1,137 @@ +"""Combine two GIF animations side by side, synced in simulated time. + +Each GIF may have a different time step (e.g. 6h vs 1h). The output plays at +the finest resolution, holding frames from the coarser GIF steady while the +finer one advances. Both panels stay in sync with respect to simulated time. 
+ +Usage +----- + python plot_combine_animations.py \\ + --left left.gif --left_step 6 \\ + --right right.gif --right_step 1 \\ + --output comparison.gif \\ + [--speed 6] # simulated hours per second (default: 6) + [--total_hours 120] # total simulated hours covered (default: 120) + [--start_hour 1] # first simulated hour in the GIFs (default: min step) +""" + +import argparse +from pathlib import Path + +from PIL import Image, ImageSequence + + +def load_frames(path: str) -> list[Image.Image]: + im = Image.open(path) + frames = [] + for frame in ImageSequence.Iterator(im): + frames.append(frame.convert("RGBA")) + return frames + + +def frame_for_hour(frames: list[Image.Image], step: int, hour: int) -> Image.Image: + """Return the frame that is valid at the given simulated hour. + + Frames are assumed to start at ``step`` (i.e. frames[0] covers hour=step, + frames[1] covers hour=2*step, etc.). Hours before the first frame return + frames[0]; hours beyond the last frame return the last frame. 
+ """ + idx = max(0, min(len(frames) - 1, (hour - 1) // step)) + return frames[idx] + + +def combine( + left_path: str, + right_path: str, + out_path: str, + left_step: int, + right_step: int, + speed: float = 6.0, + total_hours: int = 120, + start_hour: int | None = None, +) -> None: + left_frames = load_frames(left_path) + right_frames = load_frames(right_path) + + out_step = min(left_step, right_step) + if start_hour is None: + start_hour = out_step + + sim_hours = list(range(start_hour, total_hours + 1, out_step)) + delay_ms = round(out_step / speed * 1000) + + w = left_frames[0].width + right_frames[0].width + h = max(left_frames[0].height, right_frames[0].height) + + out_frames = [] + for sh in sim_hours: + canvas = Image.new("RGBA", (w, h), (255, 255, 255, 255)) + lf = frame_for_hour(left_frames, left_step, sh) + rf = frame_for_hour(right_frames, right_step, sh) + canvas.paste(lf, (0, 0)) + canvas.paste(rf, (left_frames[0].width, 0)) + out_frames.append(canvas.convert("P", palette=Image.ADAPTIVE, colors=256)) + + out_frames[0].save( + out_path, + save_all=True, + append_images=out_frames[1:], + loop=0, + duration=delay_ms, + optimize=False, + ) + print( + f"Written {len(out_frames)} frames " + f"({out_step}h steps, {delay_ms}ms/frame, " + f"{speed} sim-h/s) → {out_path}" + ) + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Combine two GIF animations side by side, synced in simulated time." 
+ ) + parser.add_argument("--left", required=True, help="Path to the left GIF") + parser.add_argument("--right", required=True, help="Path to the right GIF") + parser.add_argument("--output", required=True, help="Output GIF path") + parser.add_argument( + "--left_step", type=int, required=True, help="Time step of the left GIF (h)" + ) + parser.add_argument( + "--right_step", type=int, required=True, help="Time step of the right GIF (h)" + ) + parser.add_argument( + "--speed", + type=float, + default=6.0, + help="Animation speed in simulated hours per second (default: 6)", + ) + parser.add_argument( + "--total_hours", + type=int, + default=120, + help="Total simulated hours covered by the GIFs (default: 120)", + ) + parser.add_argument( + "--start_hour", + type=int, + default=None, + help="First simulated hour in the GIFs (default: min step size)", + ) + args = parser.parse_args() + + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + combine( + left_path=args.left, + right_path=args.right, + out_path=args.output, + left_step=args.left_step, + right_step=args.right_step, + speed=args.speed, + total_hours=args.total_hours, + start_hour=args.start_hour, + ) + + +if __name__ == "__main__": + main() diff --git a/workflow/scripts/plot_zarr_frame.py b/workflow/scripts/plot_zarr_frame.py new file mode 100644 index 00000000..df974b39 --- /dev/null +++ b/workflow/scripts/plot_zarr_frame.py @@ -0,0 +1,187 @@ +"""Plot a single forecast frame from a zarr source (truth or baseline). + +Analogous to plot_forecast_frame.mo.py but reads zarr instead of GRIB. +TOT_PREC disaggregation is handled by the data_input loading functions, +so no accumulation arithmetic is needed here. 
+ +Usage +----- + python plot_zarr_frame.py \\ + --zarr /path/to/data.zarr \\ + --source_type analysis \\ # or 'baseline' + --date 202503270600 \\ + --leadtime 006 \\ + --param T_2M \\ + --region switzerland \\ + --outfn /path/to/frame.png \\ + [--extent LON_MIN LON_MAX LAT_MIN LAT_MAX] \\ + [--projection orthographic] \\ + [--accu 1] +""" + +import logging +from argparse import ArgumentParser +from datetime import datetime +from pathlib import Path + +import cartopy.crs as ccrs +import numpy as np + +from plotting import DOMAINS, StatePlotter, get_projection +from plotting.colormap_defaults import CMAP_DEFAULTS +from plotting.compat import load_state_from_zarr + +LOG = logging.getLogger(__name__) +LOG_FMT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" +logging.basicConfig(level=logging.INFO, format=LOG_FMT) + + +def get_style(param, units_override=None, accu=1): + lookup = f"{param}_{accu}H" if param == "TOT_PREC" else param + cfg = CMAP_DEFAULTS[lookup] + import earthkit.plots as ekp + + units = units_override if units_override is not None else cfg.get("units", "") + return { + "style": ekp.styles.Style( + levels=cfg.get("bounds", cfg.get("levels", None)), + extend="both", + units=units, + colors=cfg.get("colors", None), + ), + "norm": cfg.get("norm", None), + "cmap": cfg.get("cmap", None), + "levels": cfg.get("levels", None), + "vmin": cfg.get("vmin", None), + "vmax": cfg.get("vmax", None), + "colors": cfg.get("colors", None), + } + + +def preprocess_field(param, state): + try: + import pint + + _ureg = pint.UnitRegistry() + + def _k_to_c(arr): + try: + return (_ureg.Quantity(arr, _ureg.kelvin).to(_ureg.degC)).magnitude + except Exception: + return arr - 273.15 + + def _ms_to_knots(arr): + try: + return ( + _ureg.Quantity(arr, _ureg.meter / _ureg.second).to(_ureg.knot) + ).magnitude + except Exception: + return arr * 1.943844 + + def _m_to_mm(arr): + try: + return (_ureg.Quantity(arr, _ureg.meter).to(_ureg.millimeter)).magnitude + except Exception: + 
return arr * 1000 + + except Exception: + LOG.warning("pint not available; using hardcoded conversions") + + def _k_to_c(arr): + return arr - 273.15 + + def _ms_to_knots(arr): + return arr * 1.943844 + + def _m_to_mm(arr): + return arr * 1000 + + fields = state["fields"] + if param in ("T_2M", "TD_2M", "T", "TD"): + return _k_to_c(fields[param]), "°C" + if param == "SP_10M": + return np.sqrt(fields["U_10M"] ** 2 + fields["V_10M"] ** 2), "m/s" + if param == "SP": + return np.sqrt(fields["U"] ** 2 + fields["V"] ** 2), "m/s" + if param == "TOT_PREC": + return np.maximum(_m_to_mm(fields[param]), 0), "mm" + return fields[param], None + + +def main(): + parser = ArgumentParser() + parser.add_argument("--zarr", type=str, required=True, help="Path to zarr dataset") + parser.add_argument( + "--source_type", + type=str, + default="analysis", + choices=["analysis", "baseline"], + help="Zarr source type", + ) + parser.add_argument("--date", type=str, required=True, help="Reference datetime (YYYYmmddHHMM)") + parser.add_argument("--outfn", type=str, required=True, help="Output filename") + parser.add_argument("--leadtime", type=str, required=True, help="Lead time (hours, zero-padded)") + parser.add_argument("--param", type=str, required=True, help="Parameter name") + parser.add_argument("--region", type=str, required=True, help="Region name") + parser.add_argument( + "--extent", + type=float, + nargs=4, + default=None, + metavar=("LON_MIN", "LON_MAX", "LAT_MIN", "LAT_MAX"), + ) + parser.add_argument("--projection", type=str, default=None) + parser.add_argument("--accu", type=int, default=1, help="Accumulation period in hours") + args = parser.parse_args() + + reftime = datetime.strptime(args.date, "%Y%m%d%H%M") + lead_time_hours = int(args.leadtime) + outfn = Path(args.outfn) + param = args.param + + if param == "SP_10M": + paramlist = ["U_10M", "V_10M"] + elif param == "SP": + paramlist = ["U", "V"] + else: + paramlist = [param] + + state = load_state_from_zarr( + 
zarr_root=Path(args.zarr), + reftime=reftime, + lead_time_hours=lead_time_hours, + params=paramlist, + source_type=args.source_type, + ) + + plotter = StatePlotter(state["longitudes"], state["latitudes"], outfn.parent) + + if args.extent is not None: + projection = get_projection(args.projection or "orthographic") + extent = args.extent + else: + projection = DOMAINS[args.region]["projection"] + extent = DOMAINS[args.region]["extent"] + + fig = plotter.init_geoaxes( + nrows=1, ncols=1, projection=projection, bbox=extent, name=args.region, size=(6, 6) + ) + subplot = fig.add_map(row=0, column=0) + + field, units_override = preprocess_field(param, state) + plotter.plot_field(subplot, field, **get_style(param, units_override, accu=args.accu)) + subplot.ax.add_geometries( + state["lam_envelope"], + edgecolor="black", + facecolor="none", + crs=ccrs.PlateCarree(), + ) + + validtime = state["valid_time"].strftime("%Y%m%d%H%M") + fig.title(f"{param}, time: {validtime}") + fig.save(outfn, bbox_inches="tight", dpi=200) + LOG.info(f"saved: {outfn}") + + +if __name__ == "__main__": + main() diff --git a/workflow/tools/config.schema.json b/workflow/tools/config.schema.json index 33285626..2b759d69 100644 --- a/workflow/tools/config.schema.json +++ b/workflow/tools/config.schema.json @@ -503,72 +503,110 @@ "title": "RegionConfig", "type": "object" }, - "ShowcaseConfig": { - "description": "Configuration for the showcase workflow.", + "AnimationComparison": { "properties": { - "meteograms": { - "default": true, - "description": "Whether to generate meteograms (time series plots at stations).", - "title": "Meteograms", - "type": "boolean" + "left": { + "description": "Label of the run shown in the left panel.", + "title": "Left", + "type": "string" }, - "animations": { + "right": { + "description": "Label of the run shown in the right panel.", + "title": "Right", + "type": "string" + } + }, + "required": ["left", "right"], + "title": "AnimationComparison", + "type": "object" + }, 
+ "AnimationsConfig": { + "description": "Configuration for animation generation.", + "properties": { + "enabled": { "default": true, "description": "Whether to generate forecast animations (GIFs per param and region).", - "title": "Animations", + "title": "Enabled", "type": "boolean" }, - "params": { - "default": [ - "T_2M", - "SP_10M" - ], - "description": "List of parameters to generate animations and meteograms for.", - "items": { - "type": "string" - }, - "title": "Params", - "type": "array" - }, - "stations": { - "default": [ - "GVE", - "KLO", - "LUG" - ], - "description": "List of PeakWeather station IDs to generate meteograms for.", - "items": { - "type": "string" - }, - "title": "Stations", - "type": "array" - }, "domains": { - "default": [ - "globe", - "europe", - "switzerland" - ], + "default": ["globe", "europe", "switzerland"], "description": "Domains to generate animations for. Each entry is either a named domain (e.g. 'globe', 'europe', 'switzerland') defined in plotting.DOMAINS, or a custom domain dict with 'name', optional 'extent' [lon_min, lon_max, lat_min, lat_max], and optional 'projection'.", "items": { "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/$defs/RegionConfig" - } + {"type": "string"}, + {"$ref": "#/$defs/RegionConfig"} ] }, "title": "Domains", "type": "array" }, - "animation_speed": { + "speed": { "default": 10.0, "description": "Animation playback speed in simulated hours per second.", "exclusiveMinimum": 0, - "title": "Animation Speed", + "title": "Speed", "type": "number" + }, + "runs": { + "anyOf": [ + {"items": {"type": "string"}, "type": "array"}, + {"type": "null"} + ], + "default": null, + "description": "Labels of runs to generate individual animations for. 
Defaults to all candidate runs when omitted.", + "title": "Runs" + }, + "comparisons": { + "default": [], + "description": "Side-by-side two-panel comparison animations.", + "items": {"$ref": "#/$defs/AnimationComparison"}, + "title": "Comparisons", + "type": "array" + } + }, + "title": "AnimationsConfig", + "type": "object" + }, + "MeteogramConfig": { + "description": "Configuration for meteogram generation.", + "properties": { + "enabled": { + "default": true, + "description": "Whether to generate meteograms (time series plots at stations).", + "title": "Enabled", + "type": "boolean" + }, + "stations": { + "default": ["GVE", "KLO", "LUG"], + "description": "List of PeakWeather station IDs to generate meteograms for.", + "items": {"type": "string"}, + "title": "Stations", + "type": "array" + } + }, + "title": "MeteogramConfig", + "type": "object" + }, + "ShowcaseConfig": { + "description": "Configuration for the showcase workflow.", + "properties": { + "params": { + "default": ["T_2M", "SP_10M"], + "description": "List of parameters to generate animations and meteograms for.", + "items": {"type": "string"}, + "title": "Params", + "type": "array" + }, + "meteograms": { + "$ref": "#/$defs/MeteogramConfig", + "default": {}, + "description": "Configuration for meteogram generation." + }, + "animations": { + "$ref": "#/$defs/AnimationsConfig", + "default": {}, + "description": "Configuration for animation generation." 
} }, "title": "ShowcaseConfig", From f91cd6599fbe92a1c5a7edfa8f86cf197071e54e Mon Sep 17 00:00:00 2001 From: Carlos Osuna Date: Thu, 7 May 2026 17:10:41 +0200 Subject: [PATCH 25/29] organize the configs --- config/interpolators-ich1.yaml | 21 ++++---- src/evalml/config.py | 40 ++++++++++---- workflow/Snakefile | 6 +-- workflow/rules/common.smk | 2 +- workflow/tools/config.schema.json | 87 ++++++++++++++++--------------- 5 files changed, 90 insertions(+), 66 deletions(-) diff --git a/config/interpolators-ich1.yaml b/config/interpolators-ich1.yaml index 5fd24a73..fc835e18 100644 --- a/config/interpolators-ich1.yaml +++ b/config/interpolators-ich1.yaml @@ -60,16 +60,17 @@ showcase: - T_2M - SP_10M - TOT_PREC - meteograms: false - animations: true - domains: - - europe - - switzerland - - name: alpine_arc - extent: [3.0, 17.0, 43.5, 48.5] - projection: orthographic - - stations: [JUN] #, COV, GOR, WFJ, SAE, SAM, DAV, ZER, ANT, VSBAS, BRT, LTB, GOS, CEV, BIA] + meteograms: + enabled: false + stations: [JUN] #, COV, GOR, WFJ, SAE, SAM, DAV, ZER, ANT, VSBAS, BRT, LTB, GOS, CEV, BIA] + animations: + enabled: true + domains: + - europe + - switzerland + - name: alpine_arc + extent: [3.0, 17.0, 43.5, 48.5] + projection: orthographic locations: output_root: output/ diff --git a/src/evalml/config.py b/src/evalml/config.py index 9f305ec1..7a031c1f 100644 --- a/src/evalml/config.py +++ b/src/evalml/config.py @@ -224,25 +224,26 @@ class RegionConfig(BaseModel): model_config = {"extra": "forbid"} -class ShowcaseConfig(BaseModel): - """Configuration for the showcase workflow.""" +class MeteogramConfig(BaseModel): + """Configuration for meteogram generation.""" - meteograms: bool = Field( + enabled: bool = Field( default=True, description="Whether to generate meteograms (time series plots at stations).", ) - animations: bool = Field( - default=True, - description="Whether to generate forecast animations (GIFs per param and region).", - ) - params: List[str] = Field( - 
default=["T_2M", "SP_10M"], - description="List of parameters to generate animations and meteograms for.", - ) stations: List[str] = Field( default=["GVE", "KLO", "LUG"], description="List of PeakWeather station IDs to generate meteograms for.", ) + + +class AnimationsConfig(BaseModel): + """Configuration for animation generation.""" + + enabled: bool = Field( + default=True, + description="Whether to generate forecast animations (GIFs per param and region).", + ) domains: List[str | RegionConfig] = Field( default=["globe", "europe", "switzerland"], description=( @@ -254,6 +255,23 @@ class ShowcaseConfig(BaseModel): ) +class ShowcaseConfig(BaseModel): + """Configuration for the showcase workflow.""" + + params: List[str] = Field( + default=["T_2M", "SP_10M"], + description="List of parameters to generate animations and meteograms for.", + ) + meteograms: MeteogramConfig = Field( + default_factory=MeteogramConfig, + description="Configuration for meteogram generation.", + ) + animations: AnimationsConfig = Field( + default_factory=AnimationsConfig, + description="Configuration for animation generation.", + ) + + class Locations(BaseModel): """Locations of data and services used in the workflow.""" diff --git a/workflow/Snakefile b/workflow/Snakefile index 5dfab820..655b9207 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -147,7 +147,7 @@ rule showcase_all: region=list(SHOWCASE_REGIONS.keys()), showcase=EXPERIMENT_NAME, ) - if config["showcase"]["animations"] + if config["showcase"]["animations"]["enabled"] else [] ), ( @@ -156,10 +156,10 @@ rule showcase_all: init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES], run_id=CANDIDATES, param=SHOWCASE_PARAMS, - sta=config["showcase"]["stations"], + sta=config["showcase"]["meteograms"]["stations"], showcase=EXPERIMENT_NAME, ) - if config["showcase"]["meteograms"] + if config["showcase"]["meteograms"]["enabled"] else [] ), diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 
e1a5a8b4..ae54ddab 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -89,7 +89,7 @@ def parse_showcase_regions(): Custom domains carry their explicit extent and projection. """ result = {} - for r in config.get("showcase", {}).get( + for r in config.get("showcase", {}).get("animations", {}).get( "domains", ["globe", "europe", "switzerland"] ): if isinstance(r, str): diff --git a/workflow/tools/config.schema.json b/workflow/tools/config.schema.json index 7ee2cb1f..0e83e64d 100644 --- a/workflow/tools/config.schema.json +++ b/workflow/tools/config.schema.json @@ -503,67 +503,72 @@ "title": "RegionConfig", "type": "object" }, - "ShowcaseConfig": { - "description": "Configuration for the showcase workflow.", + "MeteogramConfig": { + "description": "Configuration for meteogram generation.", "properties": { - "meteograms": { + "enabled": { "default": true, "description": "Whether to generate meteograms (time series plots at stations).", - "title": "Meteograms", - "type": "boolean" - }, - "animations": { - "default": true, - "description": "Whether to generate forecast animations (GIFs per param and region).", - "title": "Animations", + "title": "Enabled", "type": "boolean" }, - "params": { - "default": [ - "T_2M", - "SP_10M" - ], - "description": "List of parameters to generate animations and meteograms for.", - "items": { - "type": "string" - }, - "title": "Params", - "type": "array" - }, "stations": { - "default": [ - "GVE", - "KLO", - "LUG" - ], + "default": ["GVE", "KLO", "LUG"], "description": "List of PeakWeather station IDs to generate meteograms for.", - "items": { - "type": "string" - }, + "items": {"type": "string"}, "title": "Stations", "type": "array" + } + }, + "title": "MeteogramConfig", + "type": "object" + }, + "AnimationsConfig": { + "description": "Configuration for animation generation.", + "properties": { + "enabled": { + "default": true, + "description": "Whether to generate forecast animations (GIFs per param and region).", + 
"title": "Enabled", + "type": "boolean" }, "domains": { - "default": [ - "globe", - "europe", - "switzerland" - ], + "default": ["globe", "europe", "switzerland"], "description": "Domains to generate animations for. Each entry is either a named domain (e.g. 'globe', 'europe', 'switzerland') defined in plotting.DOMAINS, or a custom domain dict with 'name', optional 'extent' [lon_min, lon_max, lat_min, lat_max], and optional 'projection'.", "items": { "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/$defs/RegionConfig" - } + {"type": "string"}, + {"$ref": "#/$defs/RegionConfig"} ] }, "title": "Domains", "type": "array" } }, + "title": "AnimationsConfig", + "type": "object" + }, + "ShowcaseConfig": { + "description": "Configuration for the showcase workflow.", + "properties": { + "params": { + "default": ["T_2M", "SP_10M"], + "description": "List of parameters to generate animations and meteograms for.", + "items": {"type": "string"}, + "title": "Params", + "type": "array" + }, + "meteograms": { + "$ref": "#/$defs/MeteogramConfig", + "default": {}, + "description": "Configuration for meteogram generation." + }, + "animations": { + "$ref": "#/$defs/AnimationsConfig", + "default": {}, + "description": "Configuration for animation generation." + } + }, "title": "ShowcaseConfig", "type": "object" }, From 8729776903f804fa4c9153669b3961a0e1f3a8c1 Mon Sep 17 00:00:00 2001 From: Carlos Osuna Date: Mon, 18 May 2026 11:23:44 +0200 Subject: [PATCH 26/29] fix linting --- workflow/rules/common.smk | 6 +- workflow/tools/config.schema.json | 121 +++++++++++++++++------------- 2 files changed, 72 insertions(+), 55 deletions(-) diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index ae54ddab..30a5b77e 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -89,8 +89,10 @@ def parse_showcase_regions(): Custom domains carry their explicit extent and projection. 
""" result = {} - for r in config.get("showcase", {}).get("animations", {}).get( - "domains", ["globe", "europe", "switzerland"] + for r in ( + config.get("showcase", {}) + .get("animations", {}) + .get("domains", ["globe", "europe", "switzerland"]) ): if isinstance(r, str): result[r] = {"extent": None, "projection": None} diff --git a/workflow/tools/config.schema.json b/workflow/tools/config.schema.json index f178b9d7..3d749361 100644 --- a/workflow/tools/config.schema.json +++ b/workflow/tools/config.schema.json @@ -1,5 +1,38 @@ { "$defs": { + "AnimationsConfig": { + "description": "Configuration for animation generation.", + "properties": { + "enabled": { + "default": true, + "description": "Whether to generate forecast animations (GIFs per param and region).", + "title": "Enabled", + "type": "boolean" + }, + "domains": { + "default": [ + "globe", + "europe", + "switzerland" + ], + "description": "Domains to generate animations for. Each entry is either a named domain (e.g. 'globe', 'europe', 'switzerland') defined in plotting.DOMAINS, or a custom domain dict with 'name', optional 'extent' [lon_min, lon_max, lat_min, lat_max], and optional 'projection'.", + "items": { + "anyOf": [ + { + "type": "string" + }, + { + "$ref": "#/$defs/RegionConfig" + } + ] + }, + "title": "Domains", + "type": "array" + } + }, + "title": "AnimationsConfig", + "type": "object" + }, "BaselineConfig": { "description": "Configuration for a single baseline to include in the verification.", "properties": { @@ -445,6 +478,32 @@ "title": "Locations", "type": "object" }, + "MeteogramConfig": { + "description": "Configuration for meteogram generation.", + "properties": { + "enabled": { + "default": true, + "description": "Whether to generate meteograms (time series plots at stations).", + "title": "Enabled", + "type": "boolean" + }, + "stations": { + "default": [ + "GVE", + "KLO", + "LUG" + ], + "description": "List of PeakWeather station IDs to generate meteograms for.", + "items": { + 
"type": "string" + }, + "title": "Stations", + "type": "array" + } + }, + "title": "MeteogramConfig", + "type": "object" + }, "Profile": { "description": "Workflow execution profile.", "properties": { @@ -521,70 +580,26 @@ "title": "RegionConfig", "type": "object" }, - "MeteogramConfig": { - "description": "Configuration for meteogram generation.", - "properties": { - "enabled": { - "default": true, - "description": "Whether to generate meteograms (time series plots at stations).", - "title": "Enabled", - "type": "boolean" - }, - "stations": { - "default": ["GVE", "KLO", "LUG"], - "description": "List of PeakWeather station IDs to generate meteograms for.", - "items": {"type": "string"}, - "title": "Stations", - "type": "array" - } - }, - "title": "MeteogramConfig", - "type": "object" - }, - "AnimationsConfig": { - "description": "Configuration for animation generation.", - "properties": { - "enabled": { - "default": true, - "description": "Whether to generate forecast animations (GIFs per param and region).", - "title": "Enabled", - "type": "boolean" - }, - "domains": { - "default": ["globe", "europe", "switzerland"], - "description": "Domains to generate animations for. Each entry is either a named domain (e.g. 
'globe', 'europe', 'switzerland') defined in plotting.DOMAINS, or a custom domain dict with 'name', optional 'extent' [lon_min, lon_max, lat_min, lat_max], and optional 'projection'.", - "items": { - "anyOf": [ - {"type": "string"}, - {"$ref": "#/$defs/RegionConfig"} - ] - }, - "title": "Domains", - "type": "array" - } - }, - "title": "AnimationsConfig", - "type": "object" - }, "ShowcaseConfig": { "description": "Configuration for the showcase workflow.", "properties": { "params": { - "default": ["T_2M", "SP_10M"], + "default": [ + "T_2M", + "SP_10M" + ], "description": "List of parameters to generate animations and meteograms for.", - "items": {"type": "string"}, + "items": { + "type": "string" + }, "title": "Params", "type": "array" }, "meteograms": { - "$ref": "#/$defs/MeteogramConfig", - "default": {}, - "description": "Configuration for meteogram generation." + "$ref": "#/$defs/MeteogramConfig" }, "animations": { - "$ref": "#/$defs/AnimationsConfig", - "default": {}, - "description": "Configuration for animation generation." + "$ref": "#/$defs/AnimationsConfig" } }, "title": "ShowcaseConfig", From bc2716886f82473ba47b482df2b38a660cc0e5c9 Mon Sep 17 00:00:00 2001 From: Carlos Osuna Date: Mon, 18 May 2026 14:08:13 +0200 Subject: [PATCH 27/29] revert config --- config/forecasters-ich1.yaml | 46 +++++++++++++++--------------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/config/forecasters-ich1.yaml b/config/forecasters-ich1.yaml index c87f626b..3f8ee7db 100644 --- a/config/forecasters-ich1.yaml +++ b/config/forecasters-ich1.yaml @@ -3,20 +3,30 @@ description: | Evaluate skill of ICON-CH1 single. 
dates: -# start: 2024-01-01T00:00 -# end: 2024-01-02T00:00 -# frequency: 6h - - 2025-03-27T06:00 - + start: 2024-01-01T00:00 + end: 2024-01-02T00:00 + frequency: 6h runs: - forecaster: - checkpoint: https://service.meteoswiss.ch/mlstore#/experiments/602/runs/c30490b6ba064e4db03b430f3a2595ad - label: stage_E + checkpoint: https://servicedepl.meteoswiss.ch/mlstore#/experiments/409/runs/b30acf68520a4bbd8324c44666561696 + label: stage_C_icon_1km + steps: 0/120/6 + config: resources/inference/configs/sgm-forecaster-global-ich1.yaml + extra_requirements: + - earthkit-utils<0.2.0 + - earthkit-data<0.19.0 + - git+https://github.com/ecmwf/anemoi-inference.git@main + + - forecaster: + checkpoint: /scratch/mch/apennino/output/checkpoint/973b78b8b39543949abc2b154c5f98d9/inference-last.ckpt + label: stage_C_icon_1km_hidden_11 steps: 0/120/6 - config: resources/inference/configs/sgm-multidataset-forecaster-global-ich1-oper.yaml + config: resources/inference/configs/sgm-forecaster-global-ich1.yaml extra_requirements: - - git+https://github.com/ecmwf/anemoi-inference.git@e369b1a90313e9701db13f63364a467aa281cf36 + - earthkit-utils<0.2.0 + - earthkit-data<0.19.0 + - git+https://github.com/ecmwf/anemoi-inference.git@main # - forecaster: # checkpoint: https://huggingface.co/met-no/bris_cloudy-skies/blob/main/2025-10/cloudy-skies-2025-10.ckpt @@ -48,24 +58,6 @@ stratification: - alpensuedseite root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517 -showcase: - params: - - T_2M - - SP_10M - - TOT_PREC - meteograms: - enabled: false - stations: [JUN] #, COV, GOR, WFJ, SAE, SAM, DAV, ZER, ANT, VSBAS, BRT, LTB, GOS, CEV, BIA] - animations: - enabled: true - domains: - - europe - - switzerland - - globe -# - name: alpine_arc -# extent: [3.0, 17.0, 43.5, 48.5] -# projection: orthographic - speed: 10 # simulated hours per second thresholds: TOT_PREC: gt: [0.0, 0.001, 0.005] From c283580ff8c5dcdb31dc743c0312a23a4ea616f3 Mon Sep 17 00:00:00 2001 From: Carlos Osuna Date: Mon, 18 May 
2026 14:16:41 +0200 Subject: [PATCH 28/29] add an include to better organize the yamls --- config/interpolators-ich1.yaml | 24 ---------------------- config/showcase-interpolators-ich1.yaml | 27 +++++++++++++++++++++++++ src/evalml/cli.py | 17 +++++++++++++++- 3 files changed, 43 insertions(+), 25 deletions(-) create mode 100644 config/showcase-interpolators-ich1.yaml diff --git a/config/interpolators-ich1.yaml b/config/interpolators-ich1.yaml index 6d3651a3..c581ea91 100644 --- a/config/interpolators-ich1.yaml +++ b/config/interpolators-ich1.yaml @@ -56,30 +56,6 @@ stratification: - alpensuedseite root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517 -showcase: - params: - - T_2M - - SP_10M - - TOT_PREC - meteograms: - enabled: false - stations: [JUN] #, COV, GOR, WFJ, SAE, SAM, DAV, ZER, ANT, VSBAS, BRT, LTB, GOS, CEV, BIA] - animations: - enabled: true - domains: - - europe - - switzerland - - globe -# - name: alpine_arc -# extent: [3.0, 17.0, 43.5, 48.5] -# projection: orthographic - speed: 10 # simulated hours per second - runs: - - Varda-Single - comparisons: - - left: Varda-Single # nested forecaster (non-candidate) - right: KENDA-CH1 # interpolator (candidate) - thresholds: TOT_PREC: gt: [0.0, 0.001, 0.005] diff --git a/config/showcase-interpolators-ich1.yaml b/config/showcase-interpolators-ich1.yaml new file mode 100644 index 00000000..def7f896 --- /dev/null +++ b/config/showcase-interpolators-ich1.yaml @@ -0,0 +1,27 @@ +# Showcase config for interpolators-ich1. +# Includes the base config and overrides only the showcase section. 
+# +# .venv/bin/evalml showcase config/showcase-interpolators-ich1.yaml + +include: interpolators-ich1.yaml + +showcase: + params: + - T_2M + - SP_10M + - TOT_PREC + meteograms: + enabled: false + stations: [JUN] + animations: + enabled: true + domains: + - europe + - switzerland + - globe + speed: 10 # simulated hours per second + runs: + - Varda-Single + comparisons: + - left: Varda-Single + right: KENDA-CH1 diff --git a/src/evalml/cli.py b/src/evalml/cli.py index 51a9ed45..51f20096 100644 --- a/src/evalml/cli.py +++ b/src/evalml/cli.py @@ -69,9 +69,24 @@ def generate_graph( click.echo(f"Graph saved to {output_file}") +def _deep_merge(base: dict, override: dict) -> dict: + """Recursively merge override into base. Override wins on conflicts.""" + result = dict(base) + for key, value in override.items(): + if key in result and isinstance(result[key], dict) and isinstance(value, dict): + result[key] = _deep_merge(result[key], value) + else: + result[key] = value + return result + + def load_yaml(path: Path) -> dict[str, Any]: with path.open("r") as f: - return yaml.safe_load(f) + data = yaml.safe_load(f) + if include := data.pop("include", None): + base = load_yaml(path.parent / include) + data = _deep_merge(base, data) + return data def workflow_options(func): From a291c59a61c021ff979810ddc5925c5aaf7d5002 Mon Sep 17 00:00:00 2001 From: Carlos Osuna Date: Wed, 20 May 2026 09:45:14 +0200 Subject: [PATCH 29/29] add tests --- src/data_input/__init__.py | 4 ++- tests/unit/test_config.py | 38 +++++++++++++++++++++++++++++ workflow/scripts/plot_zarr_frame.py | 23 +++++++++++++---- 3 files changed, 59 insertions(+), 6 deletions(-) diff --git a/src/data_input/__init__.py b/src/data_input/__init__.py index e184e686..277d2a36 100644 --- a/src/data_input/__init__.py +++ b/src/data_input/__init__.py @@ -62,7 +62,9 @@ def load_analysis_data_from_zarr( "TOT_PREC": "tp", } _diffs = np.diff(steps) - tot_prec_string = "TOT_PREC_6H" if len(_diffs) > 0 and min(_diffs) == 6 else 
"TOT_PREC_1H" + tot_prec_string = ( + "TOT_PREC_6H" if len(_diffs) > 0 and min(_diffs) == 6 else "TOT_PREC_1H" + ) PARAMS_MAP_COSMO1 = { v: v.replace("TOT_PREC", tot_prec_string) for v in PARAMS_MAP_COSMO2.keys() } diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 40281a6e..74d6ba5e 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -2,9 +2,47 @@ import pytest +from evalml.cli import _deep_merge, load_yaml from evalml.config import ConfigModel +def test_deep_merge_override_wins(): + base = {"a": 1, "b": {"x": 1, "y": 2}} + override = {"b": {"y": 99}, "c": 3} + result = _deep_merge(base, override) + assert result == {"a": 1, "b": {"x": 1, "y": 99}, "c": 3} + + +def test_deep_merge_non_dict_override_replaces(): + base = {"a": {"x": 1}} + override = {"a": [1, 2, 3]} + result = _deep_merge(base, override) + assert result["a"] == [1, 2, 3] + + +def test_load_yaml_without_include(tmp_path): + f = tmp_path / "config.yaml" + f.write_text("a: 1\n") + assert load_yaml(f) == {"a": 1} + + +def test_load_yaml_include_merges_base(tmp_path): + base = tmp_path / "base.yaml" + base.write_text("a: 1\nb:\n x: 1\n y: 2\n") + + child = tmp_path / "child.yaml" + child.write_text("include: base.yaml\nb:\n y: 99\nc: 3\n") + + result = load_yaml(child) + assert result == {"a": 1, "b": {"x": 1, "y": 99}, "c": 3} + + +def test_load_yaml_include_validates_as_config_model(): + path = Path("config/showcase-interpolators-ich1.yaml") + data = load_yaml(path) + _ = ConfigModel.model_validate(data) + + def test_example_forecasters_config(example_forecasters_config): """Test that the example config loads correctly.""" diff --git a/workflow/scripts/plot_zarr_frame.py b/workflow/scripts/plot_zarr_frame.py index df974b39..ce0c5f75 100644 --- a/workflow/scripts/plot_zarr_frame.py +++ b/workflow/scripts/plot_zarr_frame.py @@ -118,9 +118,13 @@ def main(): choices=["analysis", "baseline"], help="Zarr source type", ) - parser.add_argument("--date", type=str, 
required=True, help="Reference datetime (YYYYmmddHHMM)") + parser.add_argument( + "--date", type=str, required=True, help="Reference datetime (YYYYmmddHHMM)" + ) parser.add_argument("--outfn", type=str, required=True, help="Output filename") - parser.add_argument("--leadtime", type=str, required=True, help="Lead time (hours, zero-padded)") + parser.add_argument( + "--leadtime", type=str, required=True, help="Lead time (hours, zero-padded)" + ) parser.add_argument("--param", type=str, required=True, help="Parameter name") parser.add_argument("--region", type=str, required=True, help="Region name") parser.add_argument( @@ -131,7 +135,9 @@ def main(): metavar=("LON_MIN", "LON_MAX", "LAT_MIN", "LAT_MAX"), ) parser.add_argument("--projection", type=str, default=None) - parser.add_argument("--accu", type=int, default=1, help="Accumulation period in hours") + parser.add_argument( + "--accu", type=int, default=1, help="Accumulation period in hours" + ) args = parser.parse_args() reftime = datetime.strptime(args.date, "%Y%m%d%H%M") @@ -164,12 +170,19 @@ def main(): extent = DOMAINS[args.region]["extent"] fig = plotter.init_geoaxes( - nrows=1, ncols=1, projection=projection, bbox=extent, name=args.region, size=(6, 6) + nrows=1, + ncols=1, + projection=projection, + bbox=extent, + name=args.region, + size=(6, 6), ) subplot = fig.add_map(row=0, column=0) field, units_override = preprocess_field(param, state) - plotter.plot_field(subplot, field, **get_style(param, units_override, accu=args.accu)) + plotter.plot_field( + subplot, field, **get_style(param, units_override, accu=args.accu) + ) subplot.ax.add_geometries( state["lam_envelope"], edgecolor="black",