From c7d1d2360345a4a87ab59e8111848c794c71a3eb Mon Sep 17 00:00:00 2001 From: erikposchivo Date: Mon, 20 Apr 2026 13:35:27 +0200 Subject: [PATCH 1/2] modified volume_matching method by Andrea Giacobbi --- gpm/gv/routines.py | 189 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 183 insertions(+), 6 deletions(-) diff --git a/gpm/gv/routines.py b/gpm/gv/routines.py index e3e43363..6090fbb4 100644 --- a/gpm/gv/routines.py +++ b/gpm/gv/routines.py @@ -637,7 +637,7 @@ def retrieve_ds_sr(start_time, end_time, extent_gr, download_sr=False): version=version, start_time=start_time, end_time=end_time, - chunks={}, # only read the data needed around GR instead of full granule + chunks=-1, # only read the data needed around GR instead of full granule ) # - 1B product @@ -648,7 +648,7 @@ def retrieve_ds_sr(start_time, end_time, extent_gr, download_sr=False): version=version, start_time=start_time, end_time=end_time, - chunks={}, # only read the data needed around GR instead of full granule + chunks=-1, # only read the data needed around GR instead of full granule ) # Crop SR datasets to region of interest @@ -676,6 +676,7 @@ def volume_matching( radar_band, ds_sr=None, z_variable_gr="DBZH", + zdr_variable_gr="ZDR", # ADDED BY ME beamwidth_gr: float = 1.0, z_min_threshold_gr=0, z_min_threshold_sr=10, @@ -857,7 +858,7 @@ def volume_matching( x_sr, y_sr, z_sr = retrieve_gates_projection_coordinates(ds_sr, dst_crs=crs_gr) #### - Retrieve SR (range, azimuth, elevation) coordinates - range_sr, azimuth_sr, elevation_sr = xyz_to_antenna_coordinates( # noqa: RUF059 + range_sr, azimuth_sr, elevation_sr = xyz_to_antenna_coordinates( x=x_sr, y=y_sr, z=z_sr, @@ -892,7 +893,7 @@ def volume_matching( ####-----------------------------------------------------------------------------. #### Retrieve custom SR variables #### - Retrieve Bright Band (BB) Ratio - da_bb_ratio, da_bb_mask = ds_sr.gpm.retrieve("bright_band_ratio", return_bb_mask=True) # noqa: RUF059 + da_bb_ratio, da_bb_mask = ds_sr.gpm.retrieve("bright_band_ratio", return_bb_mask=True) #### - Retrieve Precipitation and Hydrometeors Types ds_sr["flagPrecipitationType"] = ds_sr.gpm.retrieve("flagPrecipitationType", method="major_rain_type") @@ -1042,7 +1043,7 @@ def volume_matching( "y", "z", ] - + # Initialize Dataset where to add aggregated SR gates ds_sr_match_ppi = xr.Dataset() @@ -1221,9 +1222,92 @@ def volume_matching( ####-----------------------------------------------------------------------------. #### Aggregate GR data on SR footprints + # CHANGED BLOCK FROM HERE================== # Define PolyAggregator aggregator = PolyAggregator(source_polygons=gr_poly, target_polygons=sr_poly, parallel=False) + # Helper function to calculate statistics for any given variable + def _calculate_stats(var_name, is_reflectivity=False): + stats = {} + if var_name in gdf_gr.columns: + if is_reflectivity: + stats['mean'] = wrl.trafo.decibel(aggregator.average(values=wrl.trafo.idecibel(gdf_gr[var_name]))) + else: + stats['mean'] = aggregator.average(values=gdf_gr[var_name]) + stats['std'] = aggregator.std(values=gdf_gr[var_name]) + stats['max'] = aggregator.max(values=gdf_gr[var_name]) + stats['min'] = aggregator.min(values=gdf_gr[var_name]) + stats['range'] = stats['max'] - stats['min'] + stats['cov'] = stats['std'] / stats['mean'] + else: + nan_series = pd.Series(np.nan, index=gdf_sr.index) + stats = {k: nan_series for k in ['mean', 'std', 'max', 'min', 'range', 'cov']} + return stats + + # List of all variables to aggregate + variables_to_aggregate = { + "Z": {"name": z_variable_gr, "is_reflectivity": True}, + "ZDR": {"name": zdr_variable_gr, "is_reflectivity": True}, + "AH": {"name": "AH", "is_reflectivity": False}, + "PIA": {"name": "PIA", "is_reflectivity": False}, + "DBZH_COR_AH": {"name": "DBZH_COR_AH", "is_reflectivity": True}, + "ADP": {"name": "ADP", "is_reflectivity": False}, + "PIDA": {"name": "PIDA", "is_reflectivity": False}, + "KDP_COR_ADP": {"name": "KDP_COR_ADP", "is_reflectivity": True}, + "RHOHV": {"name": "RHOHV", "is_reflectivity": False}, + "KDP": {"name": "KDP", "is_reflectivity": False}, + "WRADH": {"name": "WRADH", "is_reflectivity": False}, + "VRADH": {"name": "VRADH", "is_reflectivity": False}, + "ZHCLUT": {"name": "ZHCLUT", "is_reflectivity": True}, + "ZVV": {"name": "ZVV", "is_reflectivity": True}, + "SNR": {"name": "SNR", "is_reflectivity": False}, + } + + # Dictionary to hold all the final data for the DataFrame + df_data = {} + + # Loop through the list, calculate stats, and add to the dictionary + for prefix, var_info in variables_to_aggregate.items(): + stats = _calculate_stats(var_info["name"], is_reflectivity=var_info["is_reflectivity"]) + for stat_name, stat_series in stats.items(): + df_data[f"GR_{prefix}_{stat_name}"] = stat_series + + # Add other general statistics + df_data["GR_time"] = aggregator.first(values=gdf_gr["time"]) + df_data["GR_gate_volume_sum"] = aggregator.sum(values=gdf_gr["gate_volume"]) + df_data["GR_fraction_covered_area"] = aggregator.fraction_covered_area() + df_data["GR_counts"] = aggregator.counts() + counts = aggregator.counts() + df_data["GR_counts_valid"] = aggregator.apply(lambda x, weights: np.sum(~np.isnan(x)), values=gdf_gr[z_variable_gr]) + + # Calculate fraction above sensitivity thresholds + counts_series = df_data["GR_counts"] + for thr in sr_sensitivity_thresholds: + counts_above_thr = aggregator.apply(lambda x, weights: np.sum(x > thr), values=gdf_gr[z_variable_gr]) + + # --- START OF CORRECTION --- + # Perform safe division with NumPy to avoid division-by-zero warnings + fraction = np.zeros_like(counts_above_thr, dtype=float) + # Only divide where the total count is not zero; otherwise, the fraction remains 0 + np.divide(counts_above_thr, counts_series, out=fraction, where=counts_series != 0) + # --- END OF CORRECTION --- + + df_data[f"GR_Z_fraction_above_{thr}dBZ"] = fraction + + # Add scalar GR attributes + #if "sweep_number" in ds_gr: + # df_data["GR_sweep_number"] = ds_gr["sweep_number"].item() + if "sweep_fixed_angle" in ds_gr: + df_data["GR_elevation_angle"] = ds_gr["sweep_fixed_angle"].item() + + # Create the final DataFrame + df = pd.DataFrame(df_data, index=gdf_sr.index) + gdf_gr_match = gpd.GeoDataFrame(df, crs=crs_gr, geometry=aggregator.target_polygons) + # CHANGED BLOCK UNTIL HERE================== + + '''# Define PolyAggregator + aggregator = PolyAggregator(source_polygons=gr_poly, target_polygons=sr_poly, parallel=False) + # Aggregate GR reflecitvities and compute statistics # - Timestep of acquisition # - NaT where no intersection with SR footprints ! @@ -1249,6 +1333,61 @@ def volume_matching( z_range = z_max - z_min z_cov = z_std / z_mean # coefficient of variation + # --- ADDED BY ME: Calculate statistics for ZDR --- + if zdr_variable_gr in gdf_gr.columns: + # Calculate ZDR mean in linear space + zdr_mean = wrl.trafo.decibel(aggregator.average(values=wrl.trafo.idecibel(gdf_gr[zdr_variable_gr]))) + # Calculate other ZDR stats directly in decibels + zdr_std = aggregator.std(values=gdf_gr[zdr_variable_gr]) + zdr_max = aggregator.max(values=gdf_gr[zdr_variable_gr]) + zdr_min = aggregator.min(values=gdf_gr[zdr_variable_gr]) + zdr_range = zdr_max - zdr_min + zdr_cov = zdr_std / zdr_mean + else: + # If ZDR column doesn't exist, create NaNs for all stats + nan_series = pd.Series(np.nan, index=gdf_sr.index) + zdr_mean, zdr_std, zdr_max, zdr_min, zdr_range, zdr_cov = (nan_series,) * 6 + # --- END MODIFICATION --- + + # --- ADDED BY ME: Calculate statistics for PIA_H (Path Integrated Attenuation) --- + if "PIA_H" in gdf_gr.columns: + pia_mean = aggregator.average(values=gdf_gr["PIA_H"]) + pia_std = aggregator.std(values=gdf_gr["PIA_H"]) + pia_max = aggregator.max(values=gdf_gr["PIA_H"]) + pia_min = aggregator.min(values=gdf_gr["PIA_H"]) + pia_range = pia_max - pia_min + pia_cov = pia_std / pia_mean + else: + nan_series = pd.Series(np.nan, index=gdf_sr.index) + pia_mean, pia_std, pia_max, pia_min, pia_range, pia_cov = (nan_series,) * 6 + + # --- ADDED BY ME: Calculate statistics for AH (Specific Attenuation) --- + if "AH" in gdf_gr.columns: + att_mean = aggregator.average(values=gdf_gr["AH"]) + #att_std = aggregator.std(values=gdf_gr["AH"]) + #att_max = aggregator.max(values=gdf_gr["AH"]) + #att_min = aggregator.min(values=gdf_gr["AH"]) + #att_range = att_max - att_min + #att_cov = att_std / att_mean + else: + nan_series = pd.Series(np.nan, index=gdf_sr.index) + att_mean, att_std, att_max, att_min, att_range, att_cov = (nan_series,) * 6 + att_mean = nan_series + # --- END MODIFICATION --- + + # --- ADDED BY ME: Calculate statistics for DBZH_COR_AH (Corrected Reflectivity) --- + if "DBZH_COR_AH" in gdf_gr.columns: + dbzh_cor_ah_mean = wrl.trafo.decibel(aggregator.average(values=wrl.trafo.idecibel(gdf_gr["DBZH_COR_AH"]))) + dbzh_cor_ah_std = aggregator.std(values=gdf_gr["DBZH_COR_AH"]) + dbzh_cor_ah_max = aggregator.max(values=gdf_gr["DBZH_COR_AH"]) + dbzh_cor_ah_min = aggregator.min(values=gdf_gr["DBZH_COR_AH"]) + dbzh_cor_ah_range = dbzh_cor_ah_max - dbzh_cor_ah_min + dbzh_cor_ah_cov = dbzh_cor_ah_std / dbzh_cor_ah_mean + else: + nan_series = pd.Series(np.nan, index=gdf_sr.index) + dbzh_cor_ah_mean, dbzh_cor_ah_std, dbzh_cor_ah_max, dbzh_cor_ah_min, dbzh_cor_ah_range, dbzh_cor_ah_cov = (nan_series,) * 6 +# --- END MODIFICATION --- + # Create DataFrame with GR matched statistics df = pd.DataFrame( { @@ -1258,6 +1397,41 @@ def volume_matching( "GR_Z_min": z_min, "GR_Z_range": z_range, "GR_Z_cov": z_cov, + + # --- ADDED BY ME --- + # ZDR Statistics + "GR_ZDR_mean": zdr_mean, + "GR_ZDR_std": zdr_std, + "GR_ZDR_max": zdr_max, + "GR_ZDR_min": zdr_min, + "GR_ZDR_range": zdr_range, + "GR_ZDR_cov": zdr_cov, + + # PIAH Statistics + "GR_PIAH_mean": pia_mean, + "GR_PIAH_std": pia_std, + "GR_PIAH_max": pia_max, + "GR_PIAH_min": pia_min, + "GR_PIAH_range": pia_range, + "GR_PIAH_cov": pia_cov, + + # AH Statistics + "GR_AH_mean": att_mean, + #"GR_AH_std": att_std, + #"GR_AH_max": att_max, + #"GR_AH_min": att_min, + #"GR_AH_range": att_range, + #"GR_AH_cov": att_cov, + + # DBZH_COR_AH Statistics + "GR_DBZH_COR_AH_mean": dbzh_cor_ah_mean, + "GR_DBZH_COR_AH_std": dbzh_cor_ah_std, + "GR_DBZH_COR_AH_max": dbzh_cor_ah_max, + "GR_DBZH_COR_AH_min": dbzh_cor_ah_min, + "GR_DBZH_COR_AH_range": dbzh_cor_ah_range, + "GR_DBZH_COR_AH_cov": dbzh_cor_ah_cov, + # --- ADDED BY ME --- + "GR_time": time_gr, "GR_gate_volume_sum": sum_vol, "GR_fraction_covered_area": fraction_covered_area, @@ -1267,7 +1441,7 @@ def volume_matching( index=gdf_sr.index, ) gdf_gr_match = gpd.GeoDataFrame(df, crs=crs_gr, geometry=aggregator.target_polygons) - gdf_gr_match.head() + gdf_gr_match.head()''' # Add GR range statistics gdf_gr_match["GR_range_max"] = aggregator.max(values=gdf_gr["range"]) @@ -1278,6 +1452,9 @@ def volume_matching( gdf_gr_match["GR_azimuth"] = aggregator.first(values=gdf_gr["azimuth"]) gdf_gr_match["GR_elevation"] = aggregator.first(values=gdf_gr["elevation"]) + # if "sweep_number" in ds_gr: + # gdf_gr_match["sweep_number"] = ds_gr["sweep_number"].item() + # Fraction above sensitivity thresholds for thr in sr_sensitivity_thresholds: fraction = aggregator.apply(lambda x, weights: np.sum(x > thr), values=gdf_gr[z_variable_gr]) / counts # noqa From 2ff1f0ebb5c10d3343be16b67a531a0e2962c384 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 20 Apr 2026 11:40:10 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- gpm/gv/routines.py | 56 +++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/gpm/gv/routines.py b/gpm/gv/routines.py index 6090fbb4..04876d16 100644 --- a/gpm/gv/routines.py +++ b/gpm/gv/routines.py @@ -676,7 +676,7 @@ def volume_matching( radar_band, ds_sr=None, z_variable_gr="DBZH", - zdr_variable_gr="ZDR", # ADDED BY ME + zdr_variable_gr="ZDR", # ADDED BY ME beamwidth_gr: float = 1.0, z_min_threshold_gr=0, z_min_threshold_sr=10, @@ -1043,7 +1043,7 @@ def volume_matching( "y", "z", ] - + # Initialize Dataset where to add aggregated SR gates ds_sr_match_ppi = xr.Dataset() @@ -1231,34 +1231,34 @@ def _calculate_stats(var_name, is_reflectivity=False): stats = {} if var_name in gdf_gr.columns: if is_reflectivity: - stats['mean'] = wrl.trafo.decibel(aggregator.average(values=wrl.trafo.idecibel(gdf_gr[var_name]))) + stats["mean"] = wrl.trafo.decibel(aggregator.average(values=wrl.trafo.idecibel(gdf_gr[var_name]))) else: - stats['mean'] = aggregator.average(values=gdf_gr[var_name]) - stats['std'] = aggregator.std(values=gdf_gr[var_name]) - stats['max'] = aggregator.max(values=gdf_gr[var_name]) - stats['min'] = aggregator.min(values=gdf_gr[var_name]) - stats['range'] = stats['max'] - stats['min'] - stats['cov'] = stats['std'] / stats['mean'] + stats["mean"] = aggregator.average(values=gdf_gr[var_name]) + stats["std"] = aggregator.std(values=gdf_gr[var_name]) + stats["max"] = aggregator.max(values=gdf_gr[var_name]) + stats["min"] = aggregator.min(values=gdf_gr[var_name]) + stats["range"] = stats["max"] - stats["min"] + stats["cov"] = stats["std"] / stats["mean"] else: nan_series = pd.Series(np.nan, index=gdf_sr.index) - stats = {k: nan_series for k in ['mean', 'std', 'max', 'min', 'range', 'cov']} + stats = dict.fromkeys(["mean", "std", "max", "min", "range", "cov"], nan_series) return stats # List of all variables to aggregate variables_to_aggregate = { - "Z": {"name": z_variable_gr, "is_reflectivity": True}, + "Z": {"name": z_variable_gr, "is_reflectivity": True}, "ZDR": {"name": zdr_variable_gr, "is_reflectivity": True}, - "AH": {"name": "AH", "is_reflectivity": False}, + "AH": {"name": "AH", "is_reflectivity": False}, "PIA": {"name": "PIA", "is_reflectivity": False}, - "DBZH_COR_AH": {"name": "DBZH_COR_AH", "is_reflectivity": True}, + "DBZH_COR_AH": {"name": "DBZH_COR_AH", "is_reflectivity": True}, "ADP": {"name": "ADP", "is_reflectivity": False}, - "PIDA": {"name": "PIDA", "is_reflectivity": False}, + "PIDA": {"name": "PIDA", "is_reflectivity": False}, "KDP_COR_ADP": {"name": "KDP_COR_ADP", "is_reflectivity": True}, - "RHOHV": {"name": "RHOHV", "is_reflectivity": False}, + "RHOHV": {"name": "RHOHV", "is_reflectivity": False}, "KDP": {"name": "KDP", "is_reflectivity": False}, - "WRADH": {"name": "WRADH", "is_reflectivity": False}, + "WRADH": {"name": "WRADH", "is_reflectivity": False}, "VRADH": {"name": "VRADH", "is_reflectivity": False}, - "ZHCLUT": {"name": "ZHCLUT", "is_reflectivity": True}, + "ZHCLUT": {"name": "ZHCLUT", "is_reflectivity": True}, "ZVV": {"name": "ZVV", "is_reflectivity": True}, "SNR": {"name": "SNR", "is_reflectivity": False}, } @@ -1271,7 +1271,7 @@ def _calculate_stats(var_name, is_reflectivity=False): stats = _calculate_stats(var_info["name"], is_reflectivity=var_info["is_reflectivity"]) for stat_name, stat_series in stats.items(): df_data[f"GR_{prefix}_{stat_name}"] = stat_series - + # Add other general statistics df_data["GR_time"] = aggregator.first(values=gdf_gr["time"]) df_data["GR_gate_volume_sum"] = aggregator.sum(values=gdf_gr["gate_volume"]) @@ -1284,7 +1284,7 @@ def _calculate_stats(var_name, is_reflectivity=False): counts_series = df_data["GR_counts"] for thr in sr_sensitivity_thresholds: counts_above_thr = aggregator.apply(lambda x, weights: np.sum(x > thr), values=gdf_gr[z_variable_gr]) - + # --- START OF CORRECTION --- # Perform safe division with NumPy to avoid division-by-zero warnings fraction = np.zeros_like(counts_above_thr, dtype=float) @@ -1295,7 +1295,7 @@ def _calculate_stats(var_name, is_reflectivity=False): df_data[f"GR_Z_fraction_above_{thr}dBZ"] = fraction # Add scalar GR attributes - #if "sweep_number" in ds_gr: + # if "sweep_number" in ds_gr: # df_data["GR_sweep_number"] = ds_gr["sweep_number"].item() if "sweep_fixed_angle" in ds_gr: df_data["GR_elevation_angle"] = ds_gr["sweep_fixed_angle"].item() @@ -1305,7 +1305,7 @@ def _calculate_stats(var_name, is_reflectivity=False): gdf_gr_match = gpd.GeoDataFrame(df, crs=crs_gr, geometry=aggregator.target_polygons) # CHANGED BLOCK UNTIL HERE================== - '''# Define PolyAggregator + """# Define PolyAggregator aggregator = PolyAggregator(source_polygons=gr_poly, target_polygons=sr_poly, parallel=False) # Aggregate GR reflecitvities and compute statistics @@ -1397,7 +1397,7 @@ def _calculate_stats(var_name, is_reflectivity=False): "GR_Z_min": z_min, "GR_Z_range": z_range, "GR_Z_cov": z_cov, - + # --- ADDED BY ME --- # ZDR Statistics "GR_ZDR_mean": zdr_mean, @@ -1408,11 +1408,11 @@ def _calculate_stats(var_name, is_reflectivity=False): "GR_ZDR_cov": zdr_cov, # PIAH Statistics - "GR_PIAH_mean": pia_mean, - "GR_PIAH_std": pia_std, - "GR_PIAH_max": pia_max, - "GR_PIAH_min": pia_min, - "GR_PIAH_range": pia_range, + "GR_PIAH_mean": pia_mean, + "GR_PIAH_std": pia_std, + "GR_PIAH_max": pia_max, + "GR_PIAH_min": pia_min, + "GR_PIAH_range": pia_range, "GR_PIAH_cov": pia_cov, # AH Statistics @@ -1441,7 +1441,7 @@ def _calculate_stats(var_name, is_reflectivity=False): index=gdf_sr.index, ) gdf_gr_match = gpd.GeoDataFrame(df, crs=crs_gr, geometry=aggregator.target_polygons) - gdf_gr_match.head()''' + gdf_gr_match.head()""" # Add GR range statistics gdf_gr_match["GR_range_max"] = aggregator.max(values=gdf_gr["range"])