Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
187 changes: 182 additions & 5 deletions gpm/gv/routines.py
Original file line number Diff line number Diff line change
Expand Up @@ -637,7 +637,7 @@
version=version,
start_time=start_time,
end_time=end_time,
chunks={}, # only read the data needed around GR instead of full granule
chunks=-1, # only read the data needed around GR instead of full granule
)

# - 1B product
Expand All @@ -648,7 +648,7 @@
version=version,
start_time=start_time,
end_time=end_time,
chunks={}, # only read the data needed around GR instead of full granule
chunks=-1, # only read the data needed around GR instead of full granule
)

# Crop SR datasets to region of interest
Expand Down Expand Up @@ -676,6 +676,7 @@
radar_band,
ds_sr=None,
z_variable_gr="DBZH",
zdr_variable_gr="ZDR", # ADDED BY ME
beamwidth_gr: float = 1.0,
z_min_threshold_gr=0,
z_min_threshold_sr=10,
Expand Down Expand Up @@ -857,7 +858,7 @@
x_sr, y_sr, z_sr = retrieve_gates_projection_coordinates(ds_sr, dst_crs=crs_gr)

#### - Retrieve SR (range, azimuth, elevation) coordinates
range_sr, azimuth_sr, elevation_sr = xyz_to_antenna_coordinates( # noqa: RUF059
range_sr, azimuth_sr, elevation_sr = xyz_to_antenna_coordinates(
x=x_sr,
y=y_sr,
z=z_sr,
Expand Down Expand Up @@ -892,7 +893,7 @@
####-----------------------------------------------------------------------------.
#### Retrieve custom SR variables
#### - Retrieve Bright Band (BB) Ratio
da_bb_ratio, da_bb_mask = ds_sr.gpm.retrieve("bright_band_ratio", return_bb_mask=True) # noqa: RUF059
da_bb_ratio, da_bb_mask = ds_sr.gpm.retrieve("bright_band_ratio", return_bb_mask=True)

#### - Retrieve Precipitation and Hydrometeors Types
ds_sr["flagPrecipitationType"] = ds_sr.gpm.retrieve("flagPrecipitationType", method="major_rain_type")
Expand Down Expand Up @@ -1221,9 +1222,92 @@

####-----------------------------------------------------------------------------.
#### Aggregate GR data on SR footprints
# CHANGED BLOCK FROM HERE==================
# Define PolyAggregator
aggregator = PolyAggregator(source_polygons=gr_poly, target_polygons=sr_poly, parallel=False)

# Helper function to calculate statistics for any given variable
def _calculate_stats(var_name, is_reflectivity=False):
stats = {}
if var_name in gdf_gr.columns:
if is_reflectivity:
stats["mean"] = wrl.trafo.decibel(aggregator.average(values=wrl.trafo.idecibel(gdf_gr[var_name])))
else:
stats["mean"] = aggregator.average(values=gdf_gr[var_name])
stats["std"] = aggregator.std(values=gdf_gr[var_name])
stats["max"] = aggregator.max(values=gdf_gr[var_name])
stats["min"] = aggregator.min(values=gdf_gr[var_name])
stats["range"] = stats["max"] - stats["min"]
stats["cov"] = stats["std"] / stats["mean"]
else:
nan_series = pd.Series(np.nan, index=gdf_sr.index)
stats = dict.fromkeys(["mean", "std", "max", "min", "range", "cov"], nan_series)
return stats

Check warning on line 1245 in gpm/gv/routines.py

View check run for this annotation

CodeScene Delta Analysis / CodeScene Code Health Review (main)

❌ New issue: Bumpy Road Ahead

_calculate_stats has 6 blocks with nested conditional logic. Any nesting of 2 or deeper is considered. Threshold is 2 blocks per function. The Bumpy Road code smell is a function that contains multiple chunks of nested conditional logic. The deeper the nesting and the more bumps, the lower the code health.

# List of all variables to aggregate
variables_to_aggregate = {
"Z": {"name": z_variable_gr, "is_reflectivity": True},
"ZDR": {"name": zdr_variable_gr, "is_reflectivity": True},
"AH": {"name": "AH", "is_reflectivity": False},
"PIA": {"name": "PIA", "is_reflectivity": False},
"DBZH_COR_AH": {"name": "DBZH_COR_AH", "is_reflectivity": True},
"ADP": {"name": "ADP", "is_reflectivity": False},
"PIDA": {"name": "PIDA", "is_reflectivity": False},
"KDP_COR_ADP": {"name": "KDP_COR_ADP", "is_reflectivity": True},
"RHOHV": {"name": "RHOHV", "is_reflectivity": False},
"KDP": {"name": "KDP", "is_reflectivity": False},
"WRADH": {"name": "WRADH", "is_reflectivity": False},
"VRADH": {"name": "VRADH", "is_reflectivity": False},
"ZHCLUT": {"name": "ZHCLUT", "is_reflectivity": True},
"ZVV": {"name": "ZVV", "is_reflectivity": True},
"SNR": {"name": "SNR", "is_reflectivity": False},
}

# Dictionary to hold all the final data for the DataFrame
df_data = {}

# Loop through the list, calculate stats, and add to the dictionary
for prefix, var_info in variables_to_aggregate.items():
stats = _calculate_stats(var_info["name"], is_reflectivity=var_info["is_reflectivity"])
for stat_name, stat_series in stats.items():
df_data[f"GR_{prefix}_{stat_name}"] = stat_series

# Add other general statistics
df_data["GR_time"] = aggregator.first(values=gdf_gr["time"])
df_data["GR_gate_volume_sum"] = aggregator.sum(values=gdf_gr["gate_volume"])
df_data["GR_fraction_covered_area"] = aggregator.fraction_covered_area()
df_data["GR_counts"] = aggregator.counts()
counts = aggregator.counts()
df_data["GR_counts_valid"] = aggregator.apply(lambda x, weights: np.sum(~np.isnan(x)), values=gdf_gr[z_variable_gr])

# Calculate fraction above sensitivity thresholds
counts_series = df_data["GR_counts"]
for thr in sr_sensitivity_thresholds:
counts_above_thr = aggregator.apply(lambda x, weights: np.sum(x > thr), values=gdf_gr[z_variable_gr])

# --- START OF CORRECTION ---
# Perform safe division with NumPy to avoid division-by-zero warnings
fraction = np.zeros_like(counts_above_thr, dtype=float)
# Only divide where the total count is not zero; otherwise, the fraction remains 0
np.divide(counts_above_thr, counts_series, out=fraction, where=counts_series != 0)
# --- END OF CORRECTION ---

df_data[f"GR_Z_fraction_above_{thr}dBZ"] = fraction

# Add scalar GR attributes
# if "sweep_number" in ds_gr:
# df_data["GR_sweep_number"] = ds_gr["sweep_number"].item()
if "sweep_fixed_angle" in ds_gr:
df_data["GR_elevation_angle"] = ds_gr["sweep_fixed_angle"].item()

# Create the final DataFrame
df = pd.DataFrame(df_data, index=gdf_sr.index)
gdf_gr_match = gpd.GeoDataFrame(df, crs=crs_gr, geometry=aggregator.target_polygons)
# CHANGED BLOCK UNTIL HERE==================

"""# Define PolyAggregator
aggregator = PolyAggregator(source_polygons=gr_poly, target_polygons=sr_poly, parallel=False)

# Aggregate GR reflecitvities and compute statistics
# - Timestep of acquisition
# - NaT where no intersection with SR footprints !
Expand All @@ -1249,6 +1333,61 @@
z_range = z_max - z_min
z_cov = z_std / z_mean # coefficient of variation

# --- ADDED BY ME: Calculate statistics for ZDR ---
if zdr_variable_gr in gdf_gr.columns:
# Calculate ZDR mean in linear space
zdr_mean = wrl.trafo.decibel(aggregator.average(values=wrl.trafo.idecibel(gdf_gr[zdr_variable_gr])))
# Calculate other ZDR stats directly in decibels
zdr_std = aggregator.std(values=gdf_gr[zdr_variable_gr])
zdr_max = aggregator.max(values=gdf_gr[zdr_variable_gr])
zdr_min = aggregator.min(values=gdf_gr[zdr_variable_gr])
zdr_range = zdr_max - zdr_min
zdr_cov = zdr_std / zdr_mean
else:
# If ZDR column doesn't exist, create NaNs for all stats
nan_series = pd.Series(np.nan, index=gdf_sr.index)
zdr_mean, zdr_std, zdr_max, zdr_min, zdr_range, zdr_cov = (nan_series,) * 6
# --- END MODIFICATION ---

# --- ADDED BY ME: Calculate statistics for PIA_H (Path Integrated Attenuation) ---
if "PIA_H" in gdf_gr.columns:
pia_mean = aggregator.average(values=gdf_gr["PIA_H"])
pia_std = aggregator.std(values=gdf_gr["PIA_H"])
pia_max = aggregator.max(values=gdf_gr["PIA_H"])
pia_min = aggregator.min(values=gdf_gr["PIA_H"])
pia_range = pia_max - pia_min
pia_cov = pia_std / pia_mean
else:
nan_series = pd.Series(np.nan, index=gdf_sr.index)
pia_mean, pia_std, pia_max, pia_min, pia_range, pia_cov = (nan_series,) * 6

# --- ADDED BY ME: Calculate statistics for AH (Specific Attenuation) ---
if "AH" in gdf_gr.columns:
att_mean = aggregator.average(values=gdf_gr["AH"])
#att_std = aggregator.std(values=gdf_gr["AH"])
#att_max = aggregator.max(values=gdf_gr["AH"])
#att_min = aggregator.min(values=gdf_gr["AH"])
#att_range = att_max - att_min
#att_cov = att_std / att_mean
else:
nan_series = pd.Series(np.nan, index=gdf_sr.index)
att_mean, att_std, att_max, att_min, att_range, att_cov = (nan_series,) * 6
att_mean = nan_series
# --- END MODIFICATION ---

# --- ADDED BY ME: Calculate statistics for DBZH_COR_AH (Corrected Reflectivity) ---
if "DBZH_COR_AH" in gdf_gr.columns:
dbzh_cor_ah_mean = wrl.trafo.decibel(aggregator.average(values=wrl.trafo.idecibel(gdf_gr["DBZH_COR_AH"])))
dbzh_cor_ah_std = aggregator.std(values=gdf_gr["DBZH_COR_AH"])
dbzh_cor_ah_max = aggregator.max(values=gdf_gr["DBZH_COR_AH"])
dbzh_cor_ah_min = aggregator.min(values=gdf_gr["DBZH_COR_AH"])
dbzh_cor_ah_range = dbzh_cor_ah_max - dbzh_cor_ah_min
dbzh_cor_ah_cov = dbzh_cor_ah_std / dbzh_cor_ah_mean
else:
nan_series = pd.Series(np.nan, index=gdf_sr.index)
dbzh_cor_ah_mean, dbzh_cor_ah_std, dbzh_cor_ah_max, dbzh_cor_ah_min, dbzh_cor_ah_range, dbzh_cor_ah_cov = (nan_series,) * 6
# --- END MODIFICATION ---

# Create DataFrame with GR matched statistics
df = pd.DataFrame(
{
Expand All @@ -1258,6 +1397,41 @@
"GR_Z_min": z_min,
"GR_Z_range": z_range,
"GR_Z_cov": z_cov,

# --- ADDED BY ME ---
# ZDR Statistics
"GR_ZDR_mean": zdr_mean,
"GR_ZDR_std": zdr_std,
"GR_ZDR_max": zdr_max,
"GR_ZDR_min": zdr_min,
"GR_ZDR_range": zdr_range,
"GR_ZDR_cov": zdr_cov,

# PIAH Statistics
"GR_PIAH_mean": pia_mean,
"GR_PIAH_std": pia_std,
"GR_PIAH_max": pia_max,
"GR_PIAH_min": pia_min,
"GR_PIAH_range": pia_range,
"GR_PIAH_cov": pia_cov,

# AH Statistics
"GR_AH_mean": att_mean,
#"GR_AH_std": att_std,
#"GR_AH_max": att_max,
#"GR_AH_min": att_min,
#"GR_AH_range": att_range,
#"GR_AH_cov": att_cov,

# DBZH_COR_AH Statistics
"GR_DBZH_COR_AH_mean": dbzh_cor_ah_mean,
"GR_DBZH_COR_AH_std": dbzh_cor_ah_std,
"GR_DBZH_COR_AH_max": dbzh_cor_ah_max,
"GR_DBZH_COR_AH_min": dbzh_cor_ah_min,
"GR_DBZH_COR_AH_range": dbzh_cor_ah_range,
"GR_DBZH_COR_AH_cov": dbzh_cor_ah_cov,
# --- ADDED BY ME ---

"GR_time": time_gr,
"GR_gate_volume_sum": sum_vol,
"GR_fraction_covered_area": fraction_covered_area,
Expand All @@ -1267,7 +1441,7 @@
index=gdf_sr.index,
)
gdf_gr_match = gpd.GeoDataFrame(df, crs=crs_gr, geometry=aggregator.target_polygons)
gdf_gr_match.head()
gdf_gr_match.head()"""

# Add GR range statistics
gdf_gr_match["GR_range_max"] = aggregator.max(values=gdf_gr["range"])
Expand All @@ -1278,6 +1452,9 @@
gdf_gr_match["GR_azimuth"] = aggregator.first(values=gdf_gr["azimuth"])
gdf_gr_match["GR_elevation"] = aggregator.first(values=gdf_gr["elevation"])

# if "sweep_number" in ds_gr:
# gdf_gr_match["sweep_number"] = ds_gr["sweep_number"].item()

# Fraction above sensitivity thresholds
for thr in sr_sensitivity_thresholds:
fraction = aggregator.apply(lambda x, weights: np.sum(x > thr), values=gdf_gr[z_variable_gr]) / counts # noqa
Expand Down