diff --git a/src/BioSimSpace/FreeEnergy/_relative.py b/src/BioSimSpace/FreeEnergy/_relative.py index 98801998..19bc9e08 100644 --- a/src/BioSimSpace/FreeEnergy/_relative.py +++ b/src/BioSimSpace/FreeEnergy/_relative.py @@ -1059,12 +1059,16 @@ def _somd2_extract(parquet_file, T=None, estimator="MBAR"): raise ValueError("Parquet metadata does not contain 'lambda'.") if not is_mbar: try: - lambda_grad = metadata["lambda_grad"] + # Normalise to floats to match the DataFrame column type expected + # by alchemlyb (handles both old float and new string metadata). + lambda_grad = [float(v) for v in metadata["lambda_grad"]] except: raise ValueError("Parquet metadata does not contain 'lambda grad'") else: try: - lambda_grad = metadata["lambda_grad"] + # Normalise to floats to match the DataFrame column type expected + # by alchemlyb (handles both old float and new string metadata). + lambda_grad = [float(v) for v in metadata["lambda_grad"]] except: lambda_grad = [] @@ -1078,6 +1082,19 @@ def _somd2_extract(parquet_file, T=None, estimator="MBAR"): # Convert to a pandas dataframe. df = table.to_pandas() + # Normalise column names to floats so that comparisons are consistent + # regardless of whether the parquet was written with float keys (old + # sire) or formatted string keys (new sire). float("0.10000") and + # float("0.1") give the same IEEE754 value, so old and new files are + # handled identically and the alchemlyb index check passes. + df.columns = [ + float(c) + if isinstance(c, str) + and c.replace(".", "", 1).replace("-", "", 1).isdigit() + else c + for c in df.columns + ] + if is_mbar: # Extract all columns other than those used for the gradient. df = df[[x for x in df.columns if x not in lambda_grad]]