diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ba24c8a24..95c167bf6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -2,9 +2,9 @@ name: CI
 
 on:
   push:
-    branches: [rift_O4d, rift_O4d_junior]
+    branches: [rift_O4d, rift_O4d_junior, rift_O4d_gmm_gpu]
   pull_request:
-    branches: [rift_O4d, rift_O4d_junior]
+    branches: [rift_O4d, rift_O4d_junior, rift_O4d_gmm_gpu]
   workflow_dispatch:
 
 concurrency:
@@ -17,7 +17,10 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ['3.10', '3.11', '3.12', '3.13']
+        # Smoke-test that the package resolves and installs across the supported
+        # Python range, including the legacy py3.9 lane (paired with the pinned
+        # numpy==1.24.4 used by the integrator gate below).
+        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
@@ -60,11 +63,27 @@ jobs:
   import-check:
     needs: install
     runs-on: ubuntu-latest
+    # Verify every declared module imports cleanly under both the pinned
+    # legacy lane (py3.9 + numpy 1.24.4) and the modern lane (py3.12 +
+    # numpy 2.x). Catches platform-portability regressions like the
+    # np.float128 import-time crash on numpy 2.x systems without an
+    # extended-precision long double.
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - lane: legacy
+            python-version: '3.9'
+            numpy-pin: 'numpy==1.24.4'
+          - lane: modern
+            python-version: '3.12'
+            numpy-pin: 'numpy>=2.0,<3.0'
+    name: import-check (${{ matrix.lane }} py${{ matrix.python-version }})
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
         with:
-          python-version: '3.10'
+          python-version: ${{ matrix.python-version }}
           cache: 'pip'
           cache-dependency-path: requirements.txt
       - name: Enable symlink
@@ -73,8 +92,14 @@ jobs:
         run: |
           python -m pip install --upgrade pip --break-system-packages
           python -m pip install -r requirements.txt --break-system-packages
+          # Pin numpy AFTER requirements.txt so it overrides the unpinned
+          # 'numpy' line in requirements.txt without changing the file.
+          python -m pip install '${{ matrix.numpy-pin }}' --break-system-packages
           python -m pip install coverage pytest --break-system-packages
           python -m pip install --editable . --break-system-packages
+      - name: Show resolved versions
+        run: |
+          python -c "import sys, numpy, scipy; print('python', sys.version); print('numpy', numpy.__version__); print('scipy', scipy.__version__)"
       - name: Run import check
         run: python .travis/test-all-mod.py
 
@@ -108,11 +133,29 @@ jobs:
   test-run:
     needs: install
     runs-on: ubuntu-latest
+    # Integrator + posterior gate. We run this in two CI lanes:
+    #   - legacy : py3.9 + numpy 1.24.4 -- the historically known-good
+    #              configuration on Linux x86_64 where np.float128 is real.
+    #   - modern : py3.12 + numpy 2.x -- the forward-looking target. Catches
+    #              numpy 2.x removals (np.product, np.cumproduct, np.in1d,
+    #              np.alltrue, np.float_) and scipy >= 1.16 mvnun removal.
+    # Both lanes must pass test-integrate.sh's GMM/AC/AV consistency check.
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - lane: legacy
+            python-version: '3.9'
+            numpy-pin: 'numpy==1.24.4'
+          - lane: modern
+            python-version: '3.12'
+            numpy-pin: 'numpy>=2.0,<3.0'
+    name: test-run (${{ matrix.lane }} py${{ matrix.python-version }})
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
         with:
-          python-version: '3.10'
+          python-version: ${{ matrix.python-version }}
           cache: 'pip'
           cache-dependency-path: requirements.txt
       - name: Enable symlink
@@ -121,8 +164,14 @@ jobs:
         run: |
           python -m pip install --upgrade pip --break-system-packages
           python -m pip install -r requirements.txt --break-system-packages
+          # Pin numpy AFTER requirements.txt so it overrides the unpinned
+          # 'numpy' line in requirements.txt without changing the file.
+          python -m pip install '${{ matrix.numpy-pin }}' --break-system-packages
           python -m pip install coverage pytest pytest-cov --break-system-packages
           python -m pip install --editable . --break-system-packages
+      - name: Show resolved versions
+        run: |
+          python -c "import sys, numpy, scipy; print('python', sys.version); print('numpy', numpy.__version__); print('scipy', scipy.__version__)"
       - name: Run test scripts
         run: |
           . .travis/test-coord.sh
@@ -135,7 +184,7 @@ jobs:
         if: failure()
         uses: actions/upload-artifact@v4
         with:
-          name: test-logs
+          name: test-logs-${{ matrix.lane }}-py${{ matrix.python-version }}
           path: |
             **/*.log
             **/test-results/*.xml
diff --git a/MonteCarloMarginalizeCode/Code/RIFT/integrators/MonteCarloEnsemble.py b/MonteCarloMarginalizeCode/Code/RIFT/integrators/MonteCarloEnsemble.py
old mode 100644
new mode 100755
index d86227d54..163a76ca0
--- a/MonteCarloMarginalizeCode/Code/RIFT/integrators/MonteCarloEnsemble.py
+++ b/MonteCarloMarginalizeCode/Code/RIFT/integrators/MonteCarloEnsemble.py
@@ -11,6 +11,21 @@
 import time
 from scipy.special import logsumexp
 
+try:
+    import cupy
+    import cupyx
+    xpy_default = cupy
+    xpy_special_default = cupyx.scipy.special
+    identity_convert = cupy.asnumpy
+    identity_convert_togpu = cupy.asarray
+    cupy_ok = True
+except ImportError:
+    xpy_default = np
+    xpy_special_default = None
+    identity_convert = lambda x: x
+    identity_convert_togpu = lambda x: x
+    cupy_ok = False
+
 regularize_log_scale = 1e-64  # before taking np.log, add this, so we don't propagate infinities
 
 try:
@@ -75,6 +90,11 @@ def __init__(self, d, bounds, gmm_dict, n_comp, n=None, prior=None,
         self.proc_count = proc_count
         self.use_lnL = use_lnL
         self.return_lnI = return_lnI
+        
+        self.xpy = xpy_default
+        self.identity_convert = identity_convert
+        self.identity_convert_togpu = identity_convert_togpu
+        
         # constants
         self.t = 0.02 # percent estimated error threshold
         if n is None:
@@ -107,10 +127,10 @@ def __init__(self, d, bounds, gmm_dict, n_comp, n=None, prior=None,
             self.total_value = None
         self.n_max = float('inf')
         # saved values
-        self.cumulative_samples = np.empty((0, d))
-        self.cumulative_values = np.empty(0)
-        self.cumulative_p = np.empty(0)
-        self.cumulative_p_s = np.empty(0)
+        self.cumulative_samples = self.xpy.empty((0, d))
+        self.cumulative_values = self.xpy.empty(0)
+        self.cumulative_p = self.xpy.empty(0)
+        self.cumulative_p_s = self.xpy.empty(0)
         self.tempering_exp=tempering_exp
         self.temper_log=temper_log
         if L_cutoff is None:
@@ -120,73 +140,63 @@ def __init__(self, d, bounds, gmm_dict, n_comp, n=None, prior=None,
         
     def _calculate_prior(self):
         if self.prior is None:
-            self.prior_array = np.ones(self.n)
+            self.prior_array = self.xpy.ones(self.n)
         else:
             self.prior_array = self.prior(self.sample_array).flatten()
 
     def _sample(self):
-        self.sampling_prior_array = np.ones(self.n)
-        self.sample_array = np.empty((self.n, self.d))
+        self.sampling_prior_array = self.xpy.ones(self.n)
+        self.sample_array = self.xpy.empty((self.n, self.d))
         for dim_group in self.gmm_dict: # iterate over grouped dimensions
             # create a matrix of the left and right limits for this set of dimensions
-            new_bounds = np.empty((len(dim_group), 2))
+            new_bounds = self.xpy.empty((len(dim_group), 2))
             new_bounds = self.bounds[dim_group]
             if len(new_bounds.shape) < 2:
-                new_bounds = np.array([new_bounds])
-            # index = 0
-            # for dim in dim_group:
-            #     new_bounds[index] = self.bounds[dim]
-            #     index += 1
+                new_bounds = self.xpy.array([new_bounds])
             model = self.gmm_dict[dim_group]
             if model is None:
                 # sample uniformly for this group of dimensions
                 llim = new_bounds[:,0]
                 rlim = new_bounds[:,1]
-                temp_samples = np.random.uniform(llim, rlim, (self.n, len(dim_group)))
+                temp_samples = self.xpy.random.uniform(llim, rlim, (self.n, len(dim_group)))
                 # update responsibilities
-                vol = np.prod(rlim - llim)
+                vol = self.xpy.prod(rlim - llim)
                 self.sampling_prior_array *= 1.0 / vol
             else:
                 # sample from the gmm
-                temp_samples = model.sample(self.n)#, new_bounds)
+                temp_samples = model.sample(self.n)
                 # update responsibilities
-                self.sampling_prior_array *= model.score(temp_samples)#, new_bounds)
+                self.sampling_prior_array *= model.score(temp_samples)
             index = 0
             for dim in dim_group:
-                # put columns of temp_samples in final places in sample_array
                 self.sample_array[:,dim] = temp_samples[:,index]
                 index += 1
 
     def _train(self):
-        sample_array, value_array, sampling_prior_array = np.copy(self.sample_array), np.copy(self.value_array), np.copy(self.sampling_prior_array)
+        sample_array, value_array, sampling_prior_array = self.xpy.copy(self.sample_array), self.xpy.copy(self.value_array), self.xpy.copy(self.sampling_prior_array)
         if self.use_lnL:
             lnL = value_array
         else:
-            lnL = np.log(value_array+regularize_log_scale) # note we can get negative infinity here
-        log_weights = self.tempering_exp*lnL + np.log(self.prior_array) - sampling_prior_array
+            lnL = self.xpy.log(value_array+regularize_log_scale)
+        
+        log_weights = self.tempering_exp*lnL + self.xpy.log(self.prior_array) - sampling_prior_array
         if self.temper_log:
-            log_weights =np.log(np.maximum(lnL,1e-5))   # simplest to do it this way
+            log_weights = self.xpy.log(self.xpy.maximum(lnL,1e-5))
+            
         for dim_group in self.gmm_dict: # iterate over grouped dimensions
             if self.gmm_adapt:
                 if (dim_group in self.gmm_adapt):
-                    if not(self.gmm_adapt[dim_group]):   # disabling adaptation requires user *specifically request* not to use that dimension set; all other choices lead to adaptation
+                    if not(self.gmm_adapt[dim_group]):
                         continue
-            # create a matrix of the left and right limits for this set of dimensions
-            new_bounds = np.empty((len(dim_group), 2))
+            new_bounds = self.xpy.empty((len(dim_group), 2))
             new_bounds = self.bounds[dim_group]
-#            index = 0
-#            for dim in dim_group:
-#                new_bounds[index] = self.bounds[dim]
-#                index += 1
-            model = self.gmm_dict[dim_group] # get model for this set of dimensions
-            temp_samples = np.empty((self.n, len(dim_group)))
+            model = self.gmm_dict[dim_group]
+            temp_samples = self.xpy.empty((self.n, len(dim_group)))
             index = 0
             for dim in dim_group:
-                # get samples corresponding to the current model
                 temp_samples[:,index] = sample_array[:,dim]
                 index += 1
             if model is None:
-                # model doesn't exist yet
                 if isinstance(self.n_comp, int) and self.n_comp != 0:
                     model = GMM.gmm(self.n_comp, new_bounds,epsilon=self.gmm_epsilon)
                     model.fit(temp_samples, log_sample_weights=log_weights)
@@ -196,7 +206,6 @@ def _train(self):
             else:
                 model.update(temp_samples, log_sample_weights=log_weights)
             try:
-                # Verify model can evaluated! Quick and dirty test to confirm not singular
                 model.score(temp_samples[:5])
                 self.gmm_dict[dim_group] = model
             except:
@@ -205,125 +214,91 @@ def _train(self):
 
     def _calculate_results(self):
         if self.use_lnL:
-            lnL = np.copy(self.value_array) # changing the naming convention, just for this function, now that I know better
+            lnL = self.xpy.copy(self.value_array)
         else:
-            lnL = np.log(self.value_array+regularize_log_scale)
-        # strip off any samples with likelihoods less than our cutoff
-        mask = np.ones(lnL.shape,dtype=bool)
-        if not(self.L_cutoff is None):  # if not none
-            if not(np.isinf(self.L_cutoff)):  # and not infinite, then apply the cutoff
-                mask = lnL > (np.log(self.L_cutoff) if self.L_cutoff > 0 else -np.inf)
-#        print(mask, self.L_cutoff, lnL)
+            lnL = self.xpy.log(self.value_array+regularize_log_scale)
+        mask = self.xpy.ones(lnL.shape,dtype=bool)
+        if not(self.L_cutoff is None):
+            if not(self.xpy.isinf(self.L_cutoff)):
+                mask = lnL > (self.xpy.log(self.L_cutoff) if self.L_cutoff > 0 else -self.xpy.inf)
         lnL = lnL[mask]
         prior = self.prior_array[mask]
         sampling_prior = self.sampling_prior_array[mask]
         
-        # append to the cumulative arrays
-        self.cumulative_samples = np.append(self.cumulative_samples, self.sample_array[mask], axis=0)
-        self.cumulative_values = np.append(self.cumulative_values, lnL, axis=0)
-        self.cumulative_p = np.append(self.cumulative_p, prior, axis=0)
-        self.cumulative_p_s = np.append(self.cumulative_p_s, sampling_prior, axis=0)
+        self.cumulative_samples = self.xpy.append(self.cumulative_samples, self.sample_array[mask], axis=0)
+        self.cumulative_values = self.xpy.append(self.cumulative_values, lnL, axis=0)
+        self.cumulative_p = self.xpy.append(self.cumulative_p, prior, axis=0)
+        self.cumulative_p_s = self.xpy.append(self.cumulative_p_s, sampling_prior, axis=0)
         
-        # compute the log sample weights
-        log_weights = lnL + np.log(prior) - np.log(sampling_prior)
-        if np.any(np.isnan(log_weights)):
+        log_weights = lnL + self.xpy.log(prior) - self.xpy.log(sampling_prior)
+        if self.xpy.any(self.xpy.isnan(log_weights)):
             print(" NAN weight ")
             raise ValueError
         if self.terrible_lnw_threshold: 
-            if np.max(log_weights) <self.terrible_lnw_threshold:  #this is a terrible fit. Note the ABSOLUTE SCALE, which is problem dependent!
+            if self.xpy.max(log_weights) < self.terrible_lnw_threshold:
                 print(" TERRIBLE FIT ")
                 raise ValueError
 
-        # do a shift so that the highest log weight is 0, keeping track of the shift
-        log_scale_factor = np.max(log_weights) # don't insert nan here
+        log_scale_factor = self.xpy.max(log_weights)
         if not(self.return_lnI):
-            scale_factor = np.exp(log_scale_factor)
+            scale_factor = self.xpy.exp(log_scale_factor)
             log_weights -= log_scale_factor
-            summed_vals = scale_factor * np.sum(np.exp(log_weights))
+            summed_vals = scale_factor * self.xpy.sum(self.xpy.exp(log_weights))
             integral_value = summed_vals / self.n
         
-            # Calculate the log of the Monte Carlo integration error.
-            # Let `a` be the scale factor, and let `w` be the weights with the scale factor divided out (i.e. weight = a w), then
-            #     error^2 = var(a w) / N = a^2 var(w) / N.
-            # The point is that 0 <= w <= 1, so there's no potential for overflow here when calculating the variance.
-            # Since the scale factor `a` is potentially very large, squaring it could overflow; therefore, we keep out a factor of ln(a^2) = 2ln(a).
-            # Note that ln(a) is exactly the log_scale_factor variable defined above.
-            scaled_error_squared = np.var(np.exp(log_weights)) / self.n
+            scaled_error_squared = self.xpy.var(self.xpy.exp(log_weights)) / self.n
             log_error_scale_factor = 2. * log_scale_factor
         
             self.integral = (self.iterations * self.integral + integral_value) / (self.iterations + 1)
+            self.scaled_error_squared = (self.iterations * self.xpy.exp(self.log_error_scale_factor - log_error_scale_factor) * self.scaled_error_squared + scaled_error_squared) / (self.iterations + 1)
+            self.log_error_scale_factor = log_error_scale_factor
         
-            # Calculate the running average of the variance.
-            # This calculation is complicated by the scale factors: the previous running average is scaled by exp(self.log_error_scale_factor),
-            # which is currently the log_error_scale_factor from the previous iteration.
-            # We can avoid overflows by factoring our *new* log_error_scale_factor out of the running average, so that we exponentiate a smaller number.
-            self.scaled_error_squared = (self.iterations * np.exp(self.log_error_scale_factor - log_error_scale_factor) * self.scaled_error_squared + scaled_error_squared) / (self.iterations + 1)
-            self.log_error_scale_factor = log_error_scale_factor # having computed the running average, update this
-        
-            # calculate the effective samples
             self.total_value += summed_vals
-            self.max_value = max(scale_factor, self.max_value)
+            self.max_value = self.xpy.maximum(scale_factor, self.max_value)
             self.eff_samp = self.total_value / self.max_value
-        else: # using lnI return values
-            # Evaluate integral and effective sample count
-            # Note with lnI return values, no more need to keep track of a scale factor at all
-            log_sum_weights = logsumexp(log_weights) 
-            log_integral_here = log_sum_weights - np.log(self.n)
+        else:
+            if cupy_ok:
+                log_sum_weights = cupyx.scipy.special.logsumexp(log_weights)
+            else:
+                log_sum_weights = logsumexp(log_weights)
+                
+            log_integral_here = log_sum_weights - self.xpy.log(self.n)
             if not(self.integral ):
                 self.integral = log_integral_here  
                 self.total_value = log_sum_weights  
                 self.max_value = log_scale_factor
             else:
-                self.integral = logsumexp([ self.integral +np.log(self.iterations), log_integral_here]) - np.log(self.iterations+1)
-                self.total_value= logsumexp([self.total_value, log_sum_weights])
-                self.max_value = np.max([self.max_value,np.max(log_weights)])
-            self.eff_samp = np.exp(self.total_value - (self.max_value  ))
+                if cupy_ok:
+                    self.integral = cupyx.scipy.special.logsumexp([ self.integral + self.xpy.log(self.iterations), log_integral_here]) - self.xpy.log(self.iterations+1)
+                    self.total_value= cupyx.scipy.special.logsumexp([self.total_value, log_sum_weights])
+                else:
+                    self.integral = logsumexp([ self.integral + np.log(self.iterations), log_integral_here]) - np.log(self.iterations+1)
+                    self.total_value = logsumexp([self.total_value, log_sum_weights])
+                self.max_value = self.xpy.maximum(self.max_value, self.xpy.max(log_weights))
+            self.eff_samp = self.xpy.exp(self.total_value - (self.max_value  ))
 
-            # Evaluate error squared, avoiding overflow
-            tmp_max = np.max(log_weights)
-            log_scaled_error_squared = np.log(np.var(np.exp(log_weights - tmp_max))) + 2*tmp_max   - np.log(self.n)
+            tmp_max = self.xpy.max(log_weights)
+            log_scaled_error_squared = self.xpy.log(self.xpy.var(self.xpy.exp(log_weights - tmp_max))) + 2*tmp_max - self.xpy.log(self.n)
             if not(self.scaled_error_squared):
                 self.scaled_error_squared = log_scaled_error_squared
             else:
-                self.scaled_error_squared = logsumexp([ self.scaled_error_squared + np.log(self.iterations), log_scaled_error_squared]) - np.log(self.iterations+1)
+                if cupy_ok:
+                    self.scaled_error_squared = cupyx.scipy.special.logsumexp([ self.scaled_error_squared + self.xpy.log(self.iterations), log_scaled_error_squared]) - self.xpy.log(self.iterations+1)
+                else:
+                    self.scaled_error_squared = logsumexp([ self.scaled_error_squared + np.log(self.iterations), log_scaled_error_squared]) - np.log(self.iterations+1)
 
     def _reset(self):
-        ### reset GMMs
         for k in self.gmm_dict:
             self.gmm_dict[k] = None
         
 
     def integrate(self, func, min_iter=10, max_iter=20, var_thresh=0.0, max_err=10,
             neff=float('inf'), nmax=None, progress=False, epoch=None,verbose=True,force_no_adapt=False,use_lnL=False,return_lnI=False,**kwargs):
-        '''
-        Evaluate the integral
-
-        Parameters
-        ----------
-        func : function
-            Integrand function
-        min_iter : int
-            Minimum number of integrator iterations
-        max_iter : int
-            Maximum number of integrator iterations
-        var_thresh : float
-            Variance threshold for terminating integration
-        max_err : int
-            Maximum number of errors to catch before terminating integration
-        neff : float
-            Effective samples threshold for terminating integration
-        nmax : int
-            Maximum number of samples to draw
-        progress : bool
-            Print GMM parameters each iteration
-        n_adapt: number of *adaptations* we will perform, before freezing the GMM
-        '''
         n_adapt = int(kwargs["n_adapt"]) if "n_adapt" in kwargs else 100
-        tripwire_fraction = kwargs["tripwire_fraction"] if "tripwire_fraction" in kwargs else 2  # make it impossible to trigger
-        tripwire_epsilon = kwargs["tripwire_epsilon"] if "tripwire_epsilon" in kwargs else 0.001 # if we are not reasonably far away from unity, fail!
-        self.use_lnL = use_lnL   # option to override initialization of sampler : argument passed in another way
-        self.return_lnI = return_lnI   # option to override initialization of sampler : argument passed in another way
-
+        tripwire_fraction = kwargs["tripwire_fraction"] if "tripwire_fraction" in kwargs else 2
+        tripwire_epsilon = kwargs["tripwire_epsilon"] if "tripwire_epsilon" in kwargs else 0.001
+        self.use_lnL = use_lnL
+        self.return_lnI = return_lnI
 
         err_count = 0
         cumulative_eval_time = 0
@@ -337,7 +312,6 @@ def integrate(self, func, min_iter=10, max_iter=20, var_thresh=0.0, max_err=10,
 
             if force_no_adapt or self.iterations >= n_adapt:
                 adapting=False
-#            print('Iteration:', self.iterations)
             if err_count >= max_err:
                 print('Exiting due to errors...')
                 break
@@ -354,11 +328,13 @@ def integrate(self, func, min_iter=10, max_iter=20, var_thresh=0.0, max_err=10,
                 continue
             t1 = time.time()
             if self.proc_count is None:
-                self.value_array = func(np.copy(self.sample_array)).flatten()
+                # Ensure input to func is numpy for CPU-based user functions if needed, 
+                # but the user is encouraged to support xpy.
+                self.value_array = func(self.xpy.copy(self.sample_array)).flatten()
             else:
-                split_samples = np.array_split(self.sample_array, self.proc_count)
+                split_samples = self.xpy.array_split(self.sample_array, self.proc_count)
                 p = Pool(self.proc_count)
-                self.value_array = np.concatenate(p.map(func, split_samples), axis=0)
+                self.value_array = self.xpy.concatenate(p.map(func, split_samples), axis=0)
                 p.close()
             cumulative_eval_time += time.time() - t1
             self._calculate_prior()
@@ -375,10 +351,10 @@ def integrate(self, func, min_iter=10, max_iter=20, var_thresh=0.0, max_err=10,
                 continue
             self.iterations += 1
             self.ntotal += self.n
-            testval =self.scaled_error_squared
+            testval = self.scaled_error_squared
             if not(self.return_lnI):
-                testval = np.log(self.scaled_error_squared) + self.log_error_scale_factor
-            if self.iterations >= min_iter and testval < np.log(var_thresh):
+                testval = self.xpy.log(self.scaled_error_squared) + self.log_error_scale_factor
+            if self.iterations >= min_iter and testval < self.xpy.log(var_thresh):
                 break
             try:
                 if adapting:
@@ -400,10 +376,10 @@ def integrate(self, func, min_iter=10, max_iter=20, var_thresh=0.0, max_err=10,
             if epoch is not None and self.iterations % epoch == 0:
                 self._reset()
             if verbose:
-                # Standard mcsampler message, to monitor convergence
+                print(self.scaled_error_squared)
                 if not(self.return_lnI):
-                    print(" : {} {} {} {} {} ".format((self.iterations-1)*self.n, self.eff_samp, np.sqrt(2*np.max(self.cumulative_values)), np.sqrt(2*(np.log(self.integral))),  np.sqrt(self.scaled_error_squared )/self.integral/np.sqrt(self.iterations ) ) )
+                    print(" : {} {} {} {} {} ".format((self.iterations-1)*self.n, self.eff_samp, self.xpy.sqrt(2*self.xpy.max(self.cumulative_values)), self.xpy.sqrt(2*(self.xpy.log(self.integral))),  self.xpy.sqrt(self.scaled_error_squared )/self.integral/self.xpy.sqrt(self.iterations ) ) )
                 else:
-                    print(" : {} {} {} {} {} ".format((self.iterations-1)*self.n, self.eff_samp, np.sqrt(2*np.max(self.cumulative_values)), np.sqrt(2*self.integral), np.exp(0.5*(self.scaled_error_squared - self.integral*2) )/np.sqrt(self.iterations)))
+                    print(" : {} {} {} {} {} ".format((self.iterations-1)*self.n, self.eff_samp, self.xpy.sqrt(2*self.xpy.max(self.cumulative_values)), self.xpy.sqrt(2*self.integral), self.xpy.exp(0.5*(self.scaled_error_squared - self.integral*2) )/self.xpy.sqrt(self.iterations)))
         print('cumulative eval time: ', cumulative_eval_time)
         print('integrator iterations: ', self.iterations)
diff --git a/MonteCarloMarginalizeCode/Code/RIFT/integrators/gaussian_mixture_model.py b/MonteCarloMarginalizeCode/Code/RIFT/integrators/gaussian_mixture_model.py
old mode 100644
new mode 100755
index 98fd8c256..51113cda3
--- a/MonteCarloMarginalizeCode/Code/RIFT/integrators/gaussian_mixture_model.py
+++ b/MonteCarloMarginalizeCode/Code/RIFT/integrators/gaussian_mixture_model.py
@@ -14,6 +14,20 @@
 import numpy as np
 from scipy.stats import multivariate_normal,norm
 
+try:
+    import cupy
+    import cupyx
+    xpy_default = cupy
+    xpy_special_default = cupyx.scipy.special
+    identity_convert = cupy.asnumpy
+    identity_convert_togpu = cupy.asarray
+    cupy_ok = True
+except ImportError:
+    xpy_default = np
+    xpy_special_default = None # scipy.special is used via scipy if needed
+    identity_convert = lambda x: x
+    identity_convert_togpu = lambda x: x
+    cupy_ok = False
 
 # 1. Try to find the legacy mvnun in known locations
 try:
@@ -49,16 +63,45 @@ def mvnun(lower, upper, mean, cov, maxpts=None, abseps=1e-5, releps=1e-5):
         return p, 0
 
         
-#from scipy.misc import logsumexp
 from scipy.special import logsumexp
 from . import multivariate_truncnorm as truncnorm
 import itertools
 
 
-# Equation references are from Numerical Recipes for general GMM and
-# https://www.cs.nmsu.edu/~joemsong/publications/Song-SPIE2005-updated.pdf for
-# online updating features
+def gpu_logpdf(x, mean, cov, xpy):
+    """
+    GPU-compatible multivariate normal log-pdf.
+    x: (n, d) array
+    mean: (d,) array
+    cov: (d, d) array
+
+    Uses Cholesky + a generic linear solve (xpy.linalg.solve) so the same
+    code path works for both numpy and cupy. Note: solve_triangular is
+    NOT in numpy.linalg or cupy.linalg (only in scipy.linalg /
+    cupyx.scipy.linalg), so we deliberately use the generic solver here.
+    """
+    d = mean.shape[0]
+    diff = x - mean
+    # Use cholesky for efficiency and stability
+    try:
+        L = xpy.linalg.cholesky(cov)
+    except xpy.linalg.LinAlgError:
+        # Fallback to adding small epsilon to diagonal
+        eps = 1e-6 * xpy.eye(d)
+        L = xpy.linalg.cholesky(cov + eps)
+
+    # Solve L*y = diff^T => y = L^-1 * diff^T
+    # diff is (n, d), so diff.T is (d, n)
+    y = xpy.linalg.solve(L, diff.T)
+
+    # quad_form = sum(y^2, axis=0)
+    quad_form = xpy.sum(y**2, axis=0)
 
+    # log_det = 2 * sum(log(diag(L)))
+    log_det = 2.0 * xpy.sum(xpy.log(xpy.diag(L)))
+
+    log_prob = -0.5 * (d * xpy.log(2 * xpy.pi) + log_det + quad_form)
+    return log_prob
 
 class estimator:
     '''
@@ -87,14 +130,17 @@ def __init__(self, k, max_iters=100, tempering_coeff=1e-8,adapt=None):
         self.cov_avg_ratio = 0.05
         self.epsilon = 1e-4
         self.tempering_coeff = tempering_coeff
+        self.xpy = xpy_default
+        self.identity_convert = identity_convert
+        self.identity_convert_togpu = identity_convert_togpu
 
     def _initialize(self, n, sample_array, log_sample_weights=None):
-        p_weights = np.exp(log_sample_weights - np.max(log_sample_weights)).flatten()
-        p_weights[np.isnan(p_weights)] = 0 # zero out the nan weights
-        p_weights /= np.sum(p_weights)
-        self.means = sample_array[np.random.choice(n, self.k, p=p_weights.astype(sample_array.dtype)), :]
-        self.covariances = [np.identity(self.d)] * self.k
-        self.weights = np.ones(self.k) / self.k
+        p_weights = self.xpy.exp(log_sample_weights - self.xpy.max(log_sample_weights)).flatten()
+        p_weights[self.xpy.isnan(p_weights)] = 0 # zero out the nan weights
+        p_weights /= self.xpy.sum(p_weights)
+        self.means = sample_array[self.xpy.random.choice(n, self.k, p=p_weights.astype(sample_array.dtype)), :]
+        self.covariances = [self.xpy.identity(self.d)] * self.k
+        self.weights = self.xpy.ones(self.k) / self.k
         self.adapt = [True] * self.k
 
     def _e_step(self, n, sample_array, log_sample_weights=None):
@@ -102,49 +148,61 @@ def _e_step(self, n, sample_array, log_sample_weights=None):
         Expectation step
         '''
         if log_sample_weights is None:
-            log_sample_weights = np.zeros(n)
-        p_nk = np.empty((n, self.k))
+            log_sample_weights = self.xpy.zeros(n)
+        p_nk = self.xpy.empty((n, self.k))
         for index in range(self.k):
             mean = self.means[index]
             cov = self.covariances[index]
-            log_p = np.log(self.weights[index])
-            log_pdf = multivariate_normal.logpdf(x=sample_array, mean=mean, cov=cov, allow_singular=True) # (16.1.4)
-            # note that allow_singular=True in the above line is probably really dumb and
-            # terrible, but it seems to occasionally keep the whole thing from blowing up
-            # so it stays for now
+            log_p = self.xpy.log(self.weights[index])
+            
+            if cupy_ok:
+                log_pdf = gpu_logpdf(sample_array, mean, cov, self.xpy)
+            else:
+                log_pdf = multivariate_normal.logpdf(x=sample_array, mean=mean, cov=cov, allow_singular=True)
+                
             p_nk[:,index] = log_pdf + log_p # (16.1.5)
-        p_xn = logsumexp(p_nk, axis=1)#, keepdims=True) # (16.1.3)
-        self.p_nk = p_nk - p_xn[:,np.newaxis] # (16.1.5)
+            
+        # Use cupy or scipy for logsumexp
+        if cupy_ok:
+            p_xn = cupyx.scipy.special.logsumexp(p_nk, axis=1)
+        else:
+            p_xn = logsumexp(p_nk, axis=1)
+            
+        self.p_nk = p_nk - p_xn[:,self.xpy.newaxis] # (16.1.5)
         # normalize log sample weights as well, before modifying things with them
-        self.p_nk += log_sample_weights[:,np.newaxis]  -         logsumexp(log_sample_weights) 
-        self.log_prob = np.sum(p_xn + log_sample_weights) # (16.1.2)
+        if cupy_ok:
+            ls_sum = cupyx.scipy.special.logsumexp(log_sample_weights)
+        else:
+            ls_sum = logsumexp(log_sample_weights)
+            
+        self.p_nk += log_sample_weights[:,self.xpy.newaxis]  - ls_sum 
+        
+        if cupy_ok:
+            self.log_prob = self.xpy.sum(p_xn + log_sample_weights)
+        else:
+            self.log_prob = np.sum(p_xn + log_sample_weights)
 
     def _m_step(self, n, sample_array):
         '''
         Maximization step
         '''
-        p_nk = np.exp(self.p_nk)
-        weights = np.sum(p_nk, axis=0)   # weight of a single component
+        p_nk = self.xpy.exp(self.p_nk)
+        weights = self.xpy.sum(p_nk, axis=0)   # weight of a single component
         for index in range(self.k):
           if self.adapt[index]:
             # (16.1.6)
             w = weights[index]   # should be 1 for a single component, note
             p_k = p_nk[:,index]
-            mean = np.sum(np.multiply(sample_array, p_k[:,np.newaxis]), axis=0)
+            mean = self.xpy.sum(self.xpy.multiply(sample_array, p_k[:,self.xpy.newaxis]), axis=0)
             mean /= w
             self.means[index] = mean
             # (16.1.6)
             diff = sample_array - mean
-            cov = np.dot((p_k[:,np.newaxis] * diff).T, diff) / w
-#            cov = np.cov(diff.T, aweights=p_k)/w   # don't reinvent the wheel
-#            if len(mean)<2:
-#                cov =np.array([[cov]])
-            # attempt to fix non-positive-semidefinite covariances
+            cov = self.xpy.dot((p_k[:,self.xpy.newaxis] * diff).T, diff) / w
             self.covariances[index] = self._near_psd(cov)
             # (16.17)
-        weights /= np.sum(p_nk[:,self.adapt])
-        # if we are not adapting some of the gaussians, we need to renormalize again. Note the weight of the fixed item remains fixed!
-        weights /= np.sum(weights)
+        weights /= self.xpy.sum(p_nk[:,self.adapt])
+        weights /= self.xpy.sum(weights)
         self.weights = weights
 
 
@@ -158,34 +216,31 @@ def _tol(self, n):
     def _near_psd(self, x):
         '''
         Calculates the nearest postive semi-definite matrix for a correlation/covariance matrix
-        
-        Code from here:
-        https://stackoverflow.com/questions/10939213/how-can-i-calculate-the-nearest-positive-semi-definite-matrix
         '''
         n = x.shape[0]
-        var_list = np.array([np.sqrt(x[i,i]) for i in range(n)])
-        y = np.array([[x[i, j]/(var_list[i]*var_list[j]) for i in range(n)] for j in range(n)])
+        var_list = self.xpy.array([self.xpy.sqrt(x[i,i]) for i in range(n)])
+        # Use broadcasting for y instead of nested list comprehension
+        y = x / (var_list[:, None] * var_list[None, :])
         while True:
             epsilon = self.epsilon
-            if min(np.linalg.eigvals(y)) > epsilon:
+            if self.xpy.min(self.xpy.linalg.eigvals(y)) > epsilon:
                 return x
 
-            # Removing scaling factor of covariance matrix
-            var_list = np.array([np.sqrt(x[i,i]) for i in range(n)])
-            y = np.array([[x[i, j]/(var_list[i]*var_list[j]) for i in range(n)] for j in range(n)])
-
-            # getting the nearest correlation matrix
-            eigval, eigvec = np.linalg.eig(y)
-            val = np.matrix(np.maximum(eigval, epsilon))
-            vec = np.matrix(eigvec)
-            T = 1/(np.multiply(vec, vec) * val.T)
-            T = np.matrix(np.sqrt(np.diag(np.array(T).reshape((n)) )))
-            B = T * vec * np.diag(np.array(np.sqrt(val)).reshape((n)))
-            near_corr = B*B.T    
-
-            # returning the scaling factors
-            near_cov = np.array([[near_corr[i, j]*(var_list[i]*var_list[j]) for i in range(n)] for j in range(n)])
-            if np.isreal(near_cov).all():
+            var_list = self.xpy.array([self.xpy.sqrt(x[i,i]) for i in range(n)])
+            y = x / (var_list[:, None] * var_list[None, :])
+
+            eigval, eigvec = self.xpy.linalg.eig(y)
+            val = self.xpy.maximum(eigval, epsilon)
+            vec = eigvec
+            
+            # Standard PSD projection:
+            val_psd = self.xpy.maximum(eigval, epsilon)
+            near_corr = vec @ self.xpy.diag(val_psd) @ vec.T
+            
+            # Re-scale back to covariance
+            near_cov = near_corr * (var_list[:, None] * var_list[None, :])
+            
+            if self.xpy.isreal(near_cov).all():
                 break
             else:
                 x = near_cov.real
@@ -194,13 +249,6 @@ def _near_psd(self, x):
     def fit(self, sample_array, log_sample_weights):
         '''
         Fit the model to data
-
-        Parameters
-        ----------
-        sample_array : np.ndarray
-            Array of samples to fit
-        log_sample_weights : np.ndarray
-            Weights for samples
         '''
         n, self.d = sample_array.shape
         self._initialize(n, sample_array, log_sample_weights)
@@ -214,21 +262,24 @@ def fit(self, sample_array, log_sample_weights):
             count += 1
         for index in range(self.k):
             cov = self.covariances[index]
-            # temper
-            #   - note this introduces a PREFERRED LENGTH SCALE into the problem, which is dangerous
-            cov = (cov + self.tempering_coeff * np.eye(self.d)) / (1 + self.tempering_coeff)
+            cov = (cov + self.tempering_coeff * self.xpy.eye(self.d)) / (1 + self.tempering_coeff)
             self.covariances[index] = cov
 
     def print_params(self):
         '''
         Prints the model's parameters in an easily-readable format
         '''
+        # Convert to numpy for printing
+        means_np = [self.identity_convert(m) for m in self.means]
+        covs_np = [self.identity_convert(c) for c in self.covariances]
+        weights_np = self.identity_convert(self.weights)
+        
         if self.d ==1:
             print("GMM:   component wt mean std ")
         for i in range(self.k):
-            mean = self.means[i]
-            cov = self.covariances[i]
-            weight = self.weights[i]
+            mean = means_np[i]
+            cov = covs_np[i]
+            weight = weights_np[i]
             if self.d >1:
                 print('________________________________________\n')
                 print('Component', i)
@@ -245,22 +296,11 @@ def print_params(self):
 class gmm:
     '''
     More sophisticated implementation built on top of estimator class
-
-    Includes functionality to update with new data rather than re-fit, as well
-    as sampling and scoring of samples.
-
-    Parameters
-    ----------
-    k : int
-        Number of Gaussian components
-    max_iters : int
-        Maximum number of Expectation-Maximization iterations
     '''
 
     def __init__(self, k, bounds, max_iters=1000,epsilon=None,tempering_coeff=1e-8):
         self.k = k
         self.bounds = bounds
-        #self.tol = tol
         self.max_iters = max_iters
         self.means = [None] * k
         self.covariances =[None] * k
@@ -272,14 +312,17 @@ def __init__(self, k, bounds, max_iters=1000,epsilon=None,tempering_coeff=1e-8):
         self.N = 0
         self.epsilon =epsilon
         if self.epsilon is None:
-            self.epsilon = 1e-6  # allow very strong correlations
+            self.epsilon = 1e-6
         else:
             self.epsilon=epsilon
         self.tempering_coeff = tempering_coeff
+        self.xpy = xpy_default
+        self.identity_convert = identity_convert
+        self.identity_convert_togpu = identity_convert_togpu
 
     def _normalize(self, samples):
         n, d = samples.shape
-        out = np.empty((n, d))
+        out = self.xpy.empty((n, d))
         for i in range(d):
             [llim, rlim] = self.bounds[i]
             out[:,i] = (2.0 * samples[:,i] - (rlim + llim)) / (rlim - llim)
@@ -287,7 +330,7 @@ def _normalize(self, samples):
 
     def _unnormalize(self, samples):
         n, d = samples.shape
-        out = np.empty((n, d))
+        out = self.xpy.empty((n, d))
         for i in range(d):
             [llim, rlim] = self.bounds[i]
             out[:,i] = 0.5 * ((rlim - llim) * samples[:,i] + (llim + rlim))
@@ -296,18 +339,11 @@ def _unnormalize(self, samples):
     def fit(self, sample_array, log_sample_weights=None):
         '''
         Fit the model to data
-
-        Parameters
-        ----------
-        sample_array : np.ndarray
-            Array of samples to fit
-        sample_weights : np.ndarray
-            Weights for samples
         '''
         self.N, self.d = sample_array.shape
         if log_sample_weights is None:
-            log_sample_weights = np.zeros(self.N)
-        # just use base estimator
+            log_sample_weights = self.xpy.zeros(self.N)
+        
         model = estimator(self.k, tempering_coeff=self.tempering_coeff,adapt=self.adapt)
         model.fit(self._normalize(sample_array), log_sample_weights)
         self.means = model.means
@@ -328,84 +364,71 @@ def _match_components(self, new_model):
             dist = 0
             i = 0
             for j in order:
-                # get Mahalanobis distance between current pair of components
-                diff = new_model.means[j] - self.means[i]
-                cov_inv = np.linalg.inv(self.covariances[i])
-                temp_cov_inv = np.linalg.inv(new_model.covariances[j])
+                # These are likely small vectors, stay on CPU
+                diff = self.identity_convert(new_model.means[j]) - self.identity_convert(self.means[i])
+                cov_inv = np.linalg.inv(self.identity_convert(self.covariances[i]))
+                temp_cov_inv = np.linalg.inv(self.identity_convert(new_model.covariances[j]))
                 dist += np.sqrt(np.dot(np.dot(diff, cov_inv), diff))
                 dist += np.sqrt(np.dot(np.dot(diff, temp_cov_inv), diff))
                 i += 1
             distances[index] = dist
             index += 1
-        return orders[np.argmin(distances)] # returns order which gives minimum net Mahalanobis distance
+        return orders[np.argmin(distances)]
 
     def _merge(self, new_model, M):
         '''
         Merge corresponding components of new model and old model
-
-        Refer to paper linked at the top of this file
-
-        M is the number of samples that the new model was fit using
         '''
         order = self._match_components(new_model)
         for i in range(self.k):
-            j = order[i] # get corresponding component
+            j = order[i]
             old_mean = self.means[i]
             temp_mean = new_model.means[j]
             old_cov = self.covariances[i]
             temp_cov = new_model.covariances[j]
             old_weight = self.weights[i]
             temp_weight = new_model.weights[j]
-            denominator = (self.N * old_weight) + (M * temp_weight) # this shows up a lot so just compute it once
-            # start equation (6)
+            denominator = (self.N * old_weight) + (M * temp_weight)
+            
             mean = (self.N * old_weight * old_mean) + (M * temp_weight * temp_mean)
             mean /= denominator
-            # start equation (7)
+            
             cov1 = (self.N * old_weight * old_cov) + (M * temp_weight * temp_cov)
             cov1 /= denominator
-            cov2 = (self.N * old_weight * old_mean * old_mean.T) + (M * temp_weight * temp_mean * temp_mean.T)
+            
+            # outer product for means
+            cov2 = (self.N * old_weight * self.xpy.outer(old_mean, old_mean)) + (M * temp_weight * self.xpy.outer(temp_mean, temp_mean))
             cov2 /= denominator
-            cov = cov1 + cov2 - mean * mean.T
-            # check for positive-semidefinite
+            
+            cov = cov1 + cov2 - self.xpy.outer(mean, mean)
             cov = self._near_psd(cov)
-            # start equation (8)
+            
             weight = denominator / (self.N + M)
-            # update everything
+            
             self.means[i] = mean
             self.covariances[i] = cov
             self.weights[i] = weight
 
     def _near_psd(self, x):
         '''
-	Calculates the nearest postive semi-definite matrix for a correlation/covariance matrix
-
-        Code from here:
-        https://stackoverflow.com/questions/10939213/how-can-i-calculate-the-nearest-positive-semi-definite-matrix
+        Calculates the nearest postive semi-definite matrix for a correlation/covariance matrix
         '''
         n = x.shape[0]
-        var_list = np.array([np.sqrt(x[i,i]) for i in range(n)])
-        y = np.array([[x[i, j]/(var_list[i]*var_list[j]) for i in range(n)] for j in range(n)])
+        var_list = self.xpy.array([self.xpy.sqrt(x[i,i]) for i in range(n)])
+        y = x / (var_list[:, None] * var_list[None, :])
         while True:
             epsilon = self.epsilon
-            if min(np.linalg.eigvals(y)) > epsilon:
+            if self.xpy.min(self.xpy.linalg.eigvals(y)) > epsilon:
                 return x
 
-            # Removing scaling factor of covariance matrix
-            var_list = np.array([np.sqrt(x[i,i]) for i in range(n)])
-            y = np.array([[x[i, j]/(var_list[i]*var_list[j]) for i in range(n)] for j in range(n)])
-
-            # getting the nearest correlation matrix
-            eigval, eigvec = np.linalg.eig(y)
-            val = np.matrix(np.maximum(eigval, epsilon))
-            vec = np.matrix(eigvec)
-            T = 1/(np.multiply(vec, vec) * val.T)
-            T = np.matrix(np.sqrt(np.diag(np.array(T).reshape((n)) )))
-            B = T * vec * np.diag(np.array(np.sqrt(val)).reshape((n)))
-            near_corr = B*B.T    
-
-            # returning the scaling factors
-            near_cov = np.array([[near_corr[i, j]*(var_list[i]*var_list[j]) for i in range(n)] for j in range(n)])
-            if np.isreal(near_cov).all():
+            var_list = self.xpy.array([self.xpy.sqrt(x[i,i]) for i in range(n)])
+            y = x / (var_list[:, None] * var_list[None, :])
+
+            eigval, eigvec = self.xpy.linalg.eig(y)
+            val_psd = self.xpy.maximum(eigval, epsilon)
+            near_corr = eigvec @ self.xpy.diag(val_psd) @ eigvec.T
+            near_cov = near_corr * (var_list[:, None] * var_list[None, :])
+            if self.xpy.isreal(near_cov).all():
                 break
             else:
                 x = near_cov.real
@@ -414,122 +437,132 @@ def _near_psd(self, x):
     def update(self, sample_array, log_sample_weights=None):
         '''
         Updates the model with new data without doing a full retraining.
-
-        Parameters
-        ----------
-        sample_array : np.ndarray
-            Array of samples to fit
-        sample_weights : np.ndarray
-            Weights for samples
         '''
         self.tempering_coeff /= 2
         new_model = estimator(self.k, self.max_iters, self.tempering_coeff)
-        # Strip non-finite training data
-        indx_ok = np.isfinite(log_sample_weights)  
-        new_model.fit(self._normalize(sample_array[indx_ok]), log_sample_weights[indx_ok])
+        
+        # Filter non-finite
+        if log_sample_weights is not None:
+            indx_ok = self.xpy.isfinite(log_sample_weights)
+            s_filtered = sample_array[indx_ok]
+            w_filtered = log_sample_weights[indx_ok]
+        else:
+            s_filtered = sample_array
+            w_filtered = None
+            
+        new_model.fit(self._normalize(s_filtered), w_filtered)
         M, _ = sample_array.shape
         self._merge(new_model, M)
         self.N += M
 
     def score(self, sample_array,assume_normalized=True):
         '''
-        Score samples (i.e. calculate likelihood of each sample) under the current
-        model.
-
-        Note the bounds are stored *not* normalized, and we need to compensate for that.
-        Note the normalized bounds are always -1,1 ... but we won't hardcode that, in case normalization changes
-
-        Parameters
-        ----------
-        sample_array : np.ndarray
-            Array of samples to fit
-        bounds : np.ndarray
-            Bounds for samples, used for renormalizing scores
+        Score samples under the current model.
         '''
         n, d = sample_array.shape
-        scores = np.zeros(n)
-        sample_array = self._normalize(sample_array)
-        bounds_normalized = np.zeros(self.bounds.shape)
-        bounds_normalized= self._normalize(self.bounds.T).T
+        scores = self.xpy.zeros(n)
+        sample_array_norm = self._normalize(sample_array)
+        
+        # bounds_normalized
+        bounds_norm = self._normalize(self.bounds.T).T
         normalization_constant = 0.
+        
         for i in range(self.k):
             w = self.weights[i]
             mean = self.means[i]
             cov = self.covariances[i]
-            if(len(mean)>1):
-                scores += multivariate_normal.pdf(x=sample_array, mean=mean, cov=cov, allow_singular=True) * w
-                normalization_constant += w*mvnun(bounds_normalized[:,0], bounds_normalized[:,1], mean, cov)[0] # this function is very fast at integrating multivariate normal distributions
+            
+            if self.d > 1:
+                if cupy_ok:
+                    # Use gpu_logpdf and exponentiate
+                    log_pdf = gpu_logpdf(sample_array_norm, mean, cov, self.xpy)
+                    pdf = self.xpy.exp(log_pdf)
+                else:
+                    pdf = multivariate_normal.pdf(x=sample_array_norm, mean=mean, cov=cov, allow_singular=True)
+                
+                scores += pdf * w
+                # mvnun is CPU only
+                mean_cpu = self.identity_convert(mean)
+                cov_cpu = self.identity_convert(cov)
+                bounds_norm_cpu = self.identity_convert(bounds_norm)
+                normalization_constant += w * mvnun(bounds_norm_cpu[:,0], bounds_norm_cpu[:,1], mean_cpu, cov_cpu)[0]
             else:
                 sigma2 = cov[0,0]
-                val = 1./np.sqrt(2*np.pi*sigma2) * np.exp( - 0.5*( sample_array[:,0] - mean[0])**2/sigma2)
+                val = 1./self.xpy.sqrt(2*self.xpy.pi*sigma2) * self.xpy.exp( - 0.5*( sample_array_norm[:,0] - mean[0])**2/sigma2)
                 scores += val * w
-                my_cdf = norm(loc=mean[0],scale=np.sqrt(sigma2)).cdf
-                normalization_constant += w*(my_cdf( bounds_normalized[0,1]) - my_cdf( bounds_normalized[0,0]))
-            # note that allow_singular=True in the above line is probably really dumb and
-            # terrible, but it seems to occasionally keep the whole thing from blowing up
-            # so it stays for now
-        # we need to renormalize the PDF
-        # to do this we sample from a full distribution (i.e. without truncation) and use the
-        # fraction of samples that fall inside the bounds to renormalize
-        #full_sample_array = self.sample(n, use_bounds=False)
-        #llim = np.rot90(self.bounds[:,[0]])
-        #rlim = np.rot90(self.bounds[:,[1]])
-        #n1 = np.greater(full_sample_array, llim).all(axis=1)
-        #n2 = np.less(full_sample_array, rlim).all(axis=1)
-        #normalize = np.array(np.logical_and(n1, n2)).flatten()
-        #m = float(np.sum(normalize)) / n
-        #scores /= m
+                
+                mean_cpu = self.identity_convert(mean)[0]
+                sigma_cpu = self.identity_convert(np.sqrt(sigma2))
+                bounds_norm_cpu = self.identity_convert(bounds_norm[0])
+                my_cdf = norm(loc=mean_cpu, scale=sigma_cpu).cdf
+                normalization_constant += w * (my_cdf(bounds_norm_cpu[1]) - my_cdf(bounds_norm_cpu[0]))
+        
         scores /= normalization_constant
-        vol = np.prod(self.bounds[:,1] - self.bounds[:,0])
-        scores *= 2.0**d / vol # account for renormalization of dimensions
+        vol = self.xpy.prod(self.bounds[:,1] - self.bounds[:,0])
+        scores *= (2.0**self.d) / vol
         return scores
 
     def sample(self, n, use_bounds=True):
         '''
-        Draw samples from the current model, either with or without bounds
-
-        Parameters
-        ----------
-        n : int
-            Number of samples to draw
-        bounds : np.ndarray
-            Bounds for samples
+        Draw samples from the current model.
+
+        Note the model's means/covariances are stored in *normalized* coordinates
+        (the [-1, 1] image of self.bounds under self._normalize). Samples are
+        therefore drawn in normalized coordinates and then unnormalized back to
+        the original coordinate frame before being returned, matching the
+        pre-port behavior expected by MonteCarloEnsemble._sample().
         '''
-        sample_array = np.empty((n, self.d))
+        # Sampling is kept on CPU for stability (truncnorm is CPU-only)
+        means_np = [self.identity_convert(m) for m in self.means]
+        covs_np = [self.identity_convert(c) for c in self.covariances]
+        weights_np = self.identity_convert(self.weights)
+
+        # truncnorm bounds must match the coordinate frame of the model
+        # parameters (mean/cov), which is normalized [-1, 1].
+        bounds_normalized = np.empty((self.d, 2))
+        bounds_normalized[:, 0] = -1.0
+        bounds_normalized[:, 1] = 1.0
+
+        sample_array_np = np.empty((n, self.d))
         start = 0
-        bounds = np.empty(self.bounds.shape)
-        bounds[:,0] = -1.0
-        bounds[:,1] = 1.0
         for component in range(self.k):
-            w = self.weights[component]
-            mean = self.means[component]
-            cov = self.covariances[component]
-            num_samples = int(n * w)  # NOT a poisson draw, note : we draw exactly the expected number from each one (since we have a fixed number to fill)
+            w = weights_np[component]
+            mean = means_np[component]
+            cov = covs_np[component]
+            num_samples = int(n * w)
             if component == self.k - 1:
                 end = n
             else:
                 end = start + num_samples
             try:
                 if not use_bounds:
-                    sample_array[start:end] = np.random.multivariate_normal(mean, cov, end - start)
+                    sample_array_np[start:end] = np.random.multivariate_normal(mean, cov, end - start)
                 else:
-                    sample_array[start:end] = truncnorm.sample(mean, cov, bounds, end - start)
+                    sample_array_np[start:end] = truncnorm.sample(mean, cov, bounds_normalized, end - start)
                 start = end
-            except:
-                print('Exiting due to non-positive-semidefinite')
+            except Exception as e:
+                print('Exiting due to non-positive-semidefinite', e)
                 raise Exception("gmm covariance not positive-semidefinite")
-        return self._unnormalize(sample_array)
+
+        # Move to xpy and unnormalize back to original [llim, rlim] coordinates,
+        # so callers receive samples in the same frame as self.bounds.
+        sample_array_xpy = self.identity_convert_togpu(sample_array_np)
+        return self._unnormalize(sample_array_xpy)
 
     def print_params(self):
         '''
         Prints the model's parameters in an easily-readable format
         '''
+        means_np = [self.identity_convert(m) for m in self.means]
+        covs_np = [self.identity_convert(c) for c in self.covariances]
+        weights_np = self.identity_convert(self.weights)
+        
         if self.d ==1:
             print("GMM:   component wt mean_correct mean_normed std_normed ")
         for i in range(self.k):
-            mean = self.means[i]
-            cov = self.covariances[i]
-            weight = self.weights[i]
+            mean = means_np[i]
+            cov = covs_np[i]
+            weight = weights_np[i]
             if self.d >1:
                 print('________________________________________\n')
                 print('Component', i)
diff --git a/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsampler.py b/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsampler.py
index e02fbaa52..822605d7b 100644
--- a/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsampler.py
+++ b/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsampler.py
@@ -6,6 +6,7 @@
 from collections import defaultdict
 
 import numpy
+from RIFT.precision import RiftFloat  # platform-portable replacement for np.float128
 from scipy import integrate, interpolate
 from ..integrators.statutils import cumvar, welford, update, finalize
 import itertools
@@ -411,7 +412,7 @@ def integrate(self, func, *args, **kwargs):
         # Determine stopping conditions
         #
         nmax = int(kwargs["nmax"]) if "nmax" in kwargs else float("inf")
-        neff = kwargs["neff"] if "neff" in kwargs else numpy.float128("inf")
+        neff = kwargs["neff"] if "neff" in kwargs else RiftFloat("inf")
         n = int(kwargs["n"]) if "n" in kwargs else min(1000, nmax)
         convergence_tests = kwargs["convergence_tests"] if "convergence_tests" in kwargs else None
 
@@ -463,12 +464,12 @@ def integrate(self, func, *args, **kwargs):
                 print(" Initiating multiprocessor pool : ", nProcesses)
             p = Pool(nProcesses)
 
-        int_val1 = numpy.float128(0)
+        int_val1 = RiftFloat(0)
         self.ntotal = 0
         maxval = -float("Inf")
         maxlnL = -float("Inf")
         eff_samp = 0
-        mean, var = None, numpy.float128(0)    # to prevent infinite variance due to overflow
+        mean, var = None, RiftFloat(0)    # to prevent infinite variance due to overflow
 
         if bShowEvaluationLog:
             print("iteration Neff  sqrt(2*lnLmax) sqrt(2*lnLmarg) ln(Z/Lmax) int_var")
@@ -501,7 +502,7 @@ def integrate(self, func, *args, **kwargs):
             # Calculate the overall p_s assuming each pdf is independent
             joint_p_s = numpy.prod(p_s, axis=0)
             joint_p_prior = numpy.prod(p_prior, axis=0)
-            joint_p_prior = numpy.array(joint_p_prior,dtype=numpy.float128)  # Force type. Some type issues have arisen (dtype=object returns by accident)
+            joint_p_prior = numpy.array(joint_p_prior,dtype=RiftFloat)  # Force type. Some type issues have arisen (dtype=object returns by accident)
 
 #            print "Joint prior ",  type(joint_p_prior), joint_p_prior.dtype, joint_p_prior
 #            print "Joint sampling prior ", type(joint_p_s), joint_p_s.dtype
@@ -850,7 +851,7 @@ def q_samp_vector(qmin,qmax,x):
     scale = 1./(1+qmin) - 1./(1+qmax)
     return 1/numpy.power((1+x),2)/scale
 def q_cdf_inv_vector(qmin,qmax,x):
-    return numpy.array((qmin + qmax*qmin + qmax*x - qmin*x)/(1 + qmax - qmax*x + qmin*x),dtype=np.float128)
+    return numpy.array((qmin + qmax*qmin + qmax*x - qmin*x)/(1 + qmax - qmax*x + qmin*x),dtype=RiftFloat)
 
 # total mass. Assumed used with q.  2M/Mmax^2-Mmin^2
 def M_samp_vector(Mmin,Mmax,x):
diff --git a/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsamplerAdaptiveVolume.py b/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsamplerAdaptiveVolume.py
index b19687ceb..82c28b912 100644
--- a/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsamplerAdaptiveVolume.py
+++ b/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsamplerAdaptiveVolume.py
@@ -11,6 +11,7 @@
 
 import numpy
 np=numpy #import numpy as np
+from RIFT.precision import RiftFloat  # platform-portable replacement for np.float128
 from scipy import integrate, interpolate, special
 import itertools
 import functools
@@ -469,7 +470,7 @@ def integrate_log(self, lnF, *args, xpy=xpy_default,**kwargs):
         # Determine stopping conditions
         #
         nmax = kwargs["nmax"] if "nmax" in kwargs else float("inf")
-        neff = kwargs["neff"] if "neff" in kwargs else numpy.float128("inf")
+        neff = kwargs["neff"] if "neff" in kwargs else RiftFloat("inf")
         n = int(kwargs["n"] if "n" in kwargs else min(100000, nmax))
         convergence_tests = kwargs["convergence_tests"] if "convergence_tests" in kwargs else None
         save_no_samples = kwargs["save_no_samples"] if "save_no_samples" in kwargs else None
diff --git a/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsamplerEnsemble.py b/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsamplerEnsemble.py
old mode 100644
new mode 100755
index a9a22c069..f1aaa7982
--- a/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsamplerEnsemble.py
+++ b/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsamplerEnsemble.py
@@ -1,10 +1,25 @@
-
 import sys
 import math
 import bisect
 from collections import defaultdict
 
 import numpy as np
+from RIFT.precision import RiftFloat  # platform-portable replacement for np.float128
+
+try:
+    import cupy
+    import cupyx
+    xpy_default = cupy
+    xpy_special_default = cupyx.scipy.special
+    identity_convert = cupy.asnumpy
+    identity_convert_togpu = cupy.asarray
+    cupy_ok = True
+except ImportError:
+    xpy_default = np
+    xpy_special_default = None
+    identity_convert = lambda x: x
+    identity_convert_togpu = lambda x: x
+    cupy_ok = False
 
 import itertools
 import functools
@@ -38,27 +53,10 @@ class MCSampler(object):
 
     @staticmethod
     def match_params_from_args(args, params):
-        """
-        Given two unordered sets of parameters, one a set of all "basic" elements
-        (strings) possible, and one a set of elements both "basic" strings and
-        "combined" (basic strings in tuples), determine whether the sets are equivalent
-        if no basic element is repeated.
-
-        e.g. set A ?= set B
-
-        ("a", "b", "c") ?= ("a", "b", "c") ==> True
-        (("a", "b", "c")) ?= ("a", "b", "c") ==> True
-        (("a", "b"), "d")) ?= ("a", "b", "c") ==> False  # basic element 'd' not in set B
-        (("a", "b"), "d")) ?= ("a", "b", "d", "c") ==> False  # not all elements in set B 
-        represented in set A
-        """
         not_common = set(args) ^ set(params)
         if len(not_common) == 0:
-            # All params match
             return True
         if all([not isinstance(i, tuple) for i in not_common]):
-            # The only way this is possible is if there are
-            # no extraneous params in args
             return False
 
         to_match = [i for i in not_common if not isinstance(i, tuple)]
@@ -72,52 +70,31 @@ def match_params_from_args(args, params):
 
 
     def __init__(self):
-        # Total number of samples drawn
         self.ntotal = 0
-        # Samples per iteration
         self.n = 0
-        # Parameter names
         self.params = set()
-        self.params_ordered = []  # keep them in order. Important to break likelihood function need for names
-        # parameter -> pdf function object
+        self.params_ordered = []
         self.pdf = {}
-        # If the pdfs aren't normalized, this will hold the normalization 
-        # constant
         self._pdf_norm = defaultdict(lambda: 1)
-        # Cache for the sampling points
         self._rvs = {}
-        # parameter -> cdf^{-1} function object
         self.cdf = {}
         self.cdf_inv = {}
-        # params for left and right limits
         self.llim, self.rlim = {}, {}
-        # Keep track of the adaptive parameters
         self.adaptive = []
-
-        # Keep track of the adaptive parameter 1-D marginalizations
         self._hist = {}
-
-        # MEASURES (=priors): ROS needs these at the sampler level, to clearly separate their effects
-        # ASSUMES the user insures they are normalized
         self.prior_pdf = {}
-
         self.func = None
         self.sample_format = None
         self.curr_args = None
+        self.gmm_dict ={} 
+        self.integrator = None 
 
-        self.gmm_dict ={} # state variable
-        self.integrator = None # state variable
-
-        # portfolio interfacing/GPU compatible cross-sampler operations
-        self.xpy = np
-        self.identity_convert = lambda x: x  # if needed, convert to numpy format  (e.g, cupy.asnumpy)
-        self.identity_convert_togpu = lambda x: x
+        self.xpy = xpy_default
+        self.identity_convert = identity_convert
+        self.identity_convert_togpu = identity_convert_togpu
 
 
     def clear(self):
-        """
-        Clear out the parameters and their settings, as well as clear the sample cache.
-        """
         self.params = set()
         self.params_ordered = []
         self.pdf = {}
@@ -133,17 +110,7 @@ def clear(self):
 
     def add_parameter(self, params, pdf=None,  cdf_inv=None, left_limit=None, right_limit=None, 
                         prior_pdf=None, adaptive_sampling=False):
-        """
-        Add one (or more) parameters to sample dimensions. params is either a string 
-        describing the parameter, or a tuple of strings. The tuple will indicate to 
-        the sampler that these parameters must be sampled together. left_limit and 
-        right_limit are on the infinite interval by default, but can and probably should 
-        be specified. If several params are given, left_limit, and right_limit must be a 
-        set of tuples with corresponding length. Sampling PDF is required, and if not 
-        provided, the cdf inverse function will be determined numerically from the 
-        sampling PDF.
-        """
-        self.params.add(params) # does NOT preserve order in which parameters are provided
+        self.params.add(params)
         self.params_ordered.append(params)
         if rosDebugMessages:
             print(" mcsampler: Adding parameter ", params, " with limits ", [left_limit, right_limit])
@@ -174,40 +141,24 @@ def add_parameter(self, params, pdf=None,  cdf_inv=None, left_limit=None, right_
                 self.prior_pdf[params] = prior_pdf
 
     def evaluate(self, samples):
-        '''
-        Interfaces between monte_carlo_integrator sample format (1 (n x d) array)
-        and likelihood function sample format (d 1D arrays in a list)
-        '''
-        # integrand expects a list of 1D rows
         temp = []
         for index in range(len(self.curr_args)):
             temp.append(samples[:,index])
         temp_ret = self.func(*temp)
-        return np.rot90([temp_ret], -1) # monte_carlo_integrator expects a column
+        return self.xpy.rot90([temp_ret], -1)
 
 
     def calc_pdf(self, samples):
-        '''
-        Similar to evaluate(), interfaces between sample formats. Must also handle
-        possibility of no prior for one of more dimensions
-        '''
         n, _ = samples.shape
         temp_ret = self.xpy.ones((n, 1))
-        # pdf functions expect 1D rows
         for index in range(len(self.curr_args)):
             if self.curr_args[index] in self.prior_pdf:
                 pdf_func = self.prior_pdf[self.curr_args[index]]
                 temp_samples = samples[:,index]
-                # monte carlo integrator expects a column
-                temp_ret *= pdf_func(temp_samples).reshape( temp_ret.shape) #self.xpy.rot90([pdf_func(temp_samples)], -1)
+                temp_ret *= pdf_func(temp_samples).reshape( temp_ret.shape)
         return temp_ret
 
     def setup(self,n_comp=None,**kwargs):
-      """
-      setup
-
-      Call after add_parameter
-      """
       integrator_func  = kwargs['integrator_func'] if "integrator_func" in kwargs  else None
       mcsamp_func  = kwargs['mcsamp_func'] if "mcsamp_func" in kwargs  else None
       proc_count = kwargs['proc_count'] if "proc_count" in kwargs else None
@@ -224,102 +175,82 @@ def setup(self,n_comp=None,**kwargs):
       lnw_failure_cut = kwargs["lnw_failure_cut"] if "lnw_failure_cut" in kwargs else None
       nmax = kwargs["nmax"] if "nmax" in kwargs else 1e6
       neff = kwargs["neff"] if "neff" in kwargs else 1000
-      n = kwargs["n"] if "n" in kwargs else min(1000, nmax)  # chunk size
+      n = kwargs["n"] if "n" in kwargs else min(1000, nmax)
 
-      self.n = n  # this needs to be set
-      self.curr_args = self.params_ordered # assume we integrate over all. State variable used in a few places
+      self.n = n
+      self.curr_args = self.params_ordered
 
       if 'gmm_dict' in list(kwargs.keys()):
-          gmm_dict = kwargs['gmm_dict']  # required
+          gmm_dict = kwargs['gmm_dict']
       else:
           gmm_dict = None
       dim = len(self.params_ordered)
       bounds=[]
       for param in self.params_ordered:
             bounds.append([self.llim[param], self.rlim[param]])
-      raw_bounds = np.array(bounds)
+      raw_bounds = self.xpy.array(bounds)
           
       if gmm_dict is None:
             bounds = {}
-            for indx in np.arange(len(raw_bounds)):
+            for indx in self.xpy.arange(len(raw_bounds)):
                 bounds[(indx,)] = raw_bounds[indx]
             bounds=raw_bounds
             if correlate_all_dims:
                 gmm_dict = {tuple(range(dim)):None}
-                bounds = {tuple(np.arange(len(bounds))): raw_bounds}
+                bounds = {tuple(self.xpy.arange(len(bounds))): raw_bounds}
             else:
                 gmm_dict = {}
                 for i in range(dim):
                     gmm_dict[(i,)] = None
       else:
-            # create bounds that depend on the dimension specifiers in the gmm integrator
             bounds ={}
             for dims in gmm_dict:
                 n_dims = len(dims)
-                bounds_here = np.empty((n_dims,2))
-                for indx in np.arange(n_dims):
-                    bounds_here[indx] = raw_bounds[dims[indx]]  # pull out bounds index
+                bounds_here = self.xpy.empty((n_dims,2))
+                for indx in self.xpy.arange(n_dims):
+                    bounds_here[indx] = raw_bounds[dims[indx]]
                 bounds[dims]=bounds_here
 
-          
-      # instantiate an integrator object, as that is front end to all the things we need.
-      # we will need some dummy things 
       self.integrator = monte_carlo.integrator(dim, bounds, gmm_dict, n_comp, n=self.n, prior=self.calc_pdf,
-                         user_func=integrator_func, proc_count=proc_count,L_cutoff=L_cutoff,gmm_adapt=gmm_adapt,gmm_epsilon=gmm_epsilon,tempering_exp=tempering_exp) # reflect=reflect,
+                         user_func=integrator_func, proc_count=proc_count,L_cutoff=L_cutoff,gmm_adapt=gmm_adapt,gmm_epsilon=gmm_epsilon,tempering_exp=tempering_exp)
 
     def update_sampling_prior(self,ln_weights, n_history,tempering_exp=1,log_scale_weights=True,floor_integrated_probability=0,external_rvs=None,**kwargs):
-      """
-      update_sampling_prior
-
-      Attempt to duplicate code inside 'integrate' to update sampling prior based on information potentially including externally-obtained samples
-      """
       rvs_here = self._rvs
       if external_rvs:
         rvs_here = external_rvs
 
-      xpy_here = np  # force np internal, because we don't have GMM implemented
-
-      # apply tempering exponent (structurally slightly different than in low-level code - not just to likelihood)
-      ln_weights  = np.array(self.identity_convert(ln_weights)) # force copy
+      ln_weights  = self.xpy.array(self.identity_convert(ln_weights))
       ln_weights *= tempering_exp
 
-      gmm_dict = self.integrator.gmm_dict  # direct acess
+      gmm_dict = self.integrator.gmm_dict
 
-      n_history_to_use = np.min([n_history, len(ln_weights), len(rvs_here[self.params_ordered[0]])] )
+      n_history_to_use = self.xpy.min([n_history, len(ln_weights), len(rvs_here[self.params_ordered[0]])] )
 
-      # Create appropriate history array
-      # keep in GPU form, because we don't need it all !  Only adapt in SOME dimensions, reduce bandwidth!
       sample_array = self.xpy.empty( (len(self.params_ordered), n_history_to_use))
       for indx, p in enumerate(self.params_ordered):
           sample_array[indx] = rvs_here[p][-n_history_to_use:]
       sample_array = sample_array.T
 
-
-      for dim_group in gmm_dict: # iterate over grouped dimensions
+      for dim_group in gmm_dict:
             if self.integrator.gmm_adapt:
                 if (dim_group in self.integrator.gmm_adapt):
-                    if not(self.integrator.gmm_adapt[dim_group]):   # disabling adaptation requires user *specifically request* not to use that dimension set; all other choices lead to adaptation
+                    if not(self.integrator.gmm_adapt[dim_group]):
                         continue
-            # create a matrix of the left and right limits for this set of dimensions
-            new_bounds = np.empty((len(dim_group), 2))
+            new_bounds = self.xpy.empty((len(dim_group), 2))
             new_bounds = self.integrator.bounds[dim_group]
-            model = self.integrator.gmm_dict[dim_group] # get model for this set of dimensions
-            temp_samples = np.empty((n_history_to_use, len(dim_group)))
+            model = self.integrator.gmm_dict[dim_group]
+            temp_samples = self.xpy.empty((n_history_to_use, len(dim_group)))
             index = 0
             for dim in dim_group:
-                # get samples corresponding to the current model
-                # send from GPU as needed
                 temp_samples[:,index] = self.identity_convert(sample_array[:,dim])
                 index += 1
 
-            # don't train with nan!
-            if any(np.isnan(ln_weights)):
-                ok_indx = ~np.isnan(ln_weights)
+            if self.xpy.any(self.xpy.isnan(ln_weights)):
+                ok_indx = ~self.xpy.isnan(ln_weights)
                 temp_samples = temp_samples[ok_indx]
                 ln_weights = ln_weights[ok_indx]
             
             if model is None:
-                # model doesn't exist yet
                 if isinstance(self.integrator.n_comp, int) and self.integrator.n_comp != 0:
                     model = GMM.gmm(self.integrator.n_comp, new_bounds,epsilon=self.integrator.gmm_epsilon)
                     model.fit(temp_samples, log_sample_weights=ln_weights)
@@ -332,11 +263,8 @@ def update_sampling_prior(self,ln_weights, n_history,tempering_exp=1,log_scale_w
 
 
     def draw_simplified(self,n,*args,**kwargs):
-        """
-        Draw a set of random variates for parameter(s) args. Left and right limits are handed to the function. If args is None, then draw *all* parameters. 'rdict' parameter is a boolean. If true, returns a dict matched to param name rather than list. rvs must be either a list of uniform random variates to transform for sampling, or an integer number of samples to draw.
-        """
         n_samples = int(n)
-        self.integrator.n = n # need to override this, so we sample with correct size
+        self.integrator.n = n
 
         if len(args) == 0:
             args = self.params
@@ -346,8 +274,6 @@ def draw_simplified(self,n,*args,**kwargs):
         if 'save_no_samples' in list(kwargs.keys()):
             save_no_samples = kwargs['save_no_samples']
 
-
-        # Allocate memory.
         rv = self.xpy.empty((n_params, n_samples), dtype=np.float64)
         joint_p_s = self.xpy.ones(n_samples, dtype=np.float64)
         joint_p_prior = self.xpy.ones(n_samples, dtype=np.float64)
@@ -365,14 +291,6 @@ def draw_simplified(self,n,*args,**kwargs):
 
 
     def integrate_log(self, func, *args,**kwargs):
-        '''
-        Integrate the specified function over the specified parameters.
-
-        func: function to integrate
-
-        Simple wrapper to standardize interface
-
-        '''
         args_passed = {}
         args_passed.update(kwargs)
         args_passed['use_lnL']=True
@@ -380,56 +298,12 @@ def integrate_log(self, func, *args,**kwargs):
         return integrate(func, *args, args_passed)
 
     def integrate(self, func, *args,**kwargs):
-        '''
-        Integrate the specified function over the specified parameters.
-
-        func: function to integrate
-
-        args: list of parameters to integrate over
-
-        direct_eval (bool): whether func can be evaluated directly with monte_carlo_integrator
-        format or not
-
-        n_comp: number of gaussian components for model
-
-        n: number of samples per iteration
-
-        nmax: maximum number of samples for all iterations
-
-        write_to_file (bool): write data to file
-
-        gmm_dict: dictionary of dimensions and mixture models (see monte_carlo_integrator
-        documentation for more)
-
-        var_thresh: result variance threshold for termination
-
-        min_iter: minimum number of integrator iterations
-
-        max_iter: maximum number of integrator iterations
-
-        neff: eff_samp cutoff for termination
-
-        reflect (bool): whether or not to reflect samples over boundaries (you should
-        basically never use this, it's really slow)
-
-        mcsamp_func: function to be executed before mcsampler_new terminates (for example,
-        to print results or debugging info)
-
-        integrator_func: function to be executed each iteration of the integrator (for
-        example, to print intermediate results)
-
-        proc_count: size of multiprocessing pool. set to None to not use multiprocessing
-        tempering_exp -- Exponent to raise the weights of the 1-D marginalized histograms for adaptive sampling prior generation, by default it is 0 which will turn off adaptive sampling regardless of other settings
-        temper_log -- Adapt in min(ln L, 10^(-5))^tempering_exp
-
-        max_err : Maximum number of errors allowed for GMM sampler
-        '''
         nmax = kwargs["nmax"] if "nmax" in kwargs else 1e6
         neff = kwargs["neff"] if "neff" in kwargs else 1000
-        n = kwargs["n"] if "n" in kwargs else min(1000, nmax)  # chunk size
+        n = kwargs["n"] if "n" in kwargs else min(1000, nmax)
         n_comp = kwargs["n_comp"] if "n_comp" in kwargs else 1
         if 'gmm_dict' in list(kwargs.keys()):
-            gmm_dict = kwargs['gmm_dict']  # required
+            gmm_dict = kwargs['gmm_dict']
         else:
             gmm_dict = None
         reflect = kwargs['reflect'] if "reflect" in kwargs else False
@@ -447,16 +321,15 @@ def integrate(self, func, *args,**kwargs):
         L_cutoff = kwargs["L_cutoff"] if "L_cutoff" in kwargs else None
         tempering_exp = kwargs["tempering_exp"] if "tempering_exp" in kwargs else 1.0
         lnw_failure_cut = kwargs["lnw_failure_cut"] if "lnw_failure_cut" in kwargs else None
-#        tempering_exp = kwargs["adapt_weight_exponent"] if "adapt_weight_exponent" in kwargs else 1.0
 
-        max_err = kwargs["max_err"] if "max_err" in kwargs else 10  # default
+        max_err = kwargs["max_err"] if "max_err" in kwargs else 10
 
-        verbose = kwargs["verbose"] if "verbose" in kwargs else False  # default
-        super_verbose = kwargs["super_verbose"] if "super_verbose" in kwargs else False  # default
-        dict_return_q = kwargs["dict_return"] if "dict_return" in kwargs else False  # default.  Method for passing back rich data structures for debugging
+        verbose = kwargs["verbose"] if "verbose" in kwargs else False
+        super_verbose = kwargs["super_verbose"] if "super_verbose" in kwargs else False
+        dict_return_q = kwargs["dict_return"] if "dict_return" in kwargs else False
 
-        tripwire_fraction = kwargs["tripwire_fraction"] if "tripwire_fraction" in kwargs else 2  # make it impossible to trigger
-        tripwire_epsilon = kwargs["tripwire_epsilon"] if "tripwire_epsilon" in kwargs else 0.001 # if we are not reasonably far away from unity, fail!
+        tripwire_fraction = kwargs["tripwire_fraction"] if "tripwire_fraction" in kwargs else 2
+        tripwire_epsilon = kwargs["tripwire_epsilon"] if "tripwire_epsilon" in kwargs else 0.001
 
         use_lnL = kwargs["use_lnL"] if "use_lnL" in kwargs else False 
         return_lnI = kwargs["return_lnI"] if "return_lnI" in kwargs else False
@@ -464,7 +337,6 @@ def integrate(self, func, *args,**kwargs):
         bFairdraw  = kwargs["igrand_fairdraw_samples"] if "igrand_fairdraw_samples" in kwargs else False
         n_extr = kwargs["igrand_fairdraw_samples_max"] if "igrand_fairdraw_samples_max" in kwargs else None
 
-        # set up a lot of preliminary stuff
         self.func = func
         self.curr_args = args
         if n_comp is None:
@@ -474,36 +346,32 @@ def integrate(self, func, *args,**kwargs):
         bounds=[]
         for param in args:
             bounds.append([self.llim[param], self.rlim[param]])
-        raw_bounds = np.array(bounds)
+        raw_bounds = self.xpy.array(bounds)
 
         bounds=None
-        # generate default gmm_dict if not specified
         if gmm_dict is None:
             bounds = {}
-            for indx in np.arange(len(raw_bounds)):
+            for indx in self.xpy.arange(len(raw_bounds)):
                 bounds[(indx,)] = raw_bounds[indx]
             bounds=raw_bounds
             if correlate_all_dims:
                 gmm_dict = {tuple(range(dim)):None}
-                bounds = {tuple(np.arange(len(bounds))): raw_bounds}
+                bounds = {tuple(self.xpy.arange(len(bounds))): raw_bounds}
             else:
                 gmm_dict = {}
                 for i in range(dim):
                     gmm_dict[(i,)] = None
         else:
-            # create bounds that depend on the dimension specifiers in the gmm integrator
             bounds ={}
             for dims in gmm_dict:
                 n_dims = len(dims)
-                bounds_here = np.empty((n_dims,2))
-                for indx in np.arange(n_dims):
-                    bounds_here[indx] = raw_bounds[dims[indx]]  # pull out bounds index
+                bounds_here = self.xpy.empty((n_dims,2))
+                for indx in self.xpy.arange(n_dims):
+                    bounds_here[indx] = raw_bounds[dims[indx]]
                 bounds[dims]=bounds_here
-#            bounds = np.array(bounds)
-        # do the integral
 
         integrator = monte_carlo.integrator(dim, bounds, gmm_dict, n_comp, n=n, prior=self.calc_pdf,
-                         user_func=integrator_func, proc_count=proc_count,L_cutoff=L_cutoff,gmm_adapt=gmm_adapt,gmm_epsilon=gmm_epsilon,tempering_exp=tempering_exp) # reflect=reflect,
+                         user_func=integrator_func, proc_count=proc_count,L_cutoff=L_cutoff,gmm_adapt=gmm_adapt,gmm_epsilon=gmm_epsilon,tempering_exp=tempering_exp)
         if not direct_eval:
             func = self.evaluate
         if use_lnL:
@@ -512,31 +380,26 @@ def integrate(self, func, *args,**kwargs):
             print(" ==> internal calculations and return values are lnI ")
         integrator.integrate(func, min_iter=min_iter, max_iter=max_iter, var_thresh=var_thresh, neff=neff, nmax=nmax,max_err=max_err,verbose=verbose,progress=super_verbose,tripwire_fraction=tripwire_fraction,tripwire_epsion=tripwire_epsilon,use_lnL=use_lnL,return_lnI=return_lnI,lnw_failure_cut=lnw_failure_cut)
 
-        # get results
-
         self.n = int(integrator.n)
         self.ntotal = int(integrator.ntotal)
         integral = integrator.integral
         print("Result ",integrator.scaled_error_squared, integrator.integral)
         if not(return_lnI):
-            error_squared = integrator.scaled_error_squared * np.exp(integrator.log_error_scale_factor)/ (self.ntotal/self.n)
+            error_squared = integrator.scaled_error_squared * self.xpy.exp(integrator.log_error_scale_factor)/ (self.ntotal/self.n)
         else:
-            error_squared = integrator.scaled_error_squared  - np.log(self.ntotal/self.n)
+            error_squared = integrator.scaled_error_squared  - self.xpy.log(self.ntotal/self.n)
         eff_samp = integrator.eff_samp
         sample_array = integrator.cumulative_samples
         if not(return_lnI):
-            value_array = np.exp(integrator.cumulative_values)  # stored as ln(integrand) !
+            value_array = self.xpy.exp(integrator.cumulative_values)
         else:
             value_array = integrator.cumulative_values
         p_array = integrator.cumulative_p_s
         prior_array = integrator.cumulative_p
 
-        # user-defined function
         if mcsamp_func is not None:
             mcsamp_func(self, integrator)
 
-        # populate dictionary
-
         index = 0
         for param in args:
             self._rvs[param] = sample_array[:,index]
@@ -545,35 +408,31 @@ def integrate(self, func, *args,**kwargs):
         self._rvs['joint_s_prior'] = p_array
         self._rvs['integrand'] = value_array
 
-        # Do a fair draw of points, if option is set. CAST POINTS BACK TO NUMPY, IDEALLY
         if bFairdraw and not(n_extr is None):
-           n_extr = int(np.min([n_extr,1.5*eff_samp,1.5*neff]))
+           n_extr = int(self.xpy.min([n_extr,1.5*eff_samp,1.5*neff]))
            print(" Fairdraw size : ", n_extr)
            if return_lnI:
                ln_wt =  integrator.cumulative_values
            else:
-               ln_wt = np.log(value_array)
-           ln_wt += np.log(prior_array/p_array)
-           ln_wt += - scipy.special.logsumexp(ln_wt)
-           wt = np.exp(ln_wt)
+               ln_wt = self.xpy.log(value_array)
+           ln_wt += self.xpy.log(prior_array/p_array)
+           ln_wt += - scipy.special.logsumexp(self.identity_convert(ln_wt))
+           wt = self.xpy.exp(ln_wt)
            if n_extr < len(value_array):
-               indx_list = np.random.choice(np.arange(len(wt)), size=n_extr,replace=True,p=wt) # fair draw
-               # FIXME: See previous FIXME
+               indx_list = self.xpy.random.choice(self.xpy.arange(len(wt)), size=n_extr,replace=True,p=wt)
                for key in list(self._rvs.keys()):
                    if isinstance(key, tuple):
                        self._rvs[key] = self._rvs[key][:,indx_list]
                    else:
                        self._rvs[key] = self._rvs[key][indx_list]
 
-        # if special return structure, fill it
         dict_return = {}
         if dict_return_q:
             dict_return["integrator"] = integrator
 
-        # write data to file
         if write_to_file:
-            dat_out = np.c_[sample_array, value_array, p_array]
-            np.savetxt('mcsampler_data.txt', dat_out,
+            dat_out = self.xpy.c_[sample_array, value_array, p_array]
+            np.savetxt('mcsampler_data.txt', self.identity_convert(dat_out),
                         header=" ".join(['sample_array', 'value_array', 'p_array']))
 
         return integral, error_squared, eff_samp, dict_return
@@ -588,46 +447,37 @@ def gauss_samp(mu, std, x):
 def gauss_samp_withfloor(mu, std, myfloor, x):
     return 1.0/np.sqrt(2*np.pi*std**2)*np.exp(-(x-mu)**2/2/std**2) + myfloor
 
-#gauss_samp_withfloor_vector = np.vectorize(gauss_samp_withfloor,excluded=['mu','std','myfloor'],otypes=[np.float64])
 gauss_samp_withfloor_vector = np.vectorize(gauss_samp_withfloor,otypes=[np.float64])
 
 
-# Mass ratio. PDF propto 1/(1+q)^2.  Defined so mass ratio is < 1
-# expr = Integrate[1/(1 + q)^2, q]
-# scale = (expr /. q -> qmax )  - (expr /. q -> qmin)
-# (expr - (expr /. q -> qmin))/scale == x // Simplify
-# q /. Solve[%, q][[1]] // Simplify
-# % // CForm
 def q_samp_vector(qmin,qmax,x):
     scale = 1./(1+qmin) - 1./(1+qmax)
     return 1/np.power((1+x),2)/scale
 def q_cdf_inv_vector(qmin,qmax,x):
-    return np.array((qmin + qmax*qmin + qmax*x - qmin*x)/(1 + qmax - qmax*x + qmin*x),dtype=np.float128)
+    return np.array((qmin + qmax*qmin + qmax*x - qmin*x)/(1 + qmax - qmax*x + qmin*x),dtype=RiftFloat)
 
-# total mass. Assumed used with q.  2M/Mmax^2-Mmin^2
 def M_samp_vector(Mmin,Mmax,x):
     scale = 2./(Mmax**2 - Mmin**2)
     return x*scale
 
 
 def cos_samp(x):
-        return np.sin(x)/2   # x from 0, pi
+        return np.sin(x)/2
 
 def dec_samp(x):
-        return np.sin(x+np.pi/2)/2   # x from 0, pi
+        return np.sin(x+np.pi/2)/2
 
 cos_samp_vector = np.vectorize(cos_samp,otypes=[np.float64])
 dec_samp_vector = np.vectorize(dec_samp,otypes=[np.float64])
 def cos_samp_cdf_inv_vector(p):
-    return np.arccos( 2*p-1)   # returns from 0 to pi
+    return np.arccos( 2*p-1)
 def dec_samp_cdf_inv_vector(p):
-    return np.arccos(2*p-1) - np.pi/2  # target from -pi/2 to pi/2
+    return np.arccos(2*p-1) - np.pi/2
 
 
 def pseudo_dist_samp(r0,r):
-        return r*r*np.exp( - (r0/r)*(r0/r)/2. + r0/r)+0.01  # put a floor on probability, so we converge. Note this floor only cuts out NEARBY distances
+        return r*r*np.exp( - (r0/r)*(r0/r)/2. + r0/r)+0.01
 
-#pseudo_dist_samp_vector = np.vectorize(pseudo_dist_samp,excluded=['r0'],otypes=[np.float64])
 pseudo_dist_samp_vector = np.vectorize(pseudo_dist_samp,otypes=[np.float64])
 
 def delta_func_pdf(x_0, x):
@@ -641,36 +491,12 @@ def delta_func_samp(x_0, x):
 delta_func_samp_vector = np.vectorize(delta_func_samp, otypes=[np.float64])
 
 class HealPixSampler(object):
-    """
-    Class to sample the sky using a FITS healpix map. Equivalent to a joint 2-D pdf in RA and dec.
-    """
-
     @staticmethod
     def thph2decra(th, ph):
-        """
-        theta/phi to RA/dec
-        theta (north to south) (0, pi)
-        phi (east to west) (0, 2*pi)
-        declination: north pole = pi/2, south pole = -pi/2
-        right ascension: (0, 2*pi)
-        
-        dec = pi/2 - theta
-        ra = phi
-        """
         return np.pi/2-th, ph
 
     @staticmethod
     def decra2thph(dec, ra):
-        """
-        theta/phi to RA/dec
-        theta (north to south) (0, pi)
-        phi (east to west) (0, 2*pi)
-        declination: north pole = pi/2, south pole = -pi/2
-        right ascension: (0, 2*pi)
-        
-        theta = pi/2 - dec
-        ra = phi
-        """
         return np.pi/2-dec, ra
 
     def __init__(self, skymap, massp=1.0):
@@ -689,43 +515,31 @@ def massp(self, value):
         norm = self.renormalize()
 
     def renormalize(self):
-        """
-        Identify the points contributing to the overall cumulative probability distribution, and set the proper normalization.
-        """
         res = healpy.npix2nside(len(self.skymap))
         self.pdf_sorted = sorted([(p, i) for i, p in enumerate(self.skymap)], reverse=True)
         self.valid_points_decra = []
-        cdf, np = 0, 0
+        cdf, np_count = 0, 0
         for p, i in self.pdf_sorted:
             if p == 0:
-                continue # Can't have a zero prior
+                continue
             self.valid_points_decra.append(HealPixSampler.thph2decra(*healpy.pix2ang(res, i)))
             cdf += p
             if cdf > self._massp:
                 break
         self._renorm = cdf
-        # reset to indicate we'd need to recalculate this
         self.valid_points_hist = None
         return self._renorm
 
     def __expand_valid(self, min_p=1e-7):
-        #
-        # Determine what the 'quanta' of probabilty is
-        #
         if self._massp == 1.0:
-            # This is to ensure we don't blow away everything because the map
-            # is very spread out
             min_p = min(min_p, max(self.skymap))
         else:
-            # NOTE: Only valid if CDF descending order is kept
             min_p = self.pseudo_pdf(*self.valid_points_decra[-1])
 
         self.valid_points_hist = []
         ns = healpy.npix2nside(len(self.skymap))
 
-        # Renormalize first so that the vector histogram is properly normalized
         self._renorm = 0
-        # Account for probability lost due to cut off
         for i, v in enumerate(self.skymap >= min_p):
             self._renorm += self.skymap[i] if v else 0
 
@@ -738,32 +552,21 @@ def __expand_valid(self, min_p=1e-7):
         self.valid_points_hist = np.array(self.valid_points_hist).T
 
     def pseudo_pdf(self, dec_in, ra_in):
-        """
-        Return pixel probability for a given dec_in and ra_in. Note, uses healpy functions to identify correct pixel.
-        """
         th, ph = HealPixSampler.decra2thph(dec_in, ra_in)
         res = healpy.npix2nside(len(self.skymap))
         return self.skymap[healpy.ang2pix(res, th, ph)]/self._renorm
 
     def pseudo_cdf_inverse(self, dec_in=None, ra_in=None, ndraws=1, stype='vecthist'):
-        """
-        Select points from the skymap with a distribution following its corresponding pixel probability. If dec_in, ra_in are suupplied, they are ignored except that their shape is reproduced. If ndraws is supplied, that will set the shape. Will return a 2xN np array of the (dec, ra) values.
-        stype controls the type of sampling done to retrieve points. Valid choices are
-        'rejsamp': Rejection sampling: accurate but slow
-        'vecthist': Expands a set of points into a larger vector with the multiplicity of the points in the vector corresponding roughly to the probability of drawing that point. Because this is not an exact representation of the proability, some points may not be represented at all (less than quantum of minimum probability) or inaccurately (a significant fraction of the fundamental quantum).
-        """
-
         if ra_in is not None:
             ndraws = len(ra_in)
         if ra_in is None:
             ra_in, dec_in = np.zeros((2, ndraws))
 
         if stype == 'rejsamp':
-            # FIXME: This is only valid under descending ordered CDF summation
             ceiling = max(self.skymap)
-            i, np = 0, len(self.valid_points_decra)
+            i, np_count = 0, len(self.valid_points_decra)
             while i < len(ra_in):
-                rnd_n = np.random.randint(0, np)
+                rnd_n = np.random.randint(0, np_count)
                 trial = np.random.uniform(0, ceiling)
                 if trial <= self.pseudo_pdf(*self.valid_points_decra[rnd_n]):
                     dec_in[i], ra_in[i] = self.valid_points_decra[rnd_n]
@@ -772,77 +575,43 @@ def pseudo_cdf_inverse(self, dec_in=None, ra_in=None, ndraws=1, stype='vecthist'
         elif stype == 'vecthist':
             if self.valid_points_hist is None:
                 self.__expand_valid()
-            np = self.valid_points_hist.shape[1]
-            rnd_n = np.random.randint(0, np, len(ra_in))
+            np_count = self.valid_points_hist.shape[1]
+            rnd_n = np.random.randint(0, np_count, len(ra_in))
             dec_in, ra_in = self.valid_points_hist[:,rnd_n]
             return np.array([dec_in, ra_in])
         else:
             raise ValueError("%s is not a recgonized sampling type" % stype)
 
-#pseudo_dist_samp_vector = np.vectorize(pseudo_dist_samp,excluded=['r0'],otypes=[np.float64])
 pseudo_dist_samp_vector = np.vectorize(pseudo_dist_samp,otypes=[np.float64])
 
 
 def sanityCheckSamplerIntegrateUnity(sampler,*args,**kwargs):
         return sampler.integrate(lambda *args: 1,*args,**kwargs)
 
-###
-### CONVERGENCE TESTS
-###
-
-
-# neff by another name:
-#    - value: tests for 'smooth' 1-d cumulative distributions
-#    - require  require the most significant-weighted point be less than p of all cumulative probability
-#    - this test is *equivalent* to neff > 1/p
-#    - provided to illustrate the interface
 def convergence_test_MostSignificantPoint(pcut, rvs, params):
-    weights = rvs["weights"] #rvs["integrand"]* rvs["joint_prior"]/rvs["joint_s_prior"]
+    weights = rvs["weights"]
     indxmax = np.argmax(weights)
     wtSum = np.sum(weights)
     return  weights[indxmax]/wtSum < pcut
 
-
-# normality test: is the MC integral normally distributed, with a small standard deviation?
-#    - value: tests for converged integral
-#    - arguments: 
-#         - ncopies:               # of sub-integrals
-#         - pcutNormalTest     Threshold p-value for normality test
-#         - sigmaCutErrorThreshold   Threshold relative error in the integral
-#    - implement normality test on **log(integral)** since the log should also be normally distributed if well converged
-#           - this helps us handle large orders-of-magnitude differences
-#           - compatible with a *relative* error threshold on integral
-#           - only works for *positive-definite* integrands
-#    - other python normality tests:  
-#          scipy.stats.shapiro
-#          scipy.stats.anderson
-#  WARNING:
-#    - this test assumes *unsorted* past history: the 'ncopies' segments are assumed independent.
-import scipy.stats as stats
 def convergence_test_NormalSubIntegrals(ncopies, pcutNormalTest, sigmaCutRelativeErrorThreshold, rvs, params):
-    weights = rvs["integrand"]* rvs["joint_prior"]/rvs["joint_s_prior"]  # rvs["weights"] # rvs["weights"] is *sorted* (side effect?), breaking test. Recalculated weights are not.  Use explicitly calculated weights until sorting effect identified
-#    weights = weights /np.sum(weights)    # Keep original normalization, so the integral values printed to stdout have meaning relative to the overall integral value.  No change in code logic : this factor scales out (from the log, below)
+    weights = rvs["integrand"]* rvs["joint_prior"]/rvs["joint_s_prior"]
     igrandValues = np.zeros(ncopies)
-    len_part = np.int(len(weights)/ncopies)  # deprecated: np.floor->np.int
+    len_part = int(len(weights)/ncopies)
     for indx in np.arange(ncopies):
-        igrandValues[indx] = np.log(np.mean(weights[indx*len_part:(indx+1)*len_part]))  # change to mean rather than sum, so sub-integrals have meaning
-    igrandValues= np.sort(igrandValues)#[2:]                            # Sort.  Useful in reports
-    valTest = stats.normaltest(igrandValues)[1]                              # small value is implausible
-    igrandSigma = (np.std(igrandValues))/np.sqrt(ncopies)   # variance in *overall* integral, estimated from variance of sub-integrals
+        igrandValues[indx] = np.log(np.mean(weights[indx*len_part:(indx+1)*len_part]))
+    igrandValues= np.sort(igrandValues)
+    valTest = stats.normaltest(igrandValues)[1]
+    igrandSigma = (np.std(igrandValues))/np.sqrt(ncopies)
     print(" Test values on distribution of log evidence:  (gaussianity p-value; standard deviation of ln evidence) ", valTest, igrandSigma)
     print(" Ln(evidence) sub-integral values, as used in tests  : ", igrandValues)
-    return valTest> pcutNormalTest and igrandSigma < sigmaCutRelativeErrorThreshold   # Test on left returns a small value if implausible. Hence pcut ->0 becomes increasingly difficult (and requires statistical accidents). Test on right requires relative error in integral also to be small when pcut is small.   FIXME: Give these variables two different names
-    
-
+    return valTest> pcutNormalTest and igrandSigma < sigmaCutRelativeErrorThreshold
 
 from . import gaussian_mixture_model as GMM
 def create_wide_single_component_prior(bounds, epsilon=None):
-    """
-    create_wide_single_component_prior(bounds) : returns a gmm dictionary which is very wide
-    """
     model = GMM.gmm(1, bounds, epsilon=epsilon)
     widths = np.array([ bounds[k][1] - bounds[k][0] for k in np.arange(len(bounds))])  
-    model.means = [np.array([np.mean(bounds[k]) for k in np.arange(len(bounds))]) ]  # single component
+    model.means = [np.array([np.mean(bounds[k]) for k in np.arange(len(bounds))]) ]
     model.covariances = [np.diag( widths**2)]
     model.weights = [1]
     model.adapt = [False]
diff --git a/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsamplerGPU.py b/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsamplerGPU.py
index 86241f966..2f1724352 100644
--- a/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsamplerGPU.py
+++ b/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsamplerGPU.py
@@ -11,6 +11,7 @@
 
 import numpy
 np=numpy #import numpy as np
+from RIFT.precision import RiftFloat  # platform-portable replacement for np.float128
 from scipy import integrate, interpolate, special
 import itertools
 import functools
@@ -546,7 +547,7 @@ def update_sampling_prior(self,ln_weights, n_history,tempering_exp=1,log_scale_w
       weights_alt = self.xpy.maximum(weights_alt, 1e-5)    # prevent negative weights, in case integrating function with lnL < 0
       # now treat as sum
       weights_alt = weights_alt/(weights_alt.sum())
-      if weights_alt.dtype == numpy.float128:
+      if weights_alt.dtype == RiftFloat:
         weights_alt = weights_alt.astype(numpy.float64,copy=False)
 
       def function_wrapper(f, p):
@@ -621,7 +622,7 @@ def integrate_log(self, lnF, *args, xpy=xpy_default,**kwargs):
         # Determine stopping conditions
         #
         nmax = kwargs["nmax"] if "nmax" in kwargs else float("inf")
-        neff = kwargs["neff"] if "neff" in kwargs else numpy.float128("inf")
+        neff = kwargs["neff"] if "neff" in kwargs else RiftFloat("inf")
         n = int(kwargs["n"] if "n" in kwargs else min(1000, nmax))
         convergence_tests = kwargs["convergence_tests"] if "convergence_tests" in kwargs else None
         save_no_samples = kwargs["save_no_samples"] if "save_no_samples" in kwargs else None
@@ -808,7 +809,7 @@ def inner(arg):
             weights_alt = self._rvs["log_integrand"][-n_history:]+np.max([maxlnL, 200])  # try to make sure we have some dynamic range here
             weights_alt = self.xpy.maximum(weights_alt, 1e-5)  # prevent negative weights. NOTE THIS IS IMPORTANT: if you are integrating a function with lnL<0, use an offset!
             weights_alt = weights_alt/(weights_alt.sum())
-            if weights_alt.dtype == numpy.float128:
+            if weights_alt.dtype == RiftFloat:
               weights_alt = weights_alt.astype(numpy.float64,copy=False)
 
             for itr, p in enumerate(self.params_ordered):
@@ -980,7 +981,7 @@ def integrate(self, func, *args, **kwargs):
         # Determine stopping conditions
         #
         nmax = kwargs["nmax"] if "nmax" in kwargs else float("inf")
-        neff = kwargs["neff"] if "neff" in kwargs else numpy.float128("inf")
+        neff = kwargs["neff"] if "neff" in kwargs else RiftFloat("inf")
         n = int(kwargs["n"] if "n" in kwargs else min(1000, nmax))
         convergence_tests = kwargs["convergence_tests"] if "convergence_tests" in kwargs else None
         save_no_samples = kwargs["save_no_samples"] if "save_no_samples" in kwargs else None
@@ -1023,12 +1024,12 @@ def integrate(self, func, *args, **kwargs):
         if bShowEvaluationLog:
             print(" .... mcsampler : providing verbose output ..... ")
 
-        int_val1 = numpy.float128(0)
+        int_val1 = RiftFloat(0)
         self.ntotal = 0
         maxval = -float("Inf")
         maxlnL = -float("Inf")
         eff_samp = 0
-        mean, var = None, numpy.float128(0)    # to prevent infinite variance due to overflow
+        mean, var = None, RiftFloat(0)    # to prevent infinite variance due to overflow
         if cupy_ok:
           var = xpy_default.float64(0)   # cupy doesn't have float128
 
@@ -1228,7 +1229,7 @@ def inner(arg):
 
             weights_alt = weights_alt/(weights_alt.sum())
             # Type convert as needed: if weights are float128, convert to float64; otherwise we hit a typing error later with bincount
-            if weights_alt.dtype == numpy.float128:
+            if weights_alt.dtype == RiftFloat:
               weights_alt = weights_alt.astype(numpy.float64,copy=False)
 #            weights_alt = floor_integrated_probability*xpy_default.ones(len(weights_alt))/len(weights_alt) + (1-floor_integrated_probability)*weights_alt
 
@@ -1412,7 +1413,7 @@ def q_samp_vector(qmin,qmax,x):
     scale = 1./(1+qmin) - 1./(1+qmax)
     return 1/numpy.power((1+x),2)/scale
 def q_cdf_inv_vector(qmin,qmax,x,xpy=xpy_default):
-    return np.array((qmin + qmax*qmin + qmax*x - qmin*x)/(1 + qmax - qmax*x + qmin*x),dtype=np.float128)
+    return np.array((qmin + qmax*qmin + qmax*x - qmin*x)/(1 + qmax - qmax*x + qmin*x),dtype=RiftFloat)
 
 # total mass. Assumed used with q.  2M/Mmax^2-Mmin^2
 def M_samp_vector(Mmin,Mmax,x):
diff --git a/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsamplerNFlow.py b/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsamplerNFlow.py
index 965f7b27c..758097e98 100644
--- a/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsamplerNFlow.py
+++ b/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsamplerNFlow.py
@@ -13,6 +13,7 @@
 
 import numpy
 np=numpy #import numpy as np
+from RIFT.precision import RiftFloat  # platform-portable replacement for np.float128
 from scipy import integrate, interpolate, special
 import itertools
 import functools
@@ -642,7 +643,7 @@ def update_sampling_prior(self, lnw, *args, xpy=xpy_default,no_protect_names=Tru
 #      weights_alt = self.xpy.maximum(weights_alt, 1e-5)    # prevent negative weights, in case integrating function with lnL < 0
       # now treat as sum
       weights_alt = weights_alt/(weights_alt.sum())
-      if weights_alt.dtype == numpy.float128:
+      if weights_alt.dtype == RiftFloat:
         weights_alt = weights_alt.astype(numpy.float64,copy=False)
 
 
@@ -726,7 +727,7 @@ def integrate_log(self, lnF, *args, xpy=xpy_default,**kwargs):
         # Determine stopping conditions
         #
         nmax = kwargs["nmax"] if "nmax" in kwargs else float("inf")
-        neff = kwargs["neff"] if "neff" in kwargs else numpy.float128("inf")
+        neff = kwargs["neff"] if "neff" in kwargs else RiftFloat("inf")
         n = int(kwargs["n"] if "n" in kwargs else min(100000, nmax))
         convergence_tests = kwargs["convergence_tests"] if "convergence_tests" in kwargs else None
         save_no_samples = kwargs["save_no_samples"] if "save_no_samples" in kwargs else None
diff --git a/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsamplerPortfolio.py b/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsamplerPortfolio.py
index 07fb1b2b8..e34235bde 100644
--- a/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsamplerPortfolio.py
+++ b/MonteCarloMarginalizeCode/Code/RIFT/integrators/mcsamplerPortfolio.py
@@ -6,6 +6,7 @@
 
 import numpy
 np=numpy #import numpy as np
+from RIFT.precision import RiftFloat  # platform-portable replacement for np.float128
 from scipy import integrate, interpolate, special
 import itertools
 import functools
@@ -322,7 +323,7 @@ def integrate_log(self, lnF, *args, xpy=xpy_default,**kwargs):
         # Determine stopping conditions
         #
         nmax = kwargs["nmax"] if "nmax" in kwargs else float("inf")
-        neff = kwargs["neff"] if "neff" in kwargs else numpy.float128("inf")
+        neff = kwargs["neff"] if "neff" in kwargs else RiftFloat("inf")
         n = int(kwargs["n"] if "n" in kwargs else min(100000, nmax))
         convergence_tests = kwargs["convergence_tests"] if "convergence_tests" in kwargs else None
         save_no_samples = kwargs["save_no_samples"] if "save_no_samples" in kwargs else None
diff --git a/MonteCarloMarginalizeCode/Code/RIFT/integrators/statutils.py b/MonteCarloMarginalizeCode/Code/RIFT/integrators/statutils.py
index 69d1986af..166d84562 100644
--- a/MonteCarloMarginalizeCode/Code/RIFT/integrators/statutils.py
+++ b/MonteCarloMarginalizeCode/Code/RIFT/integrators/statutils.py
@@ -1,4 +1,5 @@
 import numpy
+from RIFT.precision import RiftFloat  # platform-portable replacement for np.float128
 import scipy.special
 
 __author__ = "Chris Pankow <pankow@gravity.phys.uwm.edu>, R. O'Shaughnessy"
@@ -38,12 +39,12 @@ def cumvar(arr, mean=None, var=None, n=0):
     for algorithm details.
 	"""
 	if mean and var:
-		m, s = numpy.zeros(len(arr)+1), numpy.zeros(len(arr)+1,dtype=numpy.float128)
+		m, s = numpy.zeros(len(arr)+1), numpy.zeros(len(arr)+1,dtype=RiftFloat)
 		m[0] = mean
 		s[0] = var*(n-1)
 		buf = numpy.array([0])
 	else:
-		m, s = numpy.zeros(arr.shape), numpy.zeros(arr.shape,dtype=numpy.float128)
+		m, s = numpy.zeros(arr.shape), numpy.zeros(arr.shape,dtype=RiftFloat)
 		m[0] = arr[0]
 		buf = numpy.array([])
 
diff --git a/MonteCarloMarginalizeCode/Code/RIFT/interpolators/BayesianLeastSquares.py b/MonteCarloMarginalizeCode/Code/RIFT/interpolators/BayesianLeastSquares.py
index fa736f0ee..fc89b3706 100644
--- a/MonteCarloMarginalizeCode/Code/RIFT/interpolators/BayesianLeastSquares.py
+++ b/MonteCarloMarginalizeCode/Code/RIFT/interpolators/BayesianLeastSquares.py
@@ -7,6 +7,7 @@
 
 import scipy.linalg as linalg
 import numpy as np
+from RIFT.precision import RiftFloat  # platform-portable replacement for np.float128
 
 
 def fit_quadratic(x,y,x0=None,variable_symmetry_list=None,gamma_x=None,prior_x_gamma=None,prior_quadratic_gamma=None,verbose=False,n_digits=None,hard_regularize_negative=False,hard_regularize_scale=1):
@@ -38,7 +39,7 @@ def fit_quadratic(x,y,x0=None,variable_symmetry_list=None,gamma_x=None,prior_x_g
     # Constant, linear, quadratic functions. 
     # Beware of lambda:  f_list = [(lambda x: k) for k in range(5)] does not  work, but this does
     #     f_list = [(lambda x,k=k: k) for k in range(5)]
-    f0 = [lambda z: np.ones(len(z),dtype=np.float128)]
+    f0 = [lambda z: np.ones(len(z),dtype=RiftFloat)]
     # indx_lookup_linear = {}   # protect against packing errors
     # indx_here = len(f0)
     # f_linear = []
@@ -73,11 +74,11 @@ def fit_quadratic(x,y,x0=None,variable_symmetry_list=None,gamma_x=None,prior_x_g
     #         print " Grid test " , pair,  fn_now(np.array([1,0])), fn_now(np.array([0,1])), fn_now(np.array([1,1])) ,fn_now(np.array([1,-1])) 
 
 
-    F = np.matrix(np.zeros((len(x), n_params_model),dtype=np.float128))
+    F = np.matrix(np.zeros((len(x), n_params_model),dtype=RiftFloat))
     for q in np.arange(n_params_model):
-        fval = f_list[q](np.array(x,dtype=np.float128))
+        fval = f_list[q](np.array(x,dtype=RiftFloat))
         F[:,q] = np.reshape(fval, (len(x),1))
-    gamma = np.matrix( np.diag(np.ones(npts,dtype=np.float128)))
+    gamma = np.matrix( np.diag(np.ones(npts,dtype=RiftFloat)))
     if not(gamma_x is None):
         gamma = np.matrix(gamma_x)
     Gamma = F.T * gamma * F      # Fisher matrix for the fit
diff --git a/MonteCarloMarginalizeCode/Code/RIFT/likelihood/factored_likelihood.py b/MonteCarloMarginalizeCode/Code/RIFT/likelihood/factored_likelihood.py
index d319ce535..165ae8084 100644
--- a/MonteCarloMarginalizeCode/Code/RIFT/likelihood/factored_likelihood.py
+++ b/MonteCarloMarginalizeCode/Code/RIFT/likelihood/factored_likelihood.py
@@ -32,6 +32,7 @@
 import RIFT.lalsimutils as lsu  # problem of relative comprehensive import - dangerous due to package name
 log_loud = lsu.log_loud
 import numpy as np
+from RIFT.precision import RiftFloat  # platform-portable replacement for np.float128
 try:
   import cupy
   from . import optimized_gpu_tools
@@ -670,7 +671,7 @@ def FactoredLogLikelihoodTimeMarginalized(tvals, extr_params, rholms_intp, rholm
     Ylms = ComputeYlms(Lmax, incl, -phiref, selected_modes=rholms_intp[list(rholms.keys())[0]].keys())
 
 #    lnL = 0.
-    lnL = np.zeros(len(tvals),dtype=np.float128)
+    lnL = np.zeros(len(tvals),dtype=RiftFloat)
     for det in detectors:
         CT = crossTerms[det]
         CTV = crossTermsV[det]
@@ -1570,7 +1571,7 @@ def  DiscreteFactoredLogLikelihoodViaArray(tvals, P, lookupNKDict, rholmsArrayDi
 
     deltaT = P.deltaT
 
-    lnL = np.zeros(npts,dtype=np.float128)
+    lnL = np.zeros(npts,dtype=RiftFloat)
 
 
     for det in detectors:
@@ -1648,10 +1649,10 @@ def  DiscreteFactoredLogLikelihoodViaArrayVector(tvals, P_vec, lookupNKDict, rho
     deltaT = P_vec.deltaT # this is stored as a scalar
 
     # Array to use for work
-    lnL = np.zeros(npts,dtype=np.float128)
-    lnL_array = np.zeros((npts_extrinsic,npts),dtype=np.float128)
+    lnL = np.zeros(npts,dtype=RiftFloat)
+    lnL_array = np.zeros((npts_extrinsic,npts),dtype=RiftFloat)
     # Array to use for output
-    lnLmargOut = np.zeros(npts_extrinsic,dtype=np.float128)
+    lnLmargOut = np.zeros(npts_extrinsic,dtype=RiftFloat)
 #    term1  = np.zeros(npts, dtype=complex) # workspace
 
     for det in detectors:  # strings right now - need to change to make ufunc-able
diff --git a/MonteCarloMarginalizeCode/Code/RIFT/precision.py b/MonteCarloMarginalizeCode/Code/RIFT/precision.py
new file mode 100644
index 000000000..2f6976be6
--- /dev/null
+++ b/MonteCarloMarginalizeCode/Code/RIFT/precision.py
@@ -0,0 +1,88 @@
+# -*- coding: utf-8 -*-
+"""
+RIFT.precision
+==============
+
+Centralized high-precision floating-point dtype for RIFT.
+
+RIFT historically used ``numpy.float128`` directly throughout the integrators
+to suppress overflow when accumulating very large or very small probability
+weights.  ``numpy.float128`` is, however, a *platform-dependent* alias:
+
+* On x86_64 Linux it is ``numpy.longdouble`` with 80-bit extended precision
+  stored in 16 bytes, and exposes the alias ``numpy.float128``.
+* On macOS arm64 (Apple Silicon), Windows MSVC, and many embedded / non-x86
+  Linux builds, ``numpy.longdouble`` has the same width as ``numpy.float64``
+  (8 bytes), and ``numpy.float128`` does not exist.
+* In NumPy 2.x the ``numpy.float128`` alias was further narrowed and is now
+  only present where the platform actually has a wider long double type.
+
+Hardcoding ``numpy.float128`` therefore breaks imports on any platform that
+lacks an extended-precision long double.  This module provides:
+
+* ``RiftFloat`` -- the dtype to use for high-precision accumulators.  Equals
+  ``numpy.longdouble`` whenever the platform really gives extra precision
+  (``itemsize > 8``), otherwise falls back to ``numpy.float64``.
+* ``RIFT_FLOAT_HIGH_PRECISION`` -- ``True`` iff ``RiftFloat`` is wider than
+  ``numpy.float64``.  Use this if a code path needs to know whether the
+  extra precision is actually available (e.g. to skip a downcast that is
+  only meaningful when float128 is real).
+
+Recommended usage
+-----------------
+
+Replace ::
+
+    import numpy
+    neff = numpy.float128("inf")
+    arr = numpy.array(values, dtype=numpy.float128)
+    if weights.dtype == numpy.float128:
+        weights = weights.astype(numpy.float64)
+
+with ::
+
+    from RIFT.precision import RiftFloat
+    neff = RiftFloat("inf")
+    arr = numpy.array(values, dtype=RiftFloat)
+    if weights.dtype == RiftFloat:
+        weights = weights.astype(numpy.float64)
+
+When ``RiftFloat`` is ``numpy.float64`` the type-equality guards become
+self-consistent (the ``astype(float64)`` is a no-op) and no platform-specific
+branching is required at the call site.
+"""
+
+import numpy as _np
+
+__all__ = [
+    "RiftFloat",
+    "RIFT_FLOAT_HIGH_PRECISION",
+    "RIFT_FLOAT_NAME",
+]
+
+
+def _select_high_precision_dtype():
+    """Pick the widest available real-floating dtype, falling back to float64.
+
+    Prefers ``numpy.longdouble`` (always defined) whenever the platform
+    actually gives more than 8-byte storage; otherwise returns
+    ``numpy.float64`` so that consumers can use the result as a drop-in
+    replacement for ``numpy.float128`` without raising on platforms that
+    do not expose it.
+    """
+    longdouble_itemsize = _np.dtype(_np.longdouble).itemsize
+    if longdouble_itemsize > 8:
+        return _np.longdouble, True
+    # Some NumPy 1.x builds expose float128 even when longdouble is 8 bytes
+    # (older alias kept for ABI stability).  Treat that as high-precision.
+    if hasattr(_np, "float128"):
+        try:
+            if _np.dtype(_np.float128).itemsize > 8:
+                return _np.float128, True
+        except TypeError:
+            pass
+    return _np.float64, False
+
+
+RiftFloat, RIFT_FLOAT_HIGH_PRECISION = _select_high_precision_dtype()
+RIFT_FLOAT_NAME = _np.dtype(RiftFloat).name
diff --git a/MonteCarloMarginalizeCode/Code/bin/cepp_basic_htcondor b/MonteCarloMarginalizeCode/Code/bin/cepp_basic_htcondor
deleted file mode 100755
index 330d1c242..000000000
--- a/MonteCarloMarginalizeCode/Code/bin/cepp_basic_htcondor
+++ /dev/null
@@ -1,2054 +0,0 @@
-#! /usr/bin/env python
-import htcondor2 as htcondor
-from htcondor2.dags import DAG, Node
-#
-#  GOAL
-#    Simple pipeline: just ILE+fitting iteration jobs
-#    User must provide arglists for both ILE and fitting/iteration jobs
-#    Assumes user has done all necessary setup (e.g., PSDs, data selection, picking channel names, etc)
-#    Will add in fit assessment jobs later
-#
-#  WHY DO THIS?
-#    We're often reanalyzing a single stretch of data again and again. (Or otherwise have some simple setup).
-#    This saves time for large-scale injection studies or systematics reruns
-#
-#    Intend to replace naive iteration code with reproducible, extendable/scalable workflow
-#
-#  SEE ALSO
-#     - original shell-script-based generation scripts
-#     - create_event_parameter_pipeline  # long term plan
-#     - create_postprocessing_event_dag.py
-#     - create_event_dag_via_grid   # basic iteration code
-#
-# WORKFLOW STRUCTURE
-#    - Required elements:
-#          cache file  (specified in  --cache <cache> in cip_args)
-#          PSD files (.... --psd  in cip_args)
-#          channel names
-#          event time
-#    - Inputs
-#         args_cip.txt   # no --fname, --fname-output-...  (overrridden)
-#         args_ile.txt    # no --sim-xml,  --event (overrridden)      ... though you may want to add them for command-single.sh to run properly
-#    - Workspaces
-#          Creates one directory for each task, for each iteration
-#          Main directory:
-#              - output-ILE-<iteration>.xml.gz, starting with iteration 0 (=input)
-#              - *.composite files, from each consolidation step
-#              - all.net : union of all *.composite files 
-#   
-# EXAMPLES
-#    python create_event_parameter_pipeline_BasicIteration.py --ile-args args_ile.txt --cip-args args.txt 
-#
-# TEST SEQUENCE
-#        unixhome/Projects/LIGO-ILE-Applications/communications/20180806-Me-PipelineDevelopment
-#    * Trivial iteration, fitting a constant likelihood at each step: 
-#           rm -rf test_workflow_trivial; make test_workflow_trivial
-#         
-
-
-###
-###  PLANS FOR IMPROVEMENT
-###
-#
-# * Stress-testing final fit
-#       - single-iteration runs which change lnL and/or coordinates
-#       - note this can be done with a DRIVER, does not need to be hardcoded?
-# * BCR/BCI (single-ifo vs multi-ifo run)
-#       - runs which do PE on single IFOs and 
-#       - again, this can be done with a DRIVER
-
-import argparse
-import sys
-import os
-import shutil
-import numpy as np
-import RIFT.lalsimutils as lalsimutils
-import lalsimulation as lalsim
-import lal
-import functools
-import itertools
-import json
-
-# Backend-neutral pipeline namespace (htcondor/glue/slurm) provided by dag_utils_htcondor
-
-
-from RIFT.misc import dag_utils_htcondor as dag_utils
-from RIFT.misc.dag_utils_htcondor import mkdir
-from RIFT.misc.dag_utils_htcondor import which
-
-
-def add_batch_ILE_nodes_to_dag(my_dag,my_ile_job,my_parent_node, my_child_node, n_max, n_group, it, n_retries=3,it_start=0,convert_psd_node_list=[],node_list_dict={}):
-    for event in np.arange(n_max): 
-        ile_node = Node(my_ile_job)
-        ile_node.retry = n_retries
-        ile_node.add_variable("macroevent", event*n_group)
-        ile_node.add_variable("macrongroup", n_group)
-        ile_node.add_variable("macroiteration", it)
-        if not(my_parent_node is None):
-            ile_node.add_parent(my_parent_node)
-        if it == it_start:
-            for node in convert_psd_node_list:  # for every PSD conversion job, make sure PSD is present before we run the first iteration!
-                ile_node.add_parent(node)
-        if my_child_node:
-            my_child_node.add_parent(ile_node) # consolidate depends on all of the individual jobs
-        my_dag.add_node(ile_node)
-        # global vraiable edit
-        if node_list_dict:
-            if it in node_list_dict:
-                node_list_dict[it].append(ile_node)
-
-
-def parse_ile_args_flexible(my_arg_str):
-    """
-    Narrower-scope parser with flexible arguments, to target
-    """
-    import argparse
-    import shlex
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--n-eff",type=int)
-    parser.add_argument("--cache-file",type=str)
-    parser.add_argument('other', nargs=argparse.REMAINDER)
-    args  = parser.parse_args(shlex.split(my_arg_str))
-    return args
-
-def parse_ile_args_lazy(my_arg_str,key_target):
-    str_split = my_arg_str.split('--')
-    matches = [x for x in str_split if x.startswith(key_target)]
-    print(matches)
-    return matches[-1].split(' ')[1]
-                
-def parse_ile_args_for_bw(my_arg_str):
-    """
-    this is HARD TO MAINTAIN since it needs to match ILE exactly
-    """
-    # Copied verbatim from ILE
-    from optparse import OptionParser, OptionGroup
-    optp = OptionParser()
-    optp.add_option("-c", "--cache-file", default=None, help="LIGO cache file containing all data needed.")
-    optp.add_option("-C", "--channel-name", action="append", help="instrument=channel-name, e.g. H1=FAKE-STRAIN. Can be given multiple times for different instruments.")
-    optp.add_option("-p", "--psd-file", action="append", help="instrument=psd-file, e.g. H1=H1_PSD.xml.gz. Can be given multiple times for different instruments.")
-    optp.add_option("-k", "--skymap-file", help="Use skymap stored in given FITS file.")
-    optp.add_option("-x", "--coinc-xml", help="gstlal_inspiral XML file containing coincidence information.")
-    optp.add_option("-I", "--sim-xml", help="XML file containing mass grid to be evaluated")
-    optp.add_option("-E", "--event", default=0,type=int, help="Event number used for this run")
-    optp.add_option("--n-events-to-analyze", default=1,type=int, help="Number of events to analyze from this XML")
-    optp.add_option("--soft-fail-event-range",action='store_true',help='Soft failure (exit 0) if event ID is out of range. This happens in pipelines, if we have pre-built a DAG attempting to analyze more points than we really have')
-    optp.add_option("-f", "--reference-freq", type=float, default=100.0, help="Waveform reference frequency. Required, default is 100 Hz.")
-    optp.add_option("--fmin-template", dest='fmin_template', type=float, default=40, help="Waveform starting frequency.  Default is 40 Hz. Also equal to starting frequency for integration") 
-    optp.add_option("--fmin-template-correct-for-lmax",action='store_true',help="Modify amount of data selected, waveform starting frequency to account for l-max, to better insure all requested modes start within the targeted band")
-    optp.add_option("--fmin-ifo", action='append' , help="Minimum frequency for each IFO. Implemented by setting the PSD=0 below this cutoff. Use with care.") 
-    optp.add_option('--nr-group', default=None,help="If using a *ssingle specific simulation* specified on the command line, provide it here")
-    optp.add_option('--nr-param', default=None,help="If using a *ssingle specific simulation* specified on the command line, provide it here")
-    optp.add_option("--nr-lookup",action='store_true', help=" Look up parameters from an NR catalog, instead of using the approximant specified")
-    optp.add_option("--nr-lookup-group",action='append', help="Restriction on 'group' for NR lookup")
-    optp.add_option("--nr-hybrid-use",action='store_true',help="Enable use of NR (or ROM!) hybrid, using --approx as the default approximant and with a frequency fmin")
-    optp.add_option("--nr-hybrid-method",default="taper_add",help="Hybridization method for NR (or ROM!).  Passed through to LALHybrid. pseudo_aligned_from22 will provide ad-hoc higher modes, if the early-time hybridization model only includes the 22 mode")
-    optp.add_option("--rom-group",default=None)
-    optp.add_option("--rom-param",default=None)
-    optp.add_option("--rom-use-basis",default=False,action='store_true',help="Use the ROM basis for inner products.")
-    optp.add_option("--rom-limit-basis-size-to",default=None,type=int)
-    optp.add_option("--rom-integrate-intrinsic",default=False,action='store_true',help='Integrate over intrinsic variables. REQUIRES rom_use_basis at present. ONLY integrates in mass ratio as present')
-    optp.add_option("--nr-perturbative-extraction",default=False,action='store_true')
-    optp.add_option("--nr-perturbative-extraction-full",default=False,action='store_true')
-    optp.add_option("--nr-use-provided-strain",default=False,action='store_true')
-    optp.add_option("--no-memory",default=False,action='store_true', help="At present, turns off m=0 modes. Use with EXTREME caution only if requested by model developer")
-    optp.add_option("--restricted-mode-list-file",default=None,help="A list of ALL modes to use in likelihood. Incredibly dangerous. Only use when comparing with models which provide restricted mode sets, or otherwise to isolate the effect of subsets of modes on the whole")
-    optp.add_option("--use-external-EOB",default=False,action='store_true')
-    optp.add_option("--maximize-only",default=False, action='store_true',help="After integrating, attempts to find the single best fitting point")
-    optp.add_option("--dump-lnL-time-series",default=False, action='store_true',help="(requires --sim-xml) Dump lnL(t) at the injected parameters")
-    optp.add_option("-a", "--approximant", default="TaylorT4", help="Waveform family to use for templates. Any approximant implemented in LALSimulation is valid.")
-    optp.add_option("-A", "--amp-order", type=int, default=0, help="Include amplitude corrections in template waveforms up to this e.g. (e.g. 5 <==> 2.5PN), default is Newtonian order.")
-    optp.add_option("--l-max", type=int, default=2, help="Include all (l,m) modes with l less than or equal to this value.")
-    optp.add_option("-s", "--data-start-time", type=float, default=None, help="GPS start time of data segment. If given, must also give --data-end-time. If not given, sane start and end time will automatically be chosen.")
-    optp.add_option("-e", "--data-end-time", type=float, default=None, help="GPS end time of data segment. If given, must also give --data-start-time. If not given, sane start and end time will automatically be chosen.")
-    optp.add_option("--data-integration-window-half",default=50*1e-3,type=float,help="Only change this window size if you are an expert. The window for time integration is -/+ this quantity around the event time")
-    optp.add_option("-F", "--fmax", type=float, help="Upper frequency of signal integration. Default is use PSD's maximum frequency.")
-    optp.add_option("--srate",default=16384,type=int,help="Sampling rate. Change ONLY IF YOU ARE ABSOLUTELY SURE YOU KNOW WHAT YOU ARE DOING.")
-    optp.add_option("-t", "--event-time", type=float, help="GPS time of the event --- probably the end time. Required if --coinc-xml not given.")
-    optp.add_option("-i", "--inv-spec-trunc-time", type=float, default=8., help="Timescale of inverse spectrum truncation in seconds (Default is 8 - give 0 for no truncation)")
-    optp.add_option("-w", "--window-shape", type=float, default=0, help="Shape of Tukey window to apply to data (default is no windowing)")
-    optp.add_option("--psd-window-shape", type=float, default=0, help="Shape of Tukey window that *was* applied to the PSD being passed. If nonzero, we will rescale the PSD by the ratio of window shape results")
-    optp.add_option("-m", "--time-marginalization", action="store_true", help="Perform marginalization over time via direct numerical integration. Default is false.")
-    optp.add_option("-d", "--distance-marginalization", action="store_true", help="Perform marginalization over distance via a look-up table. Default is false.")
-    optp.add_option("-l", "--distance-marginalization-lookup-table", default=None, help="Look-up table for distance marginalization.")
-    optp.add_option("--vectorized", action="store_true", help="Perform manipulations of lm and timeseries using numpy arrays, not LAL data structures.  (Combine with --gpu to enable GPU use, where available)")
-    optp.add_option("--gpu", action="store_true", help="Perform manipulations of lm and timeseries using numpy arrays, CONVERTING TO GPU when available. You MUST use this option with --vectorized (otherwise it is a no-op). You MUST have a suitable version of cupy installed, your cuda operational, etc")
-    optp.add_option("--force-gpu-only", action="store_true", help="Hard fail if no GPU present (assessed by cupy not loading)")
-    optp.add_option("--force-xpy", action="store_true", help="Use the xpy code path.  Use with --vectorized --gpu to use the fallback CPU-based code path. Useful for debugging.")
-    optp.add_option("-o", "--output-file", help="Save result to this file.")
-    optp.add_option("-O", "--output-format", default='xml', help="[xml|hdf5]")
-    optp.add_option("-S", "--save-samples", action="store_true", help="Save sample points to output-file. Requires --output-file to be defined.")
-    optp.add_option("-L", "--save-deltalnL", type=float, default=float("Inf"), help="Threshold on deltalnL for points preserved in output file.  Requires --output-file to be defined")
-    optp.add_option("-P", "--save-P", type=float,default=0.1, help="Threshold on cumulative probability for points preserved in output file.  Requires --output-file to be defined")
-    optp.add_option("--verbose",action='store_true')
-    integration_params = OptionGroup(optp, "Integration Parameters", "Control the integration with these options.")
-    integration_params.add_option("--n-max", type=int, help="Total number of samples points to draw. If this number is hit before n_eff, then the integration will terminate. Default is 'infinite'.",default=1e7)
-    integration_params.add_option("--n-eff", type=int, default=100, help="Total number of effective samples points to calculate before the integration will terminate. Default is 100")
-    integration_params.add_option("--n-chunk", type=int, help="Chunk'.",default=10000)
-    integration_params.add_option("--convergence-tests-on",default=False,action='store_true')
-    integration_params.add_option("--seed", type=int, help="Random seed to use. Default is to not seed the RNG.")
-    integration_params.add_option("--no-adapt", action="store_true", help="Turn off adaptive sampling. Adaptive sampling is on by default.")
-    integration_params.add_option("--no-adapt-distance", action="store_true", help="Turn off adaptive sampling, just for distance. Adaptive sampling is on by default.")
-    integration_params.add_option("--no-adapt-after-first",action='store_true',help="Disables adaptation after first iteration with significant lnL")
-    integration_params.add_option("--adapt-weight-exponent", type=float, default=1.0, help="Exponent to use with weights (likelihood integrand) when doing adaptive sampling. Used in tandem with --adapt-floor-level to prevent overconvergence. Default is 1.0.")
-    integration_params.add_option("--adapt-floor-level", type=float, default=0.1, help="Floor to use with weights (likelihood integrand) when doing adaptive sampling. This is necessary to ensure the *sampling* prior is non zero during adaptive sampling and to prevent overconvergence. Default is 0.1 (no floor)")
-    integration_params.add_option("--adapt-adapt",action='store_true',help="Adapt the tempering exponent")
-    integration_params.add_option("--adapt-log",action='store_true',help="Use a logarithmic tempering exponent")
-    integration_params.add_option("--interpolate-time", default=False,help="If using time marginalization, compute using a continuously-interpolated array. (Default=false)")
-    integration_params.add_option("--d-prior",default='Euclidean' ,type=str,help="Distance prior for dL.  Options are dL^2 (Euclidean) and 'pseudo-cosmo'  .")
-    integration_params.add_option("--d-max", default=10000,type=float,help="Maximum distance in volume integral. Used to SET THE PRIOR; changing this value changes the numerical answer.")
-    integration_params.add_option("--d-min", default=1,type=float,help="Minimum distance in volume integral. Used to SET THE PRIOR; changing this value changes the numerical answer.")
-    integration_params.add_option("--declination-cosine-sampler",action='store_true',help="If specified, the parameter used for declination is cos(dec), not dec")
-    integration_params.add_option("--inclination-cosine-sampler",action='store_true',help="If specified, the parameter used for inclination is cos(dec), not dec")
-    integration_params.add_option("--internal-rotate-phase", action='store_true',help="If specified, the integration sampler uses phase_p ==phi+psi and phase_m == phi-psi as sampling coordinates, both ranging from 0 to 4 pi.  The prior is twice as large.")
-    integration_params.add_option("--internal-sky-network-coordinates",action='store_true',help="If specified, perform integration in sky coordinates aligned with the first two IFOs provided")
-    integration_params.add_option("--manual-logarithm-offset",type=float,default=0,help="Target value of logarithm lnL. Integrand is reduced by exp(-manual_logarithm_offset).  Important for high-SNR sources!   Should be set dynamically")
-    integration_params.add_option("--sampler-method",default="adaptive_cartesian",help="adaptive_cartesian|GMM|adaptive_cartesian_gpu")
-    integration_params.add_option("--sampler-xpy",default=None,help="numpy|cupy  if the adaptive_cartesian_gpu sampler is active, use that.")
-    optp.add_option_group(integration_params)
-
-    intrinsic_params = OptionGroup(optp, "Intrinsic Parameters", "Intrinsic parameters (e.g component mass) to use.")
-#intrinsic_params.add_option("--pin-distance-to-sim",action='store_true', help="Pin *distance* value to sim entry. Used to enable source frame reconstruction with NR.")
-    intrinsic_params.add_option("--mass1", type=float, help="Value of first component mass, in solar masses. Required if not providing coinc tables.")
-    intrinsic_params.add_option("--mass2", type=float, help="Value of second component mass, in solar masses. Required if not providing coinc tables.")
-    intrinsic_params.add_option("--eff-lambda", type=float, help="Value of effective tidal parameter. Optional, ignored if not given.")
-    intrinsic_params.add_option("--deff-lambda", type=float, help="Value of second effective tidal parameter. Optional, ignored if not given")
-    optp.add_option_group(intrinsic_params)
-
-    intrinsic_int_params = OptionGroup(optp, "Intrinsic integrated parameters", "Intrinsic parameters to integrate over. ONLY currently used with ROM version")
-    intrinsic_int_params.add_option("--parameter",action='append')
-    intrinsic_int_params.add_option("--parameter-range",action='append',type=str)
-    intrinsic_int_params.add_option("--adapt-intrinsic",action='store_true')
-    optp.add_option_group(intrinsic_int_params)
-
-
-    opts, args = optp.parse_args(my_arg_str.split())  # parse full set, assume initial argument already stripped
-
-    return opts
-
-parser = argparse.ArgumentParser()
-parser.add_argument("--working-directory",default="./")
-parser.add_argument("--input-grid",default="overlap-grid.xml.gz")
-parser.add_argument("--cip-args",default=None,help="filename of args_cip.txt file  which holds CIP arguments.  Should NOT conflict with arguments auto-set by this DAG ... in particular, i/o arguments will be modified. ")
-parser.add_argument("--cip-args-list",default=None,help="filename of args_cip_list.file, which holds CIP arguments.  Overrides cip-args if present. One CIP_n.sub file is created for each line in the file, which is used for an integer m iterations, where m is the first item of each line (normally 'X' in CIP)")
-parser.add_argument("--cip-explode-jobs",default=None,type=int,help="Number of CIP jobs to use to use in parallel to produce posterior samples for each iteration. Code will generate a fit first, save it, use this number of workers in parallel to generate samples, and then consolidates the samples together. Note that --n-output-samples and --n-eff are NOT adjusted ... if the user wants to adaptively fix the resolution, that needs to be controlled at a higher level")
-parser.add_argument("--cip-explode-jobs-last",default=None,type=int,help="Like cip_explode_jobs, but ONLY for last batch. Only applies if using --cip-args-list. Note does NOT adjust control settinggs like n-eff, n-max, so you will need to do this in outside control tools")
-parser.add_argument("--cip-explode-jobs-dag",action='store_true',help="If using a worker, that worker has its own node, not a 'queue N' statement on one node. Helps for stability with many workers with high failure probability")
-parser.add_argument("--cip-explode-jobs-subdag",action='store_true',help="If using a worker, create a subdag that iterates until the target number of samples are produced, using a POST script on a subdag to repeat effort. Imposes cip_explode_jobs_dag. Note that the actual number used for cip_explode_jobs MAY NOT BE IMPLEMENTED or may be a cap (in progress)")
-parser.add_argument("--cip-explode-jobs-flat",action='store_true',help="Pass to use the same arguments for all worker jobs. The main job will be /bin/true.")
-parser.add_argument("--puff-exe",default=None,help="util_ParameterPuffball.py")
-parser.add_argument("--puff-args",default=None,help="util_ParameterPuffball arguments.  If not specified, puffball will not be performed  ")
-parser.add_argument("--puff-cadence",default=None,type=int,help="Every n iterations (not including 0), the puffball code will be applied.  Puffball points will be done *in addition* to the usual results from the DAG.  (The puffball is based on perturbing points from that iteration, and this will roughly double that iteration in ILE job size).  Proposed value 2 (i.e., puff overlap-grid-2, ...-4, ...-6.  If not specified, puffball will not be performed ")
-parser.add_argument("--puff-max-it",default=-1,type=int,help="Maximum iteration number that puffball is applied.  If negative, puffball is not applied ")
-parser.add_argument("--first-iteration-jumpstart",action='store_true',help="No ILE jobs the first iteration.  Assumes you already have .composite files and want to get going. Particularly helpful for subdag systems")
-parser.add_argument("--last-iteration-extrinsic",action='store_true',help="Configure last iteration to extract *one* set of extrinsic parameters from each intrinsic point. [This is highly inefficient, but people like having one extrinsic point per intrinsic point.]  Requires --convert-args")
-parser.add_argument("--last-iteration-extrinsic-nsamples",default=3000,type=int,help="Construct this number of extrinsic samples")
-parser.add_argument("--last-iteration-extrinsic-samples-per-ile",default=5,type=int,help="Draw this many samples from each ILE job")
-parser.add_argument("--last-iteration-extrinsic-samples-per-ile-internal",default=10,type=int,help="Draw this many samples from each ILE job")
-parser.add_argument("--last-iteration-extrinsic-batched-convert",action='store_true',help="Used batched converter for output of extrinsic samples")
-parser.add_argument("--last-iteration-extrinsic-time-resampling",action='store_true',help="Last iterations use time resampling (+ the fairdraw is done inside ILE itself), so different code path for final stages")
-parser.add_argument("--ile-args",default=None,help="filename of args_ile.txt file  which holds ILE arguments.  Should NOT conflict with arguments auto-set by this DAG ... in particular, i/o arguments will be modified")
-parser.add_argument("--ile-exe",default=None,help="filename of ILE or equivalent executable. Will default to `which integrate_likelihood_extrinsic` in low-level code")
-parser.add_argument("--ile-retries",default=0,type=int,help="Number of retry attempts for ILE jobs. (These can fail)")
-parser.add_argument("--ile-group-subdag",action='store_true',help="If true, ILE jobs are grouped in a subdag. Useful mainly to create iterative DAGs with random ILE jobs/etc")
-parser.add_argument("--ile-group-subdag-check-work",action='store_true',help="If true, provides a check for work for each subdag type.")
-parser.add_argument("--ile-post-exe",default=None,help="filename of script to execute with SCRIPT POST joblist scripname JOBID RETURN worklevel in the dag. This will be run after each ILE completes in the main dag. Objective is to enable larger-scale CIP queues which work efficiently to a target work level (e.g,, n_eff). Note this script is responsible for reading htcondor logs to identify the run directory!")
-parser.add_argument("--ile-condor-commands",default=None,type=str,help="File name for key value pairs for condor commands to add to ILE submit file")
-parser.add_argument("--general-retries",default=0,type=int,help="Number of retry attempts for internal jobs (convert, CIP, ...). (These can fail, albeit more rarely, usually due to filesystem problems)")
-parser.add_argument("--general-request-disk",default="10M",type=str,help="Request disk passed to condor. Must be done for all jobs now")
-parser.add_argument("--ile-request-disk",default="10M",type=str,help="Request disk passed to condor for ILE. Must be done for all jobs now")
-parser.add_argument("--cip-request-disk",default="10M",type=str,help="Request disk passed to condor for ILE. Must be done for all jobs now")
-parser.add_argument("--cal-request-disk",default="8G",type=str,help="Request disk passed to condor for ILE. Must be done for all jobs now")
-parser.add_argument("--ile-n-events-to-analyze",default=1,type=int,help="If >1, you are using ILE_batchmode.  Structures the DAG correctly to account for batch cadence")
-parser.add_argument("--ile-n-events-to-analyze-first",default=None,type=int,help="If provided, use a different number batch size for iteration 0. Can be helpful if initial grid is broad.")
-parser.add_argument("--ile-runtime-max-minutes",default=None,type=int,help="If not none, kills ILE jobs that take longer than the specified integer number of minutes. Do not use unless an expert")
-parser.add_argument("--cip-exe",default=None,help="filename of CIP or equivalent executable. Will default to `which util_ConstructIntrinsicPosterior_GenericCoordinates` in low-level code")
-parser.add_argument("--cip-post-exe",default=None,help="filename of script to execute with SCRIPT POST joblist scripname JOBID RETURN worklevel in the dag. This will be run after each CIP completes in the main dag. Objective is to enable larger-scale CIP queues which work efficiently to a target work level (e.g,, n_eff). Note this script is responsible for reading htcondor logs to identify the run directory!")
-parser.add_argument("--cip-exe-G",default=None,help="filename of CIP or equivalent executable, as ALTERNATE CIP used when a 'G' iteration is requested ")
-parser.add_argument("--use-eccentricity",default=False,action='store_true')
-parser.add_argument("--use-meanPerAno",default=False,action='store_true')
-parser.add_argument("--use-eccentricity-squared-sampling",default=False,action='store_true')
-parser.add_argument("--use-tabular-eos-file",default=False,action='store_true')
-parser.add_argument("--test-exe",default=None,help="filename of test code or equivalent executable.  Must have a --test-output <fname> argument.  Used for convergence testing or other termination. NOT ACTIVE; see 'convergence_test_samples.py' for example")
-parser.add_argument("--plot-exe",default=None,help="filename of plot code or equivalent executable.  Will default to `which plot_posterior_corner.py`.  Default is to plot last set of samples")
-parser.add_argument("--gridinit-exe",default=None,help="filename of initial grid creation or equivalent executable.  Will default to `which util_ManualOverlapGrid.py`.  Must create overlap-grid-0.xml.gz")
-parser.add_argument("--frame-rotation",action='store_true',help=" Calculation was done in J frame, not L frame.  Rotate at end. Only if extrinsic samples.")
-parser.add_argument("--calibration-reweighting-exe",default=None,help="Calibration reweighting script")
-parser.add_argument("--calibration-reweighting",action='store_true',help="Run calibration reweighting on final posterior samples")
-parser.add_argument("--calibration-reweighting-batchsize",type=int,default=None,help="If not 'None', tries to group the final set of points based on jobs of a fixed size")
-parser.add_argument("--calibration-reweighting-initial-extra-args",type=str,default='',help="Passed to reweighting script as extra args")
-parser.add_argument("--calibration-reweighting-extra-args",type=str,default='',help="Passed to the combination script as extra arguments. ")
-parser.add_argument("--calibration-reweighting-count",type=int,default=100,help="Number of calibration marginalization realizations. Default is 100")
-parser.add_argument("--calibration-reweighting-osg",action='store_true',help="Attempt to use settings for OSG for cal reweighting. Remove after developed")
-parser.add_argument("--convert-ascii2h5-exe",default=None,help="Converting ascii to h5 file script")
-parser.add_argument("--comov-distance-reweighting-exe",default=None,help="Comoving distance reweighting script")
-parser.add_argument("--comov-distance-reweighting",action='store_true',help="Run comoving distance reweighting on final posterior samples")
-parser.add_argument("--bilby-pickle-exe",default=None,help="Bilby pickle generation script")
-parser.add_argument("--bilby-ini-file",default=None,help="Filename of bilby ini file used to generate pickle file (used for calibration reweighting)")
-parser.add_argument("--bilby-pickle-file",default=None,help="Filename of bilby pickle file used for calibration reweighting")
-parser.add_argument("--bw-exe",default=None,help="BayesWave location (or wrapper)")
-parser.add_argument("--bw-post-exe",default=None,help="BayesWave location (or wrapper)")
-parser.add_argument("--fetch-ext-grid-exe",default=None,help="filename of command for fetching external grid. Must have an --output-file argument, which will be auto-set.")
-parser.add_argument("--test-args",default=None,help="filename of args_test.txt, which holds test arguments.  Note i/o arguments will be modified, so should NOT specify the samples files or the output file, just the test to be performed and any related arguments")
-parser.add_argument("--convert-args",default=None,help="filename of args_convert.txt, which holds arguments to the convert function. Needed if you plan to export tide informaton")
-parser.add_argument("--plot-args",default=None,help="filename of args_plot.txt, which holds plot arguments.  Note i/o arguments will be modified, so should NOT specify the samples files or the output file, just the test to be performed and any related arguments.  To create the posterior samples file, you probably need to run the convert job (e.g., via the tests)")
-parser.add_argument("--gridinit-args",default=None,help="arguments for grid creation or equivalent executable.  If empty, will attempt to use the --input-grid argument")
-parser.add_argument("--fetch-ext-grid-args",default=None,help="arguments for command  for fetching external grid. Don't over-specify the --output-file argument, which will be used by the pipeline")
-parser.add_argument("--ile-batch",action='store_true',help="Different workflow: the ILE job is assumed to create a .composite file directly in the current working directory. Designed for fake ILE sets and long-term batch workflow. NOT IMPLEMENTED ")
-parser.add_argument("--transfer-file-list",default=None,help="File containing list of *input* filenames to transfer, one name per file. Copied into transfer_files for condor directly.  If provided, also enables attempts to deduce files that need to be transferred for the pipeline to operate, as needed for OSG, etc")
-parser.add_argument("--request-memory-ILE",default=4096,type=int,help="Memory request for condor (in Mb) for ILE jobs.")
-parser.add_argument("--request-gpu-ILE",action='store_true',help="ILE will request a GPU. [Note: if you do this, your code will only run on GPU-enabled slots]")
-parser.add_argument("--request-xpu-ILE",action='store_true',help="ILE will *prefer* a gpu, but not require it")
-parser.add_argument("--request-memory-CIP",default=16384,type=int,help="Memory request for condor (in Mb) for fitting jobs.")
-parser.add_argument("--request-memory-CIP-flex",action='store_true',help="If true,memory request is flexible and will increment. Beware of possible runaway requests")
-parser.add_argument("--use-singularity",action='store_true',help="Attempts to use a singularity image in SINGULARITY_RIFT_IMAGE")
-parser.add_argument("--use-osg",action='store_true',help="Attempts to set up an OSG workflow.  Must submit from an osg allowed submit machine")
-parser.add_argument("--use-osg-cip",action='store_true',help="If use-osg, attempts to run CIP on OSG as well.  Default is to run OSG on local machines")
-parser.add_argument("--use-osg-simple-requirements",action='store_true',help="Uses an aggressive simplified requirements string for worker jobs")
-parser.add_argument("--condor-local-nonworker",action='store_true',help="Uses local universe for non-worker condor jobs. Important to run in non-NFS location, as other jobs don't have file transfer set up.  Required for public OSG")
-parser.add_argument("--condor-containerize-nonworker",action='store_true',help="Provides SingularityImage.  Note the intent is still to use on a *shared* filesystem, not to set up file transfer for the 'nonworker' intermediate glue jobs. Note the default is NOT to use SingularityBindCVMFS right now -- intended for workflows wanting to use a fixed container for all  components")
-parser.add_argument("--condor-local-nonworker-igwn-prefix",action='store_true', help="Adds some prefix text to start up cvmfs igwn environment, so local jobs have access to standard RIFT operators. Required for public OSG.")
-parser.add_argument("--condor-nogrid-nonworker",action='store_true',help="Uses local flocking for non-worker condor jobs. Important to run in non-NFS location, as other jobs don't have file transfer set up.")
-parser.add_argument("--frames-dir",default=None,help="If you are using synthetic dat")
-parser.add_argument("--cache-file",default=None,help="If you are using real data and have CVMS frames, you want to transfer the cache file over.")
-parser.add_argument("--use-oauth-files",default=None, type=str, help="Authentication method, written into condor file. Valid methods include 'scitokens' " )
-parser.add_argument("--use-cvmfs-frames",action='store_true',help="If true, require LIGO frames are present (usually via CVMFS). User is responsible for generating cache file compatible with it.")
-parser.add_argument('--n-copies',default=2,type=int,help="Number of duplicates of each ILE job, for redundant Monte Carlo")
-parser.add_argument('--n-iterations',default=3,type=int,help="Number of iterations to perform")
-parser.add_argument('--n-iterations-subdag-max',default=10,type=int,help="Number of iterations to perform in subdag, maximum ")
-parser.add_argument("--start-iteration",default=0,type=int,help="starting iteration. If >0, does not copy over --input-grid. DOES rewrite sub files.  This allows you to change the arguments provided (e.g., use more iterations or settings at late times). Note this overwrites the .sub files ")
-parser.add_argument('--n-samples-per-job',default=2000,type=int,help="Number of samples generated each iteration; also, number of ILE jobs run each iteration. Should increase with dimension of problem")
-parser.add_argument('--n-samples-per-job-threshold',default=None,type=int,help="If we get this many ILE evaluations, then we have done well enough and can move on; only used with auto-terminating framework.  Note this is applied everywhere")
-parser.add_argument('--neff-threshold',default=800,type=int,help="Number of samples generated each iteration")
-parser.add_argument("--workflow",default='single',help="[single|fit+posterior|full] describes workflow layout used.  'Single' is a single node, running the fit and posterior for each iteration; 'full' produces many followup jobs to produce a reliable posterior")
-parser.add_argument("--n-post-jobs",default=1,type=int,help="Number of posterior jobs. Used in posterior and fit+posterior workflows")
-parser.add_argument("--use-bw-psd",action='store_true',help="Use BW PSD, attempting to use fiducial arguments as in LI for placement (i.e., signal at seglen -2). Assumes LI style data convention.  Necessary BW will be parsed out of ile-args.txt (required)")
-parser.add_argument("--use-full-submit-paths",action='store_true',help="DAG created has full paths to submit files generated. Note this is implemented on a per-file/as-needed basis, mainly to facilitate using this dag as an external subdag")
-opts=  parser.parse_args()
-
-
-local_worker_universe="vanilla"
-local_worker_universe_cip = "vanilla"
-no_worker_grid=False
-if opts.condor_local_nonworker:
-    local_worker_universe="local"
-    local_worker_universe_cip="vanilla"  # usually CIP jobs are remote
-if opts.condor_nogrid_nonworker:
-    no_worker_grid=True
-
-if opts.cip_explode_jobs_subdag:
-    opts.cip_explode_jobs_dag =True
-    opts.cip_explode_jobs = 2 # nominal
-    
-extra_text = ''
-if opts.condor_local_nonworker_igwn_prefix:
-    extra_text += "source /cvmfs/software.igwn.org/conda/etc/profile.d/conda.sh \n"
-    extra_text +=  "conda activate igwn \n"
-
-if not opts.cip_explode_jobs_last:  # set realistic default, if not specified
-    opts.cip_explode_jobs_last = opts.cip_explode_jobs
-
-working_dir_inside_local = working_dir_inside = opts.working_directory
-working_dir_inside_cip = working_dir_inside_local
-out_dir_inside_cip = working_dir_inside_local
-if opts.cip_explode_jobs:
-    out_dir_inside_cip+= "/iteration_$(macroiteration)_cip/"
-os.chdir(opts.working_directory)
-# working_dir_inside_local is for jobs on local nodes
-if opts.use_singularity or opts.use_osg:
-    working_dir_inside = "./" # all files on the remote machine are in the current directory
-    if opts.use_osg_cip:
-        working_dir_inside_cip = "./"
-        out_dir_inside_cip = "./"
-
-singularity_image = None
-if opts.use_singularity:
-    print(" === USING SINGULARITY === ")
-    singularity_image = os.environ["SINGULARITY_RIFT_IMAGE"]  # must be present to use singularity
-    # SINGULARITY IMAGES ARE ON CVMFS, SO WE CAN AVOID THE SINGULARITY EXEC CALL
-    # hardcoding a fiducial copy of lalapps_path2cache; beware about the executable name change
-    os.environ['LALAPPS_PATH2CACHE'] = "/cvmfs/oasis.opensciencegrid.org/ligo/sw/conda/envs/igwn-py39/bin/lalapps_path2cache" #"singularity exec {singularity_image} lalapps_path2cache".format(singularity_image=singularity_image)
-    print(singularity_image)
-
-    # see https://computing.docs.ligo.org/guide/htcondor/credentials/
-    if 'igwn+osdf:' in singularity_image and not opts.use_oauth_files:
-        opts.use_oauth_files = 'igwn' # force-set this variable, to save time for end user. This will always be the case
-    elif 'osdf:' in singularity_image and not opts.use_oauth_files:
-        opts.use_oauth_files = 'scitokens' # force-set this variable, to save time for end user. This will always be the case
-
-if (opts.cip_args is None) and (opts.cip_args_list is None):
-    print(" No arguments provided for low-level job")
-    sys.exit(0)
-
-# Load args.txt. Remove first item.  Store
-if not (opts.cip_args is None):
-    with open(opts.cip_args) as f:
-        cip_args_list = f.readlines()
-    cip_args = ' '.join( [x.replace('\\','') for x in cip_args_list] )
-    cip_args = ' '.join(cip_args.split(' ')[1:])
-    # Some argument protection for later
-    cip_args = cip_args.replace('[', ' \'[')
-    cip_args = cip_args.replace(']', ']\'')
-    cip_args=cip_args.rstrip()
-    cip_args += ' --no-plots '  
-    print("CIP", cip_args)
-
-cip_args_lines = None
-cip_args_prefixes = []  # so it works correctly even in flat mode
-if not (opts.cip_args_list is None):
-    with open(opts.cip_args_list) as f:
-        cip_args_lines = f.readlines()
-    # if one of the prefixes is 'Z', we will be calling a RECURSIVE dag writer, warning!  It will go inside the corresponding CIP directory...
-    cip_args_prefixes = [(x.split(' ')[0]) for x in cip_args_lines]  # Pull off the integer
-    # 
-    cip_args_n = (cip_args_prefixes.copy())
-    creating_subdag_list = []
-    creating_subdag_arg_list = []
-    creating_G_list=[]
-    for indx in np.arange(len(cip_args_n)):
-        if cip_args_prefixes[indx][0]=='Z':
-            creating_subdag_list.append(indx)
-            creating_subdag_arg_list.append(' '.join(cip_args_lines[indx].split(' ')[1:]))
-            cip_args_n[indx] = 1  # one nominal iteration; the CIP stage in this iteration is replaced by 'run until convergence'
-        elif cip_args_prefixes[indx][0] =='G':
-            n_G = int(cip_args_prefixes[indx][1:])  # integer after G assumed
-            cip_args_n[indx] = int(n_G)
-            creating_G_list.append(indx)  # flag for alternate executable
-        else:
-            cip_args_n[indx]  = int(cip_args_n[indx])
-#    cip_args_n = [int(x.split(' ')[0]) for x in cip_args_lines]  # Pull off the integer
-    cip_args_lines = [' '.join(x.split(' ')[1:]) for x in cip_args_lines] # pull off the integer
-    cip_args_lines = [x.replace('[', ' \'[').replace(']', ']\'').rstrip() for x in cip_args_lines]
-    cip_args_lines = [x.lstrip() for x in cip_args_lines] # remove leading whitespace
-    for line in cip_args_lines:
-        print(" CIP ", line)
-
-    # Above only makes sense if last iteration isn't 'Z'
-    if np.sum(cip_args_n) < opts.n_iterations:
-        print(" Not enough CIP versions to complete all requested iterations; extending ")
-        cip_args_n[-1]  = opts.n_iterations - np.sum(cip_args_n)
-
-# Load args.txt. Remove first item.  Store
-with open(opts.ile_args) as f:
-    ile_args_list = f.readlines()
-ile_args = ' '.join( [x.replace('\\','') for x in ile_args_list] )
-ile_args = ' '.join(ile_args.split(' ')[1:])
-# Some argument protection for later
-ile_args = ile_args.replace('[', ' \'[')
-ile_args = ile_args.replace(']', ']\'')
-ile_args=ile_args.rstrip()
-if opts.request_gpu_ILE or opts.request_xpu_ILE:
-    ile_args+=" --vectorized --gpu "  # append this
-if opts.ile_n_events_to_analyze > 1:
-    ile_args+= " --n-events-to-analyze  $(macrongroup) " # + str(opts.ile_n_events_to_analyze)
-#ile_args += ' --soft-fail-event-range '  #important in case event is out of range. Simply exit NO ...not backwards compatible
-print("ILE", ile_args)
-
-puff_args=None
-puff_cadence = None
-puff_max_it = opts.puff_max_it
-if opts.puff_args and opts.puff_cadence:
-    puff_cadence = opts.puff_cadence
-    # Load args.txt. Remove first item.  Store
-    with open(opts.puff_args) as f:
-        puff_args_list = f.readlines()
-    puff_args = ' '.join( [x.replace('\\','') for x in puff_args_list] )
-    puff_args = ' '.join(puff_args.split(' ')[1:])
-# Some argument protection for later
-    puff_args = puff_args.replace('[', ' \'[')
-    puff_args = puff_args.replace(']', ']\'')
-    puff_args=puff_args.rstrip()
-    print("PUFF", puff_args)
-    print("PUFF CADENCE", puff_cadence)
-
-# Load args.txt. Remove first item.  Store
-fetch_args = None
-if opts.fetch_ext_grid_args and opts.fetch_ext_grid_exe:
-    with open(opts.fetch_ext_grid_args) as f:
-        fetch_args_list = f.readlines()
-    fetch_args = ' '.join( [x.replace('\\','') for x in fetch_args_list] )
-    fetch_args = ' '.join(fetch_args.split(' ')[1:])
-# Some argument protection for later
-    fetch_args = fetch_args.replace('[', ' \'[')
-    fetch_args = fetch_args.replace(']', ']\'')
-    fetch_args=fetch_args.rstrip()
-    print("FETCH", fetch_args)
-
-
-test_args=None
-if opts.test_args:
-    # Load args.txt. Remove first item.  Store
-    with open(opts.test_args) as f:
-        test_args_list = f.readlines()
-    test_args = ' '.join( [x.replace('\\','') for x in test_args_list] )
-    test_args = ' '.join(test_args.split(' ')[1:])
-# Some argument protection for later
-    test_args = test_args.replace('[', ' \'[')
-    test_args = test_args.replace(']', ']\'')
-    test_args=test_args.rstrip()
-    print("CONVERGE", test_args)
-
-plot_args=None
-if opts.plot_args:
-    # Load args.txt. Remove first item.  Store
-    with open(opts.plot_args) as f:
-        plot_args_list = f.readlines()
-    plot_args = ' '.join( [x.replace('\\','') for x in plot_args_list] )
-    plot_args = ' '.join(plot_args.split(' ')[1:])
-# Some argument protection for later
-    plot_args = plot_args.replace('[', ' \'[')
-    plot_args = plot_args.replace(']', ']\'')
-    plot_args=plot_args.rstrip()
-    print("PLOT", plot_args)
-
-convert_args=None
-if opts.convert_args:
-    # Load args.txt. Remove first item.  Store
-    with open(opts.convert_args) as f:
-        convert_args_list = f.readlines()
-    convert_args = ' '.join( [x.replace('\\','') for x in convert_args_list] )
-    convert_args = ' '.join(convert_args.split(' ')[1:])
-# Some argument protection for later
-    convert_args = convert_args.replace('[', ' \'[')
-    convert_args = convert_args.replace(']', ']\'')
-    convert_args=convert_args.rstrip()
-    print("CONVERT", convert_args)
-
-gridinit_args=None
-if opts.gridinit_args:
-    # Load args.txt. Remove first item.  Store
-    with open(opts.gridinit_args) as f:
-        gridinit_args_list = f.readlines()
-    gridinit_args = ' '.join( [x.replace('\\','') for x in gridinit_args_list] )
-    gridinit_args = ' '.join(gridinit_args.split(' ')[1:])
-# Some argument protection for later
-    gridinit_args = gridinit_args.replace('[', ' \'[')
-    gridinit_args = gridinit_args.replace(']', ']\'')
-    gridinit_args=gridinit_args.rstrip()
-    print("GRIDINIT", gridinit_args)
-
-
-# Copy seed grid into place as overlap-grid-0.xml.gz
-it_start = opts.start_iteration
-n_initial = opts.n_samples_per_job
-if (it_start == 0) and not gridinit_args:
-    shutil.copyfile(opts.input_grid,"overlap-grid-0.xml.gz")  # put in working directory !
-    n_initial = len(lalsimutils.xml_to_ChooseWaveformParams_array("overlap-grid-0.xml.gz"))
-
-transfer_file_names = []
-if not (opts.transfer_file_list is None):
-    transfer_file_names=[]
-    # Load args.txt. Remove first item.  Store
-    with open(opts.transfer_file_list) as f:
-        for  line in f.readlines():
-            transfer_file_names.append(line.rstrip())
-    print(" Input files to transfer to job working directory (note!)", transfer_file_names)
-
-###
-### Fiducial fit job (=sanity check that code will run)
-###
-if (opts.cip_args is None):
-    cip_args = cip_args_lines[0]
-cmdname="%s/command-single_fit.sh" % opts.working_directory
-cmd = open(cmdname, 'w')
-arg_list = cip_args
-exe = dag_utils.which("util_ConstructIntrinsicPosterior_GenericCoordinates.py")
-cmd.write('#!/usr/bin/env bash\n')
-cmd.close()
-st = os.stat(cmdname)
-import stat
-os.chmod(cmdname, st.st_mode | stat.S_IEXEC)
-
-
-cmdname="%s/command-single.sh" % opts.working_directory
-cmd = open(cmdname, 'w')
-arg_list = ile_args
-arg_list = arg_list.replace('$(macrongroup)','5')
-exe = dag_utils.which("integrate_likelihood_extrinsic")
-if opts.ile_n_events_to_analyze:
-    exe = dag_utils.which("integrate_likelihood_extrinsic_batchmode")
-cmd.write('#!/usr/bin/env bash\n')
-cmd.write('# (run me only in working directories)\n')
-cmd.write(exe + ' ' + arg_list + " --sim-xml overlap-grid-0.xml.gz")  # make it concrete, so I can test it
-cmd.close()
-st = os.stat(cmdname)
-import stat
-os.chmod(cmdname, st.st_mode | stat.S_IEXEC)
-# Add argument to ile_args.txt to 
-#    identify overlap grid input
-#    identify output file names (?)
-ile_args_orig = ile_args  # provides ability to strip out the output and replace it with alternate
-ile_args+= ' --sim-xml ' + working_dir_inside + '/overlap-grid-$(macroiteration).xml.gz '
-ile_args_forpuff= ile_args_orig + ' --sim-xml ' + working_dir_inside + '/puffball-$(macroiteration).xml.gz '
-ile_args_forfetch= ile_args_orig + ' --sim-xml ' + working_dir_inside + '/fetch-$(macroiteration).xml.gz '
-
-
-###
-### DAG generation
-###
-
-dag = DAG(log=os.getcwd())
-
-
-# Make PSD, if requested
-if opts.use_bw_psd:
-    print(" ===> Adding BW PSD to pipeline <=== ")
-    opts_ile = parse_ile_args_for_bw(ile_args_orig)
-
-    channel_names = {}
-    for inst, chan in [c.split("=") for c in opts_ile.channel_name]:
-        channel_names[inst] = chan
-
-    flow_ifo_dict = {}
-    if opts_ile.fmin_ifo:
-        for inst, freq_str in [c.split("=") for c in opts_ile.fmin_ifo]:
-           flow_ifo_dict[inst]=float(freq_str) 
-
-    
-    channel_dict_bw = {}
-    for ifo in list(channel_names.keys()):
-        fmin_here= opts_ile.fmin_template
-        if ifo in list(flow_ifo_dict.keys()):
-            fmin_here  = flow_ifo_dict[ifo]
-        channel_dict_bw[ifo]= [channel_names[ifo], fmin_here]
-    
-    # make BW directory
-    bw_dir = opts.working_directory+"/make_bw_psds"
-    mkdir(bw_dir)
-    for ifo in list(channel_names.keys()):
-        mkdir(bw_dir+"/"+ifo)
-
-    # seglen computation
-    seglen  = opts_ile.data_end_time - opts_ile.data_start_time
-
-    # Write bw sub files
-    bw_universe='local'
-    if no_worker_grid:
-        bw_universe='vanilla'
-    bw_job, bw_job_name = dag_utils.write_psd_sub_BW_monoblock(cache_file=opts_ile.cache_file,psd_length=seglen,srate=opts_ile.srate,event_time=opts_ile.event_time,log_dir=bw_dir+"/",exe=opts.bw_exe,no_grid=no_worker_grid,universe=bw_universe)
-    bw_job.add_condor_cmd("initialdir",bw_dir+"/$(ifo)")
-    bw_job.add_condor_cmd('request_disk',opts.general_request_disk)
-    if opts.use_full_submit_paths:
-        fname = opts.working_directory+"/"+bw_job.get_sub_file()
-        bw_job.set_sub_file(fname)
-    bw_job.write_sub_file()
-
-    # bw_job, bw_job_name = dag_utils.write_psd_sub_BW_step0(channel_dict=channel_dict_bw,cache_file=opts_ile.cache_file,psd_length=seglen,srate=opts_ile.srate,event_time=opts_ile.event_time,log_dir=bw_dir+"/",exe=opts.bw_exe)
-    # bw2_job, bw2_job_name=dag_utils.write_psd_sub_BW_step1(channel_dict=channel_dict_bw,cache_file=opts_ile.cache_file,psd_length=seglen,srate=opts_ile.srate,event_time=opts_ile.event_time,log_dir=bw_dir+"/",exe=opts.bw_post_exe)
-    # bw_job.add_condor_cmd("initialdir",bw_dir)
-    # bw2_job.add_condor_cmd("initialdir",bw_dir)
-    # bw2_job.write_sub_file()
-
-    # write convert command
-#    event_time_trunc ='%.2f' % float(opts_ile.event_time)   # format so only 3 digits after decimal 
-#    event_time_trunc += '0'  # add last character, always zero?
-#    convert_psd_job, convert_psd_job_name = dag_utils.write_convertpsd_sub(ifo="$(ifo)",file_input=bw_dir+"/" + str(event_time_trunc) + "_$(ifo)-PSD.dat",log_dir=bw_dir+"/")
-    convert_psd_job, convert_psd_job_name = dag_utils.write_convertpsd_sub(ifo="$(ifo)",file_input=bw_dir+"/$(ifo)/$(ifo)_fairdraw_psd.dat",log_dir=bw_dir+"/$(ifo)/")
-    convert_psd_job.add_condor_cmd('initialdir',working_dir_inside)
-    convert_psd_job.add_condor_cmd('request_disk',opts.general_request_disk)
-    if opts.use_full_submit_paths:
-        fname = opts.working_directory+"/"+convert_psd_job.get_sub_file()
-        convert_psd_job.set_sub_file(fname)
-    convert_psd_job.write_sub_file()
-
-
-# Make directories for all iterations
-subdag_dir = opts.working_directory+"/various_subdags"
-mkdir(subdag_dir)
-for indx in np.arange(it_start,opts.n_iterations+1):
-    ile_dir = opts.working_directory+"/iteration_"+str(indx)+"_ile"
-    cip_dir = opts.working_directory+"/iteration_"+str(indx)+"_cip"
-    consolidate_dir = opts.working_directory+"/iteration_"+str(indx)+"_con"
-#    convert_dir = opts.working_directory+"/iteration_"+str(indx)+"_change"
-    mkdir(ile_dir); mkdir(ile_dir+"/logs")
-    mkdir(cip_dir);  mkdir(cip_dir+"/logs")
-    mkdir(consolidate_dir); mkdir(consolidate_dir+"/logs")
-#    mkdir(change_dir); mkdir(change_dir+"/logs")
-
-    if opts.test_args:
-        test_dir = opts.working_directory+"/iteration_"+str(indx)+"_test"
-        mkdir(test_dir); mkdir(test_dir+'/logs')
-        
-    if opts.plot_args:  # Overkill: currently only making plots on last iteration
-        plot_dir = opts.working_directory+"/iteration_"+str(indx)+"_plot"
-        mkdir(plot_dir); mkdir(plot_dir+'/logs')
-
-# make calmarg directory and log directory
-if opts.calibration_reweighting:
-    cal_dir = opts.working_directory+"/calmarg"
-    mkdir(cal_dir)
-    mkdir(cal_dir+"/logs")
-
-# ++++
-# Create workflow tasks
-# ++++
-
-##   ILE job
-
-ile_exe =opts.ile_exe
-ile_condor_commands=None
-if opts.ile_condor_commands and os.path.exists(opts.ile_condor_commands):
-    lines =None
-    ile_condor_commands = {}
-    with open(opts.ile_condor_commands, 'r') as f:
-      for line in f:
-        key, value = line.split(" ", 1) # force a single splitf
-        ile_condor_commands[key] = value
-    # reset to None if none provided
-    if len(ile_condor_commands) < 1:
-        ile_condor_commands = None
-if (opts.ile_n_events_to_analyze > 1) and (exe is None):
-    ile_exe = dag_utils.which("integrate_likelihood_extrinsic_batchmode")
-output_file_names = None
-request_disk=opts.ile_request_disk
-if opts.use_singularity or opts.use_osg:
-    transfer_file_names.append("../overlap-grid-$(macroiteration).xml.gz")
-    #request_disk=4
-    #output_file_names = ','.join(["CME_out-$(macroevent)-$(cluster)-$(process).xml_{0}_.dat".format(x) for x in np.arange(opts.ile_n_events_to_analyze)])
-    #print "OUTPUT FILES ", output_file_names
-ile_job, ile_job_name = dag_utils.write_ILE_sub_simple(tag='ILE',log_dir=None,arg_str=ile_args,output_file="CME_out.xml",ncopies=opts.n_copies,exe=ile_exe,transfer_files=transfer_file_names,transfer_output_files=output_file_names,request_memory=opts.request_memory_ILE,request_disk=request_disk,request_gpu=opts.request_gpu_ILE,request_cross_platform=opts.request_xpu_ILE,use_singularity=opts.use_singularity,singularity_image=singularity_image,use_osg=opts.use_osg,simple_osg_requirements=opts.use_osg_simple_requirements,frames_dir=opts.frames_dir,cache_file=opts.cache_file,use_cvmfs_frames=opts.use_cvmfs_frames,use_oauth_files=opts.use_oauth_files,max_runtime_minutes=opts.ile_runtime_max_minutes,condor_commands=ile_condor_commands)
-# Modify: create macro for iteration 
-#   - added on a per-node basis
-# Modify: add macro argument for overlap grid to be used (kept in top-level directory)
-# Modify: set 'initialdir'
-#     NOTE: logs will be specified relative to this directory
-ile_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_ile")
-# Modify output argument: change logs and working directory to be subdirectory for the run
-ile_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/ILE-$(macroevent)-$(cluster)-$(process).log")
-ile_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/ILE-$(macroevent)-$(cluster)-$(process).err")
-ile_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/ILE-$(macroevent)-$(cluster)-$(process).out")
-if opts.use_full_submit_paths:
-    fname = opts.working_directory+"/"+ile_job.get_sub_file()
-    ile_job.set_sub_file(fname)
-ile_job.write_sub_file()
-
-if not (opts.puff_args is None):
-    transfer_file_names_puff = list(transfer_file_names[:-1])
-    transfer_file_names_puff.append(opts.working_directory+"/puffball-$(macroiteration).xml.gz") # could also use ../puffball, because relative is ok to initialdir
-    # Write a version of the ILE job that uses puffball inputs ... IDENTICAL to code above for standard ILE case,just different argument for input
-    # Yes, it would logically be simpler to make overlap-grid.xml.gz larger ... but I want to keep 'real samples' and 'puffed samples' seperate.
-    ilePuff_job, ilePuff_job_name = dag_utils.write_ILE_sub_simple(tag='ILE_puff',log_dir=None,arg_str=ile_args_forpuff,output_file="CME_puff_out.xml",ncopies=opts.n_copies,exe=ile_exe,transfer_files=transfer_file_names_puff,request_memory=opts.request_memory_ILE,request_disk=request_disk,request_gpu=opts.request_gpu_ILE,request_cross_platform=opts.request_xpu_ILE,use_singularity=opts.use_singularity,singularity_image=singularity_image,use_osg=opts.use_osg,simple_osg_requirements=opts.use_osg_simple_requirements,frames_dir=opts.frames_dir,cache_file=opts.cache_file,use_cvmfs_frames=opts.use_cvmfs_frames,use_oauth_files=opts.use_oauth_files,max_runtime_minutes=opts.ile_runtime_max_minutes,condor_commands=ile_condor_commands)
-    ilePuff_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_ile")
-    ilePuff_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/ILE_puff-$(macroevent)-$(cluster)-$(process).log")
-    ilePuff_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/ILE_puff-$(macroevent)-$(cluster)-$(process).err")
-    ilePuff_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/ILE_puff-$(macroevent)-$(cluster)-$(process).out")
-    if opts.use_full_submit_paths:
-        fname = opts.working_directory+"/"+ilePuff_job.get_sub_file()
-        ilePuff_job.set_sub_file(fname)
-    ilePuff_job.write_sub_file()
-
-if not (fetch_args is None):
-    transfer_file_names_fetch = list(transfer_file_names[:-1])
-    transfer_file_names_fetch.append(opts.working_directory+"/fetch-$(macroiteration).xml.gz") # could also use ../puffball, because relative is ok to initialdir
-    # Write a version of the ILE job that uses puffball inputs ... IDENTICAL to code above for standard ILE case,just different argument for input
-    # Yes, it would logically be simpler to make overlap-grid.xml.gz larger ... but I want to keep 'real samples' and 'puffed samples' seperate.
-    ileFetch_job, ileFetch_job_name = dag_utils.write_ILE_sub_simple(tag='ILE_fetch',log_dir=None,arg_str=ile_args_forfetch,output_file="CME_fetch_out.xml",ncopies=opts.n_copies,exe=ile_exe,transfer_files=transfer_file_names_fetch,request_memory=opts.request_memory_ILE,request_disk=request_disk,request_gpu=opts.request_gpu_ILE,request_cross_platform=opts.request_xpu_ILE,use_singularity=opts.use_singularity,singularity_image=singularity_image,use_osg=opts.use_osg,simple_osg_requirements=opts.use_osg_simple_requirements,frames_dir=opts.frames_dir,cache_file=opts.cache_file,use_cvmfs_frames=opts.use_cvmfs_frames,oauth_files=opts.use_oauth_files,max_runtime_minutes=opts.ile_runtime_max_minutes,condor_commands=ile_condor_commands)
-    ileFetch_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_ile")
-    ileFetch_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/ILE_fetch-$(macroevent)-$(cluster)-$(process).log")
-    ileFetch_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/ILE_fetch-$(macroevent)-$(cluster)-$(process).err")
-    ileFetch_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/ILE_fetch-$(macroevent)-$(cluster)-$(process).out")
-    if opts.use_full_submit_paths:
-        fname = opts.working_directory+"/"+ileFetch_job.get_sub_file()
-        ileFetch_job.set_sub_file(fname)
-    ileFetch_job.write_sub_file()
-
-
-if  (opts.last_iteration_extrinsic):
-    n_points_per_ILE = opts.last_iteration_extrinsic_samples_per_ile
-    neff_orig = parse_ile_args_lazy(ile_args_orig,'n-eff') # parse to get originally requested n_eff, as string
-    print(" Orig n_eff ", neff_orig, n_points_per_ILE)
-    n_eff_last = np.min([2*n_points_per_ILE,opts.last_iteration_extrinsic_samples_per_ile])  # make n_eff large enough to get all samples requested. Provide cap in case user requests very vew extrinsic workers.
-    if isinstance(neff_orig,str):
-        n_eff_last  = np.max([n_eff_last, int(neff_orig)]) 
-    # # ILE job with modified output format
-    # #  - note we *double* the memory request, because we need space to save samples
-    # #  - note
-    # opts_ile = parse_ile_args_flexible(ile_args_orig) # parse to get originally requested n_eff, cache-file
-    # n_eff_last = np.min([2*n_points_per_ILE,opts.last_iteration_extrinsic_samples_per_ile])  # make n_eff large enough to get all samples requested. Provide cap in case user requests very vew extrinsic workers.
-    # if hasattr(opts_ile, 'n_eff'):
-    #     n_eff_last  = np.max([n_eff_last, opts_ile.n_eff]) # NEVER USE FEWER n_eff THAN WAS USED FOR intrinsic run. Fallback to no change in case (never happens) not in arg list
-    ile_args_extr = ile_args + " --save-P 0.01 --save-samples --n-eff  "  +str(n_eff_last)  # modify convergence criteria so output of reasonable size/matching needs of extrinsic output
-    #  - note we *disable* --no-adapt-after-first (if present), so each point is independent (e.g., in sky location)
-    ile_args_extr = ile_args_extr.replace('--no-adapt-after-first','')
-    if opts.last_iteration_extrinsic_time_resampling:
-        ile_args_extr += " --resample-time-marginalization --fairdraw-extrinsic-output --fairdraw-extrinsic-output-n-max {} ".format(n_points_per_ILE)
-        print(" Time resampling in extraction iteration: **DISABLING** distance marginalization if present ")
-        ile_args_extr = ile_args_extr.replace("--distance-marginalization ", ' ')  # *currently* cannot use distance marginalization in the last step if we want fairdraw output.  Note the *unfairdraw* output can ignore this ...
-    ileExtr_job, ileExtr_job_name = dag_utils.write_ILE_sub_simple(tag='ILE_extr',log_dir=None,arg_str=ile_args_extr,output_file="EXTR_out.xml",simple_unique=True,ncopies=1,exe=ile_exe,transfer_files=transfer_file_names,request_memory=opts.request_memory_ILE*2,request_gpu=opts.request_gpu_ILE,request_cross_platform=opts.request_xpu_ILE,use_cvmfs_frames=opts.use_cvmfs_frames,request_disk=request_disk,use_singularity=opts.use_singularity,singularity_image=singularity_image,use_osg=opts.use_osg,simple_osg_requirements=opts.use_osg_simple_requirements,use_oauth_files=opts.use_oauth_files,frames_dir=opts.frames_dir,cache_file=opts.cache_file)
-    ileExtr_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_ile")
-    ileExtr_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/ILEextr-$(macroevent)-$(cluster)-$(process).log")
-    ileExtr_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/ILEextr-$(macroevent)-$(cluster)-$(process).err")
-    ileExtr_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/ILEextr-$(macroevent)-$(cluster)-$(process).out")
-    if opts.use_full_submit_paths:
-        fname = opts.working_directory+"/"+ileExtr_job.get_sub_file()
-        ileExtr_job.set_sub_file(fname)
-    ileExtr_job.write_sub_file()
-    
-    # Convert task 
-    if opts.last_iteration_extrinsic_time_resampling:
-        # very simple: igwn_ligolw add on all final output, then a single convert
-        convert_args_extr =  " --convention LI  --export-cosmology --use-interpolated-cosmology " 
-        if not (convert_args is None):
-            convert_args_extr += convert_args
-        # batched convert shell script
-        relevant_path = dag_utils.which('util_JoinExtrXML.py')
-        relevant_path_2 = dag_utils.which('convert_output_format_ile2inference')
-        if opts.condor_local_nonworker_igwn_prefix:
-            # we are in igwn environment
-            relevant_path = 'util_JoinExtrXML.py'  
-            relevant_path_2 = 'convert_output_format_ile2inference'
-        # randomization: 'shuf' is preferred, but otherwise use ' sort -R'. Note performed locally, so local filesystem/os is fine.
-        extra_shuffle_command = ' | cat'
-        which_shuf = which('shuf'); which_sort = which('sort')
-        if isinstance(which_shuf, str):
-            extra_shuffle_command = ' | {} '.format(which_shuf)
-        elif isinstance(which_sort,str):
-            extra_shuffle_command  = ' | {} -R '.format(which_sort)
-        with open("allinone_convert.sh",'w') as f:
-            f.write(f"""#! /bin/bash
-{extra_text}
-{relevant_path} ./iteration_$1_ile/'EXTR_out-*.xml_*_.xml.gz' --output ./tmp_converted.xml.gz
-{relevant_path_2} {convert_args_extr} ./tmp_converted.xml.gz  > ./tmp_converted.dat
-head -n 1 ./tmp_converted.dat > ./extrinsic_posterior_samples.dat
-sed 1d ./tmp_converted.dat  {extra_shuffle_command} >> ./extrinsic_posterior_samples.dat
-""")#.format(extra_text,relevant_path,'.','.',relevant_path_2,convert_args_extr,opts.working_directory,extra_shuffle_command,opts.working_directory))
-        os.system("chmod a+x allinone_convert.sh")
-        # Create job submit file
-        batchConvertExtr_job, batchConvertExtr_job_name = dag_utils.write_convert_sub(exe=opts.working_directory+"/allinone_convert.sh",tag='convert_extr',log_dir=None,arg_str='',file_input="$(macroiteration) ",file_output="/dev/null", out_dir=opts.working_directory,universe=local_worker_universe,no_grid=no_worker_grid)
-        batchConvertExtr_job.add_condor_cmd("initialdir",opts.working_directory)
-        batchConvertExtr_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/batchconvert-$(macroevent).log")
-        batchConvertExtr_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/batchconvert-$(macroevent).err")
-        batchConvertExtr_job.add_condor_cmd('request_disk',opts.general_request_disk)
-        if opts.use_full_submit_paths:
-            fname = opts.working_directory+"/"+batchConvertExtr_job.get_sub_file()
-            batchConvertExtr_job.set_sub_file(fname)
-        batchConvertExtr_job.write_sub_file()
-    elif opts.last_iteration_extrinsic_batched_convert:
-        # Batched conversion: one job for a great many items
-        convert_args_extr =  " --convention LI  --export-cosmology --use-interpolated-cosmology " 
-        if opts.last_iteration_extrinsic_samples_per_ile:
-            convert_args_extr += " --n-output-samples-per-file {}".format(opts.last_iteration_extrinsic_samples_per_ile)
-        if not (convert_args is None):
-            convert_args_extr += convert_args
-        relevant_path = dag_utils.which('util_BatchConvertResampleILEOutput.py')
-        if opts.condor_local_nonworker_igwn_prefix:
-            # we are in igwn environment
-            relevant_path = 'util_BatchConvertResampleILEOutput.py'
-        # batched convert shell script
-        with open("batch_convert.sh",'w') as f:
-            f.write("""#! /bin/bash
-{}
-{} {}/iteration_$1_ile/EXTR_out-$2.xml_*_.xml.gz {}
-""".format(extra_text,relevant_path,opts.working_directory,convert_args_extr))
-        os.system("chmod a+x batch_convert.sh")
-        # Create job submit file
-        batchConvertExtr_job, batchConvertExtr_job_name = dag_utils.write_convert_sub(exe=opts.working_directory+"/batch_convert.sh",tag='convert_extr',log_dir=None,arg_str='',file_input="$(macroiteration) $(macroevent)",file_output=opts.working_directory+"/iteration_$(macroiteration)_ile/EXTR_out-$(macroevent).downsampled_dat.dat", out_dir=opts.working_directory+"/iteration_$(macroiteration)_ile/",universe=local_worker_universe,no_grid=no_worker_grid)
-        batchConvertExtr_job.add_condor_cmd("initialdir",opts.working_directory)
-        batchConvertExtr_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/batchconvert-$(macroevent).log")
-        batchConvertExtr_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/batchconvert-$(macroevent).err")
-        batchConvertExtr_job.add_condor_cmd('request_disk',opts.general_request_disk)
-        if opts.use_full_submit_paths:
-            fname = opts.working_directory+"/"+batchConvertExtr_job.get_sub_file()
-            batchConvertExtr_job.set_sub_file(fname)
-        batchConvertExtr_job.write_sub_file()
-    else:
-        # Old style convert
-        convert_args_extr =  " --convention LI  --export-cosmology --use-interpolated-cosmology " 
-        if not (convert_args is None):
-            convert_args_extr += convert_args
-        convertExtr_job, convertExtr_job_name = dag_utils.write_convert_sub(tag='convert_extr',log_dir=None,arg_str=convert_args_extr,file_input=opts.working_directory+"/iteration_$(macroiteration)_ile/EXTR_out-$(macroevent).xml_$(macroindx)_.xml.gz",file_output=opts.working_directory+"/iteration_$(macroiteration)_ile/EXTR_out-$(macroevent).xml_$(macroindx)_.dat", out_dir=opts.working_directory+"/iteration_$(macroiteration)_ile/",universe=local_worker_universe,no_grid=no_worker_grid)
-        convertExtr_job.add_condor_cmd("initialdir",opts.working_directory)
-        convertExtr_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/convert-$(macroevent)-$(macroindx).log")
-        convertExtr_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/convert-$(macroevent)-$(macroindx).err")
-        convertExtr_job.add_condor_cmd('request_disk',opts.general_request_disk)
-        if opts.use_full_submit_paths:
-            fname = opts.working_directory+"/"+convertExtr_job.get_sub_file()
-            convertExtr_job.set_sub_file(fname)
-        convertExtr_job.write_sub_file()
-
-        # Resample task 
-        resample_args = ' --n-output-samples  ' + str(n_points_per_ILE)  # pick 5 random points from each ILE run
-        resample_job, resample_job_name = dag_utils.write_resample_sub('resample',log_dir=None,arg_str=resample_args,file_input=opts.working_directory+"/iteration_$(macroiteration)_ile/EXTR_out-$(macroevent).xml_$(macroindx)_.dat",file_output=opts.working_directory+"/iteration_$(macroiteration)_ile/EXTR_out-$(macroevent).xml_$(macroindx)_.downsampled_dat",universe=local_worker_universe,no_grid=no_worker_grid)
-        resample_job.add_condor_cmd("initialdir",opts.working_directory)
-        resample_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/resample-$(macroevent)-$(macroindx).log")
-        resample_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/resample-$(macroevent)-$(macroindx).err")
-        resample_job.add_condor_cmd('request_disk',opts.general_request_disk)
-        if opts.use_full_submit_paths:
-            fname = opts.working_directory+"/"+resample_job.get_sub_file()
-            resample_job.set_sub_file(fname)
-        resample_job.write_sub_file()
-
-    # Combination task at end -- probably should be a general utility
-    cat_job, cat_job_name = dag_utils.write_cat_sub(file_prefix='EXTR', file_postfix='.downsampled_dat.dat',file_output='extrinsic_posterior_samples.dat',universe=local_worker_universe,no_grid=no_worker_grid)
-    cat_job.add_condor_cmd("initialdir",opts.working_directory)
-    cat_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/cat-$(cluster)-$(process).log")
-    cat_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/cat-$(cluster)-$(process).out")
-    cat_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_ile/logs/cat-$(cluster)-$(process).err")
-    cat_job.add_condor_cmd('request_disk',opts.general_request_disk)
-    if opts.use_full_submit_paths:
-        fname = opts.working_directory+"/"+cat_job.get_sub_file()
-        cat_job.set_sub_file(fname)
-    cat_job.write_sub_file()
-
-
-##   Consolidate job(s)
-#   - consolidate output of single previous job
-con_arg_str = ''
-if opts.use_eccentricity:
-    con_arg_str += " --eccentricity "
-    if opts.use_meanPerAno:
-        con_arg_str += " --meanPerAno "
-if opts.use_tabular_eos_file:
-    con_arg_str += " --tabular-eos-file "
-con_job, con_job_name = dag_utils.write_consolidate_sub_simple(tag='join',log_dir=None,arg_str=con_arg_str,base=opts.working_directory+"/iteration_$(macroiteration)_ile", target=opts.working_directory+'/consolidated_$(macroiteration)',universe=local_worker_universe,no_grid=no_worker_grid,extra_text=extra_text)
-# Modify: set 'initialdir' : should run in top-level direcory
-#con_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_con")
-# Modify output argument: change logs and working directory to be subdirectory for the run
-con_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_con/logs/con-$(cluster)-$(process).log")
-con_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_con/logs/con-$(cluster)-$(process).err")
-con_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_con/logs/con-$(cluster)-$(process).out")
-con_job.add_condor_cmd('request_disk',opts.general_request_disk)
-if opts.use_full_submit_paths:
-    fname = opts.working_directory+"/"+con_job.get_sub_file()
-    con_job.set_sub_file(fname)
-if opts.condor_containerize_nonworker and singularity_image:
-    con_job.add_condor_cmd("+SingularityImage", '"' + singularity_image + '"')
-con_job.write_sub_file()
-
-##   Unify job
-#   - update 'all.net' to include all previous events
-unify_arg_str = ''
-if opts.use_eccentricity:
-    unify_arg_str += " --eccentricity "
-    if opts.use_meanPerAno:
-        unify_arg_str += " --meanPerAno "
-if opts.use_tabular_eos_file:
-    unify_arg_str += " --tabular-eos-file "
-unify_job, unify_job_name = dag_utils.write_unify_sub_simple(tag='unify',log_dir='',arg_str=unify_arg_str, base=opts.working_directory, target=opts.working_directory+'/all.net',universe=local_worker_universe,no_grid=no_worker_grid,extra_text=extra_text)
-unify_job.add_condor_cmd("initialdir",opts.working_directory)
-unify_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_con/logs/unify-$(cluster)-$(process).log")
-unify_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_con/logs/unify-$(cluster)-$(process).err")
-unify_job.set_stdout_file(opts.working_directory+"/all.net")
-unify_job.add_condor_cmd('request_disk',opts.general_request_disk)
-if opts.use_full_submit_paths:
-    fname = opts.working_directory+"/"+unify_job.get_sub_file()
-    unify_job.set_sub_file(fname)
-if opts.condor_containerize_nonworker and singularity_image:
-    unify_job.add_condor_cmd("+SingularityImage", '"' + singularity_image + '"')
-unify_job.write_sub_file()
-
-
-exe_evidence = which('util_CIPDirSummarizeEvidence.py')
-evidence_job, evidence_job_name = dag_utils.write_convert_sub(tag='evidence', exe=exe_evidence, log_dir='', file_input='',  arg_str=" --cip-dir iteration_$(macroiteration)_cip --output evidence_$(macroiteration) ",  universe=local_worker_universe, no_grid=True)
-evidence_job.add_condor_cmd("initialdir",opts.working_directory)
-evidence_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_con/logs/evidence-$(cluster)-$(process).log")
-evidence_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_con/logs/evidence-$(cluster)-$(process).err")
-evidence_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_con/logs/evidence-$(cluster)-$(process).out")
-evidence_job.add_condor_cmd('request_disk',opts.general_request_disk)
-if opts.use_full_submit_paths:
-    fname = opts.working_directory+"/"+evidence_job.get_sub_file()
-    evidence_job.set_sub_file(fname)
-if opts.condor_containerize_nonworker and singularity_image:
-    evidence_job.add_condor_cmd("+SingularityImage", '"' + singularity_image + '"')
-evidence_job.write_sub_file()
-
-
-##   Fit job: default case
-cip_args_base = cip_args
-cip_request_disk=opts.general_request_disk
-if opts.cip_request_disk:
-    cip_request_disk = opts.cip_request_disk
-#out_dir_base = opts.working_directory
-cip_exe =opts.cip_exe
-cip_exe_master = None
-if opts.cip_exe:
-    cip_exe_master = "{}".format(cip_exe) # force clpy, so NOT BY REFERENCE
-cip_no_grid = no_worker_grid
-if opts.use_osg_cip:
-    cip_no_grid = False
-if (opts.cip_explode_jobs):
-    if not opts.cip_explode_jobs_flat:
-        cip_args_base += " --fit-save-gp " + out_dir_inside_cip + "/my_fit"
-    else:
-        cip_exe_master = "/bin/true"
-#    out_dir_base += "/iteration_$(macroiteration)_cip/"
-cip_job, cip_job_name = dag_utils.write_CIP_sub(tag='CIP',log_dir=None,arg_str=cip_args_base,request_memory=opts.request_memory_CIP,input_net=working_dir_inside_cip+'/all.net',output='overlap-grid-$(macroiterationnext)',out_dir=out_dir_inside_cip,exe=cip_exe_master,universe=local_worker_universe_cip,no_grid=cip_no_grid,use_osg=opts.use_osg_cip,use_singularity=opts.use_osg_cip and opts.use_singularity,singularity_image=singularity_image,use_simple_osg_requirements=opts.use_osg_cip,use_oauth_files=opts.use_oauth_files,transfer_files=['../all.net'])
-# Modify: set 'initialdir'
-cip_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_cip")
-# Modify output argument: change logs and working directory to be subdirectory for the run
-cip_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_cip/logs/cip-$(cluster)-$(process).log")
-cip_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_cip/logs/cip-$(cluster)-$(process).err")
-cip_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_cip/logs/cip-$(cluster)-$(process).out")
-cip_job.add_condor_cmd('request_disk',opts.cip_request_disk)
-if opts.cip_explode_jobs:
-    cip_job.add_arg('--not-worker')  # prevent anything that causes failure, reduce writing of output
-if opts.use_full_submit_paths:
-    fname = opts.working_directory+"/"+cip_job.get_sub_file()
-    cip_job.set_sub_file(fname)
-cip_job.write_sub_file()
-if opts.cip_explode_jobs:
-    print(" Exploding stage 1, with  ",opts.cip_explode_jobs, " workers producing samples ")
-    cip_args_base = cip_args_base.replace('fit-save-gp','fit-load-gp')
-    n_explode =opts.cip_explode_jobs
-    fname_out = 'overlap-grid-$(macroiterationnext)-$(process)'
-    if opts.cip_explode_jobs_dag:
-        n_explode=1
-        fname_out = 'overlap-grid-$(macroiterationnext)-$(cluster)'
-    cip_args_base = cip_args_base.replace('my_fit', 'my_fit.pkl')  # yes, asymmetric arguments
-    transfer_files_cip =['../all.net']
-    if opts.use_osg_cip and 'fit-method gp' in cip_args_base:
-        transfer_files_cip += ['my_fit.pkl']  # transfer current working directory fit
-    cip_job_worker, cip_job_worker_name = dag_utils.write_CIP_sub(tag='CIP_worker',log_dir=None,arg_str=cip_args_base,request_memory=opts.request_memory_CIP,request_memory_flex=opts.request_memory_CIP_flex,input_net=working_dir_inside_cip+'/all.net',output=fname_out,out_dir=out_dir_inside_cip,exe=cip_exe,ncopies=n_explode,universe=local_worker_universe_cip,no_grid=cip_no_grid,use_osg=opts.use_osg_cip,use_singularity=opts.use_osg_cip and opts.use_singularity,singularity_image=singularity_image,use_simple_osg_requirements=opts.use_osg_cip,use_oauth_files=opts.use_oauth_files,transfer_files=transfer_files_cip)
-    # Modify: set 'initialdir'
-    cip_job_worker.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_cip")
-    # Modify output argument: change logs and working directory to be subdirectory for the run
-    cip_job_worker.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_cip/logs/cip-$(cluster)-$(process).log")
-    cip_job_worker.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_cip/logs/cip-$(cluster)-$(process).err")
-    cip_job_worker.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_cip/logs/cip-$(cluster)-$(process).out")
-    cip_job_worker.add_condor_cmd('request_disk',cip_request_disk)
-    if opts.use_full_submit_paths:
-        fname = opts.working_directory+"/"+cip_job_worker.get_sub_file()
-        cip_job_worker.set_sub_file(fname)
-    cip_job_worker.write_sub_file()
-
-    # Create worker join job
-    #   ... if we are using cip_explode_jobs_dag, we need a different script entirely
-    #     filename to join : note *two* minus signs, because we don't want the non-worker job to match! It has tiny output. This can cause pathological behavior for the random join stage
-    join_cip_job,join_cip_job_name = dag_utils.write_joingrids_sub(input_pattern=opts.working_directory+"/iteration_$(macroiteration)_cip/output-grid-*-*.xml.gz",target_dir=opts.working_directory,output_base="overlap-grid-$(macroiterationnext)",n_explode=n_explode,log_dir=opts.working_directory+"/iteration_$(macroiteration)_cip/logs",universe=local_worker_universe,no_grid=no_worker_grid,extra_text=extra_text)
-    join_cip_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_cip")
-    # Modify output argument: change logs and working directory to be subdirectory for the run
-    join_cip_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_cip/logs/join-$(cluster)-$(process).log")
-    join_cip_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_cip/logs/join-$(cluster)-$(process).err")
-    join_cip_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_cip/logs/join-$(cluster)-$(process).out")
-    join_cip_job.add_condor_cmd('request_disk',opts.general_request_disk)
-    if opts.use_full_submit_paths:
-        fname = opts.working_directory+"/"+join_cip_job.get_sub_file()
-        join_cip_job.set_sub_file(fname)
-    if opts.condor_containerize_nonworker and singularity_image:
-        join_cip_job.add_condor_cmd("+SingularityImage", '"' + singularity_image + '"')
-    join_cip_job.write_sub_file()
-
-##   puffball job: default case
-if puff_args and puff_cadence:
-    puff_job, puff_job_name = dag_utils.write_puff_sub(tag='PUFF',log_dir=None,arg_str=puff_args,request_memory=opts.request_memory_ILE,input_net=opts.working_directory+'/overlap-grid-$(macroiterationnext).xml.gz',output=opts.working_directory+'/puffball-$(macroiterationnext)',out_dir=opts.working_directory,exe=opts.puff_exe,universe=local_worker_universe,no_grid=no_worker_grid,extra_text=extra_text)
-    # Modify: set 'initialdir' to CIP WORKING DIR 
-    puff_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_cip")
-    # Modify output argument: change logs and working directory to be subdirectory for the run
-    puff_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_cip/logs/puff-$(cluster)-$(process).log")
-    puff_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_cip/logs/puff-$(cluster)-$(process).err")
-    puff_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_cip/logs/puff-$(cluster)-$(process).out")
-    puff_job.add_condor_cmd('request_disk',opts.general_request_disk)
-    if opts.use_full_submit_paths:
-        fname = opts.working_directory+"/"+puff_job.get_sub_file()
-        puff_job.set_sub_file(fname)
-    if opts.condor_containerize_nonworker and singularity_image:
-        puff_job.add_condor_cmd("+SingularityImage", '"' + singularity_image + '"')
-    puff_job.write_sub_file()
-
-
-##   fetch job.  Fetch is before CURRENT iteration, so it is just-in-time and can be used at iteration 0
-if fetch_args:
-    fetch_job, fetch_job_name = dag_utils.write_puff_sub(tag='FETCH',log_dir=None,arg_str=fetch_args,request_memory=opts.request_memory_ILE,input_net=None,output=opts.working_directory+'/fetch-$(macroiteration).xml.gz',out_dir=opts.working_directory,exe=opts.fetch_ext_grid_exe,universe=local_worker_universe,no_grid=no_worker_grid,extra_text=extra_text)
-    # Modify: set 'initialdir' to CIP WORKING DIR 
-    fetch_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_cip")
-    # Modify output argument: change logs and working directory to be subdirectory for the run
-    fetch_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_cip/logs/fetch-$(cluster)-$(process).log")
-    fetch_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_cip/logs/fetch-$(cluster)-$(process).err")
-    fetch_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_cip/logs/fetch-$(cluster)-$(process).out")
-    fetch_job.add_condor_cmd('request_disk',opts.general_request_disk)
-    if opts.use_full_submit_paths:
-        fname = opts.working_directory+"/"+fetch_job.get_sub_file()
-        fetch_job.set_sub_file(fname)
-    if opts.condor_containerize_nonworker and singularity_image:
-        fetch_job.add_condor_cmd("+SingularityImage", '"' + singularity_image + '"')
-    fetch_job.write_sub_file()
-
-
-
-cip_job_list = None
-if  (cip_args_lines is None):
-    # Write the default cip_job into cip_job_list n times
-    # add some redundancy in case this happens : we're hitting a problem with having too few copies of this/fencepost error later
-    if opts.cip_explode_jobs is None:
-        cip_job_list =(opts.n_iterations)* [cip_job ]
-    else:
-        cip_job_list =(opts.n_iterations)* [ [cip_job, cip_job_worker] ]
-else:  
-    # we have different cip jobs for different iteration numbers
-    cip_job_list = []
-    for indx in np.arange(len(cip_args_lines)):
-#        out_dir_base = opts.working_directory
-        cip_args_extra = ""
-        cip_args_truncate=''
-        # If we are a 'G' iteration, flag to use other executable
-        cip_exe_here = None
-        cip_exe_here_master = None
-        if cip_exe:
-            cip_exe_here = "{} ".format(cip_exe)
-            cip_exe_here_master = " {} ".format(cip_exe_master)
-        if indx in creating_G_list:
-            cip_exe_here = opts.cip_exe_G
-        if opts.cip_explode_jobs:
-            if not opts.cip_explode_jobs_flat:
-                cip_args_extra += " --fit-save-gp " + out_dir_inside_cip+"/my_fit"
-            # else:
-            #     cip_exe_here_master = "/bin/true"
-#            out_dir_base += "/iteration_$(macroiteration)_cip/"
-            # set n_eff for primary job to be small. ONLY used for the primary non-worker job
-            cip_args_truncate = " --n-eff 5 --n-max 10000 --n-output-samples 1 "  # cap n_eff, number of iterations, and *output samples* for non-worker jobs, to avoid contamination
-        # Write the appropriate CIP jobs. [note only one CIP per iteration, so unique
-#        cip_exe_here =cip_exe
-        cip_job, cip_job_name = dag_utils.write_CIP_sub(tag='CIP_'+str(indx),log_dir=None,arg_str=cip_args_lines[indx]+cip_args_extra+cip_args_truncate,request_memory=opts.request_memory_CIP,request_memory_flex=opts.request_memory_CIP_flex,input_net=working_dir_inside_cip+'/all.net',output='overlap-grid-$(macroiterationnext)',out_dir=out_dir_inside_cip,exe=cip_exe_here_master,universe=local_worker_universe_cip,no_grid=cip_no_grid,use_osg=opts.use_osg_cip,use_singularity=opts.use_osg_cip and opts.use_singularity,singularity_image=singularity_image,use_simple_osg_requirements=opts.use_osg_cip,use_oauth_files=opts.use_oauth_files,transfer_files=['../all.net'])
-        # Modify: set 'initialdir'
-        cip_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_cip")
-        # Modify output argument: change logs and working directory to be subdirectory for the run
-        cip_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_cip/logs/cip-$(cluster)-$(process).log")
-        cip_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_cip/logs/cip-$(cluster)-$(process).err")
-        cip_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_cip/logs/cip-$(cluster)-$(process).out")
-        cip_job.add_condor_cmd('request_disk',cip_request_disk)
-        if opts.cip_explode_jobs:
-            cip_job.add_arg("--not-worker")
-        if opts.use_full_submit_paths:
-            fname = opts.working_directory+"/"+cip_job.get_sub_file()
-            cip_job.set_sub_file(fname)
-        cip_job.write_sub_file()
-
-
-        if opts.cip_explode_jobs:
-           print(" Exploding stage 2, with  ",opts.cip_explode_jobs, " workers producing samples ")
-           n_explode =opts.cip_explode_jobs
-           fname_out = 'overlap-grid-$(macroiterationnext)-$(process)'
-           if opts.cip_explode_jobs_dag:
-                n_explode=1
-                fname_out = 'overlap-grid-$(macroiterationnext)-$(cluster)'
-           cip_args_extra = cip_args_extra.replace('fit-save-gp','fit-load-gp')
-           cip_args_extra = cip_args_extra.replace('my_fit','my_fit.pkl')
-           # if the last set of workers, we are making extrinsic samples, so change the output samples  
-           if indx == len(cip_args_lines) -1 and opts.last_iteration_extrinsic: # on last iteration, doing extrinsic phase
-               n_samples_per_job = int(opts.last_iteration_extrinsic_nsamples/(1.0*opts.cip_explode_jobs_last))
-               cip_args_extra +=" --n-eff {} --n-output-samples {} ".format(n_samples_per_job,n_samples_per_job)
-               if not (opts.use_eccentricity_squared_sampling):
-                   cip_args_lines[indx] = cip_args_lines[indx].replace(' --parameter eccentricity_squared ',' --parameter-implied eccentricity_squared --parameter-nofit eccentricity ')
-           transfer_files_cip=['../all.net']
-           if opts.use_osg_cip and 'fit-method gp' in cip_args_base:
-               transfer_files_cip += ['my_fit.pkl']  # transfer current working directory fit
-           cip_job_worker, cip_job_worker_name = dag_utils.write_CIP_sub(tag='CIP_worker'+str(indx),log_dir=None,arg_str=cip_args_lines[indx]+cip_args_extra,request_memory=opts.request_memory_CIP,request_memory_flex=opts.request_memory_CIP_flex,input_net=working_dir_inside_cip+'/all.net',output=fname_out,out_dir=out_dir_inside_cip,exe=cip_exe_here,ncopies=n_explode,universe=local_worker_universe_cip,no_grid=cip_no_grid,use_osg=opts.use_osg_cip,use_singularity=opts.use_osg_cip and opts.use_singularity,singularity_image=singularity_image,use_simple_osg_requirements=opts.use_osg_cip,use_oauth_files=opts.use_oauth_files,transfer_files=transfer_files_cip)
-           # Modify: set 'initialdir'
-           cip_job_worker.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_cip")
-           # Modify output argument: change logs and working directory to be subdirectory for the run
-           cip_job_worker.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_cip/logs/cipworker-$(cluster)-$(process).log")
-           cip_job_worker.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_cip/logs/cipworker-$(cluster)-$(process).err")
-           cip_job_worker.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_cip/logs/cipworker-$(cluster)-$(process).out")
-           cip_job_worker.add_condor_cmd('request_disk',cip_request_disk)
-           if opts.use_full_submit_paths:
-               fname = opts.working_directory+"/"+cip_job_worker.get_sub_file()
-               cip_job_worker.set_sub_file(fname)
-           cip_job_worker.write_sub_file()
-
-
-        # augment the CIP job list
-        # Note, if we have a subdag, we create the word "subdag"
-        if not(cip_args_prefixes[indx][0]=='Z'):
-            n_to_add = int(cip_args_n[indx])
-            if (indx == len(cip_args_lines)-1) and np.sum(cip_args_n[:indx])<opts.n_iterations:
-                n_to_add = int(opts.n_iterations - np.sum(cip_args_n[:(indx-1)]) )
-            if opts.cip_explode_jobs is None:
-                cip_job_list = cip_job_list + n_to_add*[cip_job]
-            else:
-                print(" Exploding stage 3 ")
-                cip_job_list = cip_job_list + n_to_add*[[cip_job,cip_job_worker]]
-        else:
-            print(" Indefinite subdag-to-convergence requested for group ", indx)
-            cip_job_list += ["Subdag"]
-print(" ===> Iteration size ", len(cip_job_list), opts.n_iterations)
-##   Test job (terminate, convergence
-if opts.test_args:
-    test_node_list = []
-    ##  Convert job : make results accessible after every iteration.  (Only performed if the tests are active, to make my life easier)
-    convert_job, convert_job_name =dag_utils.write_convert_sub(tag='convert',log_dir=None,arg_str=convert_args,file_input=opts.working_directory+'/overlap-grid-$(macroiteration).xml.gz', file_output=opts.working_directory+'/posterior_samples-$(macroiteration).dat' ,out_dir=opts.working_directory,exe=opts.test_exe,universe=local_worker_universe,no_grid=no_worker_grid)
-    convert_job.add_condor_cmd("initialdir",opts.working_directory)
-    convert_job.set_log_file(opts.working_directory+"/iteration_$(macroiterationlast)_test/logs/convert-$(cluster)-$(process).log")
-    convert_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiterationlast)_test/logs/convert-$(cluster)-$(process).err")
-    convert_job.add_condor_cmd('request_disk',opts.general_request_disk)
-    if opts.use_full_submit_paths:
-        fname = opts.working_directory+"/"+convert_job.get_sub_file()
-        convert_job.set_sub_file(fname)
-    if opts.condor_containerize_nonworker and singularity_image:
-        convert_job.add_condor_cmd("+SingularityImage", '"' + singularity_image + '"')
-    convert_job.write_sub_file()
-
-
-    test_job, test_job_name = dag_utils.write_test_sub(tag='test',log_dir=None,arg_str=test_args,samples_files=[ opts.working_directory+'/posterior_samples-$(macroiteration).dat', opts.working_directory+'/posterior_samples-$(macroiterationlast).dat'] ,out_dir=opts.working_directory,exe=opts.test_exe,universe=local_worker_universe,no_grid=no_worker_grid)
-    # Modify: set 'initialdir'
-    test_job.add_condor_cmd("initialdir",opts.working_directory+"/iteration_$(macroiteration)_test")
-    # Modify output argument: change logs and working directory to be subdirectory for the run
-    test_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_test/logs/test-$(cluster)-$(process).log")
-    test_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_test/logs/test-$(cluster)-$(process).err")
-    test_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_test/logs/test-$(cluster)-$(process).out")
-    test_job.add_condor_cmd('request_disk',opts.general_request_disk)
-    if opts.use_full_submit_paths:
-        fname = opts.working_directory+"/"+test_job.get_sub_file()
-        test_job.set_sub_file(fname)
-    if opts.condor_containerize_nonworker and singularity_image:
-        test_job.add_condor_cmd("+SingularityImage", '"' + singularity_image + '"')
-    test_job.write_sub_file()
-
-
-if opts.plot_args and opts.test_args:
-    # default: last two iterations
-    #  ... unless there is extrinsic samples, then use those
-    #  ... note this has to work with PEsummary as well, which only takes one set of samples
-#    samples_files =['posterior_samples-$(macroiteration).dat','posterior_samples-$(macroiterationlast).dat'] 
-#    if opts.last_iteration_extrinsic:
-#        samples_files =[]
-    # User will be responsible for passing argument strings.  We are not hardcoding in the argument format
-    samples_files =[]
-    plot_job, plot_job_name = dag_utils.write_plot_sub(tag='plot',log_dir=None,arg_str=plot_args,samples_files=samples_files,out_dir=opts.working_directory,exe=opts.plot_exe,universe=local_worker_universe)
-    plot_job.set_log_file(opts.working_directory+"/iteration_$(macroiteration)_plot/logs/test-$(cluster)-$(process).log")
-    plot_job.set_stderr_file(opts.working_directory+"/iteration_$(macroiteration)_plot/logs/test-$(cluster)-$(process).err")
-    plot_job.set_stdout_file(opts.working_directory+"/iteration_$(macroiteration)_plot/logs/test-$(cluster)-$(process).out")
-    plot_job.add_condor_cmd('request_disk',opts.general_request_disk)
-    if opts.use_full_submit_paths:
-        fname = opts.working_directory+"/"+plot_job.get_sub_file()
-        plot_job.set_sub_file(fname)
-    plot_job.write_sub_file()
-
-
-if opts.gridinit_args:
-    gridinit_job, gridinit_job_name = dag_utils.write_init_sub(tag='grid',log_dir=None,arg_str=gridinit_args,out_dir=opts.working_directory,exe=opts.gridinit_exe,universe=local_worker_universe)
-    gridinit_job.set_log_file(opts.working_directory+"/init-$(cluster)-$(process).log")
-    gridinit_job.set_stderr_file(opts.working_directory+"/init--$(cluster)-$(process).err")
-    gridinit_job.set_stdout_file(opts.working_directory+"/init-$(cluster)-$(process).out")
-    gridinit_job.add_condor_cmd('request_disk',opts.general_request_disk)
-    if opts.use_full_submit_paths:
-        fname = opts.working_directory+"/"+gridinit_job.get_sub_file()
-        gridinit_job.set_sub_file(fname)
-    gridinit_job.write_sub_file()
-
-if opts.frame_rotation:
-    # Wriite command to do the conversion
-    extra_arg=''
-    if 'fref' in ile_args:
-        # retrieve fref
-        tmp_args = ile_args.split('--')
-        lines = [x for x in tmp_args if x.startswith('reference-freq')]
-        fref_str = lines[-1].split()[-1]
-        extra_arg = ' --fref {} '.format(fref_str)
-    with open('fix_frame_rot.sh','w') as f:
-        f.write("""#!/bin/bash
-if [ ! -e extrinsic_posterior_samples_orig.dat ]; then
-   mv extrinsic_posterior_samples.dat  extrinsic_posterior_samples_orig.dat;
-   convert_ascii_framechange_xphm.py --extrinsic-posterior-file   extrinsic_posterior_samples_orig .dat --fname-out extrinsic_posterior_samples.dat {}
-fi;
-""".format(extra_arg))
-    # Write submit file for the conversion
-    rotate_job, rotate_job_name = dag_utils.write_convert_sub(exe=which('fix_frame_rot.sh'),tag='ROTATE',log_dir=None,arg_str='',file_input='',file_output=None, out_dir=opts.working_directory,universe=local_worker_universe,no_grid=no_worker_grid)
-    rotate_job.add_condor_cmd("initialdir",opts.working_directory)
-    rotate_job.set_log_file(opts.working_directory+"/reweight-merge-$(cluster)-$(process).log")
-    rotate_job.set_stderr_file(opts.working_directory+"/reweight-merge-$(cluster)-$(process).err")
-    rotate_job.set_stdout_file(opts.working_directory+"/reweight-merge-$(cluster)-$(process).out")
-    rotate_job.add_condor_cmd('request_disk',opts.cal_request_disk)
-    if opts.use_full_submit_paths:
-        fname = opts.working_directory+"/"+rotate_job.get_sub_file()
-        rotate_job.set_sub_file(fname)
-    if opts.use_osg:
-        rotate_job.add_condor_cmd("+DESIRED_SITES",'"nogrid"')
-        rotate_job.add_condor_cmd("+flock_local",'true')
-    rotate_job.write_sub_file()
-
-if opts.calibration_reweighting:
-    # bilby ini must be created FIRST for correct operation  if writing cal envelopes
-    if opts.bilby_ini_file:
-        pickle_job, pickle_job_name = dag_utils.write_bilby_pickle_sub(tag='Bilby_pickle',log_dir=None,bilby_ini_file=opts.bilby_ini_file,exe=opts.bilby_pickle_exe,universe='local',no_grid=no_worker_grid,cache_file=opts.cache_file,frames_dir=opts.frames_dir,ile_args=ile_args)
-        pickle_job.set_log_file(opts.working_directory+"/pickle-$(cluster)-$(process).log")
-        pickle_job.set_stderr_file(opts.working_directory+"/pickle-$(cluster)-$(process).err")
-        pickle_job.set_stdout_file(opts.working_directory+"/pickle-$(cluster)-$(process).out")
-        pickle_job.add_condor_cmd('request_disk',opts.general_request_disk)
-
-        if opts.use_full_submit_paths:
-            fname = opts.working_directory+"/"+pickle_job.get_sub_file()
-            pickle_job.set_sub_file(fname)
-        pickle_job.write_sub_file()
-    
-    extra_args_dict = {}
-    extra_args_dict['time_marg'] = not (opts.last_iteration_extrinsic)   # pass time_marg=False if we are resampling in time
-    if opts.bilby_pickle_file:
-        extra_args_dict['pickle_file'] = opts.bilby_pickle_file
-    else:
-        extra_args_dict['pickle_file'] =opts.working_directory+"/calmarg/data/calmarg_data_dump.pickle"
-    # OSG options
-    if opts.calibration_reweighting_osg:
-        extra_args_dict['use_osg'] =  opts.use_osg
-        extra_args_dict['use_singularity'] =  opts.use_singularity
-        extra_args_dict['singularity_image'] = singularity_image
-        extra_args_dict['universe'] = 'vanilla'
-    else:
-        extra_args_dict['no_grid']  = no_worker_grid
-        extra_args_dict['universe'] = local_worker_universe
-    if opts.transfer_file_list:
-        # Handle scenario where h5 file needed by calmarg job. Note arranged as transfer because we don't know if execute nodes have the file, even if local
-        any_h5_needed_list = [x for x in transfer_file_names if x.endswith('.h5')]  # NRSur, etc h5 files we need for lalsuite
-        extra_args_dict['transfer_files'] = any_h5_needed_list
-        
-    calibration_job, calibration_job_name = dag_utils.write_calibration_uncertainty_reweighting_sub(tag='Calib_reweight',log_dir=None,posterior_file=opts.working_directory+"/extrinsic_posterior_samples.dat",exe=opts.calibration_reweighting_exe,ile_args=ile_args,n_cal=opts.calibration_reweighting_count,use_oauth_files=opts.use_oauth_files,**extra_args_dict)
-        
-    if opts.calibration_reweighting_initial_extra_args:
-        # properly handle quoted extra arguments, such as dictionaries of waveform arguments
-        add_str = opts.calibration_reweighting_initial_extra_args
-        if '"' in add_str:
-            add_str = dag_utils.safely_quote_arg_str(add_str)
-        calibration_job.add_arg(add_str)
-    if not(opts.calibration_reweighting_batchsize): # single run
-        calibration_job.set_log_file(opts.working_directory+"/calmarg/logs/calibration-$(cluster)-$(process).log")
-        calibration_job.set_stderr_file(opts.working_directory+"/calmarg/logs/calibration-$(cluster)-$(process).err")
-        calibration_job.set_stdout_file(opts.working_directory+"/calmarg/logs/calibration-$(cluster)-$(process).out")
-    else:
-        if not(opts.calibration_reweighting_osg):
-            calibration_job.add_condor_cmd("initialdir",opts.working_directory+"/calmarg/")
-        else:
-            calibration_job.add_condor_cmd("initialdir",opts.working_directory)            
-        calibration_job.add_arg(" --start_index $(macrostartidx) ")
-        calibration_job.add_arg(" --end_index $(macroendidx) ")
-        calibration_job.set_log_file(opts.working_directory+"/calmarg/logs/calibration-$(macrostartidx)-$(cluster)-$(process).log")
-        calibration_job.set_stderr_file(opts.working_directory+"/calmarg/logs/calibration-$(macrostartidx)-$(cluster)-$(process).err")
-        calibration_job.set_stdout_file(opts.working_directory+"/calmarg/logs/calibration-$(macrostartidx)-$(cluster)-$(process).out")
-    if opts.use_singularity and opts.calibration_reweighting_osg:
-        calibration_job.add_condor_cmd('request_disk',opts.ile_request_disk)  # similar task, so use similar request
-    else:
-        calibration_job.add_condor_cmd('request_disk',opts.general_request_disk)
-
-    if opts.use_full_submit_paths:
-        fname = opts.working_directory+"/"+calibration_job.get_sub_file()
-        calibration_job.set_sub_file(fname)
-    calibration_job.write_sub_file()
-
-    # job to combine things. Write here to make logic human-readable
-    if (opts.calibration_reweighting_batchsize): # single run
-        reweight_merge_job, reweight_merge_job_name = dag_utils.write_convert_sub(exe=which('combine_weights_and_rejection_sample.py'),tag='CAL_REWEIGHT_COMBINE',log_dir=None,arg_str=opts.calibration_reweighting_extra_args,file_input=opts.working_directory+'/extrinsic_posterior_samples.dat weight_files/',file_output=None, out_dir=opts.working_directory,universe=local_worker_universe,no_grid=no_worker_grid)
-        reweight_merge_job.add_condor_cmd("initialdir",opts.working_directory) # NOT calmarg, because the normal 'weight files' is put in the location of the *samples*, which is the rundir
-        reweight_merge_job.set_log_file(opts.working_directory+"/calmarg/logs/reweight-merge-$(cluster)-$(process).log")
-        reweight_merge_job.set_stderr_file(opts.working_directory+"/calmarg/logs/reweight-merge-$(cluster)-$(process).err")
-        reweight_merge_job.set_stdout_file(opts.working_directory+"/calmarg/logs/reweight-merge-$(cluster)-$(process).out")
-        reweight_merge_job.add_condor_cmd('request_disk',opts.cal_request_disk)
-        if opts.use_full_submit_paths:
-            fname = opts.working_directory+"/"+reweight_merge_job.get_sub_file()
-            reweight_merge_job.set_sub_file(fname)
-        if opts.use_osg:
-            reweight_merge_job.add_condor_cmd("+DESIRED_SITES",'"nogrid"')
-            reweight_merge_job.add_condor_cmd("+flock_local",'true')
-        reweight_merge_job.write_sub_file()
-
-
-
-    
-
-if opts.comov_distance_reweighting:
-    if opts.calibration_reweighting:
-        print(opts.convert_ascii2h5_exe)
-        convert_ascii_job, convert_ascii_job_name = dag_utils.write_convert_ascii_to_h5_sub(tag='Convert_ascii2h5',log_dir=None,output_file=opts.working_directory+"/posterior_samples.h5",posterior_file=opts.working_directory+"/reweighted_posterior_samples.dat",convert_ascii_to_h5_exe=opts.convert_ascii2h5_exe,universe=local_worker_universe,no_grid=no_worker_grid)
-        convert_ascii_job.set_log_file(opts.working_directory+"/convert-ascii-$(cluster)-$(process).log")
-        convert_ascii_job.set_stderr_file(opts.working_directory+"/convert_ascii-$(cluster)-$(process).err")
-        convert_ascii_job.set_stdout_file(opts.working_directory+"/convert_ascii-$(cluster)-$(process).out")
-        convert_ascii_job.add_condor_cmd('request_disk',opts.general_request_disk)
-
-        if opts.use_full_submit_paths:
-            fname = opts.working_directory+"/"+convert_ascii_job.get_sub_file()
-            convert_ascii_job.set_sub_file(fname)
-        convert_ascii_job.write_sub_file()
-    else:
-        convert_ascii_job, convert_ascii_job_name = dag_utils.write_convert_ascii_to_h5_sub(tag='Convert_ascii2h5',log_dir=None,output_file=opts.working_directory+"/posterior_samples.h5",posterior_file=opts.working_directory+"/extrinsic_posterior_samples.dat",convert_ascii_to_h5_exe=opts.convert_ascii2h5_exe,universe=local_worker_universe)
-        convert_ascii_job.set_log_file(opts.working_directory+"/convert-ascii-$(cluster)-$(process).log")
-        convert_ascii_job.set_stderr_file(opts.working_directory+"/convert_ascii-$(cluster)-$(process).err")
-        convert_ascii_job.set_stdout_file(opts.working_directory+"/convert_ascii-$(cluster)-$(process).out")
-        convert_ascii_job.add_condor_cmd('request_disk',opts.general_request_disk)
-
-        if opts.use_full_submit_paths:
-            fname = opts.working_directory+"/"+convert_ascii_job.get_sub_file()
-            convert_ascii_job.set_sub_file(fname)
-        convert_ascii_job.write_sub_file()
-
-    
-    distance_job, distance_job_name = dag_utils.write_comov_distance_reweighting_sub(tag='Comov_dist',log_dir=None,reweight_location=opts.working_directory+"/cosmo_reweight.h5",posterior_file=opts.working_directory+"/posterior_samples.h5",comov_distance_reweighting_exe=opts.comov_distance_reweighting_exe,universe=local_worker_universe)
-    distance_job.set_log_file(opts.working_directory+"/pickle-$(cluster)-$(process).log")
-    distance_job.set_stderr_file(opts.working_directory+"/pickle-$(cluster)-$(process).err")
-    distance_job.set_stdout_file(opts.working_directory+"/pickle-$(cluster)-$(process).out")
-    distance_job.add_condor_cmd('request_disk',opts.general_request_disk)
-
-    if opts.use_full_submit_paths:
-        fname = opts.working_directory+"/"+distance_job.get_sub_file()
-        distance_job.set_sub_file(fname)
-    distance_job.write_sub_file()
-##   Convert job(s)
-#    - relocate input grid to correct location.  (MAKE TRIVIAL: do via output arguments in previous stage!)
-##   Assessment job (no-op for now)
-
-
-
-
-# ++++
-# Create workflow 
-# ++++
-
-# Create workflow
-#   - Create grid node as needed
-#   - Loop over iterations
-#      - if iteration0, use seed grid (should already be copied in place)
-#      - if not iteration 0, grid should be in place (from previous stage)
-#      - Loop over events, make ILE node per event
-#      - create consolidate job, make it depend on all events in that iteration
-#      - create fit job, make it depend on consolidate job
-#    
-
-parent_fit_node = None
-last_node=None
-
-if opts.gridinit_args:
-   grid_node = Node(gridinit_job) 
-   dag.add_node(grid_node)
-   parent_fit_node = grid_node  # this must happen before the first ILE jobs
-
-convert_psd_node_list = []
-if opts.use_bw_psd:
-    print(" ===> Adding BW PSD nodes to dag <=== ")
-
-#    bw1_node =Node(bw2_job)
-#    bw1_node.add_parent(bw0_node)
-#    dag.add_node(bw1_node)
-
-    # Add nodes to convert the PSD to the correct format
-    # 
-    for ifo in list(channel_names.keys()):
-        bw0_node =Node(bw_job)
-        bw0_node.add_variable('ifo',ifo)
-        # Create argument string
-        bw_arg_str = ''
-        channel_name, channel_flow = channel_dict_bw[ifo]
-        bw_arg_str += " --ifo "+ifo
-        bw_arg_str += " --"+ifo+"-channel "+ifo+":"+channel_name
-        ifo_cache_name =  working_dir_inside_local +"/for_bw_"+ifo+".cache"  # bw requires single-IFO cache files, argh!!! Note these are NOT on OSG
-        ifo_char = ifo[0]
-        os.system("grep ^"+ifo_char +  " " +opts_ile.cache_file+ " > " + ifo_cache_name)
-        bw_arg_str += " --"+ifo+"-cache "+ifo_cache_name
-        bw_arg_str += " --"+ifo+"-flow "+str(channel_flow)
-        bw_arg_str += " --"+ifo+"-timeslide 0.0"
-        bw0_node.add_variable('macroargument0',bw_arg_str)
-        bw0_node.retry = opts.ile_retries  # these fail all the time
-        dag.add_node(bw0_node)
-
-        # Need correct inheritance: this will be annoying b/c loop below does not allow for it
-        # ...so do the PSD convert steps *serially* for now.  (The individual PSDs can be done in parallel)
-        convert_psd_node =Node(convert_psd_job)
-        convert_psd_node.add_parent(bw0_node)
-#        if not(parent_fit_node is None):
-#            convert_psd_node.add_parent(parent_fit_node)
-        parent_fit_node = convert_psd_node
-        convert_psd_node.add_variable('ifo',ifo)
-        convert_psd_node.retry = opts.ile_retries  # these fail all the time
-        dag.add_node(convert_psd_node)
-        convert_psd_node_list.append(convert_psd_node)
-
-n_group = opts.ile_n_events_to_analyze
-n_group_first = opts.ile_n_events_to_analyze
-if opts.ile_n_events_to_analyze_first:
-    n_group_first= opts.ile_n_events_to_analyze_first
-
-con_node_list = []
-unify_node_list = []
-ile_node_list_per_iteration={}
-ile_target_worklevel_per_iteration={}
-cip_subdag_nodes = {}
-cip_subdag_work_targets = {} 
-for it in np.arange(it_start,opts.n_iterations):
-    ile_node_list_per_iteration[it] = []
-    print(it, opts.n_iterations)
-    consolidate_now = None
-    fit_node_now = None
-    # Create consolidate job
-    con_node = Node(con_job)
-    con_node.add_variable("macroiteration",it)
-    con_node.retry = opts.general_retries
-    con_node_list.append(con_node)
-    # Create unify job
-    unify_node = Node(unify_job)
-    unify_node.add_variable("macroiteration",it)
-    unify_node.add_parent(con_node)
-    unify_node.retry = opts.general_retries
-    unify_node_list.append(unify_node)
-    # Create evidence job. ALWAYS HAS con_node as parent, guarantees it happens at the right time, same time as unify, NO CHILDREN.
-    # It is also *backward looking*
-    evidence_node = Node(evidence_job)
-    evidence_node.add_variable("macroiteration",it-1)  # LAST ITERATION being analyzed, not this one
-    evidence_node.retry = opts.general_retries
-    if it > it_start:
-        evidence_node.add_parent(parent_fit_node)  # run evidence after previous iteration, on previous
-        dag.add_node(evidence_node)
-
-    n_group_here = n_group
-    if it ==0 and n_group_first:
-        n_group_here=n_group_first
-        
-    # Create one node per job
-    n_jobs_this_time = opts.n_samples_per_job
-    if it ==it_start:
-        n_jobs_this_time = n_initial
-    indx_max = int((1.0*n_jobs_this_time)/n_group_here)
-    if indx_max*n_jobs_this_time < n_group_here:
-        indx_max+=1
-    if not(it==it_start and opts.first_iteration_jumpstart):   # if on first iteration ,don't do this for jumpstart
-        if opts.ile_group_subdag:
-            # Write the subdag
-            my_ile_subdag_name = subdag_dir + "/subdag_ILE_{}".format(it)
-            my_ile_subdag = DAG(log=os.getcwd())
-            add_batch_ILE_nodes_to_dag(my_ile_subdag, ile_job, None, None, indx_max, n_group_here, it, n_retries=opts.ile_retries,it_start=-1)
-            my_ile_subdag.set_dag_file(my_ile_subdag_name)
-            my_ile_subdag.write_concrete_dag()
-            my_ile_subdag_name += ".dag"
-            print(" ILE subdag plan ", my_ile_subdag_name)
-            # Create a high-level node for the subdag in this dag
-            main_subdag =  DAG(my_ile_subdag_name)
-            main_analysis_node = main_subdag.create_node()
-            main_analysis_node.retry = 0  # currently low-level RETRY takes care of things ... beware RETRY will re do all nodes!
-            if parent_fit_node:
-                main_analysis_node.add_parent(parent_fit_node)
-            con_node.add_parent(main_analysis_node)
-            dag.add_node(main_analysis_node)
-        else:
-            add_batch_ILE_nodes_to_dag(dag, ile_job, parent_fit_node, con_node, indx_max, n_group_here, it, n_retries=opts.ile_retries,it_start=it_start,convert_psd_node_list=convert_psd_node_list,node_list_dict=ile_node_list_per_iteration)
-#       for event in np.arange(indx_max): #np.arange(n_jobs_this_time):
-#         # Add task per ILE operation
-#         ile_node = Node(ile_job)
-# #        ile_node.set_priority(JOB_PRIORITIES["ILE"])
-#         ile_node.retry = opts.ile_retries
-#         ile_node.add_variable("macroevent", event*n_group_here)
-#         ile_node.add_variable("macrongroup", n_group_here)
-#         ile_node.add_variable("macroiteration", it)
-#         if not(parent_fit_node is None):
-#             ile_node.add_parent(parent_fit_node)
-#         if it == it_start:
-#             for node in convert_psd_node_list:  # for every PSD conversion job, make sure PSD is present before we run the first iteration!
-#                 ile_node.add_parent(node)
-#         con_node.add_parent(ile_node) # consolidate depends on all of the individual jobs
-#         dag.add_node(ile_node)
-#         ile_node_list_per_iteration[it].append(ile_node)
-
-    if puff_args and puff_cadence:
-     if it>it_start and it <= puff_max_it  and (it-1)%puff_cadence ==0:  # we made a puffball last iteration, so run it through ILE now
-        print(" ILE jobs for puffball on iteration ", it)
-        add_batch_ILE_nodes_to_dag(dag, ilePuff_job, parent_fit_node, con_node, indx_max, n_group_here, it, n_retries=opts.ile_retries,node_list_dict=ile_node_list_per_iteration)
-        # for event in np.arange(indx_max):
-        #     ile_node = Node(ilePuff_job)  # only difference is here: uses puffball, which by construction is the same size/ perturbed points
-        #     ile_node.retry = opts.ile_retries
-        #     ile_node.add_variable("macroevent", event*n_group_here)
-        #     ile_node.add_variable("macrongroup", n_group_here)
-        #     ile_node.add_variable("macroiteration", it)
-        #     if not(parent_fit_node is None):
-        #         ile_node.add_parent(parent_fit_node)
-        #     con_node.add_parent(ile_node) # consolidate depends on all of the individual jobs
-        #     dag.add_node(ile_node)
-        #     ile_node_list_per_iteration[it].append(ile_node)
-        
-    if fetch_args:
-      if not(it==it_start and opts.first_iteration_jumpstart):   # if on first iteration ,don't do this for jumpstart 
-        print(" Fetching active for iteration ", it)  # must fetch before we can do the job
-        fetch_node = Node(fetch_job)
-        fetch_node.retry = opts.ile_retries
-        fetch_node.add_variable("macroiteration", it)
-        if not(parent_fit_node is None):
-            fetch_node.add_parent(parent_fit_node)
-        parent_fit_node=fetch_node
-        dag.add_node(fetch_node)
-
-        print(" ILE jobs for fetch on iteration ", it)
-        add_batch_ILE_nodes_to_dag(dag, ileFetch_job, parent_fit_node, con_node, indx_max, n_group_here, it, n_retries=opts.ile_retries,node_list_dict=ile_node_list_per_iteration)
-        # for event in np.arange(indx_max):
-        #     ile_node = Node(ileFetch_job)  # only difference is here: uses fetch. ASSUME it is the same size (!!) -- should make this controllable
-        #     ile_node.retry = opts.ile_retries
-        #     ile_node.add_variable("macroevent", event*n_group_here)
-        #     ile_node.add_variable("macrongroup", n_group_here)
-        #     ile_node.add_variable("macroiteration", it)
-        #     if not(parent_fit_node is None):
-        #         ile_node.add_parent(parent_fit_node)
-        #     con_node.add_parent(ile_node) # consolidate depends on all of the individual jobs
-        #     dag.add_node(ile_node)
-        #     ile_node_list_per_iteration[it].append(ile_node)
-            
-    # add con job
-    dag.add_node(con_node)
-    dag.add_node(unify_node)
-    
-    # Create fit node, which depends on consolidate node
-    cip_job = cip_job_list[it]
-#    print(it, cip_job,creating_subdag_arg_list)
-    last_cip_subdag = False
-    if isinstance(cip_job,list):
-        cip_worker_job = cip_job[1]
-        cip_job=cip_job[0]
-    elif isinstance(cip_job, str):
-        last_cip_subdag = True
-        print(" ==> attempting indefinite convergence subdag : {} <== ".format(it))
-        # Subdag procedure, with indefinite convergence being requested
-        # Write modified CIP arg string to file!
-        #   - problem, need to know which CIP arg list to use.  Assume list exists
-        subdag_args_now = creating_subdag_arg_list[0]; creating_subdag_arg_list = creating_subdag_arg_list[1:]
-        cip_args_here = '{} '.format(opts.n_iterations_subdag_max) + subdag_args_now
-        fname_subdag_cip_args = "iteration_{}_cip/args_cip.txt".format(it)
-        with open(fname_subdag_cip_args,'w') as f:
-            f.write(cip_args_here)
-        # Create dag inside directory, as usual,  pointing to existing run (note grid size issue problem)
-        cmd = "create_event_parameter_pipeline_BasicIteration --use-full-submit-paths --first-iteration-jumpstart "  # we don't use overlap-grid-0.xml.gz !
-        # note we will use subdags, and do this actively.  The 0th iteration needs a SPECIFIC grid though!
-        cmd += " --ile-n-events-to-analyze {} --input-grid {} --ile-exe {} --ile-args {} ".format(opts.ile_n_events_to_analyze,opts.working_directory+"/overlap-grid-0.xml.gz".format(it),opts.ile_exe, opts.ile_args)
-        cmd += " --cip-args {}/iteration_{}_cip/args_cip.txt ".format(opts.working_directory,it)
-        if opts.convert_args:
-            cmd += "  --convert-args {} ".format(opts.convert_args) # passthrough, important for tides
-        cmd += " --n-samples-per-job {} ".format(opts.n_samples_per_job) 
-        cmd += " --neff-threshold {} ".format(opts.neff_threshold)
-        cmd += " --general-request-disk {} --ile-request-disk {} --cip-request-disk {} ".format(opts.general_request_disk,opts.ile_request_disk,opts.cip_request_disk)
-        cmd += " --request-memory-ILE {} --request-memory-CIP {} ".format(opts.request_memory_ILE,opts.request_memory_CIP)
-        if opts.use_eccentricity:
-            cmd += " --use-eccentricity "
-            if opts.use_meanPerAno:
-                cmd += " --use-meanPerAno "
-        if opts.cip_explode_jobs:
-            cmd += " --cip-explode-jobs {} ".format(opts.cip_explode_jobs)
-        if opts.cip_explode_jobs_dag:   # should be deafult
-            cmd += " --cip-explode-jobs-dag "  
-        if opts.cip_explode_jobs_subdag:   # should be deafult
-            cmd += " --cip-explode-jobs-subdag "  
-        if opts.request_gpu_ILE:
-            cmd += " --request-gpu-ILE "  # DAG writer needs to make appropririate requests
-        if opts.request_xpu_ILE:
-            cmd += " --request-xpu-ILE "  # DAG writer needs to make appropririate requests
-        if opts.use_tabular_eos_file:
-            cmd += " --use-tabular-eos-file "
-        if opts.ile_retries:
-            cmd += " --ile-retries {} ".format(opts.ile_retries)
-        if opts.general_retries:
-            cmd += " --general-retries {} ".format(opts.general_retries)
-        if opts.use_singularity:
-            cmd += " --use-singularity "
-        if opts.cache_file:
-            cmd += " --cache-file {} ".format(opts.cache_file)
-        if opts.frames_dir:
-            cmd += " --frames-dir {} ".format(opts.frames_dir)
-        if opts.ile_runtime_max_minutes:
-            cmd += " --ile-runtime-max-minutes {} ".format(opts.ile_runtime_max_minutes)
-        if opts.ile_condor_commands:
-            cmd += " --ile-condor-commands {} ".format(opts.ile_condor_commands)
-        if opts.use_osg:
-            cmd += " --use-osg "
-            if opts.use_osg_cip:
-                cmd += " --use-osg-cip "
-            if opts.use_cvmfs_frames:
-                cmd += " --use-cvmfs-frames "
-            if opts.use_osg_simple_requirements:
-                cmd +=  " --use-osg-simple-requirements "
-            if opts.condor_local_nonworker:
-                cmd += " --condor-local-nonworker "
-            if opts.condor_local_nonworker_igwn_prefix:
-                cmd += " --condor-local-nonworker-igwn-prefix "
-            if opts.condor_nogrid_nonworker:
-                cmd += " --condor-nogrid-nonworker "
-            if opts.transfer_file_list:
-                cmd += " --transfer-file-list {} ".format(opts.transfer_file_list)
-        if opts.test_args:
-            import re
-            test_args_optin = "X " + test_args  # copy not reference
-            test_args_optin = re.sub('--always-succeed', '', test_args_optin)  # tests must operate
-            # remove iteration threshold, so they ALWAYS operate
-            test_args_optin = re.sub('--iteration-threshold [1-9] ', '', test_args_optin)
-        else:
-            test_args_optin = "X  --parameter m1 --method lame "
-        if opts.puff_args and opts.puff_cadence:
-            # always puff in the final stage, it can be dangerous not to.  ALL iterations
-            cmd += " --puff-exe {} --puff-args {} --puff-cadence {} --puff-max-it {}  ".format(opts.puff_exe, opts.puff_args, opts.puff_cadence,opts.n_iterations_subdag_max) 
-        fname_subdag_test_args = opts.working_directory+"/iteration_{}_cip/args_test.txt".format(it)
-        with open(fname_subdag_test_args,'w') as f:
-            f.write(test_args_optin)
-        cmd+= " --test-args {}/iteration_{}_cip/args_test.txt ".format(opts.working_directory,it)
-        cmd += " --working-directory {}/iteration_{}_cip/ ".format(opts.working_directory,it)
-        cmd += " --n-iterations {} ".format(opts.n_iterations_subdag_max) # default will be 10 iterations, but argument controls
-        # 
-        print(" SUBDAG COMMAND : ", cmd)
-        os.system(cmd)
-        # link the *input* grid in, overwriting the (temporary, placeholder) grid used to create the workflow
-        cmd = "ln -sf {}/overlap-grid-{}.xml.gz iteration_{}_cip/overlap-grid-0.xml.gz".format(opts.working_directory,it,it)
-        os.system(cmd)
-
-        main_subdag =  DAG("{}/iteration_{}_cip/marginalize_intrinsic_parameters_BasicIterationWorkflow.dag".format(opts.working_directory,it))  # assumes subdag is created
-
-        # Create viable job
-        main_analysis_node = main_subdag.create_node()
-        main_analysis_node.add_parent(parent_fit_node)
-        main_analysis_node.retry = opts.general_retries
-
-        # Add post script OR NODE to fetch information from the job
-        #   - problem: dagman jobs don't allow post scripts usually, may need to write this manually
-        fname_fetch = opts.working_directory+"/"+"internal_fetch_from_{}.json".format(it)
-        my_dict = {"method":"native", "source":opts.working_directory+"/iteration_{}_cip".format(it),"n_max":3000}
-        with open(fname_fetch,'w') as f:
-            json.dump(my_dict,f)
-        fetch_subdag_args =" --inj-file-out overlap-grid-{}.xml.gz ".format(it+1)
-        fetch_subdag_args += " --input-json {} ".format(fname_fetch)
-        fetch_subdag_job, fetch_subdag_job_name = dag_utils.write_puff_sub(tag='FETCH_{}_subdag'.format(it),log_dir=opts.working_directory+"/"+"iteration_{}_cip/logs/".format(it),arg_str=fetch_subdag_args,request_memory=opts.request_memory_ILE,input_net=None,output=opts.working_directory+'/overlap-grid-$(macroiterationnext).xml.gz',out_dir=opts.working_directory,exe=dag_utils.which("util_FetchExternalGrid.py"),universe=local_worker_universe,no_grid=no_worker_grid)
-        fetch_subdag_job.add_condor_cmd('request_disk',opts.general_request_disk)
-        fetch_subdag_job.write_sub_file()
-
-        # Because later we are going to take care of the iteration crap and parents for the 
-        dag.add_node(main_analysis_node)
-        cip_node= main_analysis_node
-        cip_node.add_parent(unify_node)
-        # increment definitions, so the code later works fine
-        unify_node=main_analysis_node
-        cip_job = fetch_subdag_job  # we will create this as a job and make its parent equal to above
-
-        # Create link to HARVEST ALL COMPOSITE DATA generated during subdag operation, so we don't need to regenerate it
-        cmd_fetch_composite = "ln -sf {}/iteration_{}_cip/all.net {}/bonus_subdag_{}.composite ".format(opts.working_directory, it,opts.working_directory,it)
-        os.system(cmd_fetch_composite)
-
-
-        # Create link to FEED  COMPOSITE DATA generated during top-level run into low-level directory
-        # Make sure we are only linking *prior* iterations information, for clarity of provenance
-        for indx in np.arange(it+1): # we still do one ILE iteration beforehand at top level, rather than doing it in subdag, because CIP follows ILE
-            cmd_fetch_composite = "ln -sf {}/consolidated_{}.composite {}/iteration_{}_cip/input_{}.composite   ".format(opts.working_directory, indx,  opts.working_directory, it,indx)
-            os.system(cmd_fetch_composite)
-
-
-        print(" ==> end setup convergence subdag : {} <== ".format(it))
-        # Need postprocessing, based on previous stage
-
-        # if isinstance(cip_job_list[it-1],list):
-        #     cip_job = cip_job_list[it-1][0]
-        # else:
-        #     cip_job= cip_job_list[it-1]
-
-    # Node generation for cip job tasks
-    if not(isinstance(cip_job,str)):
-        fit_node = Node(cip_job)
-    else:
-        fit_node = main_analysis_node
-    fit_node.add_variable("macroiteration", it)
-    fit_node.add_variable("macroiterationnext", it+1)
-    fit_node.set_category("CIP")
-    fit_node.add_parent(unify_node)  # only fit if we have results from the previous iteration
-    fit_node.retry = opts.general_retries
-    # skip CIP node creation if we have worker nodes! Note this will BREAK GP, so also use it for GP
-    if (opts.cip_explode_jobs is None) or (last_cip_subdag) or ('fit-method gp' in cip_args_base):
-        dag.add_node(fit_node) 
-        parent_fit_node = fit_node
-    else:
-        parent_fit_node = unify_node
-    if not(opts.cip_explode_jobs is None) and not(last_cip_subdag): # don't explode if we just made a subdag!
-        print(" Exploding workers out ")
-        # Create job to consolidate worker outputs
-        join_node =Node(join_cip_job)
-        join_node.add_variable("macroiteration", it)
-        join_node.add_variable("macroiterationnext", it+1)
-        join_node.set_category("join_cip")
-        join_node.retry = opts.general_retries
-        
-        # Create exploded worker job nodes
-        if opts.cip_explode_jobs is None:
-            worker_node =Node(cip_worker_job)
-            worker_node.add_variable("macroiteration", it)
-            worker_node.add_variable("macroiterationnext", it+1)
-            worker_node.set_category("CIP_worker")
-            worker_node.add_parent(parent_fit_node)  # only fit if we have results from the previous iteration
-            worker_node.retry = opts.general_retries
-            join_node.add_parent(worker_node)  # make sure to add worker node as parent
-            dag.add_node(worker_node)
-        elif opts.cip_explode_jobs_subdag:
-            n_explode= int(opts.cip_explode_jobs)
-            # write appropriate subdag
-            subdag_name = dag_utils.write_CIP_single_iteration_subdag(cip_worker_job, it, str(it), subdag_dir,n_explode=n_explode)
-            print(" Subdag plan ", subdag_name)
-            # Create node
-            main_subdag =  DAG(subdag_name)
-            main_analysis_node = main_subdag.create_node()
-            main_analysis_node.set_category("cip_subdag")
-            main_analysis_node.retry = 1000 # some enormous number, should be user specified
-            # add links
-            main_analysis_node.add_parent(parent_fit_node)
-            join_node.add_parent(main_analysis_node)
-            # register subdag, so we can add SCRIPT POST at the end
-            cip_subdag_nodes[it] = main_analysis_node
-            cip_subdag_work_targets[it] = opts.n_samples_per_job
-            # add to DAG
-            dag.add_node(main_analysis_node)
-        else:
-            n_explode = opts.cip_explode_jobs
-            # if we are on the last iteration and we want to use more exploded jobs now, explode more jobs
-            if opts.cip_explode_jobs_last and (it == opts.n_iterations-1): # last iteration
-                print("   Last iteration explode size ", opts.cip_explode_jobs_last)
-                n_explode = opts.cip_explode_jobs_last
-            for indx in np.arange(n_explode):
-                worker_node =Node(cip_worker_job)
-                worker_node.add_variable("macroiteration", it)
-                worker_node.add_variable("macroiterationnext", it+1)
-                worker_node.set_category("CIP_worker")
-                worker_node.add_parent(parent_fit_node)  # only fit if we have results from the previous iteration
-                worker_node.retry = opts.general_retries
-                join_node.add_parent(worker_node)  # make sure to add worker node as parent
-                dag.add_node(worker_node)
-        dag.add_node(join_node)
-        parent_fit_node=join_node
-
-    # Check if puffball being created, and if so create it *immediately* after CIP job creates the overlap-grid file
-    if puff_args and puff_cadence:
-      if it > -1 and it <= puff_max_it and it%puff_cadence ==0:
-        print(" Puffball for iteration ", it)
-        puff_node = Node(puff_job)
-        puff_node.add_variable("macroiteration", it)
-        puff_node.add_variable("macroiterationnext", it+1)
-        puff_node.set_category("PUFF")
-        puff_node.retry = opts.general_retries
-        if not (parent_fit_node is None):
-            puff_node.add_parent(parent_fit_node)  # only fit if we have results from the previous iteration
-        dag.add_node(puff_node) 
-        
-        parent_fit_node = puff_node
-
-        
-
-    # Create convert node, which depends on fit node, *if* tests are being performed
-    if opts.test_args:
-        convert_node=Node(convert_job)
-        convert_node.add_variable("macroiteration", it+1)  # convert the NEWLY-PRODUCED iteration
-        convert_node.add_variable("macroiterationlast", it)  # use log files in the previous directory
-        convert_node.add_parent(parent_fit_node)
-        convert_node.set_category("CONVERT")
-        convert_node.retry = opts.general_retries
-        dag.add_node(convert_node)
-
-        parent_fit_node = convert_node
-
-
-
-    if opts.test_args and it>0:
-        # Cannot run test on first iteration
-        test_node = Node(test_job)
-        test_node.add_variable("macroiteration", it+1)   # test the NEWLY-PRODUCED iteration against the old
-        test_node.add_variable("macroiterationlast", it)
-        test_node.add_parent(parent_fit_node)
-        test_node.set_category("CONVERGE")
-        dag.add_node(test_node)
-        test_node_list.append(test_node)
-
-        parent_fit_node=test_node
-
-
-# Create export stages for extrinsic samples
-last_node= parent_fit_node # backstop
-if opts.last_iteration_extrinsic:
-    # Check if 'it' is defined : it will not always be, if done later
-    if not ('it' in globals()):
-        it = opts.n_iterations  # last iteration
-    elif it < opts.n_iterations and 'Z' in cip_args_prefixes:
-        it += 1   # deal with an offset/fencepost issue that can happen when using subdags for some reason
-    print(" Extrinsic information being pulled from iteration {} ".format(it))
-
-    # Create nodes for followup tasks
-    cat_node = Node(cat_job)
-    cat_node.add_variable("macroiteration", it)  # needed to identify log file location
-
-    # Perform final ILE run on all points, saving samples
-    # Need to perform number of events CONSISTENT WITH TARGET SAMPLE SIZE
-    #    - *not* always same as number of ILE events being analyzed
-    #    - *assumes* grid files have sufficiently large numbers of samples to allow this! (as in many other cases)
-    n_jobs_extrinsic = int(opts.last_iteration_extrinsic_nsamples/(1.0*n_group))
-
-    if opts.last_iteration_extrinsic_time_resampling:
-        convert_node = Node(batchConvertExtr_job)
-        convert_node.retry = opts.ile_retries  # this can fail too
-        convert_node.add_variable("macroiteration",it)  # needed so we find the correct data to read
-
-
-    for event in np.arange(n_jobs_extrinsic):
-        # Add task per ILE operation
-        ile_node = Node(ileExtr_job)
-#        ile_node.set_priority(JOB_PRIORITIES["ILE"])
-        ile_node.retry = opts.ile_retries
-        ile_node.add_variable("macroevent", event*n_group)
-        ile_node.add_variable("macrongroup", n_group)
-        ile_node.add_variable("macroiteration", it)
-        if not(parent_fit_node is None):
-            ile_node.add_parent(parent_fit_node)
-        dag.add_node(ile_node)
-#        ile_node_list_per_iteration[it].append(ile_node)
-
-        # Add convert and resample task *for each output file*
-        if opts.last_iteration_extrinsic_time_resampling:
-            # establish parent-child relationship
-            convert_node.add_parent(ile_node)
-        elif opts.last_iteration_extrinsic_batched_convert:
-            convert_node = Node(batchConvertExtr_job)
-            convert_node.add_variable("macroevent", event*n_group)
-            convert_node.add_variable("macrongroup", n_group)
-            convert_node.add_variable("macroiteration", it)
-            convert_node.retry = opts.ile_retries  # this can fail too
-            convert_node.add_parent(ile_node)
-            dag.add_node(convert_node)
-            cat_node.add_parent(convert_node)
-        else:
-          for indx in np.arange(n_group):
-            convert_node = Node(convertExtr_job)
-            convert_node.add_variable("macroevent", event*n_group)
-            convert_node.add_variable("macroiteration", it)
-            convert_node.add_variable("macroindx",indx)
-            convert_node.retry = opts.ile_retries  # this can fail too
-            convert_node.add_parent(ile_node)
-
-            resample_node = Node(resample_job)
-            resample_node.add_variable("macroevent", event*n_group)
-            resample_node.add_variable("macroiteration", it)
-            resample_node.add_variable("macroindx",indx)
-            resample_node.retry = opts.ile_retries  # these occasionally fail for stupid reasons - nodes missing software, etc
-            resample_node.add_parent(convert_node)
-            
-            # Make cat job 
-            cat_node.add_parent(resample_node)
-            cat_node.retry = opts.ile_retries  # this can fail too
-
-            # Add nodes
-            dag.add_node(convert_node)
-            dag.add_node(resample_node)
-   
-    if not(opts.last_iteration_extrinsic_time_resampling):
-        dag.add_node(cat_node)
-        last_node=cat_node
-    else:
-        dag.add_node(convert_node)   # this is the time resampling task
-        last_node=convert_node
-
-# Create rotation job (if extrinsic available)
-if opts.frame_rotation and opts.last_iteration_extrinsic:
-    rotate_node = Node(rotate_job)
-    rotate_node.set_category("ROTATE")
-    rotate_node.add_parent(last_node)
-    last_node=rotate_node
-    dag.add_node(rotate_node)
-
-# Creating calibration uncertainty job
-if opts.calibration_reweighting:
-    if opts.bilby_ini_file:
-        pickle_node = Node(pickle_job)
-        pickle_node.add_variable("macroiteration",it)
-        pickle_node.set_category("PICKLE")
-        pickle_node.add_parent(last_node)
-        last_node=pickle_node
-        dag.add_node(pickle_node)
-    # Do single job if only one needed, otherwise loop
-    if not(opts.calibration_reweighting_batchsize):
-        calibration_node = Node(calibration_job)
-        calibration_node.add_variable("macroiteration",it)
-        calibration_node.set_category("CALIBRATION")
-        calibration_node.add_parent(last_node)
-        calibration_node.retry = opts.general_retries
-        dag.add_node(calibration_node)
-    else:
-        # Make job to reweight everything, after producing samples
-        reweight_merge_node =    Node(reweight_merge_job)
-        reweight_merge_node.add_variable("macroiteration",it)
-
-        # Make individual calibration nodes
-        for start_indx in np.arange(start=0,stop=opts.last_iteration_extrinsic_nsamples,step=opts.calibration_reweighting_batchsize):
-            end_indx = start_indx + opts.calibration_reweighting_batchsize  # end index is not included, python-style indexing so no fencepost problems
-            calibration_node = Node(calibration_job)
-            calibration_node.add_variable("macroiteration",it)
-            calibration_node.add_variable("macrostartidx",start_indx)
-            calibration_node.add_variable("macroendidx",end_indx)
-            calibration_node.set_category("CALIBRATION")
-            calibration_node.retry = opts.general_retries
-            calibration_node.add_parent(last_node)
-            reweight_merge_node.add_parent(calibration_node)
-            dag.add_node(calibration_node)
-            
-        # add the merge node.  Thouls be the last node
-        dag.add_node(reweight_merge_node)
-    
-if opts.comov_distance_reweighting:
-    convert_ascii_node = Node(convert_ascii_job)
-    convert_ascii_node.add_variable("macroiteration",it)
-    convert_ascii_node.set_category("CONVERT_ASCII")
-    convert_ascii_node.add_parent(last_node)
-    dag.add_node(convert_ascii_node)
-    distance_node = Node(distance_job)
-    distance_node.add_variable("macroiteration",it)
-    distance_node.set_category("COMOV")
-    distance_node.add_parent(convert_ascii_node)
-    last_node=distance_node
-    dag.add_node(distance_node)
-# Create final node for overall plots.  (Note: default setup is designed to enable plots of the last two iterations *at each step* but this seems like overkill)
-if plot_args:
-        # Cannot run test on first iteration
-        plot_node = Node(plot_job)
-        plot_node.add_variable("macroiteration", it)
-        plot_node.add_variable("macroiterationlast", it-1)
-        plot_node.add_parent(parent_fit_node)
-        plot_node.set_category("PLOT")
-        dag.add_node(plot_node)
-
-dag_name="marginalize_intrinsic_parameters_BasicIterationWorkflow"
-dag.set_dag_file(dag_name)
-dag.write_concrete_dag()
-
-# if convergence tests are active, add the ABORT-DAG-ON operation to the dag we just wrote, and STOP THE DAG WITH SUCCESS if we test ok
-# https://htcondor.readthedocs.io/en/latest/users-manual/dagman-workflows.html
-if opts.test_args:
-    my_names = [x._CondorDAGNode__md5name for x in test_node_list]
-    with open(dag_name+".dag",'a') as f:
-        for name in my_names:
-            f.write("ABORT-DAG-ON "+ name + " 1 RETURN 0 \n")
-
-my_names_con = [x._CondorDAGNode__md5name for x in con_node_list]
-my_names_unify = [x._CondorDAGNode__md5name for x in unify_node_list]
-indx =0 # start with consolidated_0
-if opts.first_iteration_jumpstart:
-    my_names_unify = my_names_unify[1:] # drop first element
-    indx=1
-with open("confirm_exist.sh",'w') as f:
-    f.write("""#! /bin/bash
-[ -s $1 ]
-""")
-st=os.stat("confirm_exist.sh"); os.chmod("confirm_exist.sh", st.st_mode | stat.S_IEXEC)
-with open(dag_name+".dag",'a') as f:
-    for indx_here, name in enumerate(my_names_con):
-        if indx_here >=indx: # skip 0th entry if appropriate
-            f.write("SCRIPT POST "+ name + " {}/confirm_exist.sh {}/consolidated_{}.composite \n".format(opts.working_directory,opts.working_directory,indx_here))
-    for name in my_names_unify:
-        f.write("SCRIPT POST "+ name + " {}/confirm_exist.sh {}/all.net \n".format(opts.working_directory,opts.working_directory))
-    # add dot file, for dag visualization
-    f.write('DOT my_dag_vis.dot \n')
-
-    
-
-# CIP adaptive batch size per iteration.  BACKSTOP with 'default' script to check work by lilne-counting (dangerous!)
-if opts.cip_explode_jobs_subdag:
-    if opts.cip_post_exe is None:
-    # script to count lines in overlap-grid files. Note this does NOT check n_eff directly ... should count n_eff instead based on +annotation.dat files
-        with open("cip_check_work.sh", 'w') as f:
-            f.write("""#! /bin/bash
-LINES=`igwn_ligolw_print -c mass1 $1/overlap-grid*.xml.gz | wc -l `
-[ ${LINES} -ge $2 ]
-""")
-        st=os.stat("cip_check_work.sh"); os.chmod("cip_check_work.sh", st.st_mode | stat.S_IEXEC)
-        opts.cip_post_exe = opts.working_directory + "/cip_check_work.sh"
-    # add POST SCRIPT to subdag. Note it will USUALLY FAIL and get retried A LARGE NUMBER OF TIMES, until we complete work!
-    with open(dag_name+".dag",'a') as f:
-        for it in cip_subdag_nodes:
-            node = cip_subdag_nodes[it]
-            f.write("SCRIPT POST " + node._CondorDAGNode__md5name +  " {} iteration_{}_cip {} \n".format(opts.cip_post_exe, it, cip_subdag_work_targets[it])) 
-
-# ILE adaptive batch size per iteration: If ile_post_exe exists, write the necessary items
-# WARNING: currently framed as a post script that is run after EVERY job
-if opts.ile_post_exe:
-    with open(dag_name+".dag",'a') as f:
-        for it in ile_node_list_per_iteration:
-            for node in ile_node_list_per_iteration[it]:
-                # write the script post line. Add in the worklevel target
-                n_target = n_initial
-                if opts.n_samples_per_job_threshold:
-                    n_target = opts.n_samples_per_job_threshold
-                f.write("SCRIPT POST " + node._CondorDAGNode__md5name + " $(JOBID) $(RETURN) " + str(it) + " " + str(n_target) + "\n")
-            
-