229 changes: 229 additions & 0 deletions spaceKLIP/imagetools.py
@@ -2144,6 +2144,235 @@ def fix_bad_pixels_interp2d(self,

pass

def clean_background(self,
sigma=3.,
gaussian_smoothing={'skip':True, 'sigma':1.},
types=['SCI_BG', 'REF_BG'],
subdir='bgcleaned'):
"""
Remove contaminating sources from background observations using sigma clipping.
Intended for MIRI background observations.

Parameters
----------
sigma : float, optional
Sigma clipping threshold for background cleaning. The default is 3.
gaussian_smoothing : dict, optional
Dictionary with keyword arguments passed to scipy.ndimage.gaussian_filter,
used to smooth the backgrounds before the cleaning process. If 'skip' is
True, no Gaussian smoothing is applied. The default is {'skip':True, 'sigma':1.}.
types : list of str, optional
List of data types for which background cleaning shall be applied.
The default is ['SCI_BG', 'REF_BG'].
subdir : str, optional
Name of the directory where the data products shall be saved. The
default is 'bgcleaned'.

Returns
-------
None.
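
Examples
--------
A minimal usage sketch; ``imagetools`` is assumed to be an ImageTools
instance built from a spaceKLIP database in which each concatenation
contains exactly two background exposures per background type.

>>> imagetools.clean_background(sigma=3.,
...                             gaussian_smoothing={'skip': False, 'sigma': 1.},
...                             types=['SCI_BG', 'REF_BG'],
...                             subdir='bgcleaned')

The per-pixel decision rule, illustrated on hypothetical toy arrays:

>>> import numpy as np
>>> bg1 = np.array([1., 1., 9.])  # exposure 1, contaminating source in last pixel
>>> bg2 = np.array([1., 1., 1.])  # exposure 2, clean
>>> noise1 = noise2 = np.ones(3)  # assumed unit noise maps
>>> diff = (bg1 - bg2) / np.sqrt(noise1**2 + noise2**2)
>>> np.where(diff > 3., np.nan, bg1)  # contaminated pixel becomes NaN
array([ 1.,  1., nan])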
"""

# Set output directory.
output_dir = os.path.join(self.database.output_dir, subdir)
if not os.path.exists(output_dir):
os.makedirs(output_dir)

# Loop through concatenations.
for key in self.database.obs.keys():
log.info('--> Concatenation ' + key)

for data_type in types:
log.info('--> Starting cleaning process for ' + data_type + ' files')

# Find background files of the requested type.
ww_bg = np.where(self.database.obs[key]['TYPE'] == data_type)[0]

# The cleaning process requires exactly two background exposures.
if len(ww_bg) == 2:
bg_data = []

for j in ww_bg:

# Read FITS file.
fitsfile = self.database.obs[key]['FITSFILE'][j]
data, erro, pxdq, head_pri, head_sci, is2d, align_shift, center_shift, align_mask, center_mask, maskoffs = ut.read_obs(fitsfile)

bg_data += [data]

bg_data = np.array(bg_data)
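# bg_data now has shape (2, nints, ny, nx): two background
# exposures, each a stack of integrations.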

# Optionally smooth the backgrounds before the cleaning process.
if not gaussian_smoothing['skip']:
    log.info(' --> Applying Gaussian smoothing with sigma = %.2f' % gaussian_smoothing['sigma'])
    bg_data[0] = gaussian_filter(bg_data[0], sigma=gaussian_smoothing['sigma'])
    bg_data[1] = gaussian_filter(bg_data[1], sigma=gaussian_smoothing['sigma'])

# Estimate per-pixel noise maps as the standard deviation over the
# integrations of each background exposure.
noise_map1 = np.nanstd(bg_data[0], axis=0)
noise_map2 = np.nanstd(bg_data[1], axis=0)

img1 = np.empty_like(bg_data[0])
img2 = np.empty_like(bg_data[1])

# Loop over integrations and sigma-clip the difference image: a pixel
# that is significantly brighter in one background exposure than in
# the other is treated as a contaminating source and replaced with NaN.
for it in range(bg_data.shape[1]):
    diff = (bg_data[0, it] - bg_data[1, it]) / np.sqrt(noise_map1**2 + noise_map2**2)

    img1[it] = np.where(diff > sigma, np.nan, bg_data[0, it])
    img2[it] = np.where(diff < -sigma, np.nan, bg_data[1, it])

cleaned_bg_data = np.array([img1, img2])

# Write FITS file and PSF mask.
for n, j in enumerate(ww_bg):

# Read FITS file and PSF mask.
fitsfile = self.database.obs[key]['FITSFILE'][j]
data, erro, pxdq, head_pri, head_sci, is2d, align_shift, center_shift, align_mask, center_mask, maskoffs = ut.read_obs(fitsfile)
maskfile = self.database.obs[key]['MASKFILE'][j]
mask = ut.read_msk(maskfile)

# Replace data with cleaned data
fitsfile = ut.write_obs(fitsfile, output_dir, cleaned_bg_data[n], erro, pxdq, head_pri, head_sci, is2d,
align_shift=align_shift, center_shift=center_shift, align_mask=align_mask,
center_mask=center_mask, maskoffs=maskoffs)
maskfile = ut.write_msk(maskfile, mask, fitsfile)

# Update spaceKLIP database.
self.database.update_obs(key, j, fitsfile, maskfile)

else:
raise NotImplementedError('Background cleaning is currently only implemented for the case of exactly two background files.')

# Save non-background files unmodified.
log.info('--> Skipping cleaning process for non-background files.')
nfitsfiles = len(self.database.obs[key])
for j in range(nfitsfiles):

# Only copy file types that are not in the list of types.
if self.database.obs[key]['TYPE'][j] not in types:

# Read FITS file.
fitsfile = self.database.obs[key]['FITSFILE'][j]
data, erro, pxdq, head_pri, head_sci, is2d, align_shift, center_shift, align_mask, center_mask, maskoffs = ut.read_obs(fitsfile)
maskfile = self.database.obs[key]['MASKFILE'][j]
mask = ut.read_msk(maskfile)

# Write FITS file and PSF mask.
fitsfile = ut.write_obs(fitsfile, output_dir, data, erro, pxdq, head_pri, head_sci, is2d,
align_shift=align_shift, center_shift=center_shift, align_mask=align_mask,
center_mask=center_mask, maskoffs=maskoffs)
maskfile = ut.write_msk(maskfile, mask, fitsfile)

# Update spaceKLIP database.
self.database.update_obs(key, j, fitsfile, maskfile)

def persistence_trimming(self,
radius_pxl=4,
ints_to_trim=2,
types=['SCI', 'REF'],
subdir='persistence_trimmed'):
"""
Remove persistence artifacts from the initial N integrations by trimming
a circular region around the persistence location. This is useful for
MIRI 4QPM data where persistence artifacts from target acquisition can
remain in early integrations.

Note that for this function to work, the corresponding target acquisition
(TA) images must be included in the database and the preceding data
reduction steps must have been performed, so that the persistence location
can be determined.

Parameters
----------
radius_pxl : int, optional
Radius (in pixels) of the circular region around the persistence
location to trim/mask. The default is 4.
ints_to_trim : int, optional
Number of initial integrations to apply the persistence trimming to.
The default is 2.
types : list of str, optional
List of data types for which persistence trimming shall be applied.
The default is ['SCI', 'REF'].
subdir : str, optional
Name of the directory where the data products shall be saved. The
default is 'persistence_trimmed'.

Returns
-------
None.
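
Examples
--------
A minimal usage sketch; ``imagetools`` is assumed to be an ImageTools
instance whose database also contains the matching SCI_TA/REF_TA
observations needed to locate the persistence artifacts.

>>> imagetools.persistence_trimming(radius_pxl=4,
...                                 ints_to_trim=2,
...                                 types=['SCI', 'REF'],
...                                 subdir='persistence_trimmed')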
"""

# Set output directory.
output_dir = os.path.join(self.database.output_dir, subdir)
if not os.path.exists(output_dir):
os.makedirs(output_dir)

# Loop through concatenations.
for key in self.database.obs.keys():
log.info('--> Concatenation ' + key)

# Loop through FITS files.
nfitsfiles = len(self.database.obs[key])
for j in range(nfitsfiles):

# Read FITS file.
fitsfile = self.database.obs[key]['FITSFILE'][j]
data, erro, pxdq, head_pri, head_sci, is2d, align_shift, center_shift, align_mask, center_mask, maskoffs = ut.read_obs(fitsfile)
maskfile = self.database.obs[key]['MASKFILE'][j]
mask = ut.read_msk(maskfile)

# Only apply the trimming to file types in the list of types.
if self.database.obs[key]['TYPE'][j] in types:
    log.info(f' --> Persistence trimming for {self.database.obs[key]["TYPE"][j]} file: ' + os.path.basename(fitsfile))

# Get persistence location from TA observation.
ta_key = self.database.obs[key]['TYPE'][j] + '_TA'

ww_ta = np.where(self.database.obs[key]['TYPE'] == ta_key)[0]

pers_loc = []

log.info(' --> Finding persistence location in ' + ta_key)
for k in ww_ta:

# Match ROLL angle to find correct TA observation.
if round(self.database.obs[key]['ROLL_REF'][j], 2) == round(self.database.obs[key]['ROLL_REF'][k], 2):
log.info(' --> Matching TA found: ' + os.path.basename(self.database.obs[key]['FITSFILE'][k]))
# Read TA FITS file.
fitsfile_ta = self.database.obs[key]['FITSFILE'][k]
data_ta, erro_ta, pxdq_ta, head_pri_ta, head_sci_ta, is2d_ta, align_shift_ta, center_shift_ta, align_mask_ta, center_mask_ta, maskoffs_ta = ut.read_obs(fitsfile_ta)

# Persistence location = position of the brightest pixel in the
# TA data (the integration axis is dropped from the np.where output).
pers_loc += [np.where(data_ta == np.nanmax(data_ta))[1:]]

if len(pers_loc) == 0:
raise ValueError('No matching TA observation found for ' + os.path.basename(fitsfile) + '. Cannot determine persistence location.')
elif len(pers_loc) == 1:
raise ValueError('Only one matching TA observation found for ' + os.path.basename(fitsfile) + '. Need the two TA observations to find both persistence locations.')

# Check for valid number of integrations to trim.
if ints_to_trim >= data.shape[0]:
raise ValueError(f'Number of integrations to trim ({ints_to_trim}) is greater than or equal to the total number of integrations in the {self.database.obs[key]["TYPE"][j]} observations ({data.shape[0]}). Try again with a smaller number of integrations to trim, or skip persistence trimming for the {self.database.obs[key]["TYPE"][j]} observations.')

# Mask a circular region of radius radius_pxl around each
# persistence location in the initial ints_to_trim integrations.
yy, xx = np.indices(data.shape[-2:])
for loc in pers_loc:
    rsq = (yy - loc[0])**2 + (xx - loc[1])**2
    data[:ints_to_trim, rsq < radius_pxl**2] = np.nan

# Write FITS file and PSF mask.
fitsfile = ut.write_obs(fitsfile, output_dir, data, erro, pxdq, head_pri, head_sci, is2d,
align_shift=align_shift, center_shift=center_shift, align_mask=align_mask,
center_mask=center_mask, maskoffs=maskoffs)
maskfile = ut.write_msk(maskfile, mask, fitsfile)

# Update spaceKLIP database.
self.database.update_obs(key, j, fitsfile, maskfile)


def replace_nans(self,
cval=0.,
types=['SCI', 'SCI_BG', 'REF', 'REF_BG'],