import datetime
import os
import pickle
from pathlib import Path
import numpy as np
import pandas as pd
from astropy.io import fits
from joblib import Parallel, delayed
from libpyhat.spectral_data import SpectralData
# Reads an individual FITS file. This is the function that is run in parallel
# by supercam_batch.
def read_supercam_fits(i, filelist, shot_to_shot, headerkeys):
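    """Read one SuperCam FITS file and return its spectrum as a DataFrame.

    i : index into filelist identifying the file for this worker
    filelist : list of FITS file paths
    shot_to_shot : if True, read the individual shot spectra instead of the mean
    headerkeys : FITS header keywords to copy into "meta" columns

    Returns a data frame with multiindexed "wvl" and "meta" columns
    (one row for the mean spectrum, or one row per shot), or an empty
    data frame if the file could not be read.
    """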
    # suppress pandas performance warnings (imported here so the filter is
    # applied inside each parallel worker)
    import warnings

    warnings.simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
# get the file from the list for this instance
file = filelist[i]
filename = os.path.basename(file)
    # print a progress message
    print(str(round(i / len(filelist) * 100, 1)) + "% - " + filename)
# open the file and get the header
with fits.open(file) as fitsfile:
header = fitsfile["PRIMARY"].header
        try:
            if shot_to_shot is False:
                # if we're not doing shot-to-shot, get the mean spectrum
                spect = pd.DataFrame(fitsfile[7].data)["Mean"]
                # casting to float is important because it takes care of
                # byte-order issues from FITS data
                spect = pd.DataFrame(np.array(spect.values, dtype=float)).T
            else:
                # otherwise, get the individual shot spectra
                spect = pd.DataFrame(fitsfile[6].data)
                # casting to float again handles the FITS byte-order issues
                spect = pd.DataFrame(np.array(spect, dtype=float)).T
            # get the wavelengths of the spectral channels
            wvls = np.array(fitsfile[8].data["Wavelength"], dtype=float)
            spect.columns = wvls  # set columns to wavelength values
            # make the columns multiindexed, with "wvl" as the top level
            spect.columns = pd.MultiIndex.from_arrays(
                [["wvl"] * len(spect.columns), spect.columns.values]
            )
spect[("meta", "files")] = filename # add a column with the file name
spect[("meta", "spectrum_sum")] = np.sum(spect["wvl"].values) #
# calculate spectrum sum and add as a column
# add header info to the spectrum data frame
for key in headerkeys:
                try:
                    spect[("meta", key)] = header[key]
                except KeyError:
                    pass  # skip header keys missing from this file
            datarow = spect.reset_index(drop=True)
            # remove duplicate columns
            datarow = datarow.loc[:, ~datarow.columns.duplicated()].copy()
            return datarow
        except Exception as err:
            print("Something went wrong reading " + filename + ": " + str(err))
            print("Returning an empty data frame")
            return pd.DataFrame()
def supercam_batch(
searchdirs,
searchstring="*cl1*.fits",
headerkeys=None,
shot_to_shot=False,
to_csv=False,
to_pickle=False,
data_name="SuperCam",
outpath="",
outfile=None,
):
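    """Read a batch of SuperCam FITS files in parallel and combine them.

    searchdirs : list of directories to search recursively for FITS files
    searchstring : glob pattern used to match files (default "*cl1*.fits")
    headerkeys : FITS header keywords to record as metadata; if None, a
        default set is used
    shot_to_shot : if True, read individual shot spectra instead of means
    to_csv : if True, also save the combined data to a csv file
    to_pickle : if True, also save the combined data to a pickle file
    data_name : name for the data set, used in default output file names
    outpath : directory where output files are written
    outfile : output file name; if None, one is generated from data_name
        and today's date

    Returns a PyHAT SpectralData object wrapping the combined data frame.
    """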
if headerkeys is None: # If no header keys are provided, use a default set
headerkeys = [
"TARGETNM",
"FN_SCLOK",
"ENV_COM",
"INSTRENV",
"FOC_DIST",
"USERDIST",
"LASPOWER",
"UVROWS",
"VIOROWS",
"GREEROWS",
"ORANROWS",
"REDROWS",
"N_UV",
"N_VIO",
"N_GRE",
"N_ORA",
"N_RED",
]
    filelist = []  # create an empty list
    for dir_i in searchdirs:  # step through each directory in the list
        # search recursively for files matching searchstring
        filelist.append(list(Path(dir_i).rglob(searchstring)))
    # stack the per-directory lists (in case of multiple searchdirs) and sort
    filelist = np.sort(np.hstack(filelist))
    # read the individual files in parallel (n_jobs=-1 uses all available cores)
    print("Reading " + str(len(filelist)) + " files...")
results = Parallel(n_jobs=-1)(
delayed(read_supercam_fits)(i, filelist, shot_to_shot, headerkeys)
for i in range(len(filelist))
)
    # results is a list of data frames (one row each, unless shot_to_shot).
    # Concatenate them into a single data frame.
    data = pd.concat(results, ignore_index=True)
    data = data[["meta", "wvl"]]  # make metadata columns come first
    if outfile is None:
        # get today's date to label the output file
        todays_date = str(datetime.datetime.today()).split()[0].replace("-", "_")
        if shot_to_shot is False:
            outfile = data_name + "_" + todays_date
        else:
            outfile = data_name + "_" + todays_date + "_shots"
    if to_pickle is True:  # save to a pickle file
        pickle_path = os.path.join(outpath, outfile + ".pkl")
        print("Saving to " + pickle_path)
        with open(pickle_path, "wb") as pickle_file:
            pickle.dump(data, pickle_file)
    if to_csv is True:  # save to a csv file
        data.to_csv(os.path.join(outpath, outfile + ".csv"))
return SpectralData(
data, name=data_name, spect_label="wvl", meta_label="meta", comp_label=None
) # return a PyHAT SpectralData object
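

# A minimal usage sketch: the directory paths below are hypothetical
# placeholders, and the keyword settings are illustrative, not required.
if __name__ == "__main__":
    sc_data = supercam_batch(
        searchdirs=["/path/to/supercam/data"],  # hypothetical input directory
        shot_to_shot=False,  # read mean spectra rather than individual shots
        to_csv=True,  # also write the combined data frame to a csv file
        outpath="/path/to/output",  # hypothetical output directory
    )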