import datetime
import os
import pickle
from pathlib import Path
import numpy as np
import pandas as pd
from astropy.io import fits
from joblib import Parallel, delayed
from libpyhat.spectral_data import SpectralData
# Reads an individual FITS file. This is the function that is run in parallel
# by supercam_batch.
def read_supercam_fits(i, filelist, shot_to_shot, headerkeys):
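    """Read one SuperCam FITS file and return its spectrum as a DataFrame.

    i : index into filelist identifying the file for this worker
    filelist : list of FITS file paths
    shot_to_shot : if True, read the individual shot spectra instead of the mean
    headerkeys : FITS header keywords to copy into "meta" columns

    Returns a data frame with multiindexed "wvl" and "meta" columns
    (one row for the mean spectrum, or one row per shot), or an empty
    data frame if the file could not be read.
    """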
    # suppress pandas performance warnings (imported here so the filter is
    # applied inside each parallel worker)
    import warnings

    warnings.simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
# get the file from the list for this instance
file = filelist[i]
filename = os.path.basename(file)
    # print a progress message
    print(str(round(i / len(filelist) * 100, 1)) + "% - " + filename)
# open the file and get the header
with fits.open(file) as fitsfile:
header = fitsfile["PRIMARY"].header
        try:
            if shot_to_shot is False:
                # if we're not doing shot-to-shot, get the mean spectrum
                spect = pd.DataFrame(fitsfile[7].data)["Mean"]
                # casting to float is important because it takes care of
                # byte-order issues from FITS data
                spect = pd.DataFrame(np.array(spect.values, dtype=float)).T
            else:
                # otherwise, get the individual shot spectra
                spect = pd.DataFrame(fitsfile[6].data)
                # casting to float again handles the FITS byte-order issues
                spect = pd.DataFrame(np.array(spect, dtype=float)).T
            # get the wavelengths of the spectral channels
            wvls = np.array(fitsfile[8].data["Wavelength"], dtype=float)
            spect.columns = wvls  # set columns to wavelength values
            # make the columns multiindexed, with "wvl" as the top level
            spect.columns = pd.MultiIndex.from_arrays(
                [["wvl"] * len(spect.columns), spect.columns.values]
            )
spect[("meta", "files")] = filename # add a column with the file name
spect[("meta", "spectrum_sum")] = np.sum(spect["wvl"].values) #
# calculate spectrum sum and add as a column
# add header info to the spectrum data frame
for key in headerkeys:
                try:
                    spect[("meta", key)] = header[key]
                except KeyError:
                    pass  # skip header keys missing from this file
            datarow = spect.reset_index(drop=True)
            # remove duplicate columns
            datarow = datarow.loc[:, ~datarow.columns.duplicated()].copy()
            return datarow
        except Exception as err:
            print("Something went wrong reading " + filename + ": " + str(err))
            print("Returning an empty data frame")
            return pd.DataFrame()
def supercam_batch(
searchdirs,
searchstring="*cl1*.fits",
headerkeys=None,
shot_to_shot=False,
to_csv=False,
to_pickle=False,
data_name="SuperCam",
outpath="",
outfile=None,
):
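    """Read a batch of SuperCam FITS files in parallel and combine them.

    searchdirs : list of directories to search recursively for FITS files
    searchstring : glob pattern used to match files (default "*cl1*.fits")
    headerkeys : FITS header keywords to record as metadata; if None, a
        default set is used
    shot_to_shot : if True, read individual shot spectra instead of means
    to_csv : if True, also save the combined data to a csv file
    to_pickle : if True, also save the combined data to a pickle file
    data_name : name for the data set, used in default output file names
    outpath : directory where output files are written
    outfile : output file name; if None, one is generated from data_name
        and today's date

    Returns a PyHAT SpectralData object wrapping the combined data frame.
    """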
if headerkeys is None: # If no header keys are provided, use a default set
headerkeys = [
"TARGETNM",
"FN_SCLOK",
"ENV_COM",
"INSTRENV",
"FOC_DIST",
"USERDIST",
"LASPOWER",
"UVROWS",
"VIOROWS",
"GREEROWS",
"ORANROWS",
"REDROWS",
"N_UV",
"N_VIO",
"N_GRE",
"N_ORA",
"N_RED",
]
    filelist = []  # create an empty list
    for dir_i in searchdirs:  # step through each directory in the list
        # search recursively for files matching searchstring
        filelist.append(list(Path(dir_i).rglob(searchstring)))
    # stack the per-directory lists (in case of multiple searchdirs) and sort
    filelist = np.sort(np.hstack(filelist))
    # read the individual files in parallel (n_jobs=-1 uses all available cores)
    print("Reading " + str(len(filelist)) + " files...")
results = Parallel(n_jobs=-1)(
delayed(read_supercam_fits)(i, filelist, shot_to_shot, headerkeys)
for i in range(len(filelist))
)
    # results is a list of data frames (one row each, unless shot_to_shot).
    # Concatenate them into a single data frame.
    data = pd.concat(results, ignore_index=True)
    data = data[["meta", "wvl"]]  # make metadata columns come first
    if outfile is None:
        # get today's date to label the output file
        todays_date = str(datetime.datetime.today()).split()[0].replace("-", "_")
        if shot_to_shot is False:
            outfile = data_name + "_" + todays_date
        else:
            outfile = data_name + "_" + todays_date + "_shots"
    if to_pickle is True:  # save to a pickle file
        pickle_path = os.path.join(outpath, outfile + ".pkl")
        print("Saving to " + pickle_path)
        with open(pickle_path, "wb") as pickle_file:
            pickle.dump(data, pickle_file)
    if to_csv is True:  # save to a csv file
        data.to_csv(os.path.join(outpath, outfile + ".csv"))
return SpectralData(
data, name=data_name, spect_label="wvl", meta_label="meta", comp_label=None
) # return a PyHAT SpectralData object
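

# A minimal usage sketch: the directory paths below are hypothetical
# placeholders, and the keyword settings are illustrative, not required.
if __name__ == "__main__":
    sc_data = supercam_batch(
        searchdirs=["/path/to/supercam/data"],  # hypothetical input directory
        shot_to_shot=False,  # read mean spectra rather than individual shots
        to_csv=True,  # also write the combined data frame to a csv file
        outpath="/path/to/output",  # hypothetical output directory
    )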