# Source code for libpyhat.IO.io_supercam_pds

import datetime
import os
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
from astropy.io import fits
from joblib import Parallel, delayed

from libpyhat.spectral_data import SpectralData


# Reads individual fits files. This is the function that is run in parallel
# by supercam_batch
def read_supercam_fits(i, filelist, shot_to_shot, headerkeys):
    """Read one SuperCam FITS file into a one-row (or per-shot) DataFrame.

    Parameters
    ----------
    i : int
        Index into ``filelist`` of the file to read; also used for the
        progress message.
    filelist : sequence
        Paths of all files being processed in this batch.
    shot_to_shot : bool
        If False, use the mean spectrum (read from HDU 7); if True, use the
        individual shot spectra (read from HDU 6).
    headerkeys : iterable of str
        Primary-header keywords to copy into ``("meta", key)`` columns.
        Keywords a file's header lacks are skipped.

    Returns
    -------
    pandas.DataFrame
        Columns are multi-indexed: ``("wvl", wavelength)`` spectral channels
        plus ``("meta", ...)`` metadata. Returns an empty DataFrame if the
        file cannot be parsed.
    """
    # Suppress the pandas fragmentation warning triggered by the repeated
    # single-column inserts below.
    import warnings

    warnings.simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

    # get the file from the list for this instance
    file = filelist[i]
    filename = os.path.basename(file)
    # print progress message
    print(str(round(i / len(filelist) * 100, 1)) + "% - " + filename)

    # open the file and get the header
    with fits.open(file) as fitsfile:
        header = fitsfile["PRIMARY"].header
        try:
            if not shot_to_shot:
                # mean spectrum lives in HDU 7 -- TODO confirm against the
                # SuperCam PDS product layout
                spect = pd.DataFrame(fitsfile[7].data)["Mean"]
                # the float dtype is important because it takes care of
                # byte order issues from fits
                spect = pd.DataFrame(np.array(spect.values, dtype=float)).T
            else:
                # individual shot spectra live in HDU 6
                spect = pd.DataFrame(fitsfile[6].data)
                # same byte-order normalization as above
                spect = pd.DataFrame(np.array(spect, dtype=float)).T

            # get the wavelengths of the spectral channels (HDU 8)
            wvls = np.array(fitsfile[8].data["Wavelength"], dtype=float)
            spect.columns = wvls  # set columns to wvl values
            # make columns multiindexed under the "wvl" label
            spect.columns = pd.MultiIndex.from_arrays(
                [["wvl"] * len(spect.columns), spect.columns.values]
            )
            # add a column with the file name
            spect[("meta", "files")] = filename
            # calculate spectrum sum and add as a column
            spect[("meta", "spectrum_sum")] = np.sum(spect["wvl"].values)

            # add header info to the spectrum data frame; keys this file's
            # header lacks are skipped (was a bare except:)
            for key in headerkeys:
                try:
                    spect[("meta", key)] = header[key]
                except Exception:
                    pass

            # NOTE(review): the original wrapped the two lines below in a
            # try that printed and fell through, which could hit an
            # unbound "datarow" NameError; any failure here is now handled
            # by the outer except instead.
            datarow = spect.reset_index(drop=True)
            # remove duplicate columns
            datarow = datarow.loc[:, ~datarow.columns.duplicated()].copy()
            return datarow
        except Exception as e:
            # Narrowed from a bare except: (which also swallowed
            # KeyboardInterrupt/SystemExit); report which file failed and why.
            print("something went wrong, returning empty data frame")
            print(filename + ": " + repr(e))
            return pd.DataFrame()
def supercam_batch(
    searchdirs,
    searchstring="*cl1*.fits",
    headerkeys=None,
    shot_to_shot=False,
    to_csv=False,
    to_pickle=False,
    data_name="SuperCam",
    outpath="",
    outfile=None,
):
    """Batch-read SuperCam FITS files into a PyHAT SpectralData object.

    Searches ``searchdirs`` recursively for files matching ``searchstring``,
    reads them in parallel with :func:`read_supercam_fits`, concatenates the
    results, and optionally saves them to pickle and/or CSV.

    Parameters
    ----------
    searchdirs : iterable of str or Path
        Directories to search recursively.
    searchstring : str
        Glob pattern for the files to read.
    headerkeys : list of str, optional
        FITS header keywords to keep as metadata; a default SuperCam set is
        used when None.
    shot_to_shot : bool
        Read individual shot spectra instead of the mean spectrum.
    to_csv, to_pickle : bool
        Save the concatenated table to CSV / pickle under ``outpath``.
    data_name : str
        Name for the SpectralData object and default output file prefix.
    outpath : str
        Directory for output files (current directory when "").
    outfile : str, optional
        Output file name stem; defaults to ``data_name`` plus today's date
        (with a ``_shots`` suffix in shot-to-shot mode).

    Returns
    -------
    SpectralData
        The concatenated data with "meta" and "wvl" column groups.

    Raises
    ------
    FileNotFoundError
        If no files match ``searchstring`` in ``searchdirs``.
    """
    if headerkeys is None:
        # If no header keys are provided, use a default set
        headerkeys = [
            "TARGETNM", "FN_SCLOK", "ENV_COM", "INSTRENV", "FOC_DIST",
            "USERDIST", "LASPOWER", "UVROWS", "VIOROWS", "GREEROWS",
            "ORANROWS", "REDROWS", "N_UV", "N_VIO", "N_GRE", "N_ORA",
            "N_RED",
        ]

    # search recursively for files matching searchstring in each dir
    filelist = []
    for search_dir in searchdirs:  # renamed from "dir" (shadowed builtin)
        filelist.append(list(Path(search_dir).rglob(searchstring)))

    # stack the per-directory lists (in case of multiple searchdirs) and
    # sort; fail with a clear error when nothing matched instead of letting
    # np.hstack / pd.concat raise confusing errors downstream
    filelist = np.sort(np.hstack(filelist)) if filelist else np.array([])
    if len(filelist) == 0:
        raise FileNotFoundError(
            "No files matching " + searchstring + " found in " + str(searchdirs)
        )

    # read the individual files in parallel
    print("Reading " + str(len(filelist)) + " files...")
    results = Parallel(n_jobs=-1)(
        delayed(read_supercam_fits)(i, filelist, shot_to_shot, headerkeys)
        for i in range(len(filelist))
    )

    # results are a list of 1-row dataframes. Now concatenate them
    data = pd.concat(results, ignore_index=True)
    data = data[["meta", "wvl"]]  # make metadata columns come first

    if outfile is None:
        # get the date to label the output file
        todays_date = datetime.datetime.today().strftime("%Y_%m_%d")
        outfile = data_name + "_" + todays_date
        if shot_to_shot:
            outfile = outfile + "_shots"

    if to_pickle:
        # os.path.join replaces the old outpath + "//" + outfile join, which
        # produced an absolute path ("//file.pkl") when outpath was ""
        pkl_path = os.path.join(outpath, outfile + ".pkl")
        print("Saving to " + pkl_path)
        # context manager guarantees the file is closed even on error
        with open(pkl_path, "wb") as pickle_file:
            pickle.dump(data, pickle_file)

    if to_csv:
        data.to_csv(os.path.join(outpath, outfile + ".csv"))

    # return a PyHAT SpectralData object
    return SpectralData(
        data, name=data_name, spect_label="wvl", meta_label="meta", comp_label=None
    )