# This function masks out specified ranges of the data
import numpy as np
import pandas as pd
[docs]
def mask(df, maskfile, maskvar="wvl"):
    """Relabel the spectral channels that fall inside masked ranges.

    Columns found under the top-level label ``maskvar`` whose second-level
    label lies inside any ``[min_wvl, max_wvl]`` interval listed in the mask
    file are moved under the top-level label ``"masked"``. The remaining
    spectral columns keep ``maskvar`` as their top-level label. No data
    values are changed; only the column labels are.

    Parameters
    ----------
    df : pandas.DataFrame
        Data with two-level (MultiIndex) columns. The second-level labels
        under ``maskvar`` must be convertible to ``float``.
    maskfile : str, path object or file-like object
        Comma-separated file, read with ``pandas.read_csv``, that has
        ``min_wvl`` and ``max_wvl`` columns. Each row is one range to mask;
        both bounds are inclusive.
    maskvar : str, default "wvl"
        Top-level column label of the spectra to mask.

    Returns
    -------
    pandas.DataFrame
        The relabelled spectral columns followed by every column of ``df``
        whose top-level label is not ``maskvar``.

    Raises
    ------
    KeyError
        If ``maskvar`` is not a column label of ``df``, or the mask file
        lacks a ``min_wvl`` or ``max_wvl`` column.
    """
    # Columns found under the top-level label `maskvar`; their labels are the
    # second-level values, which are treated as wavelengths below.
    df_spectra = df[maskvar]

    # Everything that is not part of the spectra is set aside unchanged and
    # re-attached once the spectra have been relabelled.
    top_labels = df.columns.levels[0]
    df_other = df[top_labels[top_labels != maskvar]]

    # One row per range to mask. The local is deliberately not called `mask`,
    # which would shadow this function's own name.
    mask_ranges = pd.read_csv(maskfile, sep=",")

    # Convert the labels to floats once instead of once per mask range.
    wavelengths = np.array(df_spectra.columns, dtype="float")

    # Start from "nothing masked" and OR in each range. This keeps `masked` a
    # 1-D array even when the mask file lists no ranges; reducing an empty
    # list of per-range arrays would collapse to a scalar instead.
    masked = np.zeros(len(wavelengths), dtype=bool)
    for lower, upper in zip(mask_ranges["min_wvl"], mask_ranges["max_wvl"]):
        masked |= (wavelengths >= lower) & (wavelengths <= upper)

    # Build the new two-level labels: masked channels go under "masked", the
    # rest stay under `maskvar`. Second-level labels are normalised to float.
    df_spectra.columns = pd.MultiIndex.from_tuples(
        [
            ("masked" if is_masked else maskvar, float(label))
            for label, is_masked in zip(df_spectra.columns, masked)
        ]
    )

    # Re-attach the untouched columns next to the relabelled spectra.
    return pd.concat([df_spectra, df_other], axis=1)