# this function is used to bin a spectrum based on the intervals between
# local minima, effectively summing major peaks into individual values.
# Can be useful for spectra with many narrow peaks, such as LIBS spectra.
import numpy as np
import pandas as pd
import scipy as sp
def peak_area(df, peaks_mins_file=None, spect_label="wvl"):
    """Sum each spectrum over the interval between the minima around a peak.

    For every peak, the channels lying between the nearest minimum below the
    peak and the nearest minimum above it are summed, and the per-spectrum
    sums are stored in a new column ``("peak_area", <peak wavelength>)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data frame for which ``df[spect_label]`` selects the spectra, one
        spectrum per row, with column labels that can be converted to float
        (the wavelengths). New columns are added to this object in place.
    peaks_mins_file : path or file-like, optional
        Passed to ``pandas.read_csv``. The table must contain a ``type``
        column holding ``"peak"`` / ``"min"`` and a column named
        ``spect_label`` holding the corresponding wavelengths. When omitted,
        peaks and minima are the strict local maxima and minima of the
        average of all spectra in ``df``.
    spect_label : str, optional
        Top-level column label of the spectra in ``df`` and name of the
        wavelength column in ``peaks_mins_file``.

    Returns
    -------
    df : pandas.DataFrame
        The input data frame with the ``peak_area`` columns added.
    peaks : numpy.ndarray
        Wavelengths of the peaks that were integrated.
    mins : numpy.ndarray
        Wavelengths of the minima used as bin edges.

    Notes
    -----
    Only peaks that have a minimum on both sides are integrated. Peaks below
    the lowest minimum or above the highest one (or every peak, when there are
    no minima at all) are reported on stdout and dropped, whether the
    peaks/minima came from a file or were detected automatically. Without
    this, such a peak would get a zero-width bin and an all-zero column.
    """
    spectra = np.array(df[spect_label])
    wvls = np.array(df[spect_label].columns.values, dtype=float)  # wavelengths

    if peaks_mins_file is not None:
        peaks_mins = pd.read_csv(peaks_mins_file, sep=",")
        is_peak = peaks_mins["type"] == "peak"
        is_min = peaks_mins["type"] == "min"
        peaks = np.array(peaks_mins[is_peak][spect_label], dtype="float")
        mins = np.array(peaks_mins[is_min][spect_label], dtype="float")
    else:
        # Import the function explicitly: a bare ``import scipy`` does not
        # guarantee that the ``scipy.signal`` submodule has been loaded.
        from scipy.signal import argrelextrema

        # Locate the extrema on the average of all spectra in the data frame.
        ave_spect = np.average(spectra, axis=0)
        peaks = wvls[argrelextrema(ave_spect, np.greater)[0]]  # local maxima
        mins = wvls[argrelextrema(ave_spect, np.less)[0]]  # local minima

    # Keep only peaks that have mins on both sides. With no minima at all,
    # no peak can be bounded.
    if mins.size:
        unbounded = (peaks < mins.min()) | (peaks > mins.max())
    else:
        unbounded = np.ones(peaks.shape, dtype=bool)
    if unbounded.any():
        print("Removing unbounded peaks: " + str(peaks[unbounded]))
        peaks = peaks[~unbounded]

    for peak in peaks:
        # Nearest minimum on each side of the peak. The fall-backs are only
        # reached when the peak coincides with the lowest/highest minimum.
        below = mins[mins < peak]
        above = mins[mins > peak]
        low = below[-1] if below.size else mins[0]
        high = above[0] if above.size else mins[-1]
        # subtract a small amount from each to avoid rounding errors causing
        # issues
        low = low - 0.00001 * low
        high = high - 0.00001 * high
        in_bin = (wvls >= low) & (wvls < high)
        # TODO: Check to make sure we aren't overwriting existing peak_area
        #  column names
        df[("peak_area", peak)] = spectra[:, in_bin].sum(axis=1)
    return df, peaks, mins