Source code for libpyhat.transform.peak_area

# this function is used to bin a spectrum based on the intervals between
# local minima, effectively summing major peaks into individual values.
# Can be useful for spectra with many narrow peaks, such as LIBS spectra.
import numpy as np
import pandas as pd
import scipy as sp



[docs]
def peak_area(df, peaks_mins_file=None, spect_label="wvl"):
    """Sum each spectrum between the local minima that bracket every peak.

    For each peak, the channels lying between the nearest minimum below it
    and the nearest minimum above it are summed per row, and the result is
    stored in a new ``("peak_area", <peak wavelength>)`` column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data frame in which ``df[spect_label]`` selects the spectral
        columns; the labels of those columns must be convertible to float
        (they are used as the wavelength axis). ``df`` is modified in
        place.
    peaks_mins_file : optional
        If given, it is passed to ``pandas.read_csv`` and must provide a
        ``"type"`` column containing ``"peak"`` / ``"min"`` and a column
        named ``spect_label`` holding the corresponding positions. Peaks
        lying below the smallest or above the largest listed minimum are
        dropped (and reported via ``print``). If ``None``, peaks and minima
        are located as the local extrema of the row-averaged spectrum.
    spect_label : str, optional
        Top-level column label of the spectra (default ``"wvl"``).

    Returns
    -------
    df : pandas.DataFrame
        The input frame with one ``("peak_area", peak)`` column per peak.
    peaks : numpy.ndarray
        Peak positions that were used.
    mins : numpy.ndarray
        Minimum positions that were used.

    Notes
    -----
    Existing ``("peak_area", peak)`` columns with the same label are
    overwritten.
    """
    # Wavelength axis, taken from the labels of the spectral columns.
    wvls = np.array(df[spect_label].columns.values, dtype=float)
    spectra = np.array(df[spect_label])

    if peaks_mins_file is not None:
        peaks_mins = pd.read_csv(peaks_mins_file, sep=",")
        kinds = peaks_mins["type"]
        peaks = np.array(peaks_mins[kinds == "peak"][spect_label], dtype="float")
        mins = np.array(peaks_mins[kinds == "min"][spect_label], dtype="float")

        # Keep only peaks that have minima on both sides.
        unbounded = (peaks < np.min(mins)) | (peaks > np.max(mins))
        if unbounded.any():
            print("Removing unbounded peaks: " + str(peaks[unbounded]))
            peaks = peaks[~unbounded]
    else:
        # Import the submodule explicitly: a bare ``import scipy`` does not
        # guarantee that ``scipy.signal`` is available as an attribute.
        from scipy.signal import argrelextrema

        # Average of all spectra in the data frame.
        ave_spect = np.average(spectra, axis=0)
        # Local maxima of the average spectrum.
        peaks = wvls[argrelextrema(ave_spect, np.greater)[0]]
        # Local minima of the average spectrum.
        mins = wvls[argrelextrema(ave_spect, np.less)[0]]

    # Relative amount subtracted from each bound so that rounding errors in
    # the wavelength labels do not move a channel across a bin edge.
    rel_shift = 0.00001

    for peak in peaks:
        # Nearest minimum (by position in ``mins``) on each side of the
        # peak; when a side has no minimum, fall back to the first / last
        # entry of ``mins``.
        below = mins[mins < peak]
        above = mins[mins > peak]
        low = below[-1] if below.size else mins[0]
        high = above[0] if above.size else mins[-1]

        low = low - rel_shift * low
        high = high - rel_shift * high

        # Half-open window [low, high): the upper minimum belongs to the
        # next peak's window.
        in_window = (wvls >= low) & (wvls < high)
        # TODO: Check to make sure we aren't overwriting existing peak_area
        #  column names
        df[("peak_area", peak)] = spectra[:, in_window].sum(axis=1)

    return df, peaks, mins