# this function is used to bin a spectrum based on the intervals between
# local minima, effectively summing major peaks into individual values.
# Can be useful for spectra with many narrow peaks, such as LIBS spectra.
import numpy as np
import pandas as pd
import scipy as sp
# (stray "[docs]" Sphinx source-link marker from the HTML listing; not code)
def peak_area(df, peaks_mins_file=None, spect_label="wvl"):
    """Sum each spectrum between the local minima that bracket every peak.

    For every peak wavelength, the channels lying between the nearest
    minimum below the peak and the nearest minimum above it are summed
    row-wise, and the result is stored in ``df`` under the column key
    ``("peak_area", peak)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data frame for which ``df[spect_label]`` selects the spectra: one
        row per spectrum, with column labels convertible to float
        wavelengths.  ``df`` is modified in place.
    peaks_mins_file : path or file-like, optional
        Comma-separated file readable by ``pandas.read_csv`` that has a
        ``"type"`` column holding ``"peak"`` / ``"min"`` and a column named
        ``spect_label`` holding the wavelengths.  Peaks lying below the
        smallest or above the largest listed minimum are reported with
        ``print`` and dropped.  When omitted, peaks and minima are taken
        from the strict local extrema of the average spectrum.
    spect_label : str, optional
        Top-level column label of the spectra in ``df`` (and wavelength
        column name in ``peaks_mins_file``).

    Returns
    -------
    df : pandas.DataFrame
        The same object that was passed in, with the new columns added.
    peaks : numpy.ndarray
        Peak wavelengths for which a column was written.
    mins : numpy.ndarray
        Minimum wavelengths, in the order they were read or detected.

    Notes
    -----
    In auto-detect mode a peak without a minimum on one side is kept; its
    window collapses to an empty interval, so its area column is all zeros.
    The same happens when no minima are found at all.
    """
    spectra = np.array(df[spect_label])
    wvls = np.array(df[spect_label].columns.values, dtype=float)  # wavelengths

    if peaks_mins_file is not None:
        peaks_mins = pd.read_csv(peaks_mins_file, sep=",")
        kinds = peaks_mins["type"]
        peaks = np.array(peaks_mins.loc[kinds == "peak", spect_label], dtype=float)
        mins = np.array(peaks_mins.loc[kinds == "min", spect_label], dtype=float)
        # keep only peaks that have mins on both sides; without any minima
        # no peak can be bounded, so all of them are dropped
        if mins.size:
            unbounded = (peaks < mins.min()) | (peaks > mins.max())
        else:
            unbounded = np.ones(peaks.shape, dtype=bool)
        if unbounded.any():
            print("Removing unbounded peaks: " + str(peaks[unbounded]))
        peaks = peaks[~unbounded]
    else:
        # Import the submodule explicitly: a plain ``import scipy`` does not
        # guarantee that ``scipy.signal`` is loaded on older SciPy releases.
        from scipy.signal import argrelextrema

        ave_spect = np.average(spectra, axis=0)  # average of all spectra
        # maxima of the average spectrum
        peaks = wvls[argrelextrema(ave_spect, np.greater)[0]]
        # minima of the average spectrum
        mins = wvls[argrelextrema(ave_spect, np.less)[0]]

    # Look up bounds in a sorted copy so the *nearest* minima are found even
    # when the wavelengths (or the file rows) are not in ascending order.
    # ``mins`` itself is returned untouched.
    sorted_mins = np.sort(mins)

    for peak in peaks:
        if sorted_mins.size:
            below = sorted_mins[sorted_mins < peak]
            above = sorted_mins[sorted_mins > peak]
            # fall back to the outermost minimum when one side is unbounded
            low = below[-1] if below.size else sorted_mins[0]
            high = above[0] if above.size else sorted_mins[-1]
            # subtract a small amount from each to avoid rounding errors
            # causing issues
            low = low - 0.00001 * low
            high = high - 0.00001 * high
            in_window = (wvls >= low) & (wvls < high)
        else:
            # no minima at all: nothing brackets the peak, so sum nothing
            in_window = np.zeros(wvls.shape, dtype=bool)
        # TODO: Check to make sure we aren't overwriting existing peak_area
        # column names
        df[("peak_area", peak)] = spectra[:, in_window].sum(axis=1)

    return df, peaks, mins