Source code for gui.core.caltranMethods.caltran_prepare_data

import numpy as np
import pandas as pd


def _average_repeats(data, match_key, set_name):
    """Average rows of *data* that share a value in the *match_key* column.

    Args:
        data: DataFrame with two-level columns, already filtered and sorted.
        match_key: Column key, a ``("meta", <name>)`` tuple, identifying
            which rows are repeats of one another.
        set_name: Label used in progress messages ("A" or "B").

    Returns:
        *data* itself when every match value occurs exactly once. Otherwise
        a new DataFrame with one row per unique match value (used as the row
        index, in ``np.unique`` order) and the same columns as *data*.
        Numeric columns hold the mean of the repeats, the match column holds
        the match value, and other non-numeric columns are NaN.
    """
    match_values = data[match_key]
    uniques = np.unique(match_values)
    if len(uniques) == len(match_values):
        print(
            "Data set " + set_name
            + ": No repeat spectra, no need to calculate averages!"
        )
        return data

    means = []
    for value in uniques:
        # str() so that non-string identifiers (e.g. integers) do not raise.
        print("Averaging data set " + set_name + " spectra: " + str(value))
        repeats = data.loc[match_values == value]
        # numeric_only=True: string metadata cannot be averaged, and newer
        # pandas versions raise instead of silently skipping such columns.
        means.append(repeats.mean(axis=0, numeric_only=True))

    # One column per unique value -> transpose to one row per unique value.
    averaged = pd.concat(means, axis=1, keys=list(uniques)).T
    # Restore the original column layout; columns that were skipped by the
    # numeric-only mean come back filled with NaN.
    averaged = averaged.reindex(columns=data.columns)
    averaged[match_key] = uniques
    return averaged


def prepare_data(A, B, dataAmatchcol, dataBmatchcol, spect_label="wvl"):
    """Align two spectral data sets and average repeat spectra.

    Both inputs are DataFrames with two-level columns: a ``"meta"`` top-level
    group containing the match columns, and a *spect_label* top-level group
    whose second-level labels are the spectral channels.

    Steps:
        1. Keep only rows whose match value occurs in both data sets.
        2. Sort each data set by its match column.
        3. Verify both data sets have identical spectral channel labels.
        4. Average rows that share the same match value.

    Args:
        A: First data set.
        B: Second data set.
        dataAmatchcol: Name of the ``"meta"`` column of *A* used for matching.
        dataBmatchcol: Name of the ``"meta"`` column of *B* used for matching.
        spect_label: Top-level column label of the spectral data.

    Returns:
        Tuple ``(A_mean, B_mean)``. For a data set without repeats this is
        the filtered, sorted frame (original row index retained). For a data
        set with repeats it is a frame indexed by the unique match values.

    Raises:
        AssertionError: If the data sets differ in the number of spectral
            channels or in any channel label. Raised explicitly (rather than
            via ``assert``) so the checks survive ``python -O`` while staying
            compatible with callers that catch ``AssertionError``.
    """
    key_a = ("meta", dataAmatchcol)
    key_b = ("meta", dataBmatchcol)

    print("Removing rows that are not present in both data sets.")
    A = A.loc[A[key_a].isin(B[key_b])]
    B = B.loc[B[key_b].isin(A[key_a])]
    A = A.sort_values(key_a)
    B = B.sort_values(key_b)

    print("Verifying that the wavelengths for all data sets are the same")
    wvl_a = A[spect_label].columns.values
    wvl_b = B[spect_label].columns.values
    if len(wvl_a) != len(wvl_b):
        raise AssertionError(
            "Data sets A and B have different numbers of spectral channels!"
        )
    # Compare every channel, not just the first one.
    if not np.array_equal(wvl_a, wvl_b):
        raise AssertionError(
            "Data set A and B wavelengths are not identical. Check rounding "
            "and/or resample one data set onto the other's wavelengths"
        )

    print("Averaging repeat spectra")
    A_mean = _average_repeats(A, key_a, "A")
    B_mean = _average_repeats(B, key_b, "B")
    return A_mean, B_mean