Source code for libpyhat.transform.cal_tran_cv

import copy

import numpy as np
import pandas as pd

import libpyhat.transform.caltran_utils as ctu
from libpyhat.transform.cal_tran import cal_tran


def _held_out_split(data, match_key, spect_label, val):
    """Split *data* into the held-out spectra and the remaining spectra.

    Rows where ``data[match_key] == val`` form the validation set and rows
    where ``data[match_key] != val`` form the training set. For each set the
    ``spect_label`` columns are converted to a float array and singleton
    dimensions are squeezed out.

    Returns:
        tuple: ``(validation_array, training_array)``.
    """
    labels = data[match_key]
    held_out = np.squeeze(np.array(data[labels == val][spect_label], dtype="float"))
    remaining = np.squeeze(np.array(data[labels != val][spect_label], dtype="float"))
    return held_out, remaining


def cal_tran_cv(
    A, B, paramgrid, data_A_col, data_B_col, spect_label="wvl", meta_label="meta"
):
    """Cross-validate calibration-transfer settings by holding out one label at a time.

    For every parameter set in *paramgrid* a ``cal_tran`` object is created.
    Each unique value of ``A_mean[(meta_label, data_A_col)]`` is then held out
    in turn: the transform is derived from the remaining rows of both data
    sets, applied to the held-out rows of A, and scored against the held-out
    rows of B with a root-mean-square error.

    Args:
        A: First data set, passed to ``ctu.prepare_data``.
            NOTE(review): presumably a DataFrame with two-level columns such
            as ``(meta_label, <col>)`` and ``(spect_label, <channel>)`` --
            confirm against ``ctu.prepare_data``.
        B: Second data set (the target that A is transformed to match),
            passed to ``ctu.prepare_data``.
        paramgrid: Iterable of dict-like parameter sets. Each one is passed
            unchanged to ``cal_tran`` and its items are recorded in the
            results row.
        data_A_col: Second-level column name under *meta_label* in A that
            identifies matching samples.
        data_B_col: Second-level column name under *meta_label* in B that
            identifies matching samples.
        spect_label: Top-level column label that selects the spectral data.
        meta_label: Top-level column label that selects the metadata.

    Returns:
        pandas.DataFrame: One row per parameter set. Columns are a two-level
        index whose first level is always ``"cv"`` and whose second level
        holds the parameter names, one ``"<label>_RMSE"`` column per held-out
        label, and ``"average_RMSE"``.
    """
    # get the data sets
    A_mean, B_mean = ctu.prepare_data(A, B, data_A_col, data_B_col, colvar=spect_label)

    # prepare for cross validation
    match_key_A = (meta_label, data_A_col)
    match_key_B = (meta_label, data_B_col)
    uniquevals = np.unique(A_mean[match_key_A])
    cv_results = pd.DataFrame()

    # step through all the different permutations
    for ind, params in enumerate(paramgrid):
        print(params)
        # store parameters in the results file
        for key, value in params.items():
            cv_results.loc[ind, key] = value

        # The same object is reused for every fold; derive_transform is
        # called again for each held-out label.
        ct_obj = cal_tran(params)

        rmses = []
        # hold out each unique spectrum in turn
        for val in uniquevals:
            print(val)
            # validation data = the held-out spectrum, training data = the
            # spectra that are not held out, for both data sets
            val_data_A, train_data_A = _held_out_split(
                A_mean, match_key_A, spect_label, val
            )
            val_data_B, train_data_B = _held_out_split(
                B_mean, match_key_B, spect_label, val
            )

            # derive the transform based on the training data
            ct_obj.derive_transform(train_data_A, train_data_B)
            # apply the transform to the held-out spectrum from A
            val_data_A_transformed = ct_obj.apply_transform(val_data_A)

            rmse = np.sqrt(np.mean((val_data_A_transformed - val_data_B) ** 2))
            rmses.append(rmse)
            # record the RMSE for the held-out spectrum; the f-string keeps
            # this working when the labels are not strings (e.g. numeric IDs)
            cv_results.loc[ind, f"{val}_RMSE"] = rmse

        cv_results.loc[ind, "average_RMSE"] = np.mean(rmses)

    # Nest every result column under a common "cv" top-level label.
    cv_results.columns = pd.MultiIndex.from_tuples(
        [("cv", col) for col in cv_results.columns]
    )
    return cv_results
def call_cal_tran_cv(
    A,
    B,
    dataAmatchcol,
    dataBmatchcol,
    paramgrid,
    spect_label="wvl",
    dataAname="A",
    dataBname="B",
):
    """Announce, validate and run the calibration-transfer cross validation.

    Prints a notice that only an evaluation is performed, checks the two data
    sets with ``ctu.check_data`` and then delegates to ``cal_tran_cv``.

    Args:
        A: First data set (the one being transformed).
        B: Second data set (the one A is transformed to match).
        dataAmatchcol: Match column for A, forwarded as ``data_A_col``.
        dataBmatchcol: Match column for B, forwarded as ``data_B_col``.
        paramgrid: Parameter sets to evaluate, forwarded unchanged.
        spect_label: Label of the spectral columns, forwarded to both
            ``ctu.check_data`` and ``cal_tran_cv``.
        dataAname: Display name for A used in the printed notice and passed
            to ``ctu.check_data``.
        dataBname: Display name for B used in the printed notice and passed
            to ``ctu.check_data``.

    Returns:
        The results object returned by ``cal_tran_cv``.
    """
    notice = "".join(
        (
            "Running Calibration Transfer CV. This evaluates multiple options "
            "for transforming ",
            dataAname,
            " to match ",
            dataBname,
            " but **does not apply a final transform.**",
        )
    )
    print(notice)

    ctu.check_data(A, B, dataAname, dataBname, spect_label=spect_label)

    return cal_tran_cv(
        A,
        B,
        paramgrid,
        dataAmatchcol,
        dataBmatchcol,
        spect_label=spect_label,
    )