import copy
import numpy as np
import pandas as pd
import libpyhat.transform.caltran_utils as ctu
from libpyhat.transform.cal_tran import cal_tran
def cal_tran_cv(
    A, B, paramgrid, data_A_col, data_B_col, spect_label="wvl", meta_label="meta"
):
    """Leave-one-out cross validation of calibration transfer settings.

    For every parameter set in ``paramgrid`` each unique value of the A-side
    match column is held out in turn: a transform is derived from the
    remaining rows of A and B, applied to the held-out A spectrum, and
    scored by RMSE against the B rows carrying the same value.

    Parameters
    ----------
    A, B :
        Data sets handed unchanged to ``ctu.prepare_data``. The frames it
        returns are indexed here with two-level column keys:
        ``(meta_label, <match column>)`` and a top-level ``spect_label``.
        NOTE(review): the ``*_mean`` names suggest prepare_data averages
        spectra per match value -- confirm in caltran_utils.
    paramgrid : iterable of dict
        One dict of ``cal_tran`` settings per candidate. Every key/value is
        copied into the results row for that candidate.
    data_A_col, data_B_col :
        Names of the metadata columns in A and B whose values pair up the
        spectra of the two data sets.
    spect_label : str, optional
        Top-level column label of the spectral channels.
    meta_label : str, optional
        Top-level column label of the metadata columns.

    Returns
    -------
    pandas.DataFrame
        One row per parameter set. Columns are a two-level MultiIndex whose
        first level is ``"cv"``; the second level holds the parameter names,
        one ``"<value>_RMSE"`` column per held-out value, and
        ``"average_RMSE"`` (mean over the hold-outs).
    """
    # get the data sets
    A_mean, B_mean = ctu.prepare_data(A, B, data_A_col, data_B_col, colvar=spect_label)

    # The spectra and the matching identifiers do not depend on the
    # parameter set, so convert them once instead of once per fold per set.
    spectra_A = np.array(A_mean[spect_label], dtype="float")
    spectra_B = np.array(B_mean[spect_label], dtype="float")
    ids_A = A_mean[(meta_label, data_A_col)]
    ids_B = B_mean[(meta_label, data_B_col)]

    # prepare for cross validation: the folds are defined by the values in A
    uniquevals = np.unique(ids_A)
    held_out_rows = [
        ((ids_A == val).to_numpy(), (ids_B == val).to_numpy()) for val in uniquevals
    ]

    cv_results = pd.DataFrame()

    # step through all the different permutations
    for ind, params in enumerate(paramgrid):
        print(params)
        # store parameters in the results file
        for key, value in params.items():
            cv_results.loc[ind, key] = value

        ct_obj = cal_tran(params)
        rmses = []
        # hold out each unique spectrum in turn
        for val, (out_A, out_B) in zip(uniquevals, held_out_rows):
            print(val)
            # validation data (the held out spectrum) and training data
            # (everything that is not held out) for both data sets
            val_data_A = np.squeeze(spectra_A[out_A])
            train_data_A = np.squeeze(spectra_A[~out_A])
            val_data_B = np.squeeze(spectra_B[out_B])
            train_data_B = np.squeeze(spectra_B[~out_B])

            # derive the transform based on the training data only
            ct_obj.derive_transform(train_data_A, train_data_B)
            # apply the transform to the held out spectrum from A
            val_data_A_transformed = ct_obj.apply_transform(val_data_A)

            rmse = np.sqrt(np.mean((val_data_A_transformed - val_data_B) ** 2))
            rmses.append(rmse)
            # record the RMSE for the held out spectrum; the f-string keeps
            # this working when the identifiers are not strings
            cv_results.loc[ind, f"{val}_RMSE"] = rmse

        cv_results.loc[ind, "average_RMSE"] = np.mean(rmses)

    # put every result column under a common top-level "cv" label
    cv_results.columns = pd.MultiIndex.from_tuples(
        [("cv", col) for col in cv_results.columns]
    )
    return cv_results
def call_cal_tran_cv(
    A,
    B,
    dataAmatchcol,
    dataBmatchcol,
    paramgrid,
    spect_label="wvl",
    dataAname="A",
    dataBname="B",
):
    """Announce, validate and run the calibration transfer cross validation.

    Prints a notice that only an evaluation is performed (no final transform
    is applied), passes both data sets through ``ctu.check_data`` and then
    delegates to ``cal_tran_cv``.

    Parameters
    ----------
    A, B :
        The two data sets; forwarded to ``ctu.check_data`` and
        ``cal_tran_cv``.
    dataAmatchcol, dataBmatchcol :
        Match columns of A and B, forwarded as ``data_A_col`` and
        ``data_B_col``.
    paramgrid :
        Parameter sets to evaluate, forwarded unchanged.
    spect_label : str, optional
        Label of the spectral columns, forwarded to both helpers.
    dataAname, dataBname : str, optional
        Display names used in the printed notice and given to
        ``ctu.check_data``.

    Returns
    -------
    The value returned by ``cal_tran_cv``.
    """
    notice = (
        "Running Calibration Transfer CV. This evaluates multiple options "
        "for transforming "
        + dataAname
        + " to match "
        + dataBname
        + " but **does not apply a final transform.**"
    )
    print(notice)

    ctu.check_data(A, B, dataAname, dataBname, spect_label=spect_label)

    return cal_tran_cv(
        A, B, paramgrid, dataAmatchcol, dataBmatchcol, spect_label=spect_label
    )