# -*- coding: utf-8 -*-
"""
Created on Sat Mar 26 20:15:46 2016
@author: [username deleted]
"""
import numpy as np
import scipy.optimize as opt
[docs]
class sm:
def __init__(self, blendranges, random_seed=None):
self.blendranges = blendranges
self.random_seed = random_seed
[docs]
def do_blend(self, predictions, truevals=None, verbose=True):
# create the array indicating which models to blend for each blend
# range
# For three models, this creates an array like: [[0,0],[0,1],[1,1],
# [1,2],[2,2]]
# Which indicates that in the first range, just use model 0
# In the second range, blend models 0 and 1
# in the third range, use model 1
# in the fourth range, blend models 1 and 2
# in the fifth range, use model 2
if self.random_seed is not None:
np.random.seed(self.random_seed)
self.toblend = []
for i in range(len(predictions) - 1):
self.toblend.append([i, i])
if i < len(predictions) - 2:
self.toblend.append([i, i + 1])
# If the true compositions are provided, then optimize the ranges
# over which the results are blended to minimize the RMSEC
blendranges = np.array(
self.blendranges
).flatten() # squash the ranges to be a 1d array
blendranges.sort() # sort the entries. These will be used by
# submodels_blend to decide how to combine the predictions
self.blendranges = blendranges
if truevals is not None:
self.rmse = 99999999
n_opt = 5
i = 0
while i < n_opt:
if i > 0:
# TODO: scale randomization bsed on range values
blendranges = np.hstack(
(
[-9999],
blendranges[1:-1]
+ 0.1 * np.random.random(len(blendranges) - 2),
[9999],
)
) # add some
# randomness to the blendranges each time
truevals = np.squeeze(np.array(truevals))
result = opt.minimize(
self.get_rmse,
blendranges,
(predictions, truevals, verbose),
tol=0.00001,
)
if result.fun < self.rmse:
self.blendranges = result.x
self.rmse = result.fun
if verbose is True:
print(self.blendranges.sort())
print("RMSE =" + str(self.rmse))
else:
pass
i = i + 1
print(" ")
print("Optimum settings:")
print("RMSE = " + str(self.rmse))
print(
"Low model: "
+ str(round(self.blendranges[0], 4))
+ " to "
+ str(round(self.blendranges[2], 4))
)
i = 1
m = 2
while i + 3 < len(self.blendranges) - 1:
print(
"Submodel "
+ str(m)
+ ": "
+ str(round(self.blendranges[i], 4))
+ " to "
+ str(round(self.blendranges[i + 3], 4))
)
i = i + 2
m = m + 1
print(
"High model: "
+ str(round(self.blendranges[-3], 4))
+ " to "
+ str(round(self.blendranges[-1], 4))
)
else:
self.blendranges = blendranges
# print(self.blendranges)
# calculate the blended results
blended = self.submodels_blend(predictions, self.blendranges, overwrite=False)
return blended
[docs]
def get_rmse(
self,
blendranges,
predictions,
truevals,
verbose,
rangemin=0.0,
rangemax=100,
roundval=10,
):
blendranges[1:-1][blendranges[1:-1] < rangemin] = rangemin # ensure range
# boundaries don't drift below min
blendranges[1:-1][blendranges[1:-1] > rangemax] = rangemax # ensure range
# boundaries don't drift above max
blendranges.sort() # ensure range boundaries stay in order
blended = self.submodels_blend(predictions, blendranges, overwrite=False)
# calculate the RMSE. Round to specified precision as a way to
# control how long optimization runs
# Note: don't want to round too much - optimization needs some
# wiggle room
RMSE = np.round(np.sqrt(np.mean((blended - truevals) ** 2)), roundval)
if verbose is True:
print("RMSE = " + str(RMSE))
print(
"Low model: "
+ str(round(blendranges[0], 4))
+ " to "
+ str(round(blendranges[2], 4))
)
i = 1
m = 2
while i + 3 < len(blendranges) - 1:
if verbose is True:
print(
"Submodel "
+ str(m)
+ ": "
+ str(round(blendranges[i], 4))
+ " to "
+ str(round(blendranges[i + 3], 4))
)
i = i + 2
m = m + 1
if verbose is True:
print(
"High model: "
+ str(round(blendranges[-3], 4))
+ " to "
+ str(round(blendranges[-1], 4))
)
return RMSE
[docs]
def submodels_blend(self, predictions, blendranges, overwrite=False):
blended = np.squeeze(np.zeros_like(predictions[0]))
# format the blending ranges (note, initial formatting is done in
# do_blend)
blendranges = np.hstack(
(blendranges, blendranges[1:-1])
) # duplicate the middle entries
blendranges.sort() # re-sort them
blendranges = np.reshape(
blendranges, (int(len(blendranges) / 2), int(2))
) # turn the vector
# back into a 2d array (one pair of values for each submodel)
self.toblend.append([len(predictions) - 1, len(predictions) - 1])
blendranges = np.vstack((blendranges, [-9999999, 999999]))
# print(blendranges)
for i in range(len(blendranges)): # loop over each composition range
for j in range(len(predictions[0])): # loop over each spectrum
ref_tmp = predictions[-1][j] # get the reference model predicted value
# check whether the prediction for the reference spectrum is
# within the current range
inrangecheck = (ref_tmp > blendranges[i][0]) & (
ref_tmp < blendranges[i][1]
)
if inrangecheck:
try:
if (
self.toblend[i][0] == self.toblend[i][1]
): # if the results being blended are
# identical, no blending necessary!
blendval = predictions[self.toblend[i][0]][j]
else:
weight1 = 1 - (ref_tmp - blendranges[i][0]) / (
blendranges[i][1] - blendranges[i][0]
) # define the weight applied to the
# lower model
weight2 = (ref_tmp - blendranges[i][0]) / (
blendranges[i][1] - blendranges[i][0]
) # define the weight applied to the
# higher model
# calculated the blended value (weighted sum)
blendval = (
weight1 * predictions[self.toblend[i][0]][j]
+ weight2 * predictions[self.toblend[i][1]][j]
)
except:
pass
if overwrite:
blended[j] = blendval # If overwrite is true, write the
# blended result no matter what
else:
# If overwrite is false, only write the blended
# result if there is not already a result there
if blended[j] == 0:
blended[j] = blendval
return blended