Source code for libpyhat.regression.sm

# -*- coding: utf-8 -*-
"""
Created on Sat Mar 26 20:15:46 2016

@author: [username deleted]
"""
import numpy as np
import scipy.optimize as opt


[docs] class sm: def __init__(self, blendranges, random_seed=None): self.blendranges = blendranges self.random_seed = random_seed
[docs] def do_blend(self, predictions, truevals=None, verbose=True): # create the array indicating which models to blend for each blend # range # For three models, this creates an array like: [[0,0],[0,1],[1,1], # [1,2],[2,2]] # Which indicates that in the first range, just use model 0 # In the second range, blend models 0 and 1 # in the third range, use model 1 # in the fourth range, blend models 1 and 2 # in the fifth range, use model 2 if self.random_seed is not None: np.random.seed(self.random_seed) self.toblend = [] for i in range(len(predictions) - 1): self.toblend.append([i, i]) if i < len(predictions) - 2: self.toblend.append([i, i + 1]) # If the true compositions are provided, then optimize the ranges # over which the results are blended to minimize the RMSEC blendranges = np.array( self.blendranges ).flatten() # squash the ranges to be a 1d array blendranges.sort() # sort the entries. These will be used by # submodels_blend to decide how to combine the predictions self.blendranges = blendranges if truevals is not None: self.rmse = 99999999 n_opt = 5 i = 0 while i < n_opt: if i > 0: # TODO: scale randomization bsed on range values blendranges = np.hstack( ( [-9999], blendranges[1:-1] + 0.1 * np.random.random(len(blendranges) - 2), [9999], ) ) # add some # randomness to the blendranges each time truevals = np.squeeze(np.array(truevals)) result = opt.minimize( self.get_rmse, blendranges, (predictions, truevals, verbose), tol=0.00001, ) if result.fun < self.rmse: self.blendranges = result.x self.rmse = result.fun if verbose is True: print(self.blendranges.sort()) print("RMSE =" + str(self.rmse)) else: pass i = i + 1 print(" ") print("Optimum settings:") print("RMSE = " + str(self.rmse)) print( "Low model: " + str(round(self.blendranges[0], 4)) + " to " + str(round(self.blendranges[2], 4)) ) i = 1 m = 2 while i + 3 < len(self.blendranges) - 1: print( "Submodel " + str(m) + ": " + str(round(self.blendranges[i], 4)) + " to " + str(round(self.blendranges[i + 3], 4)) ) i = i + 2 m = m + 1 print( "High model: " + str(round(self.blendranges[-3], 4)) + " to " + str(round(self.blendranges[-1], 4)) ) else: self.blendranges = blendranges # print(self.blendranges) # calculate the blended results blended = self.submodels_blend(predictions, self.blendranges, overwrite=False) return blended
[docs] def get_rmse( self, blendranges, predictions, truevals, verbose, rangemin=0.0, rangemax=100, roundval=10, ): blendranges[1:-1][blendranges[1:-1] < rangemin] = rangemin # ensure range # boundaries don't drift below min blendranges[1:-1][blendranges[1:-1] > rangemax] = rangemax # ensure range # boundaries don't drift above max blendranges.sort() # ensure range boundaries stay in order blended = self.submodels_blend(predictions, blendranges, overwrite=False) # calculate the RMSE. Round to specified precision as a way to # control how long optimization runs # Note: don't want to round too much - optimization needs some # wiggle room RMSE = np.round(np.sqrt(np.mean((blended - truevals) ** 2)), roundval) if verbose is True: print("RMSE = " + str(RMSE)) print( "Low model: " + str(round(blendranges[0], 4)) + " to " + str(round(blendranges[2], 4)) ) i = 1 m = 2 while i + 3 < len(blendranges) - 1: if verbose is True: print( "Submodel " + str(m) + ": " + str(round(blendranges[i], 4)) + " to " + str(round(blendranges[i + 3], 4)) ) i = i + 2 m = m + 1 if verbose is True: print( "High model: " + str(round(blendranges[-3], 4)) + " to " + str(round(blendranges[-1], 4)) ) return RMSE
[docs] def submodels_blend(self, predictions, blendranges, overwrite=False): blended = np.squeeze(np.zeros_like(predictions[0])) # format the blending ranges (note, initial formatting is done in # do_blend) blendranges = np.hstack( (blendranges, blendranges[1:-1]) ) # duplicate the middle entries blendranges.sort() # re-sort them blendranges = np.reshape( blendranges, (int(len(blendranges) / 2), int(2)) ) # turn the vector # back into a 2d array (one pair of values for each submodel) self.toblend.append([len(predictions) - 1, len(predictions) - 1]) blendranges = np.vstack((blendranges, [-9999999, 999999])) # print(blendranges) for i in range(len(blendranges)): # loop over each composition range for j in range(len(predictions[0])): # loop over each spectrum ref_tmp = predictions[-1][j] # get the reference model predicted value # check whether the prediction for the reference spectrum is # within the current range inrangecheck = (ref_tmp > blendranges[i][0]) & ( ref_tmp < blendranges[i][1] ) if inrangecheck: try: if ( self.toblend[i][0] == self.toblend[i][1] ): # if the results being blended are # identical, no blending necessary! blendval = predictions[self.toblend[i][0]][j] else: weight1 = 1 - (ref_tmp - blendranges[i][0]) / ( blendranges[i][1] - blendranges[i][0] ) # define the weight applied to the # lower model weight2 = (ref_tmp - blendranges[i][0]) / ( blendranges[i][1] - blendranges[i][0] ) # define the weight applied to the # higher model # calculated the blended value (weighted sum) blendval = ( weight1 * predictions[self.toblend[i][0]][j] + weight2 * predictions[self.toblend[i][1]][j] ) except: pass if overwrite: blended[j] = blendval # If overwrite is true, write the # blended result no matter what else: # If overwrite is false, only write the blended # result if there is not already a result there if blended[j] == 0: blended[j] = blendval return blended