# Source code for libpyhat.regression.local_regression

# -*- coding: utf-8 -*-

import numpy as np
from joblib import Parallel, delayed
from sklearn.linear_model import ElasticNetCV
from sklearn.model_selection import GroupKFold
from sklearn.neighbors import NearestNeighbors


def fit_predict_parallel(
    i,
    x_train=None,
    y_train=None,
    x_predict=None,
    model=None,
    neighbors=None,
    verbose=True,
):
    """Fit a local model for one unknown spectrum and predict its value.

    Row ``i`` of ``x_predict`` is looked up in ``neighbors``; the training
    rows returned by that lookup are used to fit ``model``, which is then
    asked to predict the same row.

    Arguments:
    i = Row index into x_predict of the spectrum to predict.

    Keyword arguments:
    x_train = The training data spectra (2-D, one row per spectrum).
    y_train = The values of the quantity being predicted for the training
              data, aligned row-for-row with x_train.
    x_predict = The unknown spectra (2-D, one row per spectrum).
    model = Estimator providing fit(X, y), predict(X), coef_ and intercept_.
            It is refit in place on every call.
    neighbors = Object providing kneighbors(X) that returns a
                (distances, indices) pair, already fitted on x_train.
    verbose = If truthy, print a progress message for this spectrum.

    Returns:
    A tuple (prediction, coeffs, intercept): the first element of
    model.predict for the spectrum, plus model.coef_ and model.intercept_
    after the local fit.
    """
    if verbose:
        print("Predicting spectrum " + str(i + 1))

    target = np.asarray(x_predict)[i, :]
    _, ind = neighbors.kneighbors([target])

    # kneighbors was queried with a single row, so ``ind`` has one row of
    # neighbor indices. Index with that row directly rather than squeezing
    # afterwards: np.squeeze would also drop the sample axis when only one
    # neighbor is requested, or the feature axis when there is one feature.
    local_idx = np.asarray(ind)[0]
    x_train_local = np.asarray(x_train)[local_idx]
    y_train_local = np.asarray(y_train)[local_idx]
    if y_train_local.ndim == 2 and y_train_local.shape[1] == 1:
        # A single-column target is flattened to 1-D, as before.
        y_train_local = y_train_local.ravel()

    # NOTE(review): the original built a GroupKFold(n_splits=3) splitter here
    # but never handed it to the model, so it had no effect; it was removed.
    # Confirm whether the splitter was meant to be passed to the model.
    model.fit(x_train_local, y_train_local)
    prediction = model.predict([target])[0]
    return prediction, model.coef_, model.intercept_
class LocalRegression:
    """Implement "local" regression.

    Given a set of training data and a set of unknown data, iterate through
    each unknown spectrum, find the nearest training spectra, and generate a
    model from just those spectra. Each local model is an ElasticNetCV, which
    is optimized using the built-in cross validation methods from scikit.
    """

    def __init__(self, params, n_neighbors=250, verbose=True, n_jobs=-1):
        """Initialize LocalRegression.

        Arguments:
        params = Dict containing the keywords and parameters for the
                 regression method to be used (passed to ElasticNetCV).

        Keyword arguments:
        n_neighbors = User-specified number of training spectra to use to
                      generate the local regression model for each unknown
                      spectrum.
        verbose = If truthy, a progress message is printed for each unknown
                  spectrum during fit_predict.
        n_jobs = Number of jobs handed to joblib.Parallel in fit_predict.
        """
        # For now, the only option is Elastic Net. Other methods to be added
        # in the future.
        self.model = ElasticNetCV(**params)
        self.neighbors = NearestNeighbors(n_neighbors=n_neighbors)
        self.verbose = verbose
        self.n_jobs = n_jobs

    def fit_predict(self, x_train, y_train, x_predict):
        """Use local regression to predict values for unknown data.

        Arguments:
        x_train = The training data spectra.
        y_train = The values of the quantity being predicted for the
                  training data.
        x_predict = The unknown spectra for which y needs to be predicted.

        Returns:
        A tuple (predictions, coeffs, intercepts) of three lists, each with
        one entry per row of x_predict, in row order.
        """
        self.neighbors.fit(x_train)

        shared_kwargs = {
            "x_train": x_train,
            "y_train": y_train,
            "x_predict": x_predict,
            "model": self.model,
            "neighbors": self.neighbors,
            # Honor the verbosity chosen at construction time; this used to
            # be hard-coded to True, so verbose=False had no effect.
            "verbose": self.verbose,
        }

        # np.shape also accepts plain nested sequences, which the worker
        # function converts to arrays itself.
        n_unknown = np.shape(x_predict)[0]
        results = Parallel(n_jobs=self.n_jobs)(
            delayed(fit_predict_parallel)(i, **shared_kwargs)
            for i in range(n_unknown)
        )

        predictions = []
        coeffs = []
        intercepts = []
        for prediction, coef, intercept in results:
            predictions.append(prediction)
            coeffs.append(coef)
            intercepts.append(intercept)
        return predictions, coeffs, intercepts