Source code for libpyhat.transform.dim_reductions.lfda

"""Translated from https://github.com/cran/lfda/blob/master/R/lfda.R
#' Local Fisher Discriminant Analysis for
#' Supervised Dimensionality Reduction
#'
#' Performs local Fisher discriminant analysis (LFDA) on the given data.
#'
#' LFDA is a linear dimensionality-reduction method that maximizes
#' between-class scatter and minimizes within-class scatter while
#' maintaining the local structure of the data, so that multimodal data
#' can be embedded appropriately. Its limitation is that it only looks
#' for linear boundaries between clusters; when that is too restrictive,
#' a non-linear variant called kernel LFDA can be used instead. Three
#' metric types can be chosen for the embedding space.
#'
#' x = n x d matrix of original samples.
#'          n is the number of samples.
#' y = length n vector of class labels
#' r = dimensionality of reduced space (default: d)
#' metric = type of metric in the embedding space (default: 'plain')
#'               'weighted'        --- weighted eigenvectors
#'               'orthonormalized' --- orthonormalized
#'               'plain'           --- raw eigenvectors
#' knn = parameter used in local scaling method (default: 5)
#'
#' Returns:
#' T = d x r transformation matrix (Z = x * T)
#' Z = n x r matrix of dimensionality reduced samples
#'
#' Keywords: lfda local fisher discriminant transformation mahalanobis metric
#'
#' @author Yuan Tang
#' @seealso See klfda for the kernelized variant of
#'          LFDA (Kernel LFDA).
#'
#' References:
#' Sugiyama, M (2007).
#' Dimensionality reduction of multimodal labeled data by
#' local Fisher discriminant analysis.
#' Journal of Machine Learning Research, vol.8, 1027--1061.
#'
#' Sugiyama, M (2006).
#' Local Fisher discriminant analysis for supervised dimensionality reduction.
#' In W. W. Cohen and A. Moore (Eds.), Proceedings of the 23rd International
#' Conference on Machine Learning (ICML2006), 905--912.
#'
"""
import warnings

import numpy as np


# ' Get Affinity Matrix
# '
# ' This function returns an affinity matrix computed from the distance
# ' matrix, using the local scaling of each point's knn-th nearest neighbor.
# '
# ' distance2 = the matrix of squared distances between observations
# ' knn = the number of nearest neighbors used for local scaling
# ' nc = the number of observations for data in this class
# '
# ' Returns:
# ' A = an affinity matrix - the larger the element in the matrix, the
# '     closer two data points are


def getAffinity(distance2, knn, nc):
    # distance2 is square (nc x nc); sort each column's distances ascending
    sortarr = np.sort(distance2, axis=0)
    if sortarr.shape[1] < knn + 1:
        raise ValueError("knn is too large, please try to reduce it.")
    # squared distance from each point to its knn-th nearest neighbor
    kNNdist2 = sortarr[knn, :]
    sigma = np.sqrt(kNNdist2)
    sigma = np.expand_dims(sigma, axis=0)
    # local scaling: localscale[i, j] = sigma_i * sigma_j
    localscale = np.dot(sigma.T, sigma)
    flag = localscale != 0
    A = np.zeros((nc, nc))
    A[flag] = np.exp(-distance2[flag] / localscale[flag])
    return A
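# A minimal sketch of calling getAffinity directly, with a made-up 3-point
# squared-distance matrix (illustrative only). With knn=1, each point's
# local scale is the distance to its nearest neighbor; here all local
# scales are 1, so A reduces to exp(-distance2):
#
#     distance2 = np.array([[0.0, 1.0, 4.0],
#                           [1.0, 0.0, 1.0],
#                           [4.0, 1.0, 0.0]])
#     A = getAffinity(distance2, knn=1, nc=3)  # A[i, j] = exp(-distance2[i, j])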
class LFDA:
    def __init__(self, r=None, metric="plain", knn=5):
        self.metric = metric
        self.knn = knn
        self.r = r
    def fit(self, x, y):
        # metric can be: "orthonormalized", "plain", "weighted"
        x = np.array(x).T
        y = np.array(y).T
        d = np.size(x, 0)  # number of rows (features)
        n = np.size(x, 1)  # number of columns (samples)

        # if no dimension reduction requested, set r to d
        if self.r is None:
            self.r = d

        tSb = np.zeros((d, d))
        tSw = np.zeros((d, d))

        # compute the optimal scatter matrices in a classwise manner
        for value in np.unique(y.flatten()):
            Xc = x[:, y == value]
            nc = np.size(Xc, 1)  # number of samples in this class

            # determine local scaling for locality-preserving projection
            Xc2 = np.array(np.sum(np.power(Xc, 2), axis=0))

            # calculate the pairwise squared distances; the tiling mirrors
            # repmat() in Matlab
            Xc2tile = np.tile(Xc2, (nc, 1))
            distance2 = Xc2tile + Xc2tile.T - 2 * np.dot(Xc.T, Xc)

            # get affinity matrix
            A = getAffinity(distance2, self.knn, nc)

            Xc1 = np.expand_dims(np.array(Xc.sum(axis=1)), axis=1)
            A_tiled = np.tile(A.sum(axis=0), (d, 1)).T
            G = np.dot(Xc, A_tiled * Xc.T) - np.dot(Xc, np.dot(A, Xc.T))
            tSb = (
                tSb
                + (G / n)
                + np.dot(Xc, Xc.T) * (1 - nc / n)
                + np.dot(Xc1, (Xc1.T / n))
            )
            tSw = tSw + G / nc

        X1 = np.expand_dims(np.sum(x, axis=1), axis=0)
        tSb = tSb - np.dot((X1.T / n), X1) - tSw
        tSb = (tSb + tSb.T) / 2
        tSw = (tSw + tSw.T) / 2

        # find generalized eigenvalues and normalized eigenvectors of the
        # problem; np.linalg.eig returns them in no particular order, so
        # sort in descending order of eigenvalue first
        eigVal, eigVec = np.linalg.eig(np.dot(np.linalg.inv(tSw), tSb))
        order = np.argsort(-eigVal.real)
        eigVal = eigVal[order]
        eigVec = eigVec[:, order]

        if self.r < d:
            # dimensionality reduction (keep only the r largest
            # eigenvalues of the problem)
            eigVal = eigVal[0 : self.r]
            eigVec = eigVec[:, 0 : self.r]

        # based on the metric, return a particular type of transform matrix
        if self.metric == "orthonormalized":
            Tr = np.linalg.qr(eigVec)[0]
        elif self.metric == "weighted":
            Tr = eigVec * np.tile(np.sqrt(eigVal).T, (d, 1))
        elif self.metric == "plain":
            Tr = eigVec
        else:
            warnings.warn("Invalid metric type. Using 'plain'.")
            Tr = eigVec

        Z = np.dot(Tr.T, x).T
        if np.any(np.iscomplex(Z)):
            warnings.warn(
                "The returned matrices are complex! The numpy.linalg.eig "
                "function within the lfda implementation is likely the "
                "culprit."
            )

        self.Tr = Tr
        self.Z = Z
        return
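    # A sketch of calling fit() (shapes only; the names below are made up
    # for illustration):
    #
    #     model = LFDA(r=2, metric="plain", knn=5)
    #     model.fit(x, y)    # x: n x d samples, y: length-n class labels
    #     model.Tr.shape     # (d, 2) transformation matrix
    #     model.Z.shape      # (n, 2) embedded training samples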
    # ' LFDA Transformation/Prediction on New Data
    # '
    # ' This function transforms a data set, usually a testing set,
    # ' using the trained LFDA metric.
    # '
    # ' newdata = the data to be transformed
    def transform(self, newdata):
        return np.dot(newdata, self.Tr)
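# A minimal end-to-end sketch on synthetic data. The two Gaussian blobs
# and all parameter values below are made up for illustration:
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    x = np.vstack(
        [
            rng.normal(0.0, 1.0, size=(50, 4)),  # class 0 samples
            rng.normal(3.0, 1.0, size=(50, 4)),  # class 1 samples
        ]
    )
    y = np.array([0] * 50 + [1] * 50)

    model = LFDA(r=2, metric="plain", knn=5)
    model.fit(x, y)  # learns model.Tr (4 x 2) and model.Z (100 x 2)
    projected = model.transform(x[:5])  # project new rows into the 2-D space
    print(model.Tr.shape, model.Z.shape, projected.shape)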