Source code for libpyhat.transform.norm

import numpy as np
import pandas as pd


# This function normalizes specified ranges of the data by their respective
# sums

[docs]
def norm(df, ranges, col_var="wvl"):
    """Normalize selected column ranges of ``df[col_var]`` by their row sums.

    Each range is normalized independently: within a range, every row is
    divided by the sum of that row's values over the columns of the range
    (see ``norm_total``).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are a MultiIndex. The second-level labels under
        ``col_var`` must be convertible to float.
    ranges : iterable
        Items whose first two elements are the lower and upper bound of a
        range; both bounds are inclusive. A column that falls inside more
        than one range is normalized once per range and therefore appears
        more than once in the result.
    col_var : str, optional
        Top-level column label of the data to normalize.

    Returns
    -------
    pandas.DataFrame
        A new frame made of, in this order: every top-level group other than
        ``col_var`` (unchanged), the normalized columns under ``col_var``,
        and the columns of ``col_var`` that fell in no range, un-normalized,
        under the top-level label ``"masked"``. If ``ranges`` is empty, all
        columns of ``col_var`` end up under ``"masked"``.
    """
    df_tonorm = df[col_var]
    # ``levels`` may still list top-level labels that no longer own any
    # column (pandas does not prune them when columns are dropped), and
    # selecting such a label raises KeyError. Pruning is a no-op when every
    # level is in use.
    top_level_cols = df.columns.remove_unused_levels().levels[0]
    top_level_cols = top_level_cols[top_level_cols != col_var]
    df_other = df[top_level_cols]
    cols = df_tonorm.columns.values
    # Convert the labels to float once rather than twice per range.
    col_values = np.array(cols, dtype="float")

    df_sub_norm = []
    # True for every column that was covered by at least one range
    used = np.zeros(len(cols), dtype=bool)
    for bounds in ranges:
        # Columns whose label lies inside the (inclusive) range
        in_range = (col_values >= bounds[0]) & (col_values <= bounds[1])
        used |= in_range
        # normalize over the current range
        df_sub_norm.append(norm_total(df_tonorm[cols[in_range]]))

    # Separate data frame containing the un-normalized columns
    df_masked = df_tonorm[cols[~used]]

    # Combine the normalized data frames into one. With no ranges there is
    # nothing to concatenate (pd.concat rejects an empty list), so fall back
    # to a frame with the same rows and no columns.
    if df_sub_norm:
        df_norm = pd.concat(df_sub_norm, axis=1)
    else:
        df_norm = df_tonorm.iloc[:, :0].copy()

    # make the columns into multiindex
    df_masked.columns = [["masked"] * len(df_masked.columns), df_masked.columns]
    df_norm.columns = [[col_var] * len(df_norm.columns), df_norm.columns.values]

    # combine the metadata, the normalized data frames, and the excluded
    # columns into a single data frame
    return pd.concat([df_other, df_norm, df_masked], axis=1)




[docs]
def norm_total(df):
    """Divide every row of ``df`` by the sum of that row.

    The values are cast to float first. Rows whose sum is zero are not
    special-cased. The input frame is not modified; a new frame is returned.
    """
    as_float = df.astype(float)
    row_totals = as_float.sum(axis=1)
    return as_float.div(row_totals, axis=0)