Source code for libpyhat.transform.norm

import numpy as np
import pandas as pd


# This function normalizes specified ranges of the data by their respective
# sums
def norm(df, ranges, col_var="wvl"):
    # split the data frame into the columns to be normalized and everything else
    df_tonorm = df[col_var]
    top_level_cols = df.columns.levels[0]
    top_level_cols = top_level_cols[top_level_cols != col_var]
    df_other = df[top_level_cols]

    cols = df_tonorm.columns.values
    df_sub_norm = []
    allind = []
    for i in ranges:
        # find the indices for the current range
        ind = (np.array(cols, dtype="float") >= i[0]) & (
            np.array(cols, dtype="float") <= i[1]
        )
        # find the columns for the range
        normcols = cols[ind]
        # keep track of the indices used for all ranges
        allind.append(ind)
        # normalize over the current range
        df_sub_norm.append(norm_total(df_tonorm[normcols]))

    # collapse the list of indices used to a single array
    allind = np.sum(allind, axis=0)
    # identify columns that were not used by any range (allind is less than 1)
    cols_excluded = cols[np.where(allind < 1)]
    # create a separate data frame containing the un-normalized columns
    df_masked = df_tonorm[cols_excluded]
    # combine the normalized data frames into one
    df_norm = pd.concat(df_sub_norm, axis=1)
    # make the columns into a multiindex
    df_masked.columns = [["masked"] * len(df_masked.columns), df_masked.columns]
    df_norm.columns = [[col_var] * len(df_norm.columns), df_norm.columns.values]
    # combine the normalized data frames, the excluded columns, and the
    # metadata into a single data frame
    df_new = pd.concat([df_other, df_norm, df_masked], axis=1)
    return df_new
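
A minimal illustration of the range-selection step above (not part of the library source; the wavelengths and the (585.0, 595.0) range are hypothetical): each (min, max) pair in ranges becomes a boolean mask over the column values, and only the columns inside the range are passed to norm_total.

import numpy as np

cols = np.array([585.0, 590.0, 595.0, 600.0])   # hypothetical wavelength columns
lo, hi = 585.0, 595.0                           # one hypothetical (min, max) entry from ranges
ind = (cols >= lo) & (cols <= hi)               # boolean mask, as in the loop above
print(cols[ind])     # [585. 590. 595.] -> columns normalized for this range
print(cols[~ind])    # [600.] -> excluded here; ends up under "masked" if no range uses it
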
# Normalize a data frame so that each row is divided by its sum (rows sum to one)
def norm_total(df):
    df = df.astype(float).div(df.astype(float).sum(axis=1), axis=0)
    return df
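
A usage sketch, assuming a DataFrame whose MultiIndex columns separate the "wvl" spectra from metadata; the variable names, wavelengths, metadata column, and range below are hypothetical and only illustrate the expected layout.

import numpy as np
import pandas as pd
from libpyhat.transform.norm import norm

wvls = [585.0, 590.0, 595.0, 600.0]
spectra = pd.DataFrame(
    np.random.rand(3, len(wvls)),
    columns=pd.MultiIndex.from_product([["wvl"], wvls]),
)
spectra[("meta", "target")] = ["a", "b", "c"]   # hypothetical metadata column

# normalize 585-595 so those channels sum to 1 per row; 600 falls outside
# every range and is returned under the "masked" top-level column
result = norm(spectra, ranges=[(585.0, 595.0)])
print(result["wvl"].sum(axis=1))   # each row sums to ~1.0
print(result["masked"].columns)    # the 600.0 column, left un-normalized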