Source code for libpyhat.utils.lookup

import warnings

import numpy as np
import pandas as pd

# TODO: Describe: unmatched rows in the resulting df are left empty when they have no lookup data
# TODO: GUI load data can't load lookup2.csv


def lookup(
    df,
    lookupfile=None,
    lookupdf=None,
    sep=",",
    skiprows=0,
    left_on="sclock",
    right_on="Spacecraft Clock",
    meta_label="meta",
):
    """Left-join lookup metadata onto ``df``.

    Both ``df`` and the lookup table are addressed with two-level column
    labels of the form ``(meta_label, name)``. Every row of ``df`` is kept;
    columns of the lookup table are appended to the right, and rows of ``df``
    with no matching key in the lookup table get missing values there.

    Parameters
    ----------
    df : pandas.DataFrame
        Target data; must contain the column ``(meta_label, left_on)``.
    lookupfile : str or list of str, optional
        Path (or list of paths) to CSV file(s) holding the lookup table. Each
        file is read with two header rows (``header=[0, 1]``); multiple files
        are concatenated row-wise.
    lookupdf : pandas.DataFrame, optional
        Lookup table supplied directly. Exactly one of ``lookupfile`` and
        ``lookupdf`` must be given. It is not modified by this function.
    sep : str
        Field separator passed to ``pandas.read_csv``.
    skiprows : int
        Passed to ``pandas.read_csv`` as ``skiprows``.
    left_on : str
        Second-level name of the key column in ``df``.
    right_on : str
        Second-level name of the key column in the lookup table.
    meta_label : str
        First-level column label under which both key columns live.

    Returns
    -------
    pandas.DataFrame
        The merged frame, or ``df`` itself (unchanged) when the lookup table
        does not contain the column ``(meta_label, right_on)``.

    Raises
    ------
    ValueError
        If both or neither of ``lookupfile`` / ``lookupdf`` are provided, or
        if ``lookupfile`` is an empty list.

    Warns
    -----
    UserWarning
        If the lookup key column is missing, or if it contains duplicate
        values (duplicates are dropped, keeping the first occurrence).
    """
    # Either the user provides a data frame with lookup information OR they provide
    # a file with the dataframe within it, but not both.
    if (lookupdf is None) == (lookupfile is None):
        raise ValueError(
            "User to provide EITHER a lookup file path OR lookup dataframe, not both "
            "or neither."
        )

    # If the user provided a file path (or several), load the lookup table.
    if lookupfile is not None:
        if isinstance(lookupfile, list):
            if not lookupfile:
                raise ValueError("lookupfile is an empty list; nothing to load.")
            # Concatenate multiple lookup files if provided (mostly to handle
            # the three different master lists for chemcam).
            lookupdf = pd.concat(
                [
                    pd.read_csv(path, sep=sep, skiprows=skiprows, header=[0, 1])
                    for path in lookupfile
                ]
            )
        else:
            lookupdf = pd.read_csv(
                lookupfile, sep=sep, skiprows=skiprows, header=[0, 1]
            )

    right_key = (meta_label, right_on)

    # Without the key column in the lookup table there is nothing to join on,
    # so warn and hand the target back untouched.
    if right_key not in lookupdf.columns:
        warnings.warn(
            "User provided lookup columns not present in target dataframe.",
            stacklevel=2,
        )
        return df

    # Duplicate keys would multiply rows of ``df`` in the merge. Uniqueness is
    # judged on the string form of the key values.
    if lookupdf[right_key].astype(str).duplicated().any():
        warnings.warn(
            "Non-unique values found in metadata column of the lookup "
            + "dataset "
            + str(right_key)
            + "! Removing duplicates, but please review your dataframes.",
            stacklevel=2,
        )
        # Not in place: a caller-supplied lookupdf must stay untouched.
        lookupdf = lookupdf.drop_duplicates(subset=[right_key])

    # Left join: every row of ``df`` survives; lookup columns are appended on
    # the right.
    return df.merge(
        lookupdf,
        left_on=[(meta_label, left_on)],
        right_on=[right_key],
        how="left",
    )