# Source code for libpyhat.utils.lookup

import numpy as np
import pandas as pd

# TODO: Describe: unmatching rows in resulting df are empty when their lookupdata empty
# TODO: GUI load data can't load lookup2.csv



[docs]
def lookup(
    df,
    lookupfile=None,
    lookupdf=None,
    sep=",",
    skiprows=0,
    left_on="sclock",
    right_on="Spacecraft Clock",
    meta_label="meta",
):
    """Left-join lookup metadata onto ``df``.

    The lookup table is supplied either as an already-loaded data frame
    (``lookupdf``) or as one or more CSV files (``lookupfile``), never both.
    CSV files are read with a two-row header, so the resulting columns are
    ``(top_level, name)`` tuples. The join matches the
    ``(meta_label, left_on)`` column of ``df`` against the
    ``(meta_label, right_on)`` column of the lookup table with
    ``how="left"``: every row of ``df`` is kept and rows without a match get
    missing values in the lookup columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Target data frame; must contain the column ``(meta_label, left_on)``.
    lookupfile : path, file-like, or list/tuple of those, optional
        CSV file(s) holding the lookup table. Several files are concatenated
        row-wise (mostly to handle the three different master lists for
        ChemCam).
    lookupdf : pandas.DataFrame, optional
        Lookup table that is already loaded. It is not modified.
    sep : str
        Field separator passed to ``pandas.read_csv``.
    skiprows : int
        Number of leading rows skipped by ``pandas.read_csv``.
    left_on : str
        Second-level name of the key column in ``df``.
    right_on : str
        Second-level name of the key column in the lookup table.
    meta_label : str
        Top-level column label under which both key columns live.

    Returns
    -------
    pandas.DataFrame
        The merged data frame, or ``df`` itself (unchanged) when the lookup
        table has no ``(meta_label, right_on)`` column.

    Raises
    ------
    ValueError
        If both or neither of ``lookupfile`` / ``lookupdf`` are given, or if
        ``lookupfile`` is an empty list.

    Warns
    -----
    UserWarning
        When the lookup key column is missing (``df`` is returned as-is), or
        when it holds duplicate values (duplicates are dropped, keeping the
        first occurrence, before merging).
    """
    # Imported locally so this function does not depend on edits elsewhere
    # in the module.
    import warnings

    # Either the user provides a data frame with lookup information OR they
    # provide a file with the data frame within it, but not both.
    if (lookupdf is None) == (lookupfile is None):
        raise ValueError(
            "User to provide EITHER a lookup file path OR lookup dataframe, not both "
            "or neither."
        )

    # If the user provided file path(s), load them into a single data frame.
    if lookupfile is not None:
        if isinstance(lookupfile, (list, tuple)):
            sources = list(lookupfile)
        else:
            sources = [lookupfile]
        if not sources:
            raise ValueError("lookupfile is an empty list; nothing to load.")
        tables = [
            pd.read_csv(src, sep=sep, skiprows=skiprows, header=[0, 1])
            for src in sources
        ]
        lookupdf = tables[0] if len(tables) == 1 else pd.concat(tables)

    left_key = (meta_label, left_on)
    right_key = (meta_label, right_on)

    # Without the key column in the lookup table there is nothing to join on;
    # warn and hand the input back untouched.
    if right_key not in lookupdf.columns:
        warnings.warn(
            "User provided lookup columns not present in target dataframe.",
            stacklevel=2,
        )
        return df

    # Duplicate keys in the lookup table would multiply rows in the left join.
    # Values are compared as strings, so e.g. 1 and "1" count as duplicates
    # for the purpose of this check.
    if not lookupdf[right_key].astype(str).is_unique:
        warnings.warn(
            "Non-unique values found in metadata column of the lookup "
            + "dataset "
            + str(right_key)
            + "! Removing duplicates, but please review your dataframes.",
            stacklevel=2,
        )
        # Not in place: a caller-supplied lookupdf must stay untouched.
        lookupdf = lookupdf.drop_duplicates(subset=[right_key])

    # Left join: lookup columns are appended to the right of df's columns.
    return df.merge(
        lookupdf,
        left_on=[left_key],
        right_on=[right_key],
        how="left",
    )