import numpy as np
import pandas as pd
# TODO: Describe: unmatched rows in the resulting df are empty when their lookup data is empty
# TODO: GUI load data can't load lookup2.csv
def lookup(
    df,
    lookupfile=None,
    lookupdf=None,
    sep=",",
    skiprows=0,
    left_on="sclock",
    right_on="Spacecraft Clock",
    meta_label="meta",
):
    """Left-merge lookup metadata onto ``df``.

    The lookup table is supplied either as an in-memory dataframe
    (``lookupdf``) or as one or more CSV files (``lookupfile``) that are read
    with a two-row header, i.e. with two-level column labels. Rows of ``df``
    are matched on ``(meta_label, left_on)`` against
    ``(meta_label, right_on)`` in the lookup table. Because the merge is a
    left join, every row of ``df`` is kept; rows without a match get missing
    values in the columns coming from the lookup table.

    Parameters
    ----------
    df
        Target dataframe. It must contain the column ``(meta_label, left_on)``.
    lookupfile
        Path (or file-like object) of a lookup CSV, or a list/tuple of them.
        Multiple files are read and concatenated row-wise.
    lookupdf
        Lookup dataframe. Exactly one of ``lookupfile`` and ``lookupdf`` must
        be given. The caller's dataframe is never modified.
    sep
        Field separator passed to ``pandas.read_csv``.
    skiprows
        Number of leading rows to skip, passed to ``pandas.read_csv``.
    left_on
        Second-level label of the key column in ``df``.
    right_on
        Second-level label of the key column in the lookup table.
    meta_label
        Top-level label under which both key columns live.

    Returns
    -------
    pandas.DataFrame
        The merged dataframe, or ``df`` itself (unchanged) when the lookup
        table does not contain the ``(meta_label, right_on)`` column.

    Raises
    ------
    ValueError
        If both or neither of ``lookupfile`` and ``lookupdf`` are provided,
        or if ``lookupfile`` is an empty list/tuple.

    Warns
    -----
    UserWarning
        If the lookup key column is missing, or if it contains duplicate
        values (only the first occurrence of each key is kept for the merge).
    """
    # Imported locally so this function does not depend on a module-level
    # import being added elsewhere in the file.
    import warnings

    # Exactly one lookup source must be supplied.
    if (lookupdf is None) == (lookupfile is None):
        raise ValueError(
            "User to provide EITHER a lookup file path OR lookup dataframe, not both "
            "or neither."
        )

    if lookupfile is not None:
        if isinstance(lookupfile, (list, tuple)):
            # Several lookup files may be provided (mostly to handle the three
            # different master lists for chemcam); stack them into one table.
            if not lookupfile:
                raise ValueError("No lookup files were provided.")
            frames = [
                pd.read_csv(path, sep=sep, skiprows=skiprows, header=[0, 1])
                for path in lookupfile
            ]
            lookupdf = pd.concat(frames)
        else:
            lookupdf = pd.read_csv(
                lookupfile, sep=sep, skiprows=skiprows, header=[0, 1]
            )

    right_key = (meta_label, right_on)

    # Without the key column there is nothing to merge on: warn and hand the
    # target dataframe back untouched.
    if right_key not in lookupdf.columns:
        warnings.warn(
            "User provided lookup columns not present in target dataframe.",
            stacklevel=2,
        )
        return df

    # Duplicate keys would multiply rows of ``df`` in the left join, so keep
    # only the first occurrence of each key. A new frame is built instead of
    # dropping in place, so a caller-supplied ``lookupdf`` is not mutated.
    if lookupdf[right_key].duplicated().any():
        warnings.warn(
            "Non-unique values found in metadata column of the lookup "
            + "dataset "
            + str(right_key)
            + "! Removing duplicates, but please review your dataframes.",
            stacklevel=2,
        )
        lookupdf = lookupdf.drop_duplicates(subset=[right_key])

    # Left join: lookup columns are appended to the right of ``df``'s columns.
    return df.merge(
        lookupdf,
        left_on=[(meta_label, left_on)],
        right_on=[right_key],
        how="left",
    )