Source code for libpyhat.utils.utils

from functools import reduce

import numpy as np
import pandas as pd



[docs]
def crossform(a):
    """
    Build the skew-symmetric "cross form" matrix of a 3-vector, i.e. the
    matrix ``A`` for which ``A @ b`` equals the cross product ``a x b``.

    Parameters
    ----------
    a : ndarray
        (3,) vector; only elements 0, 1 and 2 are read

    Returns
    -------
     : ndarray
       (3,3) skew-symmetric matrix
    """
    x, y, z = a[0], a[1], a[2]
    return np.array(
        [
            [0, -z, y],
            [z, 0, -x],
            [-y, x, 0],
        ]
    )




[docs]
def normalize_vector(line):
    """
    Normalize a standard form line (or a stack of lines) so that the
    (A, B) part of each line has unit Euclidean length.

    Parameters
    ----------
    line : ndarray, DataFrame or sequence
           Standard form of a line (Ax + By + C = 0). Either a single line
           of shape (3,) or several lines stacked row-wise, shape (n, 3).
           A DataFrame is converted to its underlying values; objects
           without an ``ndim`` attribute (e.g. lists) are converted with
           ``np.asarray``.

    Returns
    -------
    line : ndarray
           The normalized line(s): every coefficient divided by
           sqrt(A**2 + B**2) of its own row

    Examples
    --------
    >>> normalize_vector(np.array([3.0, 4.0, 10.0]))
    array([0.6, 0.8, 2. ])
    """
    if isinstance(line, pd.DataFrame):
        line = line.values
    if not hasattr(line, "ndim"):
        # Plain Python sequences have no vectorised arithmetic.
        line = np.asarray(line)

    # Branch on dimensionality explicitly rather than relying on a bare
    # ``except`` around the 2-D indexing, which would also hide unrelated
    # errors.
    if line.ndim == 1:
        n = np.sqrt(line[0] ** 2 + line[1] ** 2)
    else:
        # One norm per row, shaped (n, 1) so it broadcasts across columns.
        n = np.sqrt(line[:, 0] ** 2 + line[:, 1] ** 2).reshape(-1, 1)
    return line / n




[docs]
def getnearest(iterable, value):
    """
    Locate the element of an iterable that is closest to a target value.

    Parameters
    ----------
    iterable : iterable
               An iterable of values to search

    value : int, float
            The value to search for

    Returns
    -------
        : tuple
          ``(index, element)`` of the entry with the smallest absolute
          difference from ``value``; on ties the earliest entry is
          returned
    """

    def _distance(indexed):
        # ``indexed`` is an (index, element) pair produced by enumerate
        _, candidate = indexed
        return abs(candidate - value)

    return min(enumerate(iterable), key=_distance)




[docs]
def checkbandnumbers(bands, checkbands):
    """
    Verify that every required band is present in the bands of the
    input image.

    In case of THEMIS, we check for band 9 as band 9 is the temperature
    band required to derive thermal temperature.  We also check for band 10
    which is required for TES atmosphere calculations.

    Parameters
    ----------
    bands : tuple
            of bands in the input image
    checkbands : list
                 of bands that must be present

    Returns
    -------
     : bool
       True if all of ``checkbands`` are in ``bands`` (also when
       ``checkbands`` is empty), else False
    """
    return all(required in bands for required in checkbands)




[docs]
def checkdeplaid(incidence):
    """
    Pick the deplaid method that corresponds to an incidence angle.

    Parameters
    ----------
    incidence : float
                incidence angle extracted from the campt results.

    Returns
    -------
     : str or bool
       "night" for angles in [90, 180], "day" for angles in [0, 90),
       and False for anything outside [0, 180]
    """
    if 90 <= incidence <= 180:
        return "night"
    if 0 <= incidence < 90:
        return "day"
    return False




[docs]
def checkmonotonic(iterable, piecewise=False):
    """
    Check if a given iterable is strictly monotonically increasing.

    Parameters
    ----------
    iterable : iterable
                Any Python iterable object, including one-shot iterators
                and generators (it is materialised into a list once)

    piecewise : boolean
                If False, return a single boolean for the entire iterable;
                if exactly True, return a list with elementwise
                monotonicity checks

    Returns
    -------
    monotonic : bool/list
                With ``piecewise=True``: a list with one entry per element,
                where entry ``i`` (for i > 0) is ``element[i-1] <
                element[i]`` and entry 0 is always True. Otherwise: True
                only if every entry of that list is true. Equal neighbours
                count as not increasing.
    """
    # Materialise first so that iterators/generators, which cannot be
    # sliced, are supported as the docstring promises.
    values = list(iterable)
    monotonic = [True] + [x < y for x, y in zip(values, values[1:])]
    if piecewise is True:
        return monotonic
    return all(monotonic)




[docs]
def find_in_dict(obj, key):
    """
    Search a (possibly nested) dictionary for a key, depth first.

    The top level of ``obj`` is checked first; if the key is absent there,
    each dict-valued entry is searched recursively in iteration order.

    Parameters
    ----------
    obj : dict
          The dictionary to search
    key : str
          The key to find in the dictionary

    Returns
    -------
    item : obj
           The value stored under ``key``, or None if the key is not found.
           A nested match whose value is None is treated as "not found"
           and the search continues.
    """
    if key in obj:
        return obj[key]

    for _, child in obj.items():
        if not isinstance(child, dict):
            continue
        found = find_in_dict(child, key)
        if found is not None:
            return found
    return None




[docs]
def find_nested_in_dict(data, key_list):
    """
    Follow a sequence of keys down into a nested dict.

    Parameters
    ----------
    data : dict
           The dictionary to be traversed
    key_list: list
              The keys to be traversed, applied in the order given; an
              empty list returns ``data`` itself

    Returns
    -------
    value : object
            The value reached after applying every key
    """
    current = data
    for step in key_list:
        current = current[step]
    return current




[docs]
def make_homogeneous(points):
    """
    Append a column of ones to a set of points (n x dim array), giving
    their homogeneous coordinates.

    Parameters
    ----------
    points : ndarray
             n x m array of points, where n is the number
             of points.

    Returns
    -------
     : ndarray
       n x (m + 1) array of homogeneous points
    """
    n_points = points.shape[0]
    ones_column = np.ones((n_points, 1))
    return np.hstack((points, ones_column))




[docs]
def remove_field_name(a, name):
    """
    Given a numpy structured array, drop one field (column) and return
    the array restricted to the remaining fields.

    Parameters
    ----------
    a : ndarray
        Numpy structured array

    name : str
           of the field (column) to be removed; if no such field exists
           all fields are kept

    Returns
    -------
    b : ndarray
        Numpy structured array without the 'name' field
    """
    # NOTE(review): whether multi-field indexing yields a copy or a view
    # depends on the NumPy version - confirm before mutating the result.
    kept = [field for field in a.dtype.names if field != name]
    return a[kept]



# This function finds rows of the data frame where a specified column has
# values matching a specified set of values
# (Useful for extracting folds)

[docs]
def rows_match(df, column_name, isin_array, invert=False):
    """
    Select the rows of a data frame whose value in one column is (or is
    not) a member of a given set of values. Useful for extracting folds.

    Parameters
    ----------
    df : DataFrame
         The data frame to filter
    column_name : label
                  The column whose values are tested
    isin_array : list-like
                 The values to match against
    invert : bool
             If true, return the rows that do NOT match instead

    Returns
    -------
    new_df : DataFrame
             The selected rows of ``df``
    """
    mask = df[column_name].isin(isin_array)
    if invert:
        # ``~`` is the boolean inversion operator; unary minus on a boolean
        # mask is rejected by NumPy.
        mask = ~mask
    return df.loc[mask]



# This function takes the sum of data over two specified wavelength ranges,
# calculates the ratio of the sums, and adds the ratio as a column in the
# data frame

[docs]
def ratio(df, range1, range2, rationame="", spect_label="wvl"):
    """
    Sum the spectra over two wavelength ranges, divide the first sum by
    the second, and store the result as a new column of the data frame.

    Parameters
    ----------
    df : DataFrame
         Data frame with two-level columns; the spectra live under the
         top-level label ``spect_label`` and their second-level labels
         are compared against the range bounds
    range1 : sequence
             (min, max) bounds, inclusive, of the numerator range
    range2 : sequence
             (min, max) bounds, inclusive, of the denominator range
    rationame : str
                Second-level name of the new column
    spect_label : str
                  Top-level label of the spectral columns

    Returns
    -------
    df : DataFrame
         The same data frame, modified in place, with the added column
         ``("ratio", rationame)``
    """
    spectra = df[spect_label]
    wavelengths = spectra.columns.values

    def _band_sum(bounds):
        # Row-wise sum over the columns whose label lies within the bounds
        in_band = (wavelengths >= bounds[0]) & (wavelengths <= bounds[1])
        return spectra.loc[:, wavelengths[in_band]].sum(axis=1)

    numerator = _band_sum(range1)
    denominator = _band_sum(range2)

    df[("ratio", rationame)] = numerator / denominator
    return df




[docs]
def col_within_range(self, rangevals, col, meta_label="meta"):
    """
    Return the rows of ``self.df`` whose value in a metadata column lies
    strictly between two bounds.

    Parameters
    ----------
    self : object
           Any object exposing a ``df`` DataFrame attribute with two-level
           columns
    rangevals : sequence
                (lower, upper) bounds; both are exclusive
    col : label
          Second-level name of the column to test
    meta_label : str
                 Top-level label under which ``col`` is stored

    Returns
    -------
     : DataFrame
       The rows of ``self.df`` satisfying lower < value < upper
    """
    frame = self.df
    column = frame[(meta_label, col)]
    lower, upper = rangevals[0], rangevals[1]
    in_range = (column > lower) & (column < upper)
    return frame.loc[in_range]




[docs]
def enumerate_duplicates(df, col, meta_label="meta"):
    """
    Make repeated entries of a metadata column distinguishable by
    appending a running number to each of them.

    Values are converted to strings and missing values become "-". Every
    string that occurs more than once (other than "-") is rewritten as
    "<value> - 1", "<value> - 2", ... in row order; unique values are left
    as their string form.

    Parameters
    ----------
    df : DataFrame
         Data frame with two-level columns
    col : label
          Second-level name of the column to process
    meta_label : str
                 Top-level label under which ``col`` is stored

    Returns
    -------
    df : DataFrame
         The same data frame, modified in place, with the relabelled column
    """
    key = (meta_label, col)
    labels = np.array([str(entry) for entry in df[key].fillna("-")])
    relabelled = list(labels)

    for label in np.unique(labels):
        if label == "-":
            # placeholder for missing values is never numbered
            continue
        positions = np.where(labels == label)[0]
        if len(positions) <= 1:
            continue
        for count, position in enumerate(positions, start=1):
            relabelled[position] = label + " - " + str(count)

    df[key] = relabelled
    return df




[docs]
def remove_rows(df, matching_values, spect_label="wvl"):
    """
    Drop the rows of a data frame that satisfy ALL of the given criteria.

    The shape of the data frame before and after the removal is printed.

    Parameters
    ----------
    df : DataFrame
         Data frame with two-level columns
    matching_values : list of dict
                      One dict per criterion, each with the keys:
                      'column'   - the column to match on
                      'operator' - the comparison operator to evaluate the
                                   column with
                      'value'    - the value to compare the column
                                   elements to
                      An empty list removes nothing.
    spect_label : str
                  Top-level label of the spectral columns, passed through
                  to ``identify_rows``

    Returns
    -------
    df : DataFrame
         The data frame without the rows that matched every criterion
    """
    matches = [
        identify_rows(
            df,
            val["column"],
            val["operator"],
            val["value"],
            spect_label=spect_label,
        )
        for val in matching_values
    ]
    if matches:
        match_combined = np.all(matches, axis=0)
    else:
        # With no criteria np.all would collapse to a scalar, which cannot
        # be used as a row mask; treat it as "nothing matches".
        match_combined = np.zeros(len(df), dtype=bool)

    print("Shape before removing rows: " + str(df.shape))
    df = df.iloc[~match_combined]
    print("Shape after removing rows: " + str(df.shape))
    return df



# This function is used by remove_rows to identify the rows whose value in a
# given column satisfies a given comparison operator and value

[docs]
def identify_rows(df, colname, operator, value, spect_label="wvl"):
    """
    Build a boolean mask of the rows whose value in a non-spectral column
    satisfies a comparison against ``value``.

    The comparison is attempted numerically first (column and value both
    converted to float); if that conversion fails, the raw column data and
    raw value are compared instead.

    Parameters
    ----------
    df : DataFrame
         Data frame with two-level columns
    colname : label
              Second-level name of the column to test. It is looked up
              among the columns whose top-level label is not
              ``spect_label``; the first such column is used.
    operator : str
               One of "=", ">", "<", "<=", ">=". For any other string the
               result is ``coldata == coldata``.
    value : object
            The value to compare the column elements to
    spect_label : str
                  Top-level label of the spectral columns, which are
                  excluded from the column lookup

    Returns
    -------
    match : ndarray
            Boolean array with one entry per row

    Raises
    ------
    ValueError
        If ``colname`` is not among the non-spectral columns
    """
    comparators = {
        "=": lambda lhs, rhs: lhs == rhs,
        ">": lambda lhs, rhs: lhs > rhs,
        "<": lambda lhs, rhs: lhs < rhs,
        "<=": lambda lhs, rhs: lhs <= rhs,
        ">=": lambda lhs, rhs: lhs >= rhs,
    }

    # Resolve the full two-level column key from the second-level name,
    # ignoring the spectral columns.
    level0 = df.columns.get_level_values(0)
    level1 = df.columns.get_level_values(1)
    non_spectral = level0 != spect_label
    names_level1 = list(level1[non_spectral])
    names_level0 = list(level0[non_spectral])
    colname = (names_level0[names_level1.index(colname)], colname)
    coldata = np.array(df[colname])

    compare = comparators.get(operator)
    if compare is None:
        # Unrecognised operator: same fallback mask as before.
        return coldata == coldata

    try:
        lhs = np.array(coldata, dtype=float)
        rhs = float(value)
    except (TypeError, ValueError, OverflowError):
        # Not numeric (e.g. string labels): compare the raw values instead.
        lhs, rhs = coldata, value
    return compare(lhs, rhs)