Source code for libpyhat.utils.utils

from functools import reduce

import numpy as np
import pandas as pd


def crossform(a):
    """
    Build the cross-product (skew-symmetric) matrix of a 3-vector.

    For the returned matrix ``A`` and any 3-vector ``b``, ``A @ b`` equals
    the cross product ``a x b``.

    Parameters
    ----------
    a : ndarray
        (3,) vector; only elements 0, 1 and 2 are read

    Returns
    -------
    : ndarray
      (3,3) matrix built from the components of ``a``
    """
    x, y, z = a[0], a[1], a[2]
    rows = [
        [0, -z, y],
        [z, 0, -x],
        [-y, x, 0],
    ]
    return np.array(rows)
def normalize_vector(line):
    """
    Normalize a standard form line.

    Every coefficient is divided by ``sqrt(A**2 + B**2)``, computed from
    the first two coefficients of each line.

    Parameters
    ----------
    line : ndarray, DataFrame, list or tuple
           Standard form of a line (Ax + By + C = 0). Either a single line
           (1-D) or one line per row (2-D). A DataFrame is replaced by its
           ``.values``; a list or tuple is converted to an ndarray.

    Returns
    -------
    line : ndarray
           The normalized line(s), same shape as the input

    Examples
    --------
    >>> normalize_vector(np.array([3.0, 4.0, 5.0]))
    array([0.6, 0.8, 1. ])
    """
    if isinstance(line, pd.DataFrame):
        line = line.values
    elif isinstance(line, (list, tuple)):
        # Plain sequences do not support element-wise division.
        line = np.asarray(line)

    # Choose the path from the dimensionality instead of catching whatever
    # exception the 2-D indexing happens to raise on 1-D input.
    if np.ndim(line) == 1:
        n = np.sqrt(line[0] ** 2 + line[1] ** 2)
    else:
        # Column vector so each row is divided by its own norm.
        n = np.sqrt(line[:, 0] ** 2 + line[:, 1] ** 2).reshape(-1, 1)
    return line / n
def getnearest(iterable, value):
    """
    Find the element of an iterable that is closest to a value.

    Parameters
    ----------
    iterable : iterable
               An iterable to search

    value : int, float
            The value to search for

    Returns
    -------
    : tuple
      ``(index, element)`` for the element with the smallest absolute
      difference from ``value``; the first such element wins on ties
    """

    def distance(pair):
        # pair is (index, element) as produced by enumerate
        return abs(pair[1] - value)

    return min(enumerate(iterable), key=distance)
def checkbandnumbers(bands, checkbands):
    """
    Check that every required band is present in the input bands.

    In case of THEMIS, we check for band 9 as band 9 is the temperature
    band required to derive thermal temperature. We also check for band 10
    which is required for TES atmosphere calculations.

    Parameters
    ----------
    bands : tuple
            of bands in the input image
    checkbands : list
                 of bands to check against

    Returns
    -------
    : bool
      True if all of ``checkbands`` are in ``bands``, else False
    """
    return all(required in bands for required in checkbands)
def checkdeplaid(incidence):
    """
    Given an incidence angle, select the appropriate deplaid method.

    Parameters
    ----------
    incidence : float
                incidence angle extracted from the campt results.

    Returns
    -------
    : str or bool
      "night" for angles in [90, 180], "day" for angles in [0, 90),
      and False for anything outside [0, 180]
    """
    if 90 <= incidence <= 180:
        return "night"
    if 0 <= incidence < 90:
        return "day"
    return False
def checkmonotonic(iterable, piecewise=False):
    """
    Check if a given iterable is strictly monotonically increasing.

    Parameters
    ----------
    iterable : iterable
               Any Python iterable object, including generators and
               other one-shot iterators

    piecewise : boolean
                If false, return a boolean for the entire iterable,
                else return a list with elementwise monotonicity checks

    Returns
    -------
    monotonic : bool/list
                With ``piecewise=True``, a list with one entry per
                element: the first entry is always True and each later
                entry is True when that element is strictly greater than
                its predecessor. Otherwise a single bool that is True
                only when every entry is True.
    """
    # Materialise once so that inputs without slicing support
    # (generators, iterators) can be paired with their successors.
    items = list(iterable)
    monotonic = [True] + [x < y for x, y in zip(items, items[1:])]
    if piecewise is True:
        return monotonic
    return all(monotonic)
def find_in_dict(obj, key):
    """
    Recursively find an entry in a (possibly nested) dictionary.

    The top level is checked first; after that, dict-valued entries are
    searched depth-first in iteration order.

    Parameters
    ----------
    obj : dict
          The dictionary to search

    key : str
          The key to find in the dictionary

    Returns
    -------
    item : obj
           The value from the dictionary, or None when the key is not
           found. A nested match whose value is None is skipped and the
           search continues.
    """
    if key in obj:
        return obj[key]

    nested_dicts = (child for child in obj.values() if isinstance(child, dict))
    for child in nested_dicts:
        found = find_in_dict(child, key)
        if found is not None:
            return found
    return None
def find_nested_in_dict(data, key_list):
    """
    Traverse a list of keys into a dict.

    Parameters
    ----------
    data : dict
           The dictionary to be traversed

    key_list : list
               The list of keys to be traversed. Keys are applied in the
               order they appear in the list; an empty list returns
               ``data`` itself.

    Returns
    -------
    value : object
            The value reached after indexing with every key in turn
    """
    value = data
    for step in key_list:
        value = value[step]
    return value
def make_homogeneous(points):
    """
    Convert a set of points (n x dim array) to homogeneous coordinates
    by appending a column of ones.

    Parameters
    ----------
    points : ndarray
             n x m array of points, where n is the number of points.

    Returns
    -------
    : ndarray
      n x m + 1 array of homogeneous points
    """
    n_points = points.shape[0]
    ones_column = np.ones((n_points, 1))
    return np.hstack((points, ones_column))
def remove_field_name(a, name):
    """
    Given a numpy structured array, drop one field and return the
    remaining fields.

    Parameters
    ----------
    a : ndarray
        Numpy structured array

    name : str
           of the field (column) to be removed; if no such field exists
           all fields are kept

    Returns
    -------
    b : ndarray
        Numpy structured array holding every field of ``a`` except
        ``name``, obtained by indexing ``a`` with the list of kept field
        names
    """
    kept_fields = [field for field in a.dtype.names if field != name]
    return a[kept_fields]
# This function finds rows of the data frame where a specified column has
# values matching a specified set of values
# (Useful for extracting folds)
def rows_match(df, column_name, isin_array, invert=False):
    """
    Select the rows of a data frame whose column value is in a given set.

    Parameters
    ----------
    df : DataFrame
         The data frame to filter

    column_name : object
                  Column label passed to ``df[...]``

    isin_array : list-like
                 Values to look for in the column

    invert : bool
             If True, return the rows whose value is NOT in
             ``isin_array`` instead

    Returns
    -------
    new_df : DataFrame
             The selected rows of ``df``
    """
    mask = df[column_name].isin(isin_array)
    if invert:
        # ``~`` is the element-wise logical NOT for boolean Series.
        mask = ~mask
    return df.loc[mask]
# This function takes the sum of data over two specified wavelength ranges,
# calculates the ratio of the sums, and adds the ratio as a column in the
# data frame
def ratio(df, range1, range2, rationame="", spect_label="wvl"):
    """
    Add a column holding the ratio of two summed spectral ranges.

    For every row, the values under ``spect_label`` are summed over each
    range (bounds inclusive) and the first sum is divided by the second.

    Parameters
    ----------
    df : DataFrame
         Data frame whose ``df[spect_label]`` sub-frame has column labels
         that can be compared against the range bounds. It is modified in
         place.

    range1 : sequence
             ``(low, high)`` bounds of the numerator range

    range2 : sequence
             ``(low, high)`` bounds of the denominator range

    rationame : str
                Second-level label of the new column

    spect_label : str
                  Top-level label of the spectral columns

    Returns
    -------
    df : DataFrame
         The same data frame with the column ``("ratio", rationame)``
         added
    """
    spectra = df[spect_label]
    wavelengths = spectra.columns.values

    in_range1 = (wavelengths >= range1[0]) & (wavelengths <= range1[1])
    in_range2 = (wavelengths >= range2[0]) & (wavelengths <= range2[1])

    sum1 = spectra.loc[:, wavelengths[in_range1]].sum(axis=1)
    sum2 = spectra.loc[:, wavelengths[in_range2]].sum(axis=1)

    df[("ratio", rationame)] = sum1 / sum2
    return df
def col_within_range(self, rangevals, col, meta_label="meta"):
    """
    Return the rows of ``self.df`` whose column value lies strictly
    between two bounds.

    Parameters
    ----------
    self : object
           Any object exposing a ``df`` DataFrame attribute

    rangevals : sequence
                ``rangevals[0]`` is the exclusive lower bound and
                ``rangevals[1]`` the exclusive upper bound

    col : object
          Second-level label of the column to test

    meta_label : str
                 Top-level label of the column to test

    Returns
    -------
    : DataFrame
      The rows of ``self.df`` where the ``(meta_label, col)`` column is
      greater than the lower bound and less than the upper bound
    """
    lower = rangevals[0]
    upper = rangevals[1]
    column = (meta_label, col)
    above_lower = self.df[column] > lower
    below_upper = self.df[column] < upper
    return self.df.loc[above_lower & below_upper]
def enumerate_duplicates(df, col, meta_label="meta"):
    """
    Make repeated values in a column distinguishable by numbering them.

    The column is converted to strings, with missing values replaced by
    "-". Every string other than "-" that occurs more than once is
    rewritten as "<value> - 1", "<value> - 2", ... in row order. Values
    that occur once, and "-", are left as they are.

    Parameters
    ----------
    df : DataFrame
         Data frame containing the column ``(meta_label, col)``. It is
         modified in place.

    col : object
          Second-level label of the column to process

    meta_label : str
                 Top-level label of the column to process

    Returns
    -------
    df : DataFrame
         The same data frame with the column replaced by the numbered
         string labels
    """
    labels = [str(x) for x in df[(meta_label, col)].fillna("-")]

    # First pass: how many times does each label occur?
    totals = {}
    for label in labels:
        totals[label] = totals.get(label, 0) + 1

    # Second pass: number the repeated labels in row order.
    seen = {}
    renamed = []
    for label in labels:
        if label != "-" and totals[label] > 1:
            seen[label] = seen.get(label, 0) + 1
            renamed.append(label + " - " + str(seen[label]))
        else:
            renamed.append(label)

    df[(meta_label, col)] = renamed
    return df
def remove_rows(df, matching_values, spect_label="wvl"):
    """
    Remove the rows of a data frame that satisfy every given criterion.

    Parameters
    ----------
    df : DataFrame
         The data frame to filter

    matching_values : list of dict
                      One dict per criterion, each with the keys:
                      'column': the column to match on
                      'operator': the mathematical operator to evaluate
                      the column on
                      'value': the value with which to compare the column
                      elements to identify matches
                      An empty list removes nothing.

    spect_label : str
                  Passed through to ``identify_rows``

    Returns
    -------
    df : DataFrame
         ``df`` without the rows that matched ALL criteria
    """
    matches = [
        identify_rows(
            df,
            val["column"],
            val["operator"],
            val["value"],
            spect_label=spect_label,
        )
        for val in matching_values
    ]

    if matches:
        # A row is removed only when every criterion matched it.
        match_combined = np.all(matches, axis=0)
    else:
        # With no criteria np.all would collapse to a scalar that cannot be
        # used as a row mask; treat it as "nothing matches".
        match_combined = np.zeros(df.shape[0], dtype=bool)

    print("Shape before removing rows: " + str(df.shape))
    df = df.iloc[~match_combined]
    print("Shape after removing rows: " + str(df.shape))
    return df
# This function is used by remove_rows to identify the rows matching a given
# operator
def identify_rows(df, colname, operator, value, spect_label="wvl"):
    """
    Build a boolean mask of the rows whose column value satisfies a
    comparison.

    The column is looked up by its second-level label among the columns
    whose top-level label is not ``spect_label``. The comparison is first
    attempted numerically (column and value converted to float); if that
    conversion fails, the raw column data is compared to the raw value.

    Parameters
    ----------
    df : DataFrame
         Data frame with two-level column labels

    colname : object
              Second-level label of the column to evaluate

    operator : str
               One of "=", ">", "<", "<=", ">=". Any other string yields
               the mask ``coldata == coldata``.

    value : object
            The value to compare the column elements against

    spect_label : str
                  Top-level label of the columns to exclude from the
                  lookup

    Returns
    -------
    match : ndarray
            Boolean array with one entry per row of ``df``

    Raises
    ------
    ValueError
        If ``colname`` is not among the non-spectral second-level labels
    """
    level0 = df.columns.get_level_values(0)
    level1 = df.columns.get_level_values(1)
    non_spectral = level0 != spect_label
    top_labels = list(level0[non_spectral])
    sub_labels = list(level1[non_spectral])

    # Recover the full (top-level, second-level) label of the column.
    full_name = (top_labels[sub_labels.index(colname)], colname)
    coldata = np.array(df[full_name])

    comparisons = {
        "=": lambda left, right: left == right,
        ">": lambda left, right: left > right,
        "<": lambda left, right: left < right,
        "<=": lambda left, right: left <= right,
        ">=": lambda left, right: left >= right,
    }
    compare = comparisons.get(operator)
    if compare is None:
        # Unrecognised operator: keep the historical fallback mask.
        return coldata == coldata

    try:
        return compare(np.array(coldata, dtype=float), float(value))
    except (TypeError, ValueError, OverflowError):
        # Column or value is not numeric; compare the raw objects instead.
        return compare(coldata, value)