# Source code for libpyhat.utils.outlier_identify

import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor


# This function flags outliers in a spectral data set. Different algorithms
# can be selected.


def outlier_identify(df, col, method, params, meta_label="meta"):
    """Flag outliers in ``df[col]`` and store the results as new columns.

    The data selected by ``df[col]`` is converted to a NumPy array and passed
    to the chosen scikit-learn estimator. Results are written back into
    ``df`` under tuple keys ``(meta_label, <name>)``, where ``<name>`` embeds
    the method and ``str(params)`` so that runs with different settings do
    not overwrite each other.

    Parameters
    ----------
    df
        Data container supporting ``df[col]`` lookup and assignment with
        tuple keys (presumably a pandas DataFrame with two-level columns;
        confirm against callers). It is modified in place.
    col
        Key selecting the data to analyse.
    method
        Either ``"Isolation Forest"`` or ``"Local Outlier Factor"``.
    params
        Keyword arguments forwarded unchanged to the estimator constructor.
    meta_label
        First element of the tuple keys used for the result columns.

    Returns
    -------
    The same ``df`` object, with these columns added:

    * Isolation Forest: ``"Outlier Scores - ..."`` (``decision_function``
      output) and ``"Outliers - ..."`` (``predict`` output).
    * Local Outlier Factor: ``"Outlier Factor - ..."``
      (``negative_outlier_factor_``) and ``"Outliers - ..."``
      (``fit_predict`` output).

    By scikit-learn convention the prediction columns hold ``-1`` for
    outliers and ``1`` for inliers.

    Raises
    ------
    AssertionError
        If ``method`` is not one of the supported names.
    """
    supported = ("Isolation Forest", "Local Outlier Factor")
    if method not in supported:
        # Raised explicitly rather than via a bare ``assert`` so the check
        # still runs under ``python -O``; the exception type is unchanged
        # for existing callers.
        raise AssertionError(
            f"method must be one of {list(supported)}, got {method!r}"
        )

    # Build the feature matrix once; every estimator call uses the same data.
    data = np.array(df[col])
    suffix = method + str(params)

    if method == "Isolation Forest":
        model = IsolationForest(**params).fit(data)
        df[(meta_label, "Outlier Scores - " + suffix)] = (
            model.decision_function(data)
        )
        df[(meta_label, "Outliers - " + suffix)] = model.predict(data)
    else:  # "Local Outlier Factor"
        model = LocalOutlierFactor(**params)
        # fit_predict() fits the model itself, so a separate fit() call
        # beforehand would only repeat the same work.
        is_outlier = model.fit_predict(data)
        df[(meta_label, "Outlier Factor - " + suffix)] = (
            model.negative_outlier_factor_
        )
        df[(meta_label, "Outliers - " + suffix)] = is_outlier

    return df