import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
# Flag outliers in a spectral data set. The detection algorithm is selected
# by name: "Isolation Forest" or "Local Outlier Factor".
def outlier_identify(df, col, method, params, meta_label="meta"):
    """Flag outliers in ``df[col]`` and store the results as new columns of ``df``.

    Two columns are added to ``df`` (in place), both keyed by the tuple
    ``(meta_label, "<prefix>" + method + str(params))``:

    * ``"Outlier Scores - "`` (Isolation Forest, from ``decision_function``)
      or ``"Outlier Factor - "`` (Local Outlier Factor, from
      ``negative_outlier_factor_``);
    * ``"Outliers - "`` holding the estimator's predicted labels
      (scikit-learn documents these as ``-1`` for outliers, ``1`` for inliers).

    Parameters
    ----------
    df
        Table holding the data; it is indexed with ``df[col]`` and assigned to
        with tuple keys. Presumably a pandas DataFrame with two-level
        columns -- confirm against callers.
    col
        Selector passed to ``df[...]`` to obtain the feature matrix.
    method
        Either ``"Isolation Forest"`` or ``"Local Outlier Factor"``.
    params
        Mapping of keyword arguments forwarded to the chosen scikit-learn
        estimator. Its ``str()`` form becomes part of the new column names.
    meta_label
        First element of the tuple keys under which results are stored.

    Returns
    -------
    The same ``df`` object, with the two result columns added.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    supported = ("Isolation Forest", "Local Outlier Factor")
    # Explicit raise instead of ``assert``: asserts are stripped under
    # ``python -O``, which would let an unknown method fall through and
    # return ``df`` untouched without any warning.
    if method not in supported:
        raise ValueError(
            "method must be one of " + repr(list(supported)) + ", got " + repr(method)
        )

    # Convert once, before any result column is added, so fitting and
    # prediction are guaranteed to see exactly the same feature matrix.
    features = np.array(df[col])
    suffix = method + str(params)

    if method == "Isolation Forest":
        detector = IsolationForest(**params).fit(features)
        df[(meta_label, "Outlier Scores - " + suffix)] = detector.decision_function(
            features
        )
        df[(meta_label, "Outliers - " + suffix)] = detector.predict(features)
    else:  # "Local Outlier Factor"
        detector = LocalOutlierFactor(**params)
        # fit_predict fits the model itself; a separate fit() beforehand
        # only repeats the same work.
        is_outlier = detector.fit_predict(features)
        df[(meta_label, "Outlier Factor - " + suffix)] = (
            detector.negative_outlier_factor_
        )
        df[(meta_label, "Outliers - " + suffix)] = is_outlier

    return df