| | import numpy as np |
| | import matplotlib.pyplot as plt |
| | from threading import Thread |
| | from matplotlib.colors import ListedColormap |
| | from sklearn.datasets import make_moons, make_circles, make_classification |
| | from sklearn.datasets import make_blobs, make_circles, make_moons |
| | import gradio as gr |
| | import math |
| | from functools import partial |
| | import time |
| |
|
| | import matplotlib |
| |
|
| | from sklearn import svm |
| | from sklearn.datasets import make_moons, make_blobs |
| | from sklearn.covariance import EllipticEnvelope |
| | from sklearn.ensemble import IsolationForest |
| | from sklearn.neighbors import LocalOutlierFactor |
| | from sklearn.linear_model import SGDOneClassSVM |
| | from sklearn.kernel_approximation import Nystroem |
| | from sklearn.pipeline import make_pipeline |
| |
|
def get_groundtruth_model(X, labels):
    """Wrap known labels in an object that exposes them as ``labels_``.

    Args:
        X: Unused; accepted so the call has the same shape as fitting a
            model on data.
        labels: The ground-truth labels to expose.

    Returns:
        An object whose ``labels_`` attribute is the ``labels`` object
        that was passed in (the same object, not a copy).
    """

    class Dummy:
        def __init__(self, y):
            # Store the constructor argument itself rather than silently
            # reaching into the enclosing function's scope.
            self.labels_ = y

    return Dummy(labels)
| |
|
| | |
# Module-level size tuple; not referenced by the code visible in this file.
FIGSIZE = (10, 10)
# A pyplot figure is created once at import time and kept under this name.
figure = plt.figure(figsize=(25, 10))
| |
|
| |
|
def train_models(input_data, outliers_fraction, n_samples, clf_name):
    """Fit one anomaly detector on one toy dataset and plot its predictions.

    Args:
        input_data: Dataset name: "Central Blob", "Two Blobs",
            "Blob with Noise", "Moons" or "Noise".
        outliers_fraction: Fraction of ``n_samples`` drawn as uniform
            outliers; also passed to the detector as its contamination /
            ``nu`` setting.
        n_samples: Nominal number of samples.
        clf_name: Detector name: "Robust covariance", "One-Class SVM",
            "One-Class SVM (SGD)", "Isolation Forest" or
            "Local Outlier Factor".

    Returns:
        The ``matplotlib.pyplot`` module, whose current figure holds the plot.

    Raises:
        KeyError: If ``input_data`` or ``clf_name`` is not a known name.
    """
    # Coerce defensively: a UI slider may hand over a float, and the dataset
    # generators need integer sample counts.
    n_samples = int(n_samples)
    n_outliers = int(outliers_fraction * n_samples)
    n_inliers = n_samples - n_outliers
    blobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)

    # Factories instead of instances: only the requested estimator and the
    # requested dataset are actually built on each call.
    clf_factories = {
        "Robust covariance": lambda: EllipticEnvelope(contamination=outliers_fraction),
        "One-Class SVM": lambda: svm.OneClassSVM(
            nu=outliers_fraction, kernel="rbf", gamma=0.1
        ),
        "One-Class SVM (SGD)": lambda: make_pipeline(
            Nystroem(gamma=0.1, random_state=42, n_components=150),
            SGDOneClassSVM(
                nu=outliers_fraction,
                shuffle=True,
                fit_intercept=True,
                random_state=42,
                tol=1e-6,
            ),
        ),
        "Isolation Forest": lambda: IsolationForest(
            contamination=outliers_fraction, random_state=42
        ),
        "Local Outlier Factor": lambda: LocalOutlierFactor(
            n_neighbors=35, contamination=outliers_fraction
        ),
    }
    data_factories = {
        "Central Blob": lambda: make_blobs(
            centers=[[0, 0], [0, 0]], cluster_std=0.5, **blobs_params
        )[0],
        "Two Blobs": lambda: make_blobs(
            centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5], **blobs_params
        )[0],
        "Blob with Noise": lambda: make_blobs(
            centers=[[2, 2], [-2, -2]], cluster_std=[1.5, 0.3], **blobs_params
        )[0],
        "Moons": lambda: 4.0
        * (
            make_moons(n_samples=n_samples, noise=0.05, random_state=0)[0]
            - np.array([0.5, 0.25])
        ),
        "Noise": lambda: 14.0 * (np.random.RandomState(42).rand(n_samples, 2) - 0.5),
    }

    clf = clf_factories[clf_name]()
    X = data_factories[input_data]()

    # Append uniformly distributed outliers to the inlier data.
    rng = np.random.RandomState(42)
    X = np.concatenate([X, rng.uniform(low=-6, high=6, size=(n_outliers, 2))], axis=0)

    # Grid over the visible area, used to draw the decision boundary.
    xx, yy = np.meshgrid(np.linspace(-7, 7, 150), np.linspace(-7, 7, 150))

    # NOTE(review): each call opens a new pyplot figure that is never closed
    # here; confirm whoever consumes the return value releases it.
    plt.figure(figsize=(len(clf_factories) * 2 + 4, 12.5))

    is_lof = clf_name == "Local Outlier Factor"

    # Fit exactly once; the fitted estimator is reused for every prediction.
    t0 = time.perf_counter()
    if is_lof:
        # In its default outlier-detection mode LOF labels the training data
        # through fit_predict, so the timing includes that labelling step.
        y_pred = clf.fit_predict(X)
        t1 = time.perf_counter()
    else:
        clf.fit(X)
        t1 = time.perf_counter()
        y_pred = clf.predict(X)

    # LOF is skipped here: it is not asked to predict on new grid points.
    if not is_lof:
        Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
        plt.contour(xx, yy, Z, levels=[0], linewidths=10, colors="black")

    # Predictions are -1 / +1; (y + 1) // 2 maps them to colour index 0 / 1.
    colors = np.array(["#377eb8", "#ff7f00"])
    plt.scatter(X[:, 0], X[:, 1], s=100, color=colors[(y_pred + 1) // 2])

    plt.xlim(-7, 7)
    plt.ylim(-7, 7)
    plt.xticks(())
    plt.yticks(())
    # Fit time in the bottom-right corner, without the leading zero.
    plt.text(
        0.99,
        0.01,
        f"{t1 - t0:.2f}s".lstrip("0"),
        transform=plt.gca().transAxes,
        size=60,
        horizontalalignment="right",
    )

    return plt
| |
|
# Introductory blurb for the demo page.
description = (
    "Learn how different anomaly detection algorithms "
    "perform in different datasets."
)
| |
|
def iter_grid(n_rows, n_cols):
    """Yield once per cell of an ``n_rows`` x ``n_cols`` Gradio layout grid.

    Each ``yield`` happens inside a fresh ``gr.Column`` nested in the current
    ``gr.Row``, so whatever the consumer does between two iterations runs
    with that column context active. The yielded value is always ``None``.
    """
    for _row in range(n_rows):
        with gr.Row():
            for _col in range(n_cols):
                with gr.Column():
                    yield None
| |
|
title = "🕵️♀️ compare anomaly detection algorithms 🕵️♂️"
with gr.Blocks() as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description)

    # One plot is rendered per detector, in this order.
    input_models = [
        "Robust covariance",
        "One-Class SVM",
        "One-Class SVM (SGD)",
        "Isolation Forest",
        "Local Outlier Factor",
    ]
    input_data = gr.Radio(
        choices=["Central Blob", "Two Blobs", "Blob with Noise", "Moons", "Noise"],
        value="Moons",
    )
    n_samples = gr.Slider(minimum=100, maximum=500, step=25, label="Number of Samples")
    # Capped at 0.5: the value is forwarded as `contamination` to
    # EllipticEnvelope, IsolationForest and LocalOutlierFactor, which reject
    # anything above 0.5, so larger fractions could only produce errors.
    outliers_fraction = gr.Slider(minimum=0.1, maximum=0.5, step=0.1, label="Fraction of Outliers")

    controls = [input_data, outliers_fraction, n_samples]

    # zip() pulls from input_models first, so the grid is never advanced past
    # the last model and no empty trailing cell is created.
    grid = iter_grid(5, 5)
    for input_model, _ in zip(input_models, grid):
        plot = gr.Plot(label=input_model)
        fn = partial(train_models, clf_name=input_model)
        # Redraw this detector's plot whenever any control changes.
        for control in controls:
            control.change(fn=fn, inputs=controls, outputs=plot)
    # Close the suspended generator now so its open Row/Column contexts are
    # exited while still inside the Blocks context.
    grid.close()

# `enable_queue` was deprecated as a launch() argument and later removed;
# queueing is enabled through queue() instead.
demo.queue()
demo.launch(debug=True)
| |
|
| |
|