==========================================
Outlier detection with several methods.
==========================================
When the amount of contamination is known, this example illustrates four
different ways of performing outlier detection:

- based on a robust estimator of covariance, which assumes that the
  data are Gaussian distributed and performs better than the One-Class SVM
  in that case;
- using the One-Class SVM and its ability to capture the shape of the
  data set, hence performing better when the data is strongly non-Gaussian,
  i.e. with two well-separated clusters;
- using the Isolation Forest algorithm, which is based on random forests and
  is hence better suited to high-dimensional settings;
- using the Local Outlier Factor to measure the local deviation of a given
  data point with respect to its neighbors by comparing their local density
  (a small standalone illustration follows the classifier definitions in the
  code below).

The ground truth about inliers and outliers is given by the colors of the
points, while the orange-filled area indicates which points are reported as
inliers by each method.

Here, we assume that we know the fraction of outliers in the datasets.
Thus rather than using the 'predict' method of the objects, we set the
threshold on the decision_function to separate out the corresponding
fraction (a short sketch of the alternative 'predict' route is appended at
the end of the script).
"""
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import matplotlib.font_manager

from sklearn import svm
from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor

print(__doc__)
rng = np.random.RandomState(42)
# Example settings
n_samples = 200
outliers_fraction = 0.25
clusters_separation = [0, 1, 2]
# define four outlier detection tools to be compared
classifiers = {
    "One-Class SVM": svm.OneClassSVM(nu=0.95 * outliers_fraction + 0.05,
                                     kernel="rbf", gamma=0.1),
    "Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
    "Isolation Forest": IsolationForest(contamination=outliers_fraction,
                                        random_state=rng),
    "Local Outlier Factor": LocalOutlierFactor(
        n_neighbors=35,
        contamination=outliers_fraction)}
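
# Illustrative aside (not part of the comparison itself): the Local Outlier
# Factor bullet in the docstring talks about comparing local densities. As a
# minimal sketch, assuming a toy data set of one tight blob plus a single
# far-away point, the isolated point gets a markedly more negative
# ``negative_outlier_factor_`` than the points inside the blob.
X_toy = np.r_[0.3 * rng.randn(20, 2), [[5.0, 5.0]]]  # 20 inliers + 1 outlier
lof_toy = LocalOutlierFactor(n_neighbors=5)
lof_toy.fit_predict(X_toy)
print("LOF scores (last point is the isolated one):")
print(np.round(lof_toy.negative_outlier_factor_, 2))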
# Compare the classifiers under the given settings
xx, yy = np.meshgrid(np.linspace(-7, 7, 100), np.linspace(-7, 7, 100))
n_inliers = int((1. - outliers_fraction) * n_samples)
n_outliers = int(outliers_fraction * n_samples)
ground_truth = np.ones(n_samples, dtype=int)
ground_truth[-n_outliers:] = -1
# Fit the problem with varying cluster separation
for offset in clusters_separation:
    np.random.seed(42)
    # Data generation: two Gaussian blobs of inliers, shifted apart by offset
    X1 = 0.3 * np.random.randn(n_inliers // 2, 2) - offset
    X2 = 0.3 * np.random.randn(n_inliers // 2, 2) + offset
    X = np.r_[X1, X2]
    # Add outliers drawn uniformly over the plotting range
    X = np.r_[X, np.random.uniform(low=-6, high=6, size=(n_outliers, 2))]

    # Fit the model
    plt.figure(figsize=(9, 7))
    for i, (clf_name, clf) in enumerate(classifiers.items()):
        # fit the data and tag outliers
        if clf_name == "Local Outlier Factor":
            y_pred = clf.fit_predict(X)
            scores_pred = clf.negative_outlier_factor_
        else:
            clf.fit(X)
            scores_pred = clf.decision_function(X)
            y_pred = clf.predict(X)
        # threshold the scores so that the known fraction is flagged as outliers
        threshold = stats.scoreatpercentile(scores_pred,
                                            100 * outliers_fraction)
        n_errors = (y_pred != ground_truth).sum()
        # plot the level lines and the points
        if clf_name == "Local Outlier Factor":
            # decision_function is private for LOF in this scikit-learn version
            Z = clf._decision_function(np.c_[xx.ravel(), yy.ravel()])
        else:
            Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
        Z = Z.reshape(xx.shape)
        subplot = plt.subplot(2, 2, i + 1)
        # blue shading: regions scored below the threshold (predicted outliers)
        subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),
                         cmap=plt.cm.Blues_r)
        # red line: learned decision boundary at the threshold
        a = subplot.contour(xx, yy, Z, levels=[threshold],
                            linewidths=2, colors='red')
        # orange-filled area: points reported as inliers by the method
        subplot.contourf(xx, yy, Z, levels=[threshold, Z.max()],
                         colors='orange')
        b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white',
                            s=20, edgecolor='k')
        c = subplot.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1], c='black',
                            s=20, edgecolor='k')
        subplot.axis('tight')
        subplot.legend(
            [a.collections[0], b, c],
            ['learned decision function', 'true inliers', 'true outliers'],
            prop=matplotlib.font_manager.FontProperties(size=10),
            loc='lower right')
        subplot.set_xlabel("%d. %s (errors: %d)" % (i + 1, clf_name, n_errors))
        subplot.set_xlim((-7, 7))
        subplot.set_ylim((-7, 7))
    plt.suptitle("Outlier detection")

plt.show()
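
# ----------------------------------------------------------------------
# Aside, as mentioned in the docstring: instead of thresholding the
# decision_function at the known outlier fraction, one could rely on each
# estimator's ``predict`` method directly, provided the expected
# contamination (or ``nu`` for the One-Class SVM) is passed at construction
# time. A minimal sketch of that alternative route, reusing the ``X`` from
# the last iteration of the loop above; the estimator choice and the fixed
# random_state below are illustrative, not part of the comparison itself.
clf_alt = IsolationForest(contamination=outliers_fraction, random_state=42)
y_alt = clf_alt.fit(X).predict(X)  # +1 for predicted inliers, -1 for outliers
print("fraction predicted as outliers: %.2f" % (y_alt == -1).mean())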