Submit
Path:
~
/
/
opt
/
alt
/
python35
/
share
/
doc
/
alt-python35-scikit-learn-0.18.1
/
examples
/
cluster
/
File Content:
plot_mini_batch_kmeans.py
""" ==================================================================== Comparison of the K-Means and MiniBatchKMeans clustering algorithms ==================================================================== We want to compare the performance of the MiniBatchKMeans and KMeans: the MiniBatchKMeans is faster, but gives slightly different results (see :ref:`mini_batch_kmeans`). We will cluster a set of data, first with KMeans and then with MiniBatchKMeans, and plot the results. We will also plot the points that are labelled differently between the two algorithms. """ print(__doc__) import time import numpy as np import matplotlib.pyplot as plt from sklearn.cluster import MiniBatchKMeans, KMeans from sklearn.metrics.pairwise import pairwise_distances_argmin from sklearn.datasets.samples_generator import make_blobs ############################################################################## # Generate sample data np.random.seed(0) batch_size = 45 centers = [[1, 1], [-1, -1], [1, -1]] n_clusters = len(centers) X, labels_true = make_blobs(n_samples=3000, centers=centers, cluster_std=0.7) ############################################################################## # Compute clustering with Means k_means = KMeans(init='k-means++', n_clusters=3, n_init=10) t0 = time.time() k_means.fit(X) t_batch = time.time() - t0 ############################################################################## # Compute clustering with MiniBatchKMeans mbk = MiniBatchKMeans(init='k-means++', n_clusters=3, batch_size=batch_size, n_init=10, max_no_improvement=10, verbose=0) t0 = time.time() mbk.fit(X) t_mini_batch = time.time() - t0 ############################################################################## # Plot result fig = plt.figure(figsize=(8, 3)) fig.subplots_adjust(left=0.02, right=0.98, bottom=0.05, top=0.9) colors = ['#4EACC5', '#FF9C34', '#4E9A06'] # We want to have the same colors for the same cluster from the # MiniBatchKMeans and the KMeans algorithm. Let's pair the cluster centers per # closest one. k_means_cluster_centers = np.sort(k_means.cluster_centers_, axis=0) mbk_means_cluster_centers = np.sort(mbk.cluster_centers_, axis=0) k_means_labels = pairwise_distances_argmin(X, k_means_cluster_centers) mbk_means_labels = pairwise_distances_argmin(X, mbk_means_cluster_centers) order = pairwise_distances_argmin(k_means_cluster_centers, mbk_means_cluster_centers) # KMeans ax = fig.add_subplot(1, 3, 1) for k, col in zip(range(n_clusters), colors): my_members = k_means_labels == k cluster_center = k_means_cluster_centers[k] ax.plot(X[my_members, 0], X[my_members, 1], 'w', markerfacecolor=col, marker='.') ax.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col, markeredgecolor='k', markersize=6) ax.set_title('KMeans') ax.set_xticks(()) ax.set_yticks(()) plt.text(-3.5, 1.8, 'train time: %.2fs\ninertia: %f' % ( t_batch, k_means.inertia_)) # MiniBatchKMeans ax = fig.add_subplot(1, 3, 2) for k, col in zip(range(n_clusters), colors): my_members = mbk_means_labels == order[k] cluster_center = mbk_means_cluster_centers[order[k]] ax.plot(X[my_members, 0], X[my_members, 1], 'w', markerfacecolor=col, marker='.') ax.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col, markeredgecolor='k', markersize=6) ax.set_title('MiniBatchKMeans') ax.set_xticks(()) ax.set_yticks(()) plt.text(-3.5, 1.8, 'train time: %.2fs\ninertia: %f' % (t_mini_batch, mbk.inertia_)) # Initialise the different array to all False different = (mbk_means_labels == 4) ax = fig.add_subplot(1, 3, 3) for k in range(n_clusters): different += ((k_means_labels == k) != (mbk_means_labels == order[k])) identic = np.logical_not(different) ax.plot(X[identic, 0], X[identic, 1], 'w', markerfacecolor='#bbbbbb', marker='.') ax.plot(X[different, 0], X[different, 1], 'w', markerfacecolor='m', marker='.') ax.set_title('Difference') ax.set_xticks(()) ax.set_yticks(()) plt.show()
Submit
FILE
FOLDER
Name
Size
Permission
Action
README.txt
101 bytes
0644
plot_adjusted_for_chance_measures.py
4300 bytes
0644
plot_affinity_propagation.py
2304 bytes
0644
plot_agglomerative_clustering.py
2931 bytes
0644
plot_agglomerative_clustering_metrics.py
4492 bytes
0644
plot_birch_vs_minibatchkmeans.py
3694 bytes
0644
plot_cluster_comparison.py
4681 bytes
0644
plot_cluster_iris.py
2593 bytes
0644
plot_color_quantization.py
3444 bytes
0644
plot_dbscan.py
2479 bytes
0644
plot_dict_face_patches.py
2747 bytes
0644
plot_digits_agglomeration.py
1694 bytes
0644
plot_digits_linkage.py
2959 bytes
0644
plot_face_compress.py
2479 bytes
0644
plot_face_segmentation.py
2839 bytes
0644
plot_face_ward_segmentation.py
2460 bytes
0644
plot_feature_agglomeration_vs_univariate_selection.py
3903 bytes
0644
plot_kmeans_assumptions.py
2040 bytes
0644
plot_kmeans_digits.py
4524 bytes
0644
plot_kmeans_silhouette_analysis.py
5888 bytes
0644
plot_kmeans_stability_low_dim_dense.py
4324 bytes
0644
plot_mean_shift.py
1793 bytes
0644
plot_mini_batch_kmeans.py
4092 bytes
0644
plot_segmentation_toy.py
3522 bytes
0644
plot_ward_structured_vs_unstructured.py
3369 bytes
0644
N4ST4R_ID | Naxtarrr