Submit
Path:
~
/
/
opt
/
alt
/
python35
/
share
/
doc
/
alt-python35-scikit-learn-0.18.1
/
examples
/
cluster
/
File Content:
plot_dict_face_patches.py
""" Online learning of a dictionary of parts of faces ================================================== This example uses a large dataset of faces to learn a set of 20 x 20 images patches that constitute faces. From the programming standpoint, it is interesting because it shows how to use the online API of the scikit-learn to process a very large dataset by chunks. The way we proceed is that we load an image at a time and extract randomly 50 patches from this image. Once we have accumulated 500 of these patches (using 10 images), we run the `partial_fit` method of the online KMeans object, MiniBatchKMeans. The verbose setting on the MiniBatchKMeans enables us to see that some clusters are reassigned during the successive calls to partial-fit. This is because the number of patches that they represent has become too low, and it is better to choose a random new cluster. """ print(__doc__) import time import matplotlib.pyplot as plt import numpy as np from sklearn import datasets from sklearn.cluster import MiniBatchKMeans from sklearn.feature_extraction.image import extract_patches_2d faces = datasets.fetch_olivetti_faces() ############################################################################### # Learn the dictionary of images print('Learning the dictionary... ') rng = np.random.RandomState(0) kmeans = MiniBatchKMeans(n_clusters=81, random_state=rng, verbose=True) patch_size = (20, 20) buffer = [] index = 1 t0 = time.time() # The online learning part: cycle over the whole dataset 6 times index = 0 for _ in range(6): for img in faces.images: data = extract_patches_2d(img, patch_size, max_patches=50, random_state=rng) data = np.reshape(data, (len(data), -1)) buffer.append(data) index += 1 if index % 10 == 0: data = np.concatenate(buffer, axis=0) data -= np.mean(data, axis=0) data /= np.std(data, axis=0) kmeans.partial_fit(data) buffer = [] if index % 100 == 0: print('Partial fit of %4i out of %i' % (index, 6 * len(faces.images))) dt = time.time() - t0 print('done in %.2fs.' % dt) ############################################################################### # Plot the results plt.figure(figsize=(4.2, 4)) for i, patch in enumerate(kmeans.cluster_centers_): plt.subplot(9, 9, i + 1) plt.imshow(patch.reshape(patch_size), cmap=plt.cm.gray, interpolation='nearest') plt.xticks(()) plt.yticks(()) plt.suptitle('Patches of faces\nTrain time %.1fs on %d patches' % (dt, 8 * len(faces.images)), fontsize=16) plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23) plt.show()
Submit
FILE
FOLDER
Name
Size
Permission
Action
README.txt
101 bytes
0644
plot_adjusted_for_chance_measures.py
4300 bytes
0644
plot_affinity_propagation.py
2304 bytes
0644
plot_agglomerative_clustering.py
2931 bytes
0644
plot_agglomerative_clustering_metrics.py
4492 bytes
0644
plot_birch_vs_minibatchkmeans.py
3694 bytes
0644
plot_cluster_comparison.py
4681 bytes
0644
plot_cluster_iris.py
2593 bytes
0644
plot_color_quantization.py
3444 bytes
0644
plot_dbscan.py
2479 bytes
0644
plot_dict_face_patches.py
2747 bytes
0644
plot_digits_agglomeration.py
1694 bytes
0644
plot_digits_linkage.py
2959 bytes
0644
plot_face_compress.py
2479 bytes
0644
plot_face_segmentation.py
2839 bytes
0644
plot_face_ward_segmentation.py
2460 bytes
0644
plot_feature_agglomeration_vs_univariate_selection.py
3903 bytes
0644
plot_kmeans_assumptions.py
2040 bytes
0644
plot_kmeans_digits.py
4524 bytes
0644
plot_kmeans_silhouette_analysis.py
5888 bytes
0644
plot_kmeans_stability_low_dim_dense.py
4324 bytes
0644
plot_mean_shift.py
1793 bytes
0644
plot_mini_batch_kmeans.py
4092 bytes
0644
plot_segmentation_toy.py
3522 bytes
0644
plot_ward_structured_vs_unstructured.py
3369 bytes
0644
N4ST4R_ID | Naxtarrr