Na}{

File Content: feature_stacker.py

"""
=================================================
Concatenating multiple feature extraction methods
=================================================

In many real-world examples, there are many ways to extract features from a
dataset. Often it is beneficial to combine several methods to obtain good
performance. This example shows how to use ``FeatureUnion`` to combine
features obtained by PCA and univariate selection.

Combining features with this transformer has the benefit that cross-validation
and grid search can be run over the whole process.

The combination used in this example is not particularly helpful on this
dataset and is only used to illustrate the usage of FeatureUnion.
"""

# Author: Andreas Mueller <amueller@ais.uni-bonn.de>
#
# License: BSD 3 clause

from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest

# Load the iris data and split it into the feature matrix and the labels.
iris = load_iris()
X = iris.data
y = iris.target

# First feature extractor: project the data onto two principal components.
pca = PCA(n_components=2)

# Second feature extractor: keep the single best-scoring original feature.
selection = SelectKBest(k=1)

# Stack the outputs of both extractors side by side.
combined_features = FeatureUnion(
    transformer_list=[
        ("pca", pca),
        ("univ_select", selection),
    ]
)

# Fit the union on the full dataset, then compute the stacked features.
combined_features.fit(X, y)
X_features = combined_features.transform(X)

# Classifier that consumes the stacked features.
svm = SVC(kernel="linear")

# Chain feature extraction and classification so that the grid search below
# tunes both stages together.
pipeline = Pipeline(
    steps=[
        ("features", combined_features),
        ("svm", svm),
    ]
)

# Candidate values for the number of PCA components, the number of selected
# features (k) and the SVM regularization parameter C.
param_grid = {
    "features__pca__n_components": [1, 2, 3],
    "features__univ_select__k": [1, 2],
    "svm__C": [0.1, 1, 10],
}

# Run the exhaustive search and report the best pipeline found.
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, verbose=10)
grid_search.fit(X, y)
print(grid_search.best_estimator_)

Name	Size	Permission
applications	---	0755
bicluster	---	0755
calibration	---	0755
classification	---	0755
cluster	---	0755
covariance	---	0755
cross_decomposition	---	0755
datasets	---	0755
decomposition	---	0755
ensemble	---	0755
exercises	---	0755
feature_selection	---	0755
gaussian_process	---	0755
linear_model	---	0755
manifold	---	0755
mixture	---	0755
model_selection	---	0755
neighbors	---	0755
neural_networks	---	0755
preprocessing	---	0755
semi_supervised	---	0755
svm	---	0755
text	---	0755
tree	---	0755
README.txt	116 bytes	0644
feature_stacker.py	1911 bytes	0644
hetero_feature_union.py	6241 bytes	0644
missing_values.py	3055 bytes	0644
plot_compare_reduction.py	2489 bytes	0644
plot_cv_predict.py	799 bytes	0644
plot_digits_pipe.py	1813 bytes	0644
plot_isotonic_regression.py	1767 bytes	0644
plot_johnson_lindenstrauss_bound.py	7474 bytes	0644
plot_kernel_approximation.py	8004 bytes	0644
plot_kernel_ridge_regression.py	6269 bytes	0644
plot_multilabel.py	4157 bytes	0644
plot_multioutput_face_completion.py	3019 bytes	0644