Submit
Path:
~
/
/
proc
/
thread-self
/
root
/
opt
/
alt
/
python35
/
share
/
doc
/
alt-python35-scikit-learn-0.18.1
/
examples
/
File Content:
feature_stacker.py
"""
=================================================
Concatenating multiple feature extraction methods
=================================================

In many real-world examples, there are many ways to extract features from a
dataset. Often it is beneficial to combine several methods to obtain good
performance. This example shows how to use ``FeatureUnion`` to combine
features obtained by PCA and univariate selection.

Combining features using this transformer has the benefit that it allows
cross validation and grid searches over the whole process.

The combination used in this example is not particularly helpful on this
dataset and is only used to illustrate the usage of FeatureUnion.
"""

# Author: Andreas Mueller <amueller@ais.uni-bonn.de>
#
# License: BSD 3 clause

from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest

iris = load_iris()

X, y = iris.data, iris.target

# This dataset is way too high-dimensional. Better do PCA:
pca = PCA(n_components=2)

# Maybe some original features were good, too?
selection = SelectKBest(k=1)

# Build estimator from PCA and Univariate selection:
# the step names ("pca", "univ_select") are reused below as prefixes of the
# grid-search parameter names, so they must stay in sync with ``param_grid``.
combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])

# Use combined features to transform dataset:
X_features = combined_features.fit(X, y).transform(X)

svm = SVC(kernel="linear")

# Do grid search over k, n_components and C:
pipeline = Pipeline([("features", combined_features), ("svm", svm)])

# Keys follow the ``<step>__<sub-step>__<parameter>`` naming of the pipeline
# steps defined above.
param_grid = dict(features__pca__n_components=[1, 2, 3],
                  features__univ_select__k=[1, 2],
                  svm__C=[0.1, 1, 10])

grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=10)
grid_search.fit(X, y)
print(grid_search.best_estimator_)
Submit
FILE
FOLDER
Name
Size
Permission
Action
applications
---
0755
bicluster
---
0755
calibration
---
0755
classification
---
0755
cluster
---
0755
covariance
---
0755
cross_decomposition
---
0755
datasets
---
0755
decomposition
---
0755
ensemble
---
0755
exercises
---
0755
feature_selection
---
0755
gaussian_process
---
0755
linear_model
---
0755
manifold
---
0755
mixture
---
0755
model_selection
---
0755
neighbors
---
0755
neural_networks
---
0755
preprocessing
---
0755
semi_supervised
---
0755
svm
---
0755
text
---
0755
tree
---
0755
README.txt
116 bytes
0644
feature_stacker.py
1911 bytes
0644
hetero_feature_union.py
6241 bytes
0644
missing_values.py
3055 bytes
0644
plot_compare_reduction.py
2489 bytes
0644
plot_cv_predict.py
799 bytes
0644
plot_digits_pipe.py
1813 bytes
0644
plot_isotonic_regression.py
1767 bytes
0644
plot_johnson_lindenstrauss_bound.py
7474 bytes
0644
plot_kernel_approximation.py
8004 bytes
0644
plot_kernel_ridge_regression.py
6269 bytes
0644
plot_multilabel.py
4157 bytes
0644
plot_multioutput_face_completion.py
3019 bytes
0644
N4ST4R_ID | Naxtarrr