Test function for KNN regression feature importance¶
We generate test data for KNN regression. The goal is to provide a data set, which has relevant and irrelevant features for regression. We use a Friedman #1 problem and add zeros and random data. We optimize the selection of features with an SAES.
The optimal parameters are (0=True, 1=False, 2=False, 3=False, 4=False, 5=True, 6=True, 7=False, 8=False, 9=False, 10=True, 11=True, 12=True, 13=True, 14=True).
# Future
from __future__ import absolute_import, division, print_function, \
unicode_literals, with_statement
# First party
from metaopt.core.returnspec.util.decorator import minimize
from metaopt.core.paramspec.util import param
from metaopt.core.optimize.optimize import optimize
# Third Party
from sklearn.datasets import make_friedman1
from sklearn.neighbors import KNeighborsRegressor
from sklearn.base import BaseEstimator
from sklearn.base import RegressorMixin
from sklearn.metrics import mean_squared_error as mse
import numpy as np
import math
class KNN(BaseEstimator, RegressorMixin):
"""
K Nearest Neighbors Regression with feature filter
"""
def __init__(self, n_neighbors=5, ff=[True,True,True,True]):
"""
:param n_neighbors: number of nearest neighbors searched
:param ff: feature filter
"""
self.n_neighbors=n_neighbors
self.ff = np.array(ff,dtype=bool)
self.knn=KNeighborsRegressor(n_neighbors=self.n_neighbors)
def fit(self,X,y):
# Choose the selected features
self.knn.fit(X[:,self.ff], y)
def predict(self,X):
# Choose the selected features
return self.knn.predict(X[:,self.ff])
def score(self,X,y):
# Choose the selected features
return mse(self.knn.predict(X[:,self.ff]),y)
n_samples=1000
n_informative=5
n_zeros=5
n_random=5
n_features=n_informative+n_zeros+n_random
# Generate a regression problem data set
# Inputs X are independent features uniformly distributed on the interval [0, 1]. The output y is created according to the formula:
# y(X) = 10 * sin(pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - 0.5) ** 2 + 10 * X[:, 3] + 5 * X[:, 4] + noise * N(0, 1).
X,Y = make_friedman1(n_samples=n_samples, n_features=n_informative, noise=0.0, random_state=13)
X=np.array(X)
# Fill up the data with zeros
Z1=np.zeros((n_samples, n_zeros))
X=np.hstack((X,Z1))
# Fill up the data with random
Z2=np.random.random((n_samples, n_random))
X=np.hstack((X,Z2))
train_to, test_to = int(math.floor(len(X) * 0.5)), len(X)
X_train=X[:train_to:1]
y_train=Y[:train_to:1]
X_test=X[train_to:test_to:1]
y_test=Y[train_to:test_to:1]
"""
Evaluate the KNN regression using the featrue filter ff
"""
@minimize("KNN Regression MSE")
@param.multi(param.bool, map(str,range(n_features)))
def f(**kwargs):
"""
:param ff: feature filter
"""
clf = KNN(n_neighbors=5, ff=kwargs.values())
clf.fit(X_train, y_train)
return clf.score(X_test, y_test)
def main():
from metaopt.optimizer.saes import SAESOptimizer
from metaopt.plugin.visualization.best_fitness import VisualizeBestFitnessPlugin
timeout = 10
optimizer = SAESOptimizer(mu=5, lamb=5)
visualize_best_fitness_plugin = VisualizeBestFitnessPlugin()
plugins = [
visualize_best_fitness_plugin
]
optimum = optimize(f=f, timeout=timeout, optimizer=optimizer,
plugins=plugins)
print("The optimal parameters are %s." % str(optimum))
visualize_best_fitness_plugin.show_fitness_invocations_plot()
visualize_best_fitness_plugin.show_fitness_time_plot()
if __name__ == '__main__':
main()