# fastai tabular API; the star import also re-exports pandas as `pd`
from fastai.tabular.all import *
# Download the Adult Census sample dataset and load it into a DataFrame
path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path/'adult.csv')
# bayes_opt provides the Bayesian hyper-parameter search driver
from bayes_opt import BayesianOptimization
def fit_with(lr: float, wd: float, dp: float):
    """Train a tabular model with the given hyper-parameters and return accuracy.

    Args:
        lr: learning rate for `fit_one_cycle`.
        wd: dropout probability for the linear layers (passed as `ps` —
            despite the name, this is not weight decay here).
        dp: dropout probability for the embedding layers (`embed_p`).

    Returns:
        Validation accuracy as a plain float (the quantity the optimizer
        maximizes).
    """
    # create a Learner
    config = tabular_config(embed_p=dp, ps=wd)
    # BUGFIX: the original referenced an undefined name `data`; the
    # DataLoaders built later in this file are bound to `dls`.
    learn = tabular_learner(dls, layers=[200, 100], metrics=accuracy, config=config)
    # Train for 3 epochs, suppressing the progress bar
    with learn.no_bar():
        learn.fit_one_cycle(3, lr)
    # validate() returns [loss, *metrics]; index 1 is the accuracy metric
    return float(learn.validate()[1])
Let's adjust this further to show how we would go about tuning the learning rate, the dropout probabilities, and the layer sizes:
def fit_with(lr: float, wd: float, dp: float, n_layers: float,
             layer_1: float, layer_2: float, layer_3: float):
    """Train a tabular model with a sampled architecture and hyper-parameters.

    The optimizer samples every argument as a continuous value, so the
    architecture parameters are rounded to integers before use.

    Args:
        lr: learning rate for `fit`.
        wd: dropout probability for the linear layers (passed as `ps`).
        dp: dropout probability for the embedding layers (`embed_p`).
        n_layers: number of hidden layers, rounded to 1, 2, or 3.
        layer_1, layer_2, layer_3: candidate sizes for each hidden layer.

    Returns:
        Validation accuracy as a plain float.
    """
    print(lr, wd, dp)
    # Round once and branch on the result. (The original mixed round() and
    # int(), which disagree for values like 2.5 — use round() consistently.)
    depth = round(n_layers)
    if depth == 2:
        layers = [round(layer_1), round(layer_2)]
    elif depth == 3:
        layers = [round(layer_1), round(layer_2), round(layer_3)]
    else:
        layers = [round(layer_1)]
    config = tabular_config(embed_p=float(dp),
                            ps=float(wd))
    learn = tabular_learner(dls, layers=layers, metrics=accuracy, config=config)
    # BUGFIX: `with a() and b():` evaluates the `and` first, so only the
    # *second* context manager is actually entered; a comma enters both.
    with learn.no_bar(), learn.no_logging():
        learn.fit(5, lr=float(lr))
    # validate() returns [loss, *metrics]; index 1 is the accuracy metric
    return float(learn.validate()[1])
Let's try it out
# Categorical columns — each gets an embedding
cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']
# Continuous columns — normalized and fed directly to the model
cont_names = ['age', 'fnlwgt', 'education-num']
# Preprocessing: categorify, fill missing values, normalize continuous columns
procs = [Categorify, FillMissing, Normalize]
# Target column; CategoryBlock marks this as a classification task
y_names = 'salary'
y_block = CategoryBlock()
# Random train/validation split over all rows (RandomSplitter defaults)
splits = RandomSplitter()(range_of(df))
to = TabularPandas(df, procs=procs, cat_names=cat_names, cont_names=cont_names,
                   y_names=y_names, y_block=y_block, splits=splits)
# DataLoaders with batch size 512 — bound to `dls`, which fit_with uses
dls = to.dataloaders(bs=512)
We'll declare our hyper-parameters:
# Search space for the Bayesian optimizer: each key names a fit_with
# argument; each value is its (lower, upper) sampling bound.
hps = {
    'lr':       (1e-5, 1e-1),   # learning rate
    'wd':       (4e-4, 0.4),    # linear-layer dropout (passed as `ps`)
    'dp':       (0.01, 0.5),    # embedding dropout (`embed_p`)
    'n_layers': (1, 3),         # hidden-layer count (rounded in fit_with)
    'layer_1':  (50, 200),      # first hidden layer size
    'layer_2':  (100, 1000),    # second hidden layer size
    'layer_3':  (200, 2000),    # third hidden layer size
}
And now we build the optimizer:
# Build the optimizer: it repeatedly calls fit_with with hyper-parameters
# sampled inside `hps` and maximizes the accuracy fit_with returns.
optim = BayesianOptimization(
    f = fit_with, # our fit function
    pbounds = hps, # our hyper parameters to tune
    verbose = 2, # 1 prints out when a maximum is observed, 0 for silent
    random_state=1
)
And now we can search!
# IPython magic: time the search (10 optimization iterations on top of the
# optimizer's initial random probes)
%time optim.maximize(n_iter=10)
We can grab the best results:
# Best observed result: a dict with 'target' (accuracy) and 'params'
print(optim.max)
And with a few conversions we see:
- The best number of layers was 2
- The first layer a size of 57
- The second layer a size of 100

And then, of course, our other hyper-parameters.