# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.
"""
Convert evaluations from
A Quantile-based Approach for Hyperparameter Transfer Learning
David Salinas Huibin Shen Valerio Perrone
http://proceedings.mlr.press/v119/salinas20a/salinas20a.pdf
"""
import pandas as pd
import numpy as np
from syne_tune.blackbox_repository.blackbox_offline import serialize, BlackboxOffline
from syne_tune.blackbox_repository.conversion_scripts.blackbox_recipe import (
BlackboxRecipe,
)
from syne_tune.blackbox_repository.conversion_scripts.utils import repository_path
import syne_tune.config_space as sp
SHA256_HASH_DEEPAR = "b282b369daedc2986b3e59646d5e0c6bc93ccb3b4fbec02980f47cd89a002605"
SHA256_HASH_XGBOOST = "2f6ebb75b2a95614eb783c8d5874cb1dc9e1859e5081fa31e6e46fc9ba56774d"
[docs]
def download(blackbox: str):
    """
    Download the zipped CSV of offline evaluations for one blackbox.

    The archive ``{blackbox}.csv.zip`` is fetched from the GitHub repository
    accompanying the paper and stored under ``repository_path`` with the same
    file name.

    :param blackbox: Base name of the evaluation file (the callers in this
        module pass ``"DeepAR"`` and ``"XGBoost"``)
    """
    # ``import urllib`` alone does not load the ``request`` submodule; it only
    # works by accident when another package has imported it already.
    import urllib.request

    root = "https://github.com/geoalgo/A-Quantile-based-Approach-for-Hyperparameter-Transfer-Learning/blob/master/src/blackbox/offline_evaluations/"
    file_name = f"{blackbox}.csv.zip"
    # ``?raw=true`` makes GitHub serve the file content instead of the HTML page
    urllib.request.urlretrieve(
        root + file_name + "?raw=true", repository_path / file_name
    )
[docs]
def serialize_deepar():
    """
    Download the DeepAR evaluations and serialize one blackbox per task.

    Hyperparameter columns are mapped through ``exp``, every column whose name
    ends with ``_log`` is dropped, and all ``metric_*`` columns are used as
    objectives. One :class:`BlackboxOffline` is built for each distinct value
    of the ``task`` column and the resulting dictionary is serialized to
    ``repository_path / "icml-deepar"``.
    """
    blackbox = "DeepAR"
    download(blackbox=blackbox)
    df = pd.read_csv(repository_path / f"{blackbox}.csv.zip")

    # NOTE(review): these two columns are read WITHOUT a ``_log`` suffix and
    # exponentiated in place, unlike the ones below -- confirm against the CSV
    # schema that this is intended.
    for name in ("hp_num_layers", "hp_num_cells"):
        df[name] = df[name].apply(np.exp)
    # Remaining hyperparameters are read from their ``*_log`` column
    for name in (
        "hp_dropout_rate",
        "hp_learning_rate",
        "hp_num_batches_per_epoch",
        "hp_context_length_ratio",
    ):
        df[name] = df[f"{name}_log"].apply(np.exp)

    df = df[[col for col in df.columns if not col.endswith("_log")]]

    # The original literal listed "hp_num_cells" twice, so the second entry
    # silently replaced the first and "hp_num_batches_per_epoch" had no entry
    # at all. The second entry is now keyed by the hyperparameter it describes.
    # NOTE(review): this changes the serialized search space, so
    # SHA256_HASH_DEEPAR may have to be regenerated -- confirm.
    configuration_space = {
        "hp_num_layers": sp.randint(lower=2, upper=4),
        "hp_num_cells": sp.randint(lower=30, upper=120),
        "hp_dropout_rate": sp.uniform(lower=0.01, upper=0.51),
        "hp_learning_rate": sp.loguniform(lower=1e-4, upper=1e-2),
        "hp_num_batches_per_epoch": sp.lograndint(lower=10, upper=10000),
        "hp_context_length_ratio": sp.loguniform(lower=0.05, upper=4),
    }

    # Loop-invariant: the objective columns are the same for every task
    metric_columns = [col for col in df.columns if col.startswith("metric_")]

    serialize(
        {
            task: BlackboxOffline(
                df_evaluations=df.loc[df.task == task, :],
                configuration_space=configuration_space,
                # fresh list per blackbox, as in the original
                objectives_names=list(metric_columns),
            )
            for task in df.task.unique()
        },
        path=repository_path / "icml-deepar",
    )
[docs]
def serialize_xgboost():
    """
    Download the XGBoost evaluations and serialize one blackbox per task.

    Columns referenced by this function:
    'hp_log2_min_child_weight', 'hp_subsample', 'hp_colsample_bytree',
    'hp_log2_gamma', 'hp_log2_lambda', 'hp_eta', 'hp_max_depth_index',
    'hp_log2_alpha', 'metric_error', 'blackbox', 'task'

    The four ``hp_log2_*`` columns are mapped through ``2 ** x`` into columns
    without the ``_log2`` infix, after which every column containing ``_log2``
    is dropped. The result is written to ``repository_path / "icml-xgboost"``.
    """
    blackbox = "XGBoost"
    download(blackbox=blackbox)
    archive = repository_path / f"{blackbox}.csv.zip"
    df = pd.read_csv(archive)

    log2_columns = [
        "hp_log2_min_child_weight",
        "hp_log2_gamma",
        "hp_log2_lambda",
        "hp_log2_alpha",
    ]
    for log2_name in log2_columns:
        linear_name = log2_name.replace("_log2", "")
        df[linear_name] = df[log2_name].apply(np.exp2)

    kept_columns = [name for name in df.columns if "_log2" not in name]
    df = df[kept_columns]

    configuration_space = {
        "hp_subsample": sp.uniform(lower=0.5, upper=1.0),
        "hp_colsample_bytree": sp.uniform(lower=0.3, upper=1.0),
        "hp_eta": sp.uniform(lower=0.0, upper=1.0),
        "hp_max_depth_index": sp.uniform(lower=0.0, upper=12.0),
        "hp_min_child_weight": sp.loguniform(lower=1e-5, upper=64.0),
        "hp_gamma": sp.loguniform(lower=1e-5, upper=64),
        "hp_lambda": sp.loguniform(lower=1e-5, upper=256),
        "hp_alpha": sp.loguniform(lower=1e-5, upper=256),
    }

    # One offline blackbox per distinct task, in order of first appearance
    blackbox_per_task = {}
    for task in df.task.unique():
        task_rows = df.loc[df.task == task, :]
        metric_names = [name for name in df.columns if name.startswith("metric_")]
        blackbox_per_task[task] = BlackboxOffline(
            df_evaluations=task_rows,
            configuration_space=configuration_space,
            objectives_names=metric_names,
        )

    serialize(blackbox_per_task, path=repository_path / "icml-xgboost")
[docs]
class XGBoostRecipe(BlackboxRecipe):
    """Recipe registered as ``icml-xgboost``; generation delegates to
    :func:`serialize_xgboost`."""

    def __init__(self):
        super().__init__(
            name="icml-xgboost",
            hash=SHA256_HASH_XGBOOST,
            # Adjacent string literals are joined verbatim, so the space
            # separating title and authors has to be written explicitly.
            cite_reference="A quantile-based approach for hyperparameter transfer learning. "
            "Salinas, D., Shen, H., and Perrone, V. 2021.",
        )

    def _generate_on_disk(self):
        """Produce the blackbox files by calling :func:`serialize_xgboost`."""
        serialize_xgboost()
[docs]
class DeepARRecipe(BlackboxRecipe):
    """Recipe registered as ``icml-deepar``; generation delegates to
    :func:`serialize_deepar`."""

    def __init__(self):
        super().__init__(
            name="icml-deepar",
            hash=SHA256_HASH_DEEPAR,
            # Adjacent string literals are joined verbatim, so the space
            # separating title and authors has to be written explicitly.
            cite_reference="A quantile-based approach for hyperparameter transfer learning. "
            "Salinas, D., Shen, H., and Perrone, V. 2021.",
        )

    def _generate_on_disk(self):
        """Produce the blackbox files by calling :func:`serialize_deepar`."""
        serialize_deepar()
if __name__ == "__main__":
    # Generate both blackboxes when run as a script: DeepAR first, then XGBoost
    for recipe_cls in (DeepARRecipe, XGBoostRecipe):
        recipe_cls().generate()