# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.
from dataclasses import dataclass
from typing import Any, Dict, List
import numpy as np
import pandas as pd
__all__ = [
"TransferLearningTaskEvaluations",
"TransferLearningMixin",
"BoundingBox",
"RUSHScheduler",
]
@dataclass
class TransferLearningTaskEvaluations:
"""Class that contains offline evaluations for a task that can be used for transfer learning.
Args:
configuration_space: Dict the configuration space that was used when sampling evaluations.
hyperparameters: pd.DataFrame the hyperparameters values that were acquired, all keys of configuration-space
should appear as columns.
objectives_names: List[str] the name of the objectives that were acquired
objectives_evaluations: np.array values of recorded objectives, must have shape
(num_evals, num_seeds, num_fidelities, num_objectives)
"""
configuration_space: Dict
hyperparameters: pd.DataFrame
objectives_names: List[str]
    objectives_evaluations: np.ndarray
    def __post_init__(self):
        # Validate that shapes and column names are mutually consistent.
assert len(self.objectives_names) == self.objectives_evaluations.shape[-1]
assert len(self.hyperparameters) == self.objectives_evaluations.shape[0]
assert self.objectives_evaluations.ndim == 4, (
"objective evaluations should be of shape "
"(num_evals, num_seeds, num_fidelities, num_objectives)"
)
for col in self.hyperparameters.keys():
assert col in self.configuration_space
    def objective_values(self, objective_name: str) -> np.ndarray:
        """Returns the recorded evaluations of the given objective, with shape
        (num_evals, num_seeds, num_fidelities)."""
return self.objectives_evaluations[
..., self.objective_index(objective_name=objective_name)
]
    def objective_index(self, objective_name: str) -> int:
        """Returns the position of the given objective in ``objectives_names``."""
matches = [
i for i, name in enumerate(self.objectives_names) if name == objective_name
]
assert len(matches) >= 1, (
f"could not find objective {objective_name} in recorded objectives "
f"{self.objectives_names}"
)
return matches[0]
def top_k_hyperparameter_configurations(
self, k: int, mode: str, objective: str
) -> List[Dict[str, Any]]:
"""
Returns the best k hyperparameter configurations.
:param k: The number of top hyperparameters to return.
:param mode: 'min' or 'max', indicating the type of optimization problem.
:param objective: The objective to consider for ranking hyperparameters.
        :returns: List of hyperparameter configurations, best first.
"""
        assert k > 0 and isinstance(k, int), f"{k} is not a positive integer."
assert mode in ["min", "max"], f"Unknown mode {mode}, must be 'min' or 'max'."
assert objective in self.objectives_names, f"Unknown objective {objective}."
        # average over seeds, then pick the best fidelity for each configuration
avg_objective = self.objective_values(objective_name=objective).mean(axis=1)
if mode == "max":
avg_objective = avg_objective.max(axis=1)
else:
avg_objective = avg_objective.min(axis=1)
best_hp_task_indices = avg_objective.argsort()
if mode == "max":
best_hp_task_indices = best_hp_task_indices[::-1]
return self.hyperparameters.loc[best_hp_task_indices[:k]].to_dict("records")
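
# Illustrative sketch (hypothetical values, not part of the library API): how the
# ranking in ``top_k_hyperparameter_configurations`` behaves. For
# ``objectives_evaluations`` of shape (3 evals, 2 seeds, 2 fidelities, 1 objective),
# the method averages over seeds, keeps the best fidelity per configuration
# (min for ``mode="min"``, max for ``mode="max"``), and sorts configurations:
#
#     evals = TransferLearningTaskEvaluations(
#         configuration_space={"lr": None},  # placeholder search space
#         hyperparameters=pd.DataFrame({"lr": [1e-3, 1e-2, 1e-1]}),
#         objectives_names=["validation_error"],
#         objectives_evaluations=np.arange(12, dtype=float).reshape(3, 2, 2, 1),
#     )
#     evals.top_k_hyperparameter_configurations(k=2, mode="min", objective="validation_error")
#     # -> [{"lr": 0.001}, {"lr": 0.01}]  (lowest seed-averaged error first)
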
class TransferLearningMixin:
def __init__(
self,
config_space: Dict,
transfer_learning_evaluations: Dict[str, TransferLearningTaskEvaluations],
metric_names: List[str],
**kwargs,
):
"""
A mixin that adds basic functionality for using offline evaluations.
:param config_space: configuration space to be sampled from
:param transfer_learning_evaluations: dictionary from task name to offline evaluations.
:param metric_names: name of the metric to be optimized.
"""
super().__init__(config_space=config_space, **kwargs)
self._metric_names = metric_names
self._check_consistency(
config_space=config_space,
transfer_learning_evaluations=transfer_learning_evaluations,
metric_names=metric_names,
)
def _check_consistency(
self,
config_space: Dict,
transfer_learning_evaluations: Dict[str, TransferLearningTaskEvaluations],
metric_names: List[str],
):
for task, evals in transfer_learning_evaluations.items():
for key in config_space.keys():
assert key in evals.hyperparameters.columns, (
f"the key {key} of the config space should appear in transfer learning evaluations "
f"hyperparameters {evals.hyperparameters.columns}"
)
assert all([m in evals.objectives_names for m in metric_names]), (
f"all objectives used in the scheduler {self.metric_names()} should appear in transfer learning "
f"evaluations objectives {evals.objectives_names}"
)
    def metric_names(self) -> List[str]:
        """Returns the names of the metrics to be optimized."""
return self._metric_names
def top_k_hyperparameter_configurations_per_task(
self,
transfer_learning_evaluations: Dict[str, TransferLearningTaskEvaluations],
num_hyperparameters_per_task: int,
mode: str,
metric: str,
) -> Dict[str, List[Dict[str, Any]]]:
"""
Returns the best hyperparameter configurations for each task.
:param transfer_learning_evaluations: Set of candidates to choose from.
:param num_hyperparameters_per_task: The number of top hyperparameters per task to return.
:param mode: 'min' or 'max', indicating the type of optimization problem.
:param metric: The metric to consider for ranking hyperparameters.
        :returns: Dict which maps from task name to a list of hyperparameter configurations, best first.
"""
        assert num_hyperparameters_per_task > 0 and isinstance(
            num_hyperparameters_per_task, int
        ), f"{num_hyperparameters_per_task} is not a positive integer."
assert mode in ["min", "max"], f"Unknown mode {mode}, must be 'min' or 'max'."
assert metric in self.metric_names(), f"Unknown metric {metric}."
best_hps = dict()
for task, evaluation in transfer_learning_evaluations.items():
best_hps[task] = evaluation.top_k_hyperparameter_configurations(
num_hyperparameters_per_task, mode, metric
)
return best_hps
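
# Sketch (hypothetical task names and objects): ``top_k_hyperparameter_configurations_per_task``
# simply applies ``TransferLearningTaskEvaluations.top_k_hyperparameter_configurations`` to each
# task. For a scheduler ``sched`` that mixes in this class:
#
#     sched.top_k_hyperparameter_configurations_per_task(
#         transfer_learning_evaluations={"task-a": evals_a, "task-b": evals_b},
#         num_hyperparameters_per_task=3,
#         mode="min",
#         metric="validation_error",
#     )
#     # -> {"task-a": [...], "task-b": [...]}
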
# Imported at the end of the module: both schedulers build on ``TransferLearningMixin``
# defined above, so importing them earlier would trigger a circular import.
from syne_tune.optimizer.schedulers.transfer_learning.bounding_box import BoundingBox
from syne_tune.optimizer.schedulers.transfer_learning.rush import RUSHScheduler
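

# A minimal, self-contained usage sketch (illustrative only: the toy search space,
# hyperparameter values and objective values below are made up). It builds a single
# ``TransferLearningTaskEvaluations`` object and queries its best configurations.
if __name__ == "__main__":
    from syne_tune.config_space import uniform

    toy_evaluations = TransferLearningTaskEvaluations(
        configuration_space={"lr": uniform(1e-4, 1e-1)},
        hyperparameters=pd.DataFrame({"lr": [1e-3, 1e-2, 1e-1]}),
        objectives_names=["validation_error"],
        # shape (num_evals=3, num_seeds=1, num_fidelities=1, num_objectives=1)
        objectives_evaluations=np.array([0.3, 0.1, 0.2]).reshape(3, 1, 1, 1),
    )
    top_configs = toy_evaluations.top_k_hyperparameter_configurations(
        k=2, mode="min", objective="validation_error"
    )
    print(top_configs)  # expected: [{'lr': 0.01}, {'lr': 0.1}]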