Source code for syne_tune.optimizer.schedulers.transfer_learning

# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.
from dataclasses import dataclass
from typing import Any, Dict, List

import numpy as np
import pandas as pd

__all__ = [
    "TransferLearningTaskEvaluations",
    "TransferLearningMixin",
    "BoundingBox",
    "RUSHScheduler",
]


@dataclass
class TransferLearningTaskEvaluations:
    """Class that contains offline evaluations for a task that can be used
    for transfer learning.

    Args:
        configuration_space: Dict, the configuration space that was used when
            sampling evaluations.
        hyperparameters: pd.DataFrame, the hyperparameter values that were
            acquired; all keys of the configuration space should appear as
            columns.
        objectives_names: List[str], the names of the objectives that were
            acquired.
        objectives_evaluations: np.array, values of recorded objectives; must
            have shape (num_evals, num_seeds, num_fidelities, num_objectives).
    """

    configuration_space: Dict
    hyperparameters: pd.DataFrame
    objectives_names: List[str]
    objectives_evaluations: np.array

    def __post_init__(self):
        assert len(self.objectives_names) == self.objectives_evaluations.shape[-1]
        assert len(self.hyperparameters) == self.objectives_evaluations.shape[0]
        assert self.objectives_evaluations.ndim == 4, (
            "objective evaluations should be of shape "
            "(num_evals, num_seeds, num_fidelities, num_objectives)"
        )
        for col in self.hyperparameters.keys():
            assert col in self.configuration_space

    def objective_values(self, objective_name: str) -> np.array:
        return self.objectives_evaluations[
            ..., self.objective_index(objective_name=objective_name)
        ]

    def objective_index(self, objective_name: str) -> int:
        matches = [
            i for i, name in enumerate(self.objectives_names) if name == objective_name
        ]
        assert len(matches) >= 1, (
            f"could not find objective {objective_name} in recorded objectives "
            f"{self.objectives_names}"
        )
        return matches[0]

    def top_k_hyperparameter_configurations(
        self, k: int, mode: str, objective: str
    ) -> List[Dict[str, Any]]:
        """
        Returns the best k hyperparameter configurations.

        :param k: The number of top hyperparameters to return.
        :param mode: 'min' or 'max', indicating the type of optimization problem.
        :param objective: The objective to consider for ranking hyperparameters.
        :returns: List of hyperparameters in order.
        """
        assert k > 0 and isinstance(k, int), f"{k} is not a positive integer."
        assert mode in ["min", "max"], f"Unknown mode {mode}, must be 'min' or 'max'."
        assert objective in self.objectives_names, f"Unknown objective {objective}."
        # Average over seeds, then keep the best fidelity for each evaluation
        avg_objective = self.objective_values(objective_name=objective).mean(axis=1)
        if mode == "max":
            avg_objective = avg_objective.max(axis=1)
        else:
            avg_objective = avg_objective.min(axis=1)
        best_hp_task_indices = avg_objective.argsort()
        if mode == "max":
            best_hp_task_indices = best_hp_task_indices[::-1]
        return self.hyperparameters.loc[best_hp_task_indices[:k]].to_dict("records")
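

# A minimal usage sketch (illustrative only; this helper is not part of the
# library, and the configuration space, hyperparameter values and random
# objective values are made up). It builds a toy
# ``TransferLearningTaskEvaluations`` with 3 evaluations, 2 seeds,
# 4 fidelities and 1 objective, ready for
# ``top_k_hyperparameter_configurations``.
def _toy_task_evaluations() -> TransferLearningTaskEvaluations:
    from syne_tune.config_space import uniform

    num_evals, num_seeds, num_fidelities, num_objectives = 3, 2, 4, 1
    return TransferLearningTaskEvaluations(
        configuration_space={"lr": uniform(1e-4, 1e-1)},
        hyperparameters=pd.DataFrame({"lr": [1e-3, 1e-2, 5e-2]}),
        objectives_names=["validation_error"],
        objectives_evaluations=np.random.rand(
            num_evals, num_seeds, num_fidelities, num_objectives
        ),
    )
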

class TransferLearningMixin:
    def __init__(
        self,
        config_space: Dict,
        transfer_learning_evaluations: Dict[str, TransferLearningTaskEvaluations],
        metric_names: List[str],
        **kwargs,
    ):
        """
        A mixin that adds basic functionality for using offline evaluations.

        :param config_space: configuration space to be sampled from.
        :param transfer_learning_evaluations: dictionary from task name to
            offline evaluations.
        :param metric_names: names of the metrics to be optimized.
        """
        super().__init__(config_space=config_space, **kwargs)
        self._metric_names = metric_names
        self._check_consistency(
            config_space=config_space,
            transfer_learning_evaluations=transfer_learning_evaluations,
            metric_names=metric_names,
        )

    def _check_consistency(
        self,
        config_space: Dict,
        transfer_learning_evaluations: Dict[str, TransferLearningTaskEvaluations],
        metric_names: List[str],
    ):
        for task, evals in transfer_learning_evaluations.items():
            for key in config_space.keys():
                assert key in evals.hyperparameters.columns, (
                    f"the key {key} of the config space should appear in transfer "
                    f"learning evaluations hyperparameters {evals.hyperparameters.columns}"
                )
            assert all(m in evals.objectives_names for m in metric_names), (
                f"all objectives used in the scheduler {self.metric_names()} should "
                f"appear in transfer learning evaluations objectives {evals.objectives_names}"
            )

    def metric_names(self) -> List[str]:
        return self._metric_names

    def top_k_hyperparameter_configurations_per_task(
        self,
        transfer_learning_evaluations: Dict[str, TransferLearningTaskEvaluations],
        num_hyperparameters_per_task: int,
        mode: str,
        metric: str,
    ) -> Dict[str, List[Dict[str, Any]]]:
        """
        Returns the best hyperparameter configurations for each task.

        :param transfer_learning_evaluations: Set of candidates to choose from.
        :param num_hyperparameters_per_task: The number of top hyperparameters
            per task to return.
        :param mode: 'min' or 'max', indicating the type of optimization problem.
        :param metric: The metric to consider for ranking hyperparameters.
        :returns: Dict which maps from task name to list of hyperparameters in order.
        """
        assert num_hyperparameters_per_task > 0 and isinstance(
            num_hyperparameters_per_task, int
        ), f"{num_hyperparameters_per_task} is not a positive integer."
        assert mode in ["min", "max"], f"Unknown mode {mode}, must be 'min' or 'max'."
        assert metric in self.metric_names(), f"Unknown metric {metric}."
        best_hps = dict()
        for task, evaluation in transfer_learning_evaluations.items():
            best_hps[task] = evaluation.top_k_hyperparameter_configurations(
                num_hyperparameters_per_task, mode, metric
            )
        return best_hps
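

# Sketch of the intended subclassing pattern (this helper and
# ``ExampleTransferScheduler`` are hypothetical, not part of the library;
# ``BoundingBox`` and ``RUSHScheduler`` imported below follow the same
# recipe). The mixin consumes its own arguments and forwards the rest, here
# ``metric``, to the concrete scheduler through ``**kwargs``.
def _example_scheduler(config_space, transfer_learning_evaluations, metric):
    from syne_tune.optimizer.schedulers import FIFOScheduler

    class ExampleTransferScheduler(TransferLearningMixin, FIFOScheduler):
        pass

    return ExampleTransferScheduler(
        config_space=config_space,
        transfer_learning_evaluations=transfer_learning_evaluations,
        metric_names=[metric],
        metric=metric,  # forwarded to FIFOScheduler via **kwargs
    )
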
# Imported at the end of the module, after ``TransferLearningMixin`` is
# defined, to avoid a circular import.
from syne_tune.optimizer.schedulers.transfer_learning.bounding_box import BoundingBox
from syne_tune.optimizer.schedulers.transfer_learning.rush import RUSHScheduler
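

if __name__ == "__main__":
    # Smoke test for the sketches above (illustrative only, toy data for two
    # hypothetical tasks).
    transfer_learning_evaluations = {
        "task-a": _toy_task_evaluations(),
        "task-b": _toy_task_evaluations(),
    }
    evals = transfer_learning_evaluations["task-a"]
    # Rank the configurations of a single task ...
    print(
        evals.top_k_hyperparameter_configurations(
            k=2, mode="min", objective="validation_error"
        )
    )
    # ... and per task, through the mixin-based scheduler sketched above.
    scheduler = _example_scheduler(
        config_space=evals.configuration_space,
        transfer_learning_evaluations=transfer_learning_evaluations,
        metric="validation_error",
    )
    print(
        scheduler.top_k_hyperparameter_configurations_per_task(
            transfer_learning_evaluations,
            num_hyperparameters_per_task=2,
            mode="min",
            metric="validation_error",
        )
    )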