Source code for syne_tune.optimizer.schedulers.searchers.gp_fifo_searcher

# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.
from typing import Optional, List, Dict, Any
import logging

from syne_tune.optimizer.schedulers.searchers.model_based_searcher import (
    BayesianOptimizationSearcher,
)
from syne_tune.optimizer.schedulers.searchers.gp_searcher_factory import (
    gp_fifo_searcher_factory,
    gp_fifo_searcher_defaults,
)
from syne_tune.optimizer.schedulers.searchers.gp_searcher_utils import (
    decode_state,
)
from syne_tune.optimizer.schedulers.searchers.utils.default_arguments import (
    check_and_merge_defaults,
)

logger = logging.getLogger(__name__)


class GPFIFOSearcher(BayesianOptimizationSearcher):
    r"""Gaussian process Bayesian optimization for FIFO scheduler

    This searcher must be used with
    :class:`~syne_tune.optimizer.schedulers.FIFOScheduler`. It provides
    Bayesian optimization, based on a Gaussian process surrogate model.

    It is *not* recommended to create :class:`GPFIFOSearcher` searcher objects
    directly, but rather to create
    :class:`~syne_tune.optimizer.schedulers.FIFOScheduler` objects with
    ``searcher="bayesopt"``, and to pass arguments here in ``search_options``.
    This will use the appropriate functions from
    :mod:`syne_tune.optimizer.schedulers.searchers.gp_searcher_factory` to
    create components in a consistent way.

    Most of the implementation is generic in
    :class:`~syne_tune.optimizer.schedulers.searchers.model_based_searcher.BayesianOptimizationSearcher`.

    Note: If metric values are to be maximized (``mode="max"`` in scheduler),
    the searcher uses ``map_reward`` to map metric values to internal
    criterion values, and *minimizes* the latter. The default choice is to
    multiply values by -1.

    Pending configurations (for which evaluation tasks are currently running)
    are dealt with by fantasizing (i.e., target values are drawn from the
    current posterior, and acquisition functions are averaged over this
    sample, see ``num_fantasy_samples``).

    The GP surrogate model uses a Matern 5/2 covariance function with
    automatic relevance determination (ARD) of input attributes, and a
    constant mean function. The acquisition function is expected improvement
    (EI). All hyperparameters of the surrogate model are estimated by
    empirical Bayes (maximizing the marginal likelihood). In general, this
    hyperparameter fitting is the most expensive part of a :meth:`get_config`
    call.

    Note that the full logic of construction based on arguments is given in
    :mod:`syne_tune.optimizer.schedulers.searchers.gp_searcher_factory`. In
    particular, see
    :func:`~syne_tune.optimizer.schedulers.searchers.gp_searcher_factory.gp_fifo_searcher_defaults`
    for default values.

    Additional arguments on top of parent class
    :class:`~syne_tune.optimizer.schedulers.searchers.StochasticSearcher`:

    :param clone_from_state: Internal argument, do not use
    :type clone_from_state: bool
    :param resource_attr: Name of resource attribute in reports. This is
        optional here, but required for multi-fidelity searchers.
        If ``resource_attr`` and ``cost_attr`` are given, cost values are read
        from each report and stored in the state. This allows cost models to
        be fit on more data.
    :type resource_attr: str, optional
    :param cost_attr: Name of cost attribute in data obtained from reporter
        (e.g., elapsed training time). Needed only by cost-aware searchers.
        Depending on whether ``resource_attr`` is given, cost values are read
        from each report or only at the end.
    :type cost_attr: str, optional
    :param num_init_random: Number of initial :meth:`get_config` calls for
        which randomly sampled configs are returned. Afterwards, the
        model-based searcher is used. Defaults to
        :const:`~syne_tune.optimizer.schedulers.searchers.bayesopt.tuning_algorithms.defaults.DEFAULT_NUM_INITIAL_RANDOM_EVALUATIONS`
    :type num_init_random: int, optional
    :param num_init_candidates: Number of initial candidates sampled at
        random in order to seed the model-based search in ``get_config``.
        Defaults to
        :const:`~syne_tune.optimizer.schedulers.searchers.bayesopt.tuning_algorithms.defaults.DEFAULT_NUM_INITIAL_CANDIDATES`
    :type num_init_candidates: int, optional
    :param num_fantasy_samples: Number of samples drawn for fantasizing
        (latent target values for pending evaluations), defaults to 20
    :type num_fantasy_samples: int, optional
    :param no_fantasizing: If ``True``, fantasizing is not done and pending
        evaluations are ignored. This may lead to loss of diversity in
        decisions. Defaults to ``False``
    :type no_fantasizing: bool, optional
    :param input_warping: If ``True``, we use a warping transform, so the
        kernel function becomes :math:`k(w(x), w(x'))`, where :math:`w(x)` is
        a warping transform parameterized by two non-negative numbers per
        component, which are learned as hyperparameters. See also
        :class:`~syne_tune.optimizer.schedulers.searchers.bayesopt.gpautograd.warping.Warping`.
        Coordinates which belong to categorical hyperparameters are not
        warped. Defaults to ``False``.
    :type input_warping: bool, optional
    :param boxcox_transform: If ``True``, target values are transformed before
        being fitted with a Gaussian marginal likelihood. This is using the
        Box-Cox transform with a parameter :math:`\lambda`, which is learned
        alongside other parameters of the surrogate model. The transform is
        :math:`\log y` for :math:`\lambda = 0`, and :math:`y - 1` for
        :math:`\lambda = 1`. This option requires the targets to be positive.
        Defaults to ``False``.
    :type boxcox_transform: bool, optional
    :param gp_base_kernel: Selects the covariance (or kernel) function to be
        used. Supported choices are
        :const:`~syne_tune.optimizer.schedulers.searchers.bayesopt.models.kernel_factory.SUPPORTED_BASE_MODELS`.
        Defaults to "matern52-ard" (Matern 5/2 with automatic relevance
        determination).
    :type gp_base_kernel: str, optional
    :param acq_function: Selects the acquisition function to be used.
        Supported choices are
        :const:`~syne_tune.optimizer.schedulers.searchers.bayesopt.models.acqfunc_factory.SUPPORTED_ACQUISITION_FUNCTIONS`.
        Defaults to "ei" (expected improvement acquisition function).
    :type acq_function: str, optional
    :param acq_function_kwargs: Some acquisition functions have additional
        parameters, which can be passed here. If none are given, default
        values are used.
    :type acq_function_kwargs: dict, optional
    :param initial_scoring: Scoring function to rank initial candidates
        (local optimization of EI is started from the top scorer):

        * "thompson_indep": Independent Thompson sampling; randomized score,
          which can increase exploration
        * "acq_func": score is the same (EI) acquisition function which is
          used for local optimization afterwards

        Defaults to
        :const:`~syne_tune.optimizer.schedulers.searchers.bayesopt.tuning_algorithms.defaults.DEFAULT_INITIAL_SCORING`
    :type initial_scoring: str, optional
    :param skip_local_optimization: If ``True``, the local gradient-based
        optimization of the acquisition function is skipped, and the
        top-ranked initial candidate (after initial scoring) is returned
        instead. In this case, ``initial_scoring="acq_func"`` makes most
        sense, otherwise the acquisition function will not be used. Defaults
        to ``False``
    :type skip_local_optimization: bool, optional
    :param opt_nstarts: Parameter for surrogate model fitting. Number of
        random restarts. Defaults to 2
    :type opt_nstarts: int, optional
    :param opt_maxiter: Parameter for surrogate model fitting. Maximum number
        of iterations per restart. Defaults to 50
    :type opt_maxiter: int, optional
    :param opt_warmstart: Parameter for surrogate model fitting. If ``True``,
        each fitting is started from the previous optimum. Not recommended in
        general. Defaults to ``False``
    :type opt_warmstart: bool, optional
    :param opt_verbose: Parameter for surrogate model fitting. If ``True``,
        lots of output. Defaults to ``False``
    :type opt_verbose: bool, optional
    :param max_size_data_for_model: If this is set, we limit the number of
        observations the surrogate model is fitted on to this value. If there
        are more observations, they are down sampled, see
        :class:`~syne_tune.optimizer.schedulers.searchers.bayesopt.models.subsample_state.SubsampleSingleFidelityStateConverter`
        for details. This down sampling is repeated every time the model is
        fit. The ``opt_skip_*`` predicates are evaluated before the state is
        down sampled. Pass ``None`` not to apply such a threshold. The default
        is
        :const:`~syne_tune.optimizer.schedulers.searchers.bayesopt.tuning_algorithms.defaults.DEFAULT_MAX_SIZE_DATA_FOR_MODEL`.
    :type max_size_data_for_model: int, optional
    :param max_size_top_fraction: Only used if ``max_size_data_for_model`` is
        set. This fraction of the down sampled set is filled with the top
        entries in the full set, the remaining ones are sampled at random
        from the full set, see
        :class:`~syne_tune.optimizer.schedulers.searchers.bayesopt.models.subsample_state.SubsampleSingleFidelityStateConverter`
        for details. Defaults to 0.25.
    :type max_size_top_fraction: float, optional
    :param opt_skip_init_length: Parameter for surrogate model fitting, skip
        predicate. Fitting is never skipped as long as the number of
        observations is below this threshold. Defaults to 150
    :type opt_skip_init_length: int, optional
    :param opt_skip_period: Parameter for surrogate model fitting, skip
        predicate. If ``>1``, and the number of observations is above
        ``opt_skip_init_length``, fitting is done only every
        ``opt_skip_period``-th call, and skipped otherwise. Defaults to 1
        (no skipping)
    :type opt_skip_period: int, optional
    :param allow_duplicates: If ``True``, :meth:`get_config` may return the
        same configuration more than once. Defaults to ``False``
    :type allow_duplicates: bool, optional
    :param restrict_configurations: If given, the searcher only suggests
        configurations from this list. This needs
        ``skip_local_optimization == True``. If ``allow_duplicates == False``,
        entries are popped off this list once suggested.
    :type restrict_configurations: List[dict], optional
    :param map_reward: In the scheduler, the metric may be minimized or
        maximized, but internally, Bayesian optimization is minimizing the
        criterion. ``map_reward`` converts from metric to internal criterion:

        * "minus_x": ``criterion = -metric``
        * "<a>_minus_x": ``criterion = <a> - metric``. For example,
          "1_minus_x" maps accuracy to zero-one error

        From a technical standpoint, it does not matter what is chosen here,
        because the criterion is only used internally. Also note that
        criterion data is always normalized to mean 0, variance 1 before
        being fitted with a Gaussian process. Defaults to "1_minus_x"
    :type map_reward: str or :class:`MapReward`, optional
    :param transfer_learning_task_attr: Used to support transfer HPO, where
        the state contains observed data from several tasks, one of which is
        the active one. To this end, ``config_space`` must contain a
        categorical parameter of name ``transfer_learning_task_attr``, whose
        range are all task IDs. Also, ``transfer_learning_active_task`` must
        denote the active task, and ``transfer_learning_active_config_space``
        is used as ``active_config_space`` argument in
        :class:`~syne_tune.optimizer.schedulers.searchers.utils.HyperparameterRanges`.
        This allows us to use a narrower search space for the active task
        than for the union of all tasks (``config_space`` must be that),
        which is needed if some configurations of non-active tasks lie
        outside of the ranges in ``active_config_space``. One of the
        implications is that :meth:`filter_observed_data` is selecting
        configs of the active task, so that incumbents or exclusion lists are
        restricted to data from the active task.
    :type transfer_learning_task_attr: str, optional
    :param transfer_learning_active_task: See ``transfer_learning_task_attr``.
    :type transfer_learning_active_task: str, optional
    :param transfer_learning_active_config_space: See
        ``transfer_learning_task_attr``. If not given, ``config_space`` is
        the search space for the active task as well. This active config
        space need not contain the ``transfer_learning_task_attr`` parameter.
        In fact, this parameter is set to a categorical with
        ``transfer_learning_active_task`` as single value, so that new
        configs are chosen for the active task only.
    :type transfer_learning_active_config_space: Dict[str, Any], optional
    :param transfer_learning_model: See ``transfer_learning_task_attr``.
        Specifies the surrogate model to be used for transfer learning:

        * "matern52_product": Kernel is product of Matern 5/2 (not ARD) on
          ``transfer_learning_task_attr`` and Matern 5/2 (ARD) on the rest.
          Assumes that data from the same task are more closely related than
          data from different tasks
        * "matern52_same": Kernel is Matern 5/2 (ARD) on the rest of the
          variables, ``transfer_learning_task_attr`` is ignored. Assumes
          that data from all tasks can be merged together

        Defaults to "matern52_product"
    :type transfer_learning_model: str, optional
    """

    def __init__(
        self,
        config_space: Dict[str, Any],
        metric: str,
        points_to_evaluate: Optional[List[Dict[str, Any]]] = None,
        clone_from_state: bool = False,
        **kwargs,
    ):
        super().__init__(
            config_space,
            metric=metric,
            points_to_evaluate=points_to_evaluate,
            random_seed_generator=kwargs.get("random_seed_generator"),
            random_seed=kwargs.get("random_seed"),
        )
        if not clone_from_state:
            kwargs["config_space"] = config_space
            kwargs["metric"] = metric
            kwargs_int = self._create_kwargs_int(kwargs)
        else:
            # Internal constructor, bypassing the factory
            # Note: Members which are part of the mutable state will be
            # overwritten in ``_restore_from_state``
            kwargs_int = kwargs.copy()
        self._call_create_internal(kwargs_int)

    def _create_kwargs_int(self, kwargs):
        _kwargs = check_and_merge_defaults(
            kwargs, *gp_fifo_searcher_defaults(kwargs), dict_name="search_options"
        )
        kwargs_int = gp_fifo_searcher_factory(**_kwargs)
        # Extra arguments not parsed in factory
        self._copy_kwargs_to_kwargs_int(kwargs_int, kwargs)
        return kwargs_int

    def _call_create_internal(self, kwargs_int):
        """
        Part of constructor which can be different in subclasses
        """
        self._create_internal(**kwargs_int)
    def clone_from_state(self, state):
        # Create clone with mutable state taken from 'state'
        init_state = decode_state(state["state"], self._hp_ranges_in_state())
        skip_optimization = state["skip_optimization"]
        estimator = self.state_transformer.estimator
        # Call internal constructor
        new_searcher = GPFIFOSearcher(
            **self._new_searcher_kwargs_for_clone(),
            estimator=estimator,
            init_state=init_state,
            skip_optimization=skip_optimization,
        )
        new_searcher._restore_from_state(state)
        # Invalidate self (must not be used afterwards)
        self.state_transformer = None
        return new_searcher
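

# --- Usage sketch (illustrative, not part of the module above) --------------
# The class docstring recommends not instantiating ``GPFIFOSearcher`` directly,
# but creating a ``FIFOScheduler`` with ``searcher="bayesopt"`` and passing the
# arguments documented above via ``search_options``. The following is a minimal
# sketch of that pattern; the config space (learning rate, batch size) and the
# "accuracy" metric name are assumptions for illustration only.
if __name__ == "__main__":
    from syne_tune.config_space import loguniform, randint
    from syne_tune.optimizer.schedulers import FIFOScheduler

    # Hypothetical search space for a training script reporting "accuracy"
    config_space = {
        "learning_rate": loguniform(1e-5, 1e-1),
        "batch_size": randint(16, 256),
    }

    scheduler = FIFOScheduler(
        config_space,
        searcher="bayesopt",  # creates a GPFIFOSearcher internally
        search_options={
            # Arguments documented in the docstring above are passed here
            "num_init_random": 5,
            "opt_nstarts": 2,
            "input_warping": True,
        },
        metric="accuracy",
        mode="max",  # maximized metric is mapped to an internal minimization
                     # criterion via ``map_reward``
    )

    # ``scheduler`` would then be passed to ``syne_tune.Tuner`` together with a
    # trial backend; see the Syne Tune documentation for a complete setup.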