Source code for syne_tune.optimizer.schedulers.searchers.random_grid_searcher

# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.
import logging
from collections import OrderedDict
from itertools import product
from typing import Optional, List, Dict, Union, Any

import numpy as np

from syne_tune.config_space import (
    Float,
    Integer,
    Categorical,
    FiniteRange,
    Domain,
    config_space_size,
)
from syne_tune.optimizer.schedulers.searchers import (
    StochasticSearcher,
    StochasticAndFilterDuplicatesSearcher,
)
from syne_tune.optimizer.schedulers.searchers.utils.exclusion_list import ExclusionList
from syne_tune.optimizer.schedulers.searchers.bayesopt.utils.debug_log import (
    DebugLogPrinter,
)
from syne_tune.optimizer.schedulers.searchers.utils import make_hyperparameter_ranges

logger = logging.getLogger(__name__)


class RandomSearcher(StochasticAndFilterDuplicatesSearcher):
    """
    Searcher which randomly samples configurations to try next.

    Additional arguments on top of parent class
    :class:`StochasticAndFilterDuplicatesSearcher`:

    :param debug_log: If ``True``, debug log printing is activated. Logs which
        configs are chosen when, and which metric values are obtained. Defaults
        to ``False``
    :param resource_attr: Optional. Key in ``result`` passed to :meth:`_update`
        for resource value (for multi-fidelity schedulers)
    """

    def __init__(
        self,
        config_space: Dict[str, Any],
        metric: Union[List[str], str],
        points_to_evaluate: Optional[List[dict]] = None,
        debug_log: Union[bool, DebugLogPrinter] = False,
        resource_attr: Optional[str] = None,
        allow_duplicates: Optional[bool] = None,
        restrict_configurations: Optional[List[Dict[str, Any]]] = None,
        **kwargs,
    ):
        super().__init__(
            config_space,
            metric=metric,
            points_to_evaluate=points_to_evaluate,
            allow_duplicates=allow_duplicates,
            restrict_configurations=restrict_configurations,
            **kwargs,
        )
        self._resource_attr = resource_attr
        # Debug log printing (switched off by default)
        if isinstance(debug_log, bool):
            if debug_log:
                self._debug_log = DebugLogPrinter()
            else:
                self._debug_log = None
        else:
            assert isinstance(
                debug_log, DebugLogPrinter
            ), f"debug_log = {debug_log} must either be bool or DebugLogPrinter"
            self._debug_log = debug_log

    def configure_scheduler(self, scheduler):
        from syne_tune.optimizer.schedulers.multi_fidelity import (
            MultiFidelitySchedulerMixin,
        )

        super().configure_scheduler(scheduler)
        # If the scheduler is multi-fidelity, we want to know the resource
        # attribute, this is used for ``debug_log``
        if isinstance(scheduler, MultiFidelitySchedulerMixin):
            self._resource_attr = scheduler.resource_attr

    def _get_config(self, **kwargs) -> Optional[dict]:
        """Sample a new configuration at random

        If ``allow_duplicates == False``, this is done without replacement, so
        previously returned configs are not suggested again.

        :param trial_id: Optional. Used for ``debug_log``
        :return: New configuration, or None
        """
        new_config = self._next_initial_config()
        if new_config is None:
            new_config = self._get_random_config()
            if new_config is not None:
                if self._debug_log is not None:
                    trial_id = kwargs.get("trial_id")
                    self._debug_log.start_get_config("random", trial_id=trial_id)
                    self._debug_log.set_final_config(new_config)
                    # All get_config debug log info is only written here
                    self._debug_log.write_block()
            else:
                msg = (
                    "Failed to sample a configuration not already chosen "
                    + f"before. Exclusion list has size {len(self._excl_list)}."
                )
                cs_size = self._excl_list.configspace_size
                if cs_size is not None:
                    msg += f" Configuration space has size {cs_size}."
                logger.warning(msg)
        return new_config

    def _update(self, trial_id: str, config: Dict[str, Any], result: Dict[str, Any]):
        if self._debug_log is not None:
            if self._resource_attr is not None:
                # For HyperbandScheduler, also add the resource attribute
                resource = int(result[self._resource_attr])
                trial_id = trial_id + f":{resource}"
            msg = f"Update for trial_id {trial_id}: "
            if isinstance(self._metric, list):
                parts = [f"{name} = {result[name]:.3f}" for name in self._metric]
                msg += ", ".join(parts)
            else:
                msg += f"{result[self._metric]:.3f}"
            logger.info(msg)

    def clone_from_state(self, state: Dict[str, Any]):
        new_searcher = RandomSearcher(
            self.config_space,
            metric=self._metric,
            points_to_evaluate=[],
            debug_log=self._debug_log,
            allow_duplicates=self._allow_duplicates,
        )
        new_searcher._resource_attr = self._resource_attr
        new_searcher._restore_from_state(state)
        return new_searcher

    @property
    def debug_log(self):
        return self._debug_log
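
# Usage sketch (illustrative only, not part of the original module). The config space
# below is an assumption chosen for the example; ``RandomSearcher`` is usually created
# internally by a scheduler, but it can also be driven directly:
#
#     from syne_tune.config_space import randint, uniform
#
#     example_space = {
#         "lr": uniform(1e-6, 1e-2),
#         "batch_size": randint(8, 128),
#     }
#     searcher = RandomSearcher(example_space, metric="validation_loss")
#     config = searcher.get_config(trial_id="0")
#     # ``config`` is a dict mapping hyperparameter names to sampled values,
#     # or None once no new configuration can be proposed.
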
DEFAULT_NSAMPLE = 5

class GridSearcher(StochasticSearcher):
    """Searcher that samples configurations from an equally spaced grid over
    ``config_space``.

    It first evaluates configurations defined in ``points_to_evaluate`` and
    then continues with the remaining points from the grid.

    Additional arguments on top of parent class :class:`StochasticSearcher`:

    :param num_samples: Dictionary, optional. Number of samples per
        hyperparameter. This is required for hyperparameters of type float,
        optional for integer hyperparameters, and will be ignored for other
        types (categorical, scalar). If left unspecified, a default value of
        :const:`DEFAULT_NSAMPLE` is used for float parameters, and the smaller
        of :const:`DEFAULT_NSAMPLE` and the integer range length is used for
        integer parameters.
    :param shuffle_config: If ``True`` (default), the order of configurations
        suggested after those specified in ``points_to_evaluate`` is shuffled.
        Otherwise, the order follows the Cartesian product of the
        configurations.
    :param allow_duplicates: If ``True``, :meth:`get_config` may return the
        same configuration more than once. Defaults to ``False``
    """

    def __init__(
        self,
        config_space: Dict[str, Any],
        metric: str,
        points_to_evaluate: Optional[List[dict]] = None,
        num_samples: Optional[Dict[str, int]] = None,
        shuffle_config: bool = True,
        allow_duplicates: bool = False,
        **kwargs,
    ):
        super().__init__(
            config_space, metric=metric, points_to_evaluate=points_to_evaluate, **kwargs
        )
        k = "restrict_configurations"
        if kwargs.get(k) is not None:
            logger.warning(f"{k} is not supported")
            del kwargs[k]
        self._validate_config_space(config_space, num_samples)
        self._hp_ranges = make_hyperparameter_ranges(config_space)

        if not isinstance(shuffle_config, bool):
            shuffle_config = True
        self._shuffle_config = shuffle_config
        self._generate_all_candidates_on_grid()
        self._next_index = 0
        self._allow_duplicates = allow_duplicates
        self._all_initial_configs = ExclusionList(self._hp_ranges)

    def _validate_config_space(
        self, config_space: Dict[str, Any], num_samples: Optional[Dict[str, int]]
    ):
        """
        Validates ``config_space`` from two aspects: first, that all
        hyperparameters are of acceptable types (i.e., float, integer,
        categorical); second, that ``num_samples`` is specified for float
        hyperparameters. ``num_samples`` for categorical variables is ignored,
        since all of their values are used. ``num_samples`` for integer
        variables is optional; if specified, it is used but capped at the
        length of the range.

        :param config_space: Configuration space
        :param num_samples: Number of samples for each hyperparameter. Only
            required for float hyperparameters
        """
        if num_samples is None:
            num_samples = dict()
        self.num_samples = num_samples
        for hp, hp_range in config_space.items():
            # num_samples is required for float hyperparameters. If not
            # specified, the default DEFAULT_NSAMPLE is used.
            if isinstance(hp_range, Float):
                if hp not in self.num_samples:
                    self.num_samples[hp] = DEFAULT_NSAMPLE
                    logger.warning(
                        f"Number of samples is required for '{hp}'. By default, "
                        f"{DEFAULT_NSAMPLE} is set as number of samples"
                    )
            # num_samples for an integer hyperparameter is capped at the length
            # of its range when specified. Otherwise, the minimum of the default
            # value DEFAULT_NSAMPLE and the integer range length is used.
            if isinstance(hp_range, Integer):
                if hp in self.num_samples:
                    if self.num_samples[hp] > len(hp_range):
                        self.num_samples[hp] = min(len(hp_range), DEFAULT_NSAMPLE)
                        logger.info(
                            f"Number of samples for '{hp}' is larger than its "
                            "range. We set it to the minimum of the default "
                            f"number of samples (i.e. {DEFAULT_NSAMPLE}) and "
                            f"its range length (i.e. {len(hp_range)})."
                        )
                else:
                    self.num_samples[hp] = min(len(hp_range), DEFAULT_NSAMPLE)
            # num_samples is ignored for categorical hyperparameters.
            if isinstance(hp_range, Categorical) or isinstance(hp_range, FiniteRange):
                if hp in self.num_samples:
                    logger.info(
                        'Number of samples for categorical variable "{}" is ignored.'.format(
                            hp
                        )
                    )

    def _generate_all_candidates_on_grid(self):
        """
        Generates all configurations to be evaluated by placing a regular,
        equally spaced grid over the configuration space.
        """
        hp_keys = []
        hp_values = []
        # Adding categorical, FiniteRange, scalar parameters
        for hp, hp_range in reversed(list(self.config_space.items())):
            if isinstance(hp_range, Float) or isinstance(hp_range, Integer):
                continue
            if isinstance(hp_range, Categorical):
                hp_keys.append(hp)
                values = list(OrderedDict.fromkeys(hp_range.categories))
                hp_values.append(values)
            elif isinstance(hp_range, FiniteRange):
                hp_keys.append(hp)
                hp_values.append(hp_range.values)
            elif not isinstance(hp_range, Domain):
                hp_keys.append(hp)
                hp_values.append([hp_range])

        # Adding float, integer parameters
        for hpr in self._hp_ranges._hp_ranges:
            if hpr.name not in hp_keys:
                _hpr_nsamples = self.num_samples[hpr.name]
                _normalized_points = [
                    (idx + 0.5) / _hpr_nsamples for idx in range(_hpr_nsamples)
                ]
                _hpr_points = [
                    hpr.from_ndarray(np.array([point])) for point in _normalized_points
                ]
                _hpr_points = list(set(_hpr_points))
                hp_keys.append(hpr.name)
                hp_values.append(_hpr_points)

        self.hp_keys = hp_keys
        self.hp_values_combinations = list(product(*hp_values))
        if self._shuffle_config:
            self.random_state.shuffle(self.hp_values_combinations)

    def get_config(self, **kwargs) -> Optional[dict]:
        """Select the next configuration from the grid.

        This is done without replacement, so previously returned configs are
        not suggested again.

        :return: A new configuration that is valid, or None if no new config
            can be suggested. The returned configuration is a dictionary that
            maps hyperparameters to their values.
        """
        new_config = self._next_initial_config()
        if new_config is None:
            new_config = self._next_candidate_on_grid()
        else:
            self._all_initial_configs.add(new_config)
        if new_config is None:
            msg = "All the configurations have already been evaluated."
            cs_size = config_space_size(self.config_space)
            if cs_size is not None:
                msg += f" Configuration space has size {cs_size}"
            logger.warning(msg)
        return new_config

    def _next_candidate_on_grid(self) -> Optional[dict]:
        """
        :return: Next configuration from the set of grid candidates, or None
            if no candidate is left.
        """
        num_combinations = len(self.hp_values_combinations)
        candidate = None
        while candidate is None and self._next_index < num_combinations:
            candidate = dict(
                zip(self.hp_keys, self.hp_values_combinations[self._next_index])
            )
            self._next_index += 1
            if self._all_initial_configs.contains(candidate):
                candidate = None
            if self._allow_duplicates and self._next_index == num_combinations:
                # Another round through the grid. It is important to reset
                # ``_all_initial_configs`` to empty, so the initial configs can
                # be suggested again in the second round
                self._next_index = 0
                self._all_initial_configs = ExclusionList(self._hp_ranges)
        return candidate

    def get_state(self) -> Dict[str, Any]:
        state = dict(
            super().get_state(),
            next_index=self._next_index,
            all_initial_configs=self._all_initial_configs.get_state(),
        )
        return state

    def clone_from_state(self, state: Dict[str, Any]):
        new_searcher = GridSearcher(
            config_space=self.config_space,
            num_samples=self.num_samples,
            metric=self._metric,
            shuffle_config=self._shuffle_config,
        )
        new_searcher._restore_from_state(state)
        return new_searcher

    def _restore_from_state(self, state: Dict[str, Any]):
        super()._restore_from_state(state)
        self._next_index = state["next_index"]
        self._all_initial_configs = ExclusionList(self._hp_ranges)
        self._all_initial_configs.clone_from_state(state["all_initial_configs"])

    def _update(self, trial_id: str, config: Dict[str, Any], result: Dict[str, Any]):
        pass
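
# Usage sketch (illustrative only, not part of the original module). The config space
# below is an assumption chosen for the example. ``num_samples`` controls how many grid
# points are placed along each numerical hyperparameter, while categorical values are
# always enumerated in full:
#
#     from syne_tune.config_space import choice, uniform
#
#     example_space = {
#         "lr": uniform(1e-6, 1e-2),
#         "optimizer": choice(["adam", "sgd"]),
#     }
#     searcher = GridSearcher(
#         example_space, metric="validation_loss", num_samples={"lr": 4}
#     )
#     # The grid has 4 * 2 = 8 configurations; repeated get_config() calls
#     # iterate over them (shuffled by default) and return None once exhausted.
#     config = searcher.get_config()
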