# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.
from typing import Optional, List, Tuple
import copy
import numpy as np
from numpy.random import RandomState
from operator import itemgetter
from syne_tune.optimizer.schedulers.searchers.bayesopt.datatypes.tuning_job_state import (
TuningJobState,
)
from syne_tune.optimizer.schedulers.searchers.bayesopt.datatypes.common import (
TrialEvaluations,
INTERNAL_METRIC_NAME,
)
from syne_tune.optimizer.schedulers.searchers.bayesopt.models.model_transformer import (
StateForModelConverter,
)

# Observations as a list of ``(trial_id, metric_value)`` pairs
ObservedData = List[Tuple[int, float]]

def _extract_observations(
trials_evaluations: List[TrialEvaluations],
) -> ObservedData:
"""
Maps ``trials_evaluations`` to list of tuples :math:`(i, y_i)`, where
:math:`y_i` is the observed value for trial ID :math:`i`.
:param trials_evaluations: See above
:return: See above
"""
return [
(int(trial_eval.trial_id), trial_eval.metrics[INTERNAL_METRIC_NAME])
for trial_eval in trials_evaluations
]


def _create_trials_evaluations(data: ObservedData) -> List[TrialEvaluations]:
    """Inverse of :func:`_extract_observations`

    :param data: List of ``(trial_id, metric_value)`` tuples
    :return: Resulting ``trials_evaluations``
    """
return [
TrialEvaluations(
trial_id=str(trial_id), metrics={INTERNAL_METRIC_NAME: metric_val}
)
for trial_id, metric_val in data
]
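
# A minimal sketch of how the two helpers above invert each other; trial IDs
# and metric values are made up for illustration:
#
#     data = [(0, 0.25), (3, 0.17)]
#     trials = _create_trials_evaluations(data)
#     assert _extract_observations(trials) == data
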
def cap_size_tuning_job_state(
state: TuningJobState,
max_size: int,
mode: str,
top_fraction: float,
random_state: Optional[RandomState] = None,
) -> TuningJobState:
"""
Returns state which is identical to ``state``, except that the
``trials_evaluations`` are replaced by a subset so the total number of
metric values is ``<= max_size``.
:param state: Original state to filter down
:param max_size: Maximum number of observed metric values in new state
:param mode: "min" or "max"
:param top_fraction: See above
:param random_state: Used for random sampling. Defaults to ``numpy.random``.
:return: New state meeting the ``max_size`` constraint. This is a copy of
``state`` even if this meets the constraint already.
"""
    if random_state is None:
        random_state = np.random
    total_size = state.num_observed_cases()
    if total_size <= max_size:
        trials_evaluations = copy.deepcopy(state.trials_evaluations)
    else:
        # Sort observations so that the best come first ("min": ascending,
        # "max": descending)
        data = sorted(
            _extract_observations(state.trials_evaluations),
            key=itemgetter(1),
            reverse=mode == "max",
        )
        # The best ``round(max_size * top_fraction)`` observations are kept
        # deterministically
        n_top = int(round(max_size * top_fraction))
        new_data = data[:n_top]
        # The remaining slots are filled by sampling without replacement
        # from the observations outside the top set
        n_rem = max_size - n_top
        if n_rem > 0:
            index = random_state.choice(total_size - n_top, size=n_rem, replace=False)
            new_data += [data[n_top + i] for i in index]
        trials_evaluations = _create_trials_evaluations(new_data)
return TuningJobState(
hp_ranges=state.hp_ranges,
config_for_trial=state.config_for_trial.copy(),
trials_evaluations=trials_evaluations,
failed_trials=state.failed_trials.copy(),
pending_evaluations=state.pending_evaluations.copy(),
)
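
# A minimal sketch of calling this function directly, assuming ``state`` is an
# existing ``TuningJobState`` with more than 50 observations:
#
#     new_state = cap_size_tuning_job_state(
#         state,
#         max_size=50,
#         mode="min",
#         top_fraction=0.2,
#         random_state=RandomState(31415927),
#     )
#     # round(50 * 0.2) = 10 best observations are kept deterministically,
#     # the remaining 40 are sampled from the rest without replacement
#     assert new_state.num_observed_cases() <= 50
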
class SubsampleSingleFidelityStateConverter(StateForModelConverter):
"""
Converts state by (possibly) down sampling the observation so that their
total number is ``<= max_size``. If ``len(state) > max_size``, the subset
is sampled as follows. ``max_size * top_fraction`` is filled with the best
observations. The remainder is sampled without replacement from the
remaining observations.
:param max_size: Maximum number of observed metric values in new state
:param mode: "min" or "max"
:param top_fraction: See above
:param random_state: Used for random sampling. Can also be set with
:meth:`set_random_state`
"""
def __init__(
self,
max_size: int,
mode: str,
top_fraction: float,
random_state: Optional[RandomState] = None,
):
support_mode = ["min", "max"]
assert (
mode in support_mode
), f"mode = {mode} not supported, must be in {support_mode}"
assert (
0 <= top_fraction <= 1
), f"top_fraction = {top_fraction} must be in [0, 1]"
self.max_size = int(max_size)
assert self.max_size >= 1
self._random_state = random_state
self._mode = mode
self._top_fraction = top_fraction
def __call__(self, state: TuningJobState) -> TuningJobState:
assert (
self._random_state is not None
), "Call set_random_state before first usage"
return cap_size_tuning_job_state(
state=state,
max_size=self.max_size,
mode=self._mode,
top_fraction=self._top_fraction,
random_state=self._random_state,
)

    def set_random_state(self, random_state: RandomState):
        """Sets the random state (needed if none was passed at construction)."""
self._random_state = random_state
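

# A minimal sketch of using the converter on its own, assuming ``state`` is an
# existing ``TuningJobState``; within Syne Tune, such converters are applied to
# the state before it is passed to the surrogate model:
#
#     converter = SubsampleSingleFidelityStateConverter(
#         max_size=500, mode="min", top_fraction=0.1
#     )
#     converter.set_random_state(RandomState(0))  # required before first call
#     capped_state = converter(state)
#     assert capped_state.num_observed_cases() <= 500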