# Source code for syne_tune.callbacks.hyperband_remove_checkpoints_score

# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.
import numpy as np
import logging

from syne_tune.try_import import try_import_gpsearchers_message

try:
    from scipy.special import betainc
except ImportError:
    logging.info(try_import_gpsearchers_message())


def _binomial_cdf(
    u_vals: np.ndarray, n_vals: np.ndarray, p_vals: np.ndarray
) -> np.ndarray:
    r"""
    Computes binomial cumulative distribution function :math:`P(X \le u)`, where
    :math:`X\sim \mathrm{bin}(n, p)`.

    :param u_vals: Values for :math:`u`
    :param n_vals: Values for :math:`n`
    :param p_vals: Values for :math:`p`
    :return: CDF values
    """
    a_vals = np.maximum(n_vals - u_vals, 1e-7)
    b_vals = np.maximum(u_vals + 1, 1e-7)
    result = betainc(a_vals, b_vals, 1 - p_vals)
    result[u_vals >= n_vals] = 1
    return result


def _compute_binomial_probabilities(
    l_indices: np.ndarray, p_vals: np.ndarray
) -> np.ndarray:
    r"""
    Computes the 3D tensor with entries

    .. math::
       q_{i, j}^{(s)}
       = P\left\{ \mathrm{bin}(l_i - l_{i-1}, p_r) = j \right\}, \\
       l_i = l_i^{(s)},\; p_r = p_{r_s},

    of shape ``(rs, rs, n_scores)``. Position ``(i, j, s)`` maps to
    :math:`q_{i, r_* - j}^{(s)}`, i.e. the ordering along the ``j`` axis is
    inverted.

    Some ``(i, s)`` entries may coincide, which would allow saving time; this
    is not exploited here.

    :param l_indices: :math:`[l_i^{(s)}]`, shape ``(rs, n_scores)``
    :param p_vals: Probabilities :math:`[p_{r_s}]`, shape ``(1, n_scores)``
    :return: See above, shape ``(rs, rs, n_scores)``
    """
    rs = l_indices.shape[0]
    # Per-step increments l_i - l_{i-1}, with l_{-1} = 0 for the first row
    l_diffs = np.diff(l_indices, axis=0, prepend=0)
    # CDF values P(X(l_i - l_{i-1}, p_r) <= r_* - j), tensor index (i, j, s).
    # The j axis runs over u = rs-1, ..., 0 (reversed). Many of these CDF
    # values could be = 1, which might allow saving time here.
    cdf_vals = _binomial_cdf(
        u_vals=np.arange(rs - 1, -1, -1).reshape((1, -1, 1)),
        n_vals=l_diffs.reshape((rs, 1, -1)),
        p_vals=p_vals.reshape((1, 1, -1)),
    )
    # Convert CDF values to point probabilities P(X = r_* - j) by finite
    # differences along the (reversed) j axis; the final column is
    # P(X <= 0) = P(X = 0) and is kept as-is.
    final_col = cdf_vals[:, -1, :].reshape((rs, 1, -1))
    diffs = cdf_vals[:, :-1, :] - cdf_vals[:, 1:, :]
    return np.concatenate((diffs, final_col), axis=1)


def compute_probabilities_of_getting_resumed(
    ranks: np.ndarray,
    rung_lens: np.ndarray,
    prom_quants: np.ndarray,
    p_vals: np.ndarray,
    time_ratio: float,
    approx_steps: int,
) -> np.ndarray:
    r"""
    Computes an approximation to the probability of getting resumed under our
    independence assumptions. This approximation improves with larger
    ``approx_steps``, but its cost scales cubically in this number.

    :param ranks: Ranks :math:`k`, starting from 1 (smaller is better)
    :param rung_lens: Rung lengths :math:`n_r`
    :param prom_quants: Promotion quantiles :math:`\alpha_r`
    :param p_vals: Probabilities :math:`p_r`
    :param time_ratio: Ratio :math:`\beta` between time left and time spent
    :param approx_steps: Number of approximation steps, see above
    :return: Approximations of probability to get resumed
    """
    assert time_ratio > 0
    approx_steps = int(approx_steps)
    assert approx_steps >= 1
    # Precompute :math:`l_r` and binomial probabilities
    c_vals = ranks - prom_quants * rung_lens
    orig_num_trials = c_vals.size
    cpos_indicator = c_vals > 0
    if not all(cpos_indicator):
        # For some candidates, the condition is already fulfilled. For these,
        # we return 1
        non_trivial_index = np.nonzero(cpos_indicator)[0]
        c_vals = c_vals[non_trivial_index]
        prom_quants = prom_quants[non_trivial_index]
        p_vals = p_vals[non_trivial_index]
        rung_lens = rung_lens[non_trivial_index]
    else:
        non_trivial_index = None
    num_trials = c_vals.size
    c_vals = c_vals.reshape((1, -1))
    prom_quants = prom_quants.reshape((1, -1))
    p_vals = p_vals.reshape((1, -1))
    # :math:`l_i = \lceil (C + i) / \alpha_r \rceil`
    l_indices = np.ceil(
        (c_vals + np.arange(0, approx_steps + 1).reshape((-1, 1))) / prom_quants
    )
    n_prime = np.ceil(rung_lens * time_ratio)  # Maximum lengths
    # We can stop once :math:`l_r > n'`. Maybe, this allows to reduce the
    # overall ``approx_steps``
    max_r = np.max(np.sum(l_indices <= n_prime.reshape((1, -1)), axis=0)) - 1
    approx_steps = max(min(approx_steps, max_r), 1)
    if approx_steps < l_indices.shape[0] - 1:
        l_indices = l_indices[: (approx_steps + 1), :]
    # Tensor :math:`q_{i,j}^{(s)}`. Note that index ``(i, j, s)`` maps to
    # :math:`q_{i,r_* - j}^{(s)}`, where :math:`r_*` is ``approx_steps``, so
    # the ordering is reversed on the ``j`` dimension
    q_vals = _compute_binomial_probabilities(l_indices, p_vals)
    # Dynamic programming: Compute :math:`M_i` probabilities and sum up
    # target probabilities
    m_vals = [np.zeros((0, num_trials)) for _ in range(approx_steps)]
    # Start with :math:`m_{0,0} = q_{0,0}`. Take a copy: the slice is a view
    # into ``q_vals``, and ``target_probs += m_row`` below would otherwise
    # silently modify ``q_vals`` in place
    target_probs = q_vals[0, approx_steps, :].reshape((-1,)).copy()
    for r in range(1, approx_steps + 1):
        m_row = q_vals[0, approx_steps - r, :]  # :math:`m_{0,r} = q_{0,r}`
        m_vals[0] = np.vstack((m_vals[0], m_row))
        for i in range(1, r + 1):
            zero_indicator = l_indices[i] > n_prime
            if not all(zero_indicator):
                num_j = r - i + 1
                qtil_mat = q_vals[i, -num_j:, :].reshape((num_j, -1))
                assert qtil_mat.shape == m_vals[i - 1].shape, (
                    qtil_mat.shape,
                    m_vals[i - 1].shape,
                )
                m_row = np.sum(qtil_mat * m_vals[i - 1], axis=0)
                m_row[zero_indicator] = 0
            else:
                m_row = np.zeros(num_trials)
            # ``m_row`` is :math:`m_{i,r}`
            if i < r:
                m_vals[i] = np.vstack((m_vals[i], m_row))
            else:
                target_probs += m_row
    if non_trivial_index is not None:
        # Scatter results back: trials whose condition was already fulfilled
        # get probability 1
        tempvec = target_probs
        target_probs = np.ones(orig_num_trials)
        target_probs[non_trivial_index] = tempvec
    return target_probs