Source code for syne_tune.optimizer.schedulers.searchers.bayesopt.gpautograd.kernel.cross_validation

import autograd.numpy as anp
from autograd.builtins import isinstance
from typing import Dict, Any

from syne_tune.optimizer.schedulers.searchers.bayesopt.gpautograd.kernel.base import (
    KernelFunction,
)
from syne_tune.optimizer.schedulers.searchers.bayesopt.gpautograd.mean import (
    MeanFunction,
)


def decode_resource_values(res_encoded, num_folds):
    """
    Map encoded resource values back to the original scale.

    The resource attribute ``r`` is assumed to be encoded as
    :code:`randint(1, num_folds)`. Internally, ``r`` is treated as a value in
    the real interval ``[0.5, num_folds + 0.5]``, and the encoding maps this
    interval linearly onto ``[0, 1]``. This function inverts that linear map.

    :param res_encoded: Encoded values ``r``
    :param num_folds: Maximum number of folds
    :return: Original values ``r`` (not rounded to ``int``)
    """
    # The internal interval has width ``num_folds`` and starts at 0.5
    stretched = res_encoded * num_folds
    return stretched + 0.5
class CrossValidationKernelFunction(KernelFunction):
    r"""
    Kernel function suitable for :math:`f(x, r)` being the average of ``r``
    validation metrics evaluated on different (train, validation) splits.

    More specifically, there are ``num_folds`` such splits, and
    :math:`f(x, r)` is the average over the first ``r`` of them.

    We model the score on fold ``k`` as :math:`e_k(x) = f(x) + g_k(x)`, where
    :math:`f(x)` and the :math:`g_k(x)` are a priori independent Gaussian
    processes with kernels ``kernel_main`` and ``kernel_residual`` (all
    :math:`g_k` share the same kernel). Moreover, the :math:`g_k` are
    zero-mean, while :math:`f(x)` may have a mean function. Then:

    .. math::

       f(x, r) = r^{-1} \sum_{k \le r} e_k(x),

       k((x, r), (x', r')) = k_{main}(x, x') +
       \frac{k_{residual}(x, x')}{\mathrm{max}(r, r')}

    Note that ``kernel_main``, ``kernel_residual`` are over inputs :math:`x`
    (dimension ``d``), while the kernel represented here is over inputs
    :math:`(x, r)` of dimension ``d + 1``, where the resource attribute
    :math:`r` (number of folds) is last.

    Inputs are encoded. We assume a linear encoding for r with bounds 1 and
    ``num_folds``.

    TODO: Right now, all HPs are encoded, and the resource attribute counts
    as HP, even if it is not optimized over. This creates a dependence to how
    inputs are encoded.
    """

    def __init__(
        self,
        kernel_main: KernelFunction,
        kernel_residual: KernelFunction,
        mean_main: MeanFunction,
        num_folds: int,
        **kwargs,
    ):
        """
        :param kernel_main: Kernel for main effect :math:`f(x)`
        :param kernel_residual: Kernel for residuals :math:`g_k(x)`
        :param mean_main: Mean function for main effect :math:`f(x)`
        :param num_folds: Maximum number of folds: ``1 <= r <= num_folds``
        """
        # One extra input column is reserved for the resource attribute
        super().__init__(dimension=kernel_main.dimension + 1, **kwargs)
        dims_agree = kernel_main.dimension == kernel_residual.dimension
        assert dims_agree, (
            f"kernel_main.dimension = {kernel_main.dimension} != "
            + f"{kernel_residual.dimension} = kernel_residual.dimension"
        )
        is_whole_number = round(num_folds) == num_folds
        assert (
            is_whole_number and num_folds >= 2
        ), f"num_folds = {num_folds} must be int >= 2"
        self.num_folds = num_folds
        self.mean_main = mean_main
        self.kernel_residual = kernel_residual
        self.kernel_main = kernel_main

    def _compute_terms(self, X):
        """
        Split encoded inputs into the configuration part and the decoded
        resource part.

        :param X: Encoded inputs; the first ``kernel_main.dimension`` columns
            are passed on as configuration, the remaining column(s) are the
            encoded resource
        :return: ``(cfg, res_decoded)``
        """
        split_at = self.kernel_main.dimension
        encoded_resources = X[:, split_at:]
        configs = X[:, :split_at]
        return configs, decode_resource_values(encoded_resources, self.num_folds)

    def _prefixed_components(self):
        """
        Pairs of (parameter name prefix, component) used by
        :meth:`get_params` and :meth:`set_params`.
        """
        return (
            ("kernelm_", self.kernel_main),
            ("meanm_", self.mean_main),
            ("kernelr_", self.kernel_residual),
        )

    def forward(self, X1, X2, **kwargs):
        """
        Kernel matrix ``k_main(x, x') + k_residual(x, x') / max(r, r')``.

        :param X1: Encoded inputs ``(x, r)``, one per row
        :param X2: Encoded inputs ``(x', r')``, one per row. If this is the
            same object as ``X1``, the split of ``X1`` is reused
        :return: Kernel matrix between rows of ``X1`` and rows of ``X2``
        """
        cfg_left, res_left = self._compute_terms(X1)
        if X2 is X1:
            cfg_right, res_right = cfg_left, res_left
        else:
            cfg_right, res_right = self._compute_terms(X2)
        # Column vs. row layout, so that the maximum broadcasts to a matrix
        res_as_column = anp.reshape(res_left, (-1, 1))
        res_as_row = anp.reshape(res_right, (1, -1))
        main_part = self.kernel_main(cfg_left, cfg_right)
        residual_part = self.kernel_residual(cfg_left, cfg_right)
        larger_resource = anp.maximum(res_as_column, res_as_row)
        return (residual_part / larger_resource) + main_part

    def diagonal(self, X):
        """
        Diagonal of the kernel matrix: ``k_main(x, x) + k_residual(x, x) / r``.

        :param X: Encoded inputs ``(x, r)``, one per row
        :return: Diagonal of kernel matrix for ``X``
        """
        cfg, res = self._compute_terms(X)
        flat_res = anp.reshape(res, (-1,))
        main_diag = self.kernel_main.diagonal(cfg)
        residual_diag = self.kernel_residual.diagonal(cfg)
        return (residual_diag / flat_res) + main_diag

    def diagonal_depends_on_X(self):
        """
        :return: Always ``True``, since the diagonal is divided by the
            resource value of each input
        """
        return True

    def param_encoding_pairs(self):
        """
        :return: Concatenated parameter encoding pairs of ``kernel_main``,
            ``kernel_residual`` and ``mean_main`` (in this order)
        """
        pairs = []
        for component in (self.kernel_main, self.kernel_residual, self.mean_main):
            pairs.extend(component.param_encoding_pairs())
        return pairs

    def mean_function(self, X):
        """
        Evaluate ``mean_main`` on the configuration part of ``X``; the
        resource column is ignored.

        :param X: Encoded inputs ``(x, r)``, one per row
        :return: Result of ``mean_main`` on the configuration columns
        """
        cfg, _ = self._compute_terms(X)
        return self.mean_main(cfg)

    def get_params(self) -> Dict[str, Any]:
        """
        :return: Parameters of ``kernel_main``, ``mean_main`` and
            ``kernel_residual``, with names prefixed by ``"kernelm_"``,
            ``"meanm_"`` and ``"kernelr_"`` respectively
        """
        collected = {}
        for prefix, component in self._prefixed_components():
            for name, value in component.get_params().items():
                collected[prefix + name] = value
        return collected

    def set_params(self, param_dict: Dict[str, Any]):
        """
        Inverse of :meth:`get_params`: each component receives the entries
        whose names start with its prefix, with the prefix removed.

        :param param_dict: Parameters with prefixed names
        """
        for prefix, component in self._prefixed_components():
            skip = len(prefix)
            own_params = {}
            for name, value in param_dict.items():
                if name.startswith(prefix):
                    own_params[name[skip:]] = value
            component.set_params(own_params)
class CrossValidationMeanFunction(MeanFunction):
    """
    Mean function which delegates to
    :meth:`CrossValidationKernelFunction.mean_function` of the given kernel.

    It reports no parameters of its own: the kernel's
    ``param_encoding_pairs`` already includes those of its ``mean_main``.
    """

    def __init__(self, kernel: CrossValidationKernelFunction, **kwargs):
        """
        :param kernel: Kernel whose ``mean_function`` is evaluated in
            :meth:`forward`
        """
        super().__init__(**kwargs)
        assert isinstance(kernel, CrossValidationKernelFunction)
        self.kernel = kernel

    def forward(self, X):
        """
        :param X: Encoded inputs, passed on unchanged to the kernel
        :return: ``kernel.mean_function(X)``
        """
        mean_values = self.kernel.mean_function(X)
        return mean_values

    def param_encoding_pairs(self):
        """
        :return: Empty list
        """
        return list()

    def get_params(self) -> Dict[str, Any]:
        """
        :return: Empty dictionary
        """
        return {}

    def set_params(self, param_dict: Dict[str, Any]):
        """
        Does nothing; ``param_dict`` is ignored.
        """
        return None