# Source code for syne_tune.optimizer.schedulers.searchers.conformal.surrogate.surrogate_model

import logging
from typing import Callable, Dict, List, Optional, Tuple, Union

import numpy as np
import pandas as pd

from syne_tune.config_space import Domain


class SurrogateModel:
    """Base class for surrogate models that propose configurations.

    The model keeps a pool of candidate configurations. Before ``fit`` has been
    called, ``suggest`` returns a random configuration. Afterwards it draws one
    sample per candidate from a normal distribution parameterised by the
    output of ``predict`` and proposes the candidate with the best sample
    according to ``mode``.

    Subclasses are expected to implement ``_fit`` and ``predict``.
    """

    def __init__(
        self,
        config_space: Dict,
        mode: str,
        max_fit_samples: Optional[int] = None,
        random_state: Optional[np.random.RandomState] = None,
    ):
        """
        :param config_space: maps each hyperparameter name to either a
            ``Domain`` (which is sampled) or a fixed value (used as is)
        :param mode: ``"max"`` to pick the candidate with the largest sample;
            any other value picks the candidate with the smallest sample
        :param max_fit_samples: stored on the instance; not used by this base
            class
        :param random_state: source of randomness; the global ``np.random``
            module is used when ``None``
        """
        self.random_state = random_state if random_state is not None else np.random
        self.config_space = config_space
        self.mode = mode
        self.config_candidates = []
        self.config_seen = set()
        # Stays ``None`` until ``fit`` has been called.
        self._sampler = None
        self.max_fit_samples = max_fit_samples

    def suggest(self, replace_config: bool = False) -> dict:
        """Propose the next configuration to evaluate.

        :param replace_config: if ``True``, the proposed candidate is replaced
            in the candidate pool by a freshly sampled configuration
        :return: a random configuration if the model has not been fitted yet,
            otherwise the candidate with the best sampled prediction
        """
        if self._sampler is None:
            return self._sample_random()
        config_idx = self._sample_best()
        config = self.config_candidates[config_idx]
        if replace_config:
            self._replace_config_by_new_sample(config_idx)
        self.config_seen.add(tuple(config.values()))
        return config

    def fit(
        self,
        df_features: pd.DataFrame,
        y: np.ndarray,
        ncandidates: Union[int, pd.DataFrame] = 2000,
    ):
        """Fit the model and (re)build the candidate pool and its sampler.

        :param df_features: input features passed on to ``_fit``
        :param y: targets passed on to ``_fit``
        :param ncandidates: either the number of candidates to sample at
            random, or a dataframe whose rows are the candidates to use
        :raises ValueError: if ``ncandidates`` is neither an integer nor a
            dataframe
        """
        self._fit(df_features=df_features, y=y)
        # NumPy integers are accepted as well as built-in ints.
        if isinstance(ncandidates, (int, np.integer)):
            self._update_candidates(n_candidates=ncandidates)
        elif isinstance(ncandidates, pd.DataFrame):
            self.df_candidates = ncandidates
            self.config_candidates = ncandidates.to_dict(orient="records")
        else:
            raise ValueError(f"wrong type for {ncandidates}")
        self._sampler = self._get_sampler(self.df_candidates)

    def _fit(self, df_features: pd.DataFrame, y: np.ndarray):
        """Fit the underlying model; a no-op hook in this base class.

        :param df_features: input features with shape (n, d)
        :param y: expected output with shape (n, 1) TODO unify
        :return:
        """
        pass

    def _sample_best(self) -> int:
        """Return the index of the candidate whose sampled prediction is best."""
        samples = self._surrogate_pred()
        # Picking the arg-max directly avoids negating the samples in place.
        if self.mode == "max":
            return int(np.argmax(samples))
        return int(np.argmin(samples))

    def _surrogate_pred(self):
        """Draw one sample per candidate from the current sampler."""
        return self._sampler()

    def predict(self, df_features: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]:
        """Return the predicted mean and standard deviation of ``y | x``.

        Must be implemented by subclasses.

        :param df_features: input features to make predictions with shape (n, d)
        :return: predictions in the shape of (n,) # TODO should we rather have
            (n, 1)? input is taken in this form
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement `predict`"
        )

    def _get_sampler(self, df_features: pd.DataFrame) -> Callable[[], np.ndarray]:
        """Build a zero-argument function sampling predictions for ``df_features``.

        ``predict`` is evaluated once here so that repeated draws do not
        recompute the mean and standard deviation.
        """
        mu, std = self.predict(df_features)

        def sample():
            # Looked up at call time so a later change to ``self.random_state``
            # is honoured.
            random_state = (
                self.random_state if self.random_state is not None else np.random
            )
            return random_state.normal(mu, std)

        return sample

    def _update_candidates(self, n_candidates: int = 2000) -> None:
        """Replace the candidate pool with ``n_candidates`` random configurations."""
        self.config_candidates = [self._sample_random() for _ in range(n_candidates)]
        self.df_candidates = self._configs_to_df(self.config_candidates)

    def _sample_random_unseen(self, num_tries: int = 100) -> Dict:
        """Sample a random configuration that has not been suggested before.

        :param num_tries: maximum number of random draws; at least one draw is
            always made
        :return: the first unseen configuration drawn or, if every draw was
            already seen, the last one drawn (a warning is logged in that case)
        """
        # Always draw at least once so there is a configuration to return.
        for _ in range(max(num_tries, 1)):
            new_config = self._sample_random()
            if not self._config_already_seen(new_config):
                return new_config
        logging.warning(f"could not sample an unseen config in {num_tries} tries.")
        return new_config

    def _replace_config_by_new_sample(self, config_idx: int):
        """Replace candidate ``config_idx`` by a fresh sample and refresh the sampler.

        :raises IndexError: if ``config_idx`` is past the end of the pool
        """
        if config_idx >= len(self.config_candidates):
            raise IndexError(
                f"config_idx {config_idx} out of range for "
                f"{len(self.config_candidates)} candidates"
            )
        # replace the config selected by a new one
        self.config_candidates[config_idx] = self._sample_random_unseen()
        # Once the candidates are updated, we need to update df candidates and
        # the sampler
        self.df_candidates = self._configs_to_df(self.config_candidates)
        self._sampler = self._get_sampler(self.df_candidates)

    def _sample_random(self) -> Dict:
        """Sample one configuration: ``Domain`` values are sampled, others kept."""
        return {
            k: v.sample(random_state=self.random_state) if isinstance(v, Domain) else v
            for k, v in self.config_space.items()
        }

    def _configs_to_df(self, configs: List[Dict]) -> pd.DataFrame:
        """Convert a list of configurations into a dataframe, one row each."""
        return pd.DataFrame(configs)

    def _config_already_seen(self, config) -> bool:
        """Tell whether ``config`` was already returned by a fitted ``suggest``."""
        return tuple(config.values()) in self.config_seen