Code in benchmarking/examples/benchmark_hypertune
Comparison of Hyper-Tune against a number of baselines. This example also demonstrates how results can be plotted. The first listing, baselines.py, defines the methods to be compared: each entry maps a method name to a factory which creates the corresponding scheduler from the common method arguments.
from syne_tune.experiments.default_baselines import (
    ASHA,
    MOBSTER,
    HyperTune,
    SyncHyperband,
    SyncBOHB,
)


class Methods:
    ASHA = "ASHA"
    MOBSTER_JOINT = "MOBSTER-JOINT"
    MOBSTER_INDEP = "MOBSTER-INDEP"
    HYPERTUNE_INDEP = "HYPERTUNE-INDEP"
    HYPERTUNE_JOINT = "HYPERTUNE-JOINT"
    SYNCHB = "SYNCHB"
    BOHB = "BOHB"


methods = {
    Methods.ASHA: lambda method_arguments: ASHA(
        method_arguments,
        type="promotion",
    ),
    Methods.MOBSTER_JOINT: lambda method_arguments: MOBSTER(
        method_arguments,
        type="promotion",
    ),
    Methods.MOBSTER_INDEP: lambda method_arguments: MOBSTER(
        method_arguments,
        type="promotion",
        search_options=dict(model="gp_independent"),
    ),
    Methods.HYPERTUNE_INDEP: lambda method_arguments: HyperTune(
        method_arguments,
        type="promotion",
        search_options=dict(model="gp_independent"),
    ),
    Methods.HYPERTUNE_JOINT: lambda method_arguments: HyperTune(
        method_arguments,
        type="promotion",
        search_options=dict(model="gp_multitask"),
    ),
    Methods.SYNCHB: lambda method_arguments: SyncHyperband(method_arguments),
    Methods.BOHB: lambda method_arguments: SyncBOHB(method_arguments),
}
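Since every value in ``methods`` is simply a factory returning a configured scheduler, further baselines can be added with a single entry. A minimal sketch, assuming ``RandomSearch`` is also exported by ``syne_tune.experiments.default_baselines`` (it is not part of the original example):

# Hedged sketch: extending the comparison with random search. Assumes
# ``RandomSearch`` is available from syne_tune.experiments.default_baselines.
from syne_tune.experiments.default_baselines import RandomSearch

methods["RS"] = lambda method_arguments: RandomSearch(method_arguments)

The next listing, benchmark_definitions.py, selects the benchmarks every method is run on: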
from syne_tune.experiments.benchmark_definitions import (
    nas201_benchmark_definitions,
    fcnet_benchmark_definitions,
    lcbench_selected_benchmark_definitions,
)


benchmark_definitions = {
    **nas201_benchmark_definitions,
    **fcnet_benchmark_definitions,
    **lcbench_selected_benchmark_definitions,
}
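The full comparison runs every method on all of these benchmarks. For a quicker turnaround, the dictionary can be restricted to a subset; a minimal sketch (the key names below are the ones used by the plotting code further down):

# Sketch: keep only the two NASBench-201 benchmarks that are plotted below
benchmark_definitions = {
    name: benchmark_definitions[name]
    for name in ("nas201-cifar100", "nas201-ImageNet16-120")
}

The launcher script hpo_main.py wires methods and benchmarks together and adds two command line arguments specific to this study: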
from typing import Dict, Any

from baselines import methods
from benchmark_definitions import benchmark_definitions
from syne_tune.experiments.launchers.hpo_main_simulator import main
from syne_tune.util import recursive_merge


extra_args = [
    dict(
        name="num_brackets",
        type=int,
        help="Number of brackets",
    ),
    dict(
        name="num_samples",
        type=int,
        default=50,
        help="Number of samples for Hyper-Tune distribution",
    ),
]


def map_method_args(args, method: str, method_kwargs: Dict[str, Any]) -> Dict[str, Any]:
    """
    Maps the extra command line arguments into ``method_kwargs``:
    ``num_samples`` is used by the Hyper-Tune methods only, while
    ``num_brackets`` (if given) is passed to every method. The result is
    merged into ``method_kwargs["scheduler_kwargs"]``.
    """
    if method.startswith("HYPERTUNE"):
        scheduler_kwargs = {
            "search_options": {"hypertune_distribution_num_samples": args.num_samples},
        }
    else:
        scheduler_kwargs = dict()
    if args.num_brackets is not None:
        scheduler_kwargs["brackets"] = args.num_brackets
    if scheduler_kwargs:
        method_kwargs = recursive_merge(
            method_kwargs, {"scheduler_kwargs": scheduler_kwargs}
        )
    return method_kwargs


if __name__ == "__main__":
    main(methods, benchmark_definitions, extra_args, map_method_args)
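To see what ``map_method_args`` does, here is a small hedged sketch; the ``Namespace`` object simply stands in for the parsed command line arguments:

from argparse import Namespace

example_args = Namespace(num_brackets=3, num_samples=50)
method_kwargs = map_method_args(example_args, "HYPERTUNE-INDEP", dict())
# method_kwargs == {
#     "scheduler_kwargs": {
#         "search_options": {"hypertune_distribution_num_samples": 50},
#         "brackets": 3,
#     }
# }

The next listing is the remote launching script; it uses hpo_main.py as entry point and distinguishes expensive from cheap methods: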
from pathlib import Path

from benchmark_definitions import benchmark_definitions
from baselines import methods
from hpo_main import extra_args
from syne_tune.experiments.launchers.launch_remote_simulator import launch_remote


if __name__ == "__main__":

    def _is_expensive_method(method: str) -> bool:
        return method.startswith("MOBSTER") or method.startswith("HYPERTUNE")

    entry_point = Path(__file__).parent / "hpo_main.py"
    launch_remote(
        entry_point=entry_point,
        methods=methods,
        benchmark_definitions=benchmark_definitions,
        extra_args=extra_args,
        is_expensive_method=_is_expensive_method,
    )
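``_is_expensive_method`` flags the model-based methods (the MOBSTER and Hyper-Tune variants), so that the remote launcher can treat them differently from the cheap baselines (for example, by giving them more parallel resources). A quick hypothetical check, not part of the example, of which methods are flagged:

# Hypothetical check: which of the methods defined above count as expensive?
expensive = [
    name
    for name in methods
    if name.startswith("MOBSTER") or name.startswith("HYPERTUNE")
]
# -> ["MOBSTER-JOINT", "MOBSTER-INDEP", "HYPERTUNE-INDEP", "HYPERTUNE-JOINT"]

The last Python listing creates comparative plots from the results of these experiments: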
from typing import Dict, Any, Optional
import logging

from baselines import methods
from benchmark_definitions import benchmark_definitions
from syne_tune.experiments import ComparativeResults, PlotParameters, SubplotParameters


def metadata_to_setup(metadata: Dict[str, Any]) -> Optional[str]:
    # The setup is the algorithm. No filtering
    return metadata["algorithm"]


SETUPS_RIGHT = ("ASHA", "SYNCHB", "BOHB")


def metadata_to_subplot(metadata: Dict[str, Any]) -> Optional[int]:
    return int(metadata["algorithm"] in SETUPS_RIGHT)


if __name__ == "__main__":
    logging.getLogger().setLevel(logging.INFO)
    experiment_name = "docs-1"
    experiment_names = (experiment_name,)
    setups = list(methods.keys())
    num_runs = 15
    download_from_s3 = False  # Set ``True`` in order to download files from S3
    # Plot parameters across all benchmarks
    plot_params = PlotParameters(
        xlabel="wall-clock time",
        aggregate_mode="iqm_bootstrap",
        grid=True,
    )
    # We would like two subplots (1 row, 2 columns), with MOBSTER and HYPERTUNE
    # results on the left, and the remaining baselines on the right. Each
    # column gets its own title, and legends are shown in both
    plot_params.subplots = SubplotParameters(
        nrows=1,
        ncols=2,
        kwargs=dict(sharey="all"),
        titles=["Model-based Methods", "Baselines"],
        legend_no=[0, 1],
    )
    # The creation of ``results`` downloads files from S3 (only if
    # ``download_from_s3 == True``), reads the metadata and creates an inverse
    # index. If any result files are missing, or there are too many of them,
    # warning messages are printed
    results = ComparativeResults(
        experiment_names=experiment_names,
        setups=setups,
        num_runs=num_runs,
        metadata_to_setup=metadata_to_setup,
        plot_params=plot_params,
        metadata_to_subplot=metadata_to_subplot,
        download_from_s3=download_from_s3,
    )
    # We can now create plots for the different benchmarks
    # First: nas201-cifar100
    benchmark_name = "nas201-cifar100"
    benchmark = benchmark_definitions[benchmark_name]
    # These parameters overwrite those given at construction
    plot_params = PlotParameters(
        metric=benchmark.metric,
        mode=benchmark.mode,
        ylim=(0.265, 0.31),
    )
    results.plot(
        benchmark_name=benchmark_name,
        plot_params=plot_params,
        file_name=f"./{experiment_name}-{benchmark_name}.png",
    )
    # Next: nas201-ImageNet16-120
    benchmark_name = "nas201-ImageNet16-120"
    benchmark = benchmark_definitions[benchmark_name]
    # These parameters overwrite those given at construction
    plot_params = PlotParameters(
        metric=benchmark.metric,
        mode=benchmark.mode,
        ylim=(0.535, 0.58),
    )
    results.plot(
        benchmark_name=benchmark_name,
        plot_params=plot_params,
        file_name=f"./{experiment_name}-{benchmark_name}.png",
    )
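The same pattern extends to the remaining benchmarks. A minimal sketch (without hand-tuned y-axis limits) loops over the whole dictionary:

    # Sketch: plot every benchmark in the comparison with default axis limits
    for benchmark_name, benchmark in benchmark_definitions.items():
        plot_params = PlotParameters(metric=benchmark.metric, mode=benchmark.mode)
        results.plot(
            benchmark_name=benchmark_name,
            plot_params=plot_params,
            file_name=f"./{experiment_name}-{benchmark_name}.png",
        )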
Finally, the extra dependencies needed to run the example (the content of its requirements file):

syne-tune[gpsearchers,kde,blackbox-repository,aws]
tqdm